00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <cstdlib>
00011 #include <cstring>
00012 #include <boost/regex.hpp>
00013 #include <boost/assert.hpp>
00014 #include <boost/logic/tribool.hpp>
00015 #include <boost/algorithm/string.hpp>
00016 #include <pion/algorithm.hpp>
00017 #include <pion/http/parser.hpp>
00018 #include <pion/http/request.hpp>
00019 #include <pion/http/response.hpp>
00020 #include <pion/http/message.hpp>
00021
00022
00023 namespace pion {
00024 namespace http {
00025
00026
00027
00028
// Definitions of the static data members of pion::http::parser.
//
// The *_MAX constants are upper bounds on the number of characters that the
// parsing routines in this file will accumulate for the corresponding item
// before giving up (each is compared against the size() of the string being
// built).
const boost::uint32_t parser::STATUS_MESSAGE_MAX = 1024;               // response status message
const boost::uint32_t parser::METHOD_MAX = 1024;                       // request method
const boost::uint32_t parser::RESOURCE_MAX = 256 * 1024;               // request URI stem
const boost::uint32_t parser::QUERY_STRING_MAX = 1024 * 1024;          // raw query string of the request line
const boost::uint32_t parser::HEADER_NAME_MAX = 1024;                  // header name
const boost::uint32_t parser::HEADER_VALUE_MAX = 1024 * 1024;          // header value
const boost::uint32_t parser::QUERY_NAME_MAX = 1024;                   // url-encoded parameter name
const boost::uint32_t parser::QUERY_VALUE_MAX = 1024 * 1024;           // url-encoded parameter value
const boost::uint32_t parser::COOKIE_NAME_MAX = 1024;                  // cookie name
const boost::uint32_t parser::COOKIE_VALUE_MAX = 1024 * 1024;          // cookie value
// NOTE(review): presumably the initial value of m_max_content_length; it is
// not referenced in this part of the file -- confirm against the class header.
const std::size_t parser::DEFAULT_CONTENT_MAX = 1024 * 1024;
// NOTE(review): these two look like a lazily created error-category singleton
// and its boost::call_once guard; the code using them is not visible here.
parser::error_category_t * parser::m_error_category_ptr = NULL;
boost::once_flag parser::m_instance_flag = BOOST_ONCE_INIT;
00042
00043
00044
00045
00046 boost::tribool parser::parse(http::message& http_msg,
00047 boost::system::error_code& ec)
00048 {
00049 BOOST_ASSERT(! eof() );
00050
00051 boost::tribool rc = boost::indeterminate;
00052 std::size_t total_bytes_parsed = 0;
00053
00054 if(http_msg.has_missing_packets()) {
00055 http_msg.set_data_after_missing_packet(true);
00056 }
00057
00058 do {
00059 switch (m_message_parse_state) {
00060
00061 case PARSE_START:
00062 m_message_parse_state = PARSE_HEADERS;
00063
00064
00065
00066 case PARSE_HEADERS:
00067 case PARSE_FOOTERS:
00068 rc = parse_headers(http_msg, ec);
00069 total_bytes_parsed += m_bytes_last_read;
00070
00071 if (rc == true && m_message_parse_state == PARSE_HEADERS) {
00072
00073
00074 rc = finish_header_parsing(http_msg, ec);
00075 }
00076 break;
00077
00078
00079 case PARSE_CHUNKS:
00080 rc = parse_chunks(http_msg.get_chunk_cache(), ec);
00081 total_bytes_parsed += m_bytes_last_read;
00082
00083 if (rc == true && !m_payload_handler) {
00084 http_msg.concatenate_chunks();
00085
00086
00087 rc = ((m_message_parse_state == PARSE_FOOTERS) ?
00088 boost::indeterminate : (boost::tribool)true);
00089 }
00090 break;
00091
00092
00093 case PARSE_CONTENT:
00094 rc = consume_content(http_msg, ec);
00095 total_bytes_parsed += m_bytes_last_read;
00096 break;
00097
00098
00099 case PARSE_CONTENT_NO_LENGTH:
00100 consume_content_as_next_chunk(http_msg.get_chunk_cache());
00101 total_bytes_parsed += m_bytes_last_read;
00102 break;
00103
00104
00105 case PARSE_END:
00106 rc = true;
00107 break;
00108 }
00109 } while ( boost::indeterminate(rc) && ! eof() );
00110
00111
00112 if (rc == true) {
00113 m_message_parse_state = PARSE_END;
00114 finish(http_msg);
00115 } else if(rc == false) {
00116 compute_msg_status(http_msg, false);
00117 }
00118
00119
00120 m_bytes_last_read = total_bytes_parsed;
00121
00122 return rc;
00123 }
00124
00125 boost::tribool parser::parse_missing_data(http::message& http_msg,
00126 std::size_t len, boost::system::error_code& ec)
00127 {
00128 static const char MISSING_DATA_CHAR = 'X';
00129 boost::tribool rc = boost::indeterminate;
00130
00131 http_msg.set_missing_packets(true);
00132
00133 switch (m_message_parse_state) {
00134
00135
00136 case PARSE_START:
00137 case PARSE_HEADERS:
00138 case PARSE_FOOTERS:
00139 set_error(ec, ERROR_MISSING_HEADER_DATA);
00140 rc = false;
00141 break;
00142
00143
00144 case PARSE_CHUNKS:
00145
00146 if (m_chunked_content_parse_state == PARSE_CHUNK
00147 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00148 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00149 {
00150
00151 if (m_payload_handler) {
00152 for (std::size_t n = 0; n < len; ++n)
00153 m_payload_handler(&MISSING_DATA_CHAR, 1);
00154 } else {
00155 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00156 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00157 }
00158
00159 m_bytes_read_in_current_chunk += len;
00160 m_bytes_last_read = len;
00161 m_bytes_total_read += len;
00162 m_bytes_content_read += len;
00163
00164 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00165 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00166 }
00167 } else {
00168
00169 set_error(ec, ERROR_MISSING_CHUNK_DATA);
00170 rc = false;
00171 }
00172 break;
00173
00174
00175 case PARSE_CONTENT:
00176
00177 if (m_bytes_content_remaining == 0) {
00178
00179 rc = true;
00180 } else if (m_bytes_content_remaining < len) {
00181
00182 set_error(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
00183 rc = false;
00184 } else {
00185
00186
00187 if (m_payload_handler) {
00188 for (std::size_t n = 0; n < len; ++n)
00189 m_payload_handler(&MISSING_DATA_CHAR, 1);
00190 } else if ( (m_bytes_content_read+len) <= m_max_content_length) {
00191
00192 for (std::size_t n = 0; n < len; ++n)
00193 http_msg.get_content()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00194 } else {
00195 m_bytes_content_read += len;
00196 }
00197
00198 m_bytes_content_remaining -= len;
00199 m_bytes_total_read += len;
00200 m_bytes_last_read = len;
00201
00202 if (m_bytes_content_remaining == 0)
00203 rc = true;
00204 }
00205 break;
00206
00207
00208 case PARSE_CONTENT_NO_LENGTH:
00209
00210 if (m_payload_handler) {
00211 for (std::size_t n = 0; n < len; ++n)
00212 m_payload_handler(&MISSING_DATA_CHAR, 1);
00213 } else {
00214 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00215 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00216 }
00217 m_bytes_last_read = len;
00218 m_bytes_total_read += len;
00219 m_bytes_content_read += len;
00220 break;
00221
00222
00223 case PARSE_END:
00224 rc = true;
00225 break;
00226 }
00227
00228
00229 if (rc == true) {
00230 m_message_parse_state = PARSE_END;
00231 finish(http_msg);
00232 } else if(rc == false) {
00233 compute_msg_status(http_msg, false);
00234 }
00235
00236 return rc;
00237 }
00238
00239 boost::tribool parser::parse_headers(http::message& http_msg,
00240 boost::system::error_code& ec)
00241 {
00242
00243
00244
00245
00246
00247
00248
00249 const char *read_start_ptr = m_read_ptr;
00250 m_bytes_last_read = 0;
00251 while (m_read_ptr < m_read_end_ptr) {
00252
00253 if (m_save_raw_headers)
00254 m_raw_headers += *m_read_ptr;
00255
00256 switch (m_headers_parse_state) {
00257 case PARSE_METHOD_START:
00258
00259 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') {
00260 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00261 set_error(ec, ERROR_METHOD_CHAR);
00262 return false;
00263 }
00264 m_headers_parse_state = PARSE_METHOD;
00265 m_method.erase();
00266 m_method.push_back(*m_read_ptr);
00267 }
00268 break;
00269
00270 case PARSE_METHOD:
00271
00272 if (*m_read_ptr == ' ') {
00273 m_resource.erase();
00274 m_headers_parse_state = PARSE_URI_STEM;
00275 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00276 set_error(ec, ERROR_METHOD_CHAR);
00277 return false;
00278 } else if (m_method.size() >= METHOD_MAX) {
00279 set_error(ec, ERROR_METHOD_SIZE);
00280 return false;
00281 } else {
00282 m_method.push_back(*m_read_ptr);
00283 }
00284 break;
00285
00286 case PARSE_URI_STEM:
00287
00288 if (*m_read_ptr == ' ') {
00289 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00290 } else if (*m_read_ptr == '?') {
00291 m_query_string.erase();
00292 m_headers_parse_state = PARSE_URI_QUERY;
00293 } else if (*m_read_ptr == '\r') {
00294 http_msg.set_version_major(0);
00295 http_msg.set_version_minor(0);
00296 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00297 } else if (*m_read_ptr == '\n') {
00298 http_msg.set_version_major(0);
00299 http_msg.set_version_minor(0);
00300 m_headers_parse_state = PARSE_EXPECTING_CR;
00301 } else if (is_control(*m_read_ptr)) {
00302 set_error(ec, ERROR_URI_CHAR);
00303 return false;
00304 } else if (m_resource.size() >= RESOURCE_MAX) {
00305 set_error(ec, ERROR_URI_SIZE);
00306 return false;
00307 } else {
00308 m_resource.push_back(*m_read_ptr);
00309 }
00310 break;
00311
00312 case PARSE_URI_QUERY:
00313
00314 if (*m_read_ptr == ' ') {
00315 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00316 } else if (*m_read_ptr == '\r') {
00317 http_msg.set_version_major(0);
00318 http_msg.set_version_minor(0);
00319 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00320 } else if (*m_read_ptr == '\n') {
00321 http_msg.set_version_major(0);
00322 http_msg.set_version_minor(0);
00323 m_headers_parse_state = PARSE_EXPECTING_CR;
00324 } else if (is_control(*m_read_ptr)) {
00325 set_error(ec, ERROR_QUERY_CHAR);
00326 return false;
00327 } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00328 set_error(ec, ERROR_QUERY_SIZE);
00329 return false;
00330 } else {
00331 m_query_string.push_back(*m_read_ptr);
00332 }
00333 break;
00334
00335 case PARSE_HTTP_VERSION_H:
00336
00337 if (*m_read_ptr == '\r') {
00338
00339 if (! m_is_request) {
00340 set_error(ec, ERROR_VERSION_EMPTY);
00341 return false;
00342 }
00343 http_msg.set_version_major(0);
00344 http_msg.set_version_minor(0);
00345 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00346 } else if (*m_read_ptr == '\n') {
00347
00348 if (! m_is_request) {
00349 set_error(ec, ERROR_VERSION_EMPTY);
00350 return false;
00351 }
00352 http_msg.set_version_major(0);
00353 http_msg.set_version_minor(0);
00354 m_headers_parse_state = PARSE_EXPECTING_CR;
00355 } else if (*m_read_ptr != 'H') {
00356 set_error(ec, ERROR_VERSION_CHAR);
00357 return false;
00358 }
00359 m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00360 break;
00361
00362 case PARSE_HTTP_VERSION_T_1:
00363
00364 if (*m_read_ptr != 'T') {
00365 set_error(ec, ERROR_VERSION_CHAR);
00366 return false;
00367 }
00368 m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00369 break;
00370
00371 case PARSE_HTTP_VERSION_T_2:
00372
00373 if (*m_read_ptr != 'T') {
00374 set_error(ec, ERROR_VERSION_CHAR);
00375 return false;
00376 }
00377 m_headers_parse_state = PARSE_HTTP_VERSION_P;
00378 break;
00379
00380 case PARSE_HTTP_VERSION_P:
00381
00382 if (*m_read_ptr != 'P') {
00383 set_error(ec, ERROR_VERSION_CHAR);
00384 return false;
00385 }
00386 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00387 break;
00388
00389 case PARSE_HTTP_VERSION_SLASH:
00390
00391 if (*m_read_ptr != '/') {
00392 set_error(ec, ERROR_VERSION_CHAR);
00393 return false;
00394 }
00395 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00396 break;
00397
00398 case PARSE_HTTP_VERSION_MAJOR_START:
00399
00400 if (!is_digit(*m_read_ptr)) {
00401 set_error(ec, ERROR_VERSION_CHAR);
00402 return false;
00403 }
00404 http_msg.set_version_major(*m_read_ptr - '0');
00405 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00406 break;
00407
00408 case PARSE_HTTP_VERSION_MAJOR:
00409
00410 if (*m_read_ptr == '.') {
00411 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00412 } else if (is_digit(*m_read_ptr)) {
00413 http_msg.set_version_major( (http_msg.get_version_major() * 10)
00414 + (*m_read_ptr - '0') );
00415 } else {
00416 set_error(ec, ERROR_VERSION_CHAR);
00417 return false;
00418 }
00419 break;
00420
00421 case PARSE_HTTP_VERSION_MINOR_START:
00422
00423 if (!is_digit(*m_read_ptr)) {
00424 set_error(ec, ERROR_VERSION_CHAR);
00425 return false;
00426 }
00427 http_msg.set_version_minor(*m_read_ptr - '0');
00428 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00429 break;
00430
00431 case PARSE_HTTP_VERSION_MINOR:
00432
00433 if (*m_read_ptr == ' ') {
00434
00435 if (! m_is_request) {
00436 m_headers_parse_state = PARSE_STATUS_CODE_START;
00437 }
00438 } else if (*m_read_ptr == '\r') {
00439
00440 if (! m_is_request) {
00441 set_error(ec, ERROR_STATUS_EMPTY);
00442 return false;
00443 }
00444 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00445 } else if (*m_read_ptr == '\n') {
00446
00447 if (! m_is_request) {
00448 set_error(ec, ERROR_STATUS_EMPTY);
00449 return false;
00450 }
00451 m_headers_parse_state = PARSE_EXPECTING_CR;
00452 } else if (is_digit(*m_read_ptr)) {
00453 http_msg.set_version_minor( (http_msg.get_version_minor() * 10)
00454 + (*m_read_ptr - '0') );
00455 } else {
00456 set_error(ec, ERROR_VERSION_CHAR);
00457 return false;
00458 }
00459 break;
00460
00461 case PARSE_STATUS_CODE_START:
00462
00463 if (!is_digit(*m_read_ptr)) {
00464 set_error(ec, ERROR_STATUS_CHAR);
00465 return false;
00466 }
00467 m_status_code = (*m_read_ptr - '0');
00468 m_headers_parse_state = PARSE_STATUS_CODE;
00469 break;
00470
00471 case PARSE_STATUS_CODE:
00472
00473 if (*m_read_ptr == ' ') {
00474 m_status_message.erase();
00475 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00476 } else if (is_digit(*m_read_ptr)) {
00477 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00478 } else if (*m_read_ptr == '\r') {
00479
00480 m_status_message.erase();
00481 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00482 } else if (*m_read_ptr == '\n') {
00483
00484 m_status_message.erase();
00485 m_headers_parse_state = PARSE_EXPECTING_CR;
00486 } else {
00487 set_error(ec, ERROR_STATUS_CHAR);
00488 return false;
00489 }
00490 break;
00491
00492 case PARSE_STATUS_MESSAGE:
00493
00494 if (*m_read_ptr == '\r') {
00495 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00496 } else if (*m_read_ptr == '\n') {
00497 m_headers_parse_state = PARSE_EXPECTING_CR;
00498 } else if (is_control(*m_read_ptr)) {
00499 set_error(ec, ERROR_STATUS_CHAR);
00500 return false;
00501 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00502 set_error(ec, ERROR_STATUS_CHAR);
00503 return false;
00504 } else {
00505 m_status_message.push_back(*m_read_ptr);
00506 }
00507 break;
00508
00509 case PARSE_EXPECTING_NEWLINE:
00510
00511 if (*m_read_ptr == '\n') {
00512
00513 if (m_is_request && http_msg.get_version_major() == 0) {
00514 PION_LOG_DEBUG(m_logger, "HTTP 0.9 Simple-Request found");
00515 ++m_read_ptr;
00516 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00517 m_bytes_total_read += m_bytes_last_read;
00518 return true;
00519 } else {
00520 m_headers_parse_state = PARSE_HEADER_START;
00521 }
00522 } else if (*m_read_ptr == '\r') {
00523
00524
00525
00526 ++m_read_ptr;
00527 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00528 m_bytes_total_read += m_bytes_last_read;
00529 return true;
00530 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00531 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00532 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00533 set_error(ec, ERROR_HEADER_CHAR);
00534 return false;
00535 } else {
00536
00537 m_header_name.erase();
00538 m_header_name.push_back(*m_read_ptr);
00539 m_headers_parse_state = PARSE_HEADER_NAME;
00540 }
00541 break;
00542
00543 case PARSE_EXPECTING_CR:
00544
00545 if (*m_read_ptr == '\r') {
00546 m_headers_parse_state = PARSE_HEADER_START;
00547 } else if (*m_read_ptr == '\n') {
00548
00549
00550
00551 ++m_read_ptr;
00552 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00553 m_bytes_total_read += m_bytes_last_read;
00554 return true;
00555 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00556 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00557 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00558 set_error(ec, ERROR_HEADER_CHAR);
00559 return false;
00560 } else {
00561
00562 m_header_name.erase();
00563 m_header_name.push_back(*m_read_ptr);
00564 m_headers_parse_state = PARSE_HEADER_NAME;
00565 }
00566 break;
00567
00568 case PARSE_HEADER_WHITESPACE:
00569
00570 if (*m_read_ptr == '\r') {
00571 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00572 } else if (*m_read_ptr == '\n') {
00573 m_headers_parse_state = PARSE_EXPECTING_CR;
00574 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00575 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00576 set_error(ec, ERROR_HEADER_CHAR);
00577 return false;
00578 }
00579
00580 m_header_name.erase();
00581 m_header_name.push_back(*m_read_ptr);
00582 m_headers_parse_state = PARSE_HEADER_NAME;
00583 }
00584 break;
00585
00586 case PARSE_HEADER_START:
00587
00588 if (*m_read_ptr == '\r') {
00589 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00590 } else if (*m_read_ptr == '\n') {
00591 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00592 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00593 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00594 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00595 set_error(ec, ERROR_HEADER_CHAR);
00596 return false;
00597 } else {
00598
00599 m_header_name.erase();
00600 m_header_name.push_back(*m_read_ptr);
00601 m_headers_parse_state = PARSE_HEADER_NAME;
00602 }
00603 break;
00604
00605 case PARSE_HEADER_NAME:
00606
00607 if (*m_read_ptr == ':') {
00608 m_header_value.erase();
00609 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00610 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00611 set_error(ec, ERROR_HEADER_CHAR);
00612 return false;
00613 } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00614 set_error(ec, ERROR_HEADER_NAME_SIZE);
00615 return false;
00616 } else {
00617
00618 m_header_name.push_back(*m_read_ptr);
00619 }
00620 break;
00621
00622 case PARSE_SPACE_BEFORE_HEADER_VALUE:
00623
00624 if (*m_read_ptr == ' ') {
00625 m_headers_parse_state = PARSE_HEADER_VALUE;
00626 } else if (*m_read_ptr == '\r') {
00627 http_msg.add_header(m_header_name, m_header_value);
00628 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00629 } else if (*m_read_ptr == '\n') {
00630 http_msg.add_header(m_header_name, m_header_value);
00631 m_headers_parse_state = PARSE_EXPECTING_CR;
00632 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00633 set_error(ec, ERROR_HEADER_CHAR);
00634 return false;
00635 } else {
00636
00637 m_header_value.push_back(*m_read_ptr);
00638 m_headers_parse_state = PARSE_HEADER_VALUE;
00639 }
00640 break;
00641
00642 case PARSE_HEADER_VALUE:
00643
00644 if (*m_read_ptr == '\r') {
00645 http_msg.add_header(m_header_name, m_header_value);
00646 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00647 } else if (*m_read_ptr == '\n') {
00648 http_msg.add_header(m_header_name, m_header_value);
00649 m_headers_parse_state = PARSE_EXPECTING_CR;
00650 } else if (*m_read_ptr != '\t' && is_control(*m_read_ptr)) {
00651
00652
00653
00654
00655
00656
00657 set_error(ec, ERROR_HEADER_CHAR);
00658 return false;
00659 } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00660 set_error(ec, ERROR_HEADER_VALUE_SIZE);
00661 return false;
00662 } else {
00663
00664 m_header_value.push_back(*m_read_ptr);
00665 }
00666 break;
00667
00668 case PARSE_EXPECTING_FINAL_NEWLINE:
00669 if (*m_read_ptr == '\n') ++m_read_ptr;
00670 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00671 m_bytes_total_read += m_bytes_last_read;
00672 return true;
00673
00674 case PARSE_EXPECTING_FINAL_CR:
00675 if (*m_read_ptr == '\r') ++m_read_ptr;
00676 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00677 m_bytes_total_read += m_bytes_last_read;
00678 return true;
00679 }
00680
00681 ++m_read_ptr;
00682 }
00683
00684 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00685 m_bytes_total_read += m_bytes_last_read;
00686 return boost::indeterminate;
00687 }
00688
00689 void parser::update_message_with_header_data(http::message& http_msg) const
00690 {
00691 if (is_parsing_request()) {
00692
00693
00694
00695 http::request& http_request(dynamic_cast<http::request&>(http_msg));
00696 http_request.set_method(m_method);
00697 http_request.set_resource(m_resource);
00698 http_request.set_query_string(m_query_string);
00699
00700
00701 if (! m_query_string.empty()) {
00702 if (! parse_url_encoded(http_request.get_queries(),
00703 m_query_string.c_str(),
00704 m_query_string.size()))
00705 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
00706 }
00707
00708
00709 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00710 cookie_pair = http_request.get_headers().equal_range(http::types::HEADER_COOKIE);
00711 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00712 cookie_iterator != http_request.get_headers().end()
00713 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00714 {
00715 if (! parse_cookie_header(http_request.get_cookies(),
00716 cookie_iterator->second, false) )
00717 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00718 }
00719
00720 } else {
00721
00722
00723
00724 http::response& http_response(dynamic_cast<http::response&>(http_msg));
00725 http_response.set_status_code(m_status_code);
00726 http_response.set_status_message(m_status_message);
00727
00728
00729 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00730 cookie_pair = http_response.get_headers().equal_range(http::types::HEADER_SET_COOKIE);
00731 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00732 cookie_iterator != http_response.get_headers().end()
00733 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00734 {
00735 if (! parse_cookie_header(http_response.get_cookies(),
00736 cookie_iterator->second, true) )
00737 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00738 }
00739
00740 }
00741 }
00742
00743 boost::tribool parser::finish_header_parsing(http::message& http_msg,
00744 boost::system::error_code& ec)
00745 {
00746 boost::tribool rc = boost::indeterminate;
00747
00748 m_bytes_content_remaining = m_bytes_content_read = 0;
00749 http_msg.set_content_length(0);
00750 http_msg.update_transfer_encoding_using_header();
00751 update_message_with_header_data(http_msg);
00752
00753 if (http_msg.is_chunked()) {
00754
00755
00756 m_message_parse_state = PARSE_CHUNKS;
00757
00758
00759 if (m_parse_headers_only)
00760 rc = true;
00761
00762 } else if (http_msg.is_content_length_implied()) {
00763
00764
00765 m_message_parse_state = PARSE_END;
00766 rc = true;
00767
00768 } else {
00769
00770
00771 if (http_msg.has_header(http::types::HEADER_CONTENT_LENGTH)) {
00772
00773
00774 try {
00775 http_msg.update_content_length_using_header();
00776 } catch (...) {
00777 PION_LOG_ERROR(m_logger, "Unable to update content length");
00778 set_error(ec, ERROR_INVALID_CONTENT_LENGTH);
00779 return false;
00780 }
00781
00782
00783 if (http_msg.get_content_length() == 0) {
00784 m_message_parse_state = PARSE_END;
00785 rc = true;
00786 } else {
00787 m_message_parse_state = PARSE_CONTENT;
00788 m_bytes_content_remaining = http_msg.get_content_length();
00789
00790
00791 if (m_bytes_content_remaining > m_max_content_length)
00792 http_msg.set_content_length(m_max_content_length);
00793
00794 if (m_parse_headers_only) {
00795
00796 rc = true;
00797 } else {
00798
00799 http_msg.create_content_buffer();
00800 }
00801 }
00802
00803 } else {
00804
00805
00806
00807
00808 if (! m_is_request) {
00809
00810 http_msg.get_chunk_cache().clear();
00811
00812
00813 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00814
00815
00816 if (m_parse_headers_only)
00817 rc = true;
00818 } else {
00819 m_message_parse_state = PARSE_END;
00820 rc = true;
00821 }
00822 }
00823 }
00824
00825 finished_parsing_headers(ec);
00826
00827 return rc;
00828 }
00829
00830 bool parser::parse_uri(const std::string& uri, std::string& proto,
00831 std::string& host, boost::uint16_t& port,
00832 std::string& path, std::string& query)
00833 {
00834 size_t proto_end = uri.find("://");
00835 size_t proto_len = 0;
00836
00837 if(proto_end != std::string::npos) {
00838 proto = uri.substr(0, proto_end);
00839 proto_len = proto_end + 3;
00840 } else {
00841 proto.clear();
00842 }
00843
00844
00845
00846 size_t server_port_end = uri.find('/', proto_len);
00847 if(server_port_end == std::string::npos) {
00848 return false;
00849 }
00850
00851
00852 std::string t;
00853 t = uri.substr(proto_len, server_port_end - proto_len);
00854 size_t port_pos = t.find(':', 0);
00855
00856
00857
00858 host = t.substr(0, port_pos);
00859 if(host.length() == 0) {
00860 return false;
00861 }
00862
00863
00864 if(port_pos != std::string::npos) {
00865 try {
00866 port = boost::lexical_cast<int>(t.substr(port_pos+1));
00867 } catch (boost::bad_lexical_cast &) {
00868 return false;
00869 }
00870 } else if (proto == "http" || proto == "HTTP") {
00871 port = 80;
00872 } else if (proto == "https" || proto == "HTTPS") {
00873 port = 443;
00874 } else {
00875 port = 0;
00876 }
00877
00878
00879 path = uri.substr(server_port_end);
00880
00881
00882 size_t query_pos = path.find('?', 0);
00883
00884 if(query_pos != std::string::npos) {
00885 query = path.substr(query_pos + 1, path.length() - query_pos - 1);
00886 path = path.substr(0, query_pos);
00887 } else {
00888 query.clear();
00889 }
00890
00891 return true;
00892 }
00893
00894 bool parser::parse_url_encoded(ihash_multimap& dict,
00895 const char *ptr, const size_t len)
00896 {
00897
00898 if (ptr == NULL || len == 0)
00899 return true;
00900
00901
00902 enum QueryParseState {
00903 QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00904 } parse_state = QUERY_PARSE_NAME;
00905
00906
00907 const char * const end = ptr + len;
00908 std::string query_name;
00909 std::string query_value;
00910
00911
00912 while (ptr < end) {
00913 switch (parse_state) {
00914
00915 case QUERY_PARSE_NAME:
00916
00917 if (*ptr == '=') {
00918
00919 parse_state = QUERY_PARSE_VALUE;
00920 } else if (*ptr == '&') {
00921
00922 if (! query_name.empty()) {
00923
00924 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00925 query_name.erase();
00926 }
00927 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00928
00929 } else if (is_control(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00930
00931 return false;
00932 } else {
00933
00934 query_name.push_back(*ptr);
00935 }
00936 break;
00937
00938 case QUERY_PARSE_VALUE:
00939
00940 if (*ptr == '&') {
00941
00942 if (! query_name.empty()) {
00943 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00944 query_name.erase();
00945 }
00946 query_value.erase();
00947 parse_state = QUERY_PARSE_NAME;
00948 } else if (*ptr == ',') {
00949
00950 if (! query_name.empty())
00951 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00952 query_value.erase();
00953 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00954
00955 } else if (is_control(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00956
00957 return false;
00958 } else {
00959
00960 query_value.push_back(*ptr);
00961 }
00962 break;
00963 }
00964
00965 ++ptr;
00966 }
00967
00968
00969 if (! query_name.empty())
00970 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00971
00972 return true;
00973 }
00974
00975 bool parser::parse_multipart_form_data(ihash_multimap& dict,
00976 const std::string& content_type,
00977 const char *ptr, const size_t len)
00978 {
00979
00980 if (ptr == NULL || len == 0)
00981 return true;
00982
00983
00984 std::size_t pos = content_type.find("boundary=");
00985 if (pos == std::string::npos)
00986 return false;
00987 const std::string boundary = std::string("--") + content_type.substr(pos+9);
00988
00989
00990 enum MultiPartParseState {
00991 MP_PARSE_START,
00992 MP_PARSE_HEADER_CR, MP_PARSE_HEADER_LF,
00993 MP_PARSE_HEADER_NAME, MP_PARSE_HEADER_SPACE, MP_PARSE_HEADER_VALUE,
00994 MP_PARSE_HEADER_LAST_LF, MP_PARSE_FIELD_DATA
00995 } parse_state = MP_PARSE_START;
00996
00997
00998 std::string header_name;
00999 std::string header_value;
01000 std::string field_name;
01001 std::string field_value;
01002 bool found_parameter = false;
01003 bool save_current_field = true;
01004 const char * const end_ptr = ptr + len;
01005
01006 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01007
01008 while (ptr != NULL && ptr < end_ptr) {
01009 switch (parse_state) {
01010 case MP_PARSE_START:
01011
01012 header_name.clear();
01013 header_value.clear();
01014 field_name.clear();
01015 field_value.clear();
01016 save_current_field = true;
01017 ptr += boundary.size() - 1;
01018 parse_state = MP_PARSE_HEADER_CR;
01019 break;
01020 case MP_PARSE_HEADER_CR:
01021
01022 if (*ptr == '\r') {
01023
01024 parse_state = MP_PARSE_HEADER_LF;
01025 } else if (*ptr == '\n') {
01026
01027 parse_state = MP_PARSE_HEADER_NAME;
01028 } else if (*ptr == '-' && ptr+1 < end_ptr && ptr[1] == '-') {
01029
01030 return true;
01031 } else return false;
01032 break;
01033 case MP_PARSE_HEADER_LF:
01034
01035 if (*ptr == '\n') {
01036
01037 parse_state = MP_PARSE_HEADER_NAME;
01038 } else return false;
01039 break;
01040 case MP_PARSE_HEADER_NAME:
01041
01042 if (*ptr == '\r' || *ptr == '\n') {
01043 if (header_name.empty()) {
01044
01045 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LAST_LF : MP_PARSE_FIELD_DATA);
01046 } else {
01047
01048 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01049 }
01050 } else if (*ptr == ':') {
01051
01052 parse_state = MP_PARSE_HEADER_SPACE;
01053 } else {
01054
01055 header_name += *ptr;
01056 }
01057 break;
01058 case MP_PARSE_HEADER_SPACE:
01059
01060 if (*ptr == '\r') {
01061
01062 parse_state = MP_PARSE_HEADER_LF;
01063 } else if (*ptr == '\n') {
01064
01065 parse_state = MP_PARSE_HEADER_NAME;
01066 } else if (*ptr != ' ') {
01067
01068 header_value += *ptr;
01069 parse_state = MP_PARSE_HEADER_VALUE;
01070 }
01071
01072 break;
01073 case MP_PARSE_HEADER_VALUE:
01074
01075 if (*ptr == '\r' || *ptr == '\n') {
01076
01077 if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) {
01078
01079 save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/");
01080 } else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) {
01081
01082 std::size_t name_pos = header_value.find("name=\"");
01083 if (name_pos != std::string::npos) {
01084 for (name_pos += 6; name_pos < header_value.size() && header_value[name_pos] != '\"'; ++name_pos) {
01085 field_name += header_value[name_pos];
01086 }
01087 }
01088 }
01089
01090 header_name.clear();
01091 header_value.clear();
01092 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01093 } else {
01094
01095 header_value += *ptr;
01096 }
01097 break;
01098 case MP_PARSE_HEADER_LAST_LF:
01099
01100 if (*ptr == '\n') {
01101
01102 if (save_current_field && !field_name.empty()) {
01103
01104 parse_state = MP_PARSE_FIELD_DATA;
01105 } else {
01106
01107 parse_state = MP_PARSE_START;
01108 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01109 }
01110 } else return false;
01111 break;
01112 case MP_PARSE_FIELD_DATA:
01113
01114 const char *field_end_ptr = end_ptr;
01115 const char *next_ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01116 if (next_ptr) {
01117
01118 const char *temp_ptr = next_ptr - 2;
01119 if (temp_ptr[0] == '\r' && temp_ptr[1] == '\n')
01120 field_end_ptr = temp_ptr;
01121 else field_end_ptr = next_ptr;
01122 }
01123 field_value.assign(ptr, field_end_ptr - ptr);
01124
01125 dict.insert( std::make_pair(field_name, field_value) );
01126 found_parameter = true;
01127
01128 parse_state = MP_PARSE_START;
01129 ptr = next_ptr;
01130 break;
01131 }
01132
01133 if (parse_state != MP_PARSE_START)
01134 ++ptr;
01135 }
01136
01137 return found_parameter;
01138 }
01139
01140 bool parser::parse_cookie_header(ihash_multimap& dict,
01141 const char *ptr, const size_t len,
01142 bool set_cookie_header)
01143 {
01144
01145
01146
01147
01148
01149
01150
01151 enum CookieParseState {
01152 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
01153 } parse_state = COOKIE_PARSE_NAME;
01154
01155
01156 const char * const end = ptr + len;
01157 std::string cookie_name;
01158 std::string cookie_value;
01159 char value_quote_character = '\0';
01160
01161
01162 while (ptr < end) {
01163 switch (parse_state) {
01164
01165 case COOKIE_PARSE_NAME:
01166
01167 if (*ptr == '=') {
01168
01169 value_quote_character = '\0';
01170 parse_state = COOKIE_PARSE_VALUE;
01171 } else if (*ptr == ';' || *ptr == ',') {
01172
01173
01174 if (! cookie_name.empty()) {
01175
01176 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01177 dict.insert( std::make_pair(cookie_name, cookie_value) );
01178 cookie_name.erase();
01179 }
01180 } else if (*ptr != ' ') {
01181
01182 if (is_control(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
01183 return false;
01184
01185 cookie_name.push_back(*ptr);
01186 }
01187 break;
01188
01189 case COOKIE_PARSE_VALUE:
01190
01191 if (value_quote_character == '\0') {
01192
01193 if (*ptr == ';' || *ptr == ',') {
01194
01195 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01196 dict.insert( std::make_pair(cookie_name, cookie_value) );
01197 cookie_name.erase();
01198 cookie_value.erase();
01199 parse_state = COOKIE_PARSE_NAME;
01200 } else if (*ptr == '\'' || *ptr == '"') {
01201 if (cookie_value.empty()) {
01202
01203 value_quote_character = *ptr;
01204 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01205
01206 return false;
01207 } else {
01208
01209 cookie_value.push_back(*ptr);
01210 }
01211 } else if (*ptr != ' ' || !cookie_value.empty()) {
01212
01213 if (is_control(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
01214 return false;
01215
01216 cookie_value.push_back(*ptr);
01217 }
01218 } else {
01219
01220 if (*ptr == value_quote_character) {
01221
01222 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01223 dict.insert( std::make_pair(cookie_name, cookie_value) );
01224 cookie_name.erase();
01225 cookie_value.erase();
01226 parse_state = COOKIE_PARSE_IGNORE;
01227 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01228
01229 return false;
01230 } else {
01231
01232 cookie_value.push_back(*ptr);
01233 }
01234 }
01235 break;
01236
01237 case COOKIE_PARSE_IGNORE:
01238
01239 if (*ptr == ';' || *ptr == ',')
01240 parse_state = COOKIE_PARSE_NAME;
01241 break;
01242 }
01243
01244 ++ptr;
01245 }
01246
01247
01248 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01249 dict.insert( std::make_pair(cookie_name, cookie_value) );
01250
01251 return true;
01252 }
01253
01254 boost::tribool parser::parse_chunks(http::message::chunk_cache_t& chunks,
01255 boost::system::error_code& ec)
01256 {
01257
01258
01259
01260
01261
01262
01263
01264 const char *read_start_ptr = m_read_ptr;
01265 m_bytes_last_read = 0;
01266 while (m_read_ptr < m_read_end_ptr) {
01267
01268 switch (m_chunked_content_parse_state) {
01269 case PARSE_CHUNK_SIZE_START:
01270
01271 if (is_hex_digit(*m_read_ptr)) {
01272 m_chunk_size_str.erase();
01273 m_chunk_size_str.push_back(*m_read_ptr);
01274 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
01275 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
01276
01277
01278 break;
01279 } else {
01280 set_error(ec, ERROR_CHUNK_CHAR);
01281 return false;
01282 }
01283 break;
01284
01285 case PARSE_CHUNK_SIZE:
01286 if (is_hex_digit(*m_read_ptr)) {
01287 m_chunk_size_str.push_back(*m_read_ptr);
01288 } else if (*m_read_ptr == '\x0D') {
01289 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01290 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01291
01292
01293 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
01294 } else if (*m_read_ptr == ';') {
01295
01296
01297 m_chunked_content_parse_state = PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE;
01298 } else {
01299 set_error(ec, ERROR_CHUNK_CHAR);
01300 return false;
01301 }
01302 break;
01303
01304 case PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE:
01305 if (*m_read_ptr == '\x0D') {
01306 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01307 }
01308 break;
01309
01310 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
01311 if (*m_read_ptr == '\x0D') {
01312 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01313 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01314
01315
01316 break;
01317 } else {
01318 set_error(ec, ERROR_CHUNK_CHAR);
01319 return false;
01320 }
01321 break;
01322
01323 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
01324
01325
01326 if (*m_read_ptr == '\x0A') {
01327 m_bytes_read_in_current_chunk = 0;
01328 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
01329 if (m_size_of_current_chunk == 0) {
01330 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK;
01331 } else {
01332 m_chunked_content_parse_state = PARSE_CHUNK;
01333 }
01334 } else {
01335 set_error(ec, ERROR_CHUNK_CHAR);
01336 return false;
01337 }
01338 break;
01339
01340 case PARSE_CHUNK:
01341 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
01342 if (m_payload_handler) {
01343 const std::size_t bytes_avail = bytes_available();
01344 const std::size_t bytes_in_chunk = m_size_of_current_chunk - m_bytes_read_in_current_chunk;
01345 const std::size_t len = (bytes_in_chunk > bytes_avail) ? bytes_avail : bytes_in_chunk;
01346 m_payload_handler(m_read_ptr, len);
01347 m_bytes_read_in_current_chunk += len;
01348 if (len > 1) m_read_ptr += (len - 1);
01349 } else if (chunks.size() < m_max_content_length) {
01350 chunks.push_back(*m_read_ptr);
01351 m_bytes_read_in_current_chunk++;
01352 }
01353 }
01354 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
01355 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
01356 }
01357 break;
01358
01359 case PARSE_EXPECTING_CR_AFTER_CHUNK:
01360
01361 if (*m_read_ptr == '\x0D') {
01362 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
01363 } else {
01364 set_error(ec, ERROR_CHUNK_CHAR);
01365 return false;
01366 }
01367 break;
01368
01369 case PARSE_EXPECTING_LF_AFTER_CHUNK:
01370
01371 if (*m_read_ptr == '\x0A') {
01372 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
01373 } else {
01374 set_error(ec, ERROR_CHUNK_CHAR);
01375 return false;
01376 }
01377 break;
01378
01379 case PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK:
01380
01381 if (*m_read_ptr == '\x0D') {
01382 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
01383 } else {
01384
01385
01386 m_message_parse_state = PARSE_FOOTERS;
01387 m_headers_parse_state = PARSE_HEADER_START;
01388 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01389 m_bytes_total_read += m_bytes_last_read;
01390 m_bytes_content_read += m_bytes_last_read;
01391 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01392 return true;
01393 }
01394 break;
01395
01396 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
01397
01398 if (*m_read_ptr == '\x0A') {
01399 ++m_read_ptr;
01400 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01401 m_bytes_total_read += m_bytes_last_read;
01402 m_bytes_content_read += m_bytes_last_read;
01403 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01404 return true;
01405 } else {
01406 set_error(ec, ERROR_CHUNK_CHAR);
01407 return false;
01408 }
01409 }
01410
01411 ++m_read_ptr;
01412 }
01413
01414 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01415 m_bytes_total_read += m_bytes_last_read;
01416 m_bytes_content_read += m_bytes_last_read;
01417 return boost::indeterminate;
01418 }
01419
01420 boost::tribool parser::consume_content(http::message& http_msg,
01421 boost::system::error_code& ec)
01422 {
01423 size_t content_bytes_to_read;
01424 size_t content_bytes_available = bytes_available();
01425 boost::tribool rc = boost::indeterminate;
01426
01427 if (m_bytes_content_remaining == 0) {
01428
01429 return true;
01430 } else {
01431 if (content_bytes_available >= m_bytes_content_remaining) {
01432
01433 rc = true;
01434 content_bytes_to_read = m_bytes_content_remaining;
01435 } else {
01436
01437 content_bytes_to_read = content_bytes_available;
01438 }
01439 m_bytes_content_remaining -= content_bytes_to_read;
01440 }
01441
01442
01443 if (m_payload_handler) {
01444 m_payload_handler(m_read_ptr, content_bytes_to_read);
01445 } else if (m_bytes_content_read < m_max_content_length) {
01446 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01447
01448
01449 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr,
01450 m_max_content_length - m_bytes_content_read);
01451 } else {
01452
01453 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01454 }
01455 }
01456
01457 m_read_ptr += content_bytes_to_read;
01458 m_bytes_content_read += content_bytes_to_read;
01459 m_bytes_total_read += content_bytes_to_read;
01460 m_bytes_last_read = content_bytes_to_read;
01461
01462 return rc;
01463 }
01464
01465 std::size_t parser::consume_content_as_next_chunk(http::message::chunk_cache_t& chunks)
01466 {
01467 if (bytes_available() == 0) {
01468 m_bytes_last_read = 0;
01469 } else {
01470
01471 m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01472 if (m_payload_handler) {
01473 m_payload_handler(m_read_ptr, m_bytes_last_read);
01474 m_read_ptr += m_bytes_last_read;
01475 } else {
01476 while (m_read_ptr < m_read_end_ptr) {
01477 if (chunks.size() < m_max_content_length)
01478 chunks.push_back(*m_read_ptr);
01479 ++m_read_ptr;
01480 }
01481 }
01482 m_bytes_total_read += m_bytes_last_read;
01483 m_bytes_content_read += m_bytes_last_read;
01484 }
01485 return m_bytes_last_read;
01486 }
01487
01488 void parser::finish(http::message& http_msg) const
01489 {
01490 switch (m_message_parse_state) {
01491 case PARSE_START:
01492 http_msg.set_is_valid(false);
01493 http_msg.set_content_length(0);
01494 http_msg.create_content_buffer();
01495 return;
01496 case PARSE_END:
01497 http_msg.set_is_valid(true);
01498 break;
01499 case PARSE_HEADERS:
01500 case PARSE_FOOTERS:
01501 http_msg.set_is_valid(false);
01502 update_message_with_header_data(http_msg);
01503 http_msg.set_content_length(0);
01504 http_msg.create_content_buffer();
01505 break;
01506 case PARSE_CONTENT:
01507 http_msg.set_is_valid(false);
01508 if (get_content_bytes_read() < m_max_content_length)
01509 http_msg.set_content_length(get_content_bytes_read());
01510 break;
01511 case PARSE_CHUNKS:
01512 http_msg.set_is_valid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
01513 if (!m_payload_handler)
01514 http_msg.concatenate_chunks();
01515 break;
01516 case PARSE_CONTENT_NO_LENGTH:
01517 http_msg.set_is_valid(true);
01518 if (!m_payload_handler)
01519 http_msg.concatenate_chunks();
01520 break;
01521 }
01522
01523 compute_msg_status(http_msg, http_msg.is_valid());
01524
01525 if (is_parsing_request() && !m_payload_handler && !m_parse_headers_only) {
01526
01527
01528
01529 http::request& http_request(dynamic_cast<http::request&>(http_msg));
01530 const std::string& content_type_header = http_request.get_header(http::types::HEADER_CONTENT_TYPE);
01531 if (content_type_header.compare(0, http::types::CONTENT_TYPE_URLENCODED.length(),
01532 http::types::CONTENT_TYPE_URLENCODED) == 0)
01533 {
01534 if (! parse_url_encoded(http_request.get_queries(),
01535 http_request.get_content(),
01536 http_request.get_content_length()))
01537 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST urlencoded)");
01538 } else if (content_type_header.compare(0, http::types::CONTENT_TYPE_MULTIPART_FORM_DATA.length(),
01539 http::types::CONTENT_TYPE_MULTIPART_FORM_DATA) == 0)
01540 {
01541 if (! parse_multipart_form_data(http_request.get_queries(),
01542 content_type_header,
01543 http_request.get_content(),
01544 http_request.get_content_length()))
01545 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST multipart)");
01546 }
01547 }
01548 }
01549
01550 void parser::compute_msg_status(http::message& http_msg, bool msg_parsed_ok )
01551 {
01552 http::message::data_status_t st = http::message::STATUS_NONE;
01553
01554 if(http_msg.has_missing_packets()) {
01555 st = http_msg.has_data_after_missing_packets() ?
01556 http::message::STATUS_PARTIAL : http::message::STATUS_TRUNCATED;
01557 } else {
01558 st = msg_parsed_ok ? http::message::STATUS_OK : http::message::STATUS_TRUNCATED;
01559 }
01560
01561 http_msg.set_status(st);
01562 }
01563
01564 void parser::create_error_category(void)
01565 {
01566 static error_category_t UNIQUE_ERROR_CATEGORY;
01567 m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
01568 }
01569
01570 bool parser::parse_forwarded_for(const std::string& header, std::string& public_ip)
01571 {
01572
01573 static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
01574
01580 static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
01581
01582
01583 if (header.empty())
01584 return false;
01585
01586
01587 boost::match_results<std::string::const_iterator> m;
01588 std::string::const_iterator start_it = header.begin();
01589
01590
01591 while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
01592
01593 std::string ip_str(m[0].first, m[0].second);
01594
01595 if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
01596
01597 public_ip = ip_str;
01598 return true;
01599 }
01600
01601 start_it = m[0].second;
01602 }
01603
01604
01605 return false;
01606 }
01607
01608 }
01609 }