From df63f42c34c2a20a7c2de12ec3203deee66aaff2 Mon Sep 17 00:00:00 2001 From: Malini Kothapalli Date: Thu, 23 Jan 2020 17:42:00 -0800 Subject: [PATCH] Strict CRLF parsing as per the http spec. --- picohttpparser.c | 83 +++++++++++++++++++++++++++++++++++++----------- picohttpparser.h | 2 ++ test.c | 19 +++++++---- 3 files changed, 78 insertions(+), 26 deletions(-) diff --git a/picohttpparser.c b/picohttpparser.c index 74ccc3e..22884a5 100644 --- a/picohttpparser.c +++ b/picohttpparser.c @@ -520,6 +520,25 @@ static int decode_hex(int ch) } } +/* + * chunked-body = *chunk + * last-chunk + * trailer-part + * CRLF + * chunk = chunk-size [ chunk-ext ] CRLF + * chunk-data CRLF + * chunk-size = 1*HEXDIG + * last-chunk = 1*("0") [ chunk-ext ] CRLF + * chunk-data = 1*OCTET ; a sequence of chunk-size octets + * chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) + * chunk-ext-name = token + * chunk-ext-val = token / quoted-string + * trailer-part = *( header-field CRLF ) + *** + * returns -2 - incomplete + * -1 - error + * >= 0 bytes unprocessed in the buffer +**/ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) { size_t dst = 0, src = 0, bufsz = *_bufsz; @@ -547,6 +566,7 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_ ++decoder->_hex_count; } decoder->_hex_count = 0; + decoder->cr_seen = 0; decoder->_state = CHUNKED_IN_CHUNK_EXT; /* fallthru */ case CHUNKED_IN_CHUNK_EXT: @@ -554,16 +574,24 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_ for (;; ++src) { if (src == bufsz) goto Exit; - if (buf[src] == '\012') - break; + if (buf[src] == '\015') { + decoder->cr_seen = 1; + } else if (buf[src] == '\012' && decoder->cr_seen) { + break; //CRLF seen, process next line + } else if ((buf[src] != '\012' && decoder->cr_seen) || (buf[src] == '\012' && !decoder->cr_seen)) { + // LF must follow CR + ret = -1; + goto Exit; + } else if (decoder->cr_seen) { + decoder->cr_seen = 0; + } } ++src; if (decoder->bytes_left_in_chunk == 0) { + decoder->last_chunk = 1; if (decoder->consume_trailer) { decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; break; - } else { - goto Complete; } } decoder->_state = CHUNKED_IN_CHUNK_DATA; @@ -584,41 +612,58 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_ dst += decoder->bytes_left_in_chunk; decoder->bytes_left_in_chunk = 0; decoder->_state = CHUNKED_IN_CHUNK_CRLF; + decoder->cr_seen = 0; } /* fallthru */ case CHUNKED_IN_CHUNK_CRLF: for (;; ++src) { if (src == bufsz) goto Exit; - if (buf[src] != '\015') + if ((!decoder->cr_seen && buf[src] != '\015') || + (decoder->cr_seen && buf[src] != '\012')) { + ret = -1; + goto Exit; + } + if (decoder->cr_seen) { /* CRLF seen in the buffer */ break; - } - if (buf[src] != '\012') { - ret = -1; - goto Exit; + } + decoder->cr_seen = 1; } ++src; + if (decoder->last_chunk) { + goto Complete; + } decoder->_state = CHUNKED_IN_CHUNK_SIZE; break; case CHUNKED_IN_TRAILERS_LINE_HEAD: - for (;; ++src) { - if (src == bufsz) - goto Exit; - if (buf[src] != '\015') - break; + if (src == bufsz) + goto Exit; + decoder->cr_seen = 0; + if (buf[src] == '\015') { + decoder->_state = CHUNKED_IN_CHUNK_CRLF; + break; } - if (buf[src++] == '\012') - goto Complete; + ++src; decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; - /* fallthru */ + /* fallthru */ case CHUNKED_IN_TRAILERS_LINE_MIDDLE: for (;; ++src) { if (src == bufsz) goto Exit; - if (buf[src] == '\012') + if (buf[src] == '\015') { + decoder->cr_seen = 1; + } else if (buf[src] == '\012' && decoder->cr_seen) { + // CRLF seen, process next line break; + } else if ((buf[src] != '\012' && decoder->cr_seen) || (buf[src] == '\012' && !decoder->cr_seen)) { + // LF must follow CR + ret = -1; + goto Exit; + } else if (decoder->cr_seen) { + decoder->cr_seen = 0; + } } - ++src; + src++; decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; break; default: diff --git a/picohttpparser.h b/picohttpparser.h index 0849f84..f50c964 100644 --- a/picohttpparser.h +++ b/picohttpparser.h @@ -62,6 +62,8 @@ int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, s struct phr_chunked_decoder { size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ char consume_trailer; /* if trailing headers should be consumed */ + char last_chunk; /* parsing the last chunk */ + char cr_seen; /* if in CHUNK_CRLF state, have we already processed CR */ char _hex_count; char _state; }; diff --git a/test.c b/test.c index abcd0a9..3db19e8 100644 --- a/test.c +++ b/test.c @@ -397,12 +397,11 @@ static void test_chunked(void) size_t i; for (i = 0; chunked_test_runners[i] != NULL; ++i) { - chunked_test_runners[i](__LINE__, 0, "b\r\nhello world\r\n0\r\n", "hello world", 0); - chunked_test_runners[i](__LINE__, 0, "6\r\nhello \r\n5\r\nworld\r\n0\r\n", "hello world", 0); - chunked_test_runners[i](__LINE__, 0, "6;comment=hi\r\nhello \r\n5\r\nworld\r\n0\r\n", "hello world", 0); - chunked_test_runners[i](__LINE__, 0, "6\r\nhello \r\n5\r\nworld\r\n0\r\na: b\r\nc: d\r\n\r\n", "hello world", - sizeof("a: b\r\nc: d\r\n\r\n") - 1); - chunked_test_runners[i](__LINE__, 0, "b\r\nhello world\r\n0\r\n", "hello world", 0); + chunked_test_runners[i](__LINE__, 0, "b\r\nhello world\r\n0\r\n\r\n", "hello world", 0); + chunked_test_runners[i](__LINE__, 0, "6\r\nhello \r\n5\r\nworld\r\n0\r\n\r\n", "hello world", 0); + chunked_test_runners[i](__LINE__, 0, "6\r\nhello \r\n5\r\nworld\r\n0\r\n\r\n", "hello world", 0); + chunked_test_runners[i](__LINE__, 0, "6;comment=hi\r\nhello \r\n5\r\nworld\r\n0\r\n\r\n", "hello world", 0); + chunked_test_runners[i](__LINE__, 1, "b\r\nhello world\r\n0\r\nTrailer: Content-Type\r\n\r\n", "hello world", 0); } note("failures"); @@ -410,6 +409,10 @@ static void test_chunked(void) if (sizeof(size_t) == 8) { test_chunked_failure(__LINE__, "6\r\nhello \r\nffffffffffffffff\r\nabcdefg", -2); test_chunked_failure(__LINE__, "6\r\nhello \r\nfffffffffffffffff\r\nabcdefg", -1); + test_chunked_failure(__LINE__, "6\r\nhello \r\nfffffffffffffffff\r\nabcdefg", -1); + test_chunked_failure(__LINE__, "6\r\nhello \n0\r\n\r\n", -1); + test_chunked_failure(__LINE__, "6\rhello \n0\r\n\r\n", -1); + test_chunked_failure(__LINE__, "6\nhello \n0\r\n\r\n", -1); } } @@ -422,8 +425,10 @@ static void test_chunked_consume_trailer(void) chunked_test_runners[i](__LINE__, 1, "6\r\nhello \r\n5\r\nworld\r\n0\r\n", "hello world", -2); chunked_test_runners[i](__LINE__, 1, "6;comment=hi\r\nhello \r\n5\r\nworld\r\n0\r\n", "hello world", -2); chunked_test_runners[i](__LINE__, 1, "b\r\nhello world\r\n0\r\n\r\n", "hello world", 0); - chunked_test_runners[i](__LINE__, 1, "b\nhello world\n0\n\n", "hello world", 0); + chunked_test_runners[i](__LINE__, 1, "b\r\nhello world\r\n0\n", "hello world", -1); chunked_test_runners[i](__LINE__, 1, "6\r\nhello \r\n5\r\nworld\r\n0\r\na: b\r\nc: d\r\n\r\n", "hello world", 0); + chunked_test_runners[i](__LINE__, 1, "6\r\nhello \r\n5\r\nworld\r\n0\r\na: b\n", "hello world", -1); + chunked_test_runners[i](__LINE__, 1, "6\r\nhello \r\n5\r\nworld\r\n0\r\na: b\r", "hello world", -2); } }