1 /******************************************************************************
2 
3     HTTP request message parser
4 
5     Before parsing an HTTP request message, the names of all header fields whose
6     values will be required must be added by calling `addCustomHeaders`, except
7     the General-Header and Request-Header fields specified in RFC 2616 section
8     4.5 and 5.3, respectively.
    After parse() has finished parsing the message header, the values of these
10     message header fields of interest can be obtained by the ParamSet
11     (HttpRequest super class) methods. A null value indicates that the request
12     message does not contain a header line whose name matches the corresponding
13     key.
14     Specification of General-Header fields:
15 
16     See_Also: http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.5
17 
18     Specification of Request-Header fields:
19 
20     See_Also: http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.3
21 
22     Specification of Entity-Header fields:
23 
24     See_Also: http://www.w3.org/Protocols/rfc2616/rfc2616-sec7.html#sec7.1
25 
26     For the definition of the categories the standard request message header
27     fields are of
28 
29     See_Also: http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5
30 
31     Copyright:
32         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
33         All rights reserved.
34 
35     License:
36         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
37         Alternatively, this file may be distributed under the terms of the Tango
38         3-Clause BSD License (see LICENSE_BSD.txt for details).
39 
40  ******************************************************************************/
41 
42 module ocean.net.http.HttpRequest;
43 
44 
45 import ocean.meta.types.Qualifiers;
46 
47 import ocean.net.http.message.HttpHeader;
48 
49 import ocean.net.http.message.HttpHeaderParser,
50        ocean.net.http.consts.HttpMethod;
51 
52 import ocean.net.http.consts.HttpVersion: HttpVersionIds;
53 
54 import ocean.net.http.HttpException: HttpException, HeaderParameterException;
55 
56 import ocean.core.Enforce;
57 import ocean.core.Verify;
58 import ocean.net.Uri: Uri;
59 
60 import ocean.net.http.HttpConst: HttpResponseCode;
61 import ocean.net.http.time.HttpTimeParser;
62 
63 /******************************************************************************/
64 
class HttpRequest : HttpHeader
{
    /**************************************************************************

        Maximum accepted request URI length. A request whose URI is longer
        makes parse() throw an HttpException with the RequestURITooLarge
        response code.

     **************************************************************************/

    public uint max_uri_length = 16 * 1024;

    /**************************************************************************

        Requested HTTP method; set by parse() when the start line has been
        parsed and cleared by reset().

     **************************************************************************/

    public HttpMethod method;

    /**************************************************************************

        URI parser

        Returns:
            the Uri instance owned by this request; parse() feeds it with the
            request URI of the start line.

     **************************************************************************/

    public Uri uri ( )
    {
        return this._uri;
    }

    private Uri _uri;

    /**************************************************************************

        Message header parser instance to get header parse results and set
        limitations. This refers to the same object as the private parser
        member, seen through its interface.

     **************************************************************************/

    public IHttpHeaderParser header;

    /**************************************************************************

        Request message body buffer. Its length is the announced message body
        length once the header has been parsed; only the first msg_body_pos
        elements have actually been received.

     **************************************************************************/

    private mstring msg_body_;

    /**************************************************************************

        Request message body position counter: the number of message body
        elements received so far.

     **************************************************************************/

    private size_t msg_body_pos;

    /**************************************************************************

        Message header parser

     **************************************************************************/

    private HttpHeaderParser parser;

    /**************************************************************************

        Tells whether the end of the message header has been reached and we are
        receiving the message body, if any

     **************************************************************************/

    private bool header_complete;

    /**************************************************************************

        Reusable exception instances

     **************************************************************************/

    package HttpException               http_exception;
    private HeaderParameterException    header_param_exception;

    /**************************************************************************

        Constructor

        If the server supports HTTP methods that expect a request message body
        (such as POST or PUT), set add_entity_headers to true to add the
        standard Entity header fields. (The standard General-Header and
        Request-Header fields are added automatically.)

        Note that a non-zero value for msg_body_prealloc_length is only
        sensible when requests with a message body (POST, PUT etc.) are
        supported by this server.

        Params:
            add_entity_headers       = set to true to add the standard Entity
                                       header fields as well
            msg_body_prealloc_length = expected message body length for
                                       preallocation;
            uri_prealloc_length      = the initial amount of memory the
                                       contained Uri object will be asked to
                                       pre-allocate to hold the URI-decoded
                                       URI of a request.

     **************************************************************************/

    public this ( bool add_entity_headers = false, size_t msg_body_prealloc_length = 0,
            uint uri_prealloc_length = 512)
    {
        super(HeaderFieldNames.Request.NameList,
              add_entity_headers? HeaderFieldNames.Entity.NameList : null);

        this.header = this.parser = new HttpHeaderParser;

        this._uri = new Uri(uri_prealloc_length);

        this.msg_body_ = new char[msg_body_prealloc_length];

        this.http_exception         = new HttpException;
        this.header_param_exception = new HeaderParameterException;

        this.reset();
    }

    /**************************************************************************

        Constructor

        Creates an instance without the standard Entity header fields and with
        the default URI preallocation length.

        Params:
            msg_body_prealloc_length = expected message body length for
                                       preallocation

     **************************************************************************/

    public this ( size_t msg_body_prealloc_length )
    {
        this(false, msg_body_prealloc_length);
    }

    /**************************************************************************

        Returns:
            slice to the method name in the message header start line if the
            start line has already been parsed or null otherwise

     **************************************************************************/

    public cstring method_name ( )
    {
        return this.parser.start_line_tokens[0];
    }

    /**************************************************************************

        Returns:
            slice to the request URI string (the second token of the message
            header start line, not URI-decoded) if the start line has already
            been parsed

     **************************************************************************/

    public cstring uri_string ( )
    {
        return this.parser.start_line_tokens[1];
    }

    /**************************************************************************

        Obtains the request message body (which may be empty). It may be
        incomplete if parse() did not yet reach the end of the request message
        or null if parse() did not yet reach the end of the request message
        header.

        Only the part of the message body that has actually been received is
        returned; the not yet populated remainder of the internal buffer is
        never exposed.

        Returns:
            request message body parsed so far or null if parse() did not yet
            reach the end of the request message header

     **************************************************************************/

    public cstring msg_body ( )
    {
        // Before the header is complete (also right after reset()) the buffer
        // still holds the preallocated or previous content, which must not be
        // reported as the body of the current request.
        if (!this.header_complete)
        {
            return null;
        }

        return this.msg_body_[0 .. this.msg_body_pos];
    }

    /**************************************************************************

        Obtains the integer value of the request header field corresponding to
        header_field_name. The header field value is expected to represent an
        unsigned integer number in decimal representation.

        Note that the template parameter T is accepted but not used by this
        implementation; the result is always a uint.

        Params:
            header_field_name = request header field name (case-insensitive;
                                must be one of the message header field values
                                of interest passed on instantiation)

        Returns:
            integer value of the request header field

        Throws:
            HeaderParameterException if
                - the field is missing in the header or
                - the field does not contain an unsigned integer value in
                  decimal representation.

     **************************************************************************/

    public uint getUint ( T = uint ) ( cstring header_field_name )
    {
        uint n;

        bool is_set,
             ok = super.getUnsigned(header_field_name, n, is_set);

        // Report a missing field first: a missing field is not a malformed one.
        enforce(this.header_param_exception.set("Missing header parameter : ")
                .append(header_field_name),
                is_set);
        enforce(this.header_param_exception.set("Expected decimal unsigned integer for header : ")
                .append(header_field_name),
                ok);

        return n;
    }

    /**************************************************************************

        Parses content which is expected to be either the start of a HTTP
        message or a HTTP message fragment that continues the content passed on
        the last call to this method.
        If this method is called again after having finished, it will reset the
        status first and start parsing a new request message.

        Params:
            content         = content to parse
            msg_body_length = callback returning the message body length; will
                              be called at most once after the message header
                              has been parsed.

        Returns:
            number of elements consumed from content.

        Throws:
            HttpParseException
                - on parse error: if
                    * the number of start line tokens is different from 3 or
                    * a regular header_line does not contain a ':';
                - on limit excess: if
                    * the header size in bytes exceeds the requested limit or
                    * the number of header lines in exceeds the requested limit.

            HttpException if
                - the HTTP method is unknown or
                - the HTTP version identifier is unknown or
                - the URI is missing or
                - the URI length exceeds the requested max_uri_length.

            Note that msg_body_length() may throw a HttpException, especially if
                - the announced message body length exceeds an allowed limit or
                - the announced message body length cannot be determined because
                  header parameters are missing.

     **************************************************************************/

    public size_t parse ( cstring content, lazy size_t msg_body_length )
    {
        size_t consumed;

        // A previous message has been parsed completely: start a new one.
        if (this.finished)
        {
            this.reset();
        }

        if (this.header_complete)
        {
            // The header was completed by an earlier call, content continues
            // the message body.
            consumed = this.appendMsgBody(content);
        }
        else
        {
            // A non-null result is the remainder of content after the end of
            // the header, a null result means the header is still incomplete.
            cstring msg_body_start = this.parser.parse(content);

            consumed = content.length - msg_body_start.length;

            if (msg_body_start !is null)
            {
                this.header_complete = true;

                this.setRequestLine();

                foreach (element; this.parser.header_elements)
                {
                    this.set(element.key, element.val);
                }

                // Evaluates the lazy msg_body_length argument; this is the
                // only place where it is evaluated.
                this.msg_body_.length = msg_body_length();
                assumeSafeAppend(this.msg_body_);

                consumed += this.appendMsgBody(msg_body_start);
            }
        }

        // Content may only be left unconsumed if the message is complete.
        verify(consumed == content.length || this.finished);

        return consumed;
    }

    /**************************************************************************

        Returns:
            true if parse() has finished parsing the message or false otherwise

     **************************************************************************/

    public bool finished ( )
    {
        return this.header_complete && this.msg_body_pos >= this.msg_body_.length;
    }

    /**************************************************************************

        Appends chunk to the message body as long as the message body length
        does not exceed the length reported to parse() by the msg_body_length
        parameter.

        Params:
            chunk = chunk to append to the message body

        Returns:
            number of elements appended

     **************************************************************************/

    private size_t appendMsgBody ( cstring chunk )
    {
        // Never copy more than what is still missing in the message body.
        size_t len = min(chunk.length, this.msg_body_.length - this.msg_body_pos),
               end = this.msg_body_pos + len;

        this.msg_body_[this.msg_body_pos .. end] = chunk[0 .. len];

        this.msg_body_pos = end;

        return len;
    }

    /**************************************************************************

        Obtains the request line parameters: the HTTP method, the HTTP version
        and the URI, which is passed to the URI parser.

        Throws:
            HttpException if
                - the HTTP method is unknown or
                - the HTTP version identifier is unknown or
                - the URI is missing or
                - the URI length exceeds the requested max_uri_length.

     **************************************************************************/

    private void setRequestLine ( )
    {
        this.method = HttpMethodNames[this.method_name];

        enforce(this.http_exception.set(HttpResponseCode.BadRequest)
                .append(" : invalid HTTP method"),
                this.method);

        this.http_version_ = HttpVersionIds[this.parser.start_line_tokens[2]];

        if (!this.http_version_)
        {
            // Fall back to HTTP/1.0 before reporting the error.
            this.http_version_ = this.http_version_.v1_0;

            // A well-formed but unknown version identifier is "not supported",
            // anything else is a malformed request.
            if (HttpVersionIds.validSyntax(this.parser.start_line_tokens[2]))
            {
                throw this.http_exception.set(HttpResponseCode.VersionNotSupported);
            }
            else
            {
                throw this.http_exception.set(HttpResponseCode.BadRequest)
                    .append(" : invalid HTTP version");
            }
        }

        enforce(this.http_exception.set(HttpResponseCode.BadRequest)
                .append(" : no uri in request"),
                this.parser.start_line_tokens[1].length);
        enforce(this.http_exception.set(HttpResponseCode.RequestURITooLarge),
                this.parser.start_line_tokens[1].length <= this.max_uri_length);

        this._uri.parse(this.parser.start_line_tokens[1]);
    }

    /**************************************************************************

        Resets the state so that the next call to parse() starts parsing a new
        request message. The message body buffer is kept for reuse.

     **************************************************************************/

    public override void reset ( )
    {
        this.method          = this.method.init;
        this.http_version_   = this.http_version_.init;
        this.msg_body_pos    = 0;
        this.header_complete = false;
        this._uri.reset();
        this.parser.reset();

        super.reset();
    }

    /**************************************************************************

        Returns the minimum of a and b.

        Params:
            a = first value
            b = second value

        Returns:
            minimum of a and b

     **************************************************************************/

    static size_t min ( size_t a, size_t b )
    {
        return ((a < b)? a : b);
    }
}
482 
483 //version = OceanPerformanceTest;
484 
485 version (OceanPerformanceTest)
486 {
487     import ocean.io.Stdout;
488     import ocean.core.internal.gcInterface: gc_disable, gc_enable;
489 }
490 
491 version (unittest)
492 {
493     import ocean.core.Test;
494     import core.stdc.time: time;
495     import core.sys.posix.stdlib: srand48, drand48;
496 }
497 
/*******************************************************************************

    Feeds a complete GET request message (header plus a message body) to an
    HttpRequest instance in randomly sized chunks and checks the parse results.
    With version = OceanPerformanceTest the round is repeated a million times
    with the GC disabled, otherwise ten times.

*******************************************************************************/

unittest
{
    static immutable istring lorem_ipsum =
        "Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod "
      ~ "tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim "
      ~ "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex "
      ~ "ea commodi consequat. Quis aute iure reprehenderit in voluptate velit "
      ~ "esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat "
      ~ "cupiditat non proident, sunt in culpa qui officia deserunt mollit "
      ~ "anim id est laborum. Duis autem vel eum iriure dolor in hendrerit in "
      ~ "vulputate velit esse molestie consequat, vel illum dolore eu feugiat "
      ~ "nulla facilisis at vero eros et accumsan et iusto odio dignissim qui "
      ~ "blandit praesent luptatum zzril delenit augue duis dolore te feugait "
      ~ "nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing "
      ~ "elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna "
      ~ "aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud "
      ~ "exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea "
      ~ "commodo consequat. Duis autem vel eum iriure dolor in hendrerit in "
      ~ "vulputate velit esse molestie consequat, vel illum dolore eu feugiat "
      ~ "nulla facilisis at vero eros et accumsan et iusto odio dignissim qui "
      ~ "blandit praesent luptatum zzril delenit augue duis dolore te feugait "
      ~ "nulla facilisi. Nam liber tempor cum soluta nobis eleifend option "
      ~ "congue nihil imperdiet doming id quod mazim placerat facer possim "
      ~ "assum. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed "
      ~ "diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam "
      ~ "erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci "
      ~ "tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo "
      ~ "consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate "
      ~ "velit esse molestie consequat, vel illum dolore eu feugiat nulla "
      ~ "facilisis. At vero eos et accusam et justo duo dolores et ea rebum. "
      ~ "Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum "
      ~ "dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing "
      ~ "elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore "
      ~ "magna aliquyam erat, sed diam voluptua. At vero eos et accusam et "
      ~ "justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea "
      ~ "takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor "
      ~ "sit amet, consetetur sadipscing elitr, At accusam aliquyam diam diam "
      ~ "dolore dolores duo eirmod eos erat, et nonumy sed tempor et et "
      ~ "invidunt justo labore Stet clita ea et gubergren, kasd magna no "
      ~ "rebum. sanctus sea sed takimata ut vero voluptua. est Lorem ipsum "
      ~ "dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing "
      ~ "elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore "
      ~ "magna aliquyam erat. Consetetur sadipscing elitr, sed diam nonumy "
      ~ "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed "
      ~ "diam voluptua. At vero eos et accusam et justo duo dolores et ea "
      ~ "rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem "
      ~ "ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur "
      ~ "sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et "
      ~ "dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam "
      ~ "et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea "
      ~ "takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor "
      ~ "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor "
      ~ "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. "
      ~ "At vero eos et accusam et justo duo dolores et ea rebum. Stet clita "
      ~ "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit "
      ~ "amet.";

    // Complete request message: start line, header lines, empty line and
    // lorem_ipsum as the message body.
    static immutable istring content =
        "GET /dir?query=Hello%20World!&abc=def&ghi HTTP/1.1\r\n"
      ~ "Host: www.example.org:12345\r\n"
      ~ "User-Agent: Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.2.17) Gecko/20110422 Ubuntu/9.10 (karmic) Firefox/3.6.17\r\n"
      ~ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
      ~ "Accept-Language: de-de,de;q=0.8,en-us;q=0.5,en;q=0.3\r\n"
      ~ "Accept-Encoding: gzip,deflate\r\n"
      ~ "Accept-Charset: UTF-8,*\r\n"
      ~ "Keep-Alive: 115\r\n"
      ~ "Connection: keep-alive\r\n"
      ~ "Cache-Control: max-age=0\r\n"
      ~ "\r\n" ~
        lorem_ipsum;

    static immutable parts = 3;

    /*
     * content will be split into parts parts where the length of each part is
     * content.length / parts + d with d a random number in the range
     * [-(content.length / parts) / 3, +(content.length / parts) / 3].
     */

    static size_t random_chunk_length ( )
    {
        static immutable c = content.length * (2.0f / (parts * 3));

        static assert (c >= 3, "too many parts");

        return cast (size_t) (c + cast (float) drand48() * c);
    }

    scope request = new HttpRequest;

    // Keep-Alive is not among the header fields registered by default.
    request.addCustomHeaders("Keep-Alive");

    srand48(time(null));

    version (OceanPerformanceTest)
    {
        static immutable n = 1000_000;
    }
    else
    {
        static immutable n = 10;
    }

    version (OceanPerformanceTest)
    {
        gc_disable();

        scope (exit) gc_enable();
    }

    for (uint i = 0; i < n; i++)
    {
        {
            size_t len = request.min(random_chunk_length(), content.length);

            // content is exactly one request message, so parse() must consume
            // every chunk entirely.
            test!("==")(request.parse(content[0 .. len], lorem_ipsum.length), len);

            for (size_t pos = len; !request.finished; pos += len)
            {
                // Fail instead of feeding empty chunks forever if the request
                // is still unfinished after all of content has been passed.
                test!("<")(pos, content.length);

                // The chunk length must not depend on the position, only the
                // remaining content limits it.
                len = request.min(random_chunk_length(), content.length - pos);

                test!("==")(request.parse(content[pos .. pos + len], lorem_ipsum.length), len);
            }
        }

        test!("==")(request.method_name           ,"GET"[]);
        test!("==")(request.method                ,request.method.Get);
        test!("==")(request.uri_string            ,"/dir?query=Hello%20World!&abc=def&ghi"[]);
        test!("==")(request.http_version          ,request.http_version.v1_1);
        test!("==")(request["user-agent"]         ,"Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.2.17) Gecko/20110422 Ubuntu/9.10 (karmic) Firefox/3.6.17"[]);
        test!("==")(request["Accept"]             ,"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"[]);
        test!("==")(request["Accept-Language"]    ,"de-de,de;q=0.8,en-us;q=0.5,en;q=0.3"[]);
        test!("==")(request["Accept-Encoding"]    ,"gzip,deflate"[]);
        test!("==")(request["Accept-Charset"]     ,"UTF-8,*"[]);
        test!("==")(request.getUint("keep-alive"), 115);

        test!("==")(request["connection"]         ,"keep-alive"[]);

        test(request.msg_body == lorem_ipsum, ">" ~ request.msg_body ~ "<");

        version (OceanPerformanceTest)
        {
            uint j = i + 1;

            if (!(j % 10_000))
            {
                Stderr(HttpRequest.stringof)(' ')(j)("\n").flush();
            }
        }
    }
}