/[james]/httplint/httplint.c
ViewVC logotype

Contents of /httplint/httplint.c

Parent Directory | Revision Log


Revision 59 - (show annotations) (download) (as text)
Mon Apr 5 11:16:27 2004 UTC (20 years ago) by james
File MIME type: text/x-csrc
File size: 35151 byte(s)
Fix summer time bugs.

1 /*
2 * HTTP Header Lint
3 * Licensed under the MIT License
4 * http://www.opensource.org/licenses/mit-license
5 * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
6 */
7
8 /*
9 * Compile using
10 * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11 *
12 * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13 */
14
15 #define _GNU_SOURCE
16 #define __USE_XOPEN
17
18 #include <limits.h>
19 #include <math.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 #include <sys/types.h>
26 #include <regex.h>
27 #include <curl/curl.h>
28
29
/* Decimal digits; used with strspn() to validate purely numeric header values. */
#define NUMBER "0123456789"
/* Mark a parameter as intentionally unused (evaluates x once, discards it). */
#define UNUSED(x) ((void) (x))
32
33
34 bool start;
35 bool html = false;
36 CURL *curl;
37 int status_code;
38 char error_buffer[CURL_ERROR_SIZE];
39 regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
40 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
41 re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires;
42
43
44 void init(void);
45 void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
46 void check_url(const char *url);
47 size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
48 size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);
49 void check_status_line(const char *s);
50 void check_header(const char *name, const char *value);
51 bool parse_date(const char *s, struct tm *tm);
52 int month(const char *s);
53 time_t mktime_from_utc(struct tm *t);
54 const char *skip_lws(const char *s);
55 bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
56 void (*callback)(const char *s, regmatch_t pmatch[]));
57 void header_accept_ranges(const char *s);
58 void header_age(const char *s);
59 void header_allow(const char *s);
60 void header_cache_control(const char *s);
61 void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
62 void header_connection(const char *s);
63 void header_content_encoding(const char *s);
64 void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
65 void header_content_language(const char *s);
66 void header_content_length(const char *s);
67 void header_content_location(const char *s);
68 void header_content_md5(const char *s);
69 void header_content_range(const char *s);
70 void header_content_type(const char *s);
71 void header_date(const char *s);
72 void header_etag(const char *s);
73 void header_expires(const char *s);
74 void header_last_modified(const char *s);
75 void header_location(const char *s);
76 void header_pragma(const char *s);
77 void header_retry_after(const char *s);
78 void header_server(const char *s);
79 void header_trailer(const char *s);
80 void header_transfer_encoding(const char *s);
81 void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
82 void header_upgrade(const char *s);
83 void header_vary(const char *s);
84 void header_via(const char *s);
85 void header_set_cookie(const char *s);
86 void die(const char *error);
87 void print(const char *s, size_t len);
88 void lookup(const char *key);
89
90
91 struct header_entry {
92 char name[40];
93 void (*handler)(const char *s);
94 int count;
95 char *missing;
96 } header_table[] = {
97 { "Accept-Ranges", header_accept_ranges, 0, 0 },
98 { "Age", header_age, 0, 0 },
99 { "Allow", header_allow, 0, 0 },
100 { "Cache-Control", header_cache_control, 0, 0 },
101 { "Connection", header_connection, 0, 0 },
102 { "Content-Encoding", header_content_encoding, 0, 0 },
103 { "Content-Language", header_content_language, 0, "missingcontlang" },
104 { "Content-Length", header_content_length, 0, 0 },
105 { "Content-Location", header_content_location, 0, 0 },
106 { "Content-MD5", header_content_md5, 0, 0 },
107 { "Content-Range", header_content_range, 0, 0 },
108 { "Content-Type", header_content_type, 0, "missingcontenttype" },
109 { "Date", header_date, 0, "missingdate" },
110 { "ETag", header_etag, 0, 0 },
111 { "Expires", header_expires, 0, 0 },
112 { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113 { "Location", header_location, 0, 0 },
114 { "Pragma", header_pragma, 0, 0 },
115 { "Retry-After", header_retry_after, 0, 0 },
116 { "Server", header_server, 0, 0 },
117 { "Set-Cookie", header_set_cookie, 0, 0 },
118 { "Trailer", header_trailer, 0, 0 },
119 { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
120 { "Upgrade", header_upgrade, 0, 0 },
121 { "Vary", header_vary, 0, 0 },
122 { "Via", header_via, 0, 0 }
123 };
124
125
126 /**
127 * Main entry point.
128 */
129 int main(int argc, char *argv[])
130 {
131 int i = 1;
132
133 if (argc < 2)
134 die("Usage: httplint [--html] url [url ...]");
135
136 init();
137
138 if (1 < argc && strcmp(argv[1], "--html") == 0) {
139 html = true;
140 i++;
141 }
142
143 for (; i != argc; i++)
144 check_url(argv[i]);
145
146 curl_global_cleanup();
147
148 return 0;
149 }
150
151
152 /**
153 * Initialise the curl handle and compile regular expressions.
154 */
155 void init(void)
156 {
157 struct curl_slist *request_headers = 0;
158
159 if (curl_global_init(CURL_GLOBAL_ALL))
160 die("Failed to initialise libcurl");
161
162 curl = curl_easy_init();
163 if (!curl)
164 die("Failed to create curl handle");
165
166 if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
167 die("Failed to set curl options");
168 if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
169 die("Failed to set curl options");
170 if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
171 die("Failed to set curl options");
172 if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
173 die("Failed to set curl options");
174
175 /* remove libcurl default headers */
176 request_headers = curl_slist_append(request_headers, "Accept:");
177 request_headers = curl_slist_append(request_headers, "Pragma:");
178 if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
179 die("Failed to set curl options");
180
181 /* compile regular expressions */
182 regcomp_wrapper(&re_status_line,
183 "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
184 REG_EXTENDED);
185 regcomp_wrapper(&re_token,
186 "^([-0-9a-zA-Z_.!]+)",
187 REG_EXTENDED);
188 regcomp_wrapper(&re_token_value,
189 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
190 REG_EXTENDED);
191 regcomp_wrapper(&re_content_type,
192 "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
193 "(;[ \t]*([-0-9a-zA-Z_.]+)="
194 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
195 REG_EXTENDED);
196 regcomp_wrapper(&re_absolute_uri,
197 "^[a-zA-Z0-9]+://[^ ]+$",
198 REG_EXTENDED);
199 regcomp_wrapper(&re_etag,
200 "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
201 REG_EXTENDED);
202 regcomp_wrapper(&re_server,
203 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
204 REG_EXTENDED);
205 regcomp_wrapper(&re_transfer_coding,
206 "^([-0-9a-zA-Z_.]+)[ \t]*"
207 "(;[ \t]*([-0-9a-zA-Z_.]+)="
208 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
209 REG_EXTENDED);
210 regcomp_wrapper(&re_upgrade,
211 "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
212 REG_EXTENDED);
213 regcomp_wrapper(&re_ugly,
214 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
215 REG_EXTENDED);
216 regcomp_wrapper(&re_rfc1123,
217 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
218 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
219 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
220 REG_EXTENDED);
221 regcomp_wrapper(&re_rfc1036,
222 "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
223 "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
224 "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
225 REG_EXTENDED);
226 regcomp_wrapper(&re_asctime,
227 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
228 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
229 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
230 REG_EXTENDED);
231 regcomp_wrapper(&re_cookie_nameval,
232 "^[^;, ]+=[^;, ]*$",
233 REG_EXTENDED);
234 regcomp_wrapper(&re_cookie_expires,
235 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-"
236 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) "
237 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
238 REG_EXTENDED);
239 }
240
241
/**
 * Compile a regular expression; on failure print the offending expression
 * to stderr and terminate through die() with the regerror() text.
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
256
257
258 /**
259 * Fetch and check the headers for the specified url.
260 */
261 void check_url(const char *url)
262 {
263 int i, r;
264 CURLcode code;
265
266 start = true;
267 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
268 header_table[i].count = 0;
269
270 if (!html)
271 printf("Checking URL %s\n", url);
272 if (strncmp(url, "http", 4)) {
273 if (html)
274 printf("<p class='warning'>");
275 printf("Warning: this is not an http or https url");
276 if (html)
277 printf("</p>");
278 printf("\n");
279 }
280
281 if (curl_easy_setopt(curl, CURLOPT_URL, url))
282 die("Failed to set curl options");
283
284 if (html)
285 printf("<ul>\n");
286 code = curl_easy_perform(curl);
287 if (html)
288 printf("</ul>\n");
289 if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
290 if (html)
291 printf("<p class='error'>");
292 printf("Error: ");
293 print(error_buffer, strlen(error_buffer));
294 printf(".");
295 if (html)
296 printf("</p>");
297 printf("\n");
298 return;
299 } else {
300 printf("\n");
301 if (html)
302 printf("<ul>");
303 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
304 if (header_table[i].count == 0 && header_table[i].missing)
305 lookup(header_table[i].missing);
306 }
307 }
308
309 r = regexec(&re_ugly, url, 0, 0, 0);
310 if (r)
311 lookup("ugly");
312
313 if (html)
314 printf("</ul>");
315 }
316
317
318 /**
319 * Callback for received header data.
320 */
321 size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
322 {
323 const size_t size = msize * nmemb;
324 char s[400], *name, *value;
325
326 UNUSED(stream);
327
328 printf(html ? "<li><code>" : "* ");
329 print(ptr, size);
330 printf(html ? "</code><ul>" : "\n");
331
332 if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
333 lookup("notcrlf");
334 if (html)
335 printf("</ul></li>\n");
336 return size;
337 }
338 if (sizeof s <= size) {
339 lookup("headertoolong");
340 if (html)
341 printf("</ul></li>\n");
342 return size;
343 }
344 strncpy(s, ptr, size);
345 s[size - 2] = 0;
346
347 name = s;
348 value = strchr(s, ':');
349
350 if (s[0] == 0) {
351 /* empty header indicates end of headers */
352 lookup("endofheaders");
353 if (html)
354 printf("</ul></li>\n");
355 return 0;
356
357 } else if (start) {
358 /* Status-Line [6.1] */
359 check_status_line(s);
360 start = false;
361
362 } else if (!value) {
363 lookup("missingcolon");
364
365 } else {
366 *value = 0;
367 value++;
368
369 check_header(name, skip_lws(value));
370 }
371
372 if (html)
373 printf("</ul></li>\n");
374 return size;
375 }
376
377
/**
 * Callback for received body data.
 *
 * We are not interested in the body, so abort the fetch by returning 0.
 * All parameters are ignored.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
392
393
394 /**
395 * Check the syntax and content of the response Status-Line [6.1].
396 */
397 void check_status_line(const char *s)
398 {
399 const char *reason;
400 unsigned int major = 0, minor = 0;
401 int r;
402 regmatch_t pmatch[5];
403
404 r = regexec(&re_status_line, s, 5, pmatch, 0);
405 if (r) {
406 lookup("badstatusline");
407 return;
408 }
409
410 major = atoi(s + pmatch[1].rm_so);
411 minor = atoi(s + pmatch[2].rm_so);
412 status_code = atoi(s + pmatch[3].rm_so);
413 reason = s + pmatch[4].rm_so;
414
415 if (major < 1 || (major == 1 && minor == 0)) {
416 lookup("oldhttp");
417 } else if ((major == 1 && 1 < minor) || 1 < major) {
418 lookup("futurehttp");
419 } else {
420 if (status_code < 100 || 600 <= status_code) {
421 lookup("badstatus");
422 } else {
423 char key[] = "xxx";
424 key[0] = '0' + status_code / 100;
425 lookup(key);
426 }
427 }
428 }
429
430
431 /**
432 * Check the syntax and content of a header.
433 */
434 void check_header(const char *name, const char *value)
435 {
436 struct header_entry *header;
437
438 header = bsearch(name, header_table,
439 sizeof header_table / sizeof header_table[0],
440 sizeof header_table[0],
441 (int (*)(const void *, const void *)) strcasecmp);
442
443 if (header) {
444 header->count++;
445 header->handler(value);
446 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
447 lookup("xheader");
448 } else {
449 lookup("nonstandard");
450 }
451 }
452
453
454 /**
455 * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
456 */
457 bool parse_date(const char *s, struct tm *tm)
458 {
459 int r;
460 int len = strlen(s);
461 regmatch_t pmatch[20];
462
463 tm->tm_isdst = 0;
464 tm->tm_gmtoff = 0;
465 tm->tm_zone = "GMT";
466
467 if (len == 29) {
468 /* RFC 1123 */
469 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
470 if (r == 0) {
471 tm->tm_mday = atoi(s + pmatch[2].rm_so);
472 tm->tm_mon = month(s + pmatch[3].rm_so);
473 tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
474 tm->tm_hour = atoi(s + pmatch[5].rm_so);
475 tm->tm_min = atoi(s + pmatch[6].rm_so);
476 tm->tm_sec = atoi(s + pmatch[7].rm_so);
477 return true;
478 }
479
480 } else if (len == 24) {
481 /* asctime() format */
482 r = regexec(&re_asctime, s, 20, pmatch, 0);
483 if (r == 0) {
484 if (s[pmatch[3].rm_so] == ' ')
485 tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
486 else
487 tm->tm_mday = atoi(s + pmatch[3].rm_so);
488 tm->tm_mon = month(s + pmatch[2].rm_so);
489 tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
490 tm->tm_hour = atoi(s + pmatch[4].rm_so);
491 tm->tm_min = atoi(s + pmatch[5].rm_so);
492 tm->tm_sec = atoi(s + pmatch[6].rm_so);
493 lookup("asctime");
494 return true;
495 }
496
497 } else {
498 /* RFC 1036 */
499 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
500 if (r == 0) {
501 tm->tm_mday = atoi(s + pmatch[2].rm_so);
502 tm->tm_mon = month(s + pmatch[3].rm_so);
503 tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
504 tm->tm_hour = atoi(s + pmatch[5].rm_so);
505 tm->tm_min = atoi(s + pmatch[6].rm_so);
506 tm->tm_sec = atoi(s + pmatch[7].rm_so);
507 lookup("rfc1036");
508 return true;
509 }
510
511 }
512
513 lookup("baddate");
514 return false;
515 }
516
517
/**
 * Convert a month name to the month number.
 *
 * Only the first three characters of s are examined, so s may point into the
 * middle of a longer date string. Returns 0 (January) to 11 (December);
 * anything that is not an exact English three-letter abbreviation yields 0.
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	/* strncmp stops at a terminator, so short strings are safe */
	for (i = 0; i != 12; i++) {
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}
	return 0;
}
548
549
/**
 * mktime() with the original's fallback: if the conversion fails, retry one
 * hour earlier and add the hour back. Returns false when both attempts fail.
 * NOTE(review): presumably this steps over a local time that does not exist
 * because of a summer time change - confirm.
 */
static bool mktime_or_hour_back(struct tm *tm, time_t *out)
{
	time_t stamp = mktime(tm);

	if (stamp == -1) {
		tm->tm_hour--;
		stamp = mktime(tm);
		if (stamp == -1)
			return false;
		stamp += 3600;
	}
	*out = stamp;
	return true;
}

/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * Converts *t as local time, measures how far the local conversion of the
 * corresponding UTC fields is from that result, and subtracts the
 * difference. Returns -1 if mktime() cannot convert; *t may be modified.
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t as_local, round_trip;
	struct tm *utc_fields;

	if (!mktime_or_hour_back(t, &as_local))
		return -1;	/* can't deal with output from strptime */

	utc_fields = gmtime(&as_local);
	utc_fields->tm_isdst = 0;
	if (!mktime_or_hour_back(utc_fields, &round_trip))
		return -1;	/* can't deal with output from gmtime */

	return as_local - (round_trip - as_local);
}
581
582
/**
 * Skip optional LWS (linear white space) [2.2]: at most one CR LF that is
 * followed by a space or tab, then any run of spaces and tabs.
 * Returns a pointer to the first character after the white space.
 */
const char *skip_lws(const char *s)
{
	const char *p = s;
	const int folded = p[0] == '\r' && p[1] == '\n' &&
			(p[2] == ' ' || p[2] == '\t');

	if (folded)
		p += 2;
	for (; *p == ' ' || *p == '\t'; p++)
		;
	return p;
}
594
595
596 /**
597 * Parse a list of elements (#rule in [2.1]).
598 */
599 bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
600 void (*callback)(const char *s, regmatch_t pmatch[]))
601 {
602 int r;
603 unsigned int items = 0;
604 regmatch_t pmatch[20];
605
606 do {
607 r = regexec(preg, s, 20, pmatch, 0);
608 if (r) {
609 if (html)
610 printf("<li class='error'>");
611 printf(" Failed to match list item %i\n", items + 1);
612 if (html)
613 printf("</li>\n");
614 return false;
615 }
616
617 if (callback)
618 callback(s, pmatch);
619 items++;
620
621 s += pmatch[0].rm_eo;
622 s = skip_lws(s);
623 if (*s == 0)
624 break;
625 if (*s != ',') {
626 if (html)
627 printf("<li class='error'>");
628 printf(" Expecting , after list item %i\n", items);
629 if (html)
630 printf("</li>\n");
631 return false;
632 }
633 while (*s == ',')
634 s = skip_lws(s + 1);
635 } while (*s != 0);
636
637 if (items < n || m < items) {
638 if (html)
639 printf("<li class='error'>");
640 printf(" %i items in list, but there should be ", items);
641 if (m == UINT_MAX)
642 printf("at least %i\n", n);
643 else
644 printf("between %i and %i\n", n, m);
645 if (html)
646 printf("</li>\n");
647 return false;
648 }
649
650 return true;
651 }
652
653
/* Header-specific validation. */

/** Accept-Ranges: only "bytes" and "none" are recognised. */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
664
665 void header_age(const char *s)
666 {
667 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
668 lookup("badage");
669 else
670 lookup("ok");
671 }
672
673 void header_allow(const char *s)
674 {
675 if (parse_list(s, &re_token, 0, UINT_MAX, 0))
676 lookup("ok");
677 else
678 lookup("badallow");
679 }
680
681 void header_cache_control(const char *s)
682 {
683 if (parse_list(s, &re_token_value, 1, UINT_MAX,
684 header_cache_control_callback))
685 lookup("ok");
686 else
687 lookup("badcachecont");
688 }
689
690 char cache_control_list[][20] = {
691 "max-age", "max-stale", "min-fresh", "must-revalidate",
692 "no-cache", "no-store", "no-transform", "only-if-cached",
693 "private", "proxy-revalidate", "public", "s-maxage"
694 };
695
696 void header_cache_control_callback(const char *s, regmatch_t pmatch[])
697 {
698 size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
699 char name[20];
700 char *dir;
701
702 if (19 < len) {
703 lookup("unknowncachecont");
704 return;
705 }
706
707 strncpy(name, s + pmatch[1].rm_so, len);
708 name[len] = 0;
709
710 dir = bsearch(name, cache_control_list,
711 sizeof cache_control_list / sizeof cache_control_list[0],
712 sizeof cache_control_list[0],
713 (int (*)(const void *, const void *)) strcasecmp);
714
715 if (!dir) {
716 if (html)
717 printf("<li class='warning'>");
718 printf(" Cache-Control directive '");
719 print(name, strlen(name));
720 printf("':\n");
721 if (html)
722 printf("</li>\n");
723 lookup("unknowncachecont");
724 }
725 }
726
/** Connection: anything other than exactly "close" produces a warning. */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
734
735 void header_content_encoding(const char *s)
736 {
737 if (parse_list(s, &re_token, 1, UINT_MAX,
738 header_content_encoding_callback))
739 lookup("ok");
740 else
741 lookup("badcontenc");
742 }
743
744 char content_coding_list[][20] = {
745 "compress", "deflate", "gzip", "identity"
746 };
747
748 void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
749 {
750 size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
751 char name[20];
752 char *dir;
753
754 if (19 < len) {
755 lookup("unknowncontenc");
756 return;
757 }
758
759 strncpy(name, s + pmatch[1].rm_so, len);
760 name[len] = 0;
761
762 dir = bsearch(name, content_coding_list,
763 sizeof content_coding_list / sizeof content_coding_list[0],
764 sizeof content_coding_list[0],
765 (int (*)(const void *, const void *)) strcasecmp);
766 if (!dir) {
767 if (html)
768 printf("<li class='warning'>");
769 printf(" Content-Encoding '%s':\n", name);
770 if (html)
771 printf("</li>\n");
772 lookup("unknowncontenc");
773 }
774 }
775
776 void header_content_language(const char *s)
777 {
778 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
779 lookup("ok");
780 else
781 lookup("badcontlang");
782 }
783
784 void header_content_length(const char *s)
785 {
786 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
787 lookup("badcontlen");
788 else
789 lookup("ok");
790 }
791
/** Content-Location: the only check is that the value contains no space. */
void header_content_location(const char *s)
{
	lookup(strchr(s, ' ') ? "badcontloc" : "ok");
}
799
/** Content-MD5: only the length (24 characters) of the value is checked. */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
807
/** Content-Range: always reported with the "contentrange" message; the value
 * is not examined. */
void header_content_range(const char *s)
{
	(void) s;
	lookup("contentrange");
}
813
814 void header_content_type(const char *s)
815 {
816 bool charset = false;
817 char *type, *subtype;
818 unsigned int i;
819 int r;
820 regmatch_t pmatch[30];
821
822 r = regexec(&re_content_type, s, 30, pmatch, 0);
823 if (r) {
824 lookup("badcontenttype");
825 return;
826 }
827
828 type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
829 subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
830
831 /* parameters */
832 for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
833 char *attrib, *value;
834
835 attrib = strndup(s + pmatch[i + 1].rm_so,
836 pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
837 value = strndup(s + pmatch[i + 2].rm_so,
838 pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
839
840 if (strcasecmp(attrib, "charset") == 0)
841 charset = true;
842 }
843
844 if (strcasecmp(type, "text") == 0 && !charset)
845 lookup("nocharset");
846 else
847 lookup("ok");
848 }
849
/**
 * Date: the value must parse as an HTTP date and lie within 10 seconds of
 * the local clock (read before parsing).
 */
void header_date(const char *s)
{
	struct tm parsed;
	const time_t now = time(0);
	double skew;

	if (!parse_date(s, &parsed))
		return;

	skew = difftime(now, mktime_from_utc(&parsed));
	lookup(10 < fabs(skew) ? "wrongdate" : "ok");
}
867
868 void header_etag(const char *s)
869 {
870 int r;
871 r = regexec(&re_etag, s, 0, 0, 0);
872 if (r)
873 lookup("badetag");
874 else
875 lookup("ok");
876 }
877
/** Expires: the value only has to parse as an HTTP date; parse_date()
 * reports any problem itself. */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;
	lookup("ok");
}
884
/**
 * Last-Modified: the value must parse as an HTTP date and must not be more
 * than 10 seconds ahead of the local clock (read before parsing).
 */
void header_last_modified(const char *s)
{
	struct tm parsed;
	const time_t now = time(0);
	double ahead;

	if (!parse_date(s, &parsed))
		return;

	ahead = difftime(mktime_from_utc(&parsed), now);
	lookup(10 < ahead ? "futurelastmod" : "ok");
}
902
903 void header_location(const char *s)
904 {
905 int r;
906 r = regexec(&re_absolute_uri, s, 0, 0, 0);
907 if (r)
908 lookup("badlocation");
909 else
910 lookup("ok");
911 }
912
913 void header_pragma(const char *s)
914 {
915 if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
916 lookup("ok");
917 else
918 lookup("badpragma");
919 }
920
921 void header_retry_after(const char *s)
922 {
923 struct tm tm;
924
925 if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
926 lookup("ok");
927 return;
928 }
929
930 if (!parse_date(s, &tm))
931 return;
932
933 lookup("ok");
934 }
935
936 void header_server(const char *s)
937 {
938 int r;
939 r = regexec(&re_server, s, 0, 0, 0);
940 if (r)
941 lookup("badserver");
942 else
943 lookup("ok");
944 }
945
946 void header_trailer(const char *s)
947 {
948 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
949 lookup("ok");
950 else
951 lookup("badtrailer");
952 }
953
954 void header_transfer_encoding(const char *s)
955 {
956 if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
957 header_transfer_encoding_callback))
958 lookup("ok");
959 else
960 lookup("badtransenc");
961 }
962
963 char transfer_coding_list[][20] = {
964 "chunked", "compress", "deflate", "gzip", "identity"
965 };
966
967 void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
968 {
969 size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
970 char name[20];
971 char *dir;
972
973 if (19 < len) {
974 lookup("unknowntransenc");
975 return;
976 }
977
978 strncpy(name, s + pmatch[1].rm_so, len);
979 name[len] = 0;
980
981 dir = bsearch(name, transfer_coding_list,
982 sizeof transfer_coding_list / sizeof transfer_coding_list[0],
983 sizeof transfer_coding_list[0],
984 (int (*)(const void *, const void *)) strcasecmp);
985 if (!dir) {
986 if (html)
987 printf("<li class='warning'>");
988 printf(" Transfer-Encoding '%s':\n", name);
989 if (html)
990 printf("</li>\n");
991 lookup("unknowntransenc");
992 }
993 }
994
995 void header_upgrade(const char *s)
996 {
997 int r;
998 r = regexec(&re_upgrade, s, 0, 0, 0);
999 if (r)
1000 lookup("badupgrade");
1001 else
1002 lookup("ok");
1003 }
1004
1005 void header_vary(const char *s)
1006 {
1007 if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
1008 lookup("ok");
1009 else
1010 lookup("badvary");
1011 }
1012
/** Via: always reported with the "via" message; the value is not examined. */
void header_via(const char *s)
{
	(void) s;
	lookup("via");
}
1018
1019 /* http://wp.netscape.com/newsref/std/cookie_spec.html */
1020 void header_set_cookie(const char *s)
1021 {
1022 bool ok = true;
1023 int r;
1024 const char *semi = strchr(s, ';');
1025 const char *s2;
1026 struct tm tm;
1027 double diff;
1028 time_t time0, time1;
1029 regmatch_t pmatch[20];
1030
1031 if (semi)
1032 s2 = strndup(s, semi - s);
1033 else
1034 s2 = s;
1035
1036 r = regexec(&re_cookie_nameval, s2, 0, 0, 0);
1037 if (r) {
1038 lookup("cookiebadnameval");
1039 ok = false;
1040 }
1041
1042 if (!semi)
1043 return;
1044
1045 s = skip_lws(semi + 1);
1046
1047 while (*s) {
1048 semi = strchr(s, ';');
1049 if (semi)
1050 s2 = strndup(s, semi - s);
1051 else
1052 s2 = s;
1053
1054 if (strncasecmp(s2, "expires=", 8) == 0) {
1055 s2 += 8;
1056 r = regexec(&re_cookie_expires, s2, 20, pmatch, 0);
1057 if (r == 0) {
1058 tm.tm_mday = atoi(s2 + pmatch[2].rm_so);
1059 tm.tm_mon = month(s2 + pmatch[3].rm_so);
1060 tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900;
1061 tm.tm_hour = atoi(s2 + pmatch[5].rm_so);
1062 tm.tm_min = atoi(s2 + pmatch[6].rm_so);
1063 tm.tm_sec = atoi(s2 + pmatch[7].rm_so);
1064
1065 time0 = time(0);
1066 time1 = mktime_from_utc(&tm);
1067
1068 diff = difftime(time0, time1);
1069 if (10 < diff) {
1070 lookup("cookiepastdate");
1071 ok = false;
1072 }
1073 } else {
1074 lookup("cookiebaddate");
1075 ok = false;
1076 }
1077 } else if (strncasecmp(s2, "domain=", 7) == 0) {
1078 } else if (strncasecmp(s2, "path=", 5) == 0) {
1079 if (s2[5] != '/') {
1080 lookup("cookiebadpath");
1081 ok = false;
1082 }
1083 } else if (strcasecmp(s, "secure") == 0) {
1084 } else {
1085 if (html)
1086 printf("<li class='warning'>");
1087 printf(" Set-Cookie field '%s':\n", s2);
1088 if (html)
1089 printf("</li>\n");
1090 lookup("cookieunknownfield");
1091 ok = false;
1092 }
1093
1094 if (semi)
1095 s = skip_lws(semi + 1);
1096 else
1097 break;
1098 }
1099
1100 if (ok)
1101 lookup("ok");
1102 }
1103
1104
/**
 * Report a fatal error on stderr as "httplint: <error>" and terminate the
 * program with EXIT_FAILURE. Does not return.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);
	exit(EXIT_FAILURE);
}
1113
1114
1115 /**
1116 * Print a string which contains control characters.
1117 */
1118 void print(const char *s, size_t len)
1119 {
1120 size_t i;
1121 for (i = 0; i != len; i++) {
1122 if (html && s[i] == '<')
1123 printf("&lt;");
1124 else if (html && s[i] == '>')
1125 printf("&gt;");
1126 else if (html && s[i] == '&')
1127 printf("&amp;");
1128 else if (31 < s[i] && s[i] < 127)
1129 putchar(s[i]);
1130 else {
1131 if (html)
1132 printf("<span class='cc'>");
1133 printf("[%.2x]", s[i]);
1134 if (html)
1135 printf("</span>");
1136 }
1137 }
1138 }
1139
1140
1141 struct message_entry {
1142 const char key[20];
1143 const char *value;
1144 } message_table[] = {
1145 { "1xx", "A response status code in the range 100 - 199 indicates a "
1146 "'provisional response'." },
1147 { "2xx", "A response status code in the range 200 - 299 indicates that "
1148 "the request was successful." },
1149 { "3xx", "A response status code in the range 300 - 399 indicates that "
1150 "the client should redirect to a new URL." },
1151 { "4xx", "A response status code in the range 400 - 499 indicates that "
1152 "the request could not be fulfilled due to client error." },
1153 { "5xx", "A response status code in the range 500 - 599 indicates that "
1154 "an error occurred on the server." },
1155 { "asctime", "Warning: This date is in the obsolete asctime() format. "
1156 "Consider using the RFC 1123 format instead." },
1157 { "badage", "Error: The Age header must be one number." },
1158 { "badallow", "Error: The Allow header must be a comma-separated list of "
1159 "HTTP methods." },
1160 { "badcachecont", "Error: The Cache-Control header must be a "
1161 "comma-separated list of directives." },
1162 { "badconnection", "Warning: The only value of the Connection header "
1163 "defined by HTTP/1.1 is \"close\"." },
1164 { "badcontenc", "Error: The Content-Encoding header must be a "
1165 "comma-separated list of encodings." },
1166 { "badcontenttype", "Error: The Content-Type header must be of the form "
1167 "'type/subtype (; optional parameters)'." },
1168 { "badcontlang", "Error: The Content-Language header must be a "
1169 "comma-separated list of language tags." },
1170 { "badcontlen", "Error: The Content-Length header must be a number." },
1171 { "badcontloc", "Error: The Content-Location header must be an absolute "
1172 "or relative URI." },
1173 { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
1174 "MD5 sum." },
1175 { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
1176 "1123 format." },
1177 { "badetag", "Error: The ETag header must be a quoted string (optionally "
1178 "preceded by \"W/\" for a weak tag)." },
1179 { "badlocation", "Error: The Location header must be an absolute URI. "
1180 "Relative URIs are not permitted." },
1181 { "badpragma", "Error: The Pragma header must be a comma-separated list of "
1182 "directives." },
1183 { "badserver", "Error: The Server header must be a space-separated list of "
1184 "products of the form Name/optional-version and comments "
1185 "in ()." },
1186 { "badstatus", "Warning: The response status code is outside the standard "
1187 "range 100 - 599." },
1188 { "badstatusline", "Error: Failed to parse the response Status-Line. The "
1189 "status line must be of the form 'HTTP/n.n <3-digit "
1190 "status> <reason phrase>'." },
1191 { "badtrailer", "Error: The Trailer header must be a comma-separated list "
1192 "of header names." },
1193 { "badtransenc", "Error: The Transfer-Encoding header must be a "
1194 "comma-separated of encodings." },
1195 { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
1196 "of product identifiers." },
1197 { "badvary", "Error: The Vary header must be a comma-separated list "
1198 "of header names, or \"*\"." },
1199 { "contentrange", "Warning: The Content-Range header should not be returned "
1200 "by the server for this request." },
1201 { "cookiebaddate", "Error: The expires date must be in the form "
1202 "\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." },
1203 { "cookiebadnameval", "Error: A Set-Cookie header must start with "
1204 "name=value, each excluding semi-colon, comma and "
1205 "white space." },
1206 { "cookiebadpath", "Error: The path does not start with \"/\"." },
1207 { "cookiepastdate", "Warning: The expires date is in the past. The cookie "
1208 "will be deleted by browsers." },
1209 { "cookieunknownfield", "Warning: This is not a standard Set-Cookie "
1210 "field." },
1211 { "endofheaders", "End of headers." },
1212 { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
1213 "version of this tool." },
1214 { "futurelastmod", "Error: The specified Last-Modified date-time is in "
1215 "the future." },
1216 { "headertoolong", "Warning: Header too long: ignored." },
1217 { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
1218 { "missingcontenttype", "Warning: No Content-Type header was present. The "
1219 "client will have to guess the media type or ask "
1220 "the user. Adding a Content-Type header is strongly "
1221 "recommended." },
1222 { "missingcontlang", "Consider adding a Content-Language header if "
1223 "applicable for this document." },
1224 { "missingdate", "Warning: No Date header was present. A Date header must "
1225 "be present, unless the server does not have a clock, or "
1226 "the response is 100, 101, or 500 - 599." },
1227 { "missinglastmod", "No Last-Modified header was present. The "
1228 "HTTP/1.1 specification states that this header should "
1229 "be sent whenever feasible." },
1230 { "nocharset", "Warning: No character set is specified in the Content-Type. "
1231 "Clients may assume the default of ISO-8859-1. Consider "
1232 "appending '; charset=...'." },
1233 { "nonstandard", "Warning: I don't know anything about this header. Is it "
1234 "a standard HTTP response header?" },
1235 { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
1236 "that all header lines end with CR LF." },
1237 { "ok", "OK." },
1238 { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
1239 "to HTTP/1.1." },
1240 { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
1241 "Consider using the RFC 1123 format instead." },
1242 { "ugly", "This URL appears to contain implementation-specific parts such "
1243 "as an extension or a query string. This may make the URL liable "
1244 "to change when the implementation is changed, resulting in "
1245 "broken links. Consider using URL rewriting or equivalent to "
1246 "implement a future-proof URL space. See "
1247 "http://www.w3.org/Provider/Style/URI for more information." },
1248 { "unknowncachecont", "Warning: This Cache-Control directive is "
1249 "non-standard and will have limited support." },
1250 { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
1251 { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
1252 "range." },
1253 { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
1254 { "via", "This header was added by a proxy, cache or gateway." },
1255 { "wrongdate", "Warning: The server date-time differs from this system's "
1256 "date-time by more than 10 seconds. Check that both the "
1257 "system clocks are correct." },
1258 { "xheader", "This is an extension header. I don't know how to check it." }
1259 };
1260
1261
1262 /**
1263 * Look up and output the string referenced by a key.
1264 */
1265 void lookup(const char *key)
1266 {
1267 const char *s, *spc;
1268 int x;
1269 struct message_entry *message;
1270
1271 message = bsearch(key, message_table,
1272 sizeof message_table / sizeof message_table[0],
1273 sizeof message_table[0],
1274 (int (*)(const void *, const void *)) strcasecmp);
1275 if (message)
1276 s = message->value;
1277 else
1278 s = key;
1279
1280 if (html) {
1281 if (strncmp(s, "Warning:", 8) == 0)
1282 printf("<li class='warning'>");
1283 else if (strncmp(s, "Error:", 6) == 0)
1284 printf("<li class='error'>");
1285 else if (strncmp(s, "OK", 2) == 0)
1286 printf("<li class='ok'>");
1287 else
1288 printf("<li>");
1289 for (; *s; s++) {
1290 if (strncmp(s, "http://", 7) == 0) {
1291 spc = strchr(s, ' ');
1292 printf("<a href='%.*s'>%.*s</a>", spc - s, s, spc - s, s);
1293 s = spc;
1294 }
1295 switch (*s) {
1296 case '<': printf("&lt;"); break;
1297 case '>': printf("&gt;"); break;
1298 case '&': printf("&amp;"); break;
1299 default: printf("%c", *s); break;
1300 }
1301 }
1302 printf("</li>\n");
1303
1304 } else {
1305 printf(" ");
1306 x = 4;
1307 while (*s) {
1308 spc = strchr(s, ' ');
1309 if (!spc)
1310 spc = s + strlen(s);
1311 if (75 < x + (spc - s)) {
1312 printf("\n ");
1313 x = 4;
1314 }
1315 x += spc - s + 1;
1316 printf("%.*s ", spc - s, s);
1317 if (*spc)
1318 s = spc + 1;
1319 else
1320 s = spc;
1321 }
1322 printf("\n\n");
1323 }
1324 }
1325

  ViewVC Help
Powered by ViewVC 1.1.26