/[james]/httplint/httplint.c
ViewVC logotype

Contents of /httplint/httplint.c

Parent Directory | Revision Log


Revision 56 - (show annotations) (download) (as text)
Fri Mar 12 21:54:02 2004 UTC (20 years, 10 months ago) by james
File MIME type: text/x-csrc
File size: 35086 byte(s)
Implement HTML output.

1 /*
2 * HTTP Header Lint
3 * Licensed under the MIT License
4 * http://www.opensource.org/licenses/mit-license
5 * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
6 */
7
8 /*
9 * Compile using
10 * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11 *
12 * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13 */
14
15 #define _GNU_SOURCE
16 #define __USE_XOPEN
17
18 #include <limits.h>
19 #include <math.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 #include <sys/types.h>
26 #include <regex.h>
27 #include <curl/curl.h>
28
29
/* Digits accepted by strspn() when a header value must be a decimal number. */
#define NUMBER "0123456789"
/* Mark a parameter as intentionally unused (no self-assignment, so it also
 * works on const objects and does not trigger self-assign warnings). */
#define UNUSED(x) ((void) (x))
32
33
34 bool start;
35 bool html = false;
36 CURL *curl;
37 int status_code;
38 char error_buffer[CURL_ERROR_SIZE];
39 regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
40 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
41 re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires;
42
43
/* set-up and fetching */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* generic checking and parsing helpers */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* one handler per known response header (see header_table) */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);
void header_set_cookie(const char *s);

/* output helpers */
void die(const char *error);
void print(const char *s, size_t len);
void lookup(const char *key);
89
90
91 struct header_entry {
92 char name[40];
93 void (*handler)(const char *s);
94 int count;
95 char *missing;
96 } header_table[] = {
97 { "Accept-Ranges", header_accept_ranges, 0, 0 },
98 { "Age", header_age, 0, 0 },
99 { "Allow", header_allow, 0, 0 },
100 { "Cache-Control", header_cache_control, 0, 0 },
101 { "Connection", header_connection, 0, 0 },
102 { "Content-Encoding", header_content_encoding, 0, 0 },
103 { "Content-Language", header_content_language, 0, "missingcontlang" },
104 { "Content-Length", header_content_length, 0, 0 },
105 { "Content-Location", header_content_location, 0, 0 },
106 { "Content-MD5", header_content_md5, 0, 0 },
107 { "Content-Range", header_content_range, 0, 0 },
108 { "Content-Type", header_content_type, 0, "missingcontenttype" },
109 { "Date", header_date, 0, "missingdate" },
110 { "ETag", header_etag, 0, 0 },
111 { "Expires", header_expires, 0, 0 },
112 { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113 { "Location", header_location, 0, 0 },
114 { "Pragma", header_pragma, 0, 0 },
115 { "Retry-After", header_retry_after, 0, 0 },
116 { "Server", header_server, 0, 0 },
117 { "Set-Cookie", header_set_cookie, 0, 0 },
118 { "Trailer", header_trailer, 0, 0 },
119 { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
120 { "Upgrade", header_upgrade, 0, 0 },
121 { "Vary", header_vary, 0, 0 },
122 { "Via", header_via, 0, 0 }
123 };
124
125
126 /**
127 * Main entry point.
128 */
129 int main(int argc, char *argv[])
130 {
131 int i = 1;
132
133 if (argc < 2)
134 die("Usage: httplint [--html] url [url ...]");
135
136 init();
137
138 if (1 < argc && strcmp(argv[1], "--html") == 0) {
139 html = true;
140 i++;
141 }
142
143 for (; i != argc; i++)
144 check_url(argv[i]);
145
146 curl_global_cleanup();
147
148 return 0;
149 }
150
151
152 /**
153 * Initialise the curl handle and compile regular expressions.
154 */
155 void init(void)
156 {
157 struct curl_slist *request_headers = 0;
158
159 if (curl_global_init(CURL_GLOBAL_ALL))
160 die("Failed to initialise libcurl");
161
162 curl = curl_easy_init();
163 if (!curl)
164 die("Failed to create curl handle");
165
166 if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
167 die("Failed to set curl options");
168 if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
169 die("Failed to set curl options");
170 if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
171 die("Failed to set curl options");
172 if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
173 die("Failed to set curl options");
174
175 /* remove libcurl default headers */
176 request_headers = curl_slist_append(request_headers, "Accept:");
177 request_headers = curl_slist_append(request_headers, "Pragma:");
178 if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
179 die("Failed to set curl options");
180
181 /* compile regular expressions */
182 regcomp_wrapper(&re_status_line,
183 "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
184 REG_EXTENDED);
185 regcomp_wrapper(&re_token,
186 "^([-0-9a-zA-Z_.!]+)",
187 REG_EXTENDED);
188 regcomp_wrapper(&re_token_value,
189 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
190 REG_EXTENDED);
191 regcomp_wrapper(&re_content_type,
192 "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
193 "(;[ \t]*([-0-9a-zA-Z_.]+)="
194 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
195 REG_EXTENDED);
196 regcomp_wrapper(&re_absolute_uri,
197 "^[a-zA-Z0-9]+://[^ ]+$",
198 REG_EXTENDED);
199 regcomp_wrapper(&re_etag,
200 "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
201 REG_EXTENDED);
202 regcomp_wrapper(&re_server,
203 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
204 REG_EXTENDED);
205 regcomp_wrapper(&re_transfer_coding,
206 "^([-0-9a-zA-Z_.]+)[ \t]*"
207 "(;[ \t]*([-0-9a-zA-Z_.]+)="
208 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
209 REG_EXTENDED);
210 regcomp_wrapper(&re_upgrade,
211 "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
212 REG_EXTENDED);
213 regcomp_wrapper(&re_ugly,
214 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
215 REG_EXTENDED);
216 regcomp_wrapper(&re_rfc1123,
217 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
218 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
219 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
220 REG_EXTENDED);
221 regcomp_wrapper(&re_rfc1036,
222 "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
223 "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
224 "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
225 REG_EXTENDED);
226 regcomp_wrapper(&re_asctime,
227 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
228 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
229 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
230 REG_EXTENDED);
231 regcomp_wrapper(&re_cookie_nameval,
232 "^[^;, ]+=[^;, ]*$",
233 REG_EXTENDED);
234 regcomp_wrapper(&re_cookie_expires,
235 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-"
236 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) "
237 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
238 REG_EXTENDED);
239 }
240
241
/**
 * Compile a regular expression; on failure report the pattern and the
 * regerror() text, then terminate through die().
 *
 * \param preg   destination for the compiled expression
 * \param regex  pattern source
 * \param cflags flags passed straight to regcomp()
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
256
257
258 /**
259 * Fetch and check the headers for the specified url.
260 */
261 void check_url(const char *url)
262 {
263 int i, r;
264 CURLcode code;
265
266 start = true;
267 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
268 header_table[i].count = 0;
269
270 if (!html)
271 printf("Checking URL %s\n", url);
272 if (strncmp(url, "http", 4)) {
273 if (html)
274 printf("<p class='warning'>");
275 printf("Warning: this is not an http or https url");
276 if (html)
277 printf("</p>");
278 printf("\n");
279 }
280
281 if (curl_easy_setopt(curl, CURLOPT_URL, url))
282 die("Failed to set curl options");
283
284 if (html)
285 printf("<ul>\n");
286 code = curl_easy_perform(curl);
287 if (html)
288 printf("</ul>\n");
289 if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
290 if (html)
291 printf("<p class='error'>");
292 printf("Error: ");
293 print(error_buffer, strlen(error_buffer));
294 printf(".");
295 if (html)
296 printf("</p>");
297 printf("\n");
298 return;
299 } else {
300 printf("\n");
301 if (html)
302 printf("<ul>");
303 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
304 if (header_table[i].count == 0 && header_table[i].missing)
305 lookup(header_table[i].missing);
306 }
307 }
308
309 r = regexec(&re_ugly, url, 0, 0, 0);
310 if (r)
311 lookup("ugly");
312
313 if (html)
314 printf("</ul>");
315 }
316
317
318 /**
319 * Callback for received header data.
320 */
321 size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
322 {
323 const size_t size = msize * nmemb;
324 char s[400], *name, *value;
325
326 UNUSED(stream);
327
328 printf(html ? "<li><code>" : "* ");
329 print(ptr, size);
330 printf(html ? "</code><ul>" : "\n");
331
332 if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
333 lookup("notcrlf");
334 if (html)
335 printf("</ul></li>\n");
336 return size;
337 }
338 if (sizeof s <= size) {
339 lookup("headertoolong");
340 if (html)
341 printf("</ul></li>\n");
342 return size;
343 }
344 strncpy(s, ptr, size);
345 s[size - 2] = 0;
346
347 name = s;
348 value = strchr(s, ':');
349
350 if (s[0] == 0) {
351 /* empty header indicates end of headers */
352 lookup("endofheaders");
353 if (html)
354 printf("</ul></li>\n");
355 return 0;
356
357 } else if (start) {
358 /* Status-Line [6.1] */
359 check_status_line(s);
360 start = false;
361
362 } else if (!value) {
363 lookup("missingcolon");
364
365 } else {
366 *value = 0;
367 value++;
368
369 check_header(name, skip_lws(value));
370 }
371
372 if (html)
373 printf("</ul></li>\n");
374 return size;
375 }
376
377
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned, which makes the fetch stop.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
392
393
394 /**
395 * Check the syntax and content of the response Status-Line [6.1].
396 */
397 void check_status_line(const char *s)
398 {
399 const char *reason;
400 unsigned int major = 0, minor = 0;
401 int r;
402 regmatch_t pmatch[5];
403
404 r = regexec(&re_status_line, s, 5, pmatch, 0);
405 if (r) {
406 lookup("badstatusline");
407 return;
408 }
409
410 major = atoi(s + pmatch[1].rm_so);
411 minor = atoi(s + pmatch[2].rm_so);
412 status_code = atoi(s + pmatch[3].rm_so);
413 reason = s + pmatch[4].rm_so;
414
415 if (major < 1 || (major == 1 && minor == 0)) {
416 lookup("oldhttp");
417 } else if ((major == 1 && 1 < minor) || 1 < major) {
418 lookup("futurehttp");
419 } else {
420 if (status_code < 100 || 600 <= status_code) {
421 lookup("badstatus");
422 } else {
423 char key[] = "xxx";
424 key[0] = '0' + status_code / 100;
425 lookup(key);
426 }
427 }
428 }
429
430
431 /**
432 * Check the syntax and content of a header.
433 */
434 void check_header(const char *name, const char *value)
435 {
436 struct header_entry *header;
437
438 header = bsearch(name, header_table,
439 sizeof header_table / sizeof header_table[0],
440 sizeof header_table[0],
441 (int (*)(const void *, const void *)) strcasecmp);
442
443 if (header) {
444 header->count++;
445 header->handler(value);
446 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
447 lookup("xheader");
448 } else {
449 lookup("nonstandard");
450 }
451 }
452
453
454 /**
455 * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
456 */
457 bool parse_date(const char *s, struct tm *tm)
458 {
459 int r;
460 int len = strlen(s);
461 regmatch_t pmatch[20];
462
463 if (len == 29) {
464 /* RFC 1123 */
465 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
466 if (r == 0) {
467 tm->tm_mday = atoi(s + pmatch[2].rm_so);
468 tm->tm_mon = month(s + pmatch[3].rm_so);
469 tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
470 tm->tm_hour = atoi(s + pmatch[5].rm_so);
471 tm->tm_min = atoi(s + pmatch[6].rm_so);
472 tm->tm_sec = atoi(s + pmatch[7].rm_so);
473 return true;
474 }
475
476 } else if (len == 24) {
477 /* asctime() format */
478 r = regexec(&re_asctime, s, 20, pmatch, 0);
479 if (r == 0) {
480 if (s[pmatch[3].rm_so] == ' ')
481 tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
482 else
483 tm->tm_mday = atoi(s + pmatch[3].rm_so);
484 tm->tm_mon = month(s + pmatch[2].rm_so);
485 tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
486 tm->tm_hour = atoi(s + pmatch[4].rm_so);
487 tm->tm_min = atoi(s + pmatch[5].rm_so);
488 tm->tm_sec = atoi(s + pmatch[6].rm_so);
489 lookup("asctime");
490 return true;
491 }
492
493 } else {
494 /* RFC 1036 */
495 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
496 if (r == 0) {
497 tm->tm_mday = atoi(s + pmatch[2].rm_so);
498 tm->tm_mon = month(s + pmatch[3].rm_so);
499 tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
500 tm->tm_hour = atoi(s + pmatch[5].rm_so);
501 tm->tm_min = atoi(s + pmatch[6].rm_so);
502 tm->tm_sec = atoi(s + pmatch[7].rm_so);
503 lookup("rfc1036");
504 return true;
505 }
506
507 }
508
509 lookup("baddate");
510 return false;
511 }
512
513
/**
 * Convert a month name to the month number.
 *
 * \param s string starting with a three-letter English month abbreviation
 *          ("Jan" .. "Dec"); anything after the third character is ignored
 * \return 0 for January .. 11 for December, or 0 if the name is not
 *         recognised
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int m;

	/* strncmp() stops at a terminator, so short input is never overrun */
	for (m = 0; m != 12; m++) {
		if (strncmp(s, names[m], 3) == 0)
			return m;
	}
	return 0;
}
544
545
/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * The broken-down time is first converted as if it were local time; the
 * local offset from UTC at that instant is then measured by passing the
 * result through gmtime() and mktime() again, and cancelled out.
 *
 * \param t broken-down UTC time; may be modified by mktime()
 * \return seconds since the epoch, or -1 on failure
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t tl, tb;
	struct tm *tg;

	tl = mktime(t);
	if (tl == -1) {
		/* retry one hour earlier, then add the hour back */
		t->tm_hour--;
		tl = mktime(t);
		if (tl == -1)
			return -1;	/* can't deal with output from strptime */
		tl += 3600;
	}

	tg = gmtime(&tl);
	if (!tg)
		return -1;	/* gmtime() may fail and return NULL */
	tg->tm_isdst = 0;
	tb = mktime(tg);
	if (tb == -1) {
		tg->tm_hour--;
		tb = mktime(tg);
		if (tb == -1)
			return -1;	/* can't deal with output from gmtime */
		tb += 3600;
	}

	/* tb - tl is the local offset; subtract it from tl */
	return (tl - (tb - tl));
}
577
578
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * A leading CR LF is skipped only when it is followed by a space or tab;
 * after that any run of spaces and tabs is skipped.
 *
 * \return pointer to the first character after the white space
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
		p += 2;

	return p + strspn(p, " \t");
}
590
591
592 /**
593 * Parse a list of elements (#rule in [2.1]).
594 */
595 bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
596 void (*callback)(const char *s, regmatch_t pmatch[]))
597 {
598 int r;
599 unsigned int items = 0;
600 regmatch_t pmatch[20];
601
602 do {
603 r = regexec(preg, s, 20, pmatch, 0);
604 if (r) {
605 if (html)
606 printf("<li class='error'>");
607 printf(" Failed to match list item %i\n", items + 1);
608 if (html)
609 printf("</li>\n");
610 return false;
611 }
612
613 if (callback)
614 callback(s, pmatch);
615 items++;
616
617 s += pmatch[0].rm_eo;
618 s = skip_lws(s);
619 if (*s == 0)
620 break;
621 if (*s != ',') {
622 if (html)
623 printf("<li class='error'>");
624 printf(" Expecting , after list item %i\n", items);
625 if (html)
626 printf("</li>\n");
627 return false;
628 }
629 while (*s == ',')
630 s = skip_lws(s + 1);
631 } while (*s != 0);
632
633 if (items < n || m < items) {
634 if (html)
635 printf("<li class='error'>");
636 printf(" %i items in list, but there should be ", items);
637 if (m == UINT_MAX)
638 printf("at least %i\n", n);
639 else
640 printf("between %i and %i\n", n, m);
641 if (html)
642 printf("</li>\n");
643 return false;
644 }
645
646 return true;
647 }
648
649
/* Header-specific validation. */

/** Accept-Ranges: only the exact values "bytes" and "none" are accepted. */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
660
661 void header_age(const char *s)
662 {
663 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
664 lookup("badage");
665 else
666 lookup("ok");
667 }
668
669 void header_allow(const char *s)
670 {
671 if (parse_list(s, &re_token, 0, UINT_MAX, 0))
672 lookup("ok");
673 else
674 lookup("badallow");
675 }
676
677 void header_cache_control(const char *s)
678 {
679 if (parse_list(s, &re_token_value, 1, UINT_MAX,
680 header_cache_control_callback))
681 lookup("ok");
682 else
683 lookup("badcachecont");
684 }
685
/* Cache-Control directives recognised by header_cache_control_callback(),
 * kept in sorted order. */
char cache_control_list[][20] = {
	"max-age",
	"max-stale",
	"min-fresh",
	"must-revalidate",
	"no-cache",
	"no-store",
	"no-transform",
	"only-if-cached",
	"private",
	"proxy-revalidate",
	"public",
	"s-maxage"
};
691
692 void header_cache_control_callback(const char *s, regmatch_t pmatch[])
693 {
694 size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
695 char name[20];
696 char *dir;
697
698 if (19 < len) {
699 lookup("unknowncachecont");
700 return;
701 }
702
703 strncpy(name, s + pmatch[1].rm_so, len);
704 name[len] = 0;
705
706 dir = bsearch(name, cache_control_list,
707 sizeof cache_control_list / sizeof cache_control_list[0],
708 sizeof cache_control_list[0],
709 (int (*)(const void *, const void *)) strcasecmp);
710
711 if (!dir) {
712 if (html)
713 printf("<li class='warning'>");
714 printf(" Cache-Control directive '");
715 print(name, strlen(name));
716 printf("':\n");
717 if (html)
718 printf("</li>\n");
719 lookup("unknowncachecont");
720 }
721 }
722
/** Connection: only the exact value "close" is accepted. */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
730
731 void header_content_encoding(const char *s)
732 {
733 if (parse_list(s, &re_token, 1, UINT_MAX,
734 header_content_encoding_callback))
735 lookup("ok");
736 else
737 lookup("badcontenc");
738 }
739
/* Content codings recognised by header_content_encoding_callback(),
 * kept in sorted order. */
char content_coding_list[][20] = {
	"compress",
	"deflate",
	"gzip",
	"identity"
};
743
744 void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
745 {
746 size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
747 char name[20];
748 char *dir;
749
750 if (19 < len) {
751 lookup("unknowncontenc");
752 return;
753 }
754
755 strncpy(name, s + pmatch[1].rm_so, len);
756 name[len] = 0;
757
758 dir = bsearch(name, content_coding_list,
759 sizeof content_coding_list / sizeof content_coding_list[0],
760 sizeof content_coding_list[0],
761 (int (*)(const void *, const void *)) strcasecmp);
762 if (!dir) {
763 if (html)
764 printf("<li class='warning'>");
765 printf(" Content-Encoding '%s':\n", name);
766 if (html)
767 printf("</li>\n");
768 lookup("unknowncontenc");
769 }
770 }
771
772 void header_content_language(const char *s)
773 {
774 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
775 lookup("ok");
776 else
777 lookup("badcontlang");
778 }
779
780 void header_content_length(const char *s)
781 {
782 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
783 lookup("badcontlen");
784 else
785 lookup("ok");
786 }
787
/** Content-Location: the only check made is that the value has no space. */
void header_content_location(const char *s)
{
	lookup(strchr(s, ' ') ? "badcontloc" : "ok");
}
795
/** Content-MD5: only the length of the value (24 characters) is checked. */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
803
/** Content-Range: always reported as "contentrange"; the value is ignored. */
void header_content_range(const char *s)
{
	(void) s;
	lookup("contentrange");
}
809
810 void header_content_type(const char *s)
811 {
812 bool charset = false;
813 char *type, *subtype;
814 unsigned int i;
815 int r;
816 regmatch_t pmatch[30];
817
818 r = regexec(&re_content_type, s, 30, pmatch, 0);
819 if (r) {
820 lookup("badcontenttype");
821 return;
822 }
823
824 type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
825 subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
826
827 /* parameters */
828 for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
829 char *attrib, *value;
830
831 attrib = strndup(s + pmatch[i + 1].rm_so,
832 pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
833 value = strndup(s + pmatch[i + 2].rm_so,
834 pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
835
836 if (strcasecmp(attrib, "charset") == 0)
837 charset = true;
838 }
839
840 if (strcasecmp(type, "text") == 0 && !charset)
841 lookup("nocharset");
842 else
843 lookup("ok");
844 }
845
/**
 * Date: must parse as an HTTP date and lie within 10 seconds of the
 * local clock, otherwise "wrongdate" is reported.
 */
void header_date(const char *s)
{
	double diff;
	time_t time0, time1;
	struct tm tm;

	/* clear every field: mktime() reads tm_isdst, which parsing may
	 * leave unset */
	memset(&tm, 0, sizeof tm);

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	time1 = mktime_from_utc(&tm);

	diff = difftime(time0, time1);
	if (10 < fabs(diff))
		lookup("wrongdate");
	else
		lookup("ok");
}
863
864 void header_etag(const char *s)
865 {
866 int r;
867 r = regexec(&re_etag, s, 0, 0, 0);
868 if (r)
869 lookup("badetag");
870 else
871 lookup("ok");
872 }
873
/**
 * Expires: must parse as an HTTP date. parse_date() reports any failure
 * itself, so only success is reported here.
 */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;
	lookup("ok");
}
880
/**
 * Last-Modified: must parse as an HTTP date and must not be more than
 * 10 seconds ahead of the local clock.
 */
void header_last_modified(const char *s)
{
	double diff;
	time_t time0, time1;
	struct tm tm;

	/* clear every field: mktime() reads tm_isdst, which parsing may
	 * leave unset */
	memset(&tm, 0, sizeof tm);

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	time1 = mktime_from_utc(&tm);

	diff = difftime(time1, time0);
	if (10 < diff)
		lookup("futurelastmod");
	else
		lookup("ok");
}
898
899 void header_location(const char *s)
900 {
901 int r;
902 r = regexec(&re_absolute_uri, s, 0, 0, 0);
903 if (r)
904 lookup("badlocation");
905 else
906 lookup("ok");
907 }
908
909 void header_pragma(const char *s)
910 {
911 if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
912 lookup("ok");
913 else
914 lookup("badpragma");
915 }
916
917 void header_retry_after(const char *s)
918 {
919 struct tm tm;
920
921 if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
922 lookup("ok");
923 return;
924 }
925
926 if (!parse_date(s, &tm))
927 return;
928
929 lookup("ok");
930 }
931
932 void header_server(const char *s)
933 {
934 int r;
935 r = regexec(&re_server, s, 0, 0, 0);
936 if (r)
937 lookup("badserver");
938 else
939 lookup("ok");
940 }
941
942 void header_trailer(const char *s)
943 {
944 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
945 lookup("ok");
946 else
947 lookup("badtrailer");
948 }
949
950 void header_transfer_encoding(const char *s)
951 {
952 if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
953 header_transfer_encoding_callback))
954 lookup("ok");
955 else
956 lookup("badtransenc");
957 }
958
/* Transfer codings recognised by header_transfer_encoding_callback(),
 * kept in sorted order. */
char transfer_coding_list[][20] = {
	"chunked",
	"compress",
	"deflate",
	"gzip",
	"identity"
};
962
963 void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
964 {
965 size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
966 char name[20];
967 char *dir;
968
969 if (19 < len) {
970 lookup("unknowntransenc");
971 return;
972 }
973
974 strncpy(name, s + pmatch[1].rm_so, len);
975 name[len] = 0;
976
977 dir = bsearch(name, transfer_coding_list,
978 sizeof transfer_coding_list / sizeof transfer_coding_list[0],
979 sizeof transfer_coding_list[0],
980 (int (*)(const void *, const void *)) strcasecmp);
981 if (!dir) {
982 if (html)
983 printf("<li class='warning'>");
984 printf(" Transfer-Encoding '%s':\n", name);
985 if (html)
986 printf("</li>\n");
987 lookup("unknowntransenc");
988 }
989 }
990
991 void header_upgrade(const char *s)
992 {
993 int r;
994 r = regexec(&re_upgrade, s, 0, 0, 0);
995 if (r)
996 lookup("badupgrade");
997 else
998 lookup("ok");
999 }
1000
1001 void header_vary(const char *s)
1002 {
1003 if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
1004 lookup("ok");
1005 else
1006 lookup("badvary");
1007 }
1008
/** Via: always reported as "via"; the value is ignored. */
void header_via(const char *s)
{
	(void) s;
	lookup("via");
}
1014
1015 /* http://wp.netscape.com/newsref/std/cookie_spec.html */
1016 void header_set_cookie(const char *s)
1017 {
1018 bool ok = true;
1019 int r;
1020 const char *semi = strchr(s, ';');
1021 const char *s2;
1022 struct tm tm;
1023 double diff;
1024 time_t time0, time1;
1025 regmatch_t pmatch[20];
1026
1027 if (semi)
1028 s2 = strndup(s, semi - s);
1029 else
1030 s2 = s;
1031
1032 r = regexec(&re_cookie_nameval, s2, 0, 0, 0);
1033 if (r) {
1034 lookup("cookiebadnameval");
1035 ok = false;
1036 }
1037
1038 if (!semi)
1039 return;
1040
1041 s = skip_lws(semi + 1);
1042
1043 while (*s) {
1044 semi = strchr(s, ';');
1045 if (semi)
1046 s2 = strndup(s, semi - s);
1047 else
1048 s2 = s;
1049
1050 if (strncasecmp(s2, "expires=", 8) == 0) {
1051 s2 += 8;
1052 r = regexec(&re_cookie_expires, s2, 20, pmatch, 0);
1053 if (r == 0) {
1054 tm.tm_mday = atoi(s2 + pmatch[2].rm_so);
1055 tm.tm_mon = month(s2 + pmatch[3].rm_so);
1056 tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900;
1057 tm.tm_hour = atoi(s2 + pmatch[5].rm_so);
1058 tm.tm_min = atoi(s2 + pmatch[6].rm_so);
1059 tm.tm_sec = atoi(s2 + pmatch[7].rm_so);
1060
1061 time0 = time(0);
1062 time1 = mktime_from_utc(&tm);
1063
1064 diff = difftime(time0, time1);
1065 if (10 < diff) {
1066 lookup("cookiepastdate");
1067 ok = false;
1068 }
1069 } else {
1070 lookup("cookiebaddate");
1071 ok = false;
1072 }
1073 } else if (strncasecmp(s2, "domain=", 7) == 0) {
1074 } else if (strncasecmp(s2, "path=", 5) == 0) {
1075 if (s2[5] != '/') {
1076 lookup("cookiebadpath");
1077 ok = false;
1078 }
1079 } else if (strcasecmp(s, "secure") == 0) {
1080 } else {
1081 if (html)
1082 printf("<li class='warning'>");
1083 printf(" Set-Cookie field '%s':\n", s2);
1084 if (html)
1085 printf("</li>\n");
1086 lookup("cookieunknownfield");
1087 ok = false;
1088 }
1089
1090 if (semi)
1091 s = skip_lws(semi + 1);
1092 else
1093 break;
1094 }
1095
1096 if (ok)
1097 lookup("ok");
1098 }
1099
1100
/**
 * Write "httplint: <error>" and a newline to stderr, then terminate the
 * program with EXIT_FAILURE. Does not return.
 */
void die(const char *error)
{
	FILE *const sink = stderr;

	fprintf(sink, "httplint: %s\n", error);
	exit(EXIT_FAILURE);
}
1109
1110
1111 /**
1112 * Print a string which contains control characters.
1113 */
1114 void print(const char *s, size_t len)
1115 {
1116 size_t i;
1117 for (i = 0; i != len; i++) {
1118 if (html && s[i] == '<')
1119 printf("&lt;");
1120 else if (html && s[i] == '>')
1121 printf("&gt;");
1122 else if (html && s[i] == '&')
1123 printf("&amp;");
1124 else if (31 < s[i] && s[i] < 127)
1125 putchar(s[i]);
1126 else {
1127 if (html)
1128 printf("<span class='cc'>");
1129 printf("[%.2x]", s[i]);
1130 if (html)
1131 printf("</span>");
1132 }
1133 }
1134 }
1135
1136
1137 struct message_entry {
1138 const char key[20];
1139 const char *value;
1140 } message_table[] = {
1141 { "1xx", "A response status code in the range 100 - 199 indicates a "
1142 "'provisional response'." },
1143 { "2xx", "A response status code in the range 200 - 299 indicates that "
1144 "the request was successful." },
1145 { "3xx", "A response status code in the range 300 - 399 indicates that "
1146 "the client should redirect to a new URL." },
1147 { "4xx", "A response status code in the range 400 - 499 indicates that "
1148 "the request could not be fulfilled due to client error." },
1149 { "5xx", "A response status code in the range 500 - 599 indicates that "
1150 "an error occurred on the server." },
1151 { "asctime", "Warning: This date is in the obsolete asctime() format. "
1152 "Consider using the RFC 1123 format instead." },
1153 { "badage", "Error: The Age header must be one number." },
1154 { "badallow", "Error: The Allow header must be a comma-separated list of "
1155 "HTTP methods." },
1156 { "badcachecont", "Error: The Cache-Control header must be a "
1157 "comma-separated list of directives." },
1158 { "badconnection", "Warning: The only value of the Connection header "
1159 "defined by HTTP/1.1 is \"close\"." },
1160 { "badcontenc", "Error: The Content-Encoding header must be a "
1161 "comma-separated list of encodings." },
1162 { "badcontenttype", "Error: The Content-Type header must be of the form "
1163 "'type/subtype (; optional parameters)'." },
1164 { "badcontlang", "Error: The Content-Language header must be a "
1165 "comma-separated list of language tags." },
1166 { "badcontlen", "Error: The Content-Length header must be a number." },
1167 { "badcontloc", "Error: The Content-Location header must be an absolute "
1168 "or relative URI." },
1169 { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
1170 "MD5 sum." },
1171 { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
1172 "1123 format." },
1173 { "badetag", "Error: The ETag header must be a quoted string (optionally "
1174 "preceded by \"W/\" for a weak tag)." },
1175 { "badlocation", "Error: The Location header must be an absolute URI. "
1176 "Relative URIs are not permitted." },
1177 { "badpragma", "Error: The Pragma header must be a comma-separated list of "
1178 "directives." },
1179 { "badserver", "Error: The Server header must be a space-separated list of "
1180 "products of the form Name/optional-version and comments "
1181 "in ()." },
1182 { "badstatus", "Warning: The response status code is outside the standard "
1183 "range 100 - 599." },
1184 { "badstatusline", "Error: Failed to parse the response Status-Line. The "
1185 "status line must be of the form 'HTTP/n.n <3-digit "
1186 "status> <reason phrase>'." },
1187 { "badtrailer", "Error: The Trailer header must be a comma-separated list "
1188 "of header names." },
1189 { "badtransenc", "Error: The Transfer-Encoding header must be a "
1190 "comma-separated of encodings." },
1191 { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
1192 "of product identifiers." },
1193 { "badvary", "Error: The Vary header must be a comma-separated list "
1194 "of header names, or \"*\"." },
1195 { "contentrange", "Warning: The Content-Range header should not be returned "
1196 "by the server for this request." },
1197 { "cookiebaddate", "Error: The expires date must be in the form "
1198 "\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." },
1199 { "cookiebadnameval", "Error: A Set-Cookie header must start with "
1200 "name=value, each excluding semi-colon, comma and "
1201 "white space." },
1202 { "cookiebadpath", "Error: The path does not start with \"/\"." },
1203 { "cookiepastdate", "Warning: The expires date is in the past. The cookie "
1204 "will be deleted by browsers." },
1205 { "cookieunknownfield", "Warning: This is not a standard Set-Cookie "
1206 "field." },
1207 { "endofheaders", "End of headers." },
1208 { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
1209 "version of this tool." },
1210 { "futurelastmod", "Error: The specified Last-Modified date-time is in "
1211 "the future." },
1212 { "headertoolong", "Warning: Header too long: ignored." },
1213 { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
1214 { "missingcontenttype", "Warning: No Content-Type header was present. The "
1215 "client will have to guess the media type or ask "
1216 "the user. Adding a Content-Type header is strongly "
1217 "recommended." },
1218 { "missingcontlang", "Consider adding a Content-Language header if "
1219 "applicable for this document." },
1220 { "missingdate", "Warning: No Date header was present. A Date header must "
1221 "be present, unless the server does not have a clock, or "
1222 "the response is 100, 101, or 500 - 599." },
1223 { "missinglastmod", "No Last-Modified header was present. The "
1224 "HTTP/1.1 specification states that this header should "
1225 "be sent whenever feasible." },
1226 { "nocharset", "Warning: No character set is specified in the Content-Type. "
1227 "Clients may assume the default of ISO-8859-1. Consider "
1228 "appending '; charset=...'." },
1229 { "nonstandard", "Warning: I don't know anything about this header. Is it "
1230 "a standard HTTP response header?" },
1231 { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
1232 "that all header lines end with CR LF." },
1233 { "ok", "OK." },
1234 { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
1235 "to HTTP/1.1." },
1236 { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
1237 "Consider using the RFC 1123 format instead." },
1238 { "ugly", "This URL appears to contain implementation-specific parts such "
1239 "as an extension or a query string. This may make the URL liable "
1240 "to change when the implementation is changed, resulting in "
1241 "broken links. Consider using URL rewriting or equivalent to "
1242 "implement a future-proof URL space. See "
1243 "http://www.w3.org/Provider/Style/URI for more information." },
1244 { "unknowncachecont", "Warning: This Cache-Control directive is "
1245 "non-standard and will have limited support." },
1246 { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
1247 { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
1248 "range." },
1249 { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
1250 { "via", "This header was added by a proxy, cache or gateway." },
1251 { "wrongdate", "Warning: The server date-time differs from this system's "
1252 "date-time by more than 10 seconds. Check that both the "
1253 "system clocks are correct." },
1254 { "xheader", "This is an extension header. I don't know how to check it." }
1255 };
1256
1257
1258 /**
1259 * Look up and output the string referenced by a key.
1260 */
1261 void lookup(const char *key)
1262 {
1263 const char *s, *spc;
1264 int x;
1265 struct message_entry *message;
1266
1267 message = bsearch(key, message_table,
1268 sizeof message_table / sizeof message_table[0],
1269 sizeof message_table[0],
1270 (int (*)(const void *, const void *)) strcasecmp);
1271 if (message)
1272 s = message->value;
1273 else
1274 s = key;
1275
1276 if (html) {
1277 if (strncmp(s, "Warning:", 8) == 0)
1278 printf("<li class='warning'>");
1279 else if (strncmp(s, "Error:", 6) == 0)
1280 printf("<li class='error'>");
1281 else if (strncmp(s, "OK", 2) == 0)
1282 printf("<li class='ok'>");
1283 else
1284 printf("<li>");
1285 for (; *s; s++) {
1286 if (strncmp(s, "http://", 7) == 0) {
1287 spc = strchr(s, ' ');
1288 printf("<a href='%.*s'>%.*s</a>", spc - s, s, spc - s, s);
1289 s = spc;
1290 }
1291 switch (*s) {
1292 case '<': printf("&lt;"); break;
1293 case '>': printf("&gt;"); break;
1294 case '&': printf("&amp;"); break;
1295 default: printf("%c", *s); break;
1296 }
1297 }
1298 printf("</li>\n");
1299
1300 } else {
1301 printf(" ");
1302 x = 4;
1303 while (*s) {
1304 spc = strchr(s, ' ');
1305 if (!spc)
1306 spc = s + strlen(s);
1307 if (75 < x + (spc - s)) {
1308 printf("\n ");
1309 x = 4;
1310 }
1311 x += spc - s + 1;
1312 printf("%.*s ", spc - s, s);
1313 if (*spc)
1314 s = spc + 1;
1315 else
1316 s = spc;
1317 }
1318 printf("\n\n");
1319 }
1320 }
1321

  ViewVC Help
Powered by ViewVC 1.1.26