/[james]/httplint/httplint.c
ViewVC logotype

Annotation of /httplint/httplint.c

Parent Directory | Revision Log


Revision 59 - (hide annotations) (download) (as text)
Mon Apr 5 11:16:27 2004 UTC (20 years, 9 months ago) by james
File MIME type: text/x-csrc
File size: 35151 byte(s)
Fix summer time bugs.

1 james 40 /*
2     * HTTP Header Lint
3 james 50 * Licensed under the MIT License
4     * http://www.opensource.org/licenses/mit-license
5     * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
6 james 40 */
7    
8     /*
9     * Compile using
10     * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11     *
12     * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13     */
14    
15     #define _GNU_SOURCE
16     #define __USE_XOPEN
17    
18     #include <limits.h>
19     #include <math.h>
20     #include <stdbool.h>
21     #include <stdio.h>
22     #include <stdlib.h>
23     #include <string.h>
24     #include <time.h>
25     #include <sys/types.h>
26     #include <regex.h>
27     #include <curl/curl.h>
28    
29    
30     #define NUMBER "0123456789"
31     #define UNUSED(x) x = x
32    
33    
34     bool start;
35 james 56 bool html = false;
36 james 40 CURL *curl;
37     int status_code;
38     char error_buffer[CURL_ERROR_SIZE];
39     regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
40 james 41 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
41 james 48 re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires;
42 james 40
43    
44     void init(void);
45     void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
46     void check_url(const char *url);
47     size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
48     size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);
49     void check_status_line(const char *s);
50     void check_header(const char *name, const char *value);
51     bool parse_date(const char *s, struct tm *tm);
52 james 41 int month(const char *s);
53 james 42 time_t mktime_from_utc(struct tm *t);
54 james 40 const char *skip_lws(const char *s);
55     bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
56     void (*callback)(const char *s, regmatch_t pmatch[]));
57     void header_accept_ranges(const char *s);
58     void header_age(const char *s);
59     void header_allow(const char *s);
60     void header_cache_control(const char *s);
61     void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
62     void header_connection(const char *s);
63     void header_content_encoding(const char *s);
64     void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
65     void header_content_language(const char *s);
66     void header_content_length(const char *s);
67     void header_content_location(const char *s);
68     void header_content_md5(const char *s);
69     void header_content_range(const char *s);
70     void header_content_type(const char *s);
71     void header_date(const char *s);
72     void header_etag(const char *s);
73     void header_expires(const char *s);
74     void header_last_modified(const char *s);
75     void header_location(const char *s);
76     void header_pragma(const char *s);
77     void header_retry_after(const char *s);
78     void header_server(const char *s);
79     void header_trailer(const char *s);
80     void header_transfer_encoding(const char *s);
81     void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
82     void header_upgrade(const char *s);
83     void header_vary(const char *s);
84     void header_via(const char *s);
85 james 48 void header_set_cookie(const char *s);
86 james 40 void die(const char *error);
87     void print(const char *s, size_t len);
88     void lookup(const char *key);
89    
90    
91     struct header_entry {
92     char name[40];
93     void (*handler)(const char *s);
94     int count;
95     char *missing;
96     } header_table[] = {
97     { "Accept-Ranges", header_accept_ranges, 0, 0 },
98     { "Age", header_age, 0, 0 },
99     { "Allow", header_allow, 0, 0 },
100     { "Cache-Control", header_cache_control, 0, 0 },
101     { "Connection", header_connection, 0, 0 },
102     { "Content-Encoding", header_content_encoding, 0, 0 },
103     { "Content-Language", header_content_language, 0, "missingcontlang" },
104     { "Content-Length", header_content_length, 0, 0 },
105     { "Content-Location", header_content_location, 0, 0 },
106     { "Content-MD5", header_content_md5, 0, 0 },
107     { "Content-Range", header_content_range, 0, 0 },
108     { "Content-Type", header_content_type, 0, "missingcontenttype" },
109     { "Date", header_date, 0, "missingdate" },
110     { "ETag", header_etag, 0, 0 },
111     { "Expires", header_expires, 0, 0 },
112     { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113     { "Location", header_location, 0, 0 },
114     { "Pragma", header_pragma, 0, 0 },
115     { "Retry-After", header_retry_after, 0, 0 },
116     { "Server", header_server, 0, 0 },
117 james 48 { "Set-Cookie", header_set_cookie, 0, 0 },
118 james 40 { "Trailer", header_trailer, 0, 0 },
119     { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
120     { "Upgrade", header_upgrade, 0, 0 },
121     { "Vary", header_vary, 0, 0 },
122     { "Via", header_via, 0, 0 }
123     };
124    
125    
126     /**
127     * Main entry point.
128     */
129     int main(int argc, char *argv[])
130     {
131 james 56 int i = 1;
132 james 40
133     if (argc < 2)
134 james 56 die("Usage: httplint [--html] url [url ...]");
135 james 40
136     init();
137    
138 james 56 if (1 < argc && strcmp(argv[1], "--html") == 0) {
139     html = true;
140     i++;
141     }
142    
143     for (; i != argc; i++)
144 james 40 check_url(argv[i]);
145    
146     curl_global_cleanup();
147    
148     return 0;
149     }
150    
151    
152     /**
153     * Initialise the curl handle and compile regular expressions.
154     */
155     void init(void)
156     {
157     struct curl_slist *request_headers = 0;
158    
159     if (curl_global_init(CURL_GLOBAL_ALL))
160     die("Failed to initialise libcurl");
161    
162     curl = curl_easy_init();
163     if (!curl)
164     die("Failed to create curl handle");
165    
166     if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
167     die("Failed to set curl options");
168     if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
169     die("Failed to set curl options");
170     if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
171     die("Failed to set curl options");
172     if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
173     die("Failed to set curl options");
174    
175     /* remove libcurl default headers */
176     request_headers = curl_slist_append(request_headers, "Accept:");
177     request_headers = curl_slist_append(request_headers, "Pragma:");
178     if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
179     die("Failed to set curl options");
180    
181     /* compile regular expressions */
182     regcomp_wrapper(&re_status_line,
183     "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
184     REG_EXTENDED);
185     regcomp_wrapper(&re_token,
186 james 44 "^([-0-9a-zA-Z_.!]+)",
187 james 40 REG_EXTENDED);
188     regcomp_wrapper(&re_token_value,
189 james 44 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
190 james 40 REG_EXTENDED);
191     regcomp_wrapper(&re_content_type,
192     "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
193     "(;[ \t]*([-0-9a-zA-Z_.]+)="
194     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
195     REG_EXTENDED);
196     regcomp_wrapper(&re_absolute_uri,
197     "^[a-zA-Z0-9]+://[^ ]+$",
198     REG_EXTENDED);
199     regcomp_wrapper(&re_etag,
200     "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
201     REG_EXTENDED);
202     regcomp_wrapper(&re_server,
203 james 44 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
204 james 40 REG_EXTENDED);
205     regcomp_wrapper(&re_transfer_coding,
206     "^([-0-9a-zA-Z_.]+)[ \t]*"
207     "(;[ \t]*([-0-9a-zA-Z_.]+)="
208     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
209     REG_EXTENDED);
210     regcomp_wrapper(&re_upgrade,
211     "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
212     REG_EXTENDED);
213     regcomp_wrapper(&re_ugly,
214 james 43 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
215 james 40 REG_EXTENDED);
216 james 41 regcomp_wrapper(&re_rfc1123,
217     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
218     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
219     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
220     REG_EXTENDED);
221     regcomp_wrapper(&re_rfc1036,
222     "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
223     "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
224     "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
225     REG_EXTENDED);
226     regcomp_wrapper(&re_asctime,
227     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
228     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
229     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
230     REG_EXTENDED);
231 james 48 regcomp_wrapper(&re_cookie_nameval,
232     "^[^;, ]+=[^;, ]*$",
233     REG_EXTENDED);
234     regcomp_wrapper(&re_cookie_expires,
235     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-"
236     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) "
237     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
238     REG_EXTENDED);
239 james 40 }
240    
241    
/**
 * Compile a regular expression, handling errors.
 *
 * On failure the offending pattern is written to stderr and the program
 * exits through die() with the regerror() text.
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
256    
257    
258     /**
259     * Fetch and check the headers for the specified url.
260     */
261     void check_url(const char *url)
262     {
263     int i, r;
264     CURLcode code;
265    
266     start = true;
267     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
268     header_table[i].count = 0;
269    
270 james 56 if (!html)
271     printf("Checking URL %s\n", url);
272     if (strncmp(url, "http", 4)) {
273     if (html)
274     printf("<p class='warning'>");
275     printf("Warning: this is not an http or https url");
276     if (html)
277     printf("</p>");
278     printf("\n");
279     }
280 james 40
281     if (curl_easy_setopt(curl, CURLOPT_URL, url))
282     die("Failed to set curl options");
283    
284 james 56 if (html)
285     printf("<ul>\n");
286 james 40 code = curl_easy_perform(curl);
287 james 56 if (html)
288     printf("</ul>\n");
289 james 40 if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
290 james 56 if (html)
291     printf("<p class='error'>");
292     printf("Error: ");
293     print(error_buffer, strlen(error_buffer));
294     printf(".");
295     if (html)
296     printf("</p>");
297     printf("\n");
298 james 40 return;
299     } else {
300     printf("\n");
301 james 56 if (html)
302     printf("<ul>");
303 james 40 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
304     if (header_table[i].count == 0 && header_table[i].missing)
305     lookup(header_table[i].missing);
306     }
307     }
308    
309     r = regexec(&re_ugly, url, 0, 0, 0);
310     if (r)
311     lookup("ugly");
312 james 56
313     if (html)
314     printf("</ul>");
315 james 40 }
316    
317    
318     /**
319     * Callback for received header data.
320     */
321     size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
322     {
323     const size_t size = msize * nmemb;
324     char s[400], *name, *value;
325    
326     UNUSED(stream);
327    
328 james 56 printf(html ? "<li><code>" : "* ");
329 james 40 print(ptr, size);
330 james 56 printf(html ? "</code><ul>" : "\n");
331 james 40
332     if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
333     lookup("notcrlf");
334 james 56 if (html)
335     printf("</ul></li>\n");
336 james 40 return size;
337     }
338     if (sizeof s <= size) {
339 james 56 lookup("headertoolong");
340     if (html)
341     printf("</ul></li>\n");
342 james 40 return size;
343     }
344     strncpy(s, ptr, size);
345     s[size - 2] = 0;
346    
347     name = s;
348     value = strchr(s, ':');
349    
350     if (s[0] == 0) {
351     /* empty header indicates end of headers */
352 james 56 lookup("endofheaders");
353     if (html)
354     printf("</ul></li>\n");
355 james 40 return 0;
356    
357     } else if (start) {
358     /* Status-Line [6.1] */
359     check_status_line(s);
360     start = false;
361    
362     } else if (!value) {
363     lookup("missingcolon");
364    
365     } else {
366     *value = 0;
367     value++;
368    
369     check_header(name, skip_lws(value));
370     }
371    
372 james 56 if (html)
373     printf("</ul></li>\n");
374 james 40 return size;
375     }
376    
377    
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned, which aborts the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
392    
393    
394     /**
395     * Check the syntax and content of the response Status-Line [6.1].
396     */
397     void check_status_line(const char *s)
398     {
399     const char *reason;
400     unsigned int major = 0, minor = 0;
401     int r;
402     regmatch_t pmatch[5];
403    
404     r = regexec(&re_status_line, s, 5, pmatch, 0);
405     if (r) {
406     lookup("badstatusline");
407     return;
408     }
409    
410     major = atoi(s + pmatch[1].rm_so);
411     minor = atoi(s + pmatch[2].rm_so);
412     status_code = atoi(s + pmatch[3].rm_so);
413     reason = s + pmatch[4].rm_so;
414    
415     if (major < 1 || (major == 1 && minor == 0)) {
416     lookup("oldhttp");
417     } else if ((major == 1 && 1 < minor) || 1 < major) {
418     lookup("futurehttp");
419     } else {
420     if (status_code < 100 || 600 <= status_code) {
421     lookup("badstatus");
422     } else {
423     char key[] = "xxx";
424     key[0] = '0' + status_code / 100;
425     lookup(key);
426     }
427     }
428     }
429    
430    
431     /**
432     * Check the syntax and content of a header.
433     */
434     void check_header(const char *name, const char *value)
435     {
436     struct header_entry *header;
437    
438     header = bsearch(name, header_table,
439     sizeof header_table / sizeof header_table[0],
440     sizeof header_table[0],
441     (int (*)(const void *, const void *)) strcasecmp);
442    
443     if (header) {
444     header->count++;
445     header->handler(value);
446 james 44 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
447     lookup("xheader");
448     } else {
449 james 40 lookup("nonstandard");
450 james 44 }
451 james 40 }
452    
453    
454     /**
455     * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
456     */
457     bool parse_date(const char *s, struct tm *tm)
458     {
459 james 41 int r;
460 james 40 int len = strlen(s);
461 james 41 regmatch_t pmatch[20];
462 james 40
463 james 59 tm->tm_isdst = 0;
464     tm->tm_gmtoff = 0;
465     tm->tm_zone = "GMT";
466    
467 james 40 if (len == 29) {
468     /* RFC 1123 */
469 james 41 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
470     if (r == 0) {
471     tm->tm_mday = atoi(s + pmatch[2].rm_so);
472     tm->tm_mon = month(s + pmatch[3].rm_so);
473     tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
474     tm->tm_hour = atoi(s + pmatch[5].rm_so);
475     tm->tm_min = atoi(s + pmatch[6].rm_so);
476     tm->tm_sec = atoi(s + pmatch[7].rm_so);
477 james 40 return true;
478 james 41 }
479 james 40
480     } else if (len == 24) {
481     /* asctime() format */
482 james 41 r = regexec(&re_asctime, s, 20, pmatch, 0);
483     if (r == 0) {
484     if (s[pmatch[3].rm_so] == ' ')
485     tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
486     else
487     tm->tm_mday = atoi(s + pmatch[3].rm_so);
488     tm->tm_mon = month(s + pmatch[2].rm_so);
489     tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
490     tm->tm_hour = atoi(s + pmatch[4].rm_so);
491     tm->tm_min = atoi(s + pmatch[5].rm_so);
492     tm->tm_sec = atoi(s + pmatch[6].rm_so);
493 james 40 lookup("asctime");
494     return true;
495     }
496    
497     } else {
498     /* RFC 1036 */
499 james 41 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
500     if (r == 0) {
501     tm->tm_mday = atoi(s + pmatch[2].rm_so);
502     tm->tm_mon = month(s + pmatch[3].rm_so);
503     tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
504     tm->tm_hour = atoi(s + pmatch[5].rm_so);
505     tm->tm_min = atoi(s + pmatch[6].rm_so);
506     tm->tm_sec = atoi(s + pmatch[7].rm_so);
507 james 40 lookup("rfc1036");
508     return true;
509     }
510    
511     }
512    
513     lookup("baddate");
514     return false;
515     }
516    
517    
/**
 * Convert a month name to the month number.
 *
 * Only the first three characters of s are examined, so s may point into
 * a longer date string. Returns 0 (January) to 11 (December); a string
 * that does not start with an English month abbreviation yields 0.
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	for (i = 0; i != 12; i++) {
		/* strncmp stops at a terminator, so short input is safe */
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}
	return 0;
}
548    
549    
/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * mktime() reads *t as local time. The difference between local time and
 * UTC is measured by passing the result through gmtime() and mktime()
 * once more, and is then taken off again. A result of -1 from mktime() is
 * retried one hour earlier, to tell the valid time -1 apart from failure.
 *
 * Returns the time in seconds since the epoch, or -1 if it cannot be
 * represented. *t may be modified by mktime().
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t local, shifted;
	const struct tm *utc;
	struct tm broken;

	local = mktime(t);
	if (local == (time_t) -1) {
		t->tm_hour--;
		local = mktime(t);
		if (local == (time_t) -1)
			return -1; /* can't deal with output from strptime */
		local += 3600;
	}

	utc = gmtime(&local);
	if (!utc)
		return -1;

	/* work on a copy rather than on libc's static buffer */
	broken = *utc;
	broken.tm_isdst = 0;
	shifted = mktime(&broken);
	if (shifted == (time_t) -1) {
		broken.tm_hour--;
		shifted = mktime(&broken);
		if (shifted == (time_t) -1)
			return -1; /* can't deal with output from gmtime */
		shifted += 3600;
	}

	return local - (shifted - local);
}
581    
582    
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * A CRLF is skipped only when a space or tab follows it; any run of
 * spaces and tabs is then skipped. Returns a pointer to the first
 * character after the white space.
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
		p += 2;

	return p + strspn(p, " \t");
}
594    
595    
596     /**
597     * Parse a list of elements (#rule in [2.1]).
598     */
599     bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
600     void (*callback)(const char *s, regmatch_t pmatch[]))
601     {
602     int r;
603     unsigned int items = 0;
604     regmatch_t pmatch[20];
605    
606     do {
607     r = regexec(preg, s, 20, pmatch, 0);
608     if (r) {
609 james 56 if (html)
610     printf("<li class='error'>");
611 james 40 printf(" Failed to match list item %i\n", items + 1);
612 james 56 if (html)
613     printf("</li>\n");
614 james 40 return false;
615     }
616    
617     if (callback)
618     callback(s, pmatch);
619     items++;
620    
621     s += pmatch[0].rm_eo;
622     s = skip_lws(s);
623     if (*s == 0)
624     break;
625     if (*s != ',') {
626 james 56 if (html)
627     printf("<li class='error'>");
628 james 40 printf(" Expecting , after list item %i\n", items);
629 james 56 if (html)
630     printf("</li>\n");
631 james 40 return false;
632     }
633     while (*s == ',')
634     s = skip_lws(s + 1);
635     } while (*s != 0);
636    
637     if (items < n || m < items) {
638 james 56 if (html)
639     printf("<li class='error'>");
640 james 40 printf(" %i items in list, but there should be ", items);
641     if (m == UINT_MAX)
642     printf("at least %i\n", n);
643     else
644     printf("between %i and %i\n", n, m);
645 james 56 if (html)
646     printf("</li>\n");
647 james 40 return false;
648     }
649    
650     return true;
651     }
652    
653    
654     /* Header-specific validation. */
/**
 * Accept-Ranges: only the values "bytes" and "none" are recognised.
 */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
664    
665     void header_age(const char *s)
666     {
667     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
668     lookup("badage");
669     else
670     lookup("ok");
671     }
672    
673     void header_allow(const char *s)
674     {
675     if (parse_list(s, &re_token, 0, UINT_MAX, 0))
676     lookup("ok");
677     else
678     lookup("badallow");
679     }
680    
681     void header_cache_control(const char *s)
682     {
683     if (parse_list(s, &re_token_value, 1, UINT_MAX,
684     header_cache_control_callback))
685     lookup("ok");
686     else
687     lookup("badcachecont");
688     }
689    
/* Known Cache-Control directives. Kept in sorted order: the list is
 * searched with bsearch(). */
char cache_control_list[][20] = {
	"max-age",
	"max-stale",
	"min-fresh",
	"must-revalidate",
	"no-cache",
	"no-store",
	"no-transform",
	"only-if-cached",
	"private",
	"proxy-revalidate",
	"public",
	"s-maxage"
};
695    
696     void header_cache_control_callback(const char *s, regmatch_t pmatch[])
697     {
698     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
699     char name[20];
700     char *dir;
701    
702     if (19 < len) {
703     lookup("unknowncachecont");
704     return;
705     }
706    
707     strncpy(name, s + pmatch[1].rm_so, len);
708     name[len] = 0;
709    
710     dir = bsearch(name, cache_control_list,
711     sizeof cache_control_list / sizeof cache_control_list[0],
712     sizeof cache_control_list[0],
713     (int (*)(const void *, const void *)) strcasecmp);
714    
715     if (!dir) {
716 james 56 if (html)
717     printf("<li class='warning'>");
718     printf(" Cache-Control directive '");
719     print(name, strlen(name));
720     printf("':\n");
721     if (html)
722     printf("</li>\n");
723 james 40 lookup("unknowncachecont");
724     }
725     }
726    
/**
 * Connection: "close" is the only value accepted.
 */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
734    
735     void header_content_encoding(const char *s)
736     {
737     if (parse_list(s, &re_token, 1, UINT_MAX,
738     header_content_encoding_callback))
739     lookup("ok");
740     else
741     lookup("badcontenc");
742     }
743    
/* Known content codings. Kept in sorted order: the list is searched with
 * bsearch(). */
char content_coding_list[][20] = {
	"compress",
	"deflate",
	"gzip",
	"identity"
};
747    
748     void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
749     {
750     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
751     char name[20];
752     char *dir;
753    
754     if (19 < len) {
755     lookup("unknowncontenc");
756     return;
757     }
758    
759     strncpy(name, s + pmatch[1].rm_so, len);
760     name[len] = 0;
761    
762     dir = bsearch(name, content_coding_list,
763     sizeof content_coding_list / sizeof content_coding_list[0],
764     sizeof content_coding_list[0],
765     (int (*)(const void *, const void *)) strcasecmp);
766     if (!dir) {
767 james 56 if (html)
768     printf("<li class='warning'>");
769 james 40 printf(" Content-Encoding '%s':\n", name);
770 james 56 if (html)
771     printf("</li>\n");
772 james 40 lookup("unknowncontenc");
773     }
774     }
775    
776     void header_content_language(const char *s)
777     {
778     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
779     lookup("ok");
780     else
781     lookup("badcontlang");
782     }
783    
784     void header_content_length(const char *s)
785     {
786     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
787     lookup("badcontlen");
788     else
789     lookup("ok");
790     }
791    
/**
 * Content-Location: the only check made is that the value holds no space.
 */
void header_content_location(const char *s)
{
	lookup(strchr(s, ' ') ? "badcontloc" : "ok");
}
799    
/**
 * Content-MD5: only the length is checked, which must be exactly 24
 * characters.
 */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
807    
/**
 * Content-Range: the value is not examined; the "contentrange" message is
 * always reported.
 */
void header_content_range(const char *s)
{
	(void) s;

	lookup("contentrange");
}
813    
814     void header_content_type(const char *s)
815     {
816     bool charset = false;
817     char *type, *subtype;
818     unsigned int i;
819     int r;
820     regmatch_t pmatch[30];
821    
822     r = regexec(&re_content_type, s, 30, pmatch, 0);
823     if (r) {
824     lookup("badcontenttype");
825     return;
826     }
827    
828     type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
829     subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
830    
831     /* parameters */
832     for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
833     char *attrib, *value;
834    
835     attrib = strndup(s + pmatch[i + 1].rm_so,
836     pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
837     value = strndup(s + pmatch[i + 2].rm_so,
838     pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
839    
840     if (strcasecmp(attrib, "charset") == 0)
841     charset = true;
842     }
843    
844     if (strcasecmp(type, "text") == 0 && !charset)
845     lookup("nocharset");
846     else
847     lookup("ok");
848     }
849    
/**
 * Date: must parse as an HTTP date and lie within ten seconds of the
 * local clock, in either direction.
 */
void header_date(const char *s)
{
	const time_t now = time(0);
	struct tm parsed;
	time_t stamp;

	if (!parse_date(s, &parsed))
		return;

	stamp = mktime_from_utc(&parsed);
	if (fabs(difftime(now, stamp)) > 10)
		lookup("wrongdate");
	else
		lookup("ok");
}
867    
868     void header_etag(const char *s)
869     {
870     int r;
871     r = regexec(&re_etag, s, 0, 0, 0);
872     if (r)
873     lookup("badetag");
874     else
875     lookup("ok");
876     }
877    
/**
 * Expires: must parse as an HTTP date; parse_date() reports any problem.
 */
void header_expires(const char *s)
{
	struct tm expiry;

	if (!parse_date(s, &expiry))
		return;

	lookup("ok");
}
884    
/**
 * Last-Modified: must parse as an HTTP date and not lie more than ten
 * seconds in the future.
 */
void header_last_modified(const char *s)
{
	const time_t now = time(0);
	struct tm parsed;
	time_t modified;

	if (!parse_date(s, &parsed))
		return;

	modified = mktime_from_utc(&parsed);
	if (difftime(modified, now) > 10)
		lookup("futurelastmod");
	else
		lookup("ok");
}
902    
903     void header_location(const char *s)
904     {
905     int r;
906     r = regexec(&re_absolute_uri, s, 0, 0, 0);
907     if (r)
908     lookup("badlocation");
909     else
910     lookup("ok");
911     }
912    
913     void header_pragma(const char *s)
914     {
915     if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
916     lookup("ok");
917     else
918     lookup("badpragma");
919     }
920    
921     void header_retry_after(const char *s)
922     {
923     struct tm tm;
924    
925     if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
926     lookup("ok");
927     return;
928     }
929    
930     if (!parse_date(s, &tm))
931     return;
932    
933     lookup("ok");
934     }
935    
936     void header_server(const char *s)
937     {
938     int r;
939     r = regexec(&re_server, s, 0, 0, 0);
940     if (r)
941     lookup("badserver");
942     else
943     lookup("ok");
944     }
945    
946     void header_trailer(const char *s)
947     {
948     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
949     lookup("ok");
950     else
951     lookup("badtrailer");
952     }
953    
954     void header_transfer_encoding(const char *s)
955     {
956     if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
957     header_transfer_encoding_callback))
958     lookup("ok");
959     else
960     lookup("badtransenc");
961     }
962    
/* Known transfer codings. Kept in sorted order: the list is searched with
 * bsearch(). */
char transfer_coding_list[][20] = {
	"chunked",
	"compress",
	"deflate",
	"gzip",
	"identity"
};
966    
967     void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
968     {
969     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
970     char name[20];
971     char *dir;
972    
973     if (19 < len) {
974     lookup("unknowntransenc");
975     return;
976     }
977    
978     strncpy(name, s + pmatch[1].rm_so, len);
979     name[len] = 0;
980    
981     dir = bsearch(name, transfer_coding_list,
982     sizeof transfer_coding_list / sizeof transfer_coding_list[0],
983     sizeof transfer_coding_list[0],
984     (int (*)(const void *, const void *)) strcasecmp);
985     if (!dir) {
986 james 56 if (html)
987     printf("<li class='warning'>");
988 james 40 printf(" Transfer-Encoding '%s':\n", name);
989 james 56 if (html)
990     printf("</li>\n");
991 james 40 lookup("unknowntransenc");
992     }
993     }
994    
995     void header_upgrade(const char *s)
996     {
997     int r;
998     r = regexec(&re_upgrade, s, 0, 0, 0);
999     if (r)
1000     lookup("badupgrade");
1001     else
1002     lookup("ok");
1003     }
1004    
1005     void header_vary(const char *s)
1006     {
1007     if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
1008     lookup("ok");
1009     else
1010     lookup("badvary");
1011     }
1012    
/**
 * Via: the value is not examined; the "via" message is always reported.
 */
void header_via(const char *s)
{
	(void) s;

	lookup("via");
}
1018    
1019 james 48 /* http://wp.netscape.com/newsref/std/cookie_spec.html */
1020     void header_set_cookie(const char *s)
1021     {
1022     bool ok = true;
1023     int r;
1024     const char *semi = strchr(s, ';');
1025     const char *s2;
1026     struct tm tm;
1027     double diff;
1028     time_t time0, time1;
1029     regmatch_t pmatch[20];
1030 james 40
1031 james 48 if (semi)
1032     s2 = strndup(s, semi - s);
1033     else
1034     s2 = s;
1035    
1036     r = regexec(&re_cookie_nameval, s2, 0, 0, 0);
1037     if (r) {
1038     lookup("cookiebadnameval");
1039     ok = false;
1040     }
1041 james 50
1042 james 48 if (!semi)
1043     return;
1044    
1045     s = skip_lws(semi + 1);
1046    
1047     while (*s) {
1048     semi = strchr(s, ';');
1049     if (semi)
1050     s2 = strndup(s, semi - s);
1051     else
1052     s2 = s;
1053    
1054 james 56 if (strncasecmp(s2, "expires=", 8) == 0) {
1055 james 48 s2 += 8;
1056     r = regexec(&re_cookie_expires, s2, 20, pmatch, 0);
1057     if (r == 0) {
1058     tm.tm_mday = atoi(s2 + pmatch[2].rm_so);
1059     tm.tm_mon = month(s2 + pmatch[3].rm_so);
1060     tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900;
1061     tm.tm_hour = atoi(s2 + pmatch[5].rm_so);
1062     tm.tm_min = atoi(s2 + pmatch[6].rm_so);
1063     tm.tm_sec = atoi(s2 + pmatch[7].rm_so);
1064    
1065     time0 = time(0);
1066     time1 = mktime_from_utc(&tm);
1067    
1068     diff = difftime(time0, time1);
1069     if (10 < diff) {
1070     lookup("cookiepastdate");
1071     ok = false;
1072     }
1073     } else {
1074     lookup("cookiebaddate");
1075     ok = false;
1076     }
1077 james 56 } else if (strncasecmp(s2, "domain=", 7) == 0) {
1078     } else if (strncasecmp(s2, "path=", 5) == 0) {
1079 james 48 if (s2[5] != '/') {
1080     lookup("cookiebadpath");
1081     ok = false;
1082     }
1083 james 56 } else if (strcasecmp(s, "secure") == 0) {
1084 james 48 } else {
1085 james 56 if (html)
1086     printf("<li class='warning'>");
1087 james 48 printf(" Set-Cookie field '%s':\n", s2);
1088 james 56 if (html)
1089     printf("</li>\n");
1090 james 48 lookup("cookieunknownfield");
1091     ok = false;
1092     }
1093    
1094     if (semi)
1095     s = skip_lws(semi + 1);
1096     else
1097     break;
1098     }
1099    
1100     if (ok)
1101     lookup("ok");
1102     }
1103    
1104    
/**
 * Report a fatal error and terminate.
 *
 * Writes the message to stderr, prefixed with the program name, then exits
 * with a failure status. Does not return.
 */
void die(const char *error)
{
	const int status = EXIT_FAILURE;

	fprintf(stderr, "httplint: %s\n", error);
	exit(status);
}
1113    
1114    
1115     /**
1116     * Print a string which contains control characters.
1117     */
1118     void print(const char *s, size_t len)
1119     {
1120     size_t i;
1121     for (i = 0; i != len; i++) {
1122 james 56 if (html && s[i] == '<')
1123     printf("&lt;");
1124     else if (html && s[i] == '>')
1125     printf("&gt;");
1126     else if (html && s[i] == '&')
1127     printf("&amp;");
1128     else if (31 < s[i] && s[i] < 127)
1129 james 40 putchar(s[i]);
1130 james 56 else {
1131     if (html)
1132     printf("<span class='cc'>");
1133 james 40 printf("[%.2x]", s[i]);
1134 james 56 if (html)
1135     printf("</span>");
1136     }
1137 james 40 }
1138     }
1139    
1140    
/**
 * Table of messages output by lookup().
 *
 * lookup() finds entries with bsearch() using a case-insensitive comparison
 * of the key, so the entries MUST be kept sorted by key. Keys are limited to
 * 19 characters plus the terminator.
 *
 * lookup() also inspects the start of each value: a "Warning:", "Error:" or
 * "OK" prefix selects the class of the list item in HTML mode.
 */
struct message_entry {
	const char key[20];
	const char *value;
} message_table[] = {
	{ "1xx", "A response status code in the range 100 - 199 indicates a "
			"'provisional response'." },
	{ "2xx", "A response status code in the range 200 - 299 indicates that "
			"the request was successful." },
	{ "3xx", "A response status code in the range 300 - 399 indicates that "
			"the client should redirect to a new URL." },
	{ "4xx", "A response status code in the range 400 - 499 indicates that "
			"the request could not be fulfilled due to client error." },
	{ "5xx", "A response status code in the range 500 - 599 indicates that "
			"an error occurred on the server." },
	{ "asctime", "Warning: This date is in the obsolete asctime() format. "
			"Consider using the RFC 1123 format instead." },
	{ "badage", "Error: The Age header must be one number." },
	{ "badallow", "Error: The Allow header must be a comma-separated list of "
			"HTTP methods." },
	{ "badcachecont", "Error: The Cache-Control header must be a "
			"comma-separated list of directives." },
	{ "badconnection", "Warning: The only value of the Connection header "
			"defined by HTTP/1.1 is \"close\"." },
	{ "badcontenc", "Error: The Content-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badcontenttype", "Error: The Content-Type header must be of the form "
			"'type/subtype (; optional parameters)'." },
	{ "badcontlang", "Error: The Content-Language header must be a "
			"comma-separated list of language tags." },
	{ "badcontlen", "Error: The Content-Length header must be a number." },
	{ "badcontloc", "Error: The Content-Location header must be an absolute "
			"or relative URI." },
	{ "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
			"MD5 sum." },
	{ "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
			"1123 format." },
	{ "badetag", "Error: The ETag header must be a quoted string (optionally "
			"preceded by \"W/\" for a weak tag)." },
	{ "badlocation", "Error: The Location header must be an absolute URI. "
			"Relative URIs are not permitted." },
	{ "badpragma", "Error: The Pragma header must be a comma-separated list of "
			"directives." },
	{ "badserver", "Error: The Server header must be a space-separated list of "
			"products of the form Name/optional-version and comments "
			"in ()." },
	{ "badstatus", "Warning: The response status code is outside the standard "
			"range 100 - 599." },
	{ "badstatusline", "Error: Failed to parse the response Status-Line. The "
			"status line must be of the form 'HTTP/n.n <3-digit "
			"status> <reason phrase>'." },
	{ "badtrailer", "Error: The Trailer header must be a comma-separated list "
			"of header names." },
	{ "badtransenc", "Error: The Transfer-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badupgrade", "Error: The Upgrade header must be a comma-separated list "
			"of product identifiers." },
	{ "badvary", "Error: The Vary header must be a comma-separated list "
			"of header names, or \"*\"." },
	{ "contentrange", "Warning: The Content-Range header should not be returned "
			"by the server for this request." },
	{ "cookiebaddate", "Error: The expires date must be in the form "
			"\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." },
	{ "cookiebadnameval", "Error: A Set-Cookie header must start with "
			"name=value, each excluding semi-colon, comma and "
			"white space." },
	{ "cookiebadpath", "Error: The path does not start with \"/\"." },
	{ "cookiepastdate", "Warning: The expires date is in the past. The cookie "
			"will be deleted by browsers." },
	{ "cookieunknownfield", "Warning: This is not a standard Set-Cookie "
			"field." },
	{ "endofheaders", "End of headers." },
	{ "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
			"version of this tool." },
	{ "futurelastmod", "Error: The specified Last-Modified date-time is in "
			"the future." },
	{ "headertoolong", "Warning: Header too long: ignored." },
	{ "missingcolon", "Error: Headers must be of the form 'Name: value'." },
	{ "missingcontenttype", "Warning: No Content-Type header was present. The "
			"client will have to guess the media type or ask "
			"the user. Adding a Content-Type header is strongly "
			"recommended." },
	{ "missingcontlang", "Consider adding a Content-Language header if "
			"applicable for this document." },
	{ "missingdate", "Warning: No Date header was present. A Date header must "
			"be present, unless the server does not have a clock, or "
			"the response is 100, 101, or 500 - 599." },
	{ "missinglastmod", "No Last-Modified header was present. The "
			"HTTP/1.1 specification states that this header should "
			"be sent whenever feasible." },
	{ "nocharset", "Warning: No character set is specified in the Content-Type. "
			"Clients may assume the default of ISO-8859-1. Consider "
			"appending '; charset=...'." },
	{ "nonstandard", "Warning: I don't know anything about this header. Is it "
			"a standard HTTP response header?" },
	{ "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
			"that all header lines end with CR LF." },
	{ "ok", "OK." },
	{ "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
			"to HTTP/1.1." },
	{ "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
			"Consider using the RFC 1123 format instead." },
	{ "ugly", "This URL appears to contain implementation-specific parts such "
			"as an extension or a query string. This may make the URL liable "
			"to change when the implementation is changed, resulting in "
			"broken links. Consider using URL rewriting or equivalent to "
			"implement a future-proof URL space. See "
			"http://www.w3.org/Provider/Style/URI for more information." },
	{ "unknowncachecont", "Warning: This Cache-Control directive is "
			"non-standard and will have limited support." },
	{ "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
	{ "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
			"range." },
	{ "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
	{ "via", "This header was added by a proxy, cache or gateway." },
	{ "wrongdate", "Warning: The server date-time differs from this system's "
			"date-time by more than 10 seconds. Check that both the "
			"system clocks are correct." },
	{ "xheader", "This is an extension header. I don't know how to check it." }
};
1260    
1261    
1262     /**
1263     * Look up and output the string referenced by a key.
1264     */
1265     void lookup(const char *key)
1266     {
1267     const char *s, *spc;
1268     int x;
1269     struct message_entry *message;
1270    
1271     message = bsearch(key, message_table,
1272     sizeof message_table / sizeof message_table[0],
1273     sizeof message_table[0],
1274     (int (*)(const void *, const void *)) strcasecmp);
1275     if (message)
1276     s = message->value;
1277     else
1278     s = key;
1279    
1280 james 56 if (html) {
1281     if (strncmp(s, "Warning:", 8) == 0)
1282     printf("<li class='warning'>");
1283     else if (strncmp(s, "Error:", 6) == 0)
1284     printf("<li class='error'>");
1285     else if (strncmp(s, "OK", 2) == 0)
1286     printf("<li class='ok'>");
1287     else
1288     printf("<li>");
1289     for (; *s; s++) {
1290     if (strncmp(s, "http://", 7) == 0) {
1291     spc = strchr(s, ' ');
1292     printf("<a href='%.*s'>%.*s</a>", spc - s, s, spc - s, s);
1293     s = spc;
1294     }
1295     switch (*s) {
1296     case '<': printf("&lt;"); break;
1297     case '>': printf("&gt;"); break;
1298     case '&': printf("&amp;"); break;
1299     default: printf("%c", *s); break;
1300     }
1301 james 40 }
1302 james 56 printf("</li>\n");
1303    
1304     } else {
1305     printf(" ");
1306     x = 4;
1307     while (*s) {
1308     spc = strchr(s, ' ');
1309     if (!spc)
1310     spc = s + strlen(s);
1311     if (75 < x + (spc - s)) {
1312     printf("\n ");
1313     x = 4;
1314     }
1315     x += spc - s + 1;
1316     printf("%.*s ", spc - s, s);
1317     if (*spc)
1318     s = spc + 1;
1319     else
1320     s = spc;
1321     }
1322     printf("\n\n");
1323 james 40 }
1324     }
1325 james 56

  ViewVC Help
Powered by ViewVC 1.1.26