/[james]/httplint/httplint.c
ViewVC logotype

Annotation of /httplint/httplint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 50 - (hide annotations) (download) (as text)
Fri Feb 20 20:49:40 2004 UTC (20 years, 2 months ago) by james
File MIME type: text/x-csrc
File size: 32801 byte(s)
Update license to MIT License.

1 james 40 /*
2     * HTTP Header Lint
3 james 50 * Licensed under the MIT License
4     * http://www.opensource.org/licenses/mit-license
5     * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
6 james 40 */
7    
8     /*
9     * Compile using
10     * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11     *
12     * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13     */
14    
15     #define _GNU_SOURCE
16     #define __USE_XOPEN
17    
18     #include <limits.h>
19     #include <math.h>
20     #include <stdbool.h>
21     #include <stdio.h>
22     #include <stdlib.h>
23     #include <string.h>
24     #include <time.h>
25     #include <sys/types.h>
26     #include <regex.h>
27     #include <curl/curl.h>
28    
29    
30     #define NUMBER "0123456789"
31     #define UNUSED(x) x = x
32    
33    
34     bool start;
35     CURL *curl;
36     int status_code;
37     char error_buffer[CURL_ERROR_SIZE];
38     regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
39 james 41 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
40 james 48 re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires;
41 james 40
42    
43     void init(void);
44     void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
45     void check_url(const char *url);
46     size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
47     size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);
48     void check_status_line(const char *s);
49     void check_header(const char *name, const char *value);
50     bool parse_date(const char *s, struct tm *tm);
51 james 41 int month(const char *s);
52 james 42 time_t mktime_from_utc(struct tm *t);
53 james 40 const char *skip_lws(const char *s);
54     bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
55     void (*callback)(const char *s, regmatch_t pmatch[]));
56     void header_accept_ranges(const char *s);
57     void header_age(const char *s);
58     void header_allow(const char *s);
59     void header_cache_control(const char *s);
60     void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
61     void header_connection(const char *s);
62     void header_content_encoding(const char *s);
63     void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
64     void header_content_language(const char *s);
65     void header_content_length(const char *s);
66     void header_content_location(const char *s);
67     void header_content_md5(const char *s);
68     void header_content_range(const char *s);
69     void header_content_type(const char *s);
70     void header_date(const char *s);
71     void header_etag(const char *s);
72     void header_expires(const char *s);
73     void header_last_modified(const char *s);
74     void header_location(const char *s);
75     void header_pragma(const char *s);
76     void header_retry_after(const char *s);
77     void header_server(const char *s);
78     void header_trailer(const char *s);
79     void header_transfer_encoding(const char *s);
80     void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
81     void header_upgrade(const char *s);
82     void header_vary(const char *s);
83     void header_via(const char *s);
84 james 48 void header_set_cookie(const char *s);
85 james 40 void die(const char *error);
86     void warning(const char *message);
87     void error(const char *message);
88     void print(const char *s, size_t len);
89     void lookup(const char *key);
90    
91    
92     struct header_entry {
93     char name[40];
94     void (*handler)(const char *s);
95     int count;
96     char *missing;
97     } header_table[] = {
98     { "Accept-Ranges", header_accept_ranges, 0, 0 },
99     { "Age", header_age, 0, 0 },
100     { "Allow", header_allow, 0, 0 },
101     { "Cache-Control", header_cache_control, 0, 0 },
102     { "Connection", header_connection, 0, 0 },
103     { "Content-Encoding", header_content_encoding, 0, 0 },
104     { "Content-Language", header_content_language, 0, "missingcontlang" },
105     { "Content-Length", header_content_length, 0, 0 },
106     { "Content-Location", header_content_location, 0, 0 },
107     { "Content-MD5", header_content_md5, 0, 0 },
108     { "Content-Range", header_content_range, 0, 0 },
109     { "Content-Type", header_content_type, 0, "missingcontenttype" },
110     { "Date", header_date, 0, "missingdate" },
111     { "ETag", header_etag, 0, 0 },
112     { "Expires", header_expires, 0, 0 },
113     { "Last-Modified", header_last_modified, 0, "missinglastmod" },
114     { "Location", header_location, 0, 0 },
115     { "Pragma", header_pragma, 0, 0 },
116     { "Retry-After", header_retry_after, 0, 0 },
117     { "Server", header_server, 0, 0 },
118 james 48 { "Set-Cookie", header_set_cookie, 0, 0 },
119 james 40 { "Trailer", header_trailer, 0, 0 },
120     { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
121     { "Upgrade", header_upgrade, 0, 0 },
122     { "Vary", header_vary, 0, 0 },
123     { "Via", header_via, 0, 0 }
124     };
125    
126    
127     /**
128     * Main entry point.
129     */
130     int main(int argc, char *argv[])
131     {
132     int i;
133    
134     if (argc < 2)
135     die("Usage: httplint url [url ...]");
136    
137     init();
138    
139     for (i = 1; i != argc; i++)
140     check_url(argv[i]);
141    
142     curl_global_cleanup();
143    
144     return 0;
145     }
146    
147    
148     /**
149     * Initialise the curl handle and compile regular expressions.
150     */
151     void init(void)
152     {
153     struct curl_slist *request_headers = 0;
154    
155     if (curl_global_init(CURL_GLOBAL_ALL))
156     die("Failed to initialise libcurl");
157    
158     curl = curl_easy_init();
159     if (!curl)
160     die("Failed to create curl handle");
161    
162     if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
163     die("Failed to set curl options");
164     if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
165     die("Failed to set curl options");
166     if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
167     die("Failed to set curl options");
168     if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
169     die("Failed to set curl options");
170    
171     /* remove libcurl default headers */
172     request_headers = curl_slist_append(request_headers, "Accept:");
173     request_headers = curl_slist_append(request_headers, "Pragma:");
174     if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
175     die("Failed to set curl options");
176    
177     /* compile regular expressions */
178     regcomp_wrapper(&re_status_line,
179     "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
180     REG_EXTENDED);
181     regcomp_wrapper(&re_token,
182 james 44 "^([-0-9a-zA-Z_.!]+)",
183 james 40 REG_EXTENDED);
184     regcomp_wrapper(&re_token_value,
185 james 44 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
186 james 40 REG_EXTENDED);
187     regcomp_wrapper(&re_content_type,
188     "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
189     "(;[ \t]*([-0-9a-zA-Z_.]+)="
190     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
191     REG_EXTENDED);
192     regcomp_wrapper(&re_absolute_uri,
193     "^[a-zA-Z0-9]+://[^ ]+$",
194     REG_EXTENDED);
195     regcomp_wrapper(&re_etag,
196     "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
197     REG_EXTENDED);
198     regcomp_wrapper(&re_server,
199 james 44 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
200 james 40 REG_EXTENDED);
201     regcomp_wrapper(&re_transfer_coding,
202     "^([-0-9a-zA-Z_.]+)[ \t]*"
203     "(;[ \t]*([-0-9a-zA-Z_.]+)="
204     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
205     REG_EXTENDED);
206     regcomp_wrapper(&re_upgrade,
207     "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
208     REG_EXTENDED);
209     regcomp_wrapper(&re_ugly,
210 james 43 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
211 james 40 REG_EXTENDED);
212 james 41 regcomp_wrapper(&re_rfc1123,
213     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
214     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
215     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
216     REG_EXTENDED);
217     regcomp_wrapper(&re_rfc1036,
218     "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
219     "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
220     "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
221     REG_EXTENDED);
222     regcomp_wrapper(&re_asctime,
223     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
224     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
225     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
226     REG_EXTENDED);
227 james 48 regcomp_wrapper(&re_cookie_nameval,
228     "^[^;, ]+=[^;, ]*$",
229     REG_EXTENDED);
230     regcomp_wrapper(&re_cookie_expires,
231     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-"
232     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) "
233     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
234     REG_EXTENDED);
235 james 40 }
236    
237    
/**
 * Compile a regular expression, treating failure as fatal.
 *
 * \param preg    destination for the compiled expression
 * \param regex   expression source text
 * \param cflags  flags passed through to regcomp()
 *
 * On failure the offending expression is written to stderr and die() is
 * called with the text produced by regerror().
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
252    
253    
254     /**
255     * Fetch and check the headers for the specified url.
256     */
257     void check_url(const char *url)
258     {
259     int i, r;
260     CURLcode code;
261    
262     start = true;
263     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
264     header_table[i].count = 0;
265    
266     printf("Checking URL %s\n", url);
267     if (strncmp(url, "http", 4))
268     warning("this is not an http or https url");
269    
270     if (curl_easy_setopt(curl, CURLOPT_URL, url))
271     die("Failed to set curl options");
272    
273     code = curl_easy_perform(curl);
274     if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
275     error(error_buffer);
276     return;
277     } else {
278     printf("\n");
279     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
280     if (header_table[i].count == 0 && header_table[i].missing)
281     lookup(header_table[i].missing);
282     }
283     }
284    
285     r = regexec(&re_ugly, url, 0, 0, 0);
286     if (r)
287     lookup("ugly");
288     }
289    
290    
291     /**
292     * Callback for received header data.
293     */
294     size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
295     {
296     const size_t size = msize * nmemb;
297     char s[400], *name, *value;
298    
299     UNUSED(stream);
300    
301     printf("* ");
302     print(ptr, size);
303     printf("\n");
304    
305     if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
306     lookup("notcrlf");
307     return size;
308     }
309     if (sizeof s <= size) {
310     warning("header too long: ignored\n");
311     return size;
312     }
313     strncpy(s, ptr, size);
314     s[size - 2] = 0;
315    
316     name = s;
317     value = strchr(s, ':');
318    
319     if (s[0] == 0) {
320     /* empty header indicates end of headers */
321     puts("End of headers.");
322     return 0;
323    
324     } else if (start) {
325     /* Status-Line [6.1] */
326     check_status_line(s);
327     start = false;
328    
329     } else if (!value) {
330     lookup("missingcolon");
331    
332     } else {
333     *value = 0;
334     value++;
335    
336     check_header(name, skip_lws(value));
337     }
338    
339     return size;
340     }
341    
342    
/**
 * Callback for received body data.
 *
 * Only the headers are of interest, so every argument is ignored and 0 is
 * returned, which makes libcurl abort the fetch.
 *
 * \return always 0
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
357    
358    
359     /**
360     * Check the syntax and content of the response Status-Line [6.1].
361     */
362     void check_status_line(const char *s)
363     {
364     const char *reason;
365     unsigned int major = 0, minor = 0;
366     int r;
367     regmatch_t pmatch[5];
368    
369     r = regexec(&re_status_line, s, 5, pmatch, 0);
370     if (r) {
371     lookup("badstatusline");
372     return;
373     }
374    
375     major = atoi(s + pmatch[1].rm_so);
376     minor = atoi(s + pmatch[2].rm_so);
377     status_code = atoi(s + pmatch[3].rm_so);
378     reason = s + pmatch[4].rm_so;
379    
380     if (major < 1 || (major == 1 && minor == 0)) {
381     lookup("oldhttp");
382     } else if ((major == 1 && 1 < minor) || 1 < major) {
383     lookup("futurehttp");
384     } else {
385     if (status_code < 100 || 600 <= status_code) {
386     lookup("badstatus");
387     } else {
388     char key[] = "xxx";
389     key[0] = '0' + status_code / 100;
390     lookup(key);
391     }
392     }
393     }
394    
395    
396     /**
397     * Check the syntax and content of a header.
398     */
399     void check_header(const char *name, const char *value)
400     {
401     struct header_entry *header;
402    
403     header = bsearch(name, header_table,
404     sizeof header_table / sizeof header_table[0],
405     sizeof header_table[0],
406     (int (*)(const void *, const void *)) strcasecmp);
407    
408     if (header) {
409     header->count++;
410     header->handler(value);
411 james 44 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
412     lookup("xheader");
413     } else {
414 james 40 lookup("nonstandard");
415 james 44 }
416 james 40 }
417    
418    
419     /**
420     * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
421     */
422     bool parse_date(const char *s, struct tm *tm)
423     {
424 james 41 int r;
425 james 40 int len = strlen(s);
426 james 41 regmatch_t pmatch[20];
427 james 40
428     if (len == 29) {
429     /* RFC 1123 */
430 james 41 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
431     if (r == 0) {
432     tm->tm_mday = atoi(s + pmatch[2].rm_so);
433     tm->tm_mon = month(s + pmatch[3].rm_so);
434     tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
435     tm->tm_hour = atoi(s + pmatch[5].rm_so);
436     tm->tm_min = atoi(s + pmatch[6].rm_so);
437     tm->tm_sec = atoi(s + pmatch[7].rm_so);
438 james 40 return true;
439 james 41 }
440 james 40
441     } else if (len == 24) {
442     /* asctime() format */
443 james 41 r = regexec(&re_asctime, s, 20, pmatch, 0);
444     if (r == 0) {
445     if (s[pmatch[3].rm_so] == ' ')
446     tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
447     else
448     tm->tm_mday = atoi(s + pmatch[3].rm_so);
449     tm->tm_mon = month(s + pmatch[2].rm_so);
450     tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
451     tm->tm_hour = atoi(s + pmatch[4].rm_so);
452     tm->tm_min = atoi(s + pmatch[5].rm_so);
453     tm->tm_sec = atoi(s + pmatch[6].rm_so);
454 james 40 lookup("asctime");
455     return true;
456     }
457    
458     } else {
459     /* RFC 1036 */
460 james 41 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
461     if (r == 0) {
462     tm->tm_mday = atoi(s + pmatch[2].rm_so);
463     tm->tm_mon = month(s + pmatch[3].rm_so);
464     tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
465     tm->tm_hour = atoi(s + pmatch[5].rm_so);
466     tm->tm_min = atoi(s + pmatch[6].rm_so);
467     tm->tm_sec = atoi(s + pmatch[7].rm_so);
468 james 40 lookup("rfc1036");
469     return true;
470     }
471    
472     }
473    
474     lookup("baddate");
475     return false;
476     }
477    
478    
/**
 * Convert a month name to the month number.
 *
 * Only the first three characters of s are examined, so s may point into
 * the middle of a longer date string.
 *
 * \param s  text starting with an English month abbreviation ("Jan" ..
 *           "Dec"), case-sensitive
 * \return month number in the range 0 (January) to 11 (December), or 0 if
 *         the text is not a recognised abbreviation
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	for (i = 0; i != 12; i++) {
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}

	return 0;
}
509    
510    
/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * The broken-down time is first converted as if it were local time, then
 * the local time zone offset is measured by converting the result back
 * through gmtime() and mktime(), and that offset is removed.
 *
 * \param t  broken-down UTC time; it is passed to mktime() and so may be
 *           modified, and its tm_isdst field is overwritten with 0
 * \return the corresponding time_t, or -1 if the time cannot be converted
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t tl, tb;
	struct tm *tg;

	/* a DST flag has no meaning for UTC; both conversions must use
	 * standard time so that their offsets cancel exactly */
	t->tm_isdst = 0;

	tl = mktime(t);
	if (tl == (time_t) -1) {
		/* -1 is also a valid time: retry one hour earlier */
		t->tm_hour--;
		tl = mktime(t);
		if (tl == (time_t) -1)
			return -1; /* can't deal with output from strptime */
		tl += 3600;
	}

	tg = gmtime(&tl);
	if (!tg)
		return -1;
	tg->tm_isdst = 0;
	tb = mktime(tg);
	if (tb == (time_t) -1) {
		tg->tm_hour--;
		tb = mktime(tg);
		if (tb == (time_t) -1)
			return -1; /* can't deal with output from gmtime */
		tb += 3600;
	}

	/* (tb - tl) is minus the local offset from UTC */
	return tl - (tb - tl);
}
542    
543    
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * A single leading CRLF is skipped only when it is followed by a space or
 * tab; any run of spaces and tabs is then skipped.
 *
 * \param s  NUL-terminated string
 * \return pointer to the first character after the white space
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
		p += 2;

	for (; *p == ' ' || *p == '\t'; p++)
		;

	return p;
}
555    
556    
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * Items are matched one at a time against preg at the current position
 * and must be separated by one or more commas, with optional white space
 * around them. An empty string is a list of zero items.
 *
 * \param s         the list text
 * \param preg      compiled expression matching a single item
 * \param n         minimum number of items allowed
 * \param m         maximum number of items allowed (UINT_MAX for no limit)
 * \param callback  called for each item with the position of the item and
 *                  the match offsets relative to it, or 0 for none
 * \return true if the list is well formed and has between n and m items;
 *         otherwise a description is printed and false is returned
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0) != 0) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		/* several commas in a row count as one separator */
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
601    
602    
/* Header-specific validation. */

/**
 * Check an Accept-Ranges header: "bytes" and "none" are the accepted
 * values, anything else is reported as an unknown range unit.
 */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
613    
614     void header_age(const char *s)
615     {
616     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
617     lookup("badage");
618     else
619     lookup("ok");
620     }
621    
622     void header_allow(const char *s)
623     {
624     if (parse_list(s, &re_token, 0, UINT_MAX, 0))
625     lookup("ok");
626     else
627     lookup("badallow");
628     }
629    
630     void header_cache_control(const char *s)
631     {
632     if (parse_list(s, &re_token_value, 1, UINT_MAX,
633     header_cache_control_callback))
634     lookup("ok");
635     else
636     lookup("badcachecont");
637     }
638    
/* Known Cache-Control directives. Must stay sorted: searched by bsearch(). */
char cache_control_list[][20] = {
	"max-age", "max-stale", "min-fresh", "must-revalidate",
	"no-cache", "no-store", "no-transform", "only-if-cached",
	"private", "proxy-revalidate", "public", "s-maxage"
};

/**
 * bsearch() comparator for cache_control_list: both arguments point to
 * NUL-terminated strings, which are compared ignoring case.
 */
static int compare_cache_directive(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * Report a Cache-Control directive whose name is not in
 * cache_control_list.
 *
 * \param s       start of the directive
 * \param pmatch  match offsets from re_token_value; group 1 is the name
 */
void header_cache_control_callback(const char *s, regmatch_t pmatch[])
{
	size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];
	char *dir;

	/* too long to be any known directive (and to fit in name) */
	if (19 < len) {
		lookup("unknowncachecont");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	/* a comparator of the exact type bsearch() expects is used, since
	 * calling strcasecmp() through a cast function pointer is undefined */
	dir = bsearch(name, cache_control_list,
			sizeof cache_control_list / sizeof cache_control_list[0],
			sizeof cache_control_list[0],
			compare_cache_directive);

	if (!dir) {
		printf(" Cache-Control directive '%s':\n", name);
		lookup("unknowncachecont");
	}
}
669    
/**
 * Check a Connection header: only the exact value "close" is accepted.
 */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
677    
678     void header_content_encoding(const char *s)
679     {
680     if (parse_list(s, &re_token, 1, UINT_MAX,
681     header_content_encoding_callback))
682     lookup("ok");
683     else
684     lookup("badcontenc");
685     }
686    
/* Known content codings. Must stay sorted: searched by bsearch(). */
char content_coding_list[][20] = {
	"compress", "deflate", "gzip", "identity"
};

/**
 * bsearch() comparator for content_coding_list: both arguments point to
 * NUL-terminated strings, which are compared ignoring case.
 */
static int compare_content_coding(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * Report a Content-Encoding item that is not in content_coding_list.
 *
 * \param s       start of the item
 * \param pmatch  match offsets from re_token; group 1 is the coding name
 */
void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
{
	size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];
	char *dir;

	/* too long to be any known coding (and to fit in name) */
	if (19 < len) {
		lookup("unknowncontenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	/* a comparator of the exact type bsearch() expects is used, since
	 * calling strcasecmp() through a cast function pointer is undefined */
	dir = bsearch(name, content_coding_list,
			sizeof content_coding_list / sizeof content_coding_list[0],
			sizeof content_coding_list[0],
			compare_content_coding);
	if (!dir) {
		printf(" Content-Encoding '%s':\n", name);
		lookup("unknowncontenc");
	}
}
714    
715     void header_content_language(const char *s)
716     {
717     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
718     lookup("ok");
719     else
720     lookup("badcontlang");
721     }
722    
723     void header_content_length(const char *s)
724     {
725     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
726     lookup("badcontlen");
727     else
728     lookup("ok");
729     }
730    
/**
 * Check a Content-Location header: the value is rejected if it contains a
 * space character.
 */
void header_content_location(const char *s)
{
	const bool has_space = strchr(s, ' ') != 0;

	lookup(has_space ? "badcontloc" : "ok");
}
738    
/**
 * Check a Content-MD5 header: only the length of the value is checked,
 * which must be exactly 24 characters.
 */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
746    
/**
 * Handle a Content-Range header: the value is not examined, a fixed
 * message is reported.
 */
void header_content_range(const char *s)
{
	(void) s;
	lookup("contentrange");
}
752    
753     void header_content_type(const char *s)
754     {
755     bool charset = false;
756     char *type, *subtype;
757     unsigned int i;
758     int r;
759     regmatch_t pmatch[30];
760    
761     r = regexec(&re_content_type, s, 30, pmatch, 0);
762     if (r) {
763     lookup("badcontenttype");
764     return;
765     }
766    
767     type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
768     subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
769    
770     /* parameters */
771     for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
772     char *attrib, *value;
773    
774     attrib = strndup(s + pmatch[i + 1].rm_so,
775     pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
776     value = strndup(s + pmatch[i + 2].rm_so,
777     pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
778    
779     if (strcasecmp(attrib, "charset") == 0)
780     charset = true;
781     }
782    
783     if (strcasecmp(type, "text") == 0 && !charset)
784     lookup("nocharset");
785     else
786     lookup("ok");
787     }
788    
/**
 * Check a Date header: the value must be a valid date, and it is reported
 * as wrong if it differs from the local clock by more than 10 seconds.
 *
 * \param s  the header value
 */
void header_date(const char *s)
{
	double diff;
	time_t time0, time1;
	struct tm tm;

	/* no field may be indeterminate when tm reaches mktime() */
	memset(&tm, 0, sizeof tm);

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	time1 = mktime_from_utc(&tm);

	diff = difftime(time0, time1);
	if (10 < fabs(diff))
		lookup("wrongdate");
	else
		lookup("ok");
}
806    
807     void header_etag(const char *s)
808     {
809     int r;
810     r = regexec(&re_etag, s, 0, 0, 0);
811     if (r)
812     lookup("badetag");
813     else
814     lookup("ok");
815     }
816    
/**
 * Check an Expires header: the value must be a valid date. A date that
 * fails to parse is reported by parse_date() itself.
 */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
823    
/**
 * Check a Last-Modified header: the value must be a valid date, and it is
 * reported if it is more than 10 seconds ahead of the local clock.
 *
 * \param s  the header value
 */
void header_last_modified(const char *s)
{
	double diff;
	time_t time0, time1;
	struct tm tm;

	/* no field may be indeterminate when tm reaches mktime() */
	memset(&tm, 0, sizeof tm);

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	time1 = mktime_from_utc(&tm);

	diff = difftime(time1, time0);
	if (10 < diff)
		lookup("futurelastmod");
	else
		lookup("ok");
}
841    
842     void header_location(const char *s)
843     {
844     int r;
845     r = regexec(&re_absolute_uri, s, 0, 0, 0);
846     if (r)
847     lookup("badlocation");
848     else
849     lookup("ok");
850     }
851    
852     void header_pragma(const char *s)
853     {
854     if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
855     lookup("ok");
856     else
857     lookup("badpragma");
858     }
859    
860     void header_retry_after(const char *s)
861     {
862     struct tm tm;
863    
864     if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
865     lookup("ok");
866     return;
867     }
868    
869     if (!parse_date(s, &tm))
870     return;
871    
872     lookup("ok");
873     }
874    
875     void header_server(const char *s)
876     {
877     int r;
878     r = regexec(&re_server, s, 0, 0, 0);
879     if (r)
880     lookup("badserver");
881     else
882     lookup("ok");
883     }
884    
885     void header_trailer(const char *s)
886     {
887     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
888     lookup("ok");
889     else
890     lookup("badtrailer");
891     }
892    
893     void header_transfer_encoding(const char *s)
894     {
895     if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
896     header_transfer_encoding_callback))
897     lookup("ok");
898     else
899     lookup("badtransenc");
900     }
901    
/* Known transfer codings. Must stay sorted: searched by bsearch(). */
char transfer_coding_list[][20] = {
	"chunked", "compress", "deflate", "gzip", "identity"
};

/**
 * bsearch() comparator for transfer_coding_list: both arguments point to
 * NUL-terminated strings, which are compared ignoring case.
 */
static int compare_transfer_coding(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * Report a Transfer-Encoding item that is not in transfer_coding_list.
 *
 * \param s       start of the item
 * \param pmatch  match offsets from re_transfer_coding; group 1 is the
 *                coding name
 */
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
{
	size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];
	char *dir;

	/* too long to be any known coding (and to fit in name) */
	if (19 < len) {
		lookup("unknowntransenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	/* a comparator of the exact type bsearch() expects is used, since
	 * calling strcasecmp() through a cast function pointer is undefined */
	dir = bsearch(name, transfer_coding_list,
			sizeof transfer_coding_list / sizeof transfer_coding_list[0],
			sizeof transfer_coding_list[0],
			compare_transfer_coding);
	if (!dir) {
		printf(" Transfer-Encoding '%s':\n", name);
		lookup("unknowntransenc");
	}
}
929    
930     void header_upgrade(const char *s)
931     {
932     int r;
933     r = regexec(&re_upgrade, s, 0, 0, 0);
934     if (r)
935     lookup("badupgrade");
936     else
937     lookup("ok");
938     }
939    
940     void header_vary(const char *s)
941     {
942     if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
943     lookup("ok");
944     else
945     lookup("badvary");
946     }
947    
/**
 * Handle a Via header: the value is not examined, a fixed message is
 * reported.
 */
void header_via(const char *s)
{
	(void) s;
	lookup("via");
}
953    
954 james 48 /* http://wp.netscape.com/newsref/std/cookie_spec.html */
955     void header_set_cookie(const char *s)
956     {
957     bool ok = true;
958     int r;
959     const char *semi = strchr(s, ';');
960     const char *s2;
961     struct tm tm;
962     double diff;
963     time_t time0, time1;
964     regmatch_t pmatch[20];
965 james 40
966 james 48 if (semi)
967     s2 = strndup(s, semi - s);
968     else
969     s2 = s;
970    
971     r = regexec(&re_cookie_nameval, s2, 0, 0, 0);
972     if (r) {
973     lookup("cookiebadnameval");
974     ok = false;
975     }
976 james 50
977 james 48 if (!semi)
978     return;
979    
980     s = skip_lws(semi + 1);
981    
982     while (*s) {
983     semi = strchr(s, ';');
984     if (semi)
985     s2 = strndup(s, semi - s);
986     else
987     s2 = s;
988    
989     if (strncmp(s2, "expires=", 8) == 0) {
990     s2 += 8;
991     r = regexec(&re_cookie_expires, s2, 20, pmatch, 0);
992     if (r == 0) {
993     tm.tm_mday = atoi(s2 + pmatch[2].rm_so);
994     tm.tm_mon = month(s2 + pmatch[3].rm_so);
995     tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900;
996     tm.tm_hour = atoi(s2 + pmatch[5].rm_so);
997     tm.tm_min = atoi(s2 + pmatch[6].rm_so);
998     tm.tm_sec = atoi(s2 + pmatch[7].rm_so);
999    
1000     time0 = time(0);
1001     time1 = mktime_from_utc(&tm);
1002    
1003     diff = difftime(time0, time1);
1004     if (10 < diff) {
1005     lookup("cookiepastdate");
1006     ok = false;
1007     }
1008     } else {
1009     lookup("cookiebaddate");
1010     ok = false;
1011     }
1012     } else if (strncmp(s2, "domain=", 7) == 0) {
1013     } else if (strncmp(s2, "path=", 5) == 0) {
1014     if (s2[5] != '/') {
1015     lookup("cookiebadpath");
1016     ok = false;
1017     }
1018     } else if (strcmp(s, "secure") == 0) {
1019     } else {
1020     printf(" Set-Cookie field '%s':\n", s2);
1021     lookup("cookieunknownfield");
1022     ok = false;
1023     }
1024    
1025     if (semi)
1026     s = skip_lws(semi + 1);
1027     else
1028     break;
1029     }
1030    
1031     if (ok)
1032     lookup("ok");
1033     }
1034    
1035    
/**
 * Report a fatal error on stderr, prefixed with the program name, and
 * terminate the program with EXIT_FAILURE.
 *
 * \param error  the message to print
 */
void die(const char *error)
{
	FILE *const out = stderr;

	fprintf(out, "httplint: %s\n", error);
	exit(EXIT_FAILURE);
}
1044    
1045    
/**
 * Write a message to stdout on a line of its own, prefixed with
 * "Warning: ".
 *
 * \param message  the message to print
 */
void warning(const char *message)
{
	fprintf(stdout, "Warning: %s\n", message);
}
1053    
1054    
/**
 * Write a message to stdout on a line of its own, prefixed with
 * "Error: ". Unlike die(), this does not terminate the program.
 *
 * \param message  the message to print
 */
void error(const char *message)
{
	fprintf(stdout, "Error: %s\n", message);
}
1062    
1063    
/**
 * Print a string which contains control characters.
 *
 * Printable ASCII characters (32 to 126) are written as they are; every
 * other byte is written as two hexadecimal digits in square brackets.
 *
 * \param s    the bytes to print (need not be NUL-terminated)
 * \param len  number of bytes to print
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* work with the byte value: where char is signed, a byte above
		 * 127 would be sign-extended and print as "[ffffffxx]" */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
1077    
1078    
/**
 * Table of user-visible messages, indexed by a short key.
 *
 * The entries MUST stay sorted by key in ascending order, because lookup()
 * searches this table with bsearch(). Keys must fit in 19 characters plus
 * the terminating NUL.
 */
struct message_entry {
  const char key[20];
  const char *value;
} message_table[] = {
  { "1xx", "A response status code in the range 100 - 199 indicates a "
           "'provisional response'." },
  { "2xx", "A response status code in the range 200 - 299 indicates that "
           "the request was successful." },
  { "3xx", "A response status code in the range 300 - 399 indicates that "
           "the client should redirect to a new URL." },
  { "4xx", "A response status code in the range 400 - 499 indicates that "
           "the request could not be fulfilled due to client error." },
  { "5xx", "A response status code in the range 500 - 599 indicates that "
           "an error occurred on the server." },
  { "asctime", "Warning: This date is in the obsolete asctime() format. "
               "Consider using the RFC 1123 format instead." },
  { "badage", "Error: The Age header must be one number." },
  { "badallow", "Error: The Allow header must be a comma-separated list of "
                "HTTP methods." },
  { "badcachecont", "Error: The Cache-Control header must be a "
                    "comma-separated list of directives." },
  { "badconnection", "Warning: The only value of the Connection header "
                     "defined by HTTP/1.1 is \"close\"." },
  { "badcontenc", "Error: The Content-Encoding header must be a "
                  "comma-separated list of encodings." },
  { "badcontenttype", "Error: The Content-Type header must be of the form "
                      "'type/subtype (; optional parameters)'." },
  { "badcontlang", "Error: The Content-Language header must be a "
                   "comma-separated list of language tags." },
  { "badcontlen", "Error: The Content-Length header must be a number." },
  { "badcontloc", "Error: The Content-Location header must be an absolute "
                  "or relative URI." },
  { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
                  "MD5 sum." },
  { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
               "1123 format." },
  { "badetag", "Error: The ETag header must be a quoted string (optionally "
               "preceded by \"W/\" for a weak tag)." },
  { "badlocation", "Error: The Location header must be an absolute URI. "
                   "Relative URIs are not permitted." },
  { "badpragma", "Error: The Pragma header must be a comma-separated list of "
                 "directives." },
  { "badserver", "Error: The Server header must be a space-separated list of "
                 "products of the form Name/optional-version and comments "
                 "in ()." },
  { "badstatus", "Warning: The response status code is outside the standard "
                 "range 100 - 599." },
  { "badstatusline", "Error: Failed to parse the response Status-Line. The "
                     "status line must be of the form 'HTTP/n.n <3-digit "
                     "status> <reason phrase>'." },
  { "badtrailer", "Error: The Trailer header must be a comma-separated list "
                  "of header names." },
  { "badtransenc", "Error: The Transfer-Encoding header must be a "
                   "comma-separated list of encodings." },
  { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
                  "of product identifiers." },
  { "badvary", "Error: The Vary header must be a comma-separated list "
               "of header names, or \"*\"." },
  { "contentrange", "Warning: The Content-Range header should not be returned "
                    "by the server for this request." },
  { "cookiebaddate", "Error: The expires date must be in the form "
                     "\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." },
  { "cookiebadnameval", "Error: A Set-Cookie header must start with "
                        "name=value, each excluding semi-colon, comma and "
                        "white space." },
  { "cookiebadpath", "Error: The path does not start with \"/\"." },
  { "cookiepastdate", "Warning: The expires date is in the past. The cookie "
                      "will be deleted by browsers." },
  { "cookieunknownfield", "Warning: This is not a standard Set-Cookie "
                          "field." },
  { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
                  "version of this tool." },
  { "futurelastmod", "Error: The specified Last-Modified date-time is in "
                     "the future." },
  { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
  { "missingcontenttype", "Warning: No Content-Type header was present. The "
                          "client will have to guess the media type or ask "
                          "the user. Adding a Content-Type header is strongly "
                          "recommended." },
  { "missingcontlang", "Consider adding a Content-Language header if "
                       "applicable for this document." },
  { "missingdate", "Warning: No Date header was present. A Date header must "
                   "be present, unless the server does not have a clock, or "
                   "the response is 100, 101, or 500 - 599." },
  { "missinglastmod", "No Last-Modified header was present. The "
                      "HTTP/1.1 specification states that this header should "
                      "be sent whenever feasible." },
  { "nocharset", "Warning: No character set is specified in the Content-Type. "
                 "Clients may assume the default of ISO-8859-1. Consider "
                 "appending '; charset=...'." },
  { "nonstandard", "Warning: I don't know anything about this header. Is it "
                   "a standard HTTP response header?" },
  { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
               "that all header lines end with CR LF." },
  { "ok", "OK." },
  { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
               "to HTTP/1.1." },
  { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
               "Consider using the RFC 1123 format instead." },
  { "ugly", "This URL appears to contain implementation-specific parts such "
            "as an extension or a query string. This may make the URL liable "
            "to change when the implementation is changed, resulting in "
            "broken links. Consider using URL rewriting or equivalent to "
            "implement a future-proof URL space. See "
            "http://www.w3.org/Provider/Style/URI for more information." },
  { "unknowncachecont", "Warning: This Cache-Control directive is "
                        "non-standard and will have limited support." },
  { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
  { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
                    "range." },
  { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
  { "via", "This header was added by a proxy, cache or gateway." },
  { "wrongdate", "Warning: The server date-time differs from this system's "
                 "date-time by more than 10 seconds. Check that both the "
                 "system clocks are correct." },
  { "xheader", "This is an extension header. I don't know how to check it." }
};
1196    
1197    
1198     /**
1199     * Look up and output the string referenced by a key.
1200     */
1201     void lookup(const char *key)
1202     {
1203     const char *s, *spc;
1204     int x;
1205     struct message_entry *message;
1206    
1207     message = bsearch(key, message_table,
1208     sizeof message_table / sizeof message_table[0],
1209     sizeof message_table[0],
1210     (int (*)(const void *, const void *)) strcasecmp);
1211     if (message)
1212     s = message->value;
1213     else
1214     s = key;
1215    
1216     printf(" ");
1217     x = 4;
1218     while (*s) {
1219     spc = strchr(s, ' ');
1220     if (!spc)
1221     spc = s + strlen(s);
1222     if (75 < x + (spc - s)) {
1223     printf("\n ");
1224     x = 4;
1225     }
1226     x += spc - s + 1;
1227     printf("%.*s ", spc - s, s);
1228     if (*spc)
1229     s = spc + 1;
1230     else
1231     s = spc;
1232     }
1233     printf("\n\n");
1234     }

  ViewVC Help
Powered by ViewVC 1.1.26