/[james]/httplint/httplint.c
ViewVC logotype

Annotation of /httplint/httplint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 44 - (hide annotations) (download) (as text)
Thu Dec 18 00:52:50 2003 UTC (20 years, 4 months ago) by james
File MIME type: text/x-csrc
File size: 29967 byte(s)
Fix Last-Modified, add X- header message, add ! to tokens.

1 james 40 /*
2     * HTTP Header Lint
3     * Licensed under the same license as Curl
4     * http://curl.haxx.se/docs/copyright.html
5     * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
6     */
7    
8     /*
9     * Compile using
10     * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11     *
12     * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13     */
14    
15     #define _GNU_SOURCE
16     #define __USE_XOPEN
17    
18     #include <limits.h>
19     #include <math.h>
20     #include <stdbool.h>
21     #include <stdio.h>
22     #include <stdlib.h>
23     #include <string.h>
24     #include <time.h>
25     #include <sys/types.h>
26     #include <regex.h>
27     #include <curl/curl.h>
28    
29    
/** The decimal digits, for use with strspn() when validating numeric values. */
#define NUMBER "0123456789"

/**
 * Mark a parameter or variable as deliberately unused.
 *
 * Evaluates to a void expression, so it works on const-qualified objects and
 * does not trigger self-assignment warnings.
 */
#define UNUSED(x) ((void) (x))
32    
33    
34     bool start;
35     CURL *curl;
36     int status_code;
37     char error_buffer[CURL_ERROR_SIZE];
38     regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
39 james 41 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
40     re_rfc1123, re_rfc1036, re_asctime;
41 james 40
42    
/* Set-up and the per-URL driver. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks. */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Generic parsing helpers. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* Per-header validators, referenced from header_table. */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);

/* Per-item callbacks handed to parse_list(). */
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);

/* Reporting. */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);
89    
90    
91     struct header_entry {
92     char name[40];
93     void (*handler)(const char *s);
94     int count;
95     char *missing;
96     } header_table[] = {
97     { "Accept-Ranges", header_accept_ranges, 0, 0 },
98     { "Age", header_age, 0, 0 },
99     { "Allow", header_allow, 0, 0 },
100     { "Cache-Control", header_cache_control, 0, 0 },
101     { "Connection", header_connection, 0, 0 },
102     { "Content-Encoding", header_content_encoding, 0, 0 },
103     { "Content-Language", header_content_language, 0, "missingcontlang" },
104     { "Content-Length", header_content_length, 0, 0 },
105     { "Content-Location", header_content_location, 0, 0 },
106     { "Content-MD5", header_content_md5, 0, 0 },
107     { "Content-Range", header_content_range, 0, 0 },
108     { "Content-Type", header_content_type, 0, "missingcontenttype" },
109     { "Date", header_date, 0, "missingdate" },
110     { "ETag", header_etag, 0, 0 },
111     { "Expires", header_expires, 0, 0 },
112     { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113     { "Location", header_location, 0, 0 },
114     { "Pragma", header_pragma, 0, 0 },
115     { "Retry-After", header_retry_after, 0, 0 },
116     { "Server", header_server, 0, 0 },
117     { "Trailer", header_trailer, 0, 0 },
118     { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
119     { "Upgrade", header_upgrade, 0, 0 },
120     { "Vary", header_vary, 0, 0 },
121     { "Via", header_via, 0, 0 }
122     };
123    
124    
125     /**
126     * Main entry point.
127     */
128     int main(int argc, char *argv[])
129     {
130     int i;
131    
132     if (argc < 2)
133     die("Usage: httplint url [url ...]");
134    
135     init();
136    
137     for (i = 1; i != argc; i++)
138     check_url(argv[i]);
139    
140     curl_global_cleanup();
141    
142     return 0;
143     }
144    
145    
146     /**
147     * Initialise the curl handle and compile regular expressions.
148     */
149     void init(void)
150     {
151     struct curl_slist *request_headers = 0;
152    
153     if (curl_global_init(CURL_GLOBAL_ALL))
154     die("Failed to initialise libcurl");
155    
156     curl = curl_easy_init();
157     if (!curl)
158     die("Failed to create curl handle");
159    
160     if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
161     die("Failed to set curl options");
162     if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
163     die("Failed to set curl options");
164     if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
165     die("Failed to set curl options");
166     if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
167     die("Failed to set curl options");
168    
169     /* remove libcurl default headers */
170     request_headers = curl_slist_append(request_headers, "Accept:");
171     request_headers = curl_slist_append(request_headers, "Pragma:");
172     if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
173     die("Failed to set curl options");
174    
175     /* compile regular expressions */
176     regcomp_wrapper(&re_status_line,
177     "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
178     REG_EXTENDED);
179     regcomp_wrapper(&re_token,
180 james 44 "^([-0-9a-zA-Z_.!]+)",
181 james 40 REG_EXTENDED);
182     regcomp_wrapper(&re_token_value,
183 james 44 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
184 james 40 REG_EXTENDED);
185     regcomp_wrapper(&re_content_type,
186     "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
187     "(;[ \t]*([-0-9a-zA-Z_.]+)="
188     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
189     REG_EXTENDED);
190     regcomp_wrapper(&re_absolute_uri,
191     "^[a-zA-Z0-9]+://[^ ]+$",
192     REG_EXTENDED);
193     regcomp_wrapper(&re_etag,
194     "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
195     REG_EXTENDED);
196     regcomp_wrapper(&re_server,
197 james 44 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
198 james 40 REG_EXTENDED);
199     regcomp_wrapper(&re_transfer_coding,
200     "^([-0-9a-zA-Z_.]+)[ \t]*"
201     "(;[ \t]*([-0-9a-zA-Z_.]+)="
202     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
203     REG_EXTENDED);
204     regcomp_wrapper(&re_upgrade,
205     "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
206     REG_EXTENDED);
207     regcomp_wrapper(&re_ugly,
208 james 43 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
209 james 40 REG_EXTENDED);
210 james 41 regcomp_wrapper(&re_rfc1123,
211     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
212     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
213     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
214     REG_EXTENDED);
215     regcomp_wrapper(&re_rfc1036,
216     "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
217     "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
218     "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
219     REG_EXTENDED);
220     regcomp_wrapper(&re_asctime,
221     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
222     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
223     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
224     REG_EXTENDED);
225 james 40 }
226    
227    
/**
 * Compile a regular expression, treating failure as fatal.
 *
 * On failure the offending pattern is written to stderr and the regerror()
 * text is passed to die().
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
242    
243    
244     /**
245     * Fetch and check the headers for the specified url.
246     */
247     void check_url(const char *url)
248     {
249     int i, r;
250     CURLcode code;
251    
252     start = true;
253     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
254     header_table[i].count = 0;
255    
256     printf("Checking URL %s\n", url);
257     if (strncmp(url, "http", 4))
258     warning("this is not an http or https url");
259    
260     if (curl_easy_setopt(curl, CURLOPT_URL, url))
261     die("Failed to set curl options");
262    
263     code = curl_easy_perform(curl);
264     if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
265     error(error_buffer);
266     return;
267     } else {
268     printf("\n");
269     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
270     if (header_table[i].count == 0 && header_table[i].missing)
271     lookup(header_table[i].missing);
272     }
273     }
274    
275     r = regexec(&re_ugly, url, 0, 0, 0);
276     if (r)
277     lookup("ugly");
278     }
279    
280    
281     /**
282     * Callback for received header data.
283     */
284     size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
285     {
286     const size_t size = msize * nmemb;
287     char s[400], *name, *value;
288    
289     UNUSED(stream);
290    
291     printf("* ");
292     print(ptr, size);
293     printf("\n");
294    
295     if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
296     lookup("notcrlf");
297     return size;
298     }
299     if (sizeof s <= size) {
300     warning("header too long: ignored\n");
301     return size;
302     }
303     strncpy(s, ptr, size);
304     s[size - 2] = 0;
305    
306     name = s;
307     value = strchr(s, ':');
308    
309     if (s[0] == 0) {
310     /* empty header indicates end of headers */
311     puts("End of headers.");
312     return 0;
313    
314     } else if (start) {
315     /* Status-Line [6.1] */
316     check_status_line(s);
317     start = false;
318    
319     } else if (!value) {
320     lookup("missingcolon");
321    
322     } else {
323     *value = 0;
324     value++;
325    
326     check_header(name, skip_lws(value));
327     }
328    
329     return size;
330     }
331    
332    
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned, which aborts the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
347    
348    
349     /**
350     * Check the syntax and content of the response Status-Line [6.1].
351     */
352     void check_status_line(const char *s)
353     {
354     const char *reason;
355     unsigned int major = 0, minor = 0;
356     int r;
357     regmatch_t pmatch[5];
358    
359     r = regexec(&re_status_line, s, 5, pmatch, 0);
360     if (r) {
361     lookup("badstatusline");
362     return;
363     }
364    
365     major = atoi(s + pmatch[1].rm_so);
366     minor = atoi(s + pmatch[2].rm_so);
367     status_code = atoi(s + pmatch[3].rm_so);
368     reason = s + pmatch[4].rm_so;
369    
370     if (major < 1 || (major == 1 && minor == 0)) {
371     lookup("oldhttp");
372     } else if ((major == 1 && 1 < minor) || 1 < major) {
373     lookup("futurehttp");
374     } else {
375     if (status_code < 100 || 600 <= status_code) {
376     lookup("badstatus");
377     } else {
378     char key[] = "xxx";
379     key[0] = '0' + status_code / 100;
380     lookup(key);
381     }
382     }
383     }
384    
385    
386     /**
387     * Check the syntax and content of a header.
388     */
389     void check_header(const char *name, const char *value)
390     {
391     struct header_entry *header;
392    
393     header = bsearch(name, header_table,
394     sizeof header_table / sizeof header_table[0],
395     sizeof header_table[0],
396     (int (*)(const void *, const void *)) strcasecmp);
397    
398     if (header) {
399     header->count++;
400     header->handler(value);
401 james 44 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
402     lookup("xheader");
403     } else {
404 james 40 lookup("nonstandard");
405 james 44 }
406 james 40 }
407    
408    
409     /**
410     * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
411     */
412     bool parse_date(const char *s, struct tm *tm)
413     {
414 james 41 int r;
415 james 40 int len = strlen(s);
416 james 41 regmatch_t pmatch[20];
417 james 40
418     if (len == 29) {
419     /* RFC 1123 */
420 james 41 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
421     if (r == 0) {
422     tm->tm_mday = atoi(s + pmatch[2].rm_so);
423     tm->tm_mon = month(s + pmatch[3].rm_so);
424     tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
425     tm->tm_hour = atoi(s + pmatch[5].rm_so);
426     tm->tm_min = atoi(s + pmatch[6].rm_so);
427     tm->tm_sec = atoi(s + pmatch[7].rm_so);
428 james 40 return true;
429 james 41 }
430 james 40
431     } else if (len == 24) {
432     /* asctime() format */
433 james 41 r = regexec(&re_asctime, s, 20, pmatch, 0);
434     if (r == 0) {
435     if (s[pmatch[3].rm_so] == ' ')
436     tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
437     else
438     tm->tm_mday = atoi(s + pmatch[3].rm_so);
439     tm->tm_mon = month(s + pmatch[2].rm_so);
440     tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
441     tm->tm_hour = atoi(s + pmatch[4].rm_so);
442     tm->tm_min = atoi(s + pmatch[5].rm_so);
443     tm->tm_sec = atoi(s + pmatch[6].rm_so);
444 james 40 lookup("asctime");
445     return true;
446     }
447    
448     } else {
449     /* RFC 1036 */
450 james 41 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
451     if (r == 0) {
452     tm->tm_mday = atoi(s + pmatch[2].rm_so);
453     tm->tm_mon = month(s + pmatch[3].rm_so);
454     tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
455     tm->tm_hour = atoi(s + pmatch[5].rm_so);
456     tm->tm_min = atoi(s + pmatch[6].rm_so);
457     tm->tm_sec = atoi(s + pmatch[7].rm_so);
458 james 40 lookup("rfc1036");
459     return true;
460     }
461    
462     }
463    
464     lookup("baddate");
465     return false;
466     }
467    
468    
/**
 * Convert an English month name (at least its first three letters, initial
 * capital) to the month number, 0 for January to 11 for December.
 *
 * Only as many characters as are needed to tell the months apart are
 * examined. A name with an unrecognised first letter gives 0.
 */
int month(const char *s)
{
	const char first = s[0];

	if (first == 'J') {
		if (s[1] == 'a')
			return 0;				/* Jan */
		if (s[1] == 'u')
			return s[2] == 'n' ? 5 : 6;		/* Jun, Jul */
		return 1;	/* any other 'J' name gives the same as 'F' */
	}
	if (first == 'F')
		return 1;					/* Feb */
	if (first == 'M')
		return s[2] == 'r' ? 2 : 4;			/* Mar, May */
	if (first == 'A')
		return s[1] == 'p' ? 3 : 7;			/* Apr, Aug */
	if (first == 'S')
		return 8;					/* Sep */
	if (first == 'O')
		return 9;					/* Oct */
	if (first == 'N')
		return 10;					/* Nov */
	if (first == 'D')
		return 11;					/* Dec */

	return 0;
}
499    
500    
/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * The broken-down time is first converted as if it were local time, then
 * that result is pushed through gmtime() and mktime() again; the difference
 * between the two conversions is used to correct the first one.
 *
 * \param t  broken-down time; may be modified by mktime() and by the
 *           one-hour retry
 * \return the time, or -1 if mktime() fails even after the retry
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t as_local, round_trip;
	struct tm *utc_fields;

	as_local = mktime(t);
	if (as_local == -1) {
		/* retry one hour earlier and add the hour back afterwards */
		t->tm_hour--;
		as_local = mktime(t);
		if (as_local == -1)
			return -1;	/* can't deal with output from strptime */
		as_local += 3600;
	}

	utc_fields = gmtime(&as_local);
	utc_fields->tm_isdst = 0;
	round_trip = mktime(utc_fields);
	if (round_trip == -1) {
		utc_fields->tm_hour--;
		round_trip = mktime(utc_fields);
		if (round_trip == -1)
			return -1;	/* can't deal with output from gmtime */
		round_trip += 3600;
	}

	return as_local - (round_trip - as_local);
}
532    
533    
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * One leading CR LF is skipped when it is followed by a space or tab, then
 * any run of spaces and tabs. Returns a pointer into the same string.
 */
const char *skip_lws(const char *s)
{
	const bool folded = s[0] == '\r' && s[1] == '\n' &&
			(s[2] == ' ' || s[2] == '\t');
	const char *p = folded ? s + 2 : s;

	return p + strspn(p, " \t");
}
545    
546    
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * \param s         list to parse
 * \param preg      expression matching one element at the start of a string
 * \param n         minimum number of elements
 * \param m         maximum number of elements, or UINT_MAX for no limit
 * \param callback  called for each element with the remaining string and the
 *                  match offsets into it, or 0 for no callback
 * \return true if the list is well-formed and has between n and m elements;
 *         otherwise a diagnostic is printed and false is returned
 *
 * An empty list has zero elements, so it is accepted when n is 0.
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0)) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		/* several commas in a row form empty elements: skip them */
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
591    
592    
/* Header-specific validation. */

/**
 * Accept-Ranges: only the values "bytes" and "none" are recognised.
 */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
603    
604     void header_age(const char *s)
605     {
606     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
607     lookup("badage");
608     else
609     lookup("ok");
610     }
611    
612     void header_allow(const char *s)
613     {
614     if (parse_list(s, &re_token, 0, UINT_MAX, 0))
615     lookup("ok");
616     else
617     lookup("badallow");
618     }
619    
620     void header_cache_control(const char *s)
621     {
622     if (parse_list(s, &re_token_value, 1, UINT_MAX,
623     header_cache_control_callback))
624     lookup("ok");
625     else
626     lookup("badcachecont");
627     }
628    
/**
 * Known Cache-Control directives. Searched with bsearch(), so the entries
 * must stay sorted ignoring case.
 */
char cache_control_list[][20] = {
	"max-age", "max-stale", "min-fresh", "must-revalidate",
	"no-cache", "no-store", "no-transform", "only-if-cached",
	"private", "proxy-revalidate", "public", "s-maxage"
};

/**
 * bsearch() comparator: compare a directive name (the key) with an entry of
 * cache_control_list, ignoring case.
 */
static int compare_cache_directive(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * parse_list() callback for Cache-Control: report a directive whose name
 * (match group 1) is not in cache_control_list.
 */
void header_cache_control_callback(const char *s, regmatch_t pmatch[])
{
	size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any known directive, and too long for name[] */
	if (19 < len) {
		lookup("unknowncachecont");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (!bsearch(name, cache_control_list,
			sizeof cache_control_list / sizeof cache_control_list[0],
			sizeof cache_control_list[0],
			compare_cache_directive)) {
		printf(" Cache-Control directive '%s':\n", name);
		lookup("unknowncachecont");
	}
}
659    
/**
 * Connection: "close" is the only value accepted without a warning.
 */
void header_connection(const char *s)
{
	const bool is_close = strcmp(s, "close") == 0;

	lookup(is_close ? "ok" : "badconnection");
}
667    
668     void header_content_encoding(const char *s)
669     {
670     if (parse_list(s, &re_token, 1, UINT_MAX,
671     header_content_encoding_callback))
672     lookup("ok");
673     else
674     lookup("badcontenc");
675     }
676    
/**
 * Known content codings. Searched with bsearch(), so the entries must stay
 * sorted ignoring case.
 */
char content_coding_list[][20] = {
	"compress", "deflate", "gzip", "identity"
};

/**
 * bsearch() comparator: compare a coding name (the key) with an entry of
 * content_coding_list, ignoring case.
 */
static int compare_content_coding(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * parse_list() callback for Content-Encoding: report a coding (match group
 * 1) which is not in content_coding_list.
 */
void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
{
	size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any known coding, and too long for name[] */
	if (19 < len) {
		lookup("unknowncontenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (!bsearch(name, content_coding_list,
			sizeof content_coding_list / sizeof content_coding_list[0],
			sizeof content_coding_list[0],
			compare_content_coding)) {
		printf(" Content-Encoding '%s':\n", name);
		lookup("unknowncontenc");
	}
}
704    
705     void header_content_language(const char *s)
706     {
707     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
708     lookup("ok");
709     else
710     lookup("badcontlang");
711     }
712    
713     void header_content_length(const char *s)
714     {
715     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
716     lookup("badcontlen");
717     else
718     lookup("ok");
719     }
720    
/**
 * Content-Location: rejected only if the value contains a space.
 */
void header_content_location(const char *s)
{
	const bool has_space = strchr(s, ' ') != NULL;

	lookup(has_space ? "badcontloc" : "ok");
}
728    
/**
 * Content-MD5: only the length is checked, which must be 24 characters.
 */
void header_content_md5(const char *s)
{
	const bool right_length = strlen(s) == 24;

	lookup(right_length ? "ok" : "badcontmd5");
}
736    
/**
 * Content-Range: always reported with "contentrange"; the value is ignored.
 */
void header_content_range(const char *s)
{
	(void) s;

	lookup("contentrange");
}
742    
743     void header_content_type(const char *s)
744     {
745     bool charset = false;
746     char *type, *subtype;
747     unsigned int i;
748     int r;
749     regmatch_t pmatch[30];
750    
751     r = regexec(&re_content_type, s, 30, pmatch, 0);
752     if (r) {
753     lookup("badcontenttype");
754     return;
755     }
756    
757     type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
758     subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
759    
760     /* parameters */
761     for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
762     char *attrib, *value;
763    
764     attrib = strndup(s + pmatch[i + 1].rm_so,
765     pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
766     value = strndup(s + pmatch[i + 2].rm_so,
767     pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
768    
769     if (strcasecmp(attrib, "charset") == 0)
770     charset = true;
771     }
772    
773     if (strcasecmp(type, "text") == 0 && !charset)
774     lookup("nocharset");
775     else
776     lookup("ok");
777     }
778    
/**
 * Date: the value must parse as an HTTP date and lie within 10 seconds of
 * the local clock, otherwise "wrongdate" is reported.
 */
void header_date(const char *s)
{
	struct tm parsed;
	time_t claimed;
	const time_t now = time(0);

	if (!parse_date(s, &parsed))
		return;		/* parse_date() has already reported */

	claimed = mktime_from_utc(&parsed);
	lookup(10 < fabs(difftime(now, claimed)) ? "wrongdate" : "ok");
}
796    
797     void header_etag(const char *s)
798     {
799     int r;
800     r = regexec(&re_etag, s, 0, 0, 0);
801     if (r)
802     lookup("badetag");
803     else
804     lookup("ok");
805     }
806    
/**
 * Expires: the value must parse as an HTTP date; parse_date() reports any
 * failure itself.
 */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
813    
/**
 * Last-Modified: the value must parse as an HTTP date and must not be more
 * than 10 seconds ahead of the local clock.
 */
void header_last_modified(const char *s)
{
	struct tm parsed;
	time_t modified;
	const time_t now = time(0);

	if (!parse_date(s, &parsed))
		return;		/* parse_date() has already reported */

	modified = mktime_from_utc(&parsed);
	lookup(10 < difftime(modified, now) ? "futurelastmod" : "ok");
}
831    
832     void header_location(const char *s)
833     {
834     int r;
835     r = regexec(&re_absolute_uri, s, 0, 0, 0);
836     if (r)
837     lookup("badlocation");
838     else
839     lookup("ok");
840     }
841    
842     void header_pragma(const char *s)
843     {
844     if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
845     lookup("ok");
846     else
847     lookup("badpragma");
848     }
849    
850     void header_retry_after(const char *s)
851     {
852     struct tm tm;
853    
854     if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
855     lookup("ok");
856     return;
857     }
858    
859     if (!parse_date(s, &tm))
860     return;
861    
862     lookup("ok");
863     }
864    
865     void header_server(const char *s)
866     {
867     int r;
868     r = regexec(&re_server, s, 0, 0, 0);
869     if (r)
870     lookup("badserver");
871     else
872     lookup("ok");
873     }
874    
875     void header_trailer(const char *s)
876     {
877     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
878     lookup("ok");
879     else
880     lookup("badtrailer");
881     }
882    
883     void header_transfer_encoding(const char *s)
884     {
885     if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
886     header_transfer_encoding_callback))
887     lookup("ok");
888     else
889     lookup("badtransenc");
890     }
891    
/**
 * Known transfer codings. Searched with bsearch(), so the entries must stay
 * sorted ignoring case.
 */
char transfer_coding_list[][20] = {
	"chunked", "compress", "deflate", "gzip", "identity"
};

/**
 * bsearch() comparator: compare a coding name (the key) with an entry of
 * transfer_coding_list, ignoring case.
 */
static int compare_transfer_coding(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * parse_list() callback for Transfer-Encoding: report a coding (match group
 * 1) which is not in transfer_coding_list.
 */
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
{
	size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any known coding, and too long for name[] */
	if (19 < len) {
		lookup("unknowntransenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (!bsearch(name, transfer_coding_list,
			sizeof transfer_coding_list / sizeof transfer_coding_list[0],
			sizeof transfer_coding_list[0],
			compare_transfer_coding)) {
		printf(" Transfer-Encoding '%s':\n", name);
		lookup("unknowntransenc");
	}
}
919    
920     void header_upgrade(const char *s)
921     {
922     int r;
923     r = regexec(&re_upgrade, s, 0, 0, 0);
924     if (r)
925     lookup("badupgrade");
926     else
927     lookup("ok");
928     }
929    
930     void header_vary(const char *s)
931     {
932     if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
933     lookup("ok");
934     else
935     lookup("badvary");
936     }
937    
/**
 * Via: always reported with "via"; the value is ignored.
 */
void header_via(const char *s)
{
	(void) s;

	lookup("via");
}
943    
944    
/**
 * Report a fatal error on stderr, prefixed with the program name, and exit
 * with a failure status. Does not return.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);
	exit(EXIT_FAILURE);
}
953    
954    
/**
 * Write a message to stdout on a line of its own, prefixed with "Warning: ".
 */
void warning(const char *message)
{
	fputs("Warning: ", stdout);
	puts(message);		/* puts() appends the newline */
}
962    
963    
/**
 * Write a message to stdout on a line of its own, prefixed with "Error: ".
 */
void error(const char *message)
{
	fputs("Error: ", stdout);
	puts(message);		/* puts() appends the newline */
}
971    
972    
/**
 * Print a string which may contain control characters.
 *
 * Bytes 32 to 126 are printed as they are; every other byte is printed as
 * its two-digit hexadecimal value in square brackets.
 *
 * \param s    bytes to print (need not be 0-terminated)
 * \param len  number of bytes
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* plain char may be signed: go through unsigned char so that
		 * bytes >= 0x80 print as two digits, not sign-extended */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
986    
987    
988     struct message_entry {
989     const char key[20];
990     const char *value;
991     } message_table[] = {
992     { "1xx", "A response status code in the range 100 - 199 indicates a "
993     "'provisional response'." },
994     { "2xx", "A response status code in the range 200 - 299 indicates that "
995     "the request was successful." },
996     { "3xx", "A response status code in the range 300 - 399 indicates that "
997     "the client should redirect to a new URL." },
998     { "4xx", "A response status code in the range 400 - 499 indicates that "
999     "the request could not be fulfilled due to client error." },
1000     { "5xx", "A response status code in the range 500 - 599 indicates that "
1001     "an error occurred on the server." },
1002     { "asctime", "Warning: This date is in the obsolete asctime() format. "
1003     "Consider using the RFC 1123 format instead." },
1004     { "badage", "Error: The Age header must be one number." },
1005     { "badallow", "Error: The Allow header must be a comma-separated list of "
1006     "HTTP methods." },
1007     { "badcachecont", "Error: The Cache-Control header must be a "
1008     "comma-separated list of directives." },
1009     { "badconnection", "Warning: The only value of the Connection header "
1010     "defined by HTTP/1.1 is \"close\"." },
1011     { "badcontenc", "Error: The Content-Encoding header must be a "
1012     "comma-separated list of encodings." },
1013     { "badcontenttype", "Error: The Content-Type header must be of the form "
1014     "'type/subtype (; optional parameters)'." },
1015     { "badcontlang", "Error: The Content-Language header must be a "
1016     "comma-separated list of language tags." },
1017     { "badcontlen", "Error: The Content-Length header must be a number." },
1018     { "badcontloc", "Error: The Content-Location header must be an absolute "
1019     "or relative URI." },
1020     { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
1021     "MD5 sum." },
1022     { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
1023     "1123 format." },
1024     { "badetag", "Error: The ETag header must be a quoted string (optionally "
1025     "preceded by \"W/\" for a weak tag)." },
1026     { "badlocation", "Error: The Location header must be an absolute URI. "
1027     "Relative URIs are not permitted." },
1028     { "badpragma", "Error: The Pragma header must be a comma-separated list of "
1029     "directives." },
1030     { "badserver", "Error: The Server header must be a space-separated list of "
1031     "products of the form Name/optional-version and comments "
1032     "in ()." },
1033     { "badstatus", "Warning: The response status code is outside the standard "
1034     "range 100 - 599." },
1035     { "badstatusline", "Error: Failed to parse the response Status-Line. The "
1036     "status line must be of the form 'HTTP/n.n <3-digit "
1037     "status> <reason phrase>'." },
1038     { "badtrailer", "Error: The Trailer header must be a comma-separated list "
1039     "of header names." },
1040     { "badtransenc", "Error: The Transfer-Encoding header must be a "
1041     "comma-separated of encodings." },
1042     { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
1043     "of product identifiers." },
1044     { "badvary", "Error: The Vary header must be a comma-separated list "
1045     "of header names, or \"*\"." },
1046     { "contentrange", "Warning: The Content-Range header should not be returned "
1047     "by the server for this request." },
1048     { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
1049     "version of this tool." },
1050     { "futurelastmod", "Error: The specified Last-Modified date-time is in "
1051     "the future." },
1052     { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
1053     { "missingcontenttype", "Warning: No Content-Type header was present. The "
1054     "client will have to guess the media type or ask "
1055     "the user. Adding a Content-Type header is strongly "
1056     "recommended." },
1057     { "missingcontlang", "Consider adding a Content-Language header if "
1058     "applicable for this document." },
1059     { "missingdate", "Warning: No Date header was present. A Date header must "
1060     "be present, unless the server does not have a clock, or "
1061     "the response is 100, 101, or 500 - 599." },
1062     { "missinglastmod", "No Last-Modified header was present. The "
1063     "HTTP/1.1 specification states that this header should "
1064     "be sent whenever feasible." },
1065     { "nocharset", "Warning: No character set is specified in the Content-Type. "
1066     "Clients may assume the default of ISO-8859-1. Consider "
1067     "appending '; charset=...'." },
1068     { "nonstandard", "Warning: I don't know anything about this header. Is it "
1069     "a standard HTTP response header?" },
1070     { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
1071     "that all header lines end with CR LF." },
1072     { "ok", "OK." },
1073     { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
1074     "to HTTP/1.1." },
1075     { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
1076     "Consider using the RFC 1123 format instead." },
1077     { "ugly", "This URL appears to contain implementation-specific parts such "
1078     "as an extension or a query string. This may make the URL liable "
1079     "to change when the implementation is changed, resulting in "
1080     "broken links. Consider using URL rewriting or equivalent to "
1081     "implement a future-proof URL space. See "
1082     "http://www.w3.org/Provider/Style/URI for more information." },
1083     { "unknowncachecont", "Warning: This Cache-Control directive is "
1084     "non-standard and will have limited support." },
1085     { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
1086     { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
1087     "range." },
1088     { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
1089     { "via", "This header was added by a proxy, cache or gateway." },
1090     { "wrongdate", "Warning: The server date-time differs from this system's "
1091     "date-time by more than 10 seconds. Check that both the "
1092 james 44 "system clocks are correct." },
1093     { "xheader", "This is an extension header. I don't know how to check it." }
1094 james 40 };
1095    
1096    
1097     /**
1098     * Look up and output the string referenced by a key.
1099     */
1100     void lookup(const char *key)
1101     {
1102     const char *s, *spc;
1103     int x;
1104     struct message_entry *message;
1105    
1106     message = bsearch(key, message_table,
1107     sizeof message_table / sizeof message_table[0],
1108     sizeof message_table[0],
1109     (int (*)(const void *, const void *)) strcasecmp);
1110     if (message)
1111     s = message->value;
1112     else
1113     s = key;
1114    
1115     printf(" ");
1116     x = 4;
1117     while (*s) {
1118     spc = strchr(s, ' ');
1119     if (!spc)
1120     spc = s + strlen(s);
1121     if (75 < x + (spc - s)) {
1122     printf("\n ");
1123     x = 4;
1124     }
1125     x += spc - s + 1;
1126     printf("%.*s ", spc - s, s);
1127     if (*spc)
1128     s = spc + 1;
1129     else
1130     s = spc;
1131     }
1132     printf("\n\n");
1133     }

  ViewVC Help
Powered by ViewVC 1.1.26