/[james]/httplint/httplint.c
ViewVC logotype

Annotation of /httplint/httplint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 43 - (hide annotations) (download) (as text)
Wed Dec 17 21:54:46 2003 UTC (20 years, 11 months ago) by james
File MIME type: text/x-csrc
File size: 29777 byte(s)
Fix range in regexp.

1 james 40 /*
2     * HTTP Header Lint
3     * Licensed under the same license as Curl
4     * http://curl.haxx.se/docs/copyright.html
5     * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
6     */
7    
8     /*
9     * Compile using
10     * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11     *
12     * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13     */
14    
15     #define _GNU_SOURCE
16     #define __USE_XOPEN
17    
#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <sys/types.h>
#include <regex.h>
#include <curl/curl.h>
28    
29    
/* Set of decimal digits, for use with strspn(). */
#define NUMBER "0123456789"
/* Mark a variable or parameter as deliberately unused. The cast to void
 * evaluates nothing and, unlike a self-assignment, also works on const
 * objects. */
#define UNUSED(x) ((void)(x))
32    
33    
34     bool start;
35     CURL *curl;
36     int status_code;
37     char error_buffer[CURL_ERROR_SIZE];
38     regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
39 james 41 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
40     re_rfc1123, re_rfc1036, re_asctime;
41 james 40
42    
/* Set-up and per-URL driver. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks. */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Generic checks and parsing helpers. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* One handler per known response header, plus list-item callbacks. */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);

/* Output helpers. */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);
89    
90    
91     struct header_entry {
92     char name[40];
93     void (*handler)(const char *s);
94     int count;
95     char *missing;
96     } header_table[] = {
97     { "Accept-Ranges", header_accept_ranges, 0, 0 },
98     { "Age", header_age, 0, 0 },
99     { "Allow", header_allow, 0, 0 },
100     { "Cache-Control", header_cache_control, 0, 0 },
101     { "Connection", header_connection, 0, 0 },
102     { "Content-Encoding", header_content_encoding, 0, 0 },
103     { "Content-Language", header_content_language, 0, "missingcontlang" },
104     { "Content-Length", header_content_length, 0, 0 },
105     { "Content-Location", header_content_location, 0, 0 },
106     { "Content-MD5", header_content_md5, 0, 0 },
107     { "Content-Range", header_content_range, 0, 0 },
108     { "Content-Type", header_content_type, 0, "missingcontenttype" },
109     { "Date", header_date, 0, "missingdate" },
110     { "ETag", header_etag, 0, 0 },
111     { "Expires", header_expires, 0, 0 },
112     { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113     { "Location", header_location, 0, 0 },
114     { "Pragma", header_pragma, 0, 0 },
115     { "Retry-After", header_retry_after, 0, 0 },
116     { "Server", header_server, 0, 0 },
117     { "Trailer", header_trailer, 0, 0 },
118     { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
119     { "Upgrade", header_upgrade, 0, 0 },
120     { "Vary", header_vary, 0, 0 },
121     { "Via", header_via, 0, 0 }
122     };
123    
124    
/**
 * Program entry point: check each URL named on the command line in turn.
 */
int main(int argc, char *argv[])
{
	int arg;

	if (argc < 2)
		die("Usage: httplint url [url ...]");

	init();

	arg = 1;
	while (arg != argc) {
		check_url(argv[arg]);
		arg++;
	}

	curl_global_cleanup();

	return 0;
}
144    
145    
146     /**
147     * Initialise the curl handle and compile regular expressions.
148     */
149     void init(void)
150     {
151     struct curl_slist *request_headers = 0;
152    
153     if (curl_global_init(CURL_GLOBAL_ALL))
154     die("Failed to initialise libcurl");
155    
156     curl = curl_easy_init();
157     if (!curl)
158     die("Failed to create curl handle");
159    
160     if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
161     die("Failed to set curl options");
162     if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
163     die("Failed to set curl options");
164     if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
165     die("Failed to set curl options");
166     if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
167     die("Failed to set curl options");
168    
169     /* remove libcurl default headers */
170     request_headers = curl_slist_append(request_headers, "Accept:");
171     request_headers = curl_slist_append(request_headers, "Pragma:");
172     if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
173     die("Failed to set curl options");
174    
175     /* compile regular expressions */
176     regcomp_wrapper(&re_status_line,
177     "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
178     REG_EXTENDED);
179     regcomp_wrapper(&re_token,
180     "^([-0-9a-zA-Z_.]+)",
181     REG_EXTENDED);
182     regcomp_wrapper(&re_token_value,
183     "^([-0-9a-zA-Z_.]+)(=([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\"))?",
184     REG_EXTENDED);
185     regcomp_wrapper(&re_content_type,
186     "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
187     "(;[ \t]*([-0-9a-zA-Z_.]+)="
188     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
189     REG_EXTENDED);
190     regcomp_wrapper(&re_absolute_uri,
191     "^[a-zA-Z0-9]+://[^ ]+$",
192     REG_EXTENDED);
193     regcomp_wrapper(&re_etag,
194     "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
195     REG_EXTENDED);
196     regcomp_wrapper(&re_server,
197     "^((([-0-9a-zA-Z_.]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
198     REG_EXTENDED);
199     regcomp_wrapper(&re_transfer_coding,
200     "^([-0-9a-zA-Z_.]+)[ \t]*"
201     "(;[ \t]*([-0-9a-zA-Z_.]+)="
202     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
203     REG_EXTENDED);
204     regcomp_wrapper(&re_upgrade,
205     "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
206     REG_EXTENDED);
207     regcomp_wrapper(&re_ugly,
208 james 43 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
209 james 40 REG_EXTENDED);
210 james 41 regcomp_wrapper(&re_rfc1123,
211     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
212     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
213     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
214     REG_EXTENDED);
215     regcomp_wrapper(&re_rfc1036,
216     "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
217     "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
218     "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
219     REG_EXTENDED);
220     regcomp_wrapper(&re_asctime,
221     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
222     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
223     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
224     REG_EXTENDED);
225 james 40 }
226    
227    
/**
 * Compile a regular expression with regcomp(). On failure the offending
 * pattern is printed to stderr and the program exits via die() with the
 * regerror() text.
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
242    
243    
244     /**
245     * Fetch and check the headers for the specified url.
246     */
247     void check_url(const char *url)
248     {
249     int i, r;
250     CURLcode code;
251    
252     start = true;
253     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
254     header_table[i].count = 0;
255    
256     printf("Checking URL %s\n", url);
257     if (strncmp(url, "http", 4))
258     warning("this is not an http or https url");
259    
260     if (curl_easy_setopt(curl, CURLOPT_URL, url))
261     die("Failed to set curl options");
262    
263     code = curl_easy_perform(curl);
264     if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
265     error(error_buffer);
266     return;
267     } else {
268     printf("\n");
269     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
270     if (header_table[i].count == 0 && header_table[i].missing)
271     lookup(header_table[i].missing);
272     }
273     }
274    
275     r = regexec(&re_ugly, url, 0, 0, 0);
276     if (r)
277     lookup("ugly");
278     }
279    
280    
281     /**
282     * Callback for received header data.
283     */
284     size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
285     {
286     const size_t size = msize * nmemb;
287     char s[400], *name, *value;
288    
289     UNUSED(stream);
290    
291     printf("* ");
292     print(ptr, size);
293     printf("\n");
294    
295     if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
296     lookup("notcrlf");
297     return size;
298     }
299     if (sizeof s <= size) {
300     warning("header too long: ignored\n");
301     return size;
302     }
303     strncpy(s, ptr, size);
304     s[size - 2] = 0;
305    
306     name = s;
307     value = strchr(s, ':');
308    
309     if (s[0] == 0) {
310     /* empty header indicates end of headers */
311     puts("End of headers.");
312     return 0;
313    
314     } else if (start) {
315     /* Status-Line [6.1] */
316     check_status_line(s);
317     start = false;
318    
319     } else if (!value) {
320     lookup("missingcolon");
321    
322     } else {
323     *value = 0;
324     value++;
325    
326     check_header(name, skip_lws(value));
327     }
328    
329     return size;
330     }
331    
332    
/**
 * libcurl callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is returned,
 * which aborts the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
347    
348    
349     /**
350     * Check the syntax and content of the response Status-Line [6.1].
351     */
352     void check_status_line(const char *s)
353     {
354     const char *reason;
355     unsigned int major = 0, minor = 0;
356     int r;
357     regmatch_t pmatch[5];
358    
359     r = regexec(&re_status_line, s, 5, pmatch, 0);
360     if (r) {
361     lookup("badstatusline");
362     return;
363     }
364    
365     major = atoi(s + pmatch[1].rm_so);
366     minor = atoi(s + pmatch[2].rm_so);
367     status_code = atoi(s + pmatch[3].rm_so);
368     reason = s + pmatch[4].rm_so;
369    
370     if (major < 1 || (major == 1 && minor == 0)) {
371     lookup("oldhttp");
372     } else if ((major == 1 && 1 < minor) || 1 < major) {
373     lookup("futurehttp");
374     } else {
375     if (status_code < 100 || 600 <= status_code) {
376     lookup("badstatus");
377     } else {
378     char key[] = "xxx";
379     key[0] = '0' + status_code / 100;
380     lookup(key);
381     }
382     }
383     }
384    
385    
386     /**
387     * Check the syntax and content of a header.
388     */
389     void check_header(const char *name, const char *value)
390     {
391     struct header_entry *header;
392    
393     header = bsearch(name, header_table,
394     sizeof header_table / sizeof header_table[0],
395     sizeof header_table[0],
396     (int (*)(const void *, const void *)) strcasecmp);
397    
398     if (header) {
399     header->count++;
400     header->handler(value);
401     } else
402     lookup("nonstandard");
403     }
404    
405    
406     /**
407     * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
408     */
409     bool parse_date(const char *s, struct tm *tm)
410     {
411 james 41 int r;
412 james 40 int len = strlen(s);
413 james 41 regmatch_t pmatch[20];
414 james 40
415     if (len == 29) {
416     /* RFC 1123 */
417 james 41 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
418     if (r == 0) {
419     tm->tm_mday = atoi(s + pmatch[2].rm_so);
420     tm->tm_mon = month(s + pmatch[3].rm_so);
421     tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
422     tm->tm_hour = atoi(s + pmatch[5].rm_so);
423     tm->tm_min = atoi(s + pmatch[6].rm_so);
424     tm->tm_sec = atoi(s + pmatch[7].rm_so);
425 james 40 return true;
426 james 41 }
427 james 40
428     } else if (len == 24) {
429     /* asctime() format */
430 james 41 r = regexec(&re_asctime, s, 20, pmatch, 0);
431     if (r == 0) {
432     if (s[pmatch[3].rm_so] == ' ')
433     tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
434     else
435     tm->tm_mday = atoi(s + pmatch[3].rm_so);
436     tm->tm_mon = month(s + pmatch[2].rm_so);
437     tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
438     tm->tm_hour = atoi(s + pmatch[4].rm_so);
439     tm->tm_min = atoi(s + pmatch[5].rm_so);
440     tm->tm_sec = atoi(s + pmatch[6].rm_so);
441 james 40 lookup("asctime");
442     return true;
443     }
444    
445     } else {
446     /* RFC 1036 */
447 james 41 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
448     if (r == 0) {
449     tm->tm_mday = atoi(s + pmatch[2].rm_so);
450     tm->tm_mon = month(s + pmatch[3].rm_so);
451     tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
452     tm->tm_hour = atoi(s + pmatch[5].rm_so);
453     tm->tm_min = atoi(s + pmatch[6].rm_so);
454     tm->tm_sec = atoi(s + pmatch[7].rm_so);
455 james 40 lookup("rfc1036");
456     return true;
457     }
458    
459     }
460    
461     lookup("baddate");
462     return false;
463     }
464    
465    
/**
 * Convert an English three-letter month name to the month number.
 *
 * \param  s  string starting with a month abbreviation ("Jan" .. "Dec")
 * \return 0 for January .. 11 for December; 0 if the name is not recognised
 */
int month(const char *s)
{
	switch (s[0]) {
	case 'J':
		if (s[1] == 'a')
			return 0;
		if (s[1] == 'u')
			return s[2] == 'n' ? 5 : 6;
		/* unknown "J?" name: do not fall through into February */
		break;
	case 'F':
		return 1;
	case 'M':
		return s[2] == 'r' ? 2 : 4;
	case 'A':
		return s[1] == 'p' ? 3 : 7;
	case 'S':
		return 8;
	case 'O':
		return 9;
	case 'N':
		return 10;
	case 'D':
		return 11;
	default:
		break;
	}
	return 0;
}
496    
497    
/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * The broken-down time is first converted as if it were local time; that
 * result is then pushed through gmtime() and mktime() again, and the
 * difference between the two conversions is used to correct the first one.
 * When mktime() fails, each step retries once with the hour moved back by one.
 *
 * \return the time as a time_t, or -1 on failure
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t as_local;
	time_t round_trip;
	struct tm *utc;

	as_local = mktime(t);
	if (as_local == -1) {
		t->tm_hour--;
		as_local = mktime(t);
		if (as_local == -1)
			return -1;	/* can't deal with output from strptime */
		as_local += 3600;
	}

	utc = gmtime(&as_local);
	utc->tm_isdst = 0;
	round_trip = mktime(utc);
	if (round_trip == -1) {
		utc->tm_hour--;
		round_trip = mktime(utc);
		if (round_trip == -1)
			return -1;	/* can't deal with output from gmtime */
		round_trip += 3600;
	}

	return (as_local - (round_trip - as_local));
}
529    
530    
/**
 * Skip optional LWS (linear white space) [2.2]: one CR LF, if it is followed
 * by a space or tab, and then any run of spaces and tabs.
 *
 * \return pointer to the first character after the white space
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
		p += 2;
	for (; *p == ' ' || *p == '\t'; p++)
		;
	return p;
}
542    
543    
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * Elements are separated by commas with optional LWS; empty (null) elements,
 * including leading ones, are skipped and do not count. A diagnostic is
 * printed when the list is malformed or has the wrong number of elements.
 *
 * \param  s         list to parse
 * \param  preg      compiled expression matching one element at the start of
 *                   the remaining string
 * \param  n         minimum number of elements
 * \param  m         maximum number of elements, or UINT_MAX for no limit
 * \param  callback  called for every element with the string the match was
 *                   made against and the match offsets; may be 0
 * \return true if the list is well-formed and has between n and m elements
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	/* leading white space and null elements are permitted */
	s = skip_lws(s);
	while (*s == ',')
		s = skip_lws(s + 1);

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0)) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		/* consecutive commas are null elements */
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
588    
589    
590     /* Header-specific validation. */
/** Accept-Ranges: only the exact values "bytes" and "none" are accepted. */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
600    
601     void header_age(const char *s)
602     {
603     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
604     lookup("badage");
605     else
606     lookup("ok");
607     }
608    
609     void header_allow(const char *s)
610     {
611     if (parse_list(s, &re_token, 0, UINT_MAX, 0))
612     lookup("ok");
613     else
614     lookup("badallow");
615     }
616    
617     void header_cache_control(const char *s)
618     {
619     if (parse_list(s, &re_token_value, 1, UINT_MAX,
620     header_cache_control_callback))
621     lookup("ok");
622     else
623     lookup("badcachecont");
624     }
625    
/* Known Cache-Control directives. Searched with bsearch(), so the entries
 * must stay in sorted order. */
char cache_control_list[][20] = {
	"max-age",
	"max-stale",
	"min-fresh",
	"must-revalidate",
	"no-cache",
	"no-store",
	"no-transform",
	"only-if-cached",
	"private",
	"proxy-revalidate",
	"public",
	"s-maxage"
};
631    
632     void header_cache_control_callback(const char *s, regmatch_t pmatch[])
633     {
634     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
635     char name[20];
636     char *dir;
637    
638     if (19 < len) {
639     lookup("unknowncachecont");
640     return;
641     }
642    
643     strncpy(name, s + pmatch[1].rm_so, len);
644     name[len] = 0;
645    
646     dir = bsearch(name, cache_control_list,
647     sizeof cache_control_list / sizeof cache_control_list[0],
648     sizeof cache_control_list[0],
649     (int (*)(const void *, const void *)) strcasecmp);
650    
651     if (!dir) {
652     printf(" Cache-Control directive '%s':\n", name);
653     lookup("unknowncachecont");
654     }
655     }
656    
/** Connection: only the exact value "close" is accepted. */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
664    
665     void header_content_encoding(const char *s)
666     {
667     if (parse_list(s, &re_token, 1, UINT_MAX,
668     header_content_encoding_callback))
669     lookup("ok");
670     else
671     lookup("badcontenc");
672     }
673    
/* Known content codings. Searched with bsearch(), so the entries must stay
 * in sorted order. */
char content_coding_list[][20] = {
	"compress",
	"deflate",
	"gzip",
	"identity"
};
677    
678     void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
679     {
680     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
681     char name[20];
682     char *dir;
683    
684     if (19 < len) {
685     lookup("unknowncontenc");
686     return;
687     }
688    
689     strncpy(name, s + pmatch[1].rm_so, len);
690     name[len] = 0;
691    
692     dir = bsearch(name, content_coding_list,
693     sizeof content_coding_list / sizeof content_coding_list[0],
694     sizeof content_coding_list[0],
695     (int (*)(const void *, const void *)) strcasecmp);
696     if (!dir) {
697     printf(" Content-Encoding '%s':\n", name);
698     lookup("unknowncontenc");
699     }
700     }
701    
702     void header_content_language(const char *s)
703     {
704     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
705     lookup("ok");
706     else
707     lookup("badcontlang");
708     }
709    
710     void header_content_length(const char *s)
711     {
712     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
713     lookup("badcontlen");
714     else
715     lookup("ok");
716     }
717    
/** Content-Location: rejected only if the value contains a space. */
void header_content_location(const char *s)
{
	lookup(strchr(s, ' ') ? "badcontloc" : "ok");
}
725    
/** Content-MD5: only the length (24 characters) of the value is checked. */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
733    
/** Content-Range: always reported, whatever the value. */
void header_content_range(const char *s)
{
	(void) s;
	lookup("contentrange");
}
739    
740     void header_content_type(const char *s)
741     {
742     bool charset = false;
743     char *type, *subtype;
744     unsigned int i;
745     int r;
746     regmatch_t pmatch[30];
747    
748     r = regexec(&re_content_type, s, 30, pmatch, 0);
749     if (r) {
750     lookup("badcontenttype");
751     return;
752     }
753    
754     type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
755     subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
756    
757     /* parameters */
758     for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
759     char *attrib, *value;
760    
761     attrib = strndup(s + pmatch[i + 1].rm_so,
762     pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
763     value = strndup(s + pmatch[i + 2].rm_so,
764     pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
765    
766     if (strcasecmp(attrib, "charset") == 0)
767     charset = true;
768     }
769    
770     if (strcasecmp(type, "text") == 0 && !charset)
771     lookup("nocharset");
772     else
773     lookup("ok");
774     }
775    
/**
 * Date: must parse as an HTTP date and be within 10 seconds of the local
 * clock, otherwise "wrongdate" is reported.
 */
void header_date(const char *s)
{
	double diff;
	time_t time0, time1;
	struct tm tm;

	/* fields the date does not carry (e.g. tm_isdst) must not be
	 * indeterminate when the structure reaches mktime() */
	memset(&tm, 0, sizeof tm);

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	time1 = mktime_from_utc(&tm);

	diff = difftime(time0, time1);
	if (10 < fabs(diff))
		lookup("wrongdate");
	else
		lookup("ok");
}
793    
794     void header_etag(const char *s)
795     {
796     int r;
797     r = regexec(&re_etag, s, 0, 0, 0);
798     if (r)
799     lookup("badetag");
800     else
801     lookup("ok");
802     }
803    
/** Expires: must parse as an HTTP date (parse_date() reports failures). */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
810    
/**
 * Last-Modified: must parse as an HTTP date and must not be more than
 * 10 seconds in the future, otherwise "futurelastmod" is reported.
 */
void header_last_modified(const char *s)
{
	double diff;
	time_t time0, time1;
	struct tm tm;

	/* fields the date does not carry (e.g. tm_isdst) must not be
	 * indeterminate when the structure reaches mktime() */
	memset(&tm, 0, sizeof tm);

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	/* HTTP dates are GMT, so convert as UTC (as header_date() does)
	 * rather than as local time */
	time1 = mktime_from_utc(&tm);

	diff = difftime(time1, time0);
	if (10 < diff)
		lookup("futurelastmod");
	else
		lookup("ok");
}
828    
829     void header_location(const char *s)
830     {
831     int r;
832     r = regexec(&re_absolute_uri, s, 0, 0, 0);
833     if (r)
834     lookup("badlocation");
835     else
836     lookup("ok");
837     }
838    
839     void header_pragma(const char *s)
840     {
841     if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
842     lookup("ok");
843     else
844     lookup("badpragma");
845     }
846    
847     void header_retry_after(const char *s)
848     {
849     struct tm tm;
850    
851     if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
852     lookup("ok");
853     return;
854     }
855    
856     if (!parse_date(s, &tm))
857     return;
858    
859     lookup("ok");
860     }
861    
862     void header_server(const char *s)
863     {
864     int r;
865     r = regexec(&re_server, s, 0, 0, 0);
866     if (r)
867     lookup("badserver");
868     else
869     lookup("ok");
870     }
871    
872     void header_trailer(const char *s)
873     {
874     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
875     lookup("ok");
876     else
877     lookup("badtrailer");
878     }
879    
880     void header_transfer_encoding(const char *s)
881     {
882     if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
883     header_transfer_encoding_callback))
884     lookup("ok");
885     else
886     lookup("badtransenc");
887     }
888    
/* Known transfer codings. Searched with bsearch(), so the entries must stay
 * in sorted order. */
char transfer_coding_list[][20] = {
	"chunked",
	"compress",
	"deflate",
	"gzip",
	"identity"
};
892    
893     void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
894     {
895     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
896     char name[20];
897     char *dir;
898    
899     if (19 < len) {
900     lookup("unknowntransenc");
901     return;
902     }
903    
904     strncpy(name, s + pmatch[1].rm_so, len);
905     name[len] = 0;
906    
907     dir = bsearch(name, transfer_coding_list,
908     sizeof transfer_coding_list / sizeof transfer_coding_list[0],
909     sizeof transfer_coding_list[0],
910     (int (*)(const void *, const void *)) strcasecmp);
911     if (!dir) {
912     printf(" Transfer-Encoding '%s':\n", name);
913     lookup("unknowntransenc");
914     }
915     }
916    
917     void header_upgrade(const char *s)
918     {
919     int r;
920     r = regexec(&re_upgrade, s, 0, 0, 0);
921     if (r)
922     lookup("badupgrade");
923     else
924     lookup("ok");
925     }
926    
927     void header_vary(const char *s)
928     {
929     if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
930     lookup("ok");
931     else
932     lookup("badvary");
933     }
934    
/** Via: always reported, whatever the value. */
void header_via(const char *s)
{
	(void) s;
	lookup("via");
}
940    
941    
/**
 * Report a fatal error on stderr, prefixed with the program name, and
 * terminate with EXIT_FAILURE.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);
	exit(EXIT_FAILURE);
}
950    
951    
/**
 * Write a message to stdout, prefixed with "Warning: ".
 */
void warning(const char *message)
{
	fputs("Warning: ", stdout);
	fputs(message, stdout);
	putchar('\n');
}
959    
960    
/**
 * Write a message to stdout, prefixed with "Error: ".
 */
void error(const char *message)
{
	fputs("Error: ", stdout);
	fputs(message, stdout);
	putchar('\n');
}
968    
969    
/**
 * Print a string which may contain control characters.
 *
 * Printable ASCII (32 .. 126) is written as-is; every other byte is written
 * as two hexadecimal digits in square brackets.
 *
 * \param  s    bytes to print (need not be 0-terminated)
 * \param  len  number of bytes
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* go through unsigned char so bytes >= 0x80 are not
		 * sign-extended and printed as "ffffffxx" */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
983    
984    
985     struct message_entry {
986     const char key[20];
987     const char *value;
988     } message_table[] = {
989     { "1xx", "A response status code in the range 100 - 199 indicates a "
990     "'provisional response'." },
991     { "2xx", "A response status code in the range 200 - 299 indicates that "
992     "the request was successful." },
993     { "3xx", "A response status code in the range 300 - 399 indicates that "
994     "the client should redirect to a new URL." },
995     { "4xx", "A response status code in the range 400 - 499 indicates that "
996     "the request could not be fulfilled due to client error." },
997     { "5xx", "A response status code in the range 500 - 599 indicates that "
998     "an error occurred on the server." },
999     { "asctime", "Warning: This date is in the obsolete asctime() format. "
1000     "Consider using the RFC 1123 format instead." },
1001     { "badage", "Error: The Age header must be one number." },
1002     { "badallow", "Error: The Allow header must be a comma-separated list of "
1003     "HTTP methods." },
1004     { "badcachecont", "Error: The Cache-Control header must be a "
1005     "comma-separated list of directives." },
1006     { "badconnection", "Warning: The only value of the Connection header "
1007     "defined by HTTP/1.1 is \"close\"." },
1008     { "badcontenc", "Error: The Content-Encoding header must be a "
1009     "comma-separated list of encodings." },
1010     { "badcontenttype", "Error: The Content-Type header must be of the form "
1011     "'type/subtype (; optional parameters)'." },
1012     { "badcontlang", "Error: The Content-Language header must be a "
1013     "comma-separated list of language tags." },
1014     { "badcontlen", "Error: The Content-Length header must be a number." },
1015     { "badcontloc", "Error: The Content-Location header must be an absolute "
1016     "or relative URI." },
1017     { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
1018     "MD5 sum." },
1019     { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
1020     "1123 format." },
1021     { "badetag", "Error: The ETag header must be a quoted string (optionally "
1022     "preceded by \"W/\" for a weak tag)." },
1023     { "badlocation", "Error: The Location header must be an absolute URI. "
1024     "Relative URIs are not permitted." },
1025     { "badpragma", "Error: The Pragma header must be a comma-separated list of "
1026     "directives." },
1027     { "badserver", "Error: The Server header must be a space-separated list of "
1028     "products of the form Name/optional-version and comments "
1029     "in ()." },
1030     { "badstatus", "Warning: The response status code is outside the standard "
1031     "range 100 - 599." },
1032     { "badstatusline", "Error: Failed to parse the response Status-Line. The "
1033     "status line must be of the form 'HTTP/n.n <3-digit "
1034     "status> <reason phrase>'." },
1035     { "badtrailer", "Error: The Trailer header must be a comma-separated list "
1036     "of header names." },
1037     { "badtransenc", "Error: The Transfer-Encoding header must be a "
1038     "comma-separated of encodings." },
1039     { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
1040     "of product identifiers." },
1041     { "badvary", "Error: The Vary header must be a comma-separated list "
1042     "of header names, or \"*\"." },
1043     { "contentrange", "Warning: The Content-Range header should not be returned "
1044     "by the server for this request." },
1045     { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
1046     "version of this tool." },
1047     { "futurelastmod", "Error: The specified Last-Modified date-time is in "
1048     "the future." },
1049     { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
1050     { "missingcontenttype", "Warning: No Content-Type header was present. The "
1051     "client will have to guess the media type or ask "
1052     "the user. Adding a Content-Type header is strongly "
1053     "recommended." },
1054     { "missingcontlang", "Consider adding a Content-Language header if "
1055     "applicable for this document." },
1056     { "missingdate", "Warning: No Date header was present. A Date header must "
1057     "be present, unless the server does not have a clock, or "
1058     "the response is 100, 101, or 500 - 599." },
1059     { "missinglastmod", "No Last-Modified header was present. The "
1060     "HTTP/1.1 specification states that this header should "
1061     "be sent whenever feasible." },
1062     { "nocharset", "Warning: No character set is specified in the Content-Type. "
1063     "Clients may assume the default of ISO-8859-1. Consider "
1064     "appending '; charset=...'." },
1065     { "nonstandard", "Warning: I don't know anything about this header. Is it "
1066     "a standard HTTP response header?" },
1067     { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
1068     "that all header lines end with CR LF." },
1069     { "ok", "OK." },
1070     { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
1071     "to HTTP/1.1." },
1072     { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
1073     "Consider using the RFC 1123 format instead." },
1074     { "ugly", "This URL appears to contain implementation-specific parts such "
1075     "as an extension or a query string. This may make the URL liable "
1076     "to change when the implementation is changed, resulting in "
1077     "broken links. Consider using URL rewriting or equivalent to "
1078     "implement a future-proof URL space. See "
1079     "http://www.w3.org/Provider/Style/URI for more information." },
1080     { "unknowncachecont", "Warning: This Cache-Control directive is "
1081     "non-standard and will have limited support." },
1082     { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
1083     { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
1084     "range." },
1085     { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
1086     { "via", "This header was added by a proxy, cache or gateway." },
1087     { "wrongdate", "Warning: The server date-time differs from this system's "
1088     "date-time by more than 10 seconds. Check that both the "
1089     "system clocks are correct." }
1090     };
1091    
1092    
1093     /**
1094     * Look up and output the string referenced by a key.
1095     */
1096     void lookup(const char *key)
1097     {
1098     const char *s, *spc;
1099     int x;
1100     struct message_entry *message;
1101    
1102     message = bsearch(key, message_table,
1103     sizeof message_table / sizeof message_table[0],
1104     sizeof message_table[0],
1105     (int (*)(const void *, const void *)) strcasecmp);
1106     if (message)
1107     s = message->value;
1108     else
1109     s = key;
1110    
1111     printf(" ");
1112     x = 4;
1113     while (*s) {
1114     spc = strchr(s, ' ');
1115     if (!spc)
1116     spc = s + strlen(s);
1117     if (75 < x + (spc - s)) {
1118     printf("\n ");
1119     x = 4;
1120     }
1121     x += spc - s + 1;
1122     printf("%.*s ", spc - s, s);
1123     if (*spc)
1124     s = spc + 1;
1125     else
1126     s = spc;
1127     }
1128     printf("\n\n");
1129     }

  ViewVC Help
Powered by ViewVC 1.1.26