/[james]/httplint/httplint.c
ViewVC logotype

Annotation of /httplint/httplint.c

Parent Directory | Revision Log


Revision 56 - (hide annotations) (download) (as text)
Fri Mar 12 21:54:02 2004 UTC (20 years, 8 months ago) by james
File MIME type: text/x-csrc
File size: 35086 byte(s)
Implement HTML output.

1 james 40 /*
2     * HTTP Header Lint
3 james 50 * Licensed under the MIT License
4     * http://www.opensource.org/licenses/mit-license
5     * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
6 james 40 */
7    
8     /*
9     * Compile using
10     * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11     *
12     * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13     */
14    
15     #define _GNU_SOURCE
16     #define __USE_XOPEN
17    
18     #include <limits.h>
19     #include <math.h>
20     #include <stdbool.h>
21     #include <stdio.h>
22     #include <stdlib.h>
23     #include <string.h>
24     #include <time.h>
25     #include <sys/types.h>
26     #include <regex.h>
27     #include <curl/curl.h>
28    
29    
/* Characters accepted in a decimal number; used with strspn(). */
#define NUMBER "0123456789"
/* Mark a parameter as deliberately unused. A void cast is used instead of
 * a self-assignment so that it also works on const objects and does not
 * trigger self-assignment warnings. */
#define UNUSED(x) ((void) (x))
32    
33    
34     bool start;
35 james 56 bool html = false;
36 james 40 CURL *curl;
37     int status_code;
38     char error_buffer[CURL_ERROR_SIZE];
39     regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
40 james 41 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
41 james 48 re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires;
42 james 40
43    
/* Set-up and per-URL driver. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks. */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Generic checks and parsing helpers. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* Per-header handlers (referenced from header_table). */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);
void header_set_cookie(const char *s);

/* Output helpers. */
void die(const char *error);
void print(const char *s, size_t len);
void lookup(const char *key);
89    
90    
91     struct header_entry {
92     char name[40];
93     void (*handler)(const char *s);
94     int count;
95     char *missing;
96     } header_table[] = {
97     { "Accept-Ranges", header_accept_ranges, 0, 0 },
98     { "Age", header_age, 0, 0 },
99     { "Allow", header_allow, 0, 0 },
100     { "Cache-Control", header_cache_control, 0, 0 },
101     { "Connection", header_connection, 0, 0 },
102     { "Content-Encoding", header_content_encoding, 0, 0 },
103     { "Content-Language", header_content_language, 0, "missingcontlang" },
104     { "Content-Length", header_content_length, 0, 0 },
105     { "Content-Location", header_content_location, 0, 0 },
106     { "Content-MD5", header_content_md5, 0, 0 },
107     { "Content-Range", header_content_range, 0, 0 },
108     { "Content-Type", header_content_type, 0, "missingcontenttype" },
109     { "Date", header_date, 0, "missingdate" },
110     { "ETag", header_etag, 0, 0 },
111     { "Expires", header_expires, 0, 0 },
112     { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113     { "Location", header_location, 0, 0 },
114     { "Pragma", header_pragma, 0, 0 },
115     { "Retry-After", header_retry_after, 0, 0 },
116     { "Server", header_server, 0, 0 },
117 james 48 { "Set-Cookie", header_set_cookie, 0, 0 },
118 james 40 { "Trailer", header_trailer, 0, 0 },
119     { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
120     { "Upgrade", header_upgrade, 0, 0 },
121     { "Vary", header_vary, 0, 0 },
122     { "Via", header_via, 0, 0 }
123     };
124    
125    
126     /**
127     * Main entry point.
128     */
129     int main(int argc, char *argv[])
130     {
131 james 56 int i = 1;
132 james 40
133     if (argc < 2)
134 james 56 die("Usage: httplint [--html] url [url ...]");
135 james 40
136     init();
137    
138 james 56 if (1 < argc && strcmp(argv[1], "--html") == 0) {
139     html = true;
140     i++;
141     }
142    
143     for (; i != argc; i++)
144 james 40 check_url(argv[i]);
145    
146     curl_global_cleanup();
147    
148     return 0;
149     }
150    
151    
152     /**
153     * Initialise the curl handle and compile regular expressions.
154     */
155     void init(void)
156     {
157     struct curl_slist *request_headers = 0;
158    
159     if (curl_global_init(CURL_GLOBAL_ALL))
160     die("Failed to initialise libcurl");
161    
162     curl = curl_easy_init();
163     if (!curl)
164     die("Failed to create curl handle");
165    
166     if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
167     die("Failed to set curl options");
168     if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
169     die("Failed to set curl options");
170     if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
171     die("Failed to set curl options");
172     if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
173     die("Failed to set curl options");
174    
175     /* remove libcurl default headers */
176     request_headers = curl_slist_append(request_headers, "Accept:");
177     request_headers = curl_slist_append(request_headers, "Pragma:");
178     if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
179     die("Failed to set curl options");
180    
181     /* compile regular expressions */
182     regcomp_wrapper(&re_status_line,
183     "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
184     REG_EXTENDED);
185     regcomp_wrapper(&re_token,
186 james 44 "^([-0-9a-zA-Z_.!]+)",
187 james 40 REG_EXTENDED);
188     regcomp_wrapper(&re_token_value,
189 james 44 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
190 james 40 REG_EXTENDED);
191     regcomp_wrapper(&re_content_type,
192     "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
193     "(;[ \t]*([-0-9a-zA-Z_.]+)="
194     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
195     REG_EXTENDED);
196     regcomp_wrapper(&re_absolute_uri,
197     "^[a-zA-Z0-9]+://[^ ]+$",
198     REG_EXTENDED);
199     regcomp_wrapper(&re_etag,
200     "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
201     REG_EXTENDED);
202     regcomp_wrapper(&re_server,
203 james 44 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
204 james 40 REG_EXTENDED);
205     regcomp_wrapper(&re_transfer_coding,
206     "^([-0-9a-zA-Z_.]+)[ \t]*"
207     "(;[ \t]*([-0-9a-zA-Z_.]+)="
208     "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
209     REG_EXTENDED);
210     regcomp_wrapper(&re_upgrade,
211     "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
212     REG_EXTENDED);
213     regcomp_wrapper(&re_ugly,
214 james 43 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
215 james 40 REG_EXTENDED);
216 james 41 regcomp_wrapper(&re_rfc1123,
217     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
218     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
219     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
220     REG_EXTENDED);
221     regcomp_wrapper(&re_rfc1036,
222     "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
223     "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
224     "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
225     REG_EXTENDED);
226     regcomp_wrapper(&re_asctime,
227     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
228     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
229     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
230     REG_EXTENDED);
231 james 48 regcomp_wrapper(&re_cookie_nameval,
232     "^[^;, ]+=[^;, ]*$",
233     REG_EXTENDED);
234     regcomp_wrapper(&re_cookie_expires,
235     "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-"
236     "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) "
237     "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
238     REG_EXTENDED);
239 james 40 }
240    
241    
/**
 * Compile a regular expression, handling errors.
 *
 * On failure the offending expression is written to stderr and the
 * regerror() text is passed to die().
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
256    
257    
258     /**
259     * Fetch and check the headers for the specified url.
260     */
261     void check_url(const char *url)
262     {
263     int i, r;
264     CURLcode code;
265    
266     start = true;
267     for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
268     header_table[i].count = 0;
269    
270 james 56 if (!html)
271     printf("Checking URL %s\n", url);
272     if (strncmp(url, "http", 4)) {
273     if (html)
274     printf("<p class='warning'>");
275     printf("Warning: this is not an http or https url");
276     if (html)
277     printf("</p>");
278     printf("\n");
279     }
280 james 40
281     if (curl_easy_setopt(curl, CURLOPT_URL, url))
282     die("Failed to set curl options");
283    
284 james 56 if (html)
285     printf("<ul>\n");
286 james 40 code = curl_easy_perform(curl);
287 james 56 if (html)
288     printf("</ul>\n");
289 james 40 if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
290 james 56 if (html)
291     printf("<p class='error'>");
292     printf("Error: ");
293     print(error_buffer, strlen(error_buffer));
294     printf(".");
295     if (html)
296     printf("</p>");
297     printf("\n");
298 james 40 return;
299     } else {
300     printf("\n");
301 james 56 if (html)
302     printf("<ul>");
303 james 40 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
304     if (header_table[i].count == 0 && header_table[i].missing)
305     lookup(header_table[i].missing);
306     }
307     }
308    
309     r = regexec(&re_ugly, url, 0, 0, 0);
310     if (r)
311     lookup("ugly");
312 james 56
313     if (html)
314     printf("</ul>");
315 james 40 }
316    
317    
318     /**
319     * Callback for received header data.
320     */
321     size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
322     {
323     const size_t size = msize * nmemb;
324     char s[400], *name, *value;
325    
326     UNUSED(stream);
327    
328 james 56 printf(html ? "<li><code>" : "* ");
329 james 40 print(ptr, size);
330 james 56 printf(html ? "</code><ul>" : "\n");
331 james 40
332     if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
333     lookup("notcrlf");
334 james 56 if (html)
335     printf("</ul></li>\n");
336 james 40 return size;
337     }
338     if (sizeof s <= size) {
339 james 56 lookup("headertoolong");
340     if (html)
341     printf("</ul></li>\n");
342 james 40 return size;
343     }
344     strncpy(s, ptr, size);
345     s[size - 2] = 0;
346    
347     name = s;
348     value = strchr(s, ':');
349    
350     if (s[0] == 0) {
351     /* empty header indicates end of headers */
352 james 56 lookup("endofheaders");
353     if (html)
354     printf("</ul></li>\n");
355 james 40 return 0;
356    
357     } else if (start) {
358     /* Status-Line [6.1] */
359     check_status_line(s);
360     start = false;
361    
362     } else if (!value) {
363     lookup("missingcolon");
364    
365     } else {
366     *value = 0;
367     value++;
368    
369     check_header(name, skip_lws(value));
370     }
371    
372 james 56 if (html)
373     printf("</ul></li>\n");
374 james 40 return size;
375     }
376    
377    
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned to make libcurl abort the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
392    
393    
394     /**
395     * Check the syntax and content of the response Status-Line [6.1].
396     */
397     void check_status_line(const char *s)
398     {
399     const char *reason;
400     unsigned int major = 0, minor = 0;
401     int r;
402     regmatch_t pmatch[5];
403    
404     r = regexec(&re_status_line, s, 5, pmatch, 0);
405     if (r) {
406     lookup("badstatusline");
407     return;
408     }
409    
410     major = atoi(s + pmatch[1].rm_so);
411     minor = atoi(s + pmatch[2].rm_so);
412     status_code = atoi(s + pmatch[3].rm_so);
413     reason = s + pmatch[4].rm_so;
414    
415     if (major < 1 || (major == 1 && minor == 0)) {
416     lookup("oldhttp");
417     } else if ((major == 1 && 1 < minor) || 1 < major) {
418     lookup("futurehttp");
419     } else {
420     if (status_code < 100 || 600 <= status_code) {
421     lookup("badstatus");
422     } else {
423     char key[] = "xxx";
424     key[0] = '0' + status_code / 100;
425     lookup(key);
426     }
427     }
428     }
429    
430    
431     /**
432     * Check the syntax and content of a header.
433     */
434     void check_header(const char *name, const char *value)
435     {
436     struct header_entry *header;
437    
438     header = bsearch(name, header_table,
439     sizeof header_table / sizeof header_table[0],
440     sizeof header_table[0],
441     (int (*)(const void *, const void *)) strcasecmp);
442    
443     if (header) {
444     header->count++;
445     header->handler(value);
446 james 44 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
447     lookup("xheader");
448     } else {
449 james 40 lookup("nonstandard");
450 james 44 }
451 james 40 }
452    
453    
454     /**
455     * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
456     */
457     bool parse_date(const char *s, struct tm *tm)
458     {
459 james 41 int r;
460 james 40 int len = strlen(s);
461 james 41 regmatch_t pmatch[20];
462 james 40
463     if (len == 29) {
464     /* RFC 1123 */
465 james 41 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
466     if (r == 0) {
467     tm->tm_mday = atoi(s + pmatch[2].rm_so);
468     tm->tm_mon = month(s + pmatch[3].rm_so);
469     tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
470     tm->tm_hour = atoi(s + pmatch[5].rm_so);
471     tm->tm_min = atoi(s + pmatch[6].rm_so);
472     tm->tm_sec = atoi(s + pmatch[7].rm_so);
473 james 40 return true;
474 james 41 }
475 james 40
476     } else if (len == 24) {
477     /* asctime() format */
478 james 41 r = regexec(&re_asctime, s, 20, pmatch, 0);
479     if (r == 0) {
480     if (s[pmatch[3].rm_so] == ' ')
481     tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
482     else
483     tm->tm_mday = atoi(s + pmatch[3].rm_so);
484     tm->tm_mon = month(s + pmatch[2].rm_so);
485     tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
486     tm->tm_hour = atoi(s + pmatch[4].rm_so);
487     tm->tm_min = atoi(s + pmatch[5].rm_so);
488     tm->tm_sec = atoi(s + pmatch[6].rm_so);
489 james 40 lookup("asctime");
490     return true;
491     }
492    
493     } else {
494     /* RFC 1036 */
495 james 41 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
496     if (r == 0) {
497     tm->tm_mday = atoi(s + pmatch[2].rm_so);
498     tm->tm_mon = month(s + pmatch[3].rm_so);
499     tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
500     tm->tm_hour = atoi(s + pmatch[5].rm_so);
501     tm->tm_min = atoi(s + pmatch[6].rm_so);
502     tm->tm_sec = atoi(s + pmatch[7].rm_so);
503 james 40 lookup("rfc1036");
504     return true;
505     }
506    
507     }
508    
509     lookup("baddate");
510     return false;
511     }
512    
513    
/**
 * Convert a month name to the month number.
 *
 * \param s  string starting with a three-letter English month
 *           abbreviation ("Jan" .. "Dec"); text after it is ignored
 * \return   0 for January .. 11 for December; 0 is also returned for an
 *           unrecognised name
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	/* strncmp() stops at a terminating NUL, so a short string is never
	 * read past its end */
	for (i = 0; i != 12; i++) {
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}
	return 0;
}
544    
545    
546     /**
547 james 42 * UTC version of mktime, from
548     * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
549     */
550     time_t mktime_from_utc(struct tm *t)
551     {
552     time_t tl, tb;
553     struct tm *tg;
554    
555     tl = mktime (t);
556     if (tl == -1)
557     {
558     t->tm_hour--;
559     tl = mktime (t);
560     if (tl == -1)
561     return -1; /* can't deal with output from strptime */
562     tl += 3600;
563     }
564     tg = gmtime (&tl);
565     tg->tm_isdst = 0;
566     tb = mktime (tg);
567     if (tb == -1)
568     {
569     tg->tm_hour--;
570     tb = mktime (tg);
571     if (tb == -1)
572     return -1; /* can't deal with output from gmtime */
573     tb += 3600;
574     }
575     return (tl - (tb - tl));
576     }
577    
578    
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * A leading CRLF is skipped only when it is followed by a space or tab;
 * after that every space and tab is skipped. Returns a pointer to the
 * first character that remains.
 */
const char *skip_lws(const char *s)
{
	if (s[0] == '\r' && s[1] == '\n' && (s[2] == ' ' || s[2] == '\t'))
		s += 2;

	for (; *s == ' ' || *s == '\t'; s++)
		continue;

	return s;
}
590    
591    
592     /**
593     * Parse a list of elements (#rule in [2.1]).
594     */
595     bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
596     void (*callback)(const char *s, regmatch_t pmatch[]))
597     {
598     int r;
599     unsigned int items = 0;
600     regmatch_t pmatch[20];
601    
602     do {
603     r = regexec(preg, s, 20, pmatch, 0);
604     if (r) {
605 james 56 if (html)
606     printf("<li class='error'>");
607 james 40 printf(" Failed to match list item %i\n", items + 1);
608 james 56 if (html)
609     printf("</li>\n");
610 james 40 return false;
611     }
612    
613     if (callback)
614     callback(s, pmatch);
615     items++;
616    
617     s += pmatch[0].rm_eo;
618     s = skip_lws(s);
619     if (*s == 0)
620     break;
621     if (*s != ',') {
622 james 56 if (html)
623     printf("<li class='error'>");
624 james 40 printf(" Expecting , after list item %i\n", items);
625 james 56 if (html)
626     printf("</li>\n");
627 james 40 return false;
628     }
629     while (*s == ',')
630     s = skip_lws(s + 1);
631     } while (*s != 0);
632    
633     if (items < n || m < items) {
634 james 56 if (html)
635     printf("<li class='error'>");
636 james 40 printf(" %i items in list, but there should be ", items);
637     if (m == UINT_MAX)
638     printf("at least %i\n", n);
639     else
640     printf("between %i and %i\n", n, m);
641 james 56 if (html)
642     printf("</li>\n");
643 james 40 return false;
644     }
645    
646     return true;
647     }
648    
649    
650     /* Header-specific validation. */
/* Accept-Ranges: only the exact values "bytes" and "none" are accepted. */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
660    
661     void header_age(const char *s)
662     {
663     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
664     lookup("badage");
665     else
666     lookup("ok");
667     }
668    
669     void header_allow(const char *s)
670     {
671     if (parse_list(s, &re_token, 0, UINT_MAX, 0))
672     lookup("ok");
673     else
674     lookup("badallow");
675     }
676    
677     void header_cache_control(const char *s)
678     {
679     if (parse_list(s, &re_token_value, 1, UINT_MAX,
680     header_cache_control_callback))
681     lookup("ok");
682     else
683     lookup("badcachecont");
684     }
685    
686     char cache_control_list[][20] = {
687     "max-age", "max-stale", "min-fresh", "must-revalidate",
688     "no-cache", "no-store", "no-transform", "only-if-cached",
689     "private", "proxy-revalidate", "public", "s-maxage"
690     };
691    
692     void header_cache_control_callback(const char *s, regmatch_t pmatch[])
693     {
694     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
695     char name[20];
696     char *dir;
697    
698     if (19 < len) {
699     lookup("unknowncachecont");
700     return;
701     }
702    
703     strncpy(name, s + pmatch[1].rm_so, len);
704     name[len] = 0;
705    
706     dir = bsearch(name, cache_control_list,
707     sizeof cache_control_list / sizeof cache_control_list[0],
708     sizeof cache_control_list[0],
709     (int (*)(const void *, const void *)) strcasecmp);
710    
711     if (!dir) {
712 james 56 if (html)
713     printf("<li class='warning'>");
714     printf(" Cache-Control directive '");
715     print(name, strlen(name));
716     printf("':\n");
717     if (html)
718     printf("</li>\n");
719 james 40 lookup("unknowncachecont");
720     }
721     }
722    
/* Connection: only the exact value "close" is accepted. */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
730    
731     void header_content_encoding(const char *s)
732     {
733     if (parse_list(s, &re_token, 1, UINT_MAX,
734     header_content_encoding_callback))
735     lookup("ok");
736     else
737     lookup("badcontenc");
738     }
739    
740     char content_coding_list[][20] = {
741     "compress", "deflate", "gzip", "identity"
742     };
743    
744     void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
745     {
746     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
747     char name[20];
748     char *dir;
749    
750     if (19 < len) {
751     lookup("unknowncontenc");
752     return;
753     }
754    
755     strncpy(name, s + pmatch[1].rm_so, len);
756     name[len] = 0;
757    
758     dir = bsearch(name, content_coding_list,
759     sizeof content_coding_list / sizeof content_coding_list[0],
760     sizeof content_coding_list[0],
761     (int (*)(const void *, const void *)) strcasecmp);
762     if (!dir) {
763 james 56 if (html)
764     printf("<li class='warning'>");
765 james 40 printf(" Content-Encoding '%s':\n", name);
766 james 56 if (html)
767     printf("</li>\n");
768 james 40 lookup("unknowncontenc");
769     }
770     }
771    
772     void header_content_language(const char *s)
773     {
774     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
775     lookup("ok");
776     else
777     lookup("badcontlang");
778     }
779    
780     void header_content_length(const char *s)
781     {
782     if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
783     lookup("badcontlen");
784     else
785     lookup("ok");
786     }
787    
/* Content-Location: rejected if the value contains a space. */
void header_content_location(const char *s)
{
	lookup(strchr(s, ' ') ? "badcontloc" : "ok");
}
795    
/* Content-MD5: only the length is checked; it must be 24 characters. */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
803    
/* Content-Range: the value is not inspected; the header's presence is
 * reported with the "contentrange" message. */
void header_content_range(const char *s)
{
	(void) s;
	lookup("contentrange");
}
809    
810     void header_content_type(const char *s)
811     {
812     bool charset = false;
813     char *type, *subtype;
814     unsigned int i;
815     int r;
816     regmatch_t pmatch[30];
817    
818     r = regexec(&re_content_type, s, 30, pmatch, 0);
819     if (r) {
820     lookup("badcontenttype");
821     return;
822     }
823    
824     type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
825     subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
826    
827     /* parameters */
828     for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
829     char *attrib, *value;
830    
831     attrib = strndup(s + pmatch[i + 1].rm_so,
832     pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
833     value = strndup(s + pmatch[i + 2].rm_so,
834     pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
835    
836     if (strcasecmp(attrib, "charset") == 0)
837     charset = true;
838     }
839    
840     if (strcasecmp(type, "text") == 0 && !charset)
841     lookup("nocharset");
842     else
843     lookup("ok");
844     }
845    
/* Date: must parse as an HTTP date and lie within 10 seconds of the
 * local clock, otherwise "wrongdate" is reported. Nothing further is
 * reported here when parse_date() fails. */
void header_date(const char *s)
{
	const time_t now = time(0);
	struct tm parsed;
	time_t claimed;

	if (!parse_date(s, &parsed))
		return;
	claimed = mktime_from_utc(&parsed);

	lookup(10 < fabs(difftime(now, claimed)) ? "wrongdate" : "ok");
}
863    
864     void header_etag(const char *s)
865     {
866     int r;
867     r = regexec(&re_etag, s, 0, 0, 0);
868     if (r)
869     lookup("badetag");
870     else
871     lookup("ok");
872     }
873    
/* Expires: reports "ok" when the value parses as an HTTP date; nothing
 * further is reported here when parse_date() fails. */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;
	lookup("ok");
}
880    
/* Last-Modified: must parse as an HTTP date and must not lie more than
 * 10 seconds in the future, otherwise "futurelastmod" is reported. */
void header_last_modified(const char *s)
{
	const time_t now = time(0);
	struct tm parsed;
	time_t modified;

	if (!parse_date(s, &parsed))
		return;
	modified = mktime_from_utc(&parsed);

	lookup(10 < difftime(modified, now) ? "futurelastmod" : "ok");
}
898    
899     void header_location(const char *s)
900     {
901     int r;
902     r = regexec(&re_absolute_uri, s, 0, 0, 0);
903     if (r)
904     lookup("badlocation");
905     else
906     lookup("ok");
907     }
908    
909     void header_pragma(const char *s)
910     {
911     if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
912     lookup("ok");
913     else
914     lookup("badpragma");
915     }
916    
917     void header_retry_after(const char *s)
918     {
919     struct tm tm;
920    
921     if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
922     lookup("ok");
923     return;
924     }
925    
926     if (!parse_date(s, &tm))
927     return;
928    
929     lookup("ok");
930     }
931    
932     void header_server(const char *s)
933     {
934     int r;
935     r = regexec(&re_server, s, 0, 0, 0);
936     if (r)
937     lookup("badserver");
938     else
939     lookup("ok");
940     }
941    
942     void header_trailer(const char *s)
943     {
944     if (parse_list(s, &re_token, 1, UINT_MAX, 0))
945     lookup("ok");
946     else
947     lookup("badtrailer");
948     }
949    
950     void header_transfer_encoding(const char *s)
951     {
952     if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
953     header_transfer_encoding_callback))
954     lookup("ok");
955     else
956     lookup("badtransenc");
957     }
958    
959     char transfer_coding_list[][20] = {
960     "chunked", "compress", "deflate", "gzip", "identity"
961     };
962    
963     void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
964     {
965     size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
966     char name[20];
967     char *dir;
968    
969     if (19 < len) {
970     lookup("unknowntransenc");
971     return;
972     }
973    
974     strncpy(name, s + pmatch[1].rm_so, len);
975     name[len] = 0;
976    
977     dir = bsearch(name, transfer_coding_list,
978     sizeof transfer_coding_list / sizeof transfer_coding_list[0],
979     sizeof transfer_coding_list[0],
980     (int (*)(const void *, const void *)) strcasecmp);
981     if (!dir) {
982 james 56 if (html)
983     printf("<li class='warning'>");
984 james 40 printf(" Transfer-Encoding '%s':\n", name);
985 james 56 if (html)
986     printf("</li>\n");
987 james 40 lookup("unknowntransenc");
988     }
989     }
990    
991     void header_upgrade(const char *s)
992     {
993     int r;
994     r = regexec(&re_upgrade, s, 0, 0, 0);
995     if (r)
996     lookup("badupgrade");
997     else
998     lookup("ok");
999     }
1000    
1001     void header_vary(const char *s)
1002     {
1003     if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
1004     lookup("ok");
1005     else
1006     lookup("badvary");
1007     }
1008    
/* Via: the value is not inspected; the header's presence is reported
 * with the "via" message. */
void header_via(const char *s)
{
	(void) s;
	lookup("via");
}
1014    
1015 james 48 /* http://wp.netscape.com/newsref/std/cookie_spec.html */
1016     void header_set_cookie(const char *s)
1017     {
1018     bool ok = true;
1019     int r;
1020     const char *semi = strchr(s, ';');
1021     const char *s2;
1022     struct tm tm;
1023     double diff;
1024     time_t time0, time1;
1025     regmatch_t pmatch[20];
1026 james 40
1027 james 48 if (semi)
1028     s2 = strndup(s, semi - s);
1029     else
1030     s2 = s;
1031    
1032     r = regexec(&re_cookie_nameval, s2, 0, 0, 0);
1033     if (r) {
1034     lookup("cookiebadnameval");
1035     ok = false;
1036     }
1037 james 50
1038 james 48 if (!semi)
1039     return;
1040    
1041     s = skip_lws(semi + 1);
1042    
1043     while (*s) {
1044     semi = strchr(s, ';');
1045     if (semi)
1046     s2 = strndup(s, semi - s);
1047     else
1048     s2 = s;
1049    
1050 james 56 if (strncasecmp(s2, "expires=", 8) == 0) {
1051 james 48 s2 += 8;
1052     r = regexec(&re_cookie_expires, s2, 20, pmatch, 0);
1053     if (r == 0) {
1054     tm.tm_mday = atoi(s2 + pmatch[2].rm_so);
1055     tm.tm_mon = month(s2 + pmatch[3].rm_so);
1056     tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900;
1057     tm.tm_hour = atoi(s2 + pmatch[5].rm_so);
1058     tm.tm_min = atoi(s2 + pmatch[6].rm_so);
1059     tm.tm_sec = atoi(s2 + pmatch[7].rm_so);
1060    
1061     time0 = time(0);
1062     time1 = mktime_from_utc(&tm);
1063    
1064     diff = difftime(time0, time1);
1065     if (10 < diff) {
1066     lookup("cookiepastdate");
1067     ok = false;
1068     }
1069     } else {
1070     lookup("cookiebaddate");
1071     ok = false;
1072     }
1073 james 56 } else if (strncasecmp(s2, "domain=", 7) == 0) {
1074     } else if (strncasecmp(s2, "path=", 5) == 0) {
1075 james 48 if (s2[5] != '/') {
1076     lookup("cookiebadpath");
1077     ok = false;
1078     }
1079 james 56 } else if (strcasecmp(s, "secure") == 0) {
1080 james 48 } else {
1081 james 56 if (html)
1082     printf("<li class='warning'>");
1083 james 48 printf(" Set-Cookie field '%s':\n", s2);
1084 james 56 if (html)
1085     printf("</li>\n");
1086 james 48 lookup("cookieunknownfield");
1087     ok = false;
1088     }
1089    
1090     if (semi)
1091     s = skip_lws(semi + 1);
1092     else
1093     break;
1094     }
1095    
1096     if (ok)
1097     lookup("ok");
1098     }
1099    
1100    
/**
 * Report a fatal error on stderr, prefixed with the program name, then
 * terminate with a failure exit status. Does not return.
 *
 * \param error  message text, NUL-terminated
 */
void die(const char *error)
{
	static const char program[] = "httplint";

	fprintf(stderr, "%s: %s\n", program, error);
	exit(EXIT_FAILURE);
}
1109    
1110    
1111     /**
1112     * Print a string which contains control characters.
1113     */
1114     void print(const char *s, size_t len)
1115     {
1116     size_t i;
1117     for (i = 0; i != len; i++) {
1118 james 56 if (html && s[i] == '<')
1119     printf("&lt;");
1120     else if (html && s[i] == '>')
1121     printf("&gt;");
1122     else if (html && s[i] == '&')
1123     printf("&amp;");
1124     else if (31 < s[i] && s[i] < 127)
1125 james 40 putchar(s[i]);
1126 james 56 else {
1127     if (html)
1128     printf("<span class='cc'>");
1129 james 40 printf("[%.2x]", s[i]);
1130 james 56 if (html)
1131     printf("</span>");
1132     }
1133 james 40 }
1134     }
1135    
1136    
1137     struct message_entry {
1138     const char key[20];
1139     const char *value;
1140     } message_table[] = {
1141     { "1xx", "A response status code in the range 100 - 199 indicates a "
1142     "'provisional response'." },
1143     { "2xx", "A response status code in the range 200 - 299 indicates that "
1144     "the request was successful." },
1145     { "3xx", "A response status code in the range 300 - 399 indicates that "
1146     "the client should redirect to a new URL." },
1147     { "4xx", "A response status code in the range 400 - 499 indicates that "
1148     "the request could not be fulfilled due to client error." },
1149     { "5xx", "A response status code in the range 500 - 599 indicates that "
1150     "an error occurred on the server." },
1151     { "asctime", "Warning: This date is in the obsolete asctime() format. "
1152     "Consider using the RFC 1123 format instead." },
1153     { "badage", "Error: The Age header must be one number." },
1154     { "badallow", "Error: The Allow header must be a comma-separated list of "
1155     "HTTP methods." },
1156     { "badcachecont", "Error: The Cache-Control header must be a "
1157     "comma-separated list of directives." },
1158     { "badconnection", "Warning: The only value of the Connection header "
1159     "defined by HTTP/1.1 is \"close\"." },
1160     { "badcontenc", "Error: The Content-Encoding header must be a "
1161     "comma-separated list of encodings." },
1162     { "badcontenttype", "Error: The Content-Type header must be of the form "
1163     "'type/subtype (; optional parameters)'." },
1164     { "badcontlang", "Error: The Content-Language header must be a "
1165     "comma-separated list of language tags." },
1166     { "badcontlen", "Error: The Content-Length header must be a number." },
1167     { "badcontloc", "Error: The Content-Location header must be an absolute "
1168     "or relative URI." },
1169     { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
1170     "MD5 sum." },
1171     { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
1172     "1123 format." },
1173     { "badetag", "Error: The ETag header must be a quoted string (optionally "
1174     "preceded by \"W/\" for a weak tag)." },
1175     { "badlocation", "Error: The Location header must be an absolute URI. "
1176     "Relative URIs are not permitted." },
1177     { "badpragma", "Error: The Pragma header must be a comma-separated list of "
1178     "directives." },
1179     { "badserver", "Error: The Server header must be a space-separated list of "
1180     "products of the form Name/optional-version and comments "
1181     "in ()." },
1182     { "badstatus", "Warning: The response status code is outside the standard "
1183     "range 100 - 599." },
1184     { "badstatusline", "Error: Failed to parse the response Status-Line. The "
1185     "status line must be of the form 'HTTP/n.n <3-digit "
1186     "status> <reason phrase>'." },
1187     { "badtrailer", "Error: The Trailer header must be a comma-separated list "
1188     "of header names." },
1189     { "badtransenc", "Error: The Transfer-Encoding header must be a "
1190     "comma-separated of encodings." },
1191     { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
1192     "of product identifiers." },
1193     { "badvary", "Error: The Vary header must be a comma-separated list "
1194     "of header names, or \"*\"." },
1195     { "contentrange", "Warning: The Content-Range header should not be returned "
1196     "by the server for this request." },
1197 james 48 { "cookiebaddate", "Error: The expires date must be in the form "
1198     "\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." },
1199     { "cookiebadnameval", "Error: A Set-Cookie header must start with "
1200     "name=value, each excluding semi-colon, comma and "
1201     "white space." },
1202     { "cookiebadpath", "Error: The path does not start with \"/\"." },
1203     { "cookiepastdate", "Warning: The expires date is in the past. The cookie "
1204     "will be deleted by browsers." },
1205     { "cookieunknownfield", "Warning: This is not a standard Set-Cookie "
1206     "field." },
1207 james 56 { "endofheaders", "End of headers." },
1208 james 40 { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
1209     "version of this tool." },
1210     { "futurelastmod", "Error: The specified Last-Modified date-time is in "
1211     "the future." },
1212 james 56 { "headertoolong", "Warning: Header too long: ignored." },
1213 james 40 { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
1214     { "missingcontenttype", "Warning: No Content-Type header was present. The "
1215     "client will have to guess the media type or ask "
1216     "the user. Adding a Content-Type header is strongly "
1217     "recommended." },
1218     { "missingcontlang", "Consider adding a Content-Language header if "
1219     "applicable for this document." },
1220     { "missingdate", "Warning: No Date header was present. A Date header must "
1221     "be present, unless the server does not have a clock, or "
1222     "the response is 100, 101, or 500 - 599." },
1223     { "missinglastmod", "No Last-Modified header was present. The "
1224     "HTTP/1.1 specification states that this header should "
1225     "be sent whenever feasible." },
1226     { "nocharset", "Warning: No character set is specified in the Content-Type. "
1227     "Clients may assume the default of ISO-8859-1. Consider "
1228     "appending '; charset=...'." },
1229     { "nonstandard", "Warning: I don't know anything about this header. Is it "
1230     "a standard HTTP response header?" },
1231     { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
1232     "that all header lines end with CR LF." },
1233     { "ok", "OK." },
1234     { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
1235     "to HTTP/1.1." },
1236     { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
1237     "Consider using the RFC 1123 format instead." },
1238     { "ugly", "This URL appears to contain implementation-specific parts such "
1239     "as an extension or a query string. This may make the URL liable "
1240     "to change when the implementation is changed, resulting in "
1241     "broken links. Consider using URL rewriting or equivalent to "
1242     "implement a future-proof URL space. See "
1243     "http://www.w3.org/Provider/Style/URI for more information." },
1244     { "unknowncachecont", "Warning: This Cache-Control directive is "
1245     "non-standard and will have limited support." },
1246     { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
1247     { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
1248     "range." },
1249     { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
1250     { "via", "This header was added by a proxy, cache or gateway." },
1251     { "wrongdate", "Warning: The server date-time differs from this system's "
1252     "date-time by more than 10 seconds. Check that both the "
1253 james 44 "system clocks are correct." },
1254     { "xheader", "This is an extension header. I don't know how to check it." }
1255 james 40 };
1256    
1257    
1258     /**
1259     * Look up and output the string referenced by a key.
1260     */
1261     void lookup(const char *key)
1262     {
1263     const char *s, *spc;
1264     int x;
1265     struct message_entry *message;
1266    
1267     message = bsearch(key, message_table,
1268     sizeof message_table / sizeof message_table[0],
1269     sizeof message_table[0],
1270     (int (*)(const void *, const void *)) strcasecmp);
1271     if (message)
1272     s = message->value;
1273     else
1274     s = key;
1275    
1276 james 56 if (html) {
1277     if (strncmp(s, "Warning:", 8) == 0)
1278     printf("<li class='warning'>");
1279     else if (strncmp(s, "Error:", 6) == 0)
1280     printf("<li class='error'>");
1281     else if (strncmp(s, "OK", 2) == 0)
1282     printf("<li class='ok'>");
1283     else
1284     printf("<li>");
1285     for (; *s; s++) {
1286     if (strncmp(s, "http://", 7) == 0) {
1287     spc = strchr(s, ' ');
1288     printf("<a href='%.*s'>%.*s</a>", spc - s, s, spc - s, s);
1289     s = spc;
1290     }
1291     switch (*s) {
1292     case '<': printf("&lt;"); break;
1293     case '>': printf("&gt;"); break;
1294     case '&': printf("&amp;"); break;
1295     default: printf("%c", *s); break;
1296     }
1297 james 40 }
1298 james 56 printf("</li>\n");
1299    
1300     } else {
1301     printf(" ");
1302     x = 4;
1303     while (*s) {
1304     spc = strchr(s, ' ');
1305     if (!spc)
1306     spc = s + strlen(s);
1307     if (75 < x + (spc - s)) {
1308     printf("\n ");
1309     x = 4;
1310     }
1311     x += spc - s + 1;
1312     printf("%.*s ", spc - s, s);
1313     if (*spc)
1314     s = spc + 1;
1315     else
1316     s = spc;
1317     }
1318     printf("\n\n");
1319 james 40 }
1320     }
1321 james 56

  ViewVC Help
Powered by ViewVC 1.1.26