/*
 * Source: /[james]/httplint/httplint.c (retrieved through the ViewVC
 * repository browser).
 * Revision 48, Fri Feb 20 20:13:07 2004 UTC, by james.
 * File MIME type: text/x-csrc; file size: 32805 byte(s).
 * Log message: Add Set-Cookie header support.
 */

/*
 * HTTP Header Lint
 * Licensed under the same license as Curl
 * http://curl.haxx.se/docs/copyright.html
 * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
 */

/*
 * Compile using
 *   gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
 *
 * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
 */

#define _GNU_SOURCE
#define __USE_XOPEN

#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <sys/types.h>
#include <regex.h>
#include <curl/curl.h>


/* The decimal digits; used with strspn() to recognise all-digit values. */
#define NUMBER "0123456789"

/* Mark a parameter as deliberately unused so the compiler does not warn. */
#define UNUSED(x) ((void) (x))


34 bool start;
35 CURL *curl;
36 int status_code;
37 char error_buffer[CURL_ERROR_SIZE];
38 regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
39 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
40 re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires;
41
42
/* Set-up and per-URL driver. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks. */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Generic checking and parsing helpers. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
        void (*callback)(const char *s, regmatch_t pmatch[]));

/* One handler per known response header (see header_table). */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);
void header_set_cookie(const char *s);

/* Diagnostics and output. */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);


92 struct header_entry {
93 char name[40];
94 void (*handler)(const char *s);
95 int count;
96 char *missing;
97 } header_table[] = {
98 { "Accept-Ranges", header_accept_ranges, 0, 0 },
99 { "Age", header_age, 0, 0 },
100 { "Allow", header_allow, 0, 0 },
101 { "Cache-Control", header_cache_control, 0, 0 },
102 { "Connection", header_connection, 0, 0 },
103 { "Content-Encoding", header_content_encoding, 0, 0 },
104 { "Content-Language", header_content_language, 0, "missingcontlang" },
105 { "Content-Length", header_content_length, 0, 0 },
106 { "Content-Location", header_content_location, 0, 0 },
107 { "Content-MD5", header_content_md5, 0, 0 },
108 { "Content-Range", header_content_range, 0, 0 },
109 { "Content-Type", header_content_type, 0, "missingcontenttype" },
110 { "Date", header_date, 0, "missingdate" },
111 { "ETag", header_etag, 0, 0 },
112 { "Expires", header_expires, 0, 0 },
113 { "Last-Modified", header_last_modified, 0, "missinglastmod" },
114 { "Location", header_location, 0, 0 },
115 { "Pragma", header_pragma, 0, 0 },
116 { "Retry-After", header_retry_after, 0, 0 },
117 { "Server", header_server, 0, 0 },
118 { "Set-Cookie", header_set_cookie, 0, 0 },
119 { "Trailer", header_trailer, 0, 0 },
120 { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
121 { "Upgrade", header_upgrade, 0, 0 },
122 { "Vary", header_vary, 0, 0 },
123 { "Via", header_via, 0, 0 }
124 };
125
126
127 /**
128 * Main entry point.
129 */
130 int main(int argc, char *argv[])
131 {
132 int i;
133
134 if (argc < 2)
135 die("Usage: httplint url [url ...]");
136
137 init();
138
139 for (i = 1; i != argc; i++)
140 check_url(argv[i]);
141
142 curl_global_cleanup();
143
144 return 0;
145 }
146
147
148 /**
149 * Initialise the curl handle and compile regular expressions.
150 */
151 void init(void)
152 {
153 struct curl_slist *request_headers = 0;
154
155 if (curl_global_init(CURL_GLOBAL_ALL))
156 die("Failed to initialise libcurl");
157
158 curl = curl_easy_init();
159 if (!curl)
160 die("Failed to create curl handle");
161
162 if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
163 die("Failed to set curl options");
164 if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
165 die("Failed to set curl options");
166 if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
167 die("Failed to set curl options");
168 if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
169 die("Failed to set curl options");
170
171 /* remove libcurl default headers */
172 request_headers = curl_slist_append(request_headers, "Accept:");
173 request_headers = curl_slist_append(request_headers, "Pragma:");
174 if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
175 die("Failed to set curl options");
176
177 /* compile regular expressions */
178 regcomp_wrapper(&re_status_line,
179 "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
180 REG_EXTENDED);
181 regcomp_wrapper(&re_token,
182 "^([-0-9a-zA-Z_.!]+)",
183 REG_EXTENDED);
184 regcomp_wrapper(&re_token_value,
185 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
186 REG_EXTENDED);
187 regcomp_wrapper(&re_content_type,
188 "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
189 "(;[ \t]*([-0-9a-zA-Z_.]+)="
190 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
191 REG_EXTENDED);
192 regcomp_wrapper(&re_absolute_uri,
193 "^[a-zA-Z0-9]+://[^ ]+$",
194 REG_EXTENDED);
195 regcomp_wrapper(&re_etag,
196 "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
197 REG_EXTENDED);
198 regcomp_wrapper(&re_server,
199 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
200 REG_EXTENDED);
201 regcomp_wrapper(&re_transfer_coding,
202 "^([-0-9a-zA-Z_.]+)[ \t]*"
203 "(;[ \t]*([-0-9a-zA-Z_.]+)="
204 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
205 REG_EXTENDED);
206 regcomp_wrapper(&re_upgrade,
207 "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
208 REG_EXTENDED);
209 regcomp_wrapper(&re_ugly,
210 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
211 REG_EXTENDED);
212 regcomp_wrapper(&re_rfc1123,
213 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
214 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
215 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
216 REG_EXTENDED);
217 regcomp_wrapper(&re_rfc1036,
218 "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
219 "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
220 "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
221 REG_EXTENDED);
222 regcomp_wrapper(&re_asctime,
223 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
224 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
225 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
226 REG_EXTENDED);
227 regcomp_wrapper(&re_cookie_nameval,
228 "^[^;, ]+=[^;, ]*$",
229 REG_EXTENDED);
230 regcomp_wrapper(&re_cookie_expires,
231 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-"
232 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) "
233 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
234 REG_EXTENDED);
235 }
236
237
/**
 * Compile a regular expression, treating failure as fatal.
 *
 * \param preg    receives the compiled expression
 * \param regex   pattern text
 * \param cflags  flags passed straight to regcomp()
 *
 * On failure the pattern is written to stderr and the regerror() text is
 * handed to die().
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
    char message[200];
    const int status = regcomp(preg, regex, cflags);

    if (status == 0)
        return;

    regerror(status, preg, message, sizeof message);
    fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
    die(message);
}


254 /**
255 * Fetch and check the headers for the specified url.
256 */
257 void check_url(const char *url)
258 {
259 int i, r;
260 CURLcode code;
261
262 start = true;
263 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
264 header_table[i].count = 0;
265
266 printf("Checking URL %s\n", url);
267 if (strncmp(url, "http", 4))
268 warning("this is not an http or https url");
269
270 if (curl_easy_setopt(curl, CURLOPT_URL, url))
271 die("Failed to set curl options");
272
273 code = curl_easy_perform(curl);
274 if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
275 error(error_buffer);
276 return;
277 } else {
278 printf("\n");
279 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
280 if (header_table[i].count == 0 && header_table[i].missing)
281 lookup(header_table[i].missing);
282 }
283 }
284
285 r = regexec(&re_ugly, url, 0, 0, 0);
286 if (r)
287 lookup("ugly");
288 }
289
290
291 /**
292 * Callback for received header data.
293 */
294 size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
295 {
296 const size_t size = msize * nmemb;
297 char s[400], *name, *value;
298
299 UNUSED(stream);
300
301 printf("* ");
302 print(ptr, size);
303 printf("\n");
304
305 if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
306 lookup("notcrlf");
307 return size;
308 }
309 if (sizeof s <= size) {
310 warning("header too long: ignored\n");
311 return size;
312 }
313 strncpy(s, ptr, size);
314 s[size - 2] = 0;
315
316 name = s;
317 value = strchr(s, ':');
318
319 if (s[0] == 0) {
320 /* empty header indicates end of headers */
321 puts("End of headers.");
322 return 0;
323
324 } else if (start) {
325 /* Status-Line [6.1] */
326 check_status_line(s);
327 start = false;
328
329 } else if (!value) {
330 lookup("missingcolon");
331
332 } else {
333 *value = 0;
334 value++;
335
336 check_header(name, skip_lws(value));
337 }
338
339 return size;
340 }
341
342
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned, which aborts the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
    (void) ptr;
    (void) size;
    (void) nmemb;
    (void) stream;

    return 0;
}


359 /**
360 * Check the syntax and content of the response Status-Line [6.1].
361 */
362 void check_status_line(const char *s)
363 {
364 const char *reason;
365 unsigned int major = 0, minor = 0;
366 int r;
367 regmatch_t pmatch[5];
368
369 r = regexec(&re_status_line, s, 5, pmatch, 0);
370 if (r) {
371 lookup("badstatusline");
372 return;
373 }
374
375 major = atoi(s + pmatch[1].rm_so);
376 minor = atoi(s + pmatch[2].rm_so);
377 status_code = atoi(s + pmatch[3].rm_so);
378 reason = s + pmatch[4].rm_so;
379
380 if (major < 1 || (major == 1 && minor == 0)) {
381 lookup("oldhttp");
382 } else if ((major == 1 && 1 < minor) || 1 < major) {
383 lookup("futurehttp");
384 } else {
385 if (status_code < 100 || 600 <= status_code) {
386 lookup("badstatus");
387 } else {
388 char key[] = "xxx";
389 key[0] = '0' + status_code / 100;
390 lookup(key);
391 }
392 }
393 }
394
395
396 /**
397 * Check the syntax and content of a header.
398 */
399 void check_header(const char *name, const char *value)
400 {
401 struct header_entry *header;
402
403 header = bsearch(name, header_table,
404 sizeof header_table / sizeof header_table[0],
405 sizeof header_table[0],
406 (int (*)(const void *, const void *)) strcasecmp);
407
408 if (header) {
409 header->count++;
410 header->handler(value);
411 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
412 lookup("xheader");
413 } else {
414 lookup("nonstandard");
415 }
416 }
417
418
419 /**
420 * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
421 */
422 bool parse_date(const char *s, struct tm *tm)
423 {
424 int r;
425 int len = strlen(s);
426 regmatch_t pmatch[20];
427
428 if (len == 29) {
429 /* RFC 1123 */
430 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
431 if (r == 0) {
432 tm->tm_mday = atoi(s + pmatch[2].rm_so);
433 tm->tm_mon = month(s + pmatch[3].rm_so);
434 tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
435 tm->tm_hour = atoi(s + pmatch[5].rm_so);
436 tm->tm_min = atoi(s + pmatch[6].rm_so);
437 tm->tm_sec = atoi(s + pmatch[7].rm_so);
438 return true;
439 }
440
441 } else if (len == 24) {
442 /* asctime() format */
443 r = regexec(&re_asctime, s, 20, pmatch, 0);
444 if (r == 0) {
445 if (s[pmatch[3].rm_so] == ' ')
446 tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
447 else
448 tm->tm_mday = atoi(s + pmatch[3].rm_so);
449 tm->tm_mon = month(s + pmatch[2].rm_so);
450 tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
451 tm->tm_hour = atoi(s + pmatch[4].rm_so);
452 tm->tm_min = atoi(s + pmatch[5].rm_so);
453 tm->tm_sec = atoi(s + pmatch[6].rm_so);
454 lookup("asctime");
455 return true;
456 }
457
458 } else {
459 /* RFC 1036 */
460 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
461 if (r == 0) {
462 tm->tm_mday = atoi(s + pmatch[2].rm_so);
463 tm->tm_mon = month(s + pmatch[3].rm_so);
464 tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
465 tm->tm_hour = atoi(s + pmatch[5].rm_so);
466 tm->tm_min = atoi(s + pmatch[6].rm_so);
467 tm->tm_sec = atoi(s + pmatch[7].rm_so);
468 lookup("rfc1036");
469 return true;
470 }
471
472 }
473
474 lookup("baddate");
475 return false;
476 }
477
478
/**
 * Convert a month name to the month number.
 *
 * \param s  text starting with a three-letter English month abbreviation
 *           ("Jan" .. "Dec"); only the letters needed to tell the months
 *           apart are examined
 * \return 0 (January) to 11 (December); 0 if the text is not recognised
 */
int month(const char *s)
{
    switch (s[0]) {
    case 'J':
        if (s[1] == 'a')
            return 0;
        if (s[1] == 'u')
            return s[2] == 'n' ? 5 : 6;
        /* not a month: do not fall into the 'F' case */
        return 0;
    case 'F':
        return 1;
    case 'M':
        return s[2] == 'r' ? 2 : 4;
    case 'A':
        return s[1] == 'p' ? 3 : 7;
    case 'S':
        return 8;
    case 'O':
        return 9;
    case 'N':
        return 10;
    case 'D':
        return 11;
    default:
        return 0;
    }
}


/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * The fields are first converted as if they were local standard time; the
 * local offset is then measured by converting the UTC breakdown of that
 * result the same way, and removed again.
 *
 * \param t  broken-down UTC time; may be normalised and adjusted by the
 *           call, and tm_isdst is forced to 0
 * \return the time as a time_t, or -1 on failure
 */
time_t mktime_from_utc(struct tm *t)
{
    time_t tl, tb;
    struct tm *tg;

    /* Both conversions must use the same DST setting, otherwise the
     * offset computed below is wrong by the DST shift. */
    t->tm_isdst = 0;

    tl = mktime(t);
    if (tl == (time_t) -1) {
        /* -1 may be a valid result: retry one hour earlier */
        t->tm_hour--;
        tl = mktime(t);
        if (tl == (time_t) -1)
            return -1; /* can't deal with output from strptime */
        tl += 3600;
    }

    tg = gmtime(&tl);
    if (!tg)
        return -1;
    tg->tm_isdst = 0;
    tb = mktime(tg);
    if (tb == (time_t) -1) {
        tg->tm_hour--;
        tb = mktime(tg);
        if (tb == (time_t) -1)
            return -1; /* can't deal with output from gmtime */
        tb += 3600;
    }

    /* tb - tl is the local offset that the first mktime() applied */
    return tl - (tb - tl);
}


/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * A leading CR LF is stepped over only when a space or tab follows it;
 * after that, any run of spaces and tabs is skipped.
 *
 * \return pointer to the first character after the white space
 */
const char *skip_lws(const char *s)
{
    const char *p = s;

    if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
        p += 2;

    for (; *p == ' ' || *p == '\t'; p++)
        continue;

    return p;
}


/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * Elements are separated by commas with optional LWS; null elements (extra
 * commas) are allowed anywhere, including before the first element.
 *
 * \param s         list text
 * \param preg      expression matching one element at the start of a string
 * \param n         minimum number of elements
 * \param m         maximum number of elements (UINT_MAX for no limit)
 * \param callback  called for each element with the remaining text and the
 *                  match offsets, or 0
 * \return true if the list is well formed and the element count is within
 *         [n, m]; otherwise a diagnostic is printed and false returned
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
        void (*callback)(const char *s, regmatch_t pmatch[]))
{
    unsigned int items = 0;
    regmatch_t pmatch[20];

    /* Skip leading null elements; an empty list then reaches the count
     * check below instead of failing to match a first item. */
    s = skip_lws(s);
    while (*s == ',')
        s = skip_lws(s + 1);

    while (*s != 0) {
        if (regexec(preg, s, 20, pmatch, 0) != 0) {
            printf(" Failed to match list item %u\n", items + 1);
            return false;
        }

        if (callback)
            callback(s, pmatch);
        items++;

        s = skip_lws(s + pmatch[0].rm_eo);
        if (*s == 0)
            break;
        if (*s != ',') {
            printf(" Expecting , after list item %u\n", items);
            return false;
        }
        while (*s == ',')
            s = skip_lws(s + 1);
    }

    if (items < n || m < items) {
        printf(" %u items in list, but there should be ", items);
        if (m == UINT_MAX)
            printf("at least %u\n", n);
        else
            printf("between %u and %u\n", n, m);
        return false;
    }

    return true;
}


/* Header-specific validation. */

/** Accept-Ranges: exactly "bytes" or "none" is accepted. */
void header_accept_ranges(const char *s)
{
    const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

    lookup(known ? "ok" : "unknownrange");
}

614 void header_age(const char *s)
615 {
616 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
617 lookup("badage");
618 else
619 lookup("ok");
620 }
621
622 void header_allow(const char *s)
623 {
624 if (parse_list(s, &re_token, 0, UINT_MAX, 0))
625 lookup("ok");
626 else
627 lookup("badallow");
628 }
629
630 void header_cache_control(const char *s)
631 {
632 if (parse_list(s, &re_token_value, 1, UINT_MAX,
633 header_cache_control_callback))
634 lookup("ok");
635 else
636 lookup("badcachecont");
637 }
638
/* Known Cache-Control directives; kept sorted for bsearch(). */
char cache_control_list[][20] = {
    "max-age", "max-stale", "min-fresh", "must-revalidate",
    "no-cache", "no-store", "no-transform", "only-if-cached",
    "private", "proxy-revalidate", "public", "s-maxage"
};

/** bsearch() comparator: case-insensitive comparison of two strings. */
static int compare_cache_directive(const void *key, const void *element)
{
    return strcasecmp(key, element);
}

/**
 * Check one Cache-Control directive against the list of known names.
 *
 * \param s       text starting at the directive
 * \param pmatch  match offsets; sub-match 1 is the directive name
 */
void header_cache_control_callback(const char *s, regmatch_t pmatch[])
{
    const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
    char name[20];

    /* too long to be any known directive (and to fit in name) */
    if (sizeof name <= len) {
        lookup("unknowncachecont");
        return;
    }

    memcpy(name, s + pmatch[1].rm_so, len);
    name[len] = 0;

    if (!bsearch(name, cache_control_list,
            sizeof cache_control_list / sizeof cache_control_list[0],
            sizeof cache_control_list[0],
            compare_cache_directive)) {
        printf(" Cache-Control directive '%s':\n", name);
        lookup("unknowncachecont");
    }
}

/** Connection: only the exact value "close" is accepted. */
void header_connection(const char *s)
{
    lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}

678 void header_content_encoding(const char *s)
679 {
680 if (parse_list(s, &re_token, 1, UINT_MAX,
681 header_content_encoding_callback))
682 lookup("ok");
683 else
684 lookup("badcontenc");
685 }
686
/* Known content codings; kept sorted for bsearch(). */
char content_coding_list[][20] = {
    "compress", "deflate", "gzip", "identity"
};

/** bsearch() comparator: case-insensitive comparison of two strings. */
static int compare_content_coding(const void *key, const void *element)
{
    return strcasecmp(key, element);
}

/**
 * Check one Content-Encoding value against the list of known codings.
 *
 * \param s       text starting at the coding
 * \param pmatch  match offsets; sub-match 1 is the coding name
 */
void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
{
    const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
    char name[20];

    /* too long to be any known coding (and to fit in name) */
    if (sizeof name <= len) {
        lookup("unknowncontenc");
        return;
    }

    memcpy(name, s + pmatch[1].rm_so, len);
    name[len] = 0;

    if (!bsearch(name, content_coding_list,
            sizeof content_coding_list / sizeof content_coding_list[0],
            sizeof content_coding_list[0],
            compare_content_coding)) {
        printf(" Content-Encoding '%s':\n", name);
        lookup("unknowncontenc");
    }
}

715 void header_content_language(const char *s)
716 {
717 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
718 lookup("ok");
719 else
720 lookup("badcontlang");
721 }
722
723 void header_content_length(const char *s)
724 {
725 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
726 lookup("badcontlen");
727 else
728 lookup("ok");
729 }
730
/** Content-Location: rejected only if the value contains a space. */
void header_content_location(const char *s)
{
    lookup(strchr(s, ' ') != NULL ? "badcontloc" : "ok");
}

/** Content-MD5: only the length is checked; it must be 24 characters. */
void header_content_md5(const char *s)
{
    lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}

/** Content-Range: always reported; the value itself is not examined. */
void header_content_range(const char *s)
{
    (void) s;
    lookup("contentrange");
}

753 void header_content_type(const char *s)
754 {
755 bool charset = false;
756 char *type, *subtype;
757 unsigned int i;
758 int r;
759 regmatch_t pmatch[30];
760
761 r = regexec(&re_content_type, s, 30, pmatch, 0);
762 if (r) {
763 lookup("badcontenttype");
764 return;
765 }
766
767 type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
768 subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
769
770 /* parameters */
771 for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
772 char *attrib, *value;
773
774 attrib = strndup(s + pmatch[i + 1].rm_so,
775 pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
776 value = strndup(s + pmatch[i + 2].rm_so,
777 pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
778
779 if (strcasecmp(attrib, "charset") == 0)
780 charset = true;
781 }
782
783 if (strcasecmp(type, "text") == 0 && !charset)
784 lookup("nocharset");
785 else
786 lookup("ok");
787 }
788
/**
 * Date: the value must parse as an HTTP date and lie within 10 seconds of
 * this system's clock. Nothing further is reported if parsing fails.
 */
void header_date(const char *s)
{
    struct tm parsed;
    const time_t now = time(0);
    time_t stamped;

    if (!parse_date(s, &parsed))
        return;

    stamped = mktime_from_utc(&parsed);
    lookup(fabs(difftime(now, stamped)) > 10 ? "wrongdate" : "ok");
}

807 void header_etag(const char *s)
808 {
809 int r;
810 r = regexec(&re_etag, s, 0, 0, 0);
811 if (r)
812 lookup("badetag");
813 else
814 lookup("ok");
815 }
816
/**
 * Expires: the value must parse as an HTTP date. Nothing further is
 * reported if parsing fails.
 */
void header_expires(const char *s)
{
    struct tm parsed;

    if (!parse_date(s, &parsed))
        return;

    lookup("ok");
}

/**
 * Last-Modified: the value must parse as an HTTP date that is not more
 * than 10 seconds ahead of this system's clock. Nothing further is
 * reported if parsing fails.
 */
void header_last_modified(const char *s)
{
    struct tm parsed;
    const time_t now = time(0);
    time_t modified;

    if (!parse_date(s, &parsed))
        return;

    modified = mktime_from_utc(&parsed);
    lookup(difftime(modified, now) > 10 ? "futurelastmod" : "ok");
}

842 void header_location(const char *s)
843 {
844 int r;
845 r = regexec(&re_absolute_uri, s, 0, 0, 0);
846 if (r)
847 lookup("badlocation");
848 else
849 lookup("ok");
850 }
851
852 void header_pragma(const char *s)
853 {
854 if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
855 lookup("ok");
856 else
857 lookup("badpragma");
858 }
859
860 void header_retry_after(const char *s)
861 {
862 struct tm tm;
863
864 if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
865 lookup("ok");
866 return;
867 }
868
869 if (!parse_date(s, &tm))
870 return;
871
872 lookup("ok");
873 }
874
875 void header_server(const char *s)
876 {
877 int r;
878 r = regexec(&re_server, s, 0, 0, 0);
879 if (r)
880 lookup("badserver");
881 else
882 lookup("ok");
883 }
884
885 void header_trailer(const char *s)
886 {
887 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
888 lookup("ok");
889 else
890 lookup("badtrailer");
891 }
892
893 void header_transfer_encoding(const char *s)
894 {
895 if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
896 header_transfer_encoding_callback))
897 lookup("ok");
898 else
899 lookup("badtransenc");
900 }
901
/* Known transfer codings; kept sorted for bsearch(). */
char transfer_coding_list[][20] = {
    "chunked", "compress", "deflate", "gzip", "identity"
};

/** bsearch() comparator: case-insensitive comparison of two strings. */
static int compare_transfer_coding(const void *key, const void *element)
{
    return strcasecmp(key, element);
}

/**
 * Check one Transfer-Encoding value against the list of known codings.
 *
 * \param s       text starting at the coding
 * \param pmatch  match offsets; sub-match 1 is the coding name
 */
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
{
    const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
    char name[20];

    /* too long to be any known coding (and to fit in name) */
    if (sizeof name <= len) {
        lookup("unknowntransenc");
        return;
    }

    memcpy(name, s + pmatch[1].rm_so, len);
    name[len] = 0;

    if (!bsearch(name, transfer_coding_list,
            sizeof transfer_coding_list / sizeof transfer_coding_list[0],
            sizeof transfer_coding_list[0],
            compare_transfer_coding)) {
        printf(" Transfer-Encoding '%s':\n", name);
        lookup("unknowntransenc");
    }
}

930 void header_upgrade(const char *s)
931 {
932 int r;
933 r = regexec(&re_upgrade, s, 0, 0, 0);
934 if (r)
935 lookup("badupgrade");
936 else
937 lookup("ok");
938 }
939
940 void header_vary(const char *s)
941 {
942 if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
943 lookup("ok");
944 else
945 lookup("badvary");
946 }
947
/** Via: always reported; the value itself is not examined. */
void header_via(const char *s)
{
    (void) s;
    lookup("via");
}

954 /* http://wp.netscape.com/newsref/std/cookie_spec.html */
955 void header_set_cookie(const char *s)
956 {
957 bool ok = true;
958 int r;
959 const char *semi = strchr(s, ';');
960 const char *s2;
961 struct tm tm;
962 double diff;
963 time_t time0, time1;
964 regmatch_t pmatch[20];
965
966 if (semi)
967 s2 = strndup(s, semi - s);
968 else
969 s2 = s;
970
971 r = regexec(&re_cookie_nameval, s2, 0, 0, 0);
972 if (r) {
973 lookup("cookiebadnameval");
974 ok = false;
975 }
976
977 if (!semi)
978 return;
979
980 s = skip_lws(semi + 1);
981
982 while (*s) {
983 semi = strchr(s, ';');
984 if (semi)
985 s2 = strndup(s, semi - s);
986 else
987 s2 = s;
988
989 if (strncmp(s2, "expires=", 8) == 0) {
990 s2 += 8;
991 r = regexec(&re_cookie_expires, s2, 20, pmatch, 0);
992 if (r == 0) {
993 tm.tm_mday = atoi(s2 + pmatch[2].rm_so);
994 tm.tm_mon = month(s2 + pmatch[3].rm_so);
995 tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900;
996 tm.tm_hour = atoi(s2 + pmatch[5].rm_so);
997 tm.tm_min = atoi(s2 + pmatch[6].rm_so);
998 tm.tm_sec = atoi(s2 + pmatch[7].rm_so);
999
1000 time0 = time(0);
1001 time1 = mktime_from_utc(&tm);
1002
1003 diff = difftime(time0, time1);
1004 if (10 < diff) {
1005 lookup("cookiepastdate");
1006 ok = false;
1007 }
1008 } else {
1009 lookup("cookiebaddate");
1010 ok = false;
1011 }
1012 } else if (strncmp(s2, "domain=", 7) == 0) {
1013 } else if (strncmp(s2, "path=", 5) == 0) {
1014 if (s2[5] != '/') {
1015 lookup("cookiebadpath");
1016 ok = false;
1017 }
1018 } else if (strcmp(s, "secure") == 0) {
1019 } else {
1020 printf(" Set-Cookie field '%s':\n", s2);
1021 lookup("cookieunknownfield");
1022 ok = false;
1023 }
1024
1025 if (semi)
1026 s = skip_lws(semi + 1);
1027 else
1028 break;
1029 }
1030
1031 if (ok)
1032 lookup("ok");
1033 }
1034
1035
/**
 * Report a fatal error on stderr, prefixed with the program name, and
 * terminate with EXIT_FAILURE. Does not return.
 */
void die(const char *error)
{
    const char *const program = "httplint";

    fprintf(stderr, "%s: %s\n", program, error);
    exit(EXIT_FAILURE);
}


/**
 * Write a message to stdout with a "Warning: " prefix and a trailing
 * newline.
 */
void warning(const char *message)
{
    fprintf(stdout, "Warning: %s\n", message);
}


/**
 * Write a message to stdout with an "Error: " prefix and a trailing
 * newline.
 */
void error(const char *message)
{
    fprintf(stdout, "Error: %s\n", message);
}


/**
 * Print a string which contains control characters.
 *
 * Printable ASCII (32 to 126) is written unchanged; every other byte is
 * written as two hex digits in square brackets.
 *
 * \param s    bytes to print (need not be NUL terminated)
 * \param len  number of bytes
 */
void print(const char *s, size_t len)
{
    size_t i;

    for (i = 0; i != len; i++) {
        /* Work on the unsigned value: where char is signed, bytes of 0x80
         * and above would otherwise be sign extended and printed as
         * eight hex digits. */
        const unsigned char c = (unsigned char) s[i];

        if (31 < c && c < 127)
            putchar(c);
        else
            printf("[%.2x]", (unsigned int) c);
    }
}


/*
 * Messages printed by lookup(), indexed by key.
 *
 * The table is searched with bsearch(), so the entries must stay sorted by
 * key. Keys are limited to 19 characters by the size of the key field.
 */
struct message_entry {
    const char key[20];
    const char *value;
} message_table[] = {
    { "1xx", "A response status code in the range 100 - 199 indicates a "
             "'provisional response'." },
    { "2xx", "A response status code in the range 200 - 299 indicates that "
             "the request was successful." },
    { "3xx", "A response status code in the range 300 - 399 indicates that "
             "the client should redirect to a new URL." },
    { "4xx", "A response status code in the range 400 - 499 indicates that "
             "the request could not be fulfilled due to client error." },
    { "5xx", "A response status code in the range 500 - 599 indicates that "
             "an error occurred on the server." },
    { "asctime", "Warning: This date is in the obsolete asctime() format. "
                 "Consider using the RFC 1123 format instead." },
    { "badage", "Error: The Age header must be one number." },
    { "badallow", "Error: The Allow header must be a comma-separated list of "
                  "HTTP methods." },
    { "badcachecont", "Error: The Cache-Control header must be a "
                      "comma-separated list of directives." },
    { "badconnection", "Warning: The only value of the Connection header "
                       "defined by HTTP/1.1 is \"close\"." },
    { "badcontenc", "Error: The Content-Encoding header must be a "
                    "comma-separated list of encodings." },
    { "badcontenttype", "Error: The Content-Type header must be of the form "
                        "'type/subtype (; optional parameters)'." },
    { "badcontlang", "Error: The Content-Language header must be a "
                     "comma-separated list of language tags." },
    { "badcontlen", "Error: The Content-Length header must be a number." },
    { "badcontloc", "Error: The Content-Location header must be an absolute "
                    "or relative URI." },
    { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
                    "MD5 sum." },
    { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
                 "1123 format." },
    { "badetag", "Error: The ETag header must be a quoted string (optionally "
                 "preceded by \"W/\" for a weak tag)." },
    { "badlocation", "Error: The Location header must be an absolute URI. "
                     "Relative URIs are not permitted." },
    { "badpragma", "Error: The Pragma header must be a comma-separated list of "
                   "directives." },
    { "badserver", "Error: The Server header must be a space-separated list of "
                   "products of the form Name/optional-version and comments "
                   "in ()." },
    { "badstatus", "Warning: The response status code is outside the standard "
                   "range 100 - 599." },
    { "badstatusline", "Error: Failed to parse the response Status-Line. The "
                       "status line must be of the form 'HTTP/n.n <3-digit "
                       "status> <reason phrase>'." },
    { "badtrailer", "Error: The Trailer header must be a comma-separated list "
                    "of header names." },
    { "badtransenc", "Error: The Transfer-Encoding header must be a "
                     "comma-separated list of encodings." },
    { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
                    "of product identifiers." },
    { "badvary", "Error: The Vary header must be a comma-separated list "
                 "of header names, or \"*\"." },
    { "contentrange", "Warning: The Content-Range header should not be returned "
                      "by the server for this request." },
    { "cookiebaddate", "Error: The expires date must be in the form "
                       "\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." },
    { "cookiebadnameval", "Error: A Set-Cookie header must start with "
                          "name=value, each excluding semi-colon, comma and "
                          "white space." },
    { "cookiebadpath", "Error: The path does not start with \"/\"." },
    { "cookiepastdate", "Warning: The expires date is in the past. The cookie "
                        "will be deleted by browsers." },
    { "cookieunknownfield", "Warning: This is not a standard Set-Cookie "
                            "field." },
    { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
                    "version of this tool." },
    { "futurelastmod", "Error: The specified Last-Modified date-time is in "
                       "the future." },
    { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
    { "missingcontenttype", "Warning: No Content-Type header was present. The "
                            "client will have to guess the media type or ask "
                            "the user. Adding a Content-Type header is strongly "
                            "recommended." },
    { "missingcontlang", "Consider adding a Content-Language header if "
                         "applicable for this document." },
    { "missingdate", "Warning: No Date header was present. A Date header must "
                     "be present, unless the server does not have a clock, or "
                     "the response is 100, 101, or 500 - 599." },
    { "missinglastmod", "No Last-Modified header was present. The "
                        "HTTP/1.1 specification states that this header should "
                        "be sent whenever feasible." },
    { "nocharset", "Warning: No character set is specified in the Content-Type. "
                   "Clients may assume the default of ISO-8859-1. Consider "
                   "appending '; charset=...'." },
    { "nonstandard", "Warning: I don't know anything about this header. Is it "
                     "a standard HTTP response header?" },
    { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
                 "that all header lines end with CR LF." },
    { "ok", "OK." },
    { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
                 "to HTTP/1.1." },
    { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
                 "Consider using the RFC 1123 format instead." },
    { "ugly", "This URL appears to contain implementation-specific parts such "
              "as an extension or a query string. This may make the URL liable "
              "to change when the implementation is changed, resulting in "
              "broken links. Consider using URL rewriting or equivalent to "
              "implement a future-proof URL space. See "
              "http://www.w3.org/Provider/Style/URI for more information." },
    { "unknowncachecont", "Warning: This Cache-Control directive is "
                          "non-standard and will have limited support." },
    { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
    { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
                      "range." },
    { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
    { "via", "This header was added by a proxy, cache or gateway." },
    { "wrongdate", "Warning: The server date-time differs from this system's "
                   "date-time by more than 10 seconds. Check that both the "
                   "system clocks are correct." },
    { "xheader", "This is an extension header. I don't know how to check it." }
};


1198 /**
1199 * Look up and output the string referenced by a key.
1200 */
1201 void lookup(const char *key)
1202 {
1203 const char *s, *spc;
1204 int x;
1205 struct message_entry *message;
1206
1207 message = bsearch(key, message_table,
1208 sizeof message_table / sizeof message_table[0],
1209 sizeof message_table[0],
1210 (int (*)(const void *, const void *)) strcasecmp);
1211 if (message)
1212 s = message->value;
1213 else
1214 s = key;
1215
1216 printf(" ");
1217 x = 4;
1218 while (*s) {
1219 spc = strchr(s, ' ');
1220 if (!spc)
1221 spc = s + strlen(s);
1222 if (75 < x + (spc - s)) {
1223 printf("\n ");
1224 x = 4;
1225 }
1226 x += spc - s + 1;
1227 printf("%.*s ", spc - s, s);
1228 if (*spc)
1229 s = spc + 1;
1230 else
1231 s = spc;
1232 }
1233 printf("\n\n");
1234 }

/* Page generated by ViewVC 1.1.26. */