/[james]/httplint/httplint.c
ViewVC logotype

Contents of /httplint/httplint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 44 - (show annotations) (download) (as text)
Thu Dec 18 00:52:50 2003 UTC (20 years, 10 months ago) by james
File MIME type: text/x-csrc
File size: 29967 byte(s)
Fix Last-Modified, add X- header message, add ! to tokens.

1 /*
2 * HTTP Header Lint
3 * Licensed under the same license as Curl
4 * http://curl.haxx.se/docs/copyright.html
5 * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
6 */
7
8 /*
9 * Compile using
10 * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11 *
12 * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13 */
14
15 #define _GNU_SOURCE
16 #define __USE_XOPEN
17
18 #include <limits.h>
19 #include <math.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 #include <sys/types.h>
26 #include <regex.h>
27 #include <curl/curl.h>
28
29
/* Characters accepted in purely numeric header values. */
#define NUMBER "0123456789"
/*
 * Mark a parameter as deliberately unused.  A void cast is used rather than
 * a self-assignment so that it also works on const objects and does not
 * trigger self-assignment warnings.
 */
#define UNUSED(x) ((void) (x))
32
33
34 bool start;
35 CURL *curl;
36 int status_code;
37 char error_buffer[CURL_ERROR_SIZE];
38 regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
39 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
40 re_rfc1123, re_rfc1036, re_asctime;
41
42
/* Start-up and fetching. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks. */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Generic checking and parsing helpers. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* Per-header handlers, one for each entry in header_table. */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);

/* Output. */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);
89
90
91 struct header_entry {
92 char name[40];
93 void (*handler)(const char *s);
94 int count;
95 char *missing;
96 } header_table[] = {
97 { "Accept-Ranges", header_accept_ranges, 0, 0 },
98 { "Age", header_age, 0, 0 },
99 { "Allow", header_allow, 0, 0 },
100 { "Cache-Control", header_cache_control, 0, 0 },
101 { "Connection", header_connection, 0, 0 },
102 { "Content-Encoding", header_content_encoding, 0, 0 },
103 { "Content-Language", header_content_language, 0, "missingcontlang" },
104 { "Content-Length", header_content_length, 0, 0 },
105 { "Content-Location", header_content_location, 0, 0 },
106 { "Content-MD5", header_content_md5, 0, 0 },
107 { "Content-Range", header_content_range, 0, 0 },
108 { "Content-Type", header_content_type, 0, "missingcontenttype" },
109 { "Date", header_date, 0, "missingdate" },
110 { "ETag", header_etag, 0, 0 },
111 { "Expires", header_expires, 0, 0 },
112 { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113 { "Location", header_location, 0, 0 },
114 { "Pragma", header_pragma, 0, 0 },
115 { "Retry-After", header_retry_after, 0, 0 },
116 { "Server", header_server, 0, 0 },
117 { "Trailer", header_trailer, 0, 0 },
118 { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
119 { "Upgrade", header_upgrade, 0, 0 },
120 { "Vary", header_vary, 0, 0 },
121 { "Via", header_via, 0, 0 }
122 };
123
124
125 /**
126 * Main entry point.
127 */
128 int main(int argc, char *argv[])
129 {
130 int i;
131
132 if (argc < 2)
133 die("Usage: httplint url [url ...]");
134
135 init();
136
137 for (i = 1; i != argc; i++)
138 check_url(argv[i]);
139
140 curl_global_cleanup();
141
142 return 0;
143 }
144
145
146 /**
147 * Initialise the curl handle and compile regular expressions.
148 */
149 void init(void)
150 {
151 struct curl_slist *request_headers = 0;
152
153 if (curl_global_init(CURL_GLOBAL_ALL))
154 die("Failed to initialise libcurl");
155
156 curl = curl_easy_init();
157 if (!curl)
158 die("Failed to create curl handle");
159
160 if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
161 die("Failed to set curl options");
162 if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
163 die("Failed to set curl options");
164 if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
165 die("Failed to set curl options");
166 if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
167 die("Failed to set curl options");
168
169 /* remove libcurl default headers */
170 request_headers = curl_slist_append(request_headers, "Accept:");
171 request_headers = curl_slist_append(request_headers, "Pragma:");
172 if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
173 die("Failed to set curl options");
174
175 /* compile regular expressions */
176 regcomp_wrapper(&re_status_line,
177 "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
178 REG_EXTENDED);
179 regcomp_wrapper(&re_token,
180 "^([-0-9a-zA-Z_.!]+)",
181 REG_EXTENDED);
182 regcomp_wrapper(&re_token_value,
183 "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
184 REG_EXTENDED);
185 regcomp_wrapper(&re_content_type,
186 "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
187 "(;[ \t]*([-0-9a-zA-Z_.]+)="
188 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
189 REG_EXTENDED);
190 regcomp_wrapper(&re_absolute_uri,
191 "^[a-zA-Z0-9]+://[^ ]+$",
192 REG_EXTENDED);
193 regcomp_wrapper(&re_etag,
194 "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
195 REG_EXTENDED);
196 regcomp_wrapper(&re_server,
197 "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
198 REG_EXTENDED);
199 regcomp_wrapper(&re_transfer_coding,
200 "^([-0-9a-zA-Z_.]+)[ \t]*"
201 "(;[ \t]*([-0-9a-zA-Z_.]+)="
202 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
203 REG_EXTENDED);
204 regcomp_wrapper(&re_upgrade,
205 "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
206 REG_EXTENDED);
207 regcomp_wrapper(&re_ugly,
208 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
209 REG_EXTENDED);
210 regcomp_wrapper(&re_rfc1123,
211 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
212 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
213 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
214 REG_EXTENDED);
215 regcomp_wrapper(&re_rfc1036,
216 "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
217 "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
218 "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
219 REG_EXTENDED);
220 regcomp_wrapper(&re_asctime,
221 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
222 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
223 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
224 REG_EXTENDED);
225 }
226
227
/**
 * Compile a regular expression, treating failure as fatal.
 *
 * On failure the offending pattern is written to stderr and the regerror()
 * description is passed to die().
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char description[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, description, sizeof description);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(description);
}
242
243
244 /**
245 * Fetch and check the headers for the specified url.
246 */
247 void check_url(const char *url)
248 {
249 int i, r;
250 CURLcode code;
251
252 start = true;
253 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
254 header_table[i].count = 0;
255
256 printf("Checking URL %s\n", url);
257 if (strncmp(url, "http", 4))
258 warning("this is not an http or https url");
259
260 if (curl_easy_setopt(curl, CURLOPT_URL, url))
261 die("Failed to set curl options");
262
263 code = curl_easy_perform(curl);
264 if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
265 error(error_buffer);
266 return;
267 } else {
268 printf("\n");
269 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
270 if (header_table[i].count == 0 && header_table[i].missing)
271 lookup(header_table[i].missing);
272 }
273 }
274
275 r = regexec(&re_ugly, url, 0, 0, 0);
276 if (r)
277 lookup("ugly");
278 }
279
280
281 /**
282 * Callback for received header data.
283 */
284 size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
285 {
286 const size_t size = msize * nmemb;
287 char s[400], *name, *value;
288
289 UNUSED(stream);
290
291 printf("* ");
292 print(ptr, size);
293 printf("\n");
294
295 if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
296 lookup("notcrlf");
297 return size;
298 }
299 if (sizeof s <= size) {
300 warning("header too long: ignored\n");
301 return size;
302 }
303 strncpy(s, ptr, size);
304 s[size - 2] = 0;
305
306 name = s;
307 value = strchr(s, ':');
308
309 if (s[0] == 0) {
310 /* empty header indicates end of headers */
311 puts("End of headers.");
312 return 0;
313
314 } else if (start) {
315 /* Status-Line [6.1] */
316 check_status_line(s);
317 start = false;
318
319 } else if (!value) {
320 lookup("missingcolon");
321
322 } else {
323 *value = 0;
324 value++;
325
326 check_header(name, skip_lws(value));
327 }
328
329 return size;
330 }
331
332
/**
 * Callback for received body data.
 *
 * Only the headers are of interest, so no data is ever consumed: returning
 * 0 makes the fetch stop.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	/* none of the arguments are needed */
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
347
348
349 /**
350 * Check the syntax and content of the response Status-Line [6.1].
351 */
352 void check_status_line(const char *s)
353 {
354 const char *reason;
355 unsigned int major = 0, minor = 0;
356 int r;
357 regmatch_t pmatch[5];
358
359 r = regexec(&re_status_line, s, 5, pmatch, 0);
360 if (r) {
361 lookup("badstatusline");
362 return;
363 }
364
365 major = atoi(s + pmatch[1].rm_so);
366 minor = atoi(s + pmatch[2].rm_so);
367 status_code = atoi(s + pmatch[3].rm_so);
368 reason = s + pmatch[4].rm_so;
369
370 if (major < 1 || (major == 1 && minor == 0)) {
371 lookup("oldhttp");
372 } else if ((major == 1 && 1 < minor) || 1 < major) {
373 lookup("futurehttp");
374 } else {
375 if (status_code < 100 || 600 <= status_code) {
376 lookup("badstatus");
377 } else {
378 char key[] = "xxx";
379 key[0] = '0' + status_code / 100;
380 lookup(key);
381 }
382 }
383 }
384
385
386 /**
387 * Check the syntax and content of a header.
388 */
389 void check_header(const char *name, const char *value)
390 {
391 struct header_entry *header;
392
393 header = bsearch(name, header_table,
394 sizeof header_table / sizeof header_table[0],
395 sizeof header_table[0],
396 (int (*)(const void *, const void *)) strcasecmp);
397
398 if (header) {
399 header->count++;
400 header->handler(value);
401 } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
402 lookup("xheader");
403 } else {
404 lookup("nonstandard");
405 }
406 }
407
408
409 /**
410 * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
411 */
412 bool parse_date(const char *s, struct tm *tm)
413 {
414 int r;
415 int len = strlen(s);
416 regmatch_t pmatch[20];
417
418 if (len == 29) {
419 /* RFC 1123 */
420 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
421 if (r == 0) {
422 tm->tm_mday = atoi(s + pmatch[2].rm_so);
423 tm->tm_mon = month(s + pmatch[3].rm_so);
424 tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
425 tm->tm_hour = atoi(s + pmatch[5].rm_so);
426 tm->tm_min = atoi(s + pmatch[6].rm_so);
427 tm->tm_sec = atoi(s + pmatch[7].rm_so);
428 return true;
429 }
430
431 } else if (len == 24) {
432 /* asctime() format */
433 r = regexec(&re_asctime, s, 20, pmatch, 0);
434 if (r == 0) {
435 if (s[pmatch[3].rm_so] == ' ')
436 tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
437 else
438 tm->tm_mday = atoi(s + pmatch[3].rm_so);
439 tm->tm_mon = month(s + pmatch[2].rm_so);
440 tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
441 tm->tm_hour = atoi(s + pmatch[4].rm_so);
442 tm->tm_min = atoi(s + pmatch[5].rm_so);
443 tm->tm_sec = atoi(s + pmatch[6].rm_so);
444 lookup("asctime");
445 return true;
446 }
447
448 } else {
449 /* RFC 1036 */
450 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
451 if (r == 0) {
452 tm->tm_mday = atoi(s + pmatch[2].rm_so);
453 tm->tm_mon = month(s + pmatch[3].rm_so);
454 tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
455 tm->tm_hour = atoi(s + pmatch[5].rm_so);
456 tm->tm_min = atoi(s + pmatch[6].rm_so);
457 tm->tm_sec = atoi(s + pmatch[7].rm_so);
458 lookup("rfc1036");
459 return true;
460 }
461
462 }
463
464 lookup("baddate");
465 return false;
466 }
467
468
/**
 * Convert a month name to the month number.
 *
 * \param  s  string starting with a 3-letter English month abbreviation
 *            with a leading capital ("Jan" ... "Dec"); anything after the
 *            first three characters is ignored
 * \return month number in the range 0 (January) to 11 (December), or 0 if
 *         the name is not recognised
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	for (i = 0; i != 12; i++) {
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}

	return 0;
}
499
500
/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * The broken-down time is first converted as if it were local time, then
 * the result is corrected by the local offset from UTC, which is measured
 * by converting that result back through gmtime() and mktime().
 *
 * \param  t  broken-down UTC time; may be modified
 * \return the corresponding time_t, or -1 on failure
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t local, shifted;
	struct tm utc;
	const struct tm *converted;

	local = mktime(t);
	if (local == (time_t) -1) {
		/* retry one hour earlier and add the hour back */
		t->tm_hour--;
		local = mktime(t);
		if (local == (time_t) -1)
			return -1;
		local += 3600;
	}

	converted = gmtime(&local);
	if (!converted)
		return -1;
	/* work on a copy instead of the buffer owned by gmtime() */
	utc = *converted;
	utc.tm_isdst = 0;

	shifted = mktime(&utc);
	if (shifted == (time_t) -1) {
		utc.tm_hour--;
		shifted = mktime(&utc);
		if (shifted == (time_t) -1)
			return -1;
		shifted += 3600;
	}

	/* (shifted - local) is the offset that mktime() applied */
	return local - (shifted - local);
}
532
533
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * LWS is an optional CR LF followed by spaces and tabs.  The CR LF is only
 * skipped when a space or tab follows it.  Returns a pointer to the first
 * character after the white space.
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
		p += 2;

	for (; *p == ' ' || *p == '\t'; p++)
		;

	return p;
}
545
546
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * Elements are separated by commas with optional LWS around them.  Null
 * elements (consecutive, leading or trailing commas) are allowed and are
 * not counted, so an empty string is a valid list of 0 items.
 *
 * \param  s         list to parse
 * \param  preg      regular expression matching one element at the start
 *                   of a string
 * \param  n         minimum number of elements
 * \param  m         maximum number of elements, UINT_MAX for no limit
 * \param  callback  called with each element's string and matches, or 0
 * \return true if the list is well formed and has n to m elements;
 *         otherwise false, after printing the reason
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	/* skip leading null elements */
	s = skip_lws(s);
	while (*s == ',')
		s = skip_lws(s + 1);

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0)) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
591
592
593 /* Header-specific validation. */
/**
 * Check Accept-Ranges: only the range units "bytes" and "none" are known.
 */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
603
604 void header_age(const char *s)
605 {
606 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
607 lookup("badage");
608 else
609 lookup("ok");
610 }
611
612 void header_allow(const char *s)
613 {
614 if (parse_list(s, &re_token, 0, UINT_MAX, 0))
615 lookup("ok");
616 else
617 lookup("badallow");
618 }
619
620 void header_cache_control(const char *s)
621 {
622 if (parse_list(s, &re_token_value, 1, UINT_MAX,
623 header_cache_control_callback))
624 lookup("ok");
625 else
626 lookup("badcachecont");
627 }
628
/* Known Cache-Control directives; kept sorted for bsearch(). */
char cache_control_list[][20] = {
	"max-age", "max-stale", "min-fresh", "must-revalidate",
	"no-cache", "no-store", "no-transform", "only-if-cached",
	"private", "proxy-revalidate", "public", "s-maxage"
};

/**
 * Report a Cache-Control directive whose name is not in
 * cache_control_list.  The name is the first match group of the list
 * element; names are compared ignoring case.
 */
void header_cache_control_callback(const char *s, regmatch_t pmatch[])
{
	const size_t known =
			sizeof cache_control_list / sizeof cache_control_list[0];
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any known directive */
	if (sizeof name <= len) {
		lookup("unknowncachecont");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (bsearch(name, cache_control_list, known,
			sizeof cache_control_list[0],
			(int (*)(const void *, const void *)) strcasecmp))
		return;

	printf(" Cache-Control directive '%s':\n", name);
	lookup("unknowncachecont");
}
659
/**
 * Check Connection: "close" is the only option defined by HTTP/1.1.
 *
 * The option is compared ignoring case, so that "Close" is not reported.
 */
void header_connection(const char *s)
{
	if (strcasecmp(s, "close") == 0)
		lookup("ok");
	else
		lookup("badconnection");
}
667
668 void header_content_encoding(const char *s)
669 {
670 if (parse_list(s, &re_token, 1, UINT_MAX,
671 header_content_encoding_callback))
672 lookup("ok");
673 else
674 lookup("badcontenc");
675 }
676
/* Known content codings; kept sorted for bsearch(). */
char content_coding_list[][20] = {
	"compress", "deflate", "gzip", "identity"
};

/**
 * Report a content coding which is not in content_coding_list.  The name
 * is the first match group of the list element; names are compared
 * ignoring case.
 */
void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t known =
			sizeof content_coding_list / sizeof content_coding_list[0];
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any known coding */
	if (sizeof name <= len) {
		lookup("unknowncontenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (bsearch(name, content_coding_list, known,
			sizeof content_coding_list[0],
			(int (*)(const void *, const void *)) strcasecmp))
		return;

	printf(" Content-Encoding '%s':\n", name);
	lookup("unknowncontenc");
}
704
705 void header_content_language(const char *s)
706 {
707 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
708 lookup("ok");
709 else
710 lookup("badcontlang");
711 }
712
713 void header_content_length(const char *s)
714 {
715 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
716 lookup("badcontlen");
717 else
718 lookup("ok");
719 }
720
/**
 * Check Content-Location: the only check made is that the URI does not
 * contain a space.
 */
void header_content_location(const char *s)
{
	const bool has_space = strchr(s, ' ') != 0;

	lookup(has_space ? "badcontloc" : "ok");
}
728
/**
 * Check Content-MD5: the value must be a base64 encoded MD5 sum.
 *
 * An MD5 sum is 16 bytes, which base64 always encodes as 22 characters
 * from the base64 alphabet followed by "==" padding, 24 in total.
 */
void header_content_md5(const char *s)
{
	static const char alphabet[] =
			"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
			"abcdefghijklmnopqrstuvwxyz"
			"0123456789+/";

	if (strlen(s) == 24 && strspn(s, alphabet) == 22 &&
			strcmp(s + 22, "==") == 0)
		lookup("ok");
	else
		lookup("badcontmd5");
}
736
/**
 * Content-Range: the value is not examined; the header is always reported
 * with the "contentrange" message.
 */
void header_content_range(const char *s)
{
	(void) s;

	lookup("contentrange");
}
742
743 void header_content_type(const char *s)
744 {
745 bool charset = false;
746 char *type, *subtype;
747 unsigned int i;
748 int r;
749 regmatch_t pmatch[30];
750
751 r = regexec(&re_content_type, s, 30, pmatch, 0);
752 if (r) {
753 lookup("badcontenttype");
754 return;
755 }
756
757 type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
758 subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
759
760 /* parameters */
761 for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
762 char *attrib, *value;
763
764 attrib = strndup(s + pmatch[i + 1].rm_so,
765 pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
766 value = strndup(s + pmatch[i + 2].rm_so,
767 pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
768
769 if (strcasecmp(attrib, "charset") == 0)
770 charset = true;
771 }
772
773 if (strcasecmp(type, "text") == 0 && !charset)
774 lookup("nocharset");
775 else
776 lookup("ok");
777 }
778
/**
 * Check Date: the date must parse and be within 10 seconds of this
 * system's clock.  parse_date() reports dates which do not parse.
 */
void header_date(const char *s)
{
	struct tm parsed;
	const time_t now = time(0);
	time_t stated;

	if (!parse_date(s, &parsed))
		return;

	stated = mktime_from_utc(&parsed);
	lookup(10 < fabs(difftime(now, stated)) ? "wrongdate" : "ok");
}
796
797 void header_etag(const char *s)
798 {
799 int r;
800 r = regexec(&re_etag, s, 0, 0, 0);
801 if (r)
802 lookup("badetag");
803 else
804 lookup("ok");
805 }
806
/**
 * Check Expires: the value must parse as a date.  parse_date() reports
 * dates which do not parse.
 */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
813
/**
 * Check Last-Modified: the date must parse and must not be more than 10
 * seconds ahead of this system's clock.  parse_date() reports dates which
 * do not parse.
 */
void header_last_modified(const char *s)
{
	struct tm parsed;
	const time_t now = time(0);
	time_t modified;

	if (!parse_date(s, &parsed))
		return;

	modified = mktime_from_utc(&parsed);
	lookup(10 < difftime(modified, now) ? "futurelastmod" : "ok");
}
831
832 void header_location(const char *s)
833 {
834 int r;
835 r = regexec(&re_absolute_uri, s, 0, 0, 0);
836 if (r)
837 lookup("badlocation");
838 else
839 lookup("ok");
840 }
841
842 void header_pragma(const char *s)
843 {
844 if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
845 lookup("ok");
846 else
847 lookup("badpragma");
848 }
849
850 void header_retry_after(const char *s)
851 {
852 struct tm tm;
853
854 if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
855 lookup("ok");
856 return;
857 }
858
859 if (!parse_date(s, &tm))
860 return;
861
862 lookup("ok");
863 }
864
865 void header_server(const char *s)
866 {
867 int r;
868 r = regexec(&re_server, s, 0, 0, 0);
869 if (r)
870 lookup("badserver");
871 else
872 lookup("ok");
873 }
874
875 void header_trailer(const char *s)
876 {
877 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
878 lookup("ok");
879 else
880 lookup("badtrailer");
881 }
882
883 void header_transfer_encoding(const char *s)
884 {
885 if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
886 header_transfer_encoding_callback))
887 lookup("ok");
888 else
889 lookup("badtransenc");
890 }
891
/* Known transfer codings; kept sorted for bsearch(). */
char transfer_coding_list[][20] = {
	"chunked", "compress", "deflate", "gzip", "identity"
};

/**
 * Report a transfer coding which is not in transfer_coding_list.  The name
 * is the first match group of the list element; names are compared
 * ignoring case.
 */
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t known =
			sizeof transfer_coding_list / sizeof transfer_coding_list[0];
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any known coding */
	if (sizeof name <= len) {
		lookup("unknowntransenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (bsearch(name, transfer_coding_list, known,
			sizeof transfer_coding_list[0],
			(int (*)(const void *, const void *)) strcasecmp))
		return;

	printf(" Transfer-Encoding '%s':\n", name);
	lookup("unknowntransenc");
}
919
920 void header_upgrade(const char *s)
921 {
922 int r;
923 r = regexec(&re_upgrade, s, 0, 0, 0);
924 if (r)
925 lookup("badupgrade");
926 else
927 lookup("ok");
928 }
929
930 void header_vary(const char *s)
931 {
932 if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
933 lookup("ok");
934 else
935 lookup("badvary");
936 }
937
/**
 * Via: the value is not examined; the header is always reported with the
 * "via" message.
 */
void header_via(const char *s)
{
	(void) s;

	lookup("via");
}
943
944
/**
 * Report a fatal error on stderr, prefixed with the program name, and
 * terminate with a failure status.  Does not return.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);

	exit(EXIT_FAILURE);
}
953
954
/**
 * Write a message to stdout as a "Warning: " line.
 */
void warning(const char *message)
{
	fputs("Warning: ", stdout);
	puts(message);
}
962
963
/**
 * Write a message to stdout as an "Error: " line.
 */
void error(const char *message)
{
	fputs("Error: ", stdout);
	puts(message);
}
971
972
/**
 * Print a string which contains control characters.
 *
 * Printable ASCII is output as is; every other byte is output as two hex
 * digits in square brackets, e.g. "[0d]".
 *
 * \param  s    bytes to print; need not be 0 terminated
 * \param  len  number of bytes to print
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* plain char may be signed: without this, bytes of 0x80 and
		 * above would be sign extended and shown as [ffffffxx] */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
986
987
988 struct message_entry {
989 const char key[20];
990 const char *value;
991 } message_table[] = {
992 { "1xx", "A response status code in the range 100 - 199 indicates a "
993 "'provisional response'." },
994 { "2xx", "A response status code in the range 200 - 299 indicates that "
995 "the request was successful." },
996 { "3xx", "A response status code in the range 300 - 399 indicates that "
997 "the client should redirect to a new URL." },
998 { "4xx", "A response status code in the range 400 - 499 indicates that "
999 "the request could not be fulfilled due to client error." },
1000 { "5xx", "A response status code in the range 500 - 599 indicates that "
1001 "an error occurred on the server." },
1002 { "asctime", "Warning: This date is in the obsolete asctime() format. "
1003 "Consider using the RFC 1123 format instead." },
1004 { "badage", "Error: The Age header must be one number." },
1005 { "badallow", "Error: The Allow header must be a comma-separated list of "
1006 "HTTP methods." },
1007 { "badcachecont", "Error: The Cache-Control header must be a "
1008 "comma-separated list of directives." },
1009 { "badconnection", "Warning: The only value of the Connection header "
1010 "defined by HTTP/1.1 is \"close\"." },
1011 { "badcontenc", "Error: The Content-Encoding header must be a "
1012 "comma-separated list of encodings." },
1013 { "badcontenttype", "Error: The Content-Type header must be of the form "
1014 "'type/subtype (; optional parameters)'." },
1015 { "badcontlang", "Error: The Content-Language header must be a "
1016 "comma-separated list of language tags." },
1017 { "badcontlen", "Error: The Content-Length header must be a number." },
1018 { "badcontloc", "Error: The Content-Location header must be an absolute "
1019 "or relative URI." },
1020 { "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
1021 "MD5 sum." },
1022 { "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
1023 "1123 format." },
1024 { "badetag", "Error: The ETag header must be a quoted string (optionally "
1025 "preceded by \"W/\" for a weak tag)." },
1026 { "badlocation", "Error: The Location header must be an absolute URI. "
1027 "Relative URIs are not permitted." },
1028 { "badpragma", "Error: The Pragma header must be a comma-separated list of "
1029 "directives." },
1030 { "badserver", "Error: The Server header must be a space-separated list of "
1031 "products of the form Name/optional-version and comments "
1032 "in ()." },
1033 { "badstatus", "Warning: The response status code is outside the standard "
1034 "range 100 - 599." },
1035 { "badstatusline", "Error: Failed to parse the response Status-Line. The "
1036 "status line must be of the form 'HTTP/n.n <3-digit "
1037 "status> <reason phrase>'." },
1038 { "badtrailer", "Error: The Trailer header must be a comma-separated list "
1039 "of header names." },
1040 { "badtransenc", "Error: The Transfer-Encoding header must be a "
1041 "comma-separated of encodings." },
1042 { "badupgrade", "Error: The Upgrade header must be a comma-separated list "
1043 "of product identifiers." },
1044 { "badvary", "Error: The Vary header must be a comma-separated list "
1045 "of header names, or \"*\"." },
1046 { "contentrange", "Warning: The Content-Range header should not be returned "
1047 "by the server for this request." },
1048 { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
1049 "version of this tool." },
1050 { "futurelastmod", "Error: The specified Last-Modified date-time is in "
1051 "the future." },
1052 { "missingcolon", "Error: Headers must be of the form 'Name: value'." },
1053 { "missingcontenttype", "Warning: No Content-Type header was present. The "
1054 "client will have to guess the media type or ask "
1055 "the user. Adding a Content-Type header is strongly "
1056 "recommended." },
1057 { "missingcontlang", "Consider adding a Content-Language header if "
1058 "applicable for this document." },
1059 { "missingdate", "Warning: No Date header was present. A Date header must "
1060 "be present, unless the server does not have a clock, or "
1061 "the response is 100, 101, or 500 - 599." },
1062 { "missinglastmod", "No Last-Modified header was present. The "
1063 "HTTP/1.1 specification states that this header should "
1064 "be sent whenever feasible." },
1065 { "nocharset", "Warning: No character set is specified in the Content-Type. "
1066 "Clients may assume the default of ISO-8859-1. Consider "
1067 "appending '; charset=...'." },
1068 { "nonstandard", "Warning: I don't know anything about this header. Is it "
1069 "a standard HTTP response header?" },
1070 { "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
1071 "that all header lines end with CR LF." },
1072 { "ok", "OK." },
1073 { "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
1074 "to HTTP/1.1." },
1075 { "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
1076 "Consider using the RFC 1123 format instead." },
1077 { "ugly", "This URL appears to contain implementation-specific parts such "
1078 "as an extension or a query string. This may make the URL liable "
1079 "to change when the implementation is changed, resulting in "
1080 "broken links. Consider using URL rewriting or equivalent to "
1081 "implement a future-proof URL space. See "
1082 "http://www.w3.org/Provider/Style/URI for more information." },
1083 { "unknowncachecont", "Warning: This Cache-Control directive is "
1084 "non-standard and will have limited support." },
1085 { "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
1086 { "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
1087 "range." },
1088 { "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
1089 { "via", "This header was added by a proxy, cache or gateway." },
1090 { "wrongdate", "Warning: The server date-time differs from this system's "
1091 "date-time by more than 10 seconds. Check that both the "
1092 "system clocks are correct." },
1093 { "xheader", "This is an extension header. I don't know how to check it." }
1094 };
1095
1096
1097 /**
1098 * Look up and output the string referenced by a key.
1099 */
1100 void lookup(const char *key)
1101 {
1102 const char *s, *spc;
1103 int x;
1104 struct message_entry *message;
1105
1106 message = bsearch(key, message_table,
1107 sizeof message_table / sizeof message_table[0],
1108 sizeof message_table[0],
1109 (int (*)(const void *, const void *)) strcasecmp);
1110 if (message)
1111 s = message->value;
1112 else
1113 s = key;
1114
1115 printf(" ");
1116 x = 4;
1117 while (*s) {
1118 spc = strchr(s, ' ');
1119 if (!spc)
1120 spc = s + strlen(s);
1121 if (75 < x + (spc - s)) {
1122 printf("\n ");
1123 x = 4;
1124 }
1125 x += spc - s + 1;
1126 printf("%.*s ", spc - s, s);
1127 if (*spc)
1128 s = spc + 1;
1129 else
1130 s = spc;
1131 }
1132 printf("\n\n");
1133 }

  ViewVC Help
Powered by ViewVC 1.1.26