/[james]/httplint/httplint.c
ViewVC logotype

Contents of /httplint/httplint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 43 - (show annotations) (download) (as text)
Wed Dec 17 21:54:46 2003 UTC (20 years, 10 months ago) by james
File MIME type: text/x-csrc
File size: 29777 byte(s)
Fix range in regexp.

1 /*
2 * HTTP Header Lint
3 * Licensed under the same license as Curl
4 * http://curl.haxx.se/docs/copyright.html
5 * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
6 */
7
8 /*
9 * Compile using
10 * gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
11 *
12 * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
13 */
14
15 #define _GNU_SOURCE
16 #define __USE_XOPEN
17
18 #include <limits.h>
19 #include <math.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 #include <sys/types.h>
26 #include <regex.h>
27 #include <curl/curl.h>
28
29
30 #define NUMBER "0123456789"
31 #define UNUSED(x) x = x
32
33
34 bool start;
35 CURL *curl;
36 int status_code;
37 char error_buffer[CURL_ERROR_SIZE];
38 regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
39 re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
40 re_rfc1123, re_rfc1036, re_asctime;
41
42
/* Set-up and per-URL driver. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks. */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Status line and header dispatch. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);

/* Parsing helpers. */
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s,
		regex_t *preg,
		unsigned int n,
		unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* One checker per known header; each receives the header value. */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);

/* Output helpers. */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);
89
90
91 struct header_entry {
92 char name[40];
93 void (*handler)(const char *s);
94 int count;
95 char *missing;
96 } header_table[] = {
97 { "Accept-Ranges", header_accept_ranges, 0, 0 },
98 { "Age", header_age, 0, 0 },
99 { "Allow", header_allow, 0, 0 },
100 { "Cache-Control", header_cache_control, 0, 0 },
101 { "Connection", header_connection, 0, 0 },
102 { "Content-Encoding", header_content_encoding, 0, 0 },
103 { "Content-Language", header_content_language, 0, "missingcontlang" },
104 { "Content-Length", header_content_length, 0, 0 },
105 { "Content-Location", header_content_location, 0, 0 },
106 { "Content-MD5", header_content_md5, 0, 0 },
107 { "Content-Range", header_content_range, 0, 0 },
108 { "Content-Type", header_content_type, 0, "missingcontenttype" },
109 { "Date", header_date, 0, "missingdate" },
110 { "ETag", header_etag, 0, 0 },
111 { "Expires", header_expires, 0, 0 },
112 { "Last-Modified", header_last_modified, 0, "missinglastmod" },
113 { "Location", header_location, 0, 0 },
114 { "Pragma", header_pragma, 0, 0 },
115 { "Retry-After", header_retry_after, 0, 0 },
116 { "Server", header_server, 0, 0 },
117 { "Trailer", header_trailer, 0, 0 },
118 { "Transfer-Encoding", header_transfer_encoding, 0, 0 },
119 { "Upgrade", header_upgrade, 0, 0 },
120 { "Vary", header_vary, 0, 0 },
121 { "Via", header_via, 0, 0 }
122 };
123
124
125 /**
126 * Main entry point.
127 */
128 int main(int argc, char *argv[])
129 {
130 int i;
131
132 if (argc < 2)
133 die("Usage: httplint url [url ...]");
134
135 init();
136
137 for (i = 1; i != argc; i++)
138 check_url(argv[i]);
139
140 curl_global_cleanup();
141
142 return 0;
143 }
144
145
146 /**
147 * Initialise the curl handle and compile regular expressions.
148 */
149 void init(void)
150 {
151 struct curl_slist *request_headers = 0;
152
153 if (curl_global_init(CURL_GLOBAL_ALL))
154 die("Failed to initialise libcurl");
155
156 curl = curl_easy_init();
157 if (!curl)
158 die("Failed to create curl handle");
159
160 if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback))
161 die("Failed to set curl options");
162 if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback))
163 die("Failed to set curl options");
164 if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint"))
165 die("Failed to set curl options");
166 if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer))
167 die("Failed to set curl options");
168
169 /* remove libcurl default headers */
170 request_headers = curl_slist_append(request_headers, "Accept:");
171 request_headers = curl_slist_append(request_headers, "Pragma:");
172 if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers))
173 die("Failed to set curl options");
174
175 /* compile regular expressions */
176 regcomp_wrapper(&re_status_line,
177 "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$",
178 REG_EXTENDED);
179 regcomp_wrapper(&re_token,
180 "^([-0-9a-zA-Z_.]+)",
181 REG_EXTENDED);
182 regcomp_wrapper(&re_token_value,
183 "^([-0-9a-zA-Z_.]+)(=([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\"))?",
184 REG_EXTENDED);
185 regcomp_wrapper(&re_content_type,
186 "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
187 "(;[ \t]*([-0-9a-zA-Z_.]+)="
188 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
189 REG_EXTENDED);
190 regcomp_wrapper(&re_absolute_uri,
191 "^[a-zA-Z0-9]+://[^ ]+$",
192 REG_EXTENDED);
193 regcomp_wrapper(&re_etag,
194 "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
195 REG_EXTENDED);
196 regcomp_wrapper(&re_server,
197 "^((([-0-9a-zA-Z_.]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
198 REG_EXTENDED);
199 regcomp_wrapper(&re_transfer_coding,
200 "^([-0-9a-zA-Z_.]+)[ \t]*"
201 "(;[ \t]*([-0-9a-zA-Z_.]+)="
202 "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
203 REG_EXTENDED);
204 regcomp_wrapper(&re_upgrade,
205 "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
206 REG_EXTENDED);
207 regcomp_wrapper(&re_ugly,
208 "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
209 REG_EXTENDED);
210 regcomp_wrapper(&re_rfc1123,
211 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
212 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
213 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
214 REG_EXTENDED);
215 regcomp_wrapper(&re_rfc1036,
216 "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
217 "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
218 "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
219 REG_EXTENDED);
220 regcomp_wrapper(&re_asctime,
221 "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
222 "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) "
223 "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
224 REG_EXTENDED);
225 }
226
227
/**
 * Compile a regular expression; on failure print the pattern and the
 * regerror() text, then exit through die().
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	const int rc = regcomp(preg, regex, cflags);

	if (rc != 0) {
		char reason[200];

		regerror(rc, preg, reason, sizeof reason);
		fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
		die(reason);
	}
}
242
243
244 /**
245 * Fetch and check the headers for the specified url.
246 */
247 void check_url(const char *url)
248 {
249 int i, r;
250 CURLcode code;
251
252 start = true;
253 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++)
254 header_table[i].count = 0;
255
256 printf("Checking URL %s\n", url);
257 if (strncmp(url, "http", 4))
258 warning("this is not an http or https url");
259
260 if (curl_easy_setopt(curl, CURLOPT_URL, url))
261 die("Failed to set curl options");
262
263 code = curl_easy_perform(curl);
264 if (code != CURLE_OK && code != CURLE_WRITE_ERROR) {
265 error(error_buffer);
266 return;
267 } else {
268 printf("\n");
269 for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) {
270 if (header_table[i].count == 0 && header_table[i].missing)
271 lookup(header_table[i].missing);
272 }
273 }
274
275 r = regexec(&re_ugly, url, 0, 0, 0);
276 if (r)
277 lookup("ugly");
278 }
279
280
281 /**
282 * Callback for received header data.
283 */
284 size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream)
285 {
286 const size_t size = msize * nmemb;
287 char s[400], *name, *value;
288
289 UNUSED(stream);
290
291 printf("* ");
292 print(ptr, size);
293 printf("\n");
294
295 if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) {
296 lookup("notcrlf");
297 return size;
298 }
299 if (sizeof s <= size) {
300 warning("header too long: ignored\n");
301 return size;
302 }
303 strncpy(s, ptr, size);
304 s[size - 2] = 0;
305
306 name = s;
307 value = strchr(s, ':');
308
309 if (s[0] == 0) {
310 /* empty header indicates end of headers */
311 puts("End of headers.");
312 return 0;
313
314 } else if (start) {
315 /* Status-Line [6.1] */
316 check_status_line(s);
317 start = false;
318
319 } else if (!value) {
320 lookup("missingcolon");
321
322 } else {
323 *value = 0;
324 value++;
325
326 check_header(name, skip_lws(value));
327 }
328
329 return size;
330 }
331
332
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned to abort the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
347
348
349 /**
350 * Check the syntax and content of the response Status-Line [6.1].
351 */
352 void check_status_line(const char *s)
353 {
354 const char *reason;
355 unsigned int major = 0, minor = 0;
356 int r;
357 regmatch_t pmatch[5];
358
359 r = regexec(&re_status_line, s, 5, pmatch, 0);
360 if (r) {
361 lookup("badstatusline");
362 return;
363 }
364
365 major = atoi(s + pmatch[1].rm_so);
366 minor = atoi(s + pmatch[2].rm_so);
367 status_code = atoi(s + pmatch[3].rm_so);
368 reason = s + pmatch[4].rm_so;
369
370 if (major < 1 || (major == 1 && minor == 0)) {
371 lookup("oldhttp");
372 } else if ((major == 1 && 1 < minor) || 1 < major) {
373 lookup("futurehttp");
374 } else {
375 if (status_code < 100 || 600 <= status_code) {
376 lookup("badstatus");
377 } else {
378 char key[] = "xxx";
379 key[0] = '0' + status_code / 100;
380 lookup(key);
381 }
382 }
383 }
384
385
386 /**
387 * Check the syntax and content of a header.
388 */
389 void check_header(const char *name, const char *value)
390 {
391 struct header_entry *header;
392
393 header = bsearch(name, header_table,
394 sizeof header_table / sizeof header_table[0],
395 sizeof header_table[0],
396 (int (*)(const void *, const void *)) strcasecmp);
397
398 if (header) {
399 header->count++;
400 header->handler(value);
401 } else
402 lookup("nonstandard");
403 }
404
405
406 /**
407 * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
408 */
409 bool parse_date(const char *s, struct tm *tm)
410 {
411 int r;
412 int len = strlen(s);
413 regmatch_t pmatch[20];
414
415 if (len == 29) {
416 /* RFC 1123 */
417 r = regexec(&re_rfc1123, s, 20, pmatch, 0);
418 if (r == 0) {
419 tm->tm_mday = atoi(s + pmatch[2].rm_so);
420 tm->tm_mon = month(s + pmatch[3].rm_so);
421 tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
422 tm->tm_hour = atoi(s + pmatch[5].rm_so);
423 tm->tm_min = atoi(s + pmatch[6].rm_so);
424 tm->tm_sec = atoi(s + pmatch[7].rm_so);
425 return true;
426 }
427
428 } else if (len == 24) {
429 /* asctime() format */
430 r = regexec(&re_asctime, s, 20, pmatch, 0);
431 if (r == 0) {
432 if (s[pmatch[3].rm_so] == ' ')
433 tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
434 else
435 tm->tm_mday = atoi(s + pmatch[3].rm_so);
436 tm->tm_mon = month(s + pmatch[2].rm_so);
437 tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
438 tm->tm_hour = atoi(s + pmatch[4].rm_so);
439 tm->tm_min = atoi(s + pmatch[5].rm_so);
440 tm->tm_sec = atoi(s + pmatch[6].rm_so);
441 lookup("asctime");
442 return true;
443 }
444
445 } else {
446 /* RFC 1036 */
447 r = regexec(&re_rfc1036, s, 20, pmatch, 0);
448 if (r == 0) {
449 tm->tm_mday = atoi(s + pmatch[2].rm_so);
450 tm->tm_mon = month(s + pmatch[3].rm_so);
451 tm->tm_year = 100 + atoi(s + pmatch[4].rm_so);
452 tm->tm_hour = atoi(s + pmatch[5].rm_so);
453 tm->tm_min = atoi(s + pmatch[6].rm_so);
454 tm->tm_sec = atoi(s + pmatch[7].rm_so);
455 lookup("rfc1036");
456 return true;
457 }
458
459 }
460
461 lookup("baddate");
462 return false;
463 }
464
465
/**
 * Convert a month name to the month number.
 *
 * Only the first three characters of s are compared, so s may point into
 * a longer date string. Returns 0 for "Jan" through 11 for "Dec", and 0
 * when the text is not an English month abbreviation.
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	for (i = 0; i != 12; i++) {
		/* strncmp stops at a terminator, so short input is safe */
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}
	return 0;
}
496
497
/*
 * mktime() with one retry: when the first call fails, step tm_hour back
 * by one, try again and add the hour to the result. Returns false when
 * both attempts fail.
 */
static bool mktime_retry_hour(struct tm *tm, time_t *out)
{
	time_t value = mktime(tm);

	if (value == -1) {
		tm->tm_hour--;
		value = mktime(tm);
		if (value == -1)
			return false;
		value += 3600;
	}
	*out = value;
	return true;
}

/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * Converts *t with mktime(), runs the result through gmtime() and
 * mktime() again, and corrects the first value by the difference
 * between the two. Returns -1 when mktime() cannot convert.
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t local, shifted;
	struct tm *utc_parts;

	if (!mktime_retry_hour(t, &local))
		return -1;	/* can't deal with output from strptime */

	utc_parts = gmtime(&local);
	utc_parts->tm_isdst = 0;
	if (!mktime_retry_hour(utc_parts, &shifted))
		return -1;	/* can't deal with output from gmtime */

	return (local - (shifted - local));
}
529
530
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * A leading CR LF is skipped only when a space or tab follows it; after
 * that any run of spaces and tabs is skipped. Returns a pointer to the
 * first character left over.
 */
const char *skip_lws(const char *s)
{
	const bool folded = s[0] == '\r' && s[1] == '\n' &&
			(s[2] == ' ' || s[2] == '\t');
	const char *rest = folded ? s + 2 : s;

	return rest + strspn(rest, " \t");
}
542
543
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * s         the comma-separated list
 * preg      pattern that must match at the start of every element
 * n, m      minimum and maximum number of elements; m == UINT_MAX means
 *           there is no upper limit
 * callback  called for every element with the text at the element and the
 *           match offsets, or 0 for none
 *
 * Empty elements (leading, repeated or trailing commas) are skipped, so an
 * empty list is acceptable when n is 0. Returns true when every element
 * matched and the count is within range; otherwise prints what went wrong
 * and returns false.
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	/* skip leading white space and empty elements */
	s = skip_lws(s);
	while (*s == ',')
		s = skip_lws(s + 1);

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0)) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
588
589
590 /* Header-specific validation. */
/* Accept-Ranges: only the values "bytes" and "none" are recognised. */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
600
601 void header_age(const char *s)
602 {
603 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
604 lookup("badage");
605 else
606 lookup("ok");
607 }
608
609 void header_allow(const char *s)
610 {
611 if (parse_list(s, &re_token, 0, UINT_MAX, 0))
612 lookup("ok");
613 else
614 lookup("badallow");
615 }
616
617 void header_cache_control(const char *s)
618 {
619 if (parse_list(s, &re_token_value, 1, UINT_MAX,
620 header_cache_control_callback))
621 lookup("ok");
622 else
623 lookup("badcachecont");
624 }
625
/* Recognised Cache-Control directives; kept sorted for bsearch(). */
char cache_control_list[][20] = {
	"max-age",
	"max-stale",
	"min-fresh",
	"must-revalidate",
	"no-cache",
	"no-store",
	"no-transform",
	"only-if-cached",
	"private",
	"proxy-revalidate",
	"public",
	"s-maxage"
};

/*
 * Called by parse_list() for each Cache-Control directive; warns when the
 * directive name (match group 1) is not in cache_control_list.
 */
void header_cache_control_callback(const char *s, regmatch_t pmatch[])
{
	const size_t length = pmatch[1].rm_eo - pmatch[1].rm_so;
	char directive[20];

	/* a name too long for the buffer cannot be a listed directive */
	if (sizeof directive - 1 < length) {
		lookup("unknowncachecont");
		return;
	}

	memcpy(directive, s + pmatch[1].rm_so, length);
	directive[length] = 0;

	if (bsearch(directive, cache_control_list,
			sizeof cache_control_list / sizeof cache_control_list[0],
			sizeof cache_control_list[0],
			(int (*)(const void *, const void *)) strcasecmp))
		return;

	printf(" Cache-Control directive '%s':\n", directive);
	lookup("unknowncachecont");
}
656
/* Connection: anything other than exactly "close" draws a warning. */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") != 0 ? "badconnection" : "ok");
}
664
665 void header_content_encoding(const char *s)
666 {
667 if (parse_list(s, &re_token, 1, UINT_MAX,
668 header_content_encoding_callback))
669 lookup("ok");
670 else
671 lookup("badcontenc");
672 }
673
/* Recognised content codings; kept sorted for bsearch(). */
char content_coding_list[][20] = {
	"compress",
	"deflate",
	"gzip",
	"identity"
};

/*
 * Called by parse_list() for each Content-Encoding element; warns when the
 * coding name (match group 1) is not in content_coding_list.
 */
void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t length = pmatch[1].rm_eo - pmatch[1].rm_so;
	char coding[20];

	/* a name too long for the buffer cannot be a listed coding */
	if (sizeof coding - 1 < length) {
		lookup("unknowncontenc");
		return;
	}

	memcpy(coding, s + pmatch[1].rm_so, length);
	coding[length] = 0;

	if (bsearch(coding, content_coding_list,
			sizeof content_coding_list / sizeof content_coding_list[0],
			sizeof content_coding_list[0],
			(int (*)(const void *, const void *)) strcasecmp))
		return;

	printf(" Content-Encoding '%s':\n", coding);
	lookup("unknowncontenc");
}
701
702 void header_content_language(const char *s)
703 {
704 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
705 lookup("ok");
706 else
707 lookup("badcontlang");
708 }
709
710 void header_content_length(const char *s)
711 {
712 if (s[0] == 0 || strspn(s, NUMBER) != strlen(s))
713 lookup("badcontlen");
714 else
715 lookup("ok");
716 }
717
/* Content-Location: rejected only when the value contains a space. */
void header_content_location(const char *s)
{
	lookup(strchr(s, ' ') != 0 ? "badcontloc" : "ok");
}
725
/* Content-MD5: only the length (24 characters) is checked. */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
733
/* Content-Range: always reported, whatever the value. */
void header_content_range(const char *s)
{
	(void) s;

	lookup("contentrange");
}
739
740 void header_content_type(const char *s)
741 {
742 bool charset = false;
743 char *type, *subtype;
744 unsigned int i;
745 int r;
746 regmatch_t pmatch[30];
747
748 r = regexec(&re_content_type, s, 30, pmatch, 0);
749 if (r) {
750 lookup("badcontenttype");
751 return;
752 }
753
754 type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
755 subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
756
757 /* parameters */
758 for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) {
759 char *attrib, *value;
760
761 attrib = strndup(s + pmatch[i + 1].rm_so,
762 pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so);
763 value = strndup(s + pmatch[i + 2].rm_so,
764 pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so);
765
766 if (strcasecmp(attrib, "charset") == 0)
767 charset = true;
768 }
769
770 if (strcasecmp(type, "text") == 0 && !charset)
771 lookup("nocharset");
772 else
773 lookup("ok");
774 }
775
/*
 * Date: must parse as an HTTP date and lie within 10 seconds of this
 * system's clock. A date that does not parse is reported by parse_date().
 */
void header_date(const char *s)
{
	double diff;
	time_t time0, time1;
	/* zeroed so that the members parse_date() does not fill in, tm_isdst
	 * among them, have a defined value when mktime() reads them */
	struct tm tm = { 0 };

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	time1 = mktime_from_utc(&tm);

	diff = difftime(time0, time1);
	if (10 < fabs(diff))
		lookup("wrongdate");
	else
		lookup("ok");
}
793
794 void header_etag(const char *s)
795 {
796 int r;
797 r = regexec(&re_etag, s, 0, 0, 0);
798 if (r)
799 lookup("badetag");
800 else
801 lookup("ok");
802 }
803
/* Expires: must parse as a date; parse_date() reports any failure. */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
810
/*
 * Last-Modified: must parse as an HTTP date and must not lie more than
 * 10 seconds in the future. A date that does not parse is reported by
 * parse_date().
 */
void header_last_modified(const char *s)
{
	double diff;
	time_t time0, time1;
	/* zeroed so that the members parse_date() does not fill in, tm_isdst
	 * among them, have a defined value when mktime() reads them */
	struct tm tm = { 0 };

	time0 = time(0);
	if (!parse_date(s, &tm))
		return;
	/* the parsed date is GMT, so convert it as UTC like header_date() */
	time1 = mktime_from_utc(&tm);

	diff = difftime(time1, time0);
	if (10 < diff)
		lookup("futurelastmod");
	else
		lookup("ok");
}
828
829 void header_location(const char *s)
830 {
831 int r;
832 r = regexec(&re_absolute_uri, s, 0, 0, 0);
833 if (r)
834 lookup("badlocation");
835 else
836 lookup("ok");
837 }
838
839 void header_pragma(const char *s)
840 {
841 if (parse_list(s, &re_token_value, 1, UINT_MAX, 0))
842 lookup("ok");
843 else
844 lookup("badpragma");
845 }
846
847 void header_retry_after(const char *s)
848 {
849 struct tm tm;
850
851 if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) {
852 lookup("ok");
853 return;
854 }
855
856 if (!parse_date(s, &tm))
857 return;
858
859 lookup("ok");
860 }
861
862 void header_server(const char *s)
863 {
864 int r;
865 r = regexec(&re_server, s, 0, 0, 0);
866 if (r)
867 lookup("badserver");
868 else
869 lookup("ok");
870 }
871
872 void header_trailer(const char *s)
873 {
874 if (parse_list(s, &re_token, 1, UINT_MAX, 0))
875 lookup("ok");
876 else
877 lookup("badtrailer");
878 }
879
880 void header_transfer_encoding(const char *s)
881 {
882 if (parse_list(s, &re_transfer_coding, 1, UINT_MAX,
883 header_transfer_encoding_callback))
884 lookup("ok");
885 else
886 lookup("badtransenc");
887 }
888
/* Recognised transfer codings; kept sorted for bsearch(). */
char transfer_coding_list[][20] = {
	"chunked",
	"compress",
	"deflate",
	"gzip",
	"identity"
};

/*
 * Called by parse_list() for each Transfer-Encoding element; warns when
 * the coding name (match group 1) is not in transfer_coding_list.
 */
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t length = pmatch[1].rm_eo - pmatch[1].rm_so;
	char coding[20];

	/* a name too long for the buffer cannot be a listed coding */
	if (sizeof coding - 1 < length) {
		lookup("unknowntransenc");
		return;
	}

	memcpy(coding, s + pmatch[1].rm_so, length);
	coding[length] = 0;

	if (bsearch(coding, transfer_coding_list,
			sizeof transfer_coding_list / sizeof transfer_coding_list[0],
			sizeof transfer_coding_list[0],
			(int (*)(const void *, const void *)) strcasecmp))
		return;

	printf(" Transfer-Encoding '%s':\n", coding);
	lookup("unknowntransenc");
}
916
917 void header_upgrade(const char *s)
918 {
919 int r;
920 r = regexec(&re_upgrade, s, 0, 0, 0);
921 if (r)
922 lookup("badupgrade");
923 else
924 lookup("ok");
925 }
926
927 void header_vary(const char *s)
928 {
929 if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0))
930 lookup("ok");
931 else
932 lookup("badvary");
933 }
934
/* Via: always reported, whatever the value. */
void header_via(const char *s)
{
	(void) s;

	lookup("via");
}
940
941
/**
 * Print an error message, prefixed with the program name, on stderr and
 * exit with a failure status.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);
	exit(EXIT_FAILURE);
}
950
951
/**
 * Print a warning message on stdout, followed by a newline.
 */
void warning(const char *message)
{
	fputs("Warning: ", stdout);
	puts(message);
}
959
960
/**
 * Print an error message on stdout, followed by a newline.
 */
void error(const char *message)
{
	fputs("Error: ", stdout);
	puts(message);
}
968
969
/**
 * Print a string which contains control characters.
 *
 * Exactly len bytes of s are written to stdout. Bytes 32 to 126 are
 * printed as they are; every other byte is shown as two hex digits in
 * square brackets.
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* go through unsigned char so that bytes above 127 are not
		 * sign-extended into eight hex digits */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
983
984
/*
 * Messages printed by lookup(), which searches this table with bsearch()
 * and strcasecmp(); the entries must therefore remain sorted by key.
 */
struct message_entry {
	const char key[20];	/* short identifier passed to lookup() */
	const char *value;	/* text shown to the user */
} message_table[] = {
	{ "1xx", "A response status code in the range 100 - 199 indicates a "
			"'provisional response'." },
	{ "2xx", "A response status code in the range 200 - 299 indicates that "
			"the request was successful." },
	{ "3xx", "A response status code in the range 300 - 399 indicates that "
			"the client should redirect to a new URL." },
	{ "4xx", "A response status code in the range 400 - 499 indicates that "
			"the request could not be fulfilled due to client error." },
	{ "5xx", "A response status code in the range 500 - 599 indicates that "
			"an error occurred on the server." },
	{ "asctime", "Warning: This date is in the obsolete asctime() format. "
			"Consider using the RFC 1123 format instead." },
	{ "badage", "Error: The Age header must be one number." },
	{ "badallow", "Error: The Allow header must be a comma-separated list of "
			"HTTP methods." },
	{ "badcachecont", "Error: The Cache-Control header must be a "
			"comma-separated list of directives." },
	{ "badconnection", "Warning: The only value of the Connection header "
			"defined by HTTP/1.1 is \"close\"." },
	{ "badcontenc", "Error: The Content-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badcontenttype", "Error: The Content-Type header must be of the form "
			"'type/subtype (; optional parameters)'." },
	{ "badcontlang", "Error: The Content-Language header must be a "
			"comma-separated list of language tags." },
	{ "badcontlen", "Error: The Content-Length header must be a number." },
	{ "badcontloc", "Error: The Content-Location header must be an absolute "
			"or relative URI." },
	{ "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
			"MD5 sum." },
	{ "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
			"1123 format." },
	{ "badetag", "Error: The ETag header must be a quoted string (optionally "
			"preceded by \"W/\" for a weak tag)." },
	{ "badlocation", "Error: The Location header must be an absolute URI. "
			"Relative URIs are not permitted." },
	{ "badpragma", "Error: The Pragma header must be a comma-separated list of "
			"directives." },
	{ "badserver", "Error: The Server header must be a space-separated list of "
			"products of the form Name/optional-version and comments "
			"in ()." },
	{ "badstatus", "Warning: The response status code is outside the standard "
			"range 100 - 599." },
	{ "badstatusline", "Error: Failed to parse the response Status-Line. The "
			"status line must be of the form 'HTTP/n.n <3-digit "
			"status> <reason phrase>'." },
	{ "badtrailer", "Error: The Trailer header must be a comma-separated list "
			"of header names." },
	{ "badtransenc", "Error: The Transfer-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badupgrade", "Error: The Upgrade header must be a comma-separated list "
			"of product identifiers." },
	{ "badvary", "Error: The Vary header must be a comma-separated list "
			"of header names, or \"*\"." },
	{ "contentrange", "Warning: The Content-Range header should not be returned "
			"by the server for this request." },
	{ "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
			"version of this tool." },
	{ "futurelastmod", "Error: The specified Last-Modified date-time is in "
			"the future." },
	{ "missingcolon", "Error: Headers must be of the form 'Name: value'." },
	{ "missingcontenttype", "Warning: No Content-Type header was present. The "
			"client will have to guess the media type or ask "
			"the user. Adding a Content-Type header is strongly "
			"recommended." },
	{ "missingcontlang", "Consider adding a Content-Language header if "
			"applicable for this document." },
	{ "missingdate", "Warning: No Date header was present. A Date header must "
			"be present, unless the server does not have a clock, or "
			"the response is 100, 101, or 500 - 599." },
	{ "missinglastmod", "No Last-Modified header was present. The "
			"HTTP/1.1 specification states that this header should "
			"be sent whenever feasible." },
	{ "nocharset", "Warning: No character set is specified in the Content-Type. "
			"Clients may assume the default of ISO-8859-1. Consider "
			"appending '; charset=...'." },
	{ "nonstandard", "Warning: I don't know anything about this header. Is it "
			"a standard HTTP response header?" },
	{ "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
			"that all header lines end with CR LF." },
	{ "ok", "OK." },
	{ "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
			"to HTTP/1.1." },
	{ "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
			"Consider using the RFC 1123 format instead." },
	{ "ugly", "This URL appears to contain implementation-specific parts such "
			"as an extension or a query string. This may make the URL liable "
			"to change when the implementation is changed, resulting in "
			"broken links. Consider using URL rewriting or equivalent to "
			"implement a future-proof URL space. See "
			"http://www.w3.org/Provider/Style/URI for more information." },
	{ "unknowncachecont", "Warning: This Cache-Control directive is "
			"non-standard and will have limited support." },
	{ "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
	{ "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
			"range." },
	{ "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
	{ "via", "This header was added by a proxy, cache or gateway." },
	{ "wrongdate", "Warning: The server date-time differs from this system's "
			"date-time by more than 10 seconds. Check that both the "
			"system clocks are correct." }
};
1091
1092
1093 /**
1094 * Look up and output the string referenced by a key.
1095 */
1096 void lookup(const char *key)
1097 {
1098 const char *s, *spc;
1099 int x;
1100 struct message_entry *message;
1101
1102 message = bsearch(key, message_table,
1103 sizeof message_table / sizeof message_table[0],
1104 sizeof message_table[0],
1105 (int (*)(const void *, const void *)) strcasecmp);
1106 if (message)
1107 s = message->value;
1108 else
1109 s = key;
1110
1111 printf(" ");
1112 x = 4;
1113 while (*s) {
1114 spc = strchr(s, ' ');
1115 if (!spc)
1116 spc = s + strlen(s);
1117 if (75 < x + (spc - s)) {
1118 printf("\n ");
1119 x = 4;
1120 }
1121 x += spc - s + 1;
1122 printf("%.*s ", spc - s, s);
1123 if (*spc)
1124 s = spc + 1;
1125 else
1126 s = spc;
1127 }
1128 printf("\n\n");
1129 }

  ViewVC Help
Powered by ViewVC 1.1.26