1 |
/*
 * HTTP Header Lint
 * Licensed under the same license as Curl
 * http://curl.haxx.se/docs/copyright.html
 * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
 */
7 |
|
8 |
/*
 * Compile using
 *   gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
 *
 * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
 */
14 |
|
15 |
#define _GNU_SOURCE
#define __USE_XOPEN

#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <sys/types.h>
#include <regex.h>
#include <curl/curl.h>
28 |
|
29 |
|
30 |
/* The decimal digits, for use with strspn() when validating numbers. */
#define NUMBER "0123456789"
/*
 * Mark a parameter as deliberately unused.  A void cast is used rather
 * than self-assignment so that the macro also works on const objects and
 * does not trigger self-assignment warnings.
 */
#define UNUSED(x) ((void) (x))
32 |
|
33 |
|
34 |
bool start; |
35 |
CURL *curl; |
36 |
int status_code; |
37 |
char error_buffer[CURL_ERROR_SIZE]; |
38 |
regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly, |
39 |
re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade; |
40 |
|
41 |
|
42 |
/* Set-up and fetching. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks. */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Generic checking and parsing helpers. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* One validator per known response header (plus list-item callbacks). */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);

/* Output helpers. */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);
86 |
|
87 |
|
88 |
struct header_entry { |
89 |
char name[40]; |
90 |
void (*handler)(const char *s); |
91 |
int count; |
92 |
char *missing; |
93 |
} header_table[] = { |
94 |
{ "Accept-Ranges", header_accept_ranges, 0, 0 }, |
95 |
{ "Age", header_age, 0, 0 }, |
96 |
{ "Allow", header_allow, 0, 0 }, |
97 |
{ "Cache-Control", header_cache_control, 0, 0 }, |
98 |
{ "Connection", header_connection, 0, 0 }, |
99 |
{ "Content-Encoding", header_content_encoding, 0, 0 }, |
100 |
{ "Content-Language", header_content_language, 0, "missingcontlang" }, |
101 |
{ "Content-Length", header_content_length, 0, 0 }, |
102 |
{ "Content-Location", header_content_location, 0, 0 }, |
103 |
{ "Content-MD5", header_content_md5, 0, 0 }, |
104 |
{ "Content-Range", header_content_range, 0, 0 }, |
105 |
{ "Content-Type", header_content_type, 0, "missingcontenttype" }, |
106 |
{ "Date", header_date, 0, "missingdate" }, |
107 |
{ "ETag", header_etag, 0, 0 }, |
108 |
{ "Expires", header_expires, 0, 0 }, |
109 |
{ "Last-Modified", header_last_modified, 0, "missinglastmod" }, |
110 |
{ "Location", header_location, 0, 0 }, |
111 |
{ "Pragma", header_pragma, 0, 0 }, |
112 |
{ "Retry-After", header_retry_after, 0, 0 }, |
113 |
{ "Server", header_server, 0, 0 }, |
114 |
{ "Trailer", header_trailer, 0, 0 }, |
115 |
{ "Transfer-Encoding", header_transfer_encoding, 0, 0 }, |
116 |
{ "Upgrade", header_upgrade, 0, 0 }, |
117 |
{ "Vary", header_vary, 0, 0 }, |
118 |
{ "Via", header_via, 0, 0 } |
119 |
}; |
120 |
|
121 |
|
122 |
/** |
123 |
* Main entry point. |
124 |
*/ |
125 |
int main(int argc, char *argv[]) |
126 |
{ |
127 |
int i; |
128 |
|
129 |
if (argc < 2) |
130 |
die("Usage: httplint url [url ...]"); |
131 |
|
132 |
init(); |
133 |
|
134 |
for (i = 1; i != argc; i++) |
135 |
check_url(argv[i]); |
136 |
|
137 |
curl_global_cleanup(); |
138 |
|
139 |
return 0; |
140 |
} |
141 |
|
142 |
|
143 |
/** |
144 |
* Initialise the curl handle and compile regular expressions. |
145 |
*/ |
146 |
void init(void) |
147 |
{ |
148 |
struct curl_slist *request_headers = 0; |
149 |
|
150 |
if (curl_global_init(CURL_GLOBAL_ALL)) |
151 |
die("Failed to initialise libcurl"); |
152 |
|
153 |
curl = curl_easy_init(); |
154 |
if (!curl) |
155 |
die("Failed to create curl handle"); |
156 |
|
157 |
if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback)) |
158 |
die("Failed to set curl options"); |
159 |
if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback)) |
160 |
die("Failed to set curl options"); |
161 |
if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint")) |
162 |
die("Failed to set curl options"); |
163 |
if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer)) |
164 |
die("Failed to set curl options"); |
165 |
|
166 |
/* remove libcurl default headers */ |
167 |
request_headers = curl_slist_append(request_headers, "Accept:"); |
168 |
request_headers = curl_slist_append(request_headers, "Pragma:"); |
169 |
if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers)) |
170 |
die("Failed to set curl options"); |
171 |
|
172 |
/* compile regular expressions */ |
173 |
regcomp_wrapper(&re_status_line, |
174 |
"^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$", |
175 |
REG_EXTENDED); |
176 |
regcomp_wrapper(&re_token, |
177 |
"^([-0-9a-zA-Z_.]+)", |
178 |
REG_EXTENDED); |
179 |
regcomp_wrapper(&re_token_value, |
180 |
"^([-0-9a-zA-Z_.]+)(=([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\"))?", |
181 |
REG_EXTENDED); |
182 |
regcomp_wrapper(&re_content_type, |
183 |
"^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*" |
184 |
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
185 |
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
186 |
REG_EXTENDED); |
187 |
regcomp_wrapper(&re_absolute_uri, |
188 |
"^[a-zA-Z0-9]+://[^ ]+$", |
189 |
REG_EXTENDED); |
190 |
regcomp_wrapper(&re_etag, |
191 |
"^(W/[ \t]*)?\"([^\"]|[\\].)*\"$", |
192 |
REG_EXTENDED); |
193 |
regcomp_wrapper(&re_server, |
194 |
"^((([-0-9a-zA-Z_.]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$", |
195 |
REG_EXTENDED); |
196 |
regcomp_wrapper(&re_transfer_coding, |
197 |
"^([-0-9a-zA-Z_.]+)[ \t]*" |
198 |
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
199 |
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
200 |
REG_EXTENDED); |
201 |
regcomp_wrapper(&re_upgrade, |
202 |
"^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$", |
203 |
REG_EXTENDED); |
204 |
regcomp_wrapper(&re_ugly, |
205 |
"^[a-zA-Z0-9]+://[^/]+[/a-zA-Z0-9-_]*$", |
206 |
REG_EXTENDED); |
207 |
} |
208 |
|
209 |
|
210 |
/**
 * Compile a regular expression, handling errors.
 *
 * If regcomp() fails, the offending expression is written to stderr and
 * the program exits through die() with the regerror() text.
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
224 |
|
225 |
|
226 |
/** |
227 |
* Fetch and check the headers for the specified url. |
228 |
*/ |
229 |
void check_url(const char *url) |
230 |
{ |
231 |
int i, r; |
232 |
CURLcode code; |
233 |
|
234 |
start = true; |
235 |
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) |
236 |
header_table[i].count = 0; |
237 |
|
238 |
printf("Checking URL %s\n", url); |
239 |
if (strncmp(url, "http", 4)) |
240 |
warning("this is not an http or https url"); |
241 |
|
242 |
if (curl_easy_setopt(curl, CURLOPT_URL, url)) |
243 |
die("Failed to set curl options"); |
244 |
|
245 |
code = curl_easy_perform(curl); |
246 |
if (code != CURLE_OK && code != CURLE_WRITE_ERROR) { |
247 |
error(error_buffer); |
248 |
return; |
249 |
} else { |
250 |
printf("\n"); |
251 |
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) { |
252 |
if (header_table[i].count == 0 && header_table[i].missing) |
253 |
lookup(header_table[i].missing); |
254 |
} |
255 |
} |
256 |
|
257 |
r = regexec(&re_ugly, url, 0, 0, 0); |
258 |
if (r) |
259 |
lookup("ugly"); |
260 |
} |
261 |
|
262 |
|
263 |
/** |
264 |
* Callback for received header data. |
265 |
*/ |
266 |
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream) |
267 |
{ |
268 |
const size_t size = msize * nmemb; |
269 |
char s[400], *name, *value; |
270 |
|
271 |
UNUSED(stream); |
272 |
|
273 |
printf("* "); |
274 |
print(ptr, size); |
275 |
printf("\n"); |
276 |
|
277 |
if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) { |
278 |
lookup("notcrlf"); |
279 |
return size; |
280 |
} |
281 |
if (sizeof s <= size) { |
282 |
warning("header too long: ignored\n"); |
283 |
return size; |
284 |
} |
285 |
strncpy(s, ptr, size); |
286 |
s[size - 2] = 0; |
287 |
|
288 |
name = s; |
289 |
value = strchr(s, ':'); |
290 |
|
291 |
if (s[0] == 0) { |
292 |
/* empty header indicates end of headers */ |
293 |
puts("End of headers."); |
294 |
return 0; |
295 |
|
296 |
} else if (start) { |
297 |
/* Status-Line [6.1] */ |
298 |
check_status_line(s); |
299 |
start = false; |
300 |
|
301 |
} else if (!value) { |
302 |
lookup("missingcolon"); |
303 |
|
304 |
} else { |
305 |
*value = 0; |
306 |
value++; |
307 |
|
308 |
check_header(name, skip_lws(value)); |
309 |
} |
310 |
|
311 |
return size; |
312 |
} |
313 |
|
314 |
|
315 |
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned, which aborts the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
329 |
|
330 |
|
331 |
/** |
332 |
* Check the syntax and content of the response Status-Line [6.1]. |
333 |
*/ |
334 |
void check_status_line(const char *s) |
335 |
{ |
336 |
const char *reason; |
337 |
unsigned int major = 0, minor = 0; |
338 |
int r; |
339 |
regmatch_t pmatch[5]; |
340 |
|
341 |
r = regexec(&re_status_line, s, 5, pmatch, 0); |
342 |
if (r) { |
343 |
lookup("badstatusline"); |
344 |
return; |
345 |
} |
346 |
|
347 |
major = atoi(s + pmatch[1].rm_so); |
348 |
minor = atoi(s + pmatch[2].rm_so); |
349 |
status_code = atoi(s + pmatch[3].rm_so); |
350 |
reason = s + pmatch[4].rm_so; |
351 |
|
352 |
if (major < 1 || (major == 1 && minor == 0)) { |
353 |
lookup("oldhttp"); |
354 |
} else if ((major == 1 && 1 < minor) || 1 < major) { |
355 |
lookup("futurehttp"); |
356 |
} else { |
357 |
if (status_code < 100 || 600 <= status_code) { |
358 |
lookup("badstatus"); |
359 |
} else { |
360 |
char key[] = "xxx"; |
361 |
key[0] = '0' + status_code / 100; |
362 |
lookup(key); |
363 |
} |
364 |
} |
365 |
} |
366 |
|
367 |
|
368 |
/** |
369 |
* Check the syntax and content of a header. |
370 |
*/ |
371 |
void check_header(const char *name, const char *value) |
372 |
{ |
373 |
struct header_entry *header; |
374 |
|
375 |
header = bsearch(name, header_table, |
376 |
sizeof header_table / sizeof header_table[0], |
377 |
sizeof header_table[0], |
378 |
(int (*)(const void *, const void *)) strcasecmp); |
379 |
|
380 |
if (header) { |
381 |
header->count++; |
382 |
header->handler(value); |
383 |
} else |
384 |
lookup("nonstandard"); |
385 |
} |
386 |
|
387 |
|
388 |
/**
 * Attempt to parse an HTTP Full Date (3.3.1), returning true on success.
 *
 * The format tried is chosen by the length of the string: 29 characters
 * for RFC 1123, 24 for asctime(), anything else for RFC 1036.  The two
 * obsolete formats are accepted but produce a message.  On failure the
 * "baddate" message is printed and false is returned.
 */
bool parse_date(const char *s, struct tm *tm)
{
	const size_t len = strlen(s);
	const char *end;

	/*
	 * strptime() only stores the fields named in the format, so start
	 * from a fully defined value; callers go on to convert the result
	 * to a time_t.
	 */
	memset(tm, 0, sizeof *tm);

	if (len == 29) {
		/* RFC 1123 */
		end = strptime(s, "%a, %d %b %Y %H:%M:%S GMT", tm);
		if (end == s + len)
			return true;

	} else if (len == 24) {
		/* asctime() format; white space in the format matches any
		 * amount of input white space, so one attempt covers both
		 * one- and two-digit days */
		end = strptime(s, "%a %b %d %H:%M:%S %Y", tm);
		if (end == s + len) {
			lookup("asctime");
			return true;
		}

	} else {
		/* RFC 1036 */
		end = strptime(s, "%a, %d-%b-%y %H:%M:%S GMT", tm);
		if (end == s + len) {
			lookup("rfc1036");
			return true;
		}
	}

	lookup("baddate");
	return false;
}
428 |
|
429 |
|
430 |
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * At most one leading CR LF is stepped over, and only when it is followed
 * by a space or tab; then any run of spaces and tabs is skipped.  Returns
 * a pointer to the first character after the skipped text.
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	/* a line fold: CR LF followed by at least one space or tab */
	if (p[0] == 13 && p[1] == 10) {
		if (p[2] == ' ' || p[2] == '\t')
			p += 2;
	}

	for (; *p == ' ' || *p == '\t'; p++)
		;

	return p;
}
441 |
|
442 |
|
443 |
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * s        the list text
 * preg     anchored regex matching one element at the start of a string
 * n, m     minimum and maximum permitted number of elements; pass
 *          UINT_MAX as m for "no upper limit"
 * callback if not null, called for each element with the text the match
 *          was run against and the match offsets
 *
 * Returns true if every element matched and the count is within [n, m].
 * Problems are described on stdout before false is returned.  An empty
 * list is accepted when n is 0, and empty elements (extra commas) are
 * ignored, as [2.1] permits.
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	/* step over leading white space and null elements */
	s = skip_lws(s);
	while (*s == ',')
		s = skip_lws(s + 1);

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0)) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
487 |
|
488 |
|
489 |
/* Header-specific validation. */

/**
 * Accept-Ranges: "bytes" and "none" are reported as OK, anything else as
 * an unknown range unit.
 */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
499 |
|
500 |
void header_age(const char *s) |
501 |
{ |
502 |
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
503 |
lookup("badage"); |
504 |
else |
505 |
lookup("ok"); |
506 |
} |
507 |
|
508 |
void header_allow(const char *s) |
509 |
{ |
510 |
if (parse_list(s, &re_token, 0, UINT_MAX, 0)) |
511 |
lookup("ok"); |
512 |
else |
513 |
lookup("badallow"); |
514 |
} |
515 |
|
516 |
void header_cache_control(const char *s) |
517 |
{ |
518 |
if (parse_list(s, &re_token_value, 1, UINT_MAX, |
519 |
header_cache_control_callback)) |
520 |
lookup("ok"); |
521 |
else |
522 |
lookup("badcachecont"); |
523 |
} |
524 |
|
525 |
char cache_control_list[][20] = { |
526 |
"max-age", "max-stale", "min-fresh", "must-revalidate", |
527 |
"no-cache", "no-store", "no-transform", "only-if-cached", |
528 |
"private", "proxy-revalidate", "public", "s-maxage" |
529 |
}; |
530 |
|
531 |
void header_cache_control_callback(const char *s, regmatch_t pmatch[]) |
532 |
{ |
533 |
size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; |
534 |
char name[20]; |
535 |
char *dir; |
536 |
|
537 |
if (19 < len) { |
538 |
lookup("unknowncachecont"); |
539 |
return; |
540 |
} |
541 |
|
542 |
strncpy(name, s + pmatch[1].rm_so, len); |
543 |
name[len] = 0; |
544 |
|
545 |
dir = bsearch(name, cache_control_list, |
546 |
sizeof cache_control_list / sizeof cache_control_list[0], |
547 |
sizeof cache_control_list[0], |
548 |
(int (*)(const void *, const void *)) strcasecmp); |
549 |
|
550 |
if (!dir) { |
551 |
printf(" Cache-Control directive '%s':\n", name); |
552 |
lookup("unknowncachecont"); |
553 |
} |
554 |
} |
555 |
|
556 |
/**
 * Connection: only the exact value "close" is reported as OK.
 */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
563 |
|
564 |
void header_content_encoding(const char *s) |
565 |
{ |
566 |
if (parse_list(s, &re_token, 1, UINT_MAX, |
567 |
header_content_encoding_callback)) |
568 |
lookup("ok"); |
569 |
else |
570 |
lookup("badcontenc"); |
571 |
} |
572 |
|
573 |
char content_coding_list[][20] = { |
574 |
"compress", "deflate", "gzip", "identity" |
575 |
}; |
576 |
|
577 |
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]) |
578 |
{ |
579 |
size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; |
580 |
char name[20]; |
581 |
char *dir; |
582 |
|
583 |
if (19 < len) { |
584 |
lookup("unknowncontenc"); |
585 |
return; |
586 |
} |
587 |
|
588 |
strncpy(name, s + pmatch[1].rm_so, len); |
589 |
name[len] = 0; |
590 |
|
591 |
dir = bsearch(name, content_coding_list, |
592 |
sizeof content_coding_list / sizeof content_coding_list[0], |
593 |
sizeof content_coding_list[0], |
594 |
(int (*)(const void *, const void *)) strcasecmp); |
595 |
if (!dir) { |
596 |
printf(" Content-Encoding '%s':\n", name); |
597 |
lookup("unknowncontenc"); |
598 |
} |
599 |
} |
600 |
|
601 |
void header_content_language(const char *s) |
602 |
{ |
603 |
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
604 |
lookup("ok"); |
605 |
else |
606 |
lookup("badcontlang"); |
607 |
} |
608 |
|
609 |
void header_content_length(const char *s) |
610 |
{ |
611 |
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
612 |
lookup("badcontlen"); |
613 |
else |
614 |
lookup("ok"); |
615 |
} |
616 |
|
617 |
/**
 * Content-Location: the only check made is that the value contains no
 * space character.
 */
void header_content_location(const char *s)
{
	const bool has_space = strchr(s, ' ') != NULL;

	lookup(has_space ? "badcontloc" : "ok");
}
624 |
|
625 |
/**
 * Content-MD5: only the length is checked; the value must be exactly 24
 * characters long.
 */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
632 |
|
633 |
/**
 * Content-Range: always reported with the "contentrange" message; the
 * value itself is not examined.
 */
void header_content_range(const char *s)
{
	(void) s;
	lookup("contentrange");
}
638 |
|
639 |
void header_content_type(const char *s) |
640 |
{ |
641 |
bool charset = false; |
642 |
char *type, *subtype; |
643 |
unsigned int i; |
644 |
int r; |
645 |
regmatch_t pmatch[30]; |
646 |
|
647 |
r = regexec(&re_content_type, s, 30, pmatch, 0); |
648 |
if (r) { |
649 |
lookup("badcontenttype"); |
650 |
return; |
651 |
} |
652 |
|
653 |
type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so); |
654 |
subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so); |
655 |
|
656 |
/* parameters */ |
657 |
for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) { |
658 |
char *attrib, *value; |
659 |
|
660 |
attrib = strndup(s + pmatch[i + 1].rm_so, |
661 |
pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so); |
662 |
value = strndup(s + pmatch[i + 2].rm_so, |
663 |
pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so); |
664 |
|
665 |
if (strcasecmp(attrib, "charset") == 0) |
666 |
charset = true; |
667 |
} |
668 |
|
669 |
if (strcasecmp(type, "text") == 0 && !charset) |
670 |
lookup("nocharset"); |
671 |
else |
672 |
lookup("ok"); |
673 |
} |
674 |
|
675 |
/**
 * Date: must parse as an HTTP date and lie within 10 seconds of this
 * system's clock.  Nothing further is printed if parsing fails, since
 * parse_date() has already reported the problem.
 */
void header_date(const char *s)
{
	double diff;
	time_t now, stated;
	struct tm tm;

	now = time(0);
	if (!parse_date(s, &tm))
		return;

	/*
	 * HTTP dates are expressed in GMT, so convert with timegm();
	 * mktime() would interpret the fields as local time and skew the
	 * comparison by the local UTC offset.
	 */
	stated = timegm(&tm);

	diff = difftime(now, stated);
	if (10 < fabs(diff))
		lookup("wrongdate");
	else
		lookup("ok");
}
692 |
|
693 |
void header_etag(const char *s) |
694 |
{ |
695 |
int r; |
696 |
r = regexec(&re_etag, s, 0, 0, 0); |
697 |
if (r) |
698 |
lookup("badetag"); |
699 |
else |
700 |
lookup("ok"); |
701 |
} |
702 |
|
703 |
/**
 * Expires: must parse as an HTTP date.  A failure is reported by
 * parse_date() itself, so only success is announced here.
 */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
709 |
|
710 |
/**
 * Last-Modified: must parse as an HTTP date and must not be more than 10
 * seconds ahead of this system's clock.  Nothing further is printed if
 * parsing fails, since parse_date() has already reported the problem.
 */
void header_last_modified(const char *s)
{
	double ahead;
	time_t now, stated;
	struct tm tm;

	now = time(0);
	if (!parse_date(s, &tm))
		return;

	/*
	 * HTTP dates are expressed in GMT, so convert with timegm();
	 * mktime() would interpret the fields as local time and skew the
	 * comparison by the local UTC offset.
	 */
	stated = timegm(&tm);

	ahead = difftime(stated, now);
	if (10 < ahead)
		lookup("futurelastmod");
	else
		lookup("ok");
}
727 |
|
728 |
void header_location(const char *s) |
729 |
{ |
730 |
int r; |
731 |
r = regexec(&re_absolute_uri, s, 0, 0, 0); |
732 |
if (r) |
733 |
lookup("badlocation"); |
734 |
else |
735 |
lookup("ok"); |
736 |
} |
737 |
|
738 |
void header_pragma(const char *s) |
739 |
{ |
740 |
if (parse_list(s, &re_token_value, 1, UINT_MAX, 0)) |
741 |
lookup("ok"); |
742 |
else |
743 |
lookup("badpragma"); |
744 |
} |
745 |
|
746 |
void header_retry_after(const char *s) |
747 |
{ |
748 |
struct tm tm; |
749 |
|
750 |
if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) { |
751 |
lookup("ok"); |
752 |
return; |
753 |
} |
754 |
|
755 |
if (!parse_date(s, &tm)) |
756 |
return; |
757 |
|
758 |
lookup("ok"); |
759 |
} |
760 |
|
761 |
void header_server(const char *s) |
762 |
{ |
763 |
int r; |
764 |
r = regexec(&re_server, s, 0, 0, 0); |
765 |
if (r) |
766 |
lookup("badserver"); |
767 |
else |
768 |
lookup("ok"); |
769 |
} |
770 |
|
771 |
void header_trailer(const char *s) |
772 |
{ |
773 |
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
774 |
lookup("ok"); |
775 |
else |
776 |
lookup("badtrailer"); |
777 |
} |
778 |
|
779 |
void header_transfer_encoding(const char *s) |
780 |
{ |
781 |
if (parse_list(s, &re_transfer_coding, 1, UINT_MAX, |
782 |
header_transfer_encoding_callback)) |
783 |
lookup("ok"); |
784 |
else |
785 |
lookup("badtransenc"); |
786 |
} |
787 |
|
788 |
char transfer_coding_list[][20] = { |
789 |
"chunked", "compress", "deflate", "gzip", "identity" |
790 |
}; |
791 |
|
792 |
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]) |
793 |
{ |
794 |
size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; |
795 |
char name[20]; |
796 |
char *dir; |
797 |
|
798 |
if (19 < len) { |
799 |
lookup("unknowntransenc"); |
800 |
return; |
801 |
} |
802 |
|
803 |
strncpy(name, s + pmatch[1].rm_so, len); |
804 |
name[len] = 0; |
805 |
|
806 |
dir = bsearch(name, transfer_coding_list, |
807 |
sizeof transfer_coding_list / sizeof transfer_coding_list[0], |
808 |
sizeof transfer_coding_list[0], |
809 |
(int (*)(const void *, const void *)) strcasecmp); |
810 |
if (!dir) { |
811 |
printf(" Transfer-Encoding '%s':\n", name); |
812 |
lookup("unknowntransenc"); |
813 |
} |
814 |
} |
815 |
|
816 |
void header_upgrade(const char *s) |
817 |
{ |
818 |
int r; |
819 |
r = regexec(&re_upgrade, s, 0, 0, 0); |
820 |
if (r) |
821 |
lookup("badupgrade"); |
822 |
else |
823 |
lookup("ok"); |
824 |
} |
825 |
|
826 |
void header_vary(const char *s) |
827 |
{ |
828 |
if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0)) |
829 |
lookup("ok"); |
830 |
else |
831 |
lookup("badvary"); |
832 |
} |
833 |
|
834 |
/**
 * Via: always reported with the "via" message; the value itself is not
 * examined.
 */
void header_via(const char *s)
{
	(void) s;
	lookup("via");
}
839 |
|
840 |
|
841 |
/**
 * Print an error message and exit.
 *
 * The message goes to stderr, prefixed with the program name, and the
 * process ends with EXIT_FAILURE.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);
	exit(EXIT_FAILURE);
}
849 |
|
850 |
|
851 |
/**
 * Print a warning message.
 *
 * Written to stdout as "Warning: " followed by the message and a newline.
 */
void warning(const char *message)
{
	fputs("Warning: ", stdout);
	puts(message);
}
858 |
|
859 |
|
860 |
/**
 * Print an error message.
 *
 * Written to stdout as "Error: " followed by the message and a newline.
 */
void error(const char *message)
{
	fputs("Error: ", stdout);
	puts(message);
}
867 |
|
868 |
|
869 |
/**
 * Print a string which contains control characters.
 *
 * Exactly len bytes of s are written to stdout.  Printable ASCII bytes
 * (32 to 126) are written as they are; every other byte is written as a
 * two-digit hexadecimal value in square brackets, e.g. "[0d]".
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* go through unsigned char so that bytes >= 0x80 are not
		 * sign-extended into "[ffffff80]" where char is signed */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
882 |
|
883 |
|
884 |
/*
 * Messages printed by lookup(), indexed by a short key.
 *
 * lookup() searches this table with bsearch() and strcasecmp(), so the
 * entries must stay sorted case-insensitively by key, and key must remain
 * the first member.
 */
struct message_entry {
	const char key[20];
	const char *value;
} message_table[] = {
	{ "1xx", "A response status code in the range 100 - 199 indicates a "
			"'provisional response'." },
	{ "2xx", "A response status code in the range 200 - 299 indicates that "
			"the request was successful." },
	{ "3xx", "A response status code in the range 300 - 399 indicates that "
			"the client should redirect to a new URL." },
	{ "4xx", "A response status code in the range 400 - 499 indicates that "
			"the request could not be fulfilled due to client error." },
	{ "5xx", "A response status code in the range 500 - 599 indicates that "
			"an error occurred on the server." },
	{ "asctime", "Warning: This date is in the obsolete asctime() format. "
			"Consider using the RFC 1123 format instead." },
	{ "badage", "Error: The Age header must be one number." },
	{ "badallow", "Error: The Allow header must be a comma-separated list of "
			"HTTP methods." },
	{ "badcachecont", "Error: The Cache-Control header must be a "
			"comma-separated list of directives." },
	{ "badconnection", "Warning: The only value of the Connection header "
			"defined by HTTP/1.1 is \"close\"." },
	{ "badcontenc", "Error: The Content-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badcontenttype", "Error: The Content-Type header must be of the form "
			"'type/subtype (; optional parameters)'." },
	{ "badcontlang", "Error: The Content-Language header must be a "
			"comma-separated list of language tags." },
	{ "badcontlen", "Error: The Content-Length header must be a number." },
	{ "badcontloc", "Error: The Content-Location header must be an absolute "
			"or relative URI." },
	{ "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
			"MD5 sum." },
	{ "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
			"1123 format." },
	{ "badetag", "Error: The ETag header must be a quoted string (optionally "
			"preceded by \"W/\" for a weak tag)." },
	{ "badlocation", "Error: The Location header must be an absolute URI. "
			"Relative URIs are not permitted." },
	{ "badpragma", "Error: The Pragma header must be a comma-separated list of "
			"directives." },
	{ "badserver", "Error: The Server header must be a space-separated list of "
			"products of the form Name/optional-version and comments "
			"in ()." },
	{ "badstatus", "Warning: The response status code is outside the standard "
			"range 100 - 599." },
	{ "badstatusline", "Error: Failed to parse the response Status-Line. The "
			"status line must be of the form 'HTTP/n.n <3-digit "
			"status> <reason phrase>'." },
	{ "badtrailer", "Error: The Trailer header must be a comma-separated list "
			"of header names." },
	{ "badtransenc", "Error: The Transfer-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badupgrade", "Error: The Upgrade header must be a comma-separated list "
			"of product identifiers." },
	{ "badvary", "Error: The Vary header must be a comma-separated list "
			"of header names, or \"*\"." },
	{ "contentrange", "Warning: The Content-Range header should not be returned "
			"by the server for this request." },
	{ "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
			"version of this tool." },
	{ "futurelastmod", "Error: The specified Last-Modified date-time is in "
			"the future." },
	{ "missingcolon", "Error: Headers must be of the form 'Name: value'." },
	{ "missingcontenttype", "Warning: No Content-Type header was present. The "
			"client will have to guess the media type or ask "
			"the user. Adding a Content-Type header is strongly "
			"recommended." },
	{ "missingcontlang", "Consider adding a Content-Language header if "
			"applicable for this document." },
	{ "missingdate", "Warning: No Date header was present. A Date header must "
			"be present, unless the server does not have a clock, or "
			"the response is 100, 101, or 500 - 599." },
	{ "missinglastmod", "No Last-Modified header was present. The "
			"HTTP/1.1 specification states that this header should "
			"be sent whenever feasible." },
	{ "nocharset", "Warning: No character set is specified in the Content-Type. "
			"Clients may assume the default of ISO-8859-1. Consider "
			"appending '; charset=...'." },
	{ "nonstandard", "Warning: I don't know anything about this header. Is it "
			"a standard HTTP response header?" },
	{ "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
			"that all header lines end with CR LF." },
	{ "ok", "OK." },
	{ "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
			"to HTTP/1.1." },
	{ "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
			"Consider using the RFC 1123 format instead." },
	{ "ugly", "This URL appears to contain implementation-specific parts such "
			"as an extension or a query string. This may make the URL liable "
			"to change when the implementation is changed, resulting in "
			"broken links. Consider using URL rewriting or equivalent to "
			"implement a future-proof URL space. See "
			"http://www.w3.org/Provider/Style/URI for more information." },
	{ "unknowncachecont", "Warning: This Cache-Control directive is "
			"non-standard and will have limited support." },
	{ "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
	{ "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
			"range." },
	{ "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
	{ "via", "This header was added by a proxy, cache or gateway." },
	{ "wrongdate", "Warning: The server date-time differs from this system's "
			"date-time by more than 10 seconds. Check that both the "
			"system clocks are correct." }
};
990 |
|
991 |
|
992 |
/** |
993 |
* Look up and output the string referenced by a key. |
994 |
*/ |
995 |
void lookup(const char *key) |
996 |
{ |
997 |
const char *s, *spc; |
998 |
int x; |
999 |
struct message_entry *message; |
1000 |
|
1001 |
message = bsearch(key, message_table, |
1002 |
sizeof message_table / sizeof message_table[0], |
1003 |
sizeof message_table[0], |
1004 |
(int (*)(const void *, const void *)) strcasecmp); |
1005 |
if (message) |
1006 |
s = message->value; |
1007 |
else |
1008 |
s = key; |
1009 |
|
1010 |
printf(" "); |
1011 |
x = 4; |
1012 |
while (*s) { |
1013 |
spc = strchr(s, ' '); |
1014 |
if (!spc) |
1015 |
spc = s + strlen(s); |
1016 |
if (75 < x + (spc - s)) { |
1017 |
printf("\n "); |
1018 |
x = 4; |
1019 |
} |
1020 |
x += spc - s + 1; |
1021 |
printf("%.*s ", spc - s, s); |
1022 |
if (*spc) |
1023 |
s = spc + 1; |
1024 |
else |
1025 |
s = spc; |
1026 |
} |
1027 |
printf("\n\n"); |
1028 |
} |