1 |
/* |
2 |
* HTTP Header Lint |
3 |
* Licensed under the same license as Curl |
4 |
* http://curl.haxx.se/docs/copyright.html |
5 |
* Copyright 2003 James Bursa <bursa@users.sourceforge.net> |
6 |
*/ |
7 |
|
8 |
/* |
9 |
* Compile using |
10 |
* gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c |
11 |
* |
12 |
* References of the form [6.1.1] are to RFC 2616 (HTTP/1.1). |
13 |
*/ |
14 |
|
15 |
#define _GNU_SOURCE |
16 |
#define __USE_XOPEN |
17 |
|
18 |
#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <sys/types.h>
#include <regex.h>
#include <curl/curl.h>
28 |
|
29 |
|
30 |
/* The decimal digits, for use with strspn() when validating numeric values. */
#define NUMBER "0123456789"
/* Mark a parameter as deliberately unused; a void cast also works on const objects. */
#define UNUSED(x) ((void) (x))
32 |
|
33 |
|
34 |
bool start; |
35 |
CURL *curl; |
36 |
int status_code; |
37 |
char error_buffer[CURL_ERROR_SIZE]; |
38 |
regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly, |
39 |
re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade, |
40 |
re_rfc1123, re_rfc1036, re_asctime; |
41 |
|
42 |
|
43 |
/* Set-up, fetching and the libcurl callbacks. */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* Checks applied to each received line. */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);

/* Parsing helpers. */
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg,
		unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* One handler per header listed in header_table, plus list-item callbacks. */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);

/* Message output. */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);
89 |
|
90 |
|
91 |
struct header_entry { |
92 |
char name[40]; |
93 |
void (*handler)(const char *s); |
94 |
int count; |
95 |
char *missing; |
96 |
} header_table[] = { |
97 |
{ "Accept-Ranges", header_accept_ranges, 0, 0 }, |
98 |
{ "Age", header_age, 0, 0 }, |
99 |
{ "Allow", header_allow, 0, 0 }, |
100 |
{ "Cache-Control", header_cache_control, 0, 0 }, |
101 |
{ "Connection", header_connection, 0, 0 }, |
102 |
{ "Content-Encoding", header_content_encoding, 0, 0 }, |
103 |
{ "Content-Language", header_content_language, 0, "missingcontlang" }, |
104 |
{ "Content-Length", header_content_length, 0, 0 }, |
105 |
{ "Content-Location", header_content_location, 0, 0 }, |
106 |
{ "Content-MD5", header_content_md5, 0, 0 }, |
107 |
{ "Content-Range", header_content_range, 0, 0 }, |
108 |
{ "Content-Type", header_content_type, 0, "missingcontenttype" }, |
109 |
{ "Date", header_date, 0, "missingdate" }, |
110 |
{ "ETag", header_etag, 0, 0 }, |
111 |
{ "Expires", header_expires, 0, 0 }, |
112 |
{ "Last-Modified", header_last_modified, 0, "missinglastmod" }, |
113 |
{ "Location", header_location, 0, 0 }, |
114 |
{ "Pragma", header_pragma, 0, 0 }, |
115 |
{ "Retry-After", header_retry_after, 0, 0 }, |
116 |
{ "Server", header_server, 0, 0 }, |
117 |
{ "Trailer", header_trailer, 0, 0 }, |
118 |
{ "Transfer-Encoding", header_transfer_encoding, 0, 0 }, |
119 |
{ "Upgrade", header_upgrade, 0, 0 }, |
120 |
{ "Vary", header_vary, 0, 0 }, |
121 |
{ "Via", header_via, 0, 0 } |
122 |
}; |
123 |
|
124 |
|
125 |
/** |
126 |
* Main entry point. |
127 |
*/ |
128 |
int main(int argc, char *argv[]) |
129 |
{ |
130 |
int i; |
131 |
|
132 |
if (argc < 2) |
133 |
die("Usage: httplint url [url ...]"); |
134 |
|
135 |
init(); |
136 |
|
137 |
for (i = 1; i != argc; i++) |
138 |
check_url(argv[i]); |
139 |
|
140 |
curl_global_cleanup(); |
141 |
|
142 |
return 0; |
143 |
} |
144 |
|
145 |
|
146 |
/** |
147 |
* Initialise the curl handle and compile regular expressions. |
148 |
*/ |
149 |
void init(void) |
150 |
{ |
151 |
struct curl_slist *request_headers = 0; |
152 |
|
153 |
if (curl_global_init(CURL_GLOBAL_ALL)) |
154 |
die("Failed to initialise libcurl"); |
155 |
|
156 |
curl = curl_easy_init(); |
157 |
if (!curl) |
158 |
die("Failed to create curl handle"); |
159 |
|
160 |
if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback)) |
161 |
die("Failed to set curl options"); |
162 |
if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback)) |
163 |
die("Failed to set curl options"); |
164 |
if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint")) |
165 |
die("Failed to set curl options"); |
166 |
if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer)) |
167 |
die("Failed to set curl options"); |
168 |
|
169 |
/* remove libcurl default headers */ |
170 |
request_headers = curl_slist_append(request_headers, "Accept:"); |
171 |
request_headers = curl_slist_append(request_headers, "Pragma:"); |
172 |
if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers)) |
173 |
die("Failed to set curl options"); |
174 |
|
175 |
/* compile regular expressions */ |
176 |
regcomp_wrapper(&re_status_line, |
177 |
"^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$", |
178 |
REG_EXTENDED); |
179 |
regcomp_wrapper(&re_token, |
180 |
"^([-0-9a-zA-Z_.!]+)", |
181 |
REG_EXTENDED); |
182 |
regcomp_wrapper(&re_token_value, |
183 |
"^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?", |
184 |
REG_EXTENDED); |
185 |
regcomp_wrapper(&re_content_type, |
186 |
"^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*" |
187 |
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
188 |
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
189 |
REG_EXTENDED); |
190 |
regcomp_wrapper(&re_absolute_uri, |
191 |
"^[a-zA-Z0-9]+://[^ ]+$", |
192 |
REG_EXTENDED); |
193 |
regcomp_wrapper(&re_etag, |
194 |
"^(W/[ \t]*)?\"([^\"]|[\\].)*\"$", |
195 |
REG_EXTENDED); |
196 |
regcomp_wrapper(&re_server, |
197 |
"^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$", |
198 |
REG_EXTENDED); |
199 |
regcomp_wrapper(&re_transfer_coding, |
200 |
"^([-0-9a-zA-Z_.]+)[ \t]*" |
201 |
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
202 |
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
203 |
REG_EXTENDED); |
204 |
regcomp_wrapper(&re_upgrade, |
205 |
"^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$", |
206 |
REG_EXTENDED); |
207 |
regcomp_wrapper(&re_ugly, |
208 |
"^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$", |
209 |
REG_EXTENDED); |
210 |
regcomp_wrapper(&re_rfc1123, |
211 |
"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) " |
212 |
"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) " |
213 |
"([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", |
214 |
REG_EXTENDED); |
215 |
regcomp_wrapper(&re_rfc1036, |
216 |
"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), " |
217 |
"([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-" |
218 |
"([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", |
219 |
REG_EXTENDED); |
220 |
regcomp_wrapper(&re_asctime, |
221 |
"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) " |
222 |
"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) " |
223 |
"([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$", |
224 |
REG_EXTENDED); |
225 |
} |
226 |
|
227 |
|
228 |
/**
 * Compile a regular expression, treating failure as fatal.
 *
 * On failure the offending pattern is written to stderr and the text from
 * regerror() is passed to die(), so this only returns when regcomp()
 * succeeded.
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
242 |
|
243 |
|
244 |
/** |
245 |
* Fetch and check the headers for the specified url. |
246 |
*/ |
247 |
void check_url(const char *url) |
248 |
{ |
249 |
int i, r; |
250 |
CURLcode code; |
251 |
|
252 |
start = true; |
253 |
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) |
254 |
header_table[i].count = 0; |
255 |
|
256 |
printf("Checking URL %s\n", url); |
257 |
if (strncmp(url, "http", 4)) |
258 |
warning("this is not an http or https url"); |
259 |
|
260 |
if (curl_easy_setopt(curl, CURLOPT_URL, url)) |
261 |
die("Failed to set curl options"); |
262 |
|
263 |
code = curl_easy_perform(curl); |
264 |
if (code != CURLE_OK && code != CURLE_WRITE_ERROR) { |
265 |
error(error_buffer); |
266 |
return; |
267 |
} else { |
268 |
printf("\n"); |
269 |
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) { |
270 |
if (header_table[i].count == 0 && header_table[i].missing) |
271 |
lookup(header_table[i].missing); |
272 |
} |
273 |
} |
274 |
|
275 |
r = regexec(&re_ugly, url, 0, 0, 0); |
276 |
if (r) |
277 |
lookup("ugly"); |
278 |
} |
279 |
|
280 |
|
281 |
/** |
282 |
* Callback for received header data. |
283 |
*/ |
284 |
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream) |
285 |
{ |
286 |
const size_t size = msize * nmemb; |
287 |
char s[400], *name, *value; |
288 |
|
289 |
UNUSED(stream); |
290 |
|
291 |
printf("* "); |
292 |
print(ptr, size); |
293 |
printf("\n"); |
294 |
|
295 |
if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) { |
296 |
lookup("notcrlf"); |
297 |
return size; |
298 |
} |
299 |
if (sizeof s <= size) { |
300 |
warning("header too long: ignored\n"); |
301 |
return size; |
302 |
} |
303 |
strncpy(s, ptr, size); |
304 |
s[size - 2] = 0; |
305 |
|
306 |
name = s; |
307 |
value = strchr(s, ':'); |
308 |
|
309 |
if (s[0] == 0) { |
310 |
/* empty header indicates end of headers */ |
311 |
puts("End of headers."); |
312 |
return 0; |
313 |
|
314 |
} else if (start) { |
315 |
/* Status-Line [6.1] */ |
316 |
check_status_line(s); |
317 |
start = false; |
318 |
|
319 |
} else if (!value) { |
320 |
lookup("missingcolon"); |
321 |
|
322 |
} else { |
323 |
*value = 0; |
324 |
value++; |
325 |
|
326 |
check_header(name, skip_lws(value)); |
327 |
} |
328 |
|
329 |
return size; |
330 |
} |
331 |
|
332 |
|
333 |
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned, which aborts the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
347 |
|
348 |
|
349 |
/** |
350 |
* Check the syntax and content of the response Status-Line [6.1]. |
351 |
*/ |
352 |
void check_status_line(const char *s) |
353 |
{ |
354 |
const char *reason; |
355 |
unsigned int major = 0, minor = 0; |
356 |
int r; |
357 |
regmatch_t pmatch[5]; |
358 |
|
359 |
r = regexec(&re_status_line, s, 5, pmatch, 0); |
360 |
if (r) { |
361 |
lookup("badstatusline"); |
362 |
return; |
363 |
} |
364 |
|
365 |
major = atoi(s + pmatch[1].rm_so); |
366 |
minor = atoi(s + pmatch[2].rm_so); |
367 |
status_code = atoi(s + pmatch[3].rm_so); |
368 |
reason = s + pmatch[4].rm_so; |
369 |
|
370 |
if (major < 1 || (major == 1 && minor == 0)) { |
371 |
lookup("oldhttp"); |
372 |
} else if ((major == 1 && 1 < minor) || 1 < major) { |
373 |
lookup("futurehttp"); |
374 |
} else { |
375 |
if (status_code < 100 || 600 <= status_code) { |
376 |
lookup("badstatus"); |
377 |
} else { |
378 |
char key[] = "xxx"; |
379 |
key[0] = '0' + status_code / 100; |
380 |
lookup(key); |
381 |
} |
382 |
} |
383 |
} |
384 |
|
385 |
|
386 |
/** |
387 |
* Check the syntax and content of a header. |
388 |
*/ |
389 |
void check_header(const char *name, const char *value) |
390 |
{ |
391 |
struct header_entry *header; |
392 |
|
393 |
header = bsearch(name, header_table, |
394 |
sizeof header_table / sizeof header_table[0], |
395 |
sizeof header_table[0], |
396 |
(int (*)(const void *, const void *)) strcasecmp); |
397 |
|
398 |
if (header) { |
399 |
header->count++; |
400 |
header->handler(value); |
401 |
} else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') { |
402 |
lookup("xheader"); |
403 |
} else { |
404 |
lookup("nonstandard"); |
405 |
} |
406 |
} |
407 |
|
408 |
|
409 |
/** |
410 |
* Attempt to parse an HTTP Full Date (3.3.1), returning true on success. |
411 |
*/ |
412 |
bool parse_date(const char *s, struct tm *tm) |
413 |
{ |
414 |
int r; |
415 |
int len = strlen(s); |
416 |
regmatch_t pmatch[20]; |
417 |
|
418 |
if (len == 29) { |
419 |
/* RFC 1123 */ |
420 |
r = regexec(&re_rfc1123, s, 20, pmatch, 0); |
421 |
if (r == 0) { |
422 |
tm->tm_mday = atoi(s + pmatch[2].rm_so); |
423 |
tm->tm_mon = month(s + pmatch[3].rm_so); |
424 |
tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900; |
425 |
tm->tm_hour = atoi(s + pmatch[5].rm_so); |
426 |
tm->tm_min = atoi(s + pmatch[6].rm_so); |
427 |
tm->tm_sec = atoi(s + pmatch[7].rm_so); |
428 |
return true; |
429 |
} |
430 |
|
431 |
} else if (len == 24) { |
432 |
/* asctime() format */ |
433 |
r = regexec(&re_asctime, s, 20, pmatch, 0); |
434 |
if (r == 0) { |
435 |
if (s[pmatch[3].rm_so] == ' ') |
436 |
tm->tm_mday = atoi(s + pmatch[3].rm_so + 1); |
437 |
else |
438 |
tm->tm_mday = atoi(s + pmatch[3].rm_so); |
439 |
tm->tm_mon = month(s + pmatch[2].rm_so); |
440 |
tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900; |
441 |
tm->tm_hour = atoi(s + pmatch[4].rm_so); |
442 |
tm->tm_min = atoi(s + pmatch[5].rm_so); |
443 |
tm->tm_sec = atoi(s + pmatch[6].rm_so); |
444 |
lookup("asctime"); |
445 |
return true; |
446 |
} |
447 |
|
448 |
} else { |
449 |
/* RFC 1036 */ |
450 |
r = regexec(&re_rfc1036, s, 20, pmatch, 0); |
451 |
if (r == 0) { |
452 |
tm->tm_mday = atoi(s + pmatch[2].rm_so); |
453 |
tm->tm_mon = month(s + pmatch[3].rm_so); |
454 |
tm->tm_year = 100 + atoi(s + pmatch[4].rm_so); |
455 |
tm->tm_hour = atoi(s + pmatch[5].rm_so); |
456 |
tm->tm_min = atoi(s + pmatch[6].rm_so); |
457 |
tm->tm_sec = atoi(s + pmatch[7].rm_so); |
458 |
lookup("rfc1036"); |
459 |
return true; |
460 |
} |
461 |
|
462 |
} |
463 |
|
464 |
lookup("baddate"); |
465 |
return false; |
466 |
} |
467 |
|
468 |
|
469 |
/**
 * Convert a month name to the month number.
 *
 * Only the first three characters of s are examined, so s may point into
 * the middle of a longer date string. Returns 0 for "Jan" up to 11 for
 * "Dec", and 0 if s does not start with an English month abbreviation.
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	/* strncmp() stops at the terminator, so short strings are safe */
	for (i = 0; i != 12; i++) {
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}

	return 0;
}
499 |
|
500 |
|
501 |
/**
 * Call mktime(), and if that fails try again one hour earlier and add the
 * hour back on. Returns false if both attempts fail.
 */
static bool mktime_or_hour_earlier(struct tm *tm, time_t *result)
{
	time_t value = mktime(tm);

	if (value == -1) {
		tm->tm_hour--;
		value = mktime(tm);
		if (value == -1)
			return false;
		value += 3600;
	}

	*result = value;
	return true;
}

/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * The time is first converted as if it were local time. That result is then
 * broken down with gmtime() and converted as local time again; the
 * difference between the two conversions is used to correct the first one.
 * Returns -1 if mktime() cannot convert either time.
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t as_local, round_trip;
	struct tm *broken_down;

	if (!mktime_or_hour_earlier(t, &as_local))
		return -1;	/* can't deal with output from strptime */

	broken_down = gmtime(&as_local);
	broken_down->tm_isdst = 0;
	if (!mktime_or_hour_earlier(broken_down, &round_trip))
		return -1;	/* can't deal with output from gmtime */

	return (as_local - (round_trip - as_local));
}
532 |
|
533 |
|
534 |
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * A CR LF is stepped over only when a space or tab follows it; after that
 * any run of spaces and tabs is skipped. Returns a pointer to the first
 * character which was not skipped.
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
		p += 2;

	for (; *p == ' ' || *p == '\t'; p++)
		continue;

	return p;
}
545 |
|
546 |
|
547 |
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * Each element must match preg at the current position. Elements are
 * separated by one or more commas, with optional LWS around them. callback,
 * if not 0, is called for each element with the string position at which
 * the element starts and the match offsets relative to it.
 *
 * n and m are the least and greatest number of elements allowed; pass
 * UINT_MAX as m for no upper limit. An empty string is a list of no
 * elements, which is acceptable only when n is 0.
 *
 * Returns true if the list is well-formed and its length is in range;
 * otherwise a description of the problem is printed and false is returned.
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0)) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		/* several commas in a row separate nothing and are skipped */
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
591 |
|
592 |
|
593 |
/* Header-specific validation. */ |
594 |
/**
 * Check an Accept-Ranges header: only "bytes" and "none" are known units.
 */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
603 |
|
604 |
void header_age(const char *s) |
605 |
{ |
606 |
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
607 |
lookup("badage"); |
608 |
else |
609 |
lookup("ok"); |
610 |
} |
611 |
|
612 |
void header_allow(const char *s) |
613 |
{ |
614 |
if (parse_list(s, &re_token, 0, UINT_MAX, 0)) |
615 |
lookup("ok"); |
616 |
else |
617 |
lookup("badallow"); |
618 |
} |
619 |
|
620 |
void header_cache_control(const char *s) |
621 |
{ |
622 |
if (parse_list(s, &re_token_value, 1, UINT_MAX, |
623 |
header_cache_control_callback)) |
624 |
lookup("ok"); |
625 |
else |
626 |
lookup("badcachecont"); |
627 |
} |
628 |
|
629 |
/* The known Cache-Control directives. Searched with bsearch(), so the
 * entries must stay sorted ignoring case. */
char cache_control_list[][20] = {
	"max-age", "max-stale", "min-fresh", "must-revalidate",
	"no-cache", "no-store", "no-transform", "only-if-cached",
	"private", "proxy-revalidate", "public", "s-maxage"
};

/**
 * Compare a directive name with a cache_control_list entry, ignoring case.
 * Comparator for bsearch().
 */
static int cache_control_compare(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * Check one Cache-Control directive matched by parse_list().
 *
 * pmatch[1] holds the directive name. Names which are not in
 * cache_control_list, or are too long to be, are reported as unknown.
 */
void header_cache_control_callback(const char *s, regmatch_t pmatch[])
{
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* longer than any list entry, and would not fit in name */
	if (19 < len) {
		lookup("unknowncachecont");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (!bsearch(name, cache_control_list,
			sizeof cache_control_list / sizeof cache_control_list[0],
			sizeof cache_control_list[0],
			cache_control_compare)) {
		printf(" Cache-Control directive '%s':\n", name);
		lookup("unknowncachecont");
	}
}
659 |
|
660 |
/**
 * Check a Connection header: any value other than exactly "close" is
 * reported.
 */
void header_connection(const char *s)
{
	lookup(strcmp(s, "close") == 0 ? "ok" : "badconnection");
}
667 |
|
668 |
void header_content_encoding(const char *s) |
669 |
{ |
670 |
if (parse_list(s, &re_token, 1, UINT_MAX, |
671 |
header_content_encoding_callback)) |
672 |
lookup("ok"); |
673 |
else |
674 |
lookup("badcontenc"); |
675 |
} |
676 |
|
677 |
/* The known content codings. Searched with bsearch(), so the entries must
 * stay sorted ignoring case. */
char content_coding_list[][20] = {
	"compress", "deflate", "gzip", "identity"
};

/**
 * Compare a coding name with a content_coding_list entry, ignoring case.
 * Comparator for bsearch().
 */
static int content_coding_compare(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * Check one Content-Encoding value matched by parse_list().
 *
 * pmatch[1] holds the coding name. Names which are not in
 * content_coding_list, or are too long to be, are reported as unknown.
 */
void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* longer than any list entry, and would not fit in name */
	if (19 < len) {
		lookup("unknowncontenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (!bsearch(name, content_coding_list,
			sizeof content_coding_list / sizeof content_coding_list[0],
			sizeof content_coding_list[0],
			content_coding_compare)) {
		printf(" Content-Encoding '%s':\n", name);
		lookup("unknowncontenc");
	}
}
704 |
|
705 |
void header_content_language(const char *s) |
706 |
{ |
707 |
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
708 |
lookup("ok"); |
709 |
else |
710 |
lookup("badcontlang"); |
711 |
} |
712 |
|
713 |
void header_content_length(const char *s) |
714 |
{ |
715 |
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
716 |
lookup("badcontlen"); |
717 |
else |
718 |
lookup("ok"); |
719 |
} |
720 |
|
721 |
/**
 * Check a Content-Location header: the only test made is that the value
 * contains no space.
 */
void header_content_location(const char *s)
{
	lookup(strchr(s, ' ') ? "badcontloc" : "ok");
}
728 |
|
729 |
/**
 * Check a Content-MD5 header: the only test made is that the value is
 * 24 characters long.
 */
void header_content_md5(const char *s)
{
	lookup(strlen(s) == 24 ? "ok" : "badcontmd5");
}
736 |
|
737 |
/**
 * Report a Content-Range header; its value is not examined.
 */
void header_content_range(const char *s)
{
	(void) s;

	lookup("contentrange");
}
742 |
|
743 |
void header_content_type(const char *s) |
744 |
{ |
745 |
bool charset = false; |
746 |
char *type, *subtype; |
747 |
unsigned int i; |
748 |
int r; |
749 |
regmatch_t pmatch[30]; |
750 |
|
751 |
r = regexec(&re_content_type, s, 30, pmatch, 0); |
752 |
if (r) { |
753 |
lookup("badcontenttype"); |
754 |
return; |
755 |
} |
756 |
|
757 |
type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so); |
758 |
subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so); |
759 |
|
760 |
/* parameters */ |
761 |
for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) { |
762 |
char *attrib, *value; |
763 |
|
764 |
attrib = strndup(s + pmatch[i + 1].rm_so, |
765 |
pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so); |
766 |
value = strndup(s + pmatch[i + 2].rm_so, |
767 |
pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so); |
768 |
|
769 |
if (strcasecmp(attrib, "charset") == 0) |
770 |
charset = true; |
771 |
} |
772 |
|
773 |
if (strcasecmp(type, "text") == 0 && !charset) |
774 |
lookup("nocharset"); |
775 |
else |
776 |
lookup("ok"); |
777 |
} |
778 |
|
779 |
/**
 * Check a Date header: the date must parse, and must be within 10 seconds
 * of this system's clock.
 */
void header_date(const char *s)
{
	struct tm parsed;
	const time_t now = time(0);
	time_t server_time;

	/* parse_date() has already reported any problem */
	if (!parse_date(s, &parsed))
		return;

	server_time = mktime_from_utc(&parsed);

	lookup(10 < fabs(difftime(now, server_time)) ? "wrongdate" : "ok");
}
796 |
|
797 |
void header_etag(const char *s) |
798 |
{ |
799 |
int r; |
800 |
r = regexec(&re_etag, s, 0, 0, 0); |
801 |
if (r) |
802 |
lookup("badetag"); |
803 |
else |
804 |
lookup("ok"); |
805 |
} |
806 |
|
807 |
/**
 * Check an Expires header: the value only has to parse as a date.
 */
void header_expires(const char *s)
{
	struct tm parsed;

	/* parse_date() has already reported any problem */
	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
813 |
|
814 |
/**
 * Check a Last-Modified header: the date must parse, and must not be more
 * than 10 seconds ahead of this system's clock.
 */
void header_last_modified(const char *s)
{
	struct tm parsed;
	const time_t now = time(0);
	time_t modified;

	/* parse_date() has already reported any problem */
	if (!parse_date(s, &parsed))
		return;

	modified = mktime_from_utc(&parsed);

	lookup(10 < difftime(modified, now) ? "futurelastmod" : "ok");
}
831 |
|
832 |
void header_location(const char *s) |
833 |
{ |
834 |
int r; |
835 |
r = regexec(&re_absolute_uri, s, 0, 0, 0); |
836 |
if (r) |
837 |
lookup("badlocation"); |
838 |
else |
839 |
lookup("ok"); |
840 |
} |
841 |
|
842 |
void header_pragma(const char *s) |
843 |
{ |
844 |
if (parse_list(s, &re_token_value, 1, UINT_MAX, 0)) |
845 |
lookup("ok"); |
846 |
else |
847 |
lookup("badpragma"); |
848 |
} |
849 |
|
850 |
void header_retry_after(const char *s) |
851 |
{ |
852 |
struct tm tm; |
853 |
|
854 |
if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) { |
855 |
lookup("ok"); |
856 |
return; |
857 |
} |
858 |
|
859 |
if (!parse_date(s, &tm)) |
860 |
return; |
861 |
|
862 |
lookup("ok"); |
863 |
} |
864 |
|
865 |
void header_server(const char *s) |
866 |
{ |
867 |
int r; |
868 |
r = regexec(&re_server, s, 0, 0, 0); |
869 |
if (r) |
870 |
lookup("badserver"); |
871 |
else |
872 |
lookup("ok"); |
873 |
} |
874 |
|
875 |
void header_trailer(const char *s) |
876 |
{ |
877 |
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
878 |
lookup("ok"); |
879 |
else |
880 |
lookup("badtrailer"); |
881 |
} |
882 |
|
883 |
void header_transfer_encoding(const char *s) |
884 |
{ |
885 |
if (parse_list(s, &re_transfer_coding, 1, UINT_MAX, |
886 |
header_transfer_encoding_callback)) |
887 |
lookup("ok"); |
888 |
else |
889 |
lookup("badtransenc"); |
890 |
} |
891 |
|
892 |
/* The known transfer codings. Searched with bsearch(), so the entries must
 * stay sorted ignoring case. */
char transfer_coding_list[][20] = {
	"chunked", "compress", "deflate", "gzip", "identity"
};

/**
 * Compare a coding name with a transfer_coding_list entry, ignoring case.
 * Comparator for bsearch().
 */
static int transfer_coding_compare(const void *key, const void *entry)
{
	return strcasecmp(key, entry);
}

/**
 * Check one Transfer-Encoding value matched by parse_list().
 *
 * pmatch[1] holds the coding name. Names which are not in
 * transfer_coding_list, or are too long to be, are reported as unknown.
 */
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* longer than any list entry, and would not fit in name */
	if (19 < len) {
		lookup("unknowntransenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	if (!bsearch(name, transfer_coding_list,
			sizeof transfer_coding_list / sizeof transfer_coding_list[0],
			sizeof transfer_coding_list[0],
			transfer_coding_compare)) {
		printf(" Transfer-Encoding '%s':\n", name);
		lookup("unknowntransenc");
	}
}
919 |
|
920 |
void header_upgrade(const char *s) |
921 |
{ |
922 |
int r; |
923 |
r = regexec(&re_upgrade, s, 0, 0, 0); |
924 |
if (r) |
925 |
lookup("badupgrade"); |
926 |
else |
927 |
lookup("ok"); |
928 |
} |
929 |
|
930 |
void header_vary(const char *s) |
931 |
{ |
932 |
if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0)) |
933 |
lookup("ok"); |
934 |
else |
935 |
lookup("badvary"); |
936 |
} |
937 |
|
938 |
/**
 * Report a Via header; its value is not examined.
 */
void header_via(const char *s)
{
	(void) s;

	lookup("via");
}
943 |
|
944 |
|
945 |
/**
 * Print an error message to stderr, prefixed with the program name, and
 * exit with EXIT_FAILURE. Does not return.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);
	exit(EXIT_FAILURE);
}
953 |
|
954 |
|
955 |
/**
 * Print a warning message on its own line of stdout.
 */
void warning(const char *message)
{
	fputs("Warning: ", stdout);
	puts(message);
}
962 |
|
963 |
|
964 |
/**
 * Print an error message on its own line of stdout.
 */
void error(const char *message)
{
	fputs("Error: ", stdout);
	puts(message);
}
971 |
|
972 |
|
973 |
/**
 * Print a string which contains control characters.
 *
 * The len bytes at s are written to stdout. Printable ASCII is written as
 * it is; every other byte is written as two hex digits in square brackets.
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* go through unsigned char so that bytes of 0x80 and above are
		 * not sign extended where char is signed */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
986 |
|
987 |
|
988 |
/**
 * The messages which lookup() can print.
 *
 * lookup() searches this table with bsearch() and a case-insensitive
 * comparison of the key, so the entries must stay sorted by key, and key
 * must remain the first member.
 */
struct message_entry {
	const char key[20];
	const char *value;
} message_table[] = {
	{ "1xx", "A response status code in the range 100 - 199 indicates a "
			"'provisional response'." },
	{ "2xx", "A response status code in the range 200 - 299 indicates that "
			"the request was successful." },
	{ "3xx", "A response status code in the range 300 - 399 indicates that "
			"the client should redirect to a new URL." },
	{ "4xx", "A response status code in the range 400 - 499 indicates that "
			"the request could not be fulfilled due to client error." },
	{ "5xx", "A response status code in the range 500 - 599 indicates that "
			"an error occurred on the server." },
	{ "asctime", "Warning: This date is in the obsolete asctime() format. "
			"Consider using the RFC 1123 format instead." },
	{ "badage", "Error: The Age header must be one number." },
	{ "badallow", "Error: The Allow header must be a comma-separated list of "
			"HTTP methods." },
	{ "badcachecont", "Error: The Cache-Control header must be a "
			"comma-separated list of directives." },
	{ "badconnection", "Warning: The only value of the Connection header "
			"defined by HTTP/1.1 is \"close\"." },
	{ "badcontenc", "Error: The Content-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badcontenttype", "Error: The Content-Type header must be of the form "
			"'type/subtype (; optional parameters)'." },
	{ "badcontlang", "Error: The Content-Language header must be a "
			"comma-separated list of language tags." },
	{ "badcontlen", "Error: The Content-Length header must be a number." },
	{ "badcontloc", "Error: The Content-Location header must be an absolute "
			"or relative URI." },
	{ "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
			"MD5 sum." },
	{ "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
			"1123 format." },
	{ "badetag", "Error: The ETag header must be a quoted string (optionally "
			"preceded by \"W/\" for a weak tag)." },
	{ "badlocation", "Error: The Location header must be an absolute URI. "
			"Relative URIs are not permitted." },
	{ "badpragma", "Error: The Pragma header must be a comma-separated list of "
			"directives." },
	{ "badserver", "Error: The Server header must be a space-separated list of "
			"products of the form Name/optional-version and comments "
			"in ()." },
	{ "badstatus", "Warning: The response status code is outside the standard "
			"range 100 - 599." },
	{ "badstatusline", "Error: Failed to parse the response Status-Line. The "
			"status line must be of the form 'HTTP/n.n <3-digit "
			"status> <reason phrase>'." },
	{ "badtrailer", "Error: The Trailer header must be a comma-separated list "
			"of header names." },
	{ "badtransenc", "Error: The Transfer-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badupgrade", "Error: The Upgrade header must be a comma-separated list "
			"of product identifiers." },
	{ "badvary", "Error: The Vary header must be a comma-separated list "
			"of header names, or \"*\"." },
	{ "contentrange", "Warning: The Content-Range header should not be returned "
			"by the server for this request." },
	{ "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
			"version of this tool." },
	{ "futurelastmod", "Error: The specified Last-Modified date-time is in "
			"the future." },
	{ "missingcolon", "Error: Headers must be of the form 'Name: value'." },
	{ "missingcontenttype", "Warning: No Content-Type header was present. The "
			"client will have to guess the media type or ask "
			"the user. Adding a Content-Type header is strongly "
			"recommended." },
	{ "missingcontlang", "Consider adding a Content-Language header if "
			"applicable for this document." },
	{ "missingdate", "Warning: No Date header was present. A Date header must "
			"be present, unless the server does not have a clock, or "
			"the response is 100, 101, or 500 - 599." },
	{ "missinglastmod", "No Last-Modified header was present. The "
			"HTTP/1.1 specification states that this header should "
			"be sent whenever feasible." },
	{ "nocharset", "Warning: No character set is specified in the Content-Type. "
			"Clients may assume the default of ISO-8859-1. Consider "
			"appending '; charset=...'." },
	{ "nonstandard", "Warning: I don't know anything about this header. Is it "
			"a standard HTTP response header?" },
	{ "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
			"that all header lines end with CR LF." },
	{ "ok", "OK." },
	{ "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
			"to HTTP/1.1." },
	{ "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
			"Consider using the RFC 1123 format instead." },
	{ "ugly", "This URL appears to contain implementation-specific parts such "
			"as an extension or a query string. This may make the URL liable "
			"to change when the implementation is changed, resulting in "
			"broken links. Consider using URL rewriting or equivalent to "
			"implement a future-proof URL space. See "
			"http://www.w3.org/Provider/Style/URI for more information." },
	{ "unknowncachecont", "Warning: This Cache-Control directive is "
			"non-standard and will have limited support." },
	{ "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
	{ "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
			"range." },
	{ "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
	{ "via", "This header was added by a proxy, cache or gateway." },
	{ "wrongdate", "Warning: The server date-time differs from this system's "
			"date-time by more than 10 seconds. Check that both the "
			"system clocks are correct." },
	{ "xheader", "This is an extension header. I don't know how to check it." }
};
1095 |
|
1096 |
|
1097 |
/** |
1098 |
* Look up and output the string referenced by a key. |
1099 |
*/ |
1100 |
void lookup(const char *key) |
1101 |
{ |
1102 |
const char *s, *spc; |
1103 |
int x; |
1104 |
struct message_entry *message; |
1105 |
|
1106 |
message = bsearch(key, message_table, |
1107 |
sizeof message_table / sizeof message_table[0], |
1108 |
sizeof message_table[0], |
1109 |
(int (*)(const void *, const void *)) strcasecmp); |
1110 |
if (message) |
1111 |
s = message->value; |
1112 |
else |
1113 |
s = key; |
1114 |
|
1115 |
printf(" "); |
1116 |
x = 4; |
1117 |
while (*s) { |
1118 |
spc = strchr(s, ' '); |
1119 |
if (!spc) |
1120 |
spc = s + strlen(s); |
1121 |
if (75 < x + (spc - s)) { |
1122 |
printf("\n "); |
1123 |
x = 4; |
1124 |
} |
1125 |
x += spc - s + 1; |
1126 |
printf("%.*s ", spc - s, s); |
1127 |
if (*spc) |
1128 |
s = spc + 1; |
1129 |
else |
1130 |
s = spc; |
1131 |
} |
1132 |
printf("\n\n"); |
1133 |
} |