1 |
/*
 * HTTP Header Lint
 * Licensed under the same license as Curl
 * http://curl.haxx.se/docs/copyright.html
 * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
 */
7 |
|
8 |
/*
 * Compile using
 *   gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c
 *
 * References of the form [6.1.1] are to RFC 2616 (HTTP/1.1).
 */
14 |
|
15 |
#define _GNU_SOURCE |
16 |
#define __USE_XOPEN |
17 |
|
18 |
#include <limits.h> |
19 |
#include <math.h> |
20 |
#include <stdbool.h> |
21 |
#include <stdio.h> |
22 |
#include <stdlib.h> |
23 |
#include <string.h> |
24 |
#include <time.h> |
25 |
#include <sys/types.h> |
26 |
#include <regex.h> |
27 |
#include <curl/curl.h> |
28 |
|
29 |
|
30 |
/* The decimal digits, for use as the accept set of strspn(). */
#define NUMBER "0123456789"
/* Mark a parameter as deliberately unused.  A void cast is used rather than
 * a self-assignment so the macro is fully parenthesised, has no effect on
 * the object and also works for const-qualified objects. */
#define UNUSED(x) ((void) (x))
32 |
|
33 |
|
34 |
bool start; |
35 |
CURL *curl; |
36 |
int status_code; |
37 |
char error_buffer[CURL_ERROR_SIZE]; |
38 |
regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly, |
39 |
re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade, |
40 |
re_rfc1123, re_rfc1036, re_asctime; |
41 |
|
42 |
|
43 |
/* set-up and transfer */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* generic parsing helpers */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* one handler per known response header (see header_table) */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);

/* reporting */
void die(const char *error);
void warning(const char *message);
void error(const char *message);
void print(const char *s, size_t len);
void lookup(const char *key);
88 |
|
89 |
|
90 |
struct header_entry { |
91 |
char name[40]; |
92 |
void (*handler)(const char *s); |
93 |
int count; |
94 |
char *missing; |
95 |
} header_table[] = { |
96 |
{ "Accept-Ranges", header_accept_ranges, 0, 0 }, |
97 |
{ "Age", header_age, 0, 0 }, |
98 |
{ "Allow", header_allow, 0, 0 }, |
99 |
{ "Cache-Control", header_cache_control, 0, 0 }, |
100 |
{ "Connection", header_connection, 0, 0 }, |
101 |
{ "Content-Encoding", header_content_encoding, 0, 0 }, |
102 |
{ "Content-Language", header_content_language, 0, "missingcontlang" }, |
103 |
{ "Content-Length", header_content_length, 0, 0 }, |
104 |
{ "Content-Location", header_content_location, 0, 0 }, |
105 |
{ "Content-MD5", header_content_md5, 0, 0 }, |
106 |
{ "Content-Range", header_content_range, 0, 0 }, |
107 |
{ "Content-Type", header_content_type, 0, "missingcontenttype" }, |
108 |
{ "Date", header_date, 0, "missingdate" }, |
109 |
{ "ETag", header_etag, 0, 0 }, |
110 |
{ "Expires", header_expires, 0, 0 }, |
111 |
{ "Last-Modified", header_last_modified, 0, "missinglastmod" }, |
112 |
{ "Location", header_location, 0, 0 }, |
113 |
{ "Pragma", header_pragma, 0, 0 }, |
114 |
{ "Retry-After", header_retry_after, 0, 0 }, |
115 |
{ "Server", header_server, 0, 0 }, |
116 |
{ "Trailer", header_trailer, 0, 0 }, |
117 |
{ "Transfer-Encoding", header_transfer_encoding, 0, 0 }, |
118 |
{ "Upgrade", header_upgrade, 0, 0 }, |
119 |
{ "Vary", header_vary, 0, 0 }, |
120 |
{ "Via", header_via, 0, 0 } |
121 |
}; |
122 |
|
123 |
|
124 |
/** |
125 |
* Main entry point. |
126 |
*/ |
127 |
int main(int argc, char *argv[]) |
128 |
{ |
129 |
int i; |
130 |
|
131 |
if (argc < 2) |
132 |
die("Usage: httplint url [url ...]"); |
133 |
|
134 |
init(); |
135 |
|
136 |
for (i = 1; i != argc; i++) |
137 |
check_url(argv[i]); |
138 |
|
139 |
curl_global_cleanup(); |
140 |
|
141 |
return 0; |
142 |
} |
143 |
|
144 |
|
145 |
/** |
146 |
* Initialise the curl handle and compile regular expressions. |
147 |
*/ |
148 |
void init(void) |
149 |
{ |
150 |
struct curl_slist *request_headers = 0; |
151 |
|
152 |
if (curl_global_init(CURL_GLOBAL_ALL)) |
153 |
die("Failed to initialise libcurl"); |
154 |
|
155 |
curl = curl_easy_init(); |
156 |
if (!curl) |
157 |
die("Failed to create curl handle"); |
158 |
|
159 |
if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback)) |
160 |
die("Failed to set curl options"); |
161 |
if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback)) |
162 |
die("Failed to set curl options"); |
163 |
if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint")) |
164 |
die("Failed to set curl options"); |
165 |
if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer)) |
166 |
die("Failed to set curl options"); |
167 |
|
168 |
/* remove libcurl default headers */ |
169 |
request_headers = curl_slist_append(request_headers, "Accept:"); |
170 |
request_headers = curl_slist_append(request_headers, "Pragma:"); |
171 |
if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers)) |
172 |
die("Failed to set curl options"); |
173 |
|
174 |
/* compile regular expressions */ |
175 |
regcomp_wrapper(&re_status_line, |
176 |
"^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$", |
177 |
REG_EXTENDED); |
178 |
regcomp_wrapper(&re_token, |
179 |
"^([-0-9a-zA-Z_.]+)", |
180 |
REG_EXTENDED); |
181 |
regcomp_wrapper(&re_token_value, |
182 |
"^([-0-9a-zA-Z_.]+)(=([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\"))?", |
183 |
REG_EXTENDED); |
184 |
regcomp_wrapper(&re_content_type, |
185 |
"^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*" |
186 |
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
187 |
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
188 |
REG_EXTENDED); |
189 |
regcomp_wrapper(&re_absolute_uri, |
190 |
"^[a-zA-Z0-9]+://[^ ]+$", |
191 |
REG_EXTENDED); |
192 |
regcomp_wrapper(&re_etag, |
193 |
"^(W/[ \t]*)?\"([^\"]|[\\].)*\"$", |
194 |
REG_EXTENDED); |
195 |
regcomp_wrapper(&re_server, |
196 |
"^((([-0-9a-zA-Z_.]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$", |
197 |
REG_EXTENDED); |
198 |
regcomp_wrapper(&re_transfer_coding, |
199 |
"^([-0-9a-zA-Z_.]+)[ \t]*" |
200 |
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
201 |
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
202 |
REG_EXTENDED); |
203 |
regcomp_wrapper(&re_upgrade, |
204 |
"^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$", |
205 |
REG_EXTENDED); |
206 |
regcomp_wrapper(&re_ugly, |
207 |
"^[a-zA-Z0-9]+://[^/]+[/a-zA-Z0-9-_]*$", |
208 |
REG_EXTENDED); |
209 |
regcomp_wrapper(&re_rfc1123, |
210 |
"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) " |
211 |
"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) " |
212 |
"([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", |
213 |
REG_EXTENDED); |
214 |
regcomp_wrapper(&re_rfc1036, |
215 |
"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), " |
216 |
"([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-" |
217 |
"([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", |
218 |
REG_EXTENDED); |
219 |
regcomp_wrapper(&re_asctime, |
220 |
"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) " |
221 |
"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) " |
222 |
"([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$", |
223 |
REG_EXTENDED); |
224 |
} |
225 |
|
226 |
|
227 |
/**
 * Compile a regular expression, handling errors.
 *
 * If regcomp() fails, the offending expression is written to stderr and the
 * program terminates through die() with the regerror() text.
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
241 |
|
242 |
|
243 |
/** |
244 |
* Fetch and check the headers for the specified url. |
245 |
*/ |
246 |
void check_url(const char *url) |
247 |
{ |
248 |
int i, r; |
249 |
CURLcode code; |
250 |
|
251 |
start = true; |
252 |
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) |
253 |
header_table[i].count = 0; |
254 |
|
255 |
printf("Checking URL %s\n", url); |
256 |
if (strncmp(url, "http", 4)) |
257 |
warning("this is not an http or https url"); |
258 |
|
259 |
if (curl_easy_setopt(curl, CURLOPT_URL, url)) |
260 |
die("Failed to set curl options"); |
261 |
|
262 |
code = curl_easy_perform(curl); |
263 |
if (code != CURLE_OK && code != CURLE_WRITE_ERROR) { |
264 |
error(error_buffer); |
265 |
return; |
266 |
} else { |
267 |
printf("\n"); |
268 |
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) { |
269 |
if (header_table[i].count == 0 && header_table[i].missing) |
270 |
lookup(header_table[i].missing); |
271 |
} |
272 |
} |
273 |
|
274 |
r = regexec(&re_ugly, url, 0, 0, 0); |
275 |
if (r) |
276 |
lookup("ugly"); |
277 |
} |
278 |
|
279 |
|
280 |
/** |
281 |
* Callback for received header data. |
282 |
*/ |
283 |
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream) |
284 |
{ |
285 |
const size_t size = msize * nmemb; |
286 |
char s[400], *name, *value; |
287 |
|
288 |
UNUSED(stream); |
289 |
|
290 |
printf("* "); |
291 |
print(ptr, size); |
292 |
printf("\n"); |
293 |
|
294 |
if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) { |
295 |
lookup("notcrlf"); |
296 |
return size; |
297 |
} |
298 |
if (sizeof s <= size) { |
299 |
warning("header too long: ignored\n"); |
300 |
return size; |
301 |
} |
302 |
strncpy(s, ptr, size); |
303 |
s[size - 2] = 0; |
304 |
|
305 |
name = s; |
306 |
value = strchr(s, ':'); |
307 |
|
308 |
if (s[0] == 0) { |
309 |
/* empty header indicates end of headers */ |
310 |
puts("End of headers."); |
311 |
return 0; |
312 |
|
313 |
} else if (start) { |
314 |
/* Status-Line [6.1] */ |
315 |
check_status_line(s); |
316 |
start = false; |
317 |
|
318 |
} else if (!value) { |
319 |
lookup("missingcolon"); |
320 |
|
321 |
} else { |
322 |
*value = 0; |
323 |
value++; |
324 |
|
325 |
check_header(name, skip_lws(value)); |
326 |
} |
327 |
|
328 |
return size; |
329 |
} |
330 |
|
331 |
|
332 |
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every argument is ignored and 0 is
 * returned to abort the fetch.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
346 |
|
347 |
|
348 |
/** |
349 |
* Check the syntax and content of the response Status-Line [6.1]. |
350 |
*/ |
351 |
void check_status_line(const char *s) |
352 |
{ |
353 |
const char *reason; |
354 |
unsigned int major = 0, minor = 0; |
355 |
int r; |
356 |
regmatch_t pmatch[5]; |
357 |
|
358 |
r = regexec(&re_status_line, s, 5, pmatch, 0); |
359 |
if (r) { |
360 |
lookup("badstatusline"); |
361 |
return; |
362 |
} |
363 |
|
364 |
major = atoi(s + pmatch[1].rm_so); |
365 |
minor = atoi(s + pmatch[2].rm_so); |
366 |
status_code = atoi(s + pmatch[3].rm_so); |
367 |
reason = s + pmatch[4].rm_so; |
368 |
|
369 |
if (major < 1 || (major == 1 && minor == 0)) { |
370 |
lookup("oldhttp"); |
371 |
} else if ((major == 1 && 1 < minor) || 1 < major) { |
372 |
lookup("futurehttp"); |
373 |
} else { |
374 |
if (status_code < 100 || 600 <= status_code) { |
375 |
lookup("badstatus"); |
376 |
} else { |
377 |
char key[] = "xxx"; |
378 |
key[0] = '0' + status_code / 100; |
379 |
lookup(key); |
380 |
} |
381 |
} |
382 |
} |
383 |
|
384 |
|
385 |
/** |
386 |
* Check the syntax and content of a header. |
387 |
*/ |
388 |
void check_header(const char *name, const char *value) |
389 |
{ |
390 |
struct header_entry *header; |
391 |
|
392 |
header = bsearch(name, header_table, |
393 |
sizeof header_table / sizeof header_table[0], |
394 |
sizeof header_table[0], |
395 |
(int (*)(const void *, const void *)) strcasecmp); |
396 |
|
397 |
if (header) { |
398 |
header->count++; |
399 |
header->handler(value); |
400 |
} else |
401 |
lookup("nonstandard"); |
402 |
} |
403 |
|
404 |
|
405 |
/** |
406 |
* Attempt to parse an HTTP Full Date (3.3.1), returning true on success. |
407 |
*/ |
408 |
bool parse_date(const char *s, struct tm *tm) |
409 |
{ |
410 |
int r; |
411 |
int len = strlen(s); |
412 |
regmatch_t pmatch[20]; |
413 |
|
414 |
tm->tm_wday = 0; |
415 |
tm->tm_yday = 0; |
416 |
tm->tm_isdst = 0; |
417 |
tm->tm_gmtoff = 0; |
418 |
tm->tm_zone = "GMT"; |
419 |
|
420 |
if (len == 29) { |
421 |
/* RFC 1123 */ |
422 |
r = regexec(&re_rfc1123, s, 20, pmatch, 0); |
423 |
if (r == 0) { |
424 |
tm->tm_mday = atoi(s + pmatch[2].rm_so); |
425 |
tm->tm_mon = month(s + pmatch[3].rm_so); |
426 |
tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900; |
427 |
tm->tm_hour = atoi(s + pmatch[5].rm_so); |
428 |
tm->tm_min = atoi(s + pmatch[6].rm_so); |
429 |
tm->tm_sec = atoi(s + pmatch[7].rm_so); |
430 |
return true; |
431 |
} |
432 |
|
433 |
} else if (len == 24) { |
434 |
/* asctime() format */ |
435 |
r = regexec(&re_asctime, s, 20, pmatch, 0); |
436 |
if (r == 0) { |
437 |
if (s[pmatch[3].rm_so] == ' ') |
438 |
tm->tm_mday = atoi(s + pmatch[3].rm_so + 1); |
439 |
else |
440 |
tm->tm_mday = atoi(s + pmatch[3].rm_so); |
441 |
tm->tm_mon = month(s + pmatch[2].rm_so); |
442 |
tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900; |
443 |
tm->tm_hour = atoi(s + pmatch[4].rm_so); |
444 |
tm->tm_min = atoi(s + pmatch[5].rm_so); |
445 |
tm->tm_sec = atoi(s + pmatch[6].rm_so); |
446 |
lookup("asctime"); |
447 |
return true; |
448 |
} |
449 |
|
450 |
} else { |
451 |
/* RFC 1036 */ |
452 |
r = regexec(&re_rfc1036, s, 20, pmatch, 0); |
453 |
if (r == 0) { |
454 |
tm->tm_mday = atoi(s + pmatch[2].rm_so); |
455 |
tm->tm_mon = month(s + pmatch[3].rm_so); |
456 |
tm->tm_year = 100 + atoi(s + pmatch[4].rm_so); |
457 |
tm->tm_hour = atoi(s + pmatch[5].rm_so); |
458 |
tm->tm_min = atoi(s + pmatch[6].rm_so); |
459 |
tm->tm_sec = atoi(s + pmatch[7].rm_so); |
460 |
lookup("rfc1036"); |
461 |
return true; |
462 |
} |
463 |
|
464 |
} |
465 |
|
466 |
lookup("baddate"); |
467 |
return false; |
468 |
} |
469 |
|
470 |
|
471 |
/**
 * Convert a month name to the month number.
 *
 * s points at an English three-letter month abbreviation ("Jan" .. "Dec");
 * only as many characters as are needed to tell the months apart are
 * examined.  Returns 0 (January) to 11 (December); anything unrecognised
 * yields 0.
 */
int month(const char *s)
{
	switch (s[0]) {
	case 'J':
		if (s[1] == 'a')
			return 0;
		if (s[1] == 'u')
			return s[2] == 'n' ? 5 : 6;
		/* unrecognised: must not fall through into February */
		break;
	case 'F':
		return 1;
	case 'M':
		return s[2] == 'r' ? 2 : 4;
	case 'A':
		return s[1] == 'p' ? 3 : 7;
	case 'S':
		return 8;
	case 'O':
		return 9;
	case 'N':
		return 10;
	case 'D':
		return 11;
	default:
		break;
	}
	return 0;
}
501 |
|
502 |
|
503 |
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * Steps over at most one CR LF that is followed by a space or tab, then
 * over any run of spaces and tabs.  Returns a pointer to the first
 * character after the white space.
 */
const char *skip_lws(const char *s)
{
	const bool folded = s[0] == '\r' && s[1] == '\n' &&
			(s[2] == ' ' || s[2] == '\t');

	if (folded)
		s += 2;

	return s + strspn(s, " \t");
}
514 |
|
515 |
|
516 |
/**
 * Parse a list of elements (#rule in [2.1]).
 *
 * s         the list text
 * preg      regular expression that must match at the start of each element
 * n, m      minimum and maximum number of elements; m == UINT_MAX means
 *           there is no upper limit
 * callback  called with the element text and its match data, may be 0
 *
 * Returns true if every element matched and the element count lies within
 * [n, m].  Problems are described on stdout.  An empty list contains zero
 * elements, which is valid when n is 0.
 */
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]))
{
	unsigned int items = 0;
	regmatch_t pmatch[20];

	while (*s != 0) {
		if (regexec(preg, s, 20, pmatch, 0)) {
			printf(" Failed to match list item %u\n", items + 1);
			return false;
		}

		if (callback)
			callback(s, pmatch);
		items++;

		s = skip_lws(s + pmatch[0].rm_eo);
		if (*s == 0)
			break;
		if (*s != ',') {
			printf(" Expecting , after list item %u\n", items);
			return false;
		}
		/* consecutive commas (empty elements) are skipped */
		while (*s == ',')
			s = skip_lws(s + 1);
	}

	if (items < n || m < items) {
		printf(" %u items in list, but there should be ", items);
		if (m == UINT_MAX)
			printf("at least %u\n", n);
		else
			printf("between %u and %u\n", n, m);
		return false;
	}

	return true;
}
560 |
|
561 |
|
562 |
/* Header-specific validation. */ |
563 |
/* Accept-Ranges: only the range units "bytes" and "none" are recognised. */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
572 |
|
573 |
void header_age(const char *s) |
574 |
{ |
575 |
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
576 |
lookup("badage"); |
577 |
else |
578 |
lookup("ok"); |
579 |
} |
580 |
|
581 |
void header_allow(const char *s) |
582 |
{ |
583 |
if (parse_list(s, &re_token, 0, UINT_MAX, 0)) |
584 |
lookup("ok"); |
585 |
else |
586 |
lookup("badallow"); |
587 |
} |
588 |
|
589 |
void header_cache_control(const char *s) |
590 |
{ |
591 |
if (parse_list(s, &re_token_value, 1, UINT_MAX, |
592 |
header_cache_control_callback)) |
593 |
lookup("ok"); |
594 |
else |
595 |
lookup("badcachecont"); |
596 |
} |
597 |
|
598 |
/* Recognised Cache-Control directives.  Must stay sorted: the table is
 * searched with bsearch(). */
char cache_control_list[][20] = {
	"max-age", "max-stale", "min-fresh", "must-revalidate",
	"no-cache", "no-store", "no-transform", "only-if-cached",
	"private", "proxy-revalidate", "public", "s-maxage"
};

/* bsearch() comparator: directive name (key) against a table element. */
static int cache_directive_compare(const void *key, const void *element)
{
	return strcasecmp(key, element);
}

/* Called by parse_list() for each Cache-Control directive; pmatch[1] spans
 * the directive name within s.  Warns about directives missing from
 * cache_control_list. */
void header_cache_control_callback(const char *s, regmatch_t pmatch[])
{
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any entry of the table (and to fit in name) */
	if (19 < len) {
		lookup("unknowncachecont");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	/* a real comparator avoids calling strcasecmp through an
	 * incompatible function pointer type */
	if (!bsearch(name, cache_control_list,
			sizeof cache_control_list / sizeof cache_control_list[0],
			sizeof cache_control_list[0],
			cache_directive_compare)) {
		printf(" Cache-Control directive '%s':\n", name);
		lookup("unknowncachecont");
	}
}
628 |
|
629 |
/* Connection: anything other than exactly "close" draws a warning. */
void header_connection(const char *s)
{
	const bool is_close = strcmp(s, "close") == 0;

	lookup(is_close ? "ok" : "badconnection");
}
636 |
|
637 |
void header_content_encoding(const char *s) |
638 |
{ |
639 |
if (parse_list(s, &re_token, 1, UINT_MAX, |
640 |
header_content_encoding_callback)) |
641 |
lookup("ok"); |
642 |
else |
643 |
lookup("badcontenc"); |
644 |
} |
645 |
|
646 |
/* Recognised content codings.  Must stay sorted: the table is searched
 * with bsearch(). */
char content_coding_list[][20] = {
	"compress", "deflate", "gzip", "identity"
};

/* bsearch() comparator: coding name (key) against a table element. */
static int content_coding_compare(const void *key, const void *element)
{
	return strcasecmp(key, element);
}

/* Called by parse_list() for each Content-Encoding element; pmatch[1] spans
 * the coding name within s.  Warns about codings missing from
 * content_coding_list. */
void header_content_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any entry of the table (and to fit in name) */
	if (19 < len) {
		lookup("unknowncontenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	/* a real comparator avoids calling strcasecmp through an
	 * incompatible function pointer type */
	if (!bsearch(name, content_coding_list,
			sizeof content_coding_list / sizeof content_coding_list[0],
			sizeof content_coding_list[0],
			content_coding_compare)) {
		printf(" Content-Encoding '%s':\n", name);
		lookup("unknowncontenc");
	}
}
673 |
|
674 |
void header_content_language(const char *s) |
675 |
{ |
676 |
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
677 |
lookup("ok"); |
678 |
else |
679 |
lookup("badcontlang"); |
680 |
} |
681 |
|
682 |
void header_content_length(const char *s) |
683 |
{ |
684 |
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
685 |
lookup("badcontlen"); |
686 |
else |
687 |
lookup("ok"); |
688 |
} |
689 |
|
690 |
/* Content-Location: the only check made is that the value has no space. */
void header_content_location(const char *s)
{
	const bool has_space = strchr(s, ' ') != 0;

	lookup(has_space ? "badcontloc" : "ok");
}
697 |
|
698 |
/* Content-MD5: only the length of the value (24 characters) is checked. */
void header_content_md5(const char *s)
{
	const bool right_length = strlen(s) == 24;

	lookup(right_length ? "ok" : "badcontmd5");
}
705 |
|
706 |
/* Content-Range: always warned about; the value itself is not examined. */
void header_content_range(const char *s)
{
	(void) s;

	lookup("contentrange");
}
711 |
|
712 |
void header_content_type(const char *s) |
713 |
{ |
714 |
bool charset = false; |
715 |
char *type, *subtype; |
716 |
unsigned int i; |
717 |
int r; |
718 |
regmatch_t pmatch[30]; |
719 |
|
720 |
r = regexec(&re_content_type, s, 30, pmatch, 0); |
721 |
if (r) { |
722 |
lookup("badcontenttype"); |
723 |
return; |
724 |
} |
725 |
|
726 |
type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so); |
727 |
subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so); |
728 |
|
729 |
/* parameters */ |
730 |
for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) { |
731 |
char *attrib, *value; |
732 |
|
733 |
attrib = strndup(s + pmatch[i + 1].rm_so, |
734 |
pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so); |
735 |
value = strndup(s + pmatch[i + 2].rm_so, |
736 |
pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so); |
737 |
|
738 |
if (strcasecmp(attrib, "charset") == 0) |
739 |
charset = true; |
740 |
} |
741 |
|
742 |
if (strcasecmp(type, "text") == 0 && !charset) |
743 |
lookup("nocharset"); |
744 |
else |
745 |
lookup("ok"); |
746 |
} |
747 |
|
748 |
/* Date: must parse as an HTTP date and lie within 10 seconds of this
 * system's clock. */
void header_date(const char *s)
{
	time_t now, stamp;
	struct tm tm;

	now = time(0);
	if (!parse_date(s, &tm))
		return;
	/* HTTP dates are GMT: timegm() converts without applying the local
	 * time zone, which mktime() would do */
	stamp = timegm(&tm);

	if (10 < fabs(difftime(now, stamp)))
		lookup("wrongdate");
	else
		lookup("ok");
}
765 |
|
766 |
void header_etag(const char *s) |
767 |
{ |
768 |
int r; |
769 |
r = regexec(&re_etag, s, 0, 0, 0); |
770 |
if (r) |
771 |
lookup("badetag"); |
772 |
else |
773 |
lookup("ok"); |
774 |
} |
775 |
|
776 |
/* Expires: must parse as an HTTP date; parse_date() reports any failure
 * itself, so nothing more is printed in that case. */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
782 |
|
783 |
/* Last-Modified: must parse as an HTTP date and must not lie more than
 * 10 seconds in the future according to this system's clock. */
void header_last_modified(const char *s)
{
	time_t now, stamp;
	struct tm tm;

	now = time(0);
	if (!parse_date(s, &tm))
		return;
	/* HTTP dates are GMT: timegm() converts without applying the local
	 * time zone, which mktime() would do */
	stamp = timegm(&tm);

	if (10 < difftime(stamp, now))
		lookup("futurelastmod");
	else
		lookup("ok");
}
800 |
|
801 |
void header_location(const char *s) |
802 |
{ |
803 |
int r; |
804 |
r = regexec(&re_absolute_uri, s, 0, 0, 0); |
805 |
if (r) |
806 |
lookup("badlocation"); |
807 |
else |
808 |
lookup("ok"); |
809 |
} |
810 |
|
811 |
void header_pragma(const char *s) |
812 |
{ |
813 |
if (parse_list(s, &re_token_value, 1, UINT_MAX, 0)) |
814 |
lookup("ok"); |
815 |
else |
816 |
lookup("badpragma"); |
817 |
} |
818 |
|
819 |
void header_retry_after(const char *s) |
820 |
{ |
821 |
struct tm tm; |
822 |
|
823 |
if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) { |
824 |
lookup("ok"); |
825 |
return; |
826 |
} |
827 |
|
828 |
if (!parse_date(s, &tm)) |
829 |
return; |
830 |
|
831 |
lookup("ok"); |
832 |
} |
833 |
|
834 |
void header_server(const char *s) |
835 |
{ |
836 |
int r; |
837 |
r = regexec(&re_server, s, 0, 0, 0); |
838 |
if (r) |
839 |
lookup("badserver"); |
840 |
else |
841 |
lookup("ok"); |
842 |
} |
843 |
|
844 |
void header_trailer(const char *s) |
845 |
{ |
846 |
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
847 |
lookup("ok"); |
848 |
else |
849 |
lookup("badtrailer"); |
850 |
} |
851 |
|
852 |
void header_transfer_encoding(const char *s) |
853 |
{ |
854 |
if (parse_list(s, &re_transfer_coding, 1, UINT_MAX, |
855 |
header_transfer_encoding_callback)) |
856 |
lookup("ok"); |
857 |
else |
858 |
lookup("badtransenc"); |
859 |
} |
860 |
|
861 |
/* Recognised transfer codings.  Must stay sorted: the table is searched
 * with bsearch(). */
char transfer_coding_list[][20] = {
	"chunked", "compress", "deflate", "gzip", "identity"
};

/* bsearch() comparator: coding name (key) against a table element. */
static int transfer_coding_compare(const void *key, const void *element)
{
	return strcasecmp(key, element);
}

/* Called by parse_list() for each Transfer-Encoding element; pmatch[1]
 * spans the coding name within s.  Warns about codings missing from
 * transfer_coding_list. */
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[])
{
	const size_t len = pmatch[1].rm_eo - pmatch[1].rm_so;
	char name[20];

	/* too long to be any entry of the table (and to fit in name) */
	if (19 < len) {
		lookup("unknowntransenc");
		return;
	}

	memcpy(name, s + pmatch[1].rm_so, len);
	name[len] = 0;

	/* a real comparator avoids calling strcasecmp through an
	 * incompatible function pointer type */
	if (!bsearch(name, transfer_coding_list,
			sizeof transfer_coding_list / sizeof transfer_coding_list[0],
			sizeof transfer_coding_list[0],
			transfer_coding_compare)) {
		printf(" Transfer-Encoding '%s':\n", name);
		lookup("unknowntransenc");
	}
}
888 |
|
889 |
void header_upgrade(const char *s) |
890 |
{ |
891 |
int r; |
892 |
r = regexec(&re_upgrade, s, 0, 0, 0); |
893 |
if (r) |
894 |
lookup("badupgrade"); |
895 |
else |
896 |
lookup("ok"); |
897 |
} |
898 |
|
899 |
void header_vary(const char *s) |
900 |
{ |
901 |
if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0)) |
902 |
lookup("ok"); |
903 |
else |
904 |
lookup("badvary"); |
905 |
} |
906 |
|
907 |
/* Via: always produces an informational note; the value is not examined. */
void header_via(const char *s)
{
	(void) s;

	lookup("via");
}
912 |
|
913 |
|
914 |
/**
 * Print an error message and exit.
 *
 * The message goes to stderr, prefixed with the program name, and the
 * process ends with EXIT_FAILURE.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);
	exit(EXIT_FAILURE);
}
922 |
|
923 |
|
924 |
/**
 * Print a warning message.
 *
 * The message is written to stdout on a line of its own.
 */
void warning(const char *message)
{
	fputs("Warning: ", stdout);
	puts(message);
}
931 |
|
932 |
|
933 |
/**
 * Print an error message.
 *
 * The message is written to stdout on a line of its own; unlike die(), the
 * program carries on afterwards.
 */
void error(const char *message)
{
	fputs("Error: ", stdout);
	puts(message);
}
940 |
|
941 |
|
942 |
/**
 * Print a string which contains control characters.
 *
 * Exactly len bytes of s are written to stdout.  Printable ASCII (32 - 126)
 * is written as is; every other byte appears as "[xx]" with xx its
 * two-digit hexadecimal value.
 */
void print(const char *s, size_t len)
{
	size_t i;

	for (i = 0; i != len; i++) {
		/* go through unsigned char so that bytes >= 0x80 are not
		 * sign-extended and printed as "[ffffff80]" */
		const unsigned char c = (unsigned char) s[i];

		if (31 < c && c < 127)
			putchar(c);
		else
			printf("[%.2x]", (unsigned int) c);
	}
}
955 |
|
956 |
|
957 |
/**
 * Messages printed by lookup(), indexed by key.
 *
 * The table must stay sorted case-insensitively by key, because lookup()
 * searches it with bsearch().
 */
struct message_entry {
	const char key[20];	/* short identifier passed to lookup() */
	const char *value;	/* text shown to the user */
} message_table[] = {
	{ "1xx", "A response status code in the range 100 - 199 indicates a "
			"'provisional response'." },
	{ "2xx", "A response status code in the range 200 - 299 indicates that "
			"the request was successful." },
	{ "3xx", "A response status code in the range 300 - 399 indicates that "
			"the client should redirect to a new URL." },
	{ "4xx", "A response status code in the range 400 - 499 indicates that "
			"the request could not be fulfilled due to client error." },
	{ "5xx", "A response status code in the range 500 - 599 indicates that "
			"an error occurred on the server." },
	{ "asctime", "Warning: This date is in the obsolete asctime() format. "
			"Consider using the RFC 1123 format instead." },
	{ "badage", "Error: The Age header must be one number." },
	{ "badallow", "Error: The Allow header must be a comma-separated list of "
			"HTTP methods." },
	{ "badcachecont", "Error: The Cache-Control header must be a "
			"comma-separated list of directives." },
	{ "badconnection", "Warning: The only value of the Connection header "
			"defined by HTTP/1.1 is \"close\"." },
	{ "badcontenc", "Error: The Content-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badcontenttype", "Error: The Content-Type header must be of the form "
			"'type/subtype (; optional parameters)'." },
	{ "badcontlang", "Error: The Content-Language header must be a "
			"comma-separated list of language tags." },
	{ "badcontlen", "Error: The Content-Length header must be a number." },
	{ "badcontloc", "Error: The Content-Location header must be an absolute "
			"or relative URI." },
	{ "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
			"MD5 sum." },
	{ "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
			"1123 format." },
	{ "badetag", "Error: The ETag header must be a quoted string (optionally "
			"preceded by \"W/\" for a weak tag)." },
	{ "badlocation", "Error: The Location header must be an absolute URI. "
			"Relative URIs are not permitted." },
	{ "badpragma", "Error: The Pragma header must be a comma-separated list of "
			"directives." },
	{ "badserver", "Error: The Server header must be a space-separated list of "
			"products of the form Name/optional-version and comments "
			"in ()." },
	{ "badstatus", "Warning: The response status code is outside the standard "
			"range 100 - 599." },
	{ "badstatusline", "Error: Failed to parse the response Status-Line. The "
			"status line must be of the form 'HTTP/n.n <3-digit "
			"status> <reason phrase>'." },
	{ "badtrailer", "Error: The Trailer header must be a comma-separated list "
			"of header names." },
	{ "badtransenc", "Error: The Transfer-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badupgrade", "Error: The Upgrade header must be a comma-separated list "
			"of product identifiers." },
	{ "badvary", "Error: The Vary header must be a comma-separated list "
			"of header names, or \"*\"." },
	{ "contentrange", "Warning: The Content-Range header should not be returned "
			"by the server for this request." },
	{ "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
			"version of this tool." },
	{ "futurelastmod", "Error: The specified Last-Modified date-time is in "
			"the future." },
	{ "missingcolon", "Error: Headers must be of the form 'Name: value'." },
	{ "missingcontenttype", "Warning: No Content-Type header was present. The "
			"client will have to guess the media type or ask "
			"the user. Adding a Content-Type header is strongly "
			"recommended." },
	{ "missingcontlang", "Consider adding a Content-Language header if "
			"applicable for this document." },
	{ "missingdate", "Warning: No Date header was present. A Date header must "
			"be present, unless the server does not have a clock, or "
			"the response is 100, 101, or 500 - 599." },
	{ "missinglastmod", "No Last-Modified header was present. The "
			"HTTP/1.1 specification states that this header should "
			"be sent whenever feasible." },
	{ "nocharset", "Warning: No character set is specified in the Content-Type. "
			"Clients may assume the default of ISO-8859-1. Consider "
			"appending '; charset=...'." },
	{ "nonstandard", "Warning: I don't know anything about this header. Is it "
			"a standard HTTP response header?" },
	{ "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
			"that all header lines end with CR LF." },
	{ "ok", "OK." },
	{ "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
			"to HTTP/1.1." },
	{ "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
			"Consider using the RFC 1123 format instead." },
	{ "ugly", "This URL appears to contain implementation-specific parts such "
			"as an extension or a query string. This may make the URL liable "
			"to change when the implementation is changed, resulting in "
			"broken links. Consider using URL rewriting or equivalent to "
			"implement a future-proof URL space. See "
			"http://www.w3.org/Provider/Style/URI for more information." },
	{ "unknowncachecont", "Warning: This Cache-Control directive is "
			"non-standard and will have limited support." },
	{ "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
	{ "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
			"range." },
	{ "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
	{ "via", "This header was added by a proxy, cache or gateway." },
	{ "wrongdate", "Warning: The server date-time differs from this system's "
			"date-time by more than 10 seconds. Check that both the "
			"system clocks are correct." }
};
1063 |
|
1064 |
|
1065 |
/** |
1066 |
* Look up and output the string referenced by a key. |
1067 |
*/ |
1068 |
void lookup(const char *key) |
1069 |
{ |
1070 |
const char *s, *spc; |
1071 |
int x; |
1072 |
struct message_entry *message; |
1073 |
|
1074 |
message = bsearch(key, message_table, |
1075 |
sizeof message_table / sizeof message_table[0], |
1076 |
sizeof message_table[0], |
1077 |
(int (*)(const void *, const void *)) strcasecmp); |
1078 |
if (message) |
1079 |
s = message->value; |
1080 |
else |
1081 |
s = key; |
1082 |
|
1083 |
printf(" "); |
1084 |
x = 4; |
1085 |
while (*s) { |
1086 |
spc = strchr(s, ' '); |
1087 |
if (!spc) |
1088 |
spc = s + strlen(s); |
1089 |
if (75 < x + (spc - s)) { |
1090 |
printf("\n "); |
1091 |
x = 4; |
1092 |
} |
1093 |
x += spc - s + 1; |
1094 |
printf("%.*s ", spc - s, s); |
1095 |
if (*spc) |
1096 |
s = spc + 1; |
1097 |
else |
1098 |
s = spc; |
1099 |
} |
1100 |
printf("\n\n"); |
1101 |
} |