1 |
james |
40 |
/* |
2 |
|
|
* HTTP Header Lint |
3 |
james |
50 |
* Licensed under the MIT License |
4 |
|
|
* http://www.opensource.org/licenses/mit-license |
5 |
|
|
* Copyright 2004 James Bursa <bursa@users.sourceforge.net> |
6 |
james |
40 |
*/ |
7 |
|
|
|
8 |
|
|
/* |
9 |
|
|
* Compile using |
10 |
|
|
* gcc -W -Wall `curl-config --cflags --libs` -o httplint httplint.c |
11 |
|
|
* |
12 |
|
|
* References of the form [6.1.1] are to RFC 2616 (HTTP/1.1). |
13 |
|
|
*/ |
14 |
|
|
|
15 |
|
|
#define _GNU_SOURCE |
16 |
|
|
#define __USE_XOPEN |
17 |
|
|
|
18 |
|
|
#include <limits.h> |
19 |
|
|
#include <math.h> |
20 |
|
|
#include <stdbool.h> |
21 |
|
|
#include <stdio.h> |
22 |
|
|
#include <stdlib.h> |
23 |
|
|
#include <string.h> |
24 |
|
|
#include <time.h> |
25 |
|
|
#include <sys/types.h> |
26 |
|
|
#include <regex.h> |
27 |
|
|
#include <curl/curl.h> |
28 |
|
|
|
29 |
|
|
|
30 |
|
|
/* Decimal digits; used with strspn() by the numeric header checks. */
#define NUMBER "0123456789"
/* Mark a variable or parameter as deliberately unused. A cast to void has no
 * side effects and also works on const objects, unlike a self-assignment. */
#define UNUSED(x) ((void) (x))
32 |
|
|
|
33 |
|
|
|
34 |
|
|
bool start; |
35 |
james |
56 |
bool html = false; |
36 |
james |
40 |
CURL *curl; |
37 |
|
|
int status_code; |
38 |
|
|
char error_buffer[CURL_ERROR_SIZE]; |
39 |
|
|
regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly, |
40 |
james |
41 |
re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade, |
41 |
james |
48 |
re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires; |
42 |
james |
40 |
|
43 |
|
|
|
44 |
|
|
/* set-up and fetching */
void init(void);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void check_url(const char *url);

/* libcurl callbacks */
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream);
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream);

/* generic checks and parsing helpers */
void check_status_line(const char *s);
void check_header(const char *name, const char *value);
bool parse_date(const char *s, struct tm *tm);
int month(const char *s);
time_t mktime_from_utc(struct tm *t);
const char *skip_lws(const char *s);
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
		void (*callback)(const char *s, regmatch_t pmatch[]));

/* per-header handlers, referenced from header_table */
void header_accept_ranges(const char *s);
void header_age(const char *s);
void header_allow(const char *s);
void header_cache_control(const char *s);
void header_cache_control_callback(const char *s, regmatch_t pmatch[]);
void header_connection(const char *s);
void header_content_encoding(const char *s);
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_content_language(const char *s);
void header_content_length(const char *s);
void header_content_location(const char *s);
void header_content_md5(const char *s);
void header_content_range(const char *s);
void header_content_type(const char *s);
void header_date(const char *s);
void header_etag(const char *s);
void header_expires(const char *s);
void header_last_modified(const char *s);
void header_location(const char *s);
void header_pragma(const char *s);
void header_retry_after(const char *s);
void header_server(const char *s);
void header_trailer(const char *s);
void header_transfer_encoding(const char *s);
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]);
void header_upgrade(const char *s);
void header_vary(const char *s);
void header_via(const char *s);
void header_set_cookie(const char *s);

/* output helpers */
void die(const char *error);
void print(const char *s, size_t len);
void lookup(const char *key);
89 |
|
|
|
90 |
|
|
|
91 |
|
|
struct header_entry { |
92 |
|
|
char name[40]; |
93 |
|
|
void (*handler)(const char *s); |
94 |
|
|
int count; |
95 |
|
|
char *missing; |
96 |
|
|
} header_table[] = { |
97 |
|
|
{ "Accept-Ranges", header_accept_ranges, 0, 0 }, |
98 |
|
|
{ "Age", header_age, 0, 0 }, |
99 |
|
|
{ "Allow", header_allow, 0, 0 }, |
100 |
|
|
{ "Cache-Control", header_cache_control, 0, 0 }, |
101 |
|
|
{ "Connection", header_connection, 0, 0 }, |
102 |
|
|
{ "Content-Encoding", header_content_encoding, 0, 0 }, |
103 |
|
|
{ "Content-Language", header_content_language, 0, "missingcontlang" }, |
104 |
|
|
{ "Content-Length", header_content_length, 0, 0 }, |
105 |
|
|
{ "Content-Location", header_content_location, 0, 0 }, |
106 |
|
|
{ "Content-MD5", header_content_md5, 0, 0 }, |
107 |
|
|
{ "Content-Range", header_content_range, 0, 0 }, |
108 |
|
|
{ "Content-Type", header_content_type, 0, "missingcontenttype" }, |
109 |
|
|
{ "Date", header_date, 0, "missingdate" }, |
110 |
|
|
{ "ETag", header_etag, 0, 0 }, |
111 |
|
|
{ "Expires", header_expires, 0, 0 }, |
112 |
|
|
{ "Last-Modified", header_last_modified, 0, "missinglastmod" }, |
113 |
|
|
{ "Location", header_location, 0, 0 }, |
114 |
|
|
{ "Pragma", header_pragma, 0, 0 }, |
115 |
|
|
{ "Retry-After", header_retry_after, 0, 0 }, |
116 |
|
|
{ "Server", header_server, 0, 0 }, |
117 |
james |
48 |
{ "Set-Cookie", header_set_cookie, 0, 0 }, |
118 |
james |
40 |
{ "Trailer", header_trailer, 0, 0 }, |
119 |
|
|
{ "Transfer-Encoding", header_transfer_encoding, 0, 0 }, |
120 |
|
|
{ "Upgrade", header_upgrade, 0, 0 }, |
121 |
|
|
{ "Vary", header_vary, 0, 0 }, |
122 |
|
|
{ "Via", header_via, 0, 0 } |
123 |
|
|
}; |
124 |
|
|
|
125 |
|
|
|
126 |
|
|
/** |
127 |
|
|
* Main entry point. |
128 |
|
|
*/ |
129 |
|
|
int main(int argc, char *argv[]) |
130 |
|
|
{ |
131 |
james |
56 |
int i = 1; |
132 |
james |
40 |
|
133 |
|
|
if (argc < 2) |
134 |
james |
56 |
die("Usage: httplint [--html] url [url ...]"); |
135 |
james |
40 |
|
136 |
|
|
init(); |
137 |
|
|
|
138 |
james |
56 |
if (1 < argc && strcmp(argv[1], "--html") == 0) { |
139 |
|
|
html = true; |
140 |
|
|
i++; |
141 |
|
|
} |
142 |
|
|
|
143 |
|
|
for (; i != argc; i++) |
144 |
james |
40 |
check_url(argv[i]); |
145 |
|
|
|
146 |
|
|
curl_global_cleanup(); |
147 |
|
|
|
148 |
|
|
return 0; |
149 |
|
|
} |
150 |
|
|
|
151 |
|
|
|
152 |
|
|
/** |
153 |
|
|
* Initialise the curl handle and compile regular expressions. |
154 |
|
|
*/ |
155 |
|
|
void init(void) |
156 |
|
|
{ |
157 |
|
|
struct curl_slist *request_headers = 0; |
158 |
|
|
|
159 |
|
|
if (curl_global_init(CURL_GLOBAL_ALL)) |
160 |
|
|
die("Failed to initialise libcurl"); |
161 |
|
|
|
162 |
|
|
curl = curl_easy_init(); |
163 |
|
|
if (!curl) |
164 |
|
|
die("Failed to create curl handle"); |
165 |
|
|
|
166 |
|
|
if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback)) |
167 |
|
|
die("Failed to set curl options"); |
168 |
|
|
if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback)) |
169 |
|
|
die("Failed to set curl options"); |
170 |
|
|
if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint")) |
171 |
|
|
die("Failed to set curl options"); |
172 |
|
|
if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer)) |
173 |
|
|
die("Failed to set curl options"); |
174 |
|
|
|
175 |
|
|
/* remove libcurl default headers */ |
176 |
|
|
request_headers = curl_slist_append(request_headers, "Accept:"); |
177 |
|
|
request_headers = curl_slist_append(request_headers, "Pragma:"); |
178 |
|
|
if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers)) |
179 |
|
|
die("Failed to set curl options"); |
180 |
|
|
|
181 |
|
|
/* compile regular expressions */ |
182 |
|
|
regcomp_wrapper(&re_status_line, |
183 |
|
|
"^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~�-�]*)$", |
184 |
|
|
REG_EXTENDED); |
185 |
|
|
regcomp_wrapper(&re_token, |
186 |
james |
44 |
"^([-0-9a-zA-Z_.!]+)", |
187 |
james |
40 |
REG_EXTENDED); |
188 |
|
|
regcomp_wrapper(&re_token_value, |
189 |
james |
44 |
"^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?", |
190 |
james |
40 |
REG_EXTENDED); |
191 |
|
|
regcomp_wrapper(&re_content_type, |
192 |
|
|
"^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*" |
193 |
|
|
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
194 |
|
|
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
195 |
|
|
REG_EXTENDED); |
196 |
|
|
regcomp_wrapper(&re_absolute_uri, |
197 |
|
|
"^[a-zA-Z0-9]+://[^ ]+$", |
198 |
|
|
REG_EXTENDED); |
199 |
|
|
regcomp_wrapper(&re_etag, |
200 |
|
|
"^(W/[ \t]*)?\"([^\"]|[\\].)*\"$", |
201 |
|
|
REG_EXTENDED); |
202 |
|
|
regcomp_wrapper(&re_server, |
203 |
james |
44 |
"^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$", |
204 |
james |
40 |
REG_EXTENDED); |
205 |
|
|
regcomp_wrapper(&re_transfer_coding, |
206 |
|
|
"^([-0-9a-zA-Z_.]+)[ \t]*" |
207 |
|
|
"(;[ \t]*([-0-9a-zA-Z_.]+)=" |
208 |
|
|
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", |
209 |
|
|
REG_EXTENDED); |
210 |
|
|
regcomp_wrapper(&re_upgrade, |
211 |
|
|
"^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$", |
212 |
|
|
REG_EXTENDED); |
213 |
|
|
regcomp_wrapper(&re_ugly, |
214 |
james |
43 |
"^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$", |
215 |
james |
40 |
REG_EXTENDED); |
216 |
james |
41 |
regcomp_wrapper(&re_rfc1123, |
217 |
|
|
"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) " |
218 |
|
|
"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) " |
219 |
|
|
"([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", |
220 |
|
|
REG_EXTENDED); |
221 |
|
|
regcomp_wrapper(&re_rfc1036, |
222 |
|
|
"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), " |
223 |
|
|
"([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-" |
224 |
|
|
"([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", |
225 |
|
|
REG_EXTENDED); |
226 |
|
|
regcomp_wrapper(&re_asctime, |
227 |
|
|
"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) " |
228 |
|
|
"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) " |
229 |
|
|
"([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$", |
230 |
|
|
REG_EXTENDED); |
231 |
james |
48 |
regcomp_wrapper(&re_cookie_nameval, |
232 |
|
|
"^[^;, ]+=[^;, ]*$", |
233 |
|
|
REG_EXTENDED); |
234 |
|
|
regcomp_wrapper(&re_cookie_expires, |
235 |
|
|
"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-" |
236 |
|
|
"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) " |
237 |
|
|
"([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", |
238 |
|
|
REG_EXTENDED); |
239 |
james |
40 |
} |
240 |
|
|
|
241 |
|
|
|
242 |
|
|
/**
 * Compile a regular expression, handling errors.
 *
 * \param preg    storage for the compiled expression
 * \param regex   pattern source
 * \param cflags  flags handed to regcomp()
 *
 * When compilation fails the pattern is written to stderr and the regerror()
 * text is passed to die().
 */
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	const int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
256 |
|
|
|
257 |
|
|
|
258 |
|
|
/** |
259 |
|
|
* Fetch and check the headers for the specified url. |
260 |
|
|
*/ |
261 |
|
|
void check_url(const char *url) |
262 |
|
|
{ |
263 |
|
|
int i, r; |
264 |
|
|
CURLcode code; |
265 |
|
|
|
266 |
|
|
start = true; |
267 |
|
|
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) |
268 |
|
|
header_table[i].count = 0; |
269 |
|
|
|
270 |
james |
56 |
if (!html) |
271 |
|
|
printf("Checking URL %s\n", url); |
272 |
|
|
if (strncmp(url, "http", 4)) { |
273 |
|
|
if (html) |
274 |
|
|
printf("<p class='warning'>"); |
275 |
|
|
printf("Warning: this is not an http or https url"); |
276 |
|
|
if (html) |
277 |
|
|
printf("</p>"); |
278 |
|
|
printf("\n"); |
279 |
|
|
} |
280 |
james |
40 |
|
281 |
|
|
if (curl_easy_setopt(curl, CURLOPT_URL, url)) |
282 |
|
|
die("Failed to set curl options"); |
283 |
|
|
|
284 |
james |
56 |
if (html) |
285 |
|
|
printf("<ul>\n"); |
286 |
james |
40 |
code = curl_easy_perform(curl); |
287 |
james |
56 |
if (html) |
288 |
|
|
printf("</ul>\n"); |
289 |
james |
40 |
if (code != CURLE_OK && code != CURLE_WRITE_ERROR) { |
290 |
james |
56 |
if (html) |
291 |
|
|
printf("<p class='error'>"); |
292 |
|
|
printf("Error: "); |
293 |
|
|
print(error_buffer, strlen(error_buffer)); |
294 |
|
|
printf("."); |
295 |
|
|
if (html) |
296 |
|
|
printf("</p>"); |
297 |
|
|
printf("\n"); |
298 |
james |
40 |
return; |
299 |
|
|
} else { |
300 |
|
|
printf("\n"); |
301 |
james |
56 |
if (html) |
302 |
|
|
printf("<ul>"); |
303 |
james |
40 |
for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) { |
304 |
|
|
if (header_table[i].count == 0 && header_table[i].missing) |
305 |
|
|
lookup(header_table[i].missing); |
306 |
|
|
} |
307 |
|
|
} |
308 |
|
|
|
309 |
|
|
r = regexec(&re_ugly, url, 0, 0, 0); |
310 |
|
|
if (r) |
311 |
|
|
lookup("ugly"); |
312 |
james |
56 |
|
313 |
|
|
if (html) |
314 |
|
|
printf("</ul>"); |
315 |
james |
40 |
} |
316 |
|
|
|
317 |
|
|
|
318 |
|
|
/** |
319 |
|
|
* Callback for received header data. |
320 |
|
|
*/ |
321 |
|
|
size_t header_callback(char *ptr, size_t msize, size_t nmemb, void *stream) |
322 |
|
|
{ |
323 |
|
|
const size_t size = msize * nmemb; |
324 |
|
|
char s[400], *name, *value; |
325 |
|
|
|
326 |
|
|
UNUSED(stream); |
327 |
|
|
|
328 |
james |
56 |
printf(html ? "<li><code>" : "* "); |
329 |
james |
40 |
print(ptr, size); |
330 |
james |
56 |
printf(html ? "</code><ul>" : "\n"); |
331 |
james |
40 |
|
332 |
|
|
if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) { |
333 |
|
|
lookup("notcrlf"); |
334 |
james |
56 |
if (html) |
335 |
|
|
printf("</ul></li>\n"); |
336 |
james |
40 |
return size; |
337 |
|
|
} |
338 |
|
|
if (sizeof s <= size) { |
339 |
james |
56 |
lookup("headertoolong"); |
340 |
|
|
if (html) |
341 |
|
|
printf("</ul></li>\n"); |
342 |
james |
40 |
return size; |
343 |
|
|
} |
344 |
|
|
strncpy(s, ptr, size); |
345 |
|
|
s[size - 2] = 0; |
346 |
|
|
|
347 |
|
|
name = s; |
348 |
|
|
value = strchr(s, ':'); |
349 |
|
|
|
350 |
|
|
if (s[0] == 0) { |
351 |
|
|
/* empty header indicates end of headers */ |
352 |
james |
56 |
lookup("endofheaders"); |
353 |
|
|
if (html) |
354 |
|
|
printf("</ul></li>\n"); |
355 |
james |
40 |
return 0; |
356 |
|
|
|
357 |
|
|
} else if (start) { |
358 |
|
|
/* Status-Line [6.1] */ |
359 |
|
|
check_status_line(s); |
360 |
|
|
start = false; |
361 |
|
|
|
362 |
|
|
} else if (!value) { |
363 |
|
|
lookup("missingcolon"); |
364 |
|
|
|
365 |
|
|
} else { |
366 |
|
|
*value = 0; |
367 |
|
|
value++; |
368 |
|
|
|
369 |
|
|
check_header(name, skip_lws(value)); |
370 |
|
|
} |
371 |
|
|
|
372 |
james |
56 |
if (html) |
373 |
|
|
printf("</ul></li>\n"); |
374 |
james |
40 |
return size; |
375 |
|
|
} |
376 |
|
|
|
377 |
|
|
|
378 |
|
|
/**
 * Callback for received body data.
 *
 * The body is of no interest, so every call returns 0 in order to abort the
 * fetch. None of the arguments are examined.
 */
size_t data_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
	(void) ptr;
	(void) size;
	(void) nmemb;
	(void) stream;

	return 0;
}
392 |
|
|
|
393 |
|
|
|
394 |
|
|
/** |
395 |
|
|
* Check the syntax and content of the response Status-Line [6.1]. |
396 |
|
|
*/ |
397 |
|
|
void check_status_line(const char *s) |
398 |
|
|
{ |
399 |
|
|
const char *reason; |
400 |
|
|
unsigned int major = 0, minor = 0; |
401 |
|
|
int r; |
402 |
|
|
regmatch_t pmatch[5]; |
403 |
|
|
|
404 |
|
|
r = regexec(&re_status_line, s, 5, pmatch, 0); |
405 |
|
|
if (r) { |
406 |
|
|
lookup("badstatusline"); |
407 |
|
|
return; |
408 |
|
|
} |
409 |
|
|
|
410 |
|
|
major = atoi(s + pmatch[1].rm_so); |
411 |
|
|
minor = atoi(s + pmatch[2].rm_so); |
412 |
|
|
status_code = atoi(s + pmatch[3].rm_so); |
413 |
|
|
reason = s + pmatch[4].rm_so; |
414 |
|
|
|
415 |
|
|
if (major < 1 || (major == 1 && minor == 0)) { |
416 |
|
|
lookup("oldhttp"); |
417 |
|
|
} else if ((major == 1 && 1 < minor) || 1 < major) { |
418 |
|
|
lookup("futurehttp"); |
419 |
|
|
} else { |
420 |
|
|
if (status_code < 100 || 600 <= status_code) { |
421 |
|
|
lookup("badstatus"); |
422 |
|
|
} else { |
423 |
|
|
char key[] = "xxx"; |
424 |
|
|
key[0] = '0' + status_code / 100; |
425 |
|
|
lookup(key); |
426 |
|
|
} |
427 |
|
|
} |
428 |
|
|
} |
429 |
|
|
|
430 |
|
|
|
431 |
|
|
/** |
432 |
|
|
* Check the syntax and content of a header. |
433 |
|
|
*/ |
434 |
|
|
void check_header(const char *name, const char *value) |
435 |
|
|
{ |
436 |
|
|
struct header_entry *header; |
437 |
|
|
|
438 |
|
|
header = bsearch(name, header_table, |
439 |
|
|
sizeof header_table / sizeof header_table[0], |
440 |
|
|
sizeof header_table[0], |
441 |
|
|
(int (*)(const void *, const void *)) strcasecmp); |
442 |
|
|
|
443 |
|
|
if (header) { |
444 |
|
|
header->count++; |
445 |
|
|
header->handler(value); |
446 |
james |
44 |
} else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') { |
447 |
|
|
lookup("xheader"); |
448 |
|
|
} else { |
449 |
james |
40 |
lookup("nonstandard"); |
450 |
james |
44 |
} |
451 |
james |
40 |
} |
452 |
|
|
|
453 |
|
|
|
454 |
|
|
/** |
455 |
|
|
* Attempt to parse an HTTP Full Date (3.3.1), returning true on success. |
456 |
|
|
*/ |
457 |
|
|
bool parse_date(const char *s, struct tm *tm) |
458 |
|
|
{ |
459 |
james |
41 |
int r; |
460 |
james |
40 |
int len = strlen(s); |
461 |
james |
41 |
regmatch_t pmatch[20]; |
462 |
james |
40 |
|
463 |
james |
59 |
tm->tm_isdst = 0; |
464 |
|
|
tm->tm_gmtoff = 0; |
465 |
|
|
tm->tm_zone = "GMT"; |
466 |
|
|
|
467 |
james |
40 |
if (len == 29) { |
468 |
|
|
/* RFC 1123 */ |
469 |
james |
41 |
r = regexec(&re_rfc1123, s, 20, pmatch, 0); |
470 |
|
|
if (r == 0) { |
471 |
|
|
tm->tm_mday = atoi(s + pmatch[2].rm_so); |
472 |
|
|
tm->tm_mon = month(s + pmatch[3].rm_so); |
473 |
|
|
tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900; |
474 |
|
|
tm->tm_hour = atoi(s + pmatch[5].rm_so); |
475 |
|
|
tm->tm_min = atoi(s + pmatch[6].rm_so); |
476 |
|
|
tm->tm_sec = atoi(s + pmatch[7].rm_so); |
477 |
james |
40 |
return true; |
478 |
james |
41 |
} |
479 |
james |
40 |
|
480 |
|
|
} else if (len == 24) { |
481 |
|
|
/* asctime() format */ |
482 |
james |
41 |
r = regexec(&re_asctime, s, 20, pmatch, 0); |
483 |
|
|
if (r == 0) { |
484 |
|
|
if (s[pmatch[3].rm_so] == ' ') |
485 |
|
|
tm->tm_mday = atoi(s + pmatch[3].rm_so + 1); |
486 |
|
|
else |
487 |
|
|
tm->tm_mday = atoi(s + pmatch[3].rm_so); |
488 |
|
|
tm->tm_mon = month(s + pmatch[2].rm_so); |
489 |
|
|
tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900; |
490 |
|
|
tm->tm_hour = atoi(s + pmatch[4].rm_so); |
491 |
|
|
tm->tm_min = atoi(s + pmatch[5].rm_so); |
492 |
|
|
tm->tm_sec = atoi(s + pmatch[6].rm_so); |
493 |
james |
40 |
lookup("asctime"); |
494 |
|
|
return true; |
495 |
|
|
} |
496 |
|
|
|
497 |
|
|
} else { |
498 |
|
|
/* RFC 1036 */ |
499 |
james |
41 |
r = regexec(&re_rfc1036, s, 20, pmatch, 0); |
500 |
|
|
if (r == 0) { |
501 |
|
|
tm->tm_mday = atoi(s + pmatch[2].rm_so); |
502 |
|
|
tm->tm_mon = month(s + pmatch[3].rm_so); |
503 |
|
|
tm->tm_year = 100 + atoi(s + pmatch[4].rm_so); |
504 |
|
|
tm->tm_hour = atoi(s + pmatch[5].rm_so); |
505 |
|
|
tm->tm_min = atoi(s + pmatch[6].rm_so); |
506 |
|
|
tm->tm_sec = atoi(s + pmatch[7].rm_so); |
507 |
james |
40 |
lookup("rfc1036"); |
508 |
|
|
return true; |
509 |
|
|
} |
510 |
|
|
|
511 |
|
|
} |
512 |
|
|
|
513 |
|
|
lookup("baddate"); |
514 |
|
|
return false; |
515 |
|
|
} |
516 |
|
|
|
517 |
|
|
|
518 |
|
|
/**
 * Convert a month name to the month number.
 *
 * \param s  string starting with a three-letter English month abbreviation
 *           ("Jan" to "Dec"); anything may follow the three letters
 * \return month number from 0 (January) to 11 (December); 0 is also returned
 *         when the name is not recognised
 */
int month(const char *s)
{
	static const char names[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	int i;

	/* compare all three letters, so a malformed name cannot be taken for
	 * a neighbouring month */
	for (i = 0; i != 12; i++) {
		if (strncmp(s, names[i], 3) == 0)
			return i;
	}

	return 0;
}
548 |
|
|
|
549 |
|
|
|
550 |
|
|
/**
 * UTC version of mktime, from
 * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
 *
 * \param t  broken-down time to be read as UTC; it is passed to mktime() and
 *           may therefore be modified
 * \return the corresponding time_t, or -1 on failure
 *
 * mktime() reads its argument as local time. The result is converted back
 * with gmtime() and run through mktime() a second time; the difference
 * between the two results is then used to correct the first one.
 */
time_t mktime_from_utc(struct tm *t)
{
	time_t tl, tb;
	struct tm *tg;
	struct tm utc;

	tl = mktime(t);
	if (tl == (time_t) -1) {
		/* retry one hour earlier and add the hour back afterwards */
		t->tm_hour--;
		tl = mktime(t);
		if (tl == (time_t) -1)
			return -1; /* can't deal with output from strptime */
		tl += 3600;
	}

	tg = gmtime(&tl);
	if (!tg)
		return -1; /* gmtime() could not represent the value */
	/* work on a copy rather than on gmtime()'s shared result */
	utc = *tg;
	utc.tm_isdst = 0;

	tb = mktime(&utc);
	if (tb == (time_t) -1) {
		utc.tm_hour--;
		tb = mktime(&utc);
		if (tb == (time_t) -1)
			return -1; /* can't deal with output from gmtime */
		tb += 3600;
	}

	return tl - (tb - tl);
}
581 |
|
|
|
582 |
|
|
|
583 |
|
|
/**
 * Skip optional LWS (linear white space) [2.2]
 *
 * \param s  string to examine
 * \return pointer to the first character after an optional CR LF (skipped
 *         only when a space or tab follows it) and any run of spaces and tabs
 */
const char *skip_lws(const char *s)
{
	const char *p = s;

	/* a line fold: CR LF immediately followed by white space */
	if (p[0] == '\r' && p[1] == '\n' && (p[2] == ' ' || p[2] == '\t'))
		p += 2;

	return p + strspn(p, " \t");
}
594 |
|
|
|
595 |
|
|
|
596 |
|
|
/** |
597 |
|
|
* Parse a list of elements (#rule in [2.1]). |
598 |
|
|
*/ |
599 |
|
|
bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m, |
600 |
|
|
void (*callback)(const char *s, regmatch_t pmatch[])) |
601 |
|
|
{ |
602 |
|
|
int r; |
603 |
|
|
unsigned int items = 0; |
604 |
|
|
regmatch_t pmatch[20]; |
605 |
|
|
|
606 |
|
|
do { |
607 |
|
|
r = regexec(preg, s, 20, pmatch, 0); |
608 |
|
|
if (r) { |
609 |
james |
56 |
if (html) |
610 |
|
|
printf("<li class='error'>"); |
611 |
james |
40 |
printf(" Failed to match list item %i\n", items + 1); |
612 |
james |
56 |
if (html) |
613 |
|
|
printf("</li>\n"); |
614 |
james |
40 |
return false; |
615 |
|
|
} |
616 |
|
|
|
617 |
|
|
if (callback) |
618 |
|
|
callback(s, pmatch); |
619 |
|
|
items++; |
620 |
|
|
|
621 |
|
|
s += pmatch[0].rm_eo; |
622 |
|
|
s = skip_lws(s); |
623 |
|
|
if (*s == 0) |
624 |
|
|
break; |
625 |
|
|
if (*s != ',') { |
626 |
james |
56 |
if (html) |
627 |
|
|
printf("<li class='error'>"); |
628 |
james |
40 |
printf(" Expecting , after list item %i\n", items); |
629 |
james |
56 |
if (html) |
630 |
|
|
printf("</li>\n"); |
631 |
james |
40 |
return false; |
632 |
|
|
} |
633 |
|
|
while (*s == ',') |
634 |
|
|
s = skip_lws(s + 1); |
635 |
|
|
} while (*s != 0); |
636 |
|
|
|
637 |
|
|
if (items < n || m < items) { |
638 |
james |
56 |
if (html) |
639 |
|
|
printf("<li class='error'>"); |
640 |
james |
40 |
printf(" %i items in list, but there should be ", items); |
641 |
|
|
if (m == UINT_MAX) |
642 |
|
|
printf("at least %i\n", n); |
643 |
|
|
else |
644 |
|
|
printf("between %i and %i\n", n, m); |
645 |
james |
56 |
if (html) |
646 |
|
|
printf("</li>\n"); |
647 |
james |
40 |
return false; |
648 |
|
|
} |
649 |
|
|
|
650 |
|
|
return true; |
651 |
|
|
} |
652 |
|
|
|
653 |
|
|
|
654 |
|
|
/* Header-specific validation. */ |
655 |
|
|
/**
 * Check an Accept-Ranges value: "bytes" and "none" pass "ok" to lookup(),
 * anything else passes "unknownrange".
 */
void header_accept_ranges(const char *s)
{
	const bool known = strcmp(s, "bytes") == 0 || strcmp(s, "none") == 0;

	lookup(known ? "ok" : "unknownrange");
}
664 |
|
|
|
665 |
|
|
void header_age(const char *s) |
666 |
|
|
{ |
667 |
|
|
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
668 |
|
|
lookup("badage"); |
669 |
|
|
else |
670 |
|
|
lookup("ok"); |
671 |
|
|
} |
672 |
|
|
|
673 |
|
|
void header_allow(const char *s) |
674 |
|
|
{ |
675 |
|
|
if (parse_list(s, &re_token, 0, UINT_MAX, 0)) |
676 |
|
|
lookup("ok"); |
677 |
|
|
else |
678 |
|
|
lookup("badallow"); |
679 |
|
|
} |
680 |
|
|
|
681 |
|
|
void header_cache_control(const char *s) |
682 |
|
|
{ |
683 |
|
|
if (parse_list(s, &re_token_value, 1, UINT_MAX, |
684 |
|
|
header_cache_control_callback)) |
685 |
|
|
lookup("ok"); |
686 |
|
|
else |
687 |
|
|
lookup("badcachecont"); |
688 |
|
|
} |
689 |
|
|
|
690 |
|
|
char cache_control_list[][20] = { |
691 |
|
|
"max-age", "max-stale", "min-fresh", "must-revalidate", |
692 |
|
|
"no-cache", "no-store", "no-transform", "only-if-cached", |
693 |
|
|
"private", "proxy-revalidate", "public", "s-maxage" |
694 |
|
|
}; |
695 |
|
|
|
696 |
|
|
void header_cache_control_callback(const char *s, regmatch_t pmatch[]) |
697 |
|
|
{ |
698 |
|
|
size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; |
699 |
|
|
char name[20]; |
700 |
|
|
char *dir; |
701 |
|
|
|
702 |
|
|
if (19 < len) { |
703 |
|
|
lookup("unknowncachecont"); |
704 |
|
|
return; |
705 |
|
|
} |
706 |
|
|
|
707 |
|
|
strncpy(name, s + pmatch[1].rm_so, len); |
708 |
|
|
name[len] = 0; |
709 |
|
|
|
710 |
|
|
dir = bsearch(name, cache_control_list, |
711 |
|
|
sizeof cache_control_list / sizeof cache_control_list[0], |
712 |
|
|
sizeof cache_control_list[0], |
713 |
|
|
(int (*)(const void *, const void *)) strcasecmp); |
714 |
|
|
|
715 |
|
|
if (!dir) { |
716 |
james |
56 |
if (html) |
717 |
|
|
printf("<li class='warning'>"); |
718 |
|
|
printf(" Cache-Control directive '"); |
719 |
|
|
print(name, strlen(name)); |
720 |
|
|
printf("':\n"); |
721 |
|
|
if (html) |
722 |
|
|
printf("</li>\n"); |
723 |
james |
40 |
lookup("unknowncachecont"); |
724 |
|
|
} |
725 |
|
|
} |
726 |
|
|
|
727 |
|
|
/**
 * Check a Connection value: only the exact string "close" passes "ok" to
 * lookup(); anything else passes "badconnection".
 */
void header_connection(const char *s)
{
	const bool is_close = strcmp(s, "close") == 0;

	lookup(is_close ? "ok" : "badconnection");
}
734 |
|
|
|
735 |
|
|
void header_content_encoding(const char *s) |
736 |
|
|
{ |
737 |
|
|
if (parse_list(s, &re_token, 1, UINT_MAX, |
738 |
|
|
header_content_encoding_callback)) |
739 |
|
|
lookup("ok"); |
740 |
|
|
else |
741 |
|
|
lookup("badcontenc"); |
742 |
|
|
} |
743 |
|
|
|
744 |
|
|
char content_coding_list[][20] = { |
745 |
|
|
"compress", "deflate", "gzip", "identity" |
746 |
|
|
}; |
747 |
|
|
|
748 |
|
|
void header_content_encoding_callback(const char *s, regmatch_t pmatch[]) |
749 |
|
|
{ |
750 |
|
|
size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; |
751 |
|
|
char name[20]; |
752 |
|
|
char *dir; |
753 |
|
|
|
754 |
|
|
if (19 < len) { |
755 |
|
|
lookup("unknowncontenc"); |
756 |
|
|
return; |
757 |
|
|
} |
758 |
|
|
|
759 |
|
|
strncpy(name, s + pmatch[1].rm_so, len); |
760 |
|
|
name[len] = 0; |
761 |
|
|
|
762 |
|
|
dir = bsearch(name, content_coding_list, |
763 |
|
|
sizeof content_coding_list / sizeof content_coding_list[0], |
764 |
|
|
sizeof content_coding_list[0], |
765 |
|
|
(int (*)(const void *, const void *)) strcasecmp); |
766 |
|
|
if (!dir) { |
767 |
james |
56 |
if (html) |
768 |
|
|
printf("<li class='warning'>"); |
769 |
james |
40 |
printf(" Content-Encoding '%s':\n", name); |
770 |
james |
56 |
if (html) |
771 |
|
|
printf("</li>\n"); |
772 |
james |
40 |
lookup("unknowncontenc"); |
773 |
|
|
} |
774 |
|
|
} |
775 |
|
|
|
776 |
|
|
void header_content_language(const char *s) |
777 |
|
|
{ |
778 |
|
|
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
779 |
|
|
lookup("ok"); |
780 |
|
|
else |
781 |
|
|
lookup("badcontlang"); |
782 |
|
|
} |
783 |
|
|
|
784 |
|
|
void header_content_length(const char *s) |
785 |
|
|
{ |
786 |
|
|
if (s[0] == 0 || strspn(s, NUMBER) != strlen(s)) |
787 |
|
|
lookup("badcontlen"); |
788 |
|
|
else |
789 |
|
|
lookup("ok"); |
790 |
|
|
} |
791 |
|
|
|
792 |
|
|
/**
 * Check a Content-Location value: a value containing a space passes
 * "badcontloc" to lookup().
 */
void header_content_location(const char *s)
{
	const bool has_space = strchr(s, ' ') != NULL;

	lookup(has_space ? "badcontloc" : "ok");
}
799 |
|
|
|
800 |
|
|
/**
 * Check a Content-MD5 value: only its length is checked, which must be
 * exactly 24 characters; otherwise "badcontmd5" is passed to lookup().
 */
void header_content_md5(const char *s)
{
	const bool right_length = strlen(s) == 24;

	lookup(right_length ? "ok" : "badcontmd5");
}
807 |
|
|
|
808 |
|
|
/**
 * Content-Range: the value is not examined; "contentrange" is always passed
 * to lookup().
 */
void header_content_range(const char *s)
{
	(void) s;

	lookup("contentrange");
}
813 |
|
|
|
814 |
|
|
void header_content_type(const char *s) |
815 |
|
|
{ |
816 |
|
|
bool charset = false; |
817 |
|
|
char *type, *subtype; |
818 |
|
|
unsigned int i; |
819 |
|
|
int r; |
820 |
|
|
regmatch_t pmatch[30]; |
821 |
|
|
|
822 |
|
|
r = regexec(&re_content_type, s, 30, pmatch, 0); |
823 |
|
|
if (r) { |
824 |
|
|
lookup("badcontenttype"); |
825 |
|
|
return; |
826 |
|
|
} |
827 |
|
|
|
828 |
|
|
type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so); |
829 |
|
|
subtype = strndup(s + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so); |
830 |
|
|
|
831 |
|
|
/* parameters */ |
832 |
|
|
for (i = 3; i != 30 && pmatch[i].rm_so != -1; i += 3) { |
833 |
|
|
char *attrib, *value; |
834 |
|
|
|
835 |
|
|
attrib = strndup(s + pmatch[i + 1].rm_so, |
836 |
|
|
pmatch[i + 1].rm_eo - pmatch[i + 1].rm_so); |
837 |
|
|
value = strndup(s + pmatch[i + 2].rm_so, |
838 |
|
|
pmatch[i + 2].rm_eo - pmatch[i + 2].rm_so); |
839 |
|
|
|
840 |
|
|
if (strcasecmp(attrib, "charset") == 0) |
841 |
|
|
charset = true; |
842 |
|
|
} |
843 |
|
|
|
844 |
|
|
if (strcasecmp(type, "text") == 0 && !charset) |
845 |
|
|
lookup("nocharset"); |
846 |
|
|
else |
847 |
|
|
lookup("ok"); |
848 |
|
|
} |
849 |
|
|
|
850 |
|
|
/**
 * Check a Date value: it must parse as an HTTP date and lie within 10
 * seconds of the local clock, otherwise "wrongdate" is passed to lookup().
 * Nothing further is reported here if parse_date() rejects the value.
 */
void header_date(const char *s)
{
	/* sample the clock before parsing, as close to receipt as possible */
	const time_t now = time(0);
	struct tm parsed;
	double skew;

	if (!parse_date(s, &parsed))
		return;

	skew = difftime(now, mktime_from_utc(&parsed));
	lookup(10 < fabs(skew) ? "wrongdate" : "ok");
}
867 |
|
|
|
868 |
|
|
void header_etag(const char *s) |
869 |
|
|
{ |
870 |
|
|
int r; |
871 |
|
|
r = regexec(&re_etag, s, 0, 0, 0); |
872 |
|
|
if (r) |
873 |
|
|
lookup("badetag"); |
874 |
|
|
else |
875 |
|
|
lookup("ok"); |
876 |
|
|
} |
877 |
|
|
|
878 |
|
|
/**
 * Check an Expires value: "ok" is passed to lookup() when it parses as an
 * HTTP date. Nothing further is reported here if parse_date() rejects it.
 */
void header_expires(const char *s)
{
	struct tm parsed;

	if (!parse_date(s, &parsed))
		return;

	lookup("ok");
}
884 |
|
|
|
885 |
|
|
/**
 * Check a Last-Modified value: it must parse as an HTTP date and must not
 * be more than 10 seconds ahead of the local clock, otherwise
 * "futurelastmod" is passed to lookup(). Nothing further is reported here if
 * parse_date() rejects the value.
 */
void header_last_modified(const char *s)
{
	/* sample the clock before parsing */
	const time_t now = time(0);
	struct tm parsed;
	double ahead;

	if (!parse_date(s, &parsed))
		return;

	ahead = difftime(mktime_from_utc(&parsed), now);
	lookup(10 < ahead ? "futurelastmod" : "ok");
}
902 |
|
|
|
903 |
|
|
void header_location(const char *s) |
904 |
|
|
{ |
905 |
|
|
int r; |
906 |
|
|
r = regexec(&re_absolute_uri, s, 0, 0, 0); |
907 |
|
|
if (r) |
908 |
|
|
lookup("badlocation"); |
909 |
|
|
else |
910 |
|
|
lookup("ok"); |
911 |
|
|
} |
912 |
|
|
|
913 |
|
|
void header_pragma(const char *s) |
914 |
|
|
{ |
915 |
|
|
if (parse_list(s, &re_token_value, 1, UINT_MAX, 0)) |
916 |
|
|
lookup("ok"); |
917 |
|
|
else |
918 |
|
|
lookup("badpragma"); |
919 |
|
|
} |
920 |
|
|
|
921 |
|
|
void header_retry_after(const char *s) |
922 |
|
|
{ |
923 |
|
|
struct tm tm; |
924 |
|
|
|
925 |
|
|
if (s[0] != 0 && strspn(s, NUMBER) == strlen(s)) { |
926 |
|
|
lookup("ok"); |
927 |
|
|
return; |
928 |
|
|
} |
929 |
|
|
|
930 |
|
|
if (!parse_date(s, &tm)) |
931 |
|
|
return; |
932 |
|
|
|
933 |
|
|
lookup("ok"); |
934 |
|
|
} |
935 |
|
|
|
936 |
|
|
void header_server(const char *s) |
937 |
|
|
{ |
938 |
|
|
int r; |
939 |
|
|
r = regexec(&re_server, s, 0, 0, 0); |
940 |
|
|
if (r) |
941 |
|
|
lookup("badserver"); |
942 |
|
|
else |
943 |
|
|
lookup("ok"); |
944 |
|
|
} |
945 |
|
|
|
946 |
|
|
void header_trailer(const char *s) |
947 |
|
|
{ |
948 |
|
|
if (parse_list(s, &re_token, 1, UINT_MAX, 0)) |
949 |
|
|
lookup("ok"); |
950 |
|
|
else |
951 |
|
|
lookup("badtrailer"); |
952 |
|
|
} |
953 |
|
|
|
954 |
|
|
void header_transfer_encoding(const char *s) |
955 |
|
|
{ |
956 |
|
|
if (parse_list(s, &re_transfer_coding, 1, UINT_MAX, |
957 |
|
|
header_transfer_encoding_callback)) |
958 |
|
|
lookup("ok"); |
959 |
|
|
else |
960 |
|
|
lookup("badtransenc"); |
961 |
|
|
} |
962 |
|
|
|
963 |
|
|
/*
 * Transfer-coding names recognised by header_transfer_encoding_callback().
 * The list is searched with bsearch(), so it must be kept in alphabetical
 * order, and each name must fit in 19 characters.
 */
char transfer_coding_list[][20] = {
	"chunked",
	"compress",
	"deflate",
	"gzip",
	"identity",
};
966 |
|
|
|
967 |
|
|
void header_transfer_encoding_callback(const char *s, regmatch_t pmatch[]) |
968 |
|
|
{ |
969 |
|
|
size_t len = pmatch[1].rm_eo - pmatch[1].rm_so; |
970 |
|
|
char name[20]; |
971 |
|
|
char *dir; |
972 |
|
|
|
973 |
|
|
if (19 < len) { |
974 |
|
|
lookup("unknowntransenc"); |
975 |
|
|
return; |
976 |
|
|
} |
977 |
|
|
|
978 |
|
|
strncpy(name, s + pmatch[1].rm_so, len); |
979 |
|
|
name[len] = 0; |
980 |
|
|
|
981 |
|
|
dir = bsearch(name, transfer_coding_list, |
982 |
|
|
sizeof transfer_coding_list / sizeof transfer_coding_list[0], |
983 |
|
|
sizeof transfer_coding_list[0], |
984 |
|
|
(int (*)(const void *, const void *)) strcasecmp); |
985 |
|
|
if (!dir) { |
986 |
james |
56 |
if (html) |
987 |
|
|
printf("<li class='warning'>"); |
988 |
james |
40 |
printf(" Transfer-Encoding '%s':\n", name); |
989 |
james |
56 |
if (html) |
990 |
|
|
printf("</li>\n"); |
991 |
james |
40 |
lookup("unknowntransenc"); |
992 |
|
|
} |
993 |
|
|
} |
994 |
|
|
|
995 |
|
|
void header_upgrade(const char *s) |
996 |
|
|
{ |
997 |
|
|
int r; |
998 |
|
|
r = regexec(&re_upgrade, s, 0, 0, 0); |
999 |
|
|
if (r) |
1000 |
|
|
lookup("badupgrade"); |
1001 |
|
|
else |
1002 |
|
|
lookup("ok"); |
1003 |
|
|
} |
1004 |
|
|
|
1005 |
|
|
void header_vary(const char *s) |
1006 |
|
|
{ |
1007 |
|
|
if (strcmp(s, "*") == 0 || parse_list(s, &re_token, 1, UINT_MAX, 0)) |
1008 |
|
|
lookup("ok"); |
1009 |
|
|
else |
1010 |
|
|
lookup("badvary"); |
1011 |
|
|
} |
1012 |
|
|
|
1013 |
|
|
/**
 * Handle a Via header: the value is not examined, the "via" message is
 * always output.
 */
void header_via(const char *s)
{
	UNUSED(s);	/* value deliberately ignored */

	lookup("via");
}
1018 |
|
|
|
1019 |
james |
48 |
/* http://wp.netscape.com/newsref/std/cookie_spec.html */ |
1020 |
|
|
void header_set_cookie(const char *s) |
1021 |
|
|
{ |
1022 |
|
|
bool ok = true; |
1023 |
|
|
int r; |
1024 |
|
|
const char *semi = strchr(s, ';'); |
1025 |
|
|
const char *s2; |
1026 |
|
|
struct tm tm; |
1027 |
|
|
double diff; |
1028 |
|
|
time_t time0, time1; |
1029 |
|
|
regmatch_t pmatch[20]; |
1030 |
james |
40 |
|
1031 |
james |
48 |
if (semi) |
1032 |
|
|
s2 = strndup(s, semi - s); |
1033 |
|
|
else |
1034 |
|
|
s2 = s; |
1035 |
|
|
|
1036 |
|
|
r = regexec(&re_cookie_nameval, s2, 0, 0, 0); |
1037 |
|
|
if (r) { |
1038 |
|
|
lookup("cookiebadnameval"); |
1039 |
|
|
ok = false; |
1040 |
|
|
} |
1041 |
james |
50 |
|
1042 |
james |
48 |
if (!semi) |
1043 |
|
|
return; |
1044 |
|
|
|
1045 |
|
|
s = skip_lws(semi + 1); |
1046 |
|
|
|
1047 |
|
|
while (*s) { |
1048 |
|
|
semi = strchr(s, ';'); |
1049 |
|
|
if (semi) |
1050 |
|
|
s2 = strndup(s, semi - s); |
1051 |
|
|
else |
1052 |
|
|
s2 = s; |
1053 |
|
|
|
1054 |
james |
56 |
if (strncasecmp(s2, "expires=", 8) == 0) { |
1055 |
james |
48 |
s2 += 8; |
1056 |
|
|
r = regexec(&re_cookie_expires, s2, 20, pmatch, 0); |
1057 |
|
|
if (r == 0) { |
1058 |
|
|
tm.tm_mday = atoi(s2 + pmatch[2].rm_so); |
1059 |
|
|
tm.tm_mon = month(s2 + pmatch[3].rm_so); |
1060 |
|
|
tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900; |
1061 |
|
|
tm.tm_hour = atoi(s2 + pmatch[5].rm_so); |
1062 |
|
|
tm.tm_min = atoi(s2 + pmatch[6].rm_so); |
1063 |
|
|
tm.tm_sec = atoi(s2 + pmatch[7].rm_so); |
1064 |
|
|
|
1065 |
|
|
time0 = time(0); |
1066 |
|
|
time1 = mktime_from_utc(&tm); |
1067 |
|
|
|
1068 |
|
|
diff = difftime(time0, time1); |
1069 |
|
|
if (10 < diff) { |
1070 |
|
|
lookup("cookiepastdate"); |
1071 |
|
|
ok = false; |
1072 |
|
|
} |
1073 |
|
|
} else { |
1074 |
|
|
lookup("cookiebaddate"); |
1075 |
|
|
ok = false; |
1076 |
|
|
} |
1077 |
james |
56 |
} else if (strncasecmp(s2, "domain=", 7) == 0) { |
1078 |
|
|
} else if (strncasecmp(s2, "path=", 5) == 0) { |
1079 |
james |
48 |
if (s2[5] != '/') { |
1080 |
|
|
lookup("cookiebadpath"); |
1081 |
|
|
ok = false; |
1082 |
|
|
} |
1083 |
james |
56 |
} else if (strcasecmp(s, "secure") == 0) { |
1084 |
james |
48 |
} else { |
1085 |
james |
56 |
if (html) |
1086 |
|
|
printf("<li class='warning'>"); |
1087 |
james |
48 |
printf(" Set-Cookie field '%s':\n", s2); |
1088 |
james |
56 |
if (html) |
1089 |
|
|
printf("</li>\n"); |
1090 |
james |
48 |
lookup("cookieunknownfield"); |
1091 |
|
|
ok = false; |
1092 |
|
|
} |
1093 |
|
|
|
1094 |
|
|
if (semi) |
1095 |
|
|
s = skip_lws(semi + 1); |
1096 |
|
|
else |
1097 |
|
|
break; |
1098 |
|
|
} |
1099 |
|
|
|
1100 |
|
|
if (ok) |
1101 |
|
|
lookup("ok"); |
1102 |
|
|
} |
1103 |
|
|
|
1104 |
|
|
|
1105 |
james |
40 |
/**
 * Report a fatal error on stderr, prefixed with "httplint: ", and terminate
 * the program with a failure status. Does not return.
 */
void die(const char *error)
{
	fputs("httplint: ", stderr);
	fputs(error, stderr);
	fputc('\n', stderr);

	exit(EXIT_FAILURE);
}
1113 |
|
|
|
1114 |
|
|
|
1115 |
|
|
/** |
1116 |
|
|
* Print a string which contains control characters. |
1117 |
|
|
*/ |
1118 |
|
|
void print(const char *s, size_t len) |
1119 |
|
|
{ |
1120 |
|
|
size_t i; |
1121 |
|
|
for (i = 0; i != len; i++) { |
1122 |
james |
56 |
if (html && s[i] == '<') |
1123 |
|
|
printf("<"); |
1124 |
|
|
else if (html && s[i] == '>') |
1125 |
|
|
printf(">"); |
1126 |
|
|
else if (html && s[i] == '&') |
1127 |
|
|
printf("&"); |
1128 |
|
|
else if (31 < s[i] && s[i] < 127) |
1129 |
james |
40 |
putchar(s[i]); |
1130 |
james |
56 |
else { |
1131 |
|
|
if (html) |
1132 |
|
|
printf("<span class='cc'>"); |
1133 |
james |
40 |
printf("[%.2x]", s[i]); |
1134 |
james |
56 |
if (html) |
1135 |
|
|
printf("</span>"); |
1136 |
|
|
} |
1137 |
james |
40 |
} |
1138 |
|
|
} |
1139 |
|
|
|
1140 |
|
|
|
1141 |
|
|
/*
 * Messages output by lookup(), indexed by key.
 *
 * The table is searched with bsearch(), so entries must be kept sorted by
 * key, and each key must fit in 19 characters. lookup() uses a leading
 * "Warning:", "Error:" or "OK" in the value to choose the HTML class of the
 * message.
 */
struct message_entry {
	const char key[20];
	const char *value;
} message_table[] = {
	{ "1xx", "A response status code in the range 100 - 199 indicates a "
			"'provisional response'." },
	{ "2xx", "A response status code in the range 200 - 299 indicates that "
			"the request was successful." },
	{ "3xx", "A response status code in the range 300 - 399 indicates that "
			"the client should redirect to a new URL." },
	{ "4xx", "A response status code in the range 400 - 499 indicates that "
			"the request could not be fulfilled due to client error." },
	{ "5xx", "A response status code in the range 500 - 599 indicates that "
			"an error occurred on the server." },
	{ "asctime", "Warning: This date is in the obsolete asctime() format. "
			"Consider using the RFC 1123 format instead." },
	{ "badage", "Error: The Age header must be one number." },
	{ "badallow", "Error: The Allow header must be a comma-separated list of "
			"HTTP methods." },
	{ "badcachecont", "Error: The Cache-Control header must be a "
			"comma-separated list of directives." },
	{ "badconnection", "Warning: The only value of the Connection header "
			"defined by HTTP/1.1 is \"close\"." },
	{ "badcontenc", "Error: The Content-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badcontenttype", "Error: The Content-Type header must be of the form "
			"'type/subtype (; optional parameters)'." },
	{ "badcontlang", "Error: The Content-Language header must be a "
			"comma-separated list of language tags." },
	{ "badcontlen", "Error: The Content-Length header must be a number." },
	{ "badcontloc", "Error: The Content-Location header must be an absolute "
			"or relative URI." },
	{ "badcontmd5", "Error: The Content-MD5 header must be a base64 encoded "
			"MD5 sum." },
	{ "baddate", "Error: Failed to parse this date. Dates should be in the RFC "
			"1123 format." },
	{ "badetag", "Error: The ETag header must be a quoted string (optionally "
			"preceded by \"W/\" for a weak tag)." },
	{ "badlocation", "Error: The Location header must be an absolute URI. "
			"Relative URIs are not permitted." },
	{ "badpragma", "Error: The Pragma header must be a comma-separated list of "
			"directives." },
	{ "badserver", "Error: The Server header must be a space-separated list of "
			"products of the form Name/optional-version and comments "
			"in ()." },
	{ "badstatus", "Warning: The response status code is outside the standard "
			"range 100 - 599." },
	{ "badstatusline", "Error: Failed to parse the response Status-Line. The "
			"status line must be of the form 'HTTP/n.n <3-digit "
			"status> <reason phrase>'." },
	{ "badtrailer", "Error: The Trailer header must be a comma-separated list "
			"of header names." },
	{ "badtransenc", "Error: The Transfer-Encoding header must be a "
			"comma-separated list of encodings." },
	{ "badupgrade", "Error: The Upgrade header must be a comma-separated list "
			"of product identifiers." },
	{ "badvary", "Error: The Vary header must be a comma-separated list "
			"of header names, or \"*\"." },
	{ "contentrange", "Warning: The Content-Range header should not be returned "
			"by the server for this request." },
	{ "cookiebaddate", "Error: The expires date must be in the form "
			"\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." },
	{ "cookiebadnameval", "Error: A Set-Cookie header must start with "
			"name=value, each excluding semi-colon, comma and "
			"white space." },
	{ "cookiebadpath", "Error: The path does not start with \"/\"." },
	{ "cookiepastdate", "Warning: The expires date is in the past. The cookie "
			"will be deleted by browsers." },
	{ "cookieunknownfield", "Warning: This is not a standard Set-Cookie "
			"field." },
	{ "endofheaders", "End of headers." },
	{ "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer "
			"version of this tool." },
	{ "futurelastmod", "Error: The specified Last-Modified date-time is in "
			"the future." },
	{ "headertoolong", "Warning: Header too long: ignored." },
	{ "missingcolon", "Error: Headers must be of the form 'Name: value'." },
	{ "missingcontenttype", "Warning: No Content-Type header was present. The "
			"client will have to guess the media type or ask "
			"the user. Adding a Content-Type header is strongly "
			"recommended." },
	{ "missingcontlang", "Consider adding a Content-Language header if "
			"applicable for this document." },
	{ "missingdate", "Warning: No Date header was present. A Date header must "
			"be present, unless the server does not have a clock, or "
			"the response is 100, 101, or 500 - 599." },
	{ "missinglastmod", "No Last-Modified header was present. The "
			"HTTP/1.1 specification states that this header should "
			"be sent whenever feasible." },
	{ "nocharset", "Warning: No character set is specified in the Content-Type. "
			"Clients may assume the default of ISO-8859-1. Consider "
			"appending '; charset=...'." },
	{ "nonstandard", "Warning: I don't know anything about this header. Is it "
			"a standard HTTP response header?" },
	{ "notcrlf", "Error: This header line does not end in CR LF. HTTP requires "
			"that all header lines end with CR LF." },
	{ "ok", "OK." },
	{ "oldhttp", "Warning: This version of HTTP is obsolete. Consider upgrading "
			"to HTTP/1.1." },
	{ "rfc1036", "Warning: This date is in the obsolete RFC 1036 format. "
			"Consider using the RFC 1123 format instead." },
	{ "ugly", "This URL appears to contain implementation-specific parts such "
			"as an extension or a query string. This may make the URL liable "
			"to change when the implementation is changed, resulting in "
			"broken links. Consider using URL rewriting or equivalent to "
			"implement a future-proof URL space. See "
			"http://www.w3.org/Provider/Style/URI for more information." },
	{ "unknowncachecont", "Warning: This Cache-Control directive is "
			"non-standard and will have limited support." },
	{ "unknowncontenc", "Warning: This is not a standard Content-Encoding." },
	{ "unknownrange", "Warning: This range unit is not a standard HTTP/1.1 "
			"range." },
	{ "unknowntransenc", "Warning: This is not a standard Transfer-Encoding." },
	{ "via", "This header was added by a proxy, cache or gateway." },
	{ "wrongdate", "Warning: The server date-time differs from this system's "
			"date-time by more than 10 seconds. Check that both the "
			"system clocks are correct." },
	{ "xheader", "This is an extension header. I don't know how to check it." }
};
1260 |
|
|
|
1261 |
|
|
|
1262 |
|
|
/** |
1263 |
|
|
* Look up and output the string referenced by a key. |
1264 |
|
|
*/ |
1265 |
|
|
void lookup(const char *key) |
1266 |
|
|
{ |
1267 |
|
|
const char *s, *spc; |
1268 |
|
|
int x; |
1269 |
|
|
struct message_entry *message; |
1270 |
|
|
|
1271 |
|
|
message = bsearch(key, message_table, |
1272 |
|
|
sizeof message_table / sizeof message_table[0], |
1273 |
|
|
sizeof message_table[0], |
1274 |
|
|
(int (*)(const void *, const void *)) strcasecmp); |
1275 |
|
|
if (message) |
1276 |
|
|
s = message->value; |
1277 |
|
|
else |
1278 |
|
|
s = key; |
1279 |
|
|
|
1280 |
james |
56 |
if (html) { |
1281 |
|
|
if (strncmp(s, "Warning:", 8) == 0) |
1282 |
|
|
printf("<li class='warning'>"); |
1283 |
|
|
else if (strncmp(s, "Error:", 6) == 0) |
1284 |
|
|
printf("<li class='error'>"); |
1285 |
|
|
else if (strncmp(s, "OK", 2) == 0) |
1286 |
|
|
printf("<li class='ok'>"); |
1287 |
|
|
else |
1288 |
|
|
printf("<li>"); |
1289 |
|
|
for (; *s; s++) { |
1290 |
|
|
if (strncmp(s, "http://", 7) == 0) { |
1291 |
|
|
spc = strchr(s, ' '); |
1292 |
|
|
printf("<a href='%.*s'>%.*s</a>", spc - s, s, spc - s, s); |
1293 |
|
|
s = spc; |
1294 |
|
|
} |
1295 |
|
|
switch (*s) { |
1296 |
|
|
case '<': printf("<"); break; |
1297 |
|
|
case '>': printf(">"); break; |
1298 |
|
|
case '&': printf("&"); break; |
1299 |
|
|
default: printf("%c", *s); break; |
1300 |
|
|
} |
1301 |
james |
40 |
} |
1302 |
james |
56 |
printf("</li>\n"); |
1303 |
|
|
|
1304 |
|
|
} else { |
1305 |
|
|
printf(" "); |
1306 |
|
|
x = 4; |
1307 |
|
|
while (*s) { |
1308 |
|
|
spc = strchr(s, ' '); |
1309 |
|
|
if (!spc) |
1310 |
|
|
spc = s + strlen(s); |
1311 |
|
|
if (75 < x + (spc - s)) { |
1312 |
|
|
printf("\n "); |
1313 |
|
|
x = 4; |
1314 |
|
|
} |
1315 |
|
|
x += spc - s + 1; |
1316 |
|
|
printf("%.*s ", spc - s, s); |
1317 |
|
|
if (*spc) |
1318 |
|
|
s = spc + 1; |
1319 |
|
|
else |
1320 |
|
|
s = spc; |
1321 |
|
|
} |
1322 |
|
|
printf("\n\n"); |
1323 |
james |
40 |
} |
1324 |
|
|
} |
1325 |
james |
56 |
|