--- httplint/httplint.c 2003/12/17 17:44:40 41 +++ httplint/httplint.c 2004/03/12 21:54:02 56 @@ -1,8 +1,8 @@ /* * HTTP Header Lint - * Licensed under the same license as Curl - * http://curl.haxx.se/docs/copyright.html - * Copyright 2003 James Bursa + * Licensed under the MIT License + * http://www.opensource.org/licenses/mit-license + * Copyright 2004 James Bursa */ /* @@ -32,12 +32,13 @@ bool start; +bool html = false; CURL *curl; int status_code; char error_buffer[CURL_ERROR_SIZE]; regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly, re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade, - re_rfc1123, re_rfc1036, re_asctime; + re_rfc1123, re_rfc1036, re_asctime, re_cookie_nameval, re_cookie_expires; void init(void); @@ -49,6 +50,7 @@ void check_header(const char *name, const char *value); bool parse_date(const char *s, struct tm *tm); int month(const char *s); +time_t mktime_from_utc(struct tm *t); const char *skip_lws(const char *s); bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m, void (*callback)(const char *s, regmatch_t pmatch[])); @@ -80,9 +82,8 @@ void header_upgrade(const char *s); void header_vary(const char *s); void header_via(const char *s); +void header_set_cookie(const char *s); void die(const char *error); -void warning(const char *message); -void error(const char *message); void print(const char *s, size_t len); void lookup(const char *key); @@ -113,6 +114,7 @@ { "Pragma", header_pragma, 0, 0 }, { "Retry-After", header_retry_after, 0, 0 }, { "Server", header_server, 0, 0 }, + { "Set-Cookie", header_set_cookie, 0, 0 }, { "Trailer", header_trailer, 0, 0 }, { "Transfer-Encoding", header_transfer_encoding, 0, 0 }, { "Upgrade", header_upgrade, 0, 0 }, @@ -126,14 +128,19 @@ */ int main(int argc, char *argv[]) { - int i; + int i = 1; if (argc < 2) - die("Usage: httplint url [url ...]"); + die("Usage: httplint [--html] url [url ...]"); init(); - for (i = 1; i != argc; i++) + if (1 < argc && strcmp(argv[1], "--html") == 0) { + html = true; + i++; + } + + for (; i != argc; i++) check_url(argv[i]); curl_global_cleanup(); @@ -176,10 +183,10 @@ "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~€-ÿ]*)$", REG_EXTENDED); regcomp_wrapper(&re_token, - "^([-0-9a-zA-Z_.]+)", + "^([-0-9a-zA-Z_.!]+)", REG_EXTENDED); regcomp_wrapper(&re_token_value, - "^([-0-9a-zA-Z_.]+)(=([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\"))?", + "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?", REG_EXTENDED); regcomp_wrapper(&re_content_type, "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*" @@ -193,7 +200,7 @@ "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$", REG_EXTENDED); regcomp_wrapper(&re_server, - "^((([-0-9a-zA-Z_.]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$", + "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$", REG_EXTENDED); regcomp_wrapper(&re_transfer_coding, "^([-0-9a-zA-Z_.]+)[ \t]*" @@ -204,7 +211,7 @@ "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$", REG_EXTENDED); regcomp_wrapper(&re_ugly, - "^[a-zA-Z0-9]+://[^/]+[/a-zA-Z0-9-_]*$", + "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$", REG_EXTENDED); regcomp_wrapper(&re_rfc1123, "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) " @@ -221,6 +228,14 @@ "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) " "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$", REG_EXTENDED); + regcomp_wrapper(&re_cookie_nameval, + "^[^;, ]+=[^;, ]*$", + REG_EXTENDED); + regcomp_wrapper(&re_cookie_expires, + "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-" + "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) " + "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", + REG_EXTENDED); } @@ -252,19 +267,39 @@ for (i = 0; i != sizeof header_table / sizeof header_table[0]; i++) header_table[i].count = 0; - printf("Checking URL %s\n", url); - if (strncmp(url, "http", 4)) - warning("this is not an http or https url"); + if (!html) + printf("Checking URL %s\n", url); + if (strncmp(url, "http", 4)) { + if (html) + printf("

"); + printf("Warning: this is not an http or https url"); + if (html) + printf("

"); + printf("\n"); + } if (curl_easy_setopt(curl, CURLOPT_URL, url)) die("Failed to set curl options"); + if (html) + printf("\n"); if (code != CURLE_OK && code != CURLE_WRITE_ERROR) { - error(error_buffer); + if (html) + printf("

"); + printf("Error: "); + print(error_buffer, strlen(error_buffer)); + printf("."); + if (html) + printf("

"); + printf("\n"); return; } else { printf("\n"); + if (html) + printf(""); } @@ -287,16 +325,20 @@ UNUSED(stream); - printf("* "); + printf(html ? "
  • " : "* "); print(ptr, size); - printf("\n"); + printf(html ? "
      " : "\n"); if (size < 2 || ptr[size - 2] != 13 || ptr[size - 1] != 10) { lookup("notcrlf"); + if (html) + printf("
  • \n"); return size; } if (sizeof s <= size) { - warning("header too long: ignored\n"); + lookup("headertoolong"); + if (html) + printf("\n"); return size; } strncpy(s, ptr, size); @@ -307,7 +349,9 @@ if (s[0] == 0) { /* empty header indicates end of headers */ - puts("End of headers."); + lookup("endofheaders"); + if (html) + printf("\n"); return 0; } else if (start) { @@ -325,6 +369,8 @@ check_header(name, skip_lws(value)); } + if (html) + printf("\n"); return size; } @@ -397,8 +443,11 @@ if (header) { header->count++; header->handler(value); - } else + } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') { + lookup("xheader"); + } else { lookup("nonstandard"); + } } @@ -411,12 +460,6 @@ int len = strlen(s); regmatch_t pmatch[20]; - tm->tm_wday = 0; - tm->tm_yday = 0; - tm->tm_isdst = 0; - tm->tm_gmtoff = 0; - tm->tm_zone = "GMT"; - if (len == 29) { /* RFC 1123 */ r = regexec(&re_rfc1123, s, 20, pmatch, 0); @@ -501,6 +544,39 @@ /** + * UTC version of mktime, from + * http://lists.debian.org/deity/2002/deity-200204/msg00082.html + */ +time_t mktime_from_utc(struct tm *t) +{ + time_t tl, tb; + struct tm *tg; + + tl = mktime (t); + if (tl == -1) + { + t->tm_hour--; + tl = mktime (t); + if (tl == -1) + return -1; /* can't deal with output from strptime */ + tl += 3600; + } + tg = gmtime (&tl); + tg->tm_isdst = 0; + tb = mktime (tg); + if (tb == -1) + { + tg->tm_hour--; + tb = mktime (tg); + if (tb == -1) + return -1; /* can't deal with output from gmtime */ + tb += 3600; + } + return (tl - (tb - tl)); +} + + +/** * Skip optional LWS (linear white space) [2.2] */ const char *skip_lws(const char *s) @@ -526,7 +602,11 @@ do { r = regexec(preg, s, 20, pmatch, 0); if (r) { + if (html) + printf("
  • "); printf(" Failed to match list item %i\n", items + 1); + if (html) + printf("
  • \n"); return false; } @@ -539,7 +619,11 @@ if (*s == 0) break; if (*s != ',') { + if (html) + printf("
  • "); printf(" Expecting , after list item %i\n", items); + if (html) + printf("
  • \n"); return false; } while (*s == ',') @@ -547,11 +631,15 @@ } while (*s != 0); if (items < n || m < items) { + if (html) + printf("
  • "); printf(" %i items in list, but there should be ", items); if (m == UINT_MAX) printf("at least %i\n", n); else printf("between %i and %i\n", n, m); + if (html) + printf("
  • \n"); return false; } @@ -621,7 +709,13 @@ (int (*)(const void *, const void *)) strcasecmp); if (!dir) { - printf(" Cache-Control directive '%s':\n", name); + if (html) + printf("
  • "); + printf(" Cache-Control directive '"); + print(name, strlen(name)); + printf("':\n"); + if (html) + printf("
  • \n"); lookup("unknowncachecont"); } } @@ -666,7 +760,11 @@ sizeof content_coding_list[0], (int (*)(const void *, const void *)) strcasecmp); if (!dir) { + if (html) + printf("
  • "); printf(" Content-Encoding '%s':\n", name); + if (html) + printf("
  • \n"); lookup("unknowncontenc"); } } @@ -754,7 +852,7 @@ time0 = time(0); if (!parse_date(s, &tm)) return; - time1 = mktime(&tm); + time1 = mktime_from_utc(&tm); diff = difftime(time0, time1); if (10 < fabs(diff)) @@ -789,7 +887,7 @@ time0 = time(0); if (!parse_date(s, &tm)) return; - time1 = mktime(&tm); + time1 = mktime_from_utc(&tm); diff = difftime(time1, time0); if (10 < diff) @@ -881,7 +979,11 @@ sizeof transfer_coding_list[0], (int (*)(const void *, const void *)) strcasecmp); if (!dir) { + if (html) + printf("
  • "); printf(" Transfer-Encoding '%s':\n", name); + if (html) + printf("
  • \n"); lookup("unknowntransenc"); } } @@ -910,32 +1012,99 @@ lookup("via"); } - -/** - * Print an error message and exit. - */ -void die(const char *error) +/* http://wp.netscape.com/newsref/std/cookie_spec.html */ +void header_set_cookie(const char *s) { - fprintf(stderr, "httplint: %s\n", error); - exit(EXIT_FAILURE); -} + bool ok = true; + int r; + const char *semi = strchr(s, ';'); + const char *s2; + struct tm tm; + double diff; + time_t time0, time1; + regmatch_t pmatch[20]; + if (semi) + s2 = strndup(s, semi - s); + else + s2 = s; -/** - * Print a warning message. - */ -void warning(const char *message) -{ - printf("Warning: %s\n", message); + r = regexec(&re_cookie_nameval, s2, 0, 0, 0); + if (r) { + lookup("cookiebadnameval"); + ok = false; + } + + if (!semi) + return; + + s = skip_lws(semi + 1); + + while (*s) { + semi = strchr(s, ';'); + if (semi) + s2 = strndup(s, semi - s); + else + s2 = s; + + if (strncasecmp(s2, "expires=", 8) == 0) { + s2 += 8; + r = regexec(&re_cookie_expires, s2, 20, pmatch, 0); + if (r == 0) { + tm.tm_mday = atoi(s2 + pmatch[2].rm_so); + tm.tm_mon = month(s2 + pmatch[3].rm_so); + tm.tm_year = atoi(s2 + pmatch[4].rm_so) - 1900; + tm.tm_hour = atoi(s2 + pmatch[5].rm_so); + tm.tm_min = atoi(s2 + pmatch[6].rm_so); + tm.tm_sec = atoi(s2 + pmatch[7].rm_so); + + time0 = time(0); + time1 = mktime_from_utc(&tm); + + diff = difftime(time0, time1); + if (10 < diff) { + lookup("cookiepastdate"); + ok = false; + } + } else { + lookup("cookiebaddate"); + ok = false; + } + } else if (strncasecmp(s2, "domain=", 7) == 0) { + } else if (strncasecmp(s2, "path=", 5) == 0) { + if (s2[5] != '/') { + lookup("cookiebadpath"); + ok = false; + } + } else if (strcasecmp(s, "secure") == 0) { + } else { + if (html) + printf("
  • "); + printf(" Set-Cookie field '%s':\n", s2); + if (html) + printf("
  • \n"); + lookup("cookieunknownfield"); + ok = false; + } + + if (semi) + s = skip_lws(semi + 1); + else + break; + } + + if (ok) + lookup("ok"); } /** - * Print an error message. + * Print an error message and exit. */ -void error(const char *message) +void die(const char *error) { - printf("Error: %s\n", message); + fprintf(stderr, "httplint: %s\n", error); + exit(EXIT_FAILURE); } @@ -946,10 +1115,21 @@ { size_t i; for (i = 0; i != len; i++) { - if (31 < s[i] && s[i] < 127) + if (html && s[i] == '<') + printf("<"); + else if (html && s[i] == '>') + printf(">"); + else if (html && s[i] == '&') + printf("&"); + else if (31 < s[i] && s[i] < 127) putchar(s[i]); - else + else { + if (html) + printf(""); printf("[%.2x]", s[i]); + if (html) + printf(""); + } } } @@ -1014,10 +1194,22 @@ "of header names, or \"*\"." }, { "contentrange", "Warning: The Content-Range header should not be returned " "by the server for this request." }, + { "cookiebaddate", "Error: The expires date must be in the form " + "\"Wdy, DD-Mon-YYYY HH:MM:SS GMT\"." }, + { "cookiebadnameval", "Error: A Set-Cookie header must start with " + "name=value, each excluding semi-colon, comma and " + "white space." }, + { "cookiebadpath", "Error: The path does not start with \"/\"." }, + { "cookiepastdate", "Warning: The expires date is in the past. The cookie " + "will be deleted by browsers." }, + { "cookieunknownfield", "Warning: This is not a standard Set-Cookie " + "field." }, + { "endofheaders", "End of headers." }, { "futurehttp", "Warning: I only understand HTTP/1.1. Check for a newer " "version of this tool." }, { "futurelastmod", "Error: The specified Last-Modified date-time is in " "the future." }, + { "headertoolong", "Warning: Header too long: ignored." }, { "missingcolon", "Error: Headers must be of the form 'Name: value'." }, { "missingcontenttype", "Warning: No Content-Type header was present. The " "client will have to guess the media type or ask " @@ -1058,7 +1250,8 @@ { "via", "This header was added by a proxy, cache or gateway." }, { "wrongdate", "Warning: The server date-time differs from this system's " "date-time by more than 10 seconds. Check that both the " - "system clocks are correct." } + "system clocks are correct." }, + { "xheader", "This is an extension header. I don't know how to check it." } }; @@ -1080,22 +1273,49 @@ else s = key; - printf(" "); - x = 4; - while (*s) { - spc = strchr(s, ' '); - if (!spc) - spc = s + strlen(s); - if (75 < x + (spc - s)) { - printf("\n "); - x = 4; - } - x += spc - s + 1; - printf("%.*s ", spc - s, s); - if (*spc) - s = spc + 1; + if (html) { + if (strncmp(s, "Warning:", 8) == 0) + printf("
  • "); + else if (strncmp(s, "Error:", 6) == 0) + printf("
  • "); + else if (strncmp(s, "OK", 2) == 0) + printf("
  • "); else - s = spc; + printf("
  • "); + for (; *s; s++) { + if (strncmp(s, "http://", 7) == 0) { + spc = strchr(s, ' '); + printf("%.*s", spc - s, s, spc - s, s); + s = spc; + } + switch (*s) { + case '<': printf("<"); break; + case '>': printf(">"); break; + case '&': printf("&"); break; + default: printf("%c", *s); break; + } + } + printf("
  • \n"); + + } else { + printf(" "); + x = 4; + while (*s) { + spc = strchr(s, ' '); + if (!spc) + spc = s + strlen(s); + if (75 < x + (spc - s)) { + printf("\n "); + x = 4; + } + x += spc - s + 1; + printf("%.*s ", spc - s, s); + if (*spc) + s = spc + 1; + else + s = spc; + } + printf("\n\n"); } - printf("\n\n"); } +