--- httplint/httplint.c	2003/12/15 15:07:31	40
+++ httplint/httplint.c	2003/12/18 00:52:50	44
@@ -36,7 +36,8 @@
 int status_code;
 char error_buffer[CURL_ERROR_SIZE];
 regex_t re_status_line, re_token, re_token_value, re_content_type, re_ugly,
-    re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade;
+    re_absolute_uri, re_etag, re_server, re_transfer_coding, re_upgrade,
+    re_rfc1123, re_rfc1036, re_asctime;
 
 
 void init(void);
@@ -47,6 +48,8 @@
 void check_status_line(const char *s);
 void check_header(const char *name, const char *value);
 bool parse_date(const char *s, struct tm *tm);
+int month(const char *s);
+time_t mktime_from_utc(struct tm *t);
 const char *skip_lws(const char *s);
 bool parse_list(const char *s, regex_t *preg, unsigned int n, unsigned int m,
     void (*callback)(const char *s, regmatch_t pmatch[]));
@@ -174,10 +177,10 @@
       "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~€-ÿ]*)$",
       REG_EXTENDED);
   regcomp_wrapper(&re_token,
-      "^([-0-9a-zA-Z_.]+)",
+      "^([-0-9a-zA-Z_.!]+)",
       REG_EXTENDED);
   regcomp_wrapper(&re_token_value,
-      "^([-0-9a-zA-Z_.]+)(=([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\"))?",
+      "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?",
       REG_EXTENDED);
   regcomp_wrapper(&re_content_type,
       "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*"
@@ -191,7 +194,7 @@
       "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$",
       REG_EXTENDED);
   regcomp_wrapper(&re_server,
-      "^((([-0-9a-zA-Z_.]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
+      "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$",
       REG_EXTENDED);
   regcomp_wrapper(&re_transfer_coding,
       "^([-0-9a-zA-Z_.]+)[ \t]*"
@@ -202,7 +205,22 @@
       "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$",
       REG_EXTENDED);
   regcomp_wrapper(&re_ugly,
-      "^[a-zA-Z0-9]+://[^/]+[/a-zA-Z0-9-_]*$",
+      "^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$",
+      REG_EXTENDED);
+  regcomp_wrapper(&re_rfc1123,
+      "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) "
+      "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) "
+      "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
+      REG_EXTENDED);
+  regcomp_wrapper(&re_rfc1036,
+      "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
+      "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-"
+      "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$",
+      REG_EXTENDED);
+  regcomp_wrapper(&re_asctime,
+      "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) "
+      "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 123][0-9]) "
+      "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$",
       REG_EXTENDED);
 }
 
@@ -380,8 +398,11 @@
   if (header) {
     header->count++;
     header->handler(value);
-  } else
+  } else if ((name[0] == 'X' || name[0] == 'x') && name[1] == '-') {
+    lookup("xheader");
+  } else {
     lookup("nonstandard");
+  }
 }
 
 
@@ -390,32 +411,53 @@
  */
 bool parse_date(const char *s, struct tm *tm)
 {
-  char *r;
+  int r;
   int len = strlen(s);
+  regmatch_t pmatch[20];
 
   if (len == 29) {
     /* RFC 1123 */
-    r = strptime(s, "%a, %d %b %Y %H:%M:%S GMT", tm);
-    if (r == s + len)
+    r = regexec(&re_rfc1123, s, 20, pmatch, 0);
+    if (r == 0) {
+      tm->tm_mday = atoi(s + pmatch[2].rm_so);
+      tm->tm_mon = month(s + pmatch[3].rm_so);
+      tm->tm_year = atoi(s + pmatch[4].rm_so) - 1900;
+      tm->tm_hour = atoi(s + pmatch[5].rm_so);
+      tm->tm_min = atoi(s + pmatch[6].rm_so);
+      tm->tm_sec = atoi(s + pmatch[7].rm_so);
       return true;
+    }
 
   } else if (len == 24) {
     /* asctime() format */
-    r = strptime(s, "%a %b %d %H:%M:%S %Y", tm);
-    if (r == s + len) {
-      lookup("asctime");
-      return true;
-    }
-    r = strptime(s, "%a %b %d %H:%M:%S %Y", tm);
-    if (r == s + len) {
+    r = regexec(&re_asctime, s, 20, pmatch, 0);
+    if (r == 0) {
+      if (s[pmatch[3].rm_so] == ' ')
+        tm->tm_mday = atoi(s + pmatch[3].rm_so + 1);
+      else
+        tm->tm_mday = atoi(s + pmatch[3].rm_so);
+      tm->tm_mon = month(s + pmatch[2].rm_so);
+      tm->tm_year = atoi(s + pmatch[7].rm_so) - 1900;
+      tm->tm_hour = atoi(s + pmatch[4].rm_so);
+      tm->tm_min = atoi(s + pmatch[5].rm_so);
+      tm->tm_sec = atoi(s + pmatch[6].rm_so);
       lookup("asctime");
       return true;
     }
 
   } else {
     /* RFC 1036 */
-    r = strptime(s, "%a, %d-%b-%y %H:%M:%S GMT", tm);
-    if (r == s + len) {
+    r = regexec(&re_rfc1036, s, 20, pmatch, 0);
+    if (r == 0) {
+      tm->tm_mday = atoi(s + pmatch[2].rm_so);
+      tm->tm_mon = month(s + pmatch[3].rm_so);
+      /* two-digit year, as strptime %y: 69-99 are 19xx, 00-68 are 20xx */
+      tm->tm_year = atoi(s + pmatch[4].rm_so);
+      if (tm->tm_year < 69)
+        tm->tm_year += 100;
+      tm->tm_hour = atoi(s + pmatch[5].rm_so);
+      tm->tm_min = atoi(s + pmatch[6].rm_so);
+      tm->tm_sec = atoi(s + pmatch[7].rm_so);
       lookup("rfc1036");
       return true;
     }
@@ -428,6 +470,81 @@
 
 
 /**
+ * Convert a month name to the month number.
+ *
+ * Only the characters needed to tell "Jan" to "Dec" apart are examined.
+ * Returns 0 (January) to 11 (December); 0 if the first letter is unknown.
+ */
+int month(const char *s)
+{
+  switch (s[0]) {
+    case 'J':
+      switch (s[1]) {
+        case 'a':
+          return 0;
+        case 'u':
+          return s[2] == 'n' ? 5 : 6;
+      }
+      break;  /* not Jan/Jun/Jul: do not fall through to 'F' */
+    case 'F':
+      return 1;
+    case 'M':
+      return s[2] == 'r' ? 2 : 4;
+    case 'A':
+      return s[1] == 'p' ? 3 : 7;
+    case 'S':
+      return 8;
+    case 'O':
+      return 9;
+    case 'N':
+      return 10;
+    case 'D':
+      return 11;
+  }
+  return 0;
+}
+
+
+/**
+ * UTC version of mktime, from
+ * http://lists.debian.org/deity/2002/deity-200204/msg00082.html
+ *
+ * t holds a broken-down time in UTC and is modified by this call.
+ * Returns the matching time_t, or -1 if mktime() fails.
+ */
+time_t mktime_from_utc(struct tm *t)
+{
+  time_t tl, tb;
+  struct tm *tg;
+
+  /* parse_date() does not set tm_isdst; pin it so that mktime() never
+   * reads an indeterminate daylight saving flag */
+  t->tm_isdst = 0;
+  tl = mktime (t);
+  if (tl == -1)
+    {
+      t->tm_hour--;
+      tl = mktime (t);
+      if (tl == -1)
+        return -1; /* can't deal with output from parse_date */
+      tl += 3600;
+    }
+  tg = gmtime (&tl);
+  tg->tm_isdst = 0;
+  tb = mktime (tg);
+  if (tb == -1)
+    {
+      tg->tm_hour--;
+      tb = mktime (tg);
+      if (tb == -1)
+        return -1; /* can't deal with output from gmtime */
+      tb += 3600;
+    }
+  return (tl - (tb - tl));
+}
+
+
+/**
  * Skip optional LWS (linear white space) [2.2]
  */
 const char *skip_lws(const char *s)
@@ -681,7 +798,7 @@
   time0 = time(0);
   if (!parse_date(s, &tm))
     return;
-  time1 = mktime(&tm);
+  time1 = mktime_from_utc(&tm);
 
   diff = difftime(time0, time1);
   if (10 < fabs(diff))
@@ -716,7 +833,7 @@
   time0 = time(0);
   if (!parse_date(s, &tm))
     return;
-  time1 = mktime(&tm);
+  time1 = mktime_from_utc(&tm);
 
   diff = difftime(time1, time0);
   if (10 < diff)
@@ -985,7 +1102,8 @@
   { "via", "This header was added by a proxy, cache or gateway." },
   { "wrongdate", "Warning: The server date-time differs from this system's "
                  "date-time by more than 10 seconds. Check that both the "
-                 "system clocks are correct." }
+                 "system clocks are correct." },
+  { "xheader", "This is an extension header. I don't know how to check it." }
 };
 
 