1 |
james |
76 |
# |
2 |
|
|
# This file is part of Sargasso, http://zamez.org/sargasso |
3 |
|
|
# Licensed under the GNU General Public License, |
4 |
|
|
# http://www.opensource.org/licenses/gpl-license |
5 |
|
|
# Copyright 2005 James Bursa <james@semichrome.net> |
6 |
|
|
# |
7 |
|
|
|
8 |
|
|
import sys |
9 |
|
|
import asyncore |
10 |
|
|
import feedparser |
11 |
|
|
import http_client |
12 |
|
|
|
13 |
|
|
# Every Feed currently registered with this module.
feeds = []

# Values stored in Feed.status.
STATUS_IDLE = 0
STATUS_FETCHING = 1
STATUS_DONE = 2
STATUS_ERROR = 3

# Append Sargasso's own headers to http_client's request header list: a
# User-Agent, and an Accept header built from feedparser's ACCEPT_HEADER.
http_client.request_headers.extend([
    'User-Agent: Sargasso',
    'Accept: ' + feedparser.ACCEPT_HEADER,
])
18 |
|
|
|
19 |
|
|
|
20 |
|
|
class Feed:
    """One feed URL together with the state of its current fetch.

    The instance passes itself to ``http_client.do_request``; the
    ``http_header``, ``http_failed``, ``receive`` and ``close`` methods are
    the callbacks used by http_client.

    Attributes:
        url: address of the feed.
        status: one of STATUS_IDLE, STATUS_FETCHING, STATUS_DONE,
            STATUS_ERROR.
        data: raw data received so far for the current fetch.
        feed, version: taken from the last feedparser result.
        entries: all entries seen so far (latest fetch first, then older
            entries that were not in the latest fetch).
        new_entries: entries that were not yet in ``entries`` when they were
            fetched; this list is only ever appended to here.
        error: description of the failure while status is STATUS_ERROR.
    """

    def __init__(self, url):
        """Create an idle feed for *url*; nothing is fetched until poll()."""
        self.url = url
        self.status = STATUS_IDLE
        self.data = ''
        self.feed = {}
        self.version = ''
        self.entries = []
        self.new_entries = []
        # Defined up front so the attribute can be read in any state.
        self.error = ''

    def poll(self):
        """Do some work on this feed.

        An idle feed starts a request; in any other state this does nothing.
        A failure to start the request is recorded as STATUS_ERROR instead
        of being propagated to the caller.
        """
        if self.status != STATUS_IDLE:
            return
        # start fetch
        try:
            self.fetch = http_client.do_request(self.url, self)
        except Exception as e:
            self.status = STATUS_ERROR
            self.error = str(e)
            return
        self.status = STATUS_FETCHING

    # http_client callbacks

    def http_header(self, client):
        """Handle the response header, rejecting any status other than 200.

        Raises:
            http_client.CloseConnection: if the status code is not "200".
        """
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(client.header)
        if client.status[1] != "200":
            self.status = STATUS_ERROR
            # The last two characters of status[2] are dropped -- presumably
            # the trailing CRLF of the status line; confirm in http_client.
            self.error = '%s %s' % (client.status[1], client.status[2][:-2])
            raise http_client.CloseConnection
        self.host = client.host

    def http_failed(self, client):
        """Record that the connection could not be made."""
        self.status = STATUS_ERROR
        self.error = 'Connection failed'

    def receive(self, data):
        """Append a chunk of received data to the buffer."""
        self.data += data

    def close(self):
        """Parse the received data and merge its entries into this feed.

        Does nothing when an error has already been recorded for this fetch,
        so that the error is not replaced by STATUS_DONE.
        """
        if self.status == STATUS_ERROR:
            return
        self.feedparse = fp = feedparser.parse(self.data)
        self.feed = fp.feed
        self.version = fp.version
        for entry in fp.entries:
            if entry not in self.entries:
                self.new_entries.append(entry)
        # Note: this extends the parser's own entry list in place, so
        # self.feedparse.entries ends up holding the merged list as well.
        entries = fp.entries
        for old_entry in self.entries:
            if old_entry not in entries:
                entries.append(old_entry)
        self.entries = entries
        self.status = STATUS_DONE

    def update(self):
        """Make a completed feed idle again so the next poll() refetches it.

        Feeds in any state other than STATUS_DONE are left untouched.
        """
        if self.status != STATUS_DONE:
            return
        self.status = STATUS_IDLE
        self.data = ''

    def __repr__(self):
        """Return the URL followed by a description of the current status."""
        if self.status == STATUS_IDLE:
            return '%s IDLE' % self.url
        elif self.status == STATUS_FETCHING:
            return '%s FETCHING (%i bytes)' % (self.url, len(self.data))
        elif self.status == STATUS_DONE:
            return '%s DONE (%i bytes)' % (self.url, len(self.data))
        elif self.status == STATUS_ERROR:
            return '%s ERROR (%s)' % (self.url, self.error)
        # __repr__ must return a string; never fall off the end with None.
        return '%s UNKNOWN (%r)' % (self.url, self.status)
92 |
|
|
|
93 |
|
|
|
94 |
|
|
def add_feed(url):
    """Register a Feed for url unless one with that URL is already present."""
    already_known = any(existing.url == url for existing in feeds)
    if not already_known:
        feeds.append(Feed(url))
100 |
|
|
|
101 |
|
|
|
102 |
|
|
def poll():
    """Let every registered feed do some work, then call asyncore.poll()."""
    for current in feeds:
        current.poll()
    asyncore.poll()
107 |
|
|
|
108 |
|
|
|
109 |
|
|
def remove_feed(url):
    """Drop the first registered feed whose URL equals url, if there is one."""
    for position, candidate in enumerate(feeds):
        if candidate.url == url:
            # Stop right after deleting: the list must not be walked further
            # once it has been modified.
            del feeds[position]
            break
115 |
|
|
|
116 |
|
|
|
117 |
|
|
|
118 |
|
|
|
119 |
|
|
if __name__ == '__main__':
    # Command-line driver: register every URL given as an argument, then
    # poll and print the state of all feeds twice a second, forever.
    from time import sleep
    from pprint import pprint

    # A plain loop rather than map(): map() is lazy on Python 3, so
    # add_feed() would never actually be called there.
    for url in sys.argv[1:]:
        add_feed(url)
    while True:
        poll()
        pprint(feeds)
        sleep(0.5)