#
# This file is part of Sargasso, http://zamez.org/sargasso
# Licensed under the GNU General Public License,
# http://www.opensource.org/licenses/gpl-license
# Copyright 2005 James Bursa <james@semichrome.net>
#
7 |
|
8 |
import sys |
9 |
import asyncore |
10 |
import feedparser |
11 |
import http_client |
12 |
|
13 |
# All feeds currently being tracked; add_feed() / remove_feed() maintain it.
feeds = []

# States a Feed moves through while it is being fetched and parsed.
STATUS_IDLE = 0
STATUS_FETCHING = 1
STATUS_DONE = 2
STATUS_ERROR = 3

# Extra request headers appended to http_client's module-level header list:
# identify ourselves and advertise the content types feedparser accepts.
http_client.request_headers += [
    'User-Agent: Sargasso',
    'Accept: ' + feedparser.ACCEPT_HEADER,
]
18 |
|
19 |
|
20 |
class Feed:
    """A single feed URL together with its fetch state and parsed content.

    Attributes:
        url: the feed URL passed to the constructor.
        status: one of STATUS_IDLE, STATUS_FETCHING, STATUS_DONE, STATUS_ERROR.
        error: description of the last failure ('' until one occurs).
        data: raw response body accumulated by receive().
        feed, version: taken from the feedparser result in close().
        entries: entries from the latest parse followed by older entries
            that are no longer present in the feed.
        new_entries: entries that were not in `entries` when they were parsed.
    """

    def __init__(self, url):
        """Create an idle feed for *url*; nothing is fetched until poll()."""
        self.url = url
        self.status = STATUS_IDLE
        # Initialised so the attribute always exists, not only after a failure.
        self.error = ''
        self.data = ''
        self.feed = {}
        self.version = ''
        self.entries = []
        self.new_entries = []

    def poll(self):
        """Do some work on this feed.

        An idle feed starts a request through http_client.do_request(),
        passing itself as the second argument; in every other state this is
        a no-op. Any exception raised while starting the request is recorded
        in `error` and moves the feed to STATUS_ERROR.
        """
        if self.status != STATUS_IDLE:
            return
        try:
            self.fetch = http_client.do_request(self.url, self)
        except Exception as e:
            self.status = STATUS_ERROR
            self.error = str(e)
            return
        self.status = STATUS_FETCHING

    # http_client callbacks
    def http_header(self, client):
        """Inspect the response header exposed by *client*.

        Prints the header. For any status code other than "200" the feed is
        put into STATUS_ERROR and http_client.CloseConnection is raised.

        Raises:
            http_client.CloseConnection: when the status code is not "200".
        """
        print(client.header)
        if client.status[1] != "200":
            self.status = STATUS_ERROR
            # The [:-2] presumably strips a trailing CRLF from the reason
            # phrase -- TODO confirm against http_client.
            self.error = '%s %s' % (client.status[1], client.status[2][:-2])
            raise http_client.CloseConnection
        self.host = client.host

    def http_failed(self, client):
        """Record that the connection could not be made."""
        self.status = STATUS_ERROR
        self.error = 'Connection failed'

    def receive(self, data):
        """Append a chunk of response body to the accumulated data."""
        self.data += data

    def close(self):
        """Parse the accumulated data and merge the result into this feed.

        Entries not already known are appended to `new_entries`. `entries`
        becomes the freshly parsed entries followed by any older entries
        that are no longer in the feed. The feed then becomes STATUS_DONE.
        """
        # NOTE(review): if http_client calls close() after http_header() or
        # http_failed() has already recorded a failure, parsing here would
        # replace STATUS_ERROR with STATUS_DONE and hide the error -- confirm
        # the callback order in http_client.
        if self.status == STATUS_ERROR:
            return

        self.feedparse = fp = feedparser.parse(self.data)
        self.feed = fp.feed
        self.version = fp.version

        for entry in fp.entries:
            if entry not in self.entries:
                self.new_entries.append(entry)

        # Merge into a copy so the parser result kept in self.feedparse is
        # not modified when the old entries are appended.
        merged = list(fp.entries)
        for old_entry in self.entries:
            if old_entry not in merged:
                merged.append(old_entry)
        self.entries = merged
        self.status = STATUS_DONE

    # update this feed
    def update(self):
        """Return a finished feed to idle so the next poll() fetches it again.

        Feeds in any state other than STATUS_DONE are left untouched.
        """
        if self.status != STATUS_DONE:
            return
        self.status = STATUS_IDLE
        self.data = ''

    # show current status
    def __repr__(self):
        """Return the URL followed by a short description of the status."""
        if self.status == STATUS_IDLE:
            return '%s IDLE' % self.url
        elif self.status == STATUS_FETCHING:
            return '%s FETCHING (%i bytes)' % (self.url, len(self.data))
        elif self.status == STATUS_DONE:
            return '%s DONE (%i bytes)' % (self.url, len(self.data))
        elif self.status == STATUS_ERROR:
            return '%s ERROR (%s)' % (self.url, self.error)
92 |
|
93 |
|
94 |
def add_feed(url):
    """Register *url* as a new feed unless one with that URL already exists."""
    already_tracked = any(existing.url == url for existing in feeds)
    if not already_tracked:
        feeds.append(Feed(url))
100 |
|
101 |
|
102 |
def poll():
    """Give every tracked feed a turn, then run one asyncore.poll() pass."""
    for tracked in feeds:
        tracked.poll()
    asyncore.poll()
107 |
|
108 |
|
109 |
def remove_feed(url):
    """Drop the first tracked feed whose URL equals *url*, if there is one."""
    for position, candidate in enumerate(feeds):
        if candidate.url == url:
            del feeds[position]
            return
115 |
|
116 |
|
117 |
|
118 |
|
119 |
if __name__ == '__main__':
    # Command-line driver: track every URL given as an argument, then poll
    # forever, printing the state of all feeds twice a second.
    from time import sleep
    from pprint import pprint

    # Explicit loop instead of map(): map() is lazy on Python 3, so using it
    # only for its side effects would never call add_feed() there.
    for url in sys.argv[1:]:
        add_feed(url)

    while True:
        poll()
        pprint(feeds)
        sleep(0.5)