| 1 |
james |
76 |
#
# This file is part of Sargasso, http://zamez.org/sargasso
# Licensed under the GNU General Public License,
#   http://www.opensource.org/licenses/gpl-license
# Copyright 2005 James Bursa <james@semichrome.net>
#
| 7 |
|
|
|
| 8 |
|
|
import sys |
| 9 |
|
|
import asyncore |
| 10 |
|
|
import feedparser |
| 11 |
|
|
import http_client |
| 12 |
|
|
|
| 13 |
|
|
# Every Feed registered through add_feed(); shared by the module-level
# add_feed(), poll() and remove_feed() functions.
feeds = []

# Life-cycle states stored in Feed.status.
STATUS_IDLE = 0
STATUS_FETCHING = 1
STATUS_DONE = 2
STATUS_ERROR = 3

# Extend the header list kept by http_client so that requests identify
# this program and advertise the content types feedparser accepts.
http_client.request_headers += [
    'User-Agent: Sargasso',
    'Accept: ' + feedparser.ACCEPT_HEADER,
]
| 18 |
|
|
|
| 19 |
|
|
|
| 20 |
|
|
class Feed:
    """One feed URL plus its fetch state and the entries parsed from it.

    The instance itself is passed to http_client.do_request() as the
    callback object; http_header(), http_failed(), receive() and close()
    are the callbacks it provides.

    Attributes:
        url: the feed URL given to the constructor.
        status: one of STATUS_IDLE, STATUS_FETCHING, STATUS_DONE or
            STATUS_ERROR.
        data: response data accumulated by receive() for the current fetch.
        feed, version: taken from the feedparser result in close().
        entries: entries from the latest parse followed by older entries
            that were not in it.
        new_entries: entries that were not in `entries` when they were
            parsed; this class only ever appends to it.
        error: description of the last failure ('' if none yet).
        fetch: whatever http_client.do_request() returned (None before the
            first fetch).
        host: client.host from the last successful header (None before).
    """

    def __init__(self, url):
        """Create an idle feed for `url`; nothing is fetched until poll()."""
        self.url = url
        self.status = STATUS_IDLE
        self.data = ''
        self.feed = {}
        self.version = ''
        self.entries = []
        self.new_entries = []
        # Initialised up front so that reading these before a fetch has
        # started (or failed) cannot raise AttributeError.
        self.error = ''
        self.fetch = None
        self.host = None

    def poll(self):
        """Do some work on this feed.

        An idle feed starts a request for its URL and becomes
        STATUS_FETCHING.  In any other state this does nothing.  An
        exception raised while starting the request is not propagated:
        its text is stored in self.error and the feed becomes STATUS_ERROR.
        """
        if self.status != STATUS_IDLE:
            return
        # start fetch
        try:
            self.fetch = http_client.do_request(self.url, self)
        except Exception as e:
            self.status = STATUS_ERROR
            self.error = str(e)
            return
        self.status = STATUS_FETCHING

    # http_client callbacks
    def http_header(self, client):
        """Inspect the response header exposed by `client`.

        Raises:
            http_client.CloseConnection: when client.status[1] is not
                "200"; the feed is marked STATUS_ERROR first.
        """
        # Debug output kept from the original implementation.
        print(client.header)
        if client.status[1] != "200":
            self.status = STATUS_ERROR
            # The last two characters of status[2] are dropped --
            # presumably the trailing CRLF; confirm against http_client.
            self.error = '%s %s' % (client.status[1], client.status[2][:-2])
            raise http_client.CloseConnection
        self.host = client.host

    def http_failed(self, client):
        """Record that the connection could not be made."""
        self.status = STATUS_ERROR
        self.error = 'Connection failed'

    def receive(self, data):
        """Append a chunk of response data to the buffer."""
        self.data += data

    def close(self):
        """Parse the buffered data and merge the result into this feed.

        Entries not already in self.entries are appended to
        self.new_entries.  self.entries becomes the freshly parsed entries
        followed by any older entries that were not in the new parse.
        The feed ends in STATUS_DONE.

        NOTE(review): the status is set to STATUS_DONE unconditionally.
        If http_client also calls close() after http_header() raised
        CloseConnection or after http_failed(), the error state is
        overwritten -- confirm against http_client.
        """
        self.feedparse = fp = feedparser.parse(self.data)
        self.feed = fp.feed
        self.version = fp.version
        for entry in fp.entries:
            if entry not in self.entries:
                self.new_entries.append(entry)
        # `entries` is the very list held by the parse result, so the
        # appends below also extend self.feedparse.entries.
        entries = fp.entries
        for old_entry in self.entries:
            if old_entry not in entries:
                entries.append(old_entry)
        self.entries = entries
        self.status = STATUS_DONE

    # update this feed
    def update(self):
        """Make a finished feed eligible for fetching again.

        Only a feed in STATUS_DONE is reset (to STATUS_IDLE with an empty
        data buffer); in any other state, including STATUS_ERROR, this
        does nothing.
        """
        if self.status != STATUS_DONE:
            return
        self.status = STATUS_IDLE
        self.data = ''

    # show current status
    def __repr__(self):
        """Return the URL followed by a short description of the status."""
        if self.status == STATUS_IDLE:
            return '%s IDLE' % self.url
        elif self.status == STATUS_FETCHING:
            return '%s FETCHING (%i bytes)' % (self.url,
                    len(self.data))
        elif self.status == STATUS_DONE:
            return '%s DONE (%i bytes)' % (self.url,
                    len(self.data))
        elif self.status == STATUS_ERROR:
            return '%s ERROR (%s)' % (self.url, self.error)
        # Always return a string: falling off the end would return None
        # and make repr() raise TypeError.
        return '%s UNKNOWN (%r)' % (self.url, self.status)
| 92 |
|
|
|
| 93 |
|
|
|
| 94 |
|
|
def add_feed(url):
    """Register a new Feed for `url` unless one with that URL exists.

    Does nothing when a feed with the same URL is already in `feeds`.
    """
    known_urls = [known.url for known in feeds]
    if url not in known_urls:
        feeds.append(Feed(url))
| 100 |
|
|
|
| 101 |
|
|
|
| 102 |
|
|
def poll():
    """Give every registered feed a chance to work, then poll asyncore.

    Calls poll() on each entry of `feeds` in order and finishes with a
    single asyncore.poll() call.
    """
    for pending in feeds:
        pending.poll()

    asyncore.poll()
| 107 |
|
|
|
| 108 |
|
|
|
| 109 |
|
|
def remove_feed(url):
    """Drop the first feed whose URL equals `url` from `feeds`.

    Does nothing when no registered feed has that URL.
    """
    for position, candidate in enumerate(feeds):
        if candidate.url != url:
            continue
        # Only the first match is removed; stop scanning straight away.
        del feeds[position]
        return
| 115 |
|
|
|
| 116 |
|
|
|
| 117 |
|
|
|
| 118 |
|
|
|
| 119 |
|
|
if __name__ == '__main__':
    # Command-line driver: treat every argument as a feed URL, then poll
    # forever, printing the state of all feeds twice a second.
    from time import sleep
    from pprint import pprint

    # An explicit loop rather than map(): map() was used purely for its
    # side effects, and on Python 3 it is lazy, so add_feed() would never
    # be called.
    for url in sys.argv[1:]:
        add_feed(url)

    while True:
        poll()
        pprint(feeds)
        sleep(0.5)