#
# This file is part of Sargasso, http://zamez.org/sargasso
# Licensed under the GNU General Public License,
#                http://www.opensource.org/licenses/gpl-license
# Copyright 2005 James Bursa
#

"""Fetch and parse a set of feeds, tracking which entries are new."""

import sys
import asyncore

import feedparser
import http_client

# All feeds currently being tracked (Feed instances, unique by url).
feeds = []

# Life-cycle states of a Feed.
STATUS_IDLE, STATUS_FETCHING, STATUS_DONE, STATUS_ERROR = range(4)

# Extra headers appended to http_client's request header list.
http_client.request_headers += ['User-Agent: Sargasso',
                                'Accept: ' + feedparser.ACCEPT_HEADER]


class Feed:
    """A single feed URL together with its fetch state and parsed entries.

    Instances are handed to http_client.do_request() as the second
    argument and expose http_header / http_failed / receive / close,
    which the original source labels "http_client callbacks".
    """

    def __init__(self, url):
        """Create an idle feed for url; nothing is fetched yet."""
        self.url = url
        self.status = STATUS_IDLE
        self.data = ''
        # Message describing the last failure; only meaningful while
        # status is STATUS_ERROR.
        self.error = ''
        self.feed = {}
        self.version = ''
        self.entries = []
        self.new_entries = []

    def poll(self):
        """Do some work on this feed."""
        if self.status != STATUS_IDLE:
            return
        # start fetch
        try:
            self.fetch = http_client.do_request(self.url, self)
        except Exception as e:
            self.status = STATUS_ERROR
            self.error = str(e)
            return
        self.status = STATUS_FETCHING

    # http_client callbacks

    def http_header(self, client):
        """Handle the response header: reject anything other than a 200.

        Raises http_client.CloseConnection after recording the error when
        client.status[1] is not "200".
        """
        print(client.header)
        if client.status[1] != "200":
            self.status = STATUS_ERROR
            # The last two characters of status[2] are dropped --
            # presumably a trailing CRLF; confirm against http_client.
            self.error = '%s %s' % (client.status[1], client.status[2][:-2])
            raise http_client.CloseConnection
        self.host = client.host

    def http_failed(self, client):
        """Record that the connection failed."""
        self.status = STATUS_ERROR
        self.error = 'Connection failed'

    def receive(self, data):
        """Append a chunk of received data to the buffer."""
        self.data += data

    def close(self):
        """Parse the buffered data and merge the result into this feed.

        Entries not already in self.entries are appended to
        self.new_entries. self.entries becomes the freshly parsed entries
        followed by any older entries that are missing from the new parse.
        """
        # NOTE(review): status is set to STATUS_DONE unconditionally; if
        # http_client calls close() after http_header/http_failed recorded
        # an error, that error state is overwritten -- confirm.
        self.feedparse = fp = feedparser.parse(self.data)
        self.feed = fp.feed
        self.version = fp.version
        for entry in fp.entries:
            if entry not in self.entries:
                self.new_entries.append(entry)
        # Merge into a copy so the parse result kept in self.feedparse is
        # not modified by appending old entries to it.
        merged = list(fp.entries)
        for old_entry in self.entries:
            if old_entry not in merged:
                merged.append(old_entry)
        self.entries = merged
        self.status = STATUS_DONE

    # update this feed
    def update(self):
        """Schedule a re-fetch of a feed whose previous fetch completed.

        Does nothing unless status is STATUS_DONE.
        """
        # NOTE(review): a feed in STATUS_ERROR is never reset here, so it
        # is not retried -- confirm this is intended.
        if self.status != STATUS_DONE:
            return
        self.status = STATUS_IDLE
        self.data = ''

    # show current status
    def __repr__(self):
        """Return the url followed by a description of the current status."""
        if self.status == STATUS_IDLE:
            return '%s IDLE' % self.url
        elif self.status == STATUS_FETCHING:
            return '%s FETCHING (%i bytes)' % (self.url, len(self.data))
        elif self.status == STATUS_DONE:
            return '%s DONE (%i bytes)' % (self.url, len(self.data))
        elif self.status == STATUS_ERROR:
            return '%s ERROR (%s)' % (self.url, self.error)
        # __repr__ must return a string; never fall through to None.
        return '%s UNKNOWN (%r)' % (self.url, self.status)


def add_feed(url):
    """Add a feed to the set of feeds."""
    for feed in feeds:
        if feed.url == url:
            # already tracked
            return
    feeds.append(Feed(url))


def poll():
    """Do some work on the set of feeds."""
    for feed in feeds:
        feed.poll()
    asyncore.poll()


def remove_feed(url):
    """Remove a feed from the set of feeds."""
    for feed in feeds:
        if feed.url == url:
            feeds.remove(feed)
            # stop at once: the list was just modified under the iterator
            break


if __name__ == '__main__':
    from time import sleep
    from pprint import pprint

    # An explicit loop: add_feed is called for its side effect only.
    for url in sys.argv[1:]:
        add_feed(url)
    while 1:
        poll()
        pprint(feeds)
        sleep(0.5)