代码语言:javascript复制
http://www.newzbot.com/serverlist.php?since=ALL&orderby=kps&sortorder=desc&show_maxgroup=on&show_post=on&show_kps=on&show_created=on 在这个网址可以查到当前有哪些可用的NNTP服务器。
这个项目的目的是收集新闻信息,并将其生成一份HTML报告(当然也可以是其他形式的报告),完整代码如下:
"""Gather news items from several sources and publish them to several destinations.

Sources are objects with a ``getItems()`` method that yields ``NewsItem``
objects; destinations are objects with a ``receiveItems(items)`` method.
The module is written to run unchanged on Python 2.7 and Python 3.

Created on 2012-7-18

@author: mars
"""
import re
import textwrap
from contextlib import closing
from email import message_from_string
from email.utils import mktime_tz, parsedate_tz
from time import time

try:  # Python 3
    from html import escape
    from urllib.request import urlopen
except ImportError:  # Python 2
    from cgi import escape
    from urllib import urlopen

day = 24 * 60 * 60  # number of seconds in one day


def wrap(string, max=70):
    """Re-flow *string* to lines of at most *max* columns.

    Returns the wrapped text joined with newlines and terminated by a
    single trailing newline.
    """
    return '\n'.join(textwrap.wrap(string, max)) + '\n'


class NewsAgent:
    """Collects news items from every source and hands them to every destination."""

    def __init__(self):
        self.sources = []
        self.destinations = []

    def addSource(self, source):
        """Register an object providing ``getItems()``."""
        self.sources.append(source)

    def addDestination(self, dest):
        """Register an object providing ``receiveItems(items)``."""
        self.destinations.append(dest)

    def distribute(self):
        """Fetch the items of all sources once and pass them to all destinations."""
        items = []
        for source in self.sources:
            items.extend(source.getItems())
        for dest in self.destinations:
            dest.receiveItems(items)


class NewsItem:
    """A simple news item consisting of a title and a body text."""

    def __init__(self, title, body):
        self.title = title
        self.body = body


def _article_text(reply):
    """Return the raw article text from the reply of ``NNTP.article()``.

    Python 2 replies with ``(response, number, id, lines)``; Python 3
    replies with ``(response, ArticleInfo(number, message_id, lines))``
    where the lines are bytes.
    """
    lines = reply[3] if len(reply) > 3 else reply[1].lines
    return '\n'.join(
        line if isinstance(line, str) else line.decode('utf-8', 'replace')
        for line in lines
    )


def _posted_at(message):
    """Return the ``Date`` header of *message* as a POSIX timestamp, or None."""
    header = message['date']
    if not header:
        return None
    parsed = parsedate_tz(header)
    if parsed is None:
        return None
    try:
        return mktime_tz(parsed)
    except (ValueError, OverflowError):
        return None


class NNTPSource:
    """A news source that reads the recent articles of one NNTP newsgroup.

    *window* is the maximum article age in days.
    """

    def __init__(self, servername, group, window):
        self.servername = servername
        self.group = group
        self.window = window

    def getItems(self):
        """Yield a ``NewsItem`` for each article posted within the window.

        Many servers refuse the NEWNEWS command, so the group's article
        numbers are walked from newest to oldest instead, stopping at the
        first article whose Date header is older than the window.
        """
        # Imported lazily: nntplib was removed from the standard library in
        # Python 3.13, and the other sources must stay usable without it.
        import nntplib

        cutoff = time() - self.window * day
        server = nntplib.NNTP(self.servername)
        try:
            # group() returns (response, count, first, last, name); the
            # numbers are strings on Python 2 and ints on Python 3.
            group_reply = server.group(self.group)
            first, last = int(group_reply[2]), int(group_reply[3])
            for number in range(last, first - 1, -1):
                try:
                    reply = server.article(str(number))
                except nntplib.NNTPTemporaryError:
                    # The number refers to an expired or cancelled article.
                    continue
                message = message_from_string(_article_text(reply))
                posted = _posted_at(message)
                # Assumes article numbers roughly follow posting dates;
                # articles without a usable Date header are kept.
                if posted is not None and posted < cutoff:
                    break
                title = message['subject']
                body = message.get_payload()
                if message.is_multipart():
                    body = body[0]
                yield NewsItem(title, body)
        finally:
            # Always release the connection, even if a fetch fails or the
            # consumer abandons the generator early.
            server.quit()


class SimpleWebSource:
    """A news source that extracts items from a web page with regular expressions.

    *titlePattern* and *bodyPattern* must each contain one group; the n-th
    title match is paired with the n-th body match.
    """

    def __init__(self, url, titlePattern, bodyPattern):
        self.url = url
        self.titlePattern = re.compile(titlePattern)
        self.bodyPattern = re.compile(bodyPattern)

    def getItems(self):
        """Download the page and yield one ``NewsItem`` per title/body pair."""
        with closing(urlopen(self.url)) as page:
            text = page.read()
        if not isinstance(text, str):
            # Python 3 delivers bytes; the patterns are text patterns.
            text = text.decode('utf-8', 'replace')
        titles = self.titlePattern.findall(text)
        bodies = self.bodyPattern.findall(text)
        for title, body in zip(titles, bodies):
            yield NewsItem(title, wrap(body))


class PlainDestination:
    """A destination that prints every item as plain text to standard output."""

    def receiveItems(self, items):
        """Print the title followed by the body of each item."""
        for item in items:
            print(item.title)
            print(item.body)


class HTMLDestination:
    """A destination that writes all items into a single HTML report file."""

    def __init__(self, filename):
        self.filename = filename

    def receiveItems(self, items):
        """Write a linked table of contents followed by every item's text.

        Titles and bodies come from external sources, so they are
        HTML-escaped before being written.
        """
        items = list(items)  # iterated twice below; tolerate generators
        with open(self.filename, 'w') as out:
            out.write("""
<html>
  <head>
    <title>Today's News</title>
  </head>
  <body>
  <h1>Today's News</h1>
""")
            out.write('<ul>\n')
            for number, item in enumerate(items, 1):
                out.write('<li><a href="#%i">%s</a></li>\n'
                          % (number, escape(str(item.title))))
            out.write('</ul>\n')
            for number, item in enumerate(items, 1):
                out.write('<h2><a name="%i">%s</a></h2>\n'
                          % (number, escape(str(item.title))))
                out.write('<pre>%s</pre>\n' % escape(str(item.body)))
            out.write("""
  </body>
</html>
""")


def runDefaultSetup():
    """Run the default configuration: one web source, one NNTP source,
    plain-text output on stdout and an HTML report in ``news.html``.

    The sources below can be changed to suit your needs.
    """
    agent = NewsAgent()

    # NOTE(review): these patterns target an old BBC page layout; they will
    # most likely need adjusting to the markup of whatever page is used.
    bbc_url = 'http://www.bbc.co.uk/news/'
    bbc_title = r'(?s)a href="[^"]*">\s*<b>\s*(.*?)\s*</b>'
    bbc_body = r'(?s)</a>\s*<br/>\s*(.*?)\s*<'
    bbc = SimpleWebSource(bbc_url, bbc_title, bbc_body)
    agent.addSource(bbc)

    # NNTP source: articles of the last day from a Gmane newsgroup.
    clpa_server = 'news.gmane.org'
    clpa_group = 'gmane.comp.python.apple'
    clpa_window = 1
    clpa = NNTPSource(clpa_server, clpa_group, clpa_window)
    agent.addSource(clpa)

    # Plain-text and HTML destinations.
    agent.addDestination(PlainDestination())
    agent.addDestination(HTMLDestination('news.html'))

    # Publish the news.
    agent.distribute()


if __name__ == '__main__':
    runDefaultSetup()
其实这个程序在教程的第二版里就有,不过书中给出的服务器已经不能用了,所以我在文章开头给出了可以查找服务器地址的网址。比如我这里用的就是
clpa_server='news.gmane.org' clpa_group='gmane.comp.python.apple' 这一组!
这段代码我也稍微说明一下:最开始的类是NewsAgent,接着依次是NewsItem、NNTPSource、SimpleWebSource、PlainDestination、HTMLDestination,最后是runDefaultSetup。
程序一运行,首先执行的就是runDefaultSetup:这里先将NewsAgent实例化为agent;SimpleWebSource的3个参数分别是url、匹配标题的正则表达式和匹配正文的正则表达式,用它们实例化出bbc!
随后将bbc作为参数,调用agent的addSource。同样的道理,完成了NNTPSource这一块。
最后就是调用agent.addDestination添加输出目标,其中HTMLDestination以news.html作为生成报告的HTML文件!