#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# 程序:XML解析器
# 版本:01.0
# 作者:mupeng
# 日期:2013-12-18
# 语言:Python 2.7
# 功能:将xml解析成对应的html
# 注解:该程序用xml.sax模块的parse函数解析XML,并生成事件
# 继承ContentHandler并重写其事件处理函数
# Dispatcher主要用于相应标签的起始、结束事件的派发
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse
class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    A start or end event for element ``name`` is forwarded to a method
    called ``start<Name>`` or ``end<Name>`` on the same object, where
    ``<Name>`` is ``name.capitalize()`` (first character upper-cased, the
    rest lower-cased).  When no such method exists, ``defaultStart`` or
    ``defaultEnd`` is tried instead.  Events with no matching handler are
    silently ignored.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for an element event, if one is defined.

        Args:
            prefix: Event kind used as the method-name prefix; this class
                passes ``'start'`` or ``'end'``.
            name: Element name as reported by the parser.
            attrs: Element attributes.  Accepted so the signature matches
                the callers, but not forwarded: handlers are invoked
                without arguments.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler: fall back to the generic one.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback: dispatch the start of element ``name``."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: dispatch the end of element ``name``."""
        self.dispatch('end', name)
class Website(Dispatcher, ContentHandler):
def __init__(self):
self.fout = open('ddt_SAX.html', 'w')
self.imagein = False
self.desflag = False
self.item = False
self.title = ''
self.link = ''
self.guid = ''
self.url = ''
self.pubdate = ''
self.description = ''
self.temp = ''
self.prx = ''
def startChannel(self):
self.fout.write('''\n
\n
RSS-''')
# Write the closing fragment of the page and close the output file.
# Reached through the dispatcher when a ``channel`` element ends.
# The literal below embeds the script functions GetTimeDiff and
# GetSpanText, which rewrite the text of every <span> on the page.
# NOTE(review): the <br>/<p> markers inside the script and the non-ASCII
# text look like extraction/encoding damage (possibly GB2312 bytes decoded
# with the wrong codec) -- confirm against the original file before use.
def endChannel(self):
self.fout.write('''
|
<script><br> function GetTimeDiff(str)<br> {<br> if(str == '')<br> {<br> return '';<br> }</p>
<p> var pubDate = new Date(str);<br> var nowDate = new Date();<br> var diffMilSeconds = nowDate.valueOf()-pubDate.valueOf();<br> var days = diffMilSeconds/86400000;<br> days = parseInt(days);</p>
<p> diffMilSeconds = diffMilSeconds-(days*86400000);<br> var hours = diffMilSeconds/3600000;<br> hours = parseInt(hours);</p>
<p> diffMilSeconds = diffMilSeconds-(hours*3600000);<br> var minutes = diffMilSeconds/60000;<br> minutes = parseInt(minutes);</p>
<p> diffMilSeconds = diffMilSeconds-(minutes*60000);<br> var seconds = diffMilSeconds/1000;<br> seconds = parseInt(seconds);<br><br> var returnStr = "±±¾©·¢²¼Ê±¼ä£º" + pubDate.toLocaleString();</p>
<p> if(days > 0)<br> {<br> returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + days + "Ìì" + hours + "Сʱ" + minutes + "·ÖÖÓ£©";<br> }<br> else if (hours > 0)<br> {<br> returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + hours + "Сʱ" + minutes + "·ÖÖÓ£©";<br> }<br> else if (minutes > 0)<br> {<br> returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + minutes + "·ÖÖÓ£©";<br> }</p>
<p> return returnStr;</p>
<p> }</p>
<p> function GetSpanText()<br> {<br> var pubDate;<br> var pubDateArray;<br> var spanArray = document.getElementsByTagName("span");</p>
<p> for(var i = 0; i < spanArray.length; i++)<BR> {<BR> pubDate = spanArray[i].innerHTML;<BR> document.getElementsByTagName("span")[i].innerHTML = GetTimeDiff(pubDate); <BR> }<BR> }</P>
<P> GetSpanText();<BR> </script>
''')
# The file opened in __init__ is closed here; later writes would fail.
self.fout.close()
def characters(self, chars):
if chars.strip():
#chars = chars.strip()
self.temp += chars
#print self.temp
def startTitle(self):
if self.item:
self.fout.write('''
\n\n ''')
def endTitle(self):
if not self.imagein and not self.item: self.title = self.temp self.temp = '' self.fout.write(self.title.encode('gb2312'))
#self.title = self.temp self.fout.write(''' \n\n\n\n <script>\n</p>
<p> function copyLink()<br> {<br> clipboardData.setData("Text",window.location.href);<br> alert("RSSÁ´½ÓÒѾ¸´ÖƵ½¼ôÌù°å");<br> }</p>
<p> function subscibeLink()<br> {<br> var str = window.location.pathname;<br> while(str.match(/^\//))<br> {<br> str = str.replace(/^\//,"");<br> }<br> window.open("http://rss.sina.com.cn/my_sina_web_rss_news.html?url=" + str,"_self");</p>
<p> }<br> </script>\n |