./0000755000076500007650000000000010764320642015727 5ustar00massimodipierromassimodipierro00000000000000ABOUT0000644000076500007650000000036310763221761016372 0ustar00massimodipierromassimodipierro00000000000000This is the Podcast Peer RSS reader and recommendation program. It will read and parse RSS for podcasts plus exchange podcast recommendations based on user preference via remote procedure call for SQL database records. Developed with web2py.LICENSE0000644000076500007650000000147210763217247016610 0ustar00massimodipierromassimodipierro00000000000000Copyright (c) 2007, Tony G. Sardella All rights reserved. BSD License THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.__init__.py0000644000076500007650000000000010762757741017705 0ustar00massimodipierromassimodipierro00000000000000__init__.pyc0000644000076500007650000000031410753726222020046 0ustar00massimodipierromassimodipierro00000000000000 굗Gc@sdS(N((((su/Users/tony127/Desktop/web2py framework /web2py 8/web2py.app/Contents/Resources/applications/podcast_peer/__init__.pysscache/0000755000076500007650000000000010764320653016637 5ustar00massimodipierromassimodipierro00000000000000cache/cache.lock0000644000076500007650000000000010764320653020542 
0ustar00massimodipierromassimodipierro00000000000000controllers/0000755000076500007650000000000010764320642020140 5ustar00massimodipierromassimodipierro00000000000000controllers/appadmin.py0000644000076500007650000001244010764320642022304 0ustar00massimodipierromassimodipierro00000000000000########################################################### ### make sure administrator is on localhost ############################################################ import os from gluon.contenttype import contenttype from gluon.fileutils import check_credentials if request.env.remote_addr!=request.env.http_host.split(':')[0]: raise HTTP(400) if not check_credentials(request): redirect('/admin') response.view='appadmin.html' response.menu=[['design',False,'/admin/default/design/%s' % request.application], ['db',False,'/%s/%s/index' % (request.application, request.controller)], ['state',False,'/%s/%s/state' % (request.application, request.controller)]] ########################################################### ### list all tables in database ############################################################ def index(): import types as _types _dbs={} for _key,_value in globals().items(): if isinstance(_value,SQLDB): tables=_dbs[_key]=[] for _tablename in _value.tables: tables.append((_key,_tablename)) return dict(dbs=_dbs) ########################################################### ### insert a new record ############################################################ def insert(): try: dbname=request.args[0] db=eval(dbname) table=request.args[1] form=SQLFORM(db[table]) except: redirect(URL(r=request,f='index')) if form.accepts(request.vars,session): response.flash='new record inserted' return dict(form=form) ########################################################### ### list all records in table and insert new record ############################################################ def download(): filename=request.args[0] response.headers['Content-Type']=contenttype(filename) return 
open(os.path.join(request.folder,'uploads/','%s' % filename),'rb').read() def csv(): import gluon.contenttype, csv, cStringIO response.headers['Content-Type']=gluon.contenttype.contenttype('.csv') try: dbname=request.vars.dbname db=eval(dbname) records=db(request.vars.query).select() except: redirect(URL(r=request,f='index')) s=cStringIO.StringIO() writer = csv.writer(s) writer.writerow(records.colnames) c=range(len(records.colnames)) for i in range(len(records)): writer.writerow([records.response[i][j] for j in c]) ### FILL HERE return s.getvalue() def import_csv(table,file): import csv reader = csv.reader(file) colnames=None for line in reader: if not colnames: colnames=[x[x.find('.')+1:] for x in line] c=[i for i in range(len(line)) if colnames[i]!='id'] else: items=[(colnames[i],line[i]) for i in c] table.insert(**dict(items)) def select(): try: dbname=request.args[0] db=eval(dbname) if not request.vars.query: table=request.args[1] query='%s.id>0' % table else: query=request.vars.query except: redirect(URL(r=request,f='index')) if request.vars.csvfile!=None: try: import_csv(db[table],request.vars.csvfile.file) response.flash='data uploaded' except: response.flash='unable to parse csv file' if request.vars.delete_all and request.vars.delete_all_sure=='yes': try: db(query).delete() response.flash='records deleted' except: response.flash='invalid SQL FILTER' elif request.vars.update_string: try: env=dict(db=db,query=query) exec('db(query).update('+request.vars.update_string+')') in env response.flash='records updated' except: response.flash='invalid SQL FILTER or UPDATE STRING' if request.vars.start: start=int(request.vars.start) else: start=0 limitby=(start,start+100) try: records=db(query).select(limitby=limitby) except: response.flash='invalid SQL FILTER' return dict(records='no records',nrecords=0,query=query,start=0) linkto=URL(r=request,f='update/%s'% (dbname)) upload=URL(r=request,f='download') return dict(start=start,query=query,\ nrecords=len(records),\ 
records=SQLTABLE(records,linkto,upload,_class='sortable')) ########################################################### ### edit delete one record ############################################################ def update(): try: dbname=request.args[0] db=eval(dbname) table=request.args[1] except: redirect(URL(r=request,f='index')) try: id=int(request.args[2]) record=db(db[table].id==id).select()[0] except: redirect(URL(r=request,f='select/%s/%s'%(dbname,table))) form=SQLFORM(db[table],record,deletable=True, linkto=URL(r=request,f='select/'+dbname), upload=URL(r=request,f='download/')) if form.accepts(request.vars,session): response.flash='done!' redirect(URL(r=request,f='select/%s/%s'%(dbname,table))) return dict(form=form) ########################################################### ### get global variables ############################################################ def state(): return dict(state=request.env) controllers/default.py0000644000076500007650000002027610764320642022145 0ustar00massimodipierromassimodipierro00000000000000from gluon.fileutils import check_credentials import gluon.contrib.feedparser as feedparser import time import socket try: session.authorized = check_credentials(request) except:pass response.menu=[['Home',True,'/%s/default/index' % request.application], ['Google',False,'http://www.google.com'], ['Podcasts',False,'http://podcast.com'], ['ITunes Podcasts', False, 'http://www.apple.com/itunes/store/podcasts.html'], ['Podcast Directory', False, 'http://www.podcastingnews.com/topics/Podcast_Directory.html'], ['Podcast Alley', False,'http://www.podcastalley.com'], ['Podcasting info', False,'http://en.wikipedia.org/wiki/Podcasting'], ['Yahoo Podcast Search', False, 'http://audio.search.yahoo.com/audio'], ['Digg podcasts', False,'http://www.digg.com/podcasts']] ############################################################ ## index function ############################################################ def index(): try: page_id=int(request.args[0]) 
this_page=db(db.page.id==page_id).select(db.page.ALL)[0] except: this_page=db().select(db.page.ALL)[0] page_id=this_page.id try: d = feedparser.parse(this_page.url) size = len(d['entries']) e = d.entries[0] except: session.flash= 'exception b, invalid url. switched to default.' this_page.update_record(url="http://rss.streamos.com/streamos/rss/genfeed.php?feedid=360&groupname=nature") redirect(URL(r=request,f='index')) try: rows = db().select(db.servers.ALL) pages=db(db.page.parent==0).select(db.page.ALL,orderby=db.page.indx) except: session.flash= 'exception c' redirect(URL(r=request,f='index')) return dict(pages=pages, this_page=this_page, servers = rows, entry = e) ############################################################ ## create podcast ############################################################ def create(): last_page=db(db.page.parent==0).select(orderby=~db.page.indx)[0] db.page.insert(title='',indx=last_page.indx+1) redirect(URL(r=request,f='index')) ############################################################ ## delete podcast ############################################################ def delete(): try: page_id=int(request.args[0]) except: page_id=1 this_page=db(db.page.id==page_id).select()[0] if this_page.indx != 1: db(db.page.id==page_id).delete() session.flash= this_page.title + " deleted." else: session.flash= "Base index, cannot delete. Edit only." 
redirect(URL(r=request,f='index')) ############################################################ ## edit podcast ############################################################ def edit(): try: page_id=int(request.args[0]) except: page_id=1 this_page=db(db.page.id==page_id).select()[0] myform = SQLFORM(db.page, this_page, fields = ['title','url','rating','genre']) if myform.accepts(request.vars): session.flash='changes accepted' redirect(URL(r=request,f='index',args=[this_page.id])) if myform.errors: response.flash='invalid form' pages=db(db.page.parent==0).select(db.page.ALL,orderby=db.page.indx) return dict(pages=pages,form=myform) ############################################################ ## move selection up ############################################################ def moveup(): try: page_id=int(request.args[0]) this_page=db(db.page.id==page_id).select()[0] other_page=db((db.page.parent==0)&(db.page.indxthis_page.indx)).select(orderby=db.page.indx,limitby=(0,1))[0] tmp=this_page.indx this_page.update_record(indx=other_page.indx) other_page.update_record(indx=tmp) except: pass redirect(URL(r=request,f='index')) ############################################################ ## grab db ############################################################ def grab(): import xmlrpclib try: client = xmlrpclib.ServerProxy(request.vars.url) columns,rows=client.search('page.id>0') j=len('user.') except: session.flash='invalid server' redirect(URL(r=request,f='index')) ses = db(db.session.id==1).select()[0] rank, genre = prefs(ses.rating, ses.genre) try: for row in rows: items=dict([(columns[i][j:],row[i]) for i in range(len(columns)) if columns[i][-3:]!='id']) rank1, genre1 = prefs(items['rating'], items['genre']) #session.flash = int(items['rating']) if ((genre1 == genre) and (int(items['rating']) == rank)): db(db.page.title == items['title']).delete() db.page.insert(**items) except: pass session.flash= "records transfered" return dict(records=db(db.page.id>0).select(), link = 
A('go back to main page',_href=URL(r=request,f='index'))) ############################################################ ## episodes ############################################################ def episodes(): try: page_id=int(request.args[0]) except: page_id=1 this_page=db(db.page.id==page_id).select()[0] try: d = feedparser.parse(this_page.url) e = d['entries'] lengt = len(d['entries']) except: redirect(URL(r=request,f='index')) return dict(entries = e, page = this_page, d = d) ############################################################ ## add servers ############################################################ def add(): form=SQLFORM(db.servers) if form.accepts(request.vars,session): response.flash='new server added' return dict(form = form) ############################################################ ### manage server ############################################################ def manage(): id=int(request.args[0]) record = db(db.servers.id==id).select()[0] ses = db(db.session.id==1).select()[0] form=SQLFORM(db.servers, record, deletable=True) ses.update_record(genre = record.Select_genre) ses.update_record(rating = record.Get_by_rating) if form.accepts(request.vars,session): response.flash='done!' 
return dict(form = form, record = record) ############################################################ ### connect to server ############################################################ def connect(): id=int(request.args[0]) record=db(db.servers.id==id).select()[0] ses = db(db.session.id==1).select()[0] ses.update_record(genre = record.Select_genre) ses.update_record(rating = record.Get_by_rating) url = record.url url = 'http://127.0.0.1:8000/podcast_peer_7/default/grab?url=' + url redirect(url) ############################################################ ### determine preference as numerical value ############################################################ def prefs(usr_rating, usr_genre): rank = 0 genre = 0 if usr_rating == 'one': rank = 1 elif usr_rating == 'two': rank = 2 elif usr_rating == 'three': rank = 3 elif usr_rating == 'four': rank = 4 elif usr_rating == 'five': rank = 5 if usr_genre == 'info': genre = 1 elif usr_genre == 'music': genre = 2 elif usr_genre == 'comedy ': genre = 3 return(rank, genre)controllers/xmlrpc.py0000644000076500007650000000025110764320642022015 0ustar00massimodipierromassimodipierro00000000000000import SimpleXMLRPCServer def search(query): rows=db(query).select() return rows.colnames, rows.response def handle(): return response.xmlrpc(request,[search])databases/0000755000076500007650000000000010764321172017520 5ustar00massimodipierromassimodipierro00000000000000databases/db.db0000644000076500007650000002600010764321172020412 0ustar00massimodipierromassimodipierro00000000000000SQLite format 3@   z{MtablesessionsessionCREATE TABLE session( id INTEGER PRIMARY KEY AUTOINCREMENT, genre CHAR(32), rating CHAR(32) )g%tableserversserversCREATE TABLE servers( id INTEGER PRIMARY KEY AUTOINCREMENT, title CHAR(32), url CHAR(32) , Get_by_rating CHAR(1), genre CHAR(32), Select_genre CHAR(32), server_id REFERENCES page(id) ON DELETE CASCADE)stabledocdocCREATE TABLE doc( id INTEGER PRIMARY KEY AUTOINCREMENT, name CHAR(32), timestamp TIMESTAMP, 
filename CHAR(64) )P++Ytablesqlite_sequencesqlite_sequenceCREATE TABLE sqlite_sequence(name,seq)1EtablepagepageCREATE TABLE page( id INTEGER PRIMARY KEY AUTOINCREMENT, title CHAR(32), text TEXT, timestamp TIMESTAMP, parent INTEGER , idnx INTEGER, indx INTEGER, feed CHAR(32), url CHAR(32), ur CHAR(32), rating INTEGER, genre CHAR(32), votes INTEGER, mean_vote DOUBLE, Get_by_rating CHAR(1))  !   session page1 servers  y^ [Z)udog server 100http://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handle/Two starsmusicy^!s#big serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handleThree starsmusicy!s!O!sjoe serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handleonemusicO!sdog serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handlethreemusicfoQ )udog server 100http://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handle/twoinfody nfooofo %%Error servererror server?!sbig serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/haP!scat serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handlefourcomedy Q )sawesome serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handlefiveinfocN!sbig serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handlefourmusicM!sjoe serverhttp://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handleonemusicdy >p@[U 3[Tom Green2008-02-16 23:42:22 http://www.tomgreen.com/podcast/tgl.rsscomedyhZ1 3cOnion News Network2008-02-23 16:06:15http://feeds.theonion.com/OnionNewsNetwork/comedyk/ 3mJade Tree Records2008-02-25 18:57:39http://www.jadetree.com/rss2.php?feed=mp3podcastmusicV ' 3KXLR8R Podcast2008-02-11 09:25:31http://feeds.xlr8r.com/xlr8rmp3musicyC 3uDavid Duriez (Neo podcasts)2008-02-15 23:54:20 http://feeds.feedburner.com/David-duriez-fg-dj-radiomusicQ% 3CKEXP podcast2008-02-23 13:26:56http://kexp.org/podcast.xmlmusicW/ 3EXLR8R m4a Podcast2008-02-23 15:26:45http://feeds.xlr8r.com/xlr8rmusicc 3oXLR8R TV2008-02-23 15:37:27http://revision3.com/xlr8rtv/feed/quicktime-largemusic ]c) 3cFabric Podcast2008-02-24 09:25:00http://feeds.fabriclondon.com/fabricpodcastmusic`' 3aNew 
Scientist2008-02-23 10:58:31http://www.newscientist.com/podcastfeed.nsinfo\ 3aGeek News2008-02-10 18:43:36http://www.geeknewscentral.com/podcast.xmlinfo\# 3 _Science Mag2008-02-10 20:38:06http://www.sciencemag.org/rss/podcast.xmlinfox A 3wSpitzer Space Telescope HD2008-02-23 15:44:39http://www.spitzer.caltech.edu/features/hd/hdfeed.xmlinfog!- 3iTotally Rad show2008-02-23 17:19:02http://revision3.com/trs/feed/quicktime-large/info comedy four C[ey1 3Washington Post HD2008-02-23 16:48:12http://www.washingtonpost.com/wp-srv/mmedia/hd_podcast.xmlinfoz'9 3Wake Up to Money (BBC)2008-02-15 05:00:23 http://downloads.bbc.co.uk/podcasts/fivelive/money/rss.xmlinfo%+ 3%Nature Podcasts2008-02-10 18:32:56http://rss.streamos.com/streamos/rss/genfeed.php?feedid=360&groupname=natureinfo\ 3aGeek News2008-02-10 18:43:36http://www.geeknewscentral.com/podcast.xmlinfo\# 3 _Science Mag2008-02-10 20:38:06http://www.sciencemag.org/rss/podcast.xmlinfox A 3wSpitzer Space Telescope HD2008-02-23 15:44:39http://www.spitzer.caltech.edu/features/hd/hdfeed.xmlinfog!- 3iTotally Rad show2008-02-23 17:19:02http://revision3.com/trs/feed/quicktime-large/info 1 3k&' 3wNerd TV (PBS)2008-02-15 04:57:25 http://www.pbs.org/cringely/nerdtv/rss/nerdtv-mp3.xmlinfo {A 3wSpitzer Space Telescope HD2008-02-23 15:44:39http://www.spitzer.caltech.edu/features/hd/hdfeed.xmlinfoz9 3Wake Up to Money (BBC)2008-02-15 05:00:23 http://downloads.bbc.co.uk/podcasts/fivelive/money/rss.xmlinfok~' 3wNerd TV (PBS)2008-02-15 04:57:25 http://www.pbs.org/cringely/nerdtv/rss/nerdtv-mp3.xmlinfo3# 3 _Science Mag2008-02-10 20:38:06http://www.sciencemag.org/rss/podcast.xmlinfo3_ 3aGeek News2008-02-10 18:43:36http://www.geeknewscentral.com/podcast.xmlinfo{+ 3%Nature Podcasts2008-02-10 18:32:56http://rss.streamos.com/streamos/rss/genfeed.php?feedid=360&groupname=natureinfo' 3aNew Scientist2008-02-23 10:58:31http://www.newscientist.com/podcastfeed.nsinfoj- 3iTotally Rad show2008-02-23 
17:19:02http://revision3.com/trs/feed/quicktime-large/info ?v11 3Washington Post HD2008-02-23 16:48:12http://www.washingtonpost.com/wp-srv/mmedia/hd_podcast.xmlinfo`. 3eThe Onion2008-03-03 05:38:51http://feeds.theonion.com/theonion/radionewscomedy1 3n/- 3uSouthern Records2008-02-25 18:36:03http://www.southern.com/southern/podcast-us/feed.rssmusic%+ 3%Nature Podcasts2008-02-10 18:32:56http://rss.streamos.com/streamos/rss/genfeed.php?feedid=360&groupname=natureinfok&' 3wNerd TV (PBS)2008-02-15 04:57:25 http://www.pbs.org/cringely/nerdtv/rss/nerdtv-mp3.xmlinfoz'9 3Wake Up to Money (BBC)2008-02-15 05:00:23 http://downloads.bbc.co.uk/podcasts/fivelive/money/rss.xmlinfoq- 3uSouthern Records2008-02-25 18:36:03http://www.southern.com/southern/podcast-us/feed.rssmusicdatabases/f6db3e6e66dc0b6aea6b47a87ef29c1f_doc.table0000644000076500007650000000024410747742547026345 0ustar00massimodipierromassimodipierro00000000000000(dp1 S'timestamp' p2 S'TIMESTAMP' p3 sS'filename' p4 S'CHAR(64)' p5 sS'id' p6 S'INTEGER PRIMARY KEY AUTOINCREMENT' p7 sS'name' p8 S'CHAR(32)' p9 s.databases/f6db3e6e66dc0b6aea6b47a87ef29c1f_page.table0000644000076500007650000000072410764321551026502 0ustar00massimodipierromassimodipierro00000000000000(dp1 S'feed' p2 S'CHAR(32)' p3 sS'rating' p4 S'INTEGER' p5 sS'votes' p6 S'INTEGER' p7 sS'mean_vote' p8 S'DOUBLE' p9 sS'idnx' p10 S'INTEGER' p11 sS'parent' p12 S'INTEGER' p13 sS'title' p14 S'CHAR(32)' p15 sS'url' p16 S'CHAR(32)' p17 sS'text' p18 S'TEXT' p19 sS'id' p20 S'INTEGER PRIMARY KEY AUTOINCREMENT' p21 sS'indx' p22 S'INTEGER' p23 sS'ur' p24 S'CHAR(32)' p25 sS'timestamp' p26 S'TIMESTAMP' p27 sS'genre' p28 S'CHAR(32)' p29 sS'Get_by_rating' p30 S'CHAR(1)' p31 s.databases/f6db3e6e66dc0b6aea6b47a87ef29c1f_servers.table0000644000076500007650000000042310764321551027253 0ustar00massimodipierromassimodipierro00000000000000(dp1 S'server_id' p2 S'REFERENCES page(id) ON DELETE CASCADE' p3 sS'title' p4 S'CHAR(32)' p5 sS'url' p6 S'CHAR(32)' p7 sS'id' p8 S'INTEGER PRIMARY 
KEY AUTOINCREMENT' p9 sS'genre' p10 S'CHAR(32)' p11 sS'Select_genre' p12 S'CHAR(32)' p13 sS'Get_by_rating' p14 S'CHAR(1)' p15 s.databases/f6db3e6e66dc0b6aea6b47a87ef29c1f_session.table0000644000076500007650000000016110760533101027234 0ustar00massimodipierromassimodipierro00000000000000(dp1 S'genre' p2 S'CHAR(32)' p3 sS'rating' p4 S'CHAR(32)' p5 sS'id' p6 S'INTEGER PRIMARY KEY AUTOINCREMENT' p7 s.databases/sql.log0000644000076500007650000000401710760533101021015 0ustar00massimodipierromassimodipierro00000000000000timestamp: 2008-01-29T18:31:35.061000 CREATE TABLE page( id INTEGER PRIMARY KEY AUTOINCREMENT, title CHAR(32), text TEXT, timestamp TIMESTAMP, parent INTEGER ); success! timestamp: 2008-01-29T18:31:35.093000 CREATE TABLE doc( id INTEGER PRIMARY KEY AUTOINCREMENT, name CHAR(32), timestamp TIMESTAMP, filename CHAR(64) ); success! timestamp: 2008-01-29T20:33:36.561000 ALTER TABLE page ADD COLUMN index INTEGER; timestamp: 2008-01-29T20:34:05.718000 ALTER TABLE page ADD COLUMN idnx INTEGER; success! timestamp: 2008-01-29T20:38:36.866000 ALTER TABLE page ADD COLUMN indx INTEGER; success! timestamp: 2008-02-10T11:24:46.908823 CREATE TABLE servers( id INTEGER PRIMARY KEY AUTOINCREMENT, title CHAR(32), url CHAR(32) ); success! timestamp: 2008-02-10T15:47:57.401479 ALTER TABLE page ADD COLUMN feed CHAR(32); success! timestamp: 2008-02-10T16:43:10.058586 ALTER TABLE page ADD COLUMN url CHAR(32); success! timestamp: 2008-02-10T16:49:40.860808 ALTER TABLE page ADD COLUMN ur CHAR(32); success! timestamp: 2008-02-16T17:14:22.261197 ALTER TABLE page ADD COLUMN rating INTEGER; success! timestamp: 2008-02-16T17:15:22.203159 ALTER TABLE page ADD COLUMN genre CHAR(32); success! timestamp: 2008-02-16T17:44:51.128612 ALTER TABLE page ADD COLUMN votes INTEGER; success! timestamp: 2008-02-16T17:44:51.131577 ALTER TABLE page ADD COLUMN mean_vote DOUBLE; success! timestamp: 2008-02-25T01:07:16.471301 ALTER TABLE page ADD COLUMN Get_by_rating CHAR(1); success! 
timestamp: 2008-02-25T01:23:20.806113 ALTER TABLE servers ADD COLUMN Get_by_rating CHAR(1); success! timestamp: 2008-02-25T02:14:21.427607 ALTER TABLE servers ADD COLUMN genre CHAR(32); success! timestamp: 2008-02-25T02:15:58.912188 ALTER TABLE servers ADD COLUMN Select_genre CHAR(32); success! timestamp: 2008-02-25T04:18:24.561843 ALTER TABLE servers ADD COLUMN server_id REFERENCES page(id) ON DELETE CASCADE; success! timestamp: 2008-02-25T04:36:17.767817 CREATE TABLE session( id INTEGER PRIMARY KEY AUTOINCREMENT, genre CHAR(32), rating CHAR(32) ); success! embed/0000755000076500007650000000000010764320642016646 5ustar00massimodipierromassimodipierro00000000000000embed/swfobject.js0000644000076500007650000002072610756211152021175 0ustar00massimodipierromassimodipierro00000000000000/** * SWFObject v2.0: Flash Player detection and embed - http://blog.deconcept.com/swfobject/ * * SWFObject is (c) 2006 Geoff Stearns and is released under the MIT License: * http://www.opensource.org/licenses/mit-license.php * */ if(typeof deconcept == "undefined") var deconcept = new Object(); if(typeof deconcept.util == "undefined") deconcept.util = new Object(); if(typeof deconcept.SWFObjectUtil == "undefined") deconcept.SWFObjectUtil = new Object(); deconcept.SWFObject = function(swf, id, w, h, ver, c, quality, xiRedirectUrl, redirectUrl, detectKey) { if (!document.getElementById) { return; } this.DETECT_KEY = detectKey ? 
detectKey : 'detectflash'; this.skipDetect = deconcept.util.getRequestParameter(this.DETECT_KEY); this.params = new Object(); this.variables = new Object(); this.attributes = new Array(); if(swf) { this.setAttribute('swf', swf); } if(id) { this.setAttribute('id', id); } if(w) { this.setAttribute('width', w); } if(h) { this.setAttribute('height', h); } if(ver) { this.setAttribute('version', new deconcept.PlayerVersion(ver.toString().split("."))); } this.installedVer = deconcept.SWFObjectUtil.getPlayerVersion(); if (!window.opera && document.all && this.installedVer.major > 7) { // only add the onunload cleanup if the Flash Player version supports External Interface and we are in IE deconcept.SWFObject.doPrepUnload = true; } if(c) { this.addParam('bgcolor', c); } var q = quality ? quality : 'high'; this.addParam('quality', q); this.setAttribute('useExpressInstall', false); this.setAttribute('doExpressInstall', false); var xir = (xiRedirectUrl) ? xiRedirectUrl : window.location; this.setAttribute('xiRedirectUrl', xir); this.setAttribute('redirectUrl', ''); if(redirectUrl) { this.setAttribute('redirectUrl', redirectUrl); } } deconcept.SWFObject.prototype = { useExpressInstall: function(path) { this.xiSWFPath = !path ? 
"expressinstall.swf" : path; this.setAttribute('useExpressInstall', true); }, setAttribute: function(name, value){ this.attributes[name] = value; }, getAttribute: function(name){ return this.attributes[name]; }, addParam: function(name, value){ this.params[name] = value; }, getParams: function(){ return this.params; }, addVariable: function(name, value){ this.variables[name] = value; }, getVariable: function(name){ return this.variables[name]; }, getVariables: function(){ return this.variables; }, getVariablePairs: function(){ var variablePairs = new Array(); var key; var variables = this.getVariables(); for(key in variables){ variablePairs.push(key +"="+ variables[key]); } return variablePairs; }, getSWFHTML: function() { var swfNode = ""; if (navigator.plugins && navigator.mimeTypes && navigator.mimeTypes.length) { // netscape plugin architecture if (this.getAttribute("doExpressInstall")) { this.addVariable("MMplayerType", "PlugIn"); this.setAttribute('swf', this.xiSWFPath); } swfNode = ' 0){ swfNode += 'flashvars="'+ pairs +'"'; } swfNode += '/>'; } else { // PC IE if (this.getAttribute("doExpressInstall")) { this.addVariable("MMplayerType", "ActiveX"); this.setAttribute('swf', this.xiSWFPath); } swfNode = ''; swfNode += ''; var params = this.getParams(); for(var key in params) { swfNode += ''; } var pairs = this.getVariablePairs().join("&"); if(pairs.length > 0) {swfNode += '';} swfNode += ""; } return swfNode; }, write: function(elementId){ if(this.getAttribute('useExpressInstall')) { // check to see if we need to do an express install var expressInstallReqVer = new deconcept.PlayerVersion([6,0,65]); if (this.installedVer.versionIsValid(expressInstallReqVer) && !this.installedVer.versionIsValid(this.getAttribute('version'))) { this.setAttribute('doExpressInstall', true); this.addVariable("MMredirectURL", escape(this.getAttribute('xiRedirectUrl'))); document.title = document.title.slice(0, 47) + " - Flash Player Installation"; this.addVariable("MMdoctitle", 
document.title); } } if(this.skipDetect || this.getAttribute('doExpressInstall') || this.installedVer.versionIsValid(this.getAttribute('version'))){ var n = (typeof elementId == 'string') ? document.getElementById(elementId) : elementId; n.innerHTML = this.getSWFHTML(); return true; }else{ if(this.getAttribute('redirectUrl') != "") { document.location.replace(this.getAttribute('redirectUrl')); } } return false; } } /* ---- detection functions ---- */ deconcept.SWFObjectUtil.getPlayerVersion = function(){ var PlayerVersion = new deconcept.PlayerVersion([0,0,0]); if(navigator.plugins && navigator.mimeTypes.length){ var x = navigator.plugins["Shockwave Flash"]; if(x && x.description) { PlayerVersion = new deconcept.PlayerVersion(x.description.replace(/([a-zA-Z]|\s)+/, "").replace(/(\s+r|\s+b[0-9]+)/, ".").split(".")); } }else{ // do minor version lookup in IE, but avoid fp6 crashing issues // see http://blog.deconcept.com/2006/01/11/getvariable-setvariable-crash-internet-explorer-flash-6/ try{ var axo = new ActiveXObject("ShockwaveFlash.ShockwaveFlash.7"); }catch(e){ try { var axo = new ActiveXObject("ShockwaveFlash.ShockwaveFlash.6"); PlayerVersion = new deconcept.PlayerVersion([6,0,21]); axo.AllowScriptAccess = "always"; // throws if player version < 6.0.47 (thanks to Michael Williams @ Adobe for this code) } catch(e) { if (PlayerVersion.major == 6) { return PlayerVersion; } } try { axo = new ActiveXObject("ShockwaveFlash.ShockwaveFlash"); } catch(e) {} } if (axo != null) { PlayerVersion = new deconcept.PlayerVersion(axo.GetVariable("$version").split(" ")[1].split(",")); } } return PlayerVersion; } deconcept.PlayerVersion = function(arrVersion){ this.major = arrVersion[0] != null ? parseInt(arrVersion[0]) : 0; this.minor = arrVersion[1] != null ? parseInt(arrVersion[1]) : 0; this.rev = arrVersion[2] != null ? 
parseInt(arrVersion[2]) : 0; } deconcept.PlayerVersion.prototype.versionIsValid = function(fv){ if(this.major < fv.major) return false; if(this.major > fv.major) return true; if(this.minor < fv.minor) return false; if(this.minor > fv.minor) return true; if(this.rev < fv.rev) return false; return true; } /* ---- get value of query string param ---- */ deconcept.util = { getRequestParameter: function(param) { var q = document.location.search || document.location.hash; if(q) { var pairs = q.substring(1).split("&"); for (var i=0; i < pairs.length; i++) { if (pairs[i].substring(0, pairs[i].indexOf("=")) == param) { return pairs[i].substring((pairs[i].indexOf("=")+1)); } } } return ""; } } /* fix for video streaming bug */ deconcept.SWFObjectUtil.cleanupSWFs = function() { var objects = document.getElementsByTagName("OBJECT"); for (var i=0; i < objects.length; i++) { objects[i].style.display = 'none'; for (var x in objects[i]) { if (typeof objects[i][x] == 'function') { objects[i][x] = function(){}; } } } } // fixes bug in fp9 see http://blog.deconcept.com/2006/07/28/swfobject-143-released/ if (deconcept.SWFObject.doPrepUnload) { deconcept.SWFObjectUtil.prepUnload = function() { __flash_unloadHandler = function(){}; __flash_savedUnloadHandler = function(){}; window.attachEvent("onunload", deconcept.SWFObjectUtil.cleanupSWFs); } window.attachEvent("onbeforeunload", deconcept.SWFObjectUtil.prepUnload); } /* add Array.push if needed (ie5) */ if (Array.prototype.push == null) { Array.prototype.push = function(item) { this[this.length] = item; return this.length; }} /* add some aliases for ease of use/backwards compatibility */ var getQueryParamValue = deconcept.util.getRequestParameter; var FlashObject = deconcept.SWFObject; // for legacy support var SWFObject = deconcept.SWFObject; errors/0000755000076500007650000000000010764320642017106 5ustar00massimodipierromassimodipierro00000000000000languages/0000755000076500007650000000000010764320642017540 
5ustar00massimodipierromassimodipierro00000000000000languages/it..py0000644000076500007650000000007110764320642020602 0ustar00massimodipierromassimodipierro00000000000000{ 'Hello World':'', 'Welcome to web2py':'', 'doh!':'', } languages/it.py0000644000076500007650000000012710764320642020526 0ustar00massimodipierromassimodipierro00000000000000{ 'Hello World':'Salve Mondo', 'Welcome to web2py':'Ciao da wek2py', 'doh!':'oops!', } models/0000755000076500007650000000000010764320642017055 5ustar00massimodipierromassimodipierro00000000000000models/db.py0000644000076500007650000000227110764320642020016 0ustar00massimodipierromassimodipierro00000000000000import datetime now=datetime.datetime.today() db=SQLDB("sqlite://db.db") db.define_table('page', SQLField('title'), SQLField('text','text',default=''), SQLField('timestamp','datetime',default=now), SQLField('url','text', default = 'http://rss.streamos.com/streamos/rss/genfeed.php?feedid=360&groupname=nature'), SQLField('indx','integer'), SQLField('parent','integer',default=0), SQLField('rating','double',default=5), SQLField('genre')) db.define_table('doc', SQLField('name'), SQLField('timestamp','datetime',default=now), SQLField('filename','upload')) db.define_table('servers', SQLField('title'), SQLField('url', 'text', default='http://127.0.0.1:8000/podcast_peer_2a/xmlrpc/handle/'), SQLField('Select_genre',default = 'comedy '), SQLField('Get_by_rating',default = 'five')) db.define_table('session', SQLField('genre'), SQLField('rating')) db.page.title.requires=IS_NOT_EMPTY() db.doc.name.requires=IS_NOT_EMPTY() db.servers.Select_genre.requires=IS_IN_SET(['music','info','comedy ']) db.servers.Get_by_rating.requires=IS_IN_SET(['one','two','three','four','five']) page_labels={ 'title':'Page Title', 'indx':'Page Number', 'text':'Page Text' }modules/0000755000076500007650000000000010764320642017242 5ustar00massimodipierromassimodipierro00000000000000modules/__init__.py0000644000076500007650000000000010762757741021355 
0ustar00massimodipierromassimodipierro00000000000000modules/__init__.pyc0000644000076500007650000000032410753726222021517 0ustar00massimodipierromassimodipierro00000000000000 굗Gc@sdS(N((((s}/Users/tony127/Desktop/web2py framework /web2py 8/web2py.app/Contents/Resources/applications/podcast_peer/modules/__init__.pyssmodules/feedparser.py0000755000076500007650000036006510764320642021751 0ustar00massimodipierromassimodipierro00000000000000#!/usr/bin/env python """Universal feed parser Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds Visit http://feedparser.org/ for the latest version Visit http://feedparser.org/docs/ for the latest documentation Required: Python 2.1 or later Recommended: Python 2.3 or later Recommended: CJKCodecs and iconv_codec """ __version__ = "4.1"# + "$Revision: 1.92 $"[11:15] + "-cvs" __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.""" __author__ = "Mark Pilgrim " __contributors__ = ["Jason Diamond ", "John Beimler ", "Fazal Majid ", "Aaron Swartz ", "Kevin Marks "] _debug = 0 # HTTP "User-Agent" header to send to servers when downloading feeds. # If you are embedding feedparser in a larger application, you should # change this to your application name and URL. USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__ # HTTP "Accept" header to send to servers when downloading feeds. If you don't # want to send an Accept header, set this to None. ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" # List of preferred XML parsers, by SAX driver name. These will be tried first, # but if they're not installed, Python will keep searching through its own list # of pre-installed parsers until it finds one that supports everything we need. PREFERRED_XML_PARSERS = ["drv_libxml2"] # If you want feedparser to automatically run HTML markup through HTML Tidy, set # this to 1. Requires mxTidy # or utidylib . TIDY_MARKUP = 0 # List of Python interfaces for HTML Tidy, in order of preference. 
Only useful # if TIDY_MARKUP = 1 PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] # ---------- required modules (should come with any Python distribution) ---------- import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2 try: from cStringIO import StringIO as _StringIO except: from StringIO import StringIO as _StringIO # ---------- optional modules (feedparser will work without these, but with reduced functionality) ---------- # gzip is included with most Python distributions, but may not be available if you compiled your own try: import gzip except: gzip = None try: import zlib except: zlib = None # If a real XML parser is available, feedparser will attempt to use it. feedparser has # been tested with the built-in SAX parser, PyXML, and libxml2. On platforms where the # Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some # versions of FreeBSD), feedparser will quietly fall back on regex-based parsing. try: import xml.sax xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers from xml.sax.saxutils import escape as _xmlescape _XML_AVAILABLE = 1 except: _XML_AVAILABLE = 0 def _xmlescape(data): data = data.replace('&', '&') data = data.replace('>', '>') data = data.replace('<', '<') return data # base64 support for Atom feeds that contain embedded binary data try: import base64, binascii except: base64 = binascii = None # cjkcodecs and iconv_codec provide support for more character encodings. 
# Both are available from http://cjkpython.i18n.org/ try: import cjkcodecs.aliases except: pass try: import iconv_codec except: pass # chardet library auto-detects character encodings # Download from http://chardet.feedparser.org/ try: import chardet if _debug: import chardet.constants chardet.constants._debug = 1 except: chardet = None # ---------- don't touch these ---------- class ThingsNobodyCaresAboutButMe(Exception): pass class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass class UndeclaredNamespace(Exception): pass sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') sgmllib.special = re.compile('' % (tag, ''.join([' %s="%s"' % t for t in attrs])), escape=0) # match namespaces if tag.find(':') <> -1: prefix, suffix = tag.split(':', 1) else: prefix, suffix = '', tag prefix = self.namespacemap.get(prefix, prefix) if prefix: prefix = prefix + '_' # special hack for better tracking of empty textinput/image elements in illformed feeds if (not prefix) and tag not in ('title', 'link', 'description', 'name'): self.intextinput = 0 if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): self.inimage = 0 # call special handler (if defined) or default handler methodname = '_start_' + prefix + suffix try: method = getattr(self, methodname) return method(attrsD) except AttributeError: return self.push(prefix + suffix, 1) def unknown_endtag(self, tag): if _debug: sys.stderr.write('end %s\n' % tag) # match namespaces if tag.find(':') <> -1: prefix, suffix = tag.split(':', 1) else: prefix, suffix = '', tag prefix = self.namespacemap.get(prefix, prefix) if prefix: prefix = prefix + '_' # call special handler (if defined) or default handler methodname = '_end_' + prefix + suffix try: method = getattr(self, methodname) method() except AttributeError: self.pop(prefix + suffix) # track inline 
content if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): # element declared itself as escaped markup, but it isn't really self.contentparams['type'] = 'application/xhtml+xml' if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': tag = tag.split(':')[-1] self.handle_data('' % tag, escape=0) # track xml:base and xml:lang going out of scope if self.basestack: self.basestack.pop() if self.basestack and self.basestack[-1]: self.baseuri = self.basestack[-1] if self.langstack: self.langstack.pop() if self.langstack: # and (self.langstack[-1] is not None): self.lang = self.langstack[-1] def handle_charref(self, ref): # called for each character reference, e.g. for ' ', ref will be '160' if not self.elementstack: return ref = ref.lower() if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'): text = '&#%s;' % ref else: if ref[0] == 'x': c = int(ref[1:], 16) else: c = int(ref) text = unichr(c).encode('utf-8') self.elementstack[-1][2].append(text) def handle_entityref(self, ref): # called for each entity reference, e.g. for '©', ref will be 'copy' if not self.elementstack: return if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref) if ref in ('lt', 'gt', 'quot', 'amp', 'apos'): text = '&%s;' % ref else: # entity resolution graciously donated by Aaron Swartz def name2cp(k): import htmlentitydefs if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3 return htmlentitydefs.name2codepoint[k] k = htmlentitydefs.entitydefs[k] if k.startswith('&#') and k.endswith(';'): return int(k[2:-1]) # not in latin-1 return ord(k) try: name2cp(ref) except KeyError: text = '&%s;' % ref else: text = unichr(name2cp(ref)).encode('utf-8') self.elementstack[-1][2].append(text) def handle_data(self, text, escape=1): # called for each block of plain text, i.e. 
outside of any tag and # not containing any character or entity references if not self.elementstack: return if escape and self.contentparams.get('type') == 'application/xhtml+xml': text = _xmlescape(text) self.elementstack[-1][2].append(text) def handle_comment(self, text): # called for each comment, e.g. pass def handle_pi(self, text): # called for each processing instruction, e.g. pass def handle_decl(self, text): pass def parse_declaration(self, i): # override internal declaration handler to handle CDATA blocks if _debug: sys.stderr.write('entering parse_declaration\n') if self.rawdata[i:i+9] == '', i) if k == -1: k = len(self.rawdata) self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0) return k+3 else: k = self.rawdata.find('>', i) return k+1 def mapContentType(self, contentType): contentType = contentType.lower() if contentType == 'text': contentType = 'text/plain' elif contentType == 'html': contentType = 'text/html' elif contentType == 'xhtml': contentType = 'application/xhtml+xml' return contentType def trackNamespace(self, prefix, uri): loweruri = uri.lower() if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: self.version = 'rss090' if loweruri == 'http://purl.org/rss/1.0/' and not self.version: self.version = 'rss10' if loweruri == 'http://www.w3.org/2005/atom' and not self.version: self.version = 'atom10' if loweruri.find('backend.userland.com/rss') <> -1: # match any backend.userland.com namespace uri = 'http://backend.userland.com/rss' loweruri = uri if self._matchnamespaces.has_key(loweruri): self.namespacemap[prefix] = self._matchnamespaces[loweruri] self.namespacesInUse[self._matchnamespaces[loweruri]] = uri else: self.namespacesInUse[prefix or ''] = uri def resolveURI(self, uri): return _urljoin(self.baseuri or '', uri) def decodeEntities(self, element, data): return data def push(self, element, expectingText): self.elementstack.append([element, expectingText, []]) def pop(self, element, 
stripWhitespace=1): if not self.elementstack: return if self.elementstack[-1][0] != element: return element, expectingText, pieces = self.elementstack.pop() output = ''.join(pieces) if stripWhitespace: output = output.strip() if not expectingText: return output # decode base64 content if base64 and self.contentparams.get('base64', 0): try: output = base64.decodestring(output) except binascii.Error: pass except binascii.Incomplete: pass # resolve relative URIs if (element in self.can_be_relative_uri) and output: output = self.resolveURI(output) # decode entities within embedded markup if not self.contentparams.get('base64', 0): output = self.decodeEntities(element, output) # remove temporary cruft from contentparams try: del self.contentparams['mode'] except KeyError: pass try: del self.contentparams['base64'] except KeyError: pass # resolve relative URIs within embedded markup if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types: if element in self.can_contain_relative_uris: output = _resolveRelativeURIs(output, self.baseuri, self.encoding) # sanitize embedded markup if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types: if element in self.can_contain_dangerous_markup: output = _sanitizeHTML(output, self.encoding) if self.encoding and type(output) != type(u''): try: output = unicode(output, self.encoding) except: pass # categories/tags/keywords/whatever are handled in _end_category if element == 'category': return output # store output in appropriate place(s) if self.inentry and not self.insource: if element == 'content': self.entries[-1].setdefault(element, []) contentparams = copy.deepcopy(self.contentparams) contentparams['value'] = output self.entries[-1][element].append(contentparams) elif element == 'link': self.entries[-1][element] = output if output: self.entries[-1]['links'][-1]['href'] = output else: if element == 'description': element = 'summary' self.entries[-1][element] = output if 
self.incontent: contentparams = copy.deepcopy(self.contentparams) contentparams['value'] = output self.entries[-1][element + '_detail'] = contentparams elif (self.infeed or self.insource) and (not self.intextinput) and (not self.inimage): context = self._getContext() if element == 'description': element = 'subtitle' context[element] = output if element == 'link': context['links'][-1]['href'] = output elif self.incontent: contentparams = copy.deepcopy(self.contentparams) contentparams['value'] = output context[element + '_detail'] = contentparams return output def pushContent(self, tag, attrsD, defaultContentType, expectingText): self.incontent += 1 self.contentparams = FeedParserDict({ 'type': self.mapContentType(attrsD.get('type', defaultContentType)), 'language': self.lang, 'base': self.baseuri}) self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams) self.push(tag, expectingText) def popContent(self, tag): value = self.pop(tag) self.incontent -= 1 self.contentparams.clear() return value def _mapToStandardPrefix(self, name): colonpos = name.find(':') if colonpos <> -1: prefix = name[:colonpos] suffix = name[colonpos+1:] prefix = self.namespacemap.get(prefix, prefix) name = prefix + ':' + suffix return name def _getAttribute(self, attrsD, name): return attrsD.get(self._mapToStandardPrefix(name)) def _isBase64(self, attrsD, contentparams): if attrsD.get('mode', '') == 'base64': return 1 if self.contentparams['type'].startswith('text/'): return 0 if self.contentparams['type'].endswith('+xml'): return 0 if self.contentparams['type'].endswith('/xml'): return 0 return 1 def _itsAnHrefDamnIt(self, attrsD): href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) if href: try: del attrsD['url'] except KeyError: pass try: del attrsD['uri'] except KeyError: pass attrsD['href'] = href return attrsD def _save(self, key, value): context = self._getContext() context.setdefault(key, value) def _start_rss(self, attrsD): versionmap = {'0.91': 
'rss091u', '0.92': 'rss092', '0.93': 'rss093', '0.94': 'rss094'} if not self.version: attr_version = attrsD.get('version', '') version = versionmap.get(attr_version) if version: self.version = version elif attr_version.startswith('2.'): self.version = 'rss20' else: self.version = 'rss' def _start_dlhottitles(self, attrsD): self.version = 'hotrss' def _start_channel(self, attrsD): self.infeed = 1 self._cdf_common(attrsD) _start_feedinfo = _start_channel def _cdf_common(self, attrsD): if attrsD.has_key('lastmod'): self._start_modified({}) self.elementstack[-1][-1] = attrsD['lastmod'] self._end_modified() if attrsD.has_key('href'): self._start_link({}) self.elementstack[-1][-1] = attrsD['href'] self._end_link() def _start_feed(self, attrsD): self.infeed = 1 versionmap = {'0.1': 'atom01', '0.2': 'atom02', '0.3': 'atom03'} if not self.version: attr_version = attrsD.get('version') version = versionmap.get(attr_version) if version: self.version = version else: self.version = 'atom' def _end_channel(self): self.infeed = 0 _end_feed = _end_channel def _start_image(self, attrsD): self.inimage = 1 self.push('image', 0) context = self._getContext() context.setdefault('image', FeedParserDict()) def _end_image(self): self.pop('image') self.inimage = 0 def _start_textinput(self, attrsD): self.intextinput = 1 self.push('textinput', 0) context = self._getContext() context.setdefault('textinput', FeedParserDict()) _start_textInput = _start_textinput def _end_textinput(self): self.pop('textinput') self.intextinput = 0 _end_textInput = _end_textinput def _start_author(self, attrsD): self.inauthor = 1 self.push('author', 1) _start_managingeditor = _start_author _start_dc_author = _start_author _start_dc_creator = _start_author _start_itunes_author = _start_author def _end_author(self): self.pop('author') self.inauthor = 0 self._sync_author_detail() _end_managingeditor = _end_author _end_dc_author = _end_author _end_dc_creator = _end_author _end_itunes_author = _end_author def 
_start_itunes_owner(self, attrsD): self.inpublisher = 1 self.push('publisher', 0) def _end_itunes_owner(self): self.pop('publisher') self.inpublisher = 0 self._sync_author_detail('publisher') def _start_contributor(self, attrsD): self.incontributor = 1 context = self._getContext() context.setdefault('contributors', []) context['contributors'].append(FeedParserDict()) self.push('contributor', 0) def _end_contributor(self): self.pop('contributor') self.incontributor = 0 def _start_dc_contributor(self, attrsD): self.incontributor = 1 context = self._getContext() context.setdefault('contributors', []) context['contributors'].append(FeedParserDict()) self.push('name', 0) def _end_dc_contributor(self): self._end_name() self.incontributor = 0 def _start_name(self, attrsD): self.push('name', 0) _start_itunes_name = _start_name def _end_name(self): value = self.pop('name') if self.inpublisher: self._save_author('name', value, 'publisher') elif self.inauthor: self._save_author('name', value) elif self.incontributor: self._save_contributor('name', value) elif self.intextinput: context = self._getContext() context['textinput']['name'] = value _end_itunes_name = _end_name def _start_width(self, attrsD): self.push('width', 0) def _end_width(self): value = self.pop('width') try: value = int(value) except: value = 0 if self.inimage: context = self._getContext() context['image']['width'] = value def _start_height(self, attrsD): self.push('height', 0) def _end_height(self): value = self.pop('height') try: value = int(value) except: value = 0 if self.inimage: context = self._getContext() context['image']['height'] = value def _start_url(self, attrsD): self.push('href', 1) _start_homepage = _start_url _start_uri = _start_url def _end_url(self): value = self.pop('href') if self.inauthor: self._save_author('href', value) elif self.incontributor: self._save_contributor('href', value) elif self.inimage: context = self._getContext() context['image']['href'] = value elif self.intextinput: 
context = self._getContext() context['textinput']['link'] = value _end_homepage = _end_url _end_uri = _end_url def _start_email(self, attrsD): self.push('email', 0) _start_itunes_email = _start_email def _end_email(self): value = self.pop('email') if self.inpublisher: self._save_author('email', value, 'publisher') elif self.inauthor: self._save_author('email', value) elif self.incontributor: self._save_contributor('email', value) _end_itunes_email = _end_email def _getContext(self): if self.insource: context = self.sourcedata elif self.inentry: context = self.entries[-1] else: context = self.feeddata return context def _save_author(self, key, value, prefix='author'): context = self._getContext() context.setdefault(prefix + '_detail', FeedParserDict()) context[prefix + '_detail'][key] = value self._sync_author_detail() def _save_contributor(self, key, value): context = self._getContext() context.setdefault('contributors', [FeedParserDict()]) context['contributors'][-1][key] = value def _sync_author_detail(self, key='author'): context = self._getContext() detail = context.get('%s_detail' % key) if detail: name = detail.get('name') email = detail.get('email') if name and email: context[key] = '%s (%s)' % (name, email) elif name: context[key] = name elif email: context[key] = email else: author = context.get(key) if not author: return emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author) if not emailmatch: return email = emailmatch.group(0) # probably a better way to do the following, but it passes all the tests author = author.replace(email, '') author = author.replace('()', '') author = author.strip() if author and (author[0] == '('): author = author[1:] if author and (author[-1] == ')'): author = author[:-1] author = author.strip() context.setdefault('%s_detail' % key, FeedParserDict()) context['%s_detail' % key]['name'] = author context['%s_detail' % 
key]['email'] = email def _start_subtitle(self, attrsD): self.pushContent('subtitle', attrsD, 'text/plain', 1) _start_tagline = _start_subtitle _start_itunes_subtitle = _start_subtitle def _end_subtitle(self): self.popContent('subtitle') _end_tagline = _end_subtitle _end_itunes_subtitle = _end_subtitle def _start_rights(self, attrsD): self.pushContent('rights', attrsD, 'text/plain', 1) _start_dc_rights = _start_rights _start_copyright = _start_rights def _end_rights(self): self.popContent('rights') _end_dc_rights = _end_rights _end_copyright = _end_rights def _start_item(self, attrsD): self.entries.append(FeedParserDict()) self.push('item', 0) self.inentry = 1 self.guidislink = 0 id = self._getAttribute(attrsD, 'rdf:about') if id: context = self._getContext() context['id'] = id self._cdf_common(attrsD) _start_entry = _start_item _start_product = _start_item def _end_item(self): self.pop('item') self.inentry = 0 _end_entry = _end_item def _start_dc_language(self, attrsD): self.push('language', 1) _start_language = _start_dc_language def _end_dc_language(self): self.lang = self.pop('language') _end_language = _end_dc_language def _start_dc_publisher(self, attrsD): self.push('publisher', 1) _start_webmaster = _start_dc_publisher def _end_dc_publisher(self): self.pop('publisher') self._sync_author_detail('publisher') _end_webmaster = _end_dc_publisher def _start_published(self, attrsD): self.push('published', 1) _start_dcterms_issued = _start_published _start_issued = _start_published def _end_published(self): value = self.pop('published') self._save('published_parsed', _parse_date(value)) _end_dcterms_issued = _end_published _end_issued = _end_published def _start_updated(self, attrsD): self.push('updated', 1) _start_modified = _start_updated _start_dcterms_modified = _start_updated _start_pubdate = _start_updated _start_dc_date = _start_updated def _end_updated(self): value = self.pop('updated') parsed_value = _parse_date(value) self._save('updated_parsed', 
parsed_value) _end_modified = _end_updated _end_dcterms_modified = _end_updated _end_pubdate = _end_updated _end_dc_date = _end_updated def _start_created(self, attrsD): self.push('created', 1) _start_dcterms_created = _start_created def _end_created(self): value = self.pop('created') self._save('created_parsed', _parse_date(value)) _end_dcterms_created = _end_created def _start_expirationdate(self, attrsD): self.push('expired', 1) def _end_expirationdate(self): self._save('expired_parsed', _parse_date(self.pop('expired'))) def _start_cc_license(self, attrsD): self.push('license', 1) value = self._getAttribute(attrsD, 'rdf:resource') if value: self.elementstack[-1][2].append(value) self.pop('license') def _start_creativecommons_license(self, attrsD): self.push('license', 1) def _end_creativecommons_license(self): self.pop('license') def _addTag(self, term, scheme, label): context = self._getContext() tags = context.setdefault('tags', []) if (not term) and (not scheme) and (not label): return value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label}) if value not in tags: tags.append(FeedParserDict({'term': term, 'scheme': scheme, 'label': label})) def _start_category(self, attrsD): if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD)) term = attrsD.get('term') scheme = attrsD.get('scheme', attrsD.get('domain')) label = attrsD.get('label') self._addTag(term, scheme, label) self.push('category', 1) _start_dc_subject = _start_category _start_keywords = _start_category def _end_itunes_keywords(self): for term in self.pop('itunes_keywords').split(): self._addTag(term, 'http://www.itunes.com/', None) def _start_itunes_category(self, attrsD): self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None) self.push('category', 1) def _end_category(self): value = self.pop('category') if not value: return context = self._getContext() tags = context['tags'] if value and len(tags) and not tags[-1]['term']: tags[-1]['term'] = value 
else: self._addTag(value, None, None) _end_dc_subject = _end_category _end_keywords = _end_category _end_itunes_category = _end_category def _start_cloud(self, attrsD): self._getContext()['cloud'] = FeedParserDict(attrsD) def _start_link(self, attrsD): attrsD.setdefault('rel', 'alternate') attrsD.setdefault('type', 'text/html') attrsD = self._itsAnHrefDamnIt(attrsD) if attrsD.has_key('href'): attrsD['href'] = self.resolveURI(attrsD['href']) expectingText = self.infeed or self.inentry or self.insource context = self._getContext() context.setdefault('links', []) context['links'].append(FeedParserDict(attrsD)) if attrsD['rel'] == 'enclosure': self._start_enclosure(attrsD) if attrsD.has_key('href'): expectingText = 0 if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): context['link'] = attrsD['href'] else: self.push('link', expectingText) _start_producturl = _start_link def _end_link(self): value = self.pop('link') context = self._getContext() if self.intextinput: context['textinput']['link'] = value if self.inimage: context['image']['link'] = value _end_producturl = _end_link def _start_guid(self, attrsD): self.guidislink = (attrsD.get('ispermalink', 'true') == 'true') self.push('id', 1) def _end_guid(self): value = self.pop('id') self._save('guidislink', self.guidislink and not self._getContext().has_key('link')) if self.guidislink: # guid acts as link, but only if 'ispermalink' is not present or is 'true', # and only if the item doesn't already have a link element self._save('link', value) def _start_title(self, attrsD): self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) _start_dc_title = _start_title _start_media_title = _start_title def _end_title(self): value = self.popContent('title') context = self._getContext() if self.intextinput: context['textinput']['title'] = value elif self.inimage: context['image']['title'] = value _end_dc_title = _end_title _end_media_title = 
_end_title def _start_description(self, attrsD): context = self._getContext() if context.has_key('summary'): self._summaryKey = 'content' self._start_content(attrsD) else: self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource) def _start_abstract(self, attrsD): self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) def _end_description(self): if self._summaryKey == 'content': self._end_content() else: value = self.popContent('description') context = self._getContext() if self.intextinput: context['textinput']['description'] = value elif self.inimage: context['image']['description'] = value self._summaryKey = None _end_abstract = _end_description def _start_info(self, attrsD): self.pushContent('info', attrsD, 'text/plain', 1) _start_feedburner_browserfriendly = _start_info def _end_info(self): self.popContent('info') _end_feedburner_browserfriendly = _end_info def _start_generator(self, attrsD): if attrsD: attrsD = self._itsAnHrefDamnIt(attrsD) if attrsD.has_key('href'): attrsD['href'] = self.resolveURI(attrsD['href']) self._getContext()['generator_detail'] = FeedParserDict(attrsD) self.push('generator', 1) def _end_generator(self): value = self.pop('generator') context = self._getContext() if context.has_key('generator_detail'): context['generator_detail']['name'] = value def _start_admin_generatoragent(self, attrsD): self.push('generator', 1) value = self._getAttribute(attrsD, 'rdf:resource') if value: self.elementstack[-1][2].append(value) self.pop('generator') self._getContext()['generator_detail'] = FeedParserDict({'href': value}) def _start_admin_errorreportsto(self, attrsD): self.push('errorreportsto', 1) value = self._getAttribute(attrsD, 'rdf:resource') if value: self.elementstack[-1][2].append(value) self.pop('errorreportsto') def _start_summary(self, attrsD): context = self._getContext() if context.has_key('summary'): self._summaryKey = 'content' 
self._start_content(attrsD) else: self._summaryKey = 'summary' self.pushContent(self._summaryKey, attrsD, 'text/plain', 1) _start_itunes_summary = _start_summary def _end_summary(self): if self._summaryKey == 'content': self._end_content() else: self.popContent(self._summaryKey or 'summary') self._summaryKey = None _end_itunes_summary = _end_summary def _start_enclosure(self, attrsD): attrsD = self._itsAnHrefDamnIt(attrsD) self._getContext().setdefault('enclosures', []).append(FeedParserDict(attrsD)) href = attrsD.get('href') if href: context = self._getContext() if not context.get('id'): context['id'] = href def _start_source(self, attrsD): self.insource = 1 def _end_source(self): self.insource = 0 self._getContext()['source'] = copy.deepcopy(self.sourcedata) self.sourcedata.clear() def _start_content(self, attrsD): self.pushContent('content', attrsD, 'text/plain', 1) src = attrsD.get('src') if src: self.contentparams['src'] = src self.push('content', 1) def _start_prodlink(self, attrsD): self.pushContent('content', attrsD, 'text/html', 1) def _start_body(self, attrsD): self.pushContent('content', attrsD, 'application/xhtml+xml', 1) _start_xhtml_body = _start_body def _start_content_encoded(self, attrsD): self.pushContent('content', attrsD, 'text/html', 1) _start_fullitem = _start_content_encoded def _end_content(self): copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) value = self.popContent('content') if copyToDescription: self._save('description', value) _end_body = _end_content _end_xhtml_body = _end_content _end_content_encoded = _end_content _end_fullitem = _end_content _end_prodlink = _end_content def _start_itunes_image(self, attrsD): self.push('itunes_image', 0) self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) _start_itunes_link = _start_itunes_image def _end_itunes_block(self): value = self.pop('itunes_block', 0) self._getContext()['itunes_block'] = (value == 'yes') and 
1 or 0 def _end_itunes_explicit(self): value = self.pop('itunes_explicit', 0) self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0 if _XML_AVAILABLE: class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): def __init__(self, baseuri, baselang, encoding): if _debug: sys.stderr.write('trying StrictFeedParser\n') xml.sax.handler.ContentHandler.__init__(self) _FeedParserMixin.__init__(self, baseuri, baselang, encoding) self.bozo = 0 self.exc = None def startPrefixMapping(self, prefix, uri): self.trackNamespace(prefix, uri) def startElementNS(self, name, qname, attrs): namespace, localname = name lowernamespace = str(namespace or '').lower() if lowernamespace.find('backend.userland.com/rss') <> -1: # match any backend.userland.com namespace namespace = 'http://backend.userland.com/rss' lowernamespace = namespace if qname and qname.find(':') > 0: givenprefix = qname.split(':')[0] else: givenprefix = None prefix = self._matchnamespaces.get(lowernamespace, givenprefix) if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix): raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix if prefix: localname = prefix + ':' + localname localname = str(localname).lower() if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) # qname implementation is horribly broken in Python 2.1 (it # doesn't report any), and slightly broken in Python 2.2 (it # doesn't report the xml: namespace). So we match up namespaces # with a known list first, and then possibly override them with # the qnames the SAX parser gives us (if indeed it gives us any # at all). Thanks to MatejC for helping me test this and # tirelessly telling me that it didn't work yet. 
attrsD = {} for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): lowernamespace = (namespace or '').lower() prefix = self._matchnamespaces.get(lowernamespace, '') if prefix: attrlocalname = prefix + ':' + attrlocalname attrsD[str(attrlocalname).lower()] = attrvalue for qname in attrs.getQNames(): attrsD[str(qname).lower()] = attrs.getValueByQName(qname) self.unknown_starttag(localname, attrsD.items()) def characters(self, text): self.handle_data(text) def endElementNS(self, name, qname): namespace, localname = name lowernamespace = str(namespace or '').lower() if qname and qname.find(':') > 0: givenprefix = qname.split(':')[0] else: givenprefix = '' prefix = self._matchnamespaces.get(lowernamespace, givenprefix) if prefix: localname = prefix + ':' + localname localname = str(localname).lower() self.unknown_endtag(localname) def error(self, exc): self.bozo = 1 self.exc = exc def fatalError(self, exc): self.error(exc) raise exc class _BaseHTMLProcessor(sgmllib.SGMLParser): elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param'] def __init__(self, encoding): self.encoding = encoding if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) sgmllib.SGMLParser.__init__(self) def reset(self): self.pieces = [] sgmllib.SGMLParser.reset(self) def _shorttag_replace(self, match): tag = match.group(1) if tag in self.elements_no_end_tag: return '<' + tag + ' />' else: return '<' + tag + '>' def feed(self, data): data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data) data = data.replace(''', "'") data = data.replace('"', '"') if self.encoding and type(data) == type(u''): data = data.encode(self.encoding) sgmllib.SGMLParser.feed(self, data) def normalize_attrs(self, attrs): # utility method to be called by descendants attrs = [(k.lower(), v) for k, v 
in attrs] attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] return attrs def unknown_starttag(self, tag, attrs): # called for each start tag # attrs is a list of (attr, value) tuples # e.g. for
        # <pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
        # Rebuild the start tag from its (name, value) pairs and append it to
        # self.pieces.  Names and values are promoted to unicode before being
        # joined so that (valid) high-bit attribute values in UTF-8 feeds
        # survive (thanks to Kevin Marks for this breathtaking hack); the
        # joined attribute string is then encoded back to self.encoding.
        uattrs = []
        for key, value in attrs:
            if not isinstance(value, unicode):
                try:
                    value = unicode(value, self.encoding)
                except UnicodeError:
                    # Bytes that are not valid in the declared encoding.
                    # iso-8859-1 maps every byte value, so this decode cannot
                    # fail and one bad attribute no longer aborts the parse.
                    value = unicode(value, 'iso-8859-1')
            # unicode(obj, encoding) raises TypeError when obj is already
            # unicode, so only decode byte-string keys.
            if not isinstance(key, unicode):
                key = unicode(key, self.encoding)
            # The value is written back inside double quotes below; a literal
            # '"' would end the attribute early and let the remainder of the
            # value be read as additional attributes.
            value = value.replace(u'"', u'&quot;')
            uattrs.append((key, value))
        strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
        # Elements that never take an end tag are emitted in self-closing form.
        if tag in self.elements_no_end_tag:
            self.pieces.append('<%s%s />' % (tag, strattrs))
        else:
            self.pieces.append('<%s%s>' % (tag, strattrs))

    def unknown_endtag(self, tag):
        # called for each end tag, e.g. for </pre>
, tag will be 'pre' # Reconstruct the original end tag. if tag not in self.elements_no_end_tag: self.pieces.append("" % locals()) def handle_charref(self, ref): # called for each character reference, e.g. for ' ', ref will be '160' # Reconstruct the original character reference. self.pieces.append('&#%(ref)s;' % locals()) def handle_entityref(self, ref): # called for each entity reference, e.g. for '©', ref will be 'copy' # Reconstruct the original entity reference. self.pieces.append('&%(ref)s;' % locals()) def handle_data(self, text): # called for each block of plain text, i.e. outside of any tag and # not containing any character or entity references # Store the original text verbatim. if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_text, text=%s\n' % text) self.pieces.append(text) def handle_comment(self, text): # called for each HTML comment, e.g. # Reconstruct the original comment. self.pieces.append('' % locals()) def handle_pi(self, text): # called for each processing instruction, e.g. # Reconstruct original processing instruction. self.pieces.append('' % locals()) def handle_decl(self, text): # called for the DOCTYPE, if present, e.g. 
# # Reconstruct original DOCTYPE self.pieces.append('' % locals()) _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match def _scan_name(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) if i == n: return None, -1 m = self._new_declname_match(rawdata, i) if m: s = m.group() name = s.strip() if (i + len(s)) == n: return None, -1 # end of buffer return name.lower(), m.end() else: self.handle_data(rawdata) # self.updatepos(declstartpos, i) return None, -1 def output(self): '''Return processed HTML as a single string''' return ''.join([str(p) for p in self.pieces]) class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): def __init__(self, baseuri, baselang, encoding): sgmllib.SGMLParser.__init__(self) _FeedParserMixin.__init__(self, baseuri, baselang, encoding) def decodeEntities(self, element, data): data = data.replace('<', '<') data = data.replace('<', '<') data = data.replace('>', '>') data = data.replace('>', '>') data = data.replace('&', '&') data = data.replace('&', '&') data = data.replace('"', '"') data = data.replace('"', '"') data = data.replace(''', ''') data = data.replace(''', ''') if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): data = data.replace('<', '<') data = data.replace('>', '>') data = data.replace('&', '&') data = data.replace('"', '"') data = data.replace(''', "'") return data class _RelativeURIResolver(_BaseHTMLProcessor): relative_uris = [('a', 'href'), ('applet', 'codebase'), ('area', 'href'), ('blockquote', 'cite'), ('body', 'background'), ('del', 'cite'), ('form', 'action'), ('frame', 'longdesc'), ('frame', 'src'), ('iframe', 'longdesc'), ('iframe', 'src'), ('head', 'profile'), ('img', 'longdesc'), ('img', 'src'), ('img', 'usemap'), ('input', 'src'), ('input', 'usemap'), ('ins', 'cite'), ('link', 'href'), ('object', 'classid'), ('object', 'codebase'), ('object', 'data'), ('object', 'usemap'), ('q', 'cite'), ('script', 'src')] def __init__(self, 
baseuri, encoding): _BaseHTMLProcessor.__init__(self, encoding) self.baseuri = baseuri def resolveURI(self, uri): return _urljoin(self.baseuri, uri) def unknown_starttag(self, tag, attrs): attrs = self.normalize_attrs(attrs) attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) def _resolveRelativeURIs(htmlSource, baseURI, encoding): if _debug: sys.stderr.write('entering _resolveRelativeURIs\n') p = _RelativeURIResolver(baseURI, encoding) p.feed(htmlSource) return p.output() class _HTMLSanitizer(_BaseHTMLProcessor): acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var'] acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width'] unacceptable_elements_with_end_tag = ['script', 'applet'] def 
reset(self): _BaseHTMLProcessor.reset(self) self.unacceptablestack = 0 def unknown_starttag(self, tag, attrs): if not tag in self.acceptable_elements: if tag in self.unacceptable_elements_with_end_tag: self.unacceptablestack += 1 return attrs = self.normalize_attrs(attrs) attrs = [(key, value) for key, value in attrs if key in self.acceptable_attributes] _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) def unknown_endtag(self, tag): if not tag in self.acceptable_elements: if tag in self.unacceptable_elements_with_end_tag: self.unacceptablestack -= 1 return _BaseHTMLProcessor.unknown_endtag(self, tag) def handle_pi(self, text): pass def handle_decl(self, text): pass def handle_data(self, text): if not self.unacceptablestack: _BaseHTMLProcessor.handle_data(self, text) def _sanitizeHTML(htmlSource, encoding): p = _HTMLSanitizer(encoding) p.feed(htmlSource) data = p.output() if TIDY_MARKUP: # loop through list of preferred Tidy interfaces looking for one that's installed, # then set up a common _tidy function to wrap the interface-specific API. 
_tidy = None for tidy_interface in PREFERRED_TIDY_INTERFACES: try: if tidy_interface == "uTidy": from tidy import parseString as _utidy def _tidy(data, **kwargs): return str(_utidy(data, **kwargs)) break elif tidy_interface == "mxTidy": from mx.Tidy import Tidy as _mxtidy def _tidy(data, **kwargs): nerrors, nwarnings, data, errordata = _mxtidy.tidy(data, **kwargs) return data break except: pass if _tidy: utf8 = type(data) == type(u'') if utf8: data = data.encode('utf-8') data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8") if utf8: data = unicode(data, 'utf-8') if data.count(''): data = data.split('>', 1)[1] if data.count('= '2.3.3' assert base64 != None user, passw = base64.decodestring(req.headers['Authorization'].split(' ')[1]).split(':') realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] self.add_password(realm, host, user, passw) retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) self.reset_retry_count() return retry except: return self.http_error_default(req, fp, code, msg, headers) def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers): """URL, filename, or string --> stream This function lets you define parsers that take any input source (URL, pathname to local or network file, or actual data as a string) and deal with it in a uniform manner. Returned object is guaranteed to have all the basic stdio read methods (read, readline, readlines). Just .close() the object when you're done with it. If the etag argument is supplied, it will be used as the value of an If-None-Match request header. If the modified argument is supplied, it must be a tuple of 9 integers as returned by gmtime() in the standard Python time module. This MUST be in GMT (Gr