From: Maxious Date: Wed, 28 Nov 2012 12:16:21 +0000 Subject: add disclogs rss and sitemap and viewer X-Git-Url: http://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=d34ebaf2263fb4c70f030bee61b6ce59b02059aa --- add disclogs rss and sitemap and viewer Former-commit-id: cf7e993d742cf3770bb93293c41bb9e298a36776 --- --- a/.gitmodules +++ b/.gitmodules @@ -31,4 +31,7 @@ [submodule "documents/lib/parsedatetime"] path = documents/lib/parsedatetime url = git://github.com/bear/parsedatetime.git +[submodule "lib/FeedWriter"] + path = lib/FeedWriter + url = https://github.com/mibe/FeedWriter --- /dev/null +++ b/documents/.gitignore @@ -1,1 +1,2 @@ +*.pyc --- a/documents/disclogsList.php +++ b/documents/disclogsList.php @@ -35,18 +35,18 @@ echo '
(' . 'view local copy)'; } else { - echo ""; + echo ""; } echo "\n"; if (isset($row->value->FOIDocumentsURL)) { if (file_exists("./scrapers/" . $row->id . '.py')) { - echo ""; + echo ""; $green++; } else if (file_exists("./scrapers/" . $row->id . '.txt')) { - echo ""; + echo ""; $orange++; } else { - echo ""; + echo ""; $red++; } } @@ -57,7 +57,8 @@ setteErrorHandler($e); } echo ""; -echo $agencies." agencies ".(($disclogs/$agencies)*100)."% with disclosure logs, ".(($green/$disclogs)*100)."% with scrapers ".(($red/$disclogs)*100)."% without scrapers ".(($orange/$disclogs)*100)."% WIP scrapers "; +echo $agencies." agencies, ".round(($disclogs/$agencies)*100)."% with disclosure logs; " +.round(($green/$disclogs)*100)."% logs with scrapers ".round(($red/$disclogs)*100)."% logs without scrapers ".round(($orange/$disclogs)*100)."% logs Work-In-Progress scrapers "; include_footer_documents(); ?> --- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -84,12 +84,17 @@ doc.update({'description': descriptiontxt}) return def getTitle(self, content, entry, doc): - doc.update({'title': content.string}) + doc.update({'title': (''.join(content.stripped_strings))}) return def getTable(self, soup): return soup.table + def getRows(self, table): + return table.find_all('tr') def getDate(self, content, entry, doc): - edate = parse(''.join(content.stripped_strings).strip(), dayfirst=True, fuzzy=True).strftime("%Y-%m-%d") + date = ''.join(content.stripped_strings).strip() + date = str.replace("Octber","October",date) + print date + edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d") print edate doc.update({'date': edate}) return @@ -110,10 +115,10 @@ # http://www.crummy.com/software/BeautifulSoup/documentation.html soup = BeautifulSoup(content) table = self.getTable(soup) - for row in table.find_all('tr'): + for row in self.getRows(table): columns = row.find_all('td') if len(columns) == self.getColumnCount(): - (id, date, description, title, notes) = self.getColumns(columns) + (id, date, title, description, notes) = self.getColumns(columns) print ''.join(id.stripped_strings) if id.string == None: hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings)))) @@ -123,13 +128,13 @@ if doc == None: print "saving " +hash - doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': id.string} + doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))} self.getLinks(self.getURL(),row,doc) self.getTitle(title,row, doc) self.getDate(date,row, doc) self.getDescription(description,row, doc) if notes != None: - doc.update({ 'notes': notes.string}) + doc.update({ 'notes': (''.join(notes.stripped_strings))}) foidocsdb.save(doc) else: print "already saved "+hash --- a/documents/index.php +++ b/documents/index.php @@ -20,7 +20,7 @@ if ($rows) { foreach ($rows as $row) { -displayLogEntry($row,$idtoname); +echo displayLogEntry($row,$idtoname); } } } catch (SetteeRestClientException $e) { --- a/documents/robots.txt +++ b/documents/robots.txt @@ -2,4 +2,5 @@ # http://code.google.com/web/controlcrawlindex/ User-agent: * - +Disallow: /admin/ +Sitemap: http://disclosurelo.gs/sitemap.xml.php --- a/documents/rss.xml.php +++ b/documents/rss.xml.php @@ -3,28 +3,38 @@ // Agency X updated Y, new files, diff of plain text/link text, // feed for just one agency or all // This is a minimum example of using the Universal Feed Generator Class -include("lib/FeedWriter.php"); +include("../lib/FeedWriter/FeedTypes.php"); +include_once('../include/common.inc.php'); //Creating an instance of FeedWriter class. -$TestFeed = new FeedWriter(RSS2); +$TestFeed = new RSS2FeedWriter(); //Setting the channel elements //Use wrapper functions for common channelelements $TestFeed->setTitle('Last Modified - All'); -$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php'); +$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php'); $TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer'); + $TestFeed->setChannelElement('language', 'en-us'); + $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time())); //Retriving informations from database -$rows = $db->get_view("app", "byLastModified")->rows; +$idtoname = Array(); +$agenciesdb = $server->get_db('disclosr-agencies'); +foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { + $idtoname[$row->id] = trim($row->value->name); +} +$foidocsdb = $server->get_db('disclosr-foidocuments'); +$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows; //print_r($rows); foreach ($rows as $row) { //Create an empty FeedItem $newItem = $TestFeed->createNewItem(); //Add elements to the feed item - $newItem->setTitle($row['name']); - $newItem->setLink($row['id']); - $newItem->setDate(date("c", $row['metadata']['lastModified'])); - $newItem->setDescription($row['name']); + $newItem->setTitle($row->value->title); + $newItem->setLink("view.php?id=".$row->value->docID); + $newItem->setDate(date("c", strtotime($row->value->date))); + $newItem->setDescription(displayLogEntry($row,$idtoname)); + $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true')); //Now add the feed item $TestFeed->addItem($newItem); } //OK. Everything is done. Now genarate the feed. -$TestFeed->genarateFeed(); +$TestFeed->generateFeed(); ?> --- /dev/null +++ b/documents/scrapers/00a294de663db69062ca09aede7c0487.txt @@ -1,1 +1,2 @@ +multipage --- /dev/null +++ b/documents/scrapers/0324e4b1654fd6dd651307abcef67094.py @@ -1,1 +1,19 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 6 + def getColumns(self,columns): + (id, date, title, description, notes,link) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py +++ b/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py @@ -8,7 +8,7 @@ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) --- a/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py +++ b/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py @@ -10,7 +10,7 @@ return 5 def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) def getTable(self,soup): return soup.find_all('table')[4] --- /dev/null +++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.txt @@ -1,1 +1,2 @@ +multipage log --- /dev/null +++ b/documents/scrapers/38ca99d2790975a40dde3fae41dbdc3d.py @@ -1,1 +1,32 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +import dateutil +from dateutil.parser import * +from datetime import * +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 3 + def getColumns(self,columns): + (date, title, description) = columns + return (date, date, title, description, None) + def getTitle(self, content, entry, doc): + i = 0 + title = "" + for string in content.stripped_strings: + if i < 2: + title = title + string + i = i+1 + doc.update({'title': title}) + print title + return + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- /dev/null +++ b/documents/scrapers/3b54190e3f409380e109fae29e1917aa.py @@ -1,1 +1,19 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 7 + def getColumns(self,columns): + (id, date, title, description, link, deldate,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py +++ b/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py @@ -7,7 +7,7 @@ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumns(self,columns): (id, date, description, title, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) --- /dev/null +++ b/documents/scrapers/3d5871a44abbbc81ef5b3a420070755d.py @@ -1,1 +1,47 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +import dateutil +from dateutil.parser import * +from datetime import * +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find(class_ = "inner-column").table + def getRows(self,table): + return table.tbody.find_all('tr',recursive=False) + def getColumnCount(self): + return 3 + def getColumns(self,columns): + (date, title, description) = columns + return (date, date, title, description, None) + def getDate(self, content, entry, doc): + i = 0 + date = "" + for string in content.stripped_strings: + if i ==1: + date = string + i = i+1 + edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d") + print edate + doc.update({'date': edate}) + return + def getTitle(self, content, entry, doc): + i = 0 + title = "" + for string in content.stripped_strings: + if i < 2: + title = title + string + i = i+1 + doc.update({'title': title}) + #print title + return + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- /dev/null +++ b/documents/scrapers/41a166419503bb50e410c58be54c102f.txt @@ -1,1 +1,1 @@ - +aspx --- /dev/null +++ b/documents/scrapers/4934000fddd6a5b1094f398798341290.py @@ -1,1 +1,23 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +import dateutil +from dateutil.parser import * +from datetime import * +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description, notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + si = ScraperImplementation() + si.doScrape() + --- /dev/null +++ b/documents/scrapers/53b14397c8f27c29ff07b6319f7a0ec5.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + #def getTable(self,soup): + # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py +++ b/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py @@ -10,7 +10,7 @@ return soup.find(class_ = "ms-rtestate-field").table def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) def getLinks(self, content, entry, doc): link = None --- /dev/null +++ b/documents/scrapers/55b69726fde4b4898ecf6d7217d1d1d2.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + #def getTable(self,soup): + # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table + def getColumnCount(self): + return 4 + def getColumns(self,columns): + (id, date, title, description) = columns + return (id, date, title, description, None) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- /dev/null +++ b/documents/scrapers/5716ce0aacfe98f7d638b7a66b7f1040.py @@ -1,1 +1,19 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 4 + def getColumns(self,columns): + (date, id, title, description) = columns + return (id, date, title, description, None) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- /dev/null +++ b/documents/scrapers/5d05365e981d87e746b596d63e35b1dc.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- /dev/null +++ b/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.txt @@ -1,1 +1,2 @@ +see parent dhs --- /dev/null +++ b/documents/scrapers/649b053f5e2884906ddc7174c2cd4b38.py @@ -1,1 +1,28 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +import dateutil +from dateutil.parser import * +from datetime import * +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description, notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + si = ScraperImplementation() + si.doScrape() + si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2011-12-financial-year" + si.doScrape() + si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2010-11-financial-year" + si.doScrape() + + --- /dev/null +++ b/documents/scrapers/6cf3870aedeeecfd6394b5c0abed4c55.py @@ -1,1 +1,23 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +import dateutil +from dateutil.parser import * +from datetime import * +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description, notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + si = ScraperImplementation() + si.doScrape() + --- /dev/null +++ b/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.py @@ -1,1 +1,19 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find(id = "content_div_50269").table + def getColumns(self,columns): + (id, date, title, description, notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.txt +++ /dev/null @@ -1,19 +1,1 @@ -import sys,os -sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) -import genericScrapers -import scrape -from bs4 import BeautifulSoup -#http://www.doughellmann.com/PyMOTW/abc/ -class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): - def getTable(self,soup): - return soup.find(id = "content_div_50269").table - def getColumns(self,columns): - (id, date, title, description, notes) = columns - return (id, date, title, description, notes) - -if __name__ == '__main__': - print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) - print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) - ScraperImplementation().doScrape() - --- a/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py +++ b/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py @@ -8,7 +8,7 @@ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) --- /dev/null +++ b/documents/scrapers/77f02f713e3c37bff73882fb90828379.py @@ -1,1 +1,22 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find("table",width="571") +#findAll("table")[3] + def getColumnCount(self): + return 7 + def getColumns(self,columns): + (id, date, title, description,link,deldate,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/795c33ed030293dbdb155c909ea47e37.py +++ b/documents/scrapers/795c33ed030293dbdb155c909ea47e37.py @@ -10,7 +10,7 @@ return 7 def getColumns(self,columns): (id, date, title, description, notes, deletedate, otherinfo) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) #def getTable(self,soup): # return soup.find(class_ = "box").table --- a/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py +++ b/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py @@ -7,7 +7,7 @@ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) --- a/documents/scrapers/8ef0e5802f99800f514b3a148e013b75.py +++ b/documents/scrapers/8ef0e5802f99800f514b3a148e013b75.py @@ -12,7 +12,7 @@ return soup.find(class_ = "content").table def getColumns(self,columns): (id, date, title, description) = columns - return (id, date, description, title, None) + return (id, date, title, description, None) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) --- /dev/null +++ b/documents/scrapers/905a1c409b6afb1de0074b13a5559560.py @@ -1,1 +1,23 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +import dateutil +from dateutil.parser import * +from datetime import * +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description, notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + si = ScraperImplementation() + si.doScrape() + --- /dev/null +++ b/documents/scrapers/9f5cd66dea3e2ec958c17e28b27e60a7.txt @@ -1,1 +1,2 @@ +acma style --- /dev/null +++ b/documents/scrapers/ad033512610d8e36886ab6a795f26561.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find(id = "_ctl0__ctl0_MainContentPlaceHolder_MainContentPlaceHolder_ContentSpan").findAll("table")[3] + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- /dev/null +++ b/documents/scrapers/b0a3281ba66efe173c5a33d5ef90ff76.txt @@ -1,1 +1,2 @@ +multipage immi --- /dev/null +++ b/documents/scrapers/b506b87c8ee9e3a7ea8007914078c741.py @@ -1,1 +1,19 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 6 + def getColumns(self,columns): + (id, date, title, description,link,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- /dev/null +++ b/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find(class_ = "ms-rtestate-field").table + def getColumnCount(self): + return 4 + def getColumns(self,columns): + (id, date, title, description) = columns + return (id, date, title, description, None) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.txt +++ /dev/null @@ -1,20 +1,1 @@ -import sys,os -sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) -import genericScrapers -import scrape -from bs4 import BeautifulSoup -#http://www.doughellmann.com/PyMOTW/abc/ -class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): - - def getColumnCount(self): - return 4 - def getColumns(self,columns): - (id, date, title, description) = columns - return (id, date, title, description, None) - -if __name__ == '__main__': - print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) - print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) - ScraperImplementation().doScrape() - --- a/documents/scrapers/c43ca6780764f4e61918e8836be74420.py +++ b/documents/scrapers/c43ca6780764f4e61918e8836be74420.py @@ -7,7 +7,7 @@ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumns(self,columns): (id, date, title,description,notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) --- /dev/null +++ b/documents/scrapers/cb7f40e3495b682de6eee61bf09c1cfc.txt @@ -1,1 +1,2 @@ +no log --- /dev/null +++ b/documents/scrapers/dae7e934f1c341ccc9547a89a8af917e.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + #def getTable(self,soup): + # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/dfd7414bb0c21a0076ab559901ae0588.py +++ b/documents/scrapers/dfd7414bb0c21a0076ab559901ae0588.py @@ -8,7 +8,7 @@ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) def getTable(self,soup): return soup.find(class_ = "content") --- a/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py +++ b/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py @@ -7,7 +7,7 @@ class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper): def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper) --- /dev/null +++ b/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find(id = "genericContent").table.tbody + def getColumnCount(self): + return 3 + def getColumns(self,columns): + (id, date,title, description, notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.txt +++ /dev/null @@ -1,21 +1,1 @@ -import sys,os -sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) -import genericScrapers -import scrape -from bs4 import BeautifulSoup -#http://www.doughellmann.com/PyMOTW/abc/ -class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): - def getTable(self,soup): - return soup.find(id = "genericContent").table.tbody - def getColumnCount(self): - return 5 - def getColumns(self,columns): - (id, date,title, description, notes) = columns - return (id, date, title, description, notes) - -if __name__ == '__main__': - print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) - print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) - ScraperImplementation().doScrape() - --- a/documents/scrapers/f2ab2908d8ee56ed8d995ef4187e75e6.py +++ b/documents/scrapers/f2ab2908d8ee56ed8d995ef4187e75e6.py @@ -8,7 +8,7 @@ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) def getTable(self,soup): return soup.find(id = "content").table --- a/documents/scrapers/rtk.py +++ b/documents/scrapers/rtk.py @@ -7,7 +7,7 @@ class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper): def getColumns(self,columns): (id, date, title, description, notes) = columns - return (id, date, description, title, notes) + return (id, date, title, description, notes) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper) --- /dev/null +++ b/documents/sitemap.xml.php @@ -1,1 +1,25 @@ +"; +echo '' . "\n"; +echo " " . local_url() . "index.php1.0\n"; +foreach (scandir("./") as $file) { + if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php") + echo " " . local_url() . "$file0.6\n"; +} + +$db = $server->get_db('disclosr-foidocuments'); +try { + $rows = $db->get_view("app", "all")->rows; + foreach ($rows as $row) { + echo '' . local_url() . 'view.php?id=' . $row->value->_id . "0.3\n"; + } +} catch (SetteeRestClientException $e) { + setteErrorHandler($e); +} +echo ''; +?> + --- a/documents/template.inc.php +++ b/documents/template.inc.php @@ -130,21 +130,23 @@ } function displayLogEntry($row, $idtoname) { - echo "

".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")

".str_replace("\n","
",$row->value->description); + $result = ""; + $result .= "

".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")

".str_replace("\n","
",$row->value->description); if (isset($row->value->notes)) { -echo "
Note: ".$row->value->notes; +$result .= "
Note: ".$row->value->notes; } -echo "

"; +$result .= "

"; if (isset($row->value->links)){ -echo "

Links/Documents

    "; +$result .= "

    Links/Documents

      "; foreach ($row->value->links as $link) { - echo "
    • ".$link."
    • "; + $result .= "
    • ".$link."
    • "; } - echo "
    "; + $result .= "
"; } - echo "View original source... ID: ".$row->value->docID.""; -echo"
"; + $result .= "View original source... ID: ".$row->value->docID.""; +$result .= "
"; +return $result; } --- /dev/null +++ b/documents/view.php @@ -1,1 +1,27 @@ + +get_db('disclosr-agencies'); + +$idtoname = Array(); +foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { + $idtoname[$row->id] = trim($row->value->name); +} +$foidocsdb = $server->get_db('disclosr-foidocuments'); +try { + $obj = new stdClass(); + $obj->value = $foidocsdb->get($_REQUEST['id']); +echo displayLogEntry($obj,$idtoname); + +} catch (SetteeRestClientException $e) { + setteErrorHandler($e); +} +include_footer_documents(); +?> + --- a/documents/viewDocument.php +++ b/documents/viewDocument.php @@ -3,7 +3,13 @@ include_once('../include/common.inc.php'); $hash = $_REQUEST['hash']; $docsdb = $server->get_db('disclosr-documents'); +try { $doc = object_to_array($docsdb->get($hash)); + +} catch (SetteeRestClientException $e) { + setteErrorHandler($e); +} + if (!isset($doc['_attachments']) || count($doc['_attachments']) == 0) die ("no attachments"); $attachments = $doc['_attachments']; @@ -13,3 +19,4 @@ //echo $url; $request = Requests::get($url); echo ($request->body); + --- a/lib/FeedItem.php +++ /dev/null @@ -1,168 +1,1 @@ - - * @link http://www.ajaxray.com/projects/rss - */ - class FeedItem - { - private $elements = array(); //Collection of feed elements - private $version; - - /** - * Constructor - * - * @param contant (RSS1/RSS2/ATOM) RSS2 is default. - */ - function __construct($version = RSS2) - { - $this->version = $version; - } - - /** - * Add an element to elements array - * - * @access public - * @param srting The tag name of an element - * @param srting The content of tag - * @param array Attributes(if any) in 'attrName' => 'attrValue' format - * @return void - */ - public function addElement($elementName, $content, $attributes = null) - { - $this->elements[$elementName]['name'] = $elementName; - $this->elements[$elementName]['content'] = $content; - $this->elements[$elementName]['attributes'] = $attributes; - } - - /** - * Set multiple feed elements from an array. - * Elements which have attributes cannot be added by this method - * - * @access public - * @param array array of elements in 'tagName' => 'tagContent' format. - * @return void - */ - public function addElementArray($elementArray) - { - if(! is_array($elementArray)) return; - foreach ($elementArray as $elementName => $content) - { - $this->addElement($elementName, $content); - } - } - - /** - * Return the collection of elements in this feed item - * - * @access public - * @return array - */ - public function getElements() - { - return $this->elements; - } - - // Wrapper functions ------------------------------------------------------ - - /** - * Set the 'dscription' element of feed item - * - * @access public - * @param string The content of 'description' element - * @return void - */ - public function setDescription($description) - { - $tag = ($this->version == ATOM)? 'summary' : 'description'; - $this->addElement($tag, $description); - } - - /** - * @desc Set the 'title' element of feed item - * @access public - * @param string The content of 'title' element - * @return void - */ - public function setTitle($title) - { - $this->addElement('title', $title); - } - - /** - * Set the 'date' element of feed item - * - * @access public - * @param string The content of 'date' element - * @return void - */ - public function setDate($date) - { - if(! is_numeric($date)) - { - $date = strtotime($date); - } - - if($this->version == ATOM) - { - $tag = 'updated'; - $value = date(DATE_ATOM, $date); - } - elseif($this->version == RSS2) - { - $tag = 'pubDate'; - $value = date(DATE_RSS, $date); - } - else - { - $tag = 'dc:date'; - $value = date("Y-m-d", $date); - } - - $this->addElement($tag, $value); - } - - /** - * Set the 'link' element of feed item - * - * @access public - * @param string The content of 'link' element - * @return void - */ - public function setLink($link) - { - if($this->version == RSS2 || $this->version == RSS1) - { - $this->addElement('link', $link); - } - else - { - $this->addElement('link','',array('href'=>$link)); - $this->addElement('id', FeedWriter::uuid($link,'urn:uuid:')); - } - - } - - /** - * Set the 'encloser' element of feed item - * For RSS 2.0 only - * - * @access public - * @param string The url attribute of encloser tag - * @param string The length attribute of encloser tag - * @param string The type attribute of encloser tag - * @return void - */ - public function setEncloser($url, $length, $type) - { - $attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type); - $this->addElement('enclosure','',$attributes); - } - - } // end of class FeedItem -?> --- a/lib/FeedWriter.php +++ /dev/null @@ -1,435 +1,1 @@ - - * @link http://www.ajaxray.com/projects/rss - */ - class FeedWriter - { - private $channels = array(); // Collection of channel elements - private $items = array(); // Collection of items as object of FeedItem class. - private $data = array(); // Store some other version wise data - private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA - - private $version = null; - - /** - * Constructor - * - * @param constant the version constant (RSS1/RSS2/ATOM). - */ - function __construct($version = RSS2) - { - $this->version = $version; - - // Setting default value for assential channel elements - $this->channels['title'] = $version . ' Feed'; - $this->channels['link'] = 'http://www.ajaxray.com/blog'; - - //Tag names to encode in CDATA - $this->CDATAEncoding = array('description', 'content:encoded', 'summary'); - } - - // Start # public functions --------------------------------------------- - - /** - * Set a channel element - * @access public - * @param srting name of the channel tag - * @param string content of the channel tag - * @return void - */ - public function setChannelElement($elementName, $content) - { - $this->channels[$elementName] = $content ; - } - - /** - * Set multiple channel elements from an array. Array elements - * should be 'channelName' => 'channelContent' format. - * - * @access public - * @param array array of channels - * @return void - */ - public function setChannelElementsFromArray($elementArray) - { - if(! is_array($elementArray)) return; - foreach ($elementArray as $elementName => $content) - { - $this->setChannelElement($elementName, $content); - } - } - - /** - * Genarate the actual RSS/ATOM file - * - * @access public - * @return void - */ - public function genarateFeed() - { - header("Content-type: text/xml"); - - $this->printHead(); - $this->printChannels(); - $this->printItems(); - $this->printTale(); - } - - /** - * Create a new FeedItem. - * - * @access public - * @return object instance of FeedItem class - */ - public function createNewItem() - { - $Item = new FeedItem($this->version); - return $Item; - } - - /** - * Add a FeedItem to the main class - * - * @access public - * @param object instance of FeedItem class - * @return void - */ - public function addItem($feedItem) - { - $this->items[] = $feedItem; - } - - - // Wrapper functions ------------------------------------------------------------------- - - /** - * Set the 'title' channel element - * - * @access public - * @param srting value of 'title' channel tag - * @return void - */ - public function setTitle($title) - { - $this->setChannelElement('title', $title); - } - - /** - * Set the 'description' channel element - * - * @access public - * @param srting value of 'description' channel tag - * @return void - */ - public function setDescription($desciption) - { - $this->setChannelElement('description', $desciption); - } - - /** - * Set the 'link' channel element - * - * @access public - * @param srting value of 'link' channel tag - * @return void - */ - public function setLink($link) - { - $this->setChannelElement('link', $link); - } - - /** - * Set the 'image' channel element - * - * @access public - * @param srting title of image - * @param srting link url of the imahe - * @param srting path url of the image - * @return void - */ - public function setImage($title, $link, $url) - { - $this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url)); - } - - /** - * Set the 'about' channel element. Only for RSS 1.0 - * - * @access public - * @param srting value of 'about' channel tag - * @return void - */ - public function setChannelAbout($url) - { - $this->data['ChannelAbout'] = $url; - } - - /** - * Genarates an UUID - * @author Anis uddin Ahmad - * @param string an optional prefix - * @return string the formated uuid - */ - public function uuid($key = null, $prefix = '') - { - $key = ($key == null)? uniqid(rand()) : $key; - $chars = md5($key); - $uuid = substr($chars,0,8) . '-'; - $uuid .= substr($chars,8,4) . '-'; - $uuid .= substr($chars,12,4) . '-'; - $uuid .= substr($chars,16,4) . '-'; - $uuid .= substr($chars,20,12); - - return $prefix . $uuid; - } - // End # public functions ---------------------------------------------- - - // Start # private functions ---------------------------------------------- - - /** - * Prints the xml and rss namespace - * - * @access private - * @return void - */ - private function printHead() - { - $out = '' . "\n"; - - if($this->version == RSS2) - { - $out .= '' . PHP_EOL; - } - elseif($this->version == RSS1) - { - $out .= '' . PHP_EOL;; - } - else if($this->version == ATOM) - { - $out .= '' . PHP_EOL;; - } - echo $out; - } - - /** - * Closes the open tags at the end of file - * - * @access private - * @return void - */ - private function printTale() - { - if($this->version == RSS2) - { - echo '' . PHP_EOL . ''; - } - elseif($this->version == RSS1) - { - echo ''; - } - else if($this->version == ATOM) - { - echo ''; - } - - } - - /** - * Creates a single node as xml format - * - * @access private - * @param srting name of the tag - * @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format - * @param array Attributes(if any) in 'attrName' => 'attrValue' format - * @return string formatted xml tag - */ - private function makeNode($tagName, $tagContent, $attributes = null) - { - $nodeText = ''; - $attrText = ''; - - if(is_array($attributes)) - { - foreach ($attributes as $key => $value) - { - $attrText .= " $key=\"$value\" "; - } - } - - if(is_array($tagContent) && $this->version == RSS1) - { - $attrText = ' rdf:parseType="Resource"'; - } - - - $attrText .= (in_array($tagName, $this->CDATAEncoding) && $this->version == ATOM)? ' type="html" ' : ''; - $nodeText .= (in_array($tagName, $this->CDATAEncoding))? "<{$tagName}{$attrText}>"; - - if(is_array($tagContent)) - { - foreach ($tagContent as $key => $value) - { - $nodeText .= $this->makeNode($key, $value); - } - } - else - { - $nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent); - } - - $nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]>" : ""; - - return $nodeText . PHP_EOL; - } - - /** - * @desc Print channels - * @access private - * @return void - */ - private function printChannels() - { - //Start channel tag - switch ($this->version) - { - case RSS2: - echo '' . PHP_EOL; - break; - case RSS1: - echo (isset($this->data['ChannelAbout']))? "data['ChannelAbout']}\">" : "channels['link']}\">"; - break; - } - - //Print Items of channel - foreach ($this->channels as $key => $value) - { - if($this->version == ATOM && $key == 'link') - { - // ATOM prints link element as href attribute - echo $this->makeNode($key,'',array('href'=>$value)); - //Add the id for ATOM - echo $this->makeNode('id',$this->uuid($value,'urn:uuid:')); - } - else - { - echo $this->makeNode($key, $value); - } - - } - - //RSS 1.0 have special tag with channel - if($this->version == RSS1) - { - echo "" . PHP_EOL . "" . PHP_EOL; - foreach ($this->items as $item) - { - $thisItems = $item->getElements(); - echo "" . PHP_EOL; - } - echo "" . PHP_EOL . "" . PHP_EOL . "" . PHP_EOL; - } - } - - /** - * Prints formatted feed items - * - * @access private - * @return void - */ - private function printItems() - { - foreach ($this->items as $item) - { - $thisItems = $item->getElements(); - - //the argument is printed as rdf:about attribute of item in rss 1.0 - echo $this->startItem($thisItems['link']['content']); - - foreach ($thisItems as $feedItem ) - { - echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']); - } - echo $this->endItem(); - } - } - - /** - * Make the starting tag of channels - * - * @access private - * @param srting The vale of about tag which is used for only RSS 1.0 - * @return void - */ - private function startItem($about = false) - { - if($this->version == RSS2) - { - echo '' . PHP_EOL; - } - elseif($this->version == RSS1) - { - if($about) - { - echo "" . PHP_EOL; - } - else - { - die('link element is not set .\n It\'s required for RSS 1.0 to be used as about attribute of item'); - } - } - else if($this->version == ATOM) - { - echo "" . PHP_EOL; - } - } - - /** - * Closes feed item tag - * - * @access private - * @return void - */ - private function endItem() - { - if($this->version == RSS2 || $this->version == RSS1) - { - echo '' . PHP_EOL; - } - else if($this->version == ATOM) - { - echo "" . PHP_EOL; - } - } - - - - // End # private functions ---------------------------------------------- - - } // end of class FeedWriter - -// autoload classes -function __autoload($class_name) -{ - require_once $class_name . '.php'; -} +