From: Maxious <maxious@lambdacomplex.org>
Date: Wed, 28 Nov 2012 12:16:21 +0000
Subject: add disclogs rss and sitemap and viewer
X-Git-Url: http://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=d34ebaf2263fb4c70f030bee61b6ce59b02059aa
---
add disclogs rss and sitemap and viewer


Former-commit-id: cf7e993d742cf3770bb93293c41bb9e298a36776
---


--- a/.gitmodules
+++ b/.gitmodules
@@ -31,4 +31,7 @@
 [submodule "documents/lib/parsedatetime"]
 	path = documents/lib/parsedatetime
 	url = git://github.com/bear/parsedatetime.git
+[submodule "lib/FeedWriter"]
+	path = lib/FeedWriter
+	url = https://github.com/mibe/FeedWriter
 

--- /dev/null
+++ b/documents/.gitignore
@@ -1,1 +1,2 @@
 
+*.pyc

--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -35,18 +35,18 @@
                     echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
                     . 'view local copy</a>)</small>';
             } else {
-                echo "<font color='red'>✘</font>";
+                echo "<font color='red'><abbr title='No'>✘</abbr></font>";
             }
             echo "</td>\n<td>";
             if (isset($row->value->FOIDocumentsURL)) {
                 if (file_exists("./scrapers/" . $row->id . '.py')) {
-                    echo "<font color='green'>✔</font>";
+                    echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
                     $green++;
                 } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
-                    echo "<font color='orange'><b>▬</b></font>";
+                    echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
                     $orange++;
                 } else {
-                    echo "<font color='red'>✘</font>";
+                    echo "<font color='red'><abbr title='No'>✘</abbr></font>";
                     $red++;
                 }
             }
@@ -57,7 +57,8 @@
     setteErrorHandler($e);
 }
 echo "</table>";
-echo $agencies." agencies ".(($disclogs/$agencies)*100)."% with disclosure logs, ".(($green/$disclogs)*100)."% with scrapers ".(($red/$disclogs)*100)."% without scrapers ".(($orange/$disclogs)*100)."% WIP scrapers ";
+echo $agencies." agencies, ".round(($disclogs/max($agencies,1))*100)."% with disclosure logs; " // max(...,1): print 0% instead of dividing by zero when nothing was counted
+.round(($green/max($disclogs,1))*100)."% logs with scrapers ".round(($red/max($disclogs,1))*100)."% logs without scrapers ".round(($orange/max($disclogs,1))*100)."% logs Work-In-Progress scrapers ";
 
 include_footer_documents();
 ?>

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -84,12 +84,17 @@
                 doc.update({'description': descriptiontxt})
 		return
         def getTitle(self, content, entry, doc):
-                doc.update({'title': content.string})
+                doc.update({'title': (''.join(content.stripped_strings))})
 		return
 	def getTable(self, soup):
 		return soup.table
+	def getRows(self, table):
+		return table.find_all('tr')
 	def getDate(self, content, entry, doc):
-		edate = parse(''.join(content.stripped_strings).strip(), dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+		date = ''.join(content.stripped_strings).strip()
+		date = date.replace("Octber","October")  # correct the "Octber" misspelling in the cell text before parsing
+		print date
+		edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
 		print edate
 		doc.update({'date': edate})
 		return
@@ -110,10 +115,10 @@
 			# http://www.crummy.com/software/BeautifulSoup/documentation.html
 				soup = BeautifulSoup(content)
 				table = self.getTable(soup)
-				for row in table.find_all('tr'):
+				for row in self.getRows(table):
 					columns = row.find_all('td')
 					if len(columns) == self.getColumnCount():
-						(id, date, description, title, notes) = self.getColumns(columns)
+						(id, date, title, description, notes) = self.getColumns(columns)
 						print ''.join(id.stripped_strings)
 						if id.string == None:
 							hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
@@ -123,13 +128,13 @@
 							
 						if doc == None:
 							print "saving " +hash
-							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': id.string}
+							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))}
 							self.getLinks(self.getURL(),row,doc)
                                 			self.getTitle(title,row, doc)
                                 			self.getDate(date,row, doc)
 							self.getDescription(description,row, doc)
 							if notes != None:
-                                        			doc.update({ 'notes': notes.string})
+                                        			doc.update({ 'notes': (''.join(notes.stripped_strings))})
 							foidocsdb.save(doc)
 						else:
 							print "already saved "+hash

--- a/documents/index.php
+++ b/documents/index.php
@@ -20,7 +20,7 @@
 
     if ($rows) {
         foreach ($rows as $row) {
-displayLogEntry($row,$idtoname);
+echo displayLogEntry($row,$idtoname);
         }
     }
 } catch (SetteeRestClientException $e) {

--- a/documents/robots.txt
+++ b/documents/robots.txt
@@ -2,4 +2,5 @@
 # http://code.google.com/web/controlcrawlindex/
 
 User-agent: *
-
+Disallow: /admin/
+Sitemap: http://disclosurelo.gs/sitemap.xml.php

--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -3,28 +3,38 @@
 // Agency X updated Y,  new files, diff of plain text/link text,
 // feed for just one agency or all
 // This is a minimum example of using the Universal Feed Generator Class
-include("lib/FeedWriter.php");
+include("../lib/FeedWriter/FeedTypes.php");
+include_once('../include/common.inc.php');
 //Creating an instance of FeedWriter class.
-$TestFeed = new FeedWriter(RSS2);
+$TestFeed = new RSS2FeedWriter();
 //Setting the channel elements
 //Use wrapper functions for common channelelements
 $TestFeed->setTitle('Last Modified - All');
-$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php');
+$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php');
 $TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer');
+  $TestFeed->setChannelElement('language', 'en-us');
+  $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
 //Retriving informations from database
-$rows = $db->get_view("app", "byLastModified")->rows;
+$idtoname = Array();
+$agenciesdb = $server->get_db('disclosr-agencies');
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+    $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows;
 //print_r($rows);
 foreach ($rows as $row) {
     //Create an empty FeedItem
     $newItem = $TestFeed->createNewItem();
     //Add elements to the feed item
-    $newItem->setTitle($row['name']);
-    $newItem->setLink($row['id']);
-    $newItem->setDate(date("c", $row['metadata']['lastModified']));
-    $newItem->setDescription($row['name']);
+    $newItem->setTitle($row->value->title);
+    $newItem->setLink("http://disclosurelo.gs/view.php?id=".urlencode($row->value->docID)); // absolute, URL-encoded link: feed readers do not reliably resolve relative links
+    $newItem->setDate(date("c", strtotime($row->value->date)));
+    $newItem->setDescription(displayLogEntry($row,$idtoname));
+    $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'false')); // the guid is a document id, not a URL, so it must not be flagged as a permalink
     //Now add the feed item
     $TestFeed->addItem($newItem);
 }
 //OK. Everything is done. Now genarate the feed.
-$TestFeed->genarateFeed();
+$TestFeed->generateFeed();
 ?>

--- /dev/null
+++ b/documents/scrapers/00a294de663db69062ca09aede7c0487.txt
@@ -1,1 +1,2 @@
+multipage
 

--- /dev/null
+++ b/documents/scrapers/0324e4b1654fd6dd651307abcef67094.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 6
+        def getColumns(self,columns):
+                (id, date, title, description, notes,link) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
+++ b/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- a/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
+++ b/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
@@ -10,7 +10,7 @@
                 return 5
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         def getTable(self,soup):
                 return soup.find_all('table')[4]
 

--- /dev/null
+++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.txt
@@ -1,1 +1,2 @@
+multipage log
 

--- /dev/null
+++ b/documents/scrapers/38ca99d2790975a40dde3fae41dbdc3d.py
@@ -1,1 +1,32 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (date, title, description) = columns
+                return (date, date, title, description, None)
+   	def getTitle(self, content, entry, doc):
+		i = 0
+		title = ""
+		for string in content.stripped_strings:
+    			if i < 2:
+				title = title + string
+			i = i+1
+                doc.update({'title': title})
+		print title
+                return
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/3b54190e3f409380e109fae29e1917aa.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 7
+        def getColumns(self,columns):
+                (id, date, title, description, link, deldate,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
+++ b/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
 	def getColumns(self,columns):
 		(id, date, description, title, notes) = columns
-		return (id, date, description, title, notes)
+		return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/3d5871a44abbbc81ef5b3a420070755d.py
@@ -1,1 +1,47 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(class_ = "inner-column").table       
+        def getRows(self,table):
+                return table.tbody.find_all('tr',recursive=False)
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (date, title, description) = columns
+                return (date, date, title, description, None)
+        def getDate(self, content, entry, doc):
+		i = 0
+		date = ""
+		for string in content.stripped_strings:
+    			if i ==1:
+				date = string
+			i = i+1
+                edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+                print edate
+                doc.update({'date': edate})
+                return
+   	def getTitle(self, content, entry, doc):
+		i = 0
+		title = ""
+		for string in content.stripped_strings:
+    			if i < 2:
+				title = title + string
+			i = i+1
+                doc.update({'title': title})
+		#print title
+                return
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/41a166419503bb50e410c58be54c102f.txt
@@ -1,1 +1,1 @@
-
+aspx

--- /dev/null
+++ b/documents/scrapers/4934000fddd6a5b1094f398798341290.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/53b14397c8f27c29ff07b6319f7a0ec5.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py
+++ b/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py
@@ -10,7 +10,7 @@
                 return soup.find(class_ = "ms-rtestate-field").table
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
         def getLinks(self, content, entry, doc):
 		link = None

--- /dev/null
+++ b/documents/scrapers/55b69726fde4b4898ecf6d7217d1d1d2.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/5716ce0aacfe98f7d638b7a66b7f1040.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (date, id, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/5d05365e981d87e746b596d63e35b1dc.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.txt
@@ -1,1 +1,2 @@
+see parent dhs
 

--- /dev/null
+++ b/documents/scrapers/649b053f5e2884906ddc7174c2cd4b38.py
@@ -1,1 +1,28 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2011-12-financial-year"
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2010-11-financial-year"
+    si.doScrape()
+
+

--- /dev/null
+++ b/documents/scrapers/6cf3870aedeeecfd6394b5c0abed4c55.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "content_div_50269").table
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.txt
+++ /dev/null
@@ -1,19 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
 
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-        def getTable(self,soup):
-                return soup.find(id = "content_div_50269").table
-        def getColumns(self,columns):
-                (id, date, title, description, notes) = columns
-                return (id, date, title, description, notes)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-

--- a/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py
+++ b/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/77f02f713e3c37bff73882fb90828379.py
@@ -1,1 +1,22 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find("table",width="571")
+#findAll("table")[3]
+        def getColumnCount(self):
+                return 7
+        def getColumns(self,columns):
+                (id, date, title, description,link,deldate,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/795c33ed030293dbdb155c909ea47e37.py
+++ b/documents/scrapers/795c33ed030293dbdb155c909ea47e37.py
@@ -10,7 +10,7 @@
                 return 7
         def getColumns(self,columns):
                 (id, date, title, description, notes, deletedate, otherinfo) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         #def getTable(self,soup):
         #        return soup.find(class_ = "box").table
 

--- a/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py
+++ b/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- a/documents/scrapers/8ef0e5802f99800f514b3a148e013b75.py
+++ b/documents/scrapers/8ef0e5802f99800f514b3a148e013b75.py
@@ -12,7 +12,7 @@
                 return soup.find(class_ = "content").table
         def getColumns(self,columns):
                 (id, date, title, description) = columns
-                return (id, date, description, title, None)
+                return (id, date, title, description, None)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/905a1c409b6afb1de0074b13a5559560.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/9f5cd66dea3e2ec958c17e28b27e60a7.txt
@@ -1,1 +1,2 @@
+acma style
 

--- /dev/null
+++ b/documents/scrapers/ad033512610d8e36886ab6a795f26561.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "_ctl0__ctl0_MainContentPlaceHolder_MainContentPlaceHolder_ContentSpan").findAll("table")[3]
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/b0a3281ba66efe173c5a33d5ef90ff76.txt
@@ -1,1 +1,2 @@
+multipage immi
 

--- /dev/null
+++ b/documents/scrapers/b506b87c8ee9e3a7ea8007914078c741.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 6
+        def getColumns(self,columns):
+                (id, date, title, description,link,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(class_ = "ms-rtestate-field").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.txt
+++ /dev/null
@@ -1,20 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
 
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-       
-        def getColumnCount(self):
-                return 4
-        def getColumns(self,columns):
-                (id, date, title, description) = columns
-                return (id, date, title, description, None)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-

--- a/documents/scrapers/c43ca6780764f4e61918e8836be74420.py
+++ b/documents/scrapers/c43ca6780764f4e61918e8836be74420.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
 	def getColumns(self,columns):
 		(id, date, title,description,notes) = columns
-		return (id, date, description, title, notes)
+		return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/cb7f40e3495b682de6eee61bf09c1cfc.txt
@@ -1,1 +1,2 @@
+no log
 

--- /dev/null
+++ b/documents/scrapers/dae7e934f1c341ccc9547a89a8af917e.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5  # matches the five values unpacked in getColumns()
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)  # identity mapping: cells are returned in the order received
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/dfd7414bb0c21a0076ab559901ae0588.py
+++ b/documents/scrapers/dfd7414bb0c21a0076ab559901ae0588.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         def getTable(self,soup):
                 return soup.find(class_ = "content")
 

--- a/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py
+++ b/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "genericContent").table.tbody  # <tbody> of the first table inside the element with id "genericContent"
+        def getColumnCount(self):
+                return 3  # NOTE(review): getColumns() below unpacks 5 values; if 3-cell rows are passed to it the unpack raises ValueError -- confirm the intended column layout
+        def getColumns(self,columns):
+                (id,  date,title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.txt
+++ /dev/null
@@ -1,21 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
 
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-        def getTable(self,soup):
-                return soup.find(id = "genericContent").table.tbody
-        def getColumnCount(self):
-                return 5
-        def getColumns(self,columns):
-                (id,  date,title, description, notes) = columns
-                return (id, date, title, description, notes)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-

--- a/documents/scrapers/f2ab2908d8ee56ed8d995ef4187e75e6.py
+++ b/documents/scrapers/f2ab2908d8ee56ed8d995ef4187e75e6.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         def getTable(self,soup):
                 return soup.find(id = "content").table
 

--- a/documents/scrapers/rtk.py
+++ b/documents/scrapers/rtk.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper)

--- /dev/null
+++ b/documents/sitemap.xml.php
@@ -1,1 +1,25 @@
+<?php
 
+include ('../include/common.inc.php');
+// XML sitemap: index.php, every other directory entry whose name contains ".php", and one view.php URL per FOI document.
+header("Content-Type: text/xml");
+echo "<?xml version='1.0' encoding='UTF-8'?>";
+echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
+echo " <url><loc>" . local_url() . "index.php</loc><priority>1.0</priority></url>\n";
+foreach (scandir("./") as $file) {
+    if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php")
+        echo " <url><loc>" . htmlspecialchars(local_url() . $file, ENT_QUOTES, 'UTF-8') . "</loc><priority>0.6</priority></url>\n";
+}
+
+$db = $server->get_db('disclosr-foidocuments');
+try {
+    $rows = $db->get_view("app", "all")->rows;
+    foreach ($rows as $row) { // the id is URL-encoded for the query string, then the whole URL is XML-escaped
+        echo '<url><loc>' . htmlspecialchars(local_url() . 'view.php?id=' . rawurlencode($row->value->_id), ENT_QUOTES, 'UTF-8') . "</loc><priority>0.3</priority></url>\n";
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+echo '</urlset>';
+?>
+

--- a/documents/template.inc.php
+++ b/documents/template.inc.php
@@ -130,21 +130,23 @@
 }
 
 function displayLogEntry($row, $idtoname) {
-    echo "<div><h2>".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")</h2> <p>".str_replace("\n","<br>",$row->value->description);
+    $result = ""; // HTML for one log entry is accumulated here and returned to the caller instead of being echoed
+    $result .= "<div><h2>".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")</h2> <p>".str_replace("\n","<br>",$row->value->description);
 if (isset($row->value->notes)) {
-echo " <br>Note: ".$row->value->notes;
+$result .= " <br>Note: ".$row->value->notes;
 }
-echo "</p>";
+$result .= "</p>";
 
 if (isset($row->value->links)){
-echo "<h3>Links/Documents</h3><ul>";
+$result .= "<h3>Links/Documents</h3><ul>";
 foreach ($row->value->links as $link) {
-    echo "<li><a href='$link'>".$link."</a></li>";
+    $result .= "<li><a href='".htmlspecialchars($link, ENT_QUOTES, 'UTF-8')."'>".htmlspecialchars($link, ENT_QUOTES, 'UTF-8')."</a></li>"; // escaped: a quote or & in the URL would otherwise break the single-quoted attribute
 }
 
-        echo "</ul>";
+        $result .= "</ul>";
 }
-        echo "<small><A href='".$row->value->url."'>View original source...</a> ID: ".$row->value->docID."</small>";
-echo"</div>";
+        $result .= "<small><A href='".htmlspecialchars($row->value->url, ENT_QUOTES, 'UTF-8')."'>View original source...</a> ID: ".$row->value->docID."</small>";
+$result .= "</div>";
+return $result;
 }
 

--- /dev/null
+++ b/documents/view.php
@@ -1,1 +1,27 @@
+<?php
+include('template.inc.php');
+include_header_documents("");
+include_once('../include/common.inc.php');
+?>
+<?php
 
+// Renders a single FOI disclosure-log entry, selected by the "id" request parameter.
+// NOTE(review): $_REQUEST['id'] is read without an isset() check -- confirm how a missing id should be handled.
+$agenciesdb = $server->get_db('disclosr-agencies');
+
+$idtoname = Array(); // view row id => trimmed agency name, passed to displayLogEntry()
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+    $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+try {
+  $obj = new stdClass();
+    $obj->value = $foidocsdb->get($_REQUEST['id']); // wrapped under ->value because displayLogEntry() reads $row->value->...
+echo displayLogEntry($obj,$idtoname);
+
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+include_footer_documents();
+?>
+

--- a/documents/viewDocument.php
+++ b/documents/viewDocument.php
@@ -3,7 +3,13 @@
 include_once('../include/common.inc.php');
 $hash = $_REQUEST['hash'];
 $docsdb = $server->get_db('disclosr-documents');
+try {
 $doc = object_to_array($docsdb->get($hash));
+
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+
 
 if (!isset($doc['_attachments']) || count($doc['_attachments']) == 0) die ("no attachments");
 $attachments = $doc['_attachments'];
@@ -13,3 +19,4 @@
 //echo $url;
 $request = Requests::get($url);
 echo ($request->body);
+

--- a/lib/FeedItem.php
+++ /dev/null
@@ -1,168 +1,1 @@
-<?php
- /**
- * Univarsel Feed Writer
- * 
- * FeedItem class - Used as feed element in FeedWriter class
- *
- * @package         UnivarselFeedWriter
- * @author          Anis uddin Ahmad <anisniit@gmail.com>
- * @link            http://www.ajaxray.com/projects/rss
- */
- class FeedItem
- {
-	private $elements = array();    //Collection of feed elements
-	private $version;
-	
-	/**
-	* Constructor 
-	* 
-	* @param    contant     (RSS1/RSS2/ATOM) RSS2 is default. 
-	*/ 
-	function __construct($version = RSS2)
-	{    
-		$this->version = $version;
-	}
-	
-	/**
-	* Add an element to elements array
-	* 
-	* @access   public
-	* @param    srting  The tag name of an element
-	* @param    srting  The content of tag
-	* @param    array   Attributes(if any) in 'attrName' => 'attrValue' format
-	* @return   void
-	*/
-	public function addElement($elementName, $content, $attributes = null)
-	{
-		$this->elements[$elementName]['name']       = $elementName;
-		$this->elements[$elementName]['content']    = $content;
-		$this->elements[$elementName]['attributes'] = $attributes;
-	}
-	
-	/**
-	* Set multiple feed elements from an array. 
-	* Elements which have attributes cannot be added by this method
-	* 
-	* @access   public
-	* @param    array   array of elements in 'tagName' => 'tagContent' format.
-	* @return   void
-	*/
-	public function addElementArray($elementArray)
-	{
-		if(! is_array($elementArray)) return;
-		foreach ($elementArray as $elementName => $content) 
-		{
-			$this->addElement($elementName, $content);
-		}
-	}
-	
-	/**
-	* Return the collection of elements in this feed item
-	* 
-	* @access   public
-	* @return   array
-	*/
-	public function getElements()
-	{
-		return $this->elements;
-	}
-	
-	// Wrapper functions ------------------------------------------------------
-	
-	/**
-	* Set the 'dscription' element of feed item
-	* 
-	* @access   public
-	* @param    string  The content of 'description' element
-	* @return   void
-	*/
-	public function setDescription($description) 
-	{
-		$tag = ($this->version == ATOM)? 'summary' : 'description'; 
-		$this->addElement($tag, $description);
-	}
-	
-	/**
-	* @desc     Set the 'title' element of feed item
-	* @access   public
-	* @param    string  The content of 'title' element
-	* @return   void
-	*/
-	public function setTitle($title) 
-	{
-		$this->addElement('title', $title);  	
-	}
-	
-	/**
-	* Set the 'date' element of feed item
-	* 
-	* @access   public
-	* @param    string  The content of 'date' element
-	* @return   void
-	*/
-	public function setDate($date) 
-	{
-		if(! is_numeric($date))
-		{
-			$date = strtotime($date);
-		}
-		
-		if($this->version == ATOM)
-		{
-			$tag    = 'updated';
-			$value  = date(DATE_ATOM, $date);
-		}        
-		elseif($this->version == RSS2) 
-		{
-			$tag    = 'pubDate';
-			$value  = date(DATE_RSS, $date);
-		}
-		else                                
-		{
-			$tag    = 'dc:date';
-			$value  = date("Y-m-d", $date);
-		}
-		
-		$this->addElement($tag, $value);    
-	}
-	
-	/**
-	* Set the 'link' element of feed item
-	* 
-	* @access   public
-	* @param    string  The content of 'link' element
-	* @return   void
-	*/
-	public function setLink($link) 
-	{
-		if($this->version == RSS2 || $this->version == RSS1)
-		{
-			$this->addElement('link', $link);
-		}
-		else
-		{
-			$this->addElement('link','',array('href'=>$link));
-			$this->addElement('id', FeedWriter::uuid($link,'urn:uuid:'));
-		} 
-		
-	}
-	
-	/**
-	* Set the 'encloser' element of feed item
-	* For RSS 2.0 only
-	* 
-	* @access   public
-	* @param    string  The url attribute of encloser tag
-	* @param    string  The length attribute of encloser tag
-	* @param    string  The type attribute of encloser tag
-	* @return   void
-	*/
-	public function setEncloser($url, $length, $type)
-	{
-		$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
-		$this->addElement('enclosure','',$attributes);
-	}
-	
- } // end of class FeedItem
-?>
 

--- a/lib/FeedWriter.php
+++ /dev/null
@@ -1,435 +1,1 @@
-<?php
-// RSS 0.90  Officially obsoleted by 1.0
-// RSS 0.91, 0.92, 0.93 and 0.94  Officially obsoleted by 2.0
-// So, define constants for RSS 1.0, RSS 2.0 and ATOM 	
-
-	define('RSS1', 'RSS 1.0', true);
-	define('RSS2', 'RSS 2.0', true);
-	define('ATOM', 'ATOM', true);
-
- /**
- * Univarsel Feed Writer class
- *
- * Genarate RSS 1.0, RSS2.0 and ATOM Feed
- *                             
- * @package     UnivarselFeedWriter
- * @author      Anis uddin Ahmad <anisniit@gmail.com>
- * @link        http://www.ajaxray.com/projects/rss
- */
- class FeedWriter
- {
-	 private $channels      = array();  // Collection of channel elements
-	 private $items         = array();  // Collection of items as object of FeedItem class.
-	 private $data          = array();  // Store some other version wise data
-	 private $CDATAEncoding = array();  // The tag names which have to encoded as CDATA
-	 
-	 private $version   = null; 
-	
-	/**
-	* Constructor
-	* 
-	* @param    constant    the version constant (RSS1/RSS2/ATOM).       
-	*/ 
-	function __construct($version = RSS2)
-	{	
-		$this->version = $version;
-			
-		// Setting default value for assential channel elements
-		$this->channels['title']        = $version . ' Feed';
-		$this->channels['link']         = 'http://www.ajaxray.com/blog';
-				
-		//Tag names to encode in CDATA
-		$this->CDATAEncoding = array('description', 'content:encoded', 'summary');
-	}
-
-	// Start # public functions ---------------------------------------------
-	
-	/**
-	* Set a channel element
-	* @access   public
-	* @param    srting  name of the channel tag
-	* @param    string  content of the channel tag
-	* @return   void
-	*/
-	public function setChannelElement($elementName, $content)
-	{
-		$this->channels[$elementName] = $content ;
-	}
-	
-	/**
-	* Set multiple channel elements from an array. Array elements 
-	* should be 'channelName' => 'channelContent' format.
-	* 
-	* @access   public
-	* @param    array   array of channels
-	* @return   void
-	*/
-	public function setChannelElementsFromArray($elementArray)
-	{
-		if(! is_array($elementArray)) return;
-		foreach ($elementArray as $elementName => $content) 
-		{
-			$this->setChannelElement($elementName, $content);
-		}
-	}
-	
-	/**
-	* Genarate the actual RSS/ATOM file
-	* 
-	* @access   public
-	* @return   void
-	*/ 
-	public function genarateFeed()
-	{
-		header("Content-type: text/xml");
-		
-		$this->printHead();
-		$this->printChannels();
-		$this->printItems();
-		$this->printTale();
-	}
-	
-	/**
-	* Create a new FeedItem.
-	* 
-	* @access   public
-	* @return   object  instance of FeedItem class
-	*/
-	public function createNewItem()
-	{
-		$Item = new FeedItem($this->version);
-		return $Item;
-	}
-	
-	/**
-	* Add a FeedItem to the main class
-	* 
-	* @access   public
-	* @param    object  instance of FeedItem class
-	* @return   void
-	*/
-	public function addItem($feedItem)
-	{
-		$this->items[] = $feedItem;    
-	}
-	
-	
-	// Wrapper functions -------------------------------------------------------------------
-	
-	/**
-	* Set the 'title' channel element
-	* 
-	* @access   public
-	* @param    srting  value of 'title' channel tag
-	* @return   void
-	*/
-	public function setTitle($title)
-	{
-		$this->setChannelElement('title', $title);
-	}
-	
-	/**
-	* Set the 'description' channel element
-	* 
-	* @access   public
-	* @param    srting  value of 'description' channel tag
-	* @return   void
-	*/
-	public function setDescription($desciption)
-	{
-		$this->setChannelElement('description', $desciption);
-	}
-	
-	/**
-	* Set the 'link' channel element
-	* 
-	* @access   public
-	* @param    srting  value of 'link' channel tag
-	* @return   void
-	*/
-	public function setLink($link)
-	{
-		$this->setChannelElement('link', $link);
-	}
-	
-	/**
-	* Set the 'image' channel element
-	* 
-	* @access   public
-	* @param    srting  title of image
-	* @param    srting  link url of the imahe
-	* @param    srting  path url of the image
-	* @return   void
-	*/
-	public function setImage($title, $link, $url)
-	{
-		$this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url));
-	}
-	
-	/**
-	* Set the 'about' channel element. Only for RSS 1.0
-	* 
-	* @access   public
-	* @param    srting  value of 'about' channel tag
-	* @return   void
-	*/
-	public function setChannelAbout($url)
-	{
-		$this->data['ChannelAbout'] = $url;    
-	}
-	
-  /**
-  * Genarates an UUID
-  * @author     Anis uddin Ahmad <admin@ajaxray.com>
-  * @param      string  an optional prefix
-  * @return     string  the formated uuid
-  */
-  public function uuid($key = null, $prefix = '') 
-  {
-	$key = ($key == null)? uniqid(rand()) : $key;
-	$chars = md5($key);
-	$uuid  = substr($chars,0,8) . '-';
-	$uuid .= substr($chars,8,4) . '-';
-	$uuid .= substr($chars,12,4) . '-';
-	$uuid .= substr($chars,16,4) . '-';
-	$uuid .= substr($chars,20,12);
-
-	return $prefix . $uuid;
-  }
-	// End # public functions ----------------------------------------------
-	
-	// Start # private functions ----------------------------------------------
-	
-	/**
-	* Prints the xml and rss namespace
-	* 
-	* @access   private
-	* @return   void
-	*/
-	private function printHead()
-	{
-		$out  = '<?xml version="1.0" encoding="utf-8"?>' . "\n";
-		
-		if($this->version == RSS2)
-		{
-			$out .= '<rss version="2.0"
-					xmlns:content="http://purl.org/rss/1.0/modules/content/"
-					xmlns:wfw="http://wellformedweb.org/CommentAPI/"
-				  >' . PHP_EOL;
-		}    
-		elseif($this->version == RSS1)
-		{
-			$out .= '<rdf:RDF 
-					 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-					 xmlns="http://purl.org/rss/1.0/"
-					 xmlns:dc="http://purl.org/dc/elements/1.1/"
-					>' . PHP_EOL;;
-		}
-		else if($this->version == ATOM)
-		{
-			$out .= '<feed xmlns="http://www.w3.org/2005/Atom">' . PHP_EOL;;
-		}
-		echo $out;
-	}
-	
-	/**
-	* Closes the open tags at the end of file
-	* 
-	* @access   private
-	* @return   void
-	*/
-	private function printTale()
-	{
-		if($this->version == RSS2)
-		{
-			echo '</channel>' . PHP_EOL . '</rss>'; 
-		}    
-		elseif($this->version == RSS1)
-		{
-			echo '</rdf:RDF>';
-		}
-		else if($this->version == ATOM)
-		{
-			echo '</feed>';
-		}
-	  
-	}
-
-	/**
-	* Creates a single node as xml format
-	* 
-	* @access   private
-	* @param    srting  name of the tag
-	* @param    mixed   tag value as string or array of nested tags in 'tagName' => 'tagValue' format
-	* @param    array   Attributes(if any) in 'attrName' => 'attrValue' format
-	* @return   string  formatted xml tag
-	*/
-	private function makeNode($tagName, $tagContent, $attributes = null)
-	{        
-		$nodeText = '';
-		$attrText = '';
-
-		if(is_array($attributes))
-		{
-			foreach ($attributes as $key => $value) 
-			{
-				$attrText .= " $key=\"$value\" ";
-			}
-		}
-		
-		if(is_array($tagContent) && $this->version == RSS1)
-		{
-			$attrText = ' rdf:parseType="Resource"';
-		}
-		
-		
-		$attrText .= (in_array($tagName, $this->CDATAEncoding) && $this->version == ATOM)? ' type="html" ' : '';
-		$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "<{$tagName}{$attrText}><![CDATA[" : "<{$tagName}{$attrText}>";
-		 
-		if(is_array($tagContent))
-		{ 
-			foreach ($tagContent as $key => $value) 
-			{
-				$nodeText .= $this->makeNode($key, $value);
-			}
-		}
-		else
-		{
-			$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent);
-		}           
-			
-		$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>";
-
-		return $nodeText . PHP_EOL;
-	}
-	
-	/**
-	* @desc     Print channels
-	* @access   private
-	* @return   void
-	*/
-	private function printChannels()
-	{
-		//Start channel tag
-		switch ($this->version) 
-		{
-		   case RSS2: 
-				echo '<channel>' . PHP_EOL;        
-				break;
-		   case RSS1: 
-				echo (isset($this->data['ChannelAbout']))? "<channel rdf:about=\"{$this->data['ChannelAbout']}\">" : "<channel rdf:about=\"{$this->channels['link']}\">";
-				break;
-		}
-		
-		//Print Items of channel
-		foreach ($this->channels as $key => $value) 
-		{
-			if($this->version == ATOM && $key == 'link') 
-			{
-				// ATOM prints link element as href attribute
-				echo $this->makeNode($key,'',array('href'=>$value));
-				//Add the id for ATOM
-				echo $this->makeNode('id',$this->uuid($value,'urn:uuid:'));
-			}
-			else
-			{
-				echo $this->makeNode($key, $value);
-			}    
-			
-		}
-		
-		//RSS 1.0 have special tag <rdf:Seq> with channel 
-		if($this->version == RSS1)
-		{
-			echo "<items>" . PHP_EOL . "<rdf:Seq>" . PHP_EOL;
-			foreach ($this->items as $item) 
-			{
-				$thisItems = $item->getElements();
-				echo "<rdf:li resource=\"{$thisItems['link']['content']}\"/>" . PHP_EOL;
-			}
-			echo "</rdf:Seq>" . PHP_EOL . "</items>" . PHP_EOL . "</channel>" . PHP_EOL;
-		}
-	}
-	
-	/**
-	* Prints formatted feed items
-	* 
-	* @access   private
-	* @return   void
-	*/
-	private function printItems()
-	{    
-		foreach ($this->items as $item) 
-		{
-			$thisItems = $item->getElements();
-			
-			//the argument is printed as rdf:about attribute of item in rss 1.0 
-			echo $this->startItem($thisItems['link']['content']);
-			
-			foreach ($thisItems as $feedItem ) 
-			{
-				echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']); 
-			}
-			echo $this->endItem();
-		}
-	}
-	
-	/**
-	* Make the starting tag of channels
-	* 
-	* @access   private
-	* @param    srting  The vale of about tag which is used for only RSS 1.0
-	* @return   void
-	*/
-	private function startItem($about = false)
-	{
-		if($this->version == RSS2)
-		{
-			echo '<item>' . PHP_EOL; 
-		}    
-		elseif($this->version == RSS1)
-		{
-			if($about)
-			{
-				echo "<item rdf:about=\"$about\">" . PHP_EOL;
-			}
-			else
-			{
-				die('link element is not set .\n It\'s required for RSS 1.0 to be used as about attribute of item');
-			}
-		}
-		else if($this->version == ATOM)
-		{
-			echo "<entry>" . PHP_EOL;
-		}    
-	}
-	
-	/**
-	* Closes feed item tag
-	* 
-	* @access   private
-	* @return   void
-	*/
-	private function endItem()
-	{
-		if($this->version == RSS2 || $this->version == RSS1)
-		{
-			echo '</item>' . PHP_EOL; 
-		}    
-		else if($this->version == ATOM)
-		{
-			echo "</entry>" . PHP_EOL;
-		}
-	}
-	
-
-	
-	// End # private functions ----------------------------------------------
-	
- } // end of class FeedWriter
- 
-// autoload classes
-function __autoload($class_name) 
-{
-	require_once $class_name . '.php';
-}
+