fix sitemap
fix sitemap


Former-commit-id: ddc69719d5b15a16ac0ec2110cd9fdd9fb44cc0e

--- a/admin/refreshDesignDoc.php
+++ b/admin/refreshDesignDoc.php
@@ -8,7 +8,7 @@
 $obj->_id = "_design/" . urlencode("app");
 $obj->language = "javascript";
 $obj->views->all->map = "function(doc) {   emit(doc._id, doc); };";
-$obj->views->byDate->map = "function(doc) {   emit(doc.date, doc); };";
+$obj->views->byDate->map = "function(doc) {  if (doc.title != \"Disclosure Log Updated\") { emit(doc.date, doc); } };";
 $obj->views->byDateMonthYear->map = "function(doc) {   emit(doc.date, doc); };";
 $obj->views->byDateMonthYear->reduce = "_count";
 $obj->views->byAgencyID->map = "function(doc) {   emit(doc.agencyID, doc); };";

--- a/documents/agency.php
+++ b/documents/agency.php
@@ -19,7 +19,7 @@
     <br>
 <?php
 try {
-    if ($_REQUEST['id']) {
+    if (isset($_REQUEST['id'])) {
         $rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows;
         foreach ($rows as $row) {
             //print_r($rows);
@@ -42,3 +42,4 @@
 echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>";
 include_footer_documents();
 ?>
+

--- a/documents/charts.php
+++ b/documents/charts.php
@@ -16,7 +16,7 @@
     <h4 class="subheader"></h4>
 </div>
 <div id="bydate" style="width:1000px;height:300px;"></div>
-<div id="byagency" style="width:1200px;height:800px;"></div>
+<div id="byagency" style="width:1000px;height:1400px;"></div>
 <script id="source">
     window.onload = function () {
         $(document).ready(function () {

--- a/documents/datagov-export-groups.py
+++ b/documents/datagov-export-groups.py
@@ -10,7 +10,11 @@
 # Instantiate the CKAN client.
 #ckan = ckanclient.CkanClient(base_location='http://localhost:5000/api',    api_key='b47b24cd-591d-40c1-8677-d73101d56d1b')
 api_key = 'ff34526e-f794-4068-8235-fcbba38cd8bc'
-ckan = ckanclient.CkanClient(base_location='http://data.disclosurelo.gs/api',
+server = 'data.disclosurelo.gs'
+api_key = 'c30eb6f5-0f90-47e0-bf05-9b1b4e3a461a'
+server = 'ckan.data.gov.au'
+
+ckan = ckanclient.CkanClient(base_location='http://' + server + '/api',
                              api_key=api_key)
 couch = couchdb.Server('http://127.0.0.1:5984/')
 #couch = couchdb.Server('http://192.168.1.113:5984/')

--- a/documents/datagov-export.py
+++ b/documents/datagov-export.py
@@ -14,6 +14,7 @@
 
 import tempfile
 def add_package_resource_cachedurl(ckan, package_name, url, name, format, license_id, size,**kwargs):
+    excluded_apis = "recent-earthquakes,sentinel-hotspots,abc-local-stations,action-bus-service-gtfs-feed-act,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,journey-planner-data-act,live-traffic-cameras-nsw,precis-forecast-national,precis-forecast-national,precis-forecast-new-south-wales,precis-forecast-new-south-wales,precis-forecast-northern-territory,precis-forecast-northern-territory,precis-forecast-queensland,precis-forecast-queensland,precis-forecast-south-australia,precis-forecast-south-australia,precis-forecast-tasmania,precis-forecast-tasmania,precis-forecast-victoria,precis-forecast-victoria,precis-forecast-western-australia,precis-forecast-western-australia,register-of-penalty-notices-nsw,sentinel-hotspots,trove-people-and-organisations-data,weather-data-services-radar,abc-local-stations,act-emergency-services-agency-esa-28093-current-incidents,act-emergency-services-agency-esa-news-alerts,act-government-news-and-events,act-government-summaries-of-cabinet-outcomes,act-magistrates-court-judgements,act-supreme-court-judgements,act-supreme-court-sentences,actpla-latest-news,all-vacant-act-government-jobs,community-engagement-current-engagements,community-engagement-news,edd-media-releases,edd-news-and-events,freedom-of-information-foi-summaries,libraries-act-announcements,nsw-rural-fire-service-current-incidents,nsw-rural-fire-service-major-updates,precis-forecast-new-south-wales,precis-forecast-south-australia,precis-forecast-tasmania,precis-forecast-victoria,sentinel-hotspots,south-australian-road-crash-statistics,trove-people-and-organisations-data,weather-data-services-radar,weather-warnings-for-new-south-wales-australian-capital-territory,weather-warnings-for-northern-territory,weather-warnings-for-queensland,weather-warnings-for-south-australia,weather-warnings-for-tasmania,weather-warnings-for-victoria,weather-warnings-for-western-australia".split(",")
     if "xls" in url:
 	format = "xls"
     if "pdf" in url:
@@ -27,8 +28,12 @@
     if mime_type in ["application/xlsx","application/x-xlsx","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]:
 	format = "xlsx"
 
-    if content != None:
-	    tf = tempfile.NamedTemporaryFile(delete=False)
+    #put file extensions on for windows users downloading files
+    suffix = name.encode("ascii","ignore").replace("/","")
+    if len(suffix) < 5 or (suffix[-4] != "." and suffix[-5] != "."):
+	suffix = suffix + "." + format
+    if content != None and package_name not in excluded_apis:
+	    tf = tempfile.NamedTemporaryFile(suffix=suffix)
 	    tfName = os.path.abspath(tf.name)
 	    print tfName
 	    tf.seek(0)
@@ -196,7 +201,7 @@
             pkg_name = filter(lambda x: x in '0123456789abcdefghijklmnopqrstuvwxyz-_',
                               doc.value['url'].replace("http://data.gov.au/dataset/", '').replace('/', '')[:100]);
             print pkg_name
-            if pkg_name != "":
+            if pkg_name != "" :
 
                 #add to or create organization using direct API
                 agency = doc.value['metadata']["Agency"]
@@ -262,7 +267,7 @@
                         'author': creator,
                         'maintainer': creator,
                         'license_id': get_license_id(doc.value['metadata']['DCTERMS.License']),
-                        'notes': html2text.html2text(doc.value['metadata']['Description']).replace('AC/a!a','-').replace('AC/a!aC/',"'").replace("AC/a!E",":")replace("A "," "),
+                        'notes': html2text.html2text(doc.value['metadata']['Description']).replace('AC/a!a','-').replace('AC/a!aC/',"'").replace("AC/a!E",":").replace("A "," "),
                         'owner_org': org_id,
                         'extras': extras,
                         'private': (pkg_name not in goodcsvdata and pkg_name not in goodotherdata)

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -202,7 +202,7 @@
     def getDate(self, content, entry, doc):
         strdate = ''.join(content.stripped_strings).strip()
         (a, b, c) = strdate.partition("(")
-        strdate = self.remove_control_chars(a.replace("Octber", "October").replace("1012","2012")replace("Janrurary", "January").replace("1012","2012"))
+        strdate = self.remove_control_chars(a.replace("Octber", "October").replace("1012","2012").replace("Janrurary", "January").replace("1012","2012"))
         print strdate
         try:
 		edate = parse(strdate, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
@@ -241,6 +241,7 @@
                         print self.remove_control_chars(
                             ''.join(id.stripped_strings))
                         if id.string is None:
+			    print "no id, using date as hash"
                             dochash = scrape.mkhash(
                                 self.remove_control_chars(
                                     url + (''.join(date.stripped_strings))))

--- a/documents/runScrapers.sh
+++ b/documents/runScrapers.sh
@@ -1,7 +1,8 @@
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+echo $DIR
 cd $DIR
 echo "" > /tmp/disclosr-error
-for f in scrapers/*.py; do
+for f in $DIR/scrapers/*.py; do
 	echo "Processing $f file..";
 	md5=`md5sum /tmp/disclosr-error`
 	python $f 3>&1 1>&2 2>&3 | tee --append /tmp/disclosr-error;

--- a/documents/scrapers/f5ce2d1651739704634eb8ca4b2b46d3.py
+++ b/documents/scrapers/f5ce2d1651739704634eb8ca4b2b46d3.py
@@ -7,12 +7,12 @@
 #http://www.doughellmann.com/PyMOTW/abc/
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getTable(self,soup):
-                return soup.find(id = "ctl00_PlaceHolderMain_PublishingPageContent__ControlWrapper_RichHtmlField").table       
+                return soup.find(id = "block-system-main").table       
         def getColumnCount(self):
-                return 7
+                return 2
         def getColumns(self,columns):
-                (id, date, title, description,link,deldate, notes) = columns
-                return (id, date, title, description, notes)
+                (date, title) = columns
+                return (date, date, title, title, None)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- a/documents/sitemap.xml.php
+++ b/documents/sitemap.xml.php
@@ -1,30 +1,48 @@
 <?php
 
 include ('../include/common.inc.php');
-$last_updated = date('Y-m-d', @filemtime('cbrfeed.zip'));
 header("Content-Type: text/xml");
 echo "<?xml version='1.0' encoding='UTF-8'?>";
 echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
 echo " <url><loc>" . local_url() . "index.php</loc><priority>1.0</priority></url>\n";
 foreach (scandir("./") as $file) {
-    if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php")
+    if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php") {
         echo " <url><loc>" . local_url() . "$file</loc><priority>0.6</priority></url>\n";
+	}
 }
 $agenciesdb = $server->get_db('disclosr-agencies');
+$foidocsdb = $server->get_db('disclosr-foidocuments');
 try {
     $rows = $agenciesdb->get_view("app", "byCanonicalName")->rows;
     foreach ($rows as $row) {
         echo '<url><loc>' . local_url() . 'agency.php?id=' . $row->value->_id . "</loc><priority>0.3</priority></url>\n";
     }
+	unset($rows);
+	$rows = null;
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);
 }
-$foidocsdb = $server->get_db('disclosr-foidocuments');
+
+foreach (range(0, 8) as $number) {
 try {
-    $rows = $foidocsdb->get_view("app", "all")->rows;
+    $rows = $foidocsdb->get_view("app", "all", Array($number,$number+1))->rows;
     foreach ($rows as $row) {
         echo '<url><loc>' . local_url() . 'view.php?id=' . $row->value->_id . "</loc><priority>0.3</priority></url>\n";
     }
+	unset($rows);
+	$rows = null;
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+}
+
+try {
+    $rows = $foidocsdb->get_view("app", "all", Array('9','fffffffff'))->rows;
+    foreach ($rows as $row) {
+        echo '<url><loc>' . local_url() . 'view.php?id=' . $row->value->_id . "</loc><priority>0.3</priority></url>\n";
+    }
+	unset($rows);
+	$rows = null;
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);
 }

--- a/include/common.inc.php
+++ b/include/common.inc.php
@@ -10,6 +10,7 @@
         || strstr($_SERVER['PHP_SELF'], "documents/")
 	|| $_SERVER['SERVER_NAME'] == "disclosurelo.gs"
 	|| $_SERVER['SERVER_NAME'] == "www.disclosurelo.gs"
+	|| $_SERVER['SERVER_NAME'] == "direct.disclosurelo.gs"
         )
     $basePath = "../";
 

directory:a/js/bubbletree -> directory:b/js/bubbletree
--- a/js/bubbletree
+++ b/js/bubbletree

directory:a/js/flotr2 -> directory:b/js/flotr2
--- a/js/flotr2
+++ b/js/flotr2

directory:a/js/sigma -> directory:b/js/sigma
--- a/js/sigma
+++ b/js/sigma

directory:a/lib/amon-php -> directory:b/lib/amon-php
--- a/lib/amon-php
+++ b/lib/amon-php

directory:a/lib/phpquery -> directory:b/lib/phpquery
--- a/lib/phpquery
+++ b/lib/phpquery