ausaid scraper
ausaid scraper


Former-commit-id: d2ca4331376808c570b588bad9a3c97df495d510

--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -1,4 +1,5 @@
 <?php
+
 include('template.inc.php');
 include_header_documents("");
 include_once('../include/common.inc.php');
@@ -14,24 +15,31 @@
     if ($rows) {
         foreach ($rows as $row) {
 
-            echo "<tr><td><b>" . $row->value->name . "</b> (".$row->id.")</td>\n";
-            
-             echo "<td>";
-             if (isset($row->value->FOIDocumentsURL)) {
-                 echo '<a href="viewDocument.php?hash='.md5($row->value->FOIDocumentsURL).'">'
-                     .$row->value->FOIDocumentsURL.'</a>';
-             } else {
+            echo "<tr><td><b>" . $row->value->name . "</b>";
+            if ($ENV == "DEV")
+                echo "<br>(" . $row->id . ")";
+            echo "</td>\n";
+
+
+            echo "<td>";
+            if (isset($row->value->FOIDocumentsURL)) {
+                echo '<a href="' . $row->value->FOIDocumentsURL . '">'
+                . $row->value->FOIDocumentsURL . '</a>';
+                if ($ENV == "DEV")
+                    echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
+                    . 'view local copy</a>)</small>';
+            } else {
                 echo "<font color='red'>✘</font>";
             }
             echo "</td>\n<td>";
             if (isset($row->value->FOIDocumentsURL)) {
-            if (file_exists("./scrapers/".$row->id.'.py')) {
-                echo "<font color='green'>✔</font>";
-            } else if (file_exists("./scrapers/".$row->id.'.txt')){
-                echo "<font color='blue'><b>▬</b></font>";
-            } else {
-                echo "<font color='red'>✘</font>";
-            }
+                if (file_exists("./scrapers/" . $row->id . '.py')) {
+                    echo "<font color='green'>✔</font>";
+                } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
+                    echo "<font color='blue'><b>▬</b></font>";
+                } else {
+                    echo "<font color='red'>✘</font>";
+                }
             }
             echo "</td></tr>\n";
         }

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -82,8 +82,21 @@
                 	descriptiontxt = descriptiontxt + " \n" + string
                 doc.update({'description': descriptiontxt})
 		return
+        def getTitle(self, content, entry, doc):  # copy content.string into doc['title']; entry is not used here
+                doc.update({'title': content.string})
+		return
 	def getTable(self, soup):  # return the `table` attribute of the given soup object
 		return soup.table
+	def getDate(self, content, entry, doc):  # parse content.string as a date and store it on doc['date']; entry is unused
+		dtresult = pdt.Calendar().parseDateText(content.string)  # 'cal' is a local of doScrape, so build a calendar here
+		if len(dtresult) == 2:
+			(dtdate,dtr) = dtresult
+			edate = ""+str(dtdate[0])+'-'+str(dtdate[1])+'-'+str(dtdate[2])  # year-month-day, not zero-padded
+		else:
+			edate = datetime.strptime(content.string.strip(), "%d %B %Y").strftime("%Y-%m-%d")  # was date.string: 'date' is not in scope in this method
+		print edate
+		doc.update({'date': edate})
+		return
 
 	def doScrape(self):
 		cal = pdt.Calendar()
@@ -111,19 +124,13 @@
 							
 						if doc == None:
 							print "saving"
-                                                        dtresult = cal.parseDateText(date.string)
-							if len(dtresult) == 2:
-								(dtdate,dtr) = dtresult
-								print dtdate
-                                                        	edate = ""+str(dtdate[0])+'-'+str(dtdate[1])+'-'+str(dtdate[2])
-							else:
-								edate = datetime.strptime(date.string.strip(), "%d %B %Y").strftime("%Y-%m-%d")
-							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': id.string,
-			 				 "date": edate,"title": title.string}
+							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': id.string}
                                 			if links != []:
                                         			doc.update({'links': links})
+                                			self.getTitle(title,row, doc)
+                                			self.getDate(date,row, doc)
 							self.getDescription(description,row, doc)
-                                			if notes != None:
+							if notes != None:
                                         			doc.update({ 'notes': notes.string})
 							foidocsdb.save(doc)
 						else:

--- a/getAgency.php
+++ b/getAgency.php
@@ -119,7 +119,7 @@
 // edit?
 
     $obj = $db->get($_REQUEST['id']);
-    include_header($obj->name);
+    include_header(isset($obj->name) ? $obj->name : "");
 //print_r($row);
     if (sizeof($_POST) > 0) {
 //print_r($_POST);
@@ -200,14 +200,6 @@
 // show all list
     include_header('Agencies');
     try {
-        /* $rows = $db->get_view("app", "showNamesABNs")->rows;
-          //print_r($rows);
-          foreach ($rows as $row) {
-          //   print_r($row);
-          echo '<li><a href="getAgency.php?id=' . $row->key . '">' .
-          (isset($row->value->name) && $row->value->name != "" ? $row->value->name : "NO NAME " . $row->value->abn)
-          . '</a></li>';
-          } */
         $rows = $db->get_view("app", "byCanonicalName")->rows;
         //print_r($rows);
         echo '<ul>';
@@ -215,7 +207,7 @@
             //   print_r($row);
             echo '<li itemscope itemtype="http://schema.org/GovernmentOrganization" typeof="schema:GovernmentOrganization foaf:Organization" about="getAgency.php?id=' . $row->value->_id . '">
 <a href="getAgency.php?id=' . $row->value->_id . '" rel="schema:url foaf:page" property="schema:name foaf:name" itemprop="url"><span itemprop="name">' .
-            $row->value->name
+            (isset($row->value->name) ? $row->value->name : "ERROR NAME MISSING") 
             . '</span></a></li>';
         }
         echo "</ul>";

--- a/include/common.inc.php
+++ b/include/common.inc.php
@@ -17,7 +17,7 @@
 require_once $basePath.'lib/Requests/library/Requests.php';
 
 Requests::register_autoloader();
-
+$ENV = "DEV";
 if (isset($_SERVER['SERVER_NAME']) && $_SERVER['SERVER_NAME'] != 'localhost') {
 
     require $basePath."lib/amon-php/amon.php";
@@ -25,6 +25,7 @@
                     'protocol' => 'http', 
                     'secret_key' => "I2LJ6dOMmlnXgVAkTPFXd5M3ejkga8Gd2FbBt6iqZdw"));
     Amon::setup_exception_handler();
+    $ENV = "PROD";
 }
 
 # Convert a stdClass to an Array. http://www.php.net/manual/en/language.types.object.php#102735