From: Maxious Date: Sun, 21 Oct 2012 07:22:30 +0000 Subject: beginnings of document scrapers mark 2 X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=ca45e4ee475fe30f7acd3c511dbdd6e19fd9683f --- beginnings of document scrapers mark 2 Former-commit-id: fc862f3bada775436406e0ee8adcd4e7d04ac8f6 --- --- a/.gitmodules +++ b/.gitmodules @@ -25,7 +25,7 @@ [submodule "lib/querypath"] path = lib/querypath url = https://github.com/technosophos/querypath.git -[submodule "sigma.js"] - path = sigma.js - url = https://github.com/jacomyal/sigma.js.git +[submodule "lib/amon-php"] + path = lib/amon-php + url = https://github.com/martinrusev/amon-php.git --- a/alaveteli/exportAgencies.csv.php +++ b/alaveteli/exportAgencies.csv.php @@ -52,18 +52,23 @@ $row["request_email"] = (isset($agency->value->foiEmail) ? $agency->value->foiEmail : ""); $row["short_name"] = (isset($agency->value->shortName) ? $agency->value->shortName : ""); $row["notes"] = (isset($agency->value->description) ? $agency->value->description : ""); - + $otherBodies = Array(); if (isset($agency->value->foiBodies)) { $otherBodies = array_merge($otherBodies, $agency->value->foiBodies); } if (isset($agency->value->positions)) { - $otherBodies = array_merge($otherBodies, $agency->value->positions); + $positions = Array(); + foreach ($agency->value->positions as $position) { + $positions[] = "Office of the ".$position; + } + $otherBodies = array_merge($otherBodies, $positions); } + sort($otherBodies); if (count($otherBodies) > 0) { - $row["notes"] .= "
This department also responds to requests for information held by ".implode(",",$otherBodies); + $row["notes"] .= "
This department also responds to requests for information held by " . implode(", ", $otherBodies); } - + $row["publication_scheme"] = (isset($agency->value->infoPublicationSchemeURL) ? $agency->value->infoPublicationSchemeURL : ""); $row["home_page"] = (isset($agency->value->website) ? $agency->value->website : ""); if ($agency->value->orgType == "FMA-DepartmentOfState") { @@ -74,9 +79,6 @@ $row["tag_string"] .= " " . $agency->value->orgType; $row["tag_string"] .= " federal"; fputcsv($fp, array_values($row)); - - - } } } catch (SetteeRestClientException $e) { --- /dev/null +++ b/documents/google676a414ad086cefb.html @@ -1,1 +1,2 @@ +google-site-verification: google676a414ad086cefb.html --- /dev/null +++ b/documents/index.php @@ -1,1 +1,15 @@ +australian disclosure logs +are you looking for more information about: +contracts +gov orgs +lobbyists + +1/1/11 title (Dept dfggdfgdf) +description: +source link: +documents: + #1 title link + + +rss feed here --- /dev/null +++ b/documents/rss.xml.php @@ -1,1 +1,30 @@ +setTitle('Last Modified - All'); +$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php'); +$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer'); +//Retriving informations from database +$rows = $db->get_view("app", "byLastModified")->rows; +//print_r($rows); +foreach ($rows as $row) { + //Create an empty FeedItem + $newItem = $TestFeed->createNewItem(); + //Add elements to the feed item + $newItem->setTitle($row['name']); + $newItem->setLink($row['id']); + $newItem->setDate(date("c", $row['metadata']['lastModified'])); + $newItem->setDescription($row['name']); + //Now add the feed item + $TestFeed->addItem($newItem); +} +//OK. Everything is done. Now genarate the feed. +$TestFeed->genarateFeed(); +?> --- /dev/null +++ b/documents/runScrapers.php --- /dev/null +++ b/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py @@ -1,1 +1,1 @@ - +http://www.apvma.gov.au/rss/disclosure.rss --- /dev/null +++ b/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py @@ -1,1 +1,1 @@ - +http://www.daff.gov.au/about/foi/ips/disclosure_log --- /dev/null +++ b/documents/scrapers/be9996f0ac58f71f23d074e82d44ead3.py @@ -1,1 +1,2 @@ +http://foi.deewr.gov.au/disclosure-log/rss --- /dev/null +++ b/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py @@ -1,1 +1,1 @@ - +www.finance.gov.au/foi/disclosure-log/foi-rss.xml --- /dev/null +++ b/documents/scrapers/rtk.py @@ -1,1 +1,1 @@ - +http://www.righttoknow.org.au/feed/search/%20(latest_status:successful%20OR%20latest_status:partially_successful) --- /dev/null +++ b/documents/search.php @@ -1,1 +1,25 @@ + +
+

Search

+
+
+ + +
+body); + $db = $server->get_db('disclosr-documents'); + foreach ($results->rows as $result) { + //print_r($result); + //$row = $db->get($result->id); + echo $result->doc->_id." ".$result->doc->url."
".PHP_EOL; + } +} +include_footer(); +?> --- a/getAgency.php +++ b/getAgency.php @@ -9,7 +9,11 @@ if (strpos($key, "_") === 0 || $key== "metadata") return; echo ""; - echo "" . $schemas['agency']["properties"][$key]['x-title'] . "
" . $schemas['agency']["properties"][$key]['description'] . ""; + echo ""; + if (isset($schemas['agency']["properties"][$key])) { + echo $schemas['agency']["properties"][$key]['x-title'] . "
" . $schemas['agency']["properties"][$key]['description'].""; + } + echo ""; if (is_array($value)) { echo "
    "; foreach ($value as $subkey => $subvalue) { --- a/include/common.inc.php +++ b/include/common.inc.php @@ -14,6 +14,11 @@ require_once $basePath.'lib/Requests/library/Requests.php'; Requests::register_autoloader(); +require $basePath."lib/amon-php/amon.php"; +Amon::config(array('address'=> 'http://127.0.0.1:2464', + 'protocol' => 'http', + 'secret_key' => "I2LJ6dOMmlnXgVAkTPFXd5M3ejkga8Gd2FbBt6iqZdw")); +Amon::setup_exception_handler(); # Convert a stdClass to an Array. http://www.php.net/manual/en/language.types.object.php#102735 function object_to_array(stdClass $Class) { --- a/include/couchdb.inc.php +++ b/include/couchdb.inc.php @@ -168,6 +168,7 @@ $server = new SetteeServer($serverAddr); function setteErrorHandler($e) { + Amon::log($e->getMessage() . " " . print_r($_SERVER,true), array('error')); echo $e->getMessage() . "
    " . PHP_EOL; } --- /dev/null +++ b/lib/amon-php --- a/rss.xml.php +++ /dev/null @@ -1,30 +1,1 @@ -setTitle('Last Modified - All'); -$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php'); -$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer'); -//Retriving informations from database -$rows = $db->get_view("app", "byLastModified")->rows; -//print_r($rows); -foreach ($rows as $row) { - //Create an empty FeedItem - $newItem = $TestFeed->createNewItem(); - //Add elements to the feed item - $newItem->setTitle($row['name']); - $newItem->setLink($row['id']); - $newItem->setDate(date("c", $row['metadata']['lastModified'])); - $newItem->setDescription($row['name']); - //Now add the feed item - $TestFeed->addItem($newItem); -} -//OK. Everything is done. Now genarate the feed. -$TestFeed->genarateFeed(); -?> --- a/search.php +++ /dev/null @@ -1,25 +1,1 @@ - -
    -

    Search

    -
    -
    - - -
    -body); - $db = $server->get_db('disclosr-documents'); - foreach ($results->rows as $result) { - //print_r($result); - //$row = $db->get($result->id); - echo $result->doc->_id." ".$result->doc->url."
    ".PHP_EOL; - } -} -include_footer(); -?>