From: Maxious Date: Sun, 21 Oct 2012 07:22:30 +0000 Subject: beginnings of document scrapers mark 2 X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=ca45e4ee475fe30f7acd3c511dbdd6e19fd9683f --- beginnings of document scrapers mark 2 Former-commit-id: fc862f3bada775436406e0ee8adcd4e7d04ac8f6 --- --- a/.gitmodules +++ b/.gitmodules @@ -25,4 +25,7 @@ [submodule "lib/querypath"] path = lib/querypath url = https://github.com/technosophos/querypath.git +[submodule "lib/amon-php"] + path = lib/amon-php + url = https://github.com/martinrusev/amon-php.git --- a/about.php +++ b/about.php @@ -10,8 +10,10 @@ Disclo.gs is a project to monitor Australian Federal Government agencies' compliance with their "proactive disclosure requirements" to make a transparency league table as suggested by the gov2 taskforce http://gov2.net.au/blog/2009/09/19/a-league-ladder-of-psi-openness/.

Attributions

-National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at http://www.naa.gov.au/recordkeeping/thesaurus/index.htm. - +National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at http://www.naa.gov.au/recordkeeping/thesaurus/index.htm
+data.gov.au http://data.gov.au/dataset/directory-gov-au-full-data-export/
+directory.gov.au
+australia.gov.au http://australia.gov.au/about/copyright

Open everything

All documents released CC-BY 3 AU Open source git @ --- a/alaveteli/exportAgencies.csv.php +++ b/alaveteli/exportAgencies.csv.php @@ -1,20 +1,6 @@ id; $row["name"] = trim($agency->value->name); - if (isset($agency->value->foiEmail)) { - $row["request_email"] = $agency->value->foiEmail; - } else { - if ($agency->value->orgType == "FMA-DepartmentOfState") { - $row["request_email"] = "foi@" . GetDomain($agency->value->website); - } else { - $row["request_email"] = $foiEmail[$agency->value->parentOrg]; + $row["request_email"] = (isset($agency->value->foiEmail) ? $agency->value->foiEmail : ""); + $row["short_name"] = (isset($agency->value->shortName) ? $agency->value->shortName : ""); + $row["notes"] = (isset($agency->value->description) ? $agency->value->description : ""); + + $otherBodies = Array(); + if (isset($agency->value->foiBodies)) { + $otherBodies = array_merge($otherBodies, $agency->value->foiBodies); + } + if (isset($agency->value->positions)) { + $positions = Array(); + foreach ($agency->value->positions as $position) { + $positions[] = "Office of the ".$position; } + $otherBodies = array_merge($otherBodies, $positions); } - if (isset($agency->value->shortName)) { - $row["short_name"] = $agency->value->shortName; - } else { - $row["short_name"] = shortName($agency->value->name); + sort($otherBodies); + if (count($otherBodies) > 0) { + $row["notes"] .= "
This department also responds to requests for information held by " . implode(", ", $otherBodies); } - $row["notes"] = ""; + $row["publication_scheme"] = (isset($agency->value->infoPublicationSchemeURL) ? $agency->value->infoPublicationSchemeURL : ""); $row["home_page"] = (isset($agency->value->website) ? $agency->value->website : ""); if ($agency->value->orgType == "FMA-DepartmentOfState") { - $row["tag_string"] = $tag[$agency->value->_id] . " " . $agency->value->orgType; + $row["tag_string"] = $tag[$agency->value->_id]; } else { - $row["tag_string"] = $tag[$agency->value->parentOrg] . " " . $agency->value->orgType; + $row["tag_string"] = $tag[$agency->value->parentOrg]; } - + $row["tag_string"] .= " " . $agency->value->orgType; + $row["tag_string"] .= " federal"; fputcsv($fp, array_values($row)); - - if (isset($agency->value->foiBodies)) { - foreach ($agency->value->foiBodies as $foiBody) { - $row['name'] = iconv("UTF-8", "ASCII//TRANSLIT",$foiBody); - $row["short_name"] = shortName($foiBody); - fputcsv($fp, array_values($row)); - } - } - if (isset($agency->value->positions)) { - foreach ($agency->value->positions as $position) { - $row['name'] = iconv("UTF-8", "ASCII//TRANSLIT",$position); - $row["short_name"] = shortName($position); - fputcsv($fp, array_values($row)); - } - } } } } catch (SetteeRestClientException $e) { --- /dev/null +++ b/documents/google676a414ad086cefb.html @@ -1,1 +1,2 @@ +google-site-verification: google676a414ad086cefb.html --- /dev/null +++ b/documents/index.php @@ -1,1 +1,15 @@ +australian disclosure logs +are you looking for more information about: +contracts +gov orgs +lobbyists + +1/1/11 title (Dept dfggdfgdf) +description: +source link: +documents: + #1 title link + + +rss feed here --- /dev/null +++ b/documents/rss.xml.php @@ -1,1 +1,30 @@ +setTitle('Last Modified - All'); +$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php'); +$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed 
Writer'); +//Retriving informations from database +$rows = $db->get_view("app", "byLastModified")->rows; +//print_r($rows); +foreach ($rows as $row) { + //Create an empty FeedItem + $newItem = $TestFeed->createNewItem(); + //Add elements to the feed item + $newItem->setTitle($row['name']); + $newItem->setLink($row['id']); + $newItem->setDate(date("c", $row['metadata']['lastModified'])); + $newItem->setDescription($row['name']); + //Now add the feed item + $TestFeed->addItem($newItem); +} +//OK. Everything is done. Now genarate the feed. +$TestFeed->genarateFeed(); +?> --- /dev/null +++ b/documents/runScrapers.php --- /dev/null +++ b/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py @@ -1,1 +1,1 @@ - +http://www.apvma.gov.au/rss/disclosure.rss --- /dev/null +++ b/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py @@ -1,1 +1,1 @@ - +http://www.daff.gov.au/about/foi/ips/disclosure_log --- /dev/null +++ b/documents/scrapers/be9996f0ac58f71f23d074e82d44ead3.py @@ -1,1 +1,2 @@ +http://foi.deewr.gov.au/disclosure-log/rss --- /dev/null +++ b/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py @@ -1,1 +1,1 @@ - +www.finance.gov.au/foi/disclosure-log/foi-rss.xml --- /dev/null +++ b/documents/scrapers/rtk.py @@ -1,1 +1,1 @@ - +http://www.righttoknow.org.au/feed/search/%20(latest_status:successful%20OR%20latest_status:partially_successful) --- /dev/null +++ b/documents/search.php @@ -1,1 +1,25 @@ + +
+

Search

+
+
+ + +
+body); + $db = $server->get_db('disclosr-documents'); + foreach ($results->rows as $result) { + //print_r($result); + //$row = $db->get($result->id); + echo $result->doc->_id." ".$result->doc->url."
".PHP_EOL; + } +} +include_footer(); +?> --- a/getAgency.php +++ b/getAgency.php @@ -9,7 +9,11 @@ if (strpos($key, "_") === 0 || $key== "metadata") return; echo ""; - echo "" . $schemas['agency']["properties"][$key]['x-title'] . "
" . $schemas['agency']["properties"][$key]['description'] . ""; + echo ""; + if (isset($schemas['agency']["properties"][$key])) { + echo $schemas['agency']["properties"][$key]['x-title'] . "
" . $schemas['agency']["properties"][$key]['description'].""; + } + echo ""; if (is_array($value)) { echo "
    "; foreach ($value as $subkey => $subvalue) { --- a/include/common.inc.php +++ b/include/common.inc.php @@ -14,6 +14,11 @@ require_once $basePath.'lib/Requests/library/Requests.php'; Requests::register_autoloader(); +require $basePath."lib/amon-php/amon.php"; +Amon::config(array('address'=> 'http://127.0.0.1:2464', + 'protocol' => 'http', + 'secret_key' => "I2LJ6dOMmlnXgVAkTPFXd5M3ejkga8Gd2FbBt6iqZdw")); +Amon::setup_exception_handler(); # Convert a stdClass to an Array. http://www.php.net/manual/en/language.types.object.php#102735 function object_to_array(stdClass $Class) { --- a/include/couchdb.inc.php +++ b/include/couchdb.inc.php @@ -168,6 +168,7 @@ $server = new SetteeServer($serverAddr); function setteErrorHandler($e) { + Amon::log($e->getMessage() . " " . print_r($_SERVER,true), array('error')); echo $e->getMessage() . "
    " . PHP_EOL; } --- /dev/null +++ b/js/bubbletree --- /dev/null +++ b/js/flotr2 --- /dev/null +++ b/js/sigma --- /dev/null +++ b/lib/amon-php --- a/rss.xml.php +++ /dev/null @@ -1,30 +1,1 @@ -setTitle('Last Modified - All'); -$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php'); -$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer'); -//Retriving informations from database -$rows = $db->get_view("app", "byLastModified")->rows; -//print_r($rows); -foreach ($rows as $row) { - //Create an empty FeedItem - $newItem = $TestFeed->createNewItem(); - //Add elements to the feed item - $newItem->setTitle($row['name']); - $newItem->setLink($row['id']); - $newItem->setDate(date("c", $row['metadata']['lastModified'])); - $newItem->setDescription($row['name']); - //Now add the feed item - $TestFeed->addItem($newItem); -} -//OK. Everything is done. Now genarate the feed. -$TestFeed->genarateFeed(); -?> --- a/search.php +++ /dev/null @@ -1,25 +1,1 @@ - -
    -

    Search

    -
    -
    - - -
    -body); - $db = $server->get_db('disclosr-documents'); - foreach ($results->rows as $result) { - //print_r($result); - //$row = $db->get($result->id); - echo $result->doc->_id." ".$result->doc->url."
    ".PHP_EOL; - } -} -include_footer(); -?>