[submodule "couchdb/couchdb-lucene"] | [submodule "couchdb/couchdb-lucene"] |
path = couchdb/couchdb-lucene | path = couchdb/couchdb-lucene |
url = https://github.com/rnewson/couchdb-lucene.git | url = https://github.com/rnewson/couchdb-lucene.git |
[submodule "couchdb/settee"] | [submodule "couchdb/settee"] |
path = couchdb/settee | path = couchdb/settee |
url = https://github.com/inadarei/settee.git | url = https://github.com/inadarei/settee.git |
[submodule "lib/php-diff"] | [submodule "lib/php-diff"] |
path = lib/php-diff | path = lib/php-diff |
url = https://github.com/chrisboulton/php-diff.git | url = https://github.com/chrisboulton/php-diff.git |
[submodule "lib/Requests"] | [submodule "lib/Requests"] |
path = lib/Requests | path = lib/Requests |
url = https://github.com/rmccue/Requests.git | url = https://github.com/rmccue/Requests.git |
[submodule "js/flotr2"] | [submodule "js/flotr2"] |
path = js/flotr2 | path = js/flotr2 |
url = https://github.com/HumbleSoftware/Flotr2.git | url = https://github.com/HumbleSoftware/Flotr2.git |
[submodule "lib/phpquery"] | [submodule "lib/phpquery"] |
path = lib/phpquery | path = lib/phpquery |
url = https://github.com/TobiaszCudnik/phpquery.git | url = https://github.com/TobiaszCudnik/phpquery.git |
[submodule "js/sigma"] | [submodule "js/sigma"] |
path = js/sigma | path = js/sigma |
url = https://github.com/jacomyal/sigma.js.git | url = https://github.com/jacomyal/sigma.js.git |
[submodule "js/bubbletree"] | [submodule "js/bubbletree"] |
path = js/bubbletree | path = js/bubbletree |
url = https://github.com/okfn/bubbletree.git | url = https://github.com/okfn/bubbletree.git |
[submodule "lib/querypath"] | [submodule "lib/querypath"] |
path = lib/querypath | path = lib/querypath |
url = https://github.com/technosophos/querypath.git | url = https://github.com/technosophos/querypath.git |
[submodule "sigma.js"] | [submodule "lib/amon-php"] |
path = sigma.js | path = lib/amon-php |
url = https://github.com/jacomyal/sigma.js.git | url = https://github.com/martinrusev/amon-php.git |
<?php
// Set-up for the Right To Know import: load the shared bootstrap, open the
// agencies database and index the "byName" view so that a trimmed view key
// (the agency name) maps to that row's value.
require_once '../include/common.inc.php';

$db = $server->get_db('disclosr-agencies');

// Both arrays are read and written by extractCSVAccounts() through "global".
$accounts = array();
$nametoid = array();

$rows = $db->get_view("app", "byName")->rows;
foreach ($rows as $agencyRow) {
    $trimmedName = trim($agencyRow->key);
    $nametoid[$trimmedName] = $agencyRow->value;
}
/**
 * Download a CSV file and record a Right To Know body URL for every row
 * whose agency name is present in the global $nametoid lookup.
 *
 * The first CSV row is treated as the header row. For each later row the
 * value in the $nameField column is trimmed and looked up in $nametoid; on a
 * match, $accounts[<looked-up id>]["rtkURLs"][<agency name>] is set to the
 * righttoknow.org.au body URL built from the $accountField column.
 * Progress and problems are echoed; nothing is returned.
 *
 * @param string $url          Location of the CSV file to download.
 * @param string $nameField    Header text of the column holding the agency name.
 * @param string $accountField Header text of the column holding the URL name.
 * @param mixed  $filter       Unused. Optional so that three-argument calls
 *                             do not fail on PHP versions that reject too
 *                             few arguments.
 * @return void
 */
function extractCSVAccounts($url, $nameField, $accountField, $filter = null) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    echo $url;
    // NOTE(review): splitting rows with str_getcsv(..., "\n") is kept from the
    // original; presumably quoted fields containing commas are not preserved
    // by this two-pass parse - verify against real data.
    $Data = str_getcsv($request->body, "\n"); //parse the rows
    $nameColumn = false;
    $accountColumn = false;
    foreach ($Data as $num => $line) {
        $Row = str_getcsv($line, ",");
        if ($num == 0) {
            print_r($Row);
            // Resolve both columns once. array_search() returns false when a
            // header is absent, and using false as an array offset would
            // silently read column 0, so stop instead.
            $nameColumn = array_search($nameField, $Row, true);
            $accountColumn = array_search($accountField, $Row, true);
            if ($nameColumn === false || $accountColumn === false) {
                echo "error finding columns \"$nameField\" and \"$accountField\" in header" . PHP_EOL;
                return;
            }
            continue;
        }
        if (!isset($Row[$nameColumn])) {
            echo "error finding any agency" . $line . PHP_EOL;
            continue;
        }
        $agencyName = $Row[$nameColumn];
        $agencyKey = trim($agencyName);
        // Direct key lookup instead of in_array(array_keys(...)), which
        // rebuilt the key list for every row and compared loosely.
        if (!array_key_exists($agencyKey, $nametoid)) {
            echo "$agencyName missing" . PHP_EOL;
            continue;
        }
        if (!isset($Row[$accountColumn])) {
            echo "error finding account for " . $agencyName . PHP_EOL;
            continue;
        }
        echo $agencyName . PHP_EOL;
        $accounts[$nametoid[$agencyKey]]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/' . $Row[$accountColumn];
    }
}
// extractCSVAccounts() declares a fourth parameter ($filter) that it never
// reads; null is passed for it explicitly so the call supplies every declared
// argument and does not fail on PHP versions that reject too few arguments.
extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv", "Agency", "URL name", null);
print_r($accounts);
// Disabled write-back step, kept for reference: for each collected id it
// would load the document, merge every account list into the matching
// "has<type>" array (de-duplicated) and save the document.
/* foreach ($accounts as $id => $accountTypes) {
  echo $id . "<br>" . PHP_EOL;
  $doc = object_to_array($db->get($id));
  // print_r($doc);
  foreach ($accountTypes as $accountType => $accounts) {
  if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
  $doc["has" . $accountType] = Array();
  }
  $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
  }
  $db->save($doc);
  }*/
?>
<?php | <?php |
// Streams a CSV export of agencies from the 'disclosr-agencies' database to
// php://output. An agency is exported only when it has a foiEmail that is not
// the string "null" and has no status property set.
//
// NOTE(review): every line of this section holds two cells separated by " | ".
// The cells are identical except where positions are merged into $otherBodies
// and where that list is joined into the notes column; which cell is the
// current version cannot be determined from this section - confirm before
// editing the code.
include_once("../include/common.inc.php"); | include_once("../include/common.inc.php"); |
setlocale(LC_CTYPE, 'C'); | setlocale(LC_CTYPE, 'C'); |
// Column titles written as the first CSV line; each data row below must fill
// its keys in this same order because it is written with array_values().
$headers = Array("#id", "name", "request_email", "short_name", "notes", "publication_scheme", "home_page", "tag_string"); | $headers = Array("#id", "name", "request_email", "short_name", "notes", "publication_scheme", "home_page", "tag_string"); |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
// $tag maps a row id from the byDeptStateName view to
// phrase_to_tag(dept_to_portfolio(<view key>)). Any Settee error aborts the script.
$tag = Array(); | $tag = Array(); |
try { | try { |
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows; | $rows = $db->get_view("app", "byDeptStateName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
$tag[$row->id] = phrase_to_tag(dept_to_portfolio($row->key)); | $tag[$row->id] = phrase_to_tag(dept_to_portfolio($row->key)); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
die(); | die(); |
} | } |
// $foiEmail maps key => value from the foiEmails view. It is not read again
// within this section, but a failure of the view still aborts the script.
$foiEmail = Array(); | $foiEmail = Array(); |
try { | try { |
$rows = $db->get_view("app", "foiEmails", null, true)->rows; | $rows = $db->get_view("app", "foiEmails", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
$foiEmail[$row->key] = $row->value; | $foiEmail[$row->key] = $row->value; |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
die(); | die(); |
} | } |
// The CSV is written straight to the response body.
$fp = fopen('php://output', 'w'); | $fp = fopen('php://output', 'w'); |
if ($fp && $db) { | if ($fp && $db) { |
// Download headers: CSV attachment named with the current ISO 8601 timestamp.
header('Content-Type: text/csv; charset=utf-8'); | header('Content-Type: text/csv; charset=utf-8'); |
header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"'); | header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"'); |
header('Pragma: no-cache'); | header('Pragma: no-cache'); |
header('Expires: 0'); | header('Expires: 0'); |
fputcsv($fp, $headers); | fputcsv($fp, $headers); |
try { | try { |
$agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; | $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($agencies as $agency) { | foreach ($agencies as $agency) { |
// print_r($agency); | // print_r($agency); |
if (isset($agency->value->foiEmail) && $agency->value->foiEmail != "null" && !isset($agency->value->status)) { | if (isset($agency->value->foiEmail) && $agency->value->foiEmail != "null" && !isset($agency->value->status)) { |
$row = Array(); | $row = Array(); |
$row["#id"] = $agency->id; | $row["#id"] = $agency->id; |
$row["name"] = trim($agency->value->name); | $row["name"] = trim($agency->value->name); |
$row["request_email"] = (isset($agency->value->foiEmail) ? $agency->value->foiEmail : ""); | $row["request_email"] = (isset($agency->value->foiEmail) ? $agency->value->foiEmail : ""); |
$row["short_name"] = (isset($agency->value->shortName) ? $agency->value->shortName : ""); | $row["short_name"] = (isset($agency->value->shortName) ? $agency->value->shortName : ""); |
$row["notes"] = (isset($agency->value->description) ? $agency->value->description : ""); | $row["notes"] = (isset($agency->value->description) ? $agency->value->description : ""); |
// Collect foiBodies and positions; when any exist they are appended to the
// notes column as a sentence.
$otherBodies = Array(); | $otherBodies = Array(); |
if (isset($agency->value->foiBodies)) { | if (isset($agency->value->foiBodies)) { |
$otherBodies = array_merge($otherBodies, $agency->value->foiBodies); | $otherBodies = array_merge($otherBodies, $agency->value->foiBodies); |
} | } |
// The cells differ from here: the left cell merges positions unchanged and
// then sorts the combined list; the right cell prefixes every position with
// "Office of the " before merging and does not sort.
if (isset($agency->value->positions)) { | if (isset($agency->value->positions)) { |
$otherBodies = array_merge($otherBodies, $agency->value->positions); | $positions = Array(); |
foreach ($agency->value->positions as $position) { | |
$positions[] = "Office of the ".$position; | |
} | |
$otherBodies = array_merge($otherBodies, $positions); | |
} | } |
sort($otherBodies); | |
if (count($otherBodies) > 0) { | if (count($otherBodies) > 0) { |
// Left cell joins with "," and the right cell with ", ".
$row["notes"] .= "<br/> This department also responds to requests for information held by ".implode(",",$otherBodies); | $row["notes"] .= "<br/> This department also responds to requests for information held by " . implode(", ", $otherBodies); |
} | } |
$row["publication_scheme"] = (isset($agency->value->infoPublicationSchemeURL) ? $agency->value->infoPublicationSchemeURL : ""); | $row["publication_scheme"] = (isset($agency->value->infoPublicationSchemeURL) ? $agency->value->infoPublicationSchemeURL : ""); |
$row["home_page"] = (isset($agency->value->website) ? $agency->value->website : ""); | $row["home_page"] = (isset($agency->value->website) ? $agency->value->website : ""); |
// A department of state is tagged with its own entry in $tag; every other
// organisation uses the entry of its parentOrg.
// NOTE(review): neither lookup is guarded with isset(); presumably an agency
// without parentOrg, or with an id missing from $tag, raises a notice and
// gets an empty tag - confirm.
if ($agency->value->orgType == "FMA-DepartmentOfState") { | if ($agency->value->orgType == "FMA-DepartmentOfState") { |
$row["tag_string"] = $tag[$agency->value->_id]; | $row["tag_string"] = $tag[$agency->value->_id]; |
} else { | } else { |
$row["tag_string"] = $tag[$agency->value->parentOrg]; | $row["tag_string"] = $tag[$agency->value->parentOrg]; |
} | } |
$row["tag_string"] .= " " . $agency->value->orgType; | $row["tag_string"] .= " " . $agency->value->orgType; |
$row["tag_string"] .= " federal"; | $row["tag_string"] .= " federal"; |
fputcsv($fp, array_values($row)); | fputcsv($fp, array_values($row)); |
} | } |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
// Stop here so nothing else is appended to the CSV response.
die; | die; |
} | } |
?> | ?> |
<?php
// Renders an HTML table with one row per agency returned by the
// byCanonicalName view. The second cell links to viewDocument.php (keyed by
// the md5 of the URL) when the agency has an FOIDocumentsURL.
// NOTE(review): the header row declares three columns but each data row
// emits two cells - confirm what the third column should contain.
include_once('../include/common.inc.php');
include_header('Webserver and Accessiblity');
echo "<table>
<tr><th>name</th><th>webserver</th><th>accessiblity errors</th></tr>";
$agenciesdb = $server->get_db('disclosr-agencies');
// Not read below; kept because get_db() may have side effects not visible here.
$docsdb = $server->get_db('disclosr-documents');
try {
    $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
    if ($rows) {
        foreach ($rows as $row) {
            // Names and URLs come from the database; escape them so that
            // characters such as & or < cannot break the markup.
            echo "<tr><td>" . htmlspecialchars($row->value->name, ENT_QUOTES, 'UTF-8') . "</td>\n";
            echo "<td>";
            if (isset($row->value->FOIDocumentsURL)) {
                $documentsUrl = $row->value->FOIDocumentsURL;
                // The hash is computed from the raw URL, as before; only the
                // visible link text is escaped.
                echo '<a href="viewDocument.php?hash=' . md5($documentsUrl) . '">'
                    . htmlspecialchars($documentsUrl, ENT_QUOTES, 'UTF-8') . '</a>';
            }
            echo "</td>\n";
            echo "</tr>\n";
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
// Close the table even when the view lookup failed, so the footer is not
// rendered inside an open <table>.
echo "</table>";
include_footer();
?>
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import scrape | |
from bs4 import BeautifulSoup | |
import abc | |
class GenericOAICDisclogScraper(object):
    """Base class for scraping a disclosure log published as an HTML table.

    Subclasses supply the agency id, the page URL and the column order.
    doScrape() fetches the page, walks the rows of the first table and saves
    one document per five-column row into the 'disclosr-foidocuments'
    database unless a document with the same hash already exists.
    """
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def getAgencyID(self):
        """ disclosr agency id """
        return

    @abc.abstractmethod
    def getURL(self):
        """ disclog URL"""
        return

    @abc.abstractmethod
    def getColumns(self, columns):
        """ rearranges columns if required

        Receives the five <td> cells of a row and must return them in the
        order (id, date, description, title, notes).
        """
        return

    def doScrape(self):
        """Fetch the disclosure log page and store every new table row.

        Nothing is returned; progress and problems are printed.
        """
        foidocsdb = scrape.couch['disclosr-foidocuments']
        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
        if content is None:
            return
        if mime_type not in ("text/html", "application/xhtml+xml", "application/xml"):
            return
        # http://www.crummy.com/software/BeautifulSoup/documentation.html
        soup = BeautifulSoup(content)
        if soup.table is None:
            # A page without any table used to raise AttributeError here.
            print("ERROR no table found")
            return
        for row in soup.table.find_all('tr'):
            columns = row.find_all('td')
            if len(columns) == 5:
                (doc_id, date, description, title, notes) = self.getColumns(columns)
                if doc_id.string is None:
                    # .string is None when the cell does not hold exactly one
                    # string; skip the row instead of aborting the scrape on
                    # the concatenation below.
                    print("ERROR id cell has no single string")
                    print(row)
                    continue
                print(doc_id.string)
                doc_hash = scrape.mkhash(url + doc_id.string)
                # has_attr replaces the deprecated Tag.has_key.
                links = [scrape.fullurl(url, atag['href'])
                         for atag in row.find_all("a") if atag.has_attr('href')]
                doc = foidocsdb.get(doc_hash)
                descriptiontxt = "".join(description.stripped_strings)
                if doc is None:
                    print("saving")
                    doc = {'_id': doc_hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), "links": links, 'docID': doc_id.string,
                           "date": date.string, "description": descriptiontxt, "title": title.string, "notes": notes.string}
                    foidocsdb.save(doc)
                else:
                    print("already saved")
            elif len(row.find_all('th')) == 5:
                print("header row")
            else:
                print("ERROR number of columns incorrect")
                print(row)
google-site-verification: google676a414ad086cefb.html | |
australian disclosure logs | |
are you looking for more information about: | |
contracts | |
gov orgs | |
lobbyists | |
1/1/11 title (Dept dfggdfgdf) | |
description: | |
source link: | |
documents: | |
#1 title link | |
rss feed here |
<?php
// Draft RSS 2.0 feed of recently modified records, built with the Universal
// Feed Generator class (lib/FeedWriter.php).
// Planned content: "Agency X updated Y", new files, diff of plain text/link
// text; a feed for just one agency or for all.
include("lib/FeedWriter.php");

// Feed object plus its channel-level title, link and description.
$feed = new FeedWriter(RSS2);
$feed->setTitle('Last Modified - All');
$feed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php');
$feed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer');

// NOTE(review): $db is not assigned anywhere in this script - presumably a
// bootstrap include and a get_db() call are still missing; confirm.
$rows = $db->get_view("app", "byLastModified")->rows;
//print_r($rows);

// One feed item per view row.
// NOTE(review): rows are indexed as arrays here, while the other scripts in
// this dump read view rows as objects - verify the row shape.
foreach ($rows as $row) {
    $item = $feed->createNewItem();
    $item->setTitle($row['name']);
    $item->setLink($row['id']);
    $item->setDate(date("c", $row['metadata']['lastModified']));
    $item->setDescription($row['name']);
    $feed->addItem($item);
}

// Emit the feed. The method name is spelled exactly as the original calls it;
// verify against lib/FeedWriter.php before renaming.
$feed->genarateFeed();
?>