<?php | <?php |
include_once("../include/common.inc.php"); | include_once("../include/common.inc.php"); |
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php'); | require($basePath . 'lib/phpquery/phpQuery/phpQuery.php'); |
setlocale(LC_CTYPE, 'C'); | setlocale(LC_CTYPE, 'C'); |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
// Cleanup pass: remove the scrapeDepth and lastScraped fields from every agency
// document returned by the app/byCanonicalName view.
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        $changed = false;
        foreach (array('scrapeDepth', 'lastScraped') as $staleField) {
            if (isset($agency->value->$staleField)) {
                unset($agency->value->$staleField);
                $changed = true;
            }
        }
        // Only write back documents that actually lost a field; re-saving an
        // untouched document just costs a round trip to the database.
        if (!$changed) {
            continue;
        }
        $db->save($agency->value);
        echo "<hr>";
        flush();
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
// metatags: for every agency that has a website but no stored metaTags yet,
// download the page, collect its named <meta> elements and save them on the
// agency document.
// Notes carried over from the original source (field renames, not acted on here):
//   hasRestricitiveLicence / hasRestrictiveLicense -> has Restrictive Licence
//   "hasYoutube" -> Tube
//   "comment" -> "comments"
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        if (isset($agency->value->metaTags) || !isset($agency->value->website)) {
            continue;
        }
        echo htmlspecialchars($agency->value->name . " " . $agency->value->website, ENT_QUOTES, 'UTF-8') . "<br />\n";

        // A single unreachable or malformed site must not abort the whole run.
        // Nothing is saved for it, so it is simply retried on the next run.
        try {
            $request = Requests::get($agency->value->website);
            $html = phpQuery::newDocumentHTML($request->body);
            phpQuery::selectDocument($html);
            $metaElements = pq('meta')->elements;
        } catch (SetteeRestClientException $e) {
            // Database errors keep their original handling in the outer catch.
            throw $e;
        } catch (Exception $e) {
            echo "fetch failed: " . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8') . "<hr>";
            flush();
            continue;
        }

        $agency->value->metaTags = Array();
        foreach ($metaElements as $meta) {
            $tagName = $meta->getAttribute('name');
            $content = $meta->getAttribute('content');
            if ($tagName != "") {
                // Tag names and content come from a remote page: escape before echoing.
                echo htmlspecialchars("$tagName == $content", ENT_QUOTES, 'UTF-8') . " <br>\n";
                $agency->value->metaTags[$tagName] = $content;
            }
        }
        $db->save($agency->value);
        echo "<hr>";
        flush();
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
?> | ?> |
<?php | <?php |
require_once '../include/common.inc.php'; | require_once '../include/common.inc.php'; |
// Result containers: $nametoid maps a trimmed agency name to the value the
// app/byName view emits for it (later used as a document id); $accounts is
// filled by extractCSVAccounts().
$nametoid = Array();
$accounts = Array();

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;
foreach ($rows as $row) {
    $trimmedName = trim($row->key);
    $nametoid[$trimmedName] = $row->value;
}
/**
 * Downloads a CSV of authorities and records, per known agency, its
 * RightToKnow body URL and its "Notes" text in the global $accounts array.
 *
 * Results are keyed by the id found in the global $nametoid lookup:
 *   $accounts[<id>]["rtkURLs"][<agency name>]         = body URL
 *   $accounts[<id>]["rtkDescriptions"][<agency name>] = Notes column value
 *
 * Progress and problems are echoed; nothing is returned.
 *
 * @param string $url          Location of the CSV file.
 * @param string $nameField    Header of the column holding the agency name.
 * @param string $accountField Header of the column holding the URL name.
 * @param mixed  $filter       Unused. Optional so that the existing
 *                             three-argument call keeps working.
 */
function extractCSVAccounts($url, $nameField, $accountField, $filter = null) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    echo $url;
    $Data = str_getcsv($request->body, "\n"); //parse the rows

    $nameColumn = false;
    $accountColumn = false;
    $notesColumn = false;
    foreach ($Data as $num => $line) {
        $Row = str_getcsv($line, ",");
        if ($num == 0) {
            $headers = $Row;
            print_r($headers);
            // Resolve the column positions once. array_search() returns false
            // for a missing header, which would otherwise index column 0.
            $nameColumn = array_search($nameField, $headers);
            $accountColumn = array_search($accountField, $headers);
            $notesColumn = array_search("Notes", $headers);
            if ($nameColumn === false || $accountColumn === false) {
                echo "error: column '$nameField' or '$accountField' not found in CSV header" . PHP_EOL;
                return;
            }
            continue;
        }

        if (!isset($Row[$nameColumn])) {
            echo "error finding any agency" . $line . PHP_EOL;
            continue;
        }

        $agencyName = $Row[$nameColumn];
        $lookupName = trim($agencyName);
        if (!array_key_exists($lookupName, $nametoid)) {
            echo "$agencyName missing" . PHP_EOL;
            continue;
        }

        echo $agencyName . PHP_EOL;
        $id = $nametoid[$lookupName];
        if (isset($Row[$accountColumn])) {
            $accounts[$id]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/' . $Row[$accountColumn];
        }
        if ($notesColumn !== false && isset($Row[$notesColumn])) {
            $accounts[$id]["rtkDescriptions"][$agencyName] = $Row[$notesColumn];
        }
    }
}
// Pull the RightToKnow authority list into $accounts and dump it for inspection.
// NOTE(review): this call omits the fourth ($filter) argument of extractCSVAccounts().
extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv","Agency","URL name");
print_r($accounts);
// The merge-and-save loop below is disabled, so nothing is written back to the
// database by this script as it stands.
// NOTE(review): before re-enabling it, fix the rtkDescriptions part: it uses
// $valueType / $values, which this version of the loop never defines, and it
// reads $doc->value->name although $doc is the result of object_to_array().
/* foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // print_r($doc);
    foreach ($accountTypes as $accountType => $accounts) {
        if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
            $doc["has" . $accountType] = Array();
        }
        $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
        if ( $valueType == "rtkDescriptions") {
            foreach ($values as $descriptionAgency => $descriptionValue) {
                if ($descriptionAgency == $doc->value->name) {
                    $doc->value->description = $descriptionValue;
                }
            }
        }
    }
    $db->save($doc);
}*/
?> | ?> |
import sys | import sys |
import os | import os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import genericScrapers | import genericScrapers |
import traceback | |
try: | |
import amonpy | |
amonpy.config.address = 'http://amon_instance:port' | |
amonpy.config.secret_key = 'the secret key from /etc/amon.conf' | |
amon_available = True | |
except ImportError: | |
amon_available = False | |
class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
    """Scraper that uses GenericPDFDisclogScraper without overriding anything."""

    def __init__(self):
        super(ScraperImplementation, self).__init__()


if __name__ == '__main__':
    # Single-argument, %-formatted print calls behave the same as statements
    # (Python 2) and as function calls (Python 3).
    print('Subclass: %s' % issubclass(ScraperImplementation,
                                      genericScrapers.GenericPDFDisclogScraper))
    print('Instance: %s' % isinstance(ScraperImplementation(),
                                      genericScrapers.GenericPDFDisclogScraper))
    try:
        ScraperImplementation().doScrape()
    except Exception as err:
        # Report the failure locally first, then forward it to Amon when the
        # optional amonpy client was importable.
        backtrace = traceback.format_exc()
        sys.stderr.write('ERROR: %s\n' % str(err))
        print('Error Reason: %s' % err.__doc__)
        print('Exception: %s' % err.__class__)
        print(backtrace)
        if amon_available:
            data = {
                'exception_class': err.__class__.__name__,
                'url': '',
                'backtrace': backtrace.splitlines(),
                # Key spelling kept exactly as in the original payload template.
                'enviroment': '',
                # 'data' is free-form extra context for this failure.
                'data': {'message': str(err)}
            }
            amonpy.exception(data)
<?php | <?php |
include_once('include/common.inc.php'); | include_once('include/common.inc.php'); |
/**
 * Escapes a value for use in HTML text or inside a quoted attribute.
 * Arrays and objects are JSON-encoded first so nested data is shown instead of
 * triggering an array-to-string conversion.
 */
function displayValueEscape($value) {
    if (is_array($value) || is_object($value)) {
        $value = json_encode($value);
    }
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
}

/**
 * Echoes the HTML for one field of an agency record.
 *
 * In "view" mode the field becomes a table row (keys starting with "_" and the
 * "metadata" key are skipped); in "edit" mode it becomes form inputs. Labels
 * and RDFa/microdata attributes are taken from $schemas['agency'] when the key
 * is described there. Any other mode prints nothing.
 *
 * @param string $key   Field name.
 * @param mixed  $value Field value; arrays are rendered item by item.
 * @param string $mode  "view" or "edit".
 */
function displayValue($key, $value, $mode) {
    global $db, $schemas;

    $hasSchema = isset($schemas['agency']["properties"][$key]);
    $schema = $hasSchema ? $schemas['agency']["properties"][$key] : array();
    $safeKey = displayValueEscape($key);
    $isLinkField = (strpos($key, "URL") > 0 || $key == 'website');

    if ($mode == "view") {
        if (strpos($key, "_") === 0 || $key == "metadata")
            return;
        echo "<tr>";
        echo "<td>";
        if ($hasSchema) {
            // Schema text is site configuration, not user data, so it is emitted as-is.
            echo $schema['x-title'] . "<br><small>" . $schema['description'] . "</small>";
        }
        echo "</td><td>";
        if (is_array($value)) {
            echo "<ol>";
            foreach ($value as $subkey => $subvalue) {
                echo "<li ";
                if (isset($schema['x-property'])) {
                    echo ' property="' . $schema['x-property'] . '" ';
                }
                if (isset($schema['x-itemprop'])) {
                    echo ' itemprop="' . $schema['x-itemprop'] . '" ';
                }
                echo " >";
                echo displayValueEscape($subvalue) . "</li>";
            }
            // The cell and row are closed once, below, for both branches.
            echo "</ol>";
        } else {
            $safeValue = displayValueEscape($value);
            if (isset($schema['x-property'])) {
                echo '<span property="' . $schema['x-property'] . '">';
            } else {
                echo "<span>";
            }
            if ($isLinkField && $value != "") {
                echo "<a " . ($key == 'website' ? 'itemprop="url"' : '') . " href='$safeValue'>$safeValue</a>";
            } else if ($key == 'abn') {
                echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=" . displayValueEscape(urlencode((string) $value)) . "'>$safeValue</a>";
            } else {
                echo $safeValue;
            }
            echo "</span>";
        }
        echo "</td></tr>";
    }

    if ($mode == "edit") {
        if (is_array($value)) {
            echo '<div class="row">
                <div class="seven columns">
                <fieldset>
                <h5>' . $safeKey . '</h5>';
            foreach ($value as $subkey => $subvalue) {
                $safeSubkey = displayValueEscape($subkey);
                $safeSubvalue = displayValueEscape($subvalue);
                echo "<label>$safeSubkey</label><input class='input-text' type='text' id='{$safeKey}{$safeSubkey}' name='{$safeKey}[{$safeSubkey}]' value='$safeSubvalue'/>";
            }
            echo "</fieldset>
                </div>
                </div>";
        } else {
            $safeValue = displayValueEscape($value);
            if (strpos($key, "_") === 0) {
                echo "<input type='hidden' id='$safeKey' name='$safeKey' value='$safeValue'/>";
            } else if ($key == "parentOrg") {
                echo "<label for='$safeKey'>$safeKey</label><select id='$safeKey' name='$safeKey'><option value=''> Select... </option>";
                $rows = $db->get_view("app", "byDeptStateName")->rows;
                foreach ($rows as $row) {
                    echo "<option value='" . displayValueEscape($row->value) . "'"
                        . (($row->value == $value) ? " SELECTED" : "") . " >"
                        . displayValueEscape(str_replace("Department of ", "", $row->key)) . "</option>";
                }
                echo " </select>";
            } else {
                echo "<label>$safeKey</label><input class='input-text' type='text' id='$safeKey' name='$safeKey' value='$safeValue'/>";
                if ($isLinkField && $value != "") {
                    echo "<a " . ($key == 'website' ? 'itemprop="url"' : '') . " href='$safeValue'>view</a>";
                }
                if ($key == 'abn') {
                    echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=" . displayValueEscape(urlencode((string) $value)) . "'>view abn</a>";
                }
            }
        }
    }
}
/**
 * Pads an agency record so that the edit form has inputs for every schema field.
 *
 * For each property in $schemas['agency']['properties']:
 *  - missing from $row: "string" fields become "" and "array" fields become
 *    a one-element array holding "";
 *  - present and of type "array": an existing array gets three blank entries
 *    appended; a non-array value is wrapped in an array and gets two.
 * Fields of any other type, and keys not in the schema, are left untouched.
 *
 * @param array $row Agency record keyed by field name.
 * @return array The padded record.
 */
function addDefaultFields($row) {
    global $schemas;
    foreach ($schemas['agency']['properties'] as $fieldName => $fieldSchema) {
        $fieldType = $fieldSchema['type'];

        if (!isset($row[$fieldName])) {
            if ($fieldType == "string") {
                $row[$fieldName] = "";
            } else if ($fieldType == "array") {
                $row[$fieldName] = Array("");
            }
            continue;
        }

        if ($fieldType != "array") {
            continue;
        }

        // Existing lists get three spare slots; a lone value is promoted to a
        // list first and then gets two.
        if (is_array($row[$fieldName])) {
            $spareSlots = 3;
        } else {
            $row[$fieldName] = Array($row[$fieldName]);
            $spareSlots = 2;
        }
        for ($slot = 0; $slot < $spareSlots; $slot++) {
            $row[$fieldName][] = "";
        }
    }
    return $row;
}
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
if (isset($_REQUEST['id'])) { | if (isset($_REQUEST['id'])) { |
//get an agency record as json/html, search by name/abn/id | //get an agency record as json/html, search by name/abn/id |
// by name = startkey="Ham"&endkey="Ham\ufff0" | // by name = startkey="Ham"&endkey="Ham\ufff0" |
// edit? | // edit? |
$obj = $db->get($_REQUEST['id']); | $obj = $db->get($_REQUEST['id']); |
include_header(isset($obj->name) ? $obj->name : ""); | include_header(isset($obj->name) ? $obj->name : ""); |
//print_r($row); | //print_r($row); |
if (sizeof($_POST) > 0) { | if (sizeof($_POST) > 0) { |
//print_r($_POST); | //print_r($_POST); |
foreach ($_POST as $postkey => $postvalue) { | foreach ($_POST as $postkey => $postvalue) { |
if ($postvalue == "") { | if ($postvalue == "") { |
unset($_POST[$postkey]); | unset($_POST[$postkey]); |
} | } |
if (is_array($postv |