<!doctype html>
<!-- Static "page not found" error page with inline styles and a third-party
     link-suggestion script. -->
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>Page Not Found :(</title>
  <style>
    /* Text-selection colours. */
    ::-moz-selection { background: #fe57a1; color: #fff; text-shadow: none; }
    ::selection { background: #fe57a1; color: #fff; text-shadow: none; }

    /* Page chrome: a centred card on a grey background. Underscore-prefixed
       properties (_width) are legacy hacks for old Internet Explorer. */
    html { padding: 30px 10px; font-size: 20px; line-height: 1.4; color: #737373; background: #f0f0f0; -webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; }
    html, input { font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; }
    body { max-width: 500px; _width: 500px; padding: 30px 20px 50px; border: 1px solid #b3b3b3; border-radius: 4px; margin: 0 auto; box-shadow: 0 1px 10px #a7a7a7, inset 0 1px 0 #fff; background: #fcfcfc; }
    h1 { margin: 0 10px; font-size: 50px; text-align: center; }
    h1 span { color: #bbb; }
    h3 { margin: 1.5em 0 0.5em; }
    p { margin: 1em 0; }
    ul { padding: 0 0 0 40px; margin: 1em 0; }
    .container { max-width: 380px; _width: 380px; margin: 0 auto; }

    /* google search */
    /* The #goog-* ids do not appear in this page's markup; presumably they are
       injected by fixurl.js (verify against that script). Star-prefixed
       properties (*display, *zoom) are legacy hacks for old Internet Explorer. */
    #goog-fixurl ul { list-style: none; padding: 0; margin: 0; }
    #goog-fixurl form { margin: 0; }
    #goog-wm-qt, #goog-wm-sb { border: 1px solid #bbb; font-size: 16px; line-height: normal; vertical-align: top; color: #444; border-radius: 2px; }
    #goog-wm-qt { width: 220px; height: 20px; padding: 5px; margin: 5px 10px 0 0; box-shadow: inset 0 1px 1px #ccc; }
    #goog-wm-sb { display: inline-block; height: 32px; padding: 0 10px; margin: 5px 0 0; white-space: nowrap; cursor: pointer; background-color: #f5f5f5; background-image: -webkit-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -moz-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -ms-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -o-linear-gradient(rgba(255,255,255,0), #f1f1f1); -webkit-appearance: none; -moz-appearance: none; appearance: none; *overflow: visible; *display: inline; *zoom: 1; }
    #goog-wm-sb:hover, #goog-wm-sb:focus { border-color: #aaa; box-shadow: 0 1px 1px rgba(0, 0, 0, 0.1); background-color: #f8f8f8; }
    #goog-wm-qt:focus, #goog-wm-sb:focus { border-color: #105cb6; outline: 0; color: #222; }
    input::-moz-focus-inner { padding: 0; border: 0; }
  </style>
</head>
<body>
  <div class="container">
    <h1>Not found <span>:(</span></h1>
    <p>Sorry, but the page you were trying to view does not exist.</p>
    <p>It looks like this was the result of either:</p>
    <ul>
      <li>a mistyped address</li>
      <li>an out-of-date link</li>
    </ul>
    <!-- Globals set before loading fixurl.js: the visitor's two-letter language
         code and this site's host. Presumably read by that script. -->
    <script>
      var GOOG_FIXURL_LANG = (navigator.language || '').slice(0,2),GOOG_FIXURL_SITE = location.host;
    </script>
    <!-- Loaded over https so the script is not blocked as mixed content when
         this page itself is served over HTTPS. -->
    <script src="https://linkhelp.clients.google.com/tbproxy/lh/wm/fixurl.js"></script>
  </div>
</body>
</html>
Binary files /dev/null and b/documents/apple-touch-icon-114x114-precomposed.png differ
Binary files /dev/null and b/documents/apple-touch-icon-57x57-precomposed.png differ
Binary files /dev/null and b/documents/apple-touch-icon-72x72-precomposed.png differ
Binary files /dev/null and b/documents/apple-touch-icon-precomposed.png differ
Binary files /dev/null and b/documents/apple-touch-icon.png differ
<?php
/**
 * Status page: one table row per agency returned by the "byCanonicalName"
 * view, showing the agency name and id, a link to its disclosure log (when
 * FOIDocumentsURL is set) and whether a scraper file exists for it under
 * ./scrapers/ ("<id>.py" => tick, "<id>.txt" => "pass", neither => cross).
 */
include_once('../include/common.inc.php');
include_header('Webserver and Accessibility');

echo "<table>
<tr><th>name</th><th>disclog</th><th>scraper?</th></tr>";

$agenciesdb = $server->get_db('disclosr-agencies');
// Not read anywhere in this script; kept because code outside this view
// (e.g. the footer include) may rely on it -- TODO confirm and remove.
$docsdb = $server->get_db('disclosr-documents');

try {
    $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
    if ($rows) {
        foreach ($rows as $row) {
            // Values come from the database; escape them before they are
            // placed into HTML so markup in a name or URL cannot break the page.
            $name = htmlspecialchars((string) $row->value->name, ENT_QUOTES, 'UTF-8');
            $agencyID = htmlspecialchars((string) $row->id, ENT_QUOTES, 'UTF-8');
            $hasDisclog = isset($row->value->FOIDocumentsURL);

            echo "<tr><td>" . $name . " (" . $agencyID . ")</td>\n";

            // Disclosure log column.
            echo "<td>";
            if ($hasDisclog) {
                $disclogURL = $row->value->FOIDocumentsURL;
                // md5() yields hex digits only, so the hash needs no escaping.
                echo '<a href="viewDocument.php?hash=' . md5($disclogURL) . '">'
                    . htmlspecialchars((string) $disclogURL, ENT_QUOTES, 'UTF-8') . '</a>';
            } else {
                echo "<span style='color: red'>✘</span>";
            }

            // Scraper column: left empty when the agency has no disclosure log URL.
            echo "</td>\n<td>";
            if ($hasDisclog) {
                if (file_exists("./scrapers/" . $row->id . '.py')) {
                    echo "<span style='color: green'>✔</span>";
                } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
                    echo "pass";
                } else {
                    echo "<span style='color: red'>✘</span>";
                }
            }
            echo "</td></tr>\n";
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// Close the table even when the view lookup failed, so the footer is not
// rendered inside an unterminated <table>.
echo "</table>\n";

include_footer();
?>
<?xml version="1.0"?>
<!DOCTYPE cross-domain-policy SYSTEM "http://www.adobe.com/xml/dtds/cross-domain-policy.dtd">
<cross-domain-policy>
  <!-- Cross-domain policy file. Read the specification before editing:
       www.adobe.com/devnet/articles/crossdomain_policy_file_spec.html -->

  <!-- Most restrictive policy (this is the setting currently in effect): -->
  <site-control permitted-cross-domain-policies="none"/>

  <!-- Least restrictive policy (commented out below, so not in effect): -->
  <!--
  <site-control permitted-cross-domain-policies="all"/>
  <allow-access-from domain="*" to-ports="*" secure="false"/>
  <allow-http-request-headers-from domain="*" headers="*" secure="false"/>
  -->

  <!--
  If you host a crossdomain.xml file with allow-access-from domain="*"
  and don’t understand all of the points described here, you probably
  have a nasty security vulnerability. ~ simon willison
  -->
</cross-domain-policy>
Binary files /dev/null and b/documents/favicon.ico differ
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import scrape | |
from bs4 import BeautifulSoup | |
import abc | |
class GenericOAICDisclogScraper(object):
    """Abstract base for scrapers of five-column disclosure log tables.

    Subclasses provide the agency id, the disclosure log URL and the column
    ordering; doScrape() fetches the page and saves one document per table
    row into the 'disclosr-foidocuments' database.
    """
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def getAgencyID(self):
        """ disclosr agency id """
        return

    @abc.abstractmethod
    def getURL(self):
        """ disclog URL"""
        return

    @abc.abstractmethod
    def getColumns(self, columns):
        """ rearranges columns if required

        Receives the five <td> cells of a row and must return them ordered
        as (id, date, description, title, notes).
        """
        return

    def doScrape(self):
        """Fetch the disclosure log page and store every row not yet saved.

        Only HTML/XHTML/XML responses are parsed. Rows with five <td> cells
        become documents keyed by scrape.mkhash(url + document id); rows with
        five <th> cells are reported as headers; anything else is reported as
        an error. Pages without a table, and rows without a usable document
        id, are reported and skipped instead of raising.
        """
        foidocsdb = scrape.couch['disclosr-foidocuments']
        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
        if content is None:
            return
        if mime_type not in ("text/html", "application/xhtml+xml", "application/xml"):
            return

        # http://www.crummy.com/software/BeautifulSoup/documentation.html
        soup = BeautifulSoup(content)
        table = soup.table
        if table is None:
            # soup.table is None when the page has no <table>; report it
            # rather than failing on None.find_all.
            print("ERROR no table found in %s" % (url,))
            return

        for row in table.find_all('tr'):
            columns = row.find_all('td')
            if len(columns) == 5:
                (id_cell, date, description, title, notes) = self.getColumns(columns)

                doc_id = id_cell.string
                if doc_id is None:
                    # .string is None when the cell holds more than one child
                    # node (e.g. nested markup); fall back to its joined text.
                    doc_id = "".join(id_cell.stripped_strings)
                if not doc_id:
                    print("ERROR row has no document id")
                    print(row)
                    continue
                print(doc_id)

                doc_hash = scrape.mkhash(url + doc_id)

                # Resolve every link in the row against the page URL.
                links = []
                for atag in row.find_all("a"):
                    if atag.has_attr('href'):
                        links.append(scrape.fullurl(url, atag['href']))

                descriptiontxt = "".join(description.stripped_strings)

                if foidocsdb.get(doc_hash) is None:
                    print("saving")
                    doc = {'_id': doc_hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), "links": links, 'docID': doc_id,
                           "date": date.string, "description": descriptiontxt, "title": title.string, "notes": notes.string}
                    foidocsdb.save(doc)
                else:
                    print("already saved")
            elif len(row.find_all('th')) == 5:
                print("header row")
            else:
                print("ERROR number of columns incorrect")
                print(row)
google-site-verification: google676a414ad086cefb.html | |
google-site-verification: googlebcce906c6b666bb8.html | |
/* the humans responsible & colophon */ | |
/* humanstxt.org */ | |
/* TEAM */ | |
<your title>: <your name> | |
Site: | |
Twitter: | |
Location: | |
/* THANKS */ | |
Names (& URL): | |
/* SITE */ | |
Standards: HTML5, CSS3 | |
Components: Modernizr, jQuery | |
Software: | |
-o/- | |
+oo//- | |
:ooo+//: | |
-ooooo///- | |
/oooooo//: | |
:ooooooo+//- | |
-+oooooooo///- | |
-://////////////+oooooooooo++////////////:: | |
:+ooooooooooooooooooooooooooooooooooooo+:::- | |
-/+ooooooooooooooooooooooooooooooo+/::////:- | |
-:+oooooooooooooooooooooooooooo/::///////:- | |
--/+ooooooooooooooooooooo+::://////:- | |
-:+ooooooooooooooooo+:://////:-- | |
/ooooooooooooooooo+//////:- | |
-ooooooooooooooooooo////- | |
/ooooooooo+oooooooooo//: | |
:ooooooo+/::/+oooooooo+//- | |
-oooooo/::///////+oooooo///- | |
/ooo+::://////:---:/+oooo//: | |
-o+/::///////:- -:/+o+//- | |
:-:///////:- -:/:// | |
-////:- --//: | |
-- -: | |
<!doctype html>
<!-- Landing page linking to the site's sub-domains. The sub-domain links are
     built from the Host header of the current request. -->
<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
<!--[if IE 8]>    <html class="no-js lt-ie9" lang="en"> <![endif]-->
<!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
<head>
  <meta charset="utf-8">

  <!-- Use the .htaccess and remove these lines to avoid edge case issues.
       More info: h5bp.com/i/378 -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">

  <title>disclosure logs</title>
  <meta name="description" content="">

  <!-- Mobile viewport optimized: h5bp.com/viewport -->
  <meta name="viewport" content="width=device-width">

  <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
  <meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8">

  <link rel="stylesheet" href="css/style.css">

  <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->

  <!-- All JavaScript at the bottom, except this Modernizr build.
       Modernizr enables HTML5 elements & feature detects for optimal performance.
       Create your own custom Modernizr build: www.modernizr.com/download/ -->
  <script src="js/libs/modernizr-2.5.3.min.js"></script>
</head>
<body>
  <!-- Prompt IE 6 users to install Chrome Frame. Remove this if you support IE 6.
       chromium.org/developers/how-tos/chrome-frame-getting-started -->
  <!--[if lt IE 7]><p class=chromeframe>Your browser is <em>ancient!</em> <a href="http://browsehappy.com/">Upgrade to a different browser</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to experience this site.</p><![endif]-->
  <header style="text-align: center">
    <h1>disclosurelo.gs</h1>
  </header>
  <div role="main">
    <dl>
      <dt>Disclosure Log</dt>
      <dd></dd>
    </dl>
    <!-- HTTP_HOST is supplied by the client, so it is HTML-escaped before
         being written into each href. -->
    <a href="http://information.<?php echo htmlspecialchars($_SERVER['HTTP_HOST'], ENT_QUOTES, 'UTF-8'); ?>">information.disclo.gs - documents</a><br>
    <a href="http://orgs.<?php echo htmlspecialchars($_SERVER['HTTP_HOST'], ENT_QUOTES, 'UTF-8'); ?>">orgs.disclo.gs - structure</a><br>
    <a href="http://lobbyists.<?php echo htmlspecialchars($_SERVER['HTTP_HOST'], ENT_QUOTES, 'UTF-8'); ?>">lobbyists.disclo.gs - lobbylens</a><br>
    <a href="http://contracts.<?php echo htmlspecialchars($_SERVER['HTTP_HOST'], ENT_QUOTES, 'UTF-8'); ?>">contracts.disclo.gs - contractdashboard</a><br>
  </div>
  <footer>
  </footer>

  <!-- JavaScript at the bottom for fast page loading -->

  <!-- Grab Google CDN's jQuery, with a protocol relative URL; fall back to local if offline -->
  <script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
  <script>window.jQuery || document.write('<script src="js/libs/jquery-1.7.1.min.js"><\/script>')</script>

  <!-- scripts concatenated and minified via build script -->
  <script src="js/plugins.js"></script>
  <script src="js/script.js"></script>
  <!-- end scripts -->

  <!-- Asynchronous Google Analytics snippet. Change UA-XXXXX-X to be your site's ID.
       mathiasbynens.be/notes/async-analytics-snippet -->
  <script>
    var _gaq=[['_setAccount','UA-XXXXX-X'],['_trackPageview']];
    (function(d,t){var g=d.createElement(t),s=d.getElementsByTagName(t)[0];
    g.src=('https:'==location.protocol?'//ssl':'//www')+'.google-analytics.com/ga.js';
    s.parentNode.insertBefore(g,s)}(document,'script'));
  </script>
</body>
</html>
australian disclosure logs | |
are you looking for more information about: | |
contracts | |
gov orgs | |
lobbyists | |
1/1/11 title (Dept dfgg |