Add disclo.gs homepage
Add disclo.gs homepage


Former-commit-id: 6abf389463f189798b499509f3dc589f78c6eacf

  <?php
 
  require_once '../include/common.inc.php';
 
  $db = $server->get_db('disclosr-agencies');
  $rows = $db->get_view("app", "byName")->rows;
  $nametoid = Array();
  $accounts = Array();
  foreach ($rows as $row) {
  $nametoid[trim($row->key)] = $row->value;
  }
 
  function extractCSVAccounts($url, $nameField, $accountField, $filter) {
  global $accounts, $nametoid;
  $request = Requests::get($url);
  echo $url;
  $Data = str_getcsv($request->body, "\n"); //parse the rows
  $headers = Array();
  foreach ($Data as $num => $line) {
  $Row = str_getcsv($line, ",");
  if ($num == 0) {
  $headers = $Row;
  print_r($headers);
  } else {
  if (isset($Row[array_search($nameField, $headers)])) {
  $agencyName = $Row[array_search($nameField, $headers)];
  if (!in_array(trim($agencyName), array_keys($nametoid))) {
  echo "$agencyName missing" . PHP_EOL;
  } else {
  echo $Row[array_search($nameField, $headers)] . PHP_EOL;
  $accounts[$nametoid[trim($agencyName)]]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/'.$Row[array_search($accountField, $headers)];
  }
  } else {
  echo "error finding any agency" . $line . PHP_EOL;
  }
  }
  }
  }
 
  extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv","Agency","URL name");
  print_r($accounts);
  /* foreach ($accounts as $id => $accountTypes) {
  echo $id . "<br>" . PHP_EOL;
  $doc = object_to_array($db->get($id));
  // print_r($doc);
 
  foreach ($accountTypes as $accountType => $accounts) {
  if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
  $doc["has" . $accountType] = Array();
  }
  $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
  }
  $db->save($doc);
  }*/
  ?>
 
  <!doctype html>
  <html lang="en">
  <head>
  <meta charset="utf-8">
  <title>Page Not Found :(</title>
  <style>
  ::-moz-selection { background: #fe57a1; color: #fff; text-shadow: none; }
  ::selection { background: #fe57a1; color: #fff; text-shadow: none; }
  html { padding: 30px 10px; font-size: 20px; line-height: 1.4; color: #737373; background: #f0f0f0; -webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; }
  html, input { font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; }
  body { max-width: 500px; _width: 500px; padding: 30px 20px 50px; border: 1px solid #b3b3b3; border-radius: 4px; margin: 0 auto; box-shadow: 0 1px 10px #a7a7a7, inset 0 1px 0 #fff; background: #fcfcfc; }
  h1 { margin: 0 10px; font-size: 50px; text-align: center; }
  h1 span { color: #bbb; }
  h3 { margin: 1.5em 0 0.5em; }
  p { margin: 1em 0; }
  ul { padding: 0 0 0 40px; margin: 1em 0; }
  .container { max-width: 380px; _width: 380px; margin: 0 auto; }
  /* google search */
  #goog-fixurl ul { list-style: none; padding: 0; margin: 0; }
  #goog-fixurl form { margin: 0; }
  #goog-wm-qt, #goog-wm-sb { border: 1px solid #bbb; font-size: 16px; line-height: normal; vertical-align: top; color: #444; border-radius: 2px; }
  #goog-wm-qt { width: 220px; height: 20px; padding: 5px; margin: 5px 10px 0 0; box-shadow: inset 0 1px 1px #ccc; }
  #goog-wm-sb { display: inline-block; height: 32px; padding: 0 10px; margin: 5px 0 0; white-space: nowrap; cursor: pointer; background-color: #f5f5f5; background-image: -webkit-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -moz-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -ms-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -o-linear-gradient(rgba(255,255,255,0), #f1f1f1); -webkit-appearance: none; -moz-appearance: none; appearance: none; *overflow: visible; *display: inline; *zoom: 1; }
  #goog-wm-sb:hover, #goog-wm-sb:focus { border-color: #aaa; box-shadow: 0 1px 1px rgba(0, 0, 0, 0.1); background-color: #f8f8f8; }
  #goog-wm-qt:focus, #goog-wm-sb:focus { border-color: #105cb6; outline: 0; color: #222; }
  input::-moz-focus-inner { padding: 0; border: 0; }
  </style>
  </head>
  <body>
  <div class="container">
  <h1>Not found <span>:(</span></h1>
  <p>Sorry, but the page you were trying to view does not exist.</p>
  <p>It looks like this was the result of either:</p>
  <ul>
  <li>a mistyped address</li>
  <li>an out-of-date link</li>
  </ul>
  <script>
  var GOOG_FIXURL_LANG = (navigator.language || '').slice(0,2),GOOG_FIXURL_SITE = location.host;
  </script>
  <script src="http://linkhelp.clients.google.com/tbproxy/lh/wm/fixurl.js"></script>
  </div>
 
 
 Binary files /dev/null and b/documents/apple-touch-icon-114x114-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon-57x57-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon-72x72-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon.png differ
  <?php
 
  include_once('../include/common.inc.php');
  include_header('Webserver and Accessiblity');
 
  echo "<table>
  <tr><th>name</th><th>disclog</th><th>scraper?</th></tr>";
  $agenciesdb = $server->get_db('disclosr-agencies');
  $docsdb = $server->get_db('disclosr-documents');
  try {
  $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
 
 
  if ($rows) {
  foreach ($rows as $row) {
 
  echo "<tr><td>" . $row->value->name . " (".$row->id.")</td>\n";
  echo "<td>";
  if (isset($row->value->FOIDocumentsURL)) {
  echo '<a href="viewDocument.php?hash='.md5($row->value->FOIDocumentsURL).'">'
  .$row->value->FOIDocumentsURL.'</a>';
  } else {
  echo "<font color='red'>✘</font>";
  }
  echo "</td>\n<td>";
  if (isset($row->value->FOIDocumentsURL)) {
  if (file_exists("./scrapers/".$row->id.'.py')) {
  echo "<font color='green'>✔</font>";
  } else if (file_exists("./scrapers/".$row->id.'.txt')){
  echo "pass";
  } else {
  echo "<font color='red'>✘</font>";
  }
  }
  echo "</td></tr>\n";
  }
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  include_footer();
  ?>
  <?xml version="1.0"?>
  <!DOCTYPE cross-domain-policy SYSTEM "http://www.adobe.com/xml/dtds/cross-domain-policy.dtd">
  <cross-domain-policy>
 
 
  <!-- Read this: www.adobe.com/devnet/articles/crossdomain_policy_file_spec.html -->
 
  <!-- Most restrictive policy: -->
  <site-control permitted-cross-domain-policies="none"/>
 
 
 
  <!-- Least restrictive policy: -->
  <!--
  <site-control permitted-cross-domain-policies="all"/>
  <allow-access-from domain="*" to-ports="*" secure="false"/>
  <allow-http-request-headers-from domain="*" headers="*" secure="false"/>
  -->
  <!--
  If you host a crossdomain.xml file with allow-access-from domain="*"
  and don’t understand all of the points described here, you probably
  have a nasty security vulnerability. ~ simon willison
  -->
 
  </cross-domain-policy>
 
 Binary files /dev/null and b/documents/favicon.ico differ
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import scrape
 
  from bs4 import BeautifulSoup
  import abc
 
  class GenericOAICDisclogScraper(object):
  __metaclass__ = abc.ABCMeta
  @abc.abstractmethod
  def getAgencyID(self):
  """ disclosr agency id """
  return
 
  @abc.abstractmethod
  def getURL(self):
  """ disclog URL"""
  return
 
  @abc.abstractmethod
  def getColumns(self,columns):
  """ rearranges columns if required """
  return
 
  def doScrape(self):
  foidocsdb = scrape.couch['disclosr-foidocuments']
  (url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
  if content != None:
  if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
  # http://www.crummy.com/software/BeautifulSoup/documentation.html
  soup = BeautifulSoup(content)
  for row in soup.table.find_all('tr'):
  columns = row.find_all('td')
  if len(columns) == 5:
  (id, date, description, title, notes) = self.getColumns(columns)
  print id.string
  hash = scrape.mkhash(url+id.string)
  links = []
  for atag in row.find_all("a"):
  if atag.has_key('href'):
  links.append(scrape.fullurl(url,atag['href']))
  doc = foidocsdb.get(hash)
  descriptiontxt = ""
  for string in description.stripped_strings:
  descriptiontxt = descriptiontxt + string
 
  if doc == None:
  print "saving"
  doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), "links": links, 'docID': id.string,
  "date": date.string, "description": descriptiontxt,"title": title.string,"notes": notes.string}
  foidocsdb.save(doc)
  else:
  print "already saved"
 
  elif len(row.find_all('th')) == 5:
  print "header row"
 
  else:
  print "ERROR number of columns incorrect"
  print row
 
  google-site-verification: google676a414ad086cefb.html
 
  google-site-verification: googlebcce906c6b666bb8.html
 
  /* the humans responsible & colophon */
  /* humanstxt.org */
 
 
  /* TEAM */
  <your title>: <your name>
  Site:
  Twitter:
  Location:
 
  /* THANKS */
  Names (& URL):
 
  /* SITE */
  Standards: HTML5, CSS3
  Components: Modernizr, jQuery
  Software:
 
 
 
  -o/-
  +oo//-
  :ooo+//:
  -ooooo///-
  /oooooo//:
  :ooooooo+//-
  -+oooooooo///-
  -://////////////+oooooooooo++////////////::
  :+ooooooooooooooooooooooooooooooooooooo+:::-
  -/+ooooooooooooooooooooooooooooooo+/::////:-
  -:+oooooooooooooooooooooooooooo/::///////:-
  --/+ooooooooooooooooooooo+::://////:-
  -:+ooooooooooooooooo+:://////:--
  /ooooooooooooooooo+//////:-
  -ooooooooooooooooooo////-
  /ooooooooo+oooooooooo//:
  :ooooooo+/::/+oooooooo+//-
  -oooooo/::///////+oooooo///-
  /ooo+::://////:---:/+oooo//:
  -o+/::///////:- -:/+o+//-
  :-:///////:- -:/://
  -////:- --//:
  -- -:
 
  <!doctype html>
  <!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
  <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
  <!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
  <!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
  <!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
  <!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
  <head>
  <meta charset="utf-8">
 
  <!-- Use the .htaccess and remove these lines to avoid edge case issues.
  More info: h5bp.com/i/378 -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
 
  <title>disclosure logs</title>
  <meta name="description" content="">
 
  <!-- Mobile viewport optimized: h5bp.com/viewport -->
  <meta name="viewport" content="width=device-width">
 
  <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
  <meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
  <link rel="stylesheet" href="css/style.css">
 
  <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
 
  <!-- All JavaScript at the bottom, except this Modernizr build.
  Modernizr enables HTML5 elements & feature detects for optimal performance.