From: Maxious Date: Wed, 18 Sep 2013 04:41:58 +0000 Subject: graph/ranking fixes X-Git-Url: http://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=205b4cd3107c535d575e2a5efb64971b11a40e5b --- graph/ranking fixes Former-commit-id: c08fb5bb63762d6d850ae16d8fc7ad947b845078 --- --- a/admin/logo.svg +++ b/admin/logo.svg @@ -1,34 +1,34 @@ - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + --- a/admin/refreshDesignDoc.php +++ b/admin/refreshDesignDoc.php @@ -8,21 +8,13 @@ $obj->_id = "_design/" . urlencode("app"); $obj->language = "javascript"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; -$obj->views->byDate->map = "function(doc) { if (doc.title != "Disclosure Log Updated") { emit(doc.date, doc); } };"; +$obj->views->byDate->map = "function(doc) { if (doc.title != \"Disclosure Log Updated\") { emit(doc.date, doc); } };"; $obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };"; $obj->views->byDateMonthYear->reduce = "_count"; $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; $obj->views->byAgencyID->reduce = "_count"; -$obj->views->fieldNames->map = ' -function(doc) { -for(var propName in doc) { - emit(propName, doc._id); - } - -}'; -$obj->views->fieldNames->reduce = 'function (key, values, rereduce) { - return values.length; -}'; +$obj->views->fieldNames->map = 'function(doc) { for(var propName in doc) { emit(propName, doc._id); }}'; +$obj->views->fieldNames->reduce = 'function (key, values, rereduce) { return values.length; }'; // allow safe updates (even if slightly slower due to extra: rev-detection check). $foidb->save($obj, true); --- a/documents/robots.txt +++ b/documents/robots.txt @@ -3,4 +3,5 @@ User-agent: * Disallow: /admin/ +Disallow: /viewDocument.php Sitemap: http://disclosurelo.gs/sitemap.xml.php --- a/documents/scrape.py +++ b/documents/scrape.py @@ -197,7 +197,7 @@ links = soup.findAll('a') # soup.findAll('a', id=re.compile("^p-")) linkurls = set([]) for link in links: - if link.has_key("href"): + if link.has_attr("href"): if link['href'].startswith("http"): # lets not do external links for now # linkurls.add(link['href']) --- a/documents/scrapers/0e46f8bd1414b1fdd4f0543d54a97500.py +++ b/documents/scrapers/0e46f8bd1414b1fdd4f0543d54a97500.py @@ -7,7 +7,7 @@ #http://www.doughellmann.com/PyMOTW/abc/ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getTable(self,soup): - return soup.find(id = "maincontentcontainer").table + return soup.find(class_ = "contentcontainer").table def getColumnCount(self): return 5 def getColumns(self,columns): --- a/documents/scrapers/1803322b27286950cab0c543168b5f21.py +++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.py @@ -16,7 +16,7 @@ links = [] description = "" for atag in entry.find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): link = scrape.fullurl(self.getURL(), atag['href']) (url, mime_type, htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) if htcontent != None: @@ -25,7 +25,7 @@ row = soup.find(id="content_div_148050") description = ''.join(row.stripped_strings) for atag in row.find_all("a"): - if atag.has_key('href'): + if atag.has_attr('href'): links.append(scrape.fullurl(link, atag['href'])) if links != []: --- a/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py +++ b/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py @@ -11,7 +11,7 @@ links = [] description = "" for atag in entry.find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): link = scrape.fullurl(self.getURL(),atag['href']) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) if htcontent != None: @@ -26,7 +26,7 @@ for text in row.stripped_strings: description = description + text + "\n" for atag in row.find_all("a"): - if atag.has_key('href'): + if atag.has_attr('href'): links.append(scrape.fullurl(link,atag['href'])) if links != []: --- a/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py +++ b/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py @@ -16,7 +16,7 @@ link = None links = [] for atag in entry.find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): link = scrape.fullurl(self.getURL(),atag['href']) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) if htcontent != None: @@ -24,7 +24,7 @@ # http://www.crummy.com/software/BeautifulSoup/documentation.html soup = BeautifulSoup(htcontent) for atag in soup.find(class_ = "article-content").find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): links.append(scrape.fullurl(link,atag['href'])) if links != []: --- a/documents/scrapers/69d59284ef0ccd2677394d82d3292abc.py +++ b/documents/scrapers/69d59284ef0ccd2677394d82d3292abc.py @@ -6,8 +6,6 @@ #http://www.doughellmann.com/PyMOTW/abc/ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): - def getTable(self,soup): - return soup.find(id = "centercontent").table def getColumnCount(self): return 5 def getColumns(self,columns): --- a/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py +++ b/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py @@ -11,7 +11,7 @@ links = [] description = "" for atag in entry.find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): link = scrape.fullurl(self.getURL(),atag['href']) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) if htcontent != None: @@ -22,7 +22,7 @@ description = description + text.encode('ascii', 'ignore') for atag in soup.find(id="SortingTable").find_all("a"): - if atag.has_key('href'): + if atag.has_attr('href'): links.append(scrape.fullurl(link,atag['href'])) if links != []: @@ -43,7 +43,7 @@ links = [] description = "" for atag in entry.find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): link = scrape.fullurl(self.getURL(),atag['href']) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) if htcontent != None: @@ -53,7 +53,7 @@ for text in soup.find(id="content-item").stripped_strings: description = description + text + " \n" for atag in soup.find(id="content-item").find_all("a"): - if atag.has_key('href'): + if atag.has_attr('href'): links.append(scrape.fullurl(link,atag['href'])) if links != []: doc.update({'links': links}) --- a/documents/scrapers/be9996f0ac58f71f23d074e82d44ead3.py +++ b/documents/scrapers/be9996f0ac58f71f23d074e82d44ead3.py @@ -17,7 +17,7 @@ dldivs = soup.find('div',class_="download") if dldivs != None: for atag in dldivs.find_all("a"): - if atag.has_key('href'): + if atag.has_attr('href'): links.append(scrape.fullurl(url,atag['href'])) nodldivs = soup.find('div',class_="incompleteNotification") if nodldivs != None and nodldivs.stripped_strings != None: --- a/documents/scrapers/d1296c366287f7a9faedf235c7e6df01.py +++ b/documents/scrapers/d1296c366287f7a9faedf235c7e6df01.py @@ -6,8 +6,6 @@ #http://www.doughellmann.com/PyMOTW/abc/ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): - def getTable(self,soup): - return soup.find(id="main").table def getColumnCount(self): return 7 def getColumns(self,columns): --- a/documents/sitemap.xml.php +++ b/documents/sitemap.xml.php @@ -6,7 +6,7 @@ echo '' . "\n"; echo " " . local_url() . "index.php1.0\n"; foreach (scandir("./") as $file) { - if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php") { + if (strpos($file, ".php") !== false && ($file != "index.php" && $file != "sitemap.xml.php"&& $file != "viewDocument.php")) { echo " " . local_url() . "$file0.6\n"; } } --- /dev/null +++ b/exportAgencies.csv.php @@ -1,1 +1,88 @@ +get_db('disclosr-agencies'); + + +$foiEmail = Array(); +try { + $rows = $db->get_view("app", "foiEmails", null, true)->rows; + //print_r($rows); + foreach ($rows as $row) { + $foiEmail[$row->key] = $row->value; + } +} catch (SetteeRestClientException $e) { + setteErrorHandler($e); + die(); +} + +$fp = fopen('php://output', 'w'); +if ($fp && $db) { + header('Content-Type: text/csv; charset=utf-8'); + header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"'); + header('Pragma: no-cache'); + header('Expires: 0'); + fputcsv($fp, $headers); + try { + $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; + //print_r($rows); + foreach ($agencies as $agency) { + // print_r($agency); + + if (isset($agency->value->foiEmail) && $agency->value->foiEmail != "null" && !isset($agency->value->status)) { + $row = Array(); + $row["title"] = trim($agency->value->name); + $row["abbr"] = (isset($agency->value->shortName) ? $agency->value->shortName : ""); + $row["key"] = (isset($agency->value->shortName) ? "au/".strtolower($agency->value->shortName) : ""); + $row["category"] =""; + $row["parent"] =""; + $row["parentkey"] =""; + $row["description"] = (isset($agency->value->description) ? $agency->value->description : ""); + $row["url"] = (isset($agency->value->website) ? $agency->value->website : ""); + $row["jurisdiction"] = "Australia"; + $row["jurisdiction_code"] = "au"; + + $row["source"] =""; + $row["source_url"] =""; + $row["address"] =""; + $row["contact"] =""; + + $row["email"] = (isset($agency->value->foiEmail) ? $agency->value->foiEmail : ""); + $row["tags"] =""; + $row["created_at"] =""; + $row["updated_at"] =""; + + + $otherBodies = Array(); + if (isset($agency->value->foiBodies)) { + $otherBodies = array_merge($otherBodies, $agency->value->foiBodies); + } + if (isset($agency->value->positions)) { + $positions = Array(); + foreach ($agency->value->positions as $position) { + $positions[] = "Office of the ".$position; + } + $otherBodies = array_merge($otherBodies, $positions); + } + sort($otherBodies); + if (count($otherBodies) > 0) { + $row["description"] .= "
This department also responds to requests for information held by " . implode(", ", $otherBodies); + } + + + fputcsv($fp, array_values($row)); + } + } + } catch (SetteeRestClientException $e) { + setteErrorHandler($e); + } + + die; +} +?> + --- a/getAgency.php +++ b/getAgency.php @@ -188,7 +188,7 @@
'; - echo '
'; + echo '
'; echo '

' . $row['name'] . '

'; if (isset($row['description'])) { --- a/graph.php +++ b/graph.php @@ -9,13 +9,13 @@ function add_node($id, $label, $parent="") { global $format; if ($format == "html") { - // echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL; + // echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL; } if ($format == "dot" && $label != "") { - echo "$id [label=\"$label\"];". PHP_EOL; + echo "\"$id\" [label=\"$label\", shape=plaintext];". PHP_EOL; } if ($format == "gexf") { - echo "":">") + echo "":">") ."" ."". PHP_EOL; } @@ -27,7 +27,7 @@ // echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL; } if ($format == "dot") { - echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL; + echo "\"$from\" -> \"$to\" ".($color != ""? "[color=$color]":"").";". PHP_EOL; } if ($format == "gexf") { echo "". PHP_EOL; @@ -55,7 +55,7 @@ $rows = $db->get_view("app", "byCanonicalName", null, true)->rows; //print_r($rows); foreach ($rows as $row) { - add_node($row->id, $row->key); + add_node($row->id, $row->value->name); } } catch (SetteeRestClientException $e) { setteErrorHandler($e); --- a/include/template.inc.php +++ b/include/template.inc.php @@ -44,7 +44,7 @@ - +