From: Maxious Date: Tue, 16 Jul 2013 10:29:44 +0000 Subject: Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=f5007cb041c9b48f6948dc6591180e1fcc84ce51 --- Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr Conflicts: documents/sitemap.xml.php Former-commit-id: 0480844aebe20a3af30b1c98aa192a5a823fb4d6 --- --- a/documents/robots.txt +++ b/documents/robots.txt @@ -3,4 +3,5 @@ User-agent: * Disallow: /admin/ +Disallow: /viewDocument.php Sitemap: http://disclosurelo.gs/sitemap.xml.php --- a/documents/scrapers/f5ce2d1651739704634eb8ca4b2b46d3.py +++ b/documents/scrapers/f5ce2d1651739704634eb8ca4b2b46d3.py @@ -7,12 +7,12 @@ #http://www.doughellmann.com/PyMOTW/abc/ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getTable(self,soup): - return soup.find(id = "ctl00_PlaceHolderMain_PublishingPageContent__ControlWrapper_RichHtmlField").table + return soup.find(id = "block-system-main").table def getColumnCount(self): - return 7 + return 2 def getColumns(self,columns): - (id, date, title, description,link,deldate, notes) = columns - return (id, date, title, description, notes) + (date, title) = columns + return (date, date, title, title, None) if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) --- a/documents/sitemap.xml.php +++ b/documents/sitemap.xml.php @@ -1,30 +1,48 @@ "; echo '' . "\n"; echo " " . local_url() . "index.php1.0\n"; foreach (scandir("./") as $file) { - if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php") + if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php"&& $file != "viewDocument.php") echo " " . local_url() . "$file0.6\n"; + } } $agenciesdb = $server->get_db('disclosr-agencies'); +$foidocsdb = $server->get_db('disclosr-foidocuments'); try { $rows = $agenciesdb->get_view("app", "byCanonicalName")->rows; foreach ($rows as $row) { echo '' . local_url() . 'agency.php?id=' . $row->value->_id . "0.3\n"; } + unset($rows); + $rows = null; } catch (SetteeRestClientException $e) { setteErrorHandler($e); } -$foidocsdb = $server->get_db('disclosr-foidocuments'); + +foreach (range(0, 8) as $number) { try { - $rows = $foidocsdb->get_view("app", "all")->rows; + $rows = $foidocsdb->get_view("app", "all", Array($number,$number+1))->rows; foreach ($rows as $row) { echo '' . local_url() . 'view.php?id=' . $row->value->_id . "0.3\n"; } + unset($rows); + $rows = null; +} catch (SetteeRestClientException $e) { + setteErrorHandler($e); +} +} + +try { + $rows = $foidocsdb->get_view("app", "all", Array('9','fffffffff'))->rows; + foreach ($rows as $row) { + echo '' . local_url() . 'view.php?id=' . $row->value->_id . "0.3\n"; + } + unset($rows); + $rows = null; } catch (SetteeRestClientException $e) { setteErrorHandler($e); } --- a/include/common.inc.php +++ b/include/common.inc.php @@ -10,6 +10,7 @@ || strstr($_SERVER['PHP_SELF'], "documents/") || $_SERVER['SERVER_NAME'] == "disclosurelo.gs" || $_SERVER['SERVER_NAME'] == "www.disclosurelo.gs" + || $_SERVER['SERVER_NAME'] == "direct.disclosurelo.gs" ) $basePath = "../"; --- a/robots.txt +++ b/robots.txt @@ -3,4 +3,5 @@ User-agent: * Disallow: /admin/ +Disallow: /viewDocument.php Sitemap: http://orgs.disclosurelo.gs/sitemap.xml.php --- a/sitemap.xml.php +++ b/sitemap.xml.php @@ -7,7 +7,7 @@ echo '' . "\n"; echo " " . local_url() . "index.php1.0\n"; foreach (scandir("./") as $file) { - if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php") + if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php"&& $file != "viewDocument.php") echo " " . local_url() . "$file0.3\n"; }