From: Maxious Date: Mon, 03 Dec 2012 05:26:52 +0000 Subject: beginning semantic markup X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=ffb630b58b78314af4fbd440790d54c627731304 --- beginning semantic markup Former-commit-id: a5eba7337e3796c4f37731406127e5df85c440fa --- --- a/admin/refreshDesignDoc.php +++ b/admin/refreshDesignDoc.php @@ -9,7 +9,6 @@ $obj->language = "javascript"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; -$obj->views->byDate->reduce = "_count"; $obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };"; $obj->views->byDateMonthYear->reduce = "_count"; $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; --- /dev/null +++ b/documents/about.php @@ -1,1 +1,11 @@ + +

About

+ + --- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -137,7 +137,12 @@ self.getDescription(description,row, doc) if notes != None: doc.update({ 'notes': (''.join(notes.stripped_strings))}) - foidocsdb.save(doc) + badtitles = ['-','Summary of FOI Request','FOI request(in summary form)','Summary of FOI request received by the ASC', +'Summary of FOI request received by agency/minister','Description of Documents Requested','FOI request','Description of FOI Request','Summary of request','Description','Summary', +'Summary of FOIrequest received by agency/minister','Summary of FOI request received','Description of FOI Request',"FOI request",'Results 1 to 67 of 67'] + if doc['title'] not in badtitles and doc['description'] != '': + print "saving" + foidocsdb.save(doc) else: print "already saved "+hash --- a/documents/rss.xml.php +++ b/documents/rss.xml.php @@ -11,9 +11,10 @@ //Use wrapper functions for common channelelements $TestFeed->setTitle('Last Modified - All'); $TestFeed->setLink('http://disclosurelo.gs/rss.xml.php'); -$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer'); +$TestFeed->setDescription('Latest entries'); $TestFeed->setChannelElement('language', 'en-us'); $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time())); + //Retriving informations from database $idtoname = Array(); $agenciesdb = $server->get_db('disclosr-agencies'); @@ -21,16 +22,17 @@ $idtoname[$row->id] = trim($row->value->name); } $foidocsdb = $server->get_db('disclosr-foidocuments'); -$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows; +$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00', 50), true)->rows; //print_r($rows); foreach ($rows as $row) { //Create an empty FeedItem $newItem = $TestFeed->createNewItem(); //Add elements to the feed item $newItem->setTitle($row->value->title); - $newItem->setLink("view.php?id=".$row->value->docID); + $newItem->setLink("view.php?id=".$row->value->_id); $newItem->setDate(date("c", strtotime($row->value->date))); $newItem->setDescription(displayLogEntry($row,$idtoname)); + $newItem->setAuthor($idtoname[$row->value->agencyID]); $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true')); //Now add the feed item $TestFeed->addItem($newItem); @@ -38,3 +40,4 @@ //OK. Everything is done. Now genarate the feed. $TestFeed->generateFeed(); ?> + --- /dev/null +++ b/documents/runScrapers.sh @@ -1,1 +1,3 @@ +for f in scrapers/*.py; do echo "Processing $f file.."; python $f; done + --- a/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py +++ b/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py @@ -21,9 +21,10 @@ for row in soup.find(class_ = "ms-rteTable-GreyAlternating").find_all('tr'): if row != None: rowtitle = row.find('th').string - description = description + "\n" + rowtitle + ": " + if rowtitle != None: + description = description + "\n" + rowtitle + ": " for text in row.find('td').stripped_strings: - description = description + text + description = description + text for atag in row.find_all("a"): if atag.has_key('href'): links.append(scrape.fullurl(link,atag['href'])) --- a/documents/template.inc.php +++ b/documents/template.inc.php @@ -1,152 +1,169 @@ - - - - - - - - - + header('X-UA-Compatible: IE=edge,chrome=1'); + ?> + + + + + + + + + - - + Australian Disclosure Logs<?php if ($title != "") echo " - $title"; ?> + - Australian Disclosure Logs<?php if ($title != "") echo " - $title";?> - + + + + + - - + + + + - - + + + - - - - + + + - - - + + + +
+ -

© Company 2012

- - - - - + (function() { + var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; + ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; + var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); + })(); + + + + + - - - + + $length) { + //limit hit! + $string = substr($string, 0, ($length - 3)); + if ($stopanywhere) { + //stop anywhere + $string .= '...'; + } else { + //stop on a word. + $string = substr($string, 0, strrpos($string, ' ')) . '...'; + } + } + return $string; } function displayLogEntry($row, $idtoname) { $result = ""; - $result .= "

".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")

".str_replace("\n","
",$row->value->description); -if (isset($row->value->notes)) { -$result .= "
Note: ".$row->value->notes; -} -$result .= "

"; + $result .= '
'; + $result .= '

: " . truncate($row->value->title, 120).""; + $result .= '(' . $idtoname[$row->value->agencyID] . ')

'; + $result .= "

Title" . $row->value->title . "
" . str_replace("\n", "
", $row->value->description); + if (isset($row->value->notes)) { + $result .= "
Note: " . $row->value->notes; + } + $result .= "

"; -if (isset($row->value->links)){ -$result .= "

Links/Documents

    "; -foreach ($row->value->links as $link) { - $result .= "
  • ".$link."
  • "; + if (isset($row->value->links)) { + $result .= '

    Links/Documents

    "; + } + $result .= " ID: " . strip_tags($row->value->docID) . ""; + $result .= "
"; + return $result; } - $result .= ""; -} - $result .= "View original source... ID: ".strip_tags($row->value->docID).""; -$result .= "
"; -return $result; -} - --- /dev/null +++ b/lib/FeedWriter