From: root Date: Mon, 03 Dec 2012 02:56:21 +0000 Subject: Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=e2b91982ab62f9f22d63338fea8e9f462d4004ca --- Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr Former-commit-id: fb652de2c04eb0dcdcc435c47141e3a0034e996b --- --- a/admin/refreshDesignDoc.php +++ b/admin/refreshDesignDoc.php @@ -9,7 +9,6 @@ $obj->language = "javascript"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; -$obj->views->byDate->reduce = "_count"; $obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };"; $obj->views->byDateMonthYear->reduce = "_count"; $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; --- /dev/null +++ b/documents/about.php @@ -1,1 +1,11 @@ + +

About

+ + --- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -137,7 +137,12 @@ self.getDescription(description,row, doc) if notes != None: doc.update({ 'notes': (''.join(notes.stripped_strings))}) - foidocsdb.save(doc) + badtitles = ['-','Summary of FOI Request','FOI request(in summary form)','Summary of FOI request received by the ASC', +'Summary of FOI request received by agency/minister','Description of Documents Requested','FOI request','Description of FOI Request','Summary of request','Description','Summary', +'Summary of FOIrequest received by agency/minister','Summary of FOI request received','Description of FOI Request',"FOI request",'Results 1 to 67 of 67'] + if doc['title'] not in badtitles and doc['description'] != '': + print "saving" + foidocsdb.save(doc) else: print "already saved "+hash --- a/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py +++ b/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py @@ -21,9 +21,10 @@ for row in soup.find(class_ = "ms-rteTable-GreyAlternating").find_all('tr'): if row != None: rowtitle = row.find('th').string - description = description + "\n" + rowtitle + ": " + if rowtitle != None: + description = description + "\n" + rowtitle + ": " for text in row.find('td').stripped_strings: - description = description + text + description = description + text for atag in row.find_all("a"): if atag.has_key('href'): links.append(scrape.fullurl(link,atag['href'])) --- a/documents/template.inc.php +++ b/documents/template.inc.php @@ -89,7 +89,7 @@