--- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -72,7 +72,7 @@ edate = date.today().strftime("%Y-%m-%d") doc = {'_id': dochash, 'agencyID': self.getAgencyID() , 'url': self.getURL(), 'docID': dochash, - "date": edate, "title": "Disclosure Log Updated", "description": description} + "date": edate, "title": "Disclosure Log Updated", "description": self.remove_control_chars(description)} foidocsdb.save(doc) else: print "already saved" @@ -104,7 +104,7 @@ edate = date.today().strftime("%Y-%m-%d") doc = {'_id': dochash, 'agencyID': self.getAgencyID() , 'url': self.getURL(), 'docID': dochash, - "date": edate, "title": "Disclosure Log Updated", "description": description} + "date": edate, "title": "Disclosure Log Updated", "description": self.remove_control_chars(description)} foidocsdb.save(doc) else: print "already saved" @@ -202,7 +202,7 @@ def getDate(self, content, entry, doc): date = ''.join(content.stripped_strings).strip() (a, b, c) = date.partition("(") - date = self.remove_control_chars(a.replace("Octber", "October")) + date = self.remove_control_chars(a.replace("Octber", "October").replace("1012","2012")) print date edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d") print edate