--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -94,7 +94,7 @@
     def getDate(self, content, entry, doc):
         date = ''.join(content.stripped_strings).strip()
         (a,b,c) = date.partition("(")
-        date = a.replace("Octber","October")
+        date = self.remove_control_chars(a.replace("Octber","October"))
         print date
         edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
         print edate
@@ -121,7 +121,7 @@
                 columns = row.find_all('td')
                 if len(columns) == self.getColumnCount():
                     (id, date, title, description, notes) = self.getColumns(columns)
-                    print ''.join(id.stripped_strings).encode('ascii', 'ignore')
+                    print self.remove_control_chars(''.join(id.stripped_strings))
                     if id.string == None:
                         hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
                     else: