--- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -84,7 +84,7 @@ doc.update({'description': descriptiontxt}) return def getTitle(self, content, entry, doc): - doc.update({'title': content.string}) + doc.update({'title': (''.join(content.stripped_strings))}) return def getTable(self, soup): return soup.table @@ -123,13 +123,13 @@ if doc == None: print "saving " +hash - doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': id.string} + doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))} self.getLinks(self.getURL(),row,doc) self.getTitle(title,row, doc) self.getDate(date,row, doc) self.getDescription(description,row, doc) if notes != None: - doc.update({ 'notes': notes.string}) + doc.update({ 'notes': (''.join(notes.stripped_strings))}) foidocsdb.save(doc) else: print "already saved "+hash