Former-commit-id: 4ca7f1f81d51d7c094d904b6aa1e176cf5958630
--- a/documents/datagov-export.py
+++ b/documents/datagov-export.py
@@ -14,6 +14,7 @@
import tempfile
def add_package_resource_cachedurl(ckan, package_name, url, name, format, license_id, size,**kwargs):
+ excluded_apis = "recent-earthquakes,sentinel-hotspots,abc-local-stations,action-bus-service-gtfs-feed-act,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,current-and-future-road-reports-traffic-restrictions-in-south-australia,journey-planner-data-act,live-traffic-cameras-nsw,precis-forecast-national,precis-forecast-national,precis-forecast-new-south-wales,precis-forecast-new-south-wales,precis-forecast-northern-territory,precis-forecast-northern-territory,precis-forecast-queensland,precis-forecast-queensland,precis-forecast-south-australia,precis-forecast-south-australia,precis-forecast-tasmania,precis-forecast-tasmania,precis-forecast-victoria,precis-forecast-victoria,precis-forecast-western-australia,precis-forecast-western-australia,register-of-penalty-notices-nsw,sentinel-hotspots,trove-people-and-organisations-data,weather-data-services-radar,abc-local-stations,act-emergency-services-agency-esa-28093-current-incidents,act-emergency-services-agency-esa-news-alerts,act-government-news-and-events,act-government-summaries-of-cabinet-outcomes,act-magistrates-court-judgements,act-supreme-court-judgements,act-supreme-court-sentences,actpla-latest-news,all-vacant-act-government-jobs,community-engagement-current-engagements,community-engagement-news,edd-media-releases,edd-news-and-events,freedom-of-information-foi-summaries,libraries-act-announcements,nsw-rural-fire-service-current-incidents,nsw-rural-fire-service-major-updates,precis-forecast-new-south-wales,precis-forecast-south-australia,precis-forecast-tasmania,precis-forecast-victoria,sentinel-hotspots,south-australian-road-crash-statistics,trove-people-and-organisations-data,weather-warnings-for-new-south-wales-australian-capital-territory,weather-warnings-for-northern-territory,weather-warnings-for-queensland,weather-warnings-for-south-australia,weather-warnings-for-tasmania,weather-warnings-for-victoria,weather-warnings-for-western-australia".split(",")
if "xls" in url:
format = "xls"
if "pdf" in url:
@@ -27,8 +28,12 @@
if mime_type in ["application/xlsx","application/x-xlsx","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]:
format = "xlsx"
- if content != None:
- tf = tempfile.NamedTemporaryFile(delete=False)
+ # put file extensions on for Windows users downloading files
+ suffix = name.encode("ascii","ignore").replace("/","")
+ if len(suffix) < 5 or (suffix[-4] != "." and suffix[-5] != "."):
+ suffix = suffix + "." + format
+ if content != None and package_name not in excluded_apis:
+ tf = tempfile.NamedTemporaryFile(suffix=suffix)
tfName = os.path.abspath(tf.name)
print tfName
tf.seek(0)
@@ -196,7 +201,7 @@
pkg_name = filter(lambda x: x in '0123456789abcdefghijklmnopqrstuvwxyz-_',
doc.value['url'].replace("http://data.gov.au/dataset/", '').replace('/', '')[:100]);
print pkg_name
- if pkg_name != "":
+ if pkg_name != "" :
#add to or create organization using direct API
agency = doc.value['metadata']["Agency"]
@@ -262,7 +267,7 @@
'author': creator,
'maintainer': creator,
'license_id': get_license_id(doc.value['metadata']['DCTERMS.License']),
- 'notes': html2text.html2text(doc.value['metadata']['Description']).replace('AC/a!a','-').replace('AC/a!aC/',"'").replace("AC/a!E",":")replace("A "," "),
+ 'notes': html2text.html2text(doc.value['metadata']['Description']).replace('AC/a!a','-').replace('AC/a!aC/',"'").replace("AC/a!E",":").replace("A "," "),
'owner_org': org_id,
'extras': extras,
'private': (pkg_name not in goodcsvdata and pkg_name not in goodotherdata)
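
As a quick illustration of the change above, here is a minimal standalone sketch of the new suffix logic, assuming Python 2 (as the script's print statements imply); make_download_suffix is an illustrative name, not a helper that exists in datagov-export.py:

import tempfile

def make_download_suffix(name, format):
    # drop non-ASCII characters and path separators so the name is safe as a filename suffix
    suffix = name.encode("ascii", "ignore").replace("/", "")
    # append ".<format>" when the last few characters hold no dot,
    # so Windows users downloading the cached file get a usable extension
    if len(suffix) < 5 or (suffix[-4] != "." and suffix[-5] != "."):
        suffix = suffix + "." + format
    return suffix

tf = tempfile.NamedTemporaryFile(suffix=make_download_suffix(u"example dataset", "csv"))
print tf.name  # ends with ".csv"
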
--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -202,7 +202,7 @@
def getDate(self, content, entry, doc):
strdate = ''.join(content.stripped_strings).strip()
(a, b, c) = strdate.partition("(")
- strdate = self.remove_control_chars(a.replace("Octber", "October").replace("1012","2012")replace("Janrurary", "January").replace("1012","2012"))
+ strdate = self.remove_control_chars(a.replace("Octber", "October").replace("1012","2012").replace("Janrurary", "January"))
print strdate
try:
edate = parse(strdate, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
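
And a minimal sketch of the corrected date handling in getDate(), assuming python-dateutil supplies the parse() call already used above; normalise_date is an illustrative name only:

from dateutil.parser import parse

def normalise_date(strdate):
    # keep only the part before any parenthesised note, as getDate() does with partition("(")
    strdate = strdate.partition("(")[0]
    # repair the known typos targeted by the chained replace() calls above
    strdate = strdate.replace("Octber", "October").replace("Janrurary", "January").replace("1012", "2012")
    # same parse options as the original call
    return parse(strdate, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")

print normalise_date("3 Octber 1012 (revised)")  # prints 2012-10-03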