Merge commit 'd0db210'
Merge commit 'd0db210'

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -23,7 +23,7 @@
         import ckan.model as model
         model.Session.remove()
         model.Session.configure(bind=model.meta.engine)
-        log = logging.getLogger('ckanext.ga-report')
+        log = logging.getLogger('ckanext.ga_report')
 
         import ga_model
         ga_model.init_tables()

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -32,6 +32,11 @@
         first_of_this_month = datetime.datetime(date.year, date.month, 1)
         _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month))
         last_of_this_month =  datetime.datetime(date.year, date.month, last_day_of_month)
+        # if this is the latest month, note that it is only up until today
+        now = datetime.datetime.now()
+        if now.year == date.year and now.month == date.month:
+            last_day_of_month = now.day
+            last_of_this_month = now
         periods = ((date.strftime(FORMAT_MONTH),
                     last_day_of_month,
                     first_of_this_month, last_of_this_month),)
@@ -126,7 +131,7 @@
                 # Make sure the All records are correct.
                 ga_model.post_update_url_stats()
 
-                log.info('Aggregating datasets by publisher')
+                log.info('Associating datasets with their publisher')
                 ga_model.update_publisher_stats(period_name) # about 30 seconds.
 
 
@@ -298,7 +303,7 @@
 
 
     def _download_stats(self, start_date, end_date, period_name, period_complete_day):
-        """ Fetches stats about language and country """
+        """ Fetches stats about data downloads """
         import ckan.model as model
 
         data = {}
@@ -320,7 +325,14 @@
             return
 
         def process_result_data(result_data, cached=False):
+            progress_total = len(result_data)
+            progress_count = 0
+            resources_not_matched = []
             for result in result_data:
+                progress_count += 1
+                if progress_count % 100 == 0:
+                    log.debug('.. %d/%d done so far', progress_count, progress_total)
+
                 url = result[0].strip()
 
                 # Get package id associated with the resource that has this URL.
@@ -334,9 +346,13 @@
                 if package_name:
                     data[package_name] = data.get(package_name, 0) + int(result[1])
                 else:
-                    log.warning(u"Could not find resource for URL: {url}".format(url=url))
+                    resources_not_matched.append(url)
                     continue
-
+            if resources_not_matched:
+                log.debug('Could not match %i or %i resource URLs to datasets. e.g. %r',
+                          len(resources_not_matched), progress_total, resources_not_matched[:3])
+
+        log.info('Associating downloads of resource URLs with their respective datasets')
         process_result_data(results.get('rows'))
 
         results = self.service.data().ga().get(
@@ -348,6 +364,7 @@
                                  dimensions="ga:eventLabel",
                                  max_results=10000,
                                  end_date=end_date).execute()
+        log.info('Associating downloads of cache resource URLs with their respective datasets')
         process_result_data(results.get('rows'), cached=False)
 
         self._filter_out_long_tail(data, MIN_DOWNLOADS)

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -161,20 +161,20 @@
 
 
 def pre_update_url_stats(period_name):
-    log.debug("Deleting '%s' records" % period_name)
-    model.Session.query(GA_Url).\
-            filter(GA_Url.period_name==period_name).delete()
-
-    count = model.Session.query(GA_Url).\
-            filter(GA_Url.period_name == 'All').count()
-    log.debug("Deleting %d 'All' records" % count)
-    count = model.Session.query(GA_Url).\
-            filter(GA_Url.period_name == 'All').delete()
-    log.debug("Deleted %d 'All' records" % count)
+    q = model.Session.query(GA_Url).\
+        filter(GA_Url.period_name==period_name)
+    log.debug("Deleting %d '%s' records" % (q.count(), period_name))
+    q.delete()
+
+    q = model.Session.query(GA_Url).\
+        filter(GA_Url.period_name == 'All')
+    log.debug("Deleting %d 'All' records..." % q.count())
+    q.delete()
 
     model.Session.flush()
     model.Session.commit()
     model.repo.commit_and_remove()
+    log.debug('...done')
 
 def post_update_url_stats():
 
@@ -185,6 +185,7 @@
         record regardless of whether the URL has an entry for
         the month being currently processed.
     """
+    log.debug('Post-processing "All" records...')
     query = """select url, pageviews::int, visits::int
                from ga_url
                where url not in (select url from ga_url where period_name ='All')"""
@@ -197,7 +198,13 @@
         views[row[0]] = views.get(row[0], 0) + row[1]
         visits[row[0]] = visits.get(row[0], 0) + row[2]
 
+    progress_total = len(views.keys())
+    progress_count = 0
     for key in views.keys():
+        progress_count += 1
+        if progress_count % 100 == 0:
+            log.debug('.. %d/%d done so far', progress_count, progress_total)
+
         package, publisher = _get_package_and_publisher(key)
 
         values = {'id': make_uuid(),
@@ -211,6 +218,7 @@
                   }
         model.Session.add(GA_Url(**values))
     model.Session.commit()
+    log.debug('..done')
 
 
 def update_url_stats(period_name, period_complete_day, url_data):
@@ -219,9 +227,14 @@
     stores them in GA_Url under the period and recalculates the totals for
     the 'All' period.
     '''
+    progress_total = len(progress_data)
+    progress_count = 0
     for url, views, visits in url_data:
+        progress_count += 1
+        if progress_count % 100 == 0:
+            log.debug('.. %d/%d done so far', progress_count, progress_total)
+
         package, publisher = _get_package_and_publisher(url)
-
 
         item = model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).\