From: Ross Jones Date: Thu, 06 Dec 2012 16:30:31 +0000 Subject: Updated count of deleted records X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=25c791cfcf26848e6b6b8cf12e81b7997c7363cb --- Updated count of deleted records --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -22,12 +22,29 @@ def _month_details(cls): - '''Returns a list of all the month names''' + ''' + Returns a list of all the periods for which we have data, unfortunately + knows too much about the type of the cls being passed as GA_Url has a + more complex query + + This may need extending if we add a period_name to the stats + ''' months = [] - vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all() + day = None + + vals = model.Session.query(cls.period_name,cls.period_complete_day)\ + .filter(cls.period_name!='All').distinct(cls.period_name)\ + .order_by("period_name desc").all() + if vals and vals[0][1]: + day = int(vals[0][1]) + ordinal = 'th' if 11 <= day <= 13 \ + else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th') + day = "{day}{ordinal}".format(day=day, ordinal=ordinal) + for m in vals: months.append( (m[0], _get_month_name(m[0]))) - return sorted(months, key=operator.itemgetter(0), reverse=True) + + return months, day class GaReport(BaseController): @@ -56,7 +73,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Stat) + c.months, c.day = _month_details(GA_Stat) # Work out which month to show, based on query params of the first item c.month_desc = 'all months' @@ -220,7 +237,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Url) + c.months, c.day = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', '') @@ -278,7 +295,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Url) + c.months, c.day = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', '') --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -126,7 +126,7 @@ ga_model.update_publisher_stats(period_name) # about 30 seconds. log.info('Downloading and storing analytics for site-wide stats') - self.sitewide_stats( period_name ) + self.sitewide_stats( period_name, period_complete_day ) log.info('Downloading and storing analytics for social networks') self.update_social_info(period_name, start_date, end_date) @@ -153,7 +153,8 @@ data = collections.defaultdict(list) rows = results.get('rows',[]) for row in rows: - data[_normalize_url(row[0])].append( (row[1], int(row[2]),) ) + url = _normalize_url('http:/' + row[0]) + data[url].append( (row[1], int(row[2]),) ) ga_model.update_social(period_name, data) @@ -194,7 +195,7 @@ if 'url' in data: ga_model.update_url_stats(period_name, period_complete_day, data['url']) - def sitewide_stats(self, period_name): + def sitewide_stats(self, period_name, period_complete_day): import calendar year, month = period_name.split('-') _, last_day_of_month = calendar.monthrange(int(year), int(month)) @@ -205,7 +206,7 @@ '_locale_stats', '_browser_stats', '_mobile_stats'] for f in funcs: log.info('Downloading analytics for %s' % f.split('_')[1]) - getattr(self, f)(start_date, end_date, period_name) + getattr(self, f)(start_date, end_date, period_name, period_complete_day) def _get_results(result_data, f): data = {} @@ -214,7 +215,7 @@ data[key] = data.get(key,0) + result[1] return data - def _totals_stats(self, start_date, end_date, period_name): + def _totals_stats(self, start_date, end_date, period_name, period_complete_day): """ Fetches distinct totals, total pageviews etc """ results = self.service.data().ga().get( ids='ga:' + self.profile_id, @@ -224,7 +225,8 @@ max_results=10000, end_date=end_date).execute() result_data = results.get('rows') - ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]}) + ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]}, + period_complete_day) results = self.service.data().ga().get( ids='ga:' + self.profile_id, @@ -239,7 +241,7 @@ 'New visits': result_data[0][2], 'Total visits': result_data[0][3], } - ga_model.update_sitewide_stats(period_name, "Totals", data) + ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day) # Bounces from / or another configurable page. path = '/%s%s' % (config.get('googleanalytics.account'), @@ -261,10 +263,11 @@ bounces, total = [float(x) for x in result_data[0][1:]] pct = 100 * bounces/total log.info('%d bounces from %d total == %s', bounces, total, pct) - ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct}) - - - def _locale_stats(self, start_date, end_date, period_name): + ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct}, + period_complete_day) + + + def _locale_stats(self, start_date, end_date, period_name, period_complete_day): """ Fetches stats about language and country """ results = self.service.data().ga().get( ids='ga:' + self.profile_id, @@ -279,16 +282,16 @@ for result in result_data: data[result[0]] = data.get(result[0], 0) + int(result[2]) self._filter_out_long_tail(data, MIN_VIEWS) - ga_model.update_sitewide_stats(period_name, "Languages", data) + ga_model.update_sitewide_stats(period_name, "Languages", data, period_complete_day) data = {} for result in result_data: data[result[1]] = data.get(result[1], 0) + int(result[2]) self._filter_out_long_tail(data, MIN_VIEWS) - ga_model.update_sitewide_stats(period_name, "Country", data) - - - def _social_stats(self, start_date, end_date, period_name): + ga_model.update_sitewide_stats(period_name, "Country", data, period_complete_day) + + + def _social_stats(self, start_date, end_date, period_name, period_complete_day): """ Finds out which social sites people are referred from """ results = self.service.data().ga().get( ids='ga:' + self.profile_id, @@ -304,10 +307,10 @@ if not result[0] == '(not set)': data[result[0]] = data.get(result[0], 0) + int(result[2]) self._filter_out_long_tail(data, 3) - ga_model.update_sitewide_stats(period_name, "Social sources", data) - - - def _os_stats(self, start_date, end_date, period_name): + ga_model.update_sitewide_stats(period_name, "Social sources", data, period_complete_day) + + + def _os_stats(self, start_date, end_date, period_name, period_complete_day): """ Operating system stats """ results = self.service.data().ga().get( ids='ga:' + self.profile_id, @@ -322,17 +325,17 @@ for result in result_data: data[result[0]] = data.get(result[0], 0) + int(result[2]) self._filter_out_long_tail(data, MIN_VIEWS) - ga_model.update_sitewide_stats(period_name, "Operating Systems", data) + ga_model.update_sitewide_stats(period_name, "Operating Systems", data, period_complete_day) data = {} for result in result_data: if int(result[2]) >= MIN_VIEWS: key = "%s %s" % (result[0],result[1]) data[key] = result[2] - ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data) - - - def _browser_stats(self, start_date, end_date, period_name): + ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data, period_complete_day) + + + def _browser_stats(self, start_date, end_date, period_name, period_complete_day): """ Information about browsers and browser versions """ results = self.service.data().ga().get( ids='ga:' + self.profile_id, @@ -349,14 +352,14 @@ for result in result_data: data[result[0]] = data.get(result[0], 0) + int(result[2]) self._filter_out_long_tail(data, MIN_VIEWS) - ga_model.update_sitewide_stats(period_name, "Browsers", data) + ga_model.update_sitewide_stats(period_name, "Browsers", data, period_complete_day) data = {} for result in result_data: key = "%s %s" % (result[0], self._filter_browser_version(result[0], result[1])) data[key] = data.get(key, 0) + int(result[2]) self._filter_out_long_tail(data, MIN_VIEWS) - ga_model.update_sitewide_stats(period_name, "Browser versions", data) + ga_model.update_sitewide_stats(period_name, "Browser versions", data, period_complete_day) @classmethod def _filter_browser_version(cls, browser, version_str): @@ -380,7 +383,7 @@ ver = ver[0] + ver[1] + 'X' * num_hidden_digits return ver - def _mobile_stats(self, start_date, end_date, period_name): + def _mobile_stats(self, start_date, end_date, period_name, period_complete_day): """ Info about mobile devices """ results = self.service.data().ga().get( @@ -397,13 +400,13 @@ for result in result_data: data[result[0]] = data.get(result[0], 0) + int(result[2]) self._filter_out_long_tail(data, MIN_VIEWS) - ga_model.update_sitewide_stats(period_name, "Mobile brands", data) + ga_model.update_sitewide_stats(period_name, "Mobile brands", data, period_complete_day) data = {} for result in result_data: data[result[1]] = data.get(result[1], 0) + int(result[2]) self._filter_out_long_tail(data, MIN_VIEWS) - ga_model.update_sitewide_stats(period_name, "Mobile devices", data) + ga_model.update_sitewide_stats(period_name, "Mobile devices", data, period_complete_day) @classmethod def _filter_out_long_tail(cls, data, threshold=10): --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -47,6 +47,7 @@ Column('id', types.UnicodeText, primary_key=True, default=make_uuid), Column('period_name', types.UnicodeText), + Column('period_complete_day', types.UnicodeText), Column('stat_name', types.UnicodeText), Column('key', types.UnicodeText), Column('value', types.UnicodeText), ) @@ -134,7 +135,7 @@ return None, publisher_match.groups()[0] return None, None -def update_sitewide_stats(period_name, stat_name, data): +def update_sitewide_stats(period_name, stat_name, data, period_complete_day): for k,v in data.iteritems(): item = model.Session.query(GA_Stat).\ filter(GA_Stat.period_name==period_name).\ @@ -144,11 +145,13 @@ item.period_name = period_name item.key = k item.value = v + item.period_complete_day = period_complete_day model.Session.add(item) else: # create the row values = {'id': make_uuid(), 'period_name': period_name, + 'period_complete_day': period_complete_day, 'key': k, 'value': v, 'stat_name': stat_name @@ -158,10 +161,20 @@ def pre_update_url_stats(period_name): + log.debug("Deleting '%s' records" % period_name) model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).delete() - model.Session.query(GA_Url).\ - filter(GA_Url.period_name=='All').delete() + + count = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == 'All').count() + log.debug("Deleting %d 'All' records" % count) + count = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == 'All').delete() + log.debug("Deleted %d 'All' records" % count) + + model.Session.flush() + model.Session.commit() + model.repo.commit_and_remove() def update_url_stats(period_name, period_complete_day, url_data): @@ -213,8 +226,8 @@ 'period_name': 'All', 'period_complete_day': 0, 'url': url, - 'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews, - 'visits': sum([int(e.visits or 0) for e in entries]) + old_visits, + 'pageviews': sum([int(e.pageviews) for e in entries]) + int(old_pageviews), + 'visits': sum([int(e.visits or 0) for e in entries]) + int(old_visits), 'department_id': publisher, 'package_id': package } @@ -340,10 +353,10 @@ ''' for object_type in (GA_Url, GA_Stat, GA_Publisher, GA_ReferralStat): q = model.Session.query(object_type) - if period_name != 'all': + if period_name != 'All': q = q.filter_by(period_name=period_name) q.delete() - model.Session.commit() + model.repo.commit_and_remove() def get_score_for_dataset(dataset_name): ''' --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -50,9 +50,12 @@ dataset = model.Package.get(ga_url.url[len('/dataset/'):]) if dataset and not dataset.state == 'active': dataset = None - count += 1 - if count > 10: - break + # When testing, it is possible that top datasets are not available + # so only go round this loop a few times before falling back on + # a random dataset. + count += 1 + if count > 10: + break if not dataset: # fallback dataset = model.Session.query(model.Package)\ --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -5,6 +5,14 @@ xmlns:xi="http://www.w3.org/2001/XInclude" py:strip="" > + + + --- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -27,13 +27,9 @@
- + ${month_selector(c.month, c.months, c.day)} +
@@ -41,14 +37,13 @@
- - - --- a/ckanext/ga_report/templates/ga_report/publisher/read.html +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -25,12 +25,9 @@
- + + ${month_selector(c.month, c.months, c.day)} + - - - - - + ${month_selector(c.month, c.months, c.day)} +
Publisher Dataset Views
${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name))} + + ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))} ${views}