import httplib
import urllib
import urllib2
import re
import csv
# from csv import reader, DictReader
from cookielib import CookieJar
from string import ascii_letters, digits
from random import choice
from datetime import date
alphanums = list(ascii_letters + digits)
def _random_id(n):
"""
create a random alphanumeric identifier of length n
"""
''.join(choice(alphanums) for i in range(n))
def _convert_to_float(s):
"""
try to convert content to float, else return original content
"""
try:
return float(s)
except:
return s
class pyGAPI(object):
    """Google Analytics API that works through screen scraping

    Typical use: construct an instance (which logs in), call
    download_report(), then read the result with csv() or
    parse_csv_as_dicts().
    """

    def __init__(self, username, password, website_id=""):
        """
        provide login and password to be used to connect to Google Analytics
        all immutable system variables are also defined here
        website_id is the ID of the specific site on google analytics;
        when it is empty the first entry of list_sites() is used

        The constructor calls _connect(), so creating an instance performs
        network requests.

        Raises AttributeError when website_id is given but cannot be
        converted to an integer.
        """
        self.login_params = {
            'GA3T': _random_id(11),  # unique identifiers for session
            'GALX': _random_id(11),  # unique identifiers for session
            "continue": 'http://www.google.com/analytics/home/?et=reset&hl=en-US',
            'nui': '1',
            'hl': 'en-US',
            'rmShown': '1',
            "PersistentCookie": "yes",
            "Email": username,
            "Passwd": password,
            'service': 'analytics'
        }
        self.headers = [("Content-type", "application/x-www-form-urlencoded"),
                        ('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'),
                        ("Accept", "text/plain")]
        self.url_ServiceLoginBoxAuth = 'https://www.google.com/accounts/ServiceLoginBoxAuth'
        self.url_LoginDoneHtml = ('http://www.google.com/accounts/CheckCookie'
                                  '?chtml=LoginDoneHtml')
        self.url_AnalyticsHome = 'http://www.google.com/analytics/home'
        self.url_Export = 'https://www.google.com/analytics/reporting/export'
        self._connect()
        # set the website_id, requires a connection because we may need to pull
        # the list of the user's sites
        if not website_id:
            # if no website ID, use the first one from the list
            self.website_id = self.list_sites()[0]['id']
        else:
            try:
                self.website_id = str(int(website_id))
            except (TypeError, ValueError, OverflowError):
                # AttributeError is kept as the exception type so existing
                # callers that catch it keep working.
                raise AttributeError("website_id must be an integer")

    def _connect(self):
        """
        connect to Google Analytics

        Builds a fresh cookie jar and opener (stored as self.cj and
        self.opener), posts the login form and then issues two follow-up
        requests.
        """
        params = urllib.urlencode(self.login_params)
        self.cj = CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        self.opener.addheaders = self.headers
        # The response bodies are never used, so each response is closed
        # right away instead of being left open.
        self.opener.open(self.url_ServiceLoginBoxAuth, params).close()
        # Analytics now does a Javascript redirect, so we need to do two
        # additional requests to login and land on the Analytics homepage.
        self.opener.open(self.url_LoginDoneHtml).close()
        self.opener.open(self.url_AnalyticsHome).close()

    def list_sites(self):
        """
        get list of sites and corresponding IDs by
        screenscraping the analytics home page

        Returns a list of dicts, each with the keys 'site_name' and 'id'.
        """
        sites_body = self.opener.open('https://www.google.com/analytics/home').read()
        # NOTE(review): both patterns below look truncated. Neither defines
        # the named groups 'site' and 'id' that are read further down, so
        # match.group() will fail as written. Presumably the HTML markup
        # inside the patterns was lost at some point -- restore them from
        # the upstream source. They are kept value-identical here (only
        # switched to raw-string literals).
        re_option_list = re.compile(r'.*?')
        m = re_option_list.search(sites_body)
        if m is None:
            # no site selector found on the page
            return []
        return [
            {'site_name': match.group('site'), 'id': match.group('id')}
            for match in re.finditer(r'\s?', m.group(0))
        ]

    def list_reports(self):
        """
        show which reports are currently configured

        Returns a tuple of report names accepted by download_report().
        """
        return (
            'ReferringSourcesReport', 'SearchEnginesReport',
            'AllSourcesReport', 'KeywordsReport', 'CampaignsReport',
            'AdVersionsReport', 'TopContentReport',
            'ContentByTitleReport', 'ContentDrilldownReport',
            'EntrancesReport', 'ExitsReport', 'GeoMapReport',
            'LanguagesReport', 'HostnamesReport', 'SpeedsReport',
        )

    def download_report(self, report_name, date_range, inputcmp='average', inputfmt='2', limit='10000'):
        """
        download a specific report
        report_name is limited to what can be called from list_reports
        date_range should be a 2-tuple of Python dates like (date, date)
        inputcmp and inputfmt are sent as the 'cmp' and 'fmt' form fields
        limit is the number of entries to pull down

        The response body is stored in self.raw_data; nothing is returned
        on success. When date_range does not have exactly two items the
        string "daterange incorrect" is returned and no request is made.
        """
        # convert dates from a pair of dates to Google's input format
        # for instance:
        # (date(2008,1,1), date(2008,1,31)) becomes 20080101-20080131
        if len(date_range) != 2:
            # kept as a return value (not an exception) for backward
            # compatibility with existing callers
            return "daterange incorrect"
        first_day, last_day = date_range
        inputpdr = first_day.strftime("%Y%m%d") + '-' + last_day.strftime("%Y%m%d")
        params = urllib.urlencode({
            'id': self.website_id,
            'pdr': inputpdr,
            'cmp': inputcmp,
            'limit': limit,
            'rpt': report_name,
            'fmt': inputfmt,
        })
        # self.url_Export holds the same URL that used to be repeated here
        self.raw_data = self.opener.open(self.url_Export, params).read()

    def csv(self):
        """
        return just the CSV portion of the data

        Reads self.raw_data (set by download_report) and returns the text
        between the "Table" header block and the closing separator line.
        Returns '' when the header is absent; when the closing separator is
        absent, everything after the header is returned.
        """
        table_head = "# ----------------------------------------\n# Table\n# ----------------------------------------\n"
        table_end = "\n# --------------------------------------------------------------------------------"
        table_head_pos = self.raw_data.find(table_head)
        if table_head_pos < 0:
            # str.find() returns -1 here; slicing with it would return
            # unrelated text
            return ''
        start = table_head_pos + len(table_head)
        # Search from start - 1: for an empty table the newline that ends
        # the header is also the newline that begins the closing separator.
        table_end_pos = self.raw_data.find(table_end, start - 1)
        if table_end_pos < 0:
            # without this guard the -1 would silently drop the last character
            return self.raw_data[start:]
        return self.raw_data[start:table_end_pos]

    def parse_csv_as_dicts(self,
                           convert_numbers=False,
                           exclude_columnnames=('Keyword',)):
        """
        parse the CSV portion of the data into a list of dicts

        Each row becomes a dict keyed by the column names of the first CSV
        line. With convert_numbers=True every value is passed through
        _convert_to_float, except in the columns named in
        exclude_columnnames (a collection of names, or a single name).
        """
        # A bare string would turn the membership test below into a
        # substring test (e.g. a column called 'Key' would be excluded),
        # so a single name is wrapped into a tuple.
        if isinstance(exclude_columnnames, (str, type(u''))):
            exclude_columnnames = (exclude_columnnames,)
        results = list(csv.DictReader(self.csv().splitlines()))
        if convert_numbers:
            for row in results:
                # iterate over a snapshot of the keys while assigning values
                for column in list(row):
                    if column not in exclude_columnnames:
                        row[column] = _convert_to_float(row[column])
        return results