#!/usr/bin/python
"""Save the search queries from the most recent Analytics report e-mail.

The script logs in to an IMAP server, looks in IMAP_MB for messages whose
subject matches EMAIL_SUBJECT, takes the match with the highest message
number, reads its "application/octet-stream" attachment as CSV and writes one
"keyword... entrances" line per query to the output file.

The code sticks to syntax that is valid on both Python 2.6+ and Python 3.
"""

import csv
import email
import getopt
import os
import sys
import time
from imaplib import *

# Sample usage:
# ./search_queries.py --passfile=[FILENAME] \
#                     --username=[USERNAME] \
#                     --host=[DOMAIN] \
#                     --outfile=[FILENAME]
#
# The short forms -p, -u, -h and -o are accepted as well.

EMAIL_SUBJECT = 'Analytics asirap.net'
IMAP_MB = "INBOX"

# Defaults used when the corresponding command-line option is not given.
IMAP_HOST = ""
IMAP_PASS = ""
IMAP_USER = ""
OUTFILE = ""


def parse_options(argv):
    """Parse the command-line arguments in *argv* (without the program name).

    Returns a dict with the keys "host", "username", "password" and
    "outfile".  The password is the stripped content of the file named by
    --passfile.

    Raises getopt.GetoptError for an unknown option, a missing option
    argument, or a missing --outfile.
    """
    # Every short option takes a value, hence the trailing colon on each.
    opts, _ = getopt.getopt(argv, 'p:u:h:o:',
                            ["passfile=", "username=", "host=", "outfile="])
    options = {
        "host": IMAP_HOST,
        "username": IMAP_USER,
        "password": IMAP_PASS,
        "outfile": OUTFILE,
    }
    for flag, value in opts:
        if flag in ("-p", "--passfile"):
            with open(os.path.expanduser(value)) as passfile:
                options["password"] = passfile.read().strip()
        elif flag in ("-u", "--username"):
            options["username"] = value
        elif flag in ("-h", "--host"):
            options["host"] = value
        elif flag in ("-o", "--outfile"):
            options["outfile"] = value
    # An empty path can never be opened, so fail before contacting the server
    # instead of after all the work has been done.
    if not options["outfile"]:
        raise getopt.GetoptError("option --outfile is required")
    return options


def fetch_latest_report(host, username, password):
    """Return the newest matching report as an email message, or None.

    Connects to *host* over SSL, logs in, selects IMAP_MB and searches for
    EMAIL_SUBJECT.  "Newest" is, naively, the match with the highest message
    number.  None is returned when the search or fetch does not succeed or
    nothing matches.  The connection is always logged out.
    """
    server = IMAP4_SSL(host)
    try:
        server.login(username, password)
        server.select(IMAP_MB)

        # Find any Analytics reports in the mailbox.  The subject contains a
        # space, so it is quoted explicitly rather than relying on imaplib to
        # do it.
        typ, data = server.search(None, 'SUBJECT', '"%s"' % EMAIL_SUBJECT)
        if typ != 'OK' or not data:
            return None
        msg_nums = [int(num) for num in (data[0] or b'').split()]
        if not msg_nums:
            return None

        typ, data = server.fetch(str(max(msg_nums)), '(RFC822)')
        if typ != 'OK' or not data or not isinstance(data[0], tuple):
            return None

        # Python 3 hands back bytes; Python 2 has no message_from_bytes.
        parse = getattr(email, "message_from_bytes",
                        email.message_from_string)
        return parse(data[0][1])
    finally:
        server.logout()


def read_report_attachment(msg):
    """Return the decoded text of the report attachment in *msg*, or None.

    Walks all message parts; if several "application/octet-stream" parts
    exist, the last one wins.
    """
    text = None
    for part in msg.walk():
        if part.get_content_type() != "application/octet-stream":
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        if not isinstance(payload, str):
            # bytes on Python 3: turn them into text for the csv module.
            payload = payload.decode(part.get_content_charset() or "utf-8",
                                     "replace")
        text = payload
    return text


def parse_report_rows(text):
    """Split CSV *text* into a list of rows (each a list of strings).

    Any of "\\r\\n", "\\r" or "\\n" is accepted as a line ending, independent of
    the platform the script runs on.  A trailing line ending produces a final
    empty row.
    """
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
    return list(csv.reader(normalized.split("\n")))


def extract_queries(rows):
    """Return the query rows that follow the ['Keyword', 'Entrances'] header.

    The row right after the header (always unset in the author's reports) and
    the very last row of *rows* (a demarcation line) are left out.  Returns an
    empty list when the header is not present.
    """
    for index, row in enumerate(rows):
        if list(row) == ['Keyword', 'Entrances']:
            return rows[index + 2:len(rows) - 1]
    return []


def format_queries(queries):
    """Return the output text: one "keyword... entrances" line per query.

    Rows with fewer than two fields (blank or separator lines) are skipped.
    """
    # Personalize your formatting however...
    return "".join("%s... %s\n" % (row[0], row[1])
                   for row in queries if len(row) >= 2)


def main(argv=None):
    """Run the script; *argv* defaults to the process arguments."""
    if argv is None:
        argv = sys.argv[1:]

    # Retrieve server, username, and password to authenticate to the IMAP
    # server.
    try:
        options = parse_options(argv)
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(2)

    msg = fetch_latest_report(options["host"], options["username"],
                              options["password"])
    if msg is None:
        sys.exit("No message with subject '%s' found in %s"
                 % (EMAIL_SUBJECT, IMAP_MB))

    text = read_report_attachment(msg)
    if text is None:
        sys.exit("The report has no application/octet-stream attachment")

    queries = extract_queries(parse_report_rows(text))
    with open(options["outfile"], 'w') as out:
        out.write(format_queries(queries))


if __name__ == "__main__":
    main()