|
#! /usr/bin/python |
|
# -*- coding: utf-8 -*- |
|
|
|
import argparse |
|
import csv |
|
import codecs |
|
import cStringIO |
|
import requests |
|
|
|
from bs4 import BeautifulSoup |
|
from datetime import datetime |
|
from PIL import Image |
|
from StringIO import StringIO |
|
|
|
# Base endpoint of the Sodexo balance-query page; the operation to run is
# selected through the ``operation`` query parameter.
SODEXO_URL = 'https://sodexosaldocartao.com.br/saldocartao/consultaSaldo.do'
# Requested first by the scraper to set up the session.
SETUP_URL = '{0}?operation=setUp'.format(SODEXO_URL)
# Target of the request that submits card number, CPF and CAPTCHA text.
POST_URL = '{0}?operation=consult'.format(SODEXO_URL)
# Serves the CAPTCHA image that is shown to the user.
CAPTCHA_URL = 'https://sodexosaldocartao.com.br/saldocartao/jcaptcha.do'

# Header row of the exported CSV. Kept as a one-element list so it can be
# concatenated directly with the list of transaction rows.
CSV_HEADER = [('Date', 'Payee', 'Category', 'Memo', 'Outflow', 'Inflow')]
# Fixed "Payee" value written on every exported row.
PAYEE = 'Restaurante'
# Fixed "Category" value written on every exported row.
CATEGORY = 'Sodexo'
|
|
|
|
|
class AuthenticationError(Exception):
    """Signal that the site rejected the supplied card credentials.

    The exception message carries the text of the message returned by the
    site.
    """
|
|
|
|
|
class UnicodeCSVWriter(object): |
|
"""A CSV writer with different encoding.""" |
|
|
|
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): |
|
# Redirect output to a queue |
|
self.queue = cStringIO.StringIO() |
|
self.writer = csv.writer(self.queue, dialect=dialect, **kwds) |
|
self.stream = f |
|
self.encoder = codecs.getincrementalencoder(encoding)() |
|
|
|
def writerow(self, row): |
|
self.writer.writerow([s.encode("utf-8") for s in row]) |
|
data = self.queue.getvalue() |
|
data = data.decode("utf-8") |
|
data = self.encoder.encode(data) |
|
self.stream.write(data) |
|
self.queue.truncate(0) |
|
|
|
def writerows(self, rows): |
|
for row in rows: |
|
self.writerow(row) |
|
|
|
|
|
class SodexoScraper(object):
    """Scrape the Sodexo balance site for one card's transaction history.

    A single ``requests`` session is used for the setup request, the
    CAPTCHA download and the balance query, so all of them share the same
    session state.
    """

    def __init__(self, card_number, cpf):
        """Store the credentials and open the HTTP session.

        :param card_number: Sodexo card number, sent as ``cardNumber``.
        :param cpf: CPF of the card holder, sent as ``cpf``.
        """
        self.session = requests.Session()
        self.card_number = card_number
        self.cpf = cpf

    @staticmethod
    def _translate_entry(date, value, xtype, auth, memo):
        """Map the five cells of one table row onto the CSV column layout.

        Rows whose type starts with ``'C'`` are exported as inflow; every
        other type is exported as outflow. ``auth`` is accepted because it
        is one of the table columns, but it is not exported.
        """
        is_inflow = xtype.startswith('C')
        inflow = value if is_inflow else ''
        outflow = '' if is_inflow else value
        return date, PAYEE, CATEGORY, memo, outflow, inflow

    def _parse_html(self, html):
        """Parse the balance page and return its transactions.

        :param html: raw body of the balance-query response.
        :returns: list of ``(date, payee, category, memo, outflow, inflow)``
            tuples, one per table row that contains ``<td>`` cells.
        :raises AuthenticationError: if the page contains an element with
            id ``msgRetorno``; its stripped text becomes the message.
        :raises ValueError: if the page has no ``gridSaldo`` table.
        """
        soup = BeautifulSoup(html, 'lxml', from_encoding='utf-8')
        # Parenthesised so the statement parses on Python 2 and 3 alike;
        # the printed text is unchanged.
        print('Encoding: {0}'.format(soup.original_encoding))

        msg_return = soup.find(id='msgRetorno')
        if msg_return is not None:
            raise AuthenticationError(msg_return.text.strip())

        balance_table = soup.find(id='gridSaldo')
        if balance_table is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                'Balance table "gridSaldo" not found in the response.')

        entries = []
        for row in balance_table.find_all('tr'):
            cells = [cell.text for cell in row.find_all('td')]
            # Rows without <td> cells (presumably header rows) hold no data.
            if cells:
                entries.append(self._translate_entry(*cells))
        return entries

    def _prompt_captcha(self):
        """Download the CAPTCHA image, display it and ask the user for it.

        :returns: the text typed by the user at the ``CAPTCHA: `` prompt.
        """
        r = self.session.get(CAPTCHA_URL)
        captcha_image = Image.open(StringIO(r.content))
        captcha_image.show()
        return raw_input('CAPTCHA: ')

    def _post_card(self, captcha_text):
        """Submit card number, CPF and CAPTCHA text; return the raw response.

        :param captcha_text: CAPTCHA solution typed by the user.
        :returns: body of the HTTP response.
        """
        post_data = {
            'service': '5;1;6',
            'cardNumber': self.card_number,
            'cpf': self.cpf,
            'hiddenField': captcha_text,
        }
        # NOTE(review): ``params=`` places these fields in the URL query
        # string rather than in the request body. If the endpoint also
        # accepts a form body, ``data=`` would keep the card number and CPF
        # out of the URL -- confirm against the site before changing.
        r = self.session.post(POST_URL, params=post_data)
        return r.content

    def get_transaction_history(self):
        """Scrape the website and return the transaction history.

        :returns: list of CSV-ready row tuples (see ``_parse_html``).
        :raises AuthenticationError: if the site answers with a message
            instead of the balance table.
        """
        # The setup request has to happen before the CAPTCHA and the query.
        self.session.get(SETUP_URL)

        captcha_resp = self._prompt_captcha()
        response = self._post_card(captcha_resp)
        return self._parse_html(response)

    def save_csv_data(self, filename):
        """Export the transaction history as a CSV file.

        :param filename: path of the CSV file to create or overwrite.
        """
        data = self.get_transaction_history()
        # The Python 2 csv module expects a binary-mode file: in text mode
        # the dialect's '\r\n' terminator becomes '\r\r\n' on Windows.
        with open(filename, 'wb') as fd:
            csvwriter = UnicodeCSVWriter(fd, delimiter=',', quotechar='"')
            csvwriter.writerows(CSV_HEADER + data)
|
|
|
|
|
def parse_arguments(argv=None):
    """Parse the command-line arguments.

    :param argv: list of argument strings to parse. ``None`` (the default)
        makes argparse read the arguments from ``sys.argv[1:]``.
    :returns: namespace with the ``card_number`` and ``cpf`` attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('card_number', help='Sodexo Card Number.')
    parser.add_argument('cpf', help='User CPF.')
    return parser.parse_args(argv)
|
|
|
|
|
def main():
    """Run the scraper from the command line.

    The transaction history is written to a CSV file in the current
    directory whose name embeds the current date.
    """
    options = parse_arguments()
    scraper = SodexoScraper(options.card_number, options.cpf)

    # Output name looks like sodexo_saldo_YYYYMMDD.csv.
    today = datetime.now()
    scraper.save_csv_data(today.strftime('sodexo_saldo_%Y%m%d.csv'))
|
|
|
|
|
# Run the exporter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()