mark-mishyn · December 5, 2019 07:32
diff --git a/extract_body_from_gmail_mail.py b/extract_body_from_gmail_mail.py
 '''
 Example of usage:
 from googleapiclient.discovery import build
 from httplib2 import Http
 from oauth2client import file

 # Create google client first, for example
 storage = file.Storage('path_to_google_oauth_credentials')
 creds = storage.get()
 http = creds.authorize(Http())
 gmail_client = build('gmail', 'v1', http=http)

 # then retrieve mail data by API call
 gmail_message_id = 12345
 gmail_mail = gmail_client.users().messages().get(userId='me', id=gmail_message_id).execute()

 # finally, extract mail body
 mail_body = get_mail_body(gmail_mail['payload'])
 '''

 import base64
 from typing import List

 def get_mail_body(payload: dict) -> str:
     '''Return the decoded text body found in a Gmail message payload.

     The payload itself is used when its ``mimeType`` is ``text/html`` or
     ``text/plain`` and its ``body`` carries ``data``. Otherwise its
     ``parts`` are searched recursively, in the order produced by
     ``sort_parts_by_mime_type``. Every candidate part is tried in turn, so
     a text part without data does not hide a readable sibling.

     :param payload: dict with ``mimeType`` and ``body`` keys and optional
         ``parts`` / ``snippet`` keys.
     :return: the decoded body; if none is found, ``payload['snippet']``
         when present, else the literal ``'Can not read gmail body'``.
     :raises KeyError: if a visited part has no ``mimeType``, or a text
         part has no ``body``.
     '''
     def find_body(part):
         # Returns the decoded body of ``part`` or None when neither the
         # part nor any of its descendants carries body data.
         if part['mimeType'] in ('text/html', 'text/plain'):
             data = part['body'].get('data')
             if data:
                 return encode_decode_body(data)

         for child in sort_parts_by_mime_type(part.get('parts', [])):
             found = find_body(child)
             # Keep looking: an empty part must not end the search early.
             if found is not None:
                 return found

         return None

     body = find_body(payload)
     if body is not None:
         return body
     return payload.get('snippet', 'Can not read gmail body')

 def encode_decode_body(body: str) -> str:
     '''Decode a base64url-encoded string into text.

     :param body: ASCII string in the URL-safe base64 alphabet, with or
         without trailing ``=`` padding.
     :return: the decoded bytes interpreted with ``bytes.decode()``
         defaults (UTF-8, strict).
     :raises UnicodeEncodeError: if ``body`` contains non-ASCII characters.
     :raises binascii.Error: if ``body`` is not decodable base64.
     :raises UnicodeDecodeError: if the decoded bytes are not valid UTF-8.
     '''
     # base64url producers may drop the trailing '='; the decoder insists
     # on a length that is a multiple of four, so restore it first.
     padded = body + '=' * (-len(body) % 4)
     return base64.urlsafe_b64decode(padded.encode('ASCII')).decode()

 def sort_parts_by_mime_type(parts: List) -> List:
     '''Return the usable parts ordered by extraction priority.

     The result lists ``text/html`` parts first, then ``text/plain`` parts,
     then parts whose ``mimeType`` starts with ``multipart``. Parts of any
     other type are left out. Within each group the input order is kept.

     :param parts: iterable of dicts, each with a ``mimeType`` key.
     :return: a new list; ``parts`` itself is not modified.
     '''
     html_parts = [part for part in parts if part['mimeType'] == 'text/html']
     plain_parts = [part for part in parts if part['mimeType'] == 'text/plain']
     containers = [
         part for part in parts if part['mimeType'].startswith('multipart')
     ]
     # HTML is preferred over plain text; nested containers come last.
     return html_parts + plain_parts + containers
	'''
	Example of usage:
	from googleapiclient.discovery import build
	from httplib2 import Http
	from oauth2client import file

	# Create google client first, for example
	storage = file.Storage('path_to_google_oauth_credentials')
	creds = storage.get()
	http = creds.authorize(Http())
	gmail_client = build('gmail', 'v1', http=http)

	# then retrieve mail data by API call
	gmail_message_id = 12345
	gmail_mail = gmail_client.users().messages().get(userId='me', id=gmail_message_id).execute()

	# finally, extract mail body
	mail_body = get_mail_body(gmail_mail['payload'])
	'''

	import base64
	from typing import List

	def get_mail_body(payload: dict) -> str:
	    '''Return the decoded text body found in a Gmail message payload.

	    The payload itself is used when its ``mimeType`` is ``text/html`` or
	    ``text/plain`` and its ``body`` carries ``data``. Otherwise its
	    ``parts`` are searched recursively, in the order produced by
	    ``sort_parts_by_mime_type``. Every candidate part is tried in turn, so
	    a text part without data does not hide a readable sibling.

	    :param payload: dict with ``mimeType`` and ``body`` keys and optional
	        ``parts`` / ``snippet`` keys.
	    :return: the decoded body; if none is found, ``payload['snippet']``
	        when present, else the literal ``'Can not read gmail body'``.
	    :raises KeyError: if a visited part has no ``mimeType``, or a text
	        part has no ``body``.
	    '''
	    def find_body(part):
	        # Returns the decoded body of ``part`` or None when neither the
	        # part nor any of its descendants carries body data.
	        if part['mimeType'] in ('text/html', 'text/plain'):
	            data = part['body'].get('data')
	            if data:
	                return encode_decode_body(data)

	        for child in sort_parts_by_mime_type(part.get('parts', [])):
	            found = find_body(child)
	            # Keep looking: an empty part must not end the search early.
	            if found is not None:
	                return found

	        return None

	    body = find_body(payload)
	    if body is not None:
	        return body
	    return payload.get('snippet', 'Can not read gmail body')

	def encode_decode_body(body: str) -> str:
	    '''Decode a base64url-encoded string into text.

	    :param body: ASCII string in the URL-safe base64 alphabet, with or
	        without trailing ``=`` padding.
	    :return: the decoded bytes interpreted with ``bytes.decode()``
	        defaults (UTF-8, strict).
	    :raises UnicodeEncodeError: if ``body`` contains non-ASCII characters.
	    :raises binascii.Error: if ``body`` is not decodable base64.
	    :raises UnicodeDecodeError: if the decoded bytes are not valid UTF-8.
	    '''
	    # base64url producers may drop the trailing '='; the decoder insists
	    # on a length that is a multiple of four, so restore it first.
	    padded = body + '=' * (-len(body) % 4)
	    return base64.urlsafe_b64decode(padded.encode('ASCII')).decode()

	def sort_parts_by_mime_type(parts: List) -> List:
	    '''Return the usable parts ordered by extraction priority.

	    The result lists ``text/html`` parts first, then ``text/plain`` parts,
	    then parts whose ``mimeType`` starts with ``multipart``. Parts of any
	    other type are left out. Within each group the input order is kept.

	    :param parts: iterable of dicts, each with a ``mimeType`` key.
	    :return: a new list; ``parts`` itself is not modified.
	    '''
	    html_parts = [part for part in parts if part['mimeType'] == 'text/html']
	    plain_parts = [part for part in parts if part['mimeType'] == 'text/plain']
	    containers = [
	        part for part in parts if part['mimeType'].startswith('multipart')
	    ]
	    # HTML is preferred over plain text; nested containers come last.
	    return html_parts + plain_parts + containers