Skip to content

Instantly share code, notes, and snippets.

@ramSeraph
Created September 3, 2024 05:04
Show Gist options
  • Save ramSeraph/152fc8fd67a8a786460b3b3931adb975 to your computer and use it in GitHub Desktop.
Save ramSeraph/152fc8fd67a8a786460b3b3931adb975 to your computer and use it in GitHub Desktop.
Code to convert .doc files to PDF using Google Drive
# License: UNLICENSE
import io
import json
import string
import random
import mimetypes
from pathlib import Path
import magic
from httplib2 import Http
from oauth2client import file, client, tools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
def get_random_string(n, alphabet=string.ascii_uppercase + string.digits):
    """Return a random string of ``n`` characters.

    Args:
        n: Number of characters to generate.
        alphabet: Characters to draw from, with replacement. Defaults to
            uppercase ASCII letters plus digits.

    Returns:
        A ``str`` of length ``n`` made only of characters from ``alphabet``.
    """
    # random.choices samples with replacement; this is not a secure token,
    # just a throwaway identifier.
    return ''.join(random.choices(alphabet, k=n))
def export_pdf(file_id, service):
    """Export a Drive file as PDF and return its content.

    Args:
        file_id: ID of the Drive file to export.
        service: Drive API client; ``service.files().export_media`` is used
            to build the export request.

    Returns:
        The exported PDF as ``bytes``.
    """
    request = service.files().export_media(
        fileId=file_id, mimeType="application/pdf"
    )
    # Named ``buffer`` (not ``file``) so the module-level ``file`` imported
    # from oauth2client is not shadowed inside this function.
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        # Each call fetches the next chunk and reports overall progress.
        status, done = downloader.next_chunk()
        print(f"Download {int(status.progress() * 100)}.")
    return buffer.getvalue()
def upload_file(content, service,
                target_mimetype="application/vnd.google-apps.spreadsheet"):
    """Upload raw bytes to Drive under a random name and return the file ID.

    Args:
        content: The file content as ``bytes``; its MIME type is sniffed
            with ``magic.from_buffer``.
        service: Drive API client; ``service.files().create`` is called.
        target_mimetype: Value sent as ``mimeType`` in the file metadata.
            Defaults to the Google Sheets type, as in the original code.
            NOTE(review): for word-processing input
            ``application/vnd.google-apps.document`` is presumably the
            intended target -- confirm against the Drive import docs.

    Returns:
        The ``id`` field of the created Drive file.
    """
    source_mimetype = magic.from_buffer(content, mime=True)
    # guess_extension returns None for MIME types it does not know; fall
    # back to no extension instead of failing on ``str + None``.
    ext = mimetypes.guess_extension(source_mimetype, strict=True) or ""
    fname = get_random_string(7) + ext
    file_metadata = {
        "name": fname,
        "mimeType": target_mimetype,
    }
    media = MediaIoBaseUpload(io.BytesIO(content), mimetype=source_mimetype)
    # ``created`` rather than ``file`` to avoid shadowing the oauth2client
    # ``file`` module imported at the top of the script.
    created = (
        service.files()
        .create(body=file_metadata, media_body=media, fields="id")
        .execute()
    )
    return created.get("id")
def delete_file(file_id, service):
    """Remove the Drive file ``file_id``.

    A direct permanent delete is attempted first. If the API rejects it
    with an ``HttpError``, the file is moved to the trash and the trash is
    emptied instead.

    Args:
        file_id: ID of the Drive file to remove.
        service: Drive API client.

    Returns:
        None.
    """
    try:
        # The request object must be executed; merely building it with
        # ``files().delete(...)`` sends nothing to the API.
        service.files().delete(fileId=file_id).execute()
    except HttpError:
        # Fallback: trash the file, then empty the trash.
        # emptyTrash() is called without a file ID, so it is not limited to
        # this file -- other trashed files are presumably removed as well.
        service.files().update(fileId=file_id, body={'trashed': True}).execute()
        service.files().emptyTrash().execute()
def get_service(storage_path='storage.json', secrets_path='credentials.json'):
    """Build an authorized Drive v3 API client.

    Stored credentials are read from ``storage_path``. When none are stored
    or they are invalid, an OAuth flow is run from the client secrets in
    ``secrets_path`` and the result is saved back through the same store.

    Args:
        storage_path: File used by ``oauth2client.file.Storage`` to persist
            credentials. Defaults to ``'storage.json'``.
        secrets_path: OAuth client-secrets file. Defaults to
            ``'credentials.json'``.

    Returns:
        The object returned by ``build('drive', 'v3', ...)``.
    """
    scope = 'https://www.googleapis.com/auth/drive'
    store = file.Storage(storage_path)
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets(secrets_path, scope)
        # NOTE(review): run_flow is called without explicit flags; it
        # presumably reads its options from the command line -- confirm
        # before giving this script its own CLI arguments.
        creds = tools.run_flow(flow, store)
    return build('drive', 'v3', http=creds.authorize(Http()))
def convert(from_file, to_file, service=None):
    """Convert a local file to PDF by round-tripping it through Drive.

    The file is uploaded, exported as PDF, written to ``to_file``, and the
    uploaded copy is then deleted from Drive.

    Args:
        from_file: Path of the local file to convert.
        to_file: Path where the PDF bytes are written.
        service: Drive API client. When ``None``, one is created with
            ``get_service()``.

    Returns:
        None.
    """
    if service is None:
        service = get_service()
    from_bytes = Path(from_file).read_bytes()
    print('uploading file')
    file_id = upload_file(from_bytes, service)
    try:
        print('downloading file as pdf')
        pdf_content = export_pdf(file_id, service)
        Path(to_file).write_bytes(pdf_content)
    finally:
        # Always clean up the uploaded copy, even when the export or the
        # local write fails, so failed runs do not leave files on Drive.
        print('deleting file', file_id)
        delete_file(file_id, service)
if __name__ == "__main__":
    # Example run: convert the sample document sitting next to this script.
    convert(from_file='ms1288-2007.doc', to_file='ms1288-2007.pdf')
python-magic
httplib2
google-api-python-client
google-auth-httplib2
google-auth-oauthlib
@ramSeraph
Copy link
Author

WARNING: Deleting leftover files was not working due to (what I am guessing are) permission issues, so I move them to the trash and then empty the trash.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment