-
-
Save john-hix/c47a4b98e9c5eba8869ff6d097c646d5 to your computer and use it in GitHub Desktop.
Google Keep "Takeout" to Markdown Converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Google Keep "Takeout" to Markdown Converter
# This allows you to convert your Google Keep notes that are downloaded from
# Google's "Takeout" system. This works with NextCloud's Notes system.
#
# Every ".txt" and ".json" file found in `input_path` becomes one Markdown
# file in `output_path`. Notes that carry labels are placed in a sub-folder
# named after their first label, and attachments are copied into an
# "ATTACHMENTS" folder next to the converted notes.

from datetime import datetime
import base64
import json
import os
import shutil

# Folder that is scanned for Takeout files, the extension given to every
# converted note, and the folder the converted notes are written to.
input_path = "/tmp/keep/"
output_extention = ".md"
output_path = "/tmp/keep_converted/"

# These are the note colours, lifted directly from the Takeout's CSS
note_colours = {
    "blue": "#3FC3FF",
    "brown": "#D7CCC8",
    "cerulean": "#82B1FF",
    "gray": "#B8C4C9",
    "green": "#95D641",
    "orange": "#FF9B00",
    "pink": "#F8BBD0",
    "purple": "#B388FF",
    "red": "#FF6D3F",
    "teal": "#1CE8B5",
    "yellow": "#FFDA00",
}


def _clean_annotation_field(annotation, key):
    """Return ``annotation[key]`` as a single line that is safe in a link.

    Newlines are removed and double quotes become single quotes because the
    value ends up inside a Markdown link, where the link title is wrapped in
    double quotes. A missing or empty field yields an empty string.
    """
    value = annotation.get(key) or ""
    return value.replace("\n", "").replace('"', "'").strip()


def _render_checklist(items):
    """Return one Markdown line per list item, striking out checked ones."""
    lines = []
    for item in items:
        text = item.get("text", "")
        if item.get("isChecked"):
            lines.append(f"🗹 ~~{text}~~\n")
        else:
            lines.append(f"☐ {text}\n")
    return "".join(lines)


def _copy_attachments(attachments, source_dir, dest_dir):
    """Copy a note's attachments and return the Markdown links to them.

    Files are copied from ``source_dir`` into ``dest_dir``/ATTACHMENTS, which
    is created when needed. JPEG attachments are always stored with a ".jpg"
    extension, whichever of ".jpg" / ".jpeg" the file on disk uses. Any other
    attachment keeps its recorded file name and is skipped, with a message,
    when the file is not on disk.

    Raises:
        OSError: a JPEG attachment could not be found or copied.
    """
    attachment_dir = os.path.join(dest_dir, "ATTACHMENTS")
    os.makedirs(attachment_dir, exist_ok=True)

    lines = []
    for attachment in attachments:
        file_path = attachment.get("filePath")
        if not file_path:
            continue

        if attachment.get("mimetype") == "image/jpeg":
            # The recorded extension and the one on disk do not always agree,
            # so look for both spellings and normalise the copy to ".jpg".
            stem = os.path.splitext(file_path)[0]
            target_name = f"{stem}.jpg"
            source = os.path.join(source_dir, target_name)
            alternate = os.path.join(source_dir, f"{stem}.jpeg")
            if not os.path.exists(source) and os.path.exists(alternate):
                source = alternate
        else:
            target_name = file_path
            source = os.path.join(source_dir, file_path)
            if not os.path.exists(source):
                print(f"Skipping missing attachment {source}")
                continue

        # copy2 keeps the file's timestamps; a failure here propagates with
        # its original exception type and traceback.
        shutil.copy2(source, os.path.join(attachment_dir, target_name))
        lines.append(f"* [{target_name}](ATTACHMENTS/{target_name})\n")
    return "".join(lines)


def _build_json_note(json_data, source_dir, dest_dir, fallback_timestamp):
    """Turn one parsed Keep JSON note into the pieces of a Markdown note.

    Args:
        json_data: The dictionary loaded from the note's ".json" file.
        source_dir: Folder the note was read from (attachments live there).
        dest_dir: Folder the converted notes are written to.
        fallback_timestamp: Seconds since the epoch, used when the note has
            no "userEditedTimestampUsec" value.

    Returns:
        A ``(title, colour, content, main_label, timestamp)`` tuple where
        ``colour`` is a hex string or "" and ``main_label`` is the name of
        the first label or "".
    """
    # A missing, empty or whitespace-only title falls back to the default.
    title = (json_data.get("title") or "").strip() or "No Title"

    # Only colours known to the table are reported in the note header.
    colour = note_colours.get((json_data.get("color") or "").lower(), "")

    content = ""
    if json_data.get("textContent"):
        content += json_data["textContent"].strip() + "\n"

    if json_data.get("listContent"):
        content += _render_checklist(json_data["listContent"])

    if json_data.get("attachments"):
        # Strip first so each section is separated by exactly one blank line.
        content = content.strip() + "\n\n## Attachments:\n"
        content += _copy_attachments(
            json_data["attachments"], source_dir, dest_dir
        )

    if json_data.get("annotations"):
        content = content.strip() + "\n\n## Embeds:\n"
        for annotation in json_data["annotations"]:
            description = _clean_annotation_field(annotation, "description")
            source = _clean_annotation_field(annotation, "source")
            link_text = _clean_annotation_field(annotation, "title")
            url = _clean_annotation_field(annotation, "url")
            content += f"* {source.title()}: "
            content += f'[{link_text}]({url} "{description}")\n'

    main_label = ""
    if json_data.get("labels"):
        content = content.strip() + "\n\n## Labels:\n"
        for label in json_data["labels"]:
            name = label.get("name", "")
            # The first label decides which folder the note is filed under.
            if not main_label:
                main_label = name
            content += f"* {name}\n"

    # Round and convert the value to an int, since we don't care about
    # anything smaller than seconds
    edited_usec = json_data.get("userEditedTimestampUsec")
    if edited_usec is None:
        timestamp = fallback_timestamp
    else:
        timestamp = int(round(edited_usec / 1000000))
    # fromtimestamp() without a tz argument renders the machine's local time.
    converted_timestamp = datetime.fromtimestamp(timestamp).isoformat()

    # Extra values; a key the export does not contain is shown as "unknown".
    content = content.strip() + "\n\n## Values:\n"
    content += f"* Colour: {json_data.get('color', 'unknown')}\n"
    content += f"* isArchived: {json_data.get('isArchived', 'unknown')}\n"
    content += f"* isPinned: {json_data.get('isPinned', 'unknown')}\n"
    content += f"* isTrashed: {json_data.get('isTrashed', 'unknown')}\n"
    content += f"* Last Modified: {converted_timestamp}\n"

    return title, colour, content, main_label, timestamp


def convert_folder(source_dir=None, dest_dir=None):
    """Convert every ".txt" and ".json" file in a folder to Markdown.

    Args:
        source_dir: Folder to read from; defaults to ``input_path``.
        dest_dir: Folder to write to; defaults to ``output_path``.

    Returns:
        The paths of the Markdown files that were written, in the order they
        were processed.

    Raises:
        OSError: a folder or file could not be read, written or copied.
        json.JSONDecodeError: a ".json" file does not contain valid JSON.
    """
    if source_dir is None:
        source_dir = input_path
    if dest_dir is None:
        dest_dir = output_path

    written = []
    # Sorted so that repeated runs process the notes in the same order.
    for file in sorted(os.listdir(source_dir)):
        file_name, file_extension = os.path.splitext(file)
        file_extension = file_extension.lower()
        source_file = os.path.join(source_dir, file)

        if file_extension == ".txt":
            with open(source_file, "r", encoding="utf-8") as text_file:
                content = text_file.read()
            title = file_name
            colour = ""
            main_label = ""
            # Plain text files carry no edit time of their own, so the
            # converted note inherits the source file's modification time.
            timestamp = int(os.path.getmtime(source_file))
        elif file_extension == ".json":
            with open(source_file, "r", encoding="utf-8") as json_file:
                json_data = json.load(json_file)
            title, colour, content, main_label, timestamp = _build_json_note(
                json_data,
                source_dir,
                dest_dir,
                int(os.path.getmtime(source_file)),
            )
        else:  # If it's any other file type, just skip it
            continue

        # Do some final clean up of the title and content, just in case
        title = title.strip()
        content = content.strip()

        # Now put together the new markdown file
        document = f"{title}\n{'-' * len(title)}\n"
        document += f"Colour: {colour}\n\n" if colour else "\n"
        document += content + "\n"

        # Notes with a label go in a folder named after the first label; a
        # "/" in the label would otherwise create nested folders.
        note_dir = dest_dir
        if main_label:
            note_dir = os.path.join(dest_dir, main_label.replace("/", "-"))
        os.makedirs(note_dir, exist_ok=True)

        new_file = os.path.join(note_dir, f"{file_name}{output_extention}")
        with open(new_file, "w", encoding="utf-8") as markdown_file:
            markdown_file.write(document)

        # Give the converted note the same "last edited" time as the original.
        os.utime(new_file, (timestamp, timestamp))
        print(f"Converted {source_file} to {new_file}")
        written.append(new_file)
    return written


def main():
    """Convert the notes in ``input_path`` into ``output_path``."""
    convert_folder(input_path, output_path)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment