Skip to content

Instantly share code, notes, and snippets.

Created June 18, 2022 18:40
Show Gist options
  • Save luizbafilho/02e6592ab27e51aa0bd73948f0b9877e to your computer and use it in GitHub Desktop.
Save luizbafilho/02e6592ab27e51aa0bd73948f0b9877e to your computer and use it in GitHub Desktop.
# deps: requests furl beautifulsoup4 flask
import requests
from bs4 import BeautifulSoup as Soup
import http
import pathlib
import furl
import urllib
import re
import csv
# Base URL that make_abs() resolves site-relative links against.
# NOTE(review): no definition of DOMAIN is visible in this file; the value is
# inferred from the cookie file name (jpdb-cookie.txt) -- confirm.
DOMAIN = 'https://jpdb.io'

# Netscape/Mozilla-format cookie file holding the logged-in browser cookies.
cookies = pathlib.Path('./jpdb-cookie.txt')
jar = http.cookiejar.MozillaCookieJar(cookies)
# MozillaCookieJar does not read its file on construction; without load() the
# session would send no cookies. ignore_discard=True keeps session cookies
# (those saved without an expiry), which load() skips by default.
jar.load(ignore_discard=True)

# One shared session so every request carries the cookies loaded above.
sess = requests.Session()
sess.cookies = jar
def make_abs(rel_link, drop=None):
    """Resolve ``rel_link`` against ``DOMAIN`` and return the absolute URL.

    The ability to drop a URL parameter is folded into this function: when
    ``drop`` names a query parameter present in the link, that parameter is
    removed first, which makes the URLs printed to the console look prettier.
    """
    parsed = furl.furl(rel_link)
    if drop is not None and drop in parsed.args:
        del parsed.args[drop]
    return urllib.parse.urljoin(DOMAIN, parsed.url)
def _require_link(soup, pattern, description):
    """Return the href of the first ``<a>`` whose href matches ``pattern``.

    Raises LookupError (mentioning ``description``) when no such link exists,
    instead of failing later with an opaque ``'NoneType' is not subscriptable``.
    """
    link = soup.find("a", href=pattern)
    if link is None:
        raise LookupError(f'could not find {description}')
    return link['href']


def set_custom_sentence(vocab, sentence, sentence_eng):
    """Set a custom example sentence for ``vocab`` on the site.

    Searches for ``vocab``, follows the first result link whose href contains
    it, finds the "edit shown sentence" link on that page and POSTs the
    sentence and its translation to it.

    Args:
        vocab: The word to search for; it must appear verbatim in the href of
            the vocabulary link.
        sentence: The sentence to submit.
        sentence_eng: The translation submitted alongside ``sentence``.

    Raises:
        LookupError: If the vocabulary link or the edit link is not found.
        requests.HTTPError: If the final POST returns an error status.
    """
    # search for the vocab; params= lets requests percent-encode the query
    # NOTE(review): the search path was missing from the original URL;
    # '/search?q=...' is presumed -- confirm against the site.
    search_page = sess.get(make_abs('/search'), params={'q': vocab, 'lang': 'english'})
    search_soup = Soup(search_page.content, "html.parser")

    # find a link to the page with the right reading
    # re.escape keeps regex metacharacters in vocab from changing the pattern
    vocab_pattern = re.compile(rf"{re.escape(vocab)}(/.*)?\?")
    vocab_page_link_rel = _require_link(
        search_soup, vocab_pattern, f'a vocabulary page link for {vocab!r}')
    vocab_page_link_abs = make_abs(vocab_page_link_rel, drop='expand')
    vocab_page = sess.get(vocab_page_link_abs)
    vocab_soup = Soup(vocab_page.content, "html.parser")

    # grab the "Edit sentence" link
    # strip the origin param, since it doesn't really do anything for us
    edit_sentence_link_rel = _require_link(
        vocab_soup, re.compile(r"/edit-shown-sentence"),
        f'the edit-sentence link for {vocab!r}')
    edit_sentence_link_abs = make_abs(edit_sentence_link_rel, drop='origin')

    # then POST to it with the sentence we want
    payload = {'sentence': sentence, 'translation': sentence_eng}
    resp = sess.post(edit_sentence_link_abs, data=payload)
    # surface HTTP-level failures instead of silently dropping the response
    resp.raise_for_status()
def reading_anki_notes(path='anki-notes-reduced.txt'):
    """Read tab-separated notes and pull out the fields needed for import.

    Args:
        path: Location of the tab-separated notes file. Defaults to
            ``anki-notes-reduced.txt`` in the current working directory.

    Returns:
        A list of dicts with the keys ``'vocab'``, ``'sentence'`` and
        ``'sentence_eng'``, one per row that has enough columns, in file
        order. Rows with fewer columns are skipped.
    """
    print("Reading Anki notes...")
    # Zero-based column positions of the fields inside each row.
    vocab_idx = 0
    sentence_idx = 4
    sentence_eng_idx = 6
    # A row is usable only if it reaches the right-most column we read.
    min_fields = max(vocab_idx, sentence_idx, sentence_eng_idx) + 1

    # Explicit UTF-8: the notes carry Japanese text, so decoding must not
    # depend on the platform's locale encoding. newline='' is what the csv
    # module expects so it can handle embedded line breaks itself.
    with open(path, newline='', encoding='utf-8') as notes:
        note_reader = csv.reader(notes, delimiter='\t')
        return [
            {
                'vocab': note[vocab_idx],
                'sentence': note[sentence_idx],
                'sentence_eng': note[sentence_eng_idx],
            }
            for note in note_reader
            if len(note) >= min_fields
        ]
if __name__ == "__main__":
    # Script entry point: read the exported notes, then push each note's
    # sentence to the site one at a time.
    notes = reading_anki_notes()
    print("Setting custom sentences...")
    for note in notes:
        try:
            set_custom_sentence(note['vocab'], note['sentence'], note['sentence_eng'])
        except Exception as err:
            # Best-effort import: report the note that failed and carry on
            # with the rest instead of aborting the whole run.
            print(f'failed importing: {note["vocab"]}, {note["sentence"]}, {note["sentence_eng"]}')
            print(f'  reason: {err!r}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment