Skip to content

Instantly share code, notes, and snippets.

@paulll
Last active March 14, 2020 19:55
Show Gist options
  • Save paulll/f14c1660ec83e7d8755bbf51b4dce1ea to your computer and use it in GitHub Desktop.
Save paulll/f14c1660ec83e7d8755bbf51b4dce1ea to your computer and use it in GitHub Desktop.
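Extract YouTube Music (YTM) playback history from Google Takeout JSON as artist/title rows (tab-separated by default; the CSV output line is present but commented out)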
import json, re, sys
# Extracts "artist <TAB> title" rows for YouTube Music plays from a Google
# Takeout ``watch-history.json`` file in the current directory.
# Rows are written to stdout; per-heuristic counters are written to stderr,
# so the two streams can be redirected independently.

# Channel name -> artist to credit when the video title itself carries no
# "artist - title" separator.
known_music_channels = {
    'Reol Official': 'Reol',
}

_UNKNOWN = 'unknown'          # placeholder until some heuristic fills a field
_WATCHED_PREFIX = 'Watched '  # every history title is expected to start so
_TOPIC_SUFFIX = ' - Topic'    # channel suffix treated as a native YTM/GPM artist

# Patterns are compiled once here instead of on every loop iteration.
_DASH_SEPARATOR = re.compile(r'\s+[–—-]\s+')
_CORNER_QUOTES = re.compile(r'「.*?」')
_LENTICULAR_QUOTES = re.compile(r'【.*?】')
_WHITE_CORNER_QUOTES = re.compile(r'『.*?』')
_COLON_SEPARATOR = re.compile(r':\s+')
_BY_SEPARATOR = re.compile(r'\s+by\s+')
_LENTICULAR_ANY = re.compile(r'【.*】')


def _channel_name(track):
    """Return the name of the first ``subtitles`` entry, or None.

    None is returned when the key is missing, the list is empty, or the
    entry has no ``name`` (the original indexed ``[0]`` unconditionally and
    crashed on an empty list).
    """
    subtitles = track.get('subtitles')
    if not subtitles:
        return None
    return subtitles[0].get('name')


def _guess_from_title(fullname):
    """Guess ``(artist, title)`` from a title that has no dash separator.

    Either element is ``'unknown'`` when no rule matched it. Rules are applied
    in order and later rules overwrite earlier ones.
    """
    artist = title = _UNKNOWN

    # 1. Asian-style brackets: 「title」, 【artist】, 『artist』 title
    corner = _CORNER_QUOTES.search(fullname)
    lenticular = _LENTICULAR_QUOTES.search(fullname)
    white_corner = _WHITE_CORNER_QUOTES.search(fullname)
    if corner:
        title = corner[0][1:-1]
    if lenticular:
        artist = lenticular[0][1:-1]
    if white_corner:
        artist = white_corner[0][1:-1]
        # everything after the closing 』 is taken as the title
        title = fullname[white_corner.end():].strip()

    # 2. "artist: title"
    pieces = _COLON_SEPARATOR.split(fullname)
    if len(pieces) == 2:
        artist, title = pieces

    # 3. "title by artist"
    pieces = _BY_SEPARATOR.split(fullname)
    if len(pieces) == 2:
        title, artist = pieces

    return artist, title


def parse_track(track, known_channels=None):
    """Derive artist and title for one YouTube Music history entry.

    Args:
        track: one entry of the Takeout list; must have a ``title`` key and
            may have a ``subtitles`` list whose first item has a ``name``.
        known_channels: channel-name -> artist mapping; defaults to the
            module-level ``known_music_channels``.

    Returns:
        ``(row, labels)``. ``row`` is ``(artist, title)``, or None when the
        entry is skipped (bare URL or removed video). ``labels`` is a tuple of
        counter names to increment for this entry.

    Raises:
        ValueError: if the title does not start with ``'Watched '``.
    """
    if known_channels is None:
        known_channels = known_music_channels

    raw_title = track['title']
    if not raw_title.startswith(_WATCHED_PREFIX):
        raise ValueError('unexpected history title: {!r}'.format(raw_title))
    fullname = raw_title[len(_WATCHED_PREFIX):]
    channel = _channel_name(track)

    artist = title = _UNKNOWN
    labels = []

    if channel is not None and channel.endswith(_TOPIC_SUFFIX):
        # Native YTM/GPM artist. Checked before the dash split: the channel
        # already names the artist, so a separator inside the title
        # (e.g. "Song - Remastered") must not be read as "artist - title".
        artist = channel[:-len(_TOPIC_SUFFIX)]
        title = fullname
        labels.append('Native (GPM/YTM)')
    else:
        parts = _DASH_SEPARATOR.split(fullname)
        if len(parts) >= 2:
            # "artist - title" upload.
            # NOTE(review): text after a second separator is discarded, as in
            # the original script — confirm whether that is intended.
            artist, title = parts[0], parts[1]
            labels.append('Guessed')
        elif 'watch?' in raw_title:
            # no metadata at all, only the video URL
            return None, ('Without metadata',)
        elif 'a video that has been removed' in raw_title:
            return None, ('Removed',)
        elif channel in known_channels:
            # known authors on youtube
            artist = known_channels[channel]
            title = fullname
            labels.append('Known')
        else:
            # 3rd-party upload without ' - ' sign: try to guess
            artist, title = _guess_from_title(fullname)
            if title != _UNKNOWN and artist != _UNKNOWN:
                labels.append('Guessed')

    # fallback: use the channel name / full title for whatever is still unset
    if artist == _UNKNOWN or title == _UNKNOWN:
        labels.append('Fallback')
        if artist == _UNKNOWN and channel is not None:
            artist = channel
        if title == _UNKNOWN:
            title = fullname

    # clean mess: drop any 【...】 decoration left in the artist name
    artist = _LENTICULAR_ANY.sub('', artist).strip()
    title = title.strip()
    return (artist, title), tuple(labels)


def main(path='watch-history.json'):
    """Print artist/title rows to stdout and the counters to stderr.

    Exits with status 1 after echoing the offending entry to stderr if an
    entry's title does not start with ``'Watched '``.
    """
    # JSON text is UTF-8; decoding explicitly avoids depending on the locale
    # default, and the ``with`` block closes the file.
    with open(path, encoding='utf-8') as source:
        items = json.load(source)
    music = [item for item in items if item.get('header') == 'YouTube Music']

    counters = {
        'Total': len(music),
        'Native (GPM/YTM)': 0,
        'Known': 0,
        'Guessed': 0,
        'Fallback': 0,
        'Removed': 0,
        'Without metadata': 0,
    }

    for track in music:
        try:
            row, labels = parse_track(track)
        except ValueError:
            # Keep the diagnostic out of the stdout data stream.
            print(track, file=sys.stderr)
            sys.exit(1)
        for label in labels:
            counters[label] += 1
        if row is None:
            continue
        artist, title = row
        # Alternative CSV row, disabled in the original script:
        #print('"{}","{}","","{}","",""'.format(artist,title,re.sub('\\.\\d{3}$', '', track['time'].replace('T', ' ').replace('Z', ''))))
        print(artist, '\t', title)

    print('-----------------', file=sys.stderr)
    for counter, value in counters.items():
        print('{}: {}'.format(counter, value), file=sys.stderr)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment