Created
March 14, 2017 20:57
-
-
Save fferri/526d85eb23392d6eb29c2485b988726f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from spotipy.oauth2 import SpotifyClientCredentials | |
import spotipy | |
import json | |
import urllib | |
import urllib.request | |
import urllib.parse | |
from bs4 import BeautifulSoup | |
import re | |
from unidecode import unidecode | |
import youtube_dl | |
def spotify_get_playlist_tracks(uri):
    """Yield an 'Artist1, Artist2 - Title' string for each track of a playlist.

    ``uri`` must consist of exactly five colon-separated fields; the third
    is used as the user name and the fifth as the playlist id (e.g.
    ``spotify:user:<username>:playlist:<playlist_id>``).  Any other field
    count raises ValueError when iteration starts.

    The Spotify client is built from ``SpotifyClientCredentials()`` called
    with no arguments, so the credentials must be discoverable by spotipy
    itself.
    """
    client_credentials_manager = SpotifyClientCredentials()
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    _, _, username, _, playlist_id = uri.split(':')
    page = sp.user_playlist(username, playlist_id)['tracks']
    # The playlist object only embeds the first page of tracks; keep
    # following the paging links so long playlists are not truncated.
    while page:
        for item in page['items']:
            track = item['track']
            # Guard against entries without a track object (the API may
            # return None there) and skip anything that is not a track.
            if not track or track['type'] != 'track':
                continue
            artists = [a['name'] for a in track['artists'] if a['type'] == 'artist']
            yield ', '.join(artists) + ' - ' + track['name']
        page = sp.next(page) if page.get('next') else None
def ytsearch(txt):
    """Yield ``(url, title)`` pairs scraped from a YouTube search for ``txt``.

    The results page is fetched over HTTP and every element carrying the
    CSS class ``yt-uix-tile-link`` is reported, in document order.  ``url``
    is the element's ``href`` prefixed with ``https://www.youtube.com`` and
    ``title`` is the element's text.  Nothing is yielded when the page
    contains no such element.
    """
    url = "https://www.youtube.com/results?search_query={}".format(urllib.parse.quote(txt))
    # Read the whole body inside the with-block so the HTTP response is
    # closed before parsing, even if the caller abandons the generator.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    for vid in soup.find_all(attrs={'class': 'yt-uix-tile-link'}):
        yield 'https://www.youtube.com' + vid['href'], vid.text
def jaccard_similarity(t1, t2):
    """Return the Jaccard similarity (0.0 to 1.0) of the word sets of two titles.

    Each title is lower-cased, passed through ``unidecode``, has every
    character outside ``a-z`` replaced by a space, and is split on
    whitespace; the tokens 'hd' and 'vs' are discarded.  The result is
    ``len(intersection) / len(union)`` of the two token sets.

    Returns 0.0 when neither title contains any usable token (for example
    titles made only of digits or punctuation) instead of dividing by zero.
    """
    stopwords = {'hd', 'vs'}

    def normalize_title(t):
        # Reduce a title to its set of significant lower-case ASCII words.
        t = unidecode(t.lower())
        t = re.sub(r'[^a-z]', ' ', t)
        return {token for token in t.split() if token not in stopwords}

    s1, s2 = normalize_title(t1), normalize_title(t2)
    union = s1 | s2
    if not union:
        # Both token sets are empty: there is no evidence of a match.
        return 0.0
    return len(s1 & s2) / len(union)
# Options handed to youtube_dl.YoutubeDL: select the 'bestaudio/best'
# format, then run the FFmpegExtractAudio postprocessor with mp3 as the
# preferred codec and '320' as the preferred quality.
ydl_opts = dict(
    format='bestaudio/best',
    postprocessors=[
        dict(
            key='FFmpegExtractAudio',
            preferredcodec='mp3',
            preferredquality='320',
        ),
    ],
)
# Driver: for every track of the playlist, search YouTube, pick the result
# whose title is most similar to the track description, and download it.
# '...' is a placeholder for the playlist URI.
for t in spotify_get_playlist_tracks('...'):
    print(t)
    # Similarity is negated so the smallest tuple is the best match; rank
    # breaks ties in favour of the earlier search result.
    res = [
        (-jaccard_similarity(t, title), rank, url, title)
        for rank, (url, title) in enumerate(ytsearch(t))
    ]
    if not res:
        # Nothing to download for this track; carry on with the next one
        # instead of crashing on an empty candidate list.
        print('WARNING: no search results for "{}", skipping'.format(t))
        continue
    j, rank, url, title = min(res)
    # j == -1 means the token sets were identical; anything above that is
    # an imperfect match worth flagging.
    if j > -1:
        print('WARNING: possible bad match: "{}" <--> "{}"'.format(t, title))
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment