Last active
July 21, 2024 19:18
-
-
Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.
Fetch .lrc files for all songs in a directory from the Beautiful Lyrics API; supports the A2 extension (Enhanced LRC format).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import re

import requests
from bs4 import BeautifulSoup
# Function to load configuration from BLconfig.txt
def load_config():
    """Read the saved settings from ``BLconfig.txt`` in the working directory.

    Returns:
        A ``(naming_format, useA2, gapText)`` tuple. All three elements are
        ``None`` when the file is missing, cannot be read or parsed as JSON,
        or does not contain a JSON object. A key that is absent from the
        file yields ``None`` for that element only.
    """
    if not os.path.exists('BLconfig.txt'):
        return None, None, None
    try:
        with open('BLconfig.txt', 'r', encoding='utf-8') as config_file:
            config = json.load(config_file)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses. A damaged file is treated like a missing one instead
        # of aborting the whole script with a traceback.
        return None, None, None
    if not isinstance(config, dict):
        # Valid JSON, but not the object this script writes (e.g. a list).
        return None, None, None
    return config.get('naming_format'), config.get('useA2'), config.get('gapText')
# Function to save configuration to BLconfig.txt
def save_config(naming_format, useA2, gapText):
    """Write the three user settings to ``BLconfig.txt`` as one JSON object.

    The file is created in the current working directory and overwritten if
    it already exists.
    """
    settings = {
        'naming_format': naming_format,
        'useA2': useA2,
        'gapText': gapText,
    }
    with open('BLconfig.txt', 'w') as config_file:
        config_file.write(json.dumps(settings))
# Load naming format, useA2 and gapText from BLconfig.txt if it exists
naming_format, useA2, gapText = load_config()
if not naming_format:
    # No usable saved configuration: ask for every setting and remember it.
    naming_format = input("Enter the naming format (use %A for artist and %T for title): ")
    useA2 = input("Should use A2 extension (Enhanced LRC format) if available? (yes/no): ").strip().lower() == 'yes'
    gapText = input('What text should be displayed for instrumental sections. (Enter "MusicNote" for a music note ♪): ').strip()
    if gapText.lower().replace(' ', '') == "musicnote":
        gapText = "♪"
    save_config(naming_format, useA2, gapText)
else:
    # A config file that has a naming format but lacks one of the other keys
    # yields None for it. Left as None, gapText would be written into the
    # lyrics as the literal text "None", so fall back to neutral defaults.
    useA2 = bool(useA2)
    if gapText is None:
        gapText = ''
# Ask the user if they want to override existing files
override_existing = input("Do you want to override existing files? (yes/no): ").strip().lower() == 'yes'
# Audio file extensions (lower-case, including the leading dot) that the
# script treats as songs when scanning the directory.
supported_extensions = [
    ".3gp", ".aa", ".aac", ".aax", ".act", ".aiff", ".alac", ".amr",
    ".ape", ".au", ".awb", ".dss", ".dvf", ".flac", ".gsm", ".iklax",
    ".ivs", ".m4a", ".m4b", ".m4p", ".mmf", ".movpkg", ".mp3", ".mpc",
    ".msv", ".nmf", ".ogg", ".oga", ".mogg", ".opus", ".ra", ".rm",
    ".raw", ".rf64", ".sln", ".tta", ".voc", ".vox", ".wav", ".wma",
    ".wv", ".webm", ".8svx", ".cda",
]
def extract_artist_and_song(filename, naming_format):
    """Pull the artist and title out of *filename* using *naming_format*.

    ``%A`` in the format stands for the artist and ``%T`` for the title;
    every other character of the format must appear literally in the file
    name. The extension (the text after the last dot) is removed first and
    the format must then match the whole remaining name, so titles that
    contain dots themselves ("Mr. Brightside") are kept intact.

    Args:
        filename: File name including its extension.
        naming_format: Pattern such as ``"%A - %T"``.

    Returns:
        ``(artist, title)`` with surrounding whitespace stripped. When the
        name does not match the format, a message is printed and
        ``("unknown_artist", "unknown_title")`` is returned. A placeholder
        that is missing from the format yields the corresponding
        ``unknown_*`` value.
    """
    stem = os.path.splitext(filename)[0]

    # Escape the literal parts of the format, then swap each (escaped)
    # placeholder for a named, non-greedy capture group.
    regex = re.escape(naming_format)
    for placeholder, group in (('%A', '(?P<artist>.+?)'), ('%T', '(?P<title>.+?)')):
        regex = regex.replace(re.escape(placeholder), group)

    match = re.fullmatch(regex, stem)
    if match is None:
        print(f"The filename '{stem}' does not match the naming format '{naming_format}'")
        return "unknown_artist", "unknown_title"

    # groupdict() avoids an IndexError when the format omits a placeholder.
    found = match.groupdict()
    artist = found.get('artist') or "unknown_artist"
    title = found.get('title') or "unknown_title"
    return artist.strip(), title.strip()
def get_bearer_token():
    """Obtain an access token by scraping the Spotify web player start page.

    The page is fetched, the element with ``id="session"`` is located, its
    text is parsed as JSON and the ``accessToken`` entry is returned.

    Returns:
        The access token string.

    Raises:
        requests.RequestException: if the request fails, times out or
            returns an error status.
        RuntimeError: if the page contains no ``session`` element or that
            element does not hold JSON with an ``accessToken`` entry.
    """
    fetch_url = "https://open.spotify.com"
    # Without a timeout a stalled connection would hang the script forever.
    response = requests.get(fetch_url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    session_element = soup.find(id="session")
    if session_element is None:
        raise RuntimeError(
            f"No element with id 'session' found on {fetch_url}; cannot obtain an access token."
        )
    try:
        return json.loads(session_element.get_text())['accessToken']
    except (ValueError, KeyError, TypeError) as error:
        raise RuntimeError(
            f"The 'session' element on {fetch_url} does not contain an access token."
        ) from error
def search_spotify(artist, song, token):
    """Look up a track on Spotify and return its track ID.

    Args:
        artist: Artist name to search for.
        song: Track title to search for.
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The ID extracted from the ``href`` of the first search result.

    Raises:
        ValueError: if the search returns no tracks, or the first result's
            ``href`` contains no track ID.
        Exception: if the API answers with a status code other than 200.
    """
    # Passing the query through ``params`` lets requests percent-encode the
    # artist and title. Interpolating them into the URL by hand breaks the
    # query for names containing characters such as '&', '#' or '+'.
    response = requests.get(
        'https://api.spotify.com/v1/search',
        params={
            'query': f'artist: {artist} track: {song}',
            'type': 'track',
            'offset': 0,
            'limit': 1,
        },
        headers={'Authorization': f'Bearer {token}'},
        timeout=30,
    )
    if response.status_code != 200:
        raise Exception(f"Spotify API request failed with status code {response.status_code}")

    items = response.json()['tracks']['items']
    if not items:
        raise ValueError("No tracks found for the given artist and song.")

    match = re.search(r'tracks/([a-zA-Z0-9]+)', items[0]['href'])
    if not match:
        raise ValueError("Song ID not found in the href.")
    return match.group(1)
def fetch_lyrics(track_id):
    """Download the lyrics for a Spotify track from the Beautiful Lyrics API.

    Args:
        track_id: Spotify track ID appended to the lyrics endpoint URL.

    Returns:
        The decoded JSON body, or ``None`` when the status code is not 200
        or the response body is empty.
    """
    url = f'https://beautiful-lyrics.socalifornian.live/lyrics/{track_id}'
    headers = {
        'authorization': 'Bearer litterallyAnythingCanGoHereItJustTakesItLOL'
    }
    response = requests.get(url, headers=headers, timeout=30)
    # Check the body itself rather than the Content-Length header: the
    # header can be absent (e.g. chunked responses), in which case an empty
    # body would slip through and make response.json() fail.
    if response.status_code != 200 or not response.content:
        return None
    return response.json()
def convert_to_lrc_timestamp(timestamp):
    """Format a time given in seconds as an LRC ``mm:ss.xx`` timestamp.

    The value is rounded to whole hundredths of a second *before* it is
    split into minutes and seconds. Splitting first and rounding only the
    seconds part can yield an invalid "60.00" seconds field (for example
    59.999 would become "00:60.00" instead of "01:00.00").

    Args:
        timestamp: Non-negative number of seconds (int or float).

    Returns:
        The formatted timestamp, without surrounding brackets.
    """
    total_hundredths = round(timestamp * 100)
    minutes, remainder = divmod(total_hundredths, 6000)
    seconds, hundredths = divmod(remainder, 100)
    return f"{minutes:02}:{seconds:02}.{hundredths:02}"
def parse_lyrics(data, useA2, gapText):
    """Convert a lyrics API response into a list of LRC lines.

    Two response shapes are handled, selected by ``data['Type']``:

    * ``'Line'``: each ``'Vocal'`` entry of ``data['Content']`` carries
      ``StartTime``, ``EndTime`` and ``Text``.
    * ``'Syllable'``: each ``'Vocal'`` entry carries a ``Lead`` (and
      optionally a list of ``Background`` vocals), each with ``StartTime``,
      ``EndTime`` and ``Syllables``; a syllable has ``Text``, ``StartTime``
      and ``IsPartOfWord``.

    Any other ``Type`` produces an empty list.

    Args:
        data: Decoded JSON response.
        useA2: When true and the data is syllable-timed, emit Enhanced LRC
            lines with a ``<mm:ss.xx>`` tag in front of every word.
        gapText: Text placed on the marker line inserted when more than
            five seconds pass between two vocals; ``''`` emits a bare
            timestamp.

    Returns:
        A list of strings, one per LRC line.
    """
    lyrics = []
    prev_end_time = 0  # Latest end time of any vocal emitted so far

    def add_gap_marker(start_time):
        # Mark an instrumental section when the next vocal starts more than
        # five seconds after the previous one ended.
        if start_time - prev_end_time > 5:
            stamp = f"[{convert_to_lrc_timestamp(prev_end_time)}]"
            lyrics.append(stamp if gapText == '' else f"{stamp} {gapText}")

    def add_vocal(start_time, end_time, text, separator=' '):
        # Emit one timed line, preceded by a gap marker where needed.
        nonlocal prev_end_time
        add_gap_marker(start_time)
        lyrics.append(f"[{convert_to_lrc_timestamp(start_time)}]{separator}{text}")
        # Never move backwards: a background vocal may end before its lead
        # does, which would otherwise create a false gap marker in the
        # middle of the lead line.
        prev_end_time = max(prev_end_time, end_time)

    def plain_text(syllables):
        # Join syllables into words; a syllable flagged IsPartOfWord is
        # glued to the one that follows it.
        return ''.join(
            syllable['Text'] + ('' if syllable['IsPartOfWord'] else ' ')
            for syllable in syllables
        )

    def enhanced_text(syllables, parenthesize=False):
        # Build an Enhanced LRC line body: every new word is preceded by
        # its own <timestamp> tag. With parenthesize=True the whole text is
        # wrapped in one balanced pair of parentheses.
        pieces = []
        continues_word = False
        last_index = len(syllables) - 1
        for index, syllable in enumerate(syllables):
            text = syllable['Text']
            if parenthesize and index == 0:
                text = f"({text}"
            if parenthesize and index == last_index:
                text = f"{text})"
            if continues_word:
                pieces.append(text)
            else:
                word_stamp = convert_to_lrc_timestamp(syllable['StartTime'])
                pieces.append(f" <{word_stamp}> {text}")
            continues_word = syllable['IsPartOfWord']
        return ''.join(pieces).strip()

    if data['Type'] == 'Line':
        if useA2:
            print("The following song is not compatible with A2 extension (Enhanced LRC format), continuing with standard LRC")
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue
            add_vocal(item['StartTime'], item['EndTime'], item['Text'].strip())
            if 'Background' in item:
                print("This song has Background with Type Line, I was not able to find this in testing so I don't know the structure, please report this song, so I may add support for it.\n https://gist.github.com/yodaluca23/82ab1129e12f39e30c8e760a8c853c1f")
    elif data['Type'] == 'Syllable':
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue
            lead = item['Lead']
            if useA2:
                add_vocal(lead['StartTime'], lead['EndTime'],
                          enhanced_text(lead['Syllables']), separator='')
            else:
                add_vocal(lead['StartTime'], lead['EndTime'],
                          plain_text(lead['Syllables']).strip())
            # Background vocals follow their lead line, in parentheses.
            for bg in item.get('Background') or ():
                if useA2:
                    add_vocal(bg['StartTime'], bg['EndTime'],
                              enhanced_text(bg['Syllables'], parenthesize=True),
                              separator='')
                else:
                    add_vocal(bg['StartTime'], bg['EndTime'],
                              f"({plain_text(bg['Syllables']).rstrip()})")
    return lyrics
def save_lyrics(lrc_filename, lyrics_body, is_time_synced, filename):
    """Write the lyric lines to *lrc_filename* and report the result.

    Args:
        lrc_filename: Path of the ``.lrc`` file to create or overwrite.
        lyrics_body: Iterable of lines; they are joined with newlines.
        is_time_synced: Only selects the wording of the printed message.
        filename: Name of the audio file; shown without its extension.
    """
    # Always write UTF-8. With the platform default encoding, the music note
    # gap text or non-Latin lyrics raise UnicodeEncodeError on systems whose
    # locale encoding is not UTF-8.
    with open(lrc_filename, 'w', encoding='utf-8') as lrc_file:
        lrc_file.write("\n".join(lyrics_body))

    # splitext drops only the extension, so names containing dots are not
    # truncated in the message.
    display_name = os.path.splitext(filename)[0]
    sync_kind = "time-synced" if is_time_synced else "non-time-synced"
    print(f"Saved {sync_kind} lyrics for {display_name}")
def main():
    """Fetch and save ``.lrc`` lyrics for every song in the current directory.

    For each file whose extension is listed in ``supported_extensions``
    (compared case-insensitively), the artist and title are taken from the
    file name, the track is looked up on Spotify, its lyrics are downloaded
    and written next to the audio file. Existing ``.lrc`` files are kept
    unless the user chose to override them. A failure for one song is
    reported and does not stop the remaining songs.
    """
    token = get_bearer_token()
    # str.endswith accepts a tuple; build it once outside the loop.
    extensions = tuple(ext.lower() for ext in supported_extensions)

    for item in os.listdir('.'):
        if not item.lower().endswith(extensions):
            continue
        # Extension-less name, used for messages and for the .lrc path.
        display_name = os.path.splitext(item)[0]

        artist, title = extract_artist_and_song(item, naming_format)
        # extract_artist_and_song signals "no match" with these placeholder
        # strings, which are truthy. Without this check the script would go
        # on to search Spotify for "unknown_artist".
        unmatched = (artist, title) == ("unknown_artist", "unknown_title")
        if unmatched or not (artist and title):
            print(f"Could not extract artist and title from {display_name}")
            continue

        lrc_filename = display_name + '.lrc'
        if not override_existing and os.path.exists(lrc_filename):
            print(f"Lyrics for {display_name} already exist, skipping")
            continue

        try:
            track_id = search_spotify(artist, title, token)
            data = fetch_lyrics(track_id)
            lyrics = parse_lyrics(data, useA2, gapText) if data else []
            if lyrics:
                save_lyrics(lrc_filename, lyrics, True, item)
            else:
                # Covers both "nothing returned" and "nothing parseable",
                # so no empty .lrc file is written.
                print(f"No lyrics found for {item}")
        except Exception as e:
            # Per-song boundary: report and continue with the next file.
            print(f"Could not save lyrics for {item}: {e}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment