Skip to content

Instantly share code, notes, and snippets.

@yodaluca23
Last active July 21, 2024 19:18
Show Gist options
  • Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.
Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.
Fetch .lrc files for all songs in a directory from the Beautiful Lyrics API; supports the A2 extension (Enhanced LRC format).
import os
import requests
import json
import re
from bs4 import BeautifulSoup
# Function to load configuration from BLconfig.txt
def load_config():
    """Read the saved settings from ``BLconfig.txt`` in the working directory.

    Returns:
        A ``(naming_format, useA2, gapText)`` tuple. A key that is absent
        from the file comes back as ``None``. If the file is missing,
        cannot be read, is not valid JSON, or does not hold a JSON object,
        all three values are ``None``.
    """
    try:
        with open('BLconfig.txt', 'r') as config_file:
            config = json.load(config_file)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: covers json.JSONDecodeError and decoding errors, i.e.
        # a damaged file. Report "no config" instead of crashing.
        return None, None, None
    if not isinstance(config, dict):
        # Valid JSON but not an object (e.g. a list): nothing to look up.
        return None, None, None
    return config.get('naming_format'), config.get('useA2'), config.get('gapText')
# Function to save configuration to BLconfig.txt
def save_config(naming_format, useA2, gapText):
    """Write the three settings to ``BLconfig.txt`` as a single JSON object.

    The file is created in the current working directory and any previous
    content is overwritten. The object uses the keys ``naming_format``,
    ``useA2`` and ``gapText``.
    """
    settings = {
        'naming_format': naming_format,
        'useA2': useA2,
        'gapText': gapText,
    }
    serialized = json.dumps(settings)
    with open('BLconfig.txt', 'w') as config_file:
        config_file.write(serialized)
# Load naming format, useA2 and gapText from BLconfig.txt if it exists
naming_format, useA2, gapText = load_config()
if not naming_format:
    # No usable saved configuration: ask once, then persist the answers so
    # later runs skip these prompts.
    naming_format = input("Enter the naming format (use %A for artist and %T for title): ")
    useA2 = input("Should use A2 extension (Enhanced LRC format) if available? (yes/no): ").strip().lower() == 'yes'
    gapText = input('What text should be displayed for instrumental sections. (Enter "MusicNote" for a music note ♪): ').strip()
    # "MusicNote" (any case, spaces ignored) is shorthand for the note glyph.
    if gapText.lower().replace(' ', '') == "musicnote":
        gapText = "♪"
    save_config(naming_format, useA2, gapText)
elif gapText is None:
    # load_config() returns None for a key the file does not contain. Use the
    # empty string (bare timestamp for gaps) so the literal text "None" is
    # never written into a lyrics file.
    gapText = ''
# Ask the user if they want to override existing files
override_existing = input("Do you want to override existing files? (yes/no): ").strip().lower() == 'yes'
# Audio file extensions (lower-case, leading dot included) that the script
# treats as songs when scanning the directory.
supported_extensions = (
    ".3gp .aa .aac .aax .act .aiff .alac .amr .ape .au .awb .dss "
    ".dvf .flac .gsm .iklax .ivs .m4a .m4b .m4p .mmf .movpkg .mp3 "
    ".mpc .msv .nmf .ogg .oga .mogg .opus .ra .rm .raw .rf64 "
    ".sln .tta .voc .vox .wav .wma .wv .webm .8svx .cda"
).split()
def extract_artist_and_song(filename, naming_format):
    """Pull the artist and title out of *filename* using *naming_format*.

    ``%A`` in the format stands for the artist and ``%T`` for the title;
    every other character must appear literally. The format is matched
    against the whole filename minus its final extension, so dots inside
    the artist or title (``"Mr. Brightside"``) are kept.

    Args:
        filename: File name including its extension.
        naming_format: Pattern such as ``"%A - %T"``.

    Returns:
        ``(artist, title)`` with surrounding whitespace stripped. When the
        filename does not match, a message is printed and
        ``("unknown_artist", "unknown_title")`` is returned. A placeholder
        that the format does not contain yields its ``unknown_*`` value.
    """
    stem = os.path.splitext(filename)[0]
    placeholders = {
        '%A': '(?P<artist>.+?)',
        '%T': '(?P<title>.+?)',
    }
    # Escape the format first so its literal characters are matched verbatim,
    # then swap the (escaped) placeholders for named groups.
    pattern_text = re.escape(naming_format)
    for placeholder, group in placeholders.items():
        pattern_text = pattern_text.replace(re.escape(placeholder), group)

    # fullmatch forces the lazy last group to run to the end of the stem
    # instead of stopping at the first "." it meets.
    match = re.fullmatch(pattern_text, stem)
    if match is None:
        print(f"The filename '{stem}' does not match the naming format '{naming_format}'")
        return "unknown_artist", "unknown_title"

    found = match.groupdict()
    artist = found.get('artist', "unknown_artist")
    title = found.get('title', "unknown_title")
    return artist.strip(), title.strip()
def get_bearer_token():
    """Obtain an access token by scraping the Spotify web page.

    Downloads ``https://open.spotify.com``, locates the element whose id is
    ``session``, parses that element's text as JSON and returns the value
    stored under ``accessToken``.

    Raises:
        requests.HTTPError: the page request returned an error status.
        RuntimeError: the page has no element with id ``session``.
        KeyError: the session JSON has no ``accessToken`` entry.
    """
    response = requests.get("https://open.spotify.com")
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    session_element = soup.find(id="session")
    if session_element is None:
        # find() returns None when nothing matches; fail with a message that
        # names the real problem instead of an AttributeError on None.
        raise RuntimeError(
            "Could not find the 'session' element on https://open.spotify.com; "
            "unable to obtain an access token."
        )
    tokens = json.loads(session_element.get_text())
    return tokens['accessToken']
def search_spotify(artist, song, token):
    """Look up a track on Spotify and return its track id.

    Sends one search request (type ``track``, limit 1) for the given artist
    and song and extracts the id from the first result's ``href``.

    Args:
        artist: Artist name to search for.
        song: Track title to search for.
        token: Bearer token placed in the ``Authorization`` header.

    Returns:
        The track id string of the first search hit.

    Raises:
        Exception: the API answered with a status code other than 200.
        ValueError: the search returned no tracks, or the first result's
            ``href`` does not contain a ``tracks/<id>`` segment.
    """
    # Let requests build the query string so characters such as "&", "#",
    # "?" or "+" inside a name are percent-encoded instead of corrupting
    # the URL.
    response = requests.get(
        'https://api.spotify.com/v1/search',
        params={
            'query': f'artist: {artist} track: {song}',
            'type': 'track',
            'offset': 0,
            'limit': 1,
        },
        headers={'Authorization': f'Bearer {token}'},
    )
    if response.status_code != 200:
        raise Exception(f"Spotify API request failed with status code {response.status_code}")

    items = response.json()['tracks']['items']
    if not items:
        raise ValueError("No tracks found for the given artist and song.")

    match = re.search(r'tracks/([a-zA-Z0-9]+)', items[0]['href'])
    if not match:
        raise ValueError("Song ID not found in the href.")
    return match.group(1)
def fetch_lyrics(track_id):
    """Request the lyrics document for *track_id* from the lyrics service.

    Returns:
        The decoded JSON body when the service answers with status 200 and
        the ``content-length`` header is not ``'0'``; otherwise ``None``.
    """
    endpoint = f'https://beautiful-lyrics.socalifornian.live/lyrics/{track_id}'
    # The header carries a placeholder bearer value, exactly as sent before.
    auth_headers = {'authorization': 'Bearer litterallyAnythingCanGoHereItJustTakesItLOL'}
    reply = requests.get(endpoint, headers=auth_headers)
    if reply.status_code != 200:
        return None
    if reply.headers.get('content-length') == '0':
        # Explicitly empty body: nothing to decode.
        return None
    return reply.json()
def convert_to_lrc_timestamp(timestamp):
    """Format a time in seconds as an LRC ``mm:ss.xx`` timestamp.

    Args:
        timestamp: Non-negative number of seconds (int or float).

    Returns:
        A string with zero-padded minutes, a colon, and seconds with two
        decimals, e.g. ``83.456`` -> ``"01:23.46"``.
    """
    minutes = int(timestamp // 60)
    # Round before formatting so a value such as 59.999 cannot be printed as
    # the invalid "60.00"; the overflow is carried into the minutes instead.
    seconds = round(timestamp % 60, 2)
    if seconds >= 60:
        minutes += 1
        seconds -= 60
    return f"{minutes:02}:{seconds:05.2f}"
def _plain_syllable_text(syllables):
    """Concatenate syllable texts, adding a space after each syllable whose
    ``IsPartOfWord`` flag is falsy."""
    return ''.join(
        f"{syllable['Text']}{'' if syllable['IsPartOfWord'] else ' '}"
        for syllable in syllables
    )


def _enhanced_syllable_text(syllables, parenthesize=False):
    """Render syllables with inline ``<mm:ss.xx>`` timestamps (A2 format).

    A syllable that follows one flagged ``IsPartOfWord`` is glued onto it
    without a timestamp; every other syllable gets its own ``<timestamp>``
    prefix. With ``parenthesize`` the text of the first syllable is prefixed
    with ``(`` and the text of the last one suffixed with ``)``, so a
    single-syllable line is still closed properly.
    """
    pieces = []
    continues_word = False
    last_index = len(syllables) - 1
    for index, syllable in enumerate(syllables):
        text = syllable['Text']
        if parenthesize:
            if index == 0:
                text = f"({text}"
            if index == last_index:
                text = f"{text})"
        if continues_word:
            pieces.append(text)
        else:
            stamp = convert_to_lrc_timestamp(syllable['StartTime'])
            pieces.append(f" <{stamp}> {text}")
        # Decides how the *next* syllable is attached.
        continues_word = syllable['IsPartOfWord']
    return ''.join(pieces).strip()


def parse_lyrics(data, useA2, gapText):
    """Convert a lyrics document into a list of LRC lines.

    Args:
        data: Decoded lyrics JSON. ``data['Type']`` selects the layout:
            ``'Line'`` items carry ``StartTime``, ``EndTime`` and ``Text``;
            ``'Syllable'`` items carry a ``Lead`` (and optionally a list
            under ``Background``), each with ``StartTime``, ``EndTime`` and
            ``Syllables`` (entries with ``Text``, ``StartTime`` and
            ``IsPartOfWord``). Only items whose ``Type`` is ``'Vocal'`` are
            used.
        useA2: When truthy and the document is syllable-timed, emit inline
            per-word timestamps. For line-timed documents a notice is
            printed and plain LRC is produced.
        gapText: Text placed after the timestamp of a gap line; an empty
            string produces a bare timestamp.

    Returns:
        The LRC lines in order. Whenever a vocal starts more than 5 seconds
        after the previous one ended, a gap line stamped with that previous
        end time is inserted first. Background vocals are wrapped in
        parentheses. Any other ``data['Type']`` yields an empty list.
    """
    lyrics = []
    prev_end_time = 0  # End of the most recently emitted vocal.

    def add_gap_marker(start_time):
        # Reads prev_end_time from the enclosing scope at call time.
        if start_time - prev_end_time > 5:
            stamp = f"[{convert_to_lrc_timestamp(prev_end_time)}]"
            lyrics.append(stamp if gapText == '' else f"{stamp} {gapText}")

    if data['Type'] == 'Line':
        if useA2:
            print("The following song is not compatible with A2 extension (Enhanced LRC format), continuing with standard LRC")
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue
            start_time = item['StartTime']
            add_gap_marker(start_time)
            lyrics.append(f"[{convert_to_lrc_timestamp(start_time)}] {item['Text'].strip()}")
            prev_end_time = item['EndTime']
            if 'Background' in item:
                print("This song has Background with Type Line, I was not able to find this in testing so I don't know the structure, please report this song, so I may add support for it.\n https://gist.github.com/yodaluca23/82ab1129e12f39e30c8e760a8c853c1f")
    elif data['Type'] == 'Syllable':
        for item in data['Content']:
            if item['Type'] != 'Vocal':
                continue
            # The lead vocal comes first, then each background vocal.
            voices = [(item['Lead'], False)]
            voices.extend((background, True) for background in item.get('Background', []))
            for voice, is_background in voices:
                start_time = voice['StartTime']
                add_gap_marker(start_time)
                timestamp = convert_to_lrc_timestamp(start_time)
                if useA2:
                    text = _enhanced_syllable_text(voice['Syllables'], parenthesize=is_background)
                    lyrics.append(f"[{timestamp}]{text}")
                elif is_background:
                    text = _plain_syllable_text(voice['Syllables'])
                    lyrics.append(f"[{timestamp}] ({text.rstrip()})")
                else:
                    text = _plain_syllable_text(voice['Syllables'])
                    lyrics.append(f"[{timestamp}] {text.strip()}")
                prev_end_time = voice['EndTime']
    return lyrics
def save_lyrics(lrc_filename, lyrics_body, is_time_synced, filename):
    """Write the lyric lines to *lrc_filename* and report what was saved.

    Args:
        lrc_filename: Path of the ``.lrc`` file to create or overwrite.
        lyrics_body: Iterable of lines; they are joined with newlines.
        is_time_synced: Selects the wording of the confirmation message.
        filename: Name of the audio file; shown without its final
            extension in the confirmation message.
    """
    # Always write UTF-8: the platform default encoding may be unable to
    # represent characters such as "♪" or non-Latin lyrics.
    with open(lrc_filename, 'w', encoding='utf-8') as lrc_file:
        lrc_file.write("\n".join(lyrics_body))

    # splitext drops only the extension, so dots inside the name survive.
    display_name = os.path.splitext(filename)[0]
    if is_time_synced:
        print(f"Saved time-synced lyrics for {display_name}")
    else:
        print(f"Saved non-time-synced lyrics for {display_name}")
def main():
    """Fetch and save ``.lrc`` lyrics for every audio file in the current directory.

    For each directory entry with a supported extension (compared
    case-insensitively) the artist and title are derived from the file name,
    the track is looked up on Spotify, its lyrics are downloaded, and the
    result is written next to the audio file with an ``.lrc`` extension.
    A file is skipped, with a message, when its name does not match the
    naming format or when its ``.lrc`` already exists and overriding was
    declined. A failure for one file is reported and does not stop the run.
    """
    token = get_bearer_token()
    extensions = tuple(supported_extensions)
    for item in os.listdir('.'):
        if not item.lower().endswith(extensions):
            continue

        base_name = os.path.splitext(item)[0]
        artist, title = extract_artist_and_song(item, naming_format)
        # extract_artist_and_song signals "no match" with these placeholder
        # values; searching for them could attach an unrelated song's lyrics.
        unmatched = artist == "unknown_artist" and title == "unknown_title"
        if unmatched or not (artist and title):
            print(f"Could not extract artist and title from {base_name}")
            continue

        lrc_filename = base_name + '.lrc'
        if not override_existing and os.path.exists(lrc_filename):
            print(f"Lyrics for {base_name} already exist, skipping")
            continue

        try:
            track_id = search_spotify(artist, title, token)
            data = fetch_lyrics(track_id)
            if data:
                lyrics = parse_lyrics(data, useA2, gapText)
                save_lyrics(lrc_filename, lyrics, True, item)
            else:
                print(f"No lyrics found for {item}")
        except Exception as e:
            # Per-file boundary: report the problem and move on to the next song.
            print(f"Could not save lyrics for {item}: {e}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment