Skip to content

Instantly share code, notes, and snippets.

@yodaluca23
Last active July 16, 2024 20:50
Show Gist options
  • Save yodaluca23/3958c29c2986841067324dd84258987b to your computer and use it in GitHub Desktop.
Save yodaluca23/3958c29c2986841067324dd84258987b to your computer and use it in GitHub Desktop.
Fetch Lyrics From Petit Lyrics
import requests
import re
import base64
from bs4 import BeautifulSoup
# Function to extract the first LYRICID from the HTML response
def extract_lyric_id(html_content):
    """Return the id of the first lyrics link in the search-results table.

    The markup is parsed with the ``html.parser`` backend. The table whose
    ``id`` is ``lyrics_list`` is located, and inside it the first ``<a>``
    whose ``href`` contains ``/lyrics/<digits>``.

    Args:
        html_content: HTML text of the search-results page.

    Returns:
        The captured digits as a string, or ``None`` when the table, the
        link, or the numeric id cannot be found.
    """
    # One pattern serves both as the href filter and as the id extractor.
    id_pattern = re.compile(r'/lyrics/(\d+)')

    results_table = BeautifulSoup(html_content, 'html.parser').find(
        'table', id='lyrics_list'
    )
    if not results_table:
        return None

    anchor = results_table.find('a', href=id_pattern)
    if not anchor:
        return None

    found = id_pattern.search(anchor['href'])
    return found.group(1) if found else None
# --- Step 1: search the site for the song and pick the first result ---

# URL for the search POST request
post_url = 'https://petitlyrics.com/search_lyrics'
# Headers for the search POST request.
# No Accept-Encoding override: requests advertises only the encodings it can
# decode, whereas forcing 'br' yields an unreadable body when no Brotli
# package is installed.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
}
# Ask user for title and artist
title = input("Enter the title of the song: ")
artist = input("Enter the artist: ")
# Form fields for the search request
data = {
    'title': title,
    'artist': artist,
}
# Perform the POST request to search for lyrics. The timeout (in seconds)
# keeps the script from hanging forever on an unresponsive server.
response_post = requests.post(post_url, headers=headers, data=data, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
response_post.raise_for_status()
# Extract LYRICID from the HTML response
lyrics_id = extract_lyric_id(response_post.text)
if lyrics_id is None:
    # Without an id the following requests would target '/lyrics/None'.
    raise SystemExit(f"No lyrics found for '{title}' by '{artist}'.")
print(f"Extracted LYRICID: {lyrics_id}")
# --- Step 2: fetch and print the lyrics for the selected LYRICID ---

# Page of the selected song; requesting it is how the session cookie is obtained
initial_url = f'https://petitlyrics.com/lyrics/{lyrics_id}'
# Script file that embeds the CSRF token
csrf_url = 'https://petitlyrics.com/lib/pl-lib.js'
# Seconds to wait for each request before giving up instead of hanging
request_timeout = 10

# The session persists cookies across requests; the context manager closes
# its connections once the last request is done.
with requests.Session() as session:
    # Make an initial request to the site to get cookies
    response = session.get(initial_url, timeout=request_timeout)
    response.raise_for_status()
    # Extract the PLSESSION cookie (None when the site did not set one)
    plsession_cookie = session.cookies.get('PLSESSION')

    # Fetch the script with the same session (cookies included)
    response_js = session.get(csrf_url, timeout=request_timeout)
    response_js.raise_for_status()
    # Extract the X-CSRF-Token using regex
    csrf_token_match = re.search(r"X-CSRF-Token',\s*'([^']+)'", response_js.text)
    csrf_token = csrf_token_match.group(1) if csrf_token_match else None
    if csrf_token is None:
        # requests silently drops headers whose value is None, so the lyrics
        # request would otherwise go out without any CSRF token.
        raise SystemExit(f"Could not find the X-CSRF-Token in {csrf_url}.")

    # URL for the lyrics POST request
    post_url = 'https://petitlyrics.com/com/get_lyrics.ajax'
    # Headers for the lyrics POST request. Accept-Encoding is left to
    # requests so that only decodable encodings are advertised.
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-CSRF-Token': csrf_token,
        'X-Requested-With': 'XMLHttpRequest',
    }
    if plsession_cookie is not None:
        # Forward the session cookie explicitly; skipped when absent so the
        # literal text 'PLSESSION=None' is never sent.
        headers['Cookie'] = f'PLSESSION={plsession_cookie}'
    data = {
        'lyrics_id': lyrics_id,
    }
    response_post = session.post(
        post_url, headers=headers, data=data, timeout=request_timeout
    )
    response_post.raise_for_status()

# Parse the JSON response (the body was fully read before the session closed)
try:
    lyrics_data = response_post.json()
except ValueError as err:
    raise SystemExit("The lyrics endpoint did not return valid JSON.") from err
if not isinstance(lyrics_data, list):
    # The loop below indexes each element by 'lyrics', which requires a list
    # of objects; anything else is reported instead of failing mid-loop.
    raise SystemExit(f"Unexpected lyrics response: {lyrics_data!r}")

# Decode the base64 lyrics and print each one on a new line
print("\nLyrics:\n")
for item in lyrics_data:
    decoded_lyrics = base64.b64decode(item['lyrics']).decode('utf-8')
    print(decoded_lyrics)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment