Last active
July 16, 2024 20:50
-
-
Save yodaluca23/3958c29c2986841067324dd84258987b to your computer and use it in GitHub Desktop.
Fetch Lyrics From Petit Lyrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import re
import sys

import requests
from bs4 import BeautifulSoup
# Function to extract the first LYRICID from the HTML response
def extract_lyric_id(html_content):
    """Return the ID of the first lyrics link found in the results table.

    The markup in *html_content* is parsed with BeautifulSoup's
    ``html.parser``. The function looks up the ``<table>`` whose id is
    ``lyrics_list`` and, inside it, the first ``<a>`` whose ``href`` contains
    ``/lyrics/<digits>``.

    Returns the digits as a string, or ``None`` when the table or a matching
    link is missing.
    """
    lyric_href = re.compile(r'/lyrics/(\d+)')

    document = BeautifulSoup(html_content, 'html.parser')
    results_table = document.find('table', id='lyrics_list')
    if not results_table:
        return None

    anchor = results_table.find('a', href=lyric_href)
    if not anchor:
        return None

    found = lyric_href.search(anchor['href'])
    return found.group(1) if found else None
# Script flow: ask for a title and artist, search petitlyrics.com, take the
# first hit's LYRICID, obtain the CSRF token, then download, decode and print
# the lyrics. Any failure ends the script with a short message instead of a
# traceback or a request for a nonsensical URL.

# Seconds to wait on each HTTP request; without a timeout requests may block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 15

# Create a session object to persist cookies. Every request goes through it,
# so cookies set by the site are stored and sent back automatically.
session = requests.Session()


def _request(method, url, **kwargs):
    """Send a request through the shared session and return the response.

    Exits the script with a message when the request fails or the server
    answers with an HTTP error status.
    """
    try:
        reply = session.request(method, url, timeout=REQUEST_TIMEOUT, **kwargs)
        reply.raise_for_status()
    except requests.RequestException as exc:
        sys.exit(f"Request to {url} failed: {exc}")
    return reply


# URL for the search POST request
post_url = 'https://petitlyrics.com/search_lyrics'
# Headers for the search POST request. Accept-Encoding is intentionally not
# set here: requests advertises only the encodings it can decode, whereas
# forcing 'br' gives an unreadable body when no Brotli decoder is installed.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
}
# Ask user for title and artist
title = input("Enter the title of the song: ")
artist = input("Enter the artist: ")
# Data for the search POST request
data = {
    'title': title,
    'artist': artist,
}
# Perform the POST request to search for lyrics
response_post = _request('POST', post_url, headers=headers, data=data)
# Extract LYRICID from the HTML response
lyrics_id = extract_lyric_id(response_post.text)
if lyrics_id is None:
    # Stop here rather than requesting '/lyrics/None' further down.
    sys.exit(f"No lyrics found for title {title!r} and artist {artist!r}.")
print(f"Extracted LYRICID: {lyrics_id}")

# URL of the lyrics page; requesting it lets the session collect the site's cookies
initial_url = f'https://petitlyrics.com/lyrics/{lyrics_id}'
# URL of the file to fetch CSRF Token
csrf_ufl = 'https://petitlyrics.com/lib/pl-lib.js'
# Make an initial request to the site to get cookies
response = _request('GET', initial_url)
# Make a request to the CSRF Token file using the session (with cookies)
response_js = _request('GET', csrf_ufl)
# Extract the X-CSRF-Token using regex
csrf_token_match = re.search(r"X-CSRF-Token',\s*'([^']+)'", response_js.text)
if csrf_token_match is None:
    sys.exit(f"Could not find an X-CSRF-Token in {csrf_ufl}.")
csrf_token = csrf_token_match.group(1)

# URL for the lyrics POST request
post_url = 'https://petitlyrics.com/com/get_lyrics.ajax'
# Headers for the lyrics POST request. No 'Cookie' header is built by hand:
# an explicit one would replace the session's cookie jar, and would send the
# literal text 'PLSESSION=None' if the cookie had not been set.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-CSRF-Token': csrf_token,
    'X-Requested-With': 'XMLHttpRequest',
}
data = {
    'lyrics_id': lyrics_id,
}
response_post = _request('POST', post_url, headers=headers, data=data)
# Parse the JSON response
try:
    lyrics_data = response_post.json()
except ValueError:
    sys.exit("The lyrics endpoint did not return valid JSON.")
# Decode the base64 lyrics and print each one on a new line.
# Each item is expected to carry a base64-encoded UTF-8 string under 'lyrics'.
print("\nLyrics:\n")
for item in lyrics_data:
    decoded_lyrics = base64.b64decode(item['lyrics']).decode('utf-8')
    print(decoded_lyrics)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment