Skip to content

Instantly share code, notes, and snippets.

@tbnbooij
Created October 29, 2017 14:39
Show Gist options
  • Save tbnbooij/5b88a47dcf6cfe181d6d603b99821bd7 to your computer and use it in GitHub Desktop.
Save tbnbooij/5b88a47dcf6cfe181d6d603b99821bd7 to your computer and use it in GitHub Desktop.
Ultimate Guitar Chord Scraper
from flask import Flask, request, jsonify
import re
import requests
from bs4 import BeautifulSoup
# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
# Matches chord ("crd") page URLs on tabs.ultimate-guitar.com.  The dots are
# escaped so they match literally; unescaped, a look-alike host such as
# "tabsxultimate-guitar.com" would also be accepted and fetched.
_CHORD_URL_RE = re.compile(
    r"https://tabs\.ultimate-guitar\.com/(.*)crd_(.*)\.htm"
)

# Seconds to wait for the upstream page before giving up, so a stalled
# connection cannot block the worker indefinitely.
_FETCH_TIMEOUT = 10


def _first_text(soup, selector):
    """Return the text of the first element matching *selector*, or None."""
    found = soup.select(selector)
    return found[0].text if found else None


@app.route('/')
def main():
    """Scrape a chord page and return its title, author and content as JSON.

    Reads the ``url`` query parameter, extracts the first substring that
    looks like a tabs.ultimate-guitar.com chord page URL, downloads that
    page and pulls three pieces of data out of the HTML.

    Returns:
        A JSON response with keys ``title``, ``author`` and ``content``.
        ``content`` is a list of the non-empty lines of the tab. Any field
        whose element is not found on the page is the string ``"UNKNOWN"``.
        If ``url`` is missing or does not contain a chord page URL, the
        response is ``{"error": "Invalid argument"}``.  If the page cannot
        be downloaded, an ``error`` response with HTTP status 502 is
        returned.
    """
    arg = request.args.get('url')
    # A missing parameter yields None, which re would reject with TypeError.
    match = _CHORD_URL_RE.search(arg) if arg else None
    if match is None:
        return jsonify(error="Invalid argument")

    # Only the matched substring is fetched, never the raw argument.
    url = match.group(0)
    try:
        page = requests.get(url, timeout=_FETCH_TIMEOUT)
    except requests.RequestException:
        return jsonify(error="Could not fetch the requested page"), 502
    soup = BeautifulSoup(page.text, 'html.parser')

    # Get the song title
    raw_title = _first_text(soup, ".t_title > div > h1")
    if raw_title is None:
        song_title = "UNKNOWN"
    else:
        # The last space-separated word of the heading is dropped
        # (presumably a trailing "Chords" label - confirm against the page).
        song_title = ' '.join(raw_title.strip().split(' ')[:-1])

    # Get the author of the song
    song_author = _first_text(soup, ".t_autor > a")
    if song_author is None:
        song_author = "UNKNOWN"

    # Get the lyrics and chords of the song
    raw_content = _first_text(soup, ".js-tab-content")
    if raw_content is None:
        content = "UNKNOWN"
    else:
        # NOTE(review): the replacement literal was garbled in the source
        # (it read as an unterminated triple quote); "&quot;" is the
        # presumed original value - confirm.
        cleaned = raw_content.replace("\r", "").replace('"', "&quot;")
        content = [line for line in cleaned.split("\n") if line]

    return jsonify(title=song_title, author=song_author, content=content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment