Skip to content

Instantly share code, notes, and snippets.

@duongoku
Created June 11, 2022 14:04
Show Gist options
  • Save duongoku/e4da0fb8b6f45d62fa93a25839909c8c to your computer and use it in GitHub Desktop.
Save duongoku/e4da0fb8b6f45d62fa93a25839909c8c to your computer and use it in GitHub Desktop.
Script to extract/crawl C++ STL Containers' constructors from https://cplusplus.com/
from bs4 import BeautifulSoup
import json
import re
import requests
# Site origin; prepended to the hrefs scraped from the index page
# (those hrefs are assumed to be site-relative — confirm against the page).
ROOT = "https://cplusplus.com"
def get_containers():
    """Fetch the STL reference index and list the containers found on it.

    Every ``<dl>`` element on the index page is treated as one container
    entry: the text of its first ``<b>`` tag is the container name and the
    ``href`` of its first ``<a>`` tag is the link to the container's page.

    Returns:
        A list of ``{"name": ..., "url": ...}`` dicts, where ``url`` is the
        scraped href prefixed with ``ROOT``.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://cplusplus.com/reference/stl/"
    print("Fetching:", url)
    # Without a timeout, requests waits indefinitely on a stalled server.
    r = requests.get(url, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the index.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    containers = []
    for entry in soup.find_all("dl"):
        name_tag = entry.find("b")
        link_tag = entry.find("a")
        # find() returns None when the tag is absent; skip <dl> blocks that
        # lack a name or a usable link rather than crashing on them.
        if name_tag is None or link_tag is None:
            continue
        if "href" not in link_tag.attrs:
            continue
        containers.append(
            {
                "name": name_tag.text.strip(),
                "url": f"{ROOT}{link_tag.attrs['href']}",
            }
        )
    return containers
def get_constructors(containers):
    """Collect constructor prototypes for each container, grouped by version.

    For every container, the page at ``container["url"] + container["name"]``
    is fetched. Inside elements with class ``C_prototype``, each ``div`` with
    class ``on`` or ``off`` is treated as one version: its ``title``
    attribute, lower-cased, becomes the version key, and the text of each
    ``<pre>`` inside it (whitespace runs collapsed to one space) is one
    prototype.

    Args:
        containers: iterable of dicts with ``"name"`` and ``"url"`` keys, in
            the shape produced by ``get_containers``.

    Returns:
        ``{container_name: {version_name: [prototype, ...]}}``. A container
        whose page has no matching markup, or whose request returned an HTTP
        error status, maps to an empty dict.

    Raises:
        requests.RequestException: if a request fails or times out.
        KeyError: if a container dict lacks a required key, or a matched
            version ``div`` has no ``title`` attribute.
    """
    # Compiled once; it is applied to every prototype of every container.
    whitespace = re.compile(r"\s+")
    result = {}
    for container in containers:
        name = container["name"]
        url = f"{container['url']}{name}"
        print("Fetching:", url)
        # Without a timeout, requests waits indefinitely on a stalled server.
        r = requests.get(url, timeout=30)
        result[name] = {}
        if not r.ok:
            # Keep crawling the remaining containers; do not parse an error
            # page as if it were a reference page.
            print("Skipping:", url, "- HTTP status", r.status_code)
            continue
        soup = BeautifulSoup(r.text, "html.parser")
        versions = soup.select(".C_prototype div.on, .C_prototype div.off")
        for version in versions:
            version_name = version.attrs["title"].lower()
            result[name][version_name] = [
                whitespace.sub(" ", pre.text.strip())
                for pre in version.find_all("pre")
            ]
    return result
def export(object):
    """Write *object* as 4-space-indented JSON to ``stl_constructors.json``.

    The file is opened relative to the current working directory and any
    previous content is replaced.

    Args:
        object: any value ``json.dumps`` can serialize. The name shadows the
            builtin but is kept because it is part of the signature.
    """
    target = "stl_constructors.json"
    with open(target, mode="w+") as handle:
        # Serialize after opening, so the file is truncated even when
        # serialization fails.
        payload = json.dumps(object, indent=4)
        handle.write(payload)
if __name__ == "__main__":
    # Script entry point: list the containers, scrape each one's
    # constructors, then write the result to disk as JSON.
    containers = get_containers()
    constructors = get_constructors(containers)
    export(constructors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment