Created
August 9, 2023 00:39
-
-
Save amosboldor/a59df05da33859c8c824a99eed8441c7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install requests beautifulsoup4 lxml progress tabulate
from collections import Counter
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
from progress.bar import Bar
from tabulate import tabulate
# DistroWatch search URL that lists active Linux distributions in the
# "Server" category; every other filter is left at its catch-all value.
dws = (
    "https://distrowatch.com/search.php?"
    "ostype=Linux&"  # OS Type: Linux
    "category=Server&"  # Distribution Category: Server
    "status=Active&"  # Status: Active
    "origin=All&"
    "basedon=All&"
    "notbasedon=None&"
    "desktop=All&"
    "architecture=All&"
    "package=All&"
    "rolling=All&"
    "isosize=All&"
    "netinstall=All&"
    "language=All&"
    "defaultinit=All"
)

# Fetch the search results. The timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() prevents an HTTP
# error page from being parsed as if it were the result list.
dws_r = requests.get(dws, timeout=30)
dws_r.raise_for_status()

# Parse the response text with BeautifulSoup (requires the lxml parser).
dws_r_soup = BeautifulSoup(dws_r.text, "lxml")

# Collect the <a> tags matched by "td > b > a". The first match is dropped;
# presumably it is not a distribution entry -- verify against the live page.
distro_atags = dws_r_soup.css.select("td > b > a")[1:]
# Build a distribution's detail-page URL on DistroWatch.
def base_url(d):
    """Return the DistroWatch ``table.php`` URL for distribution *d*.

    *d* is placed in the ``distribution`` query parameter. It is
    percent-encoded so that characters with a special meaning in a query
    string (``&``, ``=``, spaces, ...) cannot corrupt the URL; plain
    alphanumeric names come out unchanged.
    """
    return "https://distrowatch.com/table.php?" + urlencode({"distribution": d})
# Number of distributions that list each architecture, keyed by the link
# text of the architecture as it appears on the distribution's page.
ARCHS = Counter()

# URLs of distribution pages that could not be processed (HTTP error or no
# fourth list entry), reported after the table instead of aborting the run.
skipped = []

# A single Session reuses the connection to distrowatch.com across the
# many per-distribution requests.
with requests.Session() as session:
    for distro_atag in Bar('Processing').iter(distro_atags):
        url = base_url(distro_atag["href"])

        # The timeout stops one stalled page from hanging the whole run.
        r = session.get(url, timeout=30)
        if not r.ok:
            skipped.append(url)
            continue

        soup = BeautifulSoup(r.text, "lxml")

        # The architecture links are read from the fourth <li> of the title
        # cell's list. Guard the index so a page with a different layout is
        # skipped rather than raising IndexError and losing all progress.
        info_items = soup.css.select("td.TablesTitle > ul > li")
        if len(info_items) < 4:
            skipped.append(url)
            continue

        # Count one occurrence per architecture link on this page.
        ARCHS.update(arch_atag.text for arch_atag in info_items[3].css.select('a'))

# Print the architectures and their counts, highest count first; ties keep
# the order in which the architectures were first encountered.
print(
    tabulate(
        ARCHS.most_common(),
        headers=['Architecture', 'Count'],
        tablefmt="heavy_grid",
    )
)

if skipped:
    print(f"Skipped {len(skipped)} page(s) that could not be processed:")
    for skipped_url in skipped:
        print(f"  {skipped_url}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment