Last active
August 12, 2020 11:19
-
-
Save szepnapot/6fffd93688556f97e4a8f79837a1c0ca to your computer and use it in GitHub Desktop.
Downloads the Alexa top 1 million sites CSV and loads the URLs into noisy's config.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Download the Alexa top-1-million site list and append every listed site,
# as an https:// URL, to the "root_urls" list in ./config.json (noisy's config).
set -o errexit
set -o pipefail
set -o nounset

# The download is a zip archive. GNU tar cannot unpack zip (only bsdtar can),
# so store it in a temporary file and let Python's zipfile module extract it.
archive="$(mktemp)"
trap 'rm -f "$archive"' EXIT
curl -sSL "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip" -o "$archive"

# The delimiter is quoted so the shell never expands anything inside the
# Python source. The archive path is handed over as sys.argv[1].
python - "$archive" <<'EOF'
import csv
import json
import os
import sys
import zipfile

config_file = 'config.json'
top_1m = "top-1m.csv"

# unpack the site list into the current directory
with zipfile.ZipFile(sys.argv[1]) as archive:
    archive.extract(top_1m)

# load noisy's config
with open(config_file) as f:
    config = json.load(f)

# add the top sites to the root urls; the domain is the last column of each
# row, and blank rows are skipped so row[-1] cannot raise IndexError
root_urls = config.setdefault("root_urls", [])
with open(top_1m) as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    root_urls.extend(
        "https://{}".format(row[-1]) for row in csv_reader if row
    )

# best-effort cleanup of the extracted csv; a failure here is not fatal
try:
    os.remove(top_1m)
except OSError:
    pass

# update noisy's config
with open(config_file, 'w') as json_file:
    json.dump(config, json_file)
EOF

# back to bash
echo "[*] Alexa top 1 million site added to noisy root urls"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment