Created
April 18, 2021 18:12
-
-
Save trib0r3/d05639b8a75447c16925f46ec0ab74f8 to your computer and use it in GitHub Desktop.
Script for finding dead URLs used as images in notes. It also reports images that are referenced by web links (I prefer to keep local copies of images in my notes).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import logging
import os
import re

# Findings are reported through logging.info, so INFO must be enabled.
logging.basicConfig(level=logging.INFO)
def note_files(path):
    """Yield the path of every Markdown note (``*.md``) found under *path*.

    The directory tree is walked recursively with ``os.walk``. Each yielded
    value is the containing directory joined with the file name, so it is
    relative or absolute exactly as *path* is. A *path* that does not exist
    yields nothing.
    """
    for root, _dirs, files in os.walk(path):
        for name in files:
            if name.endswith(".md"):
                yield os.path.join(root, name)
# Markdown image syntax ``![alt](target)``; the single group captures the target.
_IMAGE_LINK_RE = re.compile(r"!\[.*?\]\((.*?)\)")


def find_dead_links(note: str, note_dir: str):
    """Report image targets in a note that are missing locally or are web URLs.

    Args:
        note: Full Markdown text of the note.
        note_dir: Path of the note file itself (despite the name). Relative
            image targets are resolved against the directory containing it,
            and the value is printed in the log header.

    Returns:
        A list, in order of appearance, of:
        * absolute targets (starting with ``/``) that do not exist,
        * every ``http://`` / ``https://`` target (web images are always
          reported, they are not fetched),
        * relative targets, joined with the note's directory, that do not
          exist.
        The same entries are logged at INFO level when the list is non-empty.
    """
    note_folder = os.path.dirname(note_dir)
    results = []

    for link in _IMAGE_LINK_RE.findall(note):
        if not link:
            # ``![alt]()`` has no target; there is nothing to check, and
            # indexing into the empty string would raise IndexError.
            continue

        # Web link: match the scheme explicitly so a local file whose name
        # merely begins with "http" is still checked on disk.
        if link.startswith(("http://", "https://")):
            results.append(link)
            continue

        # Absolute paths are used as written; anything else is relative to
        # the folder that holds the note.
        if link.startswith("/"):
            candidate = link
        else:
            candidate = os.path.join(note_folder, link)

        if not os.path.exists(candidate):
            results.append(candidate)

    # Print all dead urls for this note in a single log record.
    if results:
        logging.info(
            "[+] Dead local urls OR web urls: %s\n%s",
            note_dir,
            "\n".join(results),
        )
    return results
def scan_notes(path: str):
    """Check every Markdown note under *path* for dead image links.

    Each note path produced by ``note_files(path)`` is read in full and
    passed to ``find_dead_links`` together with that note's own path.
    """
    for note in note_files(path):
        # Read as UTF-8 instead of the platform's locale encoding, and
        # replace undecodable bytes so one odd file cannot abort the scan.
        with open(note, encoding="utf-8", errors="replace") as f:
            find_dead_links(f.read(), note)
def main():
    """Command-line entry point: parse ``DIR`` and scan the notes inside it.

    Takes one positional argument, the directory containing Markdown notes.
    An error is logged, and nothing is scanned, when that path does not exist
    or is not a directory.
    """
    parser = argparse.ArgumentParser(description="Find deadlinks in markdown notes")
    parser.add_argument('DIR', help="Directory containing markdown notes")
    args = parser.parse_args()

    mdnotesdir = args.DIR
    if not os.path.exists(mdnotesdir):
        logging.error(f"Path: '{mdnotesdir}' doesn't exists")
        return
    if not os.path.isdir(mdnotesdir):
        # os.walk on a regular file yields nothing, which would look like a
        # clean scan; report the mistake instead.
        logging.error(f"Path: '{mdnotesdir}' is not a directory")
        return

    scan_notes(mdnotesdir)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment