Created
March 23, 2022 04:42
-
-
Save deli73/68333ed7f742dfc5054fab15d2f8777a to your computer and use it in GitHub Desktop.
A simple script that converts ".thtml" files (HTML files containing <include href="whatever.html"/> elements) into plain HTML files, à la merge.js.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### is this a mess? absolutely. do i care? only a little bit. | |
import argparse, re, time, watchdog.events | |
from watchdog.observers import Observer | |
from watchdog.events import FileSystemEventHandler, FileModifiedEvent, FileCreatedEvent | |
from pathlib import Path, PurePath | |
def main():
    """Parse the command line, then build once or watch for changes.

    Options:
        -i / --input   pattern of template files to parse; when omitted,
                       every ``.thtml`` file in the current folder is used.
        -o / --output  folder that receives the generated files
                       (default ``out/``).
        -w / --watch   keep running and rebuild whenever a matching file is
                       created or modified below the current directory.

    The parsed options are stored in the module-level globals ``args`` and
    ``in_path`` because ``run`` and ``_Handler`` read them from there.
    """
    global args, in_path

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=str,
                        help="filename or regex of files to parse")
    parser.add_argument("-o", "--output", type=str, default="out/",
                        help="output folder")
    parser.add_argument("-w", "--watch", help="watch for updates", action="store_true")
    args = parser.parse_args()

    # Default to all .thtml files in the current folder.
    in_path = "./*.thtml" if args.input is None else args.input

    if not args.watch:
        run()
        return

    # Watch mode: rebuild when a file is added or changed in the folder tree.
    observer = Observer()
    observer.schedule(_Handler(), ".", recursive=True)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to leave watch mode; exit quietly.
        pass
    finally:
        # Always stop the observer thread before joining it, otherwise
        # join() would block forever on any other kind of exit.
        observer.stop()
        observer.join()
class _Handler(FileSystemEventHandler):
    """Filesystem event handler that triggers a rebuild for template files.

    Only file modification and file creation events are acted on, and only
    when the event's path matches the module-level ``in_path`` pattern.
    """

    def on_modified(self, event):
        """Rebuild when a matching file has been modified."""
        super().on_modified(event)
        if isinstance(event, FileModifiedEvent):
            self._rebuild_if_template(event)

    def on_created(self, event):
        """Rebuild when a matching file has been created."""
        super().on_created(event)
        if isinstance(event, FileCreatedEvent):
            self._rebuild_if_template(event)

    @staticmethod
    def _rebuild_if_template(event):
        """Call ``run()`` if the event's source path matches ``in_path``."""
        if PurePath(event.src_path).match(in_path):
            run()
def run():
    """Parse every input file and write the result to the output folder.

    Reads the module-level globals ``in_path`` (glob pattern, evaluated
    relative to the current directory) and ``args.output`` (destination
    folder). Each matching regular file is passed through ``parse`` and
    written to the output folder under the same base name with a ``.html``
    suffix.
    """
    folder_out = Path(args.output)
    # Create the destination up front; opening a file for writing inside a
    # missing folder would otherwise fail with FileNotFoundError.
    folder_out.mkdir(parents=True, exist_ok=True)

    for src in Path(".").glob(in_path):
        if not src.is_file():
            continue
        # Open the file and parse it...
        with open(src, 'r') as in_file:
            out_str = parse(in_file.read())
        # ...then write the expanded markup next to the other outputs.
        out_path = folder_out / src.with_suffix(".html").name
        with open(out_path, 'w') as out_file:
            out_file.write(out_str)
def parse(string): #parse the file and return it as a string | |
include_matcher = re.compile(r"<include href=\"(.*?)\" *?/?>") | |
return re.sub(include_matcher, _replacer, string) #replace the <include> elements with corresponding files | |
def _replacer(match):
    """Return the text of the file named by the match's first group.

    ``match`` is a regex match object whose group 1 holds a file path; the
    path is opened as given, in text mode, and its full contents returned.
    """
    include_path = match.group(1)
    with open(include_path, 'r') as included:
        contents = included.read()
    return contents
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment