Skip to content

Instantly share code, notes, and snippets.

@larshb
Last active September 22, 2019 13:13
Show Gist options
  • Save larshb/404800341165bd2db19615335e73c4f9 to your computer and use it in GitHub Desktop.
Save larshb/404800341165bd2db19615335e73c4f9 to your computer and use it in GitHub Desktop.
Python script (wrapper for jsfiddle-downloader) for downloading and modularizing fiddles.
from logging import *
# Logging verbosity; applied to the root logger right away.
LOGLEVEL = DEBUG
basicConfig(level=LOGLEVEL)

# Behaviour switches. CLEAN makes main() delete the files listed in FILES
# before a run; USE_NODE is not referenced by the code visible here.
USE_NODE = True
CLEAN = True

# Output files written by Fiddle.parse() (page, script, stylesheet).
FILES = ['.html', '.js', '.css']

# Extra regex substitutions applied to the downloaded HTML, in the form
#   label: (pattern, replacement)
CUSTOM_REPLACEMENTS = {
    "API reference": ("API = '//", "API = 'https://"),
}
from os.path import dirname, realpath, join, exists
from subprocess import check_output
from requests import get
from config import *
# URL template used to fetch a fiddle; {id} is filled in with the fiddle ID.
URL = "https://fiddle.jshell.net/{id}/show/light/"
# Script URL substituted for the fiddle's own jQuery <script> reference.
JQUERY = 'https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js'

# The star import from config may not define CUSTOM_REPLACEMENTS. Probe for
# the name and fall back to a harmless placeholder so Fiddle.parse() always
# has a default. Only NameError means "not defined"; anything else (e.g.
# KeyboardInterrupt) must not be swallowed.
try:
    CUSTOM_REPLACEMENTS
except NameError:
    CUSTOM_REPLACEMENTS = {
        "Dummy replacement": (r"dummy pattern", "dummy repl"),
    }
class module:
    """Namespace describing the fiddle parts that are split into own files.

    Each nested class provides:
      modtag -- human-readable label used in log messages and as component key
      filext -- name of the file the extracted content is written to
      inline -- regex whose group 1 captures the inlined content
      linked -- tag that replaces the inlined content and points at `filext`
    """

    class js:
        """The fiddle's inline JavaScript."""
        modtag = 'Javascript'
        filext = '.js'
        # The `.` wildcards stand in for the quotes and square brackets of
        # `<script type="text/javascript">//<![CDATA[`.
        inline = r"<script type=.text/javascript.>//<!.CDATA.(.*?)//]]></script>"
        linked = '<script defer type="text/javascript" src="' + filext + '"></script>'

    class css:
        """The fiddle's inline stylesheet."""
        modtag = 'Stylesheet'
        filext = '.css'
        # Matches a <style> tag whose attributes contain "compiled".
        inline = r"<style.*?compiled.*?>(.*?)</style>"
        linked = (
            '<link rel="stylesheet" id="compiled-css" type="text/css" href="'
            + filext
            + '">'
        )
class Fiddle:
    """A single jsFiddle that is downloaded and split into separate files.

    Loading stores the raw page in `self.raw` (and in the `RAW_FILE` cache
    file); parsing rewrites the page and writes the files `.html`, `.js`
    and `.css` into the current working directory.

    Attributes:
        id: the fiddle ID passed to the constructor.
        url: `URL` with the ID filled in.
        raw: downloaded page source, or None while nothing has been loaded.
        component: extracted module contents keyed by `modtag`.
    """

    # Cache file holding the raw page of the most recent download.
    RAW_FILE = '.raw'

    def __init__(self, id, load=True, useCache=True, parse=True):
        """Set up the fiddle and optionally load and parse it right away.

        Args:
            id: fiddle ID; a value containing '/' is only reported via
                `critical()`, it is not rejected.
            load: call `load()` during construction.
            useCache: forwarded to `load()` as `useCache`.
            parse: call `parse()` during construction.
        """
        self.component = {}
        # Stays None until load() succeeds, so parse() can tell that there
        # is nothing to work on instead of failing on a missing attribute.
        self.raw = None
        self.id = id
        if '/' in id:
            critical('Username should be omitted from fiddle ID')
        self.url = URL.format(id=id)
        debug('URL: ' + self.url)
        if load:
            self.load(useCache=useCache)
        if parse:
            self.parse()

    def load(self, cache=True, useNpm=True, useCache=False):
        """Fetch the raw fiddle page into `self.raw`.

        Args:
            cache: only used when `useNpm` is false; write the downloaded
                page to `RAW_FILE`.
            useNpm: download through `loadNpm()` instead of an HTTP GET.
            useCache: only used when `useNpm` is true; reuse an existing
                `RAW_FILE` instead of downloading again.

        On a failed HTTP GET the error is logged and `self.raw` is left
        unchanged.
        """
        if useNpm:
            if useCache and exists(self.RAW_FILE):
                info('Using cached download')
            else:
                if useCache:
                    error('Cache not found, downloading')
                debug('NPM DL allways cached')
                self.loadNpm()
            # Both the cached and the freshly downloaded case end up in
            # RAW_FILE, so read the content from there.
            with open(self.RAW_FILE) as raw_file:
                self.raw = raw_file.read()
            return

        r = get(self.url)
        if not r.ok:
            error('GET error: ' + str(r.status_code) + ' ' + str(r.reason))
            return
        self.raw = r.text
        info('Fiddle content loaded')
        if cache:
            with open(self.RAW_FILE, 'w') as raw_file:
                raw_file.write(self.raw)
            info('Fiddle content saved to file')

    def loadNpm(self):
        """Download the fiddle into `RAW_FILE` via `jsfiddle-downloader`.

        Any existing `RAW_FILE` is removed first. The tool's output is
        forwarded line by line to the debug log.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        import os
        node = "jsfiddle-downloader"
        info('Using Node ' + node)
        output = self.RAW_FILE
        if os.path.exists(output):
            try:
                os.remove(output)
            except OSError as err:
                # Best effort: report the problem and let the tool try to
                # overwrite the file.
                error(err)
        # NOTE(review): the fiddle ID is interpolated into a shell command
        # unescaped. That is only safe while the ID comes from a trusted
        # command line; quote or validate it before accepting IDs from
        # anywhere else.
        cmd = f"{node} -i {self.id} -o {output} -v"
        debug(cmd)
        log = check_output(cmd, shell=True).decode()
        for line in log.splitlines():
            debug(node + ': ' + line)

    def parse(self, customReplacements=CUSTOM_REPLACEMENTS):
        """Rewrite the loaded page and split it into `.html`, `.js`, `.css`.

        Steps:
          1. write the raw page back to `RAW_FILE`,
          2. apply the built-in regex replacements plus `customReplacements`,
          3. for each entry of `module`, store the inlined content in its
             own file and replace it in the page with a linking tag,
          4. write the resulting page to `.html`.

        Args:
            customReplacements: mapping of label -> (pattern, replacement);
                entries override built-in ones with the same label.

        If nothing has been loaded, a critical message is logged and no
        file is written.
        """
        import re
        html = getattr(self, 'raw', None)
        if html is None:
            critical('No fiddle content loaded, nothing to parse')
            return
        with open(self.RAW_FILE, 'w') as raw_file:
            raw_file.write(html)

        replacements = {
            "Injected script": (r"<script>.*tell the embed.*?<\/script>", ""),
            "CoffeeScript todo": ('<!-- TODO: Missing CoffeeScript 2 -->', ''),
            "jQuery reference": (r"<script.*?jquery.*?<\/script>", f'<script src="{JQUERY}"></script>'),
            "jsFiddle CSS": (r"<link.*?result-light.*?>", "")
        }
        replacements.update(customReplacements)
        for name, (pattern, repl) in replacements.items():
            info('Replacing ' + name)
            html, n = re.subn(pattern, repl, html, flags=re.DOTALL)
            if n < 1:
                warning('Unable to replace ' + name)

        # Get components
        for mod in [module.js, module.css]:
            name = mod.modtag
            filename = mod.filext
            pattern = mod.inline
            substitution = mod.linked
            matches = re.search(pattern, html, flags=re.DOTALL)
            if matches:
                comp = matches.group(1).lstrip()
                self.component[name] = comp
                with open(filename, 'w') as comp_file:
                    comp_file.write(comp)
                info(f"{name} stored")
            else:
                critical(f"{name} not found")
            # Swap the inlined content for a tag that links the new file.
            html, n = re.subn(pattern, substitution, html, flags=re.DOTALL)
            if n < 1:
                error(f'Unable to inject linked {name} module')

        with open('.html', 'w') as html_file:
            html_file.write(html)
        info('HTML stored')
def main(argv):
    """Command-line entry point: scrape the fiddle named in `argv[1]`.

    Args:
        argv: argument vector; `argv[0]` is the program name and `argv[1]`
            the fiddle ID.

    Returns:
        0 after the fiddle has been processed, 1 if the ID is missing
        (a usage line is printed in that case).
    """
    if CLEAN:
        import os
        # Remove the output of a previous run.
        for path in FILES:
            if exists(path):
                os.remove(path)

    # coloredlogs is optional; only its absence is tolerated here.
    try:
        import coloredlogs
    except ImportError:
        info("Colored logs not installed")
    else:
        coloredlogs.install(level=LOGLEVEL)
    debug('Debug level logging set')

    if len(argv) < 2:
        print("Usage: %s [<user>/]<fiddle id>" % argv[0])
        return 1

    fiddle_id = argv[1]
    info("Scraping fiddle " + fiddle_id)
    # Constructing the Fiddle loads and parses it (constructor defaults).
    Fiddle(fiddle_id)
    return 0
if __name__ == '__main__':
    import sys
    # sys.exit() is always available; the bare exit() helper is injected
    # by the site module and is meant for interactive use only.
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment