Skip to content

Instantly share code, notes, and snippets.

@Anon10W1z
Last active September 1, 2019 20:58
Show Gist options
  • Save Anon10W1z/7ea440b11867421ba3aa157ba4901d6c to your computer and use it in GitHub Desktop.
Save Anon10W1z/7ea440b11867421ba3aa157ba4901d6c to your computer and use it in GitHub Desktop.
Composing Programs PDF Producer
import subprocess
import os
# Make sure the page list exists, so that a first run starts from an empty
# list instead of failing when pages.txt is opened for reading.
with open('pages.txt', 'a'):
    pass

# Working directories: 'pages' holds the cleaned HTML files, while
# 'output/pages' and 'output/chapters' receive the generated PDFs.
output_pages = os.path.join('output', 'pages')
output_chapters = os.path.join('output', 'chapters')
for directory in ('pages', 'output', output_pages, output_chapters):
    # exist_ok=True replaces the racy "check, then create" sequence and
    # raises if the path already exists but is not a directory.
    os.makedirs(directory, exist_ok=True)
# pages.txt lists one page name per line.
with open('pages.txt') as f:
    # Strip surrounding whitespace (including the '\r' of CRLF line endings)
    # and drop blank lines: an empty name would crash the page[0] chapter
    # lookup performed for every entry further down.
    pages = [name for name in (line.strip() for line in f) if name]
total_pages = len(pages)
chapters = [[], [], [], []]  # we will assume 4 chapters
# For every listed page: fetch the cleaned HTML (unless it is already on
# disk), register it with its chapter, and convert it to a standalone PDF.
for i, page in enumerate(pages):
    page_file_name = os.path.join('pages', '{}.html'.format(page))

    # The first character of a page name is its 1-based chapter number.
    try:
        ci = int(page[0]) - 1  # chapter index
    except (IndexError, ValueError):
        ci = -1
    # Reject anything outside the known chapters; without this check a
    # leading '0' would silently land in the last chapter via index -1.
    if not 0 <= ci < len(chapters):
        print('Skipping page {!r}: no valid chapter number'.format(page))
        continue

    if os.path.exists(page_file_name):
        print('Page {} already downloaded'.format(page))
    else:
        url = 'http://composingprograms.com/pages/{}.html'.format(page)
        download = subprocess.run(
            ['ruby', 'html_cleanup.rb', url], stdout=subprocess.PIPE)
        if download.returncode != 0:
            # Do not write the output of a failed run: the file would be
            # mistaken for a finished download on every later run.
            print('Failed to download page {} (exit code {})'.format(
                page, download.returncode))
            continue
        with open(page_file_name, 'wb') as f:
            f.write(download.stdout)
        print('Downloaded page {}'.format(page))

    # Only pages whose HTML exists on disk take part in the chapter build.
    chapters[ci].append(page_file_name)

    output_file_name = os.path.join('output', 'pages', '{}.pdf'.format(page))
    conversion = subprocess.run([
        'pandoc', '-f', 'html+tex_math_dollars', '-o', output_file_name,
        '--pdf-engine=xelatex', '--include-in-header', 'fontoptions.tex',
        page_file_name,
    ])
    if conversion.returncode != 0:
        print('pandoc failed for page {} (exit code {})'.format(
            page, conversion.returncode))
        continue
    print('{}/{} page PDFs produced'.format(i + 1, total_pages))
# Build one combined PDF per chapter from the HTML files collected for it.
total_chapters = len(chapters)
for i, chapter_pages in enumerate(chapters):
    if not chapter_pages:
        continue  # no pages were listed for this chapter
    output_file_name = os.path.join('output', 'chapters', 'chapter{}.pdf'.format(i + 1))
    command = ['pandoc', '-f', 'html+tex_math_dollars', '-o', output_file_name, '--pdf-engine=xelatex', '--include-in-header', 'fontoptions.tex']
    # Sort the file names in place so pandoc receives them in name order,
    # independent of the order in which they were listed.
    chapter_pages.sort()
    command = [*command, *chapter_pages]
    result = subprocess.run(command)
    if result.returncode != 0:
        # Report the failure instead of counting the chapter as produced.
        print('pandoc failed for chapter {} (exit code {})'.format(
            i + 1, result.returncode))
        continue
    print('{}/{} chapter PDFs produced'.format(i + 1, total_chapters))
% Font selection for XeLaTeX (fontspec / unicode-math): the DejaVu family,
% loaded by file name. 'Extension' supplies the .ttf suffix and '*' expands
% to the base name given in braces.

% Serif (main text) family.
\setmainfont[
  Extension      = .ttf,
  BoldFont       = *-Bold,
  ItalicFont     = *-Italic,
  BoldItalicFont = *-BoldItalic
]{DejaVuSerif}

% Sans-serif family; its slanted shapes are named "Oblique".
\setsansfont[
  Extension      = .ttf,
  BoldFont       = *-Bold,
  ItalicFont     = *-Oblique,
  BoldItalicFont = *-BoldOblique
]{DejaVuSans}

% Monospaced family.
\setmonofont[
  Extension      = .ttf,
  BoldFont       = *-Bold,
  ItalicFont     = *-Oblique,
  BoldItalicFont = *-BoldOblique
]{DejaVuSansMono}

% Math font.
\setmathfont{DejaVuMathTeXGyre.ttf}
require 'nokogiri'
require 'open-uri'
# Usage: ruby html_cleanup.rb URL
# Fetches the document at URL, removes the elements listed below and prints
# the inner HTML of the '.inner-content' element(s) to stdout.
input = ARGV[0]
abort 'usage: ruby html_cleanup.rb URL' if input.nil? || input.empty?

# Kernel#open no longer opens URLs as of Ruby 3.0; URI.open (Ruby 2.5+) is
# the supported open-uri entry point. Fall back to Kernel#open on older Rubies.
source = URI.respond_to?(:open) ? URI.open(input) : open(input)
page = Nokogiri::HTML(source)

# Replace the content of every '.example' element with a placeholder note.
page.css('.example').each do |elem|
  elem.inner_html = '<i>Environment diagram omitted.</i>'
end

# Drop all images and every div whose class attribute contains "youtube".
page.css('img').remove
page.xpath('//div[contains(@class, "youtube")]').remove

# Remove paragraphs whose first child node reads 'Continue'.
page.css('p').each do |para|
  first_child = para.children.first
  para.remove if first_child && first_child.inner_html == 'Continue'
end

puts page.css('.inner-content').inner_html
11-getting-started
12-elements-of-programming
13-defining-new-functions
14-designing-functions
15-control
16-higher-order-functions
17-recursive-functions
21-introduction
22-data-abstraction
23-sequences
24-mutable-data
25-object-oriented-programming
26-implementing-classes-and-objects
27-object-abstraction
28-efficiency
29-recursive-objects
31-introduction
32-functional-programming
33-exceptions
34-interpreters-for-languages-with-combination
35-interpreters-for-languages-with-abstraction
41-introduction
42-implicit-sequences
43-declarative-programming
44-logic-programming
45-unification
46-distributed-computing
47-distributed-data-processing
48-parallel-computing
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment