Skip to content

Instantly share code, notes, and snippets.

@makash
Forked from bpj/pandoc-newpage.pl
Created February 23, 2017 11:05
Show Gist options
  • Save makash/c70a04bb13eb0cc7113e4bd5b8bd8a31 to your computer and use it in GitHub Desktop.
Save makash/c70a04bb13eb0cc7113e4bd5b8bd8a31 to your computer and use it in GitHub Desktop.
Pandoc filter which converts LaTeX \newpage commands into appropriate pagebreak markup for other formats.
#!/usr/bin/env perl
# Pandoc filter which converts paragraphs containing only the LaTeX \newpage
# command into appropriate pagebreak markup for other formats.
#
# You will need perl version 5.10.1 or higher <https://www.perl.org/get.html>
# (Strawberry Perl recommended on Windows!)
# and a module installer <http://www.cpan.org/modules/INSTALL.html>
# and the Pandoc::Elements module version 0.33 or higher
# <https://metacpan.org/pod/Pandoc::Elements>
#
# Run with the -F option:
#
# $ pandoc -F pandoc-newpage.pl ...
#
# If you want to use an HTML class rather than an inline style
# set the value of the metadata key newpage_html_class to the
# name of the class and use CSS like this:
#
# @media all {
# .page-break { display: none; }
# }
# @media print {
# .page-break { display: block; page-break-after: always; }
# }
#
#
# Copyright 2017 Benct Philip Jonsson
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
# See <http://dev.perl.org/licenses/>.
use utf8;
use autodie 2.29;
use 5.010001;
use strict;
use warnings;
use warnings qw(FATAL utf8);
use Carp qw[ carp croak ];
use Pandoc::Elements 0.33;
use Pandoc::Walker 0.27 qw[ action transform ];
# Pandoc invokes a filter with the name of the output format as its only
# argument. Default to the empty string so that a manual run without
# arguments reaches the pass-through branch further down instead of warning
# about an undefined hash key.
my $out_format = shift(@ARGV) // q{};
# Slurp the complete JSON document (with @ARGV now empty, <> reads STDIN).
# Reading a single line would truncate input that is spread over several
# lines, e.g. JSON that has been pretty-printed by another tool.
my $json = do { local $/; <> };
my $doc = pandoc_json($json);
# Class name for the HTML page-break <div>: taken from the metadata key
# newpage_html_class, falling back to the environment variable
# PANDOC_NEWPAGE_HTML_CLASS when the key is not defined.
my $html_break = $doc->meta->value('newpage_html_class') // $ENV{PANDOC_NEWPAGE_HTML_CLASS};
# A reference means the metadata value is not a plain string
# (presumably a list or a map), which cannot be used as a class name.
if ( ref $html_break ) {
    croak "Metadata>newpage_html_class must be string";
}
if ( $html_break ) {
    # Escape HTML-special characters so that an unusual class value cannot
    # terminate the attribute early and corrupt the generated markup.
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    ( my $class_attr = $html_break ) =~ s/([&<>"])/$entity_for{$1}/g;
    $html_break = qq[<div class="$class_attr"></div>];
}
else {
    # No (true) class name configured: use an inline style instead.
    $html_break = qq[<div style="page-break-after: always;"></div>];
}
# Page-break element for each supported pandoc output format name.
my %break_for;
# The HTML-based formats all receive the same raw HTML snippet, each as its
# own RawBlock element.
# NOTE: the original author was unsure whether the epub break works at all,
# or whether it only fails in some (Linux) readers.
for my $html_like ( qw[ html html5 epub ] ) {
    $break_for{$html_like} = RawBlock( html => $html_break );
}
# Word documents need a raw OpenXML paragraph holding a page break.
$break_for{docx} = RawBlock( openxml => '<w:p><w:r><w:br w:type="page" /></w:r></w:p>' );

my $break = $break_for{ $out_format };

# No page-break markup is known for this output format: emit the input
# exactly as it was read and stop.
if ( !defined $break ) {
    print $json;
    exit 0;
}
# Handlers keyed by pandoc element name; turned into a single walker
# action by Pandoc::Walker's action() below.
my %actions = (
    # Replace a raw TeX/LaTeX block that consists solely of the \newpage
    # command with the page-break element chosen for the output format.
    # A bare return hands nothing back to the walker, which (per the
    # Pandoc::Walker documentation) keeps the element as it is.
    'RawBlock' => sub {
        my ($elem) = @_;
        $elem->format =~ /^(?:la)?tex$/ or return;
        # Tolerate whitespace around the command (for instance a trailing
        # newline kept by some readers); anything else in the block means
        # it is not a pure page break.
        $elem->content =~ /\A\s*\\newpage\s*\z/ or return;
        return $break;
    },
);
my $action = action \%actions;
# The action is also handed over as an extra argument so that handlers
# could apply it recursively; the handler above does not make use of it.
$doc->transform($action, $action);
print $doc->to_json;
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment