Created
November 30, 2011 20:37
-
-
Save mistermarco/1410710 to your computer and use it in GitHub Desktop.
Count "pages" in a file, given a template size and an estimated number of characters per page.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# page_count: walk one or more directory trees and print, for each matching
# file, an approximate page count derived from the file's size.
# Full usage documentation is in the POD after __END__.

use strict;
use warnings;

use File::Find;
use Getopt::Long;
use Pod::Usage;
use Term::ANSIColor;

# Values used when the corresponding option is not given on the command line.
my $default_overhead_size = 2000;
my $default_page_size     = 2500;

# Option storage. These are file-scoped because the File::Find callback
# (process_file) reads them directly.
my $help          = 0;
my $man           = 0;
my $modified_days;          # only report files modified within this many days
my @directories   = ();     # starting points for the search
my $overhead;               # template size subtracted from every file size
my $page_size     = 0;      # divisor turning the remaining size into pages
my $debug         = 0;
my $total         = 0;      # true when a grand total was requested
my $total_pages   = 0;      # running sum maintained by process_file
my $all           = 0;      # true to skip the file-extension filter

# Stop with a usage message on unknown or malformed options instead of
# silently carrying on with whatever was parsed.
GetOptions(
    'days|d=f'      => \$modified_days,
    'dir=s'         => \@directories,
    'help|?'        => \$help,
    'man'           => \$man,
    'overhead|o=i'  => \$overhead,
    'page_size|p=i' => \$page_size,
    'debug'         => \$debug,
    'total'         => \$total,
    'all'           => \$all,
) or pod2usage(2);

pod2usage(1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;

# Each -dir value may itself be a whitespace-separated list of directories.
# Splitting on ' ' (awk mode) ignores leading and repeated whitespace, so no
# empty directory names are handed to find().
@directories = split ' ', join(' ', @directories);
@directories = qw(.) unless scalar @directories;

# An explicit "--overhead 0" is a legitimate request and must be honoured, so
# only fall back to the default when the option was not given at all.
$overhead = $default_overhead_size unless defined $overhead;

# A page size of zero would divide by zero in process_file, so it is treated
# the same as "not given".
$page_size = $default_page_size unless $page_size;

print "Using overhead size of $overhead and page size of $page_size\n" if $debug;

find(\&process_file, @directories);
# File::Find callback: called once for every entry found, with $_ holding the
# entry name and $File::Find::name holding its path. Prints one line per
# accepted file (path, optionally its size, and its page count) and adds the
# page count to $total_pages when a total was requested.
#
# Reads the file-level settings $modified_days, $all, $debug, $overhead,
# $page_size and $total. Returns nothing useful.
sub process_file {
    # don't do anything unless it's a file
    return unless -f $_;

    # The -f test above filled Perl's stat cache; the special "_" filehandle
    # reuses it so the file is only stat'ed once.
    my $age_in_days = -M _;

    # skip files older than the requested number of days
    if ($modified_days) { return unless $age_in_days < $modified_days; }

    # return unless the file has one of the following extensions
    unless ($all) {
        return unless /.*\.(html|htm|asp|cgi|fft|pl|tmpl|tpl|php|txt)$/;
    }

    # calculate the size of the file; -s yields a false value for an empty
    # file, so normalise that to a plain 0
    my $size = (-s _) || 0;

    # subtract the overhead (size of template code) and divide by the
    # approximate number of characters printed on a page
    my $page_count = ($size - $overhead) / $page_size;

    # every file counts as at least one page
    if ($page_count < 1) { $page_count = 1; }

    # if the user requested a total, add the page count to the total
    $total_pages += $page_count if $total;

    # In debug mode, files modified within the last two days are shown in
    # red. The colour is switched off again on the same line so that it
    # cannot leak into later output (the total line, the shell prompt).
    my $highlight = $debug && $age_in_days < 2;

    print color('red') if $highlight;
    print "$File::Find::name";
    print "\t$size" if $debug;
    print "\t$page_count";
    print color('reset') if $highlight;
    print "\n";

    return;
}
# Once the whole tree has been walked, report the grand total if requested.
if ($total) {
    my $formatted_total = commify($total_pages);
    print "Total pages: ", $formatted_total, "\n";
}
# Insert thousands separators into a number: commify(1234567) gives
# "1,234,567". Only the integer part is grouped; digits after a decimal
# point are left alone.
sub commify {
    my ($number) = @_;

    # Work on the mirrored string so that groups of three digits can be
    # counted from the least-significant end.
    my $mirrored = scalar reverse $number;

    # Put a comma after each complete group of three digits that is followed
    # by another digit, skipping groups that belong to the fractional part
    # (in the mirrored string those are still followed by the ".").
    $mirrored =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    my $formatted = reverse $mirrored;
    return $formatted;
}
__END__

=head1 NAME

page_count - Count the approximate page length of files in a directory

=head1 SYNOPSIS

Use page_count to get a list of files and their sizes in pages.

    page_count

Used by itself it will run in the current directory (and its subdirectories) and
will count the pages for most commonly changed files (html, htm, fft, txt, asp, etc.)

=head1 OPTIONS

=over 8

=item B<-days> I<number of days>

Process pages changed within the number of days specified

=item B<-dir> I<directory_name(s)>

Process a specific directory and its subdirectories. If you enclose the names in quotes ("), you can specify a list of directories.
If omitted, searches the current directory.

=item B<-help>

Prints out a short help message

=item B<-total>

Adds a total of all page sizes to the end of the report

=item B<-overhead> I<characters_in_overhead>

Override the template size default

=item B<-page_size> I<characters_in_printed_page>

Override the page size default

=item B<-debug>

Print out debugging messages

=item B<-all>

Calculate the size of all files, not just regular text files

=item B<-man>

Print out a more detailed manual page about the program

=back

=head1 AUTHOR

Written by Marco Wise (marco.wise@stanford.edu)

=head1 COPYRIGHT

2006 Board of Trustees, Leland Stanford Junior University

=cut
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment