Created
May 20, 2016 13:14
-
-
Save xdbr/3d71bfbb48e6da74bcb41d765626c4ad to your computer and use it in GitHub Desktop.
Dump statistics on BibTeX entries (number of PDFs, abstracts, etc.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use feature 'say'; | |
use IO::File; | |
use Data::Dumper; | |
use BibTeX::Parser; | |
# Path of the BibTeX file to analyse: the first command-line argument, or
# "papers.bib" when no argument is given.
my $filename = $ARGV[0] // "papers.bib";

# An explicit read mode makes IO::File use the three-argument form of open(),
# so characters such as '>' or '|' in a user-supplied name are treated as part
# of the file name rather than as an open mode.
my $fh = IO::File->new($filename, '<')
    or die "Cannot open '$filename' for reading: $!";
my $parser = BibTeX::Parser->new($fh);

# %stats maps an entry type (as returned by $entry->type) to a hash of
# counters: count (entries seen), pdf (entries with a PDF attachment) and
# abstract (entries with a non-blank abstract).
my %stats;

while (my $e = $parser->next) {
    # Report entries the parser could not handle instead of silently dropping
    # them, so that a skewed tally can be traced back to its cause.
    if (!$e->parse_ok) {
        warn "Skipping unparsable entry in '$filename': "
            . ($e->error // 'unknown error') . "\n";
        next;
    }

    # Seed every counter with 0 so later numeric comparisons and divisions
    # never see undef for a type that has no PDFs or no abstracts.
    my $type   = $e->type;
    my $counts = $stats{$type} //= { count => 0, pdf => 0, abstract => 0 };
    $counts->{count}++;

    # An entry counts as having a PDF when its "file" field ends in ":pdf"
    # (case-insensitive) -- presumably the "description:path:type" layout
    # written by reference managers; confirm against the actual .bib data.
    my $file = $e->field('file');
    $counts->{pdf}++ if defined $file and $file =~ /:pdf$/i;

    # Abstracts consisting only of whitespace are treated as missing.
    my $abstract = $e->field('abstract');
    $counts->{abstract}++ if defined $abstract and $abstract !~ /^\s*$/;
}
use DDP; | |
use Text::Table; | |
# Render the per-type counters held in %stats as a plain-text table with one
# row per entry type: absolute counts followed by the share of entries that
# have a PDF / an abstract, as percentages with two decimals.
my $table = Text::Table->new('Type', 'count', 'PDFs', "Abstracts", 'PDF %', "Abstracts %");

# Most frequent entry types first. Ties are broken alphabetically so the row
# order does not depend on Perl's randomised hash ordering.
my @types_by_count =
    sort { ($stats{$b}{count} // 0) <=> ($stats{$a}{count} // 0) or $a cmp $b }
    keys %stats;

my @data;
for my $type (@types_by_count) {
    # A counter that was never incremented is undef; treat it as 0 up front
    # so the arithmetic below does not raise "uninitialized value" warnings.
    my $count     = $stats{$type}{count}    // 0;
    my $pdfs      = $stats{$type}{pdf}      // 0;
    my $abstracts = $stats{$type}{abstract} // 0;

    push @data, [
        lc $type,
        $count,
        $pdfs,
        $abstracts,
        # Guard on $count so an empty bucket yields 0.00 instead of dying
        # with a division by zero.
        sprintf("%.2f", $count > 0 ? $pdfs      / $count * 100 : 0),
        sprintf("%.2f", $count > 0 ? $abstracts / $count * 100 : 0),
    ];
}

$table->load(@data);
print $table;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment