Skip to content

Instantly share code, notes, and snippets.

@Logioniz
Last active February 18, 2017 08:05
Show Gist options
  • Save Logioniz/f2b059ff0cf0274af058fb5082f75518 to your computer and use it in GitHub Desktop.
Save Logioniz/f2b059ff0cf0274af058fb5082f75518 to your computer and use it in GitHub Desktop.
Web server wrapper for mystem
#!/usr/bin/perl
use Mojo::Base -strict;
use Mojo::JSON::MaybeXS;
use Mojo::JSON 'decode_json';
use Mojolicious::Lite;
use IPC::Open3;
use Mojo::IOLoop;
# Path of the mystem executable that is spawned once at start-up and kept
# running for the lifetime of the server.
my $mystem_path = '/usr/local/bin/mystem';

# Handles attached to the child process: $write feeds its STDIN, $read
# receives its output.  Created with direct method calls rather than the
# ambiguous indirect object syntax ("new IO::Handle").
my $write = IO::Handle->new;
my $read  = IO::Handle->new;

# The third argument (CHLD_ERR) is undef, so open3 delivers the child's
# STDERR on the same handle as its STDOUT ($read).
my $pid = open3($write, $read, undef, $mystem_path, '-gi', '-d', '-c', '--format', 'json');
# Feed $text to the running mystem process, one line at a time, and return
# an array reference holding the concatenation of every decoded reply.
#
# Parameters:
#   $text - text to analyse; it is passed through utf8::decode first, then
#           each line is sent to mystem encoded as UTF-8.  undef yields an
#           empty result.
#
# Returns: array reference with the elements of all JSON arrays read back.
#
# Dies if the pipe to or from mystem fails or is closed, instead of
# spinning forever in the read loop.
sub analyze {
    my $text = shift;

    # Nothing to analyse (e.g. the request carried no "text" parameter).
    return [] unless defined $text;

    utf8::decode($text);
    my $result = [];
    for my $line (split /\r?\n/, $text) {
        my $message = "$line\n";
        utf8::encode($message);

        # syswrite may accept only part of the buffer, so keep going until
        # every byte of the line has been handed to the child.
        my $offset = 0;
        while ($offset < length $message) {
            my $written = syswrite $write, $message,
                length($message) - $offset, $offset;
            if (!defined $written) {
                next if $!{EINTR};
                die "write to mystem failed: $!";
            }
            $offset += $written;
        }

        # The reply may arrive in several chunks; accumulate them until the
        # buffer decodes as a complete JSON document.
        my ($ans, $res) = ('', undef);
        while (1) {
            my $len = sysread $read, my $data, 65535;
            if (!defined $len) {
                next if $!{EINTR};
                die "read from mystem failed: $!";
            }

            # A zero-length read is end-of-file: the child went away and no
            # further data will ever complete the JSON document.
            die "mystem closed its output unexpectedly" if $len == 0;

            $ans .= $data;
            $res = eval { decode_json($ans) };
            last unless $@;
        }
        push @$result, @$res;
    }
    return $result;
}
# Pick the lemma for a single analysis item.
#
# Parameters:
#   $o - hash reference that may carry an "analysis" array reference and/or
#        a "text" string.
#
# Returns: the "lex" of the first analysis entry when there is one;
# otherwise the item's non-empty "text"; otherwise undef.
sub get_lemma {
    my $o = shift;

    # An empty "analysis" array is still a true value, so look at its
    # contents; this also avoids autovivifying a bogus first element.
    my $analysis = $o->{analysis};
    if (ref $analysis eq 'ARRAY' && @$analysis) {
        my $lex = $analysis->[0]{lex};
        return $lex if defined $lex;
    }

    # Fall back to the raw text.  Test for defined/non-empty rather than
    # truth so that the token "0" is not lost.
    my $text = $o->{text};
    return $text if defined $text && length $text;

    # Explicit undef on purpose: callers always receive exactly one value.
    return undef;
}
# Analyse $text and return the list of lemmas, in order.
#
# Parameters:
#   $text - text handed to analyze().
#
# Returns: list of the get_lemma() results for every analysed item, with
# undefined and empty values removed.
sub lemmatize {
    my $text = shift;
    my $infos = analyze($text);

    # Filter on defined/non-empty instead of truth: a plain truth test would
    # also throw away the perfectly valid lemma "0".
    my @lemmas = grep { defined($_) && length($_) }
                 map  { get_lemma($_) } @$infos;
    return @lemmas;
}
# Lemmatize the given text and glue the resulting lemmas together, with no
# separator, into a single string.
sub lemmatize_to_string {
    my ($text) = @_;
    my @lemmas = lemmatize($text);
    return join '', @lemmas;
}
# Route "/": run analyze() on the "text" request parameter and render the
# resulting structure as JSON.
any '/' => sub {
    my ($c) = @_;
    my $input    = $c->param('text');
    my $analysis = analyze($input);
    $c->render(json => $analysis);
};
# Route "/lemmatize": lemmatize the "text" request parameter and render the
# joined lemmas as a text response.
any '/lemmatize' => sub {
    my ($c) = @_;
    my $input  = $c->param('text');
    my $joined = lemmatize_to_string($input);
    $c->render(text => $joined);
};
# When the event loop emits "finish", close the pipe feeding the child's
# STDIN and then wait for the child process so it is reaped.
Mojo::IOLoop->singleton->on(
    finish => sub {
        my ($loop) = @_;
        $write->close;
        waitpid($pid, 0);
    }
);

# Hand control over to the Mojolicious::Lite application.
app->start;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment