Skip to content

Instantly share code, notes, and snippets.

@redtower
Created March 2, 2011 02:19
Show Gist options
  • Save redtower/850341 to your computer and use it in GitHub Desktop.
Save redtower/850341 to your computer and use it in GitHub Desktop.
Yahoo MusicからArtistを検索しIDを取得する。
#!/usr/bin/perl
use strict;
use warnings;
use Web::Scraper;
use URI;
use Encode;
# Search Yahoo! Music Japan for an artist and print one "name,id" line per
# matching artist, using Web::Scraper to pick the links out of the result page.
#
# Search term: the first command-line argument (decoded from UTF-8 bytes into
# Perl characters), or "weezer" when no argument is given.  The argument is
# tested for "defined and non-empty" rather than plain truthiness so that a
# literal "0" is searched for instead of silently falling back to the default.
my $word = ( defined( $ARGV[0] ) && length( $ARGV[0] ) )
    ? decode( 'utf-8', $ARGV[0] )
    : "weezer";
my $uri = create_uri($word);

# Collect every artist link in the result table as a { url, name } pair.
my $scraper = scraper {
    process 'div.ymsc-mn71 table.artist td.lft a',
        'items[]' => { url => '@href', name => 'TEXT', };
};

# Send HTTP requests through the proxy named in HTTP_PROXY, when it is set.
if ( $ENV{'HTTP_PROXY'} ) {
    $scraper->user_agent->proxy( 'http', $ENV{'HTTP_PROXY'} );
}

my $res = $scraper->scrape($uri);

# No matching links means there is no "items" entry; treat that as an empty list.
foreach my $item ( @{ $res->{items} || [] } ) {

    # Work on a stringified copy so the scraped value itself is left untouched
    # (NOTE(review): the href value may be an object rather than a plain
    # string -- confirm against Web::Scraper).
    my $id = "$item->{url}";

    # The artist ID is the last path segment of a URL that ends in "/".
    $id =~ s{.*/([^/]*)/$}{$1};

    print encode( 'utf-8', $item->{name} ) . "," . $id . "\n";
}
exit;
# Build the search URL for the given search word.
#
#   $word - search text, sent as the "cp" query parameter
#
# Returns a URI object for http://search.music.yahoo.co.jp/musicsearch with
# the query parameters cc=as and cp=<search text>.
sub create_uri {
    my ($term) = @_;

    my @params = ( cc => 'as', cp => $term );
    my $search = URI->new('http://search.music.yahoo.co.jp/musicsearch');
    $search->query_form(@params);

    return $search;
}
#!/usr/bin/perl
use strict;
use warnings;
use LWP::Simple 'get';
use URI;
use Encode;
# Search Yahoo! Music Japan for an artist and print one "name,id" line per
# matching artist, using LWP::Simple plus a regular expression on the raw HTML.
#
# Search term: the first command-line argument (decoded from UTF-8 bytes into
# Perl characters), or "weezer" when no argument is given.  The argument is
# tested for "defined and non-empty" rather than plain truthiness so that a
# literal "0" is searched for instead of silently falling back to the default.
my $word = ( defined( $ARGV[0] ) && length( $ARGV[0] ) )
    ? decode( 'utf-8', $ARGV[0] )
    : "weezer";
my $uri = create_uri($word);

# get() returns undef when the request fails; stop with a clear message
# instead of carrying on with an undefined page body.
my $data = get($uri);
defined $data or die "Failed to fetch $uri\n";

foreach my $line ( split( /\n/, $data ) ) {

    # Capture the ID (last path segment of the link) and the link text
    # directly.  The non-greedy name capture and the /g loop report every
    # artist cell on a line, and nothing that follows "</a></td>" (such as a
    # trailing carriage return) can leak into the printed name.
    while ( $line =~ m{lft"><[a-z =":/.]*([A-Z0-9]*)/">(.*?)</a></td>}g ) {
        my ( $id, $name ) = ( $1, $2 );
        print encode( 'utf-8', $name ) . "," . $id . "\n";
    }
}
exit;
# Construct the artist-search request URL.
#
#   $word - text to search for; becomes the value of the "cp" parameter
#
# Returns a URI object pointing at the musicsearch endpoint with the query
# string cc=as&cp=<text>.
sub create_uri {
    my ($query) = @_;

    my $endpoint = 'http://search.music.yahoo.co.jp/musicsearch';
    my $target   = URI->new($endpoint);
    $target->query_form( cc => 'as', cp => $query );

    return $target;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment