7415963987456321 · August 5, 2024 11:45
diff --git a/tvinna.pl b/tvinna.pl
 #!/usr/bin/perl
 use strict;
 use warnings;
 use utf8;
 use WWW::Mechanize::Sleepy;
 use HTML::TokeParser;
 use Data::Dumper;
 use open qw( :std :encoding(UTF-8) );

 # Scrapes job postings from tvinna.is for use in analysis of the job market

 # Shared user agent for every request made by this script. The options are
 # collected in a hash first so each setting can be read on its own line.
 my %mech_options = (
    autocheck => 1,
    timeout   => 100,
    sleep     => '1',
 );
 my $mech = WWW::Mechanize::Sleepy->new(%mech_options);

 # Read a list of posting URLs (one per line) from the file named by the
 # first command-line argument and print one CSV line per URL.
 #
 # Dies when no file name is given or when the file cannot be opened.
 # Blank lines and surrounding whitespace (including the "\r" left behind by
 # CRLF line endings) are tolerated, so a stray empty line no longer aborts
 # the whole run with "invalid url".
 sub read_list {
    my $filename = $ARGV[0] or die("No urllist found in args");
    open(my $fh, '<:encoding(UTF-8)', $filename)
        or die "Could not open file '$filename' $!";
    while (my $row = <$fh>) {
        chomp $row;
        $row =~ s/\A\s+|\s+\z//g;    # trim; also removes a trailing "\r"
        next if $row eq '';          # skip blank lines instead of dying
        print fetch_comp($row);
    }
    close($fh);
 }


 # Fetch one job posting and return its details as a single CSV line.
 #
 # Takes the posting URL as its only argument and dies with "invalid url"
 # when that argument is missing or false.
 #
 # The parser walks the page's spans in document order: the first span is
 # expected to hold the company link and name, the next one the post date
 # and the one after that the job type.
 #
 # Returns the string '"company", "url", "date", "type"' followed by a
 # newline. A missing link yields "-" for the url, and double quotes inside
 # a value are doubled so they cannot terminate the field early.
 sub fetch_comp {
    my $url = shift or die("invalid url");

    # Get the page
    $mech->get($url);

    # Parse the content, read through the public accessor instead of
    # reaching into the object's internal hash
    my $html   = $mech->content;
    my $stream = HTML::TokeParser->new(\$html);

    $stream->get_tag("span"); # First span contains company and url

    # get_tag returns undef when no <a> is left in the document, so check
    # the tag before dereferencing it; otherwise the "-" fallback is
    # never reached and the script dies under strict refs.
    my $link     = $stream->get_tag("a");
    my $href_url = ($link && $link->[1]{href}) || "-";

    my $company = $stream->get_trimmed_text("/span"); # Company name follows the link

    $stream->get_tag("span"); # Post date
    my $date_txt = $stream->get_trimmed_text("/span");

    $stream->get_tag("span"); # Type of job, full-time etc.
    my $type = $stream->get_trimmed_text("/span");

    # Double any embedded quotes, working on copies of the values
    my @fields = map { (my $field = $_) =~ s/"/""/g; $field }
        ($company, $href_url, $date_txt, $type);

    return join(', ', map { qq("$_") } @fields) . "\n"; # CSV return
 }

 # Script entry point: process the URL list named on the command line.
 read_list();
	#!/usr/bin/perl
	use strict;
	use warnings;
	use utf8;
	use WWW::Mechanize::Sleepy;
	use HTML::TokeParser;
	use Data::Dumper;
	use open qw( :std :encoding(UTF-8) );

	# Scrapes job postings from tvinna.is for use in analysis of the job market

	# Shared user agent for every request made by this script. The options are
	# collected in a hash first so each setting can be read on its own line.
	my %mech_options = (
	    autocheck => 1,
	    timeout   => 100,
	    sleep     => '1',
	);
	my $mech = WWW::Mechanize::Sleepy->new(%mech_options);

	# Read a list of posting URLs (one per line) from the file named by the
	# first command-line argument and print one CSV line per URL.
	#
	# Dies when no file name is given or when the file cannot be opened.
	# Blank lines and surrounding whitespace (including the "\r" left behind by
	# CRLF line endings) are tolerated, so a stray empty line no longer aborts
	# the whole run with "invalid url".
	sub read_list {
	    my $filename = $ARGV[0] or die("No urllist found in args");
	    open(my $fh, '<:encoding(UTF-8)', $filename)
	        or die "Could not open file '$filename' $!";
	    while (my $row = <$fh>) {
	        chomp $row;
	        $row =~ s/\A\s+|\s+\z//g;    # trim; also removes a trailing "\r"
	        next if $row eq '';          # skip blank lines instead of dying
	        print fetch_comp($row);
	    }
	    close($fh);
	}


	# Fetch one job posting and return its details as a single CSV line.
	#
	# Takes the posting URL as its only argument and dies with "invalid url"
	# when that argument is missing or false.
	#
	# The parser walks the page's spans in document order: the first span is
	# expected to hold the company link and name, the next one the post date
	# and the one after that the job type.
	#
	# Returns the string '"company", "url", "date", "type"' followed by a
	# newline. A missing link yields "-" for the url, and double quotes inside
	# a value are doubled so they cannot terminate the field early.
	sub fetch_comp {
	    my $url = shift or die("invalid url");

	    # Get the page
	    $mech->get($url);

	    # Parse the content, read through the public accessor instead of
	    # reaching into the object's internal hash
	    my $html   = $mech->content;
	    my $stream = HTML::TokeParser->new(\$html);

	    $stream->get_tag("span"); # First span contains company and url

	    # get_tag returns undef when no <a> is left in the document, so check
	    # the tag before dereferencing it. The fallback uses a plain ||; the
	    # backslash-escaped form previously here is not valid Perl.
	    my $link     = $stream->get_tag("a");
	    my $href_url = ($link && $link->[1]{href}) || "-";

	    my $company = $stream->get_trimmed_text("/span"); # Company name follows the link

	    $stream->get_tag("span"); # Post date
	    my $date_txt = $stream->get_trimmed_text("/span");

	    $stream->get_tag("span"); # Type of job, full-time etc.
	    my $type = $stream->get_trimmed_text("/span");

	    # Double any embedded quotes, working on copies of the values
	    my @fields = map { (my $field = $_) =~ s/"/""/g; $field }
	        ($company, $href_url, $date_txt, $type);

	    return join(', ', map { qq("$_") } @fields) . "\n"; # CSV return
	}

	# Script entry point: process the URL list named on the command line.
	read_list();