Skip to content

Instantly share code, notes, and snippets.

@kgoess
Last active August 29, 2015 14:02
Show Gist options
  • Save kgoess/3bf000a29dbc6d71a4f9 to your computer and use it in GitHub Desktop.
Save kgoess/3bf000a29dbc6d71a4f9 to your computer and use it in GitHub Desktop.
package LWPx::Protocol::http_paranoid;
use strict;
require LWP::Debug;
require HTTP::Response;
require HTTP::Status;
require Net::HTTP;
use vars qw(@ISA $TOO_LATE $TIME_REMAIN);
require LWP::Protocol;
require LWP::Protocol::http;
@ISA = qw(LWP::Protocol::http);
use vars qw(@ISA @EXTRA_SOCK_OPTS);
my $CRLF = "\015\012";
# lame hack using globals in this package to communicate to sysread in the
# package at bottom, but whatchya gonna do? Don't want to go modify
# Net::HTTP::* to pass explicit timeouts to all the sysreads.
sub _set_time_remain {
my $now = time;
return unless defined $TOO_LATE;
$TIME_REMAIN = $TOO_LATE - $now;
$TIME_REMAIN = 0 if $TIME_REMAIN < 0;
}
# This is pretty much the _new_socket code from the original LWPx::UserAgent
# with the additional $request argument and the custom DNS resolver. Besides
# that, it's the same as the current _new_socket in LWP::Protocol::http
sub _new_socket
{
my($self, $host, $port, $timeout, $request) = @_;
my $conn_cache = $self->{ua}{conn_cache};
if ($conn_cache) {
if (my $sock = $conn_cache->withdraw("http", "$host:$port")) {
return $sock if $sock && !$sock->can_read(0);
# if the socket is readable, then either the peer has closed the
# connection or there are some garbage bytes on it. In either
# case we abandon it.
$sock->close;
}
}
my @addrs = $self->{ua}->_resolve($host, $request, $timeout);
unless (@addrs) {
die "Can't connect to $host:$port (No suitable addresses found)";
}
my $sock;
local($^W) = 0; # IO::Socket::INET can be noisy
while (! $sock && @addrs) {
my $addr = shift @addrs;
my $conn_timeout = $request->{_time_begin} ?
(time() - $request->{_time_begin}) :
$timeout;
$sock = $self->socket_class->new(PeerAddr => $addr,
PeerHost => $host,
PeerPort => $port,
LocalAddr => $self->{ua}{local_address},
Proto => 'tcp',
Timeout => $conn_timeout,
KeepAlive => !!$conn_cache,
SendTE => 1,
$self->_extra_sock_opts($addr,$port),
);
}
unless ($sock) {
# IO::Socket::INET leaves additional error messages in $@
my $status = "Can't connect to $host:$port";
if ($@ =~ /\bconnect: (.*)/ ||
$@ =~ /\b(Bad hostname)\b/ ||
$@ =~ /\b(certificate verify failed)\b/ ||
$@ =~ /\b(Crypt-SSLeay can't verify hostnames)\b/
) {
$status .= " ($1)";
}
die "$status\n\n$@";
}
# perl 5.005's IO::Socket does not have the blocking method.
eval { $sock->blocking(0); };
$sock;
}
# this removes $proxy from the equation
sub _fixup_header
{
my($self, $h, $url, $proxy) = @_;
$proxy = undef;
$self->SUPER::_fixup_header($h, $url, $proxy);
}
# remove $proxy from the equation
# and set up our timers
sub request
{
my($self, $request, $proxy, $arg, $size, $timeout) = @_;
# paranoid: proxy paranoia should be dealt with at the proxy itself
$proxy = undef;
# paranoid: now $timeout means total time, not just between bytes coming in.
# avoids attacker servers from tarpitting a service that fetches URLs.
$TOO_LATE = undef;
$TIME_REMAIN = undef;
if ($timeout) {
my $start_time = $request->{_time_begin} || time();
$TOO_LATE = $start_time + $timeout;
}
$self->SUPER::request($request, $proxy, $arg, $size, $timeout);
}
sub pre_socketread_hook
{
my ($self) = @_;
_set_time_remain();
}
#-----------------------------------------------------------
package LWPx::Protocol::http_paranoid::SocketMethods;
use vars qw(@ISA);
@ISA = qw(LWP::Protocol::http::SocketMethods);
sub sysread {
my $self = shift;
my $timeout = $LWPx::Protocol::http_paranoid::TIME_REMAIN;
die "read timeout" if $timeout <= 0;
if (defined $timeout) {
die "read timeout" unless $self->can_read($timeout);
}
sysread($self, $_[0], $_[1], $_[2] || 0);
}
#-----------------------------------------------------------
package LWPx::Protocol::http_paranoid::Socket;
use vars qw(@ISA);
@ISA = qw(LWPx::Protocol::http_paranoid::SocketMethods Net::HTTP);
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment