source: perl/modules/Facebook/lib/AnyEvent/HTTP.pm @ d953ede

release-1.10release-1.9
Last change on this file since d953ede was bbd0cf1, checked in by Edward Z. Yang <ezyang@mit.edu>, 13 years ago
Bundle Ouch, AnyEvent::HTTP, and URI::Encode from CPAN Those dependencies of Facebook::Graph are not in Debian and are each single files, so we may as well just bundle them. Also update list of bundled libraries in README and drop their licenses (or the closest thing they have resembling one) into COPYING.
  • Property mode set to 100644
File size: 47.6 KB
Line 
1=head1 NAME
2
3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4
5=head1 SYNOPSIS
6
7   use AnyEvent::HTTP;
8
9   http_get "http://www.nethype.de/", sub { print $_[1] };
10
11   # ... do something else here
12
13=head1 DESCRIPTION
14
15This module is an L<AnyEvent> user, you need to make sure that you use and
16run a supported event loop.
17
18This module implements a simple, stateless and non-blocking HTTP
19client. It supports GET, POST and other request methods, cookies and more,
20all on a very low level. It can follow redirects, supports proxies, and
21automatically limits the number of connections to the values specified in
22the RFC.
23
24It should generally be a "good client" that is enough for most HTTP
25tasks. Simple tasks should be simple, but complex tasks should still be
26possible as the user retains control over request and response headers.
27
28The caller is responsible for authentication management, cookies (if
29the simplistic implementation in this module doesn't suffice), referer
30and other high-level protocol details for which this module offers only
31limited support.
32
33=head2 METHODS
34
35=over 4
36
37=cut
38
39package AnyEvent::HTTP;
40
41use common::sense;
42
43use Errno ();
44
45use AnyEvent 5.0 ();
46use AnyEvent::Util ();
47use AnyEvent::Handle ();
48
49use base Exporter::;
50
51our $VERSION = '2.13';
52
53our @EXPORT = qw(http_get http_post http_head http_request);
54
55our $USERAGENT          = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56our $MAX_RECURSE        =  10;
57our $PERSISTENT_TIMEOUT =   3;
58our $TIMEOUT            = 300;
59our $MAX_PER_HOST       =   4; # changing this is evil
60
61our $PROXY;
62our $ACTIVE = 0;
63
64my %KA_CACHE; # indexed by uhost currently, points to [$handle...] array
65my %CO_SLOT;  # number of open connections, and wait queue, per host
66
67=item http_get $url, key => value..., $cb->($data, $headers)
68
69Executes an HTTP-GET request. See the http_request function for details on
70additional parameters and the return value.
71
72=item http_head $url, key => value..., $cb->($data, $headers)
73
74Executes an HTTP-HEAD request. See the http_request function for details
75on additional parameters and the return value.
76
77=item http_post $url, $body, key => value..., $cb->($data, $headers)
78
79Executes an HTTP-POST request with a request body of C<$body>. See the
80http_request function for details on additional parameters and the return
81value.
82
83=item http_request $method => $url, key => value..., $cb->($data, $headers)
84
85Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
86must be an absolute http or https URL.
87
88When called in void context, nothing is returned. In other contexts,
89C<http_request> returns a "cancellation guard" - you have to keep the
90object at least alive until the callback get called. If the object gets
91destroyed before the callback is called, the request will be cancelled.
92
93The callback will be called with the response body data as first argument
94(or C<undef> if an error occured), and a hash-ref with response headers
95(and trailers) as second argument.
96
97All the headers in that hash are lowercased. In addition to the response
98headers, the "pseudo-headers" (uppercase to avoid clashing with possible
99response headers) C<HTTPVersion>, C<Status> and C<Reason> contain the
100three parts of the HTTP Status-Line of the same name. If an error occurs
101during the body phase of a request, then the original C<Status> and
102C<Reason> values from the header are available as C<OrigStatus> and
103C<OrigReason>.
104
105The pseudo-header C<URL> contains the actual URL (which can differ from
106the requested URL when following redirects - for example, you might get
107an error that your URL scheme is not supported even though your URL is a
108valid http URL because it redirected to an ftp URL, in which case you can
109look at the URL pseudo header).
110
111The pseudo-header C<Redirect> only exists when the request was a result
112of an internal redirect. In that case it is an array reference with
113the C<($data, $headers)> from the redirect response. Note that this
114response could in turn be the result of a redirect itself, and C<<
115$headers->{Redirect}[1]{Redirect} >> will then contain the original
116response, and so on.
117
118If the server sends a header multiple times, then their contents will be
119joined together with a comma (C<,>), as per the HTTP spec.
120
121If an internal error occurs, such as not being able to resolve a hostname,
122then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
123C<590>-C<599> and the C<Reason> pseudo-header will contain an error
124message. Currently the following status codes are used:
125
126=over 4
127
128=item 595 - errors during connection etsbalishment, proxy handshake.
129
130=item 596 - errors during TLS negotiation, request sending and header processing.
131
132=item 597 - errors during body receiving or processing.
133
134=item 598 - user aborted request via C<on_header> or C<on_body>.
135
136=item 599 - other, usually nonretryable, errors (garbled URL etc.).
137
138=back
139
140A typical callback might look like this:
141
142   sub {
143      my ($body, $hdr) = @_;
144
145      if ($hdr->{Status} =~ /^2/) {
146         ... everything should be ok
147      } else {
148         print "error, $hdr->{Status} $hdr->{Reason}\n";
149      }
150   }
151
152Additional parameters are key-value pairs, and are fully optional. They
153include:
154
155=over 4
156
157=item recurse => $count (default: $MAX_RECURSE)
158
159Whether to recurse requests or not, e.g. on redirects, authentication
160retries and so on, and how often to do so.
161
162=item headers => hashref
163
164The request headers to use. Currently, C<http_request> may provide its own
165C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and
166will provide defaults at least for C<TE:>, C<Referer:> and C<User-Agent:>
167(this can be suppressed by using C<undef> for these headers in which case
168they won't be sent at all).
169
170You really should provide your own C<User-Agent:> header value that is
171appropriate for your program - I wouldn't be surprised if the default
172AnyEvent string gets blocked by webservers sooner or later.
173
174Also, make sure that your headers names and values do not contain any
175embedded newlines.
176
177=item timeout => $seconds
178
179The time-out to use for various stages - each connect attempt will reset
180the timeout, as will read or write activity, i.e. this is not an overall
181timeout.
182
183Default timeout is 5 minutes.
184
185=item proxy => [$host, $port[, $scheme]] or undef
186
187Use the given http proxy for all requests, or no proxy if C<undef> is
188used.
189
190C<$scheme> must be either missing or must be C<http> for HTTP.
191
192If not specified, then the default proxy is used (see
193C<AnyEvent::HTTP::set_proxy>).
194
195=item body => $string
196
197The request body, usually empty. Will be sent as-is (future versions of
198this module might offer more options).
199
200=item cookie_jar => $hash_ref
201
202Passing this parameter enables (simplified) cookie-processing, loosely
203based on the original netscape specification.
204
205The C<$hash_ref> must be an (initially empty) hash reference which
206will get updated automatically. It is possible to save the cookie jar
207to persistent storage with something like JSON or Storable - see the
208C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
209expired or session-only cookies, and also for documentation on the format
210of the cookie jar.
211
212Note that this cookie implementation is not meant to be complete. If
213you want complete cookie management you have to do that on your
214own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
215working. Cookies are a privacy disaster, do not use them unless required
216to.
217
218When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
219headers will be set and handled by this module, otherwise they will be
220left untouched.
221
222=item tls_ctx => $scheme | $tls_ctx
223
224Specifies the AnyEvent::TLS context to be used for https connections. This
225parameter follows the same rules as the C<tls_ctx> parameter to
226L<AnyEvent::Handle>, but additionally, the two strings C<low> or
227C<high> can be specified, which give you a predefined low-security (no
228verification, highest compatibility) and high-security (CA and common-name
229verification) TLS context.
230
231The default for this option is C<low>, which could be interpreted as "give
232me the page, no matter what".
233
234See also the C<sessionid> parameter.
235
236=item session => $string
237
238The module might reuse connections to the same host internally. Sometimes
239(e.g. when using TLS), you do not want to reuse connections from other
240sessions. This can be achieved by setting this parameter to some unique
241ID (such as the address of an object storing your state data, or the TLS
242context) - only connections using the same unique ID will be reused.
243
244=item on_prepare => $callback->($fh)
245
246In rare cases you need to "tune" the socket before it is used to
247connect (for exmaple, to bind it on a given IP address). This parameter
248overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect>
249and behaves exactly the same way (e.g. it has to provide a
250timeout). See the description for the C<$prepare_cb> argument of
251C<AnyEvent::Socket::tcp_connect> for details.
252
253=item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
254
255In even rarer cases you want total control over how AnyEvent::HTTP
256establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect>
257to do this, but you can provide your own C<tcp_connect> function -
258obviously, it has to follow the same calling conventions, except that it
259may always return a connection guard object.
260
261There are probably lots of weird uses for this function, starting from
262tracing the hosts C<http_request> actually tries to connect, to (inexact
263but fast) host => IP address caching or even socks protocol support.
264
265=item on_header => $callback->($headers)
266
267When specified, this callback will be called with the header hash as soon
268as headers have been successfully received from the remote server (not on
269locally-generated errors).
270
271It has to return either true (in which case AnyEvent::HTTP will continue),
272or false, in which case AnyEvent::HTTP will cancel the download (and call
273the finish callback with an error code of C<598>).
274
275This callback is useful, among other things, to quickly reject unwanted
276content, which, if it is supposed to be rare, can be faster than first
277doing a C<HEAD> request.
278
279The downside is that cancelling the request makes it impossible to re-use
280the connection. Also, the C<on_header> callback will not receive any
281trailer (headers sent after the response body).
282
283Example: cancel the request unless the content-type is "text/html".
284
285   on_header => sub {
286      $_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/
287   },
288
289=item on_body => $callback->($partial_body, $headers)
290
291When specified, all body data will be passed to this callback instead of
292to the completion callback. The completion callback will get the empty
293string instead of the body data.
294
295It has to return either true (in which case AnyEvent::HTTP will continue),
296or false, in which case AnyEvent::HTTP will cancel the download (and call
297the completion callback with an error code of C<598>).
298
299The downside to cancelling the request is that it makes it impossible to
300re-use the connection.
301
302This callback is useful when the data is too large to be held in memory
303(so the callback writes it to a file) or when only some information should
304be extracted, or when the body should be processed incrementally.
305
306It is usually preferred over doing your own body handling via
307C<want_body_handle>, but in case of streaming APIs, where HTTP is
308only used to create a connection, C<want_body_handle> is the better
309alternative, as it allows you to install your own event handler, reducing
310resource usage.
311
312=item want_body_handle => $enable
313
314When enabled (default is disabled), the behaviour of AnyEvent::HTTP
315changes considerably: after parsing the headers, and instead of
316downloading the body (if any), the completion callback will be
317called. Instead of the C<$body> argument containing the body data, the
318callback will receive the L<AnyEvent::Handle> object associated with the
319connection. In error cases, C<undef> will be passed. When there is no body
320(e.g. status C<304>), the empty string will be passed.
321
322The handle object might or might not be in TLS mode, might be connected
323to a proxy, be a persistent connection, use chunked transfer encoding
324etc., and configured in unspecified ways. The user is responsible for this
325handle (it will not be used by this module anymore).
326
327This is useful with some push-type services, where, after the initial
328headers, an interactive protocol is used (typical example would be the
329push-style twitter API which starts a JSON/XML stream).
330
331If you think you need this, first have a look at C<on_body>, to see if
332that doesn't solve your problem in a better way.
333
334=item persistent => $boolean
335
336Try to create/reuse a persistent connection. When this flag is set
337(default: true for idempotent requests, false for all others), then
338C<http_request> tries to re-use an existing (previously-created)
339persistent connection to the host and, failing that, tries to create a new
340one.
341
342Requests failing in certain ways will be automatically retried once, which
343is dangerous for non-idempotent requests, which is why it defaults to off
344for them. The reason for this is because the bozos who designed HTTP/1.1
345made it impossible to distinguish between a fatal error and a normal
346connection timeout, so you never know whether there was a problem with
347your request or not.
348
349When reusing an existent connection, many parameters (such as TLS context)
350will be ignored. See the C<session> parameter for a workaround.
351
352=item keepalive => $boolean
353
354Only used when C<persistent> is also true. This parameter decides whether
355C<http_request> tries to handshake a HTTP/1.0-style keep-alive connection
356(as opposed to only a HTTP/1.1 persistent connection).
357
358The default is true, except when using a proxy, in which case it defaults
359to false, as HTTP/1.0 proxies cannot support this in a meaningful way.
360
361=item handle_params => { key => value ... }
362
363The key-value pairs in this hash will be passed to any L<AnyEvent::Handle>
364constructor that is called - not all requests will create a handle, and
365sometimes more than one is created, so this parameter is only good for
366setting hints.
367
368Example: set the maximum read size to 4096, to potentially conserve memory
369at the cost of speed.
370
371   handle_params => {
372      max_read_size => 4096,
373   },
374
375=back
376
377Example: do a simple HTTP GET request for http://www.nethype.de/ and print
378the response body.
379
380   http_request GET => "http://www.nethype.de/", sub {
381      my ($body, $hdr) = @_;
382      print "$body\n";
383   };
384
385Example: do a HTTP HEAD request on https://www.google.com/, use a
386timeout of 30 seconds.
387
388   http_request
389      HEAD    => "https://www.google.com",
390      headers => { "user-agent" => "MySearchClient 1.0" },
391      timeout => 30,
392      sub {
393         my ($body, $hdr) = @_;
394         use Data::Dumper;
395         print Dumper $hdr;
396      }
397   ;
398
399Example: do another simple HTTP GET request, but immediately try to
400cancel it.
401
402   my $request = http_request GET => "http://www.nethype.de/", sub {
403      my ($body, $hdr) = @_;
404      print "$body\n";
405   };
406
407   undef $request;
408
409=cut
410
411#############################################################################
412# wait queue/slots
413
414sub _slot_schedule;
415sub _slot_schedule($) {
416   my $host = shift;
417
418   while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
419      if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
420         # somebody wants that slot
421         ++$CO_SLOT{$host}[0];
422         ++$ACTIVE;
423
424         $cb->(AnyEvent::Util::guard {
425            --$ACTIVE;
426            --$CO_SLOT{$host}[0];
427            _slot_schedule $host;
428         });
429      } else {
430         # nobody wants the slot, maybe we can forget about it
431         delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
432         last;
433      }
434   }
435}
436
437# wait for a free slot on host, call callback
438sub _get_slot($$) {
439   push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
440
441   _slot_schedule $_[0];
442}
443
444#############################################################################
445# cookie handling
446
447# expire cookies
448sub cookie_jar_expire($;$) {
449   my ($jar, $session_end) = @_;
450
451   %$jar = () if $jar->{version} != 1;
452
453   my $anow = AE::now;
454
455   while (my ($chost, $paths) = each %$jar) {
456      next unless ref $paths;
457
458      while (my ($cpath, $cookies) = each %$paths) {
459         while (my ($cookie, $kv) = each %$cookies) {
460            if (exists $kv->{_expires}) {
461               delete $cookies->{$cookie}
462                  if $anow > $kv->{_expires};
463            } elsif ($session_end) {
464               delete $cookies->{$cookie};
465            }
466         }
467
468         delete $paths->{$cpath}
469            unless %$cookies;
470      }
471
472      delete $jar->{$chost}
473         unless %$paths;
474   }
475}
476 
477# extract cookies from jar
478sub cookie_jar_extract($$$$) {
479   my ($jar, $scheme, $host, $path) = @_;
480
481   %$jar = () if $jar->{version} != 1;
482
483   my @cookies;
484
485   while (my ($chost, $paths) = each %$jar) {
486      next unless ref $paths;
487
488      if ($chost =~ /^\./) {
489         next unless $chost eq substr $host, -length $chost;
490      } elsif ($chost =~ /\./) {
491         next unless $chost eq $host;
492      } else {
493         next;
494      }
495
496      while (my ($cpath, $cookies) = each %$paths) {
497         next unless $cpath eq substr $path, 0, length $cpath;
498
499         while (my ($cookie, $kv) = each %$cookies) {
500            next if $scheme ne "https" && exists $kv->{secure};
501
502            if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
503               delete $cookies->{$cookie};
504               next;
505            }
506
507            my $value = $kv->{value};
508
509            if ($value =~ /[=;,[:space:]]/) {
510               $value =~ s/([\\"])/\\$1/g;
511               $value = "\"$value\"";
512            }
513
514            push @cookies, "$cookie=$value";
515         }
516      }
517   }
518
519   \@cookies
520}
521 
522# parse set_cookie header into jar
523sub cookie_jar_set_cookie($$$$) {
524   my ($jar, $set_cookie, $host, $date) = @_;
525
526   my $anow = int AE::now;
527   my $snow; # server-now
528
529   for ($set_cookie) {
530      # parse NAME=VALUE
531      my @kv;
532
533      # expires is not http-compliant in the original cookie-spec,
534      # we support the official date format and some extensions
535      while (
536         m{
537            \G\s*
538            (?:
539               expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
540               | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )?
541            )
542         }gcxsi
543      ) {
544         my $name = $2;
545         my $value = $4;
546
547         if (defined $1) {
548            # expires
549            $name  = "expires";
550            $value = $1;
551         } elsif (defined $3) {
552            # quoted
553            $value = $3;
554            $value =~ s/\\(.)/$1/gs;
555         }
556
557         push @kv, @kv ? lc $name : $name, $value;
558
559         last unless /\G\s*;/gc;
560      }
561
562      last unless @kv;
563
564      my $name = shift @kv;
565      my %kv = (value => shift @kv, @kv);
566
567      if (exists $kv{"max-age"}) {
568         $kv{_expires} = $anow + delete $kv{"max-age"};
569      } elsif (exists $kv{expires}) {
570         $snow ||= parse_date ($date) || $anow;
571         $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
572      } else {
573         delete $kv{_expires};
574      }
575
576      my $cdom;
577      my $cpath = (delete $kv{path}) || "/";
578
579      if (exists $kv{domain}) {
580         $cdom = delete $kv{domain};
581
582         $cdom =~ s/^\.?/./; # make sure it starts with a "."
583
584         next if $cdom =~ /\.$/;
585
586         # this is not rfc-like and not netscape-like. go figure.
587         my $ndots = $cdom =~ y/.//;
588         next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
589      } else {
590         $cdom = $host;
591      }
592
593      # store it
594      $jar->{version} = 1;
595      $jar->{lc $cdom}{$cpath}{$name} = \%kv;
596
597      redo if /\G\s*,/gc;
598   }
599}
600
601#############################################################################
602# keepalive/persistent connection cache
603
604# fetch a connection from the keepalive cache
605sub ka_fetch($) {
606   my $ka_key = shift;
607
608   my $hdl = pop @{ $KA_CACHE{$ka_key} }; # currently we reuse the MOST RECENTLY USED connection
609   delete $KA_CACHE{$ka_key}
610      unless @{ $KA_CACHE{$ka_key} };
611
612   $hdl
613}
614
615sub ka_store($$) {
616   my ($ka_key, $hdl) = @_;
617
618   my $kaa = $KA_CACHE{$ka_key} ||= [];
619
620   my $destroy = sub {
621      my @ka = grep $_ != $hdl, @{ $KA_CACHE{$ka_key} };
622
623      $hdl->destroy;
624
625      @ka
626         ? $KA_CACHE{$ka_key} = \@ka
627         : delete $KA_CACHE{$ka_key};
628   };
629
630   # on error etc., destroy
631   $hdl->on_error ($destroy);
632   $hdl->on_eof   ($destroy);
633   $hdl->on_read  ($destroy);
634   $hdl->timeout  ($PERSISTENT_TIMEOUT);
635
636   push @$kaa, $hdl;
637   shift @$kaa while @$kaa > $MAX_PER_HOST;
638}
639
640#############################################################################
641# utilities
642
643# continue to parse $_ for headers and place them into the arg
644sub _parse_hdr() {
645   my %hdr;
646
647   # things seen, not parsed:
648   # p3pP="NON CUR OTPi OUR NOR UNI"
649
650   $hdr{lc $1} .= ",$2"
651      while /\G
652            ([^:\000-\037]*):
653            [\011\040]*
654            ((?: [^\012]+ | \012[\011\040] )*)
655            \012
656         /gxc;
657
658   /\G$/
659     or return;
660
661   # remove the "," prefix we added to all headers above
662   substr $_, 0, 1, ""
663      for values %hdr;
664
665   \%hdr
666}
667
668#############################################################################
669# http_get
670
671our $qr_nlnl = qr{(?<![^\012])\015?\012};
672
673our $TLS_CTX_LOW  = { cache => 1, sslv2 => 1 };
674our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
675
676# maybe it should just become a normal object :/
677
678sub _destroy_state(\%) {
679   my ($state) = @_;
680
681   $state->{handle}->destroy if $state->{handle};
682   %$state = ();
683}
684
685sub _error(\%$$) {
686   my ($state, $cb, $hdr) = @_;
687
688   &_destroy_state ($state);
689
690   $cb->(undef, $hdr);
691   ()
692}
693
694sub http_request($$@) {
695   my $cb = pop;
696   my ($method, $url, %arg) = @_;
697
698   my %hdr;
699
700   $arg{tls_ctx} = $TLS_CTX_LOW  if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx};
701   $arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high";
702
703   $method = uc $method;
704
705   if (my $hdr = $arg{headers}) {
706      while (my ($k, $v) = each %$hdr) {
707         $hdr{lc $k} = $v;
708      }
709   }
710
711   # pseudo headers for all subsequent responses
712   my @pseudo = (URL => $url);
713   push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect};
714
715   my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
716
717   return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" })
718      if $recurse < 0;
719
720   my $proxy   = exists $arg{proxy} ? $arg{proxy} : $PROXY;
721   my $timeout = $arg{timeout} || $TIMEOUT;
722
723   my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment
724      $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|;
725
726   $uscheme = lc $uscheme;
727
728   my $uport = $uscheme eq "http"  ?  80
729             : $uscheme eq "https" ? 443
730             : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
731
732   $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
733      or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
734
735   my $uhost = lc $1;
736   $uport = $2 if defined $2;
737
738   $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
739      unless exists $hdr{host};
740
741   $uhost =~ s/^\[(.*)\]$/$1/;
742   $upath .= $query if length $query;
743
744   $upath =~ s%^/?%/%;
745
746   # cookie processing
747   if (my $jar = $arg{cookie_jar}) {
748      my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
749
750      $hdr{cookie} = join "; ", @$cookies
751         if @$cookies;
752   }
753
754   my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
755
756   if ($proxy) {
757      ($rpath, $rhost, $rport, $rscheme) = ($url, @$proxy);
758
759      $rscheme = "http" unless defined $rscheme;
760
761      # don't support https requests over https-proxy transport,
762      # can't be done with tls as spec'ed, unless you double-encrypt.
763      $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
764
765      $rhost   = lc $rhost;
766      $rscheme = lc $rscheme;
767   } else {
768      ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
769   }
770
771   # leave out fragment and query string, just a heuristic
772   $hdr{referer}      = "$uscheme://$uauthority$upath" unless exists $hdr{referer};
773   $hdr{"user-agent"} = $USERAGENT                     unless exists $hdr{"user-agent"};
774
775   $hdr{"content-length"} = length $arg{body}
776      if length $arg{body} || $method ne "GET";
777
778   my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/;
779
780   # default value for keepalive is true iff the request is for an idempotent method
781   my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent;
782   my $keepalive  = exists $arg{keepalive}  ? !!$arg{keepalive}  : !$proxy;
783   my $was_persistent; # true if this is actually a recycled connection
784
785   # the key to use in the keepalive cache
786   my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}";
787
788   $hdr{connection} = ($persistent ? $keepalive ? "keep-alive " : "" : "close ") . "Te"; #1.1
789   $hdr{te}         = "trailers" unless exists $hdr{te}; #1.1
790
791   my %state = (connect_guard => 1);
792
793   my $ae_error = 595; # connecting
794
795   # handle actual, non-tunneled, request
796   my $handle_actual_request = sub {
797      $ae_error = 596; # request phase
798
799      my $hdl = $state{handle};
800
801      $hdl->starttls ("connect") if $uscheme eq "https" && !exists $hdl->{tls};
802
803      # send request
804      $hdl->push_write (
805         "$method $rpath HTTP/1.1\015\012"
806         . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
807         . "\015\012"
808         . (delete $arg{body})
809      );
810
811      # return if error occured during push_write()
812      return unless %state;
813
814      # reduce memory usage, save a kitten, also re-use it for the response headers.
815      %hdr = ();
816
817      # status line and headers
818      $state{read_response} = sub {
819         return unless %state;
820
821         for ("$_[1]") {
822            y/\015//d; # weed out any \015, as they show up in the weirdest of places.
823
824            /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
825               or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid server response" };
826
827            # 100 Continue handling
828            # should not happen as we don't send expect: 100-continue,
829            # but we handle it just in case.
830            # since we send the request body regardless, if we get an error
831            # we are out of-sync, which we currently do NOT handle correctly.
832            return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
833               if $2 eq 100;
834
835            push @pseudo,
836               HTTPVersion => $1,
837               Status      => $2,
838               Reason      => $3,
839            ;
840
841            my $hdr = _parse_hdr
842               or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Garbled response headers" };
843
844            %hdr = (%$hdr, @pseudo);
845         }
846
847         # redirect handling
848         # microsoft and other shitheads don't give a shit for following standards,
849         # try to support some common forms of broken Location headers.
850         if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
851            $hdr{location} =~ s/^\.\/+//;
852
853            my $url = "$rscheme://$uhost:$uport";
854
855            unless ($hdr{location} =~ s/^\///) {
856               $url .= $upath;
857               $url =~ s/\/[^\/]*$//;
858            }
859
860            $hdr{location} = "$url/$hdr{location}";
861         }
862
863         my $redirect;
864
865         if ($recurse) {
866            my $status = $hdr{Status};
867
868            # industry standard is to redirect POST as GET for
869            # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
870            # also, the UA should ask the user for 301 and 307 and POST,
871            # industry standard seems to be to simply follow.
872            # we go with the industry standard.
873            if ($status == 301 or $status == 302 or $status == 303) {
874               # HTTP/1.1 is unclear on how to mutate the method
875               $method = "GET" unless $method eq "HEAD";
876               $redirect = 1;
877            } elsif ($status == 307) {
878               $redirect = 1;
879            }
880         }
881
882         my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
883            if ($state{handle}) {
884               # handle keepalive
885               if (
886                  $persistent
887                  && $_[3]
888                  && ($hdr{HTTPVersion} < 1.1
889                      ? $hdr{connection} =~ /\bkeep-?alive\b/i
890                      : $hdr{connection} !~ /\bclose\b/i)
891               ) {
892                  ka_store $ka_key, delete $state{handle};
893               } else {
894                  # no keepalive, destroy the handle
895                  $state{handle}->destroy;
896               }
897            }
898
899            %state = ();
900
901            if (defined $_[1]) {
902               $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
903               $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
904            }
905
906            # set-cookie processing
907            if ($arg{cookie_jar}) {
908               cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
909            }
910
911            if ($redirect && exists $hdr{location}) {
912               # we ignore any errors, as it is very common to receive
913               # Content-Length != 0 but no actual body
914               # we also access %hdr, as $_[1] might be an erro
915               $state{recurse} =
916                  http_request (
917                     $method  => $hdr{location},
918                     %arg,
919                     recurse  => $recurse - 1,
920                     Redirect => [$_[0], \%hdr],
921                     sub {
922                        %state = ();
923                        &$cb
924                     },
925                  );
926            } else {
927               $cb->($_[0], \%hdr);
928            }
929         };
930
931         $ae_error = 597; # body phase
932
933         my $chunked = $hdr{"transfer-encoding"} =~ /\bchunked\b/i; # not quite correct...
934
935         my $len = $chunked ? undef : $hdr{"content-length"};
936
937         # body handling, many different code paths
938         # - no body expected
939         # - want_body_handle
940         # - te chunked
941         # - 2x length known (with or without on_body)
942         # - 2x length not known (with or without on_body)
943         if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
944            $finish->(undef, 598 => "Request cancelled by on_header");
945         } elsif (
946            $hdr{Status} =~ /^(?:1..|204|205|304)$/
947            or $method eq "HEAD"
948            or (defined $len && $len == 0) # == 0, not !, because "0   " is true
949         ) {
950            # no body
951            $finish->("", undef, undef, 1);
952
953         } elsif (!$redirect && $arg{want_body_handle}) {
954            $_[0]->on_eof   (undef);
955            $_[0]->on_error (undef);
956            $_[0]->on_read  (undef);
957
958            $finish->(delete $state{handle});
959
960         } elsif ($chunked) {
961            my $cl = 0;
962            my $body = "";
963            my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
964
965            $state{read_chunk} = sub {
966               $_[1] =~ /^([0-9a-fA-F]+)/
967                  or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
968
969               my $len = hex $1;
970
971               if ($len) {
972                  $cl += $len;
973
974                  $_[0]->push_read (chunk => $len, sub {
975                     $on_body->($_[1], \%hdr)
976                        or return $finish->(undef, 598 => "Request cancelled by on_body");
977
978                     $_[0]->push_read (line => sub {
979                        length $_[1]
980                           and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
981                        $_[0]->push_read (line => $state{read_chunk});
982                     });
983                  });
984               } else {
985                  $hdr{"content-length"} ||= $cl;
986
987                  $_[0]->push_read (line => $qr_nlnl, sub {
988                     if (length $_[1]) {
989                        for ("$_[1]") {
990                           y/\015//d; # weed out any \015, as they show up in the weirdest of places.
991
992                           my $hdr = _parse_hdr
993                              or return $finish->(undef, $ae_error => "Garbled response trailers");
994
995                           %hdr = (%hdr, %$hdr);
996                        }
997                     }
998
999                     $finish->($body, undef, undef, 1);
1000                  });
1001               }
1002            };
1003
1004            $_[0]->push_read (line => $state{read_chunk});
1005
1006         } elsif ($arg{on_body}) {
1007            if (defined $len) {
1008               $_[0]->on_read (sub {
1009                  $len -= length $_[0]{rbuf};
1010
1011                  $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1012                     or return $finish->(undef, 598 => "Request cancelled by on_body");
1013
1014                  $len > 0
1015                     or $finish->("", undef, undef, 1);
1016               });
1017            } else {
1018               $_[0]->on_eof (sub {
1019                  $finish->("");
1020               });
1021               $_[0]->on_read (sub {
1022                  $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1023                     or $finish->(undef, 598 => "Request cancelled by on_body");
1024               });
1025            }
1026         } else {
1027            $_[0]->on_eof (undef);
1028
1029            if (defined $len) {
1030               $_[0]->on_read (sub {
1031                  $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
1032                     if $len <= length $_[0]{rbuf};
1033               });
1034            } else {
1035               $_[0]->on_error (sub {
1036                  ($! == Errno::EPIPE || !$!)
1037                     ? $finish->(delete $_[0]{rbuf})
1038                     : $finish->(undef, $ae_error => $_[2]);
1039               });
1040               $_[0]->on_read (sub { });
1041            }
1042         }
1043      };
1044
1045      # if keepalive is enabled, then the server closing the connection
1046      # before a response can happen legally - we retry on idempotent methods.
1047      if ($was_persistent && $idempotent) {
1048         my $old_eof = $hdl->{on_eof};
1049         $hdl->{on_eof} = sub {
1050            _destroy_state %state;
1051
1052            %state = ();
1053            $state{recurse} =
1054               http_request (
1055                  $method => $url,
1056                  %arg,
1057                  keepalive => 0,
1058                  sub {
1059                     %state = ();
1060                     &$cb
1061                  }
1062               );
1063         };
1064         $hdl->on_read (sub {
1065            return unless %state;
1066
1067            # as soon as we receive something, a connection close
1068            # once more becomes a hard error
1069            $hdl->{on_eof} = $old_eof;
1070            $hdl->push_read (line => $qr_nlnl, $state{read_response});
1071         });
1072      } else {
1073         $hdl->push_read (line => $qr_nlnl, $state{read_response});
1074      }
1075   };
1076
1077   my $prepare_handle = sub {
1078      my ($hdl) = $state{handle};
1079
1080      $hdl->on_error (sub {
1081         _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] };
1082      });
1083      $hdl->on_eof (sub {
1084         _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" };
1085      });
1086      $hdl->timeout_reset;
1087      $hdl->timeout ($timeout);
1088   };
1089
1090   # connected to proxy (or origin server)
1091   my $connect_cb = sub {
1092      my $fh = shift
1093         or return _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "$!" };
1094
1095      return unless delete $state{connect_guard};
1096
1097      # get handle
1098      $state{handle} = new AnyEvent::Handle
1099         %{ $arg{handle_params} },
1100         fh       => $fh,
1101         peername => $uhost,
1102         tls_ctx  => $arg{tls_ctx},
1103      ;
1104
1105      $prepare_handle->();
1106
1107      #$state{handle}->starttls ("connect") if $rscheme eq "https";
1108
1109      # now handle proxy-CONNECT method
1110      if ($proxy && $uscheme eq "https") {
1111         # oh dear, we have to wrap it into a connect request
1112
1113         # maybe re-use $uauthority with patched port?
1114         $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
1115         $state{handle}->push_read (line => $qr_nlnl, sub {
1116            $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
1117               or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" };
1118
1119            if ($2 == 200) {
1120               $rpath = $upath;
1121               $handle_actual_request->();
1122            } else {
1123               _error %state, $cb, { @pseudo, Status => $2, Reason => $3 };
1124            }
1125         });
1126      } else {
1127         $handle_actual_request->();
1128      }
1129   };
1130
1131   _get_slot $uhost, sub {
1132      $state{slot_guard} = shift;
1133
1134      return unless $state{connect_guard};
1135
1136      # try to use an existing keepalive connection, but only if we, ourselves, plan
1137      # on a keepalive request (in theory, this should be a separate config option).
1138      if ($persistent && $KA_CACHE{$ka_key}) {
1139         $was_persistent = 1;
1140
1141         $state{handle} = ka_fetch $ka_key;
1142         $state{handle}->destroyed
1143            and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d#
1144         $prepare_handle->();
1145         $state{handle}->destroyed
1146            and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d#
1147         $handle_actual_request->();
1148
1149      } else {
1150         my $tcp_connect = $arg{tcp_connect}
1151                           || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1152
1153         $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
1154      }
1155   };
1156
1157   defined wantarray && AnyEvent::Util::guard { _destroy_state %state }
1158}
1159
1160sub http_get($@) {
1161   unshift @_, "GET";
1162   &http_request
1163}
1164
1165sub http_head($@) {
1166   unshift @_, "HEAD";
1167   &http_request
1168}
1169
1170sub http_post($$@) {
1171   my $url = shift;
1172   unshift @_, "POST", $url, "body";
1173   &http_request
1174}
1175
1176=back
1177
1178=head2 DNS CACHING
1179
1180AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for
1181the actual connection, which in turn uses AnyEvent::DNS to resolve
1182hostnames. The latter is a simple stub resolver and does no caching
1183on its own. If you want DNS caching, you currently have to provide
1184your own default resolver (by storing a suitable resolver object in
1185C<$AnyEvent::DNS::RESOLVER>) or your own C<tcp_connect> callback.
1186
1187=head2 GLOBAL FUNCTIONS AND VARIABLES
1188
1189=over 4
1190
1191=item AnyEvent::HTTP::set_proxy "proxy-url"
1192
1193Sets the default proxy server to use. The proxy-url must begin with a
1194string of the form C<http://host:port>, croaks otherwise.
1195
1196To clear an already-set proxy, use C<undef>.
1197
1198When AnyEvent::HTTP is laoded for the first time it will query the
1199default proxy from the operating system, currently by looking at
1200C<$ENV{http_proxy>}.
1201
1202=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1203
1204Remove all cookies from the cookie jar that have been expired. If
1205C<$session_end> is given and true, then additionally remove all session
1206cookies.
1207
1208You should call this function (with a true C<$session_end>) before you
1209save cookies to disk, and you should call this function after loading them
1210again. If you have a long-running program you can additonally call this
1211function from time to time.
1212
1213A cookie jar is initially an empty hash-reference that is managed by this
1214module. It's format is subject to change, but currently it is like this:
1215
1216The key C<version> has to contain C<1>, otherwise the hash gets
1217emptied. All other keys are hostnames or IP addresses pointing to
1218hash-references. The key for these inner hash references is the
1219server path for which this cookie is meant, and the values are again
1220hash-references. The keys of those hash-references is the cookie name, and
1221the value, you guessed it, is another hash-reference, this time with the
1222key-value pairs from the cookie, except for C<expires> and C<max-age>,
1223which have been replaced by a C<_expires> key that contains the cookie
1224expiry timestamp.
1225
1226Here is an example of a cookie jar with a single cookie, so you have a
1227chance of understanding the above paragraph:
1228
1229   {
1230      version    => 1,
1231      "10.0.0.1" => {
1232         "/" => {
1233            "mythweb_id" => {
1234              _expires => 1293917923,
1235              value    => "ooRung9dThee3ooyXooM1Ohm",
1236            },
1237         },
1238      },
1239   }
1240
1241=item $date = AnyEvent::HTTP::format_date $timestamp
1242
1243Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
1244Date (RFC 2616).
1245
1246=item $timestamp = AnyEvent::HTTP::parse_date $date
1247
1248Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
1249bunch of minor variations of those, and returns the corresponding POSIX
1250timestamp, or C<undef> if the date cannot be parsed.
1251
1252=item $AnyEvent::HTTP::MAX_RECURSE
1253
1254The default value for the C<recurse> request parameter (default: C<10>).
1255
1256=item $AnyEvent::HTTP::TIMEOUT
1257
1258The default timeout for conenction operations (default: C<300>).
1259
1260=item $AnyEvent::HTTP::USERAGENT
1261
1262The default value for the C<User-Agent> header (the default is
1263C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
1264
1265=item $AnyEvent::HTTP::MAX_PER_HOST
1266
1267The maximum number of concurrent connections to the same host (identified
1268by the hostname). If the limit is exceeded, then the additional requests
1269are queued until previous connections are closed. Both persistent and
1270non-persistent connections are counted in this limit.
1271
1272The default value for this is C<4>, and it is highly advisable to not
1273increase it much.
1274
1275For comparison: the RFC's recommend 4 non-persistent or 2 persistent
1276connections, older browsers used 2, newers (such as firefox 3) typically
1277use 6, and Opera uses 8 because like, they have the fastest browser and
1278give a shit for everybody else on the planet.
1279
1280=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
1281
1282The time after which idle persistent conenctions get closed by
1283AnyEvent::HTTP (default: C<3>).
1284
1285=item $AnyEvent::HTTP::ACTIVE
1286
1287The number of active connections. This is not the number of currently
1288running requests, but the number of currently open and non-idle TCP
1289connections. This number can be useful for load-leveling.
1290
1291=back
1292
1293=cut
1294
1295our @month   = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
1296our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat);
1297
1298sub format_date($) {
1299   my ($time) = @_;
1300
1301   # RFC 822/1123 format
1302   my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time;
1303
1304   sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
1305      $weekday[$wday], $mday, $month[$mon], $year + 1900,
1306      $H, $M, $S;
1307}
1308
1309sub parse_date($) {
1310   my ($date) = @_;
1311
1312   my ($d, $m, $y, $H, $M, $S);
1313
1314   if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1315      # RFC 822/1123, required by RFC 2616 (with " ")
1316      # cookie dates (with "-")
1317
1318      ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1319
1320   } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1321      # RFC 850
1322      ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1323
1324   } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1325      # ISO C's asctime
1326      ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1327   }
1328   # other formats fail in the loop below
1329
1330   for (0..11) {
1331      if ($m eq $month[$_]) {
1332         require Time::Local;
1333         return Time::Local::timegm ($S, $M, $H, $d, $_, $y);
1334      }
1335   }
1336
1337   undef
1338}
1339
1340sub set_proxy($) {
1341   if (length $_[0]) {
1342      $_[0] =~ m%^(http):// ([^:/]+) (?: : (\d*) )?%ix
1343         or Carp::croak "$_[0]: invalid proxy URL";
1344      $PROXY = [$2, $3 || 3128, $1]
1345   } else {
1346      undef $PROXY;
1347   }
1348}
1349
1350# initialise proxy from environment
1351eval {
1352   set_proxy $ENV{http_proxy};
1353};
1354
1355=head2 SHOWCASE
1356
1357This section contaisn some more elaborate "real-world" examples or code
1358snippets.
1359
1360=head2 HTTP/1.1 FILE DOWNLOAD
1361
1362Downloading files with HTTP can be quite tricky, especially when something
1363goes wrong and you want to resume.
1364
1365Here is a function that initiates and resumes a download. It uses the
1366last modified time to check for file content changes, and works with many
1367HTTP/1.0 servers as well, and usually falls back to a complete re-download
1368on older servers.
1369
1370It calls the completion callback with either C<undef>, which means a
1371nonretryable error occured, C<0> when the download was partial and should
1372be retried, and C<1> if it was successful.
1373
1374   use AnyEvent::HTTP;
1375
1376   sub download($$$) {
1377      my ($url, $file, $cb) = @_;
1378
1379      open my $fh, "+<", $file
1380         or die "$file: $!";
1381
1382      my %hdr;
1383      my $ofs = 0;
1384
1385      warn stat $fh;
1386      warn -s _;
1387      if (stat $fh and -s _) {
1388         $ofs = -s _;
1389         warn "-s is ", $ofs;
1390         $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1391         $hdr{"range"} = "bytes=$ofs-";
1392      }
1393
1394      http_get $url,
1395         headers   => \%hdr,
1396         on_header => sub {
1397            my ($hdr) = @_;
1398
1399            if ($hdr->{Status} == 200 && $ofs) {
1400               # resume failed
1401               truncate $fh, $ofs = 0;
1402            }
1403
1404            sysseek $fh, $ofs, 0;
1405
1406            1
1407         },
1408         on_body   => sub {
1409            my ($data, $hdr) = @_;
1410
1411            if ($hdr->{Status} =~ /^2/) {
1412               length $data == syswrite $fh, $data
1413                  or return; # abort on write errors
1414            }
1415
1416            1
1417         },
1418         sub {
1419            my (undef, $hdr) = @_;
1420
1421            my $status = $hdr->{Status};
1422
1423            if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
1424               utime $fh, $time, $time;
1425            }
1426
1427            if ($status == 200 || $status == 206 || $status == 416) {
1428               # download ok || resume ok || file already fully downloaded
1429               $cb->(1, $hdr);
1430
1431            } elsif ($status == 412) {
1432               # file has changed while resuming, delete and retry
1433               unlink $file;
1434               $cb->(0, $hdr);
1435
1436            } elsif ($status == 500 or $status == 503 or $status =~ /^59/) {
1437               # retry later
1438               $cb->(0, $hdr);
1439
1440            } else {
1441               $cb->(undef, $hdr);
1442            }
1443         }
1444      ;
1445   }
1446
1447   download "http://server/somelargefile", "/tmp/somelargefile", sub {
1448      if ($_[0]) {
1449         print "OK!\n";
1450      } elsif (defined $_[0]) {
1451         print "please retry later\n";
1452      } else {
1453         print "ERROR\n";
1454      }
1455   };
1456
1457=head3 SOCKS PROXIES
1458
1459Socks proxies are not directly supported by AnyEvent::HTTP. You can
1460compile your perl to support socks, or use an external program such as
1461F<socksify> (dante) or F<tsocks> to make your program use a socks proxy
1462transparently.
1463
1464Alternatively, for AnyEvent::HTTP only, you can use your own
1465C<tcp_connect> function that does the proxy handshake - here is an example
1466that works with socks4a proxies:
1467
1468   use Errno;
1469   use AnyEvent::Util;
1470   use AnyEvent::Socket;
1471   use AnyEvent::Handle;
1472
1473   # host, port and username of/for your socks4a proxy
1474   my $socks_host = "10.0.0.23";
1475   my $socks_port = 9050;
1476   my $socks_user = "";
1477
1478   sub socks4a_connect {
1479      my ($host, $port, $connect_cb, $prepare_cb) = @_;
1480
1481      my $hdl = new AnyEvent::Handle
1482         connect    => [$socks_host, $socks_port],
1483         on_prepare => sub { $prepare_cb->($_[0]{fh}) },
1484         on_error   => sub { $connect_cb->() },
1485      ;
1486
1487      $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host);
1488
1489      $hdl->push_read (chunk => 8, sub {
1490         my ($hdl, $chunk) = @_;
1491         my ($status, $port, $ipn) = unpack "xCna4", $chunk;
1492
1493         if ($status == 0x5a) {
1494            $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port");
1495         } else {
1496            $! = Errno::ENXIO; $connect_cb->();
1497         }
1498      });
1499
1500      $hdl
1501   }
1502
1503Use C<socks4a_connect> instead of C<tcp_connect> when doing C<http_request>s,
1504possibly after switching off other proxy types:
1505
1506   AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies
1507
1508   http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub {
1509      my ($data, $headers) = @_;
1510      ...
1511   };
1512
1513=head1 SEE ALSO
1514
1515L<AnyEvent>.
1516
1517=head1 AUTHOR
1518
1519   Marc Lehmann <schmorp@schmorp.de>
1520   http://home.schmorp.de/
1521
1522With many thanks to Дмитрий Шалашов, who provided countless
1523testcases and bugreports.
1524
1525=cut
1526
15271
1528
Note: See TracBrowser for help on using the repository browser.