1 """HTTP/1.1 client library
3 <intro stuff goes here>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
20 | ( putheader() )* endheaders()
24 | response = getresponse()
26 Unread-response [Response-headers-read]
27 |\____________________
29 | response.read() | putrequest()
31 Idle Req-started-unread-response
34 response.read() | | ( putheader() )* endheaders()
36 Request-started Req-sent-unread-response
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
59 Logical State __state __response
60 ------------- ------- ----------
62 Request-started _CS_REQ_STARTED None
63 Request-sent _CS_REQ_SENT None
64 Unread-response _CS_IDLE <response_class>
65 Req-started-unread-response _CS_REQ_STARTED <response_class>
66 Req-sent-unread-response _CS_REQ_SENT <response_class>
69 from array
import array
72 from sys
import py3kwarning
73 from urlparse
import urlsplit
75 with warnings
.catch_warnings():
77 warnings
.filterwarnings("ignore", ".*mimetools has been removed",
82 from cStringIO
import StringIO
84 from StringIO
import StringIO
86 __all__
= ["HTTP", "HTTPResponse", "HTTPConnection",
87 "HTTPException", "NotConnected", "UnknownProtocol",
88 "UnknownTransferEncoding", "UnimplementedFileMode",
89 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
90 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
91 "BadStatusLine", "error", "responses"]
100 _CS_REQ_STARTED
= 'Request-started'
101 _CS_REQ_SENT
= 'Request-sent'
106 SWITCHING_PROTOCOLS
= 101
113 NON_AUTHORITATIVE_INFORMATION
= 203
116 PARTIAL_CONTENT
= 206
121 MULTIPLE_CHOICES
= 300
122 MOVED_PERMANENTLY
= 301
127 TEMPORARY_REDIRECT
= 307
132 PAYMENT_REQUIRED
= 402
135 METHOD_NOT_ALLOWED
= 405
137 PROXY_AUTHENTICATION_REQUIRED
= 407
138 REQUEST_TIMEOUT
= 408
141 LENGTH_REQUIRED
= 411
142 PRECONDITION_FAILED
= 412
143 REQUEST_ENTITY_TOO_LARGE
= 413
144 REQUEST_URI_TOO_LONG
= 414
145 UNSUPPORTED_MEDIA_TYPE
= 415
146 REQUESTED_RANGE_NOT_SATISFIABLE
= 416
147 EXPECTATION_FAILED
= 417
148 UNPROCESSABLE_ENTITY
= 422
150 FAILED_DEPENDENCY
= 424
151 UPGRADE_REQUIRED
= 426
154 INTERNAL_SERVER_ERROR
= 500
155 NOT_IMPLEMENTED
= 501
157 SERVICE_UNAVAILABLE
= 503
158 GATEWAY_TIMEOUT
= 504
159 HTTP_VERSION_NOT_SUPPORTED
= 505
160 INSUFFICIENT_STORAGE
= 507
163 # Mapping status codes to official W3C names
166 101: 'Switching Protocols',
171 203: 'Non-Authoritative Information',
173 205: 'Reset Content',
174 206: 'Partial Content',
176 300: 'Multiple Choices',
177 301: 'Moved Permanently',
183 307: 'Temporary Redirect',
187 402: 'Payment Required',
190 405: 'Method Not Allowed',
191 406: 'Not Acceptable',
192 407: 'Proxy Authentication Required',
193 408: 'Request Timeout',
196 411: 'Length Required',
197 412: 'Precondition Failed',
198 413: 'Request Entity Too Large',
199 414: 'Request-URI Too Long',
200 415: 'Unsupported Media Type',
201 416: 'Requested Range Not Satisfiable',
202 417: 'Expectation Failed',
204 500: 'Internal Server Error',
205 501: 'Not Implemented',
207 503: 'Service Unavailable',
208 504: 'Gateway Timeout',
209 505: 'HTTP Version Not Supported',
212 # maximal amount of data to read at one time in _safe_read
215 # maximal line length when calling readline().
218 class HTTPMessage(mimetools
.Message
):
220 def addheader(self
, key
, value
):
221 """Add header for field key handling repeats."""
222 prev
= self
.dict.get(key
)
224 self
.dict[key
] = value
226 combined
= ", ".join((prev
, value
))
227 self
.dict[key
] = combined
def addcontinue(self, key, more):
    """Append the text of a continuation line to an existing field.

    key  -- name of the header field the continuation belongs to; it
            must already be present in self.dict (KeyError otherwise)
    more -- the continuation text to append

    The continuation is attached to the stored value after a newline
    followed by a single space.
    """
    fields = self.dict
    fields[key] = fields[key] + "\n " + more
234 def readheaders(self
):
235 """Read header lines.
237 Read header lines up to the entirely blank line that terminates them.
238 The (normally blank) line that ends the headers is skipped, but not
239 included in the returned list. If a non-header line ends the headers,
240 (which is an error), an attempt is made to backspace over it; it is
241 never included in the returned list.
243 The variable self.status is set to the empty string if all went well,
244 otherwise it is an error message. The variable self.headers is a
245 completely uninterpreted list of lines contained in the header (so
246 printing them will reproduce the header exactly as it appears in the
249 If multiple header fields with the same name occur, they are combined
250 according to the rules in RFC 2616 sec 4.2:
252 Appending each subsequent field-value to the first, each separated
253 by a comma. The order in which header fields with the same field-name
254 are received is significant to the interpretation of the combined
257 # XXX The implementation overrides the readheaders() method of
258 # rfc822.Message. The base class design isn't amenable to
259 # customized behavior here so the method here is a copy of the
260 # base class code with a few small changes.
264 self
.headers
= hlist
= []
268 startofline
= unread
= tell
= None
269 if hasattr(self
.fp
, 'unread'):
270 unread
= self
.fp
.unread
278 startofline
= tell
= None
280 line
= self
.fp
.readline(_MAXLINE
+ 1)
281 if len(line
) > _MAXLINE
:
282 raise LineTooLong("header line")
284 self
.status
= 'EOF in headers'
286 # Skip unix From name time lines
287 if firstline
and line
.startswith('From '):
288 self
.unixfrom
= self
.unixfrom
+ line
291 if headerseen
and line
[0] in ' \t':
292 # XXX Not sure if continuation lines are handled properly
293 # for http and/or for repeating headers
294 # It's a continuation line.
296 self
.addcontinue(headerseen
, line
.strip())
298 elif self
.iscomment(line
):
299 # It's a comment. Ignore it.
301 elif self
.islast(line
):
302 # Note! No pushback here! The delimiter line gets eaten.
304 headerseen
= self
.isheader(line
)
306 # It's a legal header line, save it.
308 self
.addheader(headerseen
, line
[len(headerseen
)+1:].strip())
311 # It's not a header line; throw it back and stop here.
313 self
.status
= 'No headers'
315 self
.status
= 'Non-header line where header expected'
316 # Try to undo the read.
320 self
.fp
.seek(startofline
)
322 self
.status
= self
.status
+ '; bad seek'
327 # strict: If true, raise BadStatusLine if the status line can't be
328 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
329 # false because it prevents clients from talking to HTTP/0.9
330 # servers. Note that a response with a sufficiently corrupted
331 # status line will look like an HTTP/0.9 response.
333 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
335 def __init__(self
, sock
, debuglevel
=0, strict
=0, method
=None, buffering
=False):
337 # The caller won't be using any sock.recv() calls, so buffering
338 # is fine and recommended for performance.
339 self
.fp
= sock
.makefile('rb')
341 # The buffer size is specified as zero, because the headers of
342 # the response are read with readline(). If the reads were
343 # buffered the readline() calls could consume some of the
344 # response, which may be read via a recv() on the underlying
346 self
.fp
= sock
.makefile('rb', 0)
347 self
.debuglevel
= debuglevel
349 self
._method
= method
353 # from the Status-Line of the response
354 self
.version
= _UNKNOWN
# HTTP-Version
355 self
.status
= _UNKNOWN
# Status-Code
356 self
.reason
= _UNKNOWN
# Reason-Phrase
358 self
.chunked
= _UNKNOWN
# is "chunked" being used?
359 self
.chunk_left
= _UNKNOWN
# bytes left to read in current chunk
360 self
.length
= _UNKNOWN
# number of bytes left in response
361 self
.will_close
= _UNKNOWN
# conn will close at end of response
363 def _read_status(self
):
364 # Initialize with Simple-Response defaults
365 line
= self
.fp
.readline()
366 if self
.debuglevel
> 0:
367 print "reply:", repr(line
)
369 # Presumably, the server closed the connection before
370 # sending a valid response.
371 raise BadStatusLine(line
)
373 [version
, status
, reason
] = line
.split(None, 2)
376 [version
, status
] = line
.split(None, 1)
379 # empty version will cause next test to fail and status
380 # will be treated as 0.9 response.
382 if not version
.startswith('HTTP/'):
385 raise BadStatusLine(line
)
387 # assume it's a Simple-Response from an 0.9 server
388 self
.fp
= LineAndFileWrapper(line
, self
.fp
)
389 return "HTTP/0.9", 200, ""
391 # The status code is a three-digit number
394 if status
< 100 or status
> 999:
395 raise BadStatusLine(line
)
397 raise BadStatusLine(line
)
398 return version
, status
, reason
401 if self
.msg
is not None:
402 # we've already started reading the response
405 # read until we get a non-100 response
407 version
, status
, reason
= self
._read
_status
()
408 if status
!= CONTINUE
:
410 # skip the header from the 100 response
412 skip
= self
.fp
.readline(_MAXLINE
+ 1)
413 if len(skip
) > _MAXLINE
:
414 raise LineTooLong("header line")
418 if self
.debuglevel
> 0:
419 print "header:", skip
422 self
.reason
= reason
.strip()
423 if version
== 'HTTP/1.0':
425 elif version
.startswith('HTTP/1.'):
426 self
.version
= 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
427 elif version
== 'HTTP/0.9':
430 raise UnknownProtocol(version
)
432 if self
.version
== 9:
436 self
.msg
= HTTPMessage(StringIO())
439 self
.msg
= HTTPMessage(self
.fp
, 0)
440 if self
.debuglevel
> 0:
441 for hdr
in self
.msg
.headers
:
442 print "header:", hdr
,
444 # don't let the msg keep an fp
447 # are we using the chunked-style of transfer encoding?
448 tr_enc
= self
.msg
.getheader('transfer-encoding')
449 if tr_enc
and tr_enc
.lower() == "chunked":
451 self
.chunk_left
= None
455 # will the connection close at the end of the response?
456 self
.will_close
= self
._check
_close
()
458 # do we have a Content-Length?
459 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
460 length
= self
.msg
.getheader('content-length')
461 if length
and not self
.chunked
:
463 self
.length
= int(length
)
467 if self
.length
< 0: # ignore nonsensical negative lengths
472 # does the body have a fixed length? (of zero)
473 if (status
== NO_CONTENT
or status
== NOT_MODIFIED
or
474 100 <= status
< 200 or # 1xx codes
475 self
._method
== 'HEAD'):
478 # if the connection remains open, and we aren't using chunked, and
479 # a content-length was not provided, then assume that the connection
481 if not self
.will_close
and \
482 not self
.chunked
and \
486 def _check_close(self
):
487 conn
= self
.msg
.getheader('connection')
488 if self
.version
== 11:
489 # An HTTP/1.1 proxy is assumed to stay open unless
491 conn
= self
.msg
.getheader('connection')
492 if conn
and "close" in conn
.lower():
496 # Some HTTP/1.0 implementations have support for persistent
497 # connections, using rules different than HTTP/1.1.
499 # For older HTTP, Keep-Alive indicates persistent connection.
500 if self
.msg
.getheader('keep-alive'):
503 # At least Akamai returns a "Connection: Keep-Alive" header,
504 # which was supposed to be sent by the client.
505 if conn
and "keep-alive" in conn
.lower():
508 # Proxy-Connection is a netscape hack.
509 pconn
= self
.msg
.getheader('proxy-connection')
510 if pconn
and "keep-alive" in pconn
.lower():
513 # otherwise, assume it will close
522 # NOTE: it is possible that we will not ever call self.close(). This
523 # case occurs when will_close is TRUE, length is None, and we
524 # read up to the last byte, but NOT past it.
526 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
527 # called, meaning self.isclosed() is meaningful.
528 return self
.fp
is None
530 # XXX It would be nice to have readline and __iter__ for this, too.
532 def read(self
, amt
=None):
536 if self
._method
== 'HEAD':
541 return self
._read
_chunked
(amt
)
545 if self
.length
is None:
548 s
= self
._safe
_read
(self
.length
)
550 self
.close() # we read everything
553 if self
.length
is not None:
554 if amt
> self
.length
:
555 # clip the read to the "end of response"
558 # we do not use _safe_read() here because this may be a .will_close
559 # connection, and the user is reading more bytes than will be provided
560 # (for example, reading in 1k chunks)
561 s
= self
.fp
.read(amt
)
562 if self
.length
is not None:
563 self
.length
-= len(s
)
568 def _read_chunked(self
, amt
):
569 assert self
.chunked
!= _UNKNOWN
570 chunk_left
= self
.chunk_left
573 if chunk_left
is None:
574 line
= self
.fp
.readline(_MAXLINE
+ 1)
575 if len(line
) > _MAXLINE
:
576 raise LineTooLong("chunk size")
579 line
= line
[:i
] # strip chunk-extensions
581 chunk_left
= int(line
, 16)
583 # close the connection as protocol synchronisation is
586 raise IncompleteRead(''.join(value
))
590 value
.append(self
._safe
_read
(chunk_left
))
591 elif amt
< chunk_left
:
592 value
.append(self
._safe
_read
(amt
))
593 self
.chunk_left
= chunk_left
- amt
594 return ''.join(value
)
595 elif amt
== chunk_left
:
596 value
.append(self
._safe
_read
(amt
))
597 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
598 self
.chunk_left
= None
599 return ''.join(value
)
601 value
.append(self
._safe
_read
(chunk_left
))
604 # we read the whole chunk, get another
605 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
608 # read and discard trailer up to the CRLF terminator
609 ### note: we shouldn't have any trailers!
611 line
= self
.fp
.readline(_MAXLINE
+ 1)
612 if len(line
) > _MAXLINE
:
613 raise LineTooLong("trailer line")
615 # a vanishingly small number of sites EOF without
616 # sending the trailer
621 # we read everything; close the "file"
624 return ''.join(value
)
626 def _safe_read(self
, amt
):
627 """Read the number of bytes requested, compensating for partial reads.
629 Normally, we have a blocking socket, but a read() can be interrupted
630 by a signal (resulting in a partial read).
632 Note that we cannot distinguish between EOF and an interrupt when zero
633 bytes have been read. IncompleteRead() will be raised in this
636 This function should be used when <amt> bytes "should" be present for
637 reading. If the bytes are truly not available (due to EOF), then the
638 IncompleteRead exception can be used to detect the problem.
640 # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
641 # return less than x bytes unless EOF is encountered. It now handles
642 # signal interruptions (socket.error EINTR) internally. This code
643 # never caught that exception anyways. It seems largely pointless.
644 # self.fp.read(amt) will work fine.
647 chunk
= self
.fp
.read(min(amt
, MAXAMOUNT
))
649 raise IncompleteRead(''.join(s
), amt
)
655 return self
.fp
.fileno()
657 def getheader(self
, name
, default
=None):
659 raise ResponseNotReady()
660 return self
.msg
.getheader(name
, default
)
662 def getheaders(self
):
663 """Return list of (header, value) tuples."""
665 raise ResponseNotReady()
666 return self
.msg
.items()
669 class HTTPConnection
:
672 _http_vsn_str
= 'HTTP/1.1'
674 response_class
= HTTPResponse
675 default_port
= HTTP_PORT
680 def __init__(self
, host
, port
=None, strict
=None,
681 timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
682 self
.timeout
= timeout
683 self
.source_address
= source_address
686 self
.__response
= None
687 self
.__state
= _CS_IDLE
689 self
._tunnel
_host
= None
690 self
._tunnel
_port
= None
691 self
._tunnel
_headers
= {}
693 self
._set
_hostport
(host
, port
)
694 if strict
is not None:
697 def set_tunnel(self
, host
, port
=None, headers
=None):
698 """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
700 The headers argument should be a mapping of extra HTTP headers
701 to send with the CONNECT request.
703 self
._tunnel
_host
= host
704 self
._tunnel
_port
= port
706 self
._tunnel
_headers
= headers
708 self
._tunnel
_headers
.clear()
710 def _set_hostport(self
, host
, port
):
713 j
= host
.rfind(']') # ipv6 addresses have [...]
716 port
= int(host
[i
+1:])
718 raise InvalidURL("nonnumeric port: '%s'" % host
[i
+1:])
721 port
= self
.default_port
722 if host
and host
[0] == '[' and host
[-1] == ']':
def set_debuglevel(self, level):
    """Record the debugging output level for this connection.

    level -- stored as self.debuglevel; other methods in this file print
             diagnostic output when the stored value is greater than zero
    """
    self.debuglevel = level
731 self
._set
_hostport
(self
._tunnel
_host
, self
._tunnel
_port
)
732 self
.send("CONNECT %s:%d HTTP/1.0\r\n" % (self
.host
, self
.port
))
733 for header
, value
in self
._tunnel
_headers
.iteritems():
734 self
.send("%s: %s\r\n" % (header
, value
))
736 response
= self
.response_class(self
.sock
, strict
= self
.strict
,
737 method
= self
._method
)
738 (version
, code
, message
) = response
._read
_status
()
742 raise socket
.error("Tunnel connection failed: %d %s" % (code
,
745 line
= response
.fp
.readline(_MAXLINE
+ 1)
746 if len(line
) > _MAXLINE
:
747 raise LineTooLong("header line")
748 if line
== '\r\n': break
752 """Connect to the host and port specified in __init__."""
753 self
.sock
= socket
.create_connection((self
.host
,self
.port
),
754 self
.timeout
, self
.source_address
)
756 if self
._tunnel
_host
:
760 """Close the connection to the HTTP server."""
762 self
.sock
.close() # close it manually... there may be other refs
765 self
.__response
.close()
766 self
.__response
= None
767 self
.__state
= _CS_IDLE
769 def send(self
, data
):
770 """Send `data' to the server."""
771 if self
.sock
is None:
777 if self
.debuglevel
> 0:
778 print "send:", repr(data
)
780 if hasattr(data
,'read') and not isinstance(data
, array
):
781 if self
.debuglevel
> 0: print "sendIng a read()able"
782 datablock
= data
.read(blocksize
)
784 self
.sock
.sendall(datablock
)
785 datablock
= data
.read(blocksize
)
787 self
.sock
.sendall(data
)
def _output(self, s):
    """Queue one line of output in the current request buffer.

    The line is expected *not* to end with \\r\\n; the buffered lines are
    joined with that separator when the request is sent.
    """
    pending = self._buffer
    pending.append(s)
796 def _send_output(self
, message_body
=None):
797 """Send the currently buffered request and clear the buffer.
799 Appends an extra \\r\\n to the buffer.
800 A message_body may be specified, to be appended to the request.
802 self
._buffer
.extend(("", ""))
803 msg
= "\r\n".join(self
._buffer
)
805 # If msg and message_body are sent in a single send() call,
806 # it will avoid performance problems caused by the interaction
807 # between delayed ack and the Nagle algorithm.
808 if isinstance(message_body
, str):
812 if message_body
is not None:
813 #message_body was not a string (i.e. it is a file) and
814 #we must run the risk of Nagle
815 self
.send(message_body
)
817 def putrequest(self
, method
, url
, skip_host
=0, skip_accept_encoding
=0):
818 """Send a request to the server.
820 `method' specifies an HTTP request method, e.g. 'GET'.
821 `url' specifies the object being requested, e.g. '/index.html'.
822 `skip_host' if True does not add automatically a 'Host:' header
823 `skip_accept_encoding' if True does not add automatically an
824 'Accept-Encoding:' header
827 # if a prior response has been completed, then forget about it.
828 if self
.__response
and self
.__response
.isclosed():
829 self
.__response
= None
832 # in certain cases, we cannot issue another request on this connection.
834 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
835 # 2) a response to a previous request has signalled that it is going
836 # to close the connection upon completion.
837 # 3) the headers for the previous response have not been read, thus
838 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
840 # if there is no prior response, then we can request at will.
842 # if point (2) is true, then we will have passed the socket to the
843 # response (effectively meaning, "there is no prior response"), and
844 # will open a new one when a new request is made.
846 # Note: if a prior response exists, then we *can* start a new request.
847 # We are not allowed to begin fetching the response to this new
848 # request, however, until that prior response is complete.
850 if self
.__state
== _CS_IDLE
:
851 self
.__state
= _CS_REQ_STARTED
853 raise CannotSendRequest()
855 # Save the method we use, we need it later in the response phase
856 self
._method
= method
859 hdr
= '%s %s %s' % (method
, url
, self
._http
_vsn
_str
)
863 if self
._http
_vsn
== 11:
864 # Issue some standard headers for better HTTP/1.1 compliance
867 # this header is issued *only* for HTTP/1.1
868 # connections. more specifically, this means it is
869 # only issued when the client uses the new
870 # HTTPConnection() class. backwards-compat clients
871 # will be using HTTP/1.0 and those clients may be
872 # issuing this header themselves. we should NOT issue
873 # it twice; some web servers (such as Apache) barf
874 # when they see two Host: headers
876 # If we need a non-standard port, include it in the
877 # header. If the request is going through a proxy,
878 # but the host of the actual URL, not the host of the
882 if url
.startswith('http'):
883 nil
, netloc
, nil
, nil
, nil
= urlsplit(url
)
887 netloc_enc
= netloc
.encode("ascii")
888 except UnicodeEncodeError:
889 netloc_enc
= netloc
.encode("idna")
890 self
.putheader('Host', netloc_enc
)
893 host_enc
= self
.host
.encode("ascii")
894 except UnicodeEncodeError:
895 host_enc
= self
.host
.encode("idna")
896 # Wrap the IPv6 Host Header with [] (RFC 2732)
897 if host_enc
.find(':') >= 0:
898 host_enc
= "[" + host_enc
+ "]"
899 if self
.port
== self
.default_port
:
900 self
.putheader('Host', host_enc
)
902 self
.putheader('Host', "%s:%s" % (host_enc
, self
.port
))
904 # note: we are assuming that clients will not attempt to set these
905 # headers since *this* library must deal with the
906 # consequences. this also means that when the supporting
907 # libraries are updated to recognize other forms, then this
908 # code should be changed (removed or updated).
910 # we only want a Content-Encoding of "identity" since we don't
911 # support encodings such as x-gzip or x-deflate.
912 if not skip_accept_encoding
:
913 self
.putheader('Accept-Encoding', 'identity')
915 # we can accept "chunked" Transfer-Encodings, but no others
916 # NOTE: no TE header implies *only* "chunked"
917 #self.putheader('TE', 'chunked')
919 # if TE is supplied in the header, then it must appear in a
921 #self.putheader('Connection', 'TE')
924 # For HTTP/1.0, the server will assume "not chunked"
927 def putheader(self
, header
, *values
):
928 """Send a request header line to the server.
930 For example: h.putheader('Accept', 'text/html')
932 if self
.__state
!= _CS_REQ_STARTED
:
933 raise CannotSendHeader()
935 hdr
= '%s: %s' % (header
, '\r\n\t'.join([str(v
) for v
in values
]))
938 def endheaders(self
, message_body
=None):
939 """Indicate that the last header line has been sent to the server.
941 This method sends the request to the server. The optional
942 message_body argument can be used to pass message body
943 associated with the request. The message body will be sent in
944 the same packet as the message headers if possible. The
945 message_body should be a string.
947 if self
.__state
== _CS_REQ_STARTED
:
948 self
.__state
= _CS_REQ_SENT
950 raise CannotSendHeader()
951 self
._send
_output
(message_body
)
def request(self, method, url, body=None, headers=None):
    """Send a complete request to the server.

    method  -- HTTP request method, e.g. 'GET'
    url     -- the object being requested, e.g. '/index.html'
    body    -- optional message body, handed on unchanged
    headers -- optional mapping of extra header names to values; None
               (the default) means no extra headers

    All of the work is delegated to self._send_request().
    """
    # Use a fresh dict per call rather than a mutable default argument,
    # which would be one object shared by every call that omits headers.
    if headers is None:
        headers = {}
    self._send_request(method, url, body, headers)
957 def _set_content_length(self
, body
):
958 # Set the content-length based on the body.
961 thelen
= str(len(body
))
962 except TypeError, te
:
963 # If this is a file-like object, try to
964 # fstat its file descriptor
966 thelen
= str(os
.fstat(body
.fileno()).st_size
)
967 except (AttributeError, OSError):
968 # Don't send a length if this failed
969 if self
.debuglevel
> 0: print "Cannot stat!!"
971 if thelen
is not None:
972 self
.putheader('Content-Length', thelen
)
974 def _send_request(self
, method
, url
, body
, headers
):
975 # Honor explicitly requested Host: and Accept-Encoding: headers.
976 header_names
= dict.fromkeys([k
.lower() for k
in headers
])
978 if 'host' in header_names
:
979 skips
['skip_host'] = 1
980 if 'accept-encoding' in header_names
:
981 skips
['skip_accept_encoding'] = 1
983 self
.putrequest(method
, url
, **skips
)
985 if body
and ('content-length' not in header_names
):
986 self
._set
_content
_length
(body
)
987 for hdr
, value
in headers
.iteritems():
988 self
.putheader(hdr
, value
)
989 self
.endheaders(body
)
991 def getresponse(self
, buffering
=False):
992 "Get the response from the server."
994 # if a prior response has been completed, then forget about it.
995 if self
.__response
and self
.__response
.isclosed():
996 self
.__response
= None
999 # if a prior response exists, then it must be completed (otherwise, we
1000 # cannot read this response's header to determine the connection-close
1003 # note: if a prior response existed, but was connection-close, then the
1004 # socket and response were made independent of this HTTPConnection
1005 # object since a new request requires that we open a whole new
1008 # this means the prior response had one of two states:
1009 # 1) will_close: this connection was reset and the prior socket and
1010 # response operate independently
1011 # 2) persistent: the response was retained and we await its
1012 # isclosed() status to become true.
1014 if self
.__state
!= _CS_REQ_SENT
or self
.__response
:
1015 raise ResponseNotReady()
1018 kwds
= {"strict":self
.strict
, "method":self
._method
}
1019 if self
.debuglevel
> 0:
1020 args
+= (self
.debuglevel
,)
1022 #only add this keyword if non-default, for compatibility with
1023 #other response_classes.
1024 kwds
["buffering"] = True;
1025 response
= self
.response_class(*args
, **kwds
)
1028 assert response
.will_close
!= _UNKNOWN
1029 self
.__state
= _CS_IDLE
1031 if response
.will_close
:
1032 # this effectively passes the connection to the response
1035 # remember this, so we can tell when it is complete
1036 self
.__response
= response
1042 "Compatibility class with httplib.py from 1.5."
1045 _http_vsn_str
= 'HTTP/1.0'
1049 _connection_class
= HTTPConnection
1051 def __init__(self
, host
='', port
=None, strict
=None):
1052 "Provide a default host, since the superclass requires one."
1054 # some joker passed 0 explicitly, meaning default port
1058 # Note that we may pass an empty string as the host; this will throw
1059 # an error when we attempt to connect. Presumably, the client code
1060 # will call connect before then, with a proper host.
1061 self
._setup
(self
._connection
_class
(host
, port
, strict
))
1063 def _setup(self
, conn
):
1066 # set up delegation to flesh out interface
1067 self
.send
= conn
.send
1068 self
.putrequest
= conn
.putrequest
1069 self
.putheader
= conn
.putheader
1070 self
.endheaders
= conn
.endheaders
1071 self
.set_debuglevel
= conn
.set_debuglevel
1073 conn
._http
_vsn
= self
._http
_vsn
1074 conn
._http
_vsn
_str
= self
._http
_vsn
_str
1078 def connect(self
, host
=None, port
=None):
1079 "Accept arguments to set the host/port, since the superclass doesn't."
1081 if host
is not None:
1082 self
._conn
._set
_hostport
(host
, port
)
1083 self
._conn
.connect()
1086 "Provide a getfile, since the superclass' does not use this concept."
1089 def getreply(self
, buffering
=False):
1090 """Compat definition since superclass does not define it.
1092 Returns a tuple consisting of:
1093 - server status code (e.g. '200' if all goes well)
1094 - server "reason" corresponding to status code
1095 - any RFC822 headers in the response from the server
1099 response
= self
._conn
.getresponse()
1101 #only add this keyword if non-default for compatibility
1102 #with other connection classes
1103 response
= self
._conn
.getresponse(buffering
)
1104 except BadStatusLine
, e
:
1105 ### hmm. if getresponse() ever closes the socket on a bad request,
1106 ### then we are going to have problems with self.sock
1108 ### should we keep this behavior? do people use it?
1109 # keep the socket open (as a file), and return it
1110 self
.file = self
._conn
.sock
.makefile('rb', 0)
1112 # close our socket -- we want to restart after any protocol error
1116 return -1, e
.line
, None
1118 self
.headers
= response
.msg
1119 self
.file = response
.fp
1120 return response
.status
, response
.reason
, response
.msg
1125 # note that self.file == response.fp, which gets closed by the
1126 # superclass. just clear the object ref here.
1127 ### hmm. messy. if status==-1, then self.file is owned by us.
1128 ### well... we aren't explicitly closing, but losing this ref will
1137 class HTTPSConnection(HTTPConnection
):
1138 "This class allows communication via SSL."
1140 default_port
= HTTPS_PORT
1142 def __init__(self
, host
, port
=None, key_file
=None, cert_file
=None,
1143 strict
=None, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
,
1144 source_address
=None):
1145 HTTPConnection
.__init
__(self
, host
, port
, strict
, timeout
,
1147 self
.key_file
= key_file
1148 self
.cert_file
= cert_file
1151 "Connect to a host on a given (SSL) port."
1153 sock
= socket
.create_connection((self
.host
, self
.port
),
1154 self
.timeout
, self
.source_address
)
1155 if self
._tunnel
_host
:
1158 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
)
1160 __all__
.append("HTTPSConnection")
1163 """Compatibility with 1.5 httplib interface
1165 Python 1.5.2 did not have an HTTPS class, but it defined an
1166 interface for sending http requests that is also useful for
1170 _connection_class
= HTTPSConnection
1172 def __init__(self
, host
='', port
=None, key_file
=None, cert_file
=None,
1174 # provide a default host, pass the X509 cert info
1176 # urf. compensate for bad input.
1179 self
._setup
(self
._connection
_class
(host
, port
, key_file
,
1182 # we never actually use these for anything, but we keep them
1183 # here for compatibility with post-1.5.2 CVS.
1184 self
.key_file
= key_file
1185 self
.cert_file
= cert_file
1188 def FakeSocket (sock
, sslobj
):
1189 warnings
.warn("FakeSocket is deprecated, and won't be in 3.x. " +
1190 "Use the result of ssl.wrap_socket() directly instead.",
1191 DeprecationWarning, stacklevel
=2)
1195 class HTTPException(Exception):
1196 # Subclasses that define an __init__ must call Exception.__init__
1197 # or define self.args. Otherwise, str() will fail.
1200 class NotConnected(HTTPException
):
1203 class InvalidURL(HTTPException
):
class UnknownProtocol(HTTPException):
    """Raised when a status line carries an unrecognized HTTP version.

    The offending version string is kept both as the sole element of
    args and as the version attribute.
    """

    def __init__(self, version):
        # args is set by hand instead of calling Exception.__init__;
        # either one is required for str() on the exception to work.
        self.version = version
        self.args = (version,)
1211 class UnknownTransferEncoding(HTTPException
):
1214 class UnimplementedFileMode(HTTPException
):
1217 class IncompleteRead(HTTPException
):
1218 def __init__(self
, partial
, expected
=None):
1219 self
.args
= partial
,
1220 self
.partial
= partial
1221 self
.expected
= expected
1223 if self
.expected
is not None:
1224 e
= ', %i more expected' % self
.expected
1227 return 'IncompleteRead(%i bytes read%s)' % (len(self
.partial
), e
)
1231 class ImproperConnectionState(HTTPException
):
1234 class CannotSendRequest(ImproperConnectionState
):
1237 class CannotSendHeader(ImproperConnectionState
):
1240 class ResponseNotReady(ImproperConnectionState
):
1243 class BadStatusLine(HTTPException
):
1244 def __init__(self
, line
):
class LineTooLong(HTTPException):
    """Raised when a line read from the server exceeds _MAXLINE bytes."""

    def __init__(self, line_type):
        # line_type names what was being read, e.g. "header line".
        message = "got more than %d bytes when reading %s" % (
            _MAXLINE, line_type)
        HTTPException.__init__(self, message)
1255 # for backwards compatibility
1256 error
= HTTPException
1258 class LineAndFileWrapper
:
1259 """A limited file-like object for HTTP/0.9 responses."""
1261 # The status-line parsing code calls readline(), which normally
1262 # get the HTTP status line. For a 0.9 response, however, this is
1263 # actually the first line of the body! Clients need to get a
1264 # readable file object that contains that line.
1266 def __init__(self
, line
, file):
1269 self
._line
_consumed
= 0
1270 self
._line
_offset
= 0
1271 self
._line
_left
= len(line
)
1273 def __getattr__(self
, attr
):
1274 return getattr(self
._file
, attr
)
1277 # called when the last byte is read from the line. After the
1278 # call, all read methods are delegated to the underlying file
1280 self
._line
_consumed
= 1
1281 self
.read
= self
._file
.read
1282 self
.readline
= self
._file
.readline
1283 self
.readlines
= self
._file
.readlines
1285 def read(self
, amt
=None):
1286 if self
._line
_consumed
:
1287 return self
._file
.read(amt
)
1288 assert self
._line
_left
1289 if amt
is None or amt
> self
._line
_left
:
1290 s
= self
._line
[self
._line
_offset
:]
1293 return s
+ self
._file
.read()
1295 return s
+ self
._file
.read(amt
- len(s
))
1297 assert amt
<= self
._line
_left
1298 i
= self
._line
_offset
1301 self
._line
_offset
= j
1302 self
._line
_left
-= amt
1303 if self
._line
_left
== 0:
1308 if self
._line
_consumed
:
1309 return self
._file
.readline()
1310 assert self
._line
_left
1311 s
= self
._line
[self
._line
_offset
:]
1315 def readlines(self
, size
=None):
1316 if self
._line
_consumed
:
1317 return self
._file
.readlines(size
)
1318 assert self
._line
_left
1319 L
= [self
._line
[self
._line
_offset
:]]
1322 return L
+ self
._file
.readlines()
1324 return L
+ self
._file
.readlines(size
)
1327 """Test this module.
1329 A hodge podge of tests collected here, because they have too many
1330 external dependencies for the regular test suite.
1335 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'd')
1338 if o
== '-d': dl
= dl
+ 1
1339 host
= 'www.python.org'
1341 if args
[0:]: host
= args
[0]
1342 if args
[1:]: selector
= args
[1]
1344 h
.set_debuglevel(dl
)
1346 h
.putrequest('GET', selector
)
1348 status
, reason
, headers
= h
.getreply()
1349 print 'status =', status
1350 print 'reason =', reason
1351 print "read", len(h
.getfile().read())
1354 for header
in headers
.headers
: print header
.strip()
1357 # minimal test that code to extract host from url works
1360 _http_vsn_str
= 'HTTP/1.1'
1362 h
= HTTP11('www.python.org')
1363 h
.putrequest('GET', 'http://www.python.org/~jeremy/')
1374 for host
, selector
in (('sourceforge.net', '/projects/python'),
1376 print "https://%s%s" % (host
, selector
)
1378 hs
.set_debuglevel(dl
)
1380 hs
.putrequest('GET', selector
)
1382 status
, reason
, headers
= hs
.getreply()
1383 print 'status =', status
1384 print 'reason =', reason
1385 print "read", len(hs
.getfile().read())
1388 for header
in headers
.headers
: print header
.strip()
1391 if __name__
== '__main__':