]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | """HTTP cookie handling for web clients.\r |
2 | \r | |
3 | This module has (now fairly distant) origins in Gisle Aas' Perl module\r | |
4 | HTTP::Cookies, from the libwww-perl library.\r | |
5 | \r | |
6 | Docstrings, comments and debug strings in this code refer to the\r | |
7 | attributes of the HTTP cookie system as cookie-attributes, to distinguish\r | |
8 | them clearly from Python attributes.\r | |
9 | \r | |
10 | Class diagram (note that BSDDBCookieJar and the MSIE* classes are not\r | |
11 | distributed with the Python standard library, but are available from\r | |
12 | http://wwwsearch.sf.net/):\r | |
13 | \r | |
14 | CookieJar____\r | |
15 | / \ \\r | |
16 | FileCookieJar \ \\r | |
17 | / | \ \ \\r | |
18 | MozillaCookieJar | LWPCookieJar \ \\r | |
19 | | | \\r | |
20 | | ---MSIEBase | \\r | |
21 | | / | | \\r | |
22 | | / MSIEDBCookieJar BSDDBCookieJar\r | |
23 | |/\r | |
24 | MSIECookieJar\r | |
25 | \r | |
26 | """\r | |
27 | \r | |
28 | __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',\r | |
29 | 'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',\r | |
30 | 'MozillaCookieJar']\r | |
31 | \r | |
32 | import re, urlparse, copy, time, urllib\r | |
33 | try:\r | |
34 | import threading as _threading\r | |
35 | except ImportError:\r | |
36 | import dummy_threading as _threading\r | |
37 | import httplib # only for the default HTTP port\r | |
38 | from calendar import timegm\r | |
39 | \r | |
debug = False   # set to True to enable debugging via the logging module
logger = None   # lazily created "cookielib" logger (see _debug)

def _debug(*args):
    """Forward args to the 'cookielib' logger, but only when debugging is on.

    The logger is created on first use so that importing this module does not
    pull in the logging machinery unnecessarily.
    """
    if not debug:
        return
    global logger
    if logger is None:
        import logging
        logger = logging.getLogger("cookielib")
    return logger.debug(*args)
51 | \r | |
52 | \r | |
# Default port for HTTP, as a string -- ports are handled as strings
# throughout this module (cf. request_port).
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
# Error text for load/save operations attempted without a filename;
# presumably consumed by FileCookieJar elsewhere in this module -- confirm.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
56 | \r | |
def _warn_unhandled_exception():
    """Emit a warning carrying the traceback of the exception being handled.

    This module contains a few catch-all except: statements guarding against
    input that is bad in unexpected ways; this reports anything caught there
    instead of silently swallowing it.
    """
    import warnings, traceback, StringIO
    buf = StringIO.StringIO()
    traceback.print_exc(None, buf)
    warnings.warn("cookielib bug!\n%s" % buf.getvalue(), stacklevel=2)
66 | \r | |
67 | \r | |
68 | # Date/time conversion\r | |
69 | # -----------------------------------------------------------------------------\r | |
70 | \r | |
EPOCH_YEAR = 1970
def _timegm(tt):
    """Range-checked calendar.timegm: return seconds since epoch, or None.

    Rejects times before the epoch or with out-of-range fields (hour 24 and
    leap seconds up to 61 are deliberately tolerated).
    """
    year, month, mday, hour, min, sec = tt[:6]
    in_range = (year >= EPOCH_YEAR
                and 1 <= month <= 12
                and 1 <= mday <= 31
                and 0 <= hour <= 24
                and 0 <= min <= 59
                and 0 <= sec <= 61)
    return timegm(tt) if in_range else None
79 | \r | |
# English day/month abbreviations used by the Netscape date format.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month names, for case-insensitive month-name lookups.
MONTHS_LOWER = [month.lower() for month in MONTHS]
85 | \r | |
def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    tt = time.gmtime(time.time() if t is None else t)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % tt[:6]
102 | \r | |
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    tt = time.gmtime(time.time() if t is None else t)
    year, mon, mday, hour, min, sec, wday = tt[:7]
    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon - 1], year, hour, min, sec)
118 | \r | |
119 | \r | |
# Timezone names treated as zero offset from UTC.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric offsets like "-0800", "+01", "+01:30".
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the offset in seconds for timezone string tz, or None.

    tz may be a known UTC-equivalent name ("GMT", "UTC", "UT", "Z") or a
    numeric offset such as "-0800" or "+01:30".  Unrecognised strings give
    None.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if not m:
        return None
    offset = 3600 * int(m.group(2))
    if m.group(3):
        offset += 60 * int(m.group(3))
    if m.group(1) == '-':
        offset = -offset
    return offset
136 | \r | |
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Assemble loosely-parsed date fields into seconds since epoch, or None.

    All arguments are strings (or None for the optional clock/timezone
    fields) as captured by the date regexps in this module.  mon may be a
    month name or a number.  Returns None for unparseable months, years
    before 1970, or unknown timezone strings.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    sec = int(sec)

    if yr < 1000:
        # find "obvious" year: pick the century that puts yr within 50
        # years of the current year (so "94" -> 1994, "02" -> 2002).
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m      # same century as now
        m = m - tmp               # distance from current 2-digit year
        if abs(m) > 50:
            # more than 50 years away: shift a century toward the present
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
189 | \r | |
# Strictly RFC 1123 conforming dates, e.g. "Wed, 09 Feb 1994 22:23:32 GMT".
# NOTE: both string parts are raw strings; the second part previously lacked
# the r prefix, so "\d" was an invalid escape sequence that only worked
# because Python passes unknown escapes through (deprecated behaviour).
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
# Leading weekday name (with optional comma and trailing space), which
# carries no information and is stripped before loose parsing.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
# Loose HTTP/rfc850 date: day, month name, year, optional clock and timezone.
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        day, monname, yr, hr, mn, sec = m.groups()
        mon = MONTHS_LOWER.index(monname.lower()) + 1
        return _timegm((int(yr), mon, int(day), int(hr), int(mn), float(sec)))

    # No, we need some messy parsing...  Strip leading whitespace and the
    # useless weekday, then try the loose regexp.
    text = WEEKDAY_RE.sub("", text.lstrip(), 1)
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None            # bad format
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
267 | \r | |
# ISO 8601 date, optionally with clock and timezone.
# NOTE: raw string -- the original was a plain string, so every "\d" was an
# invalid escape sequence that only matched because Python passes unknown
# escapes through unchanged (deprecated behaviour; an error in modern Python).
ISO_DATE_RE = re.compile(
    r"""^
   (\d{4})              # year
      [-\/]?
   (\d\d?)              # numerical month
      [-\/]?
   (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    m = ISO_DATE_RE.search(text.lstrip())
    if m is None:
        return None            # bad format
    # XXX there's an extra bit of the timezone I'm ignoring here: is
    # this the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
312 | \r | |
313 | \r | |
314 | # Header parsing\r | |
315 | # -----------------------------------------------------------------------------\r | |
316 | \r | |
def unmatched(match):
    """Return the parts of match.string that lie outside the matched span."""
    s = match.string
    i, j = match.span(0)
    return s[:i] + s[j:]
321 | \r | |
HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                # (raw string: previously a plain string, relying on the
                # deprecated pass-through of the invalid "\s" escape)
                non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
410 | \r | |
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for key, value in pairs:
            if value is None:
                # lone token: emitted as-is
                attr.append(key)
                continue
            if not re.search(r"^\w+$", value):
                # non-word value: backslash-escape '"' and '\', then quote
                value = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
                value = '"%s"' % value
            attr.append("%s=%s" % (key, value))
        if attr:
            headers.append("; ".join(attr))
    return ", ".join(headers)
436 | \r | |
def _strip_quotes(text):
    """Remove one leading and/or trailing double-quote character from text."""
    if text[:1] == '"':
        text = text[1:]
    if text[-1:] == '"':
        text = text[:-1]
    return text
443 | \r | |
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    # Guard against a bare "version" attribute (no value):
                    # _strip_quotes(None) would raise TypeError.
                    if v is not None:
                        v = _strip_quotes(v)
                        version_set = True
                elif k == "expires":
                    # convert expires date to seconds since epoch; a bare
                    # "expires" (no value) is left as None instead of
                    # crashing in _strip_quotes.
                    if v is not None:
                        v = http2time(_strip_quotes(v))  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
494 | \r | |
495 | \r | |
# Crude IPv4 detector: anything ending in ".<digits>".
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    XXX This may well be wrong.  Which RFC is HDN defined in, if any (for
    the purposes of RFC 2965)?  What about IPv6?  Remember to look at other
    uses of IPV4_RE also, if this changes.
    """
    if not text:
        return False
    if IPV4_RE.search(text):
        return False
    if text.startswith(".") or text.endswith("."):
        return False
    return True
511 | \r | |
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # If A or B are IP addresses, only the direct string-compare part of the
    # domain-match algorithm applies.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    i = A.rfind(B)
    if i <= 0:
        # A does not have the form NB, or N is the empty string
        return False
    if not B.startswith("."):
        return False
    return is_HDN(B[1:])
550 | \r | |
def liberal_is_HDN(text):
    """Return True if text is sort-of like a host domain name.

    For accepting/blocking domains: anything that doesn't look like an IPv4
    address qualifies.

    """
    return IPV4_RE.search(text) is None
560 | \r | |
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one looks like an IP address: only exact equality matches
        return A == B
    if B.startswith("."):
        # ".example.com" matches any A ending in it
        return A.endswith(B)
    return A == B
580 | \r | |
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse.urlparse(request.get_full_url())[1]
    if not host:
        # URL had no netloc; fall back on the Host header
        host = request.get_header("Host", "")
    # remove port, if present
    return cut_port_re.sub("", host, 1).lower()
597 | \r | |
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    """
    req_host = request_host(request)
    erhn = req_host
    # a dotless, non-IP host gets ".local" appended to form the ERHN
    if "." not in req_host and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn
608 | \r | |
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965."""
    parts = urlparse.urlsplit(request.get_full_url())
    path = escape_path(parts.path)
    if not path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        path = "/" + path
    return path
618 | \r | |
def request_port(request):
    """Return the request's port as a string.

    Returns DEFAULT_HTTP_PORT when the host carries no explicit port, and
    None when the port present is not numeric.
    """
    host = request.get_host()
    _, sep, port = host.partition(':')
    if not sep:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
632 | \r | |
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    # re.sub callback: normalise a "%xx" escape to upper case "%XX"
    return "%" + match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
654 | \r | |
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
          then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    dot = h.find(".")
    if dot >= 0:
        rest = h[dot+1:]     # everything after the first label
        if is_HDN(h) and (rest.find(".") >= 0 or rest == "local"):
            return "." + rest
    return h
689 | \r | |
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(request_host(request), origin_reach)
705 | \r | |
706 | \r | |
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):

        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109
        # shallow copy, so the caller's dict can be mutated without effect
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if this cookie has the named non-standard attribute."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the named non-standard cookie-attribute, or default."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set a non-standard cookie-attribute."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has expired by time now (default: now)."""
        if now is None:
            now = time.time()
        return self.expires is not None and self.expires <= now

    def __str__(self):
        p = "" if self.port is None else ":" + self.port
        limit = self.domain + p + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = ["%s=%s" % (attr, repr(getattr(self, attr)))
                for attr in ("version", "name", "value",
                             "port", "port_specified",
                             "domain", "domain_specified",
                             "domain_initial_dot",
                             "path", "path_specified",
                             "secure", "expires", "discard",
                             "comment", "comment_url")]
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
803 | \r | |
804 | \r | |
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    This base class is an abstract interface: set_ok and return_ok must be
    overridden; domain_return_ok and path_return_ok are optional fast-path
    filters that default to allowing everything.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        Abstract method: subclasses must override.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server."""
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.
        """
        # default: never reject based on domain alone
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.
        """
        # default: never reject based on path alone
        return True
836 | \r | |
837 | \r | |
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies."""

    # Bit-flags combined into the strict_ns_domain constructor argument.
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """Constructor arguments should be passed as keyword arguments only."""
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        # Blocked domains default to the empty tuple ...
        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        # ... while allowed domains may be None, meaning "no allow-list
        # restriction at all" (see is_not_allowed).
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains
    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain matches any entry in the user block-list."""
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains
    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list is set and domain matches no entry."""
        if self._allowed_domains is None:
            # No allow-list configured: everything is allowed.
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        # Dispatch to the set_ok_* checks in a fixed order; the first
        # check that fails rejects the cookie.
        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies whose protocol version is missing or switched off."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug(" Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Apply the strict third-party rules for unverifiable transactions."""
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        """Optionally reject V0 cookie names that start with '$'."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug(" illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Require the Path attribute to be a prefix of the request path."""
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not req_path.startswith(cookie.path)):
                _debug(" path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        """Apply block/allow lists plus the domain-matching rules."""
        if self.is_blocked(cookie.domain):
            _debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    # Known country-code second-level domains: a cookie may
                    # not be set for e.g. all of ".co.uk".
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug(" country-code second level domain %s", domain)
                        return False
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                _debug(" non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug(" effective request-host %s (even with added "
                           "initial dot) does not end end with %s",
                           erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug(" effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug(" host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        """If a Port attribute was given, require the request port to match."""
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug(" bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                _debug(" request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        # Dispatch to the return_ok_* checks; first failure wins.
        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Only return cookies whose protocol family is switched on."""
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Apply the strict third-party rules for unverifiable transactions."""
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during unverifiable "
                       "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during unverifiable "
                       "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        """Only send secure cookies over https requests."""
        if cookie.secure and request.get_type() != "https":
            _debug(" secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Never return an expired cookie (relative to the jar's _now)."""
        if cookie.is_expired(self._now):
            _debug(" cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """If the cookie restricts ports, require the request port to match."""
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                _debug(" request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Apply RFC 2965 / Netscape domain-matching rules for return."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug(" cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug(" effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(domain):
            _debug(" request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Liberal per-domain pre-check, applied before per-cookie checks."""
        # Liberal check of domain.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        if not (req_host.endswith(domain) or erhn.endswith(domain)):
            #_debug("   request domain %s does not match cookie domain %s",
            #       req_host, domain)
            return False

        if self.is_blocked(domain):
            _debug(" domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug(" domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return true iff the stored cookie path prefixes the request path."""
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        if not req_path.startswith(path):
            _debug(" %s does not path-match %s", req_path, path)
            return False
        return True
1173 | \r | |
1174 | \r | |
def vals_sorted_by_key(adict):
    """Return the values of *adict* as a list, in increasing order of key.

    Idiom fix: use the sorted() builtin and a list comprehension instead of
    copying the key list and mutating it with .sort() before mapping .get over
    it.  Behaviour is unchanged (a list is still returned), and the function
    no longer relies on .keys() returning a mutable list.
    """
    return [adict[key] for key in sorted(adict.keys())]
1179 | \r | |
def deepvalues(mapping):
    """Iterate depth-first over a nested mapping, yielding leaf values.

    At each level, values are visited in sorted order of their keys; any
    value that itself looks like a mapping (has an .items attribute) is
    recursed into rather than yielded.
    """
    for member in vals_sorted_by_key(mapping):
        try:
            member.items
        except AttributeError:
            # Not a mapping: it's a leaf value.
            yield member
        else:
            # Nested mapping: recurse and re-yield its leaves.
            for leaf in deepvalues(member):
                yield leaf
1195 | \r | |
1196 | \r | |
1197 | # Used as second parameter to dict.get() method, to distinguish absent\r | |
1198 | # dict key from one with a None value.\r | |
class Absent:
    """Sentinel type: distinguishes an absent dict key from a None value."""
    pass
1200 | \r | |
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try
    urllib2.build_opener(HTTPCookieProcessor).open(url).

    """

    # Matches any non-word character (value then needs quoting in v1 headers).
    non_word_re = re.compile(r"\W")
    # Matches characters that must be backslash-escaped inside a quoted value.
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    # Matches a run of leading dots in a domain.
    dots_re = re.compile(r"^\.+")

    # Pattern recognising the header line of an LWP cookies file.
    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
1216 | \r | |
1217 | def __init__(self, policy=None):\r | |
1218 | if policy is None:\r | |
1219 | policy = DefaultCookiePolicy()\r | |
1220 | self._policy = policy\r | |
1221 | \r | |
1222 | self._cookies_lock = _threading.RLock()\r | |
1223 | self._cookies = {}\r | |
1224 | \r | |
    def set_policy(self, policy):
        """Replace the CookiePolicy instance used by this jar."""
        self._policy = policy
1227 | \r | |
1228 | def _cookies_for_domain(self, domain, request):\r | |
1229 | cookies = []\r | |
1230 | if not self._policy.domain_return_ok(domain, request):\r | |
1231 | return []\r | |
1232 | _debug("Checking %s for cookies to return", domain)\r | |
1233 | cookies_by_path = self._cookies[domain]\r | |
1234 | for path in cookies_by_path.keys():\r | |
1235 | if not self._policy.path_return_ok(path, request):\r | |
1236 | continue\r | |
1237 | cookies_by_name = cookies_by_path[path]\r | |
1238 | for cookie in cookies_by_name.values():\r | |
1239 | if not self._policy.return_ok(cookie, request):\r | |
1240 | _debug(" not returning cookie")\r | |
1241 | continue\r | |
1242 | _debug(" it's a match")\r | |
1243 | cookies.append(cookie)\r | |
1244 | return cookies\r | |
1245 | \r | |
1246 | def _cookies_for_request(self, request):\r | |
1247 | """Return a list of cookies to be returned to server."""\r | |
1248 | cookies = []\r | |
1249 | for domain in self._cookies.keys():\r | |
1250 | cookies.extend(self._cookies_for_domain(domain, request))\r | |
1251 | return cookies\r | |
1252 | \r | |
    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        """
        # add cookies in order of most specific (ie. longest) path first
        # NOTE: sorts the passed-in list in place (callers here pass a
        # freshly-built list, see _cookies_for_request).
        cookies.sort(key=lambda arg: len(arg.path), reverse=True)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            # different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            # RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                # $Version is emitted at most once, based on the first cookie.
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            # intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                # RFC 2965 cookies also echo back $Path / $Domain / $Port.
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        # Server did not send the initial dot: strip it back.
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs
1311 | \r | |
    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib2.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        """
        _debug("add_cookie_header")
        self._cookies_lock.acquire()
        try:

            # Freeze "now" on both the jar and the policy so that all expiry
            # checks for this request agree on the time.
            self._policy._now = self._now = int(time.time())

            cookies = self._cookies_for_request(request)

            attrs = self._cookie_attrs(cookies)
            if attrs:
                # Do not clobber a Cookie header the caller already set.
                if not request.has_header("Cookie"):
                    request.add_unredirected_header(
                        "Cookie", "; ".join(attrs))

            # if necessary, advertise that we know RFC 2965
            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                not request.has_header("Cookie2")):
                for cookie in cookies:
                    if cookie.version != 1:
                        request.add_unredirected_header("Cookie2", '$Version="1"')
                        break

        finally:
            self._cookies_lock.release()

        # Housekeeping: drop cookies that have expired by self._now.
        self.clear_expired_cookies()
1344 | \r | |
    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            # The first pair is always the cookie's own name=value.
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch. V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        _debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        _debug(" missing or invalid value for expires "
                               "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except ValueError:
                        _debug(" missing or invalid (non-numeric) value for "
                               "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    # age-calculation rules. Remember that zero Max-Age is a
                    # is a request to discard (old and new) cookie, though.
                    # (Note: k is rewritten to "expires" so the normalised
                    # tuple only ever carries an absolute expiry time.)
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ("port", "comment", "commenturl")):
                        _debug(" missing value for %s attribute" % k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                # Whole cookie is dropped, not just the offending attribute.
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples
1441 | \r | |
    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, filling in defaults.

        Returns None if the cookie is invalid or is an already-expired
        "delete this cookie" instruction (in which case the matching stored
        cookie, if any, is cleared as a side effect).
        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        # Absent (the sentinel class) marks attributes the server never sent,
        # as opposed to attributes sent with value None.
        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                return None  # invalid version, ignore cookie
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0: path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present. Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie. This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                   domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)
1533 | \r | |
1534 | def _cookies_from_attrs_set(self, attrs_set, request):\r | |
1535 | cookie_tuples = self._normalized_cookie_tuples(attrs_set)\r | |
1536 | \r | |
1537 | cookies = []\r | |
1538 | for tup in cookie_tuples:\r | |
1539 | cookie = self._cookie_from_cookie_tuple(tup, request)\r | |
1540 | if cookie: cookies.append(cookie)\r | |
1541 | return cookies\r | |
1542 | \r | |
1543 | def _process_rfc2109_cookies(self, cookies):\r | |
1544 | rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)\r | |
1545 | if rfc2109_as_ns is None:\r | |
1546 | rfc2109_as_ns = not self._policy.rfc2965\r | |
1547 | for cookie in cookies:\r | |
1548 | if cookie.version == 1:\r | |
1549 | cookie.rfc2109 = True\r | |
1550 | if rfc2109_as_ns:\r | |
1551 | # treat 2109 cookies as Netscape cookies rather than\r | |
1552 | # as RFC2965 cookies\r | |
1553 | cookie.version = 0\r | |
1554 | \r | |
    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        # Quick exit when no header present is of a type the policy accepts.
        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        # A malformed header must not abort the whole response: warn and
        # fall back to an empty cookie list instead.
        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except Exception:
                _warn_unhandled_exception()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                def no_matching_rfc2965(ns_cookie, lookup=lookup):
                    key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
                    return key not in lookup
                ns_cookies = filter(no_matching_rfc2965, ns_cookies)

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies
1608 | \r | |
1609 | def set_cookie_if_ok(self, cookie, request):\r | |
1610 | """Set a cookie if policy says it's OK to do so."""\r | |
1611 | self._cookies_lock.acquire()\r | |
1612 | try:\r | |
1613 | self._policy._now = self._now = int(time.time())\r | |
1614 | \r | |
1615 | if self._policy.set_ok(cookie, request):\r | |
1616 | self.set_cookie(cookie)\r | |
1617 | \r | |
1618 | \r | |
1619 | finally:\r | |
1620 | self._cookies_lock.release()\r | |
1621 | \r | |
1622 | def set_cookie(self, cookie):\r | |
1623 | """Set a cookie, without checking whether or not it should be set."""\r | |
1624 | c = self._cookies\r | |
1625 | self._cookies_lock.acquire()\r | |
1626 | try:\r | |
1627 | if cookie.domain not in c: c[cookie.domain] = {}\r | |
1628 | c2 = c[cookie.domain]\r | |
1629 | if cookie.path not in c2: c2[cookie.path] = {}\r | |
1630 | c3 = c2[cookie.path]\r | |
1631 | c3[cookie.name] = cookie\r | |
1632 | finally:\r | |
1633 | self._cookies_lock.release()\r | |
1634 | \r | |
1635 | def extract_cookies(self, response, request):\r | |
1636 | """Extract cookies from response, where allowable given the request."""\r | |
1637 | _debug("extract_cookies: %s", response.info())\r | |
1638 | self._cookies_lock.acquire()\r | |
1639 | try:\r | |
1640 | self._policy._now = self._now = int(time.time())\r | |
1641 | \r | |
1642 | for cookie in self.make_cookies(response, request):\r | |
1643 | if self._policy.set_ok(cookie, request):\r | |
1644 | _debug(" setting cookie: %s", cookie)\r | |
1645 | self.set_cookie(cookie)\r | |
1646 | finally:\r | |
1647 | self._cookies_lock.release()\r | |
1648 | \r | |
1649 | def clear(self, domain=None, path=None, name=None):\r | |
1650 | """Clear some cookies.\r | |
1651 | \r | |
1652 | Invoking this method without arguments will clear all cookies. If\r | |
1653 | given a single argument, only cookies belonging to that domain will be\r | |
1654 | removed. If given two arguments, cookies belonging to the specified\r | |
1655 | path within that domain are removed. If given three arguments, then\r | |
1656 | the cookie with the specified name, path and domain is removed.\r | |
1657 | \r | |
1658 | Raises KeyError if no matching cookie exists.\r | |
1659 | \r | |
1660 | """\r | |
1661 | if name is not None:\r | |
1662 | if (domain is None) or (path is None):\r | |
1663 | raise ValueError(\r | |
1664 | "domain and path must be given to remove a cookie by name")\r | |
1665 | del self._cookies[domain][path][name]\r | |
1666 | elif path is not None:\r | |
1667 | if domain is None:\r | |
1668 | raise ValueError(\r | |
1669 | "domain must be given to remove cookies by path")\r | |
1670 | del self._cookies[domain][path]\r | |
1671 | elif domain is not None:\r | |
1672 | del self._cookies[domain]\r | |
1673 | else:\r | |
1674 | self._cookies = {}\r | |
1675 | \r | |
1676 | def clear_session_cookies(self):\r | |
1677 | """Discard all session cookies.\r | |
1678 | \r | |
1679 | Note that the .save() method won't save session cookies anyway, unless\r | |
1680 | you ask otherwise by passing a true ignore_discard argument.\r | |
1681 | \r | |
1682 | """\r | |
1683 | self._cookies_lock.acquire()\r | |
1684 | try:\r | |
1685 | for cookie in self:\r | |
1686 | if cookie.discard:\r | |
1687 | self.clear(cookie.domain, cookie.path, cookie.name)\r | |
1688 | finally:\r | |
1689 | self._cookies_lock.release()\r | |
1690 | \r | |
1691 | def clear_expired_cookies(self):\r | |
1692 | """Discard all expired cookies.\r | |
1693 | \r | |
1694 | You probably don't need to call this method: expired cookies are never\r | |
1695 | sent back to the server (provided you're using DefaultCookiePolicy),\r | |
1696 | this method is called by CookieJar itself every so often, and the\r | |
1697 | .save() method won't save expired cookies anyway (unless you ask\r | |
1698 | otherwise by passing a true ignore_expires argument).\r | |
1699 | \r | |
1700 | """\r | |
1701 | self._cookies_lock.acquire()\r | |
1702 | try:\r | |
1703 | now = time.time()\r | |
1704 | for cookie in self:\r | |
1705 | if cookie.is_expired(now):\r | |
1706 | self.clear(cookie.domain, cookie.path, cookie.name)\r | |
1707 | finally:\r | |
1708 | self._cookies_lock.release()\r | |
1709 | \r | |
    def __iter__(self):
        # Yield every Cookie in the jar by flattening the nested
        # domain -> path -> name mapping via the module's deepvalues()
        # helper.
        return deepvalues(self._cookies)
1712 | \r | |
1713 | def __len__(self):\r | |
1714 | """Return number of contained cookies."""\r | |
1715 | i = 0\r | |
1716 | for cookie in self: i = i + 1\r | |
1717 | return i\r | |
1718 | \r | |
1719 | def __repr__(self):\r | |
1720 | r = []\r | |
1721 | for cookie in self: r.append(repr(cookie))\r | |
1722 | return "<%s[%s]>" % (self.__class__, ", ".join(r))\r | |
1723 | \r | |
1724 | def __str__(self):\r | |
1725 | r = []\r | |
1726 | for cookie in self: r.append(str(cookie))\r | |
1727 | return "<%s[%s]>" % (self.__class__, ", ".join(r))\r | |
1728 | \r | |
1729 | \r | |
1730 | # derives from IOError for backwards-compatibility with Python 2.4.0\r | |
1731 | class LoadError(IOError): pass\r | |
1732 | \r | |
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            # Validate eagerly that filename is string-like so a bogus
            # value fails here rather than at the first load()/save().
            # Only TypeError is caught: a bare except would also swallow
            # KeyboardInterrupt and SystemExit.
            try:
                filename+""
            except TypeError:
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Subclasses must override; the base class has no file format.
        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        With filename=None, the filename given to the constructor is
        used; ValueError is raised if neither is available.  Parsing is
        delegated to the subclass's _really_load().
        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        # The with statement guarantees the file is closed even if
        # _really_load() raises.
        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            # Keep a deep copy so the jar can be restored if loading fails.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
1792 | \r | |
1793 | from _LWPCookieJar import LWPCookieJar, lwp_cookie_str\r | |
1794 | from _MozillaCookieJar import MozillaCookieJar\r |