]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)\r |
2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php\r | |
3 | # Also licenced under the Apache License, 2.0: http://opensource.org/licenses/apache2.0.php\r | |
4 | # Licensed to PSF under a Contributor Agreement\r | |
5 | """\r | |
6 | Middleware to check for obedience to the WSGI specification.\r | |
7 | \r | |
8 | Some of the things this checks:\r | |
9 | \r | |
10 | * Signature of the application and start_response (including that\r | |
11 | keyword arguments are not used).\r | |
12 | \r | |
13 | * Environment checks:\r | |
14 | \r | |
15 | - Environment is a dictionary (and not a subclass).\r | |
16 | \r | |
17 | - That all the required keys are in the environment: REQUEST_METHOD,\r | |
18 | SERVER_NAME, SERVER_PORT, wsgi.version, wsgi.input, wsgi.errors,\r | |
19 | wsgi.multithread, wsgi.multiprocess, wsgi.run_once\r | |
20 | \r | |
21 | - That HTTP_CONTENT_TYPE and HTTP_CONTENT_LENGTH are not in the\r | |
22 | environment (these headers should appear as CONTENT_LENGTH and\r | |
23 | CONTENT_TYPE).\r | |
24 | \r | |
25 | - Warns if QUERY_STRING is missing, as the cgi module acts\r | |
26 | unpredictably in that case.\r | |
27 | \r | |
28 | - That CGI-style variables (that don't contain a .) have\r | |
29 | (non-unicode) string values\r | |
30 | \r | |
31 | - That wsgi.version is a tuple\r | |
32 | \r | |
33 | - That wsgi.url_scheme is 'http' or 'https' (@@: is this too\r | |
34 | restrictive?)\r | |
35 | \r | |
36 | - Warns if the REQUEST_METHOD is not known (@@: probably too\r | |
37 | restrictive).\r | |
38 | \r | |
39 | - That SCRIPT_NAME and PATH_INFO are empty or start with /\r | |
40 | \r | |
41 | - That at least one of SCRIPT_NAME or PATH_INFO are set.\r | |
42 | \r | |
43 | - That CONTENT_LENGTH is a non-negative integer.\r | |
44 | \r | |
45 | - That SCRIPT_NAME is not '/' (it should be '', and PATH_INFO should\r | |
46 | be '/').\r | |
47 | \r | |
48 | - That wsgi.input has the methods read, readline, readlines, and\r | |
49 | __iter__\r | |
50 | \r | |
51 | - That wsgi.errors has the methods flush, write, writelines\r | |
52 | \r | |
53 | * The status is a string, contains a space, starts with an integer,\r | |
54 | and that integer is in range (>= 100).\r | |
55 | \r | |
56 | * That the headers is a list (not a subclass, not another kind of\r | |
57 | sequence).\r | |
58 | \r | |
59 | * That the items of the headers are tuples of strings.\r | |
60 | \r | |
61 | * That there is no 'status' header (that is used in CGI, but not in\r | |
62 | WSGI).\r | |
63 | \r | |
64 | * That the headers don't contain newlines or colons, end in _ or -, or\r | |
65 | contain character codes of 037 or below.\r | |
66 | \r | |
67 | * That Content-Type is given if there is content (CGI often has a\r | |
68 | default content type, but WSGI does not).\r | |
69 | \r | |
70 | * That no Content-Type is given when there is no content (@@: is this\r | |
71 | too restrictive?)\r | |
72 | \r | |
73 | * That the exc_info argument to start_response is a tuple or None.\r | |
74 | \r | |
75 | * That all calls to the writer are with strings, and no other methods\r | |
76 | on the writer are accessed.\r | |
77 | \r | |
78 | * That wsgi.input is used properly:\r | |
79 | \r | |
80 | - .read() is called with zero or one argument\r | |
81 | \r | |
82 | - That it returns a string\r | |
83 | \r | |
84 | - That readline, readlines, and __iter__ return strings\r | |
85 | \r | |
86 | - That .close() is not called\r | |
87 | \r | |
88 | - No other methods are provided\r | |
89 | \r | |
90 | * That wsgi.errors is used properly:\r | |
91 | \r | |
92 | - .write() and .writelines() are called with strings\r | |
93 | \r | |
94 | - That .close() is not called, and no other methods are provided.\r | |
95 | \r | |
96 | * The response iterator:\r | |
97 | \r | |
98 | - That it is not a string (it should be a list of a single string; a\r | |
99 | string will work, but perform horribly).\r | |
100 | \r | |
101 | - That .next() returns a string\r | |
102 | \r | |
103 | - That the iterator is not iterated over until start_response has\r | |
104 | been called (that can signal either a server or application\r | |
105 | error).\r | |
106 | \r | |
107 | - That .close() is called (doesn't raise exception, only prints to\r | |
108 | sys.stderr, because we only know it isn't called when the object\r | |
109 | is garbage collected).\r | |
110 | """\r | |
111 | __all__ = ['validator']\r | |
112 | \r | |
113 | \r | |
114 | import re\r | |
115 | import sys\r | |
116 | from types import DictType, StringType, TupleType, ListType\r | |
117 | import warnings\r | |
118 | \r | |
# A header name must start with an ASCII letter and may continue with
# ASCII letters, digits, '-' or '_'.
header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$')
# Matches any control character (octal 000 through 037) in a header value.
bad_header_value_re = re.compile(r'[\000-\037]')
121 | \r | |
class WSGIWarning(Warning):
    """
    Warning category passed to warnings.warn() by the checks in this
    module for WSGI-spec-related problems.
    """
126 | \r | |
def assert_(cond, *args):
    """
    Raise AssertionError(*args) unless `cond` is true.

    This is an ordinary function rather than an ``assert`` statement, so
    the message arguments are always evaluated by the caller before the
    condition is tested.
    """
    if cond:
        return
    raise AssertionError(*args)
130 | \r | |
def validator(application):

    """
    Wrap `application` in middleware that checks WSGI compliance.

    The returned callable is meant to sit between a WSGI server and
    `application`.  It validates the environ and the arguments the
    application passes to start_response, and it substitutes checking
    wrappers for environ['wsgi.input'], environ['wsgi.errors'], the
    write callable returned by start_response, and the iterator the
    application returns.  Violations raise AssertionError (a failure to
    close the application iterator is detected only when the iterator
    wrapper is garbage collected, where it is also written to
    sys.stderr).
    """

    def lint_app(*args, **kw):
        assert_(len(args) == 2, "Two arguments required")
        assert_(not kw, "No keyword arguments allowed")
        environ, start_response = args

        check_environ(environ)

        # Stays empty until the application calls start_response; the
        # iterator wrapper inspects it on the first next() call.
        start_response_started = []

        def start_response_wrapper(*sr_args, **sr_kw):
            assert_(len(sr_args) in (2, 3), (
                "Invalid number of arguments: %s" % (sr_args,)))
            assert_(not sr_kw, "No keyword arguments allowed")
            status, headers = sr_args[0], sr_args[1]
            exc_info = None
            if len(sr_args) == 3:
                exc_info = sr_args[2]

            check_status(status)
            check_headers(headers)
            check_content_type(status, headers)
            check_exc_info(exc_info)

            start_response_started.append(None)
            # Forward the arguments untouched and guard the write callable.
            return WriteWrapper(start_response(*sr_args))

        # Replace both streams with wrappers that police how they are used.
        environ['wsgi.input'] = InputWrapper(environ['wsgi.input'])
        environ['wsgi.errors'] = ErrorWrapper(environ['wsgi.errors'])

        iterator = application(environ, start_response_wrapper)
        assert_(iterator is not None and iterator != False,
            "The application must return an iterator, if only an empty list")

        check_iterator(iterator)

        return IteratorWrapper(iterator, start_response_started)

    return lint_app
185 | \r | |
class InputWrapper:
    """
    Proxy around wsgi.input that checks call signatures and that every
    value read from the underlying stream is a string.
    """

    def __init__(self, wsgi_input):
        self.input = wsgi_input

    def read(self, *args):
        """Read from the wrapped stream; at most one argument is allowed."""
        assert_(len(args) <= 1)
        data = self.input.read(*args)
        assert_(type(data) is type(""))
        return data

    def readline(self):
        """Return the next line of the wrapped stream (no arguments)."""
        line = self.input.readline()
        assert_(type(line) is type(""))
        return line

    def readlines(self, *args):
        """Return a list of lines; the result must be a list of strings."""
        assert_(len(args) <= 1)
        result = self.input.readlines(*args)
        assert_(type(result) is type([]))
        for entry in result:
            assert_(type(entry) is type(""))
        return result

    def __iter__(self):
        """Yield lines via readline() until an empty string is returned."""
        line = self.readline()
        while line:
            yield line
            line = self.readline()

    def close(self):
        """Always fails: the application must not close wsgi.input."""
        assert_(0, "input.close() must not be called")
219 | \r | |
class ErrorWrapper:
    """
    Proxy around wsgi.errors that only forwards string writes and
    refuses to be closed.
    """

    def __init__(self, wsgi_errors):
        self.errors = wsgi_errors

    def write(self, s):
        """Forward `s` to the wrapped stream after checking it is a string."""
        assert_(type(s) is type(""))
        self.errors.write(s)

    def flush(self):
        """Flush the wrapped stream."""
        self.errors.flush()

    def writelines(self, seq):
        """Write each item of `seq` through write(), so each is type-checked."""
        for chunk in seq:
            self.write(chunk)

    def close(self):
        """Always fails: the application must not close wsgi.errors."""
        assert_(0, "errors.close() must not be called")
238 | \r | |
class WriteWrapper:
    """
    Callable proxy for the write function returned by start_response.

    Only calling is supported; each call must pass a string, which is
    then handed to the wrapped writer.
    """

    def __init__(self, wsgi_writer):
        self.writer = wsgi_writer

    def __call__(self, s):
        assert_(type(s) is type(""))
        self.writer(s)
247 | \r | |
class PartialIteratorWrapper:
    """
    Holds an application iterable and wraps it in an IteratorWrapper
    only when iteration is requested.
    """

    def __init__(self, wsgi_iterator):
        self.iterator = wsgi_iterator

    def __iter__(self):
        # Wrapping is deferred to here so that __iter__ really gets called;
        # passing None disables the start_response check in the wrapper.
        return IteratorWrapper(self.iterator, None)
256 | \r | |
class IteratorWrapper:
    """
    Iterator proxy for the application's response iterable.

    It checks that nothing is read after close(), that start_response
    was called by the time the first item is produced, and it complains
    when the wrapper is garbage collected without having been closed.
    """

    def __init__(self, wsgi_iterator, check_start_response):
        self.original_iterator = wsgi_iterator
        self.iterator = iter(wsgi_iterator)
        self.closed = False
        # Either None (no check) or a list that is non-empty once
        # start_response has been called.
        self.check_start_response = check_start_response

    def __iter__(self):
        return self

    def next(self):
        assert_(not self.closed,
            "Iterator read after closed")
        # Fetch the item first: the application may call start_response
        # lazily while producing it.
        item = self.iterator.next()
        started = self.check_start_response
        if started is not None:
            assert_(started,
                "The application returns and we started iterating over its body, but start_response has not yet been called")
            # The check only needs to run once.
            self.check_start_response = None
        return item

    def close(self):
        self.closed = True
        # close() lives on the original iterable, not on iter(iterable).
        if hasattr(self.original_iterator, 'close'):
            self.original_iterator.close()

    def __del__(self):
        if not self.closed:
            sys.stderr.write(
                "Iterator garbage collected without being closed")
        assert_(self.closed,
            "Iterator garbage collected without being closed")
289 | \r | |
def check_environ(environ):
    """
    Validate the WSGI `environ` dictionary.

    Raises AssertionError when:
      - environ is not exactly a dict;
      - a required key is missing;
      - HTTP_CONTENT_TYPE or HTTP_CONTENT_LENGTH is present;
      - a key without a '.' has a non-string value;
      - wsgi.version is not a tuple;
      - wsgi.url_scheme is not 'http' or 'https';
      - wsgi.input or wsgi.errors lacks a required method;
      - SCRIPT_NAME or PATH_INFO is non-empty and does not start with '/';
      - CONTENT_LENGTH is negative;
      - SCRIPT_NAME is empty or absent and PATH_INFO is absent;
      - SCRIPT_NAME is '/'.

    Issues a WSGIWarning when QUERY_STRING is missing or REQUEST_METHOD
    is not one of the known methods.  A CONTENT_LENGTH that is not
    numeric surfaces as the ValueError raised by int().
    """
    assert_(type(environ) is DictType,
        "Environment is not of the right type: %r (environment: %r)"
        % (type(environ), environ))

    # wsgi.url_scheme is read unconditionally below, so it is checked
    # here with the other required keys instead of failing with KeyError.
    for key in ['REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
                'wsgi.version', 'wsgi.url_scheme', 'wsgi.input',
                'wsgi.errors', 'wsgi.multithread', 'wsgi.multiprocess',
                'wsgi.run_once']:
        assert_(key in environ,
            "Environment missing required key: %r" % (key,))

    for key in ['HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH']:
        assert_(key not in environ,
            "Environment should not have the key: %s "
            "(use %s instead)" % (key, key[5:]))

    if 'QUERY_STRING' not in environ:
        warnings.warn(
            'QUERY_STRING is not in the WSGI environment; the cgi '
            'module will use sys.argv when this variable is missing, '
            'so application errors are more likely',
            WSGIWarning)

    for key in environ:
        if '.' in key:
            # Extension, we don't care about its type
            continue
        assert_(type(environ[key]) is StringType,
            "Environmental variable %s is not a string: %r (value: %r)"
            % (key, type(environ[key]), environ[key]))

    assert_(type(environ['wsgi.version']) is TupleType,
        "wsgi.version should be a tuple (%r)" % (environ['wsgi.version'],))
    # The value is wrapped in a 1-tuple so that a tuple-valued scheme is
    # reported instead of breaking the % formatting.
    assert_(environ['wsgi.url_scheme'] in ('http', 'https'),
        "wsgi.url_scheme unknown: %r" % (environ['wsgi.url_scheme'],))

    check_input(environ['wsgi.input'])
    check_errors(environ['wsgi.errors'])

    # @@: these need filling out:
    if environ['REQUEST_METHOD'] not in (
        'GET', 'HEAD', 'POST', 'OPTIONS','PUT','DELETE','TRACE'):
        warnings.warn(
            "Unknown REQUEST_METHOD: %r" % environ['REQUEST_METHOD'],
            WSGIWarning)

    # SCRIPT_NAME and PATH_INFO may be omitted when empty.  The failure
    # messages are built before assert_ runs, so they use .get() values:
    # indexing environ directly raised KeyError for an absent key.
    script_name = environ.get('SCRIPT_NAME')
    path_info = environ.get('PATH_INFO')
    assert_(not script_name or script_name.startswith('/'),
        "SCRIPT_NAME doesn't start with /: %r" % (script_name,))
    assert_(not path_info or path_info.startswith('/'),
        "PATH_INFO doesn't start with /: %r" % (path_info,))
    if environ.get('CONTENT_LENGTH'):
        assert_(int(environ['CONTENT_LENGTH']) >= 0,
            "Invalid CONTENT_LENGTH: %r" % environ['CONTENT_LENGTH'])

    if not script_name:
        assert_('PATH_INFO' in environ,
            "One of SCRIPT_NAME or PATH_INFO are required (PATH_INFO "
            "should at least be '/' if SCRIPT_NAME is empty)")
    assert_(script_name != '/',
        "SCRIPT_NAME cannot be '/'; it should instead be '', and "
        "PATH_INFO should be '/'")
354 | \r | |
def check_input(wsgi_input):
    """
    Assert that `wsgi_input` has the attributes read, readline,
    readlines and __iter__.
    """
    required = ('read', 'readline', 'readlines', '__iter__')
    for method_name in required:
        assert_(hasattr(wsgi_input, method_name),
            "wsgi.input (%r) doesn't have the attribute %s"
            % (wsgi_input, method_name))
360 | \r | |
def check_errors(wsgi_errors):
    """
    Assert that `wsgi_errors` has the attributes flush, write and
    writelines.
    """
    required = ('flush', 'write', 'writelines')
    for method_name in required:
        assert_(hasattr(wsgi_errors, method_name),
            "wsgi.errors (%r) doesn't have the attribute %s"
            % (wsgi_errors, method_name))
366 | \r | |
def check_status(status):
    """
    Validate the status string passed to start_response.

    Raises AssertionError when `status` is not a string, is empty or
    all whitespace, when its first whitespace-separated token is not
    three characters long, or when that token is an integer below 100.
    A three-character token that is not an integer surfaces as the
    ValueError raised by int().  Issues a WSGIWarning when the fourth
    character of `status` is not a space.
    """
    # The value is wrapped in a 1-tuple so that a tuple status is
    # reported instead of breaking the % formatting.
    assert_(type(status) is StringType,
        "Status must be a string (not %r)" % (status,))
    parts = status.split(None, 1)
    # split() returns [] for an empty or all-whitespace string; report
    # that as a validation failure rather than an IndexError.
    assert_(parts, "Status must not be empty: %r" % (status,))
    # Implicitly check that we can turn it into an integer:
    status_code = parts[0]
    assert_(len(status_code) == 3,
        "Status codes must be three characters: %r" % status_code)
    status_int = int(status_code)
    assert_(status_int >= 100, "Status code is invalid: %r" % status_int)
    if len(status) < 4 or status[3] != ' ':
        warnings.warn(
            "The status string (%r) should be a three-digit integer "
            "followed by a single space and a status explanation"
            % status, WSGIWarning)
381 | \r | |
def check_headers(headers):
    """
    Validate the header list passed to start_response.

    Raises AssertionError when `headers` is not exactly a list, when an
    item is not a 2-tuple of strings, when a name is 'status' (in any
    case), contains ':' or a newline, does not match header_re, or ends
    in '-' or '_', or when a value contains a character matched by
    bad_header_value_re.
    """
    assert_(type(headers) is ListType,
        "Headers (%r) must be of type list: %r"
        % (headers, type(headers)))
    for item in headers:
        assert_(type(item) is TupleType,
            "Individual headers (%r) must be of type tuple: %r"
            % (item, type(item)))
        assert_(len(item) == 2)
        name, value = item
        # Check the types first: the string methods and regex searches
        # below would fail with AttributeError/TypeError on other types.
        assert_(type(name) is StringType,
            "Header names must be strings: %r (type: %r)"
            % (name, type(name)))
        assert_(type(value) is StringType,
            "Header values must be strings: %r (type: %r)"
            % (value, type(value)))
        assert_(name.lower() != 'status',
            "The Status header cannot be used; it conflicts with CGI "
            "script, and HTTP status is not given through headers "
            "(value: %r)." % value)
        assert_('\n' not in name and ':' not in name,
            "Header names may not contain ':' or '\\n': %r" % name)
        assert_(header_re.search(name), "Bad header name: %r" % name)
        assert_(not name.endswith('-') and not name.endswith('_'),
            "Names may not end in '-' or '_': %r" % name)
        # Search once and reuse the match for the error message.
        bad_char = bad_header_value_re.search(value)
        if bad_char:
            assert_(0, "Bad header value: %r (bad char: %r)"
            % (value, bad_char.group(0)))
406 | \r | |
def check_content_type(status, headers):
    """
    Check that a Content-Type header is present exactly when the status
    code allows a message body.

    Status codes 204 and 304 must not carry a Content-Type header; any
    other code must have one.  Raises AssertionError otherwise.
    """
    code = int(status.split(None, 1)[0])
    # @@: need one more person to verify this interpretation of RFC 2616
    #     http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
    body_allowed = code not in (204, 304)
    for name, value in headers:
        if name.lower() != 'content-type':
            continue
        if body_allowed:
            return
        assert_(0, ("Content-Type header found in a %s response, "
                    "which must not return content.") % code)
    if body_allowed:
        assert_(0, "No Content-Type header found in headers (%s)" % headers)
420 | \r | |
def check_exc_info(exc_info):
    """
    Assert that the exc_info argument of start_response is either None
    or exactly a tuple.
    """
    acceptable = exc_info is None or type(exc_info) is type(())
    assert_(acceptable,
        "exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info)))
    # More exc_info checks?
425 | \r | |
def check_iterator(iterator):
    """
    Assert that the application did not return a plain string as its
    response iterable.
    """
    # A string is technically a legal iterable, which is exactly why it
    # is a really bad idea: the response may end up being sent
    # character-by-character.
    is_plain_string = isinstance(iterator, str)
    assert_(not is_plain_string,
        "You should not return a string as your application iterator, "
        "instead return a single-item list containing that string.")