+++ /dev/null
-"""Guess the MIME type of a file.\r
-\r
-This module defines two useful functions:\r
-\r
-guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.\r
-\r
-guess_extension(type, strict=1) -- guess the extension for a given MIME type.\r
-\r
-It also contains the following, for tuning the behavior:\r
-\r
-Data:\r
-\r
-knownfiles -- list of files to parse\r
-inited -- flag set when init() has been called\r
-suffix_map -- dictionary mapping suffixes to suffixes\r
-encodings_map -- dictionary mapping suffixes to encodings\r
-types_map -- dictionary mapping suffixes to types\r
-\r
-Functions:\r
-\r
-init([files]) -- parse a list of files, default knownfiles (on Windows, the\r
- default values are taken from the registry)\r
-read_mime_types(file) -- parse one file, return a dictionary or None\r
-"""\r
-\r
-import os\r
-import sys\r
-import posixpath\r
-import urllib\r
-try:\r
- import _winreg\r
-except ImportError:\r
- _winreg = None\r
-\r
-__all__ = [\r
- "guess_type","guess_extension","guess_all_extensions",\r
- "add_type","read_mime_types","init"\r
-]\r
-\r
-knownfiles = [\r
- "/etc/mime.types",\r
- "/etc/httpd/mime.types", # Mac OS X\r
- "/etc/httpd/conf/mime.types", # Apache\r
- "/etc/apache/mime.types", # Apache 1\r
- "/etc/apache2/mime.types", # Apache 2\r
- "/usr/local/etc/httpd/conf/mime.types",\r
- "/usr/local/lib/netscape/mime.types",\r
- "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2\r
- "/usr/local/etc/mime.types", # Apache 1.3\r
- ]\r
-\r
-inited = False\r
-_db = None\r
-\r
-\r
-class MimeTypes:\r
- """MIME-types datastore.\r
-\r
- This datastore can handle information from mime.types-style files\r
- and supports basic determination of MIME type from a filename or\r
- URL, and can guess a reasonable extension given a MIME type.\r
- """\r
-\r
- def __init__(self, filenames=(), strict=True):\r
- if not inited:\r
- init()\r
- self.encodings_map = encodings_map.copy()\r
- self.suffix_map = suffix_map.copy()\r
- self.types_map = ({}, {}) # dict for (non-strict, strict)\r
- self.types_map_inv = ({}, {})\r
- for (ext, type) in types_map.items():\r
- self.add_type(type, ext, True)\r
- for (ext, type) in common_types.items():\r
- self.add_type(type, ext, False)\r
- for name in filenames:\r
- self.read(name, strict)\r
-\r
- def add_type(self, type, ext, strict=True):\r
- """Add a mapping between a type and an extension.\r
-\r
- When the extension is already known, the new\r
- type will replace the old one. When the type\r
- is already known the extension will be added\r
- to the list of known extensions.\r
-\r
- If strict is true, information will be added to\r
- list of standard types, else to the list of non-standard\r
- types.\r
- """\r
- self.types_map[strict][ext] = type\r
- exts = self.types_map_inv[strict].setdefault(type, [])\r
- if ext not in exts:\r
- exts.append(ext)\r
-\r
- def guess_type(self, url, strict=True):\r
- """Guess the type of a file based on its URL.\r
-\r
- Return value is a tuple (type, encoding) where type is None if\r
- the type can't be guessed (no or unknown suffix) or a string\r
- of the form type/subtype, usable for a MIME Content-type\r
- header; and encoding is None for no encoding or the name of\r
- the program used to encode (e.g. compress or gzip). The\r
- mappings are table driven. Encoding suffixes are case\r
- sensitive; type suffixes are first tried case sensitive, then\r
- case insensitive.\r
-\r
- The suffixes .tgz, .taz and .tz (case sensitive!) are all\r
- mapped to '.tar.gz'. (This is table-driven too, using the\r
- dictionary suffix_map.)\r
-\r
- Optional `strict' argument when False adds a bunch of commonly found,\r
- but non-standard types.\r
- """\r
- scheme, url = urllib.splittype(url)\r
- if scheme == 'data':\r
- # syntax of data URLs:\r
- # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data\r
- # mediatype := [ type "/" subtype ] *( ";" parameter )\r
- # data := *urlchar\r
- # parameter := attribute "=" value\r
- # type/subtype defaults to "text/plain"\r
- comma = url.find(',')\r
- if comma < 0:\r
- # bad data URL\r
- return None, None\r
- semi = url.find(';', 0, comma)\r
- if semi >= 0:\r
- type = url[:semi]\r
- else:\r
- type = url[:comma]\r
- if '=' in type or '/' not in type:\r
- type = 'text/plain'\r
- return type, None # never compressed, so encoding is None\r
- base, ext = posixpath.splitext(url)\r
- while ext in self.suffix_map:\r
- base, ext = posixpath.splitext(base + self.suffix_map[ext])\r
- if ext in self.encodings_map:\r
- encoding = self.encodings_map[ext]\r
- base, ext = posixpath.splitext(base)\r
- else:\r
- encoding = None\r
- types_map = self.types_map[True]\r
- if ext in types_map:\r
- return types_map[ext], encoding\r
- elif ext.lower() in types_map:\r
- return types_map[ext.lower()], encoding\r
- elif strict:\r
- return None, encoding\r
- types_map = self.types_map[False]\r
- if ext in types_map:\r
- return types_map[ext], encoding\r
- elif ext.lower() in types_map:\r
- return types_map[ext.lower()], encoding\r
- else:\r
- return None, encoding\r
-\r
- def guess_all_extensions(self, type, strict=True):\r
- """Guess the extensions for a file based on its MIME type.\r
-\r
- Return value is a list of strings giving the possible filename\r
- extensions, including the leading dot ('.'). The extension is not\r
- guaranteed to have been associated with any particular data stream,\r
- but would be mapped to the MIME type `type' by guess_type().\r
-\r
- Optional `strict' argument when false adds a bunch of commonly found,\r
- but non-standard types.\r
- """\r
- type = type.lower()\r
- extensions = self.types_map_inv[True].get(type, [])\r
- if not strict:\r
- for ext in self.types_map_inv[False].get(type, []):\r
- if ext not in extensions:\r
- extensions.append(ext)\r
- return extensions\r
-\r
- def guess_extension(self, type, strict=True):\r
- """Guess the extension for a file based on its MIME type.\r
-\r
- Return value is a string giving a filename extension,\r
- including the leading dot ('.'). The extension is not\r
- guaranteed to have been associated with any particular data\r
- stream, but would be mapped to the MIME type `type' by\r
- guess_type(). If no extension can be guessed for `type', None\r
- is returned.\r
-\r
- Optional `strict' argument when false adds a bunch of commonly found,\r
- but non-standard types.\r
- """\r
- extensions = self.guess_all_extensions(type, strict)\r
- if not extensions:\r
- return None\r
- return extensions[0]\r
-\r
- def read(self, filename, strict=True):\r
- """\r
- Read a single mime.types-format file, specified by pathname.\r
-\r
- If strict is true, information will be added to\r
- list of standard types, else to the list of non-standard\r
- types.\r
- """\r
- with open(filename) as fp:\r
- self.readfp(fp, strict)\r
-\r
- def readfp(self, fp, strict=True):\r
- """\r
- Read a single mime.types-format file.\r
-\r
- If strict is true, information will be added to\r
- list of standard types, else to the list of non-standard\r
- types.\r
- """\r
- while 1:\r
- line = fp.readline()\r
- if not line:\r
- break\r
- words = line.split()\r
- for i in range(len(words)):\r
- if words[i][0] == '#':\r
- del words[i:]\r
- break\r
- if not words:\r
- continue\r
- type, suffixes = words[0], words[1:]\r
- for suff in suffixes:\r
- self.add_type(type, '.' + suff, strict)\r
-\r
- def read_windows_registry(self, strict=True):\r
- """\r
- Load the MIME types database from Windows registry.\r
-\r
- If strict is true, information will be added to\r
- list of standard types, else to the list of non-standard\r
- types.\r
- """\r
-\r
- # Windows only\r
- if not _winreg:\r
- return\r
-\r
- def enum_types(mimedb):\r
- i = 0\r
- while True:\r
- try:\r
- ctype = _winreg.EnumKey(mimedb, i)\r
- except EnvironmentError:\r
- break\r
- try:\r
- ctype = ctype.encode(default_encoding) # omit in 3.x!\r
- except UnicodeEncodeError:\r
- pass\r
- else:\r
- yield ctype\r
- i += 1\r
-\r
- default_encoding = sys.getdefaultencoding()\r
- with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT,\r
- r'MIME\Database\Content Type') as mimedb:\r
- for ctype in enum_types(mimedb):\r
- try:\r
- with _winreg.OpenKey(mimedb, ctype) as key:\r
- suffix, datatype = _winreg.QueryValueEx(key,\r
- 'Extension')\r
- except EnvironmentError:\r
- continue\r
- if datatype != _winreg.REG_SZ:\r
- continue\r
- try:\r
- suffix = suffix.encode(default_encoding) # omit in 3.x!\r
- except UnicodeEncodeError:\r
- continue\r
- self.add_type(ctype, suffix, strict)\r
-\r
-\r
-def guess_type(url, strict=True):\r
- """Guess the type of a file based on its URL.\r
-\r
- Return value is a tuple (type, encoding) where type is None if the\r
- type can't be guessed (no or unknown suffix) or a string of the\r
- form type/subtype, usable for a MIME Content-type header; and\r
- encoding is None for no encoding or the name of the program used\r
- to encode (e.g. compress or gzip). The mappings are table\r
- driven. Encoding suffixes are case sensitive; type suffixes are\r
- first tried case sensitive, then case insensitive.\r
-\r
- The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped\r
- to ".tar.gz". (This is table-driven too, using the dictionary\r
- suffix_map).\r
-\r
- Optional `strict' argument when false adds a bunch of commonly found, but\r
- non-standard types.\r
- """\r
- if _db is None:\r
- init()\r
- return _db.guess_type(url, strict)\r
-\r
-\r
-def guess_all_extensions(type, strict=True):\r
- """Guess the extensions for a file based on its MIME type.\r
-\r
- Return value is a list of strings giving the possible filename\r
- extensions, including the leading dot ('.'). The extension is not\r
- guaranteed to have been associated with any particular data\r
- stream, but would be mapped to the MIME type `type' by\r
- guess_type(). If no extension can be guessed for `type', None\r
- is returned.\r
-\r
- Optional `strict' argument when false adds a bunch of commonly found,\r
- but non-standard types.\r
- """\r
- if _db is None:\r
- init()\r
- return _db.guess_all_extensions(type, strict)\r
-\r
-def guess_extension(type, strict=True):\r
- """Guess the extension for a file based on its MIME type.\r
-\r
- Return value is a string giving a filename extension, including the\r
- leading dot ('.'). The extension is not guaranteed to have been\r
- associated with any particular data stream, but would be mapped to the\r
- MIME type `type' by guess_type(). If no extension can be guessed for\r
- `type', None is returned.\r
-\r
- Optional `strict' argument when false adds a bunch of commonly found,\r
- but non-standard types.\r
- """\r
- if _db is None:\r
- init()\r
- return _db.guess_extension(type, strict)\r
-\r
-def add_type(type, ext, strict=True):\r
- """Add a mapping between a type and an extension.\r
-\r
- When the extension is already known, the new\r
- type will replace the old one. When the type\r
- is already known the extension will be added\r
- to the list of known extensions.\r
-\r
- If strict is true, information will be added to\r
- list of standard types, else to the list of non-standard\r
- types.\r
- """\r
- if _db is None:\r
- init()\r
- return _db.add_type(type, ext, strict)\r
-\r
-\r
-def init(files=None):\r
- global suffix_map, types_map, encodings_map, common_types\r
- global inited, _db\r
- inited = True # so that MimeTypes.__init__() doesn't call us again\r
- db = MimeTypes()\r
- if files is None:\r
- if _winreg:\r
- db.read_windows_registry()\r
- files = knownfiles\r
- for file in files:\r
- if os.path.isfile(file):\r
- db.read(file)\r
- encodings_map = db.encodings_map\r
- suffix_map = db.suffix_map\r
- types_map = db.types_map[True]\r
- common_types = db.types_map[False]\r
- # Make the DB a global variable now that it is fully initialized\r
- _db = db\r
-\r
-\r
-def read_mime_types(file):\r
- try:\r
- f = open(file)\r
- except IOError:\r
- return None\r
- db = MimeTypes()\r
- db.readfp(f, True)\r
- return db.types_map[True]\r
-\r
-\r
-def _default_mime_types():\r
- global suffix_map\r
- global encodings_map\r
- global types_map\r
- global common_types\r
-\r
- suffix_map = {\r
- '.tgz': '.tar.gz',\r
- '.taz': '.tar.gz',\r
- '.tz': '.tar.gz',\r
- '.tbz2': '.tar.bz2',\r
- }\r
-\r
- encodings_map = {\r
- '.gz': 'gzip',\r
- '.Z': 'compress',\r
- '.bz2': 'bzip2',\r
- }\r
-\r
- # Before adding new types, make sure they are either registered with IANA,\r
- # at http://www.isi.edu/in-notes/iana/assignments/media-types\r
- # or extensions, i.e. using the x- prefix\r
-\r
- # If you add to these, please keep them sorted!\r
- types_map = {\r
- '.a' : 'application/octet-stream',\r
- '.ai' : 'application/postscript',\r
- '.aif' : 'audio/x-aiff',\r
- '.aifc' : 'audio/x-aiff',\r
- '.aiff' : 'audio/x-aiff',\r
- '.au' : 'audio/basic',\r
- '.avi' : 'video/x-msvideo',\r
- '.bat' : 'text/plain',\r
- '.bcpio' : 'application/x-bcpio',\r
- '.bin' : 'application/octet-stream',\r
- '.bmp' : 'image/x-ms-bmp',\r
- '.c' : 'text/plain',\r
- # Duplicates :(\r
- '.cdf' : 'application/x-cdf',\r
- '.cdf' : 'application/x-netcdf',\r
- '.cpio' : 'application/x-cpio',\r
- '.csh' : 'application/x-csh',\r
- '.css' : 'text/css',\r
- '.dll' : 'application/octet-stream',\r
- '.doc' : 'application/msword',\r
- '.dot' : 'application/msword',\r
- '.dvi' : 'application/x-dvi',\r
- '.eml' : 'message/rfc822',\r
- '.eps' : 'application/postscript',\r
- '.etx' : 'text/x-setext',\r
- '.exe' : 'application/octet-stream',\r
- '.gif' : 'image/gif',\r
- '.gtar' : 'application/x-gtar',\r
- '.h' : 'text/plain',\r
- '.hdf' : 'application/x-hdf',\r
- '.htm' : 'text/html',\r
- '.html' : 'text/html',\r
- '.ief' : 'image/ief',\r
- '.jpe' : 'image/jpeg',\r
- '.jpeg' : 'image/jpeg',\r
- '.jpg' : 'image/jpeg',\r
- '.js' : 'application/x-javascript',\r
- '.ksh' : 'text/plain',\r
- '.latex' : 'application/x-latex',\r
- '.m1v' : 'video/mpeg',\r
- '.man' : 'application/x-troff-man',\r
- '.me' : 'application/x-troff-me',\r
- '.mht' : 'message/rfc822',\r
- '.mhtml' : 'message/rfc822',\r
- '.mif' : 'application/x-mif',\r
- '.mov' : 'video/quicktime',\r
- '.movie' : 'video/x-sgi-movie',\r
- '.mp2' : 'audio/mpeg',\r
- '.mp3' : 'audio/mpeg',\r
- '.mp4' : 'video/mp4',\r
- '.mpa' : 'video/mpeg',\r
- '.mpe' : 'video/mpeg',\r
- '.mpeg' : 'video/mpeg',\r
- '.mpg' : 'video/mpeg',\r
- '.ms' : 'application/x-troff-ms',\r
- '.nc' : 'application/x-netcdf',\r
- '.nws' : 'message/rfc822',\r
- '.o' : 'application/octet-stream',\r
- '.obj' : 'application/octet-stream',\r
- '.oda' : 'application/oda',\r
- '.p12' : 'application/x-pkcs12',\r
- '.p7c' : 'application/pkcs7-mime',\r
- '.pbm' : 'image/x-portable-bitmap',\r
- '.pdf' : 'application/pdf',\r
- '.pfx' : 'application/x-pkcs12',\r
- '.pgm' : 'image/x-portable-graymap',\r
- '.pl' : 'text/plain',\r
- '.png' : 'image/png',\r
- '.pnm' : 'image/x-portable-anymap',\r
- '.pot' : 'application/vnd.ms-powerpoint',\r
- '.ppa' : 'application/vnd.ms-powerpoint',\r
- '.ppm' : 'image/x-portable-pixmap',\r
- '.pps' : 'application/vnd.ms-powerpoint',\r
- '.ppt' : 'application/vnd.ms-powerpoint',\r
- '.ps' : 'application/postscript',\r
- '.pwz' : 'application/vnd.ms-powerpoint',\r
- '.py' : 'text/x-python',\r
- '.pyc' : 'application/x-python-code',\r
- '.pyo' : 'application/x-python-code',\r
- '.qt' : 'video/quicktime',\r
- '.ra' : 'audio/x-pn-realaudio',\r
- '.ram' : 'application/x-pn-realaudio',\r
- '.ras' : 'image/x-cmu-raster',\r
- '.rdf' : 'application/xml',\r
- '.rgb' : 'image/x-rgb',\r
- '.roff' : 'application/x-troff',\r
- '.rtx' : 'text/richtext',\r
- '.sgm' : 'text/x-sgml',\r
- '.sgml' : 'text/x-sgml',\r
- '.sh' : 'application/x-sh',\r
- '.shar' : 'application/x-shar',\r
- '.snd' : 'audio/basic',\r
- '.so' : 'application/octet-stream',\r
- '.src' : 'application/x-wais-source',\r
- '.sv4cpio': 'application/x-sv4cpio',\r
- '.sv4crc' : 'application/x-sv4crc',\r
- '.swf' : 'application/x-shockwave-flash',\r
- '.t' : 'application/x-troff',\r
- '.tar' : 'application/x-tar',\r
- '.tcl' : 'application/x-tcl',\r
- '.tex' : 'application/x-tex',\r
- '.texi' : 'application/x-texinfo',\r
- '.texinfo': 'application/x-texinfo',\r
- '.tif' : 'image/tiff',\r
- '.tiff' : 'image/tiff',\r
- '.tr' : 'application/x-troff',\r
- '.tsv' : 'text/tab-separated-values',\r
- '.txt' : 'text/plain',\r
- '.ustar' : 'application/x-ustar',\r
- '.vcf' : 'text/x-vcard',\r
- '.wav' : 'audio/x-wav',\r
- '.wiz' : 'application/msword',\r
- '.wsdl' : 'application/xml',\r
- '.xbm' : 'image/x-xbitmap',\r
- '.xlb' : 'application/vnd.ms-excel',\r
- # Duplicates :(\r
- '.xls' : 'application/excel',\r
- '.xls' : 'application/vnd.ms-excel',\r
- '.xml' : 'text/xml',\r
- '.xpdl' : 'application/xml',\r
- '.xpm' : 'image/x-xpixmap',\r
- '.xsl' : 'application/xml',\r
- '.xwd' : 'image/x-xwindowdump',\r
- '.zip' : 'application/zip',\r
- }\r
-\r
- # These are non-standard types, commonly found in the wild. They will\r
- # only match if strict=0 flag is given to the API methods.\r
-\r
- # Please sort these too\r
- common_types = {\r
- '.jpg' : 'image/jpg',\r
- '.mid' : 'audio/midi',\r
- '.midi': 'audio/midi',\r
- '.pct' : 'image/pict',\r
- '.pic' : 'image/pict',\r
- '.pict': 'image/pict',\r
- '.rtf' : 'application/rtf',\r
- '.xul' : 'text/xul'\r
- }\r
-\r
-\r
-_default_mime_types()\r
-\r
-\r
-if __name__ == '__main__':\r
- import getopt\r
-\r
- USAGE = """\\r
-Usage: mimetypes.py [options] type\r
-\r
-Options:\r
- --help / -h -- print this message and exit\r
- --lenient / -l -- additionally search of some common, but non-standard\r
- types.\r
- --extension / -e -- guess extension instead of type\r
-\r
-More than one type argument may be given.\r
-"""\r
-\r
- def usage(code, msg=''):\r
- print USAGE\r
- if msg: print msg\r
- sys.exit(code)\r
-\r
- try:\r
- opts, args = getopt.getopt(sys.argv[1:], 'hle',\r
- ['help', 'lenient', 'extension'])\r
- except getopt.error, msg:\r
- usage(1, msg)\r
-\r
- strict = 1\r
- extension = 0\r
- for opt, arg in opts:\r
- if opt in ('-h', '--help'):\r
- usage(0)\r
- elif opt in ('-l', '--lenient'):\r
- strict = 0\r
- elif opt in ('-e', '--extension'):\r
- extension = 1\r
- for gtype in args:\r
- if extension:\r
- guess = guess_extension(gtype, strict)\r
- if not guess: print "I don't know anything about type", gtype\r
- else: print guess\r
- else:\r
- guess, encoding = guess_type(gtype, strict)\r
- if not guess: print "I don't know anything about type", gtype\r
- else: print 'type:', guess, 'encoding:', encoding\r