+++ /dev/null
-#! /usr/bin/env python\r
-\r
-"""world -- Print mappings between country names and DNS country codes.\r
-\r
-Contact: Barry Warsaw\r
-Email: barry@python.org\r
-Version: %(__version__)s\r
-\r
-This script will take a list of Internet addresses and print out where in the\r
-world those addresses originate from, based on the top-level domain country\r
-code found in the address. Addresses can be in any of the following forms:\r
-\r
- xx -- just the country code or top-level domain identifier\r
- host.domain.xx -- any Internet host or network name\r
- somebody@where.xx -- an Internet email address\r
-\r
-If no match is found, the address is interpreted as a regular expression and a\r
-reverse lookup is attempted. This script will search the country names and\r
-print a list of matching entries. You can force reverse mappings with the\r
-`-r' flag (see below).\r
-\r
-For example:\r
-\r
- %% world tz us\r
- tz originated from Tanzania, United Republic of\r
- us originated from United States\r
-\r
- %% world united\r
- united matches 6 countries:\r
- ae: United Arab Emirates\r
- uk: United Kingdom (common practice)\r
- um: United States Minor Outlying Islands\r
- us: United States\r
- tz: Tanzania, United Republic of\r
- gb: United Kingdom\r
-\r
-Country codes are maintained by the RIPE Network Coordination Centre,\r
-in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The\r
-authoritative source of country code mappings is:\r
-\r
- <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>\r
-\r
-The latest known change to this information was:\r
-\r
- Friday, 5 April 2002, 12.00 CET 2002\r
-\r
-This script also knows about non-geographic top-level domains, and the\r
-additional ccTLDs reserved by IANA.\r
-\r
-Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]\r
-\r
- --dump\r
- -d\r
- Print mapping of all top-level domains.\r
-\r
- --parse file\r
- -p file\r
- Parse an iso3166-countrycodes file extracting the two letter country\r
- code followed by the country name. Note that the three letter country\r
- codes and numbers, which are also provided in the standard format\r
- file, are ignored.\r
-\r
- --outputdict\r
- -o\r
- When used in conjunction with the `-p' option, output is in the form\r
- of a Python dictionary, and country names are normalized\r
- w.r.t. capitalization. This makes it appropriate for cutting and\r
- pasting back into this file. Output is always to standard out.\r
-\r
- --reverse\r
- -r\r
- Force reverse lookup. In this mode the address can be any Python\r
- regular expression; this is matched against all country names and a\r
- list of matching mappings is printed. In normal mode (e.g. without\r
- this flag), reverse lookup is performed on addresses if no matching\r
- country code is found.\r
-\r
- -h\r
- --help\r
- Print this message.\r
-"""\r
# Substituted into the "Version:" line of the module docstring when usage()
# formats it with globals().
# NOTE(review): '$Revision$' looks like an unexpanded version-control keyword
# placeholder -- confirm whether anything still expands it.
__version__ = '$Revision$'
-\r
-\r
-import sys\r
-import getopt\r
-import re\r
-\r
# Name the script was invoked as; substituted into the "Usage:" line of the
# module docstring when usage() formats it with globals().
PROGRAM = sys.argv[0]
-\r
-\r
-\f\r
def usage(code, msg=''):
    """Print the module help text, then an optional message, and exit.

    The module docstring is used as a %-template and filled in from the
    module globals (it references names such as PROGRAM and __version__).

    Arguments:
        code -- process exit status handed to sys.exit().
        msg  -- extra text (or an exception) printed after the help text;
                nothing extra is printed when it is false.

    This function does not return: sys.exit() raises SystemExit.
    """
    # __doc__ is None when the interpreter strips docstrings (python -OO);
    # fall back to an empty template so the % below cannot raise TypeError.
    template = __doc__ or ''
    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3).
    print(template % globals())
    if msg:
        print(msg)
    sys.exit(code)
-\r
-\r
-\f\r
def resolve(rawaddr):
    """Look up the top-level domain of *rawaddr* and print where it belongs.

    The top-level domain is whatever follows the last '.', so a bare code
    ('us'), a host name ('host.domain.us') and an email address
    ('somebody@where.us') are all handled the same way.  The lookup is
    case-insensitive because DNS labels are; the table keys are lower case.

    Returns None when the domain was found in `nameorgs` or `countries` (a
    line describing it has been printed), otherwise returns *rawaddr*
    unchanged so the caller can hand it to the next lookup step.
    """
    # str.split() always yields at least one element, so [-1] is safe even
    # for an address that contains no dot at all.
    addr = rawaddr.split('.')[-1].lower()
    if addr in nameorgs:
        print('%s is in the %s top level domain' % (rawaddr, nameorgs[addr]))
        return None
    if addr in countries:
        print('%s originated from %s' % (rawaddr, countries[addr]))
        return None
    # Not resolved, bounce it to the next step
    return rawaddr
-\r
-\r
-\f\r
def reverse(regexp):
    """Print every known domain whose description matches *regexp*.

    *regexp* is compiled as a case-insensitive regular expression and
    searched for in each value of the combined `all` table.

    Returns None when at least one entry matched (the matches have been
    printed).  Returns *regexp* unchanged when nothing matched, or when
    *regexp* is not a valid regular expression, so the caller can report
    the address as unresolved.
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error:
        # Any unresolved address ends up here, so the text is not
        # necessarily a well-formed pattern; treat it as "no match"
        # instead of dying with a traceback.
        return regexp

    matches = [code for code, country in all.items() if cre.search(country)]
    if not matches:
        # not resolved, bounce it to the next step
        return regexp

    if len(matches) == 1:
        code = matches[0]
        print("%s matches code `%s', %s" % (regexp, code, all[code]))
    else:
        print('%s matches %d countries:' % (regexp, len(matches)))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None
-\r
-\r
-\f\r
def _normalize_country(country):
    """Return the upper-case name *country* with mixed-case capitalization."""
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # 'THE' stays capitalized only as the second word,
            # e.g. "Congo, The Democratic Republic of the".
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # e.g. D'IVOIRE -> D'Ivoire
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif w.find('-') != -1:
            words[i] = '-'.join([s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Print the code/name pairs found in an iso3166-countrycodes file.

    Lines are ignored until the first one starting with '-'.  After that,
    each line of the form "NAME  XX  XXX  999" contributes one entry, blank
    lines are skipped, other lines are reported as unparsable, and a second
    line starting with '-' ends the table.

    Arguments:
        file      -- path of the file to read.
        normalize -- when true, print the entries as the body of a Python
                     dictionary literal named `countries`, with lower-case
                     codes and mixed-case names; when false, print plain
                     "CODE NAME" lines exactly as found.

    If the file cannot be opened, the error text and file name are printed
    and the function returns without printing anything else.
    """
    try:
        fp = open(file)
    except IOError:
        # The exception is fetched through sys.exc_info() so that this
        # clause parses on both Python 2 and Python 3.
        exc = sys.exc_info()[1]
        print('%s : %s' % (exc.strerror, file))
        # There is nothing to read; continuing would hit an unbound `fp`.
        return

    cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = 0

    try:
        if normalize:
            print('countries = {')

        for line in fp:
            if not scanning:
                # Still in the preamble: wait for the first ruler line.
                if line[0] == '-':
                    scanning = 1
                continue

            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                if line[0] == '-':
                    # Second ruler line: end of the table.
                    break
                print('Could not parse line: %s' % line)
                continue

            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",'
                      % (code.lower(), _normalize_country(country)))
            else:
                print('%s %s' % (code, country))

        if normalize:
            print(' }')
    finally:
        # Always release the file handle, even if printing fails.
        fp.close()
-\r
-\f\r
def _run_step(step, addrs):
    """Apply *step* to every address now; return the non-empty leftovers."""
    # Built eagerly on purpose: each step prints as a side effect, and a
    # lazy map()/filter() chain (Python 3) would interleave the steps.
    results = [step(addr) for addr in addrs]
    return [left for left in results if left]


def main():
    """Command-line entry point: parse the options and run one mode.

    Modes, in order of precedence:
        -h/--help       print the help text and exit with status 0.
        -d/--dump       print both domain tables, sorted by code.
        -p/--parse FILE print the entries of an iso3166-countrycodes file
                        (as a dictionary literal when -o/--outputdict is
                        also given).
        otherwise       look up each address argument: first by top-level
                        domain (skipped with -r/--reverse), then as a
                        regular expression over the country names; anything
                        still unresolved is reported.

    An invalid option prints the help text plus the getopt error and exits
    with status 1.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error:
        # Fetched through sys.exc_info() so that this clause parses on both
        # Python 2 and Python 3.  usage() exits, so `opts` is always bound
        # below.
        usage(1, sys.exc_info()[1])

    show_help = 0
    dump = 0
    parsefile = None
    normalize = 0
    forcerev = 0

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            show_help = 1
        elif opt in ('-d', '--dump'):
            dump = 1
        elif opt in ('-p', '--parse'):
            parsefile = arg
        elif opt in ('-o', '--outputdict'):
            normalize = 1
        elif opt in ('-r', '--reverse'):
            forcerev = 1

    if show_help:
        usage(0)

    if dump:
        print('Non-geographic domains:')
        # list(...) first: dict.keys() is not sortable in place on Python 3.
        codes = list(nameorgs.keys())
        codes.sort()
        for code in codes:
            print(' %4s: %s' % (code, nameorgs[code]))

        print('\nCountry coded domains:')
        codes = list(countries.keys())
        codes.sort()
        for code in codes:
            print(' %2s: %s' % (code, countries[code]))
    elif parsefile:
        parse(parsefile, normalize)
    else:
        if not forcerev:
            args = _run_step(resolve, args)
        args = _run_step(reverse, args)
        for arg in args:
            print('Where in the world is %s?' % arg)
-\r
-\r
-\f\r
-# The mappings\r
# Top-level domains that are not ISO 3166 country codes, mapped to a short
# description.  resolve() consults this table before `countries`, and the
# --dump option lists it under "Non-geographic domains:".
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166.  IANA has 5 reserved ccTLDs as described here:
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # but I can't find an official list anywhere.
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
    # was adopted for top-level DNS zone names (although in the reverse order
    # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
    # large-scale renaming process as the UK switched from their old `Coloured
    # Book' protocols over X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete is still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
    }
-\r
-\r
-\f\r
# ISO 3166 two-letter country codes (lower case) mapped to country names.
# The entries have the shape printed by parse() in normalizing mode (the -p
# option together with -o), which the module docstring describes as suitable
# for pasting back into this file.
# Keep the existing entry order: on interpreters whose dicts preserve
# insertion order it feeds the order in which reverse() lists its matches.
countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and Mcdonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
    }
-\r
# Combined lookup table searched by reverse(): every `nameorgs` entry, then
# every `countries` entry (a country entry wins if a key exists in both).
# NOTE: inside this module the name hides the builtin all(); it is kept
# because reverse() refers to the table by this name.
all = dict(nameorgs)
all.update(countries)
-\r
-\f\r
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()