]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | #! /usr/bin/env python\r |
2 | \r | |
3 | """world -- Print mappings between country names and DNS country codes.\r | |
4 | \r | |
5 | Contact: Barry Warsaw\r | |
6 | Email: barry@python.org\r | |
7 | Version: %(__version__)s\r | |
8 | \r | |
9 | This script will take a list of Internet addresses and print out where in the\r | |
10 | world those addresses originate from, based on the top-level domain country\r | |
11 | code found in the address. Addresses can be in any of the following forms:\r | |
12 | \r | |
13 | xx -- just the country code or top-level domain identifier\r | |
14 | host.domain.xx -- any Internet host or network name\r | |
15 | somebody@where.xx -- an Internet email address\r | |
16 | \r | |
17 | If no match is found, the address is interpreted as a regular expression and a\r | |
18 | reverse lookup is attempted. This script will search the country names and\r | |
19 | print a list of matching entries. You can force reverse mappings with the\r | |
20 | `-r' flag (see below).\r | |
21 | \r | |
22 | For example:\r | |
23 | \r | |
24 | %% world tz us\r | |
25 | tz originated from Tanzania, United Republic of\r | |
26 | us originated from United States\r | |
27 | \r | |
28 | %% world united\r | |
29 | united matches 6 countries:\r | |
30 | ae: United Arab Emirates\r | |
31 | uk: United Kingdom (common practice)\r | |
32 | um: United States Minor Outlying Islands\r | |
33 | us: United States\r | |
34 | tz: Tanzania, United Republic of\r | |
35 | gb: United Kingdom\r | |
36 | \r | |
37 | Country codes are maintained by the RIPE Network Coordination Centre,\r | |
38 | in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The\r | |
39 | authoritative source of country code mappings is:\r | |
40 | \r | |
41 | <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>\r | |
42 | \r | |
43 | The latest known change to this information was:\r | |
44 | \r | |
45 | Friday, 5 April 2002, 12.00 CET 2002\r | |
46 | \r | |
47 | This script also knows about non-geographic top-level domains, and the\r | |
48 | additional ccTLDs reserved by IANA.\r | |
49 | \r | |
50 | Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]\r | |
51 | \r | |
52 | --dump\r | |
53 | -d\r | |
54 | Print mapping of all top-level domains.\r | |
55 | \r | |
56 | --parse file\r | |
57 | -p file\r | |
58 | Parse an iso3166-countrycodes file extracting the two letter country\r | |
59 | code followed by the country name. Note that the three letter country\r | |
60 | codes and numbers, which are also provided in the standard format\r | |
61 | file, are ignored.\r | |
62 | \r | |
63 | --outputdict\r | |
64 | -o\r | |
65 | When used in conjunction with the `-p' option, output is in the form\r | |
66 | of a Python dictionary, and country names are normalized\r | |
67 | w.r.t. capitalization. This makes it appropriate for cutting and\r | |
68 | pasting back into this file. Output is always to standard out.\r | |
69 | \r | |
70 | --reverse\r | |
71 | -r\r | |
72 | Force reverse lookup. In this mode the address can be any Python\r | |
73 | regular expression; this is matched against all country names and a\r | |
74 | list of matching mappings is printed. In normal mode (e.g. without\r | |
75 | this flag), reverse lookup is performed on addresses if no matching\r | |
76 | country code is found.\r | |
77 | \r | |
78 | -h\r | |
79 | --help\r | |
80 | Print this message.\r | |
81 | """\r | |
# Version placeholder; presumably expanded by the version-control system's
# keyword substitution -- TODO confirm.  Interpolated into the help text.
__version__ = '$Revision$'


import sys
import getopt
import re

# Name the script was invoked as; interpolated into the help text, which is
# the module docstring formatted with ``globals()``.
PROGRAM = sys.argv[0]
90 | \r | |
91 | \r | |
92 | \f\r | |
def usage(code, msg=''):
    """Print the help text, then an optional message, and exit.

    The module docstring doubles as the help text; its ``%(name)s``
    placeholders are filled in from the module globals.

    Args:
        code: Exit status handed to ``sys.exit``.
        msg: Optional extra text (typically an error) printed after the help.

    Raises:
        SystemExit: Always.
    """
    # __doc__ is None when docstrings are stripped (``python -OO``); skip the
    # help text in that case instead of failing on ``None % ...``.
    if __doc__:
        print(__doc__ % globals())
    if msg:
        print(msg)
    sys.exit(code)
98 | \r | |
99 | \r | |
100 | \f\r | |
def resolve(rawaddr):
    """Look up the top-level domain of an address and print where it is from.

    The text after the last dot (or the whole string when there is no dot)
    is checked against ``nameorgs`` first and ``countries`` second.

    Args:
        rawaddr: A bare TLD/country code, a host name, or an email address.

    Returns:
        None when the address was resolved and reported, otherwise
        ``rawaddr`` unchanged so the caller can pass it to the next step.
    """
    # str.split() always yields at least one element, so the last label can
    # be taken unconditionally.  Domain names are case-insensitive while the
    # lookup tables use lower-case keys, hence the lower().
    addr = rawaddr.split('.')[-1].lower()
    if addr in nameorgs:
        print('%s is in the %s top level domain' % (rawaddr, nameorgs[addr]))
        return None
    if addr in countries:
        print('%s originated from %s' % (rawaddr, countries[addr]))
        return None
    # Not resolved, bounce it to the next step
    return rawaddr
116 | \r | |
117 | \r | |
118 | \f\r | |
def reverse(regexp):
    """Search all known names for a regular expression and print the hits.

    Args:
        regexp: Python regular expression, matched case-insensitively
            against every value of the module-level ``all`` mapping.

    Returns:
        None when at least one entry matched (and was printed), otherwise
        ``regexp`` unchanged so the caller can report it as unresolved.
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error:
        # Unresolved addresses are passed in verbatim, so the text is not
        # guaranteed to be a valid pattern; treat that as "no match".
        return regexp
    matches = [code for code, country in all.items() if cre.search(country)]
    if not matches:
        # not resolved, bounce it to the next step
        return regexp
    # print results
    if len(matches) == 1:
        code = matches[0]
        print("%s matches code `%s', %s" % (regexp, code, all[code]))
    else:
        print('%s matches %d countries:' % (regexp, len(matches)))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None
138 | \r | |
139 | \r | |
140 | \f\r | |
def _normalize_country(country):
    """Return an upper-case country name converted to mixed case."""
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # A "THE" in second position is capitalized by the final branch;
            # presumably this covers names such as "CONGO, THE ...".
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # e.g. D'IVOIRE -> D'Ivoire
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif '-' in w:
            words[i] = '-'.join([s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Print the code/name pairs found in an iso3166-countrycodes file.

    Records are read between the first line starting with ``-`` and the
    next such line.  Each record holds a country name, a two-letter code,
    a three-letter code and a three-digit number; only the name and the
    two-letter code are used.

    Args:
        file: Path of the file to read.
        normalize: When true, print a ``countries = {...}`` dictionary
            literal with lower-case codes and mixed-case names; otherwise
            print ``CODE NAME`` lines exactly as found.

    Returns:
        None.  If the file cannot be opened, the error is printed and
        nothing else happens.
    """
    try:
        fp = open(file)
    except IOError as e:
        print('%s : %s' % (e.strerror, file))
        # Without a file there is nothing to scan.
        return

    cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = 0

    if normalize:
        print('countries = {')

    try:
        for line in fp:
            if not scanning:
                # Skip the preamble up to the first separator line.
                if line[0] == '-':
                    scanning = 1
                continue
            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                if line[0] == '-':
                    # Second separator: end of the table.
                    break
                print('Could not parse line: %s' % line)
                continue
            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",'
                      % (code.lower(), _normalize_country(country)))
            else:
                print('%s %s' % (code, country))
    finally:
        fp.close()

    if normalize:
        print(' }')
200 | \r | |
201 | \f\r | |
def main():
    """Parse the command line and run the requested action.

    Exactly one action runs, in this order of precedence: help, dump of all
    known domains, parsing of a country-code file, or lookup of the
    addresses given as arguments.  Addresses that neither the forward nor
    the reverse lookup can place are reported at the end.
    """
    show_help = 0
    dump = 0
    parsefile = None
    normalize = 0
    forcerev = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            show_help = 1
        elif opt in ('-d', '--dump'):
            dump = 1
        elif opt in ('-p', '--parse'):
            parsefile = arg
        elif opt in ('-o', '--outputdict'):
            normalize = 1
        elif opt in ('-r', '--reverse'):
            forcerev = 1

    if show_help:
        usage(0)

    if dump:
        print('Non-geographic domains:')
        for code in sorted(nameorgs):
            print(' %4s: %s' % (code, nameorgs[code]))

        print('\nCountry coded domains:')
        for code in sorted(countries):
            print(' %2s: %s' % (code, countries[code]))
    elif parsefile:
        parse(parsefile, normalize)
    else:
        # Each step returns None for addresses it handled.  Build real lists
        # so every forward lookup is printed before any reverse lookup, even
        # where map() and filter() are lazy.
        if not forcerev:
            forward = [resolve(addr) for addr in args]
            args = [addr for addr in forward if addr]
        backward = [reverse(addr) for addr in args]
        for arg in backward:
            if arg:
                print('Where in the world is %s?' % arg)
253 | \r | |
254 | \r | |
255 | \f\r | |
256 | # The mappings\r | |
nameorgs = {
    # Generic (non-geographic) top-level domains, including the newer ones
    # described by ICANN at http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",

    # Country-style codes that are in use but are not part of ISO 3166.
    # IANA has 5 reserved ccTLDs, described at
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # although no official list could be found.
    #
    # `uk' is the common-practice code for the United Kingdom; as far as can
    # be told the official `gb' code is routinely ignored.  According to
    # <D.M.Pick@qmw.ac.uk>, `uk' was in use long before ISO 3166 was adopted
    # for top-level DNS zone names (in the reverse order, e.g. uk.ac.qmw) and
    # was carried forward, with the order reversed, to avoid a large-scale
    # renaming as the UK moved from its old `Coloured Book' protocols over
    # X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # `su' is obsolete but still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
}
301 | \r | |
302 | \r | |
303 | \f\r | |
# Two-letter country code -> country name.  The names are in the mixed-case
# form printed by ``parse(file, 1)``; "McDonald" is corrected by hand because
# plain word capitalization yields "Mcdonald".
countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and McDonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
}
545 | \r | |
# Combined table searched by reverse(): the generic domains plus the country
# codes, with a ``countries`` entry taking precedence should a key ever
# appear in both.  Note that the name hides the builtin all() in this module.
all = dict(nameorgs)
all.update(countries)


if __name__ == '__main__':
    main()