]> git.proxmox.com Git - proxmox-spamassassin.git/blob - sa-updates/20_uri_tests.cf
bump version to 3.4.6-2+1
[proxmox-spamassassin.git] / sa-updates / 20_uri_tests.cf
1 # SpamAssassin rules file: URI tests
2 #
3 # Please don't modify this file as your changes will be overwritten with
4 # the next update. Use /etc/mail/spamassassin/local.cf instead.
5 # See 'perldoc Mail::SpamAssassin::Conf' for details.
6 #
7 # <@LICENSE>
8 # Licensed to the Apache Software Foundation (ASF) under one or more
9 # contributor license agreements. See the NOTICE file distributed with
10 # this work for additional information regarding copyright ownership.
11 # The ASF licenses this file to you under the Apache License, Version 2.0
12 # (the "License"); you may not use this file except in compliance with
13 # the License. You may obtain a copy of the License at:
14 #
15 # http://www.apache.org/licenses/LICENSE-2.0
16 #
17 # Unless required by applicable law or agreed to in writing, software
18 # distributed under the License is distributed on an "AS IS" BASIS,
19 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 # See the License for the specific language governing permissions and
21 # limitations under the License.
22 # </@LICENSE>
23 #
24 ###########################################################################
25
# Version gate: 3.004006 is the numeric form of SpamAssassin 3.4.6. Per
# Mail::SpamAssassin::Conf, a SpamAssassin of any other version warns and
# ignores the rest of this file.
26 require_version 3.004006
27
28 # possible IDN spoofing attack: https://web.archive.org/web/20141006091906/https://www.shmoo.com/idn/homograph.txt
29 # not expecting any hits on this (yet)
# Matches a numeric character reference "&#NNN;" that appears before the
# first "/" following the scheme, where NNN is either three digits starting
# with 3-9 or any run of four or more digits.
30 uri HIGH_CODEPAGE_URI /^https?:\/\/[^\/]*\&\#(?:\d{4,}|[3456789]\d\d);/i
# Per Mail::SpamAssassin::Conf, "userconf" flags a test as requiring user
# configuration before it can be used.
31 tflags HIGH_CODEPAGE_URI userconf
32
33 ###########################################################################
34
35 # Redirector URI patterns
# Each pattern matches the redirector part of a URI and uses exactly one
# capturing group for the embedded target URI; every other group below is
# non-capturing or a lookbehind, as redirector_pattern requires.
36 redirector_pattern /^http:\/\/chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i
37 redirector_pattern /^http:\/\/www(?:\d+)?\.nate\.com\/r\/\w+\/(.*)$/i
38 redirector_pattern /^http:\/\/.+\.gov\/(?:.*\/)?externalLink\.jhtml\?.*url=(.*?)(?:&.*)?$/i
39 redirector_pattern /^http:\/\/redir\.internet\.com\/.+?\/.+?\/(.*)$/i
40 redirector_pattern /^http:\/\/(?:.*?\.)?adtech\.de\/.*(?:;|\|)link=(.*?)(?:;|$)/i
# The lookbehind makes sure "goto=" is a whole query parameter, i.e. it is
# directly preceded by "?" or "&".
41 redirector_pattern m'^http.*?/redirect\.php\?.*(?<=[?&])goto=(.*?)(?:$|[&\#])'i
42 redirector_pattern m'^https?:/*(?:[^/]+\.)?emf\d\.com/r\.cfm.*?&r=(.*)'i
43
# Host part made up only of digits and dots, terminated by ":", "/", "?",
# "#" or the end of the URI. The pattern does not require four octets, so a
# bare run of digits also matches.
44 uri NUMERIC_HTTP_ADDR m{^https?://[\d.]+(?:[:/?\#]|$)}i
45 describe NUMERIC_HTTP_ADDR Uses a numeric IP address in URL
46
47 # Theo sez:
48 # Have gotten FPs off this, and whitespace can't be in the host, so...
49 # % Visit my homepage: http://i.like.foo.com %
50 # Also ignore some bad parses like http://foo.bar%20http://foo.bar
# Matches a %XX hex escape that occurs before the first "/", whitespace,
# "?", "&", "#" or "'" after the scheme. The negative lookahead skips the
# escapes %20, %3C and %3E when they are immediately followed by "http:",
# "https:" or "mailto:" (the bad-parse case above). There is no /i flag, so
# the scheme must be lower case.
51 uri HTTP_ESCAPED_HOST /^https?\:\/\/[^\/\s\?\&\#\']*(?!%(?:20|3[cCeE])(?:https?:|mailto:))%[0-9a-fA-F][0-9a-fA-F]/
52 describe HTTP_ESCAPED_HOST Uses %-escapes inside a URL's hostname
53
54 # look for URI with escaped 0-9, A-Z, or a-z characters (all other safe
55 # characters have been well-tested, but are sometimes unnecessarily escaped
56 # in nonspam; requiring "http" or "https" also reduces false positives).
# Escape ranges accepted by the alternation (hex digits are case-insensitive):
#   3\d          -> %30-%39  (0-9)
#   [46][1-9a-f] -> %41-%4F and %61-%6F  (A-O, a-o)
#   [57][\da]    -> %50-%5A and %70-%7A  (P-Z, p-z)
# The escape may appear anywhere after the scheme, up to the first whitespace.
57 uri HTTP_EXCESSIVE_ESCAPES /^https?:\/\/\S*%(?:3\d|[46][1-9a-f]|[57][\da])/i
58 describe HTTP_EXCESSIVE_ESCAPES Completely unnecessary %-escapes inside a URL
59
60 # bug 1801
# Dotted-quad host followed, within at most 20 further characters, by one of
# the substrings "cgi", "click", "ads" or "id=".
61 uri IP_LINK_PLUS m{^https?://\d+\.\d+\.\d+\.\d+.{0,20}(?:cgi|click|ads|id=)}i
62 describe IP_LINK_PLUS Dotted-decimal IP address followed by CGI
63
64 # allow ports 80 and 443, which are http and https, respectively, plus 8080
65 # we don't want to hit http://www.cnn.com:USArticle1840@www.liquidshirts.com/
66 # though, which actually doesn't have a weird port in it.
# ":" must be followed directly by digits and then by "/", whitespace or the
# end of the URI. The three lookbehinds are evaluated after the digits and
# reject the match when the port is exactly 80, 443 or 8080.
67 uri WEIRD_PORT m{https?://[^/?\s]+?:\d+(?<!:80)(?<!:443)(?<!:8080)(?:/|\s|$)}
68 describe WEIRD_PORT Uses non-standard port number for HTTP
69
70 # Matt Cline
71 # Pretty good for most folks, except for jm: I have a really stupid
72 # e-commerce bunch obfuscating their URLs with this for some reason. screw 'em
73 # jm: hesitant to remove this outright; it should be good against phishers
74 #uri HTTP_ENTITIES_HOST m{https?://[^\s\">/]*\&\#[\da-f]+}i
75 #describe HTTP_ENTITIES_HOST URI obscured with character entities
76
# rd.yahoo.com URIs whose path starts with four digits, or with the whole
# word "partner" or "dir".
77 uri YAHOO_RD_REDIR m{^https?\://rd\.yahoo\.com/(?:[0-9]{4}|partner\b|dir\b)}i
78 describe YAHOO_RD_REDIR Has Yahoo Redirect URI
79
# Any URI on host drs.yahoo.com.
80 uri YAHOO_DRS_REDIR m{^https?://drs\.yahoo\.com/}i
81 describe YAHOO_DRS_REDIR Has Yahoo Redirect URI
82
83 # "www" hidden as "%77%77%77", "ww%77", etc.
84 # note: *not* anchored to start of string, to catch use of redirectors
# Allows up to two arbitrary characters between "http://" and the first "%77".
# NOTE(review): there is no /i flag and no "s?", so only the literal
# lower-case "http://" is matched; "https://" is not covered. Confirm that
# this is intended before changing it.
85 uri HTTP_77 /http:\/\/.{0,2}\%77/
86 describe HTTP_77 Contains an URL-encoded hostname (HTTP77)
87
88 # a.com.b.c
# One or more labels, then "com.", then at least two more dot-terminated
# labels. The negative lookahead exempts hosts that continue with
# "s3.amazonaws.com" (optionally prefixed by a two-letter label) or with
# "<label>.psmtp.com".
89 uri SPOOF_COM2OTH m{^https?://(?:\w+\.)+?com\.(?!(?:[a-z]{2}\.)?s3\.amazonaws\.com|\w+\.psmtp\.com)(?:\w+\.){2}}i
90 describe SPOOF_COM2OTH URI contains ".com" in middle
91
92 # a.com.b.com
# Sub-rule: "com." in the middle of the host, followed by further labels
# ending in "com". It carries the same s3.amazonaws.com / psmtp.com
# exemptions as SPOOF_COM2OTH.
93 uri __SPOOF_COM2COM m{^https?://(?:\w+\.)+?com\.(?!(?:[a-z]{2}\.)?s3\.amazonaws\.com|\w+\.psmtp\.com)(?:\w+\.)+?com\b}i
# Fires only when the sub-rule matched and SPOOF_COM2OTH did not.
94 meta SPOOF_COM2COM __SPOOF_COM2COM && !SPOOF_COM2OTH
95 describe SPOOF_COM2COM URI contains ".com" in middle and end
96
97 # a.net.b.com
# "net." or "org." in the middle of the host, followed by further labels
# ending in "com". Only the s3.amazonaws.com continuation is exempted here;
# there is no psmtp.com exemption in this rule.
98 uri SPOOF_NET2COM m{^https?://(?:\w+\.)+?(?:net|org)\.(?!(?:[a-z]{2}\.)?s3\.amazonaws\.com)(?:\w+\.)+?com\b}i
99 describe SPOOF_NET2COM URI contains ".net" or ".org", then ".com"
100
# Looks, before the first "/", "?", "&" or "#", for a word-bounded run of six
# or more hex digits. The negative lookahead rejects a run in which three
# consecutive letters a-f occur after at most twelve leading hex digits.
101 uri URI_HEX m%^https?://[^/?&\#]*\b(?![0-9a-f]{0,12}[a-f]{3})[0-9a-f]{6,}\b%i
102 describe URI_HEX URI hostname has long hexadecimal sequence
103
# Looks, before the first "/", "?", "&" or "#", for seven consecutive letters
# from the listed set. The set leaves out "h" and "y" as well as the vowels
# a, e, i, o, u.
104 uri URI_NOVOWEL m%^https?://[^/?&\#]*[bcdfgjklmnpqrstvwxz]{7}%i
105 describe URI_NOVOWEL URI hostname has long non-vowel sequence
106 tflags URI_NOVOWEL userconf # lock scores low
107
# Matches "gone.php", "opened.php" or "out.php" anywhere in the URI, provided
# the name starts at a word boundary.
108 uri URI_UNSUBSCRIBE /\b(?:gone|opened|out)\.php/i
109 describe URI_UNSUBSCRIBE URI contains suspicious unsubscribe link
110
111
112 # bug 3896: URIs in various TLDs, other than 3rd level www
# Shared shape of the two rules below:
#   - the scheme is optional;
#   - the lookbehind rejects a leading host label that is exactly "www"
#     directly after a "/";
#   - the label before the TLD must be at least seven non-dot characters;
#   - what follows the "/" after the TLD must be at least 15 non-whitespace
#     characters and must contain a "?".
113 uri URI_NO_WWW_INFO_CGI /^(?:https?:\/\/)?[^\/]+(?<!\/www)\.[^.]{7,}\.info\/(?=\S{15,})\S*\?/i
114 describe URI_NO_WWW_INFO_CGI CGI in .info TLD other than third-level "www"
115
116 uri URI_NO_WWW_BIZ_CGI /^(?:https?:\/\/)?[^\/]+(?<!\/www)\.[^.]{7,}\.biz\/(?=\S{15,})\S*\?/i
117 describe URI_NO_WWW_BIZ_CGI CGI in .biz TLD other than third-level "www"
118
119 ###########################################################################
120
# Dotted-quad host that does not begin with one of these prefixes, which the
# leading negative lookahead excludes:
#   10.   127.   169.254.   172.16. - 172.31.   192.168.
# The address must end at a word boundary and must not be followed by "." or
# "-".
121 uri NORMAL_HTTP_TO_IP m{^https?://(?!1(?:0|27|69\.254|72\.(?:1[6-9]|2\d|3[01])|92\.168)\.)\d+\.\d+\.\d+\.\d+\b(?![.-])}i
122 describe NORMAL_HTTP_TO_IP URI host has a public dotted-decimal IPv4 address
123