]>
Commit | Line | Data |
---|---|---|
b780ea8d SI |
1 | # SpamAssassin rules file: body tests |
2 | # | |
3 | # Please don't modify this file as your changes will be overwritten with | |
4 | # the next update. Use /etc/mail/spamassassin/local.cf instead. | |
5 | # See 'perldoc Mail::SpamAssassin::Conf' for details. | |
6 | # | |
7 | # Note: body tests are run with long lines, so be sure to limit the | |
8 | # size of searches; use /.{0,30}/ instead of /.*/ to avoid huge | |
9 | # search times. | |
10 | # | |
11 | # Note: If you are adding a rule which looks for a phrase in the body | |
12 | # (as most of them do), please add it to rules/20_phrases.cf instead. | |
13 | # | |
14 | # <@LICENSE> | |
15 | # Licensed to the Apache Software Foundation (ASF) under one or more | |
16 | # contributor license agreements. See the NOTICE file distributed with | |
17 | # this work for additional information regarding copyright ownership. | |
18 | # The ASF licenses this file to you under the Apache License, Version 2.0 | |
19 | # (the "License"); you may not use this file except in compliance with | |
20 | # the License. You may obtain a copy of the License at: | |
21 | # | |
22 | # http://www.apache.org/licenses/LICENSE-2.0 | |
23 | # | |
24 | # Unless required by applicable law or agreed to in writing, software | |
25 | # distributed under the License is distributed on an "AS IS" BASIS, | |
26 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
27 | # See the License for the specific language governing permissions and | |
28 | # limitations under the License. | |
29 | # </@LICENSE> | |
30 | # | |
31 | ########################################################################### | |
32 | ||
cabe596e | 33 | require_version 3.004006 |
b780ea8d SI |
34 | |
35 | ########################################################################### | |
36 | # GTUBE test - the generic test for UBE. | |
37 | body GTUBE /XJS\*C4JDBQADN1\.NSBN3\*2IDNEN\*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL\*C\.34X/ | |
38 | describe GTUBE Generic Test for Unsolicited Bulk Email | |
39 | tflags GTUBE userconf noautolearn | |
40 | ||
41 | ########################################################################### | |
42 | ||
43 | # this seems to be the new fashion (as of Jul 5 2002). base64-encoded | |
44 | # parts need to be stripped before this match | |
45 | body TRACKER_ID /^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\s*\z/is | |
46 | describe TRACKER_ID Incorporates a tracking ID number | |
47 | ||
48 | body WEIRD_QUOTING /[\042\223\224\262\263\271]{2}\S{0,16}[\042\223\224\262\263\271]{2}/ | |
49 | describe WEIRD_QUOTING Weird repeated double-quotation marks | |
50 | ||
51 | ########################################################################### | |
52 | # multipart/alternative has very good accuracy, other multipart types are | |
53 | # similar to MIME_HTML_ONLY so they don't need a separate rule | |
54 | header __CTYPE_MULTIPART_ALT Content-Type =~ /multipart\/alternative/i | |
55 | meta MIME_HTML_ONLY_MULTI (__CTYPE_MULTIPART_ALT && MIME_HTML_ONLY) | |
56 | describe MIME_HTML_ONLY_MULTI Multipart message only has text/html MIME parts | |
57 | ||
58 | # note: __HIGHBITS is used in rules/20_html_tests.cf, HTML_CHARSET_FARAWAY | |
59 | meta MIME_CHARSET_FARAWAY (__MIME_CHARSET_FARAWAY && __HIGHBITS) | |
60 | describe MIME_CHARSET_FARAWAY MIME character set indicates foreign language | |
61 | tflags MIME_CHARSET_FARAWAY userconf | |
62 | ||
63 | ########################################################################### | |
64 | ||
65 | # duncf | |
66 | body EMAIL_ROT13 /\b[a-z(\]-]+\^[a-z-]+\([a-z]{2,3}\b/ | |
67 | describe EMAIL_ROT13 Body contains a ROT13-encoded email address | |
68 | test EMAIL_ROT13 ok qhabs^ebtref(pbz | |
69 | test EMAIL_ROT13 ok zxrggyre^riv-vap(pbz | |
70 | test EMAIL_ROT13 fail duncf-nospam@rogers.com | |
71 | ||
72 | # this could use more work | |
73 | body __LONGWORDS_A /\b(?:[a-z]{8,}[\s\.]+){6}/ | |
74 | body __LONGWORDS_B /\b(?:[a-z]{6,}[\s\.]+){9}/ | |
75 | body __LONGWORDS_C /\b(?:[a-z]{5,}[\s\.]+){10}/ | |
76 | meta LONGWORDS (__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_C > 1) | |
77 | describe LONGWORDS Long string of long words | |
78 | ||
79 | ||
80 | ########################################################################### | |
81 | ||
82 | ifplugin Mail::SpamAssassin::Plugin::BodyEval | |
83 | ||
84 | ||
85 | # This rule uses a simple algorithm to determine if the text and html | |
86 | # parts of a multipart/alternative message are different. | |
87 | body MPART_ALT_DIFF eval:multipart_alternative_difference('99', '100') | |
88 | describe MPART_ALT_DIFF HTML and text parts are different | |
89 | ||
90 | body MPART_ALT_DIFF_COUNT eval:multipart_alternative_difference_count('3', '1') | |
91 | describe MPART_ALT_DIFF_COUNT HTML and text parts are different | |
92 | ||
93 | body BLANK_LINES_80_90 eval:check_blank_line_ratio('80','90','4') | |
94 | describe BLANK_LINES_80_90 Message body has 80-90% blank lines | |
95 | ||
96 | # This is the ratio of spaces to non-spaces in each paragraph. Apparently, | |
97 | # messages that contain lots of spaces overall tend to be spam. | |
98 | # 8.532 10.6051 0.1897 0.982 0.75 0.01 T_VERTICAL_WORDS_TVD_1 | |
99 | # bug 6149: avoid common .jp false positives | |
100 | header __SUBJECT_UTF8_B_ENCODED Subject:raw =~ /=\?UTF-?8\?B\?/i | |
101 | body __TVD_SPACE_RATIO eval:tvd_vertical_words('0','10') | |
102 | meta TVD_SPACE_RATIO (__TVD_SPACE_RATIO && !__ISO_2022_JP_DELIM && !__SUBJECT_UTF8_B_ENCODED && !__HIGHBITS) | |
103 | ||
104 | endif | |
105 | ||
106 | ########################################################################### | |
107 | ||
108 | ifplugin Mail::SpamAssassin::Plugin::MIMEEval | |
109 | ||
110 | # 0.767 0.9097 0.0000 1.000 0.84 1.00 MULTIPART_ALT_NON_TEXT | |
111 | body MULTIPART_ALT_NON_TEXT eval:check_ma_non_text() | |
112 | ||
113 | body CHARSET_FARAWAY eval:check_for_faraway_charset() | |
114 | describe CHARSET_FARAWAY Character set indicates a foreign language | |
115 | tflags CHARSET_FARAWAY userconf | |
116 | ||
117 | # these tests don't actually use rawbody since rawbody isn't raw enough; | |
118 | # they must be written very carefully to avoid modifying the original content | |
119 | ||
120 | # MIME Content-Transfer-Encoding control rules | |
121 | rawbody __MIME_BASE64 eval:check_for_mime('mime_base64_count') | |
122 | describe __MIME_BASE64 Includes a base64 attachment | |
123 | ||
124 | rawbody __MIME_QP eval:check_for_mime('mime_qp_count') | |
125 | describe __MIME_QP Includes a quoted-printable attachment | |
126 | ||
127 | # No longer used in MIMEEval | |
128 | #rawbody MIME_BASE64_BLANKS eval:check_for_mime('mime_base64_blanks') | |
129 | #describe MIME_BASE64_BLANKS Extra blank lines in base64 encoding | |
130 | ||
131 | ||
132 | rawbody MIME_BASE64_TEXT eval:check_for_mime('mime_base64_encoded_text') | |
133 | describe MIME_BASE64_TEXT Message text disguised using base64 encoding | |
134 | ||
135 | ||
136 | body MISSING_MIME_HB_SEP eval:check_msg_parse_flags('missing_mime_head_body_separator') | |
137 | describe MISSING_MIME_HB_SEP Missing blank line between MIME header and body | |
138 | ||
139 | body MIME_HTML_MOSTLY eval:check_mime_multipart_ratio('0.00','0.01') | |
140 | describe MIME_HTML_MOSTLY Multipart message mostly text/html MIME | |
141 | ||
142 | # Steve Linford via Charlie Watts: good test! | |
143 | body MIME_HTML_ONLY eval:check_for_mime_html_only() | |
144 | describe MIME_HTML_ONLY Message only has text/html MIME parts | |
145 | ||
146 | rawbody MIME_QP_LONG_LINE eval:check_for_mime('mime_qp_long_line') | |
147 | describe MIME_QP_LONG_LINE Quoted-printable line longer than 76 chars | |
148 | ||
149 | rawbody __MIME_CHARSET_FARAWAY eval:check_for_mime('mime_faraway_charset') | |
150 | ||
151 | body MIME_BAD_ISO_CHARSET eval:check_for_mime('mime_bad_iso_charset') | |
152 | describe MIME_BAD_ISO_CHARSET MIME character set is an unknown ISO charset | |
153 | ||
154 | body MIMEPART_LIMIT_EXCEEDED eval:check_for_mime('mimepart_limit_exceeded') | |
155 | describe MIMEPART_LIMIT_EXCEEDED Message has too many MIME parts | |
156 | ||
157 | endif | |
158 | ||
159 | ########################################################################### | |
160 | ||
161 | ifplugin Mail::SpamAssassin::Plugin::URIEval | |
162 | ||
163 | body HTTPS_IP_MISMATCH eval:check_https_ip_mismatch() | |
164 | describe HTTPS_IP_MISMATCH IP to HTTPS link found in HTML | |
165 | ||
166 | body URI_TRUNCATED eval:check_uri_truncated() | |
167 | describe URI_TRUNCATED Message contained a URI which was truncated | |
168 | ||
169 | endif |