]>
Commit | Line | Data |
---|---|---|
b780ea8d SI |
1 | # SpamAssassin rules file: HTML tests |
2 | # | |
3 | # Please don't modify this file as your changes will be overwritten with | |
4 | # the next update. Use /etc/mail/spamassassin/local.cf instead. | |
5 | # See 'perldoc Mail::SpamAssassin::Conf' for details. | |
6 | # | |
7 | # <@LICENSE> | |
8 | # Licensed to the Apache Software Foundation (ASF) under one or more | |
9 | # contributor license agreements. See the NOTICE file distributed with | |
10 | # this work for additional information regarding copyright ownership. | |
11 | # The ASF licenses this file to you under the Apache License, Version 2.0 | |
12 | # (the "License"); you may not use this file except in compliance with | |
13 | # the License. You may obtain a copy of the License at: | |
14 | # | |
15 | # http://www.apache.org/licenses/LICENSE-2.0 | |
16 | # | |
17 | # Unless required by applicable law or agreed to in writing, software | |
18 | # distributed under the License is distributed on an "AS IS" BASIS, | |
19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
20 | # See the License for the specific language governing permissions and | |
21 | # limitations under the License. | |
22 | # </@LICENSE> | |
23 | # | |
24 | ########################################################################### | |
25 | ||
cabe596e | 26 | require_version 3.004006 |
b780ea8d SI |
27 | |
28 | # HTML parser tests | |
29 | # please sort these by eval type then name | |
30 | ||
31 | # HTML_SHORT_LINK_IMG_1..3: '<img>' matched in 'anchor' text (__HTML_LINK_IMAGE), split by HTML length band 0-1024, 1024-1536, 1536-2048 | |
32 | meta HTML_SHORT_LINK_IMG_1 __HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE | |
33 | meta HTML_SHORT_LINK_IMG_2 __HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE | |
34 | meta HTML_SHORT_LINK_IMG_3 __HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE | |
35 | describe HTML_SHORT_LINK_IMG_1 HTML is very short with a linked image | |
36 | describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image | |
37 | describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image | |
38 | ||
39 | # HTML length under 384 (__HTML_LENGTH_384) together with a "center" tag (__TAG_EXISTS_CENTER) | |
40 | meta HTML_SHORT_CENTER (__HTML_LENGTH_384 && __TAG_EXISTS_CENTER) | |
41 | describe HTML_SHORT_CENTER HTML is very short with CENTER tag | |
42 | ||
43 | # title/subject ratio test (html_title_subject_ratio with '3.5'), suppressed when __MIME_ATTACHMENT hits | |
44 | meta HTML_TITLE_SUBJ_DIFF __HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT | |
45 | describe HTML_TITLE_SUBJ_DIFF HTML title is much longer than Subject header | |
46 | meta HTML_CHARSET_FARAWAY (__HTML_CHARSET_FARAWAY && __HIGHBITS) | |
47 | describe HTML_CHARSET_FARAWAY A foreign language charset used in HTML markup | |
48 | tflags HTML_CHARSET_FARAWAY userconf | |
49 | # MIME_HTML_ONLY and __TAG_EXISTS_HTML are not defined in the visible part of this file - presumably provided by other rule files | |
50 | meta HTML_MIME_NO_HTML_TAG MIME_HTML_ONLY && !__TAG_EXISTS_HTML | |
51 | describe HTML_MIME_NO_HTML_TAG HTML-only message, but there is no HTML tag | |
52 | # HTML_MESSAGE is defined in the HTMLEval section below; __MIME_HTML is not defined in the visible part of this file | |
53 | meta HTML_MISSING_CTYPE (!__MIME_HTML && HTML_MESSAGE) | |
54 | describe HTML_MISSING_CTYPE Message is HTML without HTML Content-Type | |
55 | ||
56 | ########################################################################### | |
57 | # rawbody HTML tests | |
58 | # within one tag: onMouseOver= followed by window.status= (case-insensitive; each gap is 1-1000 characters other than ">") | |
59 | rawbody HIDE_WIN_STATUS /<[^>]{1,1000}onMouseOver=[^>]{1,1000}window\.status=/i | |
60 | describe HIDE_WIN_STATUS Javascript to hide URLs in browser | |
61 | # A: one or more "<!...>" runs between two word characters; B: between a non-space, non-">" character and a non-space, non-"<" character | |
62 | rawbody __OBFUSCATING_COMMENT_A /\w(?:<![^>]*>)+\w/ | |
63 | rawbody __OBFUSCATING_COMMENT_B /[^\s>](?:<![^>]*>)+[^\s<]/ | |
64 | ifplugin Mail::SpamAssassin::Plugin::HTMLEval | |
65 | ifplugin Mail::SpamAssassin::Plugin::MIMEEval | |
66 | meta OBFUSCATING_COMMENT ((__OBFUSCATING_COMMENT_A && HTML_MESSAGE) || (__OBFUSCATING_COMMENT_B && MIME_HTML_ONLY)) && !__ISO_2022_JP_DELIM | |
67 | describe OBFUSCATING_COMMENT HTML comments which obfuscate text | |
68 | endif | |
69 | endif | |
70 | ||
71 | # spam that is assembled from a Javascript array: | |
72 | # look for String.fromCharCode(name[index] ^ ...) (the XOR op) together with document.write | |
73 | rawbody __JS_FROMCHARCODE /String\.fromCharCode\s*\(\s*\S+\s*\[\s*\S+\s*\]\s*\^/ | |
74 | rawbody __JS_DOCWRITE /document\.write/ | |
75 | meta JS_FROMCHARCODE (__JS_FROMCHARCODE && __JS_DOCWRITE) | |
76 | describe JS_FROMCHARCODE Document is built from a Javascript charcode array | |
77 | ||
78 | # a good possible rule that may resurface | |
79 | # ! $ % ' ( ) , - . / : ; = ? @ _ | |
80 | #rawbody ENTITY_DEC_OTHER /\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/ | |
81 | #describe ENTITY_DEC_OTHER HTML contains needlessly encoded punctuation | |
82 | # four bytes in the range \x80-\xff, each optionally followed by one other character | |
83 | body __HIGHBITS /(?:[\x80-\xff].?){4}/ | |
84 | # note: __HIGHBITS is used by HTML_CHARSET_FARAWAY | |
85 | ||
86 | ########################################################################### | |
87 | ||
88 | ifplugin Mail::SpamAssassin::Plugin::HTMLEval | |
89 | ||
90 | # HTML control test: HTML spam rules should all have a better S/O (spam-to-overall hit ratio) than this | |
91 | body HTML_MESSAGE eval:html_test('html') | |
92 | describe HTML_MESSAGE HTML included in message | |
93 | ||
94 | # HTML comment tests (html_text_match against 'comment'); SHORT: "<!" not followed by "-", then up to 6 characters and ">" | |
95 | body HTML_COMMENT_SHORT eval:html_text_match('comment', '<!(?!-).{0,6}>') | |
96 | describe HTML_COMMENT_SHORT HTML comment is very short | |
97 | # SAVED_URL: literal "<!-- saved from url=(NNNN)" with exactly four digits inside the parentheses | |
98 | body HTML_COMMENT_SAVED_URL eval:html_text_match('comment', '<!-- saved from url=\(\d{4}\)') | |
99 | describe HTML_COMMENT_SAVED_URL HTML message is a saved web page | |
100 | ||
101 | body HTML_EMBEDS eval:html_test('embeds') | |
102 | describe HTML_EMBEDS HTML with embedded plugin object | |
103 | ||
104 | # closed_extra_ratio from 0.09 upward ('inf' is passed as the upper bound) | |
105 | body HTML_EXTRA_CLOSE eval:html_range('closed_extra_ratio', '0.09', 'inf') | |
106 | describe HTML_EXTRA_CLOSE HTML contains far too many close tags | |
107 | ||
108 | ||
109 | # max_size bands: LARGE is 5 to 6, HUGE is 6 to 'inf'; NOTE(review): how html_range treats the shared bound 6 is not visible here - confirm | |
110 | body HTML_FONT_SIZE_LARGE eval:html_range('max_size', '5', '6') | |
111 | describe HTML_FONT_SIZE_LARGE HTML font size is large | |
112 | ||
113 | body HTML_FONT_SIZE_HUGE eval:html_range('max_size', '6', 'inf') | |
114 | describe HTML_FONT_SIZE_HUGE HTML font size is huge | |
115 | ||
116 | ||
117 | ||
118 | # flag-style tests: each one passes a single key to html_test() | |
119 | body HTML_FONT_LOW_CONTRAST eval:html_test('font_low_contrast') | |
120 | describe HTML_FONT_LOW_CONTRAST HTML font color similar or identical to background | |
121 | ||
122 | body HTML_FONT_FACE_BAD eval:html_test('font_face_bad') | |
123 | describe HTML_FONT_FACE_BAD HTML font face is not a word | |
124 | ||
125 | ||
126 | body HTML_FORMACTION_MAILTO eval:html_test('form_action_mailto') | |
127 | describe HTML_FORMACTION_MAILTO HTML includes a form which sends mail | |
128 | ||
129 | # HTML_IMAGE_ONLY - not much raw HTML with images (absolute): eight 400-wide bands covering 0 to 3200 | |
130 | body HTML_IMAGE_ONLY_04 eval:html_image_only('0000','0400') | |
131 | body HTML_IMAGE_ONLY_08 eval:html_image_only('0400','0800') | |
132 | body HTML_IMAGE_ONLY_12 eval:html_image_only('0800','1200') | |
133 | body HTML_IMAGE_ONLY_16 eval:html_image_only('1200','1600') | |
134 | body HTML_IMAGE_ONLY_20 eval:html_image_only('1600','2000') | |
135 | body HTML_IMAGE_ONLY_24 eval:html_image_only('2000','2400') | |
136 | body HTML_IMAGE_ONLY_28 eval:html_image_only('2400','2800') | |
137 | body HTML_IMAGE_ONLY_32 eval:html_image_only('2800','3200') | |
138 | describe HTML_IMAGE_ONLY_04 HTML: images with 0-400 bytes of words | |
139 | describe HTML_IMAGE_ONLY_08 HTML: images with 400-800 bytes of words | |
140 | describe HTML_IMAGE_ONLY_12 HTML: images with 800-1200 bytes of words | |
141 | describe HTML_IMAGE_ONLY_16 HTML: images with 1200-1600 bytes of words | |
142 | describe HTML_IMAGE_ONLY_20 HTML: images with 1600-2000 bytes of words | |
143 | describe HTML_IMAGE_ONLY_24 HTML: images with 2000-2400 bytes of words | |
144 | describe HTML_IMAGE_ONLY_28 HTML: images with 2400-2800 bytes of words | |
145 | describe HTML_IMAGE_ONLY_32 HTML: images with 2800-3200 bytes of words | |
146 | ||
147 | # HTML_IMAGE_RATIO - more image area than text (ratio): four 0.002-wide bands covering 0.000 to 0.008 | |
148 | body HTML_IMAGE_RATIO_02 eval:html_image_ratio('0.000','0.002') | |
149 | body HTML_IMAGE_RATIO_04 eval:html_image_ratio('0.002','0.004') | |
150 | body HTML_IMAGE_RATIO_06 eval:html_image_ratio('0.004','0.006') | |
151 | body HTML_IMAGE_RATIO_08 eval:html_image_ratio('0.006','0.008') | |
152 | describe HTML_IMAGE_RATIO_02 HTML has a low ratio of text to image area | |
153 | describe HTML_IMAGE_RATIO_04 HTML has a low ratio of text to image area | |
154 | describe HTML_IMAGE_RATIO_06 HTML has a low ratio of text to image area | |
155 | describe HTML_IMAGE_RATIO_08 HTML has a low ratio of text to image area | |
156 | ||
157 | # HTML obfuscation ratio bands; no rule is defined here for 40-50%, 60-70% or 80-90% | |
158 | body HTML_OBFUSCATE_05_10 eval:html_range('obfuscation_ratio','.05','.1') | |
159 | body HTML_OBFUSCATE_10_20 eval:html_range('obfuscation_ratio','.1','.2') | |
160 | body HTML_OBFUSCATE_20_30 eval:html_range('obfuscation_ratio','.2','.3') | |
161 | body HTML_OBFUSCATE_30_40 eval:html_range('obfuscation_ratio','.3','.4') | |
162 | body HTML_OBFUSCATE_50_60 eval:html_range('obfuscation_ratio','.5','.6') | |
163 | body HTML_OBFUSCATE_70_80 eval:html_range('obfuscation_ratio','.7','.8') | |
164 | body HTML_OBFUSCATE_90_100 eval:html_range('obfuscation_ratio','.9','1.0') | |
165 | describe HTML_OBFUSCATE_05_10 Message is 5% to 10% HTML obfuscation | |
166 | describe HTML_OBFUSCATE_10_20 Message is 10% to 20% HTML obfuscation | |
167 | describe HTML_OBFUSCATE_20_30 Message is 20% to 30% HTML obfuscation | |
168 | describe HTML_OBFUSCATE_30_40 Message is 30% to 40% HTML obfuscation | |
169 | describe HTML_OBFUSCATE_50_60 Message is 50% to 60% HTML obfuscation | |
170 | describe HTML_OBFUSCATE_70_80 Message is 70% to 80% HTML obfuscation | |
171 | describe HTML_OBFUSCATE_90_100 Message is 90% to 100% HTML obfuscation | |
172 | # tag checks: unbalanced "body" / "head" tags (html_tag_balance with '!= 0') and presence of a "bgsound" tag | |
173 | body HTML_TAG_BALANCE_BODY eval:html_tag_balance('body', '!= 0') | |
174 | describe HTML_TAG_BALANCE_BODY HTML has unbalanced "body" tags | |
175 | ||
176 | body HTML_TAG_BALANCE_HEAD eval:html_tag_balance('head', '!= 0') | |
177 | describe HTML_TAG_BALANCE_HEAD HTML has unbalanced "head" tags | |
178 | ||
179 | body HTML_TAG_EXIST_BGSOUND eval:html_tag_exists('bgsound') | |
180 | describe HTML_TAG_EXIST_BGSOUND HTML has "bgsound" tag | |
181 | ||
182 | # percentage of tags that are not legal elements in HTML; only these bands are defined here (nothing for 70-90%) | |
183 | body HTML_BADTAG_40_50 eval:html_range('bad_tag_ratio','0.40','0.50') | |
184 | body HTML_BADTAG_50_60 eval:html_range('bad_tag_ratio','0.50','0.60') | |
185 | body HTML_BADTAG_60_70 eval:html_range('bad_tag_ratio','0.60','0.70') | |
186 | body HTML_BADTAG_90_100 eval:html_range('bad_tag_ratio','0.90','1.00') | |
187 | describe HTML_BADTAG_40_50 HTML message is 40% to 50% bad tags | |
188 | describe HTML_BADTAG_50_60 HTML message is 50% to 60% bad tags | |
189 | describe HTML_BADTAG_60_70 HTML message is 60% to 70% bad tags | |
190 | describe HTML_BADTAG_90_100 HTML message is 90% to 100% bad tags | |
191 | ||
192 | # percentage of unique non-elements in HTML; only these bands are defined here (nothing for 50-60% or 70-80%) | |
193 | body HTML_NONELEMENT_30_40 eval:html_range('non_element_ratio','0.30','0.40') | |
194 | body HTML_NONELEMENT_40_50 eval:html_range('non_element_ratio','0.40','0.50') | |
195 | body HTML_NONELEMENT_60_70 eval:html_range('non_element_ratio','0.60','0.70') | |
196 | body HTML_NONELEMENT_80_90 eval:html_range('non_element_ratio','0.80','0.90') | |
197 | describe HTML_NONELEMENT_30_40 30% to 40% of HTML elements are non-standard | |
198 | describe HTML_NONELEMENT_40_50 40% to 50% of HTML elements are non-standard | |
199 | describe HTML_NONELEMENT_60_70 60% to 70% of HTML elements are non-standard | |
200 | describe HTML_NONELEMENT_80_90 80% to 90% of HTML elements are non-standard | |
201 | ||
202 | # short HTML messages with certain attributes (subrules combined by HTML_SHORT_LINK_IMG_1..3) | |
203 | body __HTML_LINK_IMAGE eval:html_text_match('anchor', '<img>') | |
204 | body __HTML_LENGTH_0000_1024 eval:html_range('length', '0', '1024') | |
205 | body __HTML_LENGTH_1024_1536 eval:html_range('length', '1024', '1536') | |
206 | body __HTML_LENGTH_1536_2048 eval:html_range('length', '1536', '2048') | |
207 | # __HTML_LENGTH_512 and __COMMENT_EXISTS are not combined by any meta visible in this file - presumably used by other rule files | |
208 | body __HTML_LENGTH_512 eval:html_eval('length', '< 512') | |
209 | body __COMMENT_EXISTS eval:html_text_match('comment', '<!.*?>') | |
210 | # subrules combined by HTML_SHORT_CENTER | |
211 | body __HTML_LENGTH_384 eval:html_eval('length', '< 384') | |
212 | body __TAG_EXISTS_CENTER eval:html_tag_exists('center') | |
213 | # 'title' text containing 120 consecutive characters matched by "."; not referenced elsewhere in the visible file | |
214 | body __HTML_TITLE_120 eval:html_text_match('title', '.{120}') | |
215 | # subrule combined by HTML_TITLE_SUBJ_DIFF | |
216 | body __HTML_TITLE_SUBJ_DIFF eval:html_title_subject_ratio('3.5') | |
217 | ||
218 | # subrule combined by HTML_CHARSET_FARAWAY | |
219 | body __HTML_CHARSET_FARAWAY eval:html_charset_faraway() | |
220 | ||
221 | body HTML_IFRAME_SRC eval:check_iframe_src() | |
222 | describe HTML_IFRAME_SRC Message has HTML IFRAME tag with SRC URI | |
223 | ||
224 | else | |
225 | # HTMLEval plugin not loaded: define these two subrules as constant 0 | |
226 | meta __COMMENT_EXISTS 0 | |
227 | meta __TAG_EXISTS_CENTER 0 | |
228 | # NOTE(review): other HTMLEval subrules used by metas in this file (e.g. __HTML_LENGTH_384, __HTML_LINK_IMAGE) get no fallback here - confirm intended | |
229 | endif | |
230 | ||
231 | ########################################################################### | |
232 | ||
233 | ifplugin Mail::SpamAssassin::Plugin::MIMEEval | |
234 | ||
235 | # __MIME_ATTACHMENT also used in 20_meta_tests.cf; in this file it suppresses HTML_TITLE_SUBJ_DIFF | |
236 | body __MIME_ATTACHMENT eval:check_for_mime('mime_attachment') | |
237 | ||
238 | endif |