]>
Commit | Line | Data |
---|---|---|
b780ea8d SI |
1 | # SpamAssassin rules file: Pdfinfo rules |
2 | # | |
3 | # Please don't modify this file as your changes will be overwritten with | |
4 | # the next update. Use /etc/mail/spamassassin/local.cf instead. | |
5 | # See 'perldoc Mail::SpamAssassin::Conf' for details. | |
6 | # | |
7 | # <@LICENSE> | |
8 | # Licensed to the Apache Software Foundation (ASF) under one or more | |
9 | # contributor license agreements. See the NOTICE file distributed with | |
10 | # this work for additional information regarding copyright ownership. | |
11 | # The ASF licenses this file to you under the Apache License, Version 2.0 | |
12 | # (the "License"); you may not use this file except in compliance with | |
13 | # the License. You may obtain a copy of the License at: | |
14 | # | |
15 | # http://www.apache.org/licenses/LICENSE-2.0 | |
16 | # | |
17 | # Unless required by applicable law or agreed to in writing, software | |
18 | # distributed under the License is distributed on an "AS IS" BASIS, | |
19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
20 | # See the License for the specific language governing permissions and | |
21 | # limitations under the License. | |
22 | # </@LICENSE> | |
23 | # | |
24 | ########################################################################### | |
25 | ||
26 | # 2014-12-02 - axb | |
27 | # Info and disabled rules kept for historical & documentation reasons | |
28 | # Updated rules may be added | |
29 | # | |
30 | # Original File: pdfinfo.cf | |
31 | # Original Version: 0.6 | |
32 | # Info: $Id: pdfinfo.cf 895 2007-07-27 10:31:08Z alexb $ | |
33 | # Created: 2007-06-25 | |
34 | # Modified: 2007-07-19 | |
35 | # Original / Defunct Site URL: http://www.rulesemporium.com/plugins.htm#PDFinfo | |
36 | # Author: Dallas Engelken (aka GMD :-) | |
37 | # Rules contributed by Alex Broens | |
38 | # Requires: PDFInfo.pm plugin | |
39 | # Description: This plugin/ruleset combination will help you alleviate the new | |
40 | # PDF based stock spam which began to appear mid-June, 2007. | |
41 | # | |
42 | # | |
43 | # Changes: | |
44 | # | |
45 | # 0.6 - added easypdf producer rule and more no body text metas | |
46 | # - tags support added, see USING TAGS below. | |
47 | # 0.5 - added fuzzy test 7 | |
48 | # 0.4 - added new fuzzy for encrypted pdf image spams. | |
49 | # - added rule to check for encryption | |
50 | # 0.3 - added rules based on the new pdf_match_details() function | |
51 | # - added additional fuzzy md5 rules | |
52 | # - disabled static md5 rules as they are no longer hitting. | |
53 | # 0.2 - added static md5 to hit full page stock spam. | |
54 | # 0.1 - initial ruleset. | |
55 | # | |
56 | ||
57 | ############################################ | |
58 | # USING TAGS | |
59 | ############################################ | |
60 | ||
61 | # The following tags can be defined in an add_header line | |
62 | # | |
63 | # _PDFCOUNT_ - total number of pdf mime parts in the email | |
64 | # _PDFIMGCOUNT_ - total number of images found inside pdf mime parts | |
65 | # _PDFVERSION_ - PDF Version, space separated if there are > 1 pdf attachments | |
66 | # _PDFNAME_ - Filenames as found in the mime headers of PDF parts | |
67 | # _PDFPRODUCER_ - Producer/Application that created the PDF(s) | |
68 | # _PDFAUTHOR_ - Author of the PDF | |
69 | # _PDFCREATOR_ - Creator/Program that created the PDF(s) | |
70 | # _PDFTITLE_ - Title of the PDF File, if available | |
71 | # _PDFIMGDIM_ - If PDF Contains images, the dimensions of them will be put here | |
72 | # _PDFIMGAREA_ - The total area of all combined images inside the PDF(s) | |
73 | # _PDFMD5_ - MD5 checksum of PDF(s) - space separated | |
74 | # _PDFMD5FUZZY1_- Fuzzy1 MD5 checksum of PDF(s) - space separated | |
75 | # _PDFMD5FUZZY2_- Fuzzy2 MD5 checksum of PDF(s) - space separated | |
76 | # | |
77 | # Example add_header lines | |
78 | # | |
79 | # add_header all PDF-Info pdf=_PDFCOUNT_, pdfimg=_PDFIMGCOUNT_, ver=_PDFVERSION_, name=_PDFNAME_ | |
80 | # add_header all PDF-Details producer=_PDFPRODUCER_, author=_PDFAUTHOR_, creator=_PDFCREATOR_, title=_PDFTITLE_ | |
81 | # add_header all PDF-ImageInfo dim=_PDFIMGDIM_, area=_PDFIMGAREA_ | |
82 | # add_header all PDF-Md5 md5=_PDFMD5_, fuzzy1=_PDFMD5FUZZY1_, fuzzy2=_PDFMD5FUZZY2_ | |
83 | # | |
84 | ||
85 | ############################################ | |
86 | # GENERIC RULE EXAMPLES SHOWING EVAL USAGE | |
87 | ############################################ | |
88 | ||
89 | # you can match by name | |
90 | # body MY_TEST_PDF eval:pdf_named('mytest.pdf') | |
91 | ||
92 | # or you can write a regex to match dynamic file names. | |
93 | # body MY_TEST_PDF eval:pdf_name_regex('/^(?:my|your)test\.pdf$/') | |
94 | ||
95 | # you can make it case insensitive by using modifiers | |
96 | # body PDF_IMGXXXXX eval:pdf_name_regex('/^IMG\D+\.\.PDF$/i') | |
97 | ||
98 | # you can do exact image size matches | |
99 | # body PDF_DEMS_150_400 eval:pdf_image_size_exact(150,400) | |
100 | ||
101 | # you can do image to text, or image to html ratios | |
102 | # rawbody PDF_TO_HTML_RATIO eval:pdf_image_to_text_ratio(0.000, 0.015) | |
103 | # body PDF_TO_TEXT_RATIO eval:pdf_image_to_text_ratio(0.000, 0.008) | |
104 | ||
105 | # you can do minimum dimension matches | |
106 | # body PDF_SIZE_RANGE_1 eval:pdf_image_size_range(300,300) | |
107 | ||
108 | # you can do ranged dimension matches | |
109 | # body PDF_SIZE_RANGE_2 eval:pdf_image_size_range(200, 300, 250, 350) | |
110 | ||
111 | # you can count the number of pdf mime parts | |
112 | # body PDF_MIME_COUNT_1 eval:pdf_count(1,1) | |
113 | # body PDF_MIME_COUNT_2_PLUS eval:pdf_count(2) | |
114 | ||
115 | # you can count the number of images inside the pdfs | |
116 | # body PDF_IMG_COUNT_1 eval:pdf_image_count(1,1) | |
117 | # body PDF_IMG_COUNT_2_PLUS eval:pdf_image_count(2) | |
118 | ||
119 | # you can determine pixel coverage | |
120 | # body PDF_AREA_SMALL eval:pdf_pixel_coverage(1,100000) | |
121 | ||
122 | ||
123 | # match a md5 or fuzzy md5 signature of the pdf | |
124 | ||
125 | # body PDF_BAD_MD5 eval:pdf_match_md5('C359F8F89B290DA99DC997ED50117CDF') | |
126 | # body PDF_BAD_FUZZY eval:pdf_match_fuzzy_md5('7340821445D975EEF6F5BDE2EC257900') | |
127 | ||
128 | # Now you can match against certain details if they are found in the PDF. | |
129 | # A regex match is used on the value specified, so if you want to do an | |
130 | # exact match, use anchors ^value$ | |
131 | # | |
132 | # body GMD_AUTHOR_MOBILE eval:pdf_match_details('author','/^mobile$/') | |
133 | # body GMD_PRODUCER_GPL eval:pdf_match_details('producer','/(?i)^gpl ghostscript/') | |
134 | # body GMD_CREATOR_PSCRIPT5 eval:pdf_match_details('creator','/^PScript5/') | |
135 | # body GMD_TITLE_WORD_DOC1 eval:pdf_match_details('title','/^Microsoft Word \- Document1$/') | |
136 | # body GMD_CREATED_JULY07 eval:pdf_match_details('created','/^200707/') | |
137 | # body GMD_MODIFIED_JULY07 eval:pdf_match_details('modified','/^200707/') | |
138 | ||
139 | ifplugin Mail::SpamAssassin::Plugin::PDFInfo | |
140 | ||
141 | ####################################### | |
142 | # DISABLED RULES, ENABLE IF YOU WANT | |
143 | ####################################### | |
144 | ||
145 | # Small area | |
146 | # Disabled - Hits Ham | |
147 | # body GMD_PDF_SMALL_AREA eval:pdf_pixel_coverage(1,100000) | |
148 | # describe GMD_PDF_SMALL_AREA PDF Area covers 150k pixels or less | |
149 | # score GMD_PDF_SMALL_AREA 0.75 | |
150 | # counts GMD_PDF_SMALL_AREA 51s/15h of 10615 corpus (5652s/4963h AxB) 06/25/07 | |
151 | ||
152 | # NOTE - people do send pdf's without message bodies! | |
153 | # Disabled - Hits Ham | |
154 | # body GMD_PDF_NO_TXT eval:pdf_image_to_text_ratio(0.000, 0.005) | |
155 | # describe GMD_PDF_NO_TXT Low rawbody to pixel area ratio | |
156 | # score GMD_PDF_NO_TXT 0.01 | |
157 | # counts GMD_PDF_NO_TXT 64s/3h of 10615 corpus (5652s/4963h AxB) 06/25/07 | |
158 | ||
159 | #################################### | |
160 | # HERE ARE THE LIVE RULES | |
161 | #################################### | |
162 | ||
163 | ||
164 | ||
165 | ###################################################################################################### | |
166 | # pdf image dimensions | |
167 | ||
168 | # thin horizontal, common stox. | |
169 | body GMD_PDF_HORIZ eval:pdf_image_size_range(100, 450, 240, 800) | |
170 | describe GMD_PDF_HORIZ Contains pdf 100-240 (high) x 450-800 (wide) | |
171 | score GMD_PDF_HORIZ 0.25 | |
172 | # counts GMD_PDF_HORIZ 135s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
173 | # counts GMD_PDF_HORIZ 278s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07 | |
174 | ||
175 | # near square, and small. common stox. | |
176 | body GMD_PDF_SQUARE eval:pdf_image_size_range(180, 180, 360, 360) | |
177 | describe GMD_PDF_SQUARE Contains pdf 180-360 (high) x 180-360 (wide) | |
178 | score GMD_PDF_SQUARE 0.50 | |
179 | # counts GMD_PDF_SQUARE 36s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
180 | # counts GMD_PDF_SQUARE 46s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07 | |
181 | ||
182 | # thin vertical, very tall. common stox. | |
183 | body GMD_PDF_VERT eval:pdf_image_size_range(450, 100, 800, 240) | |
184 | describe GMD_PDF_VERT Contains pdf 450-800 (high) x 100-240 (wide) | |
185 | score GMD_PDF_VERT 0.90 | |
186 | # counts GMD_PDF_VERT 24s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
187 | # counts GMD_PDF_VERT 10s/0h of 11773 corpus (10988s/785h AxB2-TRAPS) 07/11/07 | |
188 | ||
189 | ###################################################################################################### | |
190 | # static checksums | |
191 | ||
192 | # all static md5 spam runs are complete as of 7/11 | |
193 | # if there are more, we'll add new rules. | |
194 | ||
195 | # removed fuzzy rules dated 2007 | |
196 | # Get fuzzy info: | |
197 | # cat msg.eml | spamassassin --debug pdfinfo 2>&1 | grep fuzzy 2>&1 | |
198 | ||
199 | # sample rules ONLY | |
200 | # fuzzy checksum for bad stox | |
201 | #body GMD_PDF_FUZZY1_T1 eval:pdf_match_fuzzy_md5('57EBC1FFB1A24CC14AE23E1E227C3484') | |
202 | #describe GMD_PDF_FUZZY1_T1 Fuzzy MD5 Match 57EBC1FFB1A24CC14AE23E1E227C3484 | |
203 | #score GMD_PDF_FUZZY1_T1 0.001 | |
204 | ||
205 | # same as rule above using fuzzy md5 of pdf structure | |
206 | #body GMD_PDF_FUZZY2_T1 eval:pdf_match_fuzzy_md5('653C8AA9FDFD03D382523488058360A2') | |
207 | #describe GMD_PDF_FUZZY2_T1 Fuzzy MD5 Match 653C8AA9FDFD03D382523488058360A2 | |
208 | #score GMD_PDF_FUZZY2_T1 0.001 | |
209 | ||
210 | ||
211 | ###################################################################################################### | |
212 | # pdf_match_details() | |
213 | ||
214 | # from embedded link spam | |
215 | #body GMD_AUTHOR_COLET eval:pdf_match_details('author','/^colet$/') | |
216 | #describe GMD_AUTHOR_COLET PDF author was 'colet' | |
217 | #score GMD_AUTHOR_COLET 4.50 | |
218 | # counts GMD_AUTHOR_COLET 1s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07 | |
219 | # counts GMD_AUTHOR_COLET 2s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
220 | ||
221 | # from full page pdf stock spammer. | |
222 | #body GMD_AUTHOR_MOBILE eval:pdf_match_details('author','/^mobile$/') | |
223 | #describe GMD_AUTHOR_MOBILE PDF author was 'mobile' | |
224 | #score GMD_AUTHOR_MOBILE 2.75 | |
225 | # counts GMD_AUTHOR_MOBILE 2s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
226 | # counts GMD_AUTHOR_MOBILE 55s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07 | |
227 | ||
228 | # txt only stock spam | |
229 | #body GMD_AUTHOR_OOO eval:pdf_match_details('author','/^openofficeuser$/') | |
230 | #describe GMD_AUTHOR_OOO PDF author was 'openofficeuser' | |
231 | #score GMD_AUTHOR_OOO 1.75 | |
232 | # counts GMD_AUTHOR_OOO 1s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07 | |
233 | # counts GMD_AUTHOR_OOO 118s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
234 | ||
235 | # txt only stock spam | |
236 | #body GMD_AUTHOR_HPADMIN eval:pdf_match_details('author','/^HP_Administrator/') | |
237 | #describe GMD_AUTHOR_HPADMIN PDF author was 'HP_Administrator' | |
238 | #score GMD_AUTHOR_HPADMIN 0.25 | |
239 | # counts GMD_AUTHOR_HPADMIN 105s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
240 | # counts GMD_AUTHOR_HPADMIN 27s/0h of 11773 corpus (10988s/785h AxB2-TRAPS) 07/11/07 | |
241 | ||
242 | # generic rule for software used to produce the pdf. | |
243 | body GMD_PRODUCER_GPL eval:pdf_match_details('producer','/^(?:gnu|gpl) ghostscript/i') | |
244 | describe GMD_PRODUCER_GPL PDF producer was GPL Ghostscript | |
245 | score GMD_PRODUCER_GPL 0.25 | |
246 | # counts GMD_PRODUCER_GPL 227s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
247 | # counts GMD_PRODUCER_GPL 85s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07 | |
248 | ||
249 | # generic rule for software used to produce the pdf. | |
250 | body GMD_PRODUCER_POWERPDF eval:pdf_match_details('producer','/^PowerPdf 0\./') | |
251 | describe GMD_PRODUCER_POWERPDF PDF producer was PowerPDF | |
252 | score GMD_PRODUCER_POWERPDF 0.25 | |
253 | # counts GMD_PRODUCER_POWERPDF 0s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07 | |
254 | # counts GMD_PRODUCER_POWERPDF 0s/0h of 5641 corpus (4064s/1577h AxB-MANUAL) 07/11/07 | |
255 | ||
256 | # producer is bcl | |
257 | body GMD_PRODUCER_EASYPDF eval:pdf_match_details('producer','/^BCL easyPDF/') | |
258 | describe GMD_PRODUCER_EASYPDF PDF producer was BCL easyPDF | |
259 | score GMD_PRODUCER_EASYPDF 0.25 | |
260 | ||
261 | # simple check for encryption used inside pdf. | |
262 | # recommend meta with something else... | |
263 | body GMD_PDF_ENCRYPTED eval:pdf_is_encrypted() | |
264 | describe GMD_PDF_ENCRYPTED Attached PDF is encrypted | |
265 | score GMD_PDF_ENCRYPTED 0.60 | |
266 | # counts GMD_PDF_ENCRYPTED 13s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07 | |
267 | ||
268 | # simple check for empty msg body when there is one or more pdf attachments present. | |
269 | body GMD_PDF_EMPTY_BODY eval:pdf_is_empty_body() | |
270 | describe GMD_PDF_EMPTY_BODY Attached PDF with empty message body | |
271 | score GMD_PDF_EMPTY_BODY 0.25 | |
272 | # counts GMD_PDF_EMPTY_BODY 1638s/20h of 27034 corpus (24636s/2398h AxB-MANUAL) 07/19/07 | |
dfdd1e08 | 273 | priority GMD_PDF_EMPTY_BODY 2000 # workaround for Bug 8070 |
b780ea8d SI |
274 | |
275 | ###################################################################################################### | |
276 | # metas | |
277 | #meta __GMD_PDF_CHECKSUM ( GMD_PDF_FUZZY1_T1 || GMD_PDF_FUZZY2_T1 || GMD_PDF_FUZZY2_T2 || GMD_PDF_FUZZY2_T3 || GMD_PDF_FUZZY2_T4 || GMD_PDF_FUZZY2_T5 || GMD_PDF_FUZZY2_T6 || GMD_PDF_FUZZY2_T7 ||GMD_PDF_FUZZY2_T9 || GMD_PDF_FUZZY2_T10 || GMD_PDF_FUZZY2_T11 || GMD_PDF_FUZZY2_T12 ) | |
278 | #meta __GMD_PDF_DETAIL ( GMD_AUTHOR_COLET || GMD_AUTHOR_MOBILE || GMD_AUTHOR_OOO || GMD_AUTHOR_HPADMIN || GMD_PRODUCER_GPL || GMD_PRODUCER_POWERPDF || GMD_PRODUCER_EASYPDF ) | |
279 | meta __GMD_PDF_DIMS ( GMD_PDF_VERT || GMD_PDF_HORIZ || GMD_PDF_SQUARE ) | |
280 | meta __GMD_PDF_PRODUCERS ( GMD_PRODUCER_GPL || GMD_PRODUCER_POWERPDF || GMD_PRODUCER_EASYPDF ) | |
281 | ||
282 | # rule hits ham by itself, so use it only inside metas. | |
283 | body __GMD_PDF_NO_TXT eval:pdf_image_to_text_ratio(0.000, 0.005) | |
284 | ||
285 | # meta checksum hit with image dimensions | |
286 | #meta GMD_PDF_STOX_M1 ( __GMD_PDF_CHECKSUM && __GMD_PDF_DIMS) | |
287 | #describe GMD_PDF_STOX_M1 PDF Stox spam | |
288 | #score GMD_PDF_STOX_M1 3.25 | |
289 | # counts GMD_PDF_STOX_M1 159s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
290 | # counts GMD_PDF_STOX_M1 40s/0h of 11773 corpus (10988s/785h AxB2-TRAPS) 07/11/07 | |
291 | ||
292 | # meta checksum hit to pdf details | |
293 | #meta GMD_PDF_STOX_M2 ( __GMD_PDF_CHECKSUM && __GMD_PDF_DETAIL ) | |
294 | #describe GMD_PDF_STOX_M2 PDF Stox spam | |
295 | #score GMD_PDF_STOX_M2 2.95 | |
296 | # counts GMD_PDF_STOX_M2 223s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07 | |
297 | # counts GMD_PDF_STOX_M2 29s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07 | |
298 | ||
299 | # meta dimensions and encryption | |
300 | #meta GMD_PDF_STOX_M3 ( __GMD_PDF_DIMS && GMD_PDF_ENCRYPTED ) | |
301 | #describe GMD_PDF_STOX_M3 PDF Stox spam | |
302 | #score GMD_PDF_STOX_M3 2.25 | |
303 | # counts GMD_PDF_STOX_M3 12s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07 | |
304 | ||
305 | # meta checksum with no text | |
306 | #meta GMD_PDF_STOX_M4 ( __GMD_PDF_CHECKSUM && (__GMD_PDF_NO_TXT || GMD_PDF_EMPTY_BODY)) | |
307 | #describe GMD_PDF_STOX_M4 PDF Stox spam | |
308 | #score GMD_PDF_STOX_M4 2.95 | |
309 | ||
310 | # meta no body text along with automated pdf production. | |
311 | #meta GMD_PDF_STOX_M5 ( __GMD_PDF_PRODUCERS && (__GMD_PDF_NO_TXT || GMD_PDF_EMPTY_BODY)) | |
312 | #describe GMD_PDF_STOX_M5 PDF Stox Spam | |
313 | #score GMD_PDF_STOX_M5 1.00 | |
314 | ||
315 | endif |