]>
Commit | Line | Data |
---|---|---|
758c7b6b DM |
1 | package PMG::RuleDB::Spam; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | use Carp; | |
6 | use DBI; | |
7 | use Digest::SHA; | |
8 | use Time::HiRes qw (gettimeofday); | |
9 | ||
10 | use PVE::SafeSyslog; | |
11 | use Mail::SpamAssassin; | |
12 | ||
13 | use PMG::Utils; | |
14 | use PMG::RuleDB::Object; | |
15 | ||
16 | use base qw(PMG::RuleDB::Object); | |
17 | ||
18 | sub otype { | |
19 | return 3000; | |
20 | } | |
21 | ||
22 | sub oclass { | |
23 | return 'what'; | |
24 | } | |
25 | ||
26 | sub otype_text { | |
27 | return 'Spam Filter'; | |
28 | } | |
29 | ||
30 | sub oicon { | |
31 | return 'spam.gif'; | |
32 | } | |
33 | ||
34 | ||
35 | sub oinfo { | |
36 | return 'Mark all Spam-Emails.'; | |
37 | } | |
38 | ||
39 | sub new { | |
40 | my ($type, $level, $ogroup) = @_; | |
41 | ||
42 | my $class = ref($type) || $type; | |
43 | ||
44 | my $self = $class->SUPER::new(otype(), $ogroup); | |
45 | ||
46 | $level = 5 if !defined ($level); | |
47 | ||
48 | $self->{level} = $level; | |
49 | ||
50 | return $self; | |
51 | } | |
52 | ||
53 | sub load_attr { | |
54 | my ($type, $ruledb, $id, $ogroup, $value) = @_; | |
55 | ||
56 | my $class = ref($type) || $type; | |
57 | ||
58 | defined($value) || croak "undefined value: ERROR"; | |
59 | ||
60 | my $obj = $class->new($value, $ogroup); | |
61 | $obj->{id} = $id; | |
62 | ||
63 | $obj->{digest} = Digest::SHA::sha1_hex($id, $value, $ogroup); | |
64 | ||
65 | return $obj; | |
66 | } | |
67 | ||
68 | sub save { | |
69 | my ($self, $ruledb) = @_; | |
70 | ||
71 | defined($self->{ogroup}) || croak "undefined ogroup: ERROR"; | |
72 | defined($self->{level}) || croak "undefined spam level: ERROR"; | |
73 | ||
74 | if (defined ($self->{id})) { | |
75 | # update | |
76 | ||
77 | $ruledb->{dbh}->do( | |
78 | "UPDATE Object SET Value = ? WHERE ID = ?", | |
79 | undef, $self->{level}, $self->{id}); | |
80 | ||
81 | } else { | |
82 | # insert | |
83 | ||
84 | my $sth = $ruledb->{dbh}->prepare( | |
85 | "INSERT INTO Object (Objectgroup_ID, ObjectType, Value) " . | |
86 | "VALUES (?, ?, ?);"); | |
87 | ||
88 | $sth->execute($self->ogroup, $self->otype, $self->{level}); | |
89 | ||
90 | $self->{id} = PMG::Utils::lastid ($ruledb->{dbh}, 'object_id_seq'); | |
91 | } | |
92 | ||
93 | return $self->{id}; | |
94 | } | |
95 | ||
96 | sub parse_addrlist { | |
97 | my ($list) = @_; | |
98 | ||
99 | my $adlist = {}; | |
100 | ||
101 | foreach my $addr (split ('\s*,\s*', $list)) { | |
102 | $addr = lc $addr; | |
103 | my $regex = $addr; | |
104 | # SA like checks | |
105 | $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ? | |
106 | $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars | |
107 | $regex =~ tr/?/./; # replace "?" with "." | |
108 | $regex =~ s/\*+/\.\*/g; # replace "*" with ".*" | |
109 | ||
110 | # we use a hash for extra fast testing | |
111 | $adlist->{$addr} = "^${regex}\$"; | |
112 | } | |
113 | ||
114 | return $adlist; | |
115 | } | |
116 | ||
117 | sub check_addrlist { | |
118 | my ($list, $addrlst) = @_; | |
119 | ||
120 | foreach my $addr (@$addrlst) { | |
121 | ||
122 | $addr = lc $addr; | |
123 | ||
124 | return 1 if defined ($list->{$addr}); | |
125 | ||
126 | study $addr; | |
127 | ||
128 | foreach my $r (values %{$list}) { | |
129 | if ($addr =~ qr/$r/i) { | |
130 | return 1; | |
131 | } | |
132 | } | |
133 | } | |
134 | ||
135 | return 0; | |
136 | } | |
137 | ||
138 | sub get_blackwhite { | |
139 | my ($dbh, $entity, $msginfo) = @_; | |
140 | ||
141 | my $target_info = {}; | |
142 | ||
143 | my $targets = $msginfo->{targets}; | |
144 | ||
145 | my $cond = ''; | |
146 | foreach my $r (@$targets) { | |
147 | my $pmail = $msginfo->{pmail}->{$r} || lc ($r); | |
148 | my $qr = $dbh->quote ($pmail); | |
149 | $cond .= " OR " if $cond; | |
150 | $cond .= "pmail = $qr"; | |
151 | } | |
152 | ||
153 | eval { | |
154 | my $query = "SELECT * FROM UserPrefs WHERE " . | |
155 | "($cond) AND (Name = 'BL' OR Name = 'WL')"; | |
156 | my $sth = $dbh->prepare($query); | |
157 | ||
158 | $sth->execute(); | |
159 | ||
160 | while (my $ref = $sth->fetchrow_hashref()) { | |
161 | my $pmail = lc ($ref->{pmail}); | |
162 | if ($ref->{name} eq 'WL') { | |
163 | $target_info->{$pmail}->{whitelist} = | |
164 | parse_addrlist($ref->{data}); | |
165 | } elsif ($ref->{name} eq 'BL') { | |
166 | $target_info->{$pmail}->{blacklist} = | |
167 | parse_addrlist($ref->{data}); | |
168 | } | |
169 | } | |
170 | ||
171 | $sth->finish; | |
172 | }; | |
173 | if (my $err = $@) { | |
174 | syslog('err', $err); | |
175 | } | |
176 | ||
177 | return $target_info; | |
178 | } | |
179 | ||
180 | sub what_match_targets { | |
181 | my ($self, $queue, $entity, $msginfo, $dbh) = @_; | |
182 | ||
183 | my $target_info; | |
184 | ||
185 | if (!$queue->{spam_analyzed}) { | |
186 | $self->analyze_spam($queue, $entity, $msginfo); | |
187 | $queue->{blackwhite} = get_blackwhite($dbh, $entity, $msginfo); | |
188 | $queue->{spam_analyzed} = 1; | |
189 | } | |
190 | ||
191 | if ($msginfo->{testmode}) { | |
192 | $queue->{sa_score} = 100 if $queue->{sa_score} > 100; | |
193 | my $data; | |
194 | foreach my $s (@{$queue->{sa_data}}) { | |
195 | next if $s->{rule} eq 'AWL'; | |
196 | push @$data, $s; | |
197 | } | |
198 | $queue->{sa_data} = $data; | |
199 | } | |
200 | ||
201 | if (defined($queue->{sa_score}) && $queue->{sa_score} >= $self->{level}) { | |
202 | ||
203 | my $info = { | |
204 | sa_score => $queue->{sa_score}, | |
205 | sa_max => $self->{level}, | |
206 | sa_data => $queue->{sa_data}, | |
207 | sa_hits => $queue->{sa_hits} | |
208 | }; | |
209 | ||
210 | foreach my $t (@{$msginfo->{targets}}) { | |
211 | my $list; | |
212 | my $pmail = $msginfo->{pmail}->{$t} || $t; | |
213 | if ($queue->{blackwhite}->{$pmail} && | |
214 | ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) && | |
215 | check_addrlist($list, $queue->{all_from_addrs})) { | |
216 | syslog('info', "%s: sender in user (%s) whitelist", | |
217 | $queue->{logid}, $pmail); | |
218 | } else { | |
219 | $target_info->{$t}->{marks} = []; # never add additional marks here | |
220 | $target_info->{$t}->{spaminfo} = $info; | |
221 | } | |
222 | } | |
223 | ||
224 | } else { | |
225 | ||
226 | foreach my $t (@{$msginfo->{targets}}) { | |
227 | my $info = { | |
228 | sa_score => 100, | |
229 | sa_max => $self->{level}, | |
230 | sa_data => [{ | |
231 | rule => 'USER_IN_BLACKLIST', | |
232 | score => 100, | |
233 | desc => 'From: address is in the user black-list', | |
234 | }], | |
235 | sa_hits => 'USER_IN_BLACKLIST', | |
236 | }; | |
237 | ||
238 | my $list; | |
239 | my $pmail = $msginfo->{pmail}->{$t} || $t; | |
240 | if ($queue->{blackwhite}->{$pmail} && | |
241 | ($list = $queue->{blackwhite}->{$pmail}->{blacklist}) && | |
242 | check_addrlist($list, $queue->{all_from_addrs})) { | |
243 | $target_info->{$t}->{marks} = []; | |
244 | $target_info->{$t}->{spaminfo} = $info; | |
245 | syslog ('info', "%s: sender in user (%s) blacklist", | |
246 | $queue->{logid}, $pmail); | |
247 | } | |
248 | } | |
249 | } | |
250 | ||
251 | return $target_info; | |
252 | } | |
253 | ||
254 | sub level { | |
255 | my ($self, $v) = @_; | |
256 | ||
257 | if (defined ($v)) { | |
258 | $self->{level} = $v; | |
259 | } | |
260 | ||
261 | $self->{level}; | |
262 | } | |
263 | ||
264 | sub short_desc { | |
265 | my $self = shift; | |
266 | ||
267 | return "Level $self->{level}"; | |
268 | } | |
269 | ||
270 | sub __get_addr { | |
271 | my ($head, $name) = @_; | |
272 | ||
273 | my $result = $head->get($name); | |
274 | ||
275 | return '' if !$result; | |
276 | ||
277 | # copied from Mail::Spamassassin:PerMsgStatus _get() | |
278 | ||
279 | $result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;' | |
280 | $result =~ s/\s+/ /g; # reduce whitespace | |
281 | $result =~ s/^\s+//; # leading whitespace | |
282 | $result =~ s/\s+$//; # trailing whitespace | |
283 | ||
284 | # Get the email address out of the header | |
285 | # All of these should result in "jm@foo": | |
286 | # jm@foo | |
287 | # jm@foo (Foo Blah) | |
288 | # jm@foo, jm@bar | |
289 | # display: jm@foo (Foo Blah), jm@bar ; | |
290 | # Foo Blah <jm@foo> | |
291 | # "Foo Blah" <jm@foo> | |
292 | # "'Foo Blah'" <jm@foo> | |
293 | # | |
294 | # strip out the (comments) | |
295 | $result =~ s/\s*\(.*?\)//g; | |
296 | # strip out the "quoted text", unless it's the only thing in the string | |
297 | if ($result !~ /^".*"$/) { | |
298 | $result =~ s/(?<!<)"[^"]*"(?!@)//g; #" emacs | |
299 | } | |
300 | # Foo Blah <jm@xxx> or <jm@xxx> | |
301 | $result =~ s/^[^"<]*?<(.*?)>.*$/$1/; | |
302 | # multiple addresses on one line? remove all but first | |
303 | $result =~ s/,.*$//; | |
304 | ||
305 | return $result; | |
306 | } | |
307 | ||
308 | # implement our own all_from_addrs() | |
309 | # because we do not call spamassassin in canes of commtouch match | |
310 | # see Mail::Spamassassin:PerMsgStatus for details | |
311 | sub __all_from_addrs { | |
312 | my ($head) = @_; | |
313 | ||
314 | my @addrs; | |
315 | ||
316 | my $resent = $head->get('Resent-From'); | |
317 | if (defined($resent) && $resent =~ /\S/) { | |
318 | @addrs = Mail::SpamAssassin->find_all_addrs_in_line($resent); | |
319 | } else { | |
320 | @addrs = map { tr/././s; $_ } grep { $_ ne '' } | |
321 | (__get_addr($head, 'From'), # std | |
322 | __get_addr($head, 'Envelope-Sender'), # qmail: new-inject(1) | |
323 | __get_addr($head, 'Resent-Sender'), # procmailrc manpage | |
324 | __get_addr($head, 'X-Envelope-From'), # procmailrc manpage | |
325 | __get_addr($head, 'EnvelopeFrom')); # SMTP envelope | |
326 | } | |
327 | ||
328 | # Remove duplicate addresses | |
329 | my %addrs = map { $_ => 1 } @addrs; | |
330 | @addrs = keys %addrs; | |
331 | ||
332 | return @addrs; | |
333 | } | |
334 | ||
335 | sub analyze_spam { | |
336 | my ($self, $queue, $entity, $msginfo) = @_; | |
337 | ||
338 | my $maxspamsize = $msginfo->{maxspamsize}; | |
339 | ||
340 | $maxspamsize = 200*1024 if !$maxspamsize; | |
341 | ||
342 | my ($sa_score, $sa_max, $sa_scores, $sa_sumary, $list, $autolearn, $bayes); | |
343 | $list = ''; | |
344 | $bayes = 'undefined'; | |
345 | $autolearn = 'no'; | |
346 | $sa_score = 0; | |
347 | $sa_max = 5; | |
348 | ||
349 | # do not run SA if license is not valid | |
350 | if (!$queue->{lic_valid}) { | |
351 | $queue->{sa_score} = 0; | |
352 | return 0; | |
353 | } | |
354 | ||
355 | my $fromhash = { $queue->{from} => 1 }; | |
356 | foreach my $f (__all_from_addrs($entity->head())) { | |
357 | $fromhash->{$f} = 1; | |
358 | } | |
359 | $queue->{all_from_addrs} = [ keys %$fromhash ]; | |
360 | ||
361 | my ($csec, $usec) = gettimeofday (); | |
362 | ||
363 | my $spamtest = $queue->{sa}; | |
364 | ||
365 | # only run SA in testmode or when commtouch did not confirm spam (score < 5) | |
366 | # do not run SA if mail is too large | |
367 | if (($queue->{bytes} <= $maxspamsize) && | |
368 | ($msginfo->{testmode} || ($sa_score < 5))) { | |
369 | ||
370 | # save and disable alarm (SA forgets to clear alarm in some cases) | |
371 | my $previous_alarm = alarm (0); | |
372 | ||
373 | my $pid = $$; | |
374 | ||
375 | eval { | |
376 | $queue->{fh}->seek (0, 0); | |
377 | ||
378 | *SATMP = \*{$queue->{fh}}; | |
379 | my $mail = $spamtest->parse(\*SATMP); | |
380 | ||
381 | # hack: pass envelope sender to spamassassin | |
382 | $mail->header('X-Proxmox-Envelope-From', $queue->{from}); | |
383 | ||
384 | my $status = $spamtest->check ($mail); | |
385 | ||
386 | #my $fromhash = { $queue->{from} => 1 }; | |
387 | #foreach my $f ($status->all_from_addrs()) { | |
388 | #$fromhash->{$f} = 1; | |
389 | #} | |
390 | #$queue->{all_from_addrs} = [ keys %$fromhash ]; | |
391 | ||
392 | $sa_score += $status->get_score(); | |
393 | $sa_max = $status->get_required_score(); | |
394 | $autolearn = $status->get_autolearn_status(); | |
395 | ||
396 | $bayes = defined($status->{bayes_score}) ? | |
397 | $status->{bayes_score} : "undefined"; | |
398 | ||
399 | my $salist = $status->get_names_of_tests_hit(); | |
400 | ||
401 | foreach my $rule (split (/,/, $salist)) { | |
402 | $list .= $list ? ",$rule" : $rule; | |
403 | my $score = $status->{conf}->{scores}->{$rule}; | |
404 | my $desc = $status->{conf}->get_description_for_rule($rule); | |
405 | push @$sa_scores, { score => $score, rule => $rule, desc => $desc }; | |
406 | } | |
407 | ||
408 | $status->finish(); | |
409 | $mail->finish(); | |
410 | ||
411 | alarm 0; # avoid race conditions | |
412 | }; | |
413 | my $err = $@; | |
414 | ||
415 | alarm ($previous_alarm); | |
416 | ||
417 | # just to be sure - exit if SA produces a child process | |
418 | if ($$ != $pid) { | |
419 | syslog ('err', "WARNING: detected SA produced process - exiting"); | |
420 | POSIX::_exit (-1); # exit immediately | |
421 | } | |
422 | ||
423 | if ($err) { | |
424 | syslog('err', $err); | |
425 | $queue->{errors} = 1; | |
426 | } | |
427 | } | |
428 | ||
429 | $sa_score = int ($sa_score); | |
430 | $sa_score = 0 if $sa_score < 0; | |
431 | ||
432 | my ($csec_end, $usec_end) = gettimeofday(); | |
433 | $queue->{ptime_spam} = | |
434 | int (($csec_end-$csec)*1000 + ($usec_end - $usec)/1000); | |
435 | ||
436 | syslog ('info', "%s: SA score=%s/%s time=%0.3f bayes=%s autolearn=%s hits=%s", | |
437 | $queue->{logid}, $sa_score, $sa_max, $queue->{ptime_spam}/1000.0, | |
438 | $bayes, $autolearn, $list); | |
439 | ||
440 | $queue->{sa_score} = $sa_score; | |
441 | $queue->{sa_max} = $sa_max; | |
442 | $queue->{sa_data} = $sa_scores; | |
443 | $queue->{sa_hits} = $list; | |
444 | ||
445 | return ($sa_score >= $sa_max); | |
446 | } | |
447 | ||
448 | 1; | |
449 | ||
450 | __END__ | |
451 | ||
452 | =head1 PVE::RuleDB::Spam | |
453 | ||
454 | Spam level filter |