]>
Commit | Line | Data |
---|---|---|
758c7b6b DM |
1 | package PMG::RuleDB::Spam; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
758c7b6b DM |
5 | use DBI; |
6 | use Digest::SHA; | |
7 | use Time::HiRes qw (gettimeofday); | |
8 | ||
9 | use PVE::SafeSyslog; | |
10 | use Mail::SpamAssassin; | |
11 | ||
12 | use PMG::Utils; | |
13 | use PMG::RuleDB::Object; | |
14 | ||
15 | use base qw(PMG::RuleDB::Object); | |
16 | ||
17 | sub otype { | |
18 | return 3000; | |
19 | } | |
20 | ||
21 | sub oclass { | |
22 | return 'what'; | |
23 | } | |
24 | ||
25 | sub otype_text { | |
26 | return 'Spam Filter'; | |
27 | } | |
28 | ||
758c7b6b DM |
29 | sub new { |
30 | my ($type, $level, $ogroup) = @_; | |
31 | ||
32 | my $class = ref($type) || $type; | |
33 | ||
7a2cf7e6 | 34 | my $self = $class->SUPER::new($class->otype(), $ogroup); |
758c7b6b DM |
35 | |
36 | $level = 5 if !defined ($level); | |
37 | ||
38 | $self->{level} = $level; | |
39 | ||
40 | return $self; | |
41 | } | |
42 | ||
43 | sub load_attr { | |
44 | my ($type, $ruledb, $id, $ogroup, $value) = @_; | |
45 | ||
46 | my $class = ref($type) || $type; | |
47 | ||
9ef3f143 | 48 | defined($value) || die "undefined value: ERROR"; |
758c7b6b DM |
49 | |
50 | my $obj = $class->new($value, $ogroup); | |
51 | $obj->{id} = $id; | |
52 | ||
53 | $obj->{digest} = Digest::SHA::sha1_hex($id, $value, $ogroup); | |
54 | ||
55 | return $obj; | |
56 | } | |
57 | ||
58 | sub save { | |
59 | my ($self, $ruledb) = @_; | |
60 | ||
9ef3f143 DM |
61 | defined($self->{ogroup}) || die "undefined ogroup: ERROR"; |
62 | defined($self->{level}) || die "undefined spam level: ERROR"; | |
758c7b6b DM |
63 | |
64 | if (defined ($self->{id})) { | |
65 | # update | |
66 | ||
67 | $ruledb->{dbh}->do( | |
68 | "UPDATE Object SET Value = ? WHERE ID = ?", | |
69 | undef, $self->{level}, $self->{id}); | |
70 | ||
71 | } else { | |
72 | # insert | |
73 | ||
74 | my $sth = $ruledb->{dbh}->prepare( | |
75 | "INSERT INTO Object (Objectgroup_ID, ObjectType, Value) " . | |
76 | "VALUES (?, ?, ?);"); | |
77 | ||
78 | $sth->execute($self->ogroup, $self->otype, $self->{level}); | |
79 | ||
80 | $self->{id} = PMG::Utils::lastid ($ruledb->{dbh}, 'object_id_seq'); | |
81 | } | |
82 | ||
83 | return $self->{id}; | |
84 | } | |
85 | ||
86 | sub parse_addrlist { | |
87 | my ($list) = @_; | |
88 | ||
89 | my $adlist = {}; | |
90 | ||
91 | foreach my $addr (split ('\s*,\s*', $list)) { | |
92 | $addr = lc $addr; | |
93 | my $regex = $addr; | |
94 | # SA like checks | |
95 | $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ? | |
96 | $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars | |
97 | $regex =~ tr/?/./; # replace "?" with "." | |
98 | $regex =~ s/\*+/\.\*/g; # replace "*" with ".*" | |
99 | ||
100 | # we use a hash for extra fast testing | |
101 | $adlist->{$addr} = "^${regex}\$"; | |
102 | } | |
103 | ||
104 | return $adlist; | |
105 | } | |
106 | ||
107 | sub check_addrlist { | |
108 | my ($list, $addrlst) = @_; | |
109 | ||
110 | foreach my $addr (@$addrlst) { | |
111 | ||
112 | $addr = lc $addr; | |
113 | ||
114 | return 1 if defined ($list->{$addr}); | |
115 | ||
116 | study $addr; | |
117 | ||
118 | foreach my $r (values %{$list}) { | |
119 | if ($addr =~ qr/$r/i) { | |
120 | return 1; | |
121 | } | |
122 | } | |
123 | } | |
124 | ||
125 | return 0; | |
126 | } | |
127 | ||
128 | sub get_blackwhite { | |
129 | my ($dbh, $entity, $msginfo) = @_; | |
130 | ||
131 | my $target_info = {}; | |
132 | ||
133 | my $targets = $msginfo->{targets}; | |
134 | ||
135 | my $cond = ''; | |
136 | foreach my $r (@$targets) { | |
137 | my $pmail = $msginfo->{pmail}->{$r} || lc ($r); | |
138 | my $qr = $dbh->quote ($pmail); | |
139 | $cond .= " OR " if $cond; | |
140 | $cond .= "pmail = $qr"; | |
141 | } | |
142 | ||
143 | eval { | |
144 | my $query = "SELECT * FROM UserPrefs WHERE " . | |
145 | "($cond) AND (Name = 'BL' OR Name = 'WL')"; | |
146 | my $sth = $dbh->prepare($query); | |
147 | ||
148 | $sth->execute(); | |
149 | ||
150 | while (my $ref = $sth->fetchrow_hashref()) { | |
151 | my $pmail = lc ($ref->{pmail}); | |
152 | if ($ref->{name} eq 'WL') { | |
153 | $target_info->{$pmail}->{whitelist} = | |
154 | parse_addrlist($ref->{data}); | |
155 | } elsif ($ref->{name} eq 'BL') { | |
156 | $target_info->{$pmail}->{blacklist} = | |
157 | parse_addrlist($ref->{data}); | |
158 | } | |
159 | } | |
160 | ||
161 | $sth->finish; | |
162 | }; | |
163 | if (my $err = $@) { | |
164 | syslog('err', $err); | |
165 | } | |
166 | ||
167 | return $target_info; | |
168 | } | |
169 | ||
170 | sub what_match_targets { | |
171 | my ($self, $queue, $entity, $msginfo, $dbh) = @_; | |
172 | ||
173 | my $target_info; | |
174 | ||
175 | if (!$queue->{spam_analyzed}) { | |
176 | $self->analyze_spam($queue, $entity, $msginfo); | |
177 | $queue->{blackwhite} = get_blackwhite($dbh, $entity, $msginfo); | |
178 | $queue->{spam_analyzed} = 1; | |
179 | } | |
180 | ||
181 | if ($msginfo->{testmode}) { | |
182 | $queue->{sa_score} = 100 if $queue->{sa_score} > 100; | |
183 | my $data; | |
184 | foreach my $s (@{$queue->{sa_data}}) { | |
185 | next if $s->{rule} eq 'AWL'; | |
186 | push @$data, $s; | |
187 | } | |
188 | $queue->{sa_data} = $data; | |
189 | } | |
190 | ||
191 | if (defined($queue->{sa_score}) && $queue->{sa_score} >= $self->{level}) { | |
192 | ||
193 | my $info = { | |
194 | sa_score => $queue->{sa_score}, | |
195 | sa_max => $self->{level}, | |
196 | sa_data => $queue->{sa_data}, | |
197 | sa_hits => $queue->{sa_hits} | |
198 | }; | |
199 | ||
200 | foreach my $t (@{$msginfo->{targets}}) { | |
201 | my $list; | |
202 | my $pmail = $msginfo->{pmail}->{$t} || $t; | |
203 | if ($queue->{blackwhite}->{$pmail} && | |
204 | ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) && | |
205 | check_addrlist($list, $queue->{all_from_addrs})) { | |
206 | syslog('info', "%s: sender in user (%s) whitelist", | |
207 | $queue->{logid}, $pmail); | |
208 | } else { | |
209 | $target_info->{$t}->{marks} = []; # never add additional marks here | |
210 | $target_info->{$t}->{spaminfo} = $info; | |
211 | } | |
212 | } | |
213 | ||
214 | } else { | |
215 | ||
216 | foreach my $t (@{$msginfo->{targets}}) { | |
217 | my $info = { | |
218 | sa_score => 100, | |
219 | sa_max => $self->{level}, | |
220 | sa_data => [{ | |
221 | rule => 'USER_IN_BLACKLIST', | |
222 | score => 100, | |
223 | desc => 'From: address is in the user black-list', | |
224 | }], | |
225 | sa_hits => 'USER_IN_BLACKLIST', | |
226 | }; | |
227 | ||
228 | my $list; | |
229 | my $pmail = $msginfo->{pmail}->{$t} || $t; | |
230 | if ($queue->{blackwhite}->{$pmail} && | |
231 | ($list = $queue->{blackwhite}->{$pmail}->{blacklist}) && | |
232 | check_addrlist($list, $queue->{all_from_addrs})) { | |
233 | $target_info->{$t}->{marks} = []; | |
234 | $target_info->{$t}->{spaminfo} = $info; | |
235 | syslog ('info', "%s: sender in user (%s) blacklist", | |
236 | $queue->{logid}, $pmail); | |
237 | } | |
238 | } | |
239 | } | |
240 | ||
241 | return $target_info; | |
242 | } | |
243 | ||
244 | sub level { | |
245 | my ($self, $v) = @_; | |
246 | ||
247 | if (defined ($v)) { | |
248 | $self->{level} = $v; | |
249 | } | |
250 | ||
251 | $self->{level}; | |
252 | } | |
253 | ||
254 | sub short_desc { | |
255 | my $self = shift; | |
256 | ||
257 | return "Level $self->{level}"; | |
258 | } | |
259 | ||
260 | sub __get_addr { | |
261 | my ($head, $name) = @_; | |
262 | ||
263 | my $result = $head->get($name); | |
264 | ||
265 | return '' if !$result; | |
266 | ||
267 | # copied from Mail::Spamassassin:PerMsgStatus _get() | |
268 | ||
269 | $result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;' | |
270 | $result =~ s/\s+/ /g; # reduce whitespace | |
271 | $result =~ s/^\s+//; # leading whitespace | |
272 | $result =~ s/\s+$//; # trailing whitespace | |
273 | ||
274 | # Get the email address out of the header | |
275 | # All of these should result in "jm@foo": | |
276 | # jm@foo | |
277 | # jm@foo (Foo Blah) | |
278 | # jm@foo, jm@bar | |
279 | # display: jm@foo (Foo Blah), jm@bar ; | |
280 | # Foo Blah <jm@foo> | |
281 | # "Foo Blah" <jm@foo> | |
282 | # "'Foo Blah'" <jm@foo> | |
283 | # | |
284 | # strip out the (comments) | |
285 | $result =~ s/\s*\(.*?\)//g; | |
286 | # strip out the "quoted text", unless it's the only thing in the string | |
287 | if ($result !~ /^".*"$/) { | |
288 | $result =~ s/(?<!<)"[^"]*"(?!@)//g; #" emacs | |
289 | } | |
290 | # Foo Blah <jm@xxx> or <jm@xxx> | |
291 | $result =~ s/^[^"<]*?<(.*?)>.*$/$1/; | |
292 | # multiple addresses on one line? remove all but first | |
293 | $result =~ s/,.*$//; | |
294 | ||
295 | return $result; | |
296 | } | |
297 | ||
298 | # implement our own all_from_addrs() | |
299 | # because we do not call spamassassin in canes of commtouch match | |
300 | # see Mail::Spamassassin:PerMsgStatus for details | |
301 | sub __all_from_addrs { | |
302 | my ($head) = @_; | |
303 | ||
304 | my @addrs; | |
305 | ||
306 | my $resent = $head->get('Resent-From'); | |
307 | if (defined($resent) && $resent =~ /\S/) { | |
308 | @addrs = Mail::SpamAssassin->find_all_addrs_in_line($resent); | |
309 | } else { | |
310 | @addrs = map { tr/././s; $_ } grep { $_ ne '' } | |
311 | (__get_addr($head, 'From'), # std | |
312 | __get_addr($head, 'Envelope-Sender'), # qmail: new-inject(1) | |
313 | __get_addr($head, 'Resent-Sender'), # procmailrc manpage | |
314 | __get_addr($head, 'X-Envelope-From'), # procmailrc manpage | |
315 | __get_addr($head, 'EnvelopeFrom')); # SMTP envelope | |
316 | } | |
317 | ||
318 | # Remove duplicate addresses | |
319 | my %addrs = map { $_ => 1 } @addrs; | |
320 | @addrs = keys %addrs; | |
321 | ||
322 | return @addrs; | |
323 | } | |
324 | ||
325 | sub analyze_spam { | |
326 | my ($self, $queue, $entity, $msginfo) = @_; | |
327 | ||
328 | my $maxspamsize = $msginfo->{maxspamsize}; | |
329 | ||
330 | $maxspamsize = 200*1024 if !$maxspamsize; | |
331 | ||
d3d54e68 | 332 | my ($sa_score, $sa_max, $sa_scores, $sa_sumary, $list, $autolearn, $bayes, $loglist); |
758c7b6b | 333 | $list = ''; |
d3d54e68 | 334 | $loglist = ''; |
758c7b6b DM |
335 | $bayes = 'undefined'; |
336 | $autolearn = 'no'; | |
337 | $sa_score = 0; | |
338 | $sa_max = 5; | |
339 | ||
340 | # do not run SA if license is not valid | |
341 | if (!$queue->{lic_valid}) { | |
342 | $queue->{sa_score} = 0; | |
343 | return 0; | |
344 | } | |
345 | ||
346 | my $fromhash = { $queue->{from} => 1 }; | |
347 | foreach my $f (__all_from_addrs($entity->head())) { | |
348 | $fromhash->{$f} = 1; | |
349 | } | |
350 | $queue->{all_from_addrs} = [ keys %$fromhash ]; | |
351 | ||
cda67dee DM |
352 | if (my $hit = $queue->{clamav_heuristic}) { |
353 | my $score = $queue->{clamav_heuristic_score}; | |
354 | my $descr = "ClamAV heuristic test: $hit"; | |
355 | my $rule = 'ClamAVHeuristics'; | |
0d5209bf DM |
356 | $sa_score += $score; |
357 | $list .= $list ? ",$rule" : $rule; | |
d3d54e68 | 358 | $loglist .= $loglist ? ",$rule($score)" : "$rule($score)"; |
0d5209bf DM |
359 | push @$sa_scores, { score => $score, rule => $rule, desc => $descr }; |
360 | } | |
361 | ||
4c4fec6b SI |
362 | if (my $hit = $queue->{spam_custom}) { |
363 | my $score += $queue->{spam_custom}; | |
364 | my $descr = "Custom Check Script"; | |
365 | my $rule = 'CustomCheck'; | |
366 | $sa_score += $score; | |
367 | $list .= $list ? ",$rule" : $rule; | |
368 | $list .= $list ? ",$rule" : $rule; | |
369 | $loglist .= $loglist ? ",$rule($score)" : "$rule($score)"; | |
370 | push @$sa_scores, { score => $score, rule => $rule, desc => $descr }; | |
371 | } | |
372 | ||
758c7b6b DM |
373 | my ($csec, $usec) = gettimeofday (); |
374 | ||
375 | my $spamtest = $queue->{sa}; | |
376 | ||
cda67dee | 377 | # only run SA in testmode or when clamav_heuristic did not confirm spam (score < 5) |
2da52ccc | 378 | if ($msginfo->{testmode} || ($sa_score < 5)) { |
758c7b6b DM |
379 | |
380 | # save and disable alarm (SA forgets to clear alarm in some cases) | |
381 | my $previous_alarm = alarm (0); | |
382 | ||
383 | my $pid = $$; | |
384 | ||
385 | eval { | |
2da52ccc DM |
386 | $queue->{fh}->seek(0, 0); |
387 | ||
388 | # Truncate message to $maxspamsize | |
389 | # Note: similar code to read content is used inside | |
390 | # Mail::SpamAssassin::Message->new() | |
391 | my $nread; | |
392 | my $raw_str = ''; | |
393 | while ($nread = sysread($queue->{fh}, $raw_str, 16384, length($raw_str))) { | |
394 | last if length($raw_str) >= $maxspamsize; | |
395 | } | |
396 | defined($nread) || die "error reading message: $!\n"; | |
397 | ||
398 | my $suppl_attrib = {}; | |
399 | if (length($raw_str) >= $maxspamsize && | |
400 | length($raw_str) < $queue->{bytes}) { | |
401 | $suppl_attrib->{body_size} = $queue->{bytes}; | |
402 | } | |
403 | ||
404 | my @message = split(/^/m, $raw_str, -1); | |
405 | undef $raw_str; # free memory early | |
758c7b6b | 406 | |
2da52ccc | 407 | my $mail = $spamtest->parse(\@message, 0, $suppl_attrib); |
758c7b6b DM |
408 | |
409 | # hack: pass envelope sender to spamassassin | |
410 | $mail->header('X-Proxmox-Envelope-From', $queue->{from}); | |
411 | ||
2da52ccc | 412 | my $status = $spamtest->check($mail); |
758c7b6b DM |
413 | |
414 | #my $fromhash = { $queue->{from} => 1 }; | |
415 | #foreach my $f ($status->all_from_addrs()) { | |
416 | #$fromhash->{$f} = 1; | |
417 | #} | |
418 | #$queue->{all_from_addrs} = [ keys %$fromhash ]; | |
419 | ||
420 | $sa_score += $status->get_score(); | |
421 | $sa_max = $status->get_required_score(); | |
422 | $autolearn = $status->get_autolearn_status(); | |
423 | ||
424 | $bayes = defined($status->{bayes_score}) ? | |
425 | $status->{bayes_score} : "undefined"; | |
426 | ||
427 | my $salist = $status->get_names_of_tests_hit(); | |
428 | ||
429 | foreach my $rule (split (/,/, $salist)) { | |
430 | $list .= $list ? ",$rule" : $rule; | |
431 | my $score = $status->{conf}->{scores}->{$rule}; | |
d3d54e68 | 432 | $loglist .= $loglist ? ",$rule($score)" : "$rule($score)"; |
758c7b6b DM |
433 | my $desc = $status->{conf}->get_description_for_rule($rule); |
434 | push @$sa_scores, { score => $score, rule => $rule, desc => $desc }; | |
435 | } | |
436 | ||
437 | $status->finish(); | |
438 | $mail->finish(); | |
439 | ||
440 | alarm 0; # avoid race conditions | |
441 | }; | |
442 | my $err = $@; | |
443 | ||
444 | alarm ($previous_alarm); | |
445 | ||
446 | # just to be sure - exit if SA produces a child process | |
447 | if ($$ != $pid) { | |
448 | syslog ('err', "WARNING: detected SA produced process - exiting"); | |
449 | POSIX::_exit (-1); # exit immediately | |
450 | } | |
451 | ||
452 | if ($err) { | |
453 | syslog('err', $err); | |
454 | $queue->{errors} = 1; | |
455 | } | |
456 | } | |
457 | ||
458 | $sa_score = int ($sa_score); | |
459 | $sa_score = 0 if $sa_score < 0; | |
460 | ||
461 | my ($csec_end, $usec_end) = gettimeofday(); | |
462 | $queue->{ptime_spam} = | |
463 | int (($csec_end-$csec)*1000 + ($usec_end - $usec)/1000); | |
464 | ||
465 | syslog ('info', "%s: SA score=%s/%s time=%0.3f bayes=%s autolearn=%s hits=%s", | |
466 | $queue->{logid}, $sa_score, $sa_max, $queue->{ptime_spam}/1000.0, | |
d3d54e68 | 467 | $bayes, $autolearn, $loglist); |
758c7b6b DM |
468 | |
469 | $queue->{sa_score} = $sa_score; | |
470 | $queue->{sa_max} = $sa_max; | |
471 | $queue->{sa_data} = $sa_scores; | |
472 | $queue->{sa_hits} = $list; | |
473 | ||
474 | return ($sa_score >= $sa_max); | |
475 | } | |
476 | ||
519deca0 DC |
477 | sub properties { |
478 | my ($class) = @_; | |
479 | ||
480 | return { | |
481 | spamlevel => { | |
482 | description => "Spam Level", | |
483 | type => 'integer', | |
484 | minimum => 0, | |
485 | }, | |
486 | }; | |
487 | } | |
488 | ||
489 | sub get { | |
490 | my ($self) = @_; | |
491 | ||
492 | return { spamlevel => $self->{level} }; | |
493 | } | |
494 | ||
495 | sub update { | |
496 | my ($self, $param) = @_; | |
497 | ||
498 | $self->{level} = $param->{spamlevel}; | |
499 | } | |
500 | ||
758c7b6b DM |
501 | 1; |
502 | ||
503 | __END__ | |
504 | ||
505 | =head1 PVE::RuleDB::Spam | |
506 | ||
507 | Spam level filter |