]> git.proxmox.com Git - pmg-api.git/blob - src/PMG/RuleDB/Spam.pm
user accesslists: reword logging and hits for newer SA rule sets
[pmg-api.git] / src / PMG / RuleDB / Spam.pm
1 package PMG::RuleDB::Spam;
2
3 use strict;
4 use warnings;
5 use DBI;
6 use Digest::SHA;
7 use Encode qw(encode);
8 use Time::HiRes qw (gettimeofday);
9
10 use PVE::SafeSyslog;
11 use Mail::SpamAssassin;
12
13 use PMG::Utils;
14 use PMG::RuleDB::Object;
15
16 use base qw(PMG::RuleDB::Object);
17
18 sub otype {
19 return 3000;
20 }
21
22 sub oclass {
23 return 'what';
24 }
25
26 sub otype_text {
27 return 'Spam Filter';
28 }
29
30 sub new {
31 my ($type, $level, $ogroup) = @_;
32
33 my $class = ref($type) || $type;
34
35 my $self = $class->SUPER::new($class->otype(), $ogroup);
36
37 $level = 5 if !defined ($level);
38
39 $self->{level} = $level;
40
41 return $self;
42 }
43
44 sub load_attr {
45 my ($type, $ruledb, $id, $ogroup, $value) = @_;
46
47 my $class = ref($type) || $type;
48
49 defined($value) || die "undefined value: ERROR";
50
51 my $obj = $class->new($value, $ogroup);
52 $obj->{id} = $id;
53
54 $obj->{digest} = Digest::SHA::sha1_hex($id, $value, $ogroup);
55
56 return $obj;
57 }
58
59 sub save {
60 my ($self, $ruledb) = @_;
61
62 defined($self->{ogroup}) || die "undefined ogroup: ERROR";
63 defined($self->{level}) || die "undefined spam level: ERROR";
64
65 if (defined ($self->{id})) {
66 # update
67
68 $ruledb->{dbh}->do(
69 "UPDATE Object SET Value = ? WHERE ID = ?",
70 undef, $self->{level}, $self->{id});
71
72 } else {
73 # insert
74
75 my $sth = $ruledb->{dbh}->prepare(
76 "INSERT INTO Object (Objectgroup_ID, ObjectType, Value) " .
77 "VALUES (?, ?, ?);");
78
79 $sth->execute($self->ogroup, $self->otype, $self->{level});
80
81 $self->{id} = PMG::Utils::lastid ($ruledb->{dbh}, 'object_id_seq');
82 }
83
84 return $self->{id};
85 }
86
87 sub parse_addrlist {
88 my ($list) = @_;
89
90 my $adlist = {};
91
92 foreach my $addr (split ('\s*,\s*', $list)) {
93 $addr = lc $addr;
94 my $regex = $addr;
95 # SA like checks
96 $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ?
97 $regex =~ s/([^\*\?_\w])/\\$1/g; # escape possible metachars
98 $regex =~ tr/?/./; # replace "?" with "."
99 $regex =~ s/\*+/\.\*/g; # replace "*" with ".*"
100
101 # we use a hash for extra fast testing
102 $adlist->{$addr} = "^${regex}\$";
103 }
104
105 return $adlist;
106 }
107
108 sub check_addrlist {
109 my ($list, $addrlst) = @_;
110
111 foreach my $addr (@$addrlst) {
112
113 $addr = lc $addr;
114
115 return 1 if defined ($list->{$addr});
116
117 study $addr;
118
119 foreach my $r (values %{$list}) {
120 if ($addr =~ qr/$r/i) {
121 return 1;
122 }
123 }
124 }
125
126 return 0;
127 }
128
129 sub get_blackwhite {
130 my ($dbh, $entity, $msginfo) = @_;
131
132 my $target_info = {};
133
134 my $targets = $msginfo->{targets};
135
136 my $cond = '';
137 foreach my $r (@$targets) {
138 my $pmail = $msginfo->{pmail}->{$r} || lc ($r);
139 my $qr = $dbh->quote (encode('UTF-8', $pmail));
140 $cond .= " OR " if $cond;
141 $cond .= "pmail = $qr";
142 }
143
144 eval {
145 my $query = "SELECT * FROM UserPrefs WHERE " .
146 "($cond) AND (Name = 'BL' OR Name = 'WL')";
147 my $sth = $dbh->prepare($query);
148
149 $sth->execute();
150
151 while (my $ref = $sth->fetchrow_hashref()) {
152 my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail}));
153 if ($ref->{name} eq 'WL') {
154 $target_info->{$pmail}->{whitelist} =
155 parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
156 } elsif ($ref->{name} eq 'BL') {
157 $target_info->{$pmail}->{blacklist} =
158 parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
159 }
160 }
161
162 $sth->finish;
163 };
164 if (my $err = $@) {
165 syslog('err', $err);
166 }
167
168 return $target_info;
169 }
170
171 sub what_match_targets {
172 my ($self, $queue, $entity, $msginfo, $dbh) = @_;
173
174 my $target_info;
175
176 if (!$queue->{spam_analyzed}) {
177 $self->analyze_spam($queue, $entity, $msginfo);
178 $queue->{blackwhite} = get_blackwhite($dbh, $entity, $msginfo);
179 $queue->{spam_analyzed} = 1;
180 }
181
182 if ($msginfo->{testmode}) {
183 $queue->{sa_score} = 100 if $queue->{sa_score} > 100;
184 my $data;
185 foreach my $s (@{$queue->{sa_data}}) {
186 next if $s->{rule} eq 'AWL';
187 push @$data, $s;
188 }
189 $queue->{sa_data} = $data;
190 }
191
192 if (defined($queue->{sa_score}) && $queue->{sa_score} >= $self->{level}) {
193
194 my $info = {
195 sa_score => $queue->{sa_score},
196 sa_max => $self->{level},
197 sa_data => $queue->{sa_data},
198 sa_hits => $queue->{sa_hits}
199 };
200
201 foreach my $t (@{$msginfo->{targets}}) {
202 my $list;
203 my $pmail = $msginfo->{pmail}->{$t} || $t;
204 if ($queue->{blackwhite}->{$pmail} &&
205 ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
206 check_addrlist($list, $queue->{all_from_addrs})) {
207 syslog('info', "%s: sender in user (%s) welcomelist",
208 $queue->{logid}, encode('UTF-8', $pmail));
209 } else {
210 $target_info->{$t}->{marks} = []; # never add additional marks here
211 $target_info->{$t}->{spaminfo} = $info;
212 }
213 }
214
215 } else {
216
217 foreach my $t (@{$msginfo->{targets}}) {
218 my $info = {
219 sa_score => 100,
220 sa_max => $self->{level},
221 sa_data => [{
222 rule => 'USER_IN_BLOCKLIST',
223 score => 100,
224 desc => PMG::Utils::user_bl_description(),
225 }],
226 sa_hits => 'USER_IN_BLOCKLIST',
227 };
228
229 my $list;
230 my $pmail = $msginfo->{pmail}->{$t} || $t;
231 if ($queue->{blackwhite}->{$pmail} &&
232 ($list = $queue->{blackwhite}->{$pmail}->{blacklist}) &&
233 check_addrlist($list, $queue->{all_from_addrs})) {
234 $target_info->{$t}->{marks} = [];
235 $target_info->{$t}->{spaminfo} = $info;
236 syslog ('info', "%s: sender in user (%s) blocklist",
237 $queue->{logid}, encode('UTF-8',$pmail));
238 }
239 }
240 }
241
242 return $target_info;
243 }
244
245 sub level {
246 my ($self, $v) = @_;
247
248 if (defined ($v)) {
249 $self->{level} = $v;
250 }
251
252 $self->{level};
253 }
254
255 sub short_desc {
256 my $self = shift;
257
258 return "Level $self->{level}";
259 }
260
261 sub __get_addr {
262 my ($head, $name) = @_;
263
264 my $result = $head->get($name);
265
266 return '' if !$result;
267
268 # copied from Mail::Spamassassin:PerMsgStatus _get()
269
270 $result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;'
271 $result =~ s/\s+/ /g; # reduce whitespace
272 $result =~ s/^\s+//; # leading whitespace
273 $result =~ s/\s+$//; # trailing whitespace
274
275 # Get the email address out of the header
276 # All of these should result in "jm@foo":
277 # jm@foo
278 # jm@foo (Foo Blah)
279 # jm@foo, jm@bar
280 # display: jm@foo (Foo Blah), jm@bar ;
281 # Foo Blah <jm@foo>
282 # "Foo Blah" <jm@foo>
283 # "'Foo Blah'" <jm@foo>
284 #
285 # strip out the (comments)
286 $result =~ s/\s*\(.*?\)//g;
287 # strip out the "quoted text", unless it's the only thing in the string
288 if ($result !~ /^".*"$/) {
289 $result =~ s/(?<!<)"[^"]*"(?!@)//g; #" emacs
290 }
291 # Foo Blah <jm@xxx> or <jm@xxx>
292 $result =~ s/^[^"<]*?<(.*?)>.*$/$1/;
293 # multiple addresses on one line? remove all but first
294 $result =~ s/,.*$//;
295
296 return $result;
297 }
298
299 # implement our own all_from_addrs()
300 # because we do not call spamassassin in canes of commtouch match
301 # see Mail::Spamassassin:PerMsgStatus for details
302 sub __all_from_addrs {
303 my ($head) = @_;
304
305 my @addrs;
306
307 my $resent = $head->get('Resent-From');
308 if (defined($resent) && $resent =~ /\S/) {
309 @addrs = Mail::SpamAssassin->find_all_addrs_in_line($resent);
310 } else {
311 @addrs = map { tr/././s; $_ } grep { $_ ne '' }
312 (__get_addr($head, 'From'), # std
313 __get_addr($head, 'Envelope-Sender'), # qmail: new-inject(1)
314 __get_addr($head, 'Resent-Sender'), # procmailrc manpage
315 __get_addr($head, 'X-Envelope-From'), # procmailrc manpage
316 __get_addr($head, 'EnvelopeFrom')); # SMTP envelope
317 }
318
319 # Remove duplicate addresses
320 my %addrs = map { $_ => 1 } @addrs;
321 @addrs = keys %addrs;
322
323 return @addrs;
324 }
325
326 sub analyze_spam {
327 my ($self, $queue, $entity, $msginfo) = @_;
328
329 my $maxspamsize = $msginfo->{maxspamsize};
330
331 $maxspamsize = 200*1024 if !$maxspamsize;
332
333 my ($sa_score, $sa_max, $sa_scores, $sa_sumary, $list, $autolearn, $bayes, $loglist);
334 $list = '';
335 $loglist = '';
336 $bayes = 'undefined';
337 $autolearn = 'no';
338 $sa_score = 0;
339 $sa_max = 5;
340
341 # do not run SA if license is not valid
342 if (!$queue->{lic_valid}) {
343 $queue->{sa_score} = 0;
344 return 0;
345 }
346
347 my $fromhash = { $queue->{from} => 1 };
348 foreach my $f (__all_from_addrs($entity->head())) {
349 $fromhash->{$f} = 1;
350 }
351 $queue->{all_from_addrs} = [ keys %$fromhash ];
352
353 if (my $hit = $queue->{clamav_heuristic}) {
354 my $score = $queue->{clamav_heuristic_score};
355 my $descr = "ClamAV heuristic test: $hit";
356 my $rule = 'ClamAVHeuristics';
357 $sa_score += $score;
358 $list .= $list ? ",$rule" : $rule;
359 $loglist .= $loglist ? ",$rule($score)" : "$rule($score)";
360 push @$sa_scores, { score => $score, rule => $rule, desc => $descr };
361 }
362
363 if (my $hit = $queue->{spam_custom}) {
364 my $score += $queue->{spam_custom};
365 my $descr = "Custom Check Script";
366 my $rule = 'CustomCheck';
367 $sa_score += $score;
368 $list .= $list ? ",$rule" : $rule;
369 $list .= $list ? ",$rule" : $rule;
370 $loglist .= $loglist ? ",$rule($score)" : "$rule($score)";
371 push @$sa_scores, { score => $score, rule => $rule, desc => $descr };
372 }
373
374 my ($csec, $usec) = gettimeofday ();
375
376 my $spamtest = $queue->{sa};
377
378 # only run SA in testmode or when clamav_heuristic did not confirm spam (score < 5)
379 if ($msginfo->{testmode} || ($sa_score < 5)) {
380
381 # save and disable alarm (SA forgets to clear alarm in some cases)
382 my $previous_alarm = alarm (0);
383
384 my $pid = $$;
385
386 eval {
387 $queue->{fh}->seek(0, 0);
388
389 # Truncate message to $maxspamsize
390 # Note: similar code to read content is used inside
391 # Mail::SpamAssassin::Message->new()
392 my $nread;
393 my $raw_str = '';
394 while ($nread = sysread($queue->{fh}, $raw_str, 16384, length($raw_str))) {
395 last if length($raw_str) >= $maxspamsize;
396 }
397 defined($nread) || die "error reading message: $!\n";
398
399 my $suppl_attrib = {};
400 if (length($raw_str) >= $maxspamsize &&
401 length($raw_str) < $queue->{bytes}) {
402 $suppl_attrib->{body_size} = $queue->{bytes};
403 }
404
405 my @message = split(/^/m, $raw_str, -1);
406 undef $raw_str; # free memory early
407
408 my $mail = $spamtest->parse(\@message, 0, $suppl_attrib);
409
410 # hack: pass envelope sender to spamassassin
411 $mail->header('X-Proxmox-Envelope-From', $queue->{from});
412
413 my $status = $spamtest->check($mail);
414
415 #my $fromhash = { $queue->{from} => 1 };
416 #foreach my $f ($status->all_from_addrs()) {
417 #$fromhash->{$f} = 1;
418 #}
419 #$queue->{all_from_addrs} = [ keys %$fromhash ];
420
421 $sa_score += $status->get_score();
422 $sa_max = $status->get_required_score();
423 $autolearn = $status->get_autolearn_status();
424
425 $bayes = defined($status->{bayes_score}) ?
426 sprintf('%0.2f', $status->{bayes_score}) : "undefined";
427
428 my $salist = $status->get_names_of_tests_hit();
429
430 foreach my $rule (split (/,/, $salist)) {
431 $list .= $list ? ",$rule" : $rule;
432 my $score = $status->{conf}->{scores}->{$rule};
433 $loglist .= $loglist ? ",$rule($score)" : "$rule($score)";
434 my $desc = $status->{conf}->get_description_for_rule($rule);
435 if (my $hits = $status->{uridnsbl_hits}->{$rule}) {
436 $desc .= ' [' . join(',', keys %$hits) . ']';
437 }
438 push @$sa_scores, { score => $score, rule => $rule, desc => $desc };
439 }
440
441 $status->finish();
442 $mail->finish();
443
444 alarm 0; # avoid race conditions
445 };
446 my $err = $@;
447
448 alarm ($previous_alarm);
449
450 # just to be sure - exit if SA produces a child process
451 if ($$ != $pid) {
452 syslog ('err', "WARNING: detected SA produced process - exiting");
453 POSIX::_exit (-1); # exit immediately
454 }
455
456 if ($err) {
457 syslog('err', $err);
458 $queue->{errors} = 1;
459 }
460 }
461
462 $sa_score = int ($sa_score);
463 $sa_score = 0 if $sa_score < 0;
464
465 my ($csec_end, $usec_end) = gettimeofday();
466 $queue->{ptime_spam} =
467 int (($csec_end-$csec)*1000 + ($usec_end - $usec)/1000);
468
469 syslog ('info', "%s: SA score=%s/%s time=%0.3f bayes=%s autolearn=%s hits=%s",
470 $queue->{logid}, $sa_score, $sa_max, $queue->{ptime_spam}/1000.0,
471 $bayes, $autolearn, $loglist);
472
473 $queue->{sa_score} = $sa_score;
474 $queue->{sa_max} = $sa_max;
475 $queue->{sa_data} = $sa_scores;
476 $queue->{sa_hits} = $list;
477
478 return ($sa_score >= $sa_max);
479 }
480
481 sub properties {
482 my ($class) = @_;
483
484 return {
485 spamlevel => {
486 description => "Spam Level",
487 type => 'integer',
488 minimum => 0,
489 },
490 };
491 }
492
493 sub get {
494 my ($self) = @_;
495
496 return { spamlevel => $self->{level} };
497 }
498
499 sub update {
500 my ($self, $param) = @_;
501
502 $self->{level} = $param->{spamlevel};
503 }
504
505 1;
506
507 __END__
508
509 =head1 PVE::RuleDB::Spam
510
511 Spam level filter