]> git.proxmox.com Git - pmg-api.git/blob - src/PMG/RuleDB/Spam.pm
97a80b3cd978a385cc1edf717c94694f672f0fa5
[pmg-api.git] / src / PMG / RuleDB / Spam.pm
1 package PMG::RuleDB::Spam;
2
3 use strict;
4 use warnings;
5 use DBI;
6 use Digest::SHA;
7 use Time::HiRes qw (gettimeofday);
8
9 use PVE::SafeSyslog;
10 use Mail::SpamAssassin;
11
12 use PMG::Utils;
13 use PMG::RuleDB::Object;
14
15 use base qw(PMG::RuleDB::Object);
16
17 sub otype {
18 return 3000;
19 }
20
21 sub oclass {
22 return 'what';
23 }
24
25 sub otype_text {
26 return 'Spam Filter';
27 }
28
29 sub new {
30 my ($type, $level, $ogroup) = @_;
31
32 my $class = ref($type) || $type;
33
34 my $self = $class->SUPER::new($class->otype(), $ogroup);
35
36 $level = 5 if !defined ($level);
37
38 $self->{level} = $level;
39
40 return $self;
41 }
42
43 sub load_attr {
44 my ($type, $ruledb, $id, $ogroup, $value) = @_;
45
46 my $class = ref($type) || $type;
47
48 defined($value) || die "undefined value: ERROR";
49
50 my $obj = $class->new($value, $ogroup);
51 $obj->{id} = $id;
52
53 $obj->{digest} = Digest::SHA::sha1_hex($id, $value, $ogroup);
54
55 return $obj;
56 }
57
58 sub save {
59 my ($self, $ruledb) = @_;
60
61 defined($self->{ogroup}) || die "undefined ogroup: ERROR";
62 defined($self->{level}) || die "undefined spam level: ERROR";
63
64 if (defined ($self->{id})) {
65 # update
66
67 $ruledb->{dbh}->do(
68 "UPDATE Object SET Value = ? WHERE ID = ?",
69 undef, $self->{level}, $self->{id});
70
71 } else {
72 # insert
73
74 my $sth = $ruledb->{dbh}->prepare(
75 "INSERT INTO Object (Objectgroup_ID, ObjectType, Value) " .
76 "VALUES (?, ?, ?);");
77
78 $sth->execute($self->ogroup, $self->otype, $self->{level});
79
80 $self->{id} = PMG::Utils::lastid ($ruledb->{dbh}, 'object_id_seq');
81 }
82
83 return $self->{id};
84 }
85
86 sub parse_addrlist {
87 my ($list) = @_;
88
89 my $adlist = {};
90
91 foreach my $addr (split ('\s*,\s*', $list)) {
92 $addr = lc $addr;
93 my $regex = $addr;
94 # SA like checks
95 $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ?
96 $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars
97 $regex =~ tr/?/./; # replace "?" with "."
98 $regex =~ s/\*+/\.\*/g; # replace "*" with ".*"
99
100 # we use a hash for extra fast testing
101 $adlist->{$addr} = "^${regex}\$";
102 }
103
104 return $adlist;
105 }
106
107 sub check_addrlist {
108 my ($list, $addrlst) = @_;
109
110 foreach my $addr (@$addrlst) {
111
112 $addr = lc $addr;
113
114 return 1 if defined ($list->{$addr});
115
116 study $addr;
117
118 foreach my $r (values %{$list}) {
119 if ($addr =~ qr/$r/i) {
120 return 1;
121 }
122 }
123 }
124
125 return 0;
126 }
127
128 sub get_blackwhite {
129 my ($dbh, $entity, $msginfo) = @_;
130
131 my $target_info = {};
132
133 my $targets = $msginfo->{targets};
134
135 my $cond = '';
136 foreach my $r (@$targets) {
137 my $pmail = $msginfo->{pmail}->{$r} || lc ($r);
138 my $qr = $dbh->quote ($pmail);
139 $cond .= " OR " if $cond;
140 $cond .= "pmail = $qr";
141 }
142
143 eval {
144 my $query = "SELECT * FROM UserPrefs WHERE " .
145 "($cond) AND (Name = 'BL' OR Name = 'WL')";
146 my $sth = $dbh->prepare($query);
147
148 $sth->execute();
149
150 while (my $ref = $sth->fetchrow_hashref()) {
151 my $pmail = lc ($ref->{pmail});
152 if ($ref->{name} eq 'WL') {
153 $target_info->{$pmail}->{whitelist} =
154 parse_addrlist($ref->{data});
155 } elsif ($ref->{name} eq 'BL') {
156 $target_info->{$pmail}->{blacklist} =
157 parse_addrlist($ref->{data});
158 }
159 }
160
161 $sth->finish;
162 };
163 if (my $err = $@) {
164 syslog('err', $err);
165 }
166
167 return $target_info;
168 }
169
170 sub what_match_targets {
171 my ($self, $queue, $entity, $msginfo, $dbh) = @_;
172
173 my $target_info;
174
175 if (!$queue->{spam_analyzed}) {
176 $self->analyze_spam($queue, $entity, $msginfo);
177 $queue->{blackwhite} = get_blackwhite($dbh, $entity, $msginfo);
178 $queue->{spam_analyzed} = 1;
179 }
180
181 if ($msginfo->{testmode}) {
182 $queue->{sa_score} = 100 if $queue->{sa_score} > 100;
183 my $data;
184 foreach my $s (@{$queue->{sa_data}}) {
185 next if $s->{rule} eq 'AWL';
186 push @$data, $s;
187 }
188 $queue->{sa_data} = $data;
189 }
190
191 if (defined($queue->{sa_score}) && $queue->{sa_score} >= $self->{level}) {
192
193 my $info = {
194 sa_score => $queue->{sa_score},
195 sa_max => $self->{level},
196 sa_data => $queue->{sa_data},
197 sa_hits => $queue->{sa_hits}
198 };
199
200 foreach my $t (@{$msginfo->{targets}}) {
201 my $list;
202 my $pmail = $msginfo->{pmail}->{$t} || $t;
203 if ($queue->{blackwhite}->{$pmail} &&
204 ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
205 check_addrlist($list, $queue->{all_from_addrs})) {
206 syslog('info', "%s: sender in user (%s) whitelist",
207 $queue->{logid}, $pmail);
208 } else {
209 $target_info->{$t}->{marks} = []; # never add additional marks here
210 $target_info->{$t}->{spaminfo} = $info;
211 }
212 }
213
214 } else {
215
216 foreach my $t (@{$msginfo->{targets}}) {
217 my $info = {
218 sa_score => 100,
219 sa_max => $self->{level},
220 sa_data => [{
221 rule => 'USER_IN_BLACKLIST',
222 score => 100,
223 desc => 'From: address is in the user black-list',
224 }],
225 sa_hits => 'USER_IN_BLACKLIST',
226 };
227
228 my $list;
229 my $pmail = $msginfo->{pmail}->{$t} || $t;
230 if ($queue->{blackwhite}->{$pmail} &&
231 ($list = $queue->{blackwhite}->{$pmail}->{blacklist}) &&
232 check_addrlist($list, $queue->{all_from_addrs})) {
233 $target_info->{$t}->{marks} = [];
234 $target_info->{$t}->{spaminfo} = $info;
235 syslog ('info', "%s: sender in user (%s) blacklist",
236 $queue->{logid}, $pmail);
237 }
238 }
239 }
240
241 return $target_info;
242 }
243
244 sub level {
245 my ($self, $v) = @_;
246
247 if (defined ($v)) {
248 $self->{level} = $v;
249 }
250
251 $self->{level};
252 }
253
254 sub short_desc {
255 my $self = shift;
256
257 return "Level $self->{level}";
258 }
259
260 sub __get_addr {
261 my ($head, $name) = @_;
262
263 my $result = $head->get($name);
264
265 return '' if !$result;
266
267 # copied from Mail::Spamassassin:PerMsgStatus _get()
268
269 $result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;'
270 $result =~ s/\s+/ /g; # reduce whitespace
271 $result =~ s/^\s+//; # leading whitespace
272 $result =~ s/\s+$//; # trailing whitespace
273
274 # Get the email address out of the header
275 # All of these should result in "jm@foo":
276 # jm@foo
277 # jm@foo (Foo Blah)
278 # jm@foo, jm@bar
279 # display: jm@foo (Foo Blah), jm@bar ;
280 # Foo Blah <jm@foo>
281 # "Foo Blah" <jm@foo>
282 # "'Foo Blah'" <jm@foo>
283 #
284 # strip out the (comments)
285 $result =~ s/\s*\(.*?\)//g;
286 # strip out the "quoted text", unless it's the only thing in the string
287 if ($result !~ /^".*"$/) {
288 $result =~ s/(?<!<)"[^"]*"(?!@)//g; #" emacs
289 }
290 # Foo Blah <jm@xxx> or <jm@xxx>
291 $result =~ s/^[^"<]*?<(.*?)>.*$/$1/;
292 # multiple addresses on one line? remove all but first
293 $result =~ s/,.*$//;
294
295 return $result;
296 }
297
298 # implement our own all_from_addrs()
299 # because we do not call spamassassin in canes of commtouch match
300 # see Mail::Spamassassin:PerMsgStatus for details
301 sub __all_from_addrs {
302 my ($head) = @_;
303
304 my @addrs;
305
306 my $resent = $head->get('Resent-From');
307 if (defined($resent) && $resent =~ /\S/) {
308 @addrs = Mail::SpamAssassin->find_all_addrs_in_line($resent);
309 } else {
310 @addrs = map { tr/././s; $_ } grep { $_ ne '' }
311 (__get_addr($head, 'From'), # std
312 __get_addr($head, 'Envelope-Sender'), # qmail: new-inject(1)
313 __get_addr($head, 'Resent-Sender'), # procmailrc manpage
314 __get_addr($head, 'X-Envelope-From'), # procmailrc manpage
315 __get_addr($head, 'EnvelopeFrom')); # SMTP envelope
316 }
317
318 # Remove duplicate addresses
319 my %addrs = map { $_ => 1 } @addrs;
320 @addrs = keys %addrs;
321
322 return @addrs;
323 }
324
325 sub analyze_spam {
326 my ($self, $queue, $entity, $msginfo) = @_;
327
328 my $maxspamsize = $msginfo->{maxspamsize};
329
330 $maxspamsize = 200*1024 if !$maxspamsize;
331
332 my ($sa_score, $sa_max, $sa_scores, $sa_sumary, $list, $autolearn, $bayes, $loglist);
333 $list = '';
334 $loglist = '';
335 $bayes = 'undefined';
336 $autolearn = 'no';
337 $sa_score = 0;
338 $sa_max = 5;
339
340 # do not run SA if license is not valid
341 if (!$queue->{lic_valid}) {
342 $queue->{sa_score} = 0;
343 return 0;
344 }
345
346 my $fromhash = { $queue->{from} => 1 };
347 foreach my $f (__all_from_addrs($entity->head())) {
348 $fromhash->{$f} = 1;
349 }
350 $queue->{all_from_addrs} = [ keys %$fromhash ];
351
352 if (my $hit = $queue->{clamav_heuristic}) {
353 my $score = $queue->{clamav_heuristic_score};
354 my $descr = "ClamAV heuristic test: $hit";
355 my $rule = 'ClamAVHeuristics';
356 $sa_score += $score;
357 $list .= $list ? ",$rule" : $rule;
358 $loglist .= $loglist ? ",$rule($score)" : "$rule($score)";
359 push @$sa_scores, { score => $score, rule => $rule, desc => $descr };
360 }
361
362 if (my $hit = $queue->{spam_custom}) {
363 my $score += $queue->{spam_custom};
364 my $descr = "Custom Check Script";
365 my $rule = 'CustomCheck';
366 $sa_score += $score;
367 $list .= $list ? ",$rule" : $rule;
368 $list .= $list ? ",$rule" : $rule;
369 $loglist .= $loglist ? ",$rule($score)" : "$rule($score)";
370 push @$sa_scores, { score => $score, rule => $rule, desc => $descr };
371 }
372
373 my ($csec, $usec) = gettimeofday ();
374
375 my $spamtest = $queue->{sa};
376
377 # only run SA in testmode or when clamav_heuristic did not confirm spam (score < 5)
378 if ($msginfo->{testmode} || ($sa_score < 5)) {
379
380 # save and disable alarm (SA forgets to clear alarm in some cases)
381 my $previous_alarm = alarm (0);
382
383 my $pid = $$;
384
385 eval {
386 $queue->{fh}->seek(0, 0);
387
388 # Truncate message to $maxspamsize
389 # Note: similar code to read content is used inside
390 # Mail::SpamAssassin::Message->new()
391 my $nread;
392 my $raw_str = '';
393 while ($nread = sysread($queue->{fh}, $raw_str, 16384, length($raw_str))) {
394 last if length($raw_str) >= $maxspamsize;
395 }
396 defined($nread) || die "error reading message: $!\n";
397
398 my $suppl_attrib = {};
399 if (length($raw_str) >= $maxspamsize &&
400 length($raw_str) < $queue->{bytes}) {
401 $suppl_attrib->{body_size} = $queue->{bytes};
402 }
403
404 my @message = split(/^/m, $raw_str, -1);
405 undef $raw_str; # free memory early
406
407 my $mail = $spamtest->parse(\@message, 0, $suppl_attrib);
408
409 # hack: pass envelope sender to spamassassin
410 $mail->header('X-Proxmox-Envelope-From', $queue->{from});
411
412 my $status = $spamtest->check($mail);
413
414 #my $fromhash = { $queue->{from} => 1 };
415 #foreach my $f ($status->all_from_addrs()) {
416 #$fromhash->{$f} = 1;
417 #}
418 #$queue->{all_from_addrs} = [ keys %$fromhash ];
419
420 $sa_score += $status->get_score();
421 $sa_max = $status->get_required_score();
422 $autolearn = $status->get_autolearn_status();
423
424 $bayes = defined($status->{bayes_score}) ?
425 sprintf('%0.2f', $status->{bayes_score}) : "undefined";
426
427 my $salist = $status->get_names_of_tests_hit();
428
429 foreach my $rule (split (/,/, $salist)) {
430 $list .= $list ? ",$rule" : $rule;
431 my $score = $status->{conf}->{scores}->{$rule};
432 $loglist .= $loglist ? ",$rule($score)" : "$rule($score)";
433 my $desc = $status->{conf}->get_description_for_rule($rule);
434 push @$sa_scores, { score => $score, rule => $rule, desc => $desc };
435 }
436
437 $status->finish();
438 $mail->finish();
439
440 alarm 0; # avoid race conditions
441 };
442 my $err = $@;
443
444 alarm ($previous_alarm);
445
446 # just to be sure - exit if SA produces a child process
447 if ($$ != $pid) {
448 syslog ('err', "WARNING: detected SA produced process - exiting");
449 POSIX::_exit (-1); # exit immediately
450 }
451
452 if ($err) {
453 syslog('err', $err);
454 $queue->{errors} = 1;
455 }
456 }
457
458 $sa_score = int ($sa_score);
459 $sa_score = 0 if $sa_score < 0;
460
461 my ($csec_end, $usec_end) = gettimeofday();
462 $queue->{ptime_spam} =
463 int (($csec_end-$csec)*1000 + ($usec_end - $usec)/1000);
464
465 syslog ('info', "%s: SA score=%s/%s time=%0.3f bayes=%s autolearn=%s hits=%s",
466 $queue->{logid}, $sa_score, $sa_max, $queue->{ptime_spam}/1000.0,
467 $bayes, $autolearn, $loglist);
468
469 $queue->{sa_score} = $sa_score;
470 $queue->{sa_max} = $sa_max;
471 $queue->{sa_data} = $sa_scores;
472 $queue->{sa_hits} = $list;
473
474 return ($sa_score >= $sa_max);
475 }
476
477 sub properties {
478 my ($class) = @_;
479
480 return {
481 spamlevel => {
482 description => "Spam Level",
483 type => 'integer',
484 minimum => 0,
485 },
486 };
487 }
488
489 sub get {
490 my ($self) = @_;
491
492 return { spamlevel => $self->{level} };
493 }
494
495 sub update {
496 my ($self, $param) = @_;
497
498 $self->{level} = $param->{spamlevel};
499 }
500
501 1;
502
503 __END__
504
505 =head1 PVE::RuleDB::Spam
506
507 Spam level filter