]> git.proxmox.com Git - pmg-api.git/blame - PMG/RuleDB/Spam.pm
fix otype attribute
[pmg-api.git] / PMG / RuleDB / Spam.pm
CommitLineData
758c7b6b
DM
1package PMG::RuleDB::Spam;
2
3use strict;
4use warnings;
5use Carp;
6use DBI;
7use Digest::SHA;
8use Time::HiRes qw (gettimeofday);
9
10use PVE::SafeSyslog;
11use Mail::SpamAssassin;
12
13use PMG::Utils;
14use PMG::RuleDB::Object;
15
16use base qw(PMG::RuleDB::Object);
17
18sub otype {
19 return 3000;
20}
21
22sub oclass {
23 return 'what';
24}
25
26sub otype_text {
27 return 'Spam Filter';
28}
29
30sub oicon {
31 return 'spam.gif';
32}
33
34
35sub oinfo {
36 return 'Mark all Spam-Emails.';
37}
38
39sub new {
40 my ($type, $level, $ogroup) = @_;
41
42 my $class = ref($type) || $type;
43
7a2cf7e6 44 my $self = $class->SUPER::new($class->otype(), $ogroup);
758c7b6b
DM
45
46 $level = 5 if !defined ($level);
47
48 $self->{level} = $level;
49
50 return $self;
51}
52
53sub load_attr {
54 my ($type, $ruledb, $id, $ogroup, $value) = @_;
55
56 my $class = ref($type) || $type;
57
58 defined($value) || croak "undefined value: ERROR";
59
60 my $obj = $class->new($value, $ogroup);
61 $obj->{id} = $id;
62
63 $obj->{digest} = Digest::SHA::sha1_hex($id, $value, $ogroup);
64
65 return $obj;
66}
67
68sub save {
69 my ($self, $ruledb) = @_;
70
71 defined($self->{ogroup}) || croak "undefined ogroup: ERROR";
72 defined($self->{level}) || croak "undefined spam level: ERROR";
73
74 if (defined ($self->{id})) {
75 # update
76
77 $ruledb->{dbh}->do(
78 "UPDATE Object SET Value = ? WHERE ID = ?",
79 undef, $self->{level}, $self->{id});
80
81 } else {
82 # insert
83
84 my $sth = $ruledb->{dbh}->prepare(
85 "INSERT INTO Object (Objectgroup_ID, ObjectType, Value) " .
86 "VALUES (?, ?, ?);");
87
88 $sth->execute($self->ogroup, $self->otype, $self->{level});
89
90 $self->{id} = PMG::Utils::lastid ($ruledb->{dbh}, 'object_id_seq');
91 }
92
93 return $self->{id};
94}
95
96sub parse_addrlist {
97 my ($list) = @_;
98
99 my $adlist = {};
100
101 foreach my $addr (split ('\s*,\s*', $list)) {
102 $addr = lc $addr;
103 my $regex = $addr;
104 # SA like checks
105 $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ?
106 $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars
107 $regex =~ tr/?/./; # replace "?" with "."
108 $regex =~ s/\*+/\.\*/g; # replace "*" with ".*"
109
110 # we use a hash for extra fast testing
111 $adlist->{$addr} = "^${regex}\$";
112 }
113
114 return $adlist;
115}
116
117sub check_addrlist {
118 my ($list, $addrlst) = @_;
119
120 foreach my $addr (@$addrlst) {
121
122 $addr = lc $addr;
123
124 return 1 if defined ($list->{$addr});
125
126 study $addr;
127
128 foreach my $r (values %{$list}) {
129 if ($addr =~ qr/$r/i) {
130 return 1;
131 }
132 }
133 }
134
135 return 0;
136}
137
138sub get_blackwhite {
139 my ($dbh, $entity, $msginfo) = @_;
140
141 my $target_info = {};
142
143 my $targets = $msginfo->{targets};
144
145 my $cond = '';
146 foreach my $r (@$targets) {
147 my $pmail = $msginfo->{pmail}->{$r} || lc ($r);
148 my $qr = $dbh->quote ($pmail);
149 $cond .= " OR " if $cond;
150 $cond .= "pmail = $qr";
151 }
152
153 eval {
154 my $query = "SELECT * FROM UserPrefs WHERE " .
155 "($cond) AND (Name = 'BL' OR Name = 'WL')";
156 my $sth = $dbh->prepare($query);
157
158 $sth->execute();
159
160 while (my $ref = $sth->fetchrow_hashref()) {
161 my $pmail = lc ($ref->{pmail});
162 if ($ref->{name} eq 'WL') {
163 $target_info->{$pmail}->{whitelist} =
164 parse_addrlist($ref->{data});
165 } elsif ($ref->{name} eq 'BL') {
166 $target_info->{$pmail}->{blacklist} =
167 parse_addrlist($ref->{data});
168 }
169 }
170
171 $sth->finish;
172 };
173 if (my $err = $@) {
174 syslog('err', $err);
175 }
176
177 return $target_info;
178}
179
180sub what_match_targets {
181 my ($self, $queue, $entity, $msginfo, $dbh) = @_;
182
183 my $target_info;
184
185 if (!$queue->{spam_analyzed}) {
186 $self->analyze_spam($queue, $entity, $msginfo);
187 $queue->{blackwhite} = get_blackwhite($dbh, $entity, $msginfo);
188 $queue->{spam_analyzed} = 1;
189 }
190
191 if ($msginfo->{testmode}) {
192 $queue->{sa_score} = 100 if $queue->{sa_score} > 100;
193 my $data;
194 foreach my $s (@{$queue->{sa_data}}) {
195 next if $s->{rule} eq 'AWL';
196 push @$data, $s;
197 }
198 $queue->{sa_data} = $data;
199 }
200
201 if (defined($queue->{sa_score}) && $queue->{sa_score} >= $self->{level}) {
202
203 my $info = {
204 sa_score => $queue->{sa_score},
205 sa_max => $self->{level},
206 sa_data => $queue->{sa_data},
207 sa_hits => $queue->{sa_hits}
208 };
209
210 foreach my $t (@{$msginfo->{targets}}) {
211 my $list;
212 my $pmail = $msginfo->{pmail}->{$t} || $t;
213 if ($queue->{blackwhite}->{$pmail} &&
214 ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
215 check_addrlist($list, $queue->{all_from_addrs})) {
216 syslog('info', "%s: sender in user (%s) whitelist",
217 $queue->{logid}, $pmail);
218 } else {
219 $target_info->{$t}->{marks} = []; # never add additional marks here
220 $target_info->{$t}->{spaminfo} = $info;
221 }
222 }
223
224 } else {
225
226 foreach my $t (@{$msginfo->{targets}}) {
227 my $info = {
228 sa_score => 100,
229 sa_max => $self->{level},
230 sa_data => [{
231 rule => 'USER_IN_BLACKLIST',
232 score => 100,
233 desc => 'From: address is in the user black-list',
234 }],
235 sa_hits => 'USER_IN_BLACKLIST',
236 };
237
238 my $list;
239 my $pmail = $msginfo->{pmail}->{$t} || $t;
240 if ($queue->{blackwhite}->{$pmail} &&
241 ($list = $queue->{blackwhite}->{$pmail}->{blacklist}) &&
242 check_addrlist($list, $queue->{all_from_addrs})) {
243 $target_info->{$t}->{marks} = [];
244 $target_info->{$t}->{spaminfo} = $info;
245 syslog ('info', "%s: sender in user (%s) blacklist",
246 $queue->{logid}, $pmail);
247 }
248 }
249 }
250
251 return $target_info;
252}
253
254sub level {
255 my ($self, $v) = @_;
256
257 if (defined ($v)) {
258 $self->{level} = $v;
259 }
260
261 $self->{level};
262}
263
264sub short_desc {
265 my $self = shift;
266
267 return "Level $self->{level}";
268}
269
270sub __get_addr {
271 my ($head, $name) = @_;
272
273 my $result = $head->get($name);
274
275 return '' if !$result;
276
277 # copied from Mail::Spamassassin:PerMsgStatus _get()
278
279 $result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;'
280 $result =~ s/\s+/ /g; # reduce whitespace
281 $result =~ s/^\s+//; # leading whitespace
282 $result =~ s/\s+$//; # trailing whitespace
283
284 # Get the email address out of the header
285 # All of these should result in "jm@foo":
286 # jm@foo
287 # jm@foo (Foo Blah)
288 # jm@foo, jm@bar
289 # display: jm@foo (Foo Blah), jm@bar ;
290 # Foo Blah <jm@foo>
291 # "Foo Blah" <jm@foo>
292 # "'Foo Blah'" <jm@foo>
293 #
294 # strip out the (comments)
295 $result =~ s/\s*\(.*?\)//g;
296 # strip out the "quoted text", unless it's the only thing in the string
297 if ($result !~ /^".*"$/) {
298 $result =~ s/(?<!<)"[^"]*"(?!@)//g; #" emacs
299 }
300 # Foo Blah <jm@xxx> or <jm@xxx>
301 $result =~ s/^[^"<]*?<(.*?)>.*$/$1/;
302 # multiple addresses on one line? remove all but first
303 $result =~ s/,.*$//;
304
305 return $result;
306}
307
308# implement our own all_from_addrs()
309# because we do not call spamassassin in canes of commtouch match
310# see Mail::Spamassassin:PerMsgStatus for details
311sub __all_from_addrs {
312 my ($head) = @_;
313
314 my @addrs;
315
316 my $resent = $head->get('Resent-From');
317 if (defined($resent) && $resent =~ /\S/) {
318 @addrs = Mail::SpamAssassin->find_all_addrs_in_line($resent);
319 } else {
320 @addrs = map { tr/././s; $_ } grep { $_ ne '' }
321 (__get_addr($head, 'From'), # std
322 __get_addr($head, 'Envelope-Sender'), # qmail: new-inject(1)
323 __get_addr($head, 'Resent-Sender'), # procmailrc manpage
324 __get_addr($head, 'X-Envelope-From'), # procmailrc manpage
325 __get_addr($head, 'EnvelopeFrom')); # SMTP envelope
326 }
327
328 # Remove duplicate addresses
329 my %addrs = map { $_ => 1 } @addrs;
330 @addrs = keys %addrs;
331
332 return @addrs;
333}
334
335sub analyze_spam {
336 my ($self, $queue, $entity, $msginfo) = @_;
337
338 my $maxspamsize = $msginfo->{maxspamsize};
339
340 $maxspamsize = 200*1024 if !$maxspamsize;
341
342 my ($sa_score, $sa_max, $sa_scores, $sa_sumary, $list, $autolearn, $bayes);
343 $list = '';
344 $bayes = 'undefined';
345 $autolearn = 'no';
346 $sa_score = 0;
347 $sa_max = 5;
348
349 # do not run SA if license is not valid
350 if (!$queue->{lic_valid}) {
351 $queue->{sa_score} = 0;
352 return 0;
353 }
354
355 my $fromhash = { $queue->{from} => 1 };
356 foreach my $f (__all_from_addrs($entity->head())) {
357 $fromhash->{$f} = 1;
358 }
359 $queue->{all_from_addrs} = [ keys %$fromhash ];
360
361 my ($csec, $usec) = gettimeofday ();
362
363 my $spamtest = $queue->{sa};
364
365 # only run SA in testmode or when commtouch did not confirm spam (score < 5)
366 # do not run SA if mail is too large
367 if (($queue->{bytes} <= $maxspamsize) &&
368 ($msginfo->{testmode} || ($sa_score < 5))) {
369
370 # save and disable alarm (SA forgets to clear alarm in some cases)
371 my $previous_alarm = alarm (0);
372
373 my $pid = $$;
374
375 eval {
376 $queue->{fh}->seek (0, 0);
377
378 *SATMP = \*{$queue->{fh}};
379 my $mail = $spamtest->parse(\*SATMP);
380
381 # hack: pass envelope sender to spamassassin
382 $mail->header('X-Proxmox-Envelope-From', $queue->{from});
383
384 my $status = $spamtest->check ($mail);
385
386 #my $fromhash = { $queue->{from} => 1 };
387 #foreach my $f ($status->all_from_addrs()) {
388 #$fromhash->{$f} = 1;
389 #}
390 #$queue->{all_from_addrs} = [ keys %$fromhash ];
391
392 $sa_score += $status->get_score();
393 $sa_max = $status->get_required_score();
394 $autolearn = $status->get_autolearn_status();
395
396 $bayes = defined($status->{bayes_score}) ?
397 $status->{bayes_score} : "undefined";
398
399 my $salist = $status->get_names_of_tests_hit();
400
401 foreach my $rule (split (/,/, $salist)) {
402 $list .= $list ? ",$rule" : $rule;
403 my $score = $status->{conf}->{scores}->{$rule};
404 my $desc = $status->{conf}->get_description_for_rule($rule);
405 push @$sa_scores, { score => $score, rule => $rule, desc => $desc };
406 }
407
408 $status->finish();
409 $mail->finish();
410
411 alarm 0; # avoid race conditions
412 };
413 my $err = $@;
414
415 alarm ($previous_alarm);
416
417 # just to be sure - exit if SA produces a child process
418 if ($$ != $pid) {
419 syslog ('err', "WARNING: detected SA produced process - exiting");
420 POSIX::_exit (-1); # exit immediately
421 }
422
423 if ($err) {
424 syslog('err', $err);
425 $queue->{errors} = 1;
426 }
427 }
428
429 $sa_score = int ($sa_score);
430 $sa_score = 0 if $sa_score < 0;
431
432 my ($csec_end, $usec_end) = gettimeofday();
433 $queue->{ptime_spam} =
434 int (($csec_end-$csec)*1000 + ($usec_end - $usec)/1000);
435
436 syslog ('info', "%s: SA score=%s/%s time=%0.3f bayes=%s autolearn=%s hits=%s",
437 $queue->{logid}, $sa_score, $sa_max, $queue->{ptime_spam}/1000.0,
438 $bayes, $autolearn, $list);
439
440 $queue->{sa_score} = $sa_score;
441 $queue->{sa_max} = $sa_max;
442 $queue->{sa_data} = $sa_scores;
443 $queue->{sa_hits} = $list;
444
445 return ($sa_score >= $sa_max);
446}
447
4481;
449
450__END__
451
452=head1 PVE::RuleDB::Spam
453
454Spam level filter