4 # Licensed to the Apache Software Foundation (ASF) under one or more
5 # contributor license agreements. See the NOTICE file distributed with
6 # this work for additional information regarding copyright ownership.
7 # The ASF licenses this file to you under the Apache License, Version 2.0
8 # (the "License"); you may not use this file except in compliance with
9 # the License. You may obtain a copy of the License at:
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
24 my $PREFIX = '@@PREFIX@@'; # substituted at 'make' time
25 my $DEF_RULES_DIR = '@@DEF_RULES_DIR@@'; # substituted at 'make' time
26 my $LOCAL_RULES_DIR = '@@LOCAL_RULES_DIR@@'; # substituted at 'make' time
27 my $LOCAL_STATE_DIR = '@@LOCAL_STATE_DIR@@'; # substituted at 'make' time
28 my $RE2C_BIN = '@@RE2C_BIN@@'; # substituted at 'make' time
29 use lib
'@@INSTALLSITELIB@@'; # substituted at 'make' time
34 use POSIX
qw(locale_h setsid sigprocmask _exit);
36 POSIX
::setlocale
(LC_TIME
,'C');
38 BEGIN { # see comments in "spamassassin.raw" for doco
39 my @bin = File
::Spec-
>splitpath($0);
40 my $bin = ($bin[0] ? File
::Spec-
>catpath(@bin[0..1], '') : $bin[1])
41 || File
::Spec-
>curdir;
43 if (-e
$bin.'/lib/Mail/SpamAssassin.pm'
44 || !-e
'@@INSTALLSITELIB@@/Mail/SpamAssassin.pm' )
47 $searchrelative = 1; # disabled during "make install": REMOVEFORINST
48 if ($searchrelative && $bin eq '../' && -e
'../blib/lib/Mail/SpamAssassin.pm')
50 unshift ( @INC, '../blib/lib' );
52 foreach ( qw(lib ../lib/site_perl
53 ../lib/spamassassin ../share/spamassassin/lib))
55 my $dir = File
::Spec-
>catdir( $bin, split ( '/', $_ ) );
56 if ( -f File
::Spec-
>catfile( $dir, "Mail", "SpamAssassin.pm" ) )
57 { unshift ( @INC, $dir ); last; }
63 use Mail
::SpamAssassin
;
64 use Mail
::SpamAssassin
::Util
qw(untaint_var exit_status_str);
65 use Mail
::SpamAssassin
::Logger
;
73 Mail
::SpamAssassin
::Util
::clean_path_in_taint_mode
();
76 Getopt
::Long
::Configure
(
77 qw(bundling no_getopt_compat
78 permute no_auto_abbrev no_ignore_case)
82 'list' => \
$opt{'list'},
83 'sudo' => \
$opt{'sudo'},
84 'quiet' => \
$opt{'quiet'},
85 'keep-tmps' => \
$opt{'keep-tmps'},
87 'configpath|config-file|config-dir|c|C=s' => \
$opt{'configpath'},
88 'prefspath|prefs-file|p=s' => \
$opt{'prefspath'},
89 'siteconfigpath=s' => \
$opt{'siteconfigpath'},
90 'updatedir=s' => \
$opt{'updatedir'},
91 'cf=s' => \
@{$opt{'cf'}},
92 'debug|D:s' => \
$opt{'debug'},
93 'help|h|?' => \
$opt{'help'},
94 'version|V' => \
$opt{'version'},
96 or usage
( 0, "Unknown option!" );
98 if ( defined $opt{'help'} ) {
99 usage
( 0, "For more information read the manual page" );
101 if ( defined $opt{'version'} ) {
102 print "SpamAssassin version " . Mail
::SpamAssassin
::Version
() . "\n"
103 or die "error writing: $!";
107 # Check for some dependencies and provide useful error messages if they aren't
109 eval("use ExtUtils::MakeMaker");
111 print "$0 requires ExtUtils::MakeMaker for proper operation.\n"
112 or die "error writing: $!";
115 unless (qx($RE2C_BIN -V)) {
116 print "$0 requires $RE2C_BIN for proper operation.\n"
117 or die "error writing: $!";
122 my ( $exitval, $message ) = @_;
127 print("\n") or die "error writing: $!";
131 -message
=> $message,
132 -exitval
=> $exitval,
136 # set debug areas, if any specified (only useful for command-line tools)
137 if (defined $opt{'debug'}) {
138 $opt{'debug'} ||= 'all';
142 $opt{'debug'} ||= 'info';
144 my $quiet = $opt{'quiet'} || 0;
146 # ensure the body-rule base extractor plugin is loaded, we use that
148 loadplugin Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor
150 ).join("\n", @{$opt{'cf'}})."\n";
152 my $spamtest = Mail
::SpamAssassin-
>new(
154 rules_filename
=> $opt{'configpath'},
155 site_rules_filename
=> $opt{'siteconfigpath'},
156 userprefs_filename
=> $opt{'prefspath'},
157 debug
=> $opt{'debug'},
158 local_tests_only
=> 1,
159 dont_copy_prefs
=> 1,
161 DEF_RULES_DIR
=> $DEF_RULES_DIR,
162 LOCAL_RULES_DIR
=> $LOCAL_RULES_DIR,
163 LOCAL_STATE_DIR
=> $LOCAL_STATE_DIR,
164 post_config_text
=> $post_config,
168 # appropriate BodyRuleBaseExtractor settings for rule2xs usage
169 $spamtest->{base_extract
} = 1;
170 $spamtest->{bases_must_be_casei
} = 1;
171 $spamtest->{bases_can_use_alternations
} = 0;
172 $spamtest->{bases_can_use_quantifiers
} = 0;
173 $spamtest->{bases_can_use_char_classes
} = 0;
174 $spamtest->{bases_split_out_alternations
} = 1;
175 $spamtest->{base_quiet
} = $quiet;
177 if (defined $opt{'updatedir'}) {
178 $opt{'updatedir'} = Mail
::SpamAssassin
::Util
::untaint_file_path
($opt{'updatedir'});
181 $opt{'updatedir'} = $spamtest->sed_path('__local_state_dir__/compiled/__perl_major_ver__/__version__');
183 my $installdir = $opt{'updatedir'};
184 if ((!defined $opt{'list'})
186 && -d
$installdir && !-w
$installdir)
188 die "sa-compile: cannot write to $installdir, aborting\n";
191 $spamtest->{bases_cache_dir
} = $spamtest->get_and_create_userstate_dir()
192 ."/sa-compile.cache";
195 my $conf = $spamtest->{conf
};
197 # this actually extracts the base rules in the plugin, as a side-effect
198 my $res = $spamtest->lint_rules();
200 die "sa-compile: not compiling; 'spamassassin --lint' check failed!\n";
203 if ( defined $opt{'list'} ) {
204 foreach my $ruletype (sort keys %{$conf->{base_orig
}}) {
205 print dump_base_strings
($ruletype);
209 compile_base_strings
();
214 # make sure we notice any write errors while flushing output buffer
215 close STDOUT
or die "error closing STDOUT: $!";
216 close STDIN
or die "error closing STDIN: $!";
220 ##############################################################################
222 sub dump_base_strings
{
225 my $s = "name $ruletype\n";
227 foreach my $key1 (sort keys %{$conf->{base_orig
}->{$ruletype}}) {
228 $s .= "orig $key1 $conf->{base_orig}->{$ruletype}->{$key1}\n";
231 foreach my $key (sort keys %{$conf->{base_string
}->{$ruletype}}) {
232 $s .= "r $key:$conf->{base_string}->{$ruletype}->{$key}\n";
237 ##############################################################################
244 base_orig
=> $conf->{base_orig
}->{$ruletype},
245 base_string
=> $conf->{base_string
}->{$ruletype}
247 my $s = Data
::Dumper-
>Dump([ \
%todump ], [qw(bases)]);
251 ##############################################################################
253 sub compile_base_strings
{
254 my $dirpath = Mail
::SpamAssassin
::Util
::secure_tmpdir
();
255 die "secure_tmpdir failed" unless $dirpath && -w
$dirpath;
257 my $sudo = ($opt{sudo
} ?
'sudo ' : '');
259 foreach my $ruletype (sort keys %{$conf->{base_orig
}})
261 # create the bases.in file
263 my $basespath = "bases_$ruletype.in";
264 $basespath =~ s/[^A-Za-z0-9_\.]/_/gs;
265 open OUT
, ">$dirpath/$basespath"
266 or die "cannot create $dirpath/$basespath: $!";
267 print OUT dump_base_strings
($ruletype)
268 or die "error writing to $dirpath/$basespath: $!";
270 or die "error closing $dirpath/$basespath: $!";
274 chdir $dirpath or die "cannot chdir to $dirpath: $!";
275 if (!$quiet) { print "cd $dirpath\n" or die "error writing: $!" }
281 $log = ">>$dirpath/log";
283 open(ZERO
, ">$dirpath/log") or die "cannot create $dirpath/log: $!";
284 close ZERO
or die "error closing $dirpath/log: $!";
287 run
(get_perl
()." Makefile.PL ".
288 "PREFIX=$dirpath/ignored INSTALLSITEARCH=$installdir $log");
290 run
($Config{make
}." PREFIX=$dirpath/ignored INSTALLSITEARCH=$installdir $log"); #change to $Config{make}. bug 7294
291 run
($sudo.$Config{make
}." install PREFIX=$dirpath/ignored INSTALLSITEARCH=$installdir $log"); # into $installdir
293 # and generate the bases.pl file, for perl consumers
295 my $plpath = "bases_$ruletype.pl";
296 $plpath =~ s/[^A-Za-z0-9_\.]/_/gs;
297 open(OUT
, ">$dirpath/$plpath")
298 or die "cannot create $dirpath/$plpath: $!";
299 print OUT dump_as_perl
($ruletype)
300 or die "error writing to $dirpath/$plpath: $!";
302 or die "error closing $dirpath/$plpath: $!";
304 run
($sudo."cp $dirpath/$plpath $installdir/$plpath");
307 if (!$opt{'keep-tmps'}) {
310 # saves trouble on MacOS, possibly
311 print "cd /\n" or die "error writing: $!";
313 run
($sudo."rm -rf $dirpath"); # cleanup
316 print "temporary dir left due to --keep-tmps: $dirpath\n"
317 or die "error writing: $!";
323 if (!$quiet) { print join(' ',@cmd)."\n" or die "error writing: $!" }
324 if (system(@cmd) != 0) {
325 my $msg = $? == -1 ?
"failed to execute: $!"
326 : "failed: ".exit_status_str
($?);
327 if (!$quiet) { die "command ".$msg."\n" }
328 else { die "command '".join(' ',@cmd)."' ".$msg."\n" }
340 $perl = $Config{perlpath
};
341 $perl =~ s
|/[^/]*$|/$^X|;
343 return untaint_var
($perl);
346 ##############################################################################
348 use constant MAX_RULES_PER_C_FILE
=> 200;
355 if (!$quiet) { print "reading $FILE\n" or die "error writing: $!" }
356 open(my $fh, $FILE) or die "cannot open $FILE: $!";
357 # read ruleset name from the first line in the file
360 defined $_ or die "error reading $FILE: $!";
362 if (/^name\s+(\S+)/) {
363 $ruleset_name = untaint_var
($1);
367 $modname = "Mail::SpamAssassin::CompiledRegexps::$ruleset_name";
370 our $PATH = $modname;
372 $PATH =~ s/[^-_A-Za-z0-9\.]/_/g;
373 our $PMFILE = $modname;
376 our $XSFILE = $PMFILE;
377 $XSFILE =~ s/\.pm$/.xs/;
379 $force and system("rm -rf $PATH");
380 mkdir $PATH or (!$force and die "mkdir($PATH): $!");
382 if (!$quiet) { print "cd $PATH\n" or die "error writing: $!" }
# Build the prefix used for the generated C scanner function names
# (${cprefix}_scanN): collapse every run of non-alphanumeric characters in
# $modname into a single underscore.  The class must be A-Za-z; the range
# A-z also spans the ASCII punctuation [ \ ] ^ _ ` and would let those
# characters leak into the C identifiers.
384 my $cprefix = $modname; $cprefix =~ s/[^A-Za-z0-9]+/_/gs;
392 open(my $re, ">scanner${numscans}.re")
393 or die "cannot create scanner${numscans}.re: $!";
395 print $re <<EOT or die "error writing: $!";
396 #define NULL ((char*) 0)
397 #define YYCTYPE unsigned char
403 /* backtrack to return other, semi-overlapped tokens; e.g.
404 allow "abcdef" to return both "abc" and "cde" as tokens */
405 #define RET(x) { YYCURSOR = YYMARKER; return (x); }
408 print $re <<EOT or die "error writing: $!";
409 char *${cprefix}_scan${numscans}(unsigned char **p){
410 unsigned char *q = 1 + *p;
416 for ($!=0; <$fh>; $!=0) {
420 if (/^orig\s+(\S+)\s+(.*)$/) {
423 $name =~ s/#/[hash]/gs;
424 $regexp =~ s/#/[hash]/gs;
425 $has_rules .= " q#$name# => q#$regexp#,\n";
430 my ($regexp, $reason) = /^r (.*):(.*)$/;
431 die "no 'r REGEXP:REASON' in $_" unless defined $regexp;
435 Mail
::SpamAssassin
::Plugin
::BodyRuleBaseExtractor
::fixup_re
($regexp),
436 " {RET(\"$reason\");}\n"
437 or die "error writing
: $!";
440 my $eval_stat = $@ ne '' ? $@ : "errno
=$!"; chomp $eval_stat;
441 handle_fixup_error($eval_stat, $regexp, $reason);
443 last if $line == MAX_RULES_PER_C_FILE;
445 defined $_ || $!==0 or
446 $!==EBADF ? dbg("error reading from
$FILE: $!")
447 : die "error reading from
$FILE: $!";
449 print $re <<EOT or die "error writing
: $!";
450 [\\000-\\377] { return NULL; }
458 my $cmd = "$RE2C_BIN -i -b -o scanner$_.c scanner$_.re";
460 # this must be fatal; it can result in corrupt output modules missing
461 # scannerN() functions
462 my $cwd = `pwd`; chop $cwd;
463 die "'$cmd' failed, dying!\n".
464 "Have you got a sufficiently-recent version of $RE2C_BIN?\n".
465 "see $cwd/scanner$_.re\n";
469 my $ccopt = $Config{optimize
}; # typically "-O2"
471 open(FILE
, ">Makefile.PL") or die "cannot create Makefile.PL: $!";
472 print FILE
<<"EOT" or die "error writing to Makefile.PL: $!";
473 use ExtUtils::MakeMaker;
476 'NAME' => '$modname',
477 'VERSION_FROM' => '$PMFILE',
478 'ABSTRACT_FROM' => '$PMFILE',
479 'OBJECT' => '\$(O_FILES)',
480 'OPTIMIZE' => '$ccopt',
481 'AUTHOR' => 'A. U. Tomated <automated\@example.com>',
484 close FILE
or die "error closing Makefile.PL: $!";
486 open(FILE
, ">MANIFEST.SKIP") or die "cannot create MANIFEST.SKIP: $!";
487 print FILE
<<'EOT' or die "error writing to MANIFEST.SKIP: $!";
505 close FILE
or die "error closing MANIFEST.SKIP: $!";
507 open(my $re, ">$XSFILE") or die "cannot create $XSFILE: $!";
508 print $re <<"EOT" or die "error writing to $XSFILE: $!";
513 /* bug 5556: newSVpvn_share() is not a defined API in perl 5.6.x.
514 * Thankfully we can use newSVpvn() without much harm, losing only
515 * a tiny bit of performance (I'd reckon ;).
517 #ifdef newSVpvn_share
518 #define my_newSVpvn_share(x,y,z) newSVpvn_share(x,y,z)
520 #define my_newSVpvn_share(x,y,z) newSVpvn(x,y)
523 /* split single-space-separated result string */
525 split_and_add (AV *results, char *match)
527 char *wordstart, *cp;
529 for (cp = wordstart = match; *cp != (unsigned char) 0; cp++) {
532 my_newSVpvn_share(wordstart, cp-wordstart, (U32)0));
537 my_newSVpvn_share(wordstart, cp-wordstart, (U32)0));
542 # use a buffer string here instead of writing direct to the file,
543 # so we can prepend 'extern' statements (bug 5534)
544 my $xscode = <<"EOT";
546 MODULE = $modname PACKAGE = $modname
556 unsigned char *cursor;
557 unsigned char *pstart;
563 pstart = (unsigned char *) SvPV(psv, plen);
564 pend = pstart + plen;
565 results = (AV *) sv_2mortal((SV *) newAV());
570 my $funcname = $cprefix."_scan".$_;
576 extern char *${funcname} (unsigned char
**);
579 # and append this one
583 while (cursor < pend) {
584 while ((match = ${funcname} (\
&cursor
)) != NULL
) {
585 split_and_add
(results
, match
);
593 print $re $xscode or die "error writing: $!";
594 print $re <<EOT or die "error writing: $!";;
595 RETVAL = newRV((SV *) results);
601 close($re) or die "error closing $XSFILE: $!";
603 open(FILE
, ">$PMFILE") or die "cannot create $PMFILE: $!";
613 our \$VERSION = '1.0';
614 our \@ISA = qw(XSLoader);
615 our \
@EXPORT_OK = qw();
621 XSLoader
::load
'$modname', \
$VERSION;
629 $modname - Efficient string matching
for regexps found
in $FILE
636 my \
$match = ${modname
}::scan
(\
$string);
638 fnord
=head1 DESCRIPTION
640 This module was created by SpamAssassin with the aid of re2xs
, which uses re2c
641 to create an XS library capable of scanning through a bunch of regular
642 expressions as
defined in F
<$FILE>.
644 See C
<sa-compile
> for more details
.
649 $str =~ s/^fnord//gm;
650 print FILE
$str or die "error writing to $PMFILE: $!";
651 close FILE
or die "error closing $PMFILE: $!";
654 sub handle_fixup_error
{
655 my ($strat, $regexp, $reason) = @_;
657 warn "skipped: $regexp: $strat";
661 ##############################################################################
665 sa-compile - compile SpamAssassin ruleset into native code
669 B<sa-compile> [options]
673 --list Output base string list to STDOUT
674 --sudo Use 'sudo' for privilege escalation
675 --keep-tmps Keep temporary files instead of deleting
676 -C path, --configpath=path, --config-file=path
677 Path to standard configuration dir
678 -p prefs, --prefspath=file, --prefs-file=file
679 Set user preferences file
680 --siteconfigpath=path Path for site configs
681 (default: @@PREFIX@@/etc/mail/spamassassin)
682 --updatedir=path Directory to place updates
683 (default: @@LOCAL_STATE_DIR@@/compiled/<perlversion>/@@VERSION@@)
684 --cf='config line' Additional line of configuration
685 -D, --debug [area=n,...] Print debugging messages
686 -V, --version Print version
687 -h, --help Print usage message
691 sa-compile uses C<re2c> to compile the site-wide parts of the SpamAssassin
692 ruleset. No part of user_prefs or any files included from user_prefs can be
693 built into the compiled set.
695 This compiled set is then used by the
696 C<Mail::SpamAssassin::Plugin::Rule2XSBody> plugin to speed up
697 SpamAssassin's operation, where possible, and when that plugin is loaded.
699 C<re2c> can match strings much faster than perl code, by constructing a DFA to
700 match many simple strings in parallel, and compiling that to native object
701 code. Not all SpamAssassin rules are amenable to this conversion, however.
703 This requires that C<re2c> (see C<https://re2c.org/>) and the C
704 compiler used to build Perl XS modules be installed.
706 Note that running this, and creating a compiled ruleset, will have no
707 effect on SpamAssassin scanning speeds unless you also edit your C<v320.pre>
708 file and ensure this line is uncommented:
710 loadplugin Mail::SpamAssassin::Plugin::Rule2XSBody
712 Additionally, "sa-compile" will not restart "spamd" or otherwise cause a scanner to
713 reload the now-compiled ruleset automatically.
721 Output the extracted base strings to STDOUT, instead of generating
722 the C extension code.
726 Use C<sudo(8)> to run code as 'root' when writing files to the compiled-rules
727 storage area (which is C<@@LOCAL_STATE_DIR@@/compiled/@@PERL_MAJOR_VER@@/@@VERSION@@> by default).
731 Produce less diagnostic output. Errors will still be displayed.
735 Keep temporary files after the script completes, instead of
738 =item B<-C> I<path>, B<--configpath>=I<path>, B<--config-file>=I<path>
740 Use the specified path for locating the distributed configuration files.
741 Ignore the default directories (usually C</usr/share/spamassassin> or similar).
743 =item B<--siteconfigpath>=I<path>
745 Use the specified path for locating site-specific configuration files. Ignore
746 the default directories (usually C</etc/mail/spamassassin> or similar).
750 By default, C<sa-compile> will use the system-wide rules update directory:
752 @@LOCAL_STATE_DIR@@/compiled/@@PERL_MAJOR_VER@@/@@VERSION@@
754 If the updates should be stored in another location, specify it here.
756 Note that use of this option is not recommended; if sa-compile is placing the
757 compiled rules in the wrong directory, you probably need to rebuild SpamAssassin
758 with different C<Makefile.PL> arguments, instead of overriding sa-compile's
761 =item B<--cf='config line'>
763 Add additional lines of configuration directly from the command-line, parsed
764 after the configuration files are read. Multiple B<--cf> arguments can be
765 used, and each will be considered a separate line of configuration.
767 =item B<-p> I<prefs>, B<--prefspath>=I<prefs>, B<--prefs-file>=I<prefs>
769 Read user score preferences from I<prefs> (usually
770 C<$HOME/.spamassassin/user_prefs>).
772 =item B<-D> [I<area,...>], B<--debug> [I<area,...>]
774 Produce debugging output. If no areas are listed, all debugging information is
775 printed. Diagnostic output can also be enabled for each area individually;
776 I<area> is the area of the code to instrument.
778 For more information about which areas (also known as channels) are
779 available, please see the documentation at
780 L<https://wiki.apache.org/spamassassin/DebugChannels>.
782 =item B<-h>, B<--help>
784 Print help message and exit.
786 =item B<-V>, B<--version>
788 Print sa-compile version and exit.
794 Mail::SpamAssassin(3)
800 C<Mail::SpamAssassin>
802 C<Mail::SpamAssassin::Plugin::Rule2XSBody>
806 See L<https://issues.apache.org/SpamAssassin/>
810 The Apache SpamAssassin(tm) Project L<https://spamassassin.apache.org/>
812 =head1 LICENSE AND COPYRIGHT
814 SpamAssassin is distributed under the Apache License, Version 2.0, as
815 described in the file C<LICENSE> included with the distribution.
817 Copyright (C) 2015 The Apache Software Foundation