# -*- cperl -*-
#
# Copyright (c) 2003  Robert James Kaes <rjkaes@flarenet.com>
#   All rights reserved.
#
# This plugin is released under the same license as Qpsmtpd.
#

sub register {
    my ($self, $qp, @args) = @_;

    $self->register_hook("mail", "mailpattern");
    $self->register_hook("rcpt", "rcptpattern");

    # Default action is a hard reject
    $self->{_wildmat_pattern_return} = DENY;

    if (@args > 0) {
	my $method = $args[0];
	$self->log(1, "WARNING: Invalid argument.  Must be 'hard' or 'soft'.")
	    unless $method =~ m/^(hard|soft)$/;
	$self->log(1, "WARNING: Ignoring additional arguments.")
	    if (@args > 1);

	$self->{_wildmat_pattern_return} = ($method eq 'soft') ? DENYSOFT : DENY;
    }
}

sub mailpattern {
    my ($self, $transaction, $sender) = @_;

    my @patterns = $self->qp->config('badmailpatterns');
    return DECLINED unless @patterns;

    my $email = $sender->parse;
    return ($self->{_wildmat_pattern_return}, "Invalid SENDER address: $email")
	if wildmat($email, @patterns);

    return DECLINED;
}

sub rcptpattern {
    my ($self, $transaction, $rcpt) = @_;

    my @patterns = $self->qp->config('badrcptpatterns');
    return DECLINED unless @patterns;

    my $email = $rcpt->parse;
    return ($self->{_wildmat_pattern_return}, "Invalid RECIPIENT address: $email")
	if wildmat($email, @patterns);

    return DECLINED;
}

#
# Take a pattern and a text string and do the test
#
sub wildmat {
    my ($text, @patterns) = @_;

    my $match = 0;
    foreach my $pat (@patterns) {
	my $inv = ($pat =~ s/^!//) ? 1 : 0;
	my $regex = wildmat2regex($pat);

	if ($text =~ m/$regex/) {
	    $match += ($inv) ? -1 : 1;
	}
    }

    return ($match > 0) ? 1 : 0;
}

#
# turns a wildmat into a regexp
#
# http://gd.tuwien.ac.at/infosys/newsreaders/tin/tools/w2r.pl
#
sub wildmat2regex {
	my $wild = shift;	# input line
	my $cchar = "";		# current char
	my $lchar = "";		# last char
	my $reg = "";		# translated char
	my $bmode = 0;		# inside [] ?
	my $rval = "";		# output line

	# break line into chars
	while ($wild =~ s/(.)//) {
		$cchar = $1;

		# if char is a [, and we arn't allreay in []
		if ($lchar !~ m/\\/o && $cchar =~ m/\[/o) {
			$bmode++;
			$reg = $cchar;
		}

		# if char is a ], and we were in []
		if ($lchar !~ m/\\/o && $cchar =~ m/\]/o) {
			$bmode--;
			$reg = $cchar;
		}

		# usual cases
		if ($bmode == 0 && $lchar !~ m/\\/o) {
			$reg = $cchar;
			$reg =~ s/\t/\\t/o;	# translate tabs
			$reg =~ s/\./\\./o;	# quote .
			$reg =~ s/\)/\\)/o;	# quote )
			$reg =~ s/\(/\\(/o;	# quote (
			$reg =~ s/\*/\.*/o;	# translate *
			$reg =~ s/\?/\./o;	# translate ?
			$reg =~ s/\^/\\^/o;	# quote ^
			$reg =~ s/\$/\\\$/o;	# quote $
		}

		# if last char was a qute, current char can't be a meta
		if ($lchar =~ m/\\/o || $bmode != 0) {
			$reg = $cchar;
			$cchar =~ s/\\//o;	# skip 2nd \\ inside []
		}

		$lchar = $cchar;	# store last char
		$rval = $rval.$reg;	# build return string
	}

	# common abbreviations
	#
	# replace [0-9] with [\d] in the first []
	# TODO: make this global
	$rval =~ s/^([^\[]*)\[0-9\]/$1\[\\d\]/o;
	# replace [a-zA-Z_] with [\w] in the first []
	# TODO: make this global
	$rval =~ s/([^\[]*)\[a-za-z0-9_\]/$1\[\\w\]/io;

	# optimizations
	#
	# add ^-anchor if needed
	$rval =~ s/^(?!\.\*)(.*)/\^$1/o;
	# add $-anchor if needed
	$rval =~ s/^((?:.*)(?:[^.][^*]))$/$1\$/o;
	# remove leading .* if allowed
	$rval =~ s/^\.\*(?!$)//o;
	# remove tailing .* if allowed
	$rval =~ s/(.+)\.\*$/$1/o;

	return $rval;
}

__END__

=pod

=head1 NAME

check_badpatterns - Check MAIL FROM and RCPT TO address against pattern

=head1 DESCRIPTION

Include support for the B<SPAMCONTROL> patch's F<badmailpattern> and
F<badrcptpattern> files.  These files work like the F<badmailfrom> and
F<badrcptto> control files, but use the L<wildmat(3)> pattern format.

=head1 CONFIGURATION

Takes one parameter indicating what type of return error should be sent
to the client:

=over

=item C<hard>

Return a 550 response code to indicate a permanent error.

=item C<soft>

Return a 450 response code.  This is a temporary error and is useful
if you would like to tie up the resources of the spammer.

=back

The default return error is C<hard>.

=head1 PATTERN FORMAT

Patterns are one per line and use the L<wildmat(3)> pattern style.  This
type of pattern matching is not as flexible as Perl's regular expressions,
but since the B<SPAMCONTROL> patch uses wildmat, so does this plugin.

One extension of the wildmat format is the NOT (C<!>) special character at
the beginning of a pattern.  For example, if the F<badmailpatterns> file
contained:

 *@example.com
 !fred@example.com
 *@domain.com

then any mail sent from C<*@example.com> would be blocked, I<except> if it
was from C<fred@example.com>.  In addition, email from C<*@domain.com>
would also be blocked.

=head1 FILES

=over

=item F<badmailpatterns>

Match the email address supplied in the C<MAIL FROM> command against a
list of patterns.

=item F<badrcptpatterns>

Match the email address supplied in the C<RCPT TO> command against a
list of patterns.

=back

=head1 HISTORY

=over

=item 1.0 - 2003-08-30

Initial release.

=back

=head1 AUTHOR

Copyright (c) 2003  L<Robert James Kaes|mailto:rjkaes@flarenet.com>

=head1 SEE ALSO

L<wildmat(3)>

=cut
