#!/usr/bin/python -BO
# usage: ifjunk cdb [ size ] [ threshold ]

from sys import argv, exit, path, stderr, stdin
# Remove the first entry of the module search path (for a script, the
# directory containing the script) before the remaining imports run.
# The list must be mutated in place: `path` here is the very list object
# that sys.path names, and merely rebinding the local name to a sliced
# copy would leave the interpreter's search path unchanged.
del path[:1]
from email import message_from_string
from email.errors import MessageParseError
from signal import alarm, signal, SIGALRM
from spambayes.cdb_classifier import CdbClassifier
from spambayes.tokenizer import tokenize


def die(code, x):
    """Emit the message x on stderr as a single line, then exit.

    The stream is flushed before sys.exit(code) is called, so the message
    is written out before the SystemExit propagates.
    """
    line = x + "\n"
    stderr.write(line)
    stderr.flush()
    exit(code)


# ---- command line -------------------------------------------------------
# argv[1] names the cdb file; argv[2] and argv[3] optionally override the
# defaults below.  Usage and argument errors exit with status 111.
if len(argv) < 2:
    die(111, "usage: ifjunk cdb [ size ] [ threshold ]")

size_limit = 204800  # default cap on how much of stdin is accepted
threshold = 0.57     # default probability cutoff
optional = argv[2:4]
try:
    if len(optional) == 2:
        # Both supplied: size comes first, threshold second.  The threshold
        # is converted first, so it is the one reported if both are bad.
        threshold = float(optional[1])
        size_limit = int(optional[0])
    elif optional:
        # A single extra argument: one containing a "." is taken as the
        # threshold, anything else as the size limit.
        lone = optional[0]
        if '.' in lone:
            threshold = float(lone)
        else:
            size_limit = int(lone)
except ValueError as err:
    die(111, "ifjunk: invalid argument: %s" % err)


# ---- watchdog -----------------------------------------------------------
def _on_alarm(signum, frame):
    """SIGALRM handler: give up with status 111."""
    die(111, "ifjunk: timed out")


signal(SIGALRM, _on_alarm)
alarm(5 * 60)  # five minutes for everything that follows

# ---- classifier ---------------------------------------------------------
try:
    classifier = CdbClassifier(open(argv[1], "rb"))
except IOError as err:
    die(111, "ifjunk: %s" % err)

# ---- message input ------------------------------------------------------
# A negative limit means "read everything".  Otherwise ask for one unit more
# than the limit so that an oversized message can be detected.
read_count = -1 if size_limit < 0 else size_limit + 1
raw = stdin.read(read_count)
if size_limit >= 0 and len(raw) > size_limit:
    die(1, "ifjunk: msg size")

try:
    message = message_from_string(raw)
except MessageParseError:
    # Anything the email parser cannot make sense of is treated as junk.
    die(0, "ifjunk: msg format")

# ---- verdict ------------------------------------------------------------
# Exit status 0 when the probability is above the threshold, 1 otherwise.
probability = classifier.spamprob(tokenize(message))
if probability <= threshold:
    status = 1
else:
    status = 0
die(status, "ifjunk: p %.2f" % probability)
