#!/usr/bin/python -BO
# usage: ifjunk cdb [ size ] [ threshold ]
#
# Read one mail message from stdin, score it with the SpamBayes classifier
# stored in the file `cdb`, and report the verdict through the exit status:
#
#   0    the message is junk (probability > threshold), or the email parser
#        could not parse it
#   1    the message is not junk (probability <= threshold), or it is larger
#        than `size` characters and was therefore not classified
#   111  usage error, invalid argument, unreadable cdb, or timeout
#
# A one-line diagnostic is always written to stderr before exiting.
#
# Arguments:
#   cdb        path of the classifier database, opened in binary mode
#   size       maximum message size to classify (default 204800); a negative
#              value disables the limit
#   threshold  probability above which a message is junk (default 0.57)
#
# When only one optional argument is given it is taken as the threshold if it
# contains a '.', otherwise as the size.

from sys import argv, exit, path, stderr, stdin

# Drop the first sys.path entry before the remaining imports are resolved.
# This must mutate the list in place: rebinding the imported name
# (`path = path[1:]`) only changes this module's variable and leaves the
# interpreter's real search path untouched.  `del path[:1]` is also safe when
# the list is empty.
# NOTE(review): presumably the goal is to keep files next to the script from
# shadowing library modules - confirm spambayes is not installed there.
del path[:1]

from email import message_from_string
from email.errors import MessageParseError
from signal import alarm, signal, SIGALRM
from spambayes.cdb_classifier import CdbClassifier
from spambayes.tokenizer import tokenize


def die(code, x):
    """Write x to stderr and exit(code)."""
    stderr.write(x + "\n")
    stderr.flush()
    exit(code)


if len(argv) < 2:
    die(111, "usage: ifjunk cdb [ size ] [ threshold ]")

# Defaults, overridable from the command line.
size_limit = 204800
threshold = 0.57

try:
    if argv[3:4]:
        # Both optional arguments present: size first, then threshold.
        threshold = float(argv[3])
        size_limit = int(argv[2])
    elif argv[2:3]:
        # A single optional argument: a '.' marks it as the threshold.
        if '.' in argv[2]:
            threshold = float(argv[2])
        else:
            size_limit = int(argv[2])
except ValueError as e:
    die(111, "ifjunk: invalid argument: %s" % e)

# Give up if loading the database, reading stdin and classifying together
# take longer than five minutes.
signal(SIGALRM, lambda s, f: die(111, "ifjunk: timed out"))
alarm(5 * 60)

try:
    # The file object is handed to the classifier and deliberately left open;
    # the process exits right after classification.
    classifier = CdbClassifier(open(argv[1], "rb"))
except IOError as e:
    die(111, "ifjunk: %s" % e)

if size_limit < 0:
    # No limit: read the whole message.
    m = stdin.read(-1)
else:
    # Read one character past the limit so an oversized message can be
    # detected without reading all of it.
    m = stdin.read(size_limit + 1)
    if len(m) > size_limit:
        die(1, "ifjunk: msg size")

try:
    m = message_from_string(m)
except MessageParseError:
    die(0, "ifjunk: msg format")  # messages email.parser can't grok are junk

probability = classifier.spamprob(tokenize(m))
die((1 if probability <= threshold else 0), "ifjunk: p %.2f" % probability)