#!/usr/bin/python -O
# usage: ifjunk cdb [ size ] [ threshold ]
"""Classify the mail message on stdin as junk or not junk.

The message is scored with a SpamBayes classifier loaded from the cdb
file named on the command line, and the verdict is reported through the
exit status:

    0    junk: the spam probability is above the threshold, or the
         message could not be parsed
    1    not junk: the spam probability is at or below the threshold,
         or the message is longer than the size limit and was therefore
         not classified
    111  usage error, invalid argument, unreadable cdb file, or timeout
"""

from sys import argv, exit, path, stderr, stdin

USAGE = "usage: ifjunk cdb [ size ] [ threshold ]"
DEFAULT_SIZE_LIMIT = 204800  # longest message classified, per stdin.read()
DEFAULT_THRESHOLD = 0.57     # spam probability above this means junk
TIMEOUT_SECONDS = 5 * 60     # give up on the whole run after this long


def die(code, x):
    """Write x to stderr and exit(code)."""
    stderr.write(x + "\n")
    stderr.flush()
    exit(code)


def parse_limits(args):
    """Return (size_limit, threshold) taken from an argv-style list.

    args[0] is the program name and args[1] the cdb path; both are
    ignored here.  The optional values that follow are interpreted as:

        [ size ]              args[2] parses as an integer
        [ size ] [ threshold ]  args[3] is parsed as a float
        [ threshold ]         args[2] is not an integer, so it is parsed
                              as a float (any args[3] is then ignored)

    Missing values fall back to DEFAULT_SIZE_LIMIT / DEFAULT_THRESHOLD.
    Explicit values are always honoured, including 0 and 0.0.

    Raises ValueError when a supplied value is not a valid number.
    """
    size_limit = DEFAULT_SIZE_LIMIT
    threshold = DEFAULT_THRESHOLD
    if len(args) > 2:
        try:
            size_limit = int(args[2])
        except ValueError:
            # Not an integer, so the only optional argument given is
            # the threshold.
            threshold = float(args[2])
        else:
            if len(args) > 3:
                threshold = float(args[3])
    return size_limit, threshold


def main():
    """Run the filter: parse arguments, read stdin, exit with the verdict."""
    # Remove the first search-path entry *in place* so that it really
    # affects sys.path, and do it before importing anything else so the
    # modules below cannot be picked up from that entry.
    del path[:1]

    from email import message_from_string
    from email.errors import MessageParseError
    from signal import alarm, signal, SIGALRM
    from spambayes.cdb_classifier import CdbClassifier
    from spambayes.tokenizer import tokenize

    if len(argv) < 2:
        die(111, USAGE)

    try:
        size_limit, threshold = parse_limits(argv)
    except ValueError as e:
        die(111, "ifjunk: invalid argument: %s" % e)

    # Bound the total run time; the handler reports and exits with 111.
    signal(SIGALRM, lambda s, f: die(111, "ifjunk: timed out"))
    alarm(TIMEOUT_SECONDS)

    try:
        # The file object is handed to the classifier and left open;
        # NOTE(review): presumably the classifier keeps using it - confirm
        # before adding an explicit close().
        classifier = CdbClassifier(open(argv[1], "rb"))
    except IOError as e:
        die(111, "ifjunk: %s" % e)

    # Read one unit more than the limit so an over-long message can be
    # detected without slurping all of it.
    s = stdin.read(size_limit + 1)
    if len(s) > size_limit:
        exit(1)

    try:
        m = message_from_string(s)
        del s  # the raw text is no longer needed once parsed
    except MessageParseError:
        exit(0)  # messages email.Parser can't grok are junk

    exit(1 if classifier.spamprob(tokenize(m)) <= threshold else 0)


if __name__ == "__main__":
    main()