int tokens = 0;
if( !getBrain().isReady())
{
// return "UNKNOWN" if bayesian brain is not yet ready
FilterResult result = new FilterResult("filter.bayes", FilterResult.ResultType.UNKNOWN);
return result;
}
// find top n tokens (abs(.5-prob))
Token[] bestTokens = new Token[MAX_TOKENS_FOR_DECISION_PG];
String[] bestTokenNames = new String[MAX_TOKENS_FOR_DECISION_PG];
LOG.debug("start search for best tokens...");
Tokenizer myTokenizer = getTokenizer();
synchronized( myTokenizer)
{
myTokenizer.reset(mail);
while( myTokenizer.hasNext())
{
String token = myTokenizer.next();
Token t = getBrain().lookUp(token);
if( t != null)
{
double curAbs = Math.abs(0.5 - t.getSpamProbability());
int minToken = 0;
double minAbs = 1.0;
for( int i = 0; i < MAX_TOKENS_FOR_DECISION_PG; i++)
{
if( bestTokens[i] == null)
{
minToken = i;
tokens++;
break;
}
else if( bestTokenNames[i].equals(token))
{
curAbs = 0;
break;
}
double abs = Math.abs(0.5 - bestTokens[i].getSpamProbability());
if( abs < minAbs)
{
minAbs = abs;
minToken = i;
}
}
if( curAbs > minAbs || bestTokens[minToken] == null)
{
bestTokens[minToken] = t;
bestTokenNames[minToken] = token;
}
}
}
}
// log tokens found
LOG.debug("found best " + tokens + " tokens");
if( LOG.isDebugEnabled())
{
for( int i = 0; i < tokens; i++)
{
LOG.debug(bestTokenNames[i] + " " + bestTokens[i].toString());
}
}
if( tokens == 0)
{
// if there is no token, return unknown
FilterResult result = new FilterResult(getId(), FilterResult.ResultType.UNKNOWN);
return result;
}
// do the brain stuff
double pro = 1.0;
double contra = 1.0;
for( int i = 0; i < tokens && bestTokens[i] != null; i++)
{
pro *= bestTokens[i].getSpamProbability();
contra *= 1 - bestTokens[i].getSpamProbability();
}
// build and return result
double result = pro / (pro + contra);
LOG.debug("spam probability = " + result);
FilterResult filterResult = new FilterResult(getId(), result);
if( LOG.isDebugEnabled())
{
// add tokens to FilterResult as comment;
StringBuffer comment = new StringBuffer();