/*254:*/
#line 9898 "annoyance-filter.w"

// Program-wide constants and helper macros.  Several definitions end in a
// backslash followed by an empty line; that empty line is part of the macro
// and must be kept.

// Date string literal for this revision.
#define REVDATE "2003-09-24"
// Expands to a std::string containing "X-Annoyance-Filter" (constructed at each use).
#define Xfile string("X-Annoyance-Filter")  \

// String literal " COUNTS ".  NOTE(review): presumably a reserved pseudo-word
// key; its use is not visible in this part of the file.
#define pseudoCountsWord " COUNTS " \

// Constants used by class fastDictionary (versionNumber is set from the first).
#define fastDictionaryVersionNumber 1
// All-ones 32 bit value: -1 converted to u_int32_t.
#define fastDictionaryVoidLink static_cast<u_int32_t> (-1) 
#define fastDictionarySignature "AFfd"
// Floating point constant 1.0/111.
#define fastDictionaryFloatingTest (1.0/111)  \

// String literal "From ".
#define messageSentinel "From " \

// Converts c to unsigned int and keeps only the low 8 bits (result 0..255).
#define ChIx(c) (static_cast<unsigned int> ((c) ) &0xFF)  \

// Opening and closing delimiters of an HTML comment.
#define HTMLCommentBegin "<!--"
#define HTMLCommentEnd "-->" \

// POP_BUFFER is POP_MAX_MESSAGE plus two.
#define POP_MAX_MESSAGE 512
#define POP_BUFFER ((POP_MAX_MESSAGE) +2)  \

// Calls annotations.test(c); `annotations` must be in scope where the macro is
// used (it is not declared in this part of the file).
#define Annotate(c) (annotations.test(c) )  \


#line 9899 "annoyance-filter.w"


/*232:*/
#line 9014 "annoyance-filter.w"

#include "config.h"      

/*238:*/
#line 9153 "annoyance-filter.w"

#ifdef WIN32
#undef HAVE_MMAP
#endif


/*:238*/
#line 9017 "annoyance-filter.w"


/*233:*/
#line 9043 "annoyance-filter.w"

#include <iostream> 
#include <iomanip> 
#include <fstream> 
#include <cstdlib> 
#include <string> 
#include <sstream> 
#ifdef HAVE_FDSTREAM_COMPATIBILITY
#include "fdstream.hpp"
#endif
#ifdef HAVE_NEW_STRSTREAM
#include "mystrstream_new.h"
#else
#include "mystrstream.h"
#endif
#include <vector> 
#include <algorithm> 
#include <map> 
#include <stack> 
#include <deque> 
#include <queue> 
#include <list> 
#include <set> 
#include <bitset> 
#include <functional> 
#include <cmath> 
using namespace std;

/*:233*/
#line 9019 "annoyance-filter.w"

/*234:*/
#line 9075 "annoyance-filter.w"

#include <stdio.h> 
#include <stdlib.h> 
#include <fcntl.h> 
#include <ctype.h> 
#include <string.h> 
#include <assert.h> 

/*:234*/
#line 9020 "annoyance-filter.w"

/*235:*/
#line 9089 "annoyance-filter.w"

#ifdef HAVE_STAT
#include <sys/stat.h> 
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h> 
#endif
#ifdef HAVE_DIRENT_H
#include <dirent.h> 
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h> 
#endif

/*:235*/
#line 9021 "annoyance-filter.w"


#ifdef WIN32
#define __GNU_LIBRARY__
#undef __GETOPT_H__
#endif
#include "getopt.h"     
#include "statlib.h" 

/*237:*/
#line 9131 "annoyance-filter.w"

// Derived feature switches, computed from the HAVE_* configuration macros.

// HAVE_PLOT_UTILITIES is defined only when HAVE_GNUPLOT, HAVE_NETPBM and
// HAVE_SYSTEM are all defined.
#if defined(HAVE_GNUPLOT) && defined(HAVE_NETPBM) && defined(HAVE_SYSTEM)
#define HAVE_PLOT_UTILITIES
#endif

// HAVE_DIRECTORY_TRAVERSAL requires both HAVE_DIRENT_H and HAVE_STAT.
#if defined(HAVE_DIRENT_H) && defined(HAVE_STAT)
#define HAVE_DIRECTORY_TRAVERSAL
#endif

// HAVE_PDF_DECODER requires HAVE_PDFTOTEXT, HAVE_POPEN and at least one of
// HAVE_MKSTEMP / HAVE_TMPNAM.
#if defined(HAVE_PDFTOTEXT) && defined(HAVE_POPEN) && (defined(HAVE_MKSTEMP) || defined(HAVE_TMPNAM))
#define HAVE_PDF_DECODER
#endif

/*:237*/
#line 9030 "annoyance-filter.w"


/*236:*/
#line 9109 "annoyance-filter.w"

// POP3_PROXY_SERVER is defined only when both HAVE_SOCKET and HAVE_SIGNAL are
// defined; the socket and signal headers below are included under it.
#if defined(HAVE_SOCKET) && defined(HAVE_SIGNAL)
#define POP3_PROXY_SERVER
#endif

#ifdef POP3_PROXY_SERVER
#include <signal.h> 
#include <sys/types.h> 
#include <sys/socket.h> 
#include <netinet/in.h> 
#include <arpa/inet.h> 
#include <netdb.h> 
#include <errno.h> 
#endif

/*:236*/
#line 9032 "annoyance-filter.w"


/*:232*/
#line 9901 "annoyance-filter.w"

/*226:*/
#line 8874 "annoyance-filter.w"

// File-scope state shared by the message processing code.
static unsigned int messageCount[2];            // zero-initialised (static storage)
static list<string> messageTranscript;
static queue<string> parserDiagnostics;
static bool saveParserDiagnostics= false;

/*:226*//*241:*/
#line 9192 "annoyance-filter.w"

// File-scope option variables and their default values.
static bool verbose= false;
#ifdef TYPE_LOG
static ofstream typeLog("/tmp/typelog.txt");
#endif
static string pDiagFilename;                    // starts out empty
static string transcriptFilename;               // starts out empty
static bool pTokenTrace= false;
static unsigned int maxTokenLength= 64;
static unsigned int minTokenLength= 1;
static unsigned int streamMaxTokenLength= 64;
static unsigned int streamMinTokenLength= 5;
static bool singleDictionaryRead= true;
static unsigned int phraseMin= 1;
static unsigned int phraseMax= 1;
static unsigned int phraseLimit= 48;
static unsigned int autoPrune= 0;
static bool popProxyTrace= false;

/*:241*//*250:*/
#line 9832 "annoyance-filter.w"


// Character classification and case conversion macros driven by the bit
// tables isoalpha, isoupper and isolower.  Each table holds one bit per 8 bit
// character code c: byte c/8, mask 0x80>>(c%8).  Arguments are evaluated more
// than once, so they must be free of side effects.

// Low 8 bits of x as an unsigned char.
#define ISOch(x)     (static_cast<unsigned char> ((x) & 0xFF))
// White space test: true only for codes accepted by both isascii and isspace.
#define isISOspace(x)   (isascii(ISOch(x)) && isspace(ISOch(x)))
// Table look-ups: alphabetic, upper case and lower case respectively.
#define isISOalpha(x)   ((isoalpha[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0)
#define isISOupper(x)   ((isoupper[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0)
#define isISOlower(x)   ((isolower[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0)
// Upper case conversion.  Lower case ASCII codes go through toupper; other
// lower case codes have 0x20 subtracted, except 0xDF and 0xFF which are
// returned unchanged; anything not flagged lower case is returned unchanged.
#define toISOupper(x)   (isISOlower(x) ? (isascii(((unsigned char) (x))) ?  \
                            toupper(x) : (((ISOch(x) != 0xDF) && \
                            (ISOch(x) != 0xFF)) ? \
                            (ISOch(x) - 0x20) : (x))) : (x))
// Lower case conversion.  Upper case ASCII codes go through tolower; other
// upper case codes have 0x20 added; everything else is returned unchanged.
#define toISOlower(x)   (isISOupper(x) ? (isascii(ISOch(x)) ?  \
                            tolower(x) : (ISOch(x) + 0x20)) \
                            : (x))

/*:250*//*251:*/
#line 9851 "annoyance-filter.w"

// Bit tables read by the isISOalpha / isISOupper / isISOlower macros: 32 bytes
// of 8 bits give one flag per character code 0..255, most significant bit
// first (code c lives in byte c/8 under mask 0x80>>(c%8)).

// Alphabetic codes: 0x41-0x5A, 0x61-0x7A and 0xC0-0xFF except 0xD7 and 0xF7.
const unsigned char isoalpha[32]= {
0,0,0,0,0,0,0,0,127,255,255,224,127,255,255,224,0,0,0,0,0,0,0,0,255,255,
254,255,255,255,254,255
};

// Upper case codes: 0x41-0x5A and 0xC0-0xDE except 0xD7.
const unsigned char isoupper[32]= {
0,0,0,0,0,0,0,0,127,255,255,224,0,0,0,0,0,0,0,0,0,0,0,0,255,255,254,254,
0,0,0,0
};

// Lower case codes: 0x61-0x7A, 0xDF and 0xE0-0xFF except 0xF7.
const unsigned char isolower[32]= {
0,0,0,0,0,0,0,0,0,0,0,0,127,255,255,224,0,0,0,0,0,0,0,0,0,0,0,1,255,255,
254,255
};

/*:251*/
#line 9902 "annoyance-filter.w"

/*10:*/
#line 1250 "annoyance-filter.w"

// A single dictionary entry: the token text, the number of times it has been
// seen in each mail category, and its junk probability.
class dictionaryWord{
public:
    static const unsigned int nCategories= 2;
    // Mail and Junk index the occurrences array; Unknown is not a valid index.
    enum mailCategory{Mail= 0,Junk= 1,Unknown};

    string text;
    unsigned int occurrences[nCategories];
    double junkProbability;

    // Builds a word with the given text, zero counts and probability -1.
    dictionaryWord(string s= ""){
        set(s);
    }

    // Replaces every field of the word in one call.
    void set(string s= "",unsigned int s_Mail= 0,unsigned int s_Junk= 0,
             double jProb= -1){
        text= s;
        junkProbability= jProb;
        occurrences[Junk]= s_Junk;
        occurrences[Mail]= s_Mail;
    }

    // Returns a copy of the token text.
    string get()const{
        return text;
    }

    // Occurrence count in the Mail category.
    unsigned int n_mail()const{
        return occurrences[Mail];
    }

    // Occurrence count in the Junk category.
    unsigned int n_junk()const{
        return occurrences[Junk];
    }

    // Sum of the occurrence counts over all categories.
    unsigned int n_occurrences()const{
        unsigned int total= 0;
        const unsigned int*const stop= occurrences+nCategories;

        for(const unsigned int*count= occurrences;count!=stop;++count){
            total+= *count;
        }
        return total;
    }

    // Adds howMany occurrences to category cat, which must be Mail or Junk.
    void add(mailCategory cat,unsigned int howMany= 1){
        assert(cat==Mail||cat==Junk);
        occurrences[cat]+= howMany;
    }

    // Zeroes the count of category cat (Mail or Junk) and returns the number
    // of occurrences left in the word afterwards.
    unsigned int resetCat(mailCategory cat){
        assert(cat==Mail||cat==Junk);
        occurrences[cat]= 0;
        return occurrences[Mail]+occurrences[Junk];
    }

    void computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
                                double mailBias= 2,unsigned int minOccurrences= 5);

    // Returns the stored junk probability (-1 is the value set() assigns by default).
    double getJunkProbability()const{
        return junkProbability;
    }

    // Length of the token text in characters.
    unsigned int length()const{
        return text.length();
    }

    // Rough storage estimate: text length rounded up to a multiple of four,
    // plus the fixed-size fields and eight ints of overhead.
    unsigned int estimateMemoryRequirement()const{
        const unsigned int paddedText= ((length()+3)/4)*4;

        return paddedText+sizeof(string::size_type)+
               (sizeof(unsigned int)*nCategories)+
               sizeof(double)+
               (sizeof(int)*8);
    }

    // Converts the token text to lower case in place using toISOlower.
    void toLower(){
        for(string::iterator ch= text.begin();ch!=text.end();++ch){
            *ch= to_iso_lower(*ch);
        }
    }

    void describe(ostream&os= cout);

    void exportCSV(ostream&os= cout);
    bool importCSV(istream&is= cin);

    // Name of a category: "mail", "junk", or "unknown" for anything else.
    static string categoryName(mailCategory c){
        if(c==Mail){
            return"mail";
        }
        if(c==Junk){
            return"junk";
        }
        return"unknown";
    }

    void exportToBinaryFile(ostream&os);
    bool importFromBinaryFile(istream&is);

protected:

/*18:*/
#line 1586 "annoyance-filter.w"

    // Function wrappers around the case conversion macros, so they can be
    // used where a callable taking and returning char is needed.
    static char to_iso_lower(char c){
        return toISOlower(c);
    }

    static char to_iso_upper(char c){
        return toISOupper(c);
    }

/*:18*/
#line 1344 "annoyance-filter.w"
};

/*:10*//*19:*/
#line 1606 "annoyance-filter.w"

// A dictionary is a map from token text to its dictionaryWord, plus a stored
// estimate of the memory the entries require.
class dictionary:public map<string,dictionaryWord> {
public:

    unsigned int memoryRequired;        // value reported by estimateMemoryRequirement()

    // An empty dictionary with a zero memory estimate.
    dictionary():memoryRequired(0){
    }

    void add(dictionaryWord w,dictionaryWord::mailCategory category);

    void include(dictionaryWord&w);

    void exportCSV(ostream&os= cout);
    void importCSV(istream&is= cin);

    void computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
                                double mailBias= 2,unsigned int minOccurrences= 5);

    void purge(unsigned int occurrences= 0);

    void resetCat(dictionaryWord::mailCategory category);

    void printStatistics(ostream&os= cout)const;

#ifdef HAVE_PLOT_UTILITIES
    void plotProbabilityHistogram(string fileName,unsigned int nBins= 20)const;
#endif

    void exportToBinaryFile(ostream&os);
    void importFromBinaryFile(istream&is);

    // Returns the stored memory estimate without recomputing anything.
    unsigned int estimateMemoryRequirement()const{
        return memoryRequired;
    }
};

/*:19*//*32:*/
#line 2048 "annoyance-filter.w"

// Read-only dictionary held as a single block of bytes (dblock) containing a
// hash table and a word table.  The block is produced by exportDictionary()
// and brought in by load(); both are defined elsewhere.
//
// NOTE(review): the class owns raw resources but is implicitly copyable;
// copying a loaded instance would release them twice.
class fastDictionary{
private:
    static const u_int16_t byteOrderMark= 0xFEFF;
    static const u_int16_t doubleSize= sizeof(double);
    static const u_int16_t versionNumber= fastDictionaryVersionNumber;

    unsigned char*dblock;               // NULL until a dictionary is loaded
    u_int32_t totalSize;
    u_int32_t hashTableOffset;
    u_int32_t hashTableBuckets;
    u_int32_t wordTableSize;

    u_int32_t*hashTable;
    unsigned char*wordTable;

#ifdef HAVE_MMAP
    char*dp;                            // address passed to munmap(); NULL if none
    int fileHandle;
    long fileLength;
#endif

    // Advises the user to rebuild the fast dictionary on this machine.
    void regen(void)const{
        cerr<<"You should re-generate the fast dictionary on this machine."<<endl;
    }

    static unsigned int nextGreaterPrime(unsigned int a);

    static u_int32_t computeHashValue(const string&s);

    // Copies bufl bytes from buf into v starting at index off.  The caller
    // must make sure v already holds at least off+bufl elements; no bounds
    // check is made.
    static void Vmemcpy(vector<unsigned char> &v,
                        vector<unsigned char> ::size_type off,
                        const void*buf,const unsigned int bufl){
        const unsigned char*bp= static_cast<const unsigned char*> (buf);

        for(unsigned int i= 0;i<bufl;i++){
            v[off++]= *bp++;
        }
    }

public:
    // Creates an empty object.  Every member gets a defined value so that no
    // indeterminate field can be read before a dictionary is loaded.
    fastDictionary():
        dblock(NULL),totalSize(0),hashTableOffset(0),hashTableBuckets(0),
        wordTableSize(0),hashTable(NULL),wordTable(NULL){
#ifdef HAVE_MMAP
        dp= NULL;
        fileHandle= -1;
        fileLength= 0;
#endif
    }

    // Releases whatever load() acquired: the mapping and file handle when
    // built with HAVE_MMAP, otherwise the heap block.
    ~fastDictionary(){
#ifdef HAVE_MMAP
        if(dp!=NULL){
            munmap(dp,fileLength);
            close(fileHandle);
        }
#else
        if(dblock!=NULL){
            // NOTE(review): if load() allocates dblock with new[], this must
            // become delete[]; the allocation is not visible here, so the
            // statement is left as it was.
            delete dblock;
        }
#endif
    }

    bool load(const string fname);

    // True once dblock is non-NULL.
    bool isDictionaryLoaded(void)const{
        return dblock!=NULL;
    }

    double find(const string&target)const;

    // Writes the sizes and offsets of the loaded dictionary to os, or a
    // one-line notice when nothing is loaded.
    void describe(ostream&os= cout)const{
        if(dblock!=NULL){
            os<<"Total size of fast dictionary is "<<totalSize<<endl;
            os<<"Hash table offset: "<<hashTableOffset<<endl;
            os<<"Hash table buckets: "<<hashTableBuckets<<endl;
            os<<"Word table size: "<<wordTableSize<<endl;
        }else{
            os<<"No fast dictionary is loaded."<<endl;
        }
    }

    static void exportDictionary(const dictionary&d,ostream&o);
    static void exportDictionary(const dictionary&d,const string fname);
};

/*:32*//*40:*/
#line 2484 "annoyance-filter.w"

class mailFolder;

// Abstract base class of the MIME content decoders.  A decoder reads encoded
// lines from an input stream and hands back decoded characters through
// getDecodedChar(), which concrete decoders implement.
class MIMEdecoder{
public:
    istream*is;                         // stream the encoded text is read from
    string partBoundary;                // part boundary string supplied by the caller
    bool atEnd;                         // tested by the decoders to stop decoding
    bool eofHit;                        // value reported by isEndOfFile()
    unsigned int nDecodeErrors;         // value reported by getDecodeErrors()
protected:
    string inputLine;                   // current encoded line
    string::size_type ip;               // index of the next character in inputLine
    unsigned encodedLineCount;
    bool lookAhead;
    int lookChar;
    string endBoundary;
    list<string> *tlist;                // optional list supplied by the caller, may be NULL
    mailFolder*mf;

public:
    // Creates a decoder and resets all of its state.  The list passed as tl
    // is kept: the original code reset tlist to NULL after calling set(),
    // which silently discarded the argument.
    MIMEdecoder(istream*i= NULL,mailFolder*m= NULL,string pb= "",list<string> *tl= NULL){
        set(i,m,pb,tl);
        resetDecodeErrors();
    }

    virtual~MIMEdecoder(){
    }

    // Attaches the decoder to a new input stream, mail folder, part boundary
    // and list, and clears the line buffer and status flags.  The decode
    // error count is not touched.
    void set(istream*i= NULL,mailFolder*m= NULL,
             string pb= "",list<string> *tl= NULL){
        is= i;
        mf= m;
        partBoundary= pb;
        inputLine= "";
        ip= 0;
        encodedLineCount= 0;
        lookAhead= false;
        atEnd= false;
        eofHit= false;
        tlist= tl;
    }

    // Name of the decoder, supplied by each concrete class.
    virtual string name(void)const= 0;

    virtual void resetDecodeErrors(void){
        nDecodeErrors= 0;
    }

    virtual unsigned int getDecodeErrors(void)const{
        return nDecodeErrors;
    }

    virtual string getTerminatorSentinel(void)const{
        return endBoundary;
    }

    virtual bool isEndOfFile(void)const{
        return eofHit;
    }

    virtual unsigned int getEncodedLineCount(void)const{
        return encodedLineCount;
    }

    // Returns the next decoded character; the decoders in this file return
    // -1 when no more characters are available.
    virtual int getDecodedChar(void)= 0;

    virtual bool getDecodedLine(string&s);

    virtual void saveDecodedStream(ostream&os);
    virtual void saveDecodedStream(const string fname);

protected:
    virtual bool getNextEncodedLine(void);
};

/*:40*//*46:*/
#line 2707 "annoyance-filter.w"

// Decoder for content that needs no decoding: the encoded lines are handed
// back unchanged.
class identityMIMEdecoder:public MIMEdecoder{
public:
    string name()const{
        return"Identity";
    }

    // Returns the next character of the input as a value 0..255, fetching
    // further encoded lines as required, or -1 once atEnd is set.
    int getDecodedChar(){
        for(;;){
            if(atEnd){
                return-1;
            }
            if(ip<inputLine.length()){
                const int ch= inputLine[ip]&0xFF;

                ip++;
                return ch;
            }
            getNextEncodedLine();       // result unused: the loop re-tests atEnd and ip
        }
    }

    // Stores the unread remainder of the current line in s or, when the line
    // is used up, the whole of the next encoded line.  Returns false when no
    // further line could be obtained.
    bool getDecodedLine(string&s){
        const bool remainderPending= ip<inputLine.length();

        if(!remainderPending&&!getNextEncodedLine()){
            return false;
        }
        if(remainderPending){
            s= inputLine.substr(ip);
        }else{
            s= inputLine;
        }
        ip= inputLine.length();
        return true;
    }
};

/*:46*//*47:*/
#line 2748 "annoyance-filter.w"

// Decoder which discards its input: every encoded line is consumed and no
// character is ever returned.
class sinkMIMEdecoder:public MIMEdecoder{
public:
    string name()const{
        return"Sink";
    }

    // Consumes encoded lines until getNextEncodedLine() reports failure,
    // then always returns -1.
    int getDecodedChar(){
        if(atEnd){
            return-1;
        }
        bool gotLine;

        do{
            gotLine= getNextEncodedLine();
        }while(gotLine);
        assert(atEnd);
        return-1;
    }
};

/*:47*//*48:*/
#line 2771 "annoyance-filter.w"

// Decoder for Base64 encoded content.
class base64MIMEdecoder:public MIMEdecoder{
private:
    unsigned char dtable[256];          // filled in by initialiseDecodingTable()
    void initialiseDecodingTable();
    deque<unsigned char> decodedBytes;

public:
    // The decoding table is built once, when the decoder is constructed.
    base64MIMEdecoder(){
        initialiseDecodingTable();
    }

    string name()const{
        return"Base64";
    }

    int getDecodedChar();

    static string decodeEscapedText(const string s,mailFolder*m= NULL);

};

/*:48*//*58:*/
#line 3014 "annoyance-filter.w"

// Decoder for Quoted-Printable encoded content.
class quotedPrintableMIMEdecoder:public MIMEdecoder{
public:
    // Starts out with the end-of-line flag cleared.
    quotedPrintableMIMEdecoder():atEndOfLine(false){
    }

    string name()const{
        return"Quoted-Printable";
    }

    int getDecodedChar();

    static string decodeEscapedText(const string s,mailFolder*m= NULL);

protected:
    bool atEndOfLine;
    int getNextChar();
    static int hex_to_nybble(const int ch);
};

/*:58*//*68:*/
#line 3278 "annoyance-filter.w"

// Abstract base class of the multi-byte character set decoders.  A decoder
// walks along a source string one encoded byte at a time and delivers
// decoded character codes through getNextDecodedChar().
class MBCSdecoder{
protected:
    const string*src;                   // string being decoded; not owned, may be NULL
    string::size_type p;                // index of the next byte in *src
    mailFolder*mf;                      // mail folder supplied by the caller, may be NULL

public:
    // The mail folder argument is now stored; the original initialised mf to
    // NULL and ignored m.
    MBCSdecoder(mailFolder*m= NULL):src(NULL),p(0),mf(m){
    }

    virtual~MBCSdecoder(){
    }

    virtual string name(void)= 0;

    // Starts decoding s from its first byte.  Only the address of s is kept,
    // so s must outlive its use by the decoder.
    virtual void setSource(const string&s){
        src= &s;
        p= 0;
    }

    virtual void setMailFolder(mailFolder*m= NULL){
        mf= m;
    }

    virtual void reset(void){
    }

    virtual int getNextDecodedChar(void)= 0;

    // Returns the next source byte as a value 0..255, or -1 at the end of
    // the source.  A decoder with no source set also reports -1 instead of
    // dereferencing a NULL pointer.
    virtual int getNextEncodedByte(void){
        if((src==NULL)||(p>=src->length())){
            return-1;
        }
        return((*src)[p++])&0xFF;
    }

protected:
    virtual int getNextNBytes(const unsigned int n);

    virtual int getNext2Bytes(void){
        return getNextNBytes(2);
    }

    virtual int getNext3Bytes(void){
        return getNextNBytes(3);
    }

    virtual int getNext4Bytes(void){
        return getNextNBytes(4);
    }

    // Skips whatever remains of the source string; does nothing when no
    // source has been set.
    virtual void discardLine(void){
        if(src!=NULL){
            p= src->length();
        }
    }

    virtual void reportDecoderDiagnostic(const string s)const;
    virtual void reportDecoderDiagnostic(const ostringstream&os)const;
};

/*:68*//*71:*/
#line 3393 "annoyance-filter.w"

// Multi-byte decoder which identifies itself as "EUC".
class EUC_MBCSdecoder:public MBCSdecoder{
public:
    virtual string name(){
        return"EUC";
    }

    virtual int getNextDecodedChar();
};

/*:71*//*73:*/
#line 3454 "annoyance-filter.w"

// Multi-byte decoder which identifies itself as "Big5".
class Big5_MBCSdecoder:public MBCSdecoder{
public:
    virtual string name(){
        return"Big5";
    }

    virtual int getNextDecodedChar();
};

/*:73*//*75:*/
#line 3500 "annoyance-filter.w"

// Multi-byte decoder which identifies itself as "Shift_JIS".
class Shift_JIS_MBCSdecoder:public MBCSdecoder{
protected:
    string pending;                     // empty on construction

public:
    Shift_JIS_MBCSdecoder():pending(){
    }

    virtual~Shift_JIS_MBCSdecoder(){
    }

    virtual string name(){
        return"Shift_JIS";
    }

    virtual int getNextDecodedChar();
};

/*:75*//*80:*/
#line 3624 "annoyance-filter.w"

// Common parent of the Unicode decoders.  It stays abstract: the derived
// classes supply getNextDecodedChar().
class Unicode_MBCSdecoder:public MBCSdecoder{
public:
    virtual string name(){
        return"Unicode";
    }

    virtual int getNextDecodedChar()= 0;
};

/*:80*//*81:*/
#line 3642 "annoyance-filter.w"

// Unicode decoder which identifies itself as "UCS_2_Unicode"; the byte order
// is selectable.
class UCS_2_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{
protected:
    bool bigEndian;                     // byte order flag; true unless told otherwise

public:
    UCS_2_Unicode_MBCSdecoder(bool isBigEndian= true):bigEndian(isBigEndian){
    }

    // Selects the byte order used from now on.
    void setBigEndian(bool isBigEndian= true){
        bigEndian= isBigEndian;
    }

    virtual string name(){
        return"UCS_2_Unicode";
    }

    virtual int getNextDecodedChar();
};

/*:81*//*83:*/
#line 3693 "annoyance-filter.w"

// Unicode decoder which identifies itself as "UTF_8_Unicode".
class UTF_8_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{
public:
    virtual string name(){
        return"UTF_8_Unicode";
    }

    virtual int getNextDecodedChar();
};

/*:83*//*85:*/
#line 3777 "annoyance-filter.w"

// Unicode decoder which identifies itself as "UTF_16_Unicode"; the byte order
// is selectable.
class UTF_16_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{
protected:
    bool bigEndian;                     // byte order flag; true unless told otherwise

    // Reads two source bytes and combines them into one 16 bit word using
    // the selected byte order.  Returns the negative end marker unchanged if
    // the source is already exhausted.  If only one byte remains, reports a
    // diagnostic and returns -1.
    int getNextUTF_16Word(){
        const int first= getNextEncodedByte();

        if(first<0){
            return first;
        }

        const int second= getNextEncodedByte();

        if(second<0){
            ostringstream os;

            os<<name()<<"_MBCSdecoder: Premature end of line in UTF-16 character.";
            reportDecoderDiagnostic(os);
            return-1;
        }
        return bigEndian?((first<<8)|second):(first|(second<<8));
    }

public:
    UTF_16_Unicode_MBCSdecoder(bool isBigEndian= true):bigEndian(isBigEndian){
    }

    // Selects the byte order used from now on.
    void setBigEndian(bool isBigEndian= true){
        bigEndian= isBigEndian;
    }

    virtual string name(){
        return"UTF_16_Unicode";
    }

    virtual int getNextDecodedChar();
};

/*:85*//*88:*/
#line 3874 "annoyance-filter.w"

class MBCSinterpreter{
protected:
const string*src;
MBCSdecoder*dp;
string prefix,suffix;

public:

virtual~MBCSinterpreter(){
}

virtual string name(void)= 0;

virtual void setDecoder(MBCSdecoder&d){
dp= &d;
}

virtual void setSource(const string&s){
assert(dp!=NULL);
dp->setSource(s);
}

virtual void setPrefixSuffix(string pre= "",string suf= ""){
prefix= pre;
suffix= suf;
}

virtual string getNextDecodedChar(void);

virtual string decodeLine(const string&s);
};

/*:88*//*91:*/
#line 3960 "annoyance-filter.w"

// Interpreter named "GB2312"; prefix and suffix are both a single space.
class GB2312_MBCSinterpreter:public MBCSinterpreter{
public:
    GB2312_MBCSinterpreter(){
        const string blank(" ");

        setPrefixSuffix(blank,blank);
    }

    virtual string name(){
        return"GB2312";
    }
};

/*:91*//*92:*/
#line 3977 "annoyance-filter.w"

// Interpreter named "Big5"; prefix and suffix are both a single space.
class Big5_MBCSinterpreter:public MBCSinterpreter{
public:
    Big5_MBCSinterpreter(){
        const string blank(" ");

        setPrefixSuffix(blank,blank);
    }

    virtual string name(){
        return"Big5";
    }
};

/*:92*//*93:*/
#line 3994 "annoyance-filter.w"

// Interpreter named "Shift_JIS"; prefix and suffix are both a single space,
// and it supplies its own getNextDecodedChar().
class Shift_JIS_MBCSinterpreter:public MBCSinterpreter{
public:
    Shift_JIS_MBCSinterpreter(){
        const string blank(" ");

        setPrefixSuffix(blank,blank);
    }

    virtual string name(){
        return"Shift_JIS";
    }

    string getNextDecodedChar();
};

/*:93*//*95:*/
#line 4044 "annoyance-filter.w"

// Interpreter named "KR".  It sets no prefix or suffix, so both stay empty.
class KR_MBCSinterpreter:public MBCSinterpreter{
public:
    virtual string name(){
        return"KR";
    }
};

/*:95*//*96:*/
#line 4057 "annoyance-filter.w"

// Interpreter named "Unicode"; prefix and suffix are both a single space,
// and it supplies its own getNextDecodedChar().
class Unicode_MBCSinterpreter:public MBCSinterpreter{
public:
    Unicode_MBCSinterpreter(){
        const string blank(" ");

        setPrefixSuffix(blank,blank);
    }

    virtual string name(){
        return"Unicode";
    }

    string getNextDecodedChar();
};

/*:96*//*98:*/
#line 4127 "annoyance-filter.w"

// Abstract base class of the parsers which extract text strings from
// application data.  Concrete parsers supply name() and nextString(); bytes
// are obtained through get8().
class applicationStringParser{

protected:
    bool error,eof;                     // status flags, see isError() and isEOF()
    mailFolder*mf;

    virtual unsigned char get8();

    // Reads up to n bytes into buf with get8(), stopping early as soon as
    // the end of file flag is set.
    virtual void get8n(unsigned char*buf,const int n){
        int i= 0;

        while(!eof&&(i<n)){
            buf[i]= get8();
            ++i;
        }
    }

public:
    applicationStringParser(mailFolder*f= NULL):
        error(false),eof(false),mf(f){
    }

    virtual~applicationStringParser(){
    }

    virtual string name()const= 0;

    void setMailFolder(mailFolder*f){
        mf= f;
    }

    // Stores the next extracted string in s.
    virtual bool nextString(string&s)= 0;

    // Clears both status flags.
    virtual void close(){
        eof= false;
        error= false;
    }

    bool isError()const{
        return error;
    }

    bool isEOF()const{
        return eof;
    }

    // True while neither the end of file nor the error flag is set.
    bool isOK()const{
        return!(isEOF()||isError());
    }
};

/*:98*//*100:*/
#line 4200 "annoyance-filter.w"

// Reader for the tag structure of a Flash stream: the file header followed
// by a sequence of tags, each with a type and a data length.  Bytes are
// obtained through get8() of the base class; multi-byte integers are
// assembled least significant byte first.
class flashStream:public applicationStringParser{
protected:

/*110:*/
#line 4544 "annoyance-filter.w"


    // Tag type codes.
    typedef enum{
        stagEnd= 0,
        stagShowFrame= 1,
        stagDefineShape= 2,
        stagFreeCharacter= 3,
        stagPlaceObject= 4,
        stagRemoveObject= 5,
        stagDefineBits= 6,
        stagDefineButton= 7,
        stagJPEGTables= 8,
        stagSetBackgroundColor= 9,
        stagDefineFont= 10,
        stagDefineText= 11,
        stagDoAction= 12,
        stagDefineFontInfo= 13,
        stagDefineSound= 14,
        stagStartSound= 15,
        stagDefineButtonSound= 17,
        stagSoundStreamHead= 18,
        stagSoundStreamBlock= 19,
        stagDefineBitsLossless= 20,
        stagDefineBitsJPEG2= 21,
        stagDefineShape2= 22,
        stagDefineButtonCxform= 23,
        stagProtect= 24,


        stagPlaceObject2= 26,
        stagRemoveObject2= 28,
        stagDefineShape3= 32,
        stagDefineText2= 33,
        stagDefineButton2= 34,
        stagDefineBitsJPEG3= 35,
        stagDefineBitsLossless2= 36,
        stagDefineEditText= 37,
        stagDefineSprite= 39,
        stagNameCharacter= 40,
        stagFrameLabel= 43,
        stagSoundStreamHead2= 45,
        stagDefineMorphShape= 46,
        stagDefineFont2= 48
    }tagType;

/*:110*/
#line 4204 "annoyance-filter.w"

/*111:*/
#line 4598 "annoyance-filter.w"

    // Action codes.
    typedef enum{
        sactionNone= 0x00,
        sactionNextFrame= 0x04,
        sactionPrevFrame= 0x05,
        sactionPlay= 0x06,
        sactionStop= 0x07,
        sactionToggleQuality= 0x08,
        sactionStopSounds= 0x09,
        sactionAdd= 0x0A,
        sactionSubtract= 0x0B,
        sactionMultiply= 0x0C,
        sactionDivide= 0x0D,
        sactionEqual= 0x0E,
        sactionLessThan= 0x0F,
        sactionLogicalAnd= 0x10,
        sactionLogicalOr= 0x11,
        sactionLogicalNot= 0x12,
        sactionStringEqual= 0x13,
        sactionStringLength= 0x14,
        sactionSubString= 0x15,
        sactionInt= 0x18,
        sactionEval= 0x1C,
        sactionSetVariable= 0x1D,
        sactionSetTargetExpression= 0x20,
        sactionStringConcat= 0x21,
        sactionGetProperty= 0x22,
        sactionSetProperty= 0x23,
        sactionDuplicateClip= 0x24,
        sactionRemoveClip= 0x25,
        sactionTrace= 0x26,
        sactionStartDragMovie= 0x27,
        sactionStopDragMovie= 0x28,
        sactionStringLessThan= 0x29,
        sactionRandom= 0x30,
        sactionMBLength= 0x31,
        sactionOrd= 0x32,
        sactionChr= 0x33,
        sactionGetTimer= 0x34,
        sactionMBSubString= 0x35,
        sactionMBOrd= 0x36,
        sactionMBChr= 0x37,
        sactionHasLength= 0x80,
        sactionGotoFrame= 0x81,
        sactionGetURL= 0x83,
        sactionWaitForFrame= 0x8A,

        sactionSetTarget= 0x8B,
        sactionGotoLabel= 0x8C,
        sactionWaitForFrameExpression= 0x8D,

        sactionPushData= 0x96,
        sactionBranchAlways= 0x99,
        sactionGetURL2= 0x9A,
        sactionBranchIfTrue= 0x9D,
        sactionCallFrame= 0x9E,
        sactionGotoExpression= 0x9F
    }actionCode;

/*:111*/
#line 4205 "annoyance-filter.w"

/*112:*/
#line 4662 "annoyance-filter.w"

    // Flag bits of font information.
    typedef enum{
        fontUnicode= 0x20,
        fontShiftJIS= 0x10,
        fontANSI= 0x08,
        fontItalic= 0x04,
        fontBold= 0x02,
        fontWideCodes= 0x01
    }fontFlags;

    // Flag bits of a text record.
    typedef enum{
        isTextControl= 0x80,

        textHasFont= 0x08,
        textHasColor= 0x04,
        textHasYOffset= 0x02,
        textHasXOffset= 0x01
    }textFlags;

    // Flag bits of an edit text definition.
    typedef enum{
        seditTextFlagsHasFont= 0x0001,
        seditTextFlagsHasMaxLength= 0x0002,
        seditTextFlagsHasTextColor= 0x0004,
        seditTextFlagsReadOnly= 0x0008,
        seditTextFlagsPassword= 0x0010,
        seditTextFlagsMultiline= 0x0020,
        seditTextFlagsWordWrap= 0x0040,
        seditTextFlagsHasText= 0x0080,
        seditTextFlagsUseOutlines= 0x0100,
        seditTextFlagsBorder= 0x0800,
        seditTextFlagsNoSelect= 0x1000,
        seditTextFlagsHasLayout= 0x2000
    }editTextFlags;

/*:112*/
#line 4206 "annoyance-filter.w"

/*113:*/
#line 4703 "annoyance-filter.w"

    // Rectangle given by its extreme coordinates.
    typedef struct{
        int xMin,xMax,yMin,yMax;
    }rect;

    // Transformation matrix coefficients.
    typedef struct{
        int a;
        int b;
        int c;
        int d;
        int tx;
        int ty;
    }matrix;

/*:113*/
#line 4207 "annoyance-filter.w"



    // File header fields.
    unsigned char sig[3];
    unsigned char version;
    unsigned int fileLength;
    rect frameSize;
    unsigned short frameRate;
    unsigned short frameCount;


    // Type and data length of the current tag.
    tagType tType;
    unsigned int tDataLen;


    // Bit buffer state used by the bit field readers.
    unsigned int bitBuf,bitPos;

public:

    // Every member is given a defined value so that getTagType() and
    // getTagDataLength() never return an indeterminate value.
    flashStream(mailFolder*f= NULL):
        applicationStringParser(f),
        version(0),fileLength(0),frameRate(0),frameCount(0),
        tType(stagEnd),tDataLen(0),bitBuf(0),bitPos(0){
        sig[0]= sig[1]= sig[2]= 0;
        frameSize.xMin= frameSize.xMax= frameSize.yMin= frameSize.yMax= 0;
    }

    void readHeader(void);
    void describe(ostream&os= cout);
    bool nextTag(void);


    // Type of the current tag.
    tagType getTagType(void)const{
        return tType;
    }

    // Data length of the current tag.
    unsigned int getTagDataLength(void)const{
        return tDataLen;
    }

    void ignoreTag(unsigned int lookedAhead= 0);

    virtual void close(void){
        applicationStringParser::close();
    }

protected:

/*108:*/
#line 4454 "annoyance-filter.w"

    // Reads a 16 bit unsigned integer, low byte first.
    unsigned short get16(void){
        const unsigned int b0= get8();
        const unsigned int b1= get8();

        return static_cast<unsigned short> (b0|(b1<<8));
    }

    // Reads a 32 bit unsigned integer, low byte first.  The bytes are
    // widened to unsigned int before shifting: shifting the promoted int
    // left by 24 overflowed signed int whenever the top byte was 0x80 or more.
    unsigned int get32(void){
        const unsigned int b0= get8();
        const unsigned int b1= get8();
        const unsigned int b2= get8();
        const unsigned int b3= get8();

        return b0|(b1<<8)|(b2<<16)|(b3<<24);
    }

    // Reads up to n 16 bit integers into buf, stopping once eof is set.
    void get16n(unsigned short*buf,const int n){
        for(int i= 0;(!eof)&&(i<n);i++){
            buf[i]= get16();
        }
    }

    // Reads up to n 32 bit integers into buf, stopping once eof is set.
    void get32n(unsigned int*buf,const int n){
        for(int i= 0;(!eof)&&(i<n);i++){
            buf[i]= get32();
        }
    }

/*:108*/
#line 4255 "annoyance-filter.w"


    // Reads and discards up to n bytes, stopping once eof is set.
    void skip8n(const int n){
        for(int i= 0;(!eof)&&(i<n);i++){
            get8();
        }
    }

    void getString(string&s,int n= -1);


    void initBits(void);
    unsigned int getBits(int n);
    int getSignedBits(const int n);

    void getRect(rect*r);
    void getMatrix(matrix*mat);
};

/*:100*//*114:*/
#line 4725 "annoyance-filter.w"

// String parser named "Flash", built on flashStream.  It keeps per-font
// tables keyed by an unsigned short and a queue of strings.
class flashTextExtractor:public flashStream{
protected:
    map<unsigned short,vector<unsigned short> *> fontMap;   // vectors are owned, freed in close()
    map<unsigned short,unsigned short> fontGlyphCount;
    map<unsigned short,fontFlags> fontInfoBits;
    queue<string> strings;

    bool initialised;



    bool textOnly;                      // see setTextOnly(); cleared again by close()

public:
    flashTextExtractor(mailFolder*f= NULL):
        flashStream(f),initialised(false),textOnly(false){
    }

    // Releases everything held by the extractor.
    ~flashTextExtractor(){
        close();
    }

    virtual string name()const{
        return"Flash";
    }

    void setTextOnly(const bool tf){
        textOnly= tf;
    }

    bool getTextOnly()const{
        return textOnly;
    }

    bool nextString(string&s);

    // Frees the font vectors, empties all tables and the string queue,
    // clears the initialised and textOnly flags, and finally closes the
    // underlying stream.
    virtual void close(){
        typedef map<unsigned short,vector<unsigned short> *> ::iterator fontIterator;

        for(fontIterator font= fontMap.begin();font!=fontMap.end();++font){
            delete font->second;
        }
        fontMap.clear();
        fontGlyphCount.clear();
        fontInfoBits.clear();
        strings= queue<string> ();
        initialised= false;
        textOnly= false;
        flashStream::close();
    }
};

/*:114*//*125:*/
#line 5287 "annoyance-filter.w"

// String parser named "PDF"; compiled only when HAVE_PDF_DECODER is defined.
// It holds a pipe (ip), an input stream (is) and the name of a temporary
// file (tempfn); nextString(), defined elsewhere, is what uses them.
#ifdef HAVE_PDF_DECODER
class pdfTextExtractor:public applicationStringParser{
protected:
bool initialised;
// The stream type depends on HAVE_FDSTREAM_COMPATIBILITY.
#ifdef HAVE_FDSTREAM_COMPATIBILITY
fdistream is;
#else
ifstream is;
#endif
FILE*ip;                                // closed with pclose(); NULL when no pipe is open
// Buffer for the temporary file name; its size depends on HAVE_MKSTEMP.
#ifdef HAVE_MKSTEMP
char tempfn[256];
#else
char tempfn[L_tmpnam+2];
#endif

public:
// Starts with no pipe open.  tempfn is left unset; close() only reads it
// when ip is non-NULL.
pdfTextExtractor(mailFolder*f= NULL):
applicationStringParser(f),
initialised(false),
ip(NULL){
}

~pdfTextExtractor(){
close();
}

virtual string name(void)const{
return"PDF";
}

bool nextString(string&s);

// If a pipe is open: closes the stream (unless built with
// HAVE_FDSTREAM_COMPATIBILITY), closes the pipe and removes the temporary
// file.  In every case the base class flags and `initialised` are reset.
virtual void close(void){
if(ip!=NULL){
#ifndef HAVE_FDSTREAM_COMPATIBILITY
is.close();
#endif
pclose(ip);
remove(tempfn);
ip= NULL;
}
applicationStringParser::close();
initialised= false;
}
};
#endif

/*:125*//*129:*/
#line 5414 "annoyance-filter.w"

/*132:*/
#line 5776 "annoyance-filter.w"


// Compressed mail folder support.  It needs HAVE_POPEN and at least one
// decompression program.  When available, COMPRESSED_FILES is defined,
// Compressed_file_type is the file name suffix and Uncompress_command is
// the command line prefix used to decompress.
#ifdef HAVE_POPEN
// gzip family: suffix ".gz"; first available of gunzip, gzcat, gzip.
#if (defined HAVE_GUNZIP) || (defined HAVE_GZCAT) || (defined HAVE_GZIP)
#define COMPRESSED_FILES
static const char Compressed_file_type[]= ".gz";

static const char Uncompress_command[]= 
#if(defined HAVE_GUNZIP)
"gunzip -c"
#elif(defined HAVE_GZCAT)
"gzcat"
#elif(defined HAVE_GZIP)
"gzip -cd"
#endif
;
// compress family: suffix ".Z"; first available of zcat, uncompress, compress.
#elif (defined HAVE_ZCAT) || (defined HAVE_UNCOMPRESS) || (defined HAVE_COMPRESS)
#define COMPRESSED_FILES
static const char Compressed_file_type[]= ".Z";

static const char Uncompress_command[]= 
#if(defined HAVE_ZCAT)
"zcat"
#elif(defined HAVE_UNCOMPRESS)
"uncompress -c"
#elif(defined HAVE_COMPRESS)
"compress -cd"
#endif
;
#endif
#endif

/*:132*/
#line 5415 "annoyance-filter.w"


// A source of mail messages: an already open stream, a named file ("-" is
// standard input), a compressed file read through a pipe (COMPRESSED_FILES)
// or a directory of files (HAVE_DIRECTORY_TRAVERSAL).  The class also holds
// the header fields, MIME decoders, character set interpreters and
// application string parsers used while reading; the reading itself is done
// by nextLine() and nextByte(), defined elsewhere.
class mailFolder{
public:
    istream*is;                         // stream currently being read
    dictionaryWord::mailCategory category;
    unsigned int nLines;                // value reported by getLineCount()
    unsigned int nMessages;             // value reported by getMessageCount()
    bool newMessage;                    // value reported by isNewMessage()
    bool expectingNewMessage;
    bool lastLineBlank;
    bool BSDfolder;
    bool inHeader;
    string lookAheadLine;
    bool lookedAhead;
    ifstream isc;
#if defined(COMPRESSED_FILES) && defined(HAVE_FDSTREAM_COMPATIBILITY)
    fdistream iscc;
#endif

    string fromLine;
    string messageID;

    string lastFromLine;
    string lastMessageID;


#if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL)
    FILE*ip;                            // pipe from popen(), NULL when none is open
#endif

#ifdef HAVE_DIRECTORY_TRAVERSAL

    bool dirFolder;                     // true when the folder is a directory
    DIR*dh;
    string dirName,cfName;
    string pathSeparator;
#ifdef HAVE_FDSTREAM_COMPATIBILITY
    fdistream ifcdir;
#endif
    ifstream ifdir;
    istringstream nullstream;           // empty stream, used when the directory yields no file
#endif


    // Content fields of the message body.
    string bodyContentType;
    string bodyContentTypeCharset;
    string bodyContentTypeName;
    string bodyContentTransferEncoding;


    // Multi-part message state.
    string partBoundary;
    bool multiPart;
    bool inPartHeader;
    unsigned int partHeaderLines;
    stack<string> partBoundaryStack;


    // Content fields of the current MIME part.
    string mimeContentType;
    string mimeContentTypeCharset;
    string mimeContentTypeName;
    string mimeContentTypeBoundary;
    string mimeContentTransferEncoding;
    string mimeContentDispositionFilename;


    // MIME decoders; mdp points at the one in use, or is NULL.
    MIMEdecoder*mdp;
    identityMIMEdecoder imd;
    base64MIMEdecoder bmd;
    sinkMIMEdecoder smd;
    quotedPrintableMIMEdecoder qmd;


    // Multi-byte character set decoders and interpreters; mbi points at the
    // interpreter in use, or is NULL.
    MBCSinterpreter*mbi;
    EUC_MBCSdecoder mbd_euc;
    GB2312_MBCSinterpreter mbi_gb2312;
    Big5_MBCSdecoder mbd_big5;
    Big5_MBCSinterpreter mbi_big5;
    KR_MBCSinterpreter mbi_kr;
    UTF_8_Unicode_MBCSdecoder mbd_utf_8;
    Unicode_MBCSinterpreter mbi_unicode;


    // Application string parsers; asp points at the one in use, or is NULL.
    applicationStringParser*asp;
    flashTextExtractor aspFlash;
#ifdef HAVE_PDF_DECODER
    pdfTextExtractor aspPdf;
#endif


    bool byteStream;                    // value reported by isByteStream()

    list<string> *tlist;                // transcript list, see setTranscriptList()

    list<string> *dlist;                // diagnostic list, see setDiagnosticList()

    // Reads messages from an already open stream.
    mailFolder(istream&i,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){
#if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL)
        ip= NULL;
#endif
#ifdef HAVE_DIRECTORY_TRAVERSAL
        dirFolder= false;
        dh= NULL;
#endif
        // set() does not assign these members, so give them defined values.
        newMessage= inHeader= multiPart= inPartHeader= false;
        partHeaderLines= 0;
        set(&i,cat);
    }

    // Reads messages from the file or directory called fname.  Terminates
    // the program with a message on standard error if it cannot be opened.
    mailFolder(string fname,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){
#if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL)
        ip= NULL;
#endif
#ifdef HAVE_DIRECTORY_TRAVERSAL
        dh= NULL;
#endif
        // set() does not assign these members, so give them defined values.
        newMessage= inHeader= multiPart= inPartHeader= false;
        partHeaderLines= 0;
/*135:*/
#line 5865 "annoyance-filter.w"

#ifdef HAVE_DIRECTORY_TRAVERSAL
        dirFolder= false;
        struct stat fs;

        if((stat(fname.c_str(),&fs)==0)&&S_ISDIR(fs.st_mode)){
            dh= opendir(fname.c_str());
            if(dh!=NULL){
                dirFolder= true;
                dirName= fname;
                pathSeparator= '/';
                if(!findNextFileInDirectory(fname)){
                    // No file found: read from an empty stream instead.
                    nullstream.str("");
                    is= &nullstream;
                }else{
                    if(verbose){
                        cerr<<"Processing files from directory \""<<
                            dirName<<"\"."<<endl;
                    }
                }
            }else{
                cerr<<"Cannot open mail folder directory \""<<fname<<"\""<<endl;
                exit(1);
            }
        }
#endif

/*:135*/
#line 5525 "annoyance-filter.w"


#ifdef HAVE_DIRECTORY_TRAVERSAL
        if(!dirFolder){
#endif
#ifdef COMPRESSED_FILES
/*133:*/
#line 5818 "annoyance-filter.w"

            // Name whose suffix is examined.  It is declared outside the
            // HAVE_READLINK section because the suffix test below needs it
            // whether or not symbolic links can be followed.
            string jname= fname;
#ifdef HAVE_READLINK
            int maxSlinks= 50;

            char slbuf[1024];
            while(maxSlinks--> 0){
                int sll= readlink(jname.c_str(),slbuf,(sizeof slbuf)-1);
                if(sll>=0){
                    assert(sll<static_cast<int> (sizeof slbuf));
                    slbuf[sll]= 0;
                    jname= slbuf;
                }else{
                    break;
                }
            }
            // The counter ends below zero only when all 50 attempts were
            // links.  Testing <=0 also warned when the 50th attempt was the
            // one that reached a non-link.
            if(maxSlinks<0){
                cerr<<"Warning: probable symbolic link loop for \""<<
                    fname<<"\""<<endl;
            }
#endif

/*:133*/
#line 5531 "annoyance-filter.w"


            // Does the name end in the compressed file suffix?  The length
            // is checked first: computing length-suffixLength on a name
            // shorter than the suffix wraps around, and for a two character
            // name the result equalled the "not found" value of rfind, so
            // such a file was treated as compressed.
            const string compressedSuffix(Compressed_file_type);

            if((jname.length()>=compressedSuffix.length())&&
               (jname.compare(jname.length()-compressedSuffix.length(),
                              compressedSuffix.length(),compressedSuffix)==0)){
/*134:*/
#line 5847 "annoyance-filter.w"

                // NOTE(review): fname is appended to the shell command
                // without quoting, so names containing spaces or shell
                // metacharacters will not work as intended.
                string cmd(Uncompress_command);
                cmd+= ' '+fname;
                ip= popen(cmd.c_str(),"r");
                if(ip==NULL){
                    // fileno() must not be handed a NULL stream.
                    cerr<<"Cannot open mail folder file "<<fname<<endl;
                    exit(1);
                }
#ifdef HAVE_FDSTREAM_COMPATIBILITY
                iscc.attach(fileno(ip));
                is= &iscc;
#else
                isc.attach(fileno(ip));
                is= &isc;
#endif

/*:134*/
#line 5535 "annoyance-filter.w"

            }else{
#endif
                if(fname=="-"){
                    is= &cin;
                }else{
                    isc.open(fname.c_str());
                    is= &isc;
                }
#ifdef COMPRESSED_FILES
            }
#endif
#ifdef HAVE_DIRECTORY_TRAVERSAL
        }
#endif
        if(!(*is)){
            cerr<<"Cannot open mail folder file "<<fname<<endl;
            exit(1);
        }
        set(is,cat);
    }

    // Closes the decompression pipe if one is open.
    ~mailFolder(){
#ifdef COMPRESSED_FILES
        if(ip!=NULL){
            pclose(ip);
        }
#endif
    }

    // Attaches the folder to stream i with category cat and resets the
    // counters, the look-ahead buffer, the content fields, the decoder
    // pointers and the list pointers.
    void set(istream*i,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){
        is= i;
        nLines= nMessages= 0;
        lookedAhead= false;
        lookAheadLine= "";
        category= cat;
        dlist= NULL;
        tlist= NULL;
/*142:*/
#line 6145 "annoyance-filter.w"

        mimeContentType= mimeContentTypeCharset=
            mimeContentTypeName= mimeContentDispositionFilename=
            mimeContentTypeBoundary= mimeContentTransferEncoding= "";
        mdp= NULL;
        mbi= NULL;
        asp= NULL;
        byteStream= false;

/*:142*/
#line 5573 "annoyance-filter.w"

        bodyContentType= bodyContentTypeCharset=
            bodyContentTypeName= bodyContentTransferEncoding= "";
        expectingNewMessage= true;
        setNewMessageEligiblity();
        BSDfolder= false;
    }

    void setCategory(dictionaryWord::mailCategory c){
        category= c;
    }

    dictionaryWord::mailCategory getCategory(void)const{
        return category;
    }

    void setBSDmode(bool mode){
        BSDfolder= mode;
    }

    bool getBSDmode(void)const{
        return BSDfolder;
    }

    // Sets the lastLineBlank flag.
    void setNewMessageEligiblity(bool stat= true){
        lastLineBlank= stat;
    }

    // Sets the inHeader flag.
    void forceInHeader(bool state= true){
        inHeader= state;
    }

    bool nextLine(string&s);

    int nextByte(void);

#ifdef HAVE_DIRECTORY_TRAVERSAL
    bool findNextFileInDirectory(string&fname);
    bool openNextFileInDirectory(void);
#endif

    static void stringCanonicalise(string&s);

    static bool compareHeaderField(string&s,const string target,string&arg);

    static bool parseHeaderArgument(string&s,const string target,string&arg);

    static bool isSpoofedExecutableFileExtension(const string&s);

    bool isNewMessage(void)const{
        return newMessage;
    }

    unsigned int getMessageCount(void)const{
        return nMessages;
    }

    unsigned int getLineCount(void)const{
        return nLines;
    }

    bool isByteStream(void)const{
        return byteStream;
    }

    // Writes the category and the line and message counts to os.
    void describe(ostream&os= cout)const{
        os<<"Mail folder.  Category: "<<dictionaryWord::categoryName(category)<<endl;
        os<<"  Lines: "<<getLineCount()<<"  Messages: "<<getMessageCount()<<endl;
    }

    // Sets the diagnostic list pointer; the list is not owned.
    void setDiagnosticList(list<string> *lp){
        dlist= lp;
    }

    // Sets the transcript list pointer; the list is not owned.
    void setTranscriptList(list<string> *lp){
        tlist= lp;
    }

    unsigned int sizeMessageTranscript(const unsigned int lineOverhead= 1)const;
    void writeMessageTranscript(ostream&os= cout)const;
    void writeMessageTranscript(const string fname= "-")const;
    // Empties the transcript list, which must have been set.
    void clearMessageTranscript(void){
        assert(tlist!=NULL);
        tlist->clear();
    }

    void reportParserDiagnostic(const string s);
    void reportParserDiagnostic(const ostringstream&os);
};

/*:129*//*170:*/
#line 7057 "annoyance-filter.w"

// Describes which of the 256 character codes may appear in a token, which
// may not make up a token on their own, which may not end a token, and the
// range of acceptable token lengths.
class tokenDefinition{
protected:
    static const int numTokenChars= 256;
    bool isToken[numTokenChars],
         notExclusively[numTokenChars],
         notAtEnd[numTokenChars];
    unsigned int minTokenLength,maxTokenLength;

    // Sets table[cstart..cend] to v.  A cend of -1 means "just cstart"; the
    // original loops ran to cend itself, so the single character form did
    // nothing.  The assertions still accept numTokenChars as a bound, as
    // before, but the range is clipped to valid indexes so the element one
    // past the end of the table is never written.
    static void setRange(bool*table,bool v,const int cstart,const int cend){
        assert(cstart>=0&&cstart<=numTokenChars);
        assert((cend==-1)||(cend>=cstart&&cend<=numTokenChars));

        int last= (cend==-1)?cstart:cend;

        if(last>=numTokenChars){
            last= numTokenChars-1;
        }
        for(int i= (cstart<0)?0:cstart;i<=last;i++){
            table[i]= v;
        }
    }

public:
    tokenDefinition(){
        clear();
    }

    // Clears all three tables and sets the length limits to 1..65535.
    void clear(void){
        for(int i= 0;i<numTokenChars;i++){
            isToken[i]= notExclusively[i]= notAtEnd[i]= false;
        }
        setLengthLimits(1,65535);
    }

    // Sets the minimum and/or maximum token length; a limit passed as zero
    // is left as it was.
    void setLengthLimits(unsigned int lmin= 0,unsigned int lmax= 0){
        if(lmin> 0){
            minTokenLength= lmin;
        }
        if(lmax> 0){
            maxTokenLength= lmax;
        }
    }

    unsigned int getLengthMin(void)const{
        return minTokenLength;
    }

    unsigned int getLengthMax(void)const{
        return maxTokenLength;
    }

    // Table queries; c must be in the range 0..255.
    bool isTokenMember(const int c)const{
        assert(c>=0&&c<numTokenChars);
        return isToken[c];
    }

    bool isTokenNotExclusively(const int c)const{
        assert(c>=0&&c<numTokenChars);
        return notExclusively[c];
    }

    bool isTokenNotAtEnd(const int c)const{
        assert(c>=0&&c<numTokenChars);
        return notAtEnd[c];
    }

    // True if length l lies within the configured limits, inclusive.
    bool isTokenLengthAcceptable(string::size_type l)const{
        return(l>=minTokenLength)&&(l<=maxTokenLength);
    }

    bool isTokenLengthAcceptable(const string t)const{
        return isTokenLengthAcceptable(t.length());
    }

    // Table setters: mark the codes cstart..cend, or only cstart when cend
    // is omitted, with the value v.
    void setTokenMember(bool v,const int cstart,const int cend= -1){
        setRange(isToken,v,cstart,cend);
    }

    void setTokenNotExclusively(bool v,const int cstart,const int cend= -1){
        setRange(notExclusively,v,cstart,cend);
    }

    void setTokenNotAtEnd(bool v,const int cstart,const int cend= -1){
        setRange(notAtEnd,v,cstart,cend);
    }

    void setISO_8859defaults(unsigned int lmin= 0,unsigned int lmax= 0);
    void setUS_ASCIIdefaults(unsigned int lmin= 0,unsigned int lmax= 0);
};

/*:170*//*173:*/
#line 7193 "annoyance-filter.w"

class tokenParser{
protected:
mailFolder*source;
string cl;
string::size_type clp;
bool atEnd,inHTML,inHTMLcomment;
tokenDefinition*td;
tokenDefinition*btd;

bool saveMessage;

bool assemblePhrases;
deque<string> phraseQueue;
deque<string> pendingPhrases;

public:
list<string> messageQueue;

tokenParser(){
td= NULL;
}

void setSource(mailFolder&mf){
source= &mf;
cl= "";
clp= 0;
atEnd= inHTML= inHTMLcomment= false;
saveMessage= false;
messageQueue.clear();
phraseQueue.clear();
pendingPhrases.clear();
/*179:*/
#line 7601 "annoyance-filter.w"

assemblePhrases= false;
if((phraseMin!=1)||(phraseMax!=1)){
if((phraseMin>=1)&&(phraseMax>=phraseMin)){
if((phraseLimit> 0)&&(phraseLimit<((phraseMax*2)-1))){
cerr<<"Invalid --phraselimit setting.  Too small for specified --phrasemax."<<endl;
}else{
assemblePhrases= true;
}
}else{
cerr<<"Invalid --phrasemin/max parameters.  Must be 1 <= min <= max."<<endl;
}
}

/*:179*/
#line 7225 "annoyance-filter.w"
;
}

void setTokenDefinition(tokenDefinition&t,tokenDefinition&bt){
td= &t;
btd= &bt;
}

void setTokenLengthLimits(unsigned int lMax,unsigned int lMin= 1,
unsigned int blMax= 1,unsigned int blMin= 1){
assert(td!=NULL);
td->setLengthLimits(lMin,lMax);
assert(btd!=NULL);
btd->setLengthLimits(blMin,blMax);
}

unsigned int getTokenLengthMin(void)const{
return td->getLengthMin();
}

unsigned int getTokenLengthMax(void)const{
return td->getLengthMax();
}

void reportParserDiagnostic(const string s)const{
assert(source!=NULL);
source->reportParserDiagnostic(s);
}

void reset(void){
if(inHTML){
reportParserDiagnostic("<HTML> tag unterminated at end of message.");
}
if(inHTMLcomment){
reportParserDiagnostic("HTML comment unterminated at end of message.");
}
inHTML= inHTMLcomment= false;
clearMessageQueue();
phraseQueue.clear();
pendingPhrases.clear();
}

bool nextToken(dictionaryWord&d);

void assembleAllPhrases(dictionaryWord&d);

/*182:*/
#line 7678 "annoyance-filter.w"

void setSaveMessage(bool v){
saveMessage= v;
source->setDiagnosticList(saveMessage?(&messageQueue):NULL);
}

bool getSaveMessage(void)const{
return saveMessage;
}

void clearMessageQueue(void){
if(saveMessage){
string s;

if(isNewMessage()){
s= messageQueue.back();
}
messageQueue.clear();
if(isNewMessage()){
messageQueue.push_back(s);
}
}
}

void writeMessageQueue(ostream&os){
list<string> ::size_type l= messageQueue.size(),n= 0;

for(list<string> ::iterator p= messageQueue.begin();
p!=messageQueue.end();p++,n++){
if(!((n==(l-1))&&
(p->substr(0,(sizeof messageSentinel)-1)==messageSentinel))){
os<<*p<<endl;
}
}
}

/*:182*/
#line 7271 "annoyance-filter.w"
;

bool isNewMessage(void)const{
return atEnd||(source->isNewMessage());
}

private:
void nextLine(void){
while(true){
if(!(source->nextLine(cl))){
atEnd= true;
cl= "";
break;
}
if(saveMessage){
messageQueue.push_back(cl);
}
if(source->isNewMessage()){
reset();
}
break;
}
clp= 0;
}
};

/*:173*//*183:*/
#line 7721 "annoyance-filter.w"

// Bundles what is needed to classify the messages of one folder: the folder,
// a token parser, and the dictionaries.  All member bodies are defined
// elsewhere in the file.
class classifyMessage{
public:
mailFolder*mf;
tokenParser tp;
// NOTE(review): presumably the number of most significant tokens used in
// the classification (constructor default 15) -- confirm.
unsigned int nExtremal;
dictionary*d;
fastDictionary*fd;
// NOTE(review): presumably the probability assigned to tokens absent from
// the dictionary (constructor default 0.2) -- confirm.
double unknownWordProbability;

classifyMessage(mailFolder&m,
dictionary&dt,
fastDictionary*fdt= NULL,
unsigned int nExt= 15,double uwp= 0.2);

// NOTE(review): presumably returns the junk probability of the current
// message -- confirm against the definition.
double classifyThis(bool createTranscript= false);

protected:
void addSignificantWordDiagnostics(list<string> &l,
list<string> ::iterator where,
multimap<double,string> &rtokens,string endLine= "");
};

/*:183*//*186:*/
#line 7810 "annoyance-filter.w"

#ifdef OLDWAY
// Absolute value of a double; only compiled when OLDWAY is defined.
double abs(double x){
    if(x<0){
        return -(x);
    }
    return x;
}
#endif

/*:186*//*194:*/
#line 8067 "annoyance-filter.w"

#ifdef POP3_PROXY_SERVER

/*216:*/
#line 8672 "annoyance-filter.w"

// SIGPIPE handler: optionally logs that the signal was caught, then
// re-installs itself as the SIGPIPE handler.
static RETSIGTYPE absentPlumber(int)
{
    if(popProxyTrace){
        cerr<<"POP3: Caught SIGPIPE--continuing."<<endl;
    }

    signal(SIGPIPE,absentPlumber);
}

/*:216*/
#line 8070 "annoyance-filter.w"


// Signature of the optional filter callback a POP3Proxy can be given.
// NOTE(review): presumably called with the client command, its argument and
// the server's reply buffer, letting the filter rewrite the reply -- confirm
// against serviceConnection().
typedef void(*POP3ProxyFilterFunction)(const string command,const string argument,char*replyBuffer,int*replyLength,string&reply);

// POP3 proxy server settings and entry points.  The connection settings can
// only be changed before the proxy is opened (see the checks in the setters).
class POP3Proxy{
protected:
unsigned short popProxyPort;
string serverName;
unsigned short serverPort;
bool opened;

private:
// Two sets of lower-case POP3 command names, filled in by the constructor.
// NOTE(review): presumably commands whose reply is always multi-line and
// commands whose reply is multi-line only in some forms -- confirm against
// serviceConnection().
set<string> multiLine,cMultiLine;
int listenSocket;
POP3ProxyFilterFunction filterFunction;

public:

// Store the settings; no socket is opened here (listenSocket starts at -1).
POP3Proxy(unsigned short proxyPort= 9110,
string serverN= "",
unsigned short serverP= 110,
POP3ProxyFilterFunction filterF= NULL
):
popProxyPort(proxyPort),
serverName(serverN),
serverPort(serverP),
opened(false),
listenSocket(-1),
filterFunction(filterF){
/*195:*/
#line 8146 "annoyance-filter.w"

multiLine.insert("capa");
multiLine.insert("retr");
multiLine.insert("top");

cMultiLine.insert("list");
cMultiLine.insert("uidl");

/*:195*/
#line 8099 "annoyance-filter.w"
;
}

// If a listening socket exists, close it and restore the default SIGPIPE
// disposition.
~POP3Proxy(){
if(listenSocket!=-1){
close(listenSocket);
signal(SIGPIPE,SIG_DFL);
}
}

// The three setters below abort (in builds without NDEBUG) if called after
// the proxy has been opened.
void setPopProxyPort(unsigned short p){
/*196:*/
#line 8159 "annoyance-filter.w"

#ifndef NDEBUG
if(opened){
cerr<<"Attempt to modify POP3 connection settings after connection opened."<<endl;
abort();
}
#endif

/*:196*/
#line 8110 "annoyance-filter.w"
;
popProxyPort= p;
}

void setServerName(string&s){
/*196:*/
#line 8159 "annoyance-filter.w"

#ifndef NDEBUG
if(opened){
cerr<<"Attempt to modify POP3 connection settings after connection opened."<<endl;
abort();
}
#endif

/*:196*/
#line 8115 "annoyance-filter.w"
;
serverName= s;
}

void setServerPort(unsigned short p){
/*196:*/
#line 8159 "annoyance-filter.w"

#ifndef NDEBUG
if(opened){
cerr<<"Attempt to modify POP3 connection settings after connection opened."<<endl;
abort();
}
#endif

/*:196*/
#line 8120 "annoyance-filter.w"
;
serverPort= p;
}

// Unlike the other setters, the filter may be replaced at any time.
void setFilterFunction(POP3ProxyFilterFunction ff){
filterFunction= ff;
}

// Server operation entry points; bodies are defined elsewhere in the file.
bool acceptConnections(int maxBacklog= 25);

bool serviceConnection(void);

bool operateProxyServer(int maxBacklog= 25);

};
#endif

/*:194*/
#line 9903 "annoyance-filter.w"

/*239:*/
#line 9165 "annoyance-filter.w"

// File-scope settings and their defaults.
// NOTE(review): the meanings below are inferred from the names and from the
// identically named computeJunkProbability() parameters -- confirm against
// the option-processing code.
static double mailBias= 2.0;               // Weight applied to mail occurrences
static unsigned int minOccurrences= 5;     // Minimum occurrences for a word to be rated
static double junkThreshold= 0.9;
static double mailThreshold= 0.9;
static int significantWords= 15;
static double novelWordProbability= 0.2;
// One flag per possible char value; queried through the Annotate(c) macro.
static bitset<1<<(sizeof(char)*8)> annotations;
#ifdef POP3_PROXY_SERVER
// These defaults match the POP3Proxy constructor defaults.
static int popProxyPort= 9110;
static string popProxyServer= "";
static int popProxyServerPort= 110;
#endif
static bool bsdFolder= false;

/*:239*//*240:*/
#line 9184 "annoyance-filter.w"

// NOTE(review): presumably counts the messages tested so far; it is not
// referenced in this part of the file -- confirm.
static unsigned int nTested= 0;

/*:240*/
#line 9904 "annoyance-filter.w"

/*11:*/
#line 1352 "annoyance-filter.w"

// Order dictionary words by the result of get().
bool operator<(dictionaryWord a,dictionaryWord b)
{
    const bool precedes= a.get()<b.get();
    return precedes;
}

/*:11*//*12:*/
#line 1387 "annoyance-filter.w"

void dictionaryWord::computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
double mailBias,unsigned int minOccurrences)
{
double nMail= occurrences[Mail]*mailBias,
nJunk= occurrences[Junk];

nMailMessages= max(nMailMessages,1u);
nJunkMessages= max(nJunkMessages,1u);

if((nMail+nJunk)>=minOccurrences){
assert(nMailMessages> 0);
assert(nJunkMessages> 0);
junkProbability= min(0.99,max(0.01,min(nJunk/nJunkMessages,1.0)/
(min(nMail/nMailMessages,1.0)+min(nJunk/nJunkMessages,1.0))));
}else{
junkProbability= -1;
}
}

/*:12*//*13:*/
#line 1412 "annoyance-filter.w"

// Print the word, its mail and junk counts and its junk probability
// (precision 5) on one line of os.
void dictionaryWord::describe(ostream&os)
{
    os<<text;
    os<<"  Mail: "<<n_mail();
    os<<", Junk: "<<n_junk();
    os<<", Probability: "<<setprecision(5)<<junkProbability<<endl;
}

/*:13*//*14:*/
#line 1425 "annoyance-filter.w"

// Write the word as one CSV record: probability (precision 5), mail count,
// junk count, then the text in double quotes.
void dictionaryWord::exportCSV(ostream&os)
{
    os<<setprecision(5)<<junkProbability;
    os<<","<<occurrences[Mail];
    os<<","<<occurrences[Junk];
    os<<",\""<<text<<"\""<<endl;
}

/*:14*//*15:*/
#line 1450 "annoyance-filter.w"

// Read the next record from a CSV dictionary file into this word.
//
// Blank lines and lines whose first non-space character is '#' or ';' are
// skipped.  A record has the form  probability,mail,junk,"text".
//
// Returns true when a line was consumed: either a well-formed record, or an
// ill-formed line, in which case junkProbability is set to -2 and text holds
// the whole line.  Returns false at end of input, with junkProbability -3.
//
// Changes from the previous version: the character passed to isdigit() is
// converted to unsigned char first (passing a negative char is undefined
// behaviour), and the two count fields are extracted with their exact
// lengths.  The old lengths ran into the following field and only worked
// because atoi() stops at the comma.
bool dictionaryWord::importCSV(istream&is){
    while(true){
        string s;

        if(getline(is,s)){
            string::size_type p;

            // Skip leading white space.
            for(p= 0;p<s.length();p++){
                if(!isISOspace(s[p])){
                    break;
                }
            }
            if((p>=s.length())||(s[p]=='#')||(s[p]==';')){
                continue;               // Blank line or comment
            }

            if((s[p]=='-')||isdigit(static_cast<unsigned char>(s[p]))){
                // Locate the three commas that separate the numeric fields.
                const string::size_type c1= s.find(',');
                if(c1!=string::npos){
                    const string::size_type c2= s.find(',',c1+1);
                    if(c2!=string::npos){
                        const string::size_type c3= s.find(',',c2+1);
                        if(c3!=string::npos){
                            junkProbability= atof(s.substr(0,c1).c_str());
                            occurrences[Mail]= atoi(s.substr(c1+1,(c2-c1)-1).c_str());
                            occurrences[Junk]= atoi(s.substr(c2+1,(c3-c2)-1).c_str());

                            // The text runs from the first quote after the
                            // third comma to the last quote on the line.
                            const string::size_type q1= s.find('"',c3+1);
                            if(q1!=string::npos){
                                const string::size_type q2= s.find_last_of('"');
                                if((q2!=string::npos)&&(q2> q1)){
                                    text= s.substr(q1+1,(q2-q1)-1);
                                    return true;
                                }
                            }
                        }
                    }
                }
            }

            // Anything else is reported to the caller as an ill-formed record.
            junkProbability= -2;
            text= s;
            return true;
        }
        junkProbability= -3;
        return false;
    }
}

/*:15*//*16:*/
#line 1509 "annoyance-filter.w"

// Write this word to a binary dictionary stream as: one length byte, the
// text, the mail and junk counts as four bytes each (most significant byte
// first), then the bytes of junkProbability.
void dictionaryWord::exportToBinaryFile(ostream&os){
unsigned char c;
const unsigned char*fp;
const double k1= -1.0;

// outCount writes one byte (value must fit in 0..255); outNumber writes a
// value as four bytes, most significant first.  Both evaluate x repeatedly.
#define outCount(x) assert(x <= 255); c =  (x); os.put(c)
#define outNumber(x) os.put((x >> 24) & 0xFF); os.put((x >> 16) & 0xFF); \
               os.put((x >> 8) & 0xFF); os.put(x & 0xFF)

// The text length is stored in a single byte, so text longer than 255
// characters cannot be represented (checked by the assert in outCount).
outCount(text.length());
os.write(text.data(),text.length());
outNumber(n_mail());
outNumber(n_junk());
// Byte order probe: if the first byte of -1.0 in memory is zero, the bytes
// of the double are written in reverse memory order; otherwise as they are.
// NOTE(review): presumably this makes the file hold the most significant
// byte first on IEEE 754 hosts of either byte order -- confirm.
fp= reinterpret_cast<const unsigned char*> (&k1);
if(fp[0]==0){
fp= reinterpret_cast<unsigned char*> (&junkProbability);
for(unsigned int i= 0;i<(sizeof junkProbability);i++){
outCount(fp[((sizeof junkProbability)-1)-i]);
}
}else{

os.write(reinterpret_cast<const char*> (&junkProbability),
sizeof junkProbability);
}

#undef outCount
#undef outNumber
}

/*:16*//*17:*/
#line 1545 "annoyance-filter.w"

// Read one word record, as written by exportToBinaryFile(), into this word:
// a length byte, the text, two four-byte counts (most significant byte
// first) and the bytes of the junk probability.
//
// Returns true only when the complete record was read.  Previously only the
// length byte and the text were checked, so a truncated record returned true
// with counts and probability built from uninitialised buffers.  On a false
// return the word may have been partly overwritten and must not be used.
bool dictionaryWord::importFromBinaryFile(istream&is){
    unsigned char len;
    char sval[256];
    unsigned char ibyte[4];
    unsigned char fb[sizeof junkProbability];
    const double k1= -1.0;

// Assemble four bytes, most significant first.  The arithmetic is unsigned
// so that a first byte of 0x80 or more does not overflow a signed int.
#define iNumber ((static_cast<unsigned int> (ibyte[0]) << 24) | \
                 (static_cast<unsigned int> (ibyte[1]) << 16) | \
                 (static_cast<unsigned int> (ibyte[2]) << 8) | \
                 static_cast<unsigned int> (ibyte[3]))

    if(!is.read(reinterpret_cast<char*> (&len),1)){
        return false;                   // Normal end of file
    }
    if(!is.read(sval,len)){
        return false;
    }
    text= string(sval,len);

    if(!is.read(reinterpret_cast<char*> (ibyte),4)){
        return false;
    }
    occurrences[Mail]= iNumber;

    if(!is.read(reinterpret_cast<char*> (ibyte),4)){
        return false;
    }
    occurrences[Junk]= iNumber;

    // Same byte order probe as the export routine: when the first byte of
    // -1.0 in memory is zero the stored bytes are reversed on the way in.
    const unsigned char*kp= reinterpret_cast<const unsigned char*> (&k1);
    if(kp[0]==0){
        if(!is.read(reinterpret_cast<char*> (fb),sizeof fb)){
            return false;
        }
        unsigned char*fp= reinterpret_cast<unsigned char*> (&junkProbability);
        for(unsigned int i= 0;i<(sizeof junkProbability);i++){
            fp[((sizeof junkProbability)-1)-i]= fb[i];
        }
    }else{
        if(!is.read(reinterpret_cast<char*> (&junkProbability),
                    sizeof junkProbability)){
            return false;
        }
    }
    return true;
#undef iNumber
}

/*:17*//*20:*/
#line 1650 "annoyance-filter.w"

// Count one occurrence of word w in the given category.  A word not yet in
// the dictionary is inserted first and its size is added to memoryRequired.
void dictionary::add(dictionaryWord w,dictionaryWord::mailCategory category)
{
    const dictionary::iterator existing= find(w.get());

    if(existing==end()){
        dictionary::iterator created= insert(make_pair(w.get(),w)).first;
        created->second.add(category);
        memoryRequired+= w.estimateMemoryRequirement();
        return;
    }

    existing->second.add(category);
}


/*:20*//*21:*/
#line 1671 "annoyance-filter.w"

// Merge word w, with its occurrence counts, into the dictionary.  If the
// word is already present the mail and junk counts are added to the existing
// entry; otherwise a copy of w is inserted.
//
// A newly inserted word is now added to memoryRequired, as add() and purge()
// do.  Previously imported words were left out of the memory estimate.
void dictionary::include(dictionaryWord&w){
    dictionary::iterator p= find(w.get());

    if(p!=end()){
        p->second.occurrences[dictionaryWord::Mail]+= w.occurrences[dictionaryWord::Mail];
        p->second.occurrences[dictionaryWord::Junk]+= w.occurrences[dictionaryWord::Junk];
    }else{
        insert(make_pair(w.get(),w));
        memoryRequired+= w.estimateMemoryRequirement();
    }
}

/*:21*//*22:*/
#line 1692 "annoyance-filter.w"

// Sort predicate: ascending junk probability, with ties broken by the
// result of get().
bool byProbability(const dictionaryWord*w1,
const dictionaryWord*w2)
{
    const double difference= w1->getJunkProbability()-w2->getJunkProbability();

    return (difference==0)?(w1->get()<w2->get()):(difference<0);
}

// Write the whole dictionary as CSV: a comment header line, a pseudo-word
// record carrying the mail and junk message counts (probability -1), then
// every word sorted with byProbability.
void dictionary::exportCSV(ostream&os)
{
    if(verbose){
        cerr<<"Exporting dictionary to CSV file."<<endl;
    }

    // Sort pointers to the words rather than copies of them.
    vector<dictionaryWord*> ordered;
    ordered.reserve(size());
    for(iterator entry= begin();entry!=end();++entry){
        ordered.push_back(&(entry->second));
    }
    sort(ordered.begin(),ordered.end(),byProbability);

    os<<"; Probability,Mail,Junk,Word"<<endl;

    dictionaryWord counts;
    counts.set(pseudoCountsWord,
               messageCount[dictionaryWord::Mail],
               messageCount[dictionaryWord::Junk],-1);
    counts.exportCSV(os);

    vector<dictionaryWord*> ::iterator w= ordered.begin();
    while(w!=ordered.end()){
        (*w)->exportCSV(os);
        ++w;
    }
}

/*:22*//*23:*/
#line 1733 "annoyance-filter.w"

// Merge a CSV dictionary into this one.  The pseudo-word record (probability
// -1 and the pseudo-counts text) adds to the message counts.  Records with a
// probability of -1 or more are merged with include().  Anything else is an
// ill-formed record and is reported when verbose.
void dictionary::importCSV(istream&is)
{
    if(verbose){
        cerr<<"Importing dictionary from CSV file."<<endl;
    }

    dictionaryWord record;

    while(record.importCSV(is)){
        const bool isCounts= (record.getJunkProbability()==-1)&&
                             (record.get()==pseudoCountsWord);

        if(isCounts){
            messageCount[dictionaryWord::Mail]+= record.n_mail();
            messageCount[dictionaryWord::Junk]+= record.n_junk();
            continue;
        }

        if(record.getJunkProbability()>=-1){
            include(record);
            continue;
        }

        if(verbose){
            cerr<<"Ill-formed record in CSV import: \""<<record.get()<<"\""<<endl;
        }
    }
}

/*:23*//*24:*/
#line 1798 "annoyance-filter.w"

#ifdef PURGE_USES_REMOVE_IF
// Predicate for the alternative purge implementation: true for dictionary
// entries whose junk probability is below the threshold given at
// construction.
class dictionaryWordProb_less:public unary_function<pair<string,dictionaryWord> ,int> {
    int threshold;
public:
    explicit dictionaryWordProb_less(const int pt):threshold(pt){}

    bool operator()(const pair<string,dictionaryWord> &dw)const
    {
        return dw.second.getJunkProbability()<threshold;
    }
};
#endif

void dictionary::purge(unsigned int occurrences){
if(verbose){
cerr<<"Pruning rare words from database: "<<flush;
}
memoryRequired= 0;

#ifdef PURGE_USES_REMOVE_IF
remove_if(begin(),end(),dictionaryWordProb_less(0));
#else
queue<dictionaryWord> pq;
while(!empty()){
if(((occurrences> 0)&&(begin()->second.n_occurrences()> occurrences))||
(begin()->second.getJunkProbability()>=0)){
pq.push(begin()->second);
}
erase(begin());
}
while(!pq.empty()){
insert(make_pair(pq.front().get(),pq.front()));
memoryRequired+= pq.front().estimateMemoryRequirement();
pq.pop();
}
#endif

if(verbose){
cerr<<size()<<" words remaining."<<endl;
cerr<<"  Dictionary size "<<estimateMemoryRequirement()<<" bytes."<<endl;
}
}

/*:24*//*25:*/
#line 1844 "annoyance-filter.w"

// Call resetCat(category) on every word in the dictionary.
void dictionary::resetCat(dictionaryWord::mailCategory category)
{
    if(verbose){
        cerr<<"Resetting counts for category ";
        cerr<<dictionaryWord::categoryName(category)<<endl;
    }

    iterator word= begin();
    while(word!=end()){
        word->second.resetCat(category);
        ++word;
    }
}

/*:25*//*26:*/
#line 1861 "annoyance-filter.w"

// Print descriptive statistics of the junk probabilities of all rated words
// (probability of zero or more) to os.
void dictionary::printStatistics(ostream&os)const
{
    if(verbose){
        cerr<<"Computing dictionary statistics."<<endl;
    }
    os<<"Dictionary statistics:"<<endl;

    // Collect the probabilities of the rated words.
    dataTable<double> probabilities;
    for(const_iterator word= begin();word!=end();++word){
        const double jp= word->second.getJunkProbability();

        if(jp>=0){
            probabilities.push_back(jp);
        }
    }

    // Measures of central tendency and position.
    os<<"Mean = "<<probabilities.mean()<<endl;
    os<<"Geometric mean = "<<probabilities.geometricMean()<<endl;
    os<<"Harmonic mean = "<<probabilities.harmonicMean()<<endl;
    os<<"RMS = "<<probabilities.RMS()<<endl;
    os<<"Median = "<<probabilities.median()<<endl;
    os<<"Mode = "<<probabilities.mode()<<endl;
    os<<"Percentile(0.5) = "<<probabilities.percentile(0.5)<<endl;
    os<<"Quartile(1) = "<<probabilities.quartile(1)<<endl;
    os<<"Quartile(3) = "<<probabilities.quartile(3)<<endl;

    // Measures of spread and shape.
    os<<"Variance = "<<probabilities.variance()<<endl;
    os<<"Standard deviation = "<<probabilities.stdev()<<endl;
    os<<"CentralMoment(3) = "<<probabilities.centralMoment(3)<<endl;
    os<<"Skewness = "<<probabilities.skewness()<<endl;
    os<<"Kurtosis = "<<probabilities.kurtosis()<<endl;
}

/*:26*//*27:*/
#line 1896 "annoyance-filter.w"

#ifdef HAVE_PLOT_UTILITIES
#define PLOT_DEBUG
// Plot a histogram of the junk probabilities of all rated words.
//
// Writes <fileName>.dat (one "bin-start count" line per bin) and
// <fileName>.gp (GNUPLOT commands), then runs gnuplot piped through pnmtopng
// to produce <fileName>.png.  With PLOT_DEBUG defined the command is echoed
// and the intermediate files are kept.
//
// The bin index is now clamped to the last bin.  A probability of exactly 1
// (possible for words imported from an edited CSV file) used to index one
// element past the end of the histogram, and nBins == 0 indexed an empty
// vector.
//
// NOTE(review): fileName is pasted unquoted into a shell command; callers
// must not pass untrusted names.
void dictionary::plotProbabilityHistogram(string fileName,unsigned int nBins)const{
    if(verbose){
        cerr<<"Plotting probability histogram to "<<fileName<<".png"<<endl;
    }
    ofstream gp((fileName+".gp").c_str()),
             dat((fileName+".dat").c_str());

/*28:*/
#line 1940 "annoyance-filter.w"

    vector<unsigned int> hist(nBins);

    for(const_iterator mp= begin();mp!=end();mp++){
        if(mp->second.getJunkProbability()>=0){
            unsigned int bin= static_cast<unsigned int> (mp->second.getJunkProbability()*nBins);

            if(bin>=nBins){
                if(nBins==0){
                    continue;           // No bins to count into
                }
                bin= nBins-1;           // Probability 1.0 goes in the last bin
            }
            hist[bin]++;
        }
    }

/*:28*/
#line 1906 "annoyance-filter.w"
    ;
/*29:*/
#line 1956 "annoyance-filter.w"

    for(unsigned int j= 0;j<nBins;j++){
        dat<<(static_cast<double> (j)/nBins)<<" "<<hist[j]<<endl;
    }


/*:29*/
#line 1907 "annoyance-filter.w"
    ;



    gp<<"set term pbm small color"<<endl;
    gp<<"set ylabel \"Number of Words\""<<endl;
    gp<<"set xlabel \"Probability\""<<endl;

    gp<<"plot \""<<fileName<<".dat\" using 1:2 title \"\" with boxes"<<endl;

    string command("gnuplot ");
    command+= fileName+".gp | pnmtopng >"+fileName+".png";
#ifdef PLOT_DEBUG
    cout<<command<<endl;
#else
    command+= " 2>/dev/null";
#endif
    // Both files must be closed before gnuplot reads them.
    gp.close();
    dat.close();
    system(command.c_str());
#ifndef PLOT_DEBUG

    remove((fileName+".gp").c_str());
    remove((fileName+".dat").c_str());
#endif
}
#endif

/*:27*//*30:*/
#line 1967 "annoyance-filter.w"

void dictionary::computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
double mailBias,unsigned int minOccurrences)
{
for(dictionary::iterator p= begin();p!=end();p++){
p->second.computeJunkProbability(nMailMessages,nJunkMessages,
mailBias,minOccurrences);
}
}

/*:30*//*31:*/
#line 1997 "annoyance-filter.w"

// Write the dictionary to a binary stream: first a pseudo-word record that
// carries the mail and junk message counts, then one record per word in map
// order.
void dictionary::exportToBinaryFile(ostream&os)
{
    if(verbose){
        cerr<<"Exporting dictionary to binary file."<<endl;
    }

    dictionaryWord counts;
    counts.set(pseudoCountsWord,
               messageCount[dictionaryWord::Mail],
               messageCount[dictionaryWord::Junk],-1);
    counts.exportToBinaryFile(os);

    dictionary::iterator word= begin();
    while(word!=end()){
        word->second.exportToBinaryFile(os);
        ++word;
    }
}

// Merge a binary dictionary stream into this dictionary.  The first record
// must be the pseudo-word that carries the message counts (checked with
// assert); its counts are added to messageCount.  Every following record is
// merged with include().  An empty stream leaves the dictionary unchanged.
void dictionary::importFromBinaryFile(istream&is)
{
    if(verbose){
        cerr<<"Importing dictionary from binary file."<<endl;
    }

    dictionaryWord record;

    if(!record.importFromBinaryFile(is)){
        return;
    }

    assert(record.get()==pseudoCountsWord);
    messageCount[dictionaryWord::Mail]+= record.n_mail();
    messageCount[dictionaryWord::Junk]+= record.n_junk();

    for(;record.importFromBinaryFile(is);){
        include(record);
    }
}

/*:31*//*33:*/
#line 2140 "annoyance-filter.w"

// Load a fast dictionary file, either by memory-mapping it (HAVE_MMAP) or by
// reading its tables into a heap block.
//
// The header is validated field by field: signature, byte order mark,
// version number, size of double and a floating point test value.  Any
// mismatch is reported; all but a bad signature also call regen().
//
// Returns true when the dictionary is ready for find(), false otherwise.
//
// Changes from the previous version:
//   - the result of mmap() is checked against MAP_FAILED before use;
//   - the signature and header scratch variables are zero-initialised so a
//     file shorter than the header is rejected deterministically rather than
//     by comparing indeterminate bytes;
//   - bad_alloc is caught by const reference;
//   - lseek() uses SEEK_END/SEEK_SET in place of the literals 2 and 0.
bool fastDictionary::load(const string fname){
#ifdef HAVE_MMAP
    fileHandle= open(fname.c_str(),O_RDONLY);
    if(fileHandle==-1){
        cerr<<"Cannot open fast dictionary file "<<fname<<endl;
        return false;
    }
    fileLength= lseek(fileHandle,0,SEEK_END);
    lseek(fileHandle,0,SEEK_SET);
    void*mapping= mmap((caddr_t)0,fileLength,
                       PROT_READ,MAP_SHARED|MAP_NORESERVE,
                       fileHandle,0);
    if(mapping==MAP_FAILED){
        cerr<<"Cannot map fast dictionary file "<<fname<<" into memory."<<endl;
        close(fileHandle);
        dp= NULL;
        return false;
    }
    dp= static_cast<char*> (mapping);
    istrstream is(dp,fileLength);
#else
    ifstream is(fname.c_str(),ios::in|ios::binary);

    if(!is){
        cerr<<"Cannot open fast dictionary file "<<fname<<"."<<endl;
        return false;
    }
#endif
    char signature[4]= {0,0,0,0};
    is.read(signature,4);
    if(memcmp(signature,fastDictionarySignature,4)!=0){
        cerr<<"File "<<fname<<" is not a fast dictionary."<<endl;
        // Common failure exit, also reached by goto from the checks below.
fdlbail:;
#ifdef HAVE_MMAP
        munmap(dp,fileLength);
        close(fileHandle);
        dp= NULL;
#endif
        return false;
    }

    u_int16_t s= 0;
    is.read(reinterpret_cast<char*> (&s),sizeof s);
    if(s!=byteOrderMark){
        cerr<<"Fast dictionary file "<<fname<<
            " was created on a platform with incompatible byte order."<<endl;
        regen();
        goto fdlbail;
    }

    s= 0;
    is.read(reinterpret_cast<char*> (&s),sizeof s);
    if(s!=versionNumber){
        cerr<<"Fast dictionary file "<<fname<<
            " is version "<<s<<".  Version "<<versionNumber<<" is required."<<endl;
        regen();
        goto fdlbail;
    }

    // The size of a double is followed by a 16-bit filler and a known test
    // value, which is only read when the size matches.
    double d= 0;
    s= 0;
    is.read(reinterpret_cast<char*> (&s),sizeof s);
    u_int16_t filler;
    is.read(reinterpret_cast<char*> (&filler),sizeof filler);
    if(s==doubleSize){
        is.read(reinterpret_cast<char*> (&d),sizeof d);
    }
    if((s!=doubleSize)||(d!=fastDictionaryFloatingTest)){
        cerr<<"Fast dictionary file "<<fname<<
            " was created on a machine with incompatible floating point format."<<endl;
        regen();
        goto fdlbail;
    }

    is.read(reinterpret_cast<char*> (&totalSize),sizeof totalSize);
    is.read(reinterpret_cast<char*> (&hashTableOffset),sizeof hashTableOffset);
    is.read(reinterpret_cast<char*> (&hashTableBuckets),sizeof hashTableBuckets);
    is.read(reinterpret_cast<char*> (&wordTableSize),sizeof wordTableSize);

#ifdef HAVE_MMAP
    // The tables start directly after the header in the mapped file.
    dblock= reinterpret_cast<unsigned char*> (dp)+is.tellg();
#else
    u_int32_t fdsize= (hashTableBuckets*sizeof(u_int32_t))+wordTableSize;
    try{
        dblock= new unsigned char[fdsize];
    }catch(const bad_alloc&){
        cerr<<"Unable to allocate memory for fast dictionary.";
        return false;
    }
    is.read(reinterpret_cast<char*> (dblock),fdsize);
    is.close();
#endif

    // The hash table comes first, followed immediately by the word table.
    hashTable= reinterpret_cast<u_int32_t*> (dblock);
    wordTable= dblock+(hashTableBuckets*sizeof(u_int32_t));

    if(verbose){
        cerr<<"Loaded fast dictionary from "<<fname<<"."<<endl;
    }

    return true;
}

/*:33*//*34:*/
#line 2244 "annoyance-filter.w"

// Look up target in the loaded fast dictionary.  Returns the stored junk
// probability, or -1 when the word is not present.
//
// Each word table record is: a u_int32_t link to the next record in the same
// hash chain, the junk probability as a double, a u_int16_t text length,
// then the text itself.  Fields are copied out with memcpy rather than read
// through casts.
double fastDictionary::find(const string&target)const
{
    assert(dblock!=NULL);

    u_int32_t link= hashTable[computeHashValue(target)%hashTableBuckets];
    if(link==fastDictionaryVoidLink){
        return-1;
    }

    const u_int16_t wanted= target.length();
    const unsigned int lengthOffset= sizeof(u_int32_t)+sizeof(double);

    do{
        const unsigned char*record= wordTable+link;
        u_int16_t recordLength;

        memcpy(&recordLength,record+lengthOffset,sizeof recordLength);
        if((recordLength==wanted)&&
           (memcmp(target.data(),record+lengthOffset+sizeof(u_int16_t),wanted)==0)){
            double probability;

            memcpy(&probability,record+sizeof(u_int32_t),sizeof(double));
            return probability;
        }
        memcpy(&link,record,sizeof link);
    }while(link!=fastDictionaryVoidLink);

    return-1;
}

/*:34*//*35:*/
#line 2282 "annoyance-filter.w"

// Write dictionary d to stream o in fast dictionary format.
//
// The hash table and word table are built in memory first.  Output is the
// header (signature, byte order mark, version, size of double, a zero
// filler, a floating point test value, and four u_int32_t sizes), then the
// hash table, then the word table.  All multi-byte values are written in the
// host's own representation.
void fastDictionary::exportDictionary(const dictionary&d,ostream&o){
u_int32_t hashSize= nextGreaterPrime(d.size());

// Every bucket starts empty; words holds the packed word records.
vector<u_int32_t> hashTable(hashSize,fastDictionaryVoidLink);
vector<unsigned char> words;

for(dictionary::const_iterator w= d.begin();w!=d.end();w++){
u_int32_t h= computeHashValue(w->first);
unsigned int slot= h%hashSize;

// Link the record about to be appended (at offset words.size()) into its
// bucket: directly if the bucket is empty, otherwise at the end of the
// bucket's chain.
/*36:*/
#line 2369 "annoyance-filter.w"

if(hashTable[slot]==fastDictionaryVoidLink){
hashTable[slot]= words.size();
}else{
u_int32_t p= hashTable[slot];
u_int32_t l;
while(true){
memcpy(&l,&(words[p]),sizeof l);
if(l==fastDictionaryVoidLink){
break;
}
p= l;
}
l= words.size();
memcpy(&(words[p]),&l,sizeof l);
}

/*:36*/
#line 2293 "annoyance-filter.w"
;
// Append the record: void link, junk probability, text length, text.
/*37:*/
#line 2391 "annoyance-filter.w"

vector<unsigned char> ::size_type wl= words.size();
words.resize(words.size()+sizeof(u_int32_t)+
sizeof(double)+sizeof(u_int16_t)+w->second.get().length());
u_int32_t vl= fastDictionaryVoidLink;
#ifdef OLDWAY
memcpy(words.begin()+wl,&vl,sizeof vl);
#else
Vmemcpy(words,wl,&vl,sizeof vl);
#endif
wl+= sizeof vl;
double jp= w->second.getJunkProbability();
#ifdef OLDWAY
memcpy(words.begin()+wl,&jp,sizeof jp);
#else
Vmemcpy(words,wl,&jp,sizeof jp);
#endif
wl+= sizeof jp;
// The length field is 16 bits wide.
u_int16_t wlen= w->second.get().length();
#ifdef OLDWAY
memcpy(words.begin()+wl,&wlen,sizeof wlen);
#else
Vmemcpy(words,wl,&wlen,sizeof wlen);
#endif
wl+= sizeof wlen;
#ifdef OLDWAY
memcpy(words.begin()+wl,w->second.get().data(),wlen);
#else
Vmemcpy(words,wl,w->second.get().data(),wlen);
#endif

/*:37*/
#line 2294 "annoyance-filter.w"
;
}

// Header.  The signature is written as text, without a terminator.
o<<fastDictionarySignature;

u_int16_t b;
b= byteOrderMark;
o.write(reinterpret_cast<const char*> (&b),sizeof b);

b= versionNumber;
o.write(reinterpret_cast<const char*> (&b),sizeof b);

b= doubleSize;
o.write(reinterpret_cast<const char*> (&b),sizeof b);

// Filler word; load() reads and discards it.
b= 0;
o.write(reinterpret_cast<const char*> (&b),sizeof b);

double td= fastDictionaryFloatingTest;
o.write(reinterpret_cast<const char*> (&td),sizeof td);

// Size of everything written before the hash table: the 4-byte signature,
// four 16-bit fields, the test double and the four 32-bit sizes below.
u_int32_t headerSize= 4+(4*sizeof(u_int16_t))+sizeof(double)+
(4*sizeof(u_int32_t));

u_int32_t wordTableSize= words.size();

u_int32_t totalSize= headerSize+
(hashTable.size()*sizeof(u_int32_t))+
wordTableSize;

// load() reads these four as totalSize, hashTableOffset, hashTableBuckets
// and wordTableSize, in that order.
o.write(reinterpret_cast<const char*> (&totalSize),sizeof totalSize);
o.write(reinterpret_cast<const char*> (&headerSize),sizeof headerSize);
o.write(reinterpret_cast<const char*> (&hashSize),sizeof hashSize);
o.write(reinterpret_cast<const char*> (&wordTableSize),sizeof wordTableSize);

#ifdef OLDWAY
o.write(hashTable.begin(),hashTable.size()*sizeof(u_int32_t));

o.write(words.begin(),words.size());
#else
// Write both tables one element at a time.
for(vector<u_int32_t> ::const_iterator htp= hashTable.begin();
htp!=hashTable.end();htp++){
u_int32_t hte= *htp;
o.write(reinterpret_cast<const char*> (&hte),sizeof hte);
}

for(vector<unsigned char> ::const_iterator wtp= words.begin();
wtp!=words.end();wtp++){
o.put(*wtp);
}
#endif

if(verbose){
cerr<<"Exported "<<d.size()<<" words to fast dictionary."<<endl;
}
}

void fastDictionary::exportDictionary(const dictionary&d,const string fname){
ofstream of(fname.c_str(),ios::out|ios::binary);

if(of){
exportDictionary(d,of);
of.close();
}else{
cerr<<"Unable to create fast dictionary file "<<fname<<endl;
}
}

/*:35*//*38:*/
#line 2433 "annoyance-filter.w"

// Return an odd prime greater than a, for use as a hash table size.
//
// Candidates start at (a rounded up to odd) + 2 and advance by 2, so the
// result is not always the very next prime (4 gives 7, not 5).  That
// sequence is kept from the previous version so table sizes do not change.
//
// Two defects are fixed:
//   - the trial division limit was computed once from the original a.  For
//     a < 4 the limit was below 3, the division loop never ran, and the
//     function looped forever (for example on an empty dictionary);
//   - the limit now follows the candidate being tested, so a composite
//     candidate can no longer be accepted.
//
// Testing "n <= candidate / n" avoids both sqrt() and overflow of n * n.
unsigned int fastDictionary::nextGreaterPrime(unsigned int a){
    if((a&1)==0){
        a++;
    }

    while(true){
        bool composite= false;

        a+= 2;
        for(unsigned int n= 3;n<=(a/n);n+= 2){
            if((a%n)==0){
                composite= true;
                break;
            }
        }
        if(!composite){
            break;
        }
    }
    return a;
}

/*:38*//*39:*/
#line 2464 "annoyance-filter.w"

u_int32_t fastDictionary::computeHashValue(const string&s){
u_int32_t hash= 1;

for(unsigned int i= 0;i<s.length();i++){
hash= (hash*17)^s[i];
}
return hash;
}

/*:39*//*41:*/
#line 2576 "annoyance-filter.w"

// Read the next line of the encoded part into inputLine.
//
// The part ends (atEnd is set) when the input is exhausted, when a line
// starting with the message sentinel is read, or when a line consisting of
// "--" followed by the part boundary is read.  In the two latter cases the
// terminating line is kept in endBoundary.  Lines that are not a part
// boundary are appended to the transcript list, if any, and counted.
//
// Returns true when inputLine holds a new encoded line, false at the end of
// the part (inputLine is then empty).
//
// The stream returned by getline() is now tested directly.  The previous
// comparison "getline(...) != NULL" relied on the pre-C++11 conversion of a
// stream to void* and does not compile with a C++11 or later compiler.
bool MIMEdecoder::getNextEncodedLine(void){
    if(!atEnd){
        if(getline(*is,inputLine)){
            // A message sentinel inside a part means the part was never
            // terminated.
            if(inputLine.substr(0,(sizeof messageSentinel)-1)==messageSentinel){
                endBoundary= inputLine;
                if(partBoundary!=""){
                    assert(mf!=NULL);
                    mf->reportParserDiagnostic("Unterminated MIME sentinel at end of message.");
                    mf->setNewMessageEligiblity();
                }
                atEnd= true;
            }
            if((partBoundary!="")&&(inputLine.substr(0,2)=="--")&&
               (inputLine.substr(2,partBoundary.length())==partBoundary)){
                if(Annotate('d')){
                    ostringstream os;

                    os<<"Part boundary encountered: "<<inputLine;
                    mf->reportParserDiagnostic(os);
                }
                endBoundary= inputLine;
                atEnd= true;
            }else{
                if(tlist!=NULL){
                    tlist->push_back(inputLine);
                }
                ip= 0;
                encodedLineCount++;
            }
        }else{
            atEnd= true;
            eofHit= true;
        }
    }
    if(atEnd){
        inputLine= "";
        ip= 0;
    }
    return!atEnd;
}

/*:41*//*42:*/
#line 2624 "annoyance-filter.w"

// Assemble the next line of decoded text into s.  Returns true when a line
// end was found (s may then be empty); at end of input, returns true only if
// some characters were collected.
bool MIMEdecoder::getDecodedLine(string&s){
int ch;

s= "";
while(true){
// Use the character saved by the line-end check below, if there is one.
if(lookAhead){
ch= lookChar;
lookAhead= false;
}else{
ch= getDecodedChar();
}
// A negative value marks the end of the decoded stream.
if(ch<0){
break;
}
// Line end handling: CR, LF, CR LF and LF CR each end the line.  After a
// lone CR or LF the following character is saved as look-ahead.
/*43:*/
#line 2654 "annoyance-filter.w"

if(ch=='\r'||ch=='\n'){
int cht= getDecodedChar();

if(!(((ch=='\r')&&(cht=='\n'))||
((ch=='\n')&&(cht=='\r')))){
lookAhead= true;
lookChar= cht;
}
return true;
}

/*:43*/
#line 2639 "annoyance-filter.w"
;
s+= ch;
}
return s.length()> 0;
}

/*:42*//*44:*/
#line 2671 "annoyance-filter.w"

// Copy decoded characters to os until getDecodedChar() returns a negative
// value.
void MIMEdecoder::saveDecodedStream(ostream&os)
{
    for(int ch= getDecodedChar();ch>=0;ch= getDecodedChar()){
        os.put(ch);
    }
}

/*:44*//*45:*/
#line 2684 "annoyance-filter.w"

void MIMEdecoder::saveDecodedStream(const string fname){
ofstream of(fname.c_str());

if(!of){
if(verbose){
cerr<<"Cannot create MIMEdecoder dump file: "<<fname<<endl;
}
}else{
saveDecodedStream(of);
of.close();
}
}

/*:45*//*49:*/
#line 2799 "annoyance-filter.w"

// Return the next decoded byte of a Base64 part, or -1 at the end.
//
// Decoded bytes are buffered in decodedBytes.  When the buffer is empty, one
// group of four encoded characters is read and decoded into up to three
// bytes.
int base64MIMEdecoder::getDecodedChar(void){
// A pending look-ahead character is returned first.
/*55:*/
#line 2928 "annoyance-filter.w"

if(lookAhead){
lookAhead= false;
return lookChar;
}

/*:55*/
#line 2801 "annoyance-filter.w"
;
if(decodedBytes.size()==0){
// a[] holds the four raw characters, b[] their 6-bit values, o[] the three
// decoded bytes.
/*50:*/
#line 2824 "annoyance-filter.w"

unsigned char a[4],b[4],o[3];
int j,k;

/*51:*/
#line 2839 "annoyance-filter.w"

for(int i= 0;i<4;i++){
int c;

// Fetch the next character greater than ' ', reading further encoded lines
// as needed; c stays -1 when the part is exhausted.
// NOTE(review): where char is signed, a high-bit character is negative here
// and is presumably skipped like white space -- confirm.
/*52:*/
#line 2865 "annoyance-filter.w"

while(true){
c= -1;
while(ip<inputLine.length()){
if(inputLine[ip]> ' '){
c= inputLine[ip++];
break;
}
ip++;
}
if(c>=0){
break;
}
if(!getNextEncodedLine()){
break;
}
}

/*:52*/
#line 2843 "annoyance-filter.w"
;
// End of input: an error only if it falls inside a four character group.
/*53:*/
#line 2890 "annoyance-filter.w"

if(c==EOF){
if(i> 0){
nDecodeErrors++;
mf->reportParserDiagnostic("Unexpected end of file in Base64 decoding.");
}
return-1;
}

/*:53*/
#line 2844 "annoyance-filter.w"
;
// Characters outside the Base64 alphabet (table entry has bit 0x80 set) are
// reported and skipped; i is stepped back so the group is still filled.
// Note that c is an int, so the message shows its numeric value.
if(dtable[c]&0x80){
nDecodeErrors++;
ostringstream os;
os<<"Illegal character '"<<c<<"' in Base64 input stream.";
mf->reportParserDiagnostic(os.str());


i--;
continue;
}
a[i]= (unsigned char)c;
b[i]= dtable[c];
}

/*:51*/
#line 2828 "annoyance-filter.w"
;
// Pack four 6-bit values into three bytes.  '=' padding in the third or
// fourth position reduces the number of bytes kept to one or two.
/*54:*/
#line 2910 "annoyance-filter.w"

o[0]= (b[0]<<2)|(b[1]>>4);
o[1]= (b[1]<<4)|(b[2]>>2);
o[2]= (b[2]<<6)|b[3];
j= a[2]=='='?1:(a[3]=='='?2:3);

for(k= 0;k<j;k++){
decodedBytes.push_back(o[k]);
}

/*:54*/
#line 2829 "annoyance-filter.w"
;

/*:50*/
#line 2803 "annoyance-filter.w"
;
}
// Hand out the oldest buffered byte.
if(decodedBytes.size()> 0){
unsigned char v= decodedBytes[0];

decodedBytes.pop_front();
return v;
}
return-1;
}

/*:49*//*56:*/
#line 2948 "annoyance-filter.w"

// Build the Base64 decoding table.  Entries 0..254 are first marked invalid
// (0x80), then each alphabet character is given its 6-bit value.  The
// letters are filled in three runs each (A-I, J-R, S-Z and likewise for
// lower case).  '=' (padding) maps to 0.
// NOTE(review): entry 255 is not initialised here, exactly as in the
// original loop bound -- confirm the table size and whether index 255 can be
// reached.
void base64MIMEdecoder::initialiseDecodingTable(void)
{
    static const struct{
        int first,last,base;
    }runs[]= {
        {'A','I',0},
        {'J','R',9},
        {'S','Z',18},
        {'a','i',26},
        {'j','r',35},
        {'s','z',44},
        {'0','9',52}
    };
    int i;

    for(i= 0;i<255;i++){
        dtable[i]= 0x80;
    }

    for(unsigned int r= 0;r<(sizeof runs/sizeof runs[0]);r++){
        for(i= runs[r].first;i<=runs[r].last;i++){
            dtable[i]= runs[r].base+(i-runs[r].first);
        }
    }

    dtable['+']= 62;
    dtable['/']= 63;
    dtable['=']= 0;
}

/*:56*//*57:*/
#line 2991 "annoyance-filter.w"

// Decode a Base64 string held in memory and return the decoded bytes as a
// string.  A temporary decoder is run over s with an empty part boundary; m
// is passed to it as the mail folder.
string base64MIMEdecoder::decodeEscapedText(const string s,mailFolder*m)
{
    base64MIMEdecoder decoder;
    istringstream encoded(s);
    string decoded= "";

    decoder.set(&encoded,m,"");

    for(int ch= decoder.getDecodedChar();ch>=0;ch= decoder.getDecodedChar()){
        decoded+= static_cast<char> (ch);
    }

    return decoded;
}

/*:57*//*59:*/
#line 3039 "annoyance-filter.w"

// Return the next decoded character of a Quoted-Printable part.
//
// Characters other than '=' are passed through unchanged, including the
// negative end-of-input value from getNextChar().  An '=' introduces either
// a soft line break, which is swallowed, or a two-digit hexadecimal escape.
int quotedPrintableMIMEdecoder::getDecodedChar(void){
int ch;

// A pending look-ahead character is returned first.
/*55:*/
#line 2928 "annoyance-filter.w"

if(lookAhead){
lookAhead= false;
return lookChar;
}

/*:55*/
#line 3043 "annoyance-filter.w"
;

while(true){
ch= getNextChar();
if(ch=='='){
/*60:*/
#line 3063 "annoyance-filter.w"

int ch1= getNextChar();
// Skip white space (space, tab, CR) between the '=' and the line end.  A
// newline ends the skip; any other character is reported and replaced by a
// space so that skipping continues.
/*63:*/
#line 3140 "annoyance-filter.w"

while(/*62:*/
#line 3126 "annoyance-filter.w"

((ch1==' ')||(ch1=='\t')||(ch1=='\r'))

/*:62*/
#line 3141 "annoyance-filter.w"
){
ch1= getNextChar();
if(ch1=='\n'){
continue;
}
if(!/*62:*/
#line 3126 "annoyance-filter.w"

((ch1==' ')||(ch1=='\t')||(ch1=='\r'))

/*:62*/
#line 3146 "annoyance-filter.w"
){
nDecodeErrors++;
ostringstream os;

os<<"Invalid character '"<<static_cast<char> (ch1)<<
"' after soft line break in Quoted-Printable MIME part.";
mf->reportParserDiagnostic(os.str());
ch1= ' ';
}
}

/*:63*/
#line 3065 "annoyance-filter.w"
;
// "=" followed by newline is a soft line break: go round for the next
// character.
if(ch1=='\n'){
continue;
}else{
// Otherwise the next two characters are taken as hexadecimal digits.
// NOTE(review): after an invalid escape the result is still computed from
// the -1 nybble value(s) and is therefore negative, which callers presumably
// take for end of input -- confirm whether that is intended.
int n1= hex_to_nybble(ch1);
int ch2= getNextChar();
int n2= hex_to_nybble(ch2);
if(n1==-1||n2==-1){
ostringstream os;

os<<"Invalid escape sequence '="<<
static_cast<char> (ch1)<<static_cast<char> (ch2)<<
"' in Quoted-Printable MIME part.";
mf->reportParserDiagnostic(os.str());
nDecodeErrors++;
}
ch= (n1<<4)|n2;
}
return ch;

/*:60*/
#line 3048 "annoyance-filter.w"
;
}else{
return ch;
}
}
}

/*:59*//*61:*/
#line 3092 "annoyance-filter.w"

// Return the next raw character of the encoded part, or -1 at the end.
// getline() strips the line terminators, so a '\n' is delivered after the
// last character of every line, and on its own for an empty line.
int quotedPrintableMIMEdecoder::getNextChar(void)
{
    for(;;){
        // Deliver the newline owed for the line just finished.
        if(atEndOfLine){
            atEndOfLine= false;
            return'\n';
        }

        if(ip<inputLine.length()){
            atEndOfLine= atEndOfLine||(ip==(inputLine.length()-1));
            return inputLine[ip++];
        }

        if(!getNextEncodedLine()){
            return-1;
        }
        if(inputLine.length()==0){
            atEndOfLine= true;
        }
    }
}

/*:61*//*64:*/
#line 3163 "annoyance-filter.w"

// Convert a hexadecimal digit character (0-9, A-F or a-f) to its value
// 0..15.  Returns -1 for any other character.
int quotedPrintableMIMEdecoder::hex_to_nybble(const int ch)
{
    if((ch>='0')&&(ch<=('0'+9))){
        return ch-'0';
    }
    if((ch>='A')&&(ch<=('A'+5))){
        return 10+(ch-'A');
    }
    if((ch>='a')&&(ch<=('a'+5))){
        return 10+(ch-'a');
    }
    return-1;
}

/*:64*//*65:*/
#line 3180 "annoyance-filter.w"

// Decode the "=XX" escapes in a string and return the result.
//
// An '=' that is not followed by two hexadecimal digits, or is too close to
// the end of the string to be, is copied through unchanged and reported when
// verbose.  The m argument is not used.
//
// The "too near the end" test is now "s.length() - p < 3".  The old test
// "p > s.length() - 3" wrapped around for strings shorter than three
// characters, so an '=' in such a string was decoded from characters beyond
// the end of the string.
string quotedPrintableMIMEdecoder::decodeEscapedText(const string s,mailFolder*m){
    string r= "";
    string::size_type p;

    for(p= 0;p<s.length();p++){
        bool decoded= false;

        if(s[p]=='='){
            // p < s.length() holds here, so the subtraction cannot wrap.
            if((s.length()-p)<3){
                if(verbose){
                    cerr<<"decodeEscapedText: escape too near end of string: "<<s<<endl;
                }
            }else{
                int n1= hex_to_nybble(s[p+1]),
                    n2= hex_to_nybble(s[p+2]);
                if((n1<0)||(n2<0)){
                    if(verbose){
                        cerr<<"decodeEscapedText: invalid escape sequence \""<<
                            s.substr(p,3)<<"\""<<endl;
                    }
                }else{
                    r+= static_cast<char> ((n1<<4)|n2);
                    decoded= true;
                    p+= 2;              // Skip the two hexadecimal digits
                }
            }
        }
        if(!decoded){
            r+= s[p];
        }
    }
    return r;
}

/*:65*//*69:*/
#line 3345 "annoyance-filter.w"

/*  Read n (1--4) encoded bytes and pack them, first byte most
    significant, into an int.  If any byte read is negative, that
    value is returned immediately instead.  */
int MBCSdecoder::getNextNBytes(const unsigned int n){
    assert((n>=1)&&(n<=4));

    int packed= 0;
    unsigned int remaining= n;
    while(remaining>0){
        const int octet= getNextEncodedByte();
        if(octet<0){
            return octet;
        }
        packed= (packed<<8)|octet;
        remaining--;
    }
    assert(packed!=-1);
    return packed;
}

/*:69*//*70:*/
#line 3367 "annoyance-filter.w"

/*  Report diagnostic message s.  With a mail folder attached the
    message goes to its parser diagnostic channel; otherwise it is
    written to cerr, but only when verbose is set.  */
void MBCSdecoder::reportDecoderDiagnostic(const string s)const{
    if(mf==NULL){
        if(verbose){
            cerr<<s<<endl;
        }
        return;
    }
    mf->reportParserDiagnostic(s);
}

void MBCSdecoder::reportDecoderDiagnostic(const ostringstream&os)const{
reportDecoderDiagnostic(os.str());
}


/*:70*//*72:*/
#line 3412 "annoyance-filter.w"

/*  Decode the next character.  A first byte in 0xA1--0xFE introduces
    a two byte character whose second byte must be 0x80--0xFF; the
    pair is returned as (first<<8)|second.  Any other first byte
    (including a negative value) is returned unchanged.  A missing
    second byte yields -1; an invalid second byte discards the rest
    of the line and returns the first byte alone.  */
int EUC_MBCSdecoder::getNextDecodedChar(void){
    const int lead= getNextEncodedByte();
    if((lead<0xA1)||(lead>0xFE)){
        return lead;
    }

    const int trail= getNextEncodedByte();
    if((trail>=0x80)&&(trail<=0xFF)){
        return (lead<<8)|trail;
    }

    ostringstream os;
    if(trail==-1){
        os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
        reportDecoderDiagnostic(os);
        return -1;
    }

    // Second byte is present but out of range: abandon this line.
    discardLine();
    os<<name()<<"_MBCSdecoder: Invalid second byte in two byte character: "
        "0x"<<setiosflags(ios::uppercase)<<hex<<lead<<" "<<"0x"<<trail<<".";
    reportDecoderDiagnostic(os);
    return lead;
}

/*:72*//*74:*/
#line 3468 "annoyance-filter.w"

/*  Decode the next character.  A non-negative first byte with its
    high bit set is combined with the following byte into
    (first<<8)|second; anything else is returned as is.  Returns -1,
    after reporting a diagnostic, when the second byte is missing.  */
int Big5_MBCSdecoder::getNextDecodedChar(void){
    const int lead= getNextEncodedByte();
    const bool twoByte= (lead>=0)&&((lead&0x80)!=0);
    if(!twoByte){
        return lead;
    }

    const int trail= getNextEncodedByte();
    if(trail!=-1){
        return (lead<<8)|trail;
    }

    ostringstream os;
    os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
    reportDecoderDiagnostic(os);
    return -1;
}

/*:74*//*76:*/
#line 3525 "annoyance-filter.w"

/*  Decode the next Shift-JIS character.  Characters queued in
    "pending" by an earlier call are delivered first.  A first byte
    in 0x81--0x9F or 0xE0--0xFC starts a two byte character, returned
    as (first<<8)|second; a missing or out of range second byte is
    reported and yields -1.  The single bytes 0x80, 0xFD, 0xFE and
    0xFF are replaced as coded in the switch below; any other byte
    (or a negative value from getNextEncodedByte) is returned
    unchanged.  */
int Shift_JIS_MBCSdecoder::getNextDecodedChar(void){
/*78:*/
#line 3577 "annoyance-filter.w"

/* Drain characters left over from a multi-character replacement. */
if(!pending.empty()){
int pc= pending[0];
pending= pending.substr(1);
return pc;
}
/*:78*/
#line 3527 "annoyance-filter.w"
;

int c1= getNextEncodedByte();

if(c1>=0){
/*77:*/
#line 3545 "annoyance-filter.w"

/* First byte of a two byte character? */
if(((c1>=0x81)&&(c1<=0x9F))||
((c1>=0xE0)&&(c1<=0xEF))||
((c1>=0xF0)&&(c1<=0xFC))){
int c2= getNextEncodedByte();

if(c2==-1){
ostringstream os;

os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
reportDecoderDiagnostic(os);
return-1;
}
/* Second byte must lie in 0x40--0x7E or 0x80--0xFC. */
if(!(((c2>=0x40)&&(c2<=0x7E))||
((c2>=0x80)&&(c2<=0xFC)))){
ostringstream os;

os<<name()<<"_MBCSdecoder: Invalid second byte in two byte character: "
"0x"<<setiosflags(ios::uppercase)<<hex<<c1<<" "<<"0x"<<c2<<".";
reportDecoderDiagnostic(os);
return-1;
}
return(c1<<8)|c2;
}

/*:77*/
#line 3532 "annoyance-filter.w"
;
/*79:*/
#line 3595 "annoyance-filter.w"

/* Single byte replacements.  0xFE and 0xFF expand to more than one
   character: the first is returned now and the remainder is queued
   in "pending" for subsequent calls. */
switch(c1){
case 0x80:
c1= '\\';
break;

case 0xFD:
c1= 0xA9;
break;

case 0xFE:
c1= 'T';
pending= "M";
break;

case 0xFF:
c1= '.';
pending= "..";
break;
}

/*:79*/
#line 3533 "annoyance-filter.w"
;
}
return c1;
}

/*:76*//*82:*/
#line 3667 "annoyance-filter.w"

/*  Decode the next UCS-2 character from two encoded bytes, assembled
    according to the bigEndian setting.  A negative first byte (no
    more input) is returned as is, without a diagnostic, matching the
    UTF-8 and UTF-16 decoders.  Only when the first byte is present
    and the second is missing is "Premature end of line" reported and
    -1 returned.  */
int UCS_2_Unicode_MBCSdecoder::getNextDecodedChar(void){
    int c1= getNextEncodedByte();

    // Clean end of input: previously this fell through and produced a
    // spurious "Premature end of line" diagnostic for every line.
    if(c1<0){
        return c1;
    }

    const int c2= getNextEncodedByte();
    if(c2<0){
        ostringstream os;

        os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
        reportDecoderDiagnostic(os);
        return -1;
    }

    if(bigEndian){
        c1= (c1<<8)|c2;
    }else{
        c1|= (c2<<8);
    }
    return c1;
}

/*:82*//*84:*/
#line 3707 "annoyance-filter.w"

/*  Decode the next UTF-8 encoded character.  Bytes up to 0x7F are
    returned directly.  Otherwise the number of leading one bits in
    the first byte gives the sequence length; more than six is
    reported as an invalid first byte and yields -1.  Continuation
    bytes contribute their low six bits; one lacking the 10xxxxxx
    signature is reported but still folded in.  Running out of input
    inside a sequence is reported and yields -1.  A negative first
    byte is returned unchanged.  */
int UTF_8_Unicode_MBCSdecoder::getNextDecodedChar(void){
    int octet= getNextEncodedByte();

    if(octet<0){
        return octet;
    }
    if(octet<=0x7F){
        return octet;
    }

    // Count the leading one bits of the first byte.
    string::size_type nbytes= 0;
    for(unsigned char probe= octet;(probe&0x80)!=0;probe<<= 1){
        nbytes++;
    }
    if(nbytes> 6){
        ostringstream os;

        os<<name()<<"_MBCSdecoder: Invalid first byte "<<
            "0x"<<setiosflags(ios::uppercase)<<hex<<octet<<" in UTF-8 encoded string";
        reportDecoderDiagnostic(os);
        return -1;
    }

    // Payload bits of the first byte, then six bits per following byte.
    unsigned int result= octet&(0xFF>>(nbytes+1));
    for(string::size_type i= 1;i<nbytes;i++){
        octet= getNextEncodedByte();
        if(octet<0){
            ostringstream os;

            os<<name()<<"_MBCSdecoder: Premature end of line in UTF-8 character.";
            reportDecoderDiagnostic(os);
            return -1;
        }
        if((octet&0xC0)!=0x80){
            ostringstream os;

            os<<name()<<"_MBCSdecoder: Bad byte 1--n signature in UTF-8 encoded sequence.";
            reportDecoderDiagnostic(os);
        }
        result= (result<<6)|(octet&0x3F);
    }
    return result;
}

/*:84*//*86:*/
#line 3823 "annoyance-filter.w"

/*  Decode the next UTF-16 character.  A word outside the surrogate
    range 0xD800--0xDFFF is returned directly.  A high surrogate
    (0xD800--0xDBFF) must be followed by a low surrogate
    (0xDC00--0xDFFF); the pair is combined into a code point of
    0x10000 or above.  A missing or invalid second word, or a low
    surrogate in first position, is reported and yields -1.  A
    negative value from getNextUTF_16Word() is returned unchanged.  */
int UTF_16_Unicode_MBCSdecoder::getNextDecodedChar(void){
    int w1,w2,result;

    w1= getNextUTF_16Word();
    if(w1<0){
        return w1;
    }

    // Strictly below 0xD800: the former "<=" test treated the first
    // high surrogate, 0xD800, as an ordinary character so that it was
    // never paired with its low surrogate.
    if((w1<0xD800)||(w1> 0xDFFF)){
        result= w1;
    }else if((w1>=0xD800)&&(w1<=0xDBFF)){
        w2= getNextUTF_16Word();
        if(w2<0){
            ostringstream os;

            os<<name()<<"_MBCSdecoder: Premature end of line in UTF-16 two word character.";
            reportDecoderDiagnostic(os);
            return -1;
        }
        if((w2<0xDC00)||(w2> 0xDFFF)){
            ostringstream os;

            os<<name()<<"_MBCSdecoder: Invalid second word surrogate "<<
                "0x"<<setiosflags(ios::uppercase)<<hex<<w2<<" in UTF-16 encoded string.";
            reportDecoderDiagnostic(os);
            return -1;
        }
        // Ten payload bits from each surrogate, offset by 0x10000.
        result= (((w1&0x3FF)<<10)|(w2&0x3FF))+0x10000;
    }else{
        ostringstream os;

        os<<name()<<"_MBCSdecoder: Invalid first word surrogate "<<
            "0x"<<setiosflags(ios::uppercase)<<hex<<w1<<" in UTF-16 encoded string.";
        reportDecoderDiagnostic(os);
        return -1;
    }
    return result;
}

/*:86*//*89:*/
#line 3922 "annoyance-filter.w"

/*  Fetch the next character from the attached decoder and render it
    as a string: empty when the decoder returns a negative value, the
    single character itself for codes below 256, and otherwise
    prefix + name() + "-" + upper case hex code + suffix.  */
string MBCSinterpreter::getNextDecodedChar(void){
    assert(dp!=NULL);

    const int code= dp->getNextDecodedChar();
    if(code<0){
        return string();
    }
    if(code<256){
        return string(1,static_cast<char> (code));
    }

    ostringstream token;
    token.setf(ios::uppercase);
    token<<prefix<<name()<<"-"<<hex<<code<<dec<<suffix;
    return token.str();
}

/*:89*//*90:*/
#line 3944 "annoyance-filter.w"

/*  Decode the whole of line s: make it the source, then concatenate
    the strings returned by getNextDecodedChar() until it returns an
    empty string.  */
string MBCSinterpreter::decodeLine(const string&s){
    string decoded;

    setSource(s);
    for(string piece= getNextDecodedChar();
        !piece.empty();
        piece= getNextDecodedChar()){
        decoded+= piece;
    }
    return decoded;
}

/*:90*//*94:*/
#line 4015 "annoyance-filter.w"

/*  Render the next decoded Shift-JIS character as a string: empty
    for a negative code, the character itself below 0xA1,
    "SJIS-K" + hex for codes 0xA1--0xDF, and
    prefix + "SJIS-" + hex + suffix for everything above.  */
string Shift_JIS_MBCSinterpreter::getNextDecodedChar(void){
    assert(dp!=NULL);

    const int code= dp->getNextDecodedChar();
    if(code<0){
        return string();
    }
    if(code<0xA1){
        return string(1,static_cast<char> (code));
    }

    ostringstream token;
    token.setf(ios::uppercase);
    // code>=0xA1 is already established at this point.
    if(code> 0xDF){
        token<<prefix<<"SJIS-"<<hex<<code<<dec<<suffix;
    }else{
        token<<"SJIS-K"<<hex<<code<<dec;
    }
    return token.str();
}

/*:94*//*97:*/
#line 4082 "annoyance-filter.w"

/*  Render the next decoded Unicode character as a string: empty for
    a negative code, the character itself up to 0xFF, and otherwise
    "UCS-" + upper case hex code.  Codes in [0x3200,0xD800) or
    [0xF900,0xFAFF) are additionally wrapped in prefix and suffix.  */
string Unicode_MBCSinterpreter::getNextDecodedChar(void){
    assert(dp!=NULL);

    const int code= dp->getNextDecodedChar();
    if(code<0){
        return string();
    }
    if(code<=0xFF){
        return string(1,static_cast<char> (code));
    }

    const bool wrapped= ((code>=0x3200)&&(code<0xD800))||
                        ((code>=0xF900)&&(code<0xFAFF));

    ostringstream token;
    token.setf(ios::uppercase);
    if(!wrapped){
        token<<"UCS-"<<hex<<code<<dec;
    }else{
        token<<prefix<<"UCS-"<<hex<<code<<dec<<suffix;
    }
    return token.str();
}

/*:97*//*99:*/
#line 4177 "annoyance-filter.w"

/*  Read one byte from the mail folder's byte stream.  When the
    folder returns EOF the eof flag is set; the value returned is
    always the low eight bits of what the folder delivered.  */
unsigned char applicationStringParser::get8(void){
    assert(mf!=NULL);

    const int fetched= mf->nextByte();
    if(fetched==EOF){
        eof= true;
    }
    return fetched&0xFF;
}

/*:99*//*101:*/
#line 4279 "annoyance-filter.w"

/*  Read the file header: a three byte signature which must be
    "FWS", then version, file length, frame size rectangle, frame
    rate and frame count, in that order.  On end of file or a wrong
    signature the error flag is set and nothing further is read.  */
void flashStream::readHeader(void){
    for(int i= 0;i<3;i++){
        sig[i]= get8();
    }

    const bool badSignature= isEOF()||(memcmp(sig,"FWS",3)!=0);
    if(badSignature){
        error= true;
        if(verbose){
            cerr<<"Invalid signature in Flash animation file."<<endl;
        }
        return;
    }

    version= get8();
    fileLength= get32();
    getRect(&frameSize);
    frameRate= get16();
    frameCount= get16();
}

/*:101*//*102:*/
#line 4303 "annoyance-filter.w"

/*  Write a human readable summary of the header fields to os.  The
    stored frame rate is divided by 256 for display.  */
void flashStream::describe(ostream&os){
    const unsigned int versionNumber= static_cast<unsigned int> (version);

    os<<"Flash animation version "<<versionNumber<<endl;
    os<<"  File length: "<<fileLength<<" bytes."<<endl;
    os<<"  Frame size:  X: "<<frameSize.xMin<<" - "<<frameSize.xMax;
    os<<" Y: "<<frameSize.yMin<<" - "<<frameSize.yMax<<endl;
    os<<"  Frame rate: "<<setprecision(5)<<(frameRate/256.0);
    os<<" fps."<<endl;
    os<<"  Frame count: "<<frameCount<<endl;
}

/*:102*//*103:*/
#line 4327 "annoyance-filter.w"

/*  Read the header of the next tag.  The 16 bit header word holds
    the tag type in its upper ten bits and a length in the lower
    six; a length field of 0x3F means a 32 bit length follows.
    Returns true when a tag other than stagEnd was read.  If the
    stream is no longer OK the tag is forced to stagEnd with zero
    length and false is returned.  */
bool flashStream::nextTag(void){
    const unsigned short header= get16();

    if(isOK()){
        tType= static_cast<tagType> (header>>6);

        unsigned long length= header&0x3F;
        if(length==0x3F){
            length= get32();
        }
        if(isOK()){
            tDataLen= length;
            return tType!=stagEnd;
        }
    }

    tType= stagEnd;
    tDataLen= 0;
    return false;
}

/*:103*//*104:*/
#line 4356 "annoyance-filter.w"

/*  Skip the remainder of the current tag's data.  lookedAhead is
    the number of data bytes the caller has already consumed.
    Stops early if the stream ceases to be OK.  */
void flashStream::ignoreTag(unsigned int lookedAhead){
    if(!isOK()){
        return;
    }

    unsigned int consumed= lookedAhead;
    while(isOK()&&(consumed<tDataLen)){
        get8();
        consumed++;
    }
}

/*:104*//*105:*/
#line 4377 "annoyance-filter.w"

void flashStream::getString(string&s,int n){
s= "";
char ch;

if(n==-1){
while((ch= get8())!=0){
s+= ch;
}
}else{
while(n> 0){
ch= get8();
s+= ch;
n--;
}
}
}

/*:105*//*106:*/
#line 4400 "annoyance-filter.w"

/*  Read a rectangle: a five bit field width followed by four signed
    fields of that width, stored as xMin, xMax, yMin, yMax.  */
void flashStream::getRect(rect*r){
    initBits();

    const int fieldWidth= static_cast<int> (getBits(5));

    r->xMin= getSignedBits(fieldWidth);
    r->xMax= getSignedBits(fieldWidth);
    r->yMin= getSignedBits(fieldWidth);
    r->yMax= getSignedBits(fieldWidth);
}

/*:106*//*107:*/
#line 4418 "annoyance-filter.w"

/*  Read a matrix.  Each of the first two pairs (a,d) and (b,c) is
    preceded by a one bit presence flag; when absent the pair
    defaults to 0x00010000 and 0 respectively.  The final pair
    (tx,ty) is always present.  Every pair is preceded by its own
    five bit field width.  */
void flashStream::getMatrix(matrix*mat){
    initBits();

    // First optional pair: a and d.
    if(!getBits(1)){
        mat->a= mat->d= 0x00010000L;
    }else{
        const int width= static_cast<int> (getBits(5));
        mat->a= getSignedBits(width);
        mat->d= getSignedBits(width);
    }

    // Second optional pair: b and c.
    if(!getBits(1)){
        mat->b= mat->c= 0;
    }else{
        const int width= static_cast<int> (getBits(5));
        mat->b= getSignedBits(width);
        mat->c= getSignedBits(width);
    }

    // Final pair, always present: tx and ty.
    const int width= static_cast<int> (getBits(5));
    mat->tx= getSignedBits(width);
    mat->ty= getSignedBits(width);
}

/*:107*//*109:*/
#line 4494 "annoyance-filter.w"

/*  Reset the bit reader: empty the bit buffer so that the next
    getBits() call starts with a fresh byte from the stream.  */
void flashStream::initBits(void){
    bitBuf= 0;
    bitPos= 0;
}


/*  Read an n bit unsigned field, most significant bit first.  Bits
    still held in bitBuf are used before further bytes are fetched
    with get8(); bits left over after the field stay in bitBuf for
    the next call.  */
unsigned int flashStream::getBits(int n){
    unsigned int value= 0;

    for(;;){
        const int shortfall= n-bitPos;

        if(shortfall<=0){
            // The buffer holds enough bits: take the top n of them
            // and keep the remainder.
            value|= bitBuf>>-shortfall;
            bitPos-= n;
            bitBuf&= 0xFF>>(8-bitPos);
            return value;
        }

        // Use up everything in the buffer, then refill it.
        value|= bitBuf<<shortfall;
        n= shortfall;
        bitBuf= get8();
        bitPos= 8;
    }
}


/*  Read an n bit two's complement field and return it sign
    extended to int.

    The sign extension is done in unsigned arithmetic: the earlier
    "1<<(n-1)" was undefined for a zero width field (which a
    rectangle or matrix read from the stream may specify), and
    "-1<<n" shifted a negative value.  Widths outside 1..(bits in
    unsigned int)-1 need no extension, so the raw field is returned
    for them.  */
int flashStream::getSignedBits(const int n){
    unsigned int raw= getBits(n);
    const int wordBits= static_cast<int> (sizeof(unsigned int)*8);

    if((n>=1)&&(n<wordBits)){
        if(raw&(1u<<(n-1))){
            // Sign bit set: fill every bit above the field with ones.
            raw|= (~0u)<<n;
        }
    }
    return static_cast<int> (raw);
}

/*:109*//*115:*/
#line 4791 "annoyance-filter.w"

/*  Deliver the next string extracted from the Flash stream in s,
    returning true, or return false when no more strings remain.
    On the first call the file header is read.  Tags are then parsed
    one at a time; strings found in a tag are queued in "strings"
    and handed out one per call.  Font names, variable names, frame
    labels and URLs are queued only when textOnly is not set.

    Corrections relative to the previous version:
      - a bad header is now cleaned up and rejected whether or not
        verbose is set (the cleanup used to sit inside the
        "if(verbose)" block);
      - text referencing a font with no DefineFontInfo (always the
        case for DefineFont2 fonts) no longer dereferences end()
        iterators of fontMap / fontInfoBits;
      - DefineText2 colours are four bytes (RGBA), not three, in the
        non-debug build as well;
      - the text record loop and the edit text string loop stop when
        the stream runs out instead of spinning on the 0xFF bytes
        get8() returns after end of stream;
      - the glyph code vector is released when fontMap already holds
        an entry for the font ID.  */
bool flashTextExtractor::nextString(string&s){
if(!initialised){
initialised= true;
readHeader();
if(!isOK()){
if(verbose){
cerr<<"Invalid header in Flash application file."<<endl;
}
/* Close and drain the stream regardless of the verbose setting. */
close();
while(!isEOF()){
get8();
}
return false;
}
}
while(true){
haveStrings:
/*116:*/
#line 4878 "annoyance-filter.w"

/* Hand out a queued string if there is one. */
if(!strings.empty()){
s= strings.front();
strings.pop();
return true;
}

/*:116*/
#line 4809 "annoyance-filter.w"
;

while((!isEOF())&&(!isError())&&nextTag()){
unsigned int variant= 0;

switch(tType){
case stagDefineFont:
/*117:*/
#line 4890 "annoyance-filter.w"

{
#ifdef FLASH_PARSE_DEBUG
cout<<"DefineFont"<<endl;
#endif
unsigned short fontID= get16();
unsigned int offsetTable= get16();
#ifdef FLASH_PARSE_DEBUG
cout<<"  Font ID: "<<fontID<<endl;
cout<<"  Glyph count: "<<(offsetTable/2)<<endl;
#endif
/* The first offset table entry, halved, is taken as the glyph count. */
fontGlyphCount.insert(make_pair(fontID,offsetTable/2));
ignoreTag(2*2);
}

/*:117*/
#line 4816 "annoyance-filter.w"
;
break;

case stagDefineFont2:
/*118:*/
#line 4910 "annoyance-filter.w"

{
#ifdef FLASH_PARSE_DEBUG
cout<<"DefineFont2"<<endl;
#endif
unsigned short fontID= get16();
get16();


unsigned int fontNameLen= get8();
string fontName;
getString(fontName,fontNameLen);
if(!textOnly){
strings.push(fontName);
}


unsigned int nGlyphs= get16();
fontGlyphCount.insert(make_pair(fontID,nGlyphs));
ignoreTag(2+2+1+fontNameLen+2);
}

/*:118*/
#line 4820 "annoyance-filter.w"
;
break;

case stagDefineFontInfo:
/*119:*/
#line 4943 "annoyance-filter.w"

{
#ifdef FLASH_PARSE_DEBUG
cout<<"DefineFontInfo"<<endl;
#endif
unsigned short fontID= get16();
unsigned int fontNameLen= get8();
string fontName;
getString(fontName,fontNameLen);
if(!textOnly){
strings.push(fontName);
}
fontFlags fFlags= static_cast<fontFlags> (get8());
map<unsigned short,unsigned short> ::iterator fp= fontGlyphCount.find(fontID);
if(fp==fontGlyphCount.end()){
if(verbose){
cerr<<"DefineFontInfo for font ID "<<fontID<<
" without previous DefineFont."<<endl;
}
ignoreTag(4);
}else{
unsigned nGlyphs= fp->second;
vector<unsigned short> *v= new vector<unsigned short> (nGlyphs);
/* insert() leaves an existing entry for this font ID untouched; in
   that case the new vector is still filled, so that the glyph codes
   are consumed from the stream, and is released afterwards. */
bool stored= fontMap.insert(make_pair(fontID,v)).second;
fontInfoBits.insert(make_pair(fontID,fFlags));

for(unsigned int g= 0;g<nGlyphs;g++){
if(fFlags&fontWideCodes){
(*v)[g]= get16();
}else{
(*v)[g]= get8();
}
}
if(!stored){
delete v;
}
}
}

/*:119*/
#line 4824 "annoyance-filter.w"
;
break;

case stagDefineText2:
variant= 2;



case stagDefineText:
/*120:*/
#line 4986 "annoyance-filter.w"

{
#ifdef FLASH_PARSE_DEBUG
unsigned short textID= get16();
cout<<"DefineText.  ID = "<<textID<<endl;
#else
get16();
#endif
rect tr;
getRect(&tr);
matrix tm;
getMatrix(&tm);
unsigned short textGlyphBits= get8();
unsigned short textAdvanceBits= get8();
int fontId= -1;
map<unsigned short,vector<unsigned short> *> ::iterator fontp= fontMap.end();
map<unsigned short,unsigned short> ::iterator fgcp= fontGlyphCount.end();
unsigned int fGlyphs= 0;
fontFlags fFlags= static_cast<fontFlags> (0);

vector<unsigned short> *fontChars= NULL;



/* Process text records until the zero terminator, or until the
   stream runs out. */
while(isOK()){
unsigned int textRecordType= get8();
if((textRecordType==0)||(!isOK())){
break;
}

if(textRecordType&isTextControl){
#ifdef FLASH_PARSE_DEBUG
cout<<"Text control record."<<endl;
#endif
if(textRecordType&textHasFont){
fontId= get16();
#ifdef FLASH_PARSE_DEBUG
cout<<"    fontId: "<<fontId<<endl;
#endif
fgcp= fontGlyphCount.find(fontId);
if(fgcp==fontGlyphCount.end()){
fontp= fontMap.end();
fontChars= NULL;
if(verbose){
cerr<<"Flash DefineText item references undefined font ID "<<
fontId<<endl;
}
}else{
fontp= fontMap.find(fontId);
if((fontp==fontMap.end())||(fontInfoBits.count(fontId)==0)){
/* The font was declared but no glyph code table was supplied for
   it, so its glyph indices cannot be translated.  Treat it like an
   undefined font: glyphs set in it are skipped. */
fgcp= fontGlyphCount.end();
fontChars= NULL;
if(verbose){
cerr<<"Flash DefineText item references font ID "<<
fontId<<" which has no DefineFontInfo."<<endl;
}
}else{
fGlyphs= fgcp->second;
fontChars= fontp->second;
fFlags= fontInfoBits.find(fontId)->second;
}
}
}
if(textRecordType&textHasColor){
#ifdef FLASH_PARSE_DEBUG
int r= get8();
int g= get8();
int b= get8();
if(variant==2){
int a= get8();
cout<<"    tfontColour: ("<<r<<","<<
g<<","<<b<<","<<a<<")"<<endl;
}else{
cout<<"    tfontColour: ("<<r<<","<<
g<<","<<b<<")"<<endl;
}
#else
/* DefineText2 colours carry an alpha byte in addition to RGB. */
skip8n((variant==2)?4:3);
#endif
}
if(textRecordType&textHasXOffset){
#ifdef FLASH_PARSE_DEBUG
int iXOffset= get16();
cout<<"    X offset "<<iXOffset<<endl;
#else
get16();
#endif
}
if(textRecordType&textHasYOffset){
#ifdef FLASH_PARSE_DEBUG
int iYOffset= get16();
cout<<"    Y offset "<<iYOffset<<endl;
#else
get16();
#endif
}
if(textRecordType&textHasFont){
#ifdef FLASH_PARSE_DEBUG
int iFontHeight= get16();
cout<<"    Font Height: "<<iFontHeight<<endl;
#else
get16();
#endif
}
}else{
#ifdef FLASH_PARSE_DEBUG
cout<<"Text glyph record."<<endl;
#endif
unsigned int nGlyphs= textRecordType&0x7F;

initBits();
string s= "";

for(unsigned int i= 0;i<nGlyphs;i++){
unsigned int iIndex= getBits(textGlyphBits);
#ifdef FLASH_PARSE_DEBUG
unsigned int iAdvance= getBits(textAdvanceBits);
cout<<"["<<iIndex<<","<<iAdvance<<"] "<<flush;
#else
getBits(textAdvanceBits);
#endif
if(fontId<0){
if(verbose){
cerr<<"Flash DefineText does not specify font."<<endl;
}
}else if(fgcp!=fontGlyphCount.end()){
if(iIndex>=fGlyphs){
if(verbose){
cerr<<"Flash DefineText glyph index "<<
iIndex<<" exceeds font size of "<<fGlyphs<<"."<<
endl;
}
}else{
if(fFlags&fontWideCodes){
unsigned int wc= (*fontChars)[iIndex];
s+= static_cast<char> ((wc>>8)&0xFF);
s+= static_cast<char> (wc&0xFF);
}else{
s+= static_cast<char> ((*fontChars)[iIndex]);
}
}
}
}
#ifdef FLASH_PARSE_DEBUG
cout<<endl;
cout<<"Decoded: ("<<s<<")"<<endl;
#endif
/*121:*/
#line 5133 "annoyance-filter.w"

if(fFlags&fontUnicode){
UCS_2_Unicode_MBCSdecoder mbd_ucs;
Unicode_MBCSinterpreter mbi_ucs;

mbi_ucs.setDecoder(mbd_ucs);
s= mbi_ucs.decodeLine(s);
}else if(fFlags&fontShiftJIS){
Shift_JIS_MBCSdecoder mbd_sjis;
Shift_JIS_MBCSinterpreter mbi_sjis;

mbi_sjis.setDecoder(mbd_sjis);
s= mbi_sjis.decodeLine(s);
}


/*:121*/
#line 5122 "annoyance-filter.w"
;
strings.push(s);
}
}
}

/*:120*/
#line 4833 "annoyance-filter.w"
;
break;

case stagDefineEditText:
/*122:*/
#line 5155 "annoyance-filter.w"

{
#ifdef FLASH_PARSE_DEBUG
cout<<"Edit text record."<<endl;
#endif
get16();
rect rBounds;
getRect(&rBounds);

unsigned int flags= get16();

#ifdef FLASH_PARSE_DEBUG
cout<<"DefineEditText.  Flags = 0x"<<hex<<flags<<dec<<endl;
#endif

if(flags&seditTextFlagsHasFont){
#ifdef FLASH_PARSE_DEBUG
unsigned short uFontId= get16();
unsigned short uFontHeight= get16();
cout<<"FontId: "<<uFontId<<"  FontHeight: "<<uFontHeight<<endl;
#else
get16();
get16();
#endif
}

if(flags&seditTextFlagsHasTextColor){
skip8n(4);
}

if(flags&seditTextFlagsHasMaxLength){
#ifdef FLASH_PARSE_DEBUG
int iMaxLength= get16();
printf("length:%d ",iMaxLength);
#else
get16();
#endif
}

if(flags&seditTextFlagsHasLayout){
skip8n(1+(2*4));
}

string varname;
getString(varname);
if(!textOnly){
strings.push(varname);
}

if(flags&seditTextFlagsHasText){
string s;
char c;

/* Stop at the terminating zero, or when the stream runs out. */
while(((c= get8())!=0)&&isOK()){
s+= c;
}
strings.push(s);
}
}

/*:122*/
#line 4837 "annoyance-filter.w"
;
break;

case stagFrameLabel:
/*123:*/
#line 5220 "annoyance-filter.w"

{
string s;

getString(s);
if(!textOnly){
strings.push(s);
}
}

/*:123*/
#line 4841 "annoyance-filter.w"
;
break;

case stagDoAction:
/*124:*/
#line 5235 "annoyance-filter.w"

{
#ifdef FLASH_PARSE_DEBUG
cout<<"Do action:"<<endl;
#endif
actionCode ac;

while(isOK()&&(ac= static_cast<actionCode> (get8()))!=sactionNone){
unsigned int dlen= 0;
/* Action codes with the high bit set carry a 16 bit data length. */
if((ac&0x80)!=0){
dlen= get16();
}

switch(ac){
case sactionGetURL:
{
string url,target;

getString(url);
getString(target);
if(!textOnly){
strings.push(url);
}
strings.push(target);
}
break;

default:
if(dlen> 0){
skip8n(dlen);
}
#ifdef FLASH_PARSE_DEBUG
cout<<"  Skipping action code 0x"<<hex<<ac<<dec<<
" data length "<<dlen<<endl;
#endif
break;
}
}
}

/*:124*/
#line 4845 "annoyance-filter.w"
;
break;

default:
#ifdef FLASH_PARSE_DEBUG
cout<<"nextString ignoring tag type "<<getTagType()<<" data length: "<<
getTagDataLength()<<endl;
#endif
ignoreTag();
break;
}
/* Deliver strings as soon as a tag has produced any. */
if(!strings.empty()){
goto haveStrings;
}
}
if(strings.empty()){
break;
}
}
/* No more tags: consume whatever remains of the stream. */
while(isOK()){
get8();
}
return false;
}

/*:115*//*126:*/
#line 5344 "annoyance-filter.w"

#ifdef HAVE_PDF_DECODER
/*  Deliver the next line of text extracted from a PDF document in s,
    returning true, or return false when there is no more text or the
    extraction could not be set up.  On the first call the decoded
    document is copied to a temporary file and a pipe is opened to
    "pdftotext <file> -"; each call then returns one line read from
    that pipe.

    Corrections relative to the previous version:
      - the result of getline() is tested as a boolean instead of
        being compared with NULL, which does not compile under
        C++11 and later;
      - a mkstemp() failure is detected, and the descriptor it
        returns is closed rather than leaked (the file is reopened
        by name through an ofstream);
      - the byte returned by the final, failing get8() is no longer
        appended to the temporary file.  */
bool pdfTextExtractor::nextString(string&s){
if(!initialised){
initialised= true;

/*127:*/
#line 5373 "annoyance-filter.w"

#ifdef HAVE_MKSTEMP
strcpy(tempfn,"PDF_decode_XXXXXX");
{
int tempfd= mkstemp(tempfn);
if(tempfd<0){
cerr<<"Cannot create PDF temporary file "<<tempfn<<endl;
error= eof= true;
return false;
}
#ifdef HAVE_UNISTD_H
/* Global close(): this class has a member function of the same name. */
::close(tempfd);
#endif
}
#else
tmpnam(tempfn);
#endif
ofstream pdfstr(tempfn,ios::out|ios::binary);
if(!pdfstr){
cerr<<"Cannot create PDF temporary file "<<tempfn<<endl;
error= eof= true;
return false;
}
while(isOK()){
unsigned char b= get8();
/* Only store the byte if the read which produced it succeeded. */
if(isOK()){
pdfstr<<b;
}
}
pdfstr.close();

/*:127*/
#line 5350 "annoyance-filter.w"
;
/*128:*/
#line 5396 "annoyance-filter.w"

string pdfcmd= "pdftotext ";
pdfcmd+= tempfn;
pdfcmd+= " -";
ip= popen(pdfcmd.c_str(),"r");
if(ip==NULL){
cerr<<"Cannot open pipe to pdftotext."<<endl;
error= eof= true;
return false;
}
is.attach(fileno(ip));
is.clear();

/*:128*/
#line 5351 "annoyance-filter.w"
;
}

if(ip==NULL){
return false;
}

if(getline(is,s)){
return true;
}
close();
return false;
}
#endif

/*:126*//*130:*/
#line 5673 "annoyance-filter.w"

/*  Read the next logical line of the mail folder into s and return
    true, or return false when the input is exhausted.  The line
    comes from the look-ahead buffer, from the active MIME decoder
    (or application string parser), or directly from the input
    stream.  Along the way the function tracks message and MIME part
    boundaries, joins header continuation lines, decodes encoded
    words in headers, and selects the decoder and character set
    interpreter for each MIME part.

    Corrections relative to the previous version:
      - the result of getline() is tested as a boolean instead of
        being compared with NULL, which does not compile under
        C++11 and later;
      - for an unquoted "boundary=" value the end position is now the
        end of the line; it used to be computed as a length, which
        truncated long boundaries;
      - the sink decoder test is parenthesised so that the
        "asp==NULL" guard covers both image/ and video/ types.  */
bool mailFolder::nextLine(string&s){
while(true){
bool decoderEOF= false;

if(lookedAhead){
s= lookAheadLine;
lookedAhead= false;
}else{
if(mdp!=NULL){
/* A decoder is active: the line comes from the application string
   parser if one is attached, otherwise from the MIME decoder. */
if((asp!=NULL)?(!asp->nextString(s)):(!(mdp->getDecodedLine(s)))){
if(asp!=NULL){
if(Annotate('d')){
ostringstream os;

os<<"Closing "<<asp->name()<<" application file decoder.";
reportParserDiagnostic(os);
}
asp->close();
asp= NULL;
}
s= mdp->getTerminatorSentinel();
decoderEOF= mdp->isEndOfFile();
if(decoderEOF){
s= "";
}
if(Annotate('d')){
ostringstream os;

os<<"Closing out "<<mdp->name()<<" decoder.  "<<
mdp->getEncodedLineCount()<<" lines decoded.";
reportParserDiagnostic(os);
os.str("");
os<<"End sentinel: "<<s;
reportParserDiagnostic(os);
}
/*142:*/
#line 6145 "annoyance-filter.w"

mimeContentType= mimeContentTypeCharset= 
mimeContentTypeName= mimeContentDispositionFilename= 
mimeContentTypeBoundary= mimeContentTransferEncoding= "";
mdp= NULL;
mbi= NULL;
asp= NULL;
byteStream= false;

/*:142*/
#line 5709 "annoyance-filter.w"
;
/* A part header follows unless the sentinel is the closing
   "--boundary--" delimiter. */
inPartHeader= 
!((s.substr(0,2)=="--")&&
(s.substr(2,partBoundary.length())==partBoundary)&&
(s.substr(partBoundary.length()+2,2)=="--"));
if((!inPartHeader)&&(!(partBoundaryStack.empty()))){
partBoundary= partBoundaryStack.top();
partBoundaryStack.pop();
}
}
}else{
if(!getline(*is,s)){
/*138:*/
#line 5996 "annoyance-filter.w"

#ifdef HAVE_DIRECTORY_TRAVERSAL
if(dirFolder){
if(ip!=NULL){
pclose(ip);
ip= NULL;
}else{
ifdir.close();
}
if(findNextFileInDirectory(cfName)){
continue;
}
}
#endif


/*:138*/
#line 5721 "annoyance-filter.w"
;
return false;
}
}
}
nLines++;
if((mdp==NULL)&&(tlist!=NULL)&&(!decoderEOF)){
tlist->push_back(s);
}
/*139:*/
#line 6058 "annoyance-filter.w"

#ifdef BSD_DIAG
if(s.substr(0,(sizeof messageSentinel)-1)==messageSentinel){
if(!BSDfolder&&!lastLineBlank){
cerr<<"*** NonBSD From line ditched: "<<s<<endl;
}
}
#endif
/* Start of a new message? */
if(((s.substr(0,(sizeof messageSentinel)-1)==messageSentinel)&&
(BSDfolder||lastLineBlank))
||
(expectingNewMessage&&(s.length()> 0)&&(!isISOspace(s[0])))){
nMessages++;
newMessage= true;
expectingNewMessage= false;
inHeader= true;
multiPart= false;
inPartHeader= false;
partHeaderLines= 0;
bodyContentType= bodyContentTypeCharset= 
bodyContentTypeName= bodyContentTransferEncoding= "";
fromLine= s;
lastFromLine= lastMessageID= messageID= "";
while(!partBoundaryStack.empty()){
ostringstream os;

os<<"Orphaned part boundary on stack: \""<<partBoundaryStack.top()<<"\"";
reportParserDiagnostic(os);
partBoundaryStack.pop();
}
/*142:*/
#line 6145 "annoyance-filter.w"

mimeContentType= mimeContentTypeCharset= 
mimeContentTypeName= mimeContentDispositionFilename= 
mimeContentTypeBoundary= mimeContentTransferEncoding= "";
mdp= NULL;
mbi= NULL;
asp= NULL;
byteStream= false;

/*:142*/
#line 6088 "annoyance-filter.w"
;
}else{
newMessage= false;
}

/*:139*/
#line 5730 "annoyance-filter.w"
;
/*140:*/
#line 6101 "annoyance-filter.w"

/* Strip trailing white space. */
while((s.length()> 0)&&(isISOspace(s[s.length()-1]))){
s.erase(s.length()-1);
}
setNewMessageEligiblity(s.empty());

/*:140*/
#line 5731 "annoyance-filter.w"
;
/*141:*/
#line 6117 "annoyance-filter.w"

if(inHeader||inPartHeader){
if(s==""){
/* Blank line: end of the message header or part header. */
if(inHeader){
if((!multiPart)&&(bodyContentTransferEncoding!="")){
mimeContentType= bodyContentType;
mimeContentTypeCharset= bodyContentTypeCharset;
mimeContentTypeName= bodyContentTypeName;
mimeContentTransferEncoding= bodyContentTransferEncoding;
multiPart= true;
partBoundary= "";
}
}
inHeader= inPartHeader= false;
/*153:*/
#line 6483 "annoyance-filter.w"

if(multiPart){
assert(mdp==NULL);

#ifdef TYPE_LOG





typeLog<<mimeContentType<<"\t"<<
mimeContentTypeCharset<<"\t"<<
mimeContentTransferEncoding<<endl;
#endif

/*154:*/
#line 6518 "annoyance-filter.w"

if(mimeContentType=="multipart/alternative"){
if(mimeContentTypeBoundary!=""){
partBoundaryStack.push(partBoundary);
partBoundary= mimeContentTypeBoundary;
}else{
if(Annotate('d')){
ostringstream os;

os<<"Boundary missing from Content-Type of multipart/alternative.";
reportParserDiagnostic(os);
}
}
}

/*:154*/
#line 6498 "annoyance-filter.w"
;

/*155:*/
#line 6538 "annoyance-filter.w"

#ifdef HAVE_PDF_DECODER
if(mimeContentType=="application/pdf"){
asp= &aspPdf;
}else
#endif
if((mimeContentType=="application/x-shockwave-flash")||
(mimeContentType=="image/vnd.rn-realflash")){
asp= &aspFlash;
}
if(asp!=NULL){
asp->setMailFolder(this);
if(Annotate('d')){
ostringstream os;

os<<"Activating "<<asp->name()<<" application file decoder.";
reportParserDiagnostic(os);
}
}

/*:155*/
#line 6500 "annoyance-filter.w"
;

/*156:*/
#line 6574 "annoyance-filter.w"

if((asp==NULL)&&(streamMinTokenLength> 0)&&
((mimeContentType.substr(0,12)=="application/")||
(((mimeContentType.substr(0,6)=="audio/")||
(mimeContentType.substr(0,6)=="image/"))&&
(isSpoofedExecutableFileExtension(mimeContentTypeName)||
isSpoofedExecutableFileExtension(mimeContentDispositionFilename))
)
)
){


if(Annotate('d')){
ostringstream os;

os<<"Activating byte stream parser for \""<<mimeContentType<<"\"";
reportParserDiagnostic(os);
}
byteStream= true;
}
/*:156*/
#line 6502 "annoyance-filter.w"
;

/*157:*/
#line 6604 "annoyance-filter.w"

if(Annotate('d')){
ostringstream os;

reportParserDiagnostic("");
os<<"mimeContentType: {"<<mimeContentType<<"}";
reportParserDiagnostic(os);
os.str("");
os<<"mimeContentTypeCharset: {"<<mimeContentTypeCharset<<"}";
reportParserDiagnostic(os);
os.str("");
os<<"mimeContentTransferEncoding: {"<<mimeContentTransferEncoding<<"}";
reportParserDiagnostic(os);
}

/* Parenthesised so that the asp==NULL guard applies to both types. */
if((asp==NULL)&&
((mimeContentType.substr(0,6)=="image/")||
(mimeContentType.substr(0,6)=="video/"))
){
smd.set(is,this,partBoundary,tlist);
mdp= &smd;
if(Annotate('d')){
ostringstream os;

os<<"Activating MIME sink decoder with sentinel: \""<<partBoundary<<
"\" due to Content-Type = "<<mimeContentType;
reportParserDiagnostic(os);
}
if(dlist){
dlist->push_back(Xfile+"-Decoder: Sink");
}
}

/*:157*/
#line 6504 "annoyance-filter.w"


/*158:*/
#line 6644 "annoyance-filter.w"

else if(byteStream||(asp!=NULL)||
(mimeContentType=="plain/txt")||
(mimeContentType.substr(0,5)=="text/")||
(mimeContentType=="message/rfc822")){

/*159:*/
#line 6669 "annoyance-filter.w"

bool gibberish= false;

if(mimeContentTypeCharset.substr(0,6)=="gb2312"){
mbd_euc.setMailFolder(this);
mbi_gb2312.setDecoder(mbd_euc);
mbi= &mbi_gb2312;
}

if(mimeContentTypeCharset=="big5"){
mbd_big5.setMailFolder(this);
mbi_big5.setDecoder(mbd_big5);
mbi= &mbi_big5;
}

if(mimeContentTypeCharset=="utf-8"){
mbd_utf_8.setMailFolder(this);
mbi_unicode.setDecoder(mbd_utf_8);
mbi= &mbi_unicode;
}

if(mimeContentTypeCharset=="euc-kr"){
mbd_euc.setMailFolder(this);
mbi_kr.setDecoder(mbd_euc);
mbi= &mbi_kr;
}

#ifdef CHECK_FOR_GIBBERISH_CHARACTER_SETS
if((mimeContentTypeCharset.length()==0)||
(mimeContentTypeCharset=="us-ascii")||
(mimeContentTypeCharset.substr(0,8)=="iso-8859")||
(mimeContentTypeCharset=="windows-1251")){
if(Annotate('d')){
ostringstream os;

os<<"Accepting part in Content-Type-Charset: "<<mimeContentTypeCharset<<"  ("<<
mimeContentType<<" "<<mimeContentTransferEncoding<<")";
reportParserDiagnostic(os);
}
}else{
if(Annotate('d')){
ostringstream os;

os<<"Rejecting part in Content-Type-Charset: "<<mimeContentTypeCharset<<"  ("<<
mimeContentType<<" "<<mimeContentTransferEncoding<<")";
reportParserDiagnostic(os);
}
gibberish= true;
}
#endif

/*:159*/
#line 6650 "annoyance-filter.w"
;

/*160:*/
#line 6733 "annoyance-filter.w"

if(!gibberish){
if((mimeContentTransferEncoding.length()==0)||
(mimeContentTransferEncoding.substr(0,4)=="7bit")||
(mimeContentTransferEncoding.substr(0,4)=="8bit")||
(mimeContentTransferEncoding=="ascii")){
imd.set(is,this,partBoundary,tlist);
mdp= &imd;
}else if(mimeContentTransferEncoding=="base64"){
bmd.set(is,this,partBoundary,tlist);
mdp= &bmd;
}else if(mimeContentTransferEncoding=="quoted-printable"){
qmd.set(is,this,partBoundary,tlist);
mdp= &qmd;
}else{
gibberish= true;
smd.set(is,this,partBoundary,tlist);
mdp= &smd;
}

assert(mdp!=NULL);
if(Annotate('d')){
ostringstream os;

os<<(gibberish?"Rejecting":"Accepting")<<
" part in Content-Transfer-Encoding: "<<mimeContentTransferEncoding<<"  ("<<
mimeContentTypeCharset<<" "<<mimeContentType<<")";
reportParserDiagnostic(os);
}
if(dlist){
dlist->push_back(Xfile+"-Decoder: "+mdp->name());
}
if(Annotate('d')){
ostringstream os;

os<<"Activating MIME "<<mdp->name()<<" decoder with sentinel: "<<partBoundary;
reportParserDiagnostic(os);
}
}

/*:160*/
#line 6652 "annoyance-filter.w"
;

/*161:*/
#line 6778 "annoyance-filter.w"

if(byteStream&&(mdp==NULL)){
if(Annotate('d')){
ostringstream os;

os<<"Canceling byte stream mode due to Content-Transfer-Encoding: {"<<
mimeContentTransferEncoding<<"}  ("<<
mimeContentTypeCharset<<" "<<mimeContentType<<")";
reportParserDiagnostic(os);
}
byteStream= false;
}

/*:161*/
#line 6654 "annoyance-filter.w"
;

/*162:*/
#line 6802 "annoyance-filter.w"

if(mimeContentType=="message/rfc822"){
/*142:*/
#line 6145 "annoyance-filter.w"

mimeContentType= mimeContentTypeCharset= 
mimeContentTypeName= mimeContentDispositionFilename= 
mimeContentTypeBoundary= mimeContentTransferEncoding= "";
mdp= NULL;
mbi= NULL;
asp= NULL;
byteStream= false;

/*:142*/
#line 6804 "annoyance-filter.w"
;
forceInHeader();
}

/*:162*/
#line 6656 "annoyance-filter.w"
;
}

/*:158*/
#line 6506 "annoyance-filter.w"
;
}

/*:153*/
#line 6131 "annoyance-filter.w"
;
}
/*143:*/
#line 6168 "annoyance-filter.w"

/*144:*/
#line 6216 "annoyance-filter.w"

/* A header in the message which begins with our own X- header
   prefix is rejected, and removed from the transcript. */
bool isSpoofedHeader= false;
if(inHeader){
string sc= s,scx= Xfile;

stringCanonicalise(sc);
stringCanonicalise(scx);
scx+= '-';
if(sc.substr(0,scx.length())==scx){
if(tlist!=NULL){
tlist->pop_back();
}
isSpoofedHeader= true;
}
}

/*:144*/
#line 6169 "annoyance-filter.w"
;

/* Join continuation lines (those which begin with white space) to
   the header line.  The first line which is not a continuation is
   kept, untrimmed, as the look-ahead line for the next call. */
while((inHeader||inPartHeader)&&getline(*is,lookAheadLine)){
string lal= lookAheadLine;

while((lookAheadLine.length()> 0)&&(isISOspace(lookAheadLine[lookAheadLine.length()-1]))){
lookAheadLine.erase(lookAheadLine.length()-1);
}
if((lookAheadLine.length()> 0)&&isISOspace(lookAheadLine[0])){
string::size_type p= 1;
while(isISOspace(lookAheadLine[p])){
p++;
}
s+= lookAheadLine.substr(p);
if((tlist!=NULL)&&(!isSpoofedHeader)){
tlist->push_back(lal);
}
continue;
}
lookedAhead= true;
lookAheadLine= lal;
break;
}
if(isSpoofedHeader){
ostringstream os;

os<<"Spoofed header rejected: "<<s;
reportParserDiagnostic(os.str());
continue;
}

/*:143*/
#line 6133 "annoyance-filter.w"
;

/*145:*/
#line 6243 "annoyance-filter.w"

{
string arg;

if(inHeader&&compareHeaderField(s,"message-id",arg)){
messageID= arg;
lastMessageID= "";
}
}

/*:145*/
#line 6135 "annoyance-filter.w"
;
/*150:*/
#line 6421 "annoyance-filter.w"

string::size_type p,p1;
string arg;

if(inHeader&&compareHeaderField(s,"content-type",arg)){
string sc= s;
stringCanonicalise(sc);
if((p= sc.find("multipart/",13))!=string::npos){
if((p= sc.find("boundary=",p+10))!=string::npos){
/* p and p1 become the start and end positions of the boundary
   value within the line. */
if(s[p+9]=='\"'){
p1= sc.find("\"",p+10);
p+= 10;
}else{
p+= 9;
/* Unquoted value: it extends to the end of the line. */
p1= sc.length();
}
multiPart= true;
partBoundary= s.substr(p,(p1-p));
if(Annotate('d')){
ostringstream os;

os<<"Multi-part boundary: \""<<partBoundary<<"\"";
reportParserDiagnostic(os);
}
}
}
}

/*:150*/
#line 6136 "annoyance-filter.w"
;
/*146:*/
#line 6263 "annoyance-filter.w"

{
string arg,par;

if(compareHeaderField(s,"content-type",arg)){
if(parseHeaderArgument(s,"charset",par)){
stringCanonicalise(par);
bodyContentTypeCharset= par;
}
if(parseHeaderArgument(s,"name",par)){
bodyContentTypeName= par;
}
bodyContentType= arg;
}
if(inHeader&&compareHeaderField(s,"content-transfer-encoding",arg)){
bodyContentTransferEncoding= arg;
}
}

/*:146*/
#line 6137 "annoyance-filter.w"
;
/*147:*/
#line 6294 "annoyance-filter.w"

/* Decode "=?charset?q?...?=" and "=?charset?b?...?=" encoded words
   in message header lines. */
if(inHeader){
string sc= s;
string::size_type p,p1,p2,p3,p4;
char etype;
unsigned int ndecodes= 0;
string charset;

stringCanonicalise(sc);
p4= 0;
while(((p= sc.find("=?",p4))!=string::npos)){
p4= p+2;
if(((p1= sc.find("?q?",p4))!=string::npos)||
((p1= sc.find("?b?",p4))!=string::npos)){
charset= sc.substr(p4,p1-p4);
etype= sc[p1+1];
p4= p1+3;
if((p2= sc.find("?=",p4))!=string::npos){
p1+= 3;
p3= p2-p1;
string drt;
if(etype=='q'){
drt= quotedPrintableMIMEdecoder::decodeEscapedText(sc.substr(p1,p3),this);
}else{
assert(etype=='b');
drt= base64MIMEdecoder::decodeEscapedText(sc.substr(p1,p3),this);
}
/*148:*/
#line 6341 "annoyance-filter.w"

if(charset.substr(0,6)=="gb2312"){
EUC_MBCSdecoder mbd_euc;
GB2312_MBCSinterpreter mbi_gb2312;

mbd_euc.setMailFolder(this);
mbi_gb2312.setDecoder(mbd_euc);
drt= mbi_gb2312.decodeLine(drt);
}else if(charset=="big5"){
Big5_MBCSdecoder mbd_big5;
Big5_MBCSinterpreter mbi_big5;

mbd_big5.setMailFolder(this);
mbi_big5.setDecoder(mbd_big5);
drt= mbi_big5.decodeLine(drt);
}else if(charset=="utf-8"){
UTF_8_Unicode_MBCSdecoder mbd_utf_8;
Unicode_MBCSinterpreter mbi_unicode;

mbd_utf_8.setMailFolder(this);
mbi_unicode.setDecoder(mbd_utf_8);
drt= mbi_unicode.decodeLine(drt);
}else if(charset=="euc-kr"){
EUC_MBCSdecoder mbd_euc;
KR_MBCSinterpreter mbi_kr;

mbd_euc.setMailFolder(this);
mbi_kr.setDecoder(mbd_euc);
drt= mbi_kr.decodeLine(drt);
}else if((charset.substr(0,8)=="iso-8859")||
(charset=="us-ascii")){

}else{
ostringstream os;

os<<"Header line: no interpreter for ("<<charset<<") character set.";
reportParserDiagnostic(os.str());
}

/*:148*/
#line 6321 "annoyance-filter.w"
;
sc.replace(p,(p2-p)+2,drt);
p4= p+drt.length();
ndecodes++;
}
}
}
if(ndecodes> 0){
s= sc;
}
}

/*:147*/
#line 6138 "annoyance-filter.w"
;
}

/*:141*/
#line 5732 "annoyance-filter.w"
;
/*149:*/
#line 6384 "annoyance-filter.w"

if(multiPart&&inPartHeader){
string arg,par;

partHeaderLines++;
if(compareHeaderField(s,"content-type",arg)){
if(parseHeaderArgument(s,"charset",par)){
stringCanonicalise(par);
mimeContentTypeCharset= par;
}
if(parseHeaderArgument(s,"boundary",par)){
mimeContentTypeBoundary= par;
}
if(parseHeaderArgument(s,"name",par)){
mimeContentTypeName= par;
}
mimeContentType= arg;
}

if(compareHeaderField(s,"content-transfer-encoding",arg)){
mimeContentTransferEncoding= arg;
}

if(compareHeaderField(s,"content-disposition",arg)){
if(parseHeaderArgument(s,"filename",par)){
mimeContentDispositionFilename= par;
}
}
}

/*:149*/
#line 5733 "annoyance-filter.w"
;

/*151:*/
#line 6456 "annoyance-filter.w"

/* A "--boundary" line which is not the closing delimiter starts a
   new part header. */
if(multiPart&&(!inHeader)&&
(partBoundary!="")&&
(s.substr(0,2)=="--")&&
(s.substr(2,partBoundary.length())==partBoundary)&&
(s.substr(partBoundary.length()+2)!="--")){
inPartHeader= true;
mimeContentType= mimeContentTypeCharset= mimeContentTypeBoundary= 
mimeContentTransferEncoding= "";
}

/*:151*/
#line 5735 "annoyance-filter.w"
;
/*152:*/
#line 6472 "annoyance-filter.w"

/* Body lines pass through the character set interpreter, if any. */
if((mbi!=NULL)&&(!inHeader)&&(!inPartHeader)){
s= mbi->decodeLine(s);
}

/*:152*/
#line 5736 "annoyance-filter.w"
;
return true;
}
}


/*:130*//*131:*/
#line 5751 "annoyance-filter.w"

int mailFolder::nextByte(void){
assert(mdp!=NULL);
int c= mdp->getDecodedChar();
if(c<0){
byteStream= false;
if(Annotate('d')){
ostringstream os;

os<<"End of byte stream.  Deactivating byte stream parser.";
reportParserDiagnostic(os);
}
}
return c;
}

/*:131*//*136:*/
#line 5903 "annoyance-filter.w"

#ifdef HAVE_DIRECTORY_TRAVERSAL
/*  Advance the directory handle |dh| to the next regular file.
    Entries whose status cannot be read are skipped (with a note on
    standard error when |verbose| is set), as are entries which are not
    regular files.  When a regular file is found its path is stored in
    both |cfName| and |fname| and the result of
    |openNextFileInDirectory| is returned.  When the directory is
    exhausted the handle is closed, |dh| is set to NULL and false is
    returned.  */
bool mailFolder::findNextFileInDirectory(string &fname)
{
    assert(dirFolder);

    if (dh == NULL) {
        return false;
    }

    for (;;) {
        struct dirent *entry = readdir(dh);

        if (entry == NULL) {
            closedir(dh);
            dh = NULL;
            return false;
        }

        struct stat info;

        cfName = dirName + pathSeparator + entry->d_name;
        if (stat(cfName.c_str(), &info) != 0) {
            if (verbose) {
                cerr << "Cannot get status of " << cfName << ".  Skipping." << endl;
            }
            continue;
        }
        if (S_ISREG(info.st_mode)) {
            fname = cfName;
            return openNextFileInDirectory();
        }
    }
}
#endif

/*:136*//*137:*/
#line 5943 "annoyance-filter.w"

#ifdef HAVE_DIRECTORY_TRAVERSAL
/*  Open the directory member named by |cfName| as the current input
    stream |is|.

    When COMPRESSED_FILES is configured, symbolic links are followed
    (up to 50 levels) to discover the real name; if that name ends with
    |Compressed_file_type| the file is read through a pipe from
    |Uncompress_command|.  Otherwise the file is opened directly with
    |ifdir|.

    Returns true when a stream was opened, false when there is no open
    directory handle, the file cannot be opened, or the decompression
    pipe cannot be started.

    Changes from the original code:
      - the suffix test no longer relies on |length - suffixLength|,
        which wraps around for names shorter than the suffix and could
        compare equal to |string::npos|;
      - a NULL return from |popen| is detected instead of being passed
        to |fileno|;
      - |jname| is declared even when HAVE_READLINK is not defined.  */
bool mailFolder::openNextFileInDirectory(void){
    assert(dirFolder);

    if(dh==NULL){
        return false;
    }

#ifdef COMPRESSED_FILES
    string fname= cfName;
    string jname= fname;            /* Name after following symbolic links */
/*133:*/
#line 5818 "annoyance-filter.w"

#ifdef HAVE_READLINK
    int maxSlinks= 50;

    char slbuf[1024];
    while(maxSlinks--> 0){
        int sll= readlink(jname.c_str(),slbuf,(sizeof slbuf)-1);
        if(sll>=0){
            assert(sll<static_cast<int> (sizeof slbuf));
            slbuf[sll]= 0;
            jname= slbuf;
        }else{
            break;                  /* Not a symbolic link: |jname| is final */
        }
    }
    if(maxSlinks<=0){
        cerr<<"Warning: probable symbolic link loop for \""<<
            fname<<"\""<<endl;
    }
#endif

/*:133*/
#line 5954 "annoyance-filter.w"
    ;

    /*  True only when |jname| genuinely ends with the compressed file
        suffix; the length guard prevents unsigned wrap-around.  */
    const string ctype(Compressed_file_type);
    if((jname.length()>=ctype.length())&&
       (jname.compare(jname.length()-ctype.length(),ctype.length(),ctype)==0)){
        /*  NOTE(review): |fname| is appended to a shell command without
            quoting; a file name containing shell metacharacters would be
            interpreted by the shell.  Confirm the directories scanned
            are trusted.  */
        string cmd(Uncompress_command);
        cmd+= ' '+fname;
        ip= popen(cmd.c_str(),"r");
        if(ip==NULL){
            if(verbose){
                cerr<<"Unable to start decompression of mail folder directory file \""<<
                    cfName<<"\""<<endl;
            }
            return false;
        }
#ifdef HAVE_FDSTREAM_COMPATIBILITY
        ifcdir.attach(fileno(ip));
        ifcdir.clear();
        is= &ifcdir;
#else
        ifdir.attach(fileno(ip));
        ifdir.clear();
        is= &ifdir;
#endif
    }else{
#endif
        ifdir.open(cfName.c_str());
        if(!ifdir.is_open()){
            if(verbose){
                cerr<<"Unable to open mail folder directory file \""<<
                    cfName<<"\""<<endl;
            }
            return false;
        }
        ifdir.clear();
        is= &ifdir;
#ifdef COMPRESSED_FILES
    }
#endif
    /*  A freshly opened file starts a new message.  */
    expectingNewMessage= true;
    setNewMessageEligiblity();
    return true;
}
#endif

/*:137*//*163:*/
#line 6815 "annoyance-filter.w"

/*  Convert |s| to canonical form in place: every character for which
    |isISOupper| is true is replaced by the result of |toISOlower|.
    All other characters are left untouched.  */
void mailFolder::stringCanonicalise(string &s)
{
    const string::size_type count = s.length();

    for (string::size_type pos = 0; pos < count; pos++) {
        char &ch = s[pos];

        if (isISOupper(ch)) {
            ch = toISOlower(ch);
        }
    }
}

/*:163*//*164:*/
#line 6831 "annoyance-filter.w"

/*  Test whether header line |s| is the field named |target| (which the
    caller supplies in lower case) and, if so, extract its first
    argument.

    The comparison is made on a canonicalised (lower case) copy of |s|:
    the line must be longer than |target|, begin with |target| and have
    a ':' immediately after it.  On a match |arg| receives the lower
    case text which follows the colon and any white space, up to the
    next white space character or ';' (the empty string if nothing
    follows), and true is returned.  Otherwise false is returned and
    |arg| is left unchanged.  */
bool mailFolder::compareHeaderField(string &s, const string target, string &arg)
{
    const string::size_type nameLen = target.length();

    if (s.length() <= nameLen) {
        return false;
    }

    string lowered = s;

    stringCanonicalise(lowered);
    if ((lowered.compare(0, nameLen, target) != 0) || (lowered[nameLen] != ':')) {
        return false;
    }

    /*  Step over white space between the colon and the argument.  */
    string::size_type first = nameLen + 1;
    while ((first < lowered.length()) && isISOspace(lowered[first])) {
        first++;
    }

    /*  The argument runs to the next white space or semicolon.  */
    string::size_type last = first;
    while ((last < lowered.length()) &&
           !(isISOspace(lowered[last]) || (lowered[last] == ';'))) {
        last++;
    }

    arg = lowered.substr(first, last - first);
    return true;
}

/*:164*//*165:*/
#line 6872 "annoyance-filter.w"

/*  Extract the value of a "target=value" parameter from header line |s|.

    |target| is matched against a canonicalised (lower case) copy of
    |s|, so the caller supplies it in lower case; the value itself is
    taken from the original line with its case preserved.

    A value which begins with a double quote extends to the closing
    quote (quotes excluded); if there is no closing quote the parameter
    is treated as absent.  An unquoted value starts after any white
    space and runs to the next white space character or ';'.

    Returns true and sets |arg| when the parameter is found, false
    (leaving |arg| unchanged) otherwise.

    Unlike the original code, which inspected only the first occurrence
    of |target| in the line, every occurrence is examined until one is
    found which is immediately followed by '='.  Lines the original
    accepted give the same result as before.  */
bool mailFolder::parseHeaderArgument(string&s,const string target,string&arg)
{
    if(s.length()<=target.length()){
        return false;
    }

    string sc= s;
    stringCanonicalise(sc);

    /*  Locate the first occurrence of |target| directly followed by '='.  */
    string::size_type p= 0;
    bool found= false;

    while((p= sc.find(target,p))!=string::npos){
        const string::size_type q= p+target.length();

        if((q<sc.length())&&(sc[q]=='=')){
            found= true;
            break;
        }
        p++;
    }
    if(!found){
        return false;
    }

    p+= target.length()+1;          /* Index of first character after '=' */
    if(p>=s.length()){
        return false;
    }

    if(s[p]=='"'){
        const string::size_type p1= s.find('"',p+1);

        if(p1==string::npos){
            return false;           /* Unterminated quoted value */
        }
        arg= s.substr(p+1,p1-(p+1));
        return true;
    }

    /*  Unquoted value: skip leading white space, then collect up to
        the next white space or semicolon.  */
    string::size_type i= p;

    while((i<s.length())&&isISOspace(s[i])){
        i++;
    }

    string::size_type e= i;

    while((e<s.length())&&!(isISOspace(s[e])||(s[e]==';'))){
        e++;
    }
    arg= s.substr(i,e-i);
    return true;
}

/*:165*//*166:*/
#line 6940 "annoyance-filter.w"

bool mailFolder::isSpoofedExecutableFileExtension(const string&s)
{
string sc= s;

stringCanonicalise(sc);
if((sc.length()> 4)&&(sc[sc.length()-4]=='.')){
string ext= sc.substr(sc.length()-3);
stringCanonicalise(ext);
return((ext=="exe")||
(ext=="bat")||
(ext=="scr")||
(ext=="lnk")||
(ext=="pif")||
(ext=="com"));
}
return false;
}

/*:166*//*167:*/
#line 6964 "annoyance-filter.w"

unsigned int mailFolder::sizeMessageTranscript(const unsigned int lineOverhead)const{
assert(tlist!=NULL);
unsigned int n= tlist->size(),totsize= 0;
if((n> 1)&&
(tlist->back().substr(0,(sizeof messageSentinel)-1)==messageSentinel)){
n--;
}
list<string> ::iterator p= tlist->begin();
for(unsigned int i= 0;i<n;i++){
totsize+= p->length()+lineOverhead;
p++;
}
return totsize;
}

/*:167*//*168:*/
#line 6989 "annoyance-filter.w"

/*  Write the message transcript in |*tlist| to |os|, one line per list
    item, each followed by |endl|.  When the transcript has more than
    one line and the last one begins with |messageSentinel|, that last
    line is not written.  */
void mailFolder::writeMessageTranscript(ostream &os) const
{
    assert(tlist != NULL);

    unsigned int remaining = tlist->size();

    if ((remaining > 1) &&
        (tlist->back().compare(0, (sizeof messageSentinel) - 1, messageSentinel) == 0)) {
        remaining--;
    }

    list<string>::iterator line = tlist->begin();

    while (remaining > 0) {
        os << *line << endl;
        ++line;
        --remaining;
    }
}

void mailFolder::writeMessageTranscript(const string fname)const{
if(fname!="-"){
ofstream of(fname.c_str());
writeMessageTranscript(of);
of.close();
}else{
writeMessageTranscript(cout);
}
}

/*:168*//*169:*/
#line 7023 "annoyance-filter.w"

void mailFolder::reportParserDiagnostic(const string s){
if(verbose){
if((lastFromLine!=fromLine)||(lastMessageID!=messageID)){
cerr<<fromLine<<endl;
if(messageID!=""){
cerr<<"Message-ID: "<<messageID<<":"<<endl;
}
lastFromLine= fromLine;
lastMessageID= messageID;
}
cerr<<"    "<<s<<endl;
}
if(Annotate('p')||Annotate('d')){
parserDiagnostics.push(s);
}
}

void mailFolder::reportParserDiagnostic(const ostringstream&os){
reportParserDiagnostic(os.str());
}

/*:169*//*171:*/
#line 7151 "annoyance-filter.w"

/*  Reset this token definition and load the default rules applied to
    ISO-8859 text.  Token length limits are set to |lmin| and |lmax|.
    For each of the 256 character codes:
      - the code may appear in a token if it is an ASCII digit, an ISO
        alphabetic character, or one of '-', '\'' and '$';
      - the code is flagged "not exclusively" if it is a digit or '-'.
    Finally '-' and '\'' are flagged "not at end".  */
void tokenDefinition::setISO_8859defaults(unsigned int lmin, unsigned int lmax)
{
    clear();
    setLengthLimits(lmin, lmax);

    unsigned int code = 0;

    while (code < 256) {
        const bool digit = (isdigit(code) != 0);
        const bool asciiDigit = isascii(code) && digit;

        isToken[code] = asciiDigit || isISOalpha(code) ||
                        (code == '-') || (code == '\'') || (code == '$');
        notExclusively[code] = (digit || (code == '-')) ? 1 : 0;
        code++;
    }

    notAtEnd['\''] = true;
    notAtEnd['-'] = true;
}


/*:171*//*172:*/
#line 7169 "annoyance-filter.w"

/*  Reset this token definition and load the default rules applied to
    US-ASCII data.  Token length limits are set to |lmin| and |lmax|.
    For each of the 128 ASCII codes, letters and digits may appear in a
    token, and digits and '-' are flagged "not exclusively".  The
    underscore is then both admitted to tokens and flagged "not
    exclusively", and '-' and '\'' are flagged "not at end".  */
void tokenDefinition::setUS_ASCIIdefaults(unsigned int lmin, unsigned int lmax)
{
    clear();
    setLengthLimits(lmin, lmax);

    unsigned int code = 0;

    while (code < 128) {
        const bool digit = (isdigit(code) != 0);

        isToken[code] = isalpha(code) || digit;
        notExclusively[code] = (digit || (code == '-')) ? 1 : 0;
        code++;
    }

    notExclusively['_'] = true;
    isToken['_'] = true;
    notAtEnd['\''] = true;
    notAtEnd['-'] = true;
}


/*:172*//*174:*/
#line 7305 "annoyance-filter.w"

/*  Deliver the next token (or assembled phrase) from the mail source.

    On success the token is stored, folded to lower case, in |d| and
    true is returned; false is returned once |atEnd| is set.

    Each pass of the outer loop:
      1. hands out any phrase waiting in |pendingPhrases|;
      2. if the source is in byte-stream mode, scans decoded bytes using
         the byte-stream token definition |btd|;
      3. otherwise scans the current line |cl| from position |clp| using
         the text token definition |td|, tracking HTML comments (which
         are skipped while |inHTML| is set) and <html> ... </html> tags.

    A candidate token is rejected if its length is not acceptable to
    the token definition, or if the number of its characters flagged
    "not exclusively" equals its length.  Characters flagged "not at
    end" are never accepted as the first character and are trimmed from
    the end.

    When |assemblePhrases| is set, accepted tokens are passed to
    |assembleAllPhrases| and delivered through |pendingPhrases| instead
    of being returned directly.

    Changes from the original code:
      - in the byte-stream scanner |token| and |necount| are reset at
        the start of every candidate token.  Previously |necount| was
        never reset between tokens and |token| was not cleared after
        the phrase-assembly |continue|, so counts were stale and tokens
        ran together;
      - the HTML tag look-ahead tests use |clp + k <= cl.length()| so
        that lines shorter than the tag no longer wrap the unsigned
        subtraction and index beyond the end of |cl|;
      - characters are passed to |islower|/|toupper| through |ChIx| so
        that negative |char| values are never handed to <ctype.h>.  */
bool tokenParser::nextToken(dictionaryWord&d){
    string token;

    while(!atEnd){

/*175:*/
#line 7405 "annoyance-filter.w"

        /*  Phrases assembled earlier are delivered before parsing resumes.  */
        if(!pendingPhrases.empty()){
            token= pendingPhrases.front();
            pendingPhrases.pop_front();
            d.set(token);
            d.toLower();
            if(pTokenTrace&&saveMessage){
                messageQueue.push_back(string("  \"")+d.text+"\"");
            }
            return true;
        }

/*:175*/
#line 7311 "annoyance-filter.w"
        ;

        token= "";
        string::size_type necount= 0;   /* Characters flagged "not exclusively" */

        if(source->isByteStream()){
/*178:*/
#line 7525 "annoyance-filter.w"

            int b;

            while((b= source->nextByte())>=0){

                /*  Skip bytes which cannot be part of a token.  */
                if(!(btd->isTokenMember(b))){
                    continue;
                }

                /*  A token cannot start with a "not at end" byte.  */
                if(btd->isTokenNotAtEnd(b)){
                    continue;
                }

                /*  Start a fresh candidate token.  */
                token= "";
                necount= 0;

                if(btd->isTokenNotExclusively(b)){
                    necount++;
                }

                token+= static_cast<char> (b);
                while(((b= source->nextByte())>=0)&&
                      btd->isTokenMember(b)
                     ){
                    if(btd->isTokenNotExclusively(b)){
                        necount++;
                    }
                    token+= static_cast<char> (b);
                }

                /*  Trim trailing "not at end" characters.  */
                while((token.length()> 0)&&
                      btd->isTokenNotAtEnd(ChIx(token[token.length()-1]))
                     ){
                    token.erase(token.length()-1);
                }

                /*  Reject tokens of unacceptable length.  */
                if(!(btd->isTokenLengthAcceptable(token))){
                    token= "";
                    continue;
                }

                /*  Reject tokens made up only of "not exclusively" characters.  */
                if(necount==token.length()){
                    token= "";
                    continue;
                }
                d.set(token);
                d.toLower();
/*180:*/
#line 7622 "annoyance-filter.w"

                if(assemblePhrases){
                    assembleAllPhrases(d);
                    continue;
                }

/*:180*/
#line 7579 "annoyance-filter.w"
                ;
                if(pTokenTrace&&saveMessage){
                    messageQueue.push_back(string("  \"")+d.text+"\"");
                }
                return true;
            }
            continue;

/*:178*/
#line 7317 "annoyance-filter.w"
            ;
        }

        /*  Skip characters which are not token members (and everything
            inside an HTML comment), watching for HTML markers as we go.  */
        while((clp<cl.length())&&
              (inHTMLcomment||
               (!(td->isTokenMember(ChIx(cl[clp]))))
              )){
/*176:*/
#line 7430 "annoyance-filter.w"

            if(inHTML&&!inHTMLcomment&&(cl.substr(clp,4)==HTMLCommentBegin)){
                inHTMLcomment= true;
                clp+= 4;
#ifdef HTML_COMMENT_DEBUG
                cout<<"------------------------------ HTML Comment begin: "<<cl<<endl;
#endif
                continue;
            }
            if(inHTML&&inHTMLcomment&&(cl.substr(clp,3)==HTMLCommentEnd)){
                inHTMLcomment= false;
                clp+= 3;
#ifdef HTML_COMMENT_DEBUG
                cout<<"------------------------------ HTML Comment end: "<<cl<<endl;
#endif
                continue;
            }
#ifdef HTML_COMMENT_DEBUG
            if(inHTMLcomment){
                cout<<cl[clp];
                if(clp==(cl.length()-1)){
                    cout<<endl;
                }
            }
#endif

/*:176*/
#line 7325 "annoyance-filter.w"
            ;
/*177:*/
#line 7464 "annoyance-filter.w"

            /*  "<html>" or "<html " (any case) turns HTML mode on.  */
            if(cl[clp]=='<'&&((clp+6)<=cl.length())){
                if((cl[clp+1]=='H'||cl[clp+1]=='h')&&
                   (cl[clp+5]=='>'||cl[clp+5]==' ')){
                    string tag;
                    for(int i= 1;i<5;i++){
                        tag+= (islower(ChIx(cl[clp+i])))?toupper(ChIx(cl[clp+i])):cl[clp+i];
                    }
                    if(tag=="HTML"){
                        inHTML= true;
#ifdef HTML_COMMENT_DEBUG
                        cout<<"------------------------------ In HTML: "<<cl<<endl;
#endif
                    }
                }
            }

            /*  "</html>" (any case) turns HTML mode off.  */
            if(cl[clp]=='<'&&((clp+7)<=cl.length())){
                if((cl[clp+1]=='/')&&(cl[clp+2]=='H'||cl[clp+2]=='h')&&
                   (cl[clp+6]=='>')){
                    string tag;
                    for(int i= 2;i<6;i++){
                        tag+= (islower(ChIx(cl[clp+i])))?toupper(ChIx(cl[clp+i])):cl[clp+i];
                    }
                    if(tag=="HTML"){
                        inHTML= false;
#ifdef HTML_COMMENT_DEBUG
                        cout<<"------------------------------ Out of HTML: "<<cl<<endl;
#endif
                    }
                }
            }

/*:177*/
#line 7326 "annoyance-filter.w"
            ;
            clp++;
        }

        /*  Line exhausted: fetch the next one and start over.  */
        if(clp>=cl.length()){
            nextLine();
            continue;
        }

        /*  A token cannot start with a "not at end" character.  */
        if(td->isTokenNotAtEnd(ChIx(cl[clp]))){
            clp++;
            continue;
        }

        /*  Collect the token.  HTML comments embedded in a token are
            skipped without terminating it.  */
        if(td->isTokenNotExclusively(ChIx(cl[clp]))){
            necount++;
        }
        token+= cl[clp++];
        while((clp<cl.length())){
            if((!inHTMLcomment)&&(td->isTokenMember(ChIx(cl[clp])))){
                if(td->isTokenNotExclusively(ChIx(cl[clp]))){
                    necount++;
                }
                token+= cl[clp++];
            }else{
/*176:*/
#line 7430 "annoyance-filter.w"

                if(inHTML&&!inHTMLcomment&&(cl.substr(clp,4)==HTMLCommentBegin)){
                    inHTMLcomment= true;
                    clp+= 4;
#ifdef HTML_COMMENT_DEBUG
                    cout<<"------------------------------ HTML Comment begin: "<<cl<<endl;
#endif
                    continue;
                }
                if(inHTML&&inHTMLcomment&&(cl.substr(clp,3)==HTMLCommentEnd)){
                    inHTMLcomment= false;
                    clp+= 3;
#ifdef HTML_COMMENT_DEBUG
                    cout<<"------------------------------ HTML Comment end: "<<cl<<endl;
#endif
                    continue;
                }
#ifdef HTML_COMMENT_DEBUG
                if(inHTMLcomment){
                    cout<<cl[clp];
                    if(clp==(cl.length()-1)){
                        cout<<endl;
                    }
                }
#endif

/*:176*/
#line 7355 "annoyance-filter.w"
                ;
                if(inHTMLcomment){
                    clp++;
                    continue;
                }
                break;
            }
        }

        /*  Trim trailing "not at end" characters.  */
        while((token.length()> 0)&&
              td->isTokenNotAtEnd(ChIx(token[token.length()-1]))
             ){
            token.erase(token.length()-1);
        }

        /*  Reject tokens of unacceptable length.  */
        if(!(td->isTokenLengthAcceptable(token))){
            continue;
        }

        /*  Reject tokens made up only of "not exclusively" characters.  */
        if(necount==token.length()){
            continue;
        }

        d.set(token);
        d.toLower();
/*180:*/
#line 7622 "annoyance-filter.w"

        if(assemblePhrases){
            assembleAllPhrases(d);
            continue;
        }

/*:180*/
#line 7391 "annoyance-filter.w"
        ;
        if(pTokenTrace&&saveMessage){
            messageQueue.push_back(string("  \"")+d.text+"\"");
        }
        return true;
    }
    return false;
}

/*:174*//*181:*/
#line 7638 "annoyance-filter.w"

/*  Add the text of |d| to the sliding window |phraseQueue| (which is
    kept to at most |phraseMax| words) and queue, in |pendingPhrases|,
    every phrase of |phraseMin| to |phraseMax| words which ends with the
    word just added.  Words in a phrase are separated by a single
    space.  Phrases longer than |phraseLimit| characters are discarded
    unless |phraseLimit| is zero.  */
void tokenParser::assembleAllPhrases(dictionaryWord &d)
{
    phraseQueue.push_back(d.text);
    if (phraseQueue.size() > phraseMax) {
        phraseQueue.pop_front();
        assert(phraseQueue.size() == phraseMax);
    }

    for (unsigned int words = phraseMin; words <= phraseMax; words++) {
        if (words > phraseQueue.size()) {
            continue;               /* Not enough words collected yet */
        }

        /*  Walk backward from the newest word, prepending as we go.  */
        deque<string>::const_reverse_iterator word = phraseQueue.rbegin();
        string joined = "";

        for (unsigned int taken = 0; taken < words; taken++, word++) {
            if (!(joined == "")) {
                joined.insert(0, " ");
            }
            joined.insert(0, *word);
        }

        if ((phraseLimit == 0) || (joined.length() <= phraseLimit)) {
            pendingPhrases.push_back(joined);
        }
    }
}

/*:181*//*185:*/
#line 7771 "annoyance-filter.w"

/*  Classify the next message from the mail folder and return its junk
    probability.

    Steps:
      1. collect the set of unique tokens in the message;
      2. rank them by the distance of their junk probability from 0.5,
         using the fast dictionary |fd| when it is loaded and the
         dictionary |d| otherwise; words which are unknown or have a
         negative probability are given |unknownWordProbability|;
      3. combine the |nExtremal| most extreme probabilities p_i as
         prod(p_i) / (prod(p_i) + prod(1 - p_i));
      4. if the token parser is saving the message, add the result to
         its message queue and write the queue to |pDiagFilename|;
      5. if |createTranscript| is true or |transcriptFilename| is set,
         insert the X-Annoyance-Filter header lines before the first
         empty (or lone "\r") line of the transcript and, when a file
         name was given, write the transcript to it.

    With no tokens at all both products are 1 and the result is 0.5.

    Changes from the original code:
      - the parser-diagnostics path passed an iterator into
        |tp.messageQueue| together with |messageTranscript| to
        |addSignificantWordDiagnostics|, i.e. an insert position
        belonging to a different list; the queue itself is now passed;
      - |fabs| replaces the unqualified |abs| so that the computation
        cannot silently resolve to the integer |abs|.  */
double classifyMessage::classifyThis(bool createTranscript){
    dictionaryWord dw;
    double junkProb= -1;

    if(createTranscript||(transcriptFilename!="")){
        mf->setTranscriptList(&messageTranscript);
        if(Annotate('p')||Annotate('d')){
            saveParserDiagnostics= true;
        }
    }

/*187:*/
#line 7823 "annoyance-filter.w"

    /*  Unique tokens of the message.  */
    set<string> utokens;

    while(tp.nextToken(dw)){
        utokens.insert(dw.get());
    }

/*:187*/
#line 7783 "annoyance-filter.w"
    ;

/*188:*/
#line 7851 "annoyance-filter.w"

    /*  Tokens keyed by |probability - 0.5|, so that reverse iteration
        visits the most significant tokens first.  */
    multimap<double,string> rtokens;

    for(set<string> ::iterator t= utokens.begin();t!=utokens.end();t++){
        double pdiff;
        dictionary::iterator dp;

        if(fd->isDictionaryLoaded()){
            pdiff= fd->find(*t);
            if(pdiff<0){
                pdiff= unknownWordProbability;
            }
            pdiff= fabs(pdiff-0.5);
        }else{
            if(((dp= d->find(*t))!=d->end())&&
               (dp->second.getJunkProbability()>=0)){
                pdiff= fabs(dp->second.getJunkProbability()-0.5);
            }else{
                pdiff= fabs(unknownWordProbability-0.5);
            }
        }

        rtokens.insert(make_pair(pdiff,*t));
    }

/*:188*/
#line 7785 "annoyance-filter.w"
    ;

/*189:*/
#line 7885 "annoyance-filter.w"

    /*  Combine the probabilities of the |n| most significant tokens.  */
    unsigned int n= min(static_cast<multimap<double,string> ::size_type> (nExtremal),rtokens.size());
    multimap<double,string> ::const_reverse_iterator rp= rtokens.rbegin();
    double probP= 1,probQ= 1;
    if(verbose){
        cerr<<"Rank   Probability   Token"<<endl;
    }

    for(unsigned int i= 0;i<n;i++){
        double p;

        if(fd->isDictionaryLoaded()){
            p= fd->find(rp->second);
            if(p<0){
                p= unknownWordProbability;
            }
        }else{
            dictionary::iterator dp= d->find(rp->second);
            p= ((dp==d->end())||(dp->second.getJunkProbability()<0))?
                unknownWordProbability:dp->second.getJunkProbability();

        }
        if(verbose){
            cerr<<setw(3)<<setiosflags(ios::right)<<(i+1)<<"      "<<
                setw(9)<<setprecision(5)<<setiosflags(ios::left)<<p<<
                "  "<<rp->second<<endl;
        }
        probP*= p;
        probQ*= (1-p);
        rp++;
    }
    junkProb= probP/(probP+probQ);
    if(verbose){
        cerr<<"ProbP = "<<probP<<", ProbQ = "<<probQ<<endl;
    }

/*:189*/
#line 7787 "annoyance-filter.w"
    ;

    if(tp.getSaveMessage()){
/*190:*/
#line 7927 "annoyance-filter.w"

        ostringstream os;
        list<string> ::iterator p;

        /*  Find the first empty line of the saved message; the
            classification lines are inserted in front of it.  */
        for(p= tp.messageQueue.begin();p!=tp.messageQueue.end();p++){
            if(p->length()==0){
                break;
            }
        }

        os<<Xfile<<"-Junk-Probability: "<<setprecision(5)<<junkProb;
        tp.messageQueue.insert(p,os.str());
        os.str("");

        /*  |p| belongs to |tp.messageQueue|, so that is the list which
            must receive the significant word lines.  */
        addSignificantWordDiagnostics(tp.messageQueue,p,rtokens);

/*:190*/
#line 7790 "annoyance-filter.w"
        ;
        ofstream mdump(pDiagFilename.c_str());
        tp.writeMessageQueue(mdump);
        mdump.close();
    }

    if(createTranscript||(transcriptFilename!="")){
/*191:*/
#line 7968 "annoyance-filter.w"

        ostringstream os;
        list<string> ::iterator p;
        string transEndl= "";

        /*  Find the end of the header section.  A line consisting of a
            lone carriage return also ends it, in which case the lines
            we add are given a trailing "\r" as well.  */
        for(p= messageTranscript.begin();p!=messageTranscript.end();p++){
            if(p->length()==0){
                break;
            }
            if(*p=="\r"){
                transEndl= "\r";
                break;
            }
        }

        double jp= junkProb;

        /*  Very small probabilities are reported as zero.  */
        if(jp<0.001){
            jp= 0;
        }
        os<<Xfile<<"-Junk-Probability: "<<setprecision(3)<<jp<<transEndl;
        messageTranscript.insert(p,os.str());
        os.str("");
        os<<Xfile<<"-Classification: ";
        if(junkProb>=junkThreshold){
            os<<"Junk";
        }else if(junkProb<=mailThreshold){
            os<<"Mail";
        }else{
            os<<"Indeterminate";
        }
        os<<transEndl;
        messageTranscript.insert(p,os.str());

        if(Annotate('w')){
            addSignificantWordDiagnostics(messageTranscript,p,rtokens,transEndl);
        }

        if(Annotate('p')||Annotate('d')){
            while(!parserDiagnostics.empty()){
                ostringstream ds;

                ds<<Xfile<<"-Parser-Diagnostic: "<<parserDiagnostics.front()<<transEndl;
                messageTranscript.insert(p,ds.str());
                parserDiagnostics.pop();
            }
        }

/*:191*/
#line 7797 "annoyance-filter.w"
        ;
        if(transcriptFilename!=""){
            mf->writeMessageTranscript(transcriptFilename);
        }
    }

    return junkProb;
}

/*:185*//*192:*/
#line 8027 "annoyance-filter.w"

/*  Insert one "X-Annoyance-Filter-Significant-Word" line into list |l|
    before position |where| for each of the |nExtremal| most
    significant tokens in |rtokens| (fewer if |rtokens| is smaller).
    Tokens are taken in reverse key order, i.e. most significant first.
    Each line gives the rank, the junk probability and the quoted
    token, and is terminated by |endLine|.  |where| must be an iterator
    into |l|.

    The probability is looked up the same way |classifyThis| does it:
    in the fast dictionary |fd| when one is loaded, otherwise in the
    dictionary |d|, with |unknownWordProbability| substituted for
    unknown words and negative probabilities.  The original code always
    consulted |d|, so with only a fast dictionary loaded every word was
    reported with the unknown word probability.  */
void classifyMessage::addSignificantWordDiagnostics(list<string> &l,
                                                    list<string> ::iterator where,
                                                    multimap<double,string> &rtokens,string endLine){
    unsigned int n= min(static_cast<multimap<double,string> ::size_type> (nExtremal),rtokens.size());
    multimap<double,string> ::const_reverse_iterator rp= rtokens.rbegin();

    for(unsigned int i= 0;i<n;i++){
        double wp;

        if((fd!=NULL)&&fd->isDictionaryLoaded()){
            wp= fd->find(rp->second);
            if(wp<0){
                wp= unknownWordProbability;
            }
        }else{
            dictionary::iterator dp= d->find(rp->second);
            wp= ((dp==d->end())||((dp->second.getJunkProbability()<0)))?
                unknownWordProbability:dp->second.getJunkProbability();
        }

        ostringstream os;

        os<<Xfile<<"-Significant-Word: "<<
            setw(3)<<setiosflags(ios::right)<<(i+1)<<"  "<<
            setw(8)<<setprecision(5)<<setiosflags(ios::left)<<wp<<
            "  \""<<rp->second<<"\""<<endLine;
        l.insert(where,os.str());
        rp++;
    }
}

/*:192*//*197:*/
#line 8174 "annoyance-filter.w"

#ifdef POP3_PROXY_SERVER
/*  Create the listening socket of the POP3 proxy: open a TCP socket,
    bind it to |popProxyPort| on any local address and start listening
    with a backlog of |maxBacklog|.  A handler for SIGPIPE is installed
    as well.

    Returns true (and sets |opened|) on success.  On failure a message
    is written with |perror|, the socket is closed, |listenSocket| is
    set to -1 and false is returned.

    Changes from the original code: the address structure is cleared
    before use, and |perror| is called before |close| so that |close|
    cannot overwrite the |errno| value being reported.  */
bool POP3Proxy::acceptConnections(int maxBacklog){
    struct sockaddr_in name;

    listenSocket= socket(AF_INET,SOCK_STREAM,0);
    if(listenSocket<0){
        perror("POP3Proxy opening socket to listen for connections");
        listenSocket= -1;
        return false;
    }

    memset(&name,0,sizeof name);
    name.sin_family= AF_INET;
    name.sin_addr.s_addr= INADDR_ANY;
    name.sin_port= htons(popProxyPort);
    if(bind(listenSocket,(struct sockaddr*)&name,sizeof name)<0){
        perror("POP3Proxy binding socket to listen for connections");
        close(listenSocket);
        listenSocket= -1;
        return false;
    }

    if(listen(listenSocket,maxBacklog)<0){
        perror("POP3Proxy calling listen for connection socket");
        close(listenSocket);
        listenSocket= -1;
        return false;
    }

    signal(SIGPIPE,absentPlumber);
    opened= true;
    return opened;
}
#endif

/*:197*//*198:*/
#line 8216 "annoyance-filter.w"

#ifdef POP3_PROXY_SERVER
bool POP3Proxy::serviceConnection(void){
assert(opened);

int clientSocket;
struct sockaddr_in from;
socklen_t fromlen;

/*199:*/
#line 8239 "annoyance-filter.w"

errno= 0;
do{
fromlen= sizeof from;
clientSocket= accept(listenSocket,(struct sockaddr*)&from,&fromlen);
if(clientSocket>=0){
break;
}
}while(errno==EINTR);
if(clientSocket<0){
perror("POP3Proxy accepting connection from client");
return false;
}
if(verbose){
cout<<"Accepting POP3 connection from "<<inet_ntoa(from.sin_addr)<<endl;
}

/*:199*/
#line 8225 "annoyance-filter.w"
;

/*200:*/
#line 8260 "annoyance-filter.w"

int clientLength,serverLength;
char clientBuffer[POP_BUFFER],serverBuffer[POP_BUFFER];
int serverSocket;
u_int32_t serverIP;
struct hostent*h;
int cstat= -1;
bool ok= true;
string command,argument,reply;

/*201:*/
#line 8286 "annoyance-filter.w"

if(isdigit(serverName[0])&&(serverIP= inet_addr(serverName.c_str()))!=static_cast<u_int32_t> (-1)){
cstat= 0;
}else{
h= gethostbyname(serverName.c_str());
if(h!=NULL){
memcpy(&serverIP,h->h_addr,sizeof serverIP);
cstat= 0;
}else{
cerr<<"POP3Proxy: POP3 server "<<serverName.c_str()<<" unknown."<<endl;
close(clientSocket);
return false;
}
}

/*:201*/
#line 8270 "annoyance-filter.w"
;
/*202:*/
#line 8306 "annoyance-filter.w"

struct sockaddr_in serverHost;
serverHost.sin_family= AF_INET;

serverSocket= socket(AF_INET,SOCK_STREAM,0);
if(serverSocket<0){
perror("POP3Proxy opening socket to POP server");
cstat= -1;
}else{
if(popProxyTrace){
cerr<<"POP3: serverSocket opened."<<endl;
}
serverHost.sin_port= htons(serverPort);
memcpy((char*)&serverHost.sin_addr.s_addr,(char*)(&serverIP),
sizeof serverHost.sin_addr.s_addr);

errno= 0;
do{
cstat= connect(serverSocket,(struct sockaddr*)&(serverHost),sizeof serverHost);
if(popProxyTrace){
cerr<<"POP3: serverSocket connected."<<endl;
}
if(cstat==0){
if(popProxyTrace){
cerr<<"POP3: Connected to POP server on "<<inet_ntoa(serverHost.sin_addr)<<
":"<<ntohs(serverHost.sin_port)<<endl;
}
break;
}else{
perror("POP3Proxy connection to POP server failed");
}
}while(errno==EINTR);

if(cstat<0){
cerr<<"POP3Proxy: Cannot connect to POP3 server "<<serverName.c_str()<<endl;
}
}

/*:202*/
#line 8271 "annoyance-filter.w"
;
/*203:*/
#line 8350 "annoyance-filter.w"

serverLength= recv(serverSocket,serverBuffer,POP_MAX_MESSAGE,0);
if(serverLength<0){
perror("POP3Proxy reading greeting from server");
ok= false;
}else{
clientLength= send(clientSocket,serverBuffer,serverLength,0);
if(clientLength<0){
perror("POP3Proxy forwarding greeting to client");
ok= false;
}
}

/*:203*/
#line 8272 "annoyance-filter.w"
;
/*204:*/
#line 8375 "annoyance-filter.w"

while(ok){

/*205:*/
#line 8400 "annoyance-filter.w"

if(popProxyTrace){
cerr<<"POP3: Reading request from client."<<endl;
}
clientLength= recv(clientSocket,clientBuffer,POP_MAX_MESSAGE,0);
if(popProxyTrace){
cerr<<"POP3: Read "<<clientLength<<" request bytes from client."<<endl;
}
if(clientLength<=0){
break;
}

/*:205*/
#line 8378 "annoyance-filter.w"
;
/*206:*/
#line 8421 "annoyance-filter.w"

if(isspace(clientBuffer[0])){
continue;
}

/*:206*/
#line 8379 "annoyance-filter.w"
;
/*207:*/
#line 8429 "annoyance-filter.w"

serverLength= send(serverSocket,clientBuffer,clientLength,0);
if(serverLength!=clientLength){
perror("POP3Proxy forwarding request to server");
break;
}

/*:207*/
#line 8380 "annoyance-filter.w"
;
/*208:*/
#line 8445 "annoyance-filter.w"

while((clientLength> 0)&&isspace(clientBuffer[clientLength-1])){
clientLength--;
}
command= argument= "";
int i;
for(i= 0;i<clientLength;i++){
if(isspace(clientBuffer[i])){
break;
}
char ch= clientBuffer[i];
if(isalpha(ch)&&isupper(ch)){
ch= tolower(ch);
}
command+= ch;
}

while((i<clientLength)&&isspace(clientBuffer[i])){
i++;
}

if(i<clientLength){
argument= string(clientBuffer+i,clientLength-i);
}

if(popProxyTrace){
cerr<<"POP3: Client command ("<<command<<")  Argument ("<<argument<<")"<<endl;
}

/*:208*/
#line 8381 "annoyance-filter.w"
;
/*209:*/
#line 8479 "annoyance-filter.w"

serverLength= 0;
int rl= -1;
while(true){
rl= recv(serverSocket,serverBuffer+serverLength,1,0);
if(rl<0){
perror("POP3Proxy reading request status from server");
break;
}
serverLength++;
if(serverBuffer[serverLength-1]=='\n'){
break;
}
if(serverLength>=POP_MAX_MESSAGE){
cerr<<"POP3Proxy reply from server too long."<<endl;
rl= -1;
break;
}
}
if(rl<0){
break;
}
if(popProxyTrace){
cerr<<"POP3: Server reply is "<<serverLength<<" bytes"<<endl;
}

/*:209*/
#line 8382 "annoyance-filter.w"
;
/*210:*/
#line 8516 "annoyance-filter.w"

reply= "";
if((serverBuffer[0]=='+')&&
((multiLine.find(command)!=multiLine.end())||
((argument=="")&&(cMultiLine.find(command)!=cMultiLine.end())))){
int bll;
char bp[POP_BUFFER];

if(popProxyTrace){
cerr<<"POP3: Reading multi-line reply from server."<<endl;
}
do{
bll= recv(serverSocket,bp,POP_MAX_MESSAGE,0);
if(bll<0){
perror("POP3Proxy reading multi-line reply to request from server");
break;
}
#ifdef POP3_TRACE_TRANSFER_DETAIL
if(popProxyTrace){
cerr<<"POP3: Appending "<<bll<<" bytes to multi-line reply."<<endl;
}
#endif
reply+= string(bp,bll);
}while((reply.length()<3)||
((reply!=".\r\n")&&
(reply.substr(reply.length()-5)!="\r\n.\r\n")));
}

/*:210*/
#line 8383 "annoyance-filter.w"
;

/*211:*/
#line 8553 "annoyance-filter.w"

if(popProxyTrace){
cerr<<"POP3: Calling filter function."<<endl;
}
if(filterFunction!=NULL){
serverBuffer[serverLength]= 0;
filterFunction(command,argument,serverBuffer,&serverLength,reply);
}
if(popProxyTrace){
cerr<<"POP3: Returned from filter function."<<endl;
}

/*:211*/
#line 8385 "annoyance-filter.w"
;

/*212:*/
#line 8571 "annoyance-filter.w"

clientLength= send(clientSocket,serverBuffer,serverLength,0);
if(clientLength!=serverLength){
perror("POP3Proxy relaying status of request to client");
break;
}
if(popProxyTrace){
cerr<<"POP3: Relaying "<<serverLength<<
" byte status line to client: "<<serverBuffer;
if((serverLength==0)||(serverBuffer[serverLength-1])!='\n'){
cerr<<endl;
}
}

/*:212*/
#line 8387 "annoyance-filter.w"
;
/*213:*/
#line 8592 "annoyance-filter.w"

if(reply!=""){
if(popProxyTrace){
cerr<<"POP3: Relaying "<<reply.length()<<" byte multi-line reply to client."<<endl;
}

#ifdef POP3_MAX_CLIENT_WRITE
clientLength= 0;
int rpl= reply.length();

while(clientLength<((int)reply.length())){
int bcl,pcl;

bcl= min(rpl,POP3_MAX_CLIENT_WRITE);
#ifdef POP3_TRACE_TRANSFER_DETAIL
if(popProxyTrace){
cerr<<"POP3: Writing "<<bcl<<" bytes of multi-line reply to client."<<endl;
}
#endif
pcl= send(clientSocket,reply.data()+clientLength,bcl,0);
if(pcl!=bcl){
if(popProxyTrace){
cerr<<"POP3: Error writing "<<bcl<<" bytes: wrote "<<pcl<<" bytes."<<endl;
}
break;
}
clientLength+= pcl;
rpl-= pcl;
}
#else
clientLength= send(clientSocket,reply.data(),reply.length(),0);
#endif

if(clientLength!=static_cast<int> (reply.length())){
perror("POP3Proxy relaying multi-line reply to request to client");
break;
}
#ifdef POP3_TRACE_TRANSFER_DETAIL
if(popProxyTrace){
cerr<<"POP3: <<<<<< Relaying "<<reply.length()<<" byte multi-line reply body to client. >>>>>>"<<endl;
cerr<<reply;
cerr<<"POP3: <<<<<< End multi-line reply body. >>>>>>"<<endl;
}
#endif
}

/*:213*/
#line 8388 "annoyance-filter.w"
;

if(command=="quit"){
break;
}
}

/*:204*/
#line 8273 "annoyance-filter.w"
;
/*214:*/
#line 8644 "annoyance-filter.w"

close(clientSocket);
close(serverSocket);
if(verbose){
cerr<<"Closing POP3 connection from "<<inet_ntoa(from.sin_addr)<<endl;
}

/*:214*/
#line 8274 "annoyance-filter.w"
;

/*:200*/
#line 8227 "annoyance-filter.w"
;

return true;
}
#endif

/*:198*//*215:*/
#line 8657 "annoyance-filter.w"

#ifdef POP3_PROXY_SERVER
/*  Run the proxy server: start listening with a backlog of
    |maxBacklog| and then service client connections one after another
    until |serviceConnection| reports failure.  The function always
    returns false, whether listening could not be started or the
    service loop ended.  */
bool POP3Proxy::operateProxyServer(int maxBacklog)
{
    if (!acceptConnections(maxBacklog)) {
        return false;
    }

    bool serving;

    do {
        serving = serviceConnection();
    } while (serving);

    return false;
}
#endif

/*:215*/
#line 9905 "annoyance-filter.w"

/*225:*/
#line 8869 "annoyance-filter.w"

/*  The master dictionary and the fast dictionary used by this program.
    Both are handed to every |classifyMessage| object created in this
    file; |dict| is also the dictionary which |updateProbability| and
    |printDictionary| operate on.  */
static dictionary dict;
static fastDictionary fDict;

/*:225*//*249:*/
#line 9822 "annoyance-filter.w"

/*  Token definitions passed, in this order, to
    |tokenParser::setTokenDefinition| by every token parser configured
    in this file.  */
static tokenDefinition isoToken;
static tokenDefinition asciiToken;

/*:249*/
#line 9906 "annoyance-filter.w"

/*184:*/
#line 7748 "annoyance-filter.w"

/*  Construct a classifier for messages read from mail folder |m|.
    |dt| and |fdt| are the dictionary and fast dictionary to consult,
    |nExt| the number of most significant words to base the
    classification on and |uwp| the probability assigned to words not
    found in the dictionary.  The embedded token parser is attached to
    |m| and configured from the global token definitions and length
    limits; it saves the message when a parser diagnostic file name
    (|pDiagFilename|) has been specified.  */
classifyMessage::classifyMessage(mailFolder &m,
                                 dictionary &dt, fastDictionary *fdt,
                                 unsigned int nExt, double uwp)
{
    /*  Remember what we were given.  */
    mf = &m;
    d = &dt;
    fd = fdt;
    nExtremal = nExt;
    unknownWordProbability = uwp;

    /*  Configure the token parser.  */
    tp.setSource(m);
    tp.setTokenDefinition(isoToken, asciiToken);
    tp.setTokenLengthLimits(maxTokenLength, minTokenLength,
                            streamMaxTokenLength, streamMinTokenLength);
    if (!pDiagFilename.empty()) {
        tp.setSaveMessage(true);
    }
}

/*:184*//*229:*/
#line 8960 "annoyance-filter.w"

/*  Recompute the junk probability of every word in |dict| from the
    current mail and junk message counts, the mail bias and the minimum
    occurrence setting, then clear |singleDictionaryRead|.  */
static void updateProbability(void)
{
    dict.computeJunkProbability(
        messageCount[dictionaryWord::Mail],
        messageCount[dictionaryWord::Junk],
        mailBias,
        minOccurrences
    );
    singleDictionaryRead = false;
}

/*:229*//*230:*/
#line 8972 "annoyance-filter.w"

/*  Bring the word probabilities up to date, then write a one line
    summary of the dictionary followed by a description of every word
    it contains to |os| (standard output by default).  */
static void printDictionary(ostream &os = cout)
{
    updateProbability();
    os << "Dictionary contains " << dict.size() << " unique tokens." << endl;

    dictionary::iterator word = dict.begin();

    while (word != dict.end()) {
        word->second.describe(os);
        word++;
    }
}

/*:230*//*231:*/
#line 8986 "annoyance-filter.w"

/*  Classify the first message in the mail folder |fname| and return
    its junk probability.  If neither the dictionary nor the fast
    dictionary has been loaded, an error message is issued and 0.5 is
    returned.  The count of messages tested, |nTested|, is incremented
    in either case.  */
static double classifyMessages(const char *fname)
{
    double probability;
    const bool haveDictionary = !dict.empty() || fDict.isDictionaryLoaded();

    if (haveDictionary) {
        mailFolder mf(fname, dictionaryWord::Mail);

        classifyMessage cm(mf, dict, &fDict, significantWords, novelWordProbability);

        probability = cm.classifyThis();
        if (verbose) {
            cerr << "Message junk probability: " << setprecision(5) << probability << endl;
        }
    } else {
        cerr << "You cannot --classify or --test a message "
                "unless you have first loaded a dictionary." << endl;
        probability = 0.5;
    }

    nTested++;
    return probability;
}

/*:231*//*242:*/
#line 9213 "annoyance-filter.w"

/*  Print how-to-call information on standard output: a two line
    heading naming the program, followed by the lines of |helpText|.
    Options which depend on optional features are listed only when the
    corresponding feature is configured.  */
static void usage(void)
{
    static const char *const helpText[] = {
        "",
        "Options:",
        "    --annotate options     Specify optional annotations in --transcript",
        "    --autoprune n          Automatically prune unique words when dictionary exceeds n bytes",
        "    --biasmail n           Set frequency bias for words and phrases in legitimate mail to n",
        "    --binword n            Scan binary streams for words >= n characters (0 = none)",
        "    --bsdfolder            Next --mail or --junk folder uses BSD \"From \" separator",
        "    --classify fname       Classify first message in fname",
        "    --clearjunk            Clear junk counts in dictionary",
        "    --clearmail            Clear mail counts in dictionary",
        "    --copyright            Print copyright information",
        "    --csvread fname        Import dictionary from fname in CSV format",
        "    --csvwrite fname       Export dictionary to fname in CSV format",
        "    --fread fname          Load fast dictionary from fname",
        "    --fwrite fname         Write fast dictionary to fname",
        "    --help, -u             Print this message",
#ifdef Jig
        "    --jig                  Test component in temporary jig",
#endif
        "    --junk, -j folder      Add folder contents to junk mail dictionary",
        "    --list                 Print dictionary on standard output",
        "    --mail, -m folder      Add folder contents to legitimate mail dictionary",
        "    --newword n            Set probability for words not in dictionary to n",
        "    --pdiag fname          Print parser diagnostics to fname",
        "    --phraselimit n        Set phrase maximum length to n characters",
        "    --phrasemax n          Set phrase maximum to n words",
        "    --phrasemin n          Set phrase minimum to n words",
#ifdef HAVE_PLOT_UTILITIES
        "    --plot fname           Plot histogram of word probabilities in dictionary",
#endif
#ifdef POP3_PROXY_SERVER
        "    --pop3port n           Listen for POP3 proxy requests on port n (default 9110)",
        "    --pop3server serv[:p]  Operate POP3 proxy for server, port p (default 110)",
        "    --pop3trace            Trace POP3 proxy traffic on standard error",
#endif
        "    --prune                Prune infrequently used words from dictionary",
        "    --ptrace               Include detailed trace in --pdiag output",
        "    --read, -r fname       Import dictionary from fname",
        "    --sigwords n           Classify message based on n most significant words",
        "    --statistics           Print statistics of dictionary",
        "    --test, -t fname       Test first message in fname",
        "    --threshjunk n         Set junk threshold to n",
        "    --threshmail n         Set mail threshold to n",
        "    --transcript fname     Write annotated message transcript to fname",
        "    --verbose, -v          Print processing information",
        "    --version              Print version number",
        "    --write fname          Export dictionary to fname",
        "",
        "by John Walker",
        "http://www.fourmilab.ch/"
    };

    cout << PRODUCT << "  --  Annoyance Filter.  Call" << endl;
    cout << "                      with " << PRODUCT << " [options]" << endl;

    for (unsigned int line = 0; line < (sizeof helpText / sizeof helpText[0]); line++) {
        cout << helpText[line] << endl;
    }
}

/*:242*/
#line 9907 "annoyance-filter.w"

/*219:*/
#line 8718 "annoyance-filter.w"

#ifdef POP3_PROXY_SERVER
/*
 * popFilter -- POP3 proxy reply filter.
 *
 * Handed to the POP3Proxy object as its filter callback.  When the
 * client command was "retr" and the server status line in replyBuffer
 * is non-empty and begins with '+', the message text in `reply' is
 * parsed as a mail folder, classified, and replaced by the annotated
 * message transcript; the status line in replyBuffer is then rewritten
 * to announce the new length.  Any other command/reply is left untouched.
 *
 *   command      client command (compared against "retr")
 *   argument     command argument; not used by this filter
 *   replyBuffer  in: server status line; out: regenerated "+OK n octets" line
 *   replyLength  in: length of status line; out: length of regenerated line
 *   reply        in: message text; out: message transcript
 */
void popFilter(const string command,const string argument,char*replyBuffer,int*replyLength,string&reply){
    if((command=="retr")&&((*replyLength)> 0)&&(replyBuffer[0]=='+')){

/*220:*/
#line 8784 "annoyance-filter.w"

        // Present the in-memory reply as a one-message mail folder.
        istrstream is(reply.data(),reply.length());

        mailFolder mf(is,dictionaryWord::Mail);
        mf.forceInHeader();

/*:220*/
#line 8723 "annoyance-filter.w"
        ;
/*221:*/
#line 8798 "annoyance-filter.w"

        // Classify the message; jp is only reported, the transcript carries the result.
        classifyMessage cm(mf,dict,&fDict,significantWords,novelWordProbability);
        double jp= cm.classifyThis(true);
        if(verbose){
            cerr<<"Message junk probability: "<<setprecision(5)<<jp<<endl;
        }

/*:221*/
#line 8724 "annoyance-filter.w"
        ;

#define not_POPFILTER_TRACE
#ifdef POPFILTER_TRACE
        cerr<<"Classification done."<<endl;
#endif
#ifdef OLDWAY
        ostringstream os;
#else
        unsigned int mtl= mf.sizeMessageTranscript();
#ifdef POPFILTER_TRACE
        cerr<<"Message transcript predicted size: "<<mtl<<endl;
#endif
        // The vector owns the transcript buffer, so it is released on every
        // exit path (including exceptions).  The original paired new char[]
        // with scalar delete, which is undefined behaviour.
        vector<char> mtbuf(mtl+16);
        ostrstream os(&mtbuf[0],mtl+16);
#endif
        mf.writeMessageTranscript(os);
#ifdef POPFILTER_TRACE
        cerr<<"Transcript written."<<endl;
#endif
        mf.clearMessageTranscript();
#ifdef POPFILTER_TRACE
        cerr<<"Transcript cleared."<<endl;
        cerr<<"Message transcript actual size: "<<os.tellp()<<endl;
#endif
        reply.erase();
#ifndef OLDWAY
        os<<'\0';               // terminate the buffer before it is read back as a C string
#endif
        reply= os.str();        // copies the transcript out of the stream buffer
#ifdef POPFILTER_TRACE
        cerr<<"Reply string length: "<<reply.length()<<endl;
#endif
#ifdef POPFILTER_TRACE
        cerr<<"Reply created."<<endl;
#endif
/*222:*/
#line 8822 "annoyance-filter.w"

        // Regenerate the status line with the transcript's length.
        // NOTE(review): the 3 subtracted presumably discounts the ".\r\n"
        // terminator of the multi-line response -- confirm against POP3Proxy.
        ostringstream rs;
        rs<<"+OK "<<(reply.length()-3)<<" octets\r\n";
        const string statusLine= rs.str();
        memcpy(replyBuffer,statusLine.data(),statusLine.length());
        *replyLength= statusLine.length();

/*:222*/
#line 8763 "annoyance-filter.w"
        ;
#ifdef POPFILTER_TRACE
        cerr<<"Reply length modification done."<<endl;
#endif 
    }
}
#endif

/*:219*//*227:*/
#line 8886 "annoyance-filter.w"

static void addFolder(const char*fname,dictionaryWord::mailCategory cat)
{
if(verbose){
cerr<<"Adding "<<(bsdFolder?"BSD ":"")<<"folder "<<
fname<<" as "<<dictionaryWord::categoryName(cat)<<":"<<endl;
}

mailFolder mf(fname,cat);
mf.setBSDmode(bsdFolder);
bsdFolder= false;
tokenParser tp;

tp.setSource(mf);
tp.setTokenDefinition(isoToken,asciiToken);
tp.setTokenLengthLimits(maxTokenLength,minTokenLength,
streamMaxTokenLength,streamMinTokenLength);
if(pDiagFilename.length()> 0){
tp.setSaveMessage(true);
}
dictionaryWord dw;
unsigned int ntokens= 0;

while(tp.nextToken(dw)){
dict.add(dw,mf.getCategory());
ntokens++;
/*228:*/
#line 8936 "annoyance-filter.w"

if((autoPrune!=0)&&(dict.estimateMemoryRequirement()> autoPrune)){
if(verbose){
cerr<<"Dictionary size "<<dict.estimateMemoryRequirement()<<
"; starting automatic prune."<<endl;
}
dict.purge(1);
if(dict.estimateMemoryRequirement()> ((autoPrune*9)/10)){
cerr<<"Dictionary size after --autoprune is larger than 90%"<<endl;
cerr<<"of --autoprune setting of "<<autoPrune<<" bytes."<<endl;
autoPrune= static_cast<unsigned int> (autoPrune*1.25);
cerr<<"Increasing --autoprune threshold 25% to "<<autoPrune<<
" to avoid thrashing."<<endl;
}
}

/*:228*/
#line 8912 "annoyance-filter.w"
;
}
messageCount[mf.getCategory()]+= mf.getMessageCount();

if(verbose){
cerr<<"  Added "<<mf.getMessageCount()<<" messages, "<<
ntokens<<" tokens in "<<mf.getLineCount()<<" lines."<<endl;
cerr<<"  Dictionary contains "<<dict.size()<<" unique tokens."<<endl;
cerr<<"  Dictionary size "<<dict.estimateMemoryRequirement()<<" bytes."<<endl;
}
}

/*:227*//*246:*/
#line 9772 "annoyance-filter.w"

// Becomes non-zero once the capabilities heading has been printed;
// the --version code tests it to decide whether to print "none".
static unsigned int nOptionalCaps = 0;

/*
 * printOptionalCapability -- write one optional-capability line to
 * standard output, preceded by the list heading on the first call.
 */
static void printOptionalCapability(const string&s)
{
    if (nOptionalCaps < 1) {
        nOptionalCaps = 1;
        cout << "Optional capabilities configured:" << endl;
    }
    cout << "    " << s;
    cout << "." << endl;
}

/*:246*/
#line 9908 "annoyance-filter.w"

/*223:*/
#line 8834 "annoyance-filter.w"


/*253:*/
#line 9883 "annoyance-filter.w"

#ifdef Jig
#endif

/*:253*/
#line 8836 "annoyance-filter.w"
;

int main(int argc,char*argv[])
{
int opt;

/*224:*/
#line 8857 "annoyance-filter.w"

memset(messageCount,0,sizeof messageCount);
isoToken.setISO_8859defaults(minTokenLength,maxTokenLength);
asciiToken.setUS_ASCIIdefaults(streamMinTokenLength,streamMaxTokenLength);

/*:224*/
#line 8842 "annoyance-filter.w"
;

/*243:*/
#line 9278 "annoyance-filter.w"



static const struct option long_options[]= {
{"annotate",1,NULL,222},
{"autoprune",1,NULL,232},
{"biasmail",1,NULL,225},
{"binword",1,NULL,221},
{"bsdfolder",0,NULL,231},
{"classify",1,NULL,209},
{"clearjunk",0,NULL,215},
{"clearmail",0,NULL,216},
{"copyright",0,NULL,200},
{"csvread",1,NULL,205},
{"csvwrite",1,NULL,207},
{"fread",1,NULL,228},
{"fwrite",1,NULL,229},
{"help",0,NULL,'u'},
#ifdef Jig
{"jig",0,NULL,206},
#endif
{"junk",1,NULL,'j'},
{"list",0,NULL,202},
{"mail",1,NULL,'m'},
{"newword",1,NULL,220},
{"pdiag",1,NULL,212},
{"phraselimit",1,NULL,224},
{"phrasemax",1,NULL,223},
{"phrasemin",1,NULL,217},
#ifdef HAVE_PLOT_UTILITIES
{"plot",1,NULL,211},
#endif
#ifdef POP3_PROXY_SERVER
{"pop3port",1,NULL,226},
{"pop3server",1,NULL,227},
{"pop3trace",0,NULL,230},
#endif
{"prune",0,NULL,203},
{"ptrace",0,NULL,213},
{"purge",0,NULL,203},
{"read",1,NULL,'r'},
{"sigwords",1,NULL,219},
{"statistics",0,NULL,210},
{"test",1,NULL,'t'},
{"threshjunk",1,NULL,208},
{"threshmail",1,NULL,214},
{"transcript",1,NULL,204},
{"verbose",0,NULL,'v'},
{"version",0,NULL,201},
{"write",1,NULL,218},
{0,0,0,0}
};
int option_index= 0;
bool lastOption= false;
int exitStatus= 0;

while((!lastOption)&&
(opt= getopt_long(argc,argv,"j:m:r:t:uv",long_options,&option_index))!=-1){

switch(opt){


case 222:
while((*optarg)!=0){
unsigned int ch= (*optarg++)&0xFF;

if(isascii(ch)&&isalpha(ch)&&isupper(ch)){
ch= islower(ch);
}
annotations.set(ch);
}
break;


case 232:
autoPrune= atoi(optarg);
if(verbose){
cerr<<"Unique words will be automatically pruned from dictionary when it exceeds "<<
autoPrune<<" bytes."<<endl;
}
break;


case 225:
mailBias= atof(optarg);
if(verbose){
cerr<<"Frequency bias for words and phrases in legitimate mail set to "<<
mailBias<<"."<<endl;
}
break;


case 221:
streamMinTokenLength= atoi(optarg);
if(verbose){
if(streamMinTokenLength> 0){
cerr<<"Binary streams will be parsed for words of "<<
streamMinTokenLength<<" characters or more."<<endl;
}else{
cerr<<"Binary streams will not be parsed for words."<<endl;
}
}
break;


case 231:
bsdFolder= true;
break;


case 209:
{
if(optind<argc){
cerr<<"Warning: command line arguments after \"--classify "<<
optarg<<" will be ignored."<<endl;
}
double score= classifyMessages(optarg);

if(score>=junkThreshold){
cout<<"JUNK"<<endl;
exitStatus= 3;
}else if(score<=mailThreshold){
cout<<"MAIL"<<endl;
exitStatus= 0;
}else{
cout<<"INDT"<<endl;
exitStatus= 4;
}
lastOption= true;
break;
}


case 215:
dict.resetCat(dictionaryWord::Junk);
messageCount[dictionaryWord::Junk]= 0;
break;


case 216:
dict.resetCat(dictionaryWord::Mail);
messageCount[dictionaryWord::Mail]= 0;
break;


case 200:
cout<<"This program is in the public domain.\n";
return 0;


case 205:
{ifstream is(optarg);
if(!is){
cerr<<"Cannot open CSV dictionary file "<<optarg<<endl;
return 1;
}
dict.importCSV(is);
if(!singleDictionaryRead){
updateProbability();
}
singleDictionaryRead= false;
is.close();
}
break;


case 207:
{ofstream of(optarg);
if(!of){
cerr<<"Cannot create CSV export file "<<optarg<<endl;
return 1;
}
updateProbability();
dict.exportCSV(of);
of.close();
}
break;


case 228:
if(!fDict.load(optarg)){
cerr<<"Unable to load fast dictionary file."<<endl;
return 1;
}
break;


case 229:
if(dict.size()==0){
cerr<<"No dictionary loaded when --fwrite command issued."<<endl;
return 1;
}
fastDictionary::exportDictionary(dict,optarg);
break;


case'u':
case'?':
usage();
return 0;

#ifdef Jig

case 206:
{
/*252:*/
#line 9874 "annoyance-filter.w"

#ifdef Jig
#endif

/*:252*/
#line 9483 "annoyance-filter.w"
;
}
break;
#endif


case'j':
addFolder(optarg,dictionaryWord::Junk);
updateProbability();
break;


case 202:
printDictionary();
break;


case'm':
addFolder(optarg,dictionaryWord::Mail);
updateProbability();
break;


case 220:
novelWordProbability= atof(optarg);
if(verbose){
cerr<<"Probability for words not in dictionary set to "<<novelWordProbability<<"."<<endl;
}
break;


case 212:
pDiagFilename= optarg;
break;


case 224:
phraseLimit= atoi(optarg);
if(verbose){
cerr<<"Phrase maximum length set to "<<phraseLimit<<" characters."<<endl;
}
break;


case 223:
phraseMax= atoi(optarg);
if(verbose){
cerr<<"Phrase maximum length set to "<<phraseMax<<" word"<<
(phraseMax==1?"":"s")<<"."<<endl;
}
break;


case 217:
phraseMin= atoi(optarg);
if(verbose){
cerr<<"Phrase minimum length set to "<<phraseMin<<" word"<<
(phraseMin==1?"":"s")<<"."<<endl;
}
break;


#ifdef HAVE_PLOT_UTILITIES
case 211:
updateProbability();
dict.plotProbabilityHistogram(optarg);
break;
#endif


#ifdef POP3_PROXY_SERVER
case 226:
popProxyPort= atoi(optarg);
if(verbose){
cerr<<"POP3 proxy server will listen on port "<<popProxyPort<<endl;
}
break;
#endif


#ifdef POP3_PROXY_SERVER
case 227:
{
if(optind<argc){
cerr<<"Warning: command line arguments after \"--pop3server "<<
optarg<<" will be ignored."<<endl;
}
string sarg= optarg;
string::size_type pind= sarg.find_last_of(':');
if(pind!=string::npos){
if((pind<(sarg.length()-1))&&
(pind> 0)&&
isdigit(sarg[pind+1])){
popProxyServerPort= atoi(sarg.substr(pind+1).c_str());
}else{
cerr<<"Invalid port number specification in --pop3server argument."<<endl;
return 1;
}
sarg= sarg.substr(0,pind);
}
popProxyServer= sarg;
if(verbose){
cerr<<"POP3 server will act as proxy for "<<popProxyServer<<":"<<
popProxyServerPort<<endl;
}
lastOption= true;
break;
}
#endif


#ifdef POP3_PROXY_SERVER
case 230:
popProxyTrace= true;
break;
#endif


case 203:
updateProbability();
dict.purge();
break;


case 213:
pTokenTrace= true;
break;


case'r':
{
#ifdef HAVE_MMAP
int fileHandle= open(optarg,O_RDONLY);
if(fileHandle==-1){
cerr<<"Cannot open dictionary file "<<optarg<<endl;
return 1;
}
long fileLength= lseek(fileHandle,0,2);
lseek(fileHandle,0,0);
char*dp= static_cast<char*> (mmap((caddr_t)0,fileLength,
PROT_READ,MAP_SHARED|MAP_NORESERVE,
fileHandle,0));
istrstream is(dp,fileLength);
#else
ifstream is(optarg,ios::binary);
if(!is){
cerr<<"Cannot open dictionary file "<<optarg<<endl;
return 1;
}
#endif
dict.importFromBinaryFile(is);
#ifdef HAVE_MMAP
munmap(dp,fileLength);
close(fileHandle);
#else
is.close();
#endif
if(!singleDictionaryRead){
updateProbability();
}
singleDictionaryRead= false;
}
break;


case 219:
significantWords= atoi(optarg);
if(verbose){
cerr<<"Significant words set to "<<significantWords<<"."<<endl;
}
break;


case 210:
updateProbability();
dict.printStatistics();
break;


case't':
{double score= classifyMessages(optarg);

if(transcriptFilename!="-"){
cout<<"Junk probability "<<score<<endl;
}
}
break;


case 208:
junkThreshold= atof(optarg);
if(verbose){
cerr<<"Junk threshold set to "<<setprecision(5)<<junkThreshold<<"."<<endl;
}
break;


case 214:
mailThreshold= atof(optarg);
if(verbose){
cerr<<"Mail threshold set to "<<setprecision(5)<<mailThreshold<<"."<<endl;
}
break;


case 204:
transcriptFilename= optarg;
break;


case'v':
verbose= true;
break;


case 201:
{
/*245:*/
#line 9758 "annoyance-filter.w"

cout<<PRODUCT" "VERSION<<endl;
cout<<"Last revised: "REVDATE<<endl;
/*247:*/
#line 9788 "annoyance-filter.w"

#ifdef HAVE_PDF_DECODER
printOptionalCapability("Decoding strings in PDF attachments");
#endif

#ifdef HAVE_DIRECTORY_TRAVERSAL
printOptionalCapability("Directory traversal in the --mail and --junk options");
#endif

#ifdef HAVE_MMAP
printOptionalCapability("Memory mapped access to dictionary and fast dictionary files");
#endif

#ifdef HAVE_PLOT_UTILITIES
printOptionalCapability("Plotting distribution histogram (--plot option)");
#endif

#ifdef POP3_PROXY_SERVER
printOptionalCapability("POP3 proxy server");
#endif

if(nOptionalCaps==0){
cout<<"Optional capabilities configured: none."<<endl;
}

/*:247*/
#line 9761 "annoyance-filter.w"
;
cout<<"The latest version is always available from:"<<endl;
cout<<"    http://www.fourmilab.ch/annoyance-filter/"<<endl;
cout<<"Please report bugs to:"<<endl;
cout<<"    bugs@fourmilab.ch"<<endl;

/*:245*/
#line 9700 "annoyance-filter.w"
;
}
return 0;


case 218:
{ofstream of(optarg,ios::binary);
if(!of){
cerr<<"Cannot create dictionary file "<<optarg<<endl;
return 1;
}
updateProbability();
dict.exportToBinaryFile(of);
of.close();
}
break;


default:
cerr<<"***Internal error: unhandled case "<<opt<<
" in option processing."<<endl;
return 1;
}
}

/*244:*/
#line 9733 "annoyance-filter.w"

if(pTokenTrace&&(pDiagFilename=="")){
cerr<<"Warning: --ptrace requested but no --pdiag file specified."<<endl;
}

if((transcriptFilename!="")&&(nTested==0)){
cerr<<"Warning: --transcript requested but no message --test or --classify done."<<endl;
}

if((pDiagFilename!="")&&(nTested==0)){
cerr<<"Warning: --pdiag requested but no message --test or --classify done."<<endl;
}

if(annotations.count()> 0&&(transcriptFilename=="")
#ifdef POP3_PROXY_SERVER
&&(popProxyServer=="")
#endif
){
cerr<<"Warning: --annotate requested but no --transcript or --pop3proxy requested."<<endl;
}

/*:244*/
#line 9725 "annoyance-filter.w"
;

/*:243*/
#line 8844 "annoyance-filter.w"
;

#ifdef POP3_PROXY_SERVER
if(popProxyServer!=""){
/*218:*/
#line 8695 "annoyance-filter.w"

if(dict.empty()&&(!fDict.isDictionaryLoaded())){
cerr<<"You cannot operate a --pop3proxy server "
"unless you have first loaded a dictionary."<<endl;
return 1;
}

if(verbose){
cerr<<"Starting POP3 proxy server on port "<<popProxyPort<<
" with server "<<popProxyServer<<":"<<popProxyServerPort<<endl;
}
POP3Proxy pp(popProxyPort,popProxyServer,popProxyServerPort,&popFilter);

pp.operateProxyServer();

/*:218*/
#line 8848 "annoyance-filter.w"
;
}
#endif


return exitStatus;
}

/*:223*/
#line 9909 "annoyance-filter.w"




#line 1 "log.w"

/*:254*/
