import java.util.*;
import java.io.*;
import org.apache.oro.text.regex.*;

/**
 * Parser for Debian package (.deb) files.
 * This is a modified version of the HTML parser.
 * <p>Rather than going to the trouble of extracting the control file and
 * parsing it ourselves, we simply rely on dpkg-deb. This does mean that this
 * parser only works on systems where /usr/bin/dpkg-deb is installed, but it
 * does make it a lot easier for us.
 * @author Mark Howard
 * <br><br>$Id: DebParser.java,v 1.2 2002/09/11 13:22:25 howama Exp $
 */
public class DebParser implements FileParser { 
	/**
	 * One attribute to look for in a piece of text. Couples an IGKey with a
	 * case-insensitive Perl5 regular expression; the first capture group of
	 * the expression is taken as the attribute's value.
	 */
	private class WantedItem {
		/** Key under which the extracted value is stored. */
		private IGKey id;
		/** Log message key used when the item is found. */
		private String success;
		/** Human readable name of the item. */
		private String item;
		/** Last extracted value, or "Not found." until one is set. */
		private String value;
		/** Never assigned in this class, so getContext() always yields null. */
		private String context;
		/** Result of the most recent match attempt (or explicit override). */
		private boolean found;

		// org.apache.oro
		private PatternMatcher matcher;
		/** Compiled expression; null when the supplied pattern was malformed. */
		private Pattern contextRE;

		/**
		 * @param what     display name of the item
		 * @param identity key the value is stored under
		 * @param logmsg   log message key reported on success
		 * @param how      Perl5 regular expression; group 1 captures the value
		 */
		public WantedItem (String what, IGKey identity, String logmsg, String how) {
			item = what;
			id = identity;
			success = logmsg;
			value = "Not found.";
			found = false;

			// org.apache.oro
			matcher = new Perl5Matcher();

			try {
				contextRE = new Perl5Compiler().compile(how,
						Perl5Compiler.CASE_INSENSITIVE_MASK);
			} catch (MalformedPatternException e) {
				// Keep the item usable: with no pattern it simply never matches.
				contextRE = null;
				log.addError(900, "RE_ERROR", new Object[] {e.getMessage()} );
				if (LOGLEVEL >= IGLog.FILE) {
					log.add(IGLog.FILE, "Giving up.");
				}
			}
		}

		/**
		 * Searches the given text for this item's pattern and records the
		 * outcome in the found flag.
		 * @param str text to search
		 * @return true if the pattern occurs in str; false if it does not,
		 *         if str is null, or if the pattern failed to compile
		 */
		public boolean match(String str) {
			if (contextRE == null || str == null) {
				return isFound(false);
			}
			return isFound(matcher.contains(new PatternMatcherInput(str), contextRE));
		}

		/**
		 * Stores capture group 1 of the last successful match as the value.
		 * @return the captured text, or null when nothing has been matched
		 */
		public String getMatch() {
			if (!isFound()) {
				return null;
			}
			// found may have been forced via isFound(true)/value(String)
			// without the matcher ever having matched anything.
			MatchResult last = matcher.getMatch();
			if (last == null) {
				return null;
			}
			return value(last.group(1));
		}

		/**
		 * @return end offset of the whole last match, or -1 if the matcher
		 *         holds no match
		 */
		public int getEndIndex() {
			MatchResult last = matcher.getMatch();
			if (last == null) {
				return -1;
			}
			return last.endOffset(0);
		}

		/** @return the log message key reported on success */
		public String success() {
			return success;
		}

		/** @return the context string (never set in this class) */
		public String getContext() {
			return context;
		}

		/** @return whether the item is currently marked as found */
		public boolean isFound() {
			return found;
		}

		/**
		 * Overrides the found flag.
		 * @param already new state of the flag
		 * @return the new state
		 */
		public boolean isFound(boolean already) {
			found = already;
			return found;
		}

		/** @return the display name of the item */
		public String name() {
			return item;
		}

		/** @return the current value */
		public String value() {
			return value;
		}

		/**
		 * Sets the value and marks the item as found.
		 * @param aValue new value
		 * @return the new value
		 */
		public String value(String aValue) {
			value = aValue;
			if (!isFound()) {
				isFound(true);
			}
			return value;
		}

		/** @return the key the value is stored under */
		public IGKey id() {
			return id;
		}

		/**
		 * Replaces the key.
		 * @param identity new key
		 * @return the new key
		 */
		public IGKey id(IGKey identity) {
			id = identity;
			return id;
		}
	}

    /** Signature of parser; stored under IGKey.PARSER when that key is wanted */
    static final String PARSER = "$Id: DebParser.java,v 1.2 2002/09/11 13:22:25 howama Exp $";

    /** Exception for bad character encodings (not thrown anywhere in this class) */
    private class BadEncodingException extends Exception { }

    /** Flags recording which attributes were requested via setWantedItems(IGKeySet) */
    private boolean wantURLs;
    private boolean wantFileType;
    private boolean wantParser;
    private boolean wantDescription;
    private boolean wantTitle;

    /** implementation specific data */
    private String filename;
    /** Number of characters requested for the first chunk of input */
    private int firstChunkSize;    
    
    /** The logging object for this module */
    private IGLog log;

    /** The default logging level for this module */
    private final static int LOGLEVEL = 99;

    /** File extensions for Debian packages */
    private final static String[] extensions = {"deb"};

    /** Mime types for Debian packages */
    private final static String[] mimeTypes = {"application/x-deb"};

    /** file magic signature: the ASCII bytes of "debian-binary" */
    private final static byte[][] magic = {
	{	(byte) 'd', (byte) 'e', (byte) 'b', (byte) 'i', (byte) 'a', (byte) 'n', 
		(byte) '-', (byte) 'b', (byte) 'i', (byte) 'n', (byte) 'a', 
		(byte) 'r', (byte) 'y'}};

    /** headers aren't always at the beginning of the file */
    private final static boolean magicOffset = true;

    /** headers aren't case sensitive */
    private final static boolean magicCase = true;

    /**
     * FileMagic structure handed out by getMagic(). The field name is a
     * leftover from the HTML parser this class was derived from.
     */
    private final static FileMagic htmlMagic = new FileMagic(magic, magicOffset,
	    magicCase);

    /**
     * Matchers for individual attributes. Only wantedTitle, wantedAuthor and
     * wantedDescription are assigned in this class (by setWantedItems).
     */
    private WantedItem wantedTitle;
    private WantedItem wantedHeading;
    private WantedItem wantedAuthor;
    private WantedItem wantedDescription;
    private WantedItem wantedPara;
    private WantedItem wantedKeywords;
    private WantedItem wantedEncoding;

    /** The WantedItem objects to try on each parsed file; built by setWantedItems */
    private Set wantedItems;

    private WantedItem wantedHead;
    private WantedItem wantedBody;

    private String fileType;
    private String fileSizeBytes;    

    /** Location of the file last given to parse(IGFile) */
    private String fileName;


    /**
     * Creates a parser with no logger, no file name, a first chunk size of
     * 2048 and every extraction flag switched off.
     * By contract the caller invokes setLog(IGLog) and
     * setWantedItems(IGKeySet) before calling parse(IGFile).
     */
    public DebParser() {
		this.firstChunkSize = 2048; //512;
		this.fileName = null;
		this.log = null;

		// nothing is wanted until setWantedItems says so
		this.wantTitle = false;
		this.wantDescription = false;
		this.wantURLs = false;
		this.wantFileType = false;
		this.wantParser = false;
    }

    /**
     * Records which attributes should be extracted and rebuilds the set of
     * pattern matchers accordingly.
     * @param wanted the keys the caller is interested in
     */
    public void setWantedItems(IGKeySet wanted) {
		// simple on/off switches
		// FIXME far too much hard coded boolean scalars ...
		this.wantURLs = wanted.wants(IGKey.URLS);
		this.wantFileType = wanted.wants(IGKey.FILE_TYPE);
		this.wantParser = wanted.wants(IGKey.PARSER);
		this.wantTitle = wanted.wants(IGKey.TITLE);
		this.wantDescription = wanted.wants(IGKey.DESCRIPTION);

		// pattern based items; each expression captures the rest of the line
		this.wantedItems = new HashSet(7);

		// author is taken from the "Maintainer:" line
		if (wanted.wants(IGKey.AUTHOR)) {
			final String maintainerRE = " *Maintainer: ?"
			+ "([^\n]*)\n";
			this.wantedAuthor = new WantedItem("Maintainer field", IGKey.AUTHOR,
					"FP_FOUND_AUTHOR", maintainerRE);
			this.wantedItems.add(this.wantedAuthor);
		}

		// description is taken from the "Description:" line
		if (wanted.wants(IGKey.DESCRIPTION)) {
			final String descriptionRE = "Description: ?"
			+ "([^\n]*)\n";
			this.wantedDescription = new WantedItem("Description", IGKey.DESCRIPTION,
					"FP_FOUND_DESC", descriptionRE);
			this.wantedItems.add(this.wantedDescription);
		}

		// title is taken from the "Package:" line
		if (wanted.wants(IGKey.TITLE)) {
			final String packageRE = "Package: ?([^\n]*)\n";
			this.wantedTitle = new WantedItem("package name", IGKey.TITLE,
					"FP_FOUND_TITLE", packageRE);
			this.wantedItems.add(this.wantedTitle);
		}
    }

    /**
     * Installs the logger this parser reports to.
     * @param logObj logging object used for all subsequent messages
     */
    public void setLog(IGLog logObj) {
		this.log = logObj;
    }

    /**
     * Utility routine that appends a chunk of the stream to a StringBuffer.
     * Reads up to countChars bytes and then keeps reading until a space has
     * been consumed or the stream ends, so the chunk never stops in the
     * middle of a token.
     * <p>Each byte is widened to a char one-to-one.
     * NOTE(review): multi-byte encodings in the input are therefore not
     * decoded - confirm whether that matters for the indexed attributes.
     *
     * @param stringBuffer StringBuffer to append to
     * @param countChars number of bytes to read before looking for a space
     * @param reader stream to read from
     * @return number of bytes appended
     * @throws IOException if reading fails; the error is logged first
     */
    private int readChunk( StringBuffer stringBuffer, 
			   int countChars, InputStream reader ) throws IOException {
		int ch = 0;
		int readChars = 0;
		try {
			while (readChars < countChars && (ch = reader.read()) != -1) {
				readChars++;
				stringBuffer.append((char) ch);
			}
			/*
			 * optionally read some more, until space read, 
			 * otherwise normalisation may cause ugly side effects
			 * since after stripping ws tokens, one loses information 
			 * at the end of the normalized slice.
			 * Skipped once end of stream has been seen.
			 */
			while (ch != -1 && ch != ' ' && (ch = reader.read()) != -1) {
				readChars++;
				stringBuffer.append((char) ch);
			}
		}
		catch (IOException e) {
			// Report and let the caller decide; a failed read of one file
			// must not terminate the whole JVM.
			log.add(IGLog.FILE, "Error reading from resource: " +
				e.getMessage());
			throw e;
		}

		return readChars;
    }

    /**
     * Perform parsing on an open stream.
     * dpkg-deb needs a real file to work on, so the stream is first copied
     * to a temporary file which is removed again afterwards. If
     * /usr/bin/dpkg-deb does not exist a warning is logged and nothing is
     * parsed.
     * @param file The IGFile to fill in attributes for
     * @param stream The data source for parsing; read to its end, not closed
     * @throws IOException if an error occurs while reading data, writing
     *         the temporary file or starting dpkg-deb
     * @throws StreamResetException declared for callers; not raised directly
     *         by the code in this method
     */
    public void parse(IGFile file, InputStream stream)
		throws IOException, StreamResetException {

		if (LOGLEVEL >= IGLog.PROCEDURE)
			log.add(IGLog.PROCEDURE, "DebParser.parse(IGFile, InputStream)");

		File dpkgDeb = new File("/usr/bin/dpkg-deb");
		if (!dpkgDeb.exists()){
			// fileName belongs to parse(IGFile) and may be stale or null here
			log.addWarning(60, "NO_DPKG_DEB", new Object[] {file.getLocation()});
			return;
		}

		File spool = File.createTempFile("indexgen-debparser-", ".deb");
		Process infoProc = null;
		try {
			// Save the stream in a temporary location ...
			OutputStream out = new FileOutputStream(spool);
			try {
				byte[] buffer = new byte[8192];
				int count;
				while ((count = stream.read(buffer)) != -1) {
					out.write(buffer, 0, count);
				}
			} finally {
				out.close();
			}

			// ... then run dpkg-deb on it. The argument array avoids any
			// whitespace splitting of the path.
			infoProc = Runtime.getRuntime().exec(
				new String[] {"/usr/bin/dpkg-deb", "-I", spool.getPath()});
			doParse(file, infoProc.getInputStream());
		} finally {
			if (infoProc != null) {
				infoProc.destroy();
			}
			// best effort clean-up; a left-over temp file is not fatal
			spool.delete();
		}
    }

    /**
     * Perform parsing on the given source by running
     * <code>dpkg-deb -I</code> on the file's location. If /usr/bin/dpkg-deb
     * does not exist a warning is logged and nothing is parsed.
     * @param file The file to have attributes extracted from
     * @throws FileNotFoundException if the file has no location
     * @throws IOException if dpkg-deb cannot be started or its output
     *         cannot be read
     */
    public void parse(IGFile file) throws IOException, FileNotFoundException {
	
		if (LOGLEVEL >= IGLog.PROCEDURE)
			log.add(IGLog.PROCEDURE, "DebParser.parse(IGFile)");
		
		fileName = file.getLocation();
		File dpkgDeb = new File("/usr/bin/dpkg-deb");
		if (!dpkgDeb.exists()){
			log.addWarning(60, "NO_DPKG_DEB", new Object[] {fileName});
			return;
		}
		if (fileName == null) {
			throw new FileNotFoundException("IGFile has no location");
		}

		// Pass the arguments as an array: exec(String) splits its command
		// line on whitespace, which breaks locations containing spaces.
		Process infoProc = Runtime.getRuntime().exec(
			new String[] {"/usr/bin/dpkg-deb", "-I", fileName});
		try {
			doParse(file, infoProc.getInputStream() );
		} finally {
			// only the first chunk of output is consumed; release the process
			infoProc.destroy();
		}
    }

    /**
     * Perform real work on the given source: stores the parser signature
     * and file type if wanted, reads the first chunk of the stream and
     * stores the first capture of every wanted item that matches it.
     * The stream is closed before returning, also when an error occurs.
     * @param file The IGFile to fill in attributes for
     * @param reader The source to have attributes extracted from
     * @return always true
     * @throws IOException if reading or closing the stream fails
     */
    private boolean doParse(IGFile file, InputStream reader) 
		throws IOException {

		try {
			if (LOGLEVEL >= IGLog.PROCEDURE)
				log.add(IGLog.PROCEDURE, "DebParser.doParse(IGFile,BufferedReader)");

			if (LOGLEVEL >= IGLog.FILE)
				log.add(IGLog.FILE, "Reading IGFile, chunksize: " + firstChunkSize);

			if (wantParser) {
				file.put(IGKey.PARSER, PARSER);
				if (LOGLEVEL >= IGLog.DEBUG)
					log.add(IGLog.DEBUG, "Setting IGKey.PARSER: " + PARSER);
			}

			if (wantFileType) {
				file.put(IGKey.FILE_TYPE, "Debian Package");
				if (LOGLEVEL >= IGLog.DEBUG)
					log.add(IGLog.DEBUG, "Setting IGKey.FILE_TYPE: " + "Debian Package");
			}

			// reset of internal state, since fileparsers may be reused
			Iterator iterator = wantedItems.iterator();
			while (iterator.hasNext()) {
				((WantedItem) iterator.next()).isFound(false);
			}

			StringBuffer text = new StringBuffer();
			readChunk(text, firstChunkSize, reader);
			// the chunk does not change while matching; convert it once
			String chunk = text.toString();

			iterator = wantedItems.iterator();
			while (iterator.hasNext()) {
				WantedItem wantedItem = (WantedItem) iterator.next();
				if (wantedItem.match(chunk)) {
					// getMatch() copies capture group 1 into the item's value
					wantedItem.getMatch();
					file.put(wantedItem.id(), wantedItem.value());
					if (LOGLEVEL >= IGLog.PROGRESS)
						log.addResource(IGLog.PROGRESS, wantedItem.success(),
								new String[]{wantedItem.value()});
				} else {
					if (LOGLEVEL >= IGLog.DEBUG)
						log.add(IGLog.DEBUG, wantedItem.success() + "[" 
							+ wantedItem.name() + "] not found." );
				}
			}
		} finally {
			reader.close();
		}

		return true;
    }

    /**
     * Get the file extensions this parser can handle
     * @return a fresh String array of file extensions; changes made to it
     *         by the caller do not affect the parser
     */
    public String[] getExtensions() {
		// hand out a copy so the shared static array cannot be modified
		return (String[]) extensions.clone();
    }

    /**
     * Get the mime types this parser can handle
     * @return a fresh String array of mime types; changes made to it by
     *         the caller do not affect the parser
     */
    public String[] getMimeTypes() {
		// hand out a copy so the shared static array cannot be modified
		return (String[]) mimeTypes.clone();
    }

    /**
     * Supply file magic for files this parser can handle
     * @return the FileMagic object built from this parser's magic signature
     */
    public FileMagic getMagic() {
		return DebParser.htmlMagic;
    }

}

/*
 * $Log: DebParser.java,v $
 * Revision 1.2  2002/09/11 13:22:25  howama
 * bug fixes
 *
 * Revision 1.1  2002/09/06 13:59:22  howama
 * debian package parser
 *
 */
