/*
* PackageIndexRule.java
* A.L. Borchers, 1998
*
* Rule for reading a package description file (URLs terminating with 
* Package-<packagename>.html). Produces a hashtable result for each 
* such file processed with the following form:
*
* -------------------------------------------
* KEY         VALUE
* -------------------------------------------
* Package     Full package name
* Interface   Vector of Interface names
* Class       Vector of Class names
* Exception   Vector of Exception names
* Error       Vector of Error names
* ------------------------------------------
* 
* If any one of Interface, Class, Exception, or Error is not present as an 
* index in the particular package, nothing is stored in the table for that 
* entry so any Rule which uses this table should check against nulls before 
* trying to examine the vector
*
*/

package JavaDoc;


import Scout.*;

import SGMLKit.Tag;

import java.util.Vector;
import java.util.Hashtable;

import java.net.URL;
import java.net.MalformedURLException;

/**
 * Rule that reads a package index page (a URL whose file name has the form
 * Package-&lt;packagename&gt;.html) and records, in a Hashtable appended to
 * <code>results</code>, the package name under the key "Package" plus one
 * Vector of entry names per index section found on the page. Every linked
 * entry is also appended to the scout's URL queue.
 *
 * A document that does not match the expected layout is logged and produces
 * no result table.
 *
 * NOTE(review): Tag.locateTag is assumed to return a negative value when the
 * requested tag is absent (the original "/menu" check relied on this) --
 * confirm against SGMLKit.Tag.
 */
public class PackageIndexRule extends Rule {
	
	public static final boolean debug= false;
	
	/* Markers that delimit the package name inside the document URL */
	private static final String FILE_PREFIX= "/Package-";
	private static final String FILE_SUFFIX= ".html";
	
	/* Table being filled for the document currently processed */
	private Hashtable resultHash= null;
	
	/**
	 * Passes the scout and rule description straight to the superclass.
	 *
	 * @exception RuleFormatException propagated from the superclass constructor
	 */
	public PackageIndexRule(Scout scout, RuleHash h)
		throws RuleFormatException {
		super(scout,h);
	}
	
	/**
	 * Examines the current document. When the file name, the title and the
	 * level 1 header all name the same package, gathers every level 2 index
	 * section and appends the resulting table to <code>results</code>.
	 * Any failure is logged through the scout's logger and leaves
	 * <code>results</code> untouched; URLs queued before the failure was
	 * detected remain queued.
	 */
	public synchronized void processDoc() {
		super.processDoc();
		// We'll want a new table for each package we examine
		resultHash= new Hashtable();
		try {
			if (!doc.isValid() || !doc.isHTML()) {
				throw new Exception("Document is invalid or non-HTML");
			}
			// Get the proverbial text and tags components of the document
			String docText= doc.getText();
			Vector docTags= doc.getTags();
			// Fully qualified package name, taken from the file name of the url
			String packageName= packageNameFromURL(doc.getURL().toString());
			
			// Compare this package name to that given in title and h1 elements
			int open= Tag.locateTag(docTags,"title",0);
			int close= (open < 0) ? -1 : Tag.locateTag(docTags,"/title",open+1);
			String docTitle= taggedText(docTags,docText,open,close,"title");
			requireNamedPackage(docTitle,"Package ",packageName,"Title");
			
			open= Tag.locateTag(docTags,"h1",close+1);
			close= (open < 0) ? -1 : Tag.locateTag(docTags,"/h1",open+1);
			String docHeader= taggedText(docTags,docText,open,close,"h1");
			requireNamedPackage(docHeader,"package ",packageName,"H1 header");
			
			// Have filename, title and header names in agreement, ergo high
			// confidence that this is a legitimate package index file.
			resultHash.put("Package",packageName);
			
			// Gather the index sections. Each one is opened by a level 2 header.
			// The scan position is kept apart from the per-item scan inside
			// readSection so that running out of list items cannot end this
			// loop before every header has been visited.
			int cursor= close;
			while (true) {
				int headerOpen= Tag.locateTag(docTags,"h2",cursor+1);
				// the search starts past </h1>, so a hit is always positive
				if (headerOpen <= 0) {
					break;
				}
				cursor= readSection(docTags,docText,headerOpen);
			}
			results.addElement(resultHash);
		}
		catch (Exception e) {
			scout.logger.log(getName() + ".processDoc - " + doc.getURL().toString() + 
				" does not appear to be a package index file: " +
				e.toString());
			if (debug) {
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * Extracts the text between "/Package-" and ".html" from a url string.
	 *
	 * @exception Exception if either marker is missing or they are out of order
	 */
	private static String packageNameFromURL(String url) throws Exception {
		int prefixAt= url.lastIndexOf(FILE_PREFIX);
		int suffixAt= url.lastIndexOf(FILE_SUFFIX);
		int nameAt= prefixAt + FILE_PREFIX.length();
		// Without this check a missing prefix (-1) would silently turn into
		// a start offset of 8 and yield a meaningless name
		if (prefixAt < 0 || suffixAt < nameAt) {
			throw new Exception("URL does not name a package index file");
		}
		return url.substring(nameAt,suffixAt);
	}
	
	/**
	 * Returns the trimmed document text lying between the positions of the
	 * tags at indices open and close.
	 *
	 * @exception Exception if either index is negative, i.e. a tag was not found
	 */
	private static String taggedText(Vector docTags, String docText, 
		int open, int close, String element) throws Exception {
		if (open < 0 || close < 0) {
			throw new Exception("Failed to locate expected " + element + " element");
		}
		int from= ((Tag)docTags.elementAt(open)).getPosition();
		int to= ((Tag)docTags.elementAt(close)).getPosition();
		return docText.substring(from,to).trim();
	}
	
	/**
	 * Verifies that text consists of prefix immediately followed by the
	 * package name.
	 *
	 * @exception Exception if the prefix is absent or the name differs
	 */
	private static void requireNamedPackage(String text, String prefix, 
		String packageName, String label) throws Exception {
		if (!text.startsWith(prefix)) {
			throw new Exception(label + " does not contain package identifier");
		}
		if (!text.substring(prefix.length()).equals(packageName)) {
			throw new Exception(label + " package name conflicts with filename");
		}
	}
	
	/**
	 * Reads one index section: a level 2 header holding an img tag whose alt
	 * text names the section, directly followed by a menu of linked items.
	 * Stores the Vector of item names in resultHash under the section name
	 * and queues each item's url with the scout.
	 *
	 * @param headerOpen index of the opening h2 tag
	 * @return index of the closing /menu tag, from which scanning continues
	 * @exception Exception if the section deviates from the expected layout
	 */
	private int readSection(Vector docTags, String docText, int headerOpen) 
		throws Exception {
		int headerClose= Tag.locateTag(docTags,"/h2",headerOpen+1);
		if (headerClose < 0) {
			throw new Exception("Failed to locate expected close of level 2 header");
		}
		Tag sectionHeaderImage= (Tag)docTags.elementAt(headerOpen+1);
		if (!sectionHeaderImage.getIdentifier().equals("img")) {
			throw new Exception("Failed to locate expected img tag in level 2 header");
		}
		String sectionHeaderText= sectionHeaderImage.getUnquoted("alt");
		if (sectionHeaderText == null) {
			throw new Exception("Failed to locate expected alt text in img tag");
		}
		// alt text should be of the form "* Index" (quotes literal)
		// where * = Interface|Class|Exception|Error; the first character is
		// skipped exactly as before.
		// NOTE(review): presumably that character is the literal quote --
		// confirm against Tag.getUnquoted
		int suffixAt= sectionHeaderText.indexOf(" Index");
		if (suffixAt < 1) {
			throw new Exception("Unrecognized section heading: " + sectionHeaderText);
		}
		String sectionName= sectionHeaderText.substring(1,suffixAt);
		
		// The menu must be the very next tag after the header close
		int menuOpen= Tag.locateTag(docTags,"menu",headerClose+1);
		if (menuOpen != headerClose+1) {
			throw new Exception("Failed to locate expected menu in " + 
				sectionHeaderText);
		}
		int menuClose= Tag.locateTag(docTags,"/menu",menuOpen+1);
		if (menuClose < 0) {
			throw new Exception("Failed to locate expected menu close in " + 
				sectionHeaderText);
		}
		
		// Step through the li's inside the menu, collecting the linked names
		Vector names= new Vector();
		int item= Tag.locateTag(docTags,"li",menuOpen+1);
		while (item > 0 && item < menuClose) {
			// Each item should start with a linked reference to another
			// file in the API tree
			Tag anchor= (Tag)docTags.elementAt(item+1);
			if (!anchor.getIdentifier().equals("a")) {
				throw new Exception("Failed to locate expected anchor in " +
					sectionHeaderText);
			}
			String name= Tag.extractTaggedRegion(docTags,docText,item+1,item+2);
			if (debug) {
				System.out.println("Adding " + name + " to " + sectionName);
			}
			names.addElement(name);
			// append the url to the search queue
			scout.urls.append(new URL(doc.getURL(),anchor.getUnquoted("href")));
			item= Tag.locateTag(docTags,"li",item+1);
		}
		// store the vector of entity names under the current section name
		resultHash.put(sectionName,names);
		return menuClose;
	}
}
