/*
* ClassDocRule.java
* A.L. Borchers, 1998
*
* Rule for converting a javadoc class description file into a vector of 
* JavaDocObjects, the first of which represents the class or interface 
* described by the file, and the rest describe members.
*
*/

package JavaDoc;

import java.net.URL;

import java.util.Vector;
import java.util.StringTokenizer;

import Scout.*;

import SGMLKit.Tag;


public class ClassDocRule extends Rule {
	
	// When true, processDoc additionally prints the stack trace of any
	// exception that aborts parsing of a document.
	public static final boolean debug= true;
	
	// the object primarily described by a document; allocated in getPrimary
	private JavaDocObject rootObject= null;

	// vector of objects constructed for this document
	// element 0 is rootObject, rest are members
	// (re-created on every processDoc call)
	private Vector javaObjects= null;
	
	// Per-document inputs captured by processDoc from the current doc:
	// the string form of its URL, its tag vector and its text.
	private String docURL= null;
	private Vector docTags= null;
	private String docText= null;
	
	// Creates the rule by passing both arguments straight through to the
	// Rule superclass constructor; no further setup is done here. Any
	// RuleFormatException raised by the superclass propagates to the caller.
	public ClassDocRule(Scout scout, RuleHash h) throws RuleFormatException {
		super(scout, h);
	}
	
	// Processes the current document: after the superclass processing it
	// captures the document's URL, tags and text, builds the root object
	// (getPrimary) and then the member objects (getMembers). On success the
	// vector of built objects is appended to results. On any failure the
	// problem is logged, nothing is added to results, and the method returns
	// normally.
	public synchronized void processDoc() {
		super.processDoc();
		// Reset all per-document state first, so that a failure early in the
		// try block cannot log (or leave behind) values belonging to a
		// previously processed document.
		rootObject= null;
		docURL= null;
		docTags= null;
		docText= null;
		javaObjects= new Vector();
		try {
			errorCheck(!doc.isValid() || !doc.isHTML(),
				"Document is invalid or non-HTML");
			// Guard the toString() call so that a missing URL reaches the
			// explicit check below instead of surfacing as a NullPointerException
			docURL= (doc.getURL() == null) ? null : doc.getURL().toString();
			docTags= doc.getTags();
			docText= doc.getText();
			errorCheck(docURL == null || docTags == null || docText == null,
				"URL, tags or text missing");
			getPrimary();
			getMembers();
		}
		catch (Exception e) {
			// Deliberately broad: any parsing problem means the document is
			// treated as "not a javadoc file" and skipped.
			scout.logger.log(getName() + ".processDoc - " + docURL + 
				" does not appear to be a javadoc file: " +
				e.toString());
			if (debug) {
				e.printStackTrace();
			}
			return;
		}
		results.addElement(javaObjects);
	}
	
	// Builds the root JavaDocObject (the class or interface this document
	// describes) from the document URL, the <title>, the <h1> header, the
	// optional <pre> inheritance tree and the <dl> class header, then adds
	// it to javaObjects.
	//
	// Throws an Exception (via errorCheck, or from the helpers it calls) as
	// soon as the document departs from the expected structure; processDoc
	// treats that as "not a javadoc file".
	private void getPrimary() 
		throws Exception {

		//
		// Items we will fill in for the JavaDocObject represented by this file
		// 
		String nameString= null;				// Name of object
		String classString= null;				// Class of object
		String parent= null;					// Parent object
		String container= null;					// Container package
		Vector interfaces= new Vector();		// Vector of implemented interfaces
		String descString= null;				// Descriptive text
		int type= JavaDocObject._TYPE_UNKNOWN;	// Type flag for object
		int allModifiers= 0;					// Modifier flags for object

		//
		// Additionally, we use this for sanity checking type info
		// (lower-cased "class"/"interface" token taken from the title)
		//
		String typeString= null;
		
		// 
		// Temp string, tokenizer we'll use for reading the file
		// 
		String tempString= null;
		StringTokenizer st= null;
		
		// alloc the root object
		rootObject= new JavaDocObject();

		//
		// Locate the object name and type
		//
		
		// First indication of the object name is from the document URL.
		// We trim package information from the object name...
		nameString= JavaDocObject.getObjectNameFromObjectURL(docURL,true);
		// ...but not from the class name
		classString= JavaDocObject.getObjectNameFromObjectURL(docURL,false);
		// get the container package
		container= JavaDocObject.getPackageNameFromObjectURL(docURL);

		// Find the title and split it into type and name
		int titleOpen= Tag.locateTag(docTags,"title",0);
		int titleClose= Tag.locateTag(docTags,"/title",titleOpen+1);
		errorCheck(titleOpen < 0 || titleClose < 0,"Missing document title");
		tempString= Tag.extractTaggedRegion(docTags,docText,titleOpen,titleClose);
		errorCheck(tempString == null,"Invalid or missing document title");
		st= new StringTokenizer(tempString.trim());
		errorCheck(st.countTokens() != 2,"Unexpected structure in document title");

		// Verify the type is class or interface (the only things acceptable 
		// for a root object)
		tempString= st.nextToken().toLowerCase();
		type= JavaDocObject.getType(tempString);
		errorCheck(type != JavaDocObject._TYPE_INTERFACE && type != JavaDocObject._TYPE_CLASS,
			"Expected \"Interface\" or \"Class\" in title");
		typeString= tempString;
		
		// get the title name and check it against the class name from the url
		tempString= st.nextToken();
		errorCheck(!tempString.equals(classString),"URL and title name mismatch: " + 
			classString + "," + tempString);
		
		// The first thing of interest after the title should be a level 1 
		// header giving the class or interface name (which had better match 
		// the one from the title!)
		int h1Open= Tag.locateTag(docTags,"h1",titleClose+1);
		int h1Close= Tag.locateTag(docTags,"/h1",h1Open+1);
		errorCheck(h1Open < 0 || h1Close < 0,"Missing expected header");
		tempString= Tag.extractTaggedRegion(docTags,docText,h1Open,h1Close);
		errorCheck(tempString == null,"Missing expected header");
		st= new StringTokenizer(tempString);
		errorCheck(st.countTokens() != 2,"Unexpected header structure");
		tempString= st.nextToken();
		errorCheck(!tempString.equalsIgnoreCase(typeString),
			"Title and header type mismatch: " +
			tempString + "," + typeString);
		tempString= st.nextToken();
		errorCheck(!tempString.equalsIgnoreCase(classString),
			"Title and header name mismatch: " +
			tempString + "," + classString);
		
		// After the <h1>, there may or may not be an inheritance tree contained 
		// in a <pre></pre> tag set. The only case observed so far where this
		// structure does not exist is in a root interface.
		int treeOpen= h1Close+1;
		int treeClose;
		// The size test keeps elementAt from running off the end of the tag
		// vector when </h1> is the last tag in the document.
		if (treeOpen < docTags.size() &&
			((Tag)docTags.elementAt(treeOpen)).getIdentifier().equals("pre")) {
			// get the tree!
			treeClose= Tag.locateTag(docTags,"/pre",treeOpen+1);
			errorCheck(treeClose < 0,"Missing or malformed inheritance tree");
			String[] tree= extractInheritanceTree(treeOpen,treeClose);
			if (tree.length > 1) {
				// last entry is this object itself, the one before is its parent
				parent= tree[tree.length-2];
			}
			else {
				// This should only be the case for java.lang.Object
				errorCheck(!classString.equals("java.lang.Object"),
					"Parentless item " + classString + " is not java.lang.Object");
			}
		}
		// else: no tree -> no parent? NO - sometimes this tree LIES!!! 
		// See java.io.ObjectInput for an example. This doesn't 
		// necessarily ruin us, though, as we'll be examining the 
		// "extends" clause in the class header info later...
		
		// The location of the next interesting bit is sensitive to whether or not we 
		// found the inheritance tree. If we did, then it lies just beyond an <hr> tag.
		// If we did not, there is no <hr> UNLESS we are reading java.lang.Object which
		// has a tree but no parent
		
		int hrOpen, hrClose;
		if (parent == null && !classString.equals("java.lang.Object")) {
			// we find the class header info right after the close of the <h1>
			hrOpen= h1Close;
		}
		else {
			// The class header lies beyond the first <hr> past the <h1> close
			hrOpen= Tag.locateTag(docTags,"hr",h1Close+1);
		}
		hrClose= Tag.locateTag(docTags,"hr",hrOpen+1);
		//
		// TODO: What is a good error check here??? (hrOpen/hrClose are used
		// below without being validated)
		//
		
		// The following are worst case bounds on the descriptive string bounds
		int descOpen= hrOpen;
		int descClose= hrClose;
		
		// The class header with extends and implements items is contained in a <dl>
		// Unfortunately, the spacing is unpredictable so we can't just grab the region 
		// and use a StringTokenizer. We have to grind through each dt in turn without 
		// knowing in advance what's there...
		int dlOpen, dlClose, dtOpen, dtClose;
		dlOpen= Tag.locateTag(docTags,"dl",hrOpen+1);
		errorCheck(dlOpen < 0,"Error isolating class header info");
		dlClose= Tag.locateTag(docTags,"/dl",dlOpen+1);
		errorCheck(dlClose < 0,"Error isolating class header info");
		dtOpen= Tag.locateTag(docTags,"dt",dlOpen+1);
		errorCheck(dtOpen < 0 || dtOpen > dlClose,
			"Error finding terms in class header dl");
		// first dt repeats the unqualified class or interface name with modifiers
		// This extends to the next <dt> or </dl>, depending on whether extends 
		// and/or implements are present
		dtClose= Tag.locateTag(docTags,"dt",dtOpen+1);
		// If dtClose not found or found past the </dl>, there are no extends or 
		// implements items, so we just have the class/interface info between the 
		// dtOpen and dlClose
		dtClose= (dtClose < 0 || dtClose > dlClose) ? dlClose : dtClose;
		tempString= Tag.extractTaggedRegion(docTags,docText,dtOpen,dtClose);
		errorCheck(tempString == null,"Error isolating modified class name in header");
		st= new StringTokenizer(tempString.trim());
		// Everything prior to the typeString is a modifier, which we OR into the 
		// allModifiers field
		while (st.hasMoreTokens() && 
			!(tempString= st.nextToken()).equals(typeString)) {
			allModifiers|= JavaDocObject.getModifier(tempString);
		}
		// have processed all modifiers. tempString should now be on typeString and 
		// nextToken should be the unqualified class/interface name
		errorCheck(!tempString.equals(typeString) || !st.hasMoreElements(),
			"Error extracting class/interface modifiers {" +
			typeString + "," + tempString + "," + st.countTokens() + "}" );
		tempString= st.nextToken();
		errorCheck(!classString.endsWith(tempString),
			"Name mismatch in class header: " + 
			tempString + "," + classString);
		// Now we have all modifiers set and name verified, time to check for 
		// extends/implements stuff
		if (dtClose < dlClose) {
			// dtClose is set at <dt> giving the extends clause or implements if no 
			// extends is present
			dtOpen= dtClose;
			dtClose= Tag.locateTag(docTags,"dt",dtOpen+1);
			dtClose= (dtClose < 0 || dtClose > dlClose) ? dlClose : dtClose;
			tempString= Tag.extractTaggedRegion(docTags,docText,dtOpen,dtClose);
			errorCheck(tempString == null,
				"Failed to find expected extends or implements clause");
			// Note - fully qualified names are generally not given in the readable 
			// text of the file so ambiguity of package exists. We need to use the URL
			// given in the link to the parent and/or interfaces to get the full names 
			st= new StringTokenizer(tempString.trim());
			errorCheck(!st.hasMoreTokens(),
				"Failed to find expected extends or implements clause");
			String keyword= st.nextToken();
			// Track the clause kinds explicitly rather than re-testing a temp
			// string: the extends branch stores the parent's name, and a parent
			// whose name starts with "implements" must not be mistaken for an
			// implements clause.
			boolean hasExtends= keyword.startsWith("extends");
			boolean hasImplements= !hasExtends && keyword.startsWith("implements");
			if (hasExtends) {
				
				// get the parent via its link URL
				int aOpen= Tag.locateTag(docTags,"a",dtOpen+1);
				errorCheck(aOpen < dtOpen || aOpen > dtClose,
					"Failed to find expected anchor to parent class");
				String extendsParent= JavaDocObject.getObjectNameFromObjectURL((Tag)docTags.elementAt(aOpen),false);
				
				// If we already identified a parent from the inheritance tree, verify that 
				// it matches the one identified here
				errorCheck(parent != null && !extendsParent.equals(parent),
					"Tree/extends parent mismatch: " + parent + "," + extendsParent);

				// Either there was no tree parent or it matched, so the name
				// found here simply becomes the parent
				parent= extendsParent;
				
				// advance to the implements clause if present  
				if (dtClose < dlClose) {
					// implements clause should be there between dtClose and dlClose
					dtOpen= dtClose; 
					dtClose= dlClose;
					tempString= Tag.extractTaggedRegion(docTags,docText,dtOpen,dtClose);
					errorCheck(tempString == null,
						"Failed to find expected implements clause");
					tempString= tempString.trim();
					errorCheck(!tempString.startsWith("implements"),
						"Failed to find expected implements clause in " +
						tempString);
					hasImplements= true;
				}
			}
			if (hasImplements) {
				// get the implemented interfaces by grabbing each anchor from dtOpen to 
				// dtClose
				int aOpen= Tag.locateTag(docTags,"a",dtOpen+1);
				while (aOpen > dtOpen && aOpen < dtClose) {
					tempString= JavaDocObject.getObjectNameFromObjectURL((Tag)docTags.elementAt(aOpen),false);
					errorCheck(tempString == null,
						"Failed to find implemented interface name");
					interfaces.addElement(tempString);
					aOpen= Tag.locateTag(docTags,"a",aOpen+1);
				}
			}
		}
		// We can now step the opening of the description past the end of the dl
		descOpen= dlClose;
		// Get the description
		descString= Tag.extractTaggedRegion(docTags,docText,descOpen,descClose);

		//
		// fill the JavaDocObject
		//
		rootObject.javaDocURL=  docURL;
		rootObject.objectName=  nameString;
		rootObject.className=   classString;
		rootObject.parent=      parent;
		rootObject.container=	container;
		rootObject.interfaces=  interfaces;
		rootObject.description= descString;
		// Set flags
		rootObject.setType(type);
		rootObject.setModifier(allModifiers);
		//
		// Lastly, add the rootObject to the output vector
		// 
		javaObjects.addElement(rootObject);
	}

// Extract the inheritance chain of a class/interface from the preformatted
// descendency tree in the document header (just past the level 1 header).
//
// treeOpen and treeClose are indices into docTags and must refer to the
// <pre> and </pre> tags bounding the tree. The result holds one entry per
// non-blank tree line, in document order, with the '|' connector lines
// dropped and any leading '+'/'-' characters stripped.
//
// Throws an Exception if either index is out of range, the tags are not
// pre and /pre, or the region text cannot be extracted.
private String[] extractInheritanceTree(int treeOpen, int treeClose) 
throws Exception {
	String errorMessage= "Missing or malformed inheritance tree";
	// Range-check first: a failed tag lookup hands us -1, which would
	// otherwise surface as an ArrayIndexOutOfBoundsException from elementAt.
	int tagCount= docTags.size();
	errorCheck(treeOpen < 0 || treeOpen >= tagCount ||
		treeClose < 0 || treeClose >= tagCount,
		errorMessage);
	errorCheck(!((Tag)docTags.elementAt(treeOpen)).getIdentifier().equals("pre") ||
		!((Tag)docTags.elementAt(treeClose)).getIdentifier().equals("/pre"),
		errorMessage);
	String treeString= Tag.extractTaggedRegion(docTags,docText,treeOpen,treeClose);
	errorCheck(treeString == null, errorMessage);
	// split into tokens on the newlines
	StringTokenizer st= new StringTokenizer(treeString,"\n");
	Vector tokenVector= new Vector();
	while (st.hasMoreTokens()) {
		String nextToken= st.nextToken().trim();
		// descendancy lines in the tree are indicated by a '|'. 
		// We skip over these
		if (nextToken.equals("|")) {
			continue;
		}
		// skip over any + or - chars at the head of the string
		int start= 0;
		while (start < nextToken.length() &&
			(nextToken.charAt(start) == '+' || nextToken.charAt(start) == '-')) {
			start++;
		}
		nextToken= nextToken.substring(start).trim();
		// Whitespace-only lines carry no class name; keeping them would
		// shift the "second to last entry is the parent" rule used by the
		// caller.
		if (nextToken.length() > 0) {
			tokenVector.addElement(nextToken);
		}
	}
	String[] out= new String[tokenVector.size()];
	tokenVector.copyInto(out);
	return out;
}


// Scans every <a> tag in the document and, for each one that marks a member
// (it has a name attribute and is immediately followed by an <img> tag -
// the ball graphic), builds a JavaDocObject via getMember and appends it to
// javaObjects.
//
// Throws whatever getMember throws for a malformed member entry.
private void getMembers() 
	throws Exception {

	int tagCount= docTags.size();

	for (int anchorIdx= Tag.locateTag(docTags,"a",0);
		anchorIdx > -1;
		anchorIdx= Tag.locateTag(docTags,"a",anchorIdx+1)) {

		Tag anchorTag= (Tag)docTags.elementAt(anchorIdx);
		// anchors without a name attribute are irrelevant for this purpose
		if (anchorTag.getUnquoted("name") == null) {
			continue;
		}
		// A named anchor that is the very last tag has no follower to test;
		// without this guard elementAt would throw and abort the document.
		int followerIdx= anchorIdx+1;
		if (followerIdx >= tagCount) {
			continue;
		}
		if (((Tag)docTags.elementAt(followerIdx)).getIdentifier().equals("img")) {
			// found a member
			javaObjects.addElement(getMember(anchorTag,anchorIdx));
		}
	}

}

// Build a JavaDocObject from the current anchorTag at anchorIdx.
//
// The anchor's name attribute supplies the member name and, for methods,
// the parameter list; the first <pre> region after the anchor supplies the
// declaration from which the modifiers and type are read.
//
// Returns the populated member object. Throws an Exception if the
// declaration region cannot be located or extracted, or has fewer than two
// tokens.
private JavaDocObject getMember(Tag anchorTag, int anchorIdx) 
	throws Exception {
	// tokenizer for parsing tokens in declaration and list of parameters
	StringTokenizer st= null;
	// new an object
	JavaDocObject result= new JavaDocObject();
	// anchor string contains name and - for methods - parameter info
	String anchorString= anchorTag.getUnquoted("name");
	// split index for isolating parameter list from declaration/name
	int split= anchorString.indexOf('(');
	if (split > -1) {
		// method object with parameters 
		result.objectName= anchorString.substring(0,split);
		result.setType(JavaDocObject._TYPE_METHOD);
		// Drop the closing ')' only when it is really there, so a name such
		// as "foo(" neither throws nor loses a character.
		int paramEnd= anchorString.endsWith(")") ?
			anchorString.length()-1 : anchorString.length();
		String parameters= anchorString.substring(split+1,paramEnd);
		st= new StringTokenizer(parameters,",");
		result.methodargs= new Vector();
		while (st.hasMoreTokens()) {
			result.methodargs.addElement(st.nextToken().trim());
		}
	}
	else {
		// simple name without parameters
		result.objectName= anchorString;
		result.setType(JavaDocObject._TYPE_FIELD);
	}
	// url is stored relative to the parent
	result.javaDocURL= "#" + anchorString;
	// container is fully qualified class/interface name of the rootObject
	result.container= rootObject.className;

	// flag info for modifiers/type comes from a <pre> tagged declaration 
	int declOpen= Tag.locateTag(docTags,"pre",anchorIdx+1);
	errorCheck(declOpen < 0,"Failed to locate object declaration open");
	int declShut= Tag.locateTag(docTags,"/pre",declOpen+1);
	errorCheck(declShut < 0,"Failed to locate object declaration close");
	String decl= Tag.extractTaggedRegion(docTags,docText,declOpen,declShut);
	// same null check as every other extraction site in this class
	errorCheck(decl == null,"Failed to extract object declaration");
	decl= decl.trim();

	// we already got the parameter info so discard it if present here
	split= decl.indexOf('(');
	if (split > -1) {
		decl= decl.substring(0,split);
	}

	// now parse the tokens of the declaration

	// a constructor may have as few as two tokens: modifier name  
	// where name == objectName

	st= new StringTokenizer(decl);
	int numTokens= st.countTokens();
	errorCheck(numTokens < 2,"Fewer than expected tokens in object declaration " + decl +
		" while examining " + rootObject.className);

	String[] tokens= new String[numTokens];
	for (int i= 0; i < numTokens; i++) {
		tokens[i]= st.nextToken().trim();
	}

	int numModifiers= 0;

	// handle as constructor or other 
	if (tokens[numTokens-1].equals(rootObject.objectName)) {
		// constructor - requires unique handling as type is implicit
		result.setType(JavaDocObject._TYPE_CLASS);
		result.className= rootObject.className;
		// everything but the trailing name is a modifier
		numModifiers= numTokens - 1;
	}
	else {
		// method or field, handled the same - last two tokens are type and name
		// type will return unknown from JavaDocObject.getType if it's a class
		String typeString= tokens[numTokens-2];
		
		if (typeString.endsWith("[]")) {
			typeString= typeString.substring(0,typeString.length()-2);
			result.setType(JavaDocObject._TYPE_ARRAY);
		}

		int typeCode= JavaDocObject.getType(typeString);
		if (typeCode == JavaDocObject._TYPE_UNKNOWN) {
			typeCode= JavaDocObject._TYPE_CLASS;
			// use hyperlink to the class's description to get the hierarchical name
			int typeAnchorIdx= Tag.locateTag(docTags,"a",declOpen+1);
			try {
				errorCheck(typeAnchorIdx < 0 || typeAnchorIdx >= declShut,
					"Couldn't locate link to what appears to be a class valued member " + 
					decl + " while examining " + rootObject.className);
				Tag typeAnchorTag= (Tag)docTags.elementAt(typeAnchorIdx);
				String typeAnchorURL= typeAnchorTag.getUnquoted("href");
				typeString= 
					JavaDocObject.getObjectNameFromObjectURL(typeAnchorURL,false);
			}
			catch (Exception e) {
				// Best effort on purpose: keep the name as written in the
				// declaration and carry on, but record why the lookup failed.
				scout.logger.log(getName() + ".getMember - in " + rootObject.className +
					": Couldn't locate complete reference to apparent class valued member <" + 
					decl + ">: " + e.toString());
			}

			result.className= typeString;
		} 
		// else it's primitive. in either case we set the type flag of the object
		result.setType(typeCode);		
		// everything before the type and name is a modifier
		numModifiers= numTokens - 2;
	}
	for (int i= 0; i < numModifiers; i++) {
		result.setModifier(tokens[i]);
	}

	return result;

}

// Signals a structural problem in the document: throws an Exception
// carrying message when condition is true, and does nothing otherwise.
private void errorCheck(boolean condition, String message) 
throws Exception {
	if (!condition) {
		return;
	}
	throw new Exception(message);
}

// Same check with a generic message for callers that have nothing more
// specific to report.
private void errorCheck(boolean condition) 
throws Exception {
	final String genericMessage= "Unexpected document structure";
	errorCheck(condition,genericMessage);
}


}


// junkyard

/*

// Given an anchor tag, extract the object referenced
private String extractObjectNameFromURL(Tag aTag) 
throws Exception {
	String out= null;
	errorCheck(!aTag.getIdentifier().equals("a"),
		"Failed to find expected anchor tag. Instead found " +
		aTag.toString());
	return extractObjectNameFromURL(aTag.get("href"));
}

private String extractObjectNameFromURL(String urlString) {
    // dequote the URL if quoted
    if (urlString.startsWith("\"") && urlString.endsWith("\"")) {
		urlString= urlString.substring(1,urlString.length()-2);
    }
    // extract just the full class/interface name from the URL 
    urlString= urlString.substring(urlString.lastIndexOf("/")+1);
    return urlString.substring(0,urlString.indexOf(".html"));
}

*/
