/*

RuleHash.java
A.L. Borchers, 1997 November
University of Kentucky Department of Computer Science

Class provides a data structure for the Rule configuration data
that is used to initialize and control data extraction rules 
derived from class Rule (including the provided RegExp rule.)

Also provides static methods for loading a vector of RuleHash
objects from a vector (or array) of Tag objects describing rules
and from a file bearing a Scout template.

See http://www.cs.engr.uky.edu/~borchers/SCOUT/TemplateReference.html
for the structure of valid Scout templates.

*/

package Scout;

import SGMLKit.*;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Configuration data for one data extraction rule, stored as a table of
 * string attribute names mapped to string attribute values.
 *
 * Instances are normally built from a parsed SCOUT tag; the static
 * loadRules methods build a Vector of RuleHash objects from a collection
 * of tags or from a template file.
 */
public class RuleHash extends Hashtable {

  // Keys every rule must define; see verifyRequiredKeys().
  public static final String[] requiredKeys= {
    "name", "type", "value"
  };

  /*
  -----------------------------------------------------------------------------
  TEMPLATE SYNTAX REFERENCE
  -----------------------------------------------------------------------------
  <SCOUT TSTART>
  (<SCOUT
  NAME=(Identifier) 
  TYPE=(Data|Command)
  RULE=(ClassURL)
  VALUE=(Content|Null)
  PARSE=(String|Integer|Float)
  (ARGNAME=ARGVALUE)*
  >)+
  <SCOUT TEND>
  -----------------------------------------------------------------------------
  */

  /**
   * Builds a rule from a parsed tag.
   *
   * Every key of the tag is copied into this table. A key whose value is
   * not a String is stored as "true"; a value wrapped in a matching pair
   * of single or double quotes is stored without the quotes.
   *
   * @param ruleTag the tag describing the rule
   * @throws RuleFormatException if the tag identifier is not "scout" or
   *         one of the requiredKeys is absent
   */
  RuleHash(Tag ruleTag) 
    throws RuleFormatException {
      // the identifier for any scout rule tag is SCOUT so we needn't
      // store it, but we do verify its presence; the constant-first
      // comparison reports a missing identifier as a format error
      // instead of failing with a NullPointerException
      if (!"scout".equals(ruleTag.getIdentifier())) {
        throw new RuleFormatException("Required identifier SCOUT missing in tag " + ruleTag.toString());
      }
      Enumeration elements= ruleTag.keys();
      while (elements.hasMoreElements()) {
        String key= (String)elements.nextElement();
        // check that key references a string, else set to true
        Object tmp= ruleTag.get(key);
        String val= tmp instanceof String ? (String)tmp : "true";
        put(key, stripQuotes(val));
      }
      int verifyKey= verifyRequiredKeys();
      if (verifyKey < requiredKeys.length) {
        throw new RuleFormatException("Required key " + requiredKeys[verifyKey] +
                                      " missing in rule tag " + ruleTag.toString());
      }
  }

  /**
   * Builds a rule directly from its attribute values.
   *
   * This should be used with caution as no integrity checking is done on
   * the input. A null value is stored as the empty string. A null
   * argNames array means "no extra arguments"; an argument with no
   * corresponding entry in argValues is stored as the empty string.
   *
   * @param name      value stored under "name"
   * @param type      value stored under "type"
   * @param value     value stored under "value"
   * @param parse     value stored under "parse"
   * @param rule      value stored under "rule"
   * @param argNames  names of additional arguments, may be null
   * @param argValues values of additional arguments, parallel to argNames
   */
  protected RuleHash(String name, String type, String value, String parse, 
                     String rule, String[] argNames, String[] argValues) {
      put("name",name);
      put("type",type);
      put("value",value);
      put("parse",parse);
      put("rule",rule);
      int argCount= argNames == null ? 0 : argNames.length;
      for (int i= 0; i < argCount; i++) {
        // tolerate a missing or short value array; put() maps null to ""
        String argValue= (argValues != null && i < argValues.length) ? argValues[i] : null;
        put(argNames[i],argValue);
      }
  }

  // Removes one pair of matching surrounding quote characters (' or ").
  // Values that are not fully quoted, including a lone quote character
  // or a value whose closing quote is missing, are returned unchanged.
  private static String stripQuotes(String val) {
    int last= val.length() - 1;
    if (last >= 1) {
      char open= val.charAt(0);
      if ((open == '"' || open == '\'') && val.charAt(last) == open) {
        return val.substring(1, last);
      }
    }
    return val;
  }

  // String-only overload of Hashtable.put
  void put(String key, String val) {
    // store an empty string if attribute has no value assigned
    // (Hashtable itself rejects null values)
    if (val == null) {
      val="";
    }
    super.put(key,val);
  }
  
  // String-only overload of Hashtable.get; returns null if key is absent
  String get(String key) {
    return (String)super.get(key);
  }

  // verify that all required keys are present
  // if a verify fails, returns the index of the first key that failed
  // if all keys pass, returns the length of the requiredKeys array
  private int verifyRequiredKeys() {
    for (int i= 0; i < requiredKeys.length; i++) {
      if (get(requiredKeys[i]) == null) {
        return i;
      }
    }
    return requiredKeys.length;
  }

  /**
   * Loads rules from a vector of tags.
   *
   * @param tags vector whose elements are Tag objects
   * @return a vector of RuleHash objects, one per tag, in the same order
   * @throws RuleFormatException if any tag is not a valid rule tag
   */
  public static Vector loadRules(Vector tags) 
    throws RuleFormatException {
      Vector out= new Vector();
      for (int i= 0; i < tags.size(); i++) {
        out.addElement(new RuleHash((Tag)tags.elementAt(i)));
      }
      return out;
  }

  /**
   * Loads rules from an array of tags.
   *
   * @param tags the tags to convert
   * @return a vector of RuleHash objects, one per tag, in the same order
   * @throws RuleFormatException if any tag is not a valid rule tag
   */
  public static Vector loadRules(Tag[] tags)
    throws RuleFormatException {
      Vector out= new Vector();
      for (int i= 0; i < tags.length; i++) {
        out.addElement(new RuleHash(tags[i]));
      }
      return out;
  }

  /**
   * Loads rules from an HTML file.
   *
   * The file is split into tags with Scout.splitter and each tag is
   * converted to a RuleHash. The file is closed before this method
   * returns, whether it succeeds or throws.
   *
   * @param fileName path of the template file
   * @return a vector of RuleHash objects, one per tag found in the file
   */
  public static Vector loadRules(String fileName)
    throws InvalidSGMLException, FileNotFoundException, IOException, RuleFormatException {
      FileInputStream in= new FileInputStream(fileName);
      try {
        // load tags from file
        Vector tags= Scout.splitter.getTags(in);
        return loadRules(tags);
      }
      finally {
        try {
          in.close();
        }
        catch (IOException ignored) {
          // the stream was only read from; a failure to close it must
          // not mask the result or an exception from the block above
        }
      }
  }

}
