/*
 * Indexer.java
 * A.L. Borchers, 1988
 *
 * Scout Rule for weighted keyword indexing of URLs
 * 
 */

package Scout;


import java.net.URL;
import java.net.MalformedURLException;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;

/**
 * Scout rule that builds a weighted keyword index of documents.
 *
 * For every document processed, the words reported by the required
 * WordCounterRule are added to an in-memory index that maps each word to
 * the list of IndexerRecords (document URL plus word count) referring to it.
 * After each document the full index is exported as text and stored as this
 * rule's result.
 */
public class Indexer extends Rule {

  // Rule named by the REQUIRE attribute; the constructor verifies that it is
  // a WordCounterRule.
  Rule wordCounter= null;

  // Maps a word to the Vector of IndexerRecord objects that reference it.
  Hashtable index= new Hashtable();

  /**
   * Creates the indexer and resolves the word counter rule it depends on.
   *
   * @param scout the Scout this rule belongs to
   * @param rh    the rule initialization data, passed to the superclass
   * @throws RuleFormatException if the REQUIRE attribute is missing or does
   *         not resolve to a WordCounterRule
   */
  public Indexer(Scout scout, RuleHash rh) 
    throws RuleFormatException {
      super(scout,rh);
      // Get the require field indicating the WordCounterRule
      if (attr.get("require") == null) {
        throw new RuleFormatException("Required element REQUIRE not present in Rule initialization");
      }
      // Verify that REQUIRE field indicates a WordCounterRule
      wordCounter= scout.ruleLookup(attr.get("require"));
      if (wordCounter == null || !(wordCounter instanceof WordCounterRule)) {
        throw new RuleFormatException("Initialization Element REQUIRE does not refer to a WordCounterRule");
      }
  }

  /**
   * Indexes the current document: adds one IndexerRecord per word reported
   * by the word counter, then stores the exported index as this rule's
   * result for the current sequence number.
   *
   * If the word counter produced no result for this document, nothing is
   * added to the index but the (unchanged) export is still stored.
   */
  public synchronized void processDoc() {
    super.processDoc();
    // Get the output for this document from the wordCounter
    Vector words= scout.ruleResults.get(this,wordCounter.getName(),sequenceNumber);
    // Guard against a missing result so a document without word counts
    // does not abort processing with a NullPointerException.
    if (words != null && !words.isEmpty()) {
      // The document URL is the same for every word, so build it once
      // instead of re-parsing it on each loop iteration.
      URL url= null;
      try {
        url= new URL(doc.getURL().toString());
      }
      catch (MalformedURLException mfue) {
        // Not expected: the string comes from the document's own URL.
        // Best effort is kept from the original code: records are still
        // added, carrying a null URL.
      }
      // for each word in the result reported by the word counter, add an
      // IndexerRecord for this document's URL w/ weight equal to the word count
      for (int i= 0; i < words.size(); i++) {
        WordCounterRuleWord word= (WordCounterRuleWord)words.elementAt(i);
        Vector v= (Vector)index.get(word.word);
        if (v == null) {
          // first occurrence of word
          v= new Vector();
          index.put(word.word,v);
        }
        v.addElement(new IndexerRecord(url,word.count));
      }
    }
    Vector out= new Vector();
    out.addElement(exportIndex());
    scout.ruleResults.put(this,sequenceNumber,out);
  }

  /**
   * Renders the whole index as text.
   *
   * The output starts with the line "Indexer:"; each word then contributes a
   * "Word: w, References: n" line followed by one line per IndexerRecord.
   * Words appear in the order returned by the Hashtable's key enumeration.
   *
   * @return the textual form of the index
   */
  public String exportIndex() {
    Enumeration keys= index.keys();
    // Accumulate in a buffer: repeated String concatenation inside the
    // loops would re-copy the whole output on every append.
    StringBuffer out= new StringBuffer("Indexer:\n");
    while (keys.hasMoreElements()) {
      String nextKey= (String)keys.nextElement();
      Vector v= (Vector)index.get(nextKey);
      out.append("Word: ").append(nextKey)
         .append(", References: ").append(v.size()).append("\n");
      for (int i= 0; i < v.size(); i++) {
        out.append(((IndexerRecord)v.elementAt(i)).toString()).append("\n");
      }
    }
    return out.toString();
  }


}
