/*
* Devulapalli Praveen
* praveen@ccs.uky.edu
*
* Hashlookup.java:
*    This class is based on the lookup in the spelling checker program.
*    It uses multiple hash functions and a bitmap to look up a string:
*    the string is reported as found only if every hash function returns
*    an integer whose corresponding bit is set. The string itself is not
*    stored.
*    E.g. if hi(string) = xi, hj(string) = xj, ..., hk(string) = xk,
*    the bits xi, xj, ..., xk are set in the bitmap, where hi...hk are the
*    hash functions and xi...xk are the hashcodes (integers) the string
*    hashes to under the corresponding hash functions.
*
*                 store(str)
*                     |   
*                     |        found
*                 lookup(str) -------> ignore str
*                     |   \
*            not found|    \   
*                     |     \ ______________________
*                  set bits  |                      |
*                     |      |hash1(), ...hashi()   |
*                      ----->| ... i hash functions |
*                            |______________________|
*/

package Scout;

import java.util.BitSet; // To include the BitSet Java utility class

import java.io.Serializable;

/**
 * Approximate string-membership set backed by a bitmap and two hash functions.
 *
 * <p>Storing a string sets the bits at the indices produced by
 * {@link #hash1(String)} and {@link #hash2(String)}; a lookup reports the
 * string as present only when both of those bits are set. The strings
 * themselves are never kept, so a lookup can report a string that was never
 * stored if both of its bits happen to have been set by other strings.
 *
 * <p>NOTE(review): no serialVersionUID is declared. Adding one, or changing
 * the non-private field/method declarations, could break reading previously
 * serialized instances -- confirm before changing.
 */
public class Hashlookup implements Serializable {
  /** Number of bits in an {@code int}; hash1 wraps its shift amount here. */
  private static final int INT_BITS = 32;

  int BITMAPSIZE; // Size of the bitmap in bits; also the modulus for hash codes
  BitSet  bitMap; // bitmap to store bits corresponding to hashcodes

  /**
   * Creates an empty lookup table.
   *
   * @param size number of bits in the bitmap; must be positive because every
   *             hash code is reduced modulo this value
   * @throws IllegalArgumentException if {@code size} is zero or negative
   */
  Hashlookup(int size) {
    // A zero size would make every later "% BITMAPSIZE" throw
    // ArithmeticException, and a negative one is rejected by BitSet with a
    // less helpful exception, so fail fast with a clear message instead.
    if (size <= 0) {
      throw new IllegalArgumentException(
          "bitmap size must be positive, got " + size);
    }
    BITMAPSIZE = size;
    bitMap = new BitSet(BITMAPSIZE);
  }

  /**
   * Shift-and-add hash: each character is shifted left by 0, 8, 16, 24, 0, ...
   * bits in turn and added to a running (wrapping) int sum.
   *
   * @param str string to hash
   * @return a bit index in the range {@code [0, BITMAPSIZE)}
   * @throws NullPointerException if {@code str} is null
   */
  int hash1(String str) {
    int sum = 0;
    int shift = 0;
    for (int i = 0; i < str.length(); i++) {
      sum += ((int) str.charAt(i)) << shift;
      shift += 8;
      if (shift == INT_BITS) {
        shift = 0; // start over at the low byte once the int is "full"
      }
    }
    return toBitIndex(sum);
  } // hash1(str)

  /**
   * Multiplicative hash from the K&amp;R C Programming Language book:
   * {@code h = c + 31 * h} for each character, with int wrap-around.
   *
   * @param str string to hash
   * @return a bit index in the range {@code [0, BITMAPSIZE)}
   * @throws NullPointerException if {@code str} is null
   */
  int hash2(String str) {
    int hashval = 0;
    for (int i = 0; i < str.length(); i++) {
      hashval = str.charAt(i) + 31 * hashval;
    }
    return toBitIndex(hashval);
  } // hash2(str)

  /**
   * Reduces a raw hash to a non-negative bitmap index. Java's {@code %} keeps
   * the sign of the dividend, so a negative remainder is negated.
   */
  private int toBitIndex(int rawHash) {
    int remainder = rawHash % BITMAPSIZE;
    return remainder > 0 ? remainder : -remainder;
  }

  /**
   * Tests whether the bits for {@code str} are already set.
   *
   * @param str string to look up
   * @return {@code true} only if the bits for both hash functions are set;
   *         {@code false} if at least one of them is clear
   */
  boolean lookup(String str) {
    // hash2 is evaluated only when the hash1 bit is set (short-circuit).
    return bitMap.get(hash1(str)) && bitMap.get(hash2(str));
  } // lookup(str)

  /**
   * Records {@code str} by setting the bits for both hash functions, unless
   * both are already set.
   *
   * @param str string to record
   * @return {@code true} if the string was not found and its bits were set;
   *         {@code false} if both bits were already set (nothing changes)
   */
  boolean store(String str) {
    // Hash once and reuse the indices for both the membership test and the
    // update instead of hashing the string a second time.
    int first = hash1(str);
    int second = hash2(str);
    if (bitMap.get(first) && bitMap.get(second)) {
      return false;
    }
    bitMap.set(first);
    bitMap.set(second);
    return true;
  } // store(str)

  /**
   * Command-line driver: stores every argument in a 4,000,000-bit table and
   * prints whether it was added or was already present. Prints a usage
   * message and exits with status 0 when no arguments are given.
   */
  public static void main(String[] args) {

    if (args.length == 0) {
      System.out.println("Hash: usage: % java Hashlookup  <string1> " +
        " [string2] [string3] ....");
      System.exit(0);
    }

    Hashlookup hashTest = new Hashlookup(4000000); // 4mi bits = bitmap size
    for (int i = 0; i < args.length; i++) {
      String verdict = hashTest.store(args[i])
          ? "Added: "
          : "Already stored (ignoring it): ";
      System.out.println("Hashlookup: " + verdict + args[i]);
    }
  }

} // class Hashlookup
