/*
 * dictops.c
 * copyright (c) 2004 Wei-Keat Kong.
 *
 * Functions for manipulating the dictionary.
 *
 * FreeDict: Release memory allocated to the dictionary
 * PrintDictfromBitSet: Print every per-character, per-position word list
 *                      for a given word length
 * VerifyHashIndex: Verify that the hash index matches the character
 * CharToHash: Convert a character into a hash index
 * PrintSlotList: Print the words in the slotlist
 * couldFill: Check whether any word could still fill a slot
 * FindWords: Find a set of suitable words to insert
 * GetWorkingSet: Find a set of suitable words that lead to possible solutions
 * RandomizeDict: Randomize the dictionary
 * StringToWideT: Convert a UCS string into an array of wide_t characters
 * CompWord: Compare two Unicode characters for exact match
*/

#include "dictops.h"

/*
 * Function: FreeDict
 * Purpose: Releases the buffers hanging off the dictionary entries.
 * I/O: In  - dict
 *      Out - none.
 * Returns: none.
 * Functions called: free()
 * Algorithm: Walks all GRIDSIZE dictionary entries. For every entry that
 *            holds at least one word, frees the string of each word and
 *            then, for every hash slot that is in use in the mapping table,
 *            frees the length buffer of the bitsets at positions 0..entry.
 *            Nothing else is released here (neither the dict array nor the
 *            words/array members themselves).
*/
void FreeDict(dict_idx *dict)
{
   int entry;

   for (entry = 0; entry < GRIDSIZE; entry++)
   {
      int word, ch, pos;

      // Entries with no words are skipped entirely.
      if (dict[entry].numwords <= 0)
         continue;

      for (word = 0; word < dict[entry].numwords; word++)
         free(dict[entry].words[word].string);

      for (ch = 0; ch < CHAR_LIMIT; ch++)
      {
         // Only hash slots that are in use in the mapping table are freed.
         if (mapping[ch][0] == 0)
            continue;

         for (pos = 0; pos <= entry; pos++)
            free(dict[entry].array[ch][pos].length);
      }
   }
}

/*
 * Function: PrintDictfromBitSet
 * Purpose: Dumps, for one word length, the word list stored for every
 *          hash slot at every character position.
 * I/O: In  - dict, length
 *      Out - none.
 * Returns: none.
 * Functions called: PrintSlotList()
 * Algorithm: For each position 0..length-1 and each hash slot
 *            0..CHAR_LIMIT-1 (used or not), prints a "char/pos" heading to
 *            stdout, the words of dict[length-1].array[slot][position] via
 *            PrintSlotList(), and a blank line.
*/
void PrintDictfromBitSet(dict_idx *dict, int length)
{
   int pos;

   for (pos = 0; pos < length; pos++)
   {
      int ch = 0;

      while (ch < CHAR_LIMIT)
      {
         fprintf(stdout, "char=%d pos=%d\n", ch, pos);
         PrintSlotList(&dict[length-1].array[ch][pos], length, dict);
         fprintf(stdout, "\n");
         ch++;
      }
   }
}

/*
 * Function: VerifyHashIndex
 * Purpose: verifies that the hash index matches the character being passed
 *          to it, probing further slots of the mapping table on a mismatch.
 * I/O: In  - hash index (by pointer), character requested
 *      Out - hash index of the slot holding the character. Note that *idx
 *            is advanced on every miss, so on failure after probing it is
 *            left at the last slot that was computed.
 * Returns: RC_OK/RC_FAILED
 * Functions called: CompWord()
 * Algorithm: If the slot at the incoming index is empty, return RC_FAILED
 *            immediately. Otherwise compare the slot with the character;
 *            on a match return RC_OK. On a mismatch advance the index by
 *            the current step (modulo CHAR_LIMIT), double the step, and
 *            try again, up to HASH_RETRY comparisons in total. If no slot
 *            matches, report on stderr and return RC_FAILED.
*/
u_short VerifyHashIndex(int *idx, wide_t *unichar)
{
   u_short times=0; // Counter for number of hash conflicts
   int shiftval=1; // probe step; doubled after every miss

   // Test the stored value, not its address: the address of an array
   // element is never null, so the former "&mapping[...]==0" check could
   // never detect an empty slot.
   if(mapping[*idx][0]==0)
   {
      // fprintf(stderr, "ReturnHashIndex: empty\n");
      return RC_FAILED;
   }

   for(times=0;times<HASH_RETRY;times++)
   {
      if(CompWord(&mapping[*idx][0], unichar)==RC_FAILED)
      {
         *idx = ((*idx)+shiftval) % CHAR_LIMIT;
         shiftval = shiftval << 1;
      }
      else
      {
         return RC_OK;
      }
   }

   fprintf(stderr, "ReturnHashIndex: failed\n");
   return RC_FAILED;
}

/*
 * Function: CharToHash
 * Purpose: Converts the character stored in a grid cell into an index to
 *          the hash table.
 * I/O: In  - x, y (grid cell coordinates)
 *      Out - none.
 * Returns: hash index
 * Functions called: None.
 * Algorithm: Adds up the UCSLETTER letter values of grid[x][y] and reduces
 *            the sum modulo CHAR_LIMIT. No collision handling is done here.
*/
int CharToHash(int x, int y)
{
   int sum = 0; // running total of the cell's letter values
   int part = 0; // which of the UCSLETTER values is being added

   while (part < UCSLETTER)
   {
      sum += grid[x][y].letter[part];
      part++;
   }

   return sum % CHAR_LIMIT;
}

/*
 * Function: PrintSlotList
 * Purpose: Prints out the words in the slotlist
 * I/O: In  - bitset, length of the word, dictionary
 *      Out - none.
 * Returns: none.
 * Functions called: CreateBitSet(), GetBlockSize(), BitSetExtract(),
 *                   FreeBitSet()
 * Algorithm: Copies the blocks of the bitset into a scratch set, then
 *            repeatedly extracts a word index from the scratch set and
 *            prints that word from dict[length-1] until BitSetExtract()
 *            returns a negative value. The scratch set is released with
 *            FreeBitSet(), matching every other CreateBitSet() user in
 *            this file; a bare free() would release only the BitSet
 *            structure and not its length buffer.
*/
void PrintSlotList(BitSet *set, int length, dict_idx *dict)
{
   BitSet *tempset;
   int i, block, idx;

   // Work on a copy; presumably BitSetExtract() consumes the bits it
   // returns, so the caller's set is left untouched.
   tempset = CreateBitSet(set->size, 0);
   block = GetBlockSize(set);
   for(i=0;i<=block;i++)
   {
      tempset->length[i] = set->length[i];
   }

   idx = BitSetExtract(tempset);
   while(idx > -1)
   {
      // NOTE(review): exactly four elements of the word are printed,
      // whatever `length` is - confirm every stored string has at least
      // four elements.
      fprintf(stdout, "%c%c%c%c\n",
              dict[length-1].words[idx].string[0],
              dict[length-1].words[idx].string[1],
              dict[length-1].words[idx].string[2],
              dict[length-1].words[idx].string[3]);
      idx = BitSetExtract(tempset);
   }
   fprintf(stdout, "\n");
   FreeBitSet(tempset);
}

// Boolean test: is there any word that might still fill the given slot?
//
// Replaces aSlot->bits with a fresh bitset over all words of the slot's
// length, ANDs in the dictionary bitset for every cell of the slot that
// already holds a letter, and reports whether any bit survived.
// Returns 1 when the slot has no filled cell at all, or when at least one
// block of the resulting bitset is non-zero; returns 0 otherwise, in which
// case bits->count is set to 0.
// `dict` is not a parameter here; it resolves to a declaration outside
// this function.
int couldFill(slot_list_t *aSlot) {
	int row = aSlot->h_idx;
	int col = aSlot->w_idx;
	int filled = 0; // number of cells in the slot that already hold a letter
	int pos; // position within the word
	int blk; // countdown over the blocks of the bitset

	if (aSlot->bits) FreeBitSet(aSlot->bits);
	aSlot->bits = CreateBitSet(dict[aSlot->length-1].numwords, 1);
	long lastblock = GetBlockSize(aSlot->bits);

	for (pos = 0; pos < aSlot->length; pos++) {
		if (grid[row][col].exists) {
			// NOTE(review): unlike FindWords(), the hash is used without a
			// VerifyHashIndex() call - confirm that is intentional.
			int hash = CharToHash(row, col);

			filled++;
			// A hand-inlined AND loop over the blocks was tried here and
			// measured slightly slower than BitSetOP(). Should it ever be
			// reinstated, bits->count must be reset to -1 after the loop.
			BitSetOP(aSlot->bits, aSlot->bits, &dict[aSlot->length-1].array[hash][pos], AND);
		}
		// Step to the next cell of the slot.
		if (aSlot->direction == ACROSS) {
			col++;
		} else {
			row++;
		}
	}

	// Nothing constrains the slot, so it can certainly be filled.
	if (filled == 0) return(1);

	// Any non-zero block means at least one candidate word is left.
	u_long *word = aSlot->bits->length;
	for (blk = lastblock; blk >= 0; blk--) {
		if (*word++) {
			return (1); // count is left as is: it is not known here
		}
	}

	aSlot->bits->count = 0;
	return (0);
} // couldFill

/*
 * Function: FindWords
 * Purpose: Builds the bitset of words that fit a slot of the grid, given
 *          the letters already present in it.
 * I/O: In  - x, y (first cell), length of the word, direction, dictionary
 *      Out - none
 * Returns: newly created BitSet; the caller is handed the only pointer to it
 * Functions called: CreateBitSet(), BitSetOP(), VerifyHashIndex(),
 *                   CharToHash(), FreeBitSet()
 * Algorithm: Start from a bitset created with every word of that length
 *            selected. Walk the cells of the slot; for each cell that
 *            already holds a letter, AND in the dictionary bitset for that
 *            letter at that position. If the letter's hash index cannot be
 *            verified, the program exits with RC_FAILED.
 *            If no cell holds a letter, the bitset is rebuilt from empty by
 *            OR-ing the position-0 bitsets of every hash slot in use.
 *
*/
BitSet *FindWords(int x, int y, int length, int direction, dict_idx *dict)
{
	BitSet *matches = CreateBitSet(dict[length-1].numwords, 1);
	int constrained = 0; // number of cells that already hold a letter
	int pos; // position within the word
	int slot; // hash slot, used when rebuilding the unconstrained list

	for (pos = 0; pos < length; pos++)
	{
		if (grid[x][y].exists)
		{
			int hash = CharToHash(x, y);

			constrained++;
			if (VerifyHashIndex(&hash, grid[x][y].letter) != RC_OK)
			{
				fprintf(stderr, "FindWords: invalid hash value.\n");
				exit(RC_FAILED);
			}
			// Every candidate must carry this letter at this position.
			BitSetOP(matches, matches,
						&dict[length-1].array[hash][pos], AND);
		}

		// Step to the next cell of the slot.
		if (direction == ACROSS)
			y++;
		else
			x++;
	}

	if (constrained != 0)
		return matches;

	// No letter constrains the slot: collect the whole word list for this
	// length from the dictionary instead.
	FreeBitSet(matches);
	matches = CreateBitSet(dict[length-1].numwords, 0);
	for (slot = 0; slot < CHAR_LIMIT; slot++)
	{
		if (mapping[slot][0] == 0)
			continue; // hash slot not in use
		BitSetOP(matches, matches, &dict[length-1].array[slot][0], OR);
	}
	return matches;
} // FindWords
                  
/*
 * Function: GetWorkingSet
 * Purpose: Narrows down the list of possible words that lead to a solution.
 * I/O: In  - slot list, word order table, index of the current word in
 *            that table, dictionary
 *      Out - the bits member of the current slot is replaced by the
 *            resulting bitset (any previous bitset is freed)
 * Returns: none
 * Functions called: FindWords(), BitSetFlip(), BitSetCount(),
 *                   CreateBitSet(), BitSetClear(), BitSetOP(),
 *                   SelfBitSetOP(), FreeBitSet()
 * Algorithm: FindWords for the current slot gives the candidate set.
 *            Words already placed in earlier slots of the same length are
 *            switched off, and the candidates are counted.
 *            Only if the count exceeds WORKSETTHRESHOLD, each constraint
 *            of the slot is processed: FindWords is run for the slot named
 *            by idx2; for every hash slot in use, if any of those words
 *            has that character at pos2, the words having the same
 *            character at pos1 (for the length of slot idx1) are collected.
 *            The candidates are then ANDed with the collected set, and
 *            their count is marked as not computed (-1).
*/
void GetWorkingSet(slot_list_t *slotlist, int *wtbl, int currentword,
                        dict_idx *dict)
{
	slot_list_t *slot = &slotlist[wtbl[currentword]];
	BitSet *candidates; // becomes the slot's new bitset
	int prev; // walks the slots placed before the current one

	candidates = FindWords(slot->h_idx, slot->w_idx, slot->length,
		slot->direction, dict);

	// A word already used by an earlier slot cannot be used again.
	for (prev = 0; prev < currentword; prev++)
	{
		slot_list_t *placed = &slotlist[wtbl[prev]];

		if (placed->length == slot->length)
			BitSetFlip(placed->wordindex, candidates, OFF);
	}
	BitSetCount(candidates);

	if (candidates->count > WORKSETTHRESHOLD)
	{
		BitSet *allowed = CreateBitSet(candidates->size, 0); // per-constraint accumulator
		int con;

		for (con = 0; con < slot->numconstraint; con++)
		{
			int idx1 = slot->constraint[con].idx1;
			int pos1 = slot->constraint[con].pos1;
			int idx2 = slot->constraint[con].idx2;
			int pos2 = slot->constraint[con].pos2;
			int length1 = slotlist[idx1].length;
			int length2 = slotlist[idx2].length;
			BitSet *crossing; // words that fit the slot named by idx2
			BitSet *probe; // scratch set for the per-character test
			int ch;

			crossing = FindWords(slotlist[idx2].h_idx,
										slotlist[idx2].w_idx,
										slotlist[idx2].length,
										slotlist[idx2].direction,
										dict);
			probe = CreateBitSet(crossing->size, 1);

			for (ch = 0; ch < CHAR_LIMIT; ch++)
			{
				BitSetClear(probe);
				if (mapping[ch][0] == 0)
					continue; // hash slot not in use

				// Does any word of the idx2 slot carry this character at pos2?
				BitSetOP(probe, crossing, &dict[length2-1].array[ch][pos2], AND);
				BitSetCount(probe);
				if (probe->count > 0)
				{
					// Then words with the same character at pos1 stay possible.
					BitSetOP(allowed, allowed,
								&dict[length1-1].array[ch][pos1], OR);
				}
			}

			// Keep only candidates that this constraint still allows.
			// NOTE(review): the two calls below appear to apply the same AND
			// twice; both are kept as found - confirm whether one is redundant.
			SelfBitSetOP(candidates, allowed, AND);
			BitSetOP(candidates, candidates, allowed, AND);
			BitSetClear(allowed);
			FreeBitSet(probe);
			FreeBitSet(crossing);
			candidates->count = -1; // not computed yet
		}

		FreeBitSet(allowed);
	} // count > WORKSETTHRESHOLD

	if (slot->bits) FreeBitSet(slot->bits);
	slot->bits = candidates;
} // GetWorkingSet

/*
 * Function: RandomizeDict
 * Purpose: randomize the order of the words of one dictionary entry
 * I/O: In  - dictionary, index to the dictionary
 *      Out - dictionary
 * Returns: none (exits with RC_FAILED on an out-of-range index)
 * Functions called: rand()
 * Algorithm: For every position i except the last, pick a random position
 *            in i..numwords-1 and swap the string and ucslength members of
 *            the two words. Entries with fewer than two words are left
 *            untouched.
 *            To be called before initializing the bitsets and generating
 *            the hash tables.
*/
void RandomizeDict(dict_idx *dict, u_short charindex)
{
   long nwords=0; // number of words to randomize
   long i=0; // counter for randomizer
   long res=0; // random index variable
   w_list temp; // temp structure

   // validate input: valid entries are 0..GRIDSIZE-1
   if(charindex>=GRIDSIZE)
   {
      fprintf(stderr, "RandomizeDict: Invalid index %d\n", charindex);
      exit(RC_FAILED);
   }

   // A wide signed counter avoids truncating long word lists and keeps
   // "nwords-1" from wrapping around when the list is empty.
   nwords = dict[charindex].numwords;
   if(nwords<2)
   {
      return; // nothing to shuffle
   }

   // Generate random index. Swap current index with random index.
   for(i=0;i<nwords-1;i++)
   {
      // Computed in double: in float, rand() values close to RAND_MAX
      // round up to RAND_MAX+1, which could make res equal to nwords.
      res = i+(long) ((double)(nwords-i)*rand()/(RAND_MAX+1.0));

      temp.string = dict[charindex].words[i].string;
      temp.ucslength = dict[charindex].words[i].ucslength;

      dict[charindex].words[i].string = dict[charindex].words[res].string;
      dict[charindex].words[i].ucslength =
         dict[charindex].words[res].ucslength;

      dict[charindex].words[res].string = temp.string;
      dict[charindex].words[res].ucslength = temp.ucslength;
   }
}

/*
 * Function: StringToWideT
 * Purpose: places a string of UCS4 characters into an array of wide_t
 *          characters so that it can be used in the grid in various
 *          operations.
 * I/O: In  - string of UCS4 characters, its UCS length, and its actual
 *            length.
 *      Out - array of UCS4 characters with combining characters: row n
 *            holds the n-th base character in column 0 and its combining
 *            characters in the following columns, zero padded.
 * Returns: RC_OK/RC_FAILED
 * Functions called: isCombining()
 * Algorithm: Reads the string, and puts each non-combining character into
 *            column 0 of the next row of the return array, clearing the
 *            rest of that row. A combining character is appended to the
 *            row of the character before it.
 *            Fails if a combining character comes before any base
 *            character, if a row would need more than UCSLETTER columns,
 *            or if the row index exceeds length.
*/
u_short StringToWideT(wide_t wide_word[][UCSLETTER], wide_t *string,
                      u_short ucslength, u_short length)
{
   int index=-1; // current row; -1 until the first base character is seen
   u_short count=0, comb_index=0, count_index=0;

   for(count=0;count<ucslength;count++)
   {
      if(isCombining(string[count]))
      {
         // There is no row to attach to before the first base character.
         if(index < 0)
         {
            fprintf(stderr, "StringToWideT: combining character without a base character.\n");
            return RC_FAILED;
         }
         // Valid columns are 0..UCSLETTER-1.
         if(comb_index >= UCSLETTER)
         {
            fprintf(stderr, "StringToWideT: Too many combining characters.\n");
            return RC_FAILED;
         }
         wide_word[index][comb_index++] = string[count];
      }
      else
      {
         index++;
         // NOTE(review): row index == length is accepted here; confirm the
         // caller's array has length+1 rows (e.g. for a trailing EOL).
         if(index > length)
         {
            fprintf(stderr, "StringToWideT: expected EOL.\n");
            return RC_FAILED;
         }
         for(count_index=1;count_index<UCSLETTER;count_index++)
         {
            wide_word[index][count_index] = 0;
         }
         wide_word[index][0] = string[count];
         comb_index=1;
      }
   }

   return RC_OK;
} // StringToWideT

/*
 * Function: CompWord
 * Purpose: tests two unicode characters for an exact match.
 * I/O: In  - two unicode characters, each an array of UCSLETTER wide_t
 *      Out - none
 * Returns: RC_OK when all UCSLETTER elements are equal, RC_FAILED otherwise
 * Functions called: none
 * Algorithm: Walks both arrays in step and stops at the first element
 *            that differs.
*/
u_short CompWord(wide_t wide_word1[UCSLETTER], wide_t wide_word2[UCSLETTER])
{
   int remaining = UCSLETTER; // elements still to be compared

   while (remaining-- > 0)
   {
      if (*wide_word1++ != *wide_word2++)
      {
         return RC_FAILED;
      }
   }

   return RC_OK;
}
