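/*
 * dict.c
 * copyright (c) 2004 Wei-Keat Kong.
 *
 * Functions for loading a dictionary from file.
 *
 * FillHashTable: maps the characters of one word into the character hash
 *                table and sets the word's bits in the dictionary bitsets
 * InsertWord: Takes a string and inserts it into the global structure
 * load_dictCMD: Tk interface to load the dictionary
 * find_dirCMD: Tk interface that sets RIGHTDIRECTION from the first
 *              character of a file
 *
*/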

#include <string.h>
#include <stdio.h>
#include "dict.h"

/*
 * Function: FillHashTable
 * Purpose: registers one dictionary word in the lookup structures. For
 *          every position of the word, the character at that position is
 *          mapped to a slot of the global mapping table, and the bit
 *          belonging to this word is switched on in the dictionary bitset
 *          for that (slot, position) pair.
 *          The mapping is needed for Unicode characters with combining
 *          characters.
 * I/O: In  - dict: the dictionary entry holding words of this length.
 *            length: number of grid spaces the word occupies.
 *            ucslength: number of wide_t elements in string.
 *            string: pointer to the word.
 *            index: index of the word inside dict, i.e. the bit to set.
 *      Out - dictionary with updated bitsets; the global mapping table
 *            may gain new characters.
 * Returns: RC_OK/RC_FAILED
 * Functions called: WriteLog(), StringToWideT(), CompWord(), BitSetFlip()
 * Algorithm: The mapping array is a simple mod hash to an array of
 *            CHAR_LIMIT characters: the first four wide_t values of a
 *            position are summed modulo CHAR_LIMIT. If the slot is taken
 *            by a different character, the search moves on by a step that
 *            doubles on every retry (1, 2, 4, ...), for at most HASH_RETRY
 *            attempts, before reporting a failure to map the character.
*/
u_short FillHashTable(dict_idx *dict, u_long length, u_long ucslength, 
                  wide_t *string, u_long index)
{
   u_long j=0; // Counter for length
   int times=0; // counter for hash conflicts
   int idx=0; // index to the hash table
   int shiftval=1; // hash table offset
   int charfound=0; // Is character found?

   // A variable length array must have at least one element, and the
   // mapping loop below reads rows 0..length-1 of wide_word, so reject
   // sizes that would make either of those undefined.

   if(ucslength==0 || length>ucslength)
   {
      WriteLog("FillHash: invalid word length.\n");
      return RC_FAILED;
   }

   wide_t wide_word[ucslength][UCSLETTER]; // variable to compare to

   // Initialize wide_word.

   memset(wide_word, 0, sizeof(wide_word));

   // Convert string into wide_word.

   if (StringToWideT(wide_word, string, ucslength, length)==RC_FAILED)
   {
      u_long pos; // position inside the offending string

      WriteLog("FillHash: Failed to convert string to wide_t.\n");

      // Dump the offending string. The casts make the arguments match
      // the conversion specifiers whatever integer type wide_t is.
      fprintf(stderr, "The bad string of length %lu: [",
              (unsigned long) length);
      for (pos = 0; pos < length; pos += 1) {
         fprintf(stderr, "%04lx ", (unsigned long) string[pos]);
      }
      fprintf(stderr, "]\n");
      return RC_FAILED;
   }

   // For each position of the word, map the character and update
   // the dictionary's bitset.

   for(j=0;j<length;j++)
   {
      idx = (wide_word[j][0] + wide_word[j][1] + 
             wide_word[j][2] + wide_word[j][3]) % CHAR_LIMIT;

      charfound = 0;
      shiftval = 1;
      for(times=0;times<HASH_RETRY;times++)
      {
         if(mapping[idx][0]==0)
         {
            // Found empty slot. Fill it with wide_word.
            charfound = 1;
            mapping[idx][0] = wide_word[j][0];
            mapping[idx][1] = wide_word[j][1];
            mapping[idx][2] = wide_word[j][2];
            mapping[idx][3] = wide_word[j][3];
            break;
         }

         // Slot is occupied. Check whether it holds the same character.
         if(CompWord(&mapping[idx][0], wide_word[j])==RC_OK)
         {
            // Found matching character, breaking out of the loop.
            charfound = 1;
            break;
         }

         // Different character in this slot: step to the next candidate.
         // NOTE(review): shiftval doubles on every retry, so HASH_RETRY
         // has to stay below the bit width of int to avoid overflow.
         idx = (idx + shiftval) % CHAR_LIMIT;
         shiftval = shiftval << 1;
      } // End For

      // Check to see if we found a place for the character. 
      if(charfound==0)
      {
         WriteLog("FillHash: unable to map char to buffer.\n");
         return RC_FAILED;
      }

      // Turn bit on for character idx in position j in dictionary.
      BitSetFlip(index, (BitSet *) &dict->array[idx][j], ON);
   }

   return RC_OK;
} // FillHashTable

/*
 * Function: InsertWord
 * Purpose: takes the buffer, converts it to a string in UCS-4 format.
 *          Calculate its actual length, add that string into the dictionary.
 * I/O: In  - dict: dictionary array, indexed by (word length - 1).
 *            pbuffer: buffer containing utf-8 string.
 *            len: length of the utf-8 string in bytes.
 *      Out - dictionary; on insertion the converted string is owned by
 *            the dictionary and is not freed here.
 * Returns: RC_OK if the word was stored or deliberately skipped (a lone
 *          newline, a length the puzzle does not use, or a word that does
 *          not fit the grid); RC_FAILED if the conversion produced no
 *          characters or the word has no non-combining character.
 *          Exits the program if memory cannot be allocated.
 * Functions called: isCombining(), utf8_wide()
*/
u_short InsertWord(dict_idx *dict, utf8_t *pbuffer, int len)
{
   int i;  // Counter for the converted characters.
   int wordindex=0; // where the next word should be put in the dict.
   int charindex=-1; // number of non-combining characters, minus one.
   int num_char=0; // number of characters returned by utf8_wide
   u_short rc=RC_FAILED; // result reported when the word is discarded
   wide_t *outputstring; // string containing the word, used until exit. 
   utf8_t *sourcestring; // temporary storage
 
   // Ignore a line that consists of nothing but a newline.

   if(len==1&&(*pbuffer==10)) return RC_OK;

   if((sourcestring = calloc(len+1, sizeof(utf8_t)))==NULL)
   {
      fprintf(stderr, "InsertWord: Cannot allocate memory for sourcestring\n");
      exit(RC_FAILED);
   }
   if((outputstring = calloc(len+1, sizeof(wide_t)))==NULL)
   {
      fprintf(stderr, "InsertWord: Cannot allocate memory for outputstring\n");
      free(sourcestring);
      exit(RC_FAILED);
   }

   // Copy buffer into utf8_t string.

   memcpy(sourcestring, pbuffer, len);

   // Convert utf8_t to wide_t

   num_char = utf8_wide(outputstring, sourcestring, len);
   if(num_char==0)
   {
      fprintf(stderr, "InsertWord: utf8_wide returned 0.\n");
      goto discard;
   }

   // Count how many spaces the word will take: combining characters
   // share the space of the character they attach to.

   for(i=0;i<num_char;i++)
   {
      if(!isCombining(outputstring[i]))
      {
         charindex++;
      }
   }

   // return an error if the index is invalid.

   if(charindex == -1)
   {
      goto discard;
   }

   // if the string exceeds the grid size or the puzzle doesn't have any
   // words that need this length, then return, since we won't need it.
   // The range check has to come first: lengths[] must not be indexed
   // with a charindex that is beyond the grid.

   if((charindex>=GRIDSIZE)||(lengths[charindex]==0))
   {
      rc = RC_OK;
      goto discard;
   }

   // Update the dictionary.
   // NOTE(review): nothing here bounds wordindex; load_dictCMD only
   // compares numwords against DICTMAX after all words were inserted.
   // Confirm the capacity of words[] against dict.h.

   wordindex = dict[charindex].numwords;
   dict[charindex].words[wordindex].ucslength = num_char;
   dict[charindex].words[wordindex].string = outputstring;
   dict[charindex].words[wordindex].isTaken = 0;
   dict[charindex].numwords++;

   // free sourcestring but never free outputstring while program is running

   free(sourcestring);

   return RC_OK;

discard:
   // The word was not stored, so both temporary strings are released.
   free(sourcestring);
   free(outputstring);
   return rc;
}

/*
 * Function: load_dictCMD
 * Purpose: open a file, read its contents and initialize the global
 *          structure dict.
 * I/O: In  - Standard Tk arguments; argv[1] is the dictionary file name.
 *      Out - Standard Tk return codes
 * Returns: TCL_OK/TCL_ERROR
 *          Exits the program if a needed word length has too few words,
 *          if a bitset cannot be allocated, or if a word list reaches
 *          DICTMAX entries.
 * Functions called: WriteLog(), FillHashTable(), RandomizeDict(), InsertWord()
 * Algorithm: Read the dictionary into global structure dict, one word per
 *            line. Then, for every word length the puzzle uses, shuffle
 *            the words, allocate one bitset per (character slot, letter
 *            position) and register every word with FillHashTable().
 * NOTE(review): word strings and bitsets allocated by an earlier call are
 *               not released here; confirm whether the command can be
 *               invoked more than once.
*/
int load_dictCMD(ClientData clientdata, Tcl_Interp *interp, int argc,
                 char *argv[])
{
   FILE *fp=NULL; // file pointer
   char *pbuffer=NULL; // buffer for storing strings
   char *filename;  // string containing file name
   int i; // counter for initializing dictionary
   int letterpos=0, count=0; // counters for initializing bitset
   int lenidx=0; // Counter for dictionary index (word length - 1)
   int block=0; // block size for bitset alloction
   int status=TCL_ERROR; // Tcl result; set to TCL_OK only on success

   // first argument is the filename

   if(argc<2)
   {
      WriteLog("load_dictCMD: Missing dictionary file name.\n");
      return TCL_ERROR;
   }
   filename = argv[1]; 

   if((pbuffer = calloc(1, MAXSTRINGSIZE))==NULL)
   {
      WriteLog("load_dictCMD: Failed to allocate buffer.\n");
      goto done;
   }

   if((fp = fopen(filename, "r"))==NULL)
   {
      WriteLog("load_dictCMD: Failed to open %s.\n", filename);
      goto done;
   }

   for(i=0;i<GRIDSIZE;i++)
   {
      dict[i].numwords = 0;
   }

   // Get a string from the file, insert it into the dictionary.

   while(fgets(pbuffer, MAXSTRINGSIZE, fp)!=NULL)
   {  
      if(InsertWord(dict, pbuffer, strlen(pbuffer))==RC_FAILED)
      {
         WriteLog("load_dictCMD: Error while inserting.\n");
         goto done;
      }
      memset(pbuffer, 0, MAXSTRINGSIZE);
   }

   // Check to make sure we have enough words to fill out the grid.

   for(lenidx=0;lenidx<GRIDSIZE;lenidx++)
   {
      // Lengths the puzzle does not use need no lookup structures.
      if(lengths[lenidx]==0)
      {
         continue;
      }

      if(lengths[lenidx] > dict[lenidx].numwords)
      {
         fprintf(stderr, 
                 "load_dictCMD: Not enough words of length %d to complete grid. %d\n",
                 lengths[lenidx],lenidx+1);
         exit(RC_FAILED);
      }

      // Randomize dictionary
      srand(1); // for reproducibility

      if(dict[lenidx].numwords > 0)
      {
         RandomizeDict(dict, lenidx);

         // Index of the last u_long needed to hold one bit per word.
         block = ((dict[lenidx].numwords)-1)/NUMLONGBITS;
      }

      // Initialize bitsets

      if(lengths[lenidx]>0)
      {
         for(count=0;count<CHAR_LIMIT;count++)
         {
            for(letterpos=0;letterpos<=lenidx;letterpos++)
            {
               dict[lenidx].array[count][letterpos].size = 
                  dict[lenidx].numwords;
               dict[lenidx].array[count][letterpos].length = 
                  calloc(block+1, sizeof(u_long));
               if(dict[lenidx].array[count][letterpos].length==NULL)
               {
                  fprintf(stderr, "Failed to allocate bitset"); 
                  exit(RC_FAILED);
               }  
            }
         }
      }

      // Fill hashtable
      if (dict[lenidx].numwords >= DICTMAX) {
         fprintf(stderr, "There are %ld words, but DICTMAX is only %d; " 
            "please raise this limit.\n", dict[lenidx].numwords, DICTMAX);
         exit(1);
      }
      for(count=0;count<dict[lenidx].numwords;count++)
      {
         // dictionary, length of the word, ucslength, string, dict index
         if((FillHashTable(&dict[lenidx], lenidx+1, 
                           dict[lenidx].words[count].ucslength, 
                           dict[lenidx].words[count].string,
                           count)==RC_FAILED))
         {
            WriteLog("load_dictCMD: Error while filling hash.\n");
            fprintf(stderr,
                    "Problem with hash table. Try increasing CHAR_LIMIT.\n");
            // Report a Tcl result code, not an RC_* code.
            goto done;
         }
      }
   }

   status = TCL_OK;

done:
   // Single exit: release the file and the line buffer on every path.
   if(fp!=NULL)
   {
      fclose(fp);
   }
   free(pbuffer);

   return status;
}

/*
 * Function: find_dirCMD
 * Purpose: read the first line of a file and set the global
 *          RIGHTDIRECTION from its first character.
 * I/O: In  - Standard Tk arguments; argv[1] is the file name.
 *      Out - Standard Tk return codes; RIGHTDIRECTION is set to 1 if
 *            isRightDirection() accepts the first character, else to 0.
 * Returns: TCL_OK/TCL_ERROR. A file without any line leaves
 *          RIGHTDIRECTION untouched and still returns TCL_OK.
 *          Exits the program if memory for the conversion cannot be
 *          allocated.
 * Functions called: WriteLog(), utf8_wide(), isRightDirection()
*/
int find_dirCMD(ClientData clientdata, Tcl_Interp *interp, int argc,
                 char *argv[])
{
   FILE *fp=NULL; // file pointer
   char *pbuffer=NULL; // buffer for storing strings
   char *filename;  // string containing file name
   int len=0; // length of the first line in bytes
   int num_char=0; // number of utf8 characters returned 
   int status=TCL_ERROR; // Tcl result; set to TCL_OK only on success
   wide_t *outputstring=NULL; // string containing the word
   utf8_t *sourcestring=NULL; // temporary storage

   // first argument is the filename

   if(argc<2)
   {
      WriteLog("find_dirCMD: Missing file name.\n");
      return TCL_ERROR;
   }
   filename = argv[1]; 

   if((pbuffer = calloc(1, MAXSTRINGSIZE))==NULL)
   {
      WriteLog("find_dirCMD: Failed to allocate buffer.\n");
      goto done;
   }

   if((fp = fopen(filename, "r"))==NULL)
   {
      WriteLog("find_dirCMD: Failed to open %s.\n", filename);
      goto done;
   }

   // Nothing to read: keep the current direction and report success.

   if(fgets(pbuffer, MAXSTRINGSIZE, fp)==NULL)
   {
      status = TCL_OK;
      goto done;
   }

   // One spare element per buffer, the same sizing InsertWord uses.

   len = strlen(pbuffer);
   if((sourcestring = calloc(len+1, sizeof(utf8_t)))==NULL)
   {
      fprintf(stderr, "find_dirCMD: Cannot malloc for sourcestring\n");
      exit(RC_FAILED);
   }
   if((outputstring = calloc(len+1, sizeof(wide_t)))==NULL)
   {
      fprintf(stderr, "find_dirCMD: Cannot malloc for outputstring\n");
      free(sourcestring);
      exit(RC_FAILED);
   }

   // Copy buffer into utf8_t string.
   memcpy(sourcestring, pbuffer, len);

   // Convert utf8_t to wide_t
   num_char = utf8_wide(outputstring, sourcestring, len);
   if(num_char==0)
   {
      fprintf(stderr, "find_dirCMD: utf8_wide returned 0.\n");
      // Report a Tcl result code, not an RC_* code.
      goto done;
   }

   // The first character of the line decides the direction.
   RIGHTDIRECTION = isRightDirection(outputstring[0]) ? 1 : 0;
   status = TCL_OK;

done:
   // Single exit: release every resource on every path.
   free(outputstring);
   free(sourcestring);
   if(fp!=NULL)
   {
      fclose(fp);
   }
   free(pbuffer);

   return status;
}
