/*
is_alike()     Determins whether two character strings are
               sufficiently equal using the 'Levenstein distance'
               algorithm.

Limitation:    For memory space conservation and execution speed
               this implementation compares the first 20 
               characters of both string only. See l_distance.c()
               listing.
               '#define COMP_LEN' may be changed to decrease or
               increase the number of characters compared.

Returns:       The constant ACCEPTED or REJECTED.

               De-commenting the following '#define DEBUG' will
               create a stand-alone program, which enables you
               to experiment with different values for the 
               constants 'addition', 'change' and 'deletion'.
*/

/* #define DEBUG */

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>

#ifdef DEBUG
#include <stdio.h>
#endif

int     addition = 1;    /* change constants in this block */
int     change   = 2;    /* of four lines if needed.       */
int     deletion = 3;

#define COMP_LEN         20
#define TU(x)            toupper(x)
#define ARR_SIZE         COMP_LEN + 1
#define SMALLEST_OF(x,y,z)  ( (x<y) ? min(x,z) : min(y,z) )
#define ZERO_IF_EQUAL(x,y)  (TU(requested[x-1])==TU(found[y-1]) ? 0 : change)
#define ACCEPTED         1
#define REJECTED         0

int     distance[ARR_SIZE][ARR_SIZE];

int is_alike(char *requested, char *found)
{
   register int i,j;
   int r_len, f_len, threshold;

   /* first test: compare first character of both strings */
   if (toupper(*requested) != toupper(*found))   {
      #ifdef DEBUG
         printf("\nRejected due to difference in first letters\n");
      #endif
      return(REJECTED);            /* different first characters */
   }

   r_len = (strlen(requested)>COMP_LEN ? COMP_LEN : strlen(requested));
   f_len = (strlen(found)>COMP_LEN ? COMP_LEN : strlen(found));

   /* second test: comparing string length */
   threshold = (int)  floor( (double) 1 + ((r_len + 2) / 4));
   if ( abs(r_len - f_len) > threshold) {
      #ifdef DEBUG
         printf("\nRejected due to large length difference\n");
      #endif
      return(REJECTED);         /* length difference too big */
   }

   distance[0][0] = 0;
   for (j = 1; j <= ARR_SIZE ; j++)
      distance[0][j] = distance[0][j-1] + addition;
   for (j = 1; j <= ARR_SIZE ; j++)
      distance[j][0] = distance[j-1][0] + deletion;

   for (i = 1; i <= r_len; i++)
      for (j = 1; j <= f_len; j++)
         distance[i][j] = SMALLEST_OF(
                          (distance[i-1][j-1] + ZERO_IF_EQUAL(i,j)),
                          (distance[i][j-1]   + addition),
                          (distance[i-1][j]   + deletion)  );
#ifdef DEBUG
   printf("\nis_alike(): threshold %d\n\n",threshold);
   for (i=1; i <= r_len; i++) {
      for (j=1; j <= f_len; j++)
         printf("  %3d",distance[i][j]);
      printf("\n");
   }
   printf("\nis_alike(): l_distance is %d\n",distance[r_len][f_len]);
#endif

   /* third test: Levenstein distance within threshold limit ? */

   if ( distance[r_len][f_len] <= threshold )
      return(ACCEPTED);                /* distance within limits */
   else
      return(REJECTED);                /* distance too big */
}

#ifdef DEBUG
void usage()
{
   printf("\nUsage:  IA requested found\n");
   printf("\n        requested & found are the two strings to");
   printf("\n        be compared with the is_alike() function");
   printf("\n        containing the Levenstein distance algorithm.\n\n");
}

int main(int argc, char *argv[])
{
   if (argc != 3) {
      usage();
      exit(1);
   } else {
      printf("\nMain(): Comparing '%s' and '%s':\n", argv[1], argv[2]);
      if (is_alike(argv[1], argv[2]))
         printf("\nMain(): Strings sufficiently alike...\n\n");
      else
         printf("\nMain(): Strings differing too much...\n\n");
   }
   return(0);
}
#endif
