Logo Search packages:      
Sourcecode: c-cpp-reference version File versions  Download package

approx.c

/***************************************************************
 *
 * Fuzzy string searching subroutines
 *
 * Author:    John Rex
 * Date:      August, 1988
 * References: (1) Computer Algorithms, by Sara Baase
 *                 Addison-Wesley, 1988, pp 242-4.
 *             (2) Hall PAV, Dowling GR: "Approximate string matching",
 *                 ACM Computing Surveys, 12:381-402, 1980.
 *
 * Verified on:
 *    Datalite, DeSmet, Ecosoft, Lattice, MetaWare, MSC, Turbo, Watcom
 *
 * Compile time preprocessor switches:
 *    DEBUG - if defined, include test driver
 *
 * Usage:
 *
 *    char *pattern, *text;  - search for pattern in text
 *    int degree;      - degree of allowed mismatch
 *    char *start, *end;
 *    int howclose;
 *
 *    void App_init(pattern, text, degree);   - setup routine
 *    void App_next(&start, &end, &howclose); - find next match
 *
 *    - searching is done when App_next() returns start==NULL
 *
 **************************************************************/

#define DEBUG 1

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* local, static data */

static char *Text, *Pattern; /* pointers to search strings       */
static int Textloc;          /* current search position in Text  */
static int Plen;             /* length of Pattern                */
static int Degree;           /* max degree of allowed mismatch   */
static int *Ldiff, *Rdiff;   /* dynamic difference arrays        */
static int *Loff,  *Roff;    /* used to calculate start of match */

void App_init(char *pattern, char *text, int degree)
{
      int i;

      /* save parameters */

      Text = text;
      Pattern = pattern;
      Degree = degree;

      /* initialize */

      Plen = strlen(pattern);
      Ldiff  = (int *) malloc(sizeof(int) * (Plen + 1) * 4);
      Rdiff  = Ldiff + Plen + 1;
      Loff   = Rdiff + Plen + 1;
      Roff   = Loff +  Plen + 1;
      for (i = 0; i <= Plen; i++)
      {
            Rdiff[i] = i;   /* initial values for right-hand column */
            Roff[i]  = 1;
      }

      Textloc = -1; /* current offset into Text */
}

void App_next(char **start, char **end, int *howclose)
{
      int *temp, a, b, c, i;

      *start = NULL;
      while (*start == NULL)  /* start computing columns */
      {
            if (Text[++Textloc] == '\0') /* out of text to search! */
                  break;

            temp = Rdiff;   /* move right-hand column to left ... */
            Rdiff = Ldiff;  /* ... so that we can compute new ... */
            Ldiff = temp;   /* ... right-hand column */
            Rdiff[0] = 0;   /* top (boundary) row */

            temp = Roff;    /* and swap offset arrays, too */
            Roff = Loff;
            Loff = temp;
            Roff[1] = 0;

            for (i = 0; i < Plen; i++)    /* run through pattern */
            {
                  /* compute a, b, & c as the three adjacent cells ... */

                  if (Pattern[i] == Text[Textloc])
                        a = Ldiff[i];
                  else  a = Ldiff[i] + 1;
                  b = Ldiff[i+1] + 1;
                  c = Rdiff[i] + 1;

                  /* ... now pick minimum ... */

                  if (b < a)
                        a = b;
                  if (c < a)
                        a = c;

                  /* ... and store */

                  Rdiff[i+1] = a;
            }

            /* now update offset array */
            /* the values in the offset arrays are added to the
               current location to determine the beginning of the
               mismatched substring. (see text for details) */

            if (Plen > 1) for (i=2; i<=Plen; i++)
            {
                  if (Ldiff[i-1] < Rdiff[i])
                        Roff[i] = Loff[i-1] - 1;
                  else if (Rdiff[i-1] < Rdiff[i])
                        Roff[i] = Roff[i-1];
                  else if (Ldiff[i] < Rdiff[i])
                        Roff[i] = Loff[i] - 1;
                  else /* Ldiff[i-1] == Rdiff[i] */
                        Roff[i] = Loff[i-1] - 1;
            }

            /* now, do we have an approximate match? */

            if (Rdiff[Plen] <= Degree)    /* indeed so! */
            {
                  *end = Text + Textloc;
                  *start = *end + Roff[Plen];
                  *howclose = Rdiff[Plen];
            }
      }

      if (start == NULL) /* all done - free dynamic arrays */
            free(Ldiff);
}

#ifdef DEBUG

void main(int argc, char **argv)
{
      char *begin, *end;
      int howclose;

      if (argc != 4)
      {
            puts("Usage is: approx pattern text degree\n");
            exit(0);
      }

      App_init(argv[1], argv[2], atoi(argv[3]));
      App_next(&begin, &end, &howclose);
      while (begin != NULL)
      {
            printf("Degree %d: %.*s\n", howclose, end-begin+1, begin);
            App_next(&begin, &end, &howclose);
      }
}

#endif

Generated by  Doxygen 1.6.0   Back to index