deb-goldedplus/goldlib/myspell/affixmgr.cxx

#include "license.readme"

#include <cstdlib>
#include <cstring>
#include <cstdio>

#include "affixmgr.hxx"
#include "affentry.hxx"

#if !defined(_MSC_VER)
using namespace std;
#endif


// First some base level utility routines
extern void   mychomp(char * s);
extern char * mystrdup(const char * s);
extern char * myrevstrdup(const char * s);
extern char * mystrsep(char ** sptr, const char delim);
extern int    isSubset(const char * s1, const char * s2);


AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
{
  // register hash manager and load affix data from aff file
  pHMgr = ptr;
  trystring = NULL;
  encoding=NULL;
  reptable = NULL;
  numrep = 0;
  maptable = NULL;
  nummap = 0;
  compound=NULL;
  nosplitsugs= (0==1);

  cpdmin = 3;  // default value
  for (int i=0; i < SETSIZE; i++) {
     pStart[i] = NULL;
     sStart[i] = NULL;
     pFlag[i] = NULL;
     sFlag[i] = NULL;
  }
  if (parse_file(affpath)) {
     fprintf(stderr,"Failure loading aff file %s\n",affpath);
     fflush(stderr);
  }
}


AffixMgr::~AffixMgr()
{

  // pass through linked prefix entries and clean up
  for (int i=0; i < SETSIZE ;i++) {
       pFlag[i] = NULL;
       PfxEntry * ptr = (PfxEntry *)pStart[i];
       PfxEntry * nptr = NULL;
       while (ptr) {
            nptr = ptr->getNext();
            delete(ptr);
            ptr = nptr;
            nptr = NULL;
       }
  }

  // pass through linked suffix entries and clean up
  for (int j=0; j < SETSIZE ; j++) {
       sFlag[j] = NULL;
       SfxEntry * ptr = (SfxEntry *)sStart[j];
       SfxEntry * nptr = NULL;
       while (ptr) {
            nptr = ptr->getNext();
            delete(ptr);
            ptr = nptr;
            nptr = NULL;
       }
  }

  if (trystring) free(trystring);
  trystring=NULL;
  if (encoding) free(encoding);
  encoding=NULL;
  if (maptable) {
     for (int j=0; j < nummap; j++) {
        free(maptable[j].set);
        maptable[j].set = NULL;
        maptable[j].len = 0;
     }
     free(maptable);
     maptable = NULL;
  }
  nummap = 0;
  if (reptable) {
     for (int j=0; j < numrep; j++) {
        free(reptable[j].pattern);
        free(reptable[j].replacement);
        reptable[j].pattern = NULL;
        reptable[j].replacement = NULL;
     }
     free(reptable);
     reptable = NULL;
  }
  numrep = 0;
  if (compound) free(compound);
  compound=NULL;
  pHMgr = NULL;
  cpdmin = 0;
}


// read in aff file and build up prefix and suffix entry objects
int  AffixMgr::parse_file(const char * affpath)
{

  // io buffers
  char line[MAXLNLEN+1];

  // affix type
  char ft;

  // open the affix file
  FILE * afflst;
  afflst = fopen(affpath,"r");
  if (!afflst) {
    fprintf(stderr,"Error - could not open affix description file %s\n",affpath);
    return 1;
  }

  // step one is to parse the affix file building up the internal
  // affix data structures


    // read in each line ignoring any that do not
    // start with a known line type indicator

    while (fgets(line,MAXLNLEN,afflst)) {
       mychomp(line);

       /* parse in the try string */
       if (strncmp(line,"TRY",3) == 0) {
          if (parse_try(line)) {
             return 1;
          }
       }

       /* parse in the name of the character set used by the .dict and .aff */
       if (strncmp(line,"SET",3) == 0) {
          if (parse_set(line)) {
             return 1;
          }
       }

       /* parse in the flag used by the controlled compound words */
       if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
          if (parse_cpdflag(line)) {
             return 1;
          }
       }

       /* parse in the flag used by the controlled compound words */
       if (strncmp(line,"COMPOUNDMIN",11) == 0) {
          if (parse_cpdmin(line)) {
             return 1;
          }
       }

       /* parse in the typical fault correcting table */
       if (strncmp(line,"REP",3) == 0) {
          if (parse_reptable(line, afflst)) {
             return 1;
          }
       }

       /* parse in the related character map table */
       if (strncmp(line,"MAP",3) == 0) {
          if (parse_maptable(line, afflst)) {
             return 1;
          }
       }

       // parse this affix: P - prefix, S - suffix
       ft = ' ';
       if (strncmp(line,"PFX",3) == 0) ft = 'P';
       if (strncmp(line,"SFX",3) == 0) ft = 'S';
       if (ft != ' ') {
          if (parse_affix(line, ft, afflst)) {
             return 1;
          }
       }

       // handle NOSPLITSUGS
       if (strncmp(line,"NOSPLITSUGS",11) == 0)
		   nosplitsugs=(0==0);

    }
    fclose(afflst);

    // now we can speed up performance greatly taking advantage of the
    // relationship between the affixes and the idea of "subsets".

    // View each prefix as a potential leading subset of another and view
    // each suffix (reversed) as a potential trailing subset of another.

    // To illustrate this relationship if we know the prefix "ab" is found in the
    // word to examine, only prefixes that "ab" is a leading subset of need be examined.
    // Furthermore is "ab" is not present then none of the prefixes that "ab" is
    // is a subset need be examined.
    // The same argument goes for suffix string that are reversed.

    // Then to top this off why not examine the first char of the word to quickly
    // limit the set of prefixes to examine (i.e. the prefixes to examine must
    // be leading supersets of the first character of the word (if they exist)

    // To take advantage of this "subset" relationship, we need to add two links
    // from entry.  One to take next if the current prefix is found (call it nexteq)
    // and one to take next if the current prefix is not found (call it nextne).

    // Since we have built ordered lists, all that remains is to properly intialize
    // the nextne and nexteq pointers that relate them

    process_pfx_order();
    process_sfx_order();

    return 0;
}

// we want to be able to quickly access prefix information
// both by prefix flag, and sorted by prefix string itself
// so we need to set up two indexes

int AffixMgr::build_pfxlist(AffEntry* pfxptr)
{
  PfxEntry * ptr;
  PfxEntry * pptr;
  PfxEntry * ep = (PfxEntry*) pfxptr;

  // get the right starting points
  const char * key = ep->getKey();
  const unsigned char flg = ep->getFlag();

  // first index by flag which must exist
  ptr = (PfxEntry*)pFlag[flg];
  ep->setFlgNxt(ptr);
  pFlag[flg] = (AffEntry *) ep;


  // next index by affix string

  // handle the special case of null affix string
  if (strlen(key) == 0) {
    // always inset them at head of list at element 0
     ptr = (PfxEntry*)pStart[0];
     ep->setNext(ptr);
     pStart[0] = (AffEntry*)ep;
     return 0;
  }

  // now handle the general case
  unsigned char sp = *((const unsigned char *)key);
  ptr = (PfxEntry*)pStart[sp];

  /* handle the insert at top of list case */
  if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) {
     ep->setNext(ptr);
     pStart[sp] = (AffEntry*)ep;
     return 0;
  }

  /* otherwise find where it fits in order and insert it */
  pptr = NULL;
  for (; ptr != NULL; ptr = ptr->getNext()) {
    if (strcmp( ep->getKey() , ptr->getKey() ) <= 0) break;
    pptr = ptr;
  }
  pptr->setNext(ep);
  ep->setNext(ptr);
  return 0;
}


// we want to be able to quickly access suffix information
// both by suffix flag, and sorted by the reverse of the
// suffix string itself; so we need to set up two indexes
int AffixMgr::build_sfxlist(AffEntry* sfxptr)
{
  SfxEntry * ptr;
  SfxEntry * pptr;
  SfxEntry * ep = (SfxEntry *) sfxptr;

  /* get the right starting point */
  const char * key = ep->getKey();
  const unsigned char flg = ep->getFlag();

  // first index by flag which must exist
  ptr = (SfxEntry*)sFlag[flg];
  ep->setFlgNxt(ptr);
  sFlag[flg] = (AffEntry *) ep;


  // next index by affix string

  // handle the special case of null affix string
  if (strlen(key) == 0) {
    // always inset them at head of list at element 0
     ptr = (SfxEntry*)sStart[0];
     ep->setNext(ptr);
     sStart[0] = (AffEntry*)ep;
     return 0;
  }

  // now handle the normal case
  unsigned char sp = *((const unsigned char *)key);
  ptr = (SfxEntry*)sStart[sp];

  /* handle the insert at top of list case */
  if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) {
     ep->setNext(ptr);
     sStart[sp] = (AffEntry*)ep;
     return 0;
  }

  /* otherwise find where it fits in order and insert it */
  pptr = NULL;
  for (; ptr != NULL; ptr = ptr->getNext()) {
    if (strcmp( ep->getKey(), ptr->getKey() ) <= 0) break;
    pptr = ptr;
  }
  pptr->setNext(ep);
  ep->setNext(ptr);
  return 0;
}


// initialize the PfxEntry links NextEQ and NextNE to speed searching
int AffixMgr::process_pfx_order()
{
    PfxEntry* ptr;

    // loop through each prefix list starting point
    for (int i=1; i < SETSIZE; i++) {

         ptr = (PfxEntry*)pStart[i];

         // look through the remainder of the list
         //  and find next entry with affix that
         // the current one is not a subset of
         // mark that as destination for NextNE
         // use next in list that you are a subset
         // of as NextEQ

         for (; ptr != NULL; ptr = ptr->getNext()) {

	     PfxEntry * nptr = ptr->getNext();
             for (; nptr != NULL; nptr = nptr->getNext()) {
	         if (! isSubset( ptr->getKey() , nptr->getKey() )) break;
             }
             ptr->setNextNE(nptr);
             ptr->setNextEQ(NULL);
             if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey()))
                 ptr->setNextEQ(ptr->getNext());
         }

         // now clean up by adding smart search termination strings:
         // if you are already a superset of the previous prefix
         // but not a subset of the next, search can end here
         // so set NextNE properly

         ptr = (PfxEntry *) pStart[i];
         for (; ptr != NULL; ptr = ptr->getNext()) {
	     PfxEntry * nptr = ptr->getNext();
             PfxEntry * mptr = NULL;
             for (; nptr != NULL; nptr = nptr->getNext()) {
	         if (! isSubset(ptr->getKey(),nptr->getKey())) break;
                 mptr = nptr;
             }
             if (mptr) mptr->setNextNE(NULL);
         }
    }
    return 0;
}


// initialize the SfxEntry links NextEQ and NextNE to speed searching
int AffixMgr::process_sfx_order()
{
    SfxEntry* ptr;

    // loop through each prefix list starting point
    for (int i=1; i < SETSIZE; i++) {

         ptr = (SfxEntry *) sStart[i];

         // look through the remainder of the list
         //  and find next entry with affix that
         // the current one is not a subset of
         // mark that as destination for NextNE
         // use next in list that you are a subset
         // of as NextEQ

         for (; ptr != NULL; ptr = ptr->getNext()) {
	     SfxEntry * nptr = ptr->getNext();
             for (; nptr != NULL; nptr = nptr->getNext()) {
	         if (! isSubset(ptr->getKey(),nptr->getKey())) break;
             }
             ptr->setNextNE(nptr);
             ptr->setNextEQ(NULL);
             if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey()))
                 ptr->setNextEQ(ptr->getNext());
         }


         // now clean up by adding smart search termination strings:
         // if you are already a superset of the previous suffix
         // but not a subset of the next, search can end here
         // so set NextNE properly

         ptr = (SfxEntry *) sStart[i];
         for (; ptr != NULL; ptr = ptr->getNext()) {
	     SfxEntry * nptr = ptr->getNext();
             SfxEntry * mptr = NULL;
             for (; nptr != NULL; nptr = nptr->getNext()) {
	         if (! isSubset(ptr->getKey(),nptr->getKey())) break;
                 mptr = nptr;
             }
             if (mptr) mptr->setNextNE(NULL);
         }
    }
    return 0;
}


// takes aff file condition string and creates the
// conds array - please see the appendix at the end of the
// file affentry.cxx which describes what is going on here
// in much more detail

void AffixMgr::encodeit(struct affentry * ptr, char * cs)
{
  unsigned char c;
  int i, j, k;
  unsigned char mbr[MAXLNLEN];

  // now clear the conditions array */
  for (i=0;i<SETSIZE;i++) ptr->conds[i] = (unsigned char) 0;

  // now parse the string to create the conds array */
  int nc = strlen(cs);
  int neg = 0;   // complement indicator
  int grp = 0;   // group indicator
  int n = 0;     // number of conditions
  int ec = 0;    // end condition indicator
  int nm = 0;    // number of member in group

  // if no condition just return
  if (strcmp(cs,".")==0) {
    ptr->numconds = 0;
    return;
  }

  i = 0;
  while (i < nc) {
    c = *((unsigned char *)(cs + i));

    // start group indicator
    if (c == '[') {
       grp = 1;
       c = 0;
    }

    // complement flag
    if ((grp == 1) && (c == '^')) {
       neg = 1;
       c = 0;
    }

    // end goup indicator
    if (c == ']') {
       ec = 1;
       c = 0;
    }

    // add character of group to list
    if ((grp == 1) && (c != 0)) {
      *(mbr + nm) = c;
      nm++;
      c = 0;
    }

    // end of condition
    if (c != 0) {
       ec = 1;
    }


    if (ec) {
      if (grp == 1) {
        if (neg == 0) {
          // set the proper bits in the condition array vals for those chars
	  for (j=0;j<nm;j++) {
	     k = (unsigned int) mbr[j];
             ptr->conds[k] = ptr->conds[k] | (1 << n);
          }
	} else {
	  // complement so set all of them and then unset indicated ones
	   for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
	   for (j=0;j<nm;j++) {
	     k = (unsigned int) mbr[j];
             ptr->conds[k] = ptr->conds[k] & ~(1 << n);
	   }
        }
        neg = 0;
        grp = 0;
        nm = 0;
      } else {
         // not a group so just set the proper bit for this char
         // but first handle special case of . inside condition
         if (c == '.') {
	    // wild card character so set them all
            for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
         } else {
	    ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n);
         }
      }
      n++;
      ec = 0;
    }


    i++;
  }
  ptr->numconds = n;
  return;
}


// check word for prefixes
struct hentry * AffixMgr::prefix_check (const char * word, int len)
{
    struct hentry * rv= NULL;

    // first handle the special case of 0 length prefixes
    PfxEntry * pe = (PfxEntry *) pStart[0];
    while (pe) {
       rv = pe->check(word,len);
       if (rv) return rv;
       pe = pe->getNext();
    }

    // now handle the general case
    unsigned char sp = *((const unsigned char *)word);
    PfxEntry * pptr = (PfxEntry *)pStart[sp];

    while (pptr) {
        if (isSubset(pptr->getKey(),word)) {
	     rv = pptr->check(word,len);
             if (rv) return rv;
             pptr = pptr->getNextEQ();
        } else {
	     pptr = pptr->getNextNE();
        }
    }

    return NULL;
}

// check if compound word is correctly spelled
struct hentry * AffixMgr::compound_check (const char * word, int len, char compound_flag)
{
    int i;
    struct hentry * rv= NULL;
    char * st;
    char ch;

    // handle case of string too short to be a piece of a compound word
    if (len < cpdmin) return NULL;

    st = mystrdup(word);

    for (i=cpdmin; i < (len - (cpdmin-1)); i++) {

        ch = st[i];
	st[i] = '\0';

	rv = lookup(st);
        if (!rv) rv = affix_check(st,i);

	if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) {
	    rv = lookup((word+i));
	    if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) {
		free(st);
		return rv;
	    }
	    rv = affix_check((word+i),strlen(word+i));
	    if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) {
		free(st);
		return rv;
	    }
	    rv = compound_check((word+i),strlen(word+i),compound_flag);
	    if (rv) {
		free(st);
		return rv;
	    }

	}
        st[i] = ch;
    }
    free(st);
    return NULL;
}


// check word for suffixes
struct hentry * AffixMgr::suffix_check (const char * word, int len,
                       int sfxopts, AffEntry * ppfx)
{
    struct hentry * rv = NULL;

    // first handle the special case of 0 length suffixes
    SfxEntry * se = (SfxEntry *) sStart[0];
    while (se) {
       rv = se->check(word,len, sfxopts, ppfx);
       if (rv) return rv;
       se = se->getNext();
    }

    // now handle the general case
    char * tmpword = myrevstrdup(word);
    unsigned char sp = *((const unsigned char *)tmpword);
    SfxEntry * sptr = (SfxEntry *) sStart[sp];

    while (sptr) {
        if (isSubset(sptr->getKey(),tmpword)) {
	     rv = sptr->check(word,len, sfxopts, ppfx);
             if (rv) {
                  free(tmpword);
                  return rv;
             }
             sptr = sptr->getNextEQ();
        } else {
	     sptr = sptr->getNextNE();
        }
    }

    free(tmpword);
    return NULL;
}


// check if word with affixes is correctly spelled
struct hentry * AffixMgr::affix_check (const char * word, int len)
{
    struct hentry * rv= NULL;

    // check all prefixes (also crossed with suffixes if allowed)
    rv = prefix_check(word, len);
    if (rv) return rv;

    // if still not found check all suffixes
    rv = suffix_check(word, len, 0, NULL);
    return rv;
}


int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
                       const char * ts, int wl, const char * ap, int al)
{

    int nh=0;

    // first add root word to list

    if (nh < maxn) {
       wlst[nh].word = mystrdup(ts);
       wlst[nh].allow = (1 == 0);
       nh++;
    }

    // handle suffixes
    for (int i = 0; i < al; i++) {
       unsigned char c = (unsigned char) ap[i];
       SfxEntry * sptr = (SfxEntry *)sFlag[c];
       while (sptr) {
	 char * newword = sptr->add(ts, wl);
         if (newword) {
           if (nh < maxn) {
	      wlst[nh].word = newword;
              wlst[nh].allow = sptr->allowCross();
              nh++;
	   } else {
	      free(newword);
           }
	 }
         sptr = (SfxEntry *)sptr ->getFlgNxt();
       }
    }

    int n = nh;

    // handle cross products of prefixes and suffixes
    for (int j=1;j<n ;j++)
       if (wlst[j].allow) {
          for (int k = 0; k < al; k++) {
             unsigned char c = (unsigned char) ap[k];
             PfxEntry * cptr = (PfxEntry *) pFlag[c];
             while (cptr) {
                if (cptr->allowCross()) {
	            int l1 = strlen(wlst[j].word);
	            char * newword = cptr->add(wlst[j].word, l1);
                    if (newword) {
		       if (nh < maxn) {
	                  wlst[nh].word = newword;
                          wlst[nh].allow = cptr->allowCross();
                          nh++;
		       } else {
			  free(newword);
                       }
	            }
                }
                cptr = (PfxEntry *)cptr ->getFlgNxt();
             }
	  }
       }


    // now handle pure prefixes
    for (int m = 0; m < al; m ++) {
       unsigned char c = (unsigned char) ap[m];
       PfxEntry * ptr = (PfxEntry *) pFlag[c];
       while (ptr) {
	 char * newword = ptr->add(ts, wl);
         if (newword) {
	     if (nh < maxn) {
	        wlst[nh].word = newword;
                wlst[nh].allow = ptr->allowCross();
                nh++;
             } else {
	        free(newword);
	     }
	 }
         ptr = (PfxEntry *)ptr ->getFlgNxt();
       }
    }

    return nh;
}


// return length of replacing table
int AffixMgr::get_numrep()
{
  return numrep;
}

// return replacing table
struct replentry * AffixMgr::get_reptable()
{
  if (! reptable ) return NULL;
  return reptable;
}


// return length of character map table
int AffixMgr::get_nummap()
{
  return nummap;
}

// return character map table
struct mapentry * AffixMgr::get_maptable()
{
  if (! maptable ) return NULL;
  return maptable;
}

// return text encoding of dictionary
char * AffixMgr::get_encoding()
{
  if (! encoding ) {
      encoding = mystrdup("ISO8859-1");
  }
  return mystrdup(encoding);
}


// return the preferred try string for suggestions
char * AffixMgr::get_try_string()
{
  if (! trystring ) return NULL;
  return mystrdup(trystring);
}

// return the compound words control flag
char * AffixMgr::get_compound()
{
  if (! compound ) return NULL;
  return mystrdup(compound);
}

// utility method to look up root words in hash table
struct hentry * AffixMgr::lookup(const char * word)
{
  if (! pHMgr) return NULL;
  return pHMgr->lookup(word);
}

// return nosplitsugs
bool AffixMgr::get_nosplitsugs(void)
{
  return nosplitsugs;
}

/* parse in the try string */
int  AffixMgr::parse_try(char * line)
{
   if (trystring) {
      fprintf(stderr,"error: duplicate TRY strings\n");
      return 1;
   }
   char * tp = line;
   char * piece;
   int i = 0;
   int np = 0;
   while ((piece=mystrsep(&tp,' '))) {
      if (*piece != '\0') {
          switch(i) {
	      case 0: { np++; break; }
              case 1: { trystring = mystrdup(piece); np++; break; }
	      default: break;
          }
          i++;
      }
      free(piece);
   }
   if (np != 2) {
      fprintf(stderr,"error: missing TRY information\n");
      return 1;
   }
   return 0;
}


/* parse in the name of the character set used by the .dict and .aff */
int  AffixMgr::parse_set(char * line)
{
   if (encoding) {
      fprintf(stderr,"error: duplicate SET strings\n");
      return 1;
   }
   char * tp = line;
   char * piece;
   int i = 0;
   int np = 0;
   while ((piece=mystrsep(&tp,' '))) {
      if (*piece != '\0') {
          switch(i) {
	     case 0: { np++; break; }
             case 1: { encoding = mystrdup(piece); np++; break; }
	     default: break;
          }
          i++;
      }
      free(piece);
   }
   if (np != 2) {
      fprintf(stderr,"error: missing SET information\n");
      return 1;
   }
   return 0;
}


/* parse in the flag used by the controlled compound words */
int  AffixMgr::parse_cpdflag(char * line)
{
   if (compound) {
      fprintf(stderr,"error: duplicate compound flags used\n");
      return 1;
   }
   char * tp = line;
   char * piece;
   int i = 0;
   int np = 0;
   while ((piece=mystrsep(&tp,' '))) {
      if (*piece != '\0') {
          switch(i) {
	     case 0: { np++; break; }
             case 1: { compound = mystrdup(piece); np++; break; }
	     default: break;
          }
          i++;
      }
      free(piece);
   }
   if (np != 2) {
      fprintf(stderr,"error: missing compound flag information\n");
      return 1;
   }
   return 0;
}


/* parse in the min compound word length */
int  AffixMgr::parse_cpdmin(char * line)
{
   char * tp = line;
   char * piece;
   int i = 0;
   int np = 0;
   while ((piece=mystrsep(&tp,' '))) {
      if (*piece != '\0') {
          switch(i) {
	     case 0: { np++; break; }
             case 1: { cpdmin = atoi(piece); np++; break; }
	     default: break;
          }
          i++;
      }
      free(piece);
   }
   if (np != 2) {
      fprintf(stderr,"error: missing compound min information\n");
      return 1;
   }
   if ((cpdmin < 1) || (cpdmin > 50)) cpdmin = 3;
   return 0;
}


/* parse in the typical fault correcting table */
int  AffixMgr::parse_reptable(char * line, FILE * af)
{
   if (numrep != 0) {
      fprintf(stderr,"error: duplicate REP tables used\n");
      return 1;
   }
   char * tp = line;
   char * piece;
   int i = 0;
   int np = 0;
   while ((piece=mystrsep(&tp,' '))) {
       if (*piece != '\0') {
          switch(i) {
	     case 0: { np++; break; }
             case 1: {
                       numrep = atoi(piece);
	               if (numrep < 1) {
			  fprintf(stderr,"incorrect number of entries in replacement table\n");
			  free(piece);
                          return 1;
                       }
                       reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
                       np++;
                       break;
	             }
	     default: break;
          }
          i++;
       }
       free(piece);
   }
   if (np != 2) {
      fprintf(stderr,"error: missing replacement table information\n");
      return 1;
   }

   /* now parse the numrep lines to read in the remainder of the table */
   char * nl = line;
   for (int j=0; j < numrep; j++) {
        fgets(nl,MAXLNLEN,af);
        mychomp(nl);
        tp = nl;
        i = 0;
        reptable[j].pattern = NULL;
        reptable[j].replacement = NULL;
        while ((piece=mystrsep(&tp,' '))) {
           if (*piece != '\0') {
               switch(i) {
                  case 0: {
		             if (strncmp(piece,"REP",3) != 0) {
		                 fprintf(stderr,"error: replacement table is corrupt\n");
                                 free(piece);
                                 return 1;
                             }
                             break;
		          }
                  case 1: { reptable[j].pattern = mystrdup(piece); break; }
                  case 2: { reptable[j].replacement = mystrdup(piece); break; }
		  default: break;
               }
               i++;
           }
           free(piece);
        }
	if ((!(reptable[j].pattern)) || (!(reptable[j].replacement))) {
	     fprintf(stderr,"error: replacement table is corrupt\n");
             return 1;
        }
   }
   return 0;
}


/* parse in the character map table */
int  AffixMgr::parse_maptable(char * line, FILE * af)
{
   if (nummap != 0) {
      fprintf(stderr,"error: duplicate MAP tables used\n");
      return 1;
   }
   char * tp = line;
   char * piece;
   int i = 0;
   int np = 0;
   while ((piece=mystrsep(&tp,' '))) {
       if (*piece != '\0') {
          switch(i) {
	     case 0: { np++; break; }
             case 1: {
                       nummap = atoi(piece);
	               if (nummap < 1) {
			  fprintf(stderr,"incorrect number of entries in map table\n");
			  free(piece);
                          return 1;
                       }
                       maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
                       np++;
                       break;
	             }
	     default: break;
          }
          i++;
       }
       free(piece);
   }
   if (np != 2) {
      fprintf(stderr,"error: missing map table information\n");
      return 1;
   }

   /* now parse the nummap lines to read in the remainder of the table */
   char * nl = line;
   for (int j=0; j < nummap; j++) {
        fgets(nl,MAXLNLEN,af);
        mychomp(nl);
        tp = nl;
        i = 0;
        maptable[j].set = NULL;
        maptable[j].len = 0;
        while ((piece=mystrsep(&tp,' '))) {
           if (*piece != '\0') {
               switch(i) {
                  case 0: {
		             if (strncmp(piece,"MAP",3) != 0) {
		                 fprintf(stderr,"error: map table is corrupt\n");
                                 free(piece);
                                 return 1;
                             }
                             break;
		          }
                  case 1: { maptable[j].set = mystrdup(piece);
		            maptable[j].len = strlen(maptable[j].set);
                            break; }
		  default: break;
               }
               i++;
           }
           free(piece);
        }
	if ((!(maptable[j].set)) || (!(maptable[j].len))) {
	     fprintf(stderr,"error: map table is corrupt\n");
             return 1;
        }
   }
   return 0;
}


int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
{
   int numents = 0;      // number of affentry structures to parse
   char achar='\0';      // affix char identifier
   short ff=0;
   struct affentry * ptr= NULL;
   struct affentry * nptr= NULL;

   char * tp = line;
   char * nl = line;
   char * piece;
   int i = 0;

   // split affix header line into pieces

   int np = 0;
   while ((piece=mystrsep(&tp,' '))) {
      if (*piece != '\0') {
          switch(i) {
             // piece 1 - is type of affix
             case 0: { np++; break; }

             // piece 2 - is affix char
             case 1: { np++; achar = *piece; break; }

             // piece 3 - is cross product indicator
             case 2: { np++; if (*piece == 'Y') ff = XPRODUCT; break; }

             // piece 4 - is number of affentries
             case 3: {
                       np++;
                       numents = atoi(piece);
                       ptr = (struct affentry *) malloc(numents * sizeof(struct affentry));
                       ptr->xpflg = ff;
                       ptr->achar = achar;
                       break;
                     }

	     default: break;
          }
          i++;
      }
      free(piece);
   }
   // check to make sure we parsed enough pieces
   if (np != 4) {
       fprintf(stderr, "error: affix %c header has insufficient data in line %s\n",achar,nl);
       free(ptr);
       return 1;
   }

   // store away ptr to first affentry
   nptr = ptr;

   // now parse numents affentries for this affix
   for (int j=0; j < numents; j++) {
      fgets(nl,MAXLNLEN,af);
      mychomp(nl);
      tp = nl;
      i = 0;
      np = 0;

      // split line into pieces
      while ((piece=mystrsep(&tp,' '))) {
         if (*piece != '\0') {
             switch(i) {

                // piece 1 - is type
                case 0: {
                          np++;
                          if (nptr != ptr) nptr->xpflg = ptr->xpflg;
                          break;
                        }

                // piece 2 - is affix char
                case 1: {
		          np++;
                          if (*piece != achar) {
                              fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
                              fprintf(stderr, "error: possible incorrect count\n");
                              free(piece);
                              return 1;
                          }
                          if (nptr != ptr) nptr->achar = ptr->achar;
                          break;
		        }

                // piece 3 - is string to strip or 0 for null
                case 2: {
                          np++;
                          nptr->strip = mystrdup(piece);
                          nptr->stripl = strlen(nptr->strip);
                          if (strcmp(nptr->strip,"0") == 0) {
                              free(nptr->strip);
                              nptr->strip=mystrdup("");
			      nptr->stripl = 0;
                          }
                          break;
                        }

                // piece 4 - is affix string or 0 for null
                case 3: {
		          np++;
                          nptr->appnd = mystrdup(piece);
                          nptr->appndl = strlen(nptr->appnd);
                          if (strcmp(nptr->appnd,"0") == 0) {
                              free(nptr->appnd);
                              nptr->appnd=mystrdup("");
			      nptr->appndl = 0;
                          }
                          break;
                        }

                // piece 5 - is the conditions descriptions
                case 4: { np++; encodeit(nptr,piece); }

		default: break;
             }
             i++;
         }
         free(piece);
      }
      // check to make sure we parsed enough pieces
      if (np != 5) {
          fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
          free(ptr);
          return 1;
      }
      nptr++;
   }

   // now create SfxEntry or PfxEntry objects and use links to
   // build an ordered (sorted by affix string) list
   nptr = ptr;
   for (int k = 0; k < numents; k++) {
      if (at == 'P') {
	  PfxEntry * pfxptr = new PfxEntry(this,nptr);
          build_pfxlist((AffEntry *)pfxptr);
      } else {
	  SfxEntry * sfxptr = new SfxEntry(this,nptr);
          build_sfxlist((AffEntry *)sfxptr);
      }
      nptr++;
   }
   free(ptr);
   return 0;
}