#include "license.readme"

#include <cstdlib>
#include <cstring>
#include <cstdio>

#include "affixmgr.hxx"
#include "affentry.hxx"

#if !defined(_MSC_VER)
using namespace std;
#endif


// First some base level utility routines (defined elsewhere)
extern void   mychomp(char * s);
extern char * mystrdup(const char * s);
extern char * myrevstrdup(const char * s);
extern char * mystrsep(char ** sptr, const char delim);
extern int    isSubset(const char * s1, const char * s2);


// Construct an affix manager: remember the hash manager, reset every table
// to its empty state and then load the affix description file at affpath.
// A load failure is only reported on stderr; the object is still constructed.
AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
{
  // register hash manager
  pHMgr = ptr;

  // nothing has been read from the aff file yet
  trystring = NULL;
  encoding = NULL;
  reptable = NULL;
  numrep = 0;
  maptable = NULL;
  nummap = 0;
  compound = NULL;
  nosplitsugs = false;
  cpdmin = 3;  // default value

  // empty index tables (by first character and by flag)
  for (int i = 0; i < SETSIZE; i++) {
    pStart[i] = NULL;
    sStart[i] = NULL;
    pFlag[i] = NULL;
    sFlag[i] = NULL;
  }

  // load affix data from aff file
  if (parse_file(affpath)) {
    fprintf(stderr,"Failure loading aff file %s\n",affpath);
    fflush(stderr);
  }
}


// Release every prefix/suffix entry reachable from the pStart/sStart lists
// and all tables and strings allocated while parsing the aff file.
AffixMgr::~AffixMgr()
{
  // pass through linked prefix entries and clean up
  for (int i = 0; i < SETSIZE; i++) {
    pFlag[i] = NULL;
    PfxEntry * pe = (PfxEntry *) pStart[i];
    while (pe) {
      // fetch the successor before the node is destroyed
      PfxEntry * following = pe->getNext();
      delete pe;
      pe = following;
    }
    pStart[i] = NULL;
  }

  // pass through linked suffix entries and clean up
  for (int j = 0; j < SETSIZE; j++) {
    sFlag[j] = NULL;
    SfxEntry * se = (SfxEntry *) sStart[j];
    while (se) {
      SfxEntry * following = se->getNext();
      delete se;
      se = following;
    }
    sStart[j] = NULL;
  }

  if (trystring) free(trystring);
  trystring = NULL;

  if (encoding) free(encoding);
  encoding = NULL;

  if (maptable) {
    for (int j = 0; j < nummap; j++) {
      free(maptable[j].set);
      maptable[j].set = NULL;
      maptable[j].len = 0;
    }
    free(maptable);
    maptable = NULL;
  }
  nummap = 0;

  if (reptable) {
    for (int j = 0; j < numrep; j++) {
      free(reptable[j].pattern);
      free(reptable[j].replacement);
      reptable[j].pattern = NULL;
      reptable[j].replacement = NULL;
    }
    free(reptable);
    reptable = NULL;
  }
  numrep = 0;

  if (compound) free(compound);
  compound = NULL;

  pHMgr = NULL;
  cpdmin = 0;
}


// read in aff file
and build up prefix and suffix entry objects int AffixMgr::parse_file(const char * affpath) { // io buffers char line[MAXLNLEN+1]; // affix type char ft; // open the affix file FILE * afflst; afflst = fopen(affpath,"r"); if (!afflst) { fprintf(stderr,"Error - could not open affix description file %s\n",affpath); return 1; } // step one is to parse the affix file building up the internal // affix data structures // read in each line ignoring any that do not // start with a known line type indicator while (fgets(line,MAXLNLEN,afflst)) { mychomp(line); /* parse in the try string */ if (strncmp(line,"TRY",3) == 0) { if (parse_try(line)) { return 1; } } /* parse in the name of the character set used by the .dict and .aff */ if (strncmp(line,"SET",3) == 0) { if (parse_set(line)) { return 1; } } /* parse in the flag used by the controlled compound words */ if (strncmp(line,"COMPOUNDFLAG",12) == 0) { if (parse_cpdflag(line)) { return 1; } } /* parse in the flag used by the controlled compound words */ if (strncmp(line,"COMPOUNDMIN",11) == 0) { if (parse_cpdmin(line)) { return 1; } } /* parse in the typical fault correcting table */ if (strncmp(line,"REP",3) == 0) { if (parse_reptable(line, afflst)) { return 1; } } /* parse in the related character map table */ if (strncmp(line,"MAP",3) == 0) { if (parse_maptable(line, afflst)) { return 1; } } // parse this affix: P - prefix, S - suffix ft = ' '; if (strncmp(line,"PFX",3) == 0) ft = 'P'; if (strncmp(line,"SFX",3) == 0) ft = 'S'; if (ft != ' ') { if (parse_affix(line, ft, afflst)) { return 1; } } // handle NOSPLITSUGS if (strncmp(line,"NOSPLITSUGS",11) == 0) nosplitsugs=(0==0); } fclose(afflst); // now we can speed up performance greatly taking advantage of the // relationship between the affixes and the idea of "subsets". // View each prefix as a potential leading subset of another and view // each suffix (reversed) as a potential trailing subset of another. 
// To illustrate this relationship if we know the prefix "ab" is found in the // word to examine, only prefixes that "ab" is a leading subset of need be examined. // Furthermore is "ab" is not present then none of the prefixes that "ab" is // is a subset need be examined. // The same argument goes for suffix string that are reversed. // Then to top this off why not examine the first char of the word to quickly // limit the set of prefixes to examine (i.e. the prefixes to examine must // be leading supersets of the first character of the word (if they exist) // To take advantage of this "subset" relationship, we need to add two links // from entry. One to take next if the current prefix is found (call it nexteq) // and one to take next if the current prefix is not found (call it nextne). // Since we have built ordered lists, all that remains is to properly intialize // the nextne and nexteq pointers that relate them process_pfx_order(); process_sfx_order(); return 0; } // we want to be able to quickly access prefix information // both by prefix flag, and sorted by prefix string itself // so we need to set up two indexes int AffixMgr::build_pfxlist(AffEntry* pfxptr) { PfxEntry * ptr; PfxEntry * pptr; PfxEntry * ep = (PfxEntry*) pfxptr; // get the right starting points const char * key = ep->getKey(); const unsigned char flg = ep->getFlag(); // first index by flag which must exist ptr = (PfxEntry*)pFlag[flg]; ep->setFlgNxt(ptr); pFlag[flg] = (AffEntry *) ep; // next index by affix string // handle the special case of null affix string if (strlen(key) == 0) { // always inset them at head of list at element 0 ptr = (PfxEntry*)pStart[0]; ep->setNext(ptr); pStart[0] = (AffEntry*)ep; return 0; } // now handle the general case unsigned char sp = *((const unsigned char *)key); ptr = (PfxEntry*)pStart[sp]; /* handle the insert at top of list case */ if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) { ep->setNext(ptr); pStart[sp] = (AffEntry*)ep; return 0; } /* 
otherwise find where it fits in order and insert it */ pptr = NULL; for (; ptr != NULL; ptr = ptr->getNext()) { if (strcmp( ep->getKey() , ptr->getKey() ) <= 0) break; pptr = ptr; } pptr->setNext(ep); ep->setNext(ptr); return 0; } // we want to be able to quickly access suffix information // both by suffix flag, and sorted by the reverse of the // suffix string itself; so we need to set up two indexes int AffixMgr::build_sfxlist(AffEntry* sfxptr) { SfxEntry * ptr; SfxEntry * pptr; SfxEntry * ep = (SfxEntry *) sfxptr; /* get the right starting point */ const char * key = ep->getKey(); const unsigned char flg = ep->getFlag(); // first index by flag which must exist ptr = (SfxEntry*)sFlag[flg]; ep->setFlgNxt(ptr); sFlag[flg] = (AffEntry *) ep; // next index by affix string // handle the special case of null affix string if (strlen(key) == 0) { // always inset them at head of list at element 0 ptr = (SfxEntry*)sStart[0]; ep->setNext(ptr); sStart[0] = (AffEntry*)ep; return 0; } // now handle the normal case unsigned char sp = *((const unsigned char *)key); ptr = (SfxEntry*)sStart[sp]; /* handle the insert at top of list case */ if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) { ep->setNext(ptr); sStart[sp] = (AffEntry*)ep; return 0; } /* otherwise find where it fits in order and insert it */ pptr = NULL; for (; ptr != NULL; ptr = ptr->getNext()) { if (strcmp( ep->getKey(), ptr->getKey() ) <= 0) break; pptr = ptr; } pptr->setNext(ep); ep->setNext(ptr); return 0; } // initialize the PfxEntry links NextEQ and NextNE to speed searching int AffixMgr::process_pfx_order() { PfxEntry* ptr; // loop through each prefix list starting point for (int i=1; i < SETSIZE; i++) { ptr = (PfxEntry*)pStart[i]; // look through the remainder of the list // and find next entry with affix that // the current one is not a subset of // mark that as destination for NextNE // use next in list that you are a subset // of as NextEQ for (; ptr != NULL; ptr = ptr->getNext()) { PfxEntry * 
nptr = ptr->getNext(); for (; nptr != NULL; nptr = nptr->getNext()) { if (! isSubset( ptr->getKey() , nptr->getKey() )) break; } ptr->setNextNE(nptr); ptr->setNextEQ(NULL); if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey())) ptr->setNextEQ(ptr->getNext()); } // now clean up by adding smart search termination strings: // if you are already a superset of the previous prefix // but not a subset of the next, search can end here // so set NextNE properly ptr = (PfxEntry *) pStart[i]; for (; ptr != NULL; ptr = ptr->getNext()) { PfxEntry * nptr = ptr->getNext(); PfxEntry * mptr = NULL; for (; nptr != NULL; nptr = nptr->getNext()) { if (! isSubset(ptr->getKey(),nptr->getKey())) break; mptr = nptr; } if (mptr) mptr->setNextNE(NULL); } } return 0; } // initialize the SfxEntry links NextEQ and NextNE to speed searching int AffixMgr::process_sfx_order() { SfxEntry* ptr; // loop through each prefix list starting point for (int i=1; i < SETSIZE; i++) { ptr = (SfxEntry *) sStart[i]; // look through the remainder of the list // and find next entry with affix that // the current one is not a subset of // mark that as destination for NextNE // use next in list that you are a subset // of as NextEQ for (; ptr != NULL; ptr = ptr->getNext()) { SfxEntry * nptr = ptr->getNext(); for (; nptr != NULL; nptr = nptr->getNext()) { if (! isSubset(ptr->getKey(),nptr->getKey())) break; } ptr->setNextNE(nptr); ptr->setNextEQ(NULL); if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey())) ptr->setNextEQ(ptr->getNext()); } // now clean up by adding smart search termination strings: // if you are already a superset of the previous suffix // but not a subset of the next, search can end here // so set NextNE properly ptr = (SfxEntry *) sStart[i]; for (; ptr != NULL; ptr = ptr->getNext()) { SfxEntry * nptr = ptr->getNext(); SfxEntry * mptr = NULL; for (; nptr != NULL; nptr = nptr->getNext()) { if (! 
isSubset(ptr->getKey(),nptr->getKey())) break; mptr = nptr; } if (mptr) mptr->setNextNE(NULL); } } return 0; } // takes aff file condition string and creates the // conds array - please see the appendix at the end of the // file affentry.cxx which describes what is going on here // in much more detail void AffixMgr::encodeit(struct affentry * ptr, char * cs) { unsigned char c; int i, j, k; unsigned char mbr[MAXLNLEN]; // now clear the conditions array */ for (i=0;iconds[i] = (unsigned char) 0; // now parse the string to create the conds array */ int nc = strlen(cs); int neg = 0; // complement indicator int grp = 0; // group indicator int n = 0; // number of conditions int ec = 0; // end condition indicator int nm = 0; // number of member in group // if no condition just return if (strcmp(cs,".")==0) { ptr->numconds = 0; return; } i = 0; while (i < nc) { c = *((unsigned char *)(cs + i)); // start group indicator if (c == '[') { grp = 1; c = 0; } // complement flag if ((grp == 1) && (c == '^')) { neg = 1; c = 0; } // end goup indicator if (c == ']') { ec = 1; c = 0; } // add character of group to list if ((grp == 1) && (c != 0)) { *(mbr + nm) = c; nm++; c = 0; } // end of condition if (c != 0) { ec = 1; } if (ec) { if (grp == 1) { if (neg == 0) { // set the proper bits in the condition array vals for those chars for (j=0;jconds[k] = ptr->conds[k] | (1 << n); } } else { // complement so set all of them and then unset indicated ones for (j=0;jconds[j] = ptr->conds[j] | (1 << n); for (j=0;jconds[k] = ptr->conds[k] & ~(1 << n); } } neg = 0; grp = 0; nm = 0; } else { // not a group so just set the proper bit for this char // but first handle special case of . 
inside condition if (c == '.') { // wild card character so set them all for (j=0;jconds[j] = ptr->conds[j] | (1 << n); } else { ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n); } } n++; ec = 0; } i++; } ptr->numconds = n; return; } // check word for prefixes struct hentry * AffixMgr::prefix_check (const char * word, int len) { struct hentry * rv= NULL; // first handle the special case of 0 length prefixes PfxEntry * pe = (PfxEntry *) pStart[0]; while (pe) { rv = pe->check(word,len); if (rv) return rv; pe = pe->getNext(); } // now handle the general case unsigned char sp = *((const unsigned char *)word); PfxEntry * pptr = (PfxEntry *)pStart[sp]; while (pptr) { if (isSubset(pptr->getKey(),word)) { rv = pptr->check(word,len); if (rv) return rv; pptr = pptr->getNextEQ(); } else { pptr = pptr->getNextNE(); } } return NULL; } // check if compound word is correctly spelled struct hentry * AffixMgr::compound_check (const char * word, int len, char compound_flag) { int i; struct hentry * rv= NULL; char * st; char ch; // handle case of string too short to be a piece of a compound word if (len < cpdmin) return NULL; st = mystrdup(word); for (i=cpdmin; i < (len - (cpdmin-1)); i++) { ch = st[i]; st[i] = '\0'; rv = lookup(st); if (!rv) rv = affix_check(st,i); if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) { rv = lookup((word+i)); if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) { free(st); return rv; } rv = affix_check((word+i),strlen(word+i)); if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) { free(st); return rv; } rv = compound_check((word+i),strlen(word+i),compound_flag); if (rv) { free(st); return rv; } } st[i] = ch; } free(st); return NULL; } // check word for suffixes struct hentry * AffixMgr::suffix_check (const char * word, int len, int sfxopts, AffEntry * ppfx) { struct hentry * rv = NULL; // first handle the special case of 0 length suffixes SfxEntry * se = (SfxEntry *) sStart[0]; while (se) { rv = se->check(word,len, 
sfxopts, ppfx); if (rv) return rv; se = se->getNext(); } // now handle the general case char * tmpword = myrevstrdup(word); unsigned char sp = *((const unsigned char *)tmpword); SfxEntry * sptr = (SfxEntry *) sStart[sp]; while (sptr) { if (isSubset(sptr->getKey(),tmpword)) { rv = sptr->check(word,len, sfxopts, ppfx); if (rv) { free(tmpword); return rv; } sptr = sptr->getNextEQ(); } else { sptr = sptr->getNextNE(); } } free(tmpword); return NULL; } // check if word with affixes is correctly spelled struct hentry * AffixMgr::affix_check (const char * word, int len) { struct hentry * rv= NULL; // check all prefixes (also crossed with suffixes if allowed) rv = prefix_check(word, len); if (rv) return rv; // if still not found check all suffixes rv = suffix_check(word, len, 0, NULL); return rv; } int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts, int wl, const char * ap, int al) { int nh=0; // first add root word to list if (nh < maxn) { wlst[nh].word = mystrdup(ts); wlst[nh].allow = (1 == 0); nh++; } // handle suffixes for (int i = 0; i < al; i++) { unsigned char c = (unsigned char) ap[i]; SfxEntry * sptr = (SfxEntry *)sFlag[c]; while (sptr) { char * newword = sptr->add(ts, wl); if (newword) { if (nh < maxn) { wlst[nh].word = newword; wlst[nh].allow = sptr->allowCross(); nh++; } else { free(newword); } } sptr = (SfxEntry *)sptr ->getFlgNxt(); } } int n = nh; // handle cross products of prefixes and suffixes for (int j=1;jallowCross()) { int l1 = strlen(wlst[j].word); char * newword = cptr->add(wlst[j].word, l1); if (newword) { if (nh < maxn) { wlst[nh].word = newword; wlst[nh].allow = cptr->allowCross(); nh++; } else { free(newword); } } } cptr = (PfxEntry *)cptr ->getFlgNxt(); } } } // now handle pure prefixes for (int m = 0; m < al; m ++) { unsigned char c = (unsigned char) ap[m]; PfxEntry * ptr = (PfxEntry *) pFlag[c]; while (ptr) { char * newword = ptr->add(ts, wl); if (newword) { if (nh < maxn) { wlst[nh].word = newword; 
wlst[nh].allow = ptr->allowCross(); nh++; } else { free(newword); } } ptr = (PfxEntry *)ptr ->getFlgNxt(); } } return nh; } // return length of replacing table int AffixMgr::get_numrep() { return numrep; } // return replacing table struct replentry * AffixMgr::get_reptable() { if (! reptable ) return NULL; return reptable; } // return length of character map table int AffixMgr::get_nummap() { return nummap; } // return character map table struct mapentry * AffixMgr::get_maptable() { if (! maptable ) return NULL; return maptable; } // return text encoding of dictionary char * AffixMgr::get_encoding() { if (! encoding ) { encoding = mystrdup("ISO8859-1"); } return mystrdup(encoding); } // return the preferred try string for suggestions char * AffixMgr::get_try_string() { if (! trystring ) return NULL; return mystrdup(trystring); } // return the compound words control flag char * AffixMgr::get_compound() { if (! compound ) return NULL; return mystrdup(compound); } // utility method to look up root words in hash table struct hentry * AffixMgr::lookup(const char * word) { if (! 
pHMgr) return NULL; return pHMgr->lookup(word); } // return nosplitsugs bool AffixMgr::get_nosplitsugs(void) { return nosplitsugs; } /* parse in the try string */ int AffixMgr::parse_try(char * line) { if (trystring) { fprintf(stderr,"error: duplicate TRY strings\n"); return 1; } char * tp = line; char * piece; int i = 0; int np = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { np++; break; } case 1: { trystring = mystrdup(piece); np++; break; } default: break; } i++; } free(piece); } if (np != 2) { fprintf(stderr,"error: missing TRY information\n"); return 1; } return 0; } /* parse in the name of the character set used by the .dict and .aff */ int AffixMgr::parse_set(char * line) { if (encoding) { fprintf(stderr,"error: duplicate SET strings\n"); return 1; } char * tp = line; char * piece; int i = 0; int np = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { np++; break; } case 1: { encoding = mystrdup(piece); np++; break; } default: break; } i++; } free(piece); } if (np != 2) { fprintf(stderr,"error: missing SET information\n"); return 1; } return 0; } /* parse in the flag used by the controlled compound words */ int AffixMgr::parse_cpdflag(char * line) { if (compound) { fprintf(stderr,"error: duplicate compound flags used\n"); return 1; } char * tp = line; char * piece; int i = 0; int np = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { np++; break; } case 1: { compound = mystrdup(piece); np++; break; } default: break; } i++; } free(piece); } if (np != 2) { fprintf(stderr,"error: missing compound flag information\n"); return 1; } return 0; } /* parse in the min compound word length */ int AffixMgr::parse_cpdmin(char * line) { char * tp = line; char * piece; int i = 0; int np = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { np++; break; } case 1: { cpdmin = atoi(piece); np++; break; } default: break; } i++; } free(piece); } if 
(np != 2) { fprintf(stderr,"error: missing compound min information\n"); return 1; } if ((cpdmin < 1) || (cpdmin > 50)) cpdmin = 3; return 0; } /* parse in the typical fault correcting table */ int AffixMgr::parse_reptable(char * line, FILE * af) { if (numrep != 0) { fprintf(stderr,"error: duplicate REP tables used\n"); return 1; } char * tp = line; char * piece; int i = 0; int np = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { np++; break; } case 1: { numrep = atoi(piece); if (numrep < 1) { fprintf(stderr,"incorrect number of entries in replacement table\n"); free(piece); return 1; } reptable = (replentry *) malloc(numrep * sizeof(struct replentry)); np++; break; } default: break; } i++; } free(piece); } if (np != 2) { fprintf(stderr,"error: missing replacement table information\n"); return 1; } /* now parse the numrep lines to read in the remainder of the table */ char * nl = line; for (int j=0; j < numrep; j++) { fgets(nl,MAXLNLEN,af); mychomp(nl); tp = nl; i = 0; reptable[j].pattern = NULL; reptable[j].replacement = NULL; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { if (strncmp(piece,"REP",3) != 0) { fprintf(stderr,"error: replacement table is corrupt\n"); free(piece); return 1; } break; } case 1: { reptable[j].pattern = mystrdup(piece); break; } case 2: { reptable[j].replacement = mystrdup(piece); break; } default: break; } i++; } free(piece); } if ((!(reptable[j].pattern)) || (!(reptable[j].replacement))) { fprintf(stderr,"error: replacement table is corrupt\n"); return 1; } } return 0; } /* parse in the character map table */ int AffixMgr::parse_maptable(char * line, FILE * af) { if (nummap != 0) { fprintf(stderr,"error: duplicate MAP tables used\n"); return 1; } char * tp = line; char * piece; int i = 0; int np = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { np++; break; } case 1: { nummap = atoi(piece); if (nummap < 1) { fprintf(stderr,"incorrect 
number of entries in map table\n"); free(piece); return 1; } maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry)); np++; break; } default: break; } i++; } free(piece); } if (np != 2) { fprintf(stderr,"error: missing map table information\n"); return 1; } /* now parse the nummap lines to read in the remainder of the table */ char * nl = line; for (int j=0; j < nummap; j++) { fgets(nl,MAXLNLEN,af); mychomp(nl); tp = nl; i = 0; maptable[j].set = NULL; maptable[j].len = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { case 0: { if (strncmp(piece,"MAP",3) != 0) { fprintf(stderr,"error: map table is corrupt\n"); free(piece); return 1; } break; } case 1: { maptable[j].set = mystrdup(piece); maptable[j].len = strlen(maptable[j].set); break; } default: break; } i++; } free(piece); } if ((!(maptable[j].set)) || (!(maptable[j].len))) { fprintf(stderr,"error: map table is corrupt\n"); return 1; } } return 0; } int AffixMgr::parse_affix(char * line, const char at, FILE * af) { int numents = 0; // number of affentry structures to parse char achar='\0'; // affix char identifier short ff=0; struct affentry * ptr= NULL; struct affentry * nptr= NULL; char * tp = line; char * nl = line; char * piece; int i = 0; // split affix header line into pieces int np = 0; while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { // piece 1 - is type of affix case 0: { np++; break; } // piece 2 - is affix char case 1: { np++; achar = *piece; break; } // piece 3 - is cross product indicator case 2: { np++; if (*piece == 'Y') ff = XPRODUCT; break; } // piece 4 - is number of affentries case 3: { np++; numents = atoi(piece); ptr = (struct affentry *) malloc(numents * sizeof(struct affentry)); ptr->xpflg = ff; ptr->achar = achar; break; } default: break; } i++; } free(piece); } // check to make sure we parsed enough pieces if (np != 4) { fprintf(stderr, "error: affix %c header has insufficient data in line %s\n",achar,nl); free(ptr); return 1; } // 
store away ptr to first affentry nptr = ptr; // now parse numents affentries for this affix for (int j=0; j < numents; j++) { fgets(nl,MAXLNLEN,af); mychomp(nl); tp = nl; i = 0; np = 0; // split line into pieces while ((piece=mystrsep(&tp,' '))) { if (*piece != '\0') { switch(i) { // piece 1 - is type case 0: { np++; if (nptr != ptr) nptr->xpflg = ptr->xpflg; break; } // piece 2 - is affix char case 1: { np++; if (*piece != achar) { fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl); fprintf(stderr, "error: possible incorrect count\n"); free(piece); return 1; } if (nptr != ptr) nptr->achar = ptr->achar; break; } // piece 3 - is string to strip or 0 for null case 2: { np++; nptr->strip = mystrdup(piece); nptr->stripl = strlen(nptr->strip); if (strcmp(nptr->strip,"0") == 0) { free(nptr->strip); nptr->strip=mystrdup(""); nptr->stripl = 0; } break; } // piece 4 - is affix string or 0 for null case 3: { np++; nptr->appnd = mystrdup(piece); nptr->appndl = strlen(nptr->appnd); if (strcmp(nptr->appnd,"0") == 0) { free(nptr->appnd); nptr->appnd=mystrdup(""); nptr->appndl = 0; } break; } // piece 5 - is the conditions descriptions case 4: { np++; encodeit(nptr,piece); } default: break; } i++; } free(piece); } // check to make sure we parsed enough pieces if (np != 5) { fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl); free(ptr); return 1; } nptr++; } // now create SfxEntry or PfxEntry objects and use links to // build an ordered (sorted by affix string) list nptr = ptr; for (int k = 0; k < numents; k++) { if (at == 'P') { PfxEntry * pfxptr = new PfxEntry(this,nptr); build_pfxlist((AffEntry *)pfxptr); } else { SfxEntry * sfxptr = new SfxEntry(this,nptr); build_sfxlist((AffEntry *)sfxptr); } nptr++; } free(ptr); return 0; }