Added MySpell library to project
This commit is contained in:
parent
6455734f3d
commit
dbac4566d8
393
goldlib/myspell/affentry.cxx
Normal file
393
goldlib/myspell/affentry.cxx
Normal file
@ -0,0 +1,393 @@
|
|||||||
|
#include "license.readme"
|
||||||
|
|
||||||
|
|
||||||
|
#include <cctype>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "affentry.hxx"
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
using namespace std;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern char * mystrdup(const char * s);
|
||||||
|
extern char * myrevstrdup(const char * s);
|
||||||
|
|
||||||
|
// Build a prefix entry from a parsed affix record.
//
// pmgr - affix manager stored for later dictionary lookups
// dp   - parsed affix data; its strip/appnd pointers are copied as-is
//        (not duplicated). ~PfxEntry() releases them with free(), so the
//        caller is presumably expected not to free them itself.
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
{
  // register affix manager
  pmyMgr = pmgr;

  // set up its initial values
  achar    = dp->achar;     // char flag
  strip    = dp->strip;     // string to strip
  appnd    = dp->appnd;     // string to append
  stripl   = dp->stripl;    // length of strip string
  appndl   = dp->appndl;    // length of append string
  numconds = dp->numconds;  // number of conditions to match
  xpflg    = dp->xpflg;     // cross product flag

  // then copy over all of the conditions
  memcpy(&conds[0], &dp->conds[0], SETSIZE * sizeof(conds[0]));

  // every list link starts out empty; flgnxt used to be left
  // uninitialised, so getFlgNxt() could return garbage
  next   = NULL;
  nextne = NULL;
  nexteq = NULL;
  flgnxt = NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Release the strip/append strings held by this entry and clear its fields.
PfxEntry::~PfxEntry()
{
  // free() accepts a null pointer, so no explicit guards are required
  free(appnd);
  free(strip);

  appnd  = NULL;
  strip  = NULL;
  pmyMgr = NULL;
  achar  = '\0';
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// add prefix to this word assuming conditions hold
|
||||||
|
char * PfxEntry::add(const char * word, int len)
|
||||||
|
{
|
||||||
|
int cond;
|
||||||
|
char tword[MAXWORDLEN+1];
|
||||||
|
|
||||||
|
/* make sure all conditions match */
|
||||||
|
if ((len > stripl) && (len >= numconds)) {
|
||||||
|
unsigned char * cp = (unsigned char *) word;
|
||||||
|
for (cond = 0; cond < numconds; cond++) {
|
||||||
|
if ((conds[*cp++] & (1 << cond)) == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (cond >= numconds) {
|
||||||
|
/* we have a match so add prefix */
|
||||||
|
int tlen = 0;
|
||||||
|
if (appndl) {
|
||||||
|
strcpy(tword,appnd);
|
||||||
|
tlen += appndl;
|
||||||
|
}
|
||||||
|
char * pp = tword + tlen;
|
||||||
|
strcpy(pp, (word + stripl));
|
||||||
|
return mystrdup(tword);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// check if this prefix entry matches
|
||||||
|
struct hentry * PfxEntry::check(const char * word, int len)
|
||||||
|
{
|
||||||
|
int cond; // condition number being examined
|
||||||
|
int tmpl; // length of tmpword
|
||||||
|
struct hentry * he; // hash entry of root word or NULL
|
||||||
|
unsigned char * cp;
|
||||||
|
char tmpword[MAXWORDLEN+1];
|
||||||
|
|
||||||
|
|
||||||
|
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||||
|
// So if the remaining root word has positive length
|
||||||
|
// and if there are enough chars in root word and added back strip chars
|
||||||
|
// to meet the number of characters conditions, then test it
|
||||||
|
|
||||||
|
tmpl = len - appndl;
|
||||||
|
|
||||||
|
if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
||||||
|
|
||||||
|
// generate new root word by removing prefix and adding
|
||||||
|
// back any characters that would have been stripped
|
||||||
|
|
||||||
|
if (stripl) strcpy (tmpword, strip);
|
||||||
|
strcpy ((tmpword + stripl), (word + appndl));
|
||||||
|
|
||||||
|
// now make sure all of the conditions on characters
|
||||||
|
// are met. Please see the appendix at the end of
|
||||||
|
// this file for more info on exactly what is being
|
||||||
|
// tested
|
||||||
|
|
||||||
|
cp = (unsigned char *)tmpword;
|
||||||
|
for (cond = 0; cond < numconds; cond++) {
|
||||||
|
if ((conds[*cp++] & (1 << cond)) == 0) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if all conditions are met then check if resulting
|
||||||
|
// root word in the dictionary
|
||||||
|
|
||||||
|
if (cond >= numconds) {
|
||||||
|
tmpl += stripl;
|
||||||
|
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
|
||||||
|
if (TESTAFF(he->astr, achar, he->alen)) return he;
|
||||||
|
}
|
||||||
|
|
||||||
|
// prefix matched but no root word was found
|
||||||
|
// if XPRODUCT is allowed, try again but now
|
||||||
|
// ross checked combined with a suffix
|
||||||
|
|
||||||
|
if (xpflg & XPRODUCT) {
|
||||||
|
he = pmyMgr->suffix_check(tmpword, tmpl, XPRODUCT, (AffEntry *)this);
|
||||||
|
if (he) return he;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Build a suffix entry from a parsed affix record.
//
// pmgr - affix manager stored for later dictionary lookups
// dp   - parsed affix data; its strip/appnd pointers are copied as-is
//        (not duplicated). ~SfxEntry() releases them with free(), so the
//        caller is presumably expected not to free them itself.
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
{
  // register affix manager
  pmyMgr = pmgr;

  // set up its initial values
  achar    = dp->achar;     // char flag
  strip    = dp->strip;     // string to strip
  appnd    = dp->appnd;     // string to append
  stripl   = dp->stripl;    // length of strip string
  appndl   = dp->appndl;    // length of append string
  numconds = dp->numconds;  // number of conditions to match
  xpflg    = dp->xpflg;     // cross product flag

  // then copy over all of the conditions
  memcpy(&conds[0], &dp->conds[0], SETSIZE * sizeof(conds[0]));

  // reversed copy of the append string; returned by getKey()
  rappnd = myrevstrdup(appnd);

  // list links start out empty, matching PfxEntry; they used to be
  // left uninitialised here
  next   = NULL;
  nextne = NULL;
  nexteq = NULL;
  flgnxt = NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Release the append, reversed-append and strip strings held by this entry.
SfxEntry::~SfxEntry()
{
  // free() accepts a null pointer, so no explicit guards are required
  free(appnd);
  free(rappnd);
  free(strip);

  appnd  = NULL;
  strip  = NULL;
  pmyMgr = NULL;
  achar  = '\0';
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// add suffix to this word assuming conditions hold
|
||||||
|
char * SfxEntry::add(const char * word, int len)
|
||||||
|
{
|
||||||
|
int cond;
|
||||||
|
char tword[MAXWORDLEN+1];
|
||||||
|
|
||||||
|
/* make sure all conditions match */
|
||||||
|
if ((len > stripl) && (len >= numconds)) {
|
||||||
|
unsigned char * cp = (unsigned char *) (word + len);
|
||||||
|
for (cond = numconds; --cond >=0; ) {
|
||||||
|
if ((conds[*--cp] & (1 << cond)) == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (cond < 0) {
|
||||||
|
/* we have a match so add suffix */
|
||||||
|
strcpy(tword,word);
|
||||||
|
int tlen = len;
|
||||||
|
if (stripl) {
|
||||||
|
tlen -= stripl;
|
||||||
|
}
|
||||||
|
char * pp = (tword + tlen);
|
||||||
|
if (appndl) {
|
||||||
|
strcpy(pp,appnd);
|
||||||
|
tlen += appndl;
|
||||||
|
} else *pp = '\0';
|
||||||
|
return mystrdup(tword);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// see if this suffix is present in the word
|
||||||
|
struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEntry* ppfx)
|
||||||
|
{
|
||||||
|
int tmpl; // length of tmpword
|
||||||
|
int cond; // condition beng examined
|
||||||
|
struct hentry * he; // hash entry pointer
|
||||||
|
unsigned char * cp;
|
||||||
|
char tmpword[MAXWORDLEN+1];
|
||||||
|
PfxEntry* ep = (PfxEntry *) ppfx;
|
||||||
|
|
||||||
|
|
||||||
|
// if this suffix is being cross checked with a prefix
|
||||||
|
// but it does not support cross products skip it
|
||||||
|
|
||||||
|
if ((optflags & XPRODUCT) != 0 && (xpflg & XPRODUCT) == 0)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
// upon entry suffix is 0 length or already matches the end of the word.
|
||||||
|
// So if the remaining root word has positive length
|
||||||
|
// and if there are enough chars in root word and added back strip chars
|
||||||
|
// to meet the number of characters conditions, then test it
|
||||||
|
|
||||||
|
tmpl = len - appndl;
|
||||||
|
|
||||||
|
if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
||||||
|
|
||||||
|
// generate new root word by removing suffix and adding
|
||||||
|
// back any characters that would have been stripped or
|
||||||
|
// or null terminating the shorter string
|
||||||
|
|
||||||
|
strcpy (tmpword, word);
|
||||||
|
cp = (unsigned char *)(tmpword + tmpl);
|
||||||
|
if (stripl) {
|
||||||
|
strcpy ((char *)cp, strip);
|
||||||
|
tmpl += stripl;
|
||||||
|
cp = (unsigned char *)(tmpword + tmpl);
|
||||||
|
} else *cp = '\0';
|
||||||
|
|
||||||
|
// now make sure all of the conditions on characters
|
||||||
|
// are met. Please see the appendix at the end of
|
||||||
|
// this file for more info on exactly what is being
|
||||||
|
// tested
|
||||||
|
|
||||||
|
for (cond = numconds; --cond >= 0; ) {
|
||||||
|
if ((conds[*--cp] & (1 << cond)) == 0) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if all conditions are met then check if resulting
|
||||||
|
// root word in the dictionary
|
||||||
|
|
||||||
|
if (cond < 0) {
|
||||||
|
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
|
||||||
|
if (TESTAFF(he->astr, achar , he->alen) &&
|
||||||
|
((optflags & XPRODUCT) == 0 ||
|
||||||
|
TESTAFF(he->astr, ep->getFlag(), he->alen))) return he;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
|
Appendix: Understanding Affix Code
|
||||||
|
|
||||||
|
|
||||||
|
An affix is either a prefix or a suffix attached to root words to make
|
||||||
|
other words.
|
||||||
|
|
||||||
|
Basically a Prefix or a Suffix is set of AffEntry objects
|
||||||
|
which store information about the prefix or suffix along
|
||||||
|
with supporting routines to check if a word has a particular
|
||||||
|
prefix or suffix or a combination.
|
||||||
|
|
||||||
|
The structure affentry is defined as follows:
|
||||||
|
|
||||||
|
struct affentry
|
||||||
|
{
|
||||||
|
unsigned char achar; // char used to represent the affix
|
||||||
|
char * strip; // string to strip before adding affix
|
||||||
|
char * appnd; // the affix string to add
|
||||||
|
short stripl; // length of the strip string
|
||||||
|
short appndl; // length of the affix string
|
||||||
|
short numconds; // the number of conditions that must be met
|
||||||
|
short xpflg; // flag: XPRODUCT- combine both prefix and suffix
|
||||||
|
char conds[SETSIZE]; // array which encodes the conditions to be met
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Here is a suffix borrowed from the en_US.aff file. This file
|
||||||
|
is whitespace delimited.
|
||||||
|
|
||||||
|
SFX D Y 4
|
||||||
|
SFX D 0 d e
|
||||||
|
SFX D y ied [^aeiou]y
|
||||||
|
SFX D 0 ed [^ey]
|
||||||
|
SFX D 0 ed [aeiou]y
|
||||||
|
|
||||||
|
This information can be interpreted as follows:
|
||||||
|
|
||||||
|
The first line has 4 fields:
|
||||||
|
|
||||||
|
Field
|
||||||
|
-----
|
||||||
|
1 SFX - indicates this is a suffix
|
||||||
|
2 D - is the name of the character flag which represents this suffix
|
||||||
|
3 Y - indicates it can be combined with prefixes (cross product)
|
||||||
|
4 4 - indicates that sequence of 4 affentry structures are needed to
|
||||||
|
properly store the affix information
|
||||||
|
|
||||||
|
The remaining lines describe the unique information for the 4 SfxEntry
|
||||||
|
objects that make up this affix. Each line can be interpreted
|
||||||
|
as follows: (note fields 1 and 2 are as a check against line 1 info)
|
||||||
|
|
||||||
|
Field
|
||||||
|
-----
|
||||||
|
1 SFX - indicates this is a suffix
|
||||||
|
2 D - is the name of the character flag for this affix
|
||||||
|
3 y - the string of chars to strip off before adding affix
|
||||||
|
(a 0 here indicates the NULL string)
|
||||||
|
4 ied - the string of affix characters to add
|
||||||
|
5 [^aeiou]y - the conditions which must be met before the affix
|
||||||
|
can be applied
|
||||||
|
|
||||||
|
Field 5 is interesting. Since this is a suffix, field 5 tells us that
|
||||||
|
there are 2 conditions that must be met. The first condition is that
|
||||||
|
the next to the last character in the word must *NOT* be any of the
|
||||||
|
following "a", "e", "i", "o" or "u". The second condition is that
|
||||||
|
the last character of the word must be "y".
|
||||||
|
|
||||||
|
So how can we encode this information concisely and be able to
|
||||||
|
test for both conditions in a fast manner? The answer is found
|
||||||
|
by studying the wonderful ispell code of Geoff Kuenning, et al.
|
||||||
|
(now available under a normal BSD license).
|
||||||
|
|
||||||
|
If we set up a conds array of 256 bytes indexed (0 to 255) and access it
|
||||||
|
using a character (cast to an unsigned char) of a string, we have 8 bits
|
||||||
|
of information we can store about that character. Specifically we
|
||||||
|
could use each bit to say if that character is allowed in any of the
|
||||||
|
last (or first for prefixes) 8 characters of the word.
|
||||||
|
|
||||||
|
Basically, each character at one end of the word (up to the number
|
||||||
|
of conditions) is used to index into the conds array and the resulting
|
||||||
|
value found there says whether that character is valid for a
|
||||||
|
specific character position in the word.
|
||||||
|
|
||||||
|
For prefixes, it does this by setting bit 0 if that char is valid
|
||||||
|
in the first position, bit 1 if valid in the second position, and so on.
|
||||||
|
|
||||||
|
If a bit is not set, then that char is not valid for that position in the
|
||||||
|
word.
|
||||||
|
|
||||||
|
If working with suffixes bit 0 is used for the character closest
|
||||||
|
to the front, bit 1 for the next character towards the end, ...,
|
||||||
|
with bit numconds-1 representing the last char at the end of the string.
|
||||||
|
|
||||||
|
Note: since entries in the conds[] are 8 bits, only 8 conditions
|
||||||
|
(read that only 8 character positions) can be examined at one
|
||||||
|
end of a word (the beginning for prefixes and the end for suffixes).
|
||||||
|
|
||||||
|
So to make this clearer, let's encode the conds array values for the
|
||||||
|
first two affentries for the suffix D described earlier.
|
||||||
|
|
||||||
|
|
||||||
|
For the first affentry:
|
||||||
|
numconds = 1 (only examine the last character)
|
||||||
|
|
||||||
|
conds['e'] = (1 << 0) (the word must end in an E)
|
||||||
|
all others are all 0
|
||||||
|
|
||||||
|
For the second affentry:
|
||||||
|
numconds = 2 (only examine the last two characters)
|
||||||
|
|
||||||
|
conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
|
||||||
|
where X is all characters *but* a, e, i, o, or u
|
||||||
|
|
||||||
|
|
||||||
|
conds['y'] = (1 << 1) (the last char must be a y)
|
||||||
|
all other bits for all other entries in the conds array are zero
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
86
goldlib/myspell/affentry.hxx
Normal file
86
goldlib/myspell/affentry.hxx
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
#ifndef _AFFIX_HXX_
|
||||||
|
#define _AFFIX_HXX_
|
||||||
|
|
||||||
|
#include "atypes.hxx"
|
||||||
|
#include "baseaffix.hxx"
|
||||||
|
#include "affixmgr.hxx"
|
||||||
|
|
||||||
|
|
||||||
|
/* A Prefix Entry */
|
||||||
|
|
||||||
|
class PfxEntry : public AffEntry
|
||||||
|
{
|
||||||
|
AffixMgr* pmyMgr;
|
||||||
|
|
||||||
|
PfxEntry * next;
|
||||||
|
PfxEntry * nexteq;
|
||||||
|
PfxEntry * nextne;
|
||||||
|
PfxEntry * flgnxt;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
PfxEntry(AffixMgr* pmgr, affentry* dp );
|
||||||
|
~PfxEntry();
|
||||||
|
|
||||||
|
struct hentry * check(const char * word, int len);
|
||||||
|
|
||||||
|
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
|
||||||
|
inline unsigned char getFlag() { return achar; }
|
||||||
|
inline const char * getKey() { return appnd; }
|
||||||
|
char * add(const char * word, int len);
|
||||||
|
|
||||||
|
inline PfxEntry * getNext() { return next; }
|
||||||
|
inline PfxEntry * getNextNE() { return nextne; }
|
||||||
|
inline PfxEntry * getNextEQ() { return nexteq; }
|
||||||
|
inline PfxEntry * getFlgNxt() { return flgnxt; }
|
||||||
|
|
||||||
|
inline void setNext(PfxEntry * ptr) { next = ptr; }
|
||||||
|
inline void setNextNE(PfxEntry * ptr) { nextne = ptr; }
|
||||||
|
inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
|
||||||
|
inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* A Suffix Entry */
|
||||||
|
|
||||||
|
class SfxEntry : public AffEntry
|
||||||
|
{
|
||||||
|
AffixMgr* pmyMgr;
|
||||||
|
char * rappnd;
|
||||||
|
|
||||||
|
SfxEntry * next;
|
||||||
|
SfxEntry * nexteq;
|
||||||
|
SfxEntry * nextne;
|
||||||
|
SfxEntry * flgnxt;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
SfxEntry(AffixMgr* pmgr, affentry* dp );
|
||||||
|
~SfxEntry();
|
||||||
|
|
||||||
|
struct hentry * check(const char * word, int len, int optflags,
|
||||||
|
AffEntry* ppfx);
|
||||||
|
|
||||||
|
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
|
||||||
|
inline unsigned char getFlag() { return achar; }
|
||||||
|
inline const char * getKey() { return rappnd; }
|
||||||
|
char * add(const char * word, int len);
|
||||||
|
|
||||||
|
inline SfxEntry * getNext() { return next; }
|
||||||
|
inline SfxEntry * getNextNE() { return nextne; }
|
||||||
|
inline SfxEntry * getNextEQ() { return nexteq; }
|
||||||
|
inline SfxEntry * getFlgNxt() { return flgnxt; }
|
||||||
|
|
||||||
|
inline void setNext(SfxEntry * ptr) { next = ptr; }
|
||||||
|
inline void setNextNE(SfxEntry * ptr) { nextne = ptr; }
|
||||||
|
inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
|
||||||
|
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
1233
goldlib/myspell/affixmgr.cxx
Normal file
1233
goldlib/myspell/affixmgr.cxx
Normal file
File diff suppressed because it is too large
Load Diff
66
goldlib/myspell/affixmgr.hxx
Normal file
66
goldlib/myspell/affixmgr.hxx
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#ifndef _AFFIXMGR_HXX_
|
||||||
|
#define _AFFIXMGR_HXX_
|
||||||
|
|
||||||
|
#include "atypes.hxx"
|
||||||
|
#include "baseaffix.hxx"
|
||||||
|
#include "hashmgr.hxx"
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
class AffixMgr
|
||||||
|
{
|
||||||
|
|
||||||
|
AffEntry * pStart[SETSIZE];
|
||||||
|
AffEntry * sStart[SETSIZE];
|
||||||
|
AffEntry * pFlag[SETSIZE];
|
||||||
|
AffEntry * sFlag[SETSIZE];
|
||||||
|
HashMgr * pHMgr;
|
||||||
|
char * trystring;
|
||||||
|
char * encoding;
|
||||||
|
char * compound;
|
||||||
|
int cpdmin;
|
||||||
|
int numrep;
|
||||||
|
replentry * reptable;
|
||||||
|
int nummap;
|
||||||
|
mapentry * maptable;
|
||||||
|
bool nosplitsugs;
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
AffixMgr(const char * affpath, HashMgr * ptr);
|
||||||
|
~AffixMgr();
|
||||||
|
struct hentry * affix_check(const char * word, int len);
|
||||||
|
struct hentry * prefix_check(const char * word, int len);
|
||||||
|
struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx);
|
||||||
|
int expand_rootword(struct guessword * wlst, int maxn,
|
||||||
|
const char * ts, int wl, const char * ap, int al);
|
||||||
|
struct hentry * compound_check(const char * word, int len, char compound_flag);
|
||||||
|
struct hentry * lookup(const char * word);
|
||||||
|
int get_numrep();
|
||||||
|
struct replentry * get_reptable();
|
||||||
|
int get_nummap();
|
||||||
|
struct mapentry * get_maptable();
|
||||||
|
char * get_encoding();
|
||||||
|
char * get_try_string();
|
||||||
|
char * get_compound();
|
||||||
|
bool get_nosplitsugs();
|
||||||
|
|
||||||
|
private:
|
||||||
|
int parse_file(const char * affpath);
|
||||||
|
int parse_try(char * line);
|
||||||
|
int parse_set(char * line);
|
||||||
|
int parse_cpdflag(char * line);
|
||||||
|
int parse_cpdmin(char * line);
|
||||||
|
int parse_reptable(char * line, FILE * af);
|
||||||
|
int parse_maptable(char * line, FILE * af);
|
||||||
|
int parse_affix(char * line, const char at, FILE * af);
|
||||||
|
|
||||||
|
void encodeit(struct affentry * ptr, char * cs);
|
||||||
|
int build_pfxlist(AffEntry* pfxptr);
|
||||||
|
int build_sfxlist(AffEntry* sfxptr);
|
||||||
|
int process_pfx_order();
|
||||||
|
int process_sfx_order();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
45
goldlib/myspell/atypes.hxx
Normal file
45
goldlib/myspell/atypes.hxx
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#ifndef _ATYPES_HXX_
|
||||||
|
#define _ATYPES_HXX_
|
||||||
|
|
||||||
|
// size of per-character tables (one slot per possible byte value)
#define SETSIZE 256
#define MAXAFFIXES 256
// longest word the fixed-size work buffers can hold (excluding the NUL)
#define MAXWORDLEN 100
// bit in xpflg / option flags marking cross product (prefix + suffix) use
#define XPRODUCT (1 << 0)

// maximum length of a line buffer
#define MAXLNLEN 1024

// non-null when flag character b occurs in the first c bytes of flag array a
#define TESTAFF(a, b, c) memchr((void *)(a), (int)(b), (size_t)(c))
|
||||||
|
|
||||||
|
struct affentry
|
||||||
|
{
|
||||||
|
char * strip;
|
||||||
|
char * appnd;
|
||||||
|
short stripl;
|
||||||
|
short appndl;
|
||||||
|
short numconds;
|
||||||
|
short xpflg;
|
||||||
|
char achar;
|
||||||
|
char conds[SETSIZE];
|
||||||
|
};
|
||||||
|
|
||||||
|
// One pattern/replacement pair of a replacement table.
struct replentry
{
  char* pattern;
  char* replacement;
};
|
||||||
|
|
||||||
|
// One entry of a map table: a character set and its length.
struct mapentry
{
  char* set;
  int   len;
};
|
||||||
|
|
||||||
|
// A candidate word together with an "allow" flag.
struct guessword
{
  char* word;
  bool  allow;
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
17
goldlib/myspell/baseaffix.hxx
Normal file
17
goldlib/myspell/baseaffix.hxx
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#ifndef _BASEAFF_HXX_
|
||||||
|
#define _BASEAFF_HXX_
|
||||||
|
|
||||||
|
class AffEntry
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
char * appnd;
|
||||||
|
char * strip;
|
||||||
|
short appndl;
|
||||||
|
short stripl;
|
||||||
|
short numconds;
|
||||||
|
short xpflg;
|
||||||
|
char achar;
|
||||||
|
char conds[SETSIZE];
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
3850
goldlib/myspell/csutil.cxx
Normal file
3850
goldlib/myspell/csutil.cxx
Normal file
File diff suppressed because it is too large
Load Diff
67
goldlib/myspell/csutil.hxx
Normal file
67
goldlib/myspell/csutil.hxx
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#ifndef __CSUTILHXX__
|
||||||
|
#define __CSUTILHXX__
|
||||||
|
|
||||||
|
|
||||||
|
// Base level string utility routines.

// strip the end-of-line character(s) from s
void mychomp(char* s);

// return a duplicate of s
char* mystrdup(const char* s);

// return a duplicate of s with its characters reversed
char* myrevstrdup(const char* s);

// split off the next token of *sptr using the single character delim
char* mystrsep(char** sptr, const char delim);

// tell whether one string is a leading subset of the other
int isSubset(const char* s1, const char* s2);
|
||||||
|
|
||||||
|
|
||||||
|
// Character encoding information for a single character code.
// NOTE(review): presumably ccase is a case flag and clower/cupper are the
// lower/upper case equivalents - confirm against the tables in csutil.cxx.
struct cs_info
{
  unsigned char ccase;
  unsigned char clower;
  unsigned char cupper;
};
|
||||||
|
|
||||||
|
|
||||||
|
// Pairs an encoding name with its character information table.
struct enc_entry
{
  const char*     enc_name;
  struct cs_info* cs_table;
};
|
||||||
|
|
||||||
|
// Maps a language to its default encoding name.
struct lang_map
{
  const char* lang;
  const char* def_enc;
};
|
||||||
|
|
||||||
|
// look up the character information table for the encoding name es
struct cs_info* get_current_cs(const char* es);

// look up the default encoding name for a language
const char* get_default_enc(const char* lang);

// convert null terminated string p to all caps in d, using the named encoding
void enmkallcap(char* d, const char* p, const char* encoding);

// convert null terminated string p to all lower case in d, using the named encoding
void enmkallsmall(char* d, const char* p, const char* encoding);

// convert null terminated string p to have an initial capital in d, using the named encoding
void enmkinitcap(char* d, const char* p, const char* encoding);

// convert null terminated string to all caps
void mkallcap(char* p, const struct cs_info* csconv);

// convert null terminated string to all lower case
void mkallsmall(char* p, const struct cs_info* csconv);

// convert null terminated string to have an initial capital
void mkinitcap(char* p, const struct cs_info* csconv);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
127
goldlib/myspell/dictmgr.cxx
Normal file
127
goldlib/myspell/dictmgr.cxx
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cctype>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "dictmgr.hxx"
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
using namespace std;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// some utility functions
|
||||||
|
extern void mychomp(char * s);
|
||||||
|
extern char * mystrdup(const char * s);
|
||||||
|
extern char * mystrsep(char ** stringp, const char delim);
|
||||||
|
|
||||||
|
// Load the list of etype entries found in the dictionary list file dictpath.
// The manager ends up with zero entries when the table cannot be allocated
// or when parse_file() reports an error.
DictMgr::DictMgr(const char * dictpath, const char * etype)
{
  numdict = 0;
  pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry));

  // without a table there is nothing to fill in
  if (!pdentry) return;

  // a missing dictionary list file is okay; just report no entries
  if (parse_file(dictpath, etype)) numdict = 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Free the strings of every stored entry, then the entry table itself.
DictMgr::~DictMgr()
{
  if (pdentry) {
    for (int idx = 0; idx < numdict; idx++) {
      dictentry & entry = pdentry[idx];

      if (entry.lang) {
        free(entry.lang);
        entry.lang = NULL;
      }
      if (entry.region) {
        free(entry.region);
        entry.region = NULL;
      }
      if (entry.filename) {
        free(entry.filename);
        entry.filename = NULL;
      }
    }
    free(pdentry);
    pdentry = NULL;
  }
  numdict = 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
// read in list of etype entries and build up structure to describe them
|
||||||
|
int DictMgr::parse_file(const char * dictpath, const char * etype)
|
||||||
|
{
|
||||||
|
|
||||||
|
int i;
|
||||||
|
char line[MAXDICTENTRYLEN+1];
|
||||||
|
dictentry * pdict = pdentry;
|
||||||
|
|
||||||
|
// open the dictionary list file
|
||||||
|
FILE * dictlst;
|
||||||
|
dictlst = fopen(dictpath,"r");
|
||||||
|
if (!dictlst) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// step one is to parse the dictionary list building up the
|
||||||
|
// descriptive structures
|
||||||
|
|
||||||
|
// read in each line ignoring any that dont start with etype
|
||||||
|
while (fgets(line,MAXDICTENTRYLEN,dictlst)) {
|
||||||
|
mychomp(line);
|
||||||
|
|
||||||
|
/* parse in a dictionary entry */
|
||||||
|
if (strncmp(line,etype,4) == 0) {
|
||||||
|
if (numdict < MAXDICTIONARIES) {
|
||||||
|
char * tp = line;
|
||||||
|
char * piece;
|
||||||
|
i = 0;
|
||||||
|
while ((piece=mystrsep(&tp,' '))) {
|
||||||
|
if (*piece != '\0') {
|
||||||
|
switch(i) {
|
||||||
|
case 0: break;
|
||||||
|
case 1: pdict->lang = mystrdup(piece); break;
|
||||||
|
case 2: if (strcmp (piece, "ANY") == 0)
|
||||||
|
pdict->region = mystrdup("");
|
||||||
|
else
|
||||||
|
pdict->region = mystrdup(piece);
|
||||||
|
break;
|
||||||
|
case 3: pdict->filename = mystrdup(piece); break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
free(piece);
|
||||||
|
}
|
||||||
|
if (i == 4) {
|
||||||
|
numdict++;
|
||||||
|
pdict++;
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(dictlst);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// return text encoding of dictionary
|
||||||
|
int DictMgr::get_list(dictentry ** ppentry)
|
||||||
|
{
|
||||||
|
*ppentry = pdentry;
|
||||||
|
return numdict;
|
||||||
|
}
|
||||||
|
|
31
goldlib/myspell/dictmgr.hxx
Normal file
31
goldlib/myspell/dictmgr.hxx
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#ifndef _DICTMGR_HXX_
|
||||||
|
#define _DICTMGR_HXX_
|
||||||
|
|
||||||
|
// capacity of the dictionary entry table
#define MAXDICTIONARIES 100
// size used for the line buffer when reading the dictionary list
#define MAXDICTENTRYLEN 1024
|
||||||
|
|
||||||
|
// One line of the dictionary list: file name, language and region.
struct dictentry
{
  char* filename;
  char* lang;
  char* region;
};
|
||||||
|
|
||||||
|
|
||||||
|
class DictMgr
|
||||||
|
{
|
||||||
|
|
||||||
|
int numdict;
|
||||||
|
dictentry * pdentry;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
DictMgr(const char * dictpath, const char * etype);
|
||||||
|
~DictMgr();
|
||||||
|
int get_list(dictentry** ppentry);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int parse_file(const char * dictpath, const char * etype);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
89
goldlib/myspell/example.cxx
Normal file
89
goldlib/myspell/example.cxx
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "myspell.hxx"
|
||||||
|
|
||||||
|
extern char * mystrdup(const char * s);
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
|
||||||
|
char * af;
|
||||||
|
char * df;
|
||||||
|
char * wtc;
|
||||||
|
FILE* wtclst;
|
||||||
|
|
||||||
|
/* first parse the command line options */
|
||||||
|
/* arg1 - affix file, arg2 dictionary file, arg3 - file of words to check */
|
||||||
|
|
||||||
|
if (argv[1]) {
|
||||||
|
af = mystrdup(argv[1]);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,"correct syntax is:\n");
|
||||||
|
fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (argv[2]) {
|
||||||
|
df = mystrdup(argv[2]);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,"correct syntax is:\n");
|
||||||
|
fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (argv[3]) {
|
||||||
|
wtc = mystrdup(argv[3]);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,"correct syntax is:\n");
|
||||||
|
fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* open the words to check list */
|
||||||
|
wtclst = fopen(wtc,"r");
|
||||||
|
if (!wtclst) {
|
||||||
|
fprintf(stderr,"Error - could not open file of words to check\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
MySpell * pMS= new MySpell(af,df);
|
||||||
|
|
||||||
|
int k;
|
||||||
|
int dp;
|
||||||
|
char buf[101];
|
||||||
|
|
||||||
|
while(fgets(buf,100,wtclst)) {
|
||||||
|
k = strlen(buf);
|
||||||
|
*(buf + k - 1) = '\0';
|
||||||
|
dp = pMS->spell(buf);
|
||||||
|
if (dp) {
|
||||||
|
fprintf(stdout,"\"%s\" is okay\n",buf);
|
||||||
|
fprintf(stdout,"\n");
|
||||||
|
} else {
|
||||||
|
fprintf(stdout,"\"%s\" is incorrect!\n",buf);
|
||||||
|
fprintf(stdout," suggestions:\n");
|
||||||
|
char ** wlst;
|
||||||
|
int ns = pMS->suggest(&wlst,buf);
|
||||||
|
for (int i=0; i < ns; i++) {
|
||||||
|
fprintf(stdout," ...\"%s\"\n",wlst[i]);
|
||||||
|
free(wlst[i]);
|
||||||
|
}
|
||||||
|
fprintf(stdout,"\n");
|
||||||
|
free(wlst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete pMS;
|
||||||
|
fclose(wtclst);
|
||||||
|
free(wtc);
|
||||||
|
free(df);
|
||||||
|
free(af);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
213
goldlib/myspell/hashmgr.cxx
Normal file
213
goldlib/myspell/hashmgr.cxx
Normal file
@ -0,0 +1,213 @@
|
|||||||
|
#include "license.readme"
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "hashmgr.hxx"
|
||||||
|
|
||||||
|
extern void mychomp(char * s);
|
||||||
|
extern char * mystrdup(const char *);
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
using namespace std;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// build a hash table from a munched word list
|
||||||
|
|
||||||
|
HashMgr::HashMgr(const char * tpath)
|
||||||
|
{
|
||||||
|
tablesize = 0;
|
||||||
|
tableptr = NULL;
|
||||||
|
int ec = load_tables(tpath);
|
||||||
|
if (ec) {
|
||||||
|
/* error condition - what should we do here */
|
||||||
|
fprintf(stderr,"Hash Manager Error : %d\n",ec);
|
||||||
|
fflush(stderr);
|
||||||
|
if (tableptr) {
|
||||||
|
free(tableptr);
|
||||||
|
}
|
||||||
|
tablesize = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Release every string and chained entry held by the table, then the
// table array itself.
HashMgr::~HashMgr()
{
  if (tableptr != NULL) {
    // go through the table column by column
    for (int col = 0; col < tablesize; ++col) {
      struct hentry * head = &tableptr[col];

      // the first entry of a column lives inside the table array:
      // free its strings but not the entry itself
      if (head->word) free(head->word);
      if (head->astr) free(head->astr);

      // the remaining entries of the column were malloc'ed one by one
      // (see add_word) and are freed together with their strings
      struct hentry * cur = head->next;
      while (cur != NULL) {
        struct hentry * following = cur->next;
        if (cur->word) free(cur->word);
        if (cur->astr) free(cur->astr);
        free(cur);
        cur = following;
      }
    }
    free(tableptr);
  }
  tablesize = 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// lookup a root word in the hashtable
|
||||||
|
|
||||||
|
struct hentry * HashMgr::lookup(const char *word) const
|
||||||
|
{
|
||||||
|
struct hentry * dp;
|
||||||
|
if (tableptr) {
|
||||||
|
dp = &tableptr[hash(word)];
|
||||||
|
if (dp->word == NULL) return NULL;
|
||||||
|
for ( ; dp != NULL; dp = dp->next) {
|
||||||
|
if (strcmp(word,dp->word) == 0) return dp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// add a word to the hash table (private)
|
||||||
|
|
||||||
|
int HashMgr::add_word(const char * word, int wl, const char * aff, int al)
|
||||||
|
{
|
||||||
|
int i = hash(word);
|
||||||
|
struct hentry * dp = &tableptr[i];
|
||||||
|
struct hentry* hp;
|
||||||
|
if (dp->word == NULL) {
|
||||||
|
dp->wlen = wl;
|
||||||
|
dp->alen = al;
|
||||||
|
dp->word = mystrdup(word);
|
||||||
|
dp->astr = mystrdup(aff);
|
||||||
|
dp->next = NULL;
|
||||||
|
if ((wl) && (dp->word == NULL)) return 1;
|
||||||
|
if ((al) && (dp->astr == NULL)) return 1;
|
||||||
|
} else {
|
||||||
|
hp = (struct hentry *) malloc (sizeof(struct hentry));
|
||||||
|
if (hp == NULL) return 1;
|
||||||
|
hp->wlen = wl;
|
||||||
|
hp->alen = al;
|
||||||
|
hp->word = mystrdup(word);
|
||||||
|
hp->astr = mystrdup(aff);
|
||||||
|
hp->next = NULL;
|
||||||
|
while (dp->next != NULL) dp=dp->next;
|
||||||
|
dp->next = hp;
|
||||||
|
if ((wl) && (hp->word == NULL)) return 1;
|
||||||
|
if ((al) && (hp->astr == NULL)) return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// walk the hash table entry by entry - null at end
|
||||||
|
struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
|
||||||
|
{
|
||||||
|
//reset to start
|
||||||
|
if ((col < 0) || (hp == NULL)) {
|
||||||
|
col = -1;
|
||||||
|
hp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hp && hp->next != NULL) {
|
||||||
|
hp = hp->next;
|
||||||
|
} else {
|
||||||
|
col++;
|
||||||
|
hp = (col < tablesize) ? &tableptr[col] : NULL;
|
||||||
|
// search for next non-blank column entry
|
||||||
|
while (hp && (hp->word == NULL)) {
|
||||||
|
col ++;
|
||||||
|
hp = (col < tablesize) ? &tableptr[col] : NULL;
|
||||||
|
}
|
||||||
|
if (col < tablesize) return hp;
|
||||||
|
hp = NULL;
|
||||||
|
col = -1;
|
||||||
|
}
|
||||||
|
return hp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// load a munched word list and build a hash table on the fly
|
||||||
|
|
||||||
|
int HashMgr::load_tables(const char * tpath)
|
||||||
|
{
|
||||||
|
int wl, al;
|
||||||
|
char * ap;
|
||||||
|
|
||||||
|
// raw dictionary - munched file
|
||||||
|
FILE * rawdict = fopen(tpath, "r");
|
||||||
|
if (rawdict == NULL) return 1;
|
||||||
|
|
||||||
|
// first read the first line of file to get hash table size */
|
||||||
|
char ts[MAXDELEN];
|
||||||
|
if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;
|
||||||
|
mychomp(ts);
|
||||||
|
tablesize = atoi(ts);
|
||||||
|
if (!tablesize) return 4;
|
||||||
|
tablesize = tablesize + 5;
|
||||||
|
if ((tablesize %2) == 0) tablesize++;
|
||||||
|
|
||||||
|
// allocate the hash table
|
||||||
|
tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
|
||||||
|
if (! tableptr) return 3;
|
||||||
|
|
||||||
|
// loop through all words on much list and add to hash
|
||||||
|
// table and create word and affix strings
|
||||||
|
|
||||||
|
while (fgets(ts,MAXDELEN-1,rawdict)) {
|
||||||
|
mychomp(ts);
|
||||||
|
// split each line into word and affix char strings
|
||||||
|
ap = strchr(ts,'/');
|
||||||
|
if (ap) {
|
||||||
|
*ap = '\0';
|
||||||
|
ap++;
|
||||||
|
al = strlen(ap);
|
||||||
|
} else {
|
||||||
|
al = 0;
|
||||||
|
ap = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
wl = strlen(ts);
|
||||||
|
|
||||||
|
// add the word and its index
|
||||||
|
if (add_word(ts,wl,ap,al))
|
||||||
|
return 5;;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(rawdict);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// the hash function is a simple load and rotate
|
||||||
|
// algorithm borrowed
|
||||||
|
|
||||||
|
int HashMgr::hash(const char * word) const
|
||||||
|
{
|
||||||
|
long hv = 0;
|
||||||
|
for (int i=0; i < 4 && *word != 0; i++)
|
||||||
|
hv = (hv << 8) | (*word++);
|
||||||
|
while (*word != 0) {
|
||||||
|
ROTATE(hv,ROTATE_LEN);
|
||||||
|
hv ^= (*word++);
|
||||||
|
}
|
||||||
|
return (unsigned long) hv % tablesize;
|
||||||
|
}
|
||||||
|
|
27
goldlib/myspell/hashmgr.hxx
Normal file
27
goldlib/myspell/hashmgr.hxx
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#ifndef _HASHMGR_HXX_
|
||||||
|
#define _HASHMGR_HXX_
|
||||||
|
|
||||||
|
#include "htypes.hxx"
|
||||||
|
|
||||||
|
// Hash table of dictionary root words, built from a munched word list.
class HashMgr
{
private:
  int             tablesize;   // number of columns in tableptr
  struct hentry * tableptr;    // table array; each element heads a chain

public:
  // load the dictionary found at tpath
  HashMgr(const char * tpath);
  ~HashMgr();

  // find the entry for a root word, NULL if absent
  struct hentry * lookup(const char *) const;
  // column index of a word
  int hash(const char *) const;
  // iterate over all entries; returns NULL at the end
  struct hentry * walk_hashtable(int & col, struct hentry * hp) const;

private:
  HashMgr( const HashMgr & ); // not implemented
  HashMgr &operator=( const HashMgr & ); // not implemented
  int load_tables(const char * tpath);
  int add_word(const char * word, int wl, const char * ap, int al);

};
|
||||||
|
|
||||||
|
#endif
|
20
goldlib/myspell/htypes.hxx
Normal file
20
goldlib/myspell/htypes.hxx
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#ifndef _HTYPES_HXX_
|
||||||
|
#define _HTYPES_HXX_
|
||||||
|
|
||||||
|
#define MAXDELEN 256
|
||||||
|
|
||||||
|
#define ROTATE_LEN 5
|
||||||
|
|
||||||
|
// Shift v left by q bits and OR the q bits found just below bit 32 back
// into the low end. Both arguments are fully parenthesized so that an
// expression may be passed for q. The trailing ';' is kept for existing
// call sites.
#define ROTATE(v,q) \
   (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q))-1));
|
||||||
|
|
||||||
|
// One dictionary entry; entries that hash to the same column are linked
// through next.
struct hentry
{
  short           wlen;   // length of word
  short           alen;   // length of astr
  char *          word;   // the root word
  char *          astr;   // affix flag string of the word
  struct hentry * next;   // next entry in the same column, NULL at the end
};
|
||||||
|
|
||||||
|
#endif
|
61
goldlib/myspell/license.readme
Normal file
61
goldlib/myspell/license.readme
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* NOTE: A special thanks and credit goes to Geoff Kuenning
|
||||||
|
* the creator of ispell. MySpell's affix algorithms were
|
||||||
|
* based on those of ispell which should be noted is
|
||||||
|
* copyright Geoff Kuenning et.al. and now available
|
||||||
|
* under a BSD style license. For more information on ispell
|
||||||
|
* and affix compression in general, please see:
|
||||||
|
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
|
||||||
|
* (the home page for ispell)
|
||||||
|
*
|
||||||
|
* An almost complete rewrite of MySpell for use by
|
||||||
|
* the Mozilla project has been developed by David Einstein
|
||||||
|
* (Deinst@world.std.com). David and I are now
|
||||||
|
* working on parallel development tracks to help
|
||||||
|
* our respective projects (Mozilla and OpenOffice.org
|
||||||
|
* and we will maintain full affix file and dictionary
|
||||||
|
* file compatibility and work on merging our versions
|
||||||
|
* of MySpell back into a single tree. David has been
|
||||||
|
* a significant help in improving MySpell.
|
||||||
|
*
|
||||||
|
* Special thanks also go to La'szlo' Ne'meth
|
||||||
|
* <nemethl@gyorsposta.hu> who is the author of the
|
||||||
|
* Hungarian dictionary and who developed and contributed
|
||||||
|
* the code to support compound words in MySpell
|
||||||
|
* and fixed numerous problems with the encoding
|
||||||
|
* case conversion tables.
|
||||||
|
*
|
||||||
|
*/
|
302
goldlib/myspell/myspell.cxx
Normal file
302
goldlib/myspell/myspell.cxx
Normal file
@ -0,0 +1,302 @@
|
|||||||
|
#include "license.readme"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "myspell.hxx"
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
using namespace std;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// Create a spell checker from an affix file and a dictionary file.
//
// affpath: path of the affix file, handed to the AffixMgr.
// dpath  : path of the dictionary file, handed to the HashMgr.
MySpell::MySpell(const char * affpath, const char * dpath)
{
  encoding = NULL;
  csconv = NULL;

  // the hash manager comes first: the affix manager needs access to its
  // lookup methods
  pHMgr = new HashMgr(dpath);
  pAMgr = new AffixMgr(affpath,pHMgr);

  // the affix manager supplies the preferred try string and the
  // dictionary encoding for that dictionary
  char * trychars = pAMgr->get_try_string();
  encoding = pAMgr->get_encoding();
  csconv = get_current_cs(encoding);

  // finally the suggestion manager; the try string is released right
  // after constructing it
  maxSug = 100;
  pSMgr = new SuggestMgr(trychars, maxSug, pAMgr);
  if (trychars) free(trychars);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Tear down the three managers (suggestion, affix, hash - in that order)
// and release the encoding string.
MySpell::~MySpell()
{
  // deleting a NULL pointer is a no-op, so no guards are needed
  delete pSMgr;
  delete pAMgr;
  delete pHMgr;
  pSMgr = NULL;
  pAMgr = NULL;
  pHMgr = NULL;

  // csconv is not freed here, only forgotten
  csconv = NULL;

  if (encoding != NULL) free(encoding);
  encoding = NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
// make a copy of src at destination while removing all leading
|
||||||
|
// blanks and removing any trailing periods after recording
|
||||||
|
// their presence with the abbreviation flag
|
||||||
|
// also since already going through character by character,
|
||||||
|
// set the capitalization type
|
||||||
|
// return the length of the "cleaned" word
|
||||||
|
|
||||||
|
int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabbrev)
|
||||||
|
{
|
||||||
|
|
||||||
|
// with the new breakiterator code this should not be needed anymore
|
||||||
|
const char * special_chars = "._#$%&()* +,-/:;<=>[]\\^`{|}~\t \x0a\x0d\x01\'\"";
|
||||||
|
|
||||||
|
unsigned char * p = (unsigned char *) dest;
|
||||||
|
const unsigned char * q = (const unsigned char * ) src;
|
||||||
|
|
||||||
|
// first skip over any leading special characters
|
||||||
|
while ((*q != '\0') && (strchr(special_chars,(int)(*q)))) q++;
|
||||||
|
|
||||||
|
// now strip off any trailing special characters
|
||||||
|
// if a period comes after a normal char record its presence
|
||||||
|
*pabbrev = 0;
|
||||||
|
int nl = strlen((const char *)q);
|
||||||
|
while ((nl > 0) && (strchr(special_chars,(int)(*(q+nl-1))))) {
|
||||||
|
nl--;
|
||||||
|
}
|
||||||
|
if ( *(q+nl) == '.' ) *pabbrev = 1;
|
||||||
|
|
||||||
|
// if no characters are left it can't be an abbreviation and can't be capitalized
|
||||||
|
if (nl <= 0) {
|
||||||
|
*pcaptype = NOCAP;
|
||||||
|
*pabbrev = 0;
|
||||||
|
*p = '\0';
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// now determine the capitalization type of the first nl letters
|
||||||
|
int ncap = 0;
|
||||||
|
int nneutral = 0;
|
||||||
|
int nc = 0;
|
||||||
|
while (nl > 0) {
|
||||||
|
nc++;
|
||||||
|
if (csconv[(*q)].ccase) ncap++;
|
||||||
|
if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
|
||||||
|
*p++ = *q++;
|
||||||
|
nl--;
|
||||||
|
}
|
||||||
|
// remember to terminate the destination string
|
||||||
|
*p = '\0';
|
||||||
|
|
||||||
|
// now finally set the captype
|
||||||
|
if (ncap == 0) {
|
||||||
|
*pcaptype = NOCAP;
|
||||||
|
} else if ((ncap == 1) && csconv[(unsigned char)(*dest)].ccase) {
|
||||||
|
*pcaptype = INITCAP;
|
||||||
|
} else if ((ncap == nc) || ((ncap + nneutral) == nc)){
|
||||||
|
*pcaptype = ALLCAP;
|
||||||
|
} else {
|
||||||
|
*pcaptype = HUHCAP;
|
||||||
|
}
|
||||||
|
return nc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int MySpell::spell(const char * word)
|
||||||
|
{
|
||||||
|
char * rv=NULL;
|
||||||
|
char cw[MAXWORDLEN+1];
|
||||||
|
char wspace[MAXWORDLEN+1];
|
||||||
|
|
||||||
|
int wl = strlen(word);
|
||||||
|
if (wl > (MAXWORDLEN - 1)) return 0;
|
||||||
|
int captype = 0;
|
||||||
|
int abbv = 0;
|
||||||
|
wl = cleanword(cw, word, &captype, &abbv);
|
||||||
|
if (wl == 0) return 1;
|
||||||
|
|
||||||
|
switch(captype) {
|
||||||
|
case HUHCAP:
|
||||||
|
case NOCAP: {
|
||||||
|
rv = check(cw);
|
||||||
|
if ((abbv) && !(rv)) {
|
||||||
|
memcpy(wspace,cw,wl);
|
||||||
|
*(wspace+wl) = '.';
|
||||||
|
*(wspace+wl+1) = '\0';
|
||||||
|
rv = check(wspace);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case ALLCAP: {
|
||||||
|
memcpy(wspace,cw,(wl+1));
|
||||||
|
mkallsmall(wspace, csconv);
|
||||||
|
rv = check(wspace);
|
||||||
|
if (!rv) {
|
||||||
|
mkinitcap(wspace, csconv);
|
||||||
|
rv = check(wspace);
|
||||||
|
}
|
||||||
|
if (!rv) rv = check(cw);
|
||||||
|
if ((abbv) && !(rv)) {
|
||||||
|
memcpy(wspace,cw,wl);
|
||||||
|
*(wspace+wl) = '.';
|
||||||
|
*(wspace+wl+1) = '\0';
|
||||||
|
rv = check(wspace);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case INITCAP: {
|
||||||
|
memcpy(wspace,cw,(wl+1));
|
||||||
|
mkallsmall(wspace, csconv);
|
||||||
|
rv = check(wspace);
|
||||||
|
if (!rv) rv = check(cw);
|
||||||
|
if ((abbv) && !(rv)) {
|
||||||
|
memcpy(wspace,cw,wl);
|
||||||
|
*(wspace+wl) = '.';
|
||||||
|
*(wspace+wl+1) = '\0';
|
||||||
|
rv = check(wspace);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rv) return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
char * MySpell::check(const char * word)
|
||||||
|
{
|
||||||
|
struct hentry * he = NULL;
|
||||||
|
if (pHMgr)
|
||||||
|
he = pHMgr->lookup (word);
|
||||||
|
|
||||||
|
if ((he == NULL) && (pAMgr)) {
|
||||||
|
// try stripping off affixes */
|
||||||
|
he = pAMgr->affix_check(word, strlen(word));
|
||||||
|
|
||||||
|
// try check compound word
|
||||||
|
if ((he == NULL) && (pAMgr->get_compound())) {
|
||||||
|
he = pAMgr->compound_check(word, strlen(word), (pAMgr->get_compound())[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (he) return he->word;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int MySpell::suggest(char*** slst, const char * word)
|
||||||
|
{
|
||||||
|
char cw[MAXWORDLEN+1];
|
||||||
|
char wspace[MAXWORDLEN+1];
|
||||||
|
if (! pSMgr) return 0;
|
||||||
|
int wl = strlen(word);
|
||||||
|
if (wl > (MAXWORDLEN-1)) return 0;
|
||||||
|
int captype = 0;
|
||||||
|
int abbv = 0;
|
||||||
|
wl = cleanword(cw, word, &captype, &abbv);
|
||||||
|
if (wl == 0) return 0;
|
||||||
|
|
||||||
|
int ns = 0;
|
||||||
|
char ** wlst = (char **) calloc(maxSug, sizeof(char *));
|
||||||
|
if (wlst == NULL) return 0;
|
||||||
|
|
||||||
|
switch(captype) {
|
||||||
|
case NOCAP: {
|
||||||
|
ns = pSMgr->suggest(wlst, ns, cw);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case INITCAP: {
|
||||||
|
|
||||||
|
memcpy(wspace,cw,(wl+1));
|
||||||
|
mkallsmall(wspace, csconv);
|
||||||
|
ns = pSMgr->suggest(wlst, ns, wspace);
|
||||||
|
if (ns > 0) {
|
||||||
|
for (int j=0; j < ns; j++)
|
||||||
|
mkinitcap(wlst[j], csconv);
|
||||||
|
}
|
||||||
|
ns = pSMgr->suggest(wlst,ns,cw);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case HUHCAP: {
|
||||||
|
ns = pSMgr->suggest(wlst, ns, cw);
|
||||||
|
if (ns != -1) {
|
||||||
|
memcpy(wspace,cw,(wl+1));
|
||||||
|
mkallsmall(wspace, csconv);
|
||||||
|
ns = pSMgr->suggest(wlst, ns, wspace);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case ALLCAP: {
|
||||||
|
memcpy(wspace,cw,(wl+1));
|
||||||
|
mkallsmall(wspace, csconv);
|
||||||
|
ns = pSMgr->suggest(wlst, ns, wspace);
|
||||||
|
if (ns > 0) {
|
||||||
|
for (int j=0; j < ns; j++)
|
||||||
|
mkallcap(wlst[j], csconv);
|
||||||
|
}
|
||||||
|
if (ns != -1)
|
||||||
|
ns = pSMgr->suggest(wlst, ns , cw);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ns > 0) {
|
||||||
|
*slst = wlst;
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
// try ngram approach since found nothing
|
||||||
|
if (ns == 0) {
|
||||||
|
ns = pSMgr->ngsuggest(wlst, cw, pHMgr);
|
||||||
|
if (ns) {
|
||||||
|
switch(captype) {
|
||||||
|
case NOCAP: break;
|
||||||
|
case HUHCAP: break;
|
||||||
|
case INITCAP: {
|
||||||
|
for (int j=0; j < ns; j++)
|
||||||
|
mkinitcap(wlst[j], csconv);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ALLCAP: {
|
||||||
|
for (int j=0; j < ns; j++)
|
||||||
|
mkallcap(wlst[j], csconv);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*slst = wlst;
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ns < 0) {
|
||||||
|
// we ran out of memory - we should free up as much as possible
|
||||||
|
for (int i=0;i<maxSug; i++)
|
||||||
|
if (wlst[i] != NULL) free(wlst[i]);
|
||||||
|
}
|
||||||
|
if (wlst) free(wlst);
|
||||||
|
*slst = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
char * MySpell::get_dic_encoding()
|
||||||
|
{
|
||||||
|
return encoding;
|
||||||
|
}
|
||||||
|
|
37
goldlib/myspell/myspell.hxx
Normal file
37
goldlib/myspell/myspell.hxx
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
#include "hashmgr.hxx"
|
||||||
|
#include "affixmgr.hxx"
|
||||||
|
#include "suggestmgr.hxx"
|
||||||
|
#include "csutil.hxx"
|
||||||
|
|
||||||
|
#define NOCAP 0
|
||||||
|
#define INITCAP 1
|
||||||
|
#define ALLCAP 2
|
||||||
|
#define HUHCAP 3
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _MYSPELLMGR_HXX_
|
||||||
|
#define _MYSPELLMGR_HXX_
|
||||||
|
|
||||||
|
class MySpell
|
||||||
|
{
|
||||||
|
AffixMgr* pAMgr;
|
||||||
|
HashMgr* pHMgr;
|
||||||
|
SuggestMgr* pSMgr;
|
||||||
|
char * encoding;
|
||||||
|
struct cs_info * csconv;
|
||||||
|
int maxSug;
|
||||||
|
|
||||||
|
public:
|
||||||
|
MySpell(const char * affpath, const char * dpath);
|
||||||
|
~MySpell();
|
||||||
|
|
||||||
|
int suggest(char*** slst, const char * word);
|
||||||
|
int spell(const char *);
|
||||||
|
char * get_dic_encoding();
|
||||||
|
|
||||||
|
private:
|
||||||
|
int cleanword(char *, const char *, int *, int *);
|
||||||
|
char * check(const char *);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
539
goldlib/myspell/suggestmgr.cxx
Normal file
539
goldlib/myspell/suggestmgr.cxx
Normal file
@ -0,0 +1,539 @@
|
|||||||
|
#include "license.readme"
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cctype>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "suggestmgr.hxx"
|
||||||
|
|
||||||
|
#if !defined(_MSC_VER)
|
||||||
|
using namespace std;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern char * mystrdup(const char *);
|
||||||
|
|
||||||
|
|
||||||
|
// Set up the suggestion manager.
//
// tryme: characters to try when building candidate suggestions; a private
//        copy is kept, so the caller may free its string afterwards.
// maxn : maximum number of suggestions to produce.
// aptr : affix manager (may be NULL).
SuggestMgr::SuggestMgr(const char * tryme, int maxn,
                       AffixMgr * aptr)
{
  // register affix manager and check in string of chars to
  // try when building candidate suggestions
  pAMgr = aptr;
  ctry = mystrdup(tryme);
  ctryl = 0;
  if (ctry)
    ctryl = strlen(ctry);
  maxSug = maxn;

  // split suggestions are on unless the affix manager turns them off.
  // The setting has to be stored: the call's result used to be thrown
  // away, which left nosplitsugs permanently false.
  nosplitsugs=(0==1);
  if (pAMgr) nosplitsugs = pAMgr->get_nosplitsugs();
}
|
||||||
|
|
||||||
|
|
||||||
|
// Release the private copy of the try characters and reset the fields.
// The affix manager is only forgotten here, not deleted.
SuggestMgr::~SuggestMgr()
{
  pAMgr = NULL;

  if (ctry != NULL) free(ctry);
  ctry = NULL;

  ctryl = 0;
  maxSug = 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// generate suggestions for a mispelled word
|
||||||
|
// pass in address of array of char * pointers
|
||||||
|
|
||||||
|
int SuggestMgr::suggest(char** wlst, int ns, const char * word)
|
||||||
|
{
|
||||||
|
|
||||||
|
int nsug = ns;
|
||||||
|
|
||||||
|
// perhaps we made chose the wrong char from a related set
|
||||||
|
if ((nsug < maxSug) && (nsug > -1))
|
||||||
|
nsug = mapchars(wlst, word, nsug);
|
||||||
|
|
||||||
|
// perhaps we made a typical fault of spelling
|
||||||
|
if ((nsug < maxSug) && (nsug > -1))
|
||||||
|
nsug = replchars(wlst, word, nsug);
|
||||||
|
|
||||||
|
// did we forget to add a char
|
||||||
|
if ((nsug < maxSug) && (nsug > -1))
|
||||||
|
nsug = forgotchar(wlst, word, nsug);
|
||||||
|
|
||||||
|
// did we swap the order of chars by mistake
|
||||||
|
if ((nsug < maxSug) && (nsug > -1))
|
||||||
|
nsug = swapchar(wlst, word, nsug);
|
||||||
|
|
||||||
|
// did we add a char that should not be there
|
||||||
|
if ((nsug < maxSug) && (nsug > -1))
|
||||||
|
nsug = extrachar(wlst, word, nsug);
|
||||||
|
|
||||||
|
// did we just hit the wrong key in place of a good char
|
||||||
|
if ((nsug < maxSug) && (nsug > -1))
|
||||||
|
nsug = badchar(wlst, word, nsug);
|
||||||
|
|
||||||
|
// perhaps we forgot to hit space and two words ran together
|
||||||
|
if (!nosplitsugs) {
|
||||||
|
if ((nsug < maxSug) && (nsug > -1))
|
||||||
|
nsug = twowords(wlst, word, nsug);
|
||||||
|
}
|
||||||
|
return nsug;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// suggestions for when chose the wrong char out of a related set
|
||||||
|
int SuggestMgr::mapchars(char** wlst, const char * word, int ns)
|
||||||
|
{
|
||||||
|
int wl = strlen(word);
|
||||||
|
if (wl < 2 || ! pAMgr) return ns;
|
||||||
|
|
||||||
|
int nummap = pAMgr->get_nummap();
|
||||||
|
struct mapentry* maptable = pAMgr->get_maptable();
|
||||||
|
if (maptable==NULL) return ns;
|
||||||
|
ns = map_related(word, 0, wlst, ns, maptable, nummap);
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns, const mapentry* maptable, int nummap)
|
||||||
|
{
|
||||||
|
char c = *(word + i);
|
||||||
|
if (c == 0) {
|
||||||
|
int cwrd = 1;
|
||||||
|
for (int m=0; m < ns; m++)
|
||||||
|
if (strcmp(word,wlst[m]) == 0) cwrd = 0;
|
||||||
|
if ((cwrd) && check(word,strlen(word))) {
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(word);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
int in_map = 0;
|
||||||
|
for (int j = 0; j < nummap; j++) {
|
||||||
|
if (strchr(maptable[j].set,c) != 0) {
|
||||||
|
in_map = 1;
|
||||||
|
char * newword = strdup(word);
|
||||||
|
for (int k = 0; k < maptable[j].len; k++) {
|
||||||
|
*(newword + i) = *(maptable[j].set + k);
|
||||||
|
ns = map_related(newword, (i+1), wlst, ns, maptable, nummap);
|
||||||
|
}
|
||||||
|
free(newword);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!in_map) {
|
||||||
|
i++;
|
||||||
|
ns = map_related(word, i, wlst, ns, maptable, nummap);
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// suggestions for a typical fault of spelling, that
|
||||||
|
// differs with more, than 1 letter from the right form.
|
||||||
|
int SuggestMgr::replchars(char** wlst, const char * word, int ns)
|
||||||
|
{
|
||||||
|
char candidate[MAXSWL];
|
||||||
|
const char * r;
|
||||||
|
int lenr, lenp;
|
||||||
|
int cwrd;
|
||||||
|
|
||||||
|
int wl = strlen(word);
|
||||||
|
if (wl < 2 || ! pAMgr) return ns;
|
||||||
|
|
||||||
|
int numrep = pAMgr->get_numrep();
|
||||||
|
struct replentry* reptable = pAMgr->get_reptable();
|
||||||
|
if (reptable==NULL) return ns;
|
||||||
|
|
||||||
|
for (int i=0; i < numrep; i++ ) {
|
||||||
|
r = word;
|
||||||
|
lenr = strlen(reptable[i].replacement);
|
||||||
|
lenp = strlen(reptable[i].pattern);
|
||||||
|
// search every occurence of the pattern in the word
|
||||||
|
while ((r=strstr(r, reptable[i].pattern)) != NULL) {
|
||||||
|
strcpy(candidate, word);
|
||||||
|
if (r-word + lenr + strlen(r+lenp) >= MAXSWL) break;
|
||||||
|
strcpy(candidate+(r-word),reptable[i].replacement);
|
||||||
|
strcpy(candidate+(r-word)+lenr, r+lenp);
|
||||||
|
cwrd = 1;
|
||||||
|
for (int k=0; k < ns; k++)
|
||||||
|
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||||
|
if ((cwrd) && check(candidate,strlen(candidate))) {
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
} else return ns;
|
||||||
|
}
|
||||||
|
r++; // search for the next letter
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// error is wrong char in place of correct one
|
||||||
|
int SuggestMgr::badchar(char ** wlst, const char * word, int ns)
|
||||||
|
{
|
||||||
|
char tmpc;
|
||||||
|
char candidate[MAXSWL];
|
||||||
|
|
||||||
|
int wl = strlen(word);
|
||||||
|
int cwrd;
|
||||||
|
strcpy (candidate, word);
|
||||||
|
|
||||||
|
// swap out each char one by one and try all the tryme
|
||||||
|
// chars in its place to see if that makes a good word
|
||||||
|
for (int i=0; i < wl; i++) {
|
||||||
|
tmpc = candidate[i];
|
||||||
|
for (int j=0; j < ctryl; j++) {
|
||||||
|
if (ctry[j] == tmpc) continue;
|
||||||
|
candidate[i] = ctry[j];
|
||||||
|
cwrd = 1;
|
||||||
|
for (int k=0; k < ns; k++)
|
||||||
|
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||||
|
if ((cwrd) && check(candidate,wl)) {
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
} else return ns;
|
||||||
|
}
|
||||||
|
candidate[i] = tmpc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// error is word has an extra letter it does not need
|
||||||
|
int SuggestMgr::extrachar(char** wlst, const char * word, int ns)
|
||||||
|
{
|
||||||
|
char candidate[MAXSWL];
|
||||||
|
const char * p;
|
||||||
|
char * r;
|
||||||
|
int cwrd;
|
||||||
|
|
||||||
|
int wl = strlen(word);
|
||||||
|
if (wl < 2) return ns;
|
||||||
|
|
||||||
|
// try omitting one char of word at a time
|
||||||
|
strcpy (candidate, word + 1);
|
||||||
|
for (p = word, r = candidate; *p != 0; ) {
|
||||||
|
cwrd = 1;
|
||||||
|
for (int k=0; k < ns; k++)
|
||||||
|
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||||
|
if ((cwrd) && check(candidate,wl-1)) {
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
} else return ns;
|
||||||
|
}
|
||||||
|
*r++ = *p++;
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// error is mising a letter it needs
|
||||||
|
int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns)
|
||||||
|
{
|
||||||
|
char candidate[MAXSWL];
|
||||||
|
const char * p;
|
||||||
|
char * q;
|
||||||
|
int cwrd;
|
||||||
|
|
||||||
|
int wl = strlen(word);
|
||||||
|
|
||||||
|
// try inserting a tryme character before every letter
|
||||||
|
strcpy(candidate + 1, word);
|
||||||
|
for (p = word, q = candidate; *p != 0; ) {
|
||||||
|
for (int i = 0; i < ctryl; i++) {
|
||||||
|
*q = ctry[i];
|
||||||
|
cwrd = 1;
|
||||||
|
for (int k=0; k < ns; k++)
|
||||||
|
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||||
|
if ((cwrd) && check(candidate,wl+1)) {
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
} else return ns;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*q++ = *p++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// now try adding one to end */
|
||||||
|
for (int i = 0; i < ctryl; i++) {
|
||||||
|
*q = ctry[i];
|
||||||
|
cwrd = 1;
|
||||||
|
for (int k=0; k < ns; k++)
|
||||||
|
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||||
|
if ((cwrd) && check(candidate,wl+1)) {
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
} else return ns;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* error is should have been two words */
|
||||||
|
int SuggestMgr::twowords(char ** wlst, const char * word, int ns)
|
||||||
|
{
|
||||||
|
char candidate[MAXSWL];
|
||||||
|
char * p;
|
||||||
|
|
||||||
|
int wl=strlen(word);
|
||||||
|
if (wl < 3) return ns;
|
||||||
|
strcpy(candidate + 1, word);
|
||||||
|
|
||||||
|
// split the string into two pieces after every char
|
||||||
|
// if both pieces are good words make them a suggestion
|
||||||
|
for (p = candidate + 1; p[1] != '\0'; p++) {
|
||||||
|
p[-1] = *p;
|
||||||
|
*p = '\0';
|
||||||
|
if (check(candidate,strlen(candidate))) {
|
||||||
|
if (check((p+1),strlen(p+1))) {
|
||||||
|
*p = ' ';
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
} else return ns;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// error is adjacent letter were swapped
|
||||||
|
int SuggestMgr::swapchar(char ** wlst, const char * word, int ns)
|
||||||
|
{
|
||||||
|
char candidate[MAXSWL];
|
||||||
|
char * p;
|
||||||
|
char tmpc;
|
||||||
|
int cwrd;
|
||||||
|
|
||||||
|
int wl = strlen(word);
|
||||||
|
|
||||||
|
// try swapping adjacent chars one by one
|
||||||
|
strcpy(candidate, word);
|
||||||
|
for (p = candidate; p[1] != 0; p++) {
|
||||||
|
tmpc = *p;
|
||||||
|
*p = p[1];
|
||||||
|
p[1] = tmpc;
|
||||||
|
cwrd = 1;
|
||||||
|
for (int k=0; k < ns; k++)
|
||||||
|
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
||||||
|
if ((cwrd) && check(candidate,wl)) {
|
||||||
|
if (ns < maxSug) {
|
||||||
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
if (wlst[ns] == NULL) return -1;
|
||||||
|
ns++;
|
||||||
|
} else return ns;
|
||||||
|
}
|
||||||
|
tmpc = *p;
|
||||||
|
*p = p[1];
|
||||||
|
p[1] = tmpc;
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// generate a set of suggestions for very poorly spelled words
|
||||||
|
int SuggestMgr::ngsuggest(char** wlst, char * word, HashMgr* pHMgr)
|
||||||
|
{
|
||||||
|
|
||||||
|
int i, j;
|
||||||
|
int lval;
|
||||||
|
int sc;
|
||||||
|
int lp;
|
||||||
|
|
||||||
|
if (! pHMgr) return 0;
|
||||||
|
|
||||||
|
// exhaustively search through all root words
|
||||||
|
// keeping track of the MAX_ROOTS most similar root words
|
||||||
|
struct hentry * roots[MAX_ROOTS];
|
||||||
|
int scores[MAX_ROOTS];
|
||||||
|
for (i = 0; i < MAX_ROOTS; i++) {
|
||||||
|
roots[i] = NULL;
|
||||||
|
scores[i] = -100 * i;
|
||||||
|
}
|
||||||
|
lp = MAX_ROOTS - 1;
|
||||||
|
|
||||||
|
int n = strlen(word);
|
||||||
|
|
||||||
|
struct hentry* hp = NULL;
|
||||||
|
int col = -1;
|
||||||
|
while ((hp = pHMgr->walk_hashtable(col, hp))) {
|
||||||
|
sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE);
|
||||||
|
if (sc > scores[lp]) {
|
||||||
|
scores[lp] = sc;
|
||||||
|
roots[lp] = hp;
|
||||||
|
int lval = sc;
|
||||||
|
for (j=0; j < MAX_ROOTS; j++)
|
||||||
|
if (scores[j] < lval) {
|
||||||
|
lp = j;
|
||||||
|
lval = scores[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// find minimum threshhold for a passable suggestion
|
||||||
|
// mangle original word three differnt ways
|
||||||
|
// and score them to generate a minimum acceptable score
|
||||||
|
int thresh = 0;
|
||||||
|
char * mw = NULL;
|
||||||
|
for (int sp = 1; sp < 4; sp++) {
|
||||||
|
mw = strdup(word);
|
||||||
|
for (int k=sp; k < n; k+=4) *(mw + k) = '*';
|
||||||
|
thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH);
|
||||||
|
free(mw);
|
||||||
|
}
|
||||||
|
mw = NULL;
|
||||||
|
thresh = thresh / 3;
|
||||||
|
thresh--;
|
||||||
|
|
||||||
|
// now expand affixes on each of these root words and
|
||||||
|
// and use length adjusted ngram scores to select
|
||||||
|
// possible suggestions
|
||||||
|
char * guess[MAX_GUESS];
|
||||||
|
int gscore[MAX_GUESS];
|
||||||
|
for(i=0;i<MAX_GUESS;i++) {
|
||||||
|
guess[i] = NULL;
|
||||||
|
gscore[i] = -100 * i;
|
||||||
|
}
|
||||||
|
|
||||||
|
lp = MAX_GUESS - 1;
|
||||||
|
|
||||||
|
struct guessword * glst;
|
||||||
|
glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword));
|
||||||
|
if (! glst) return 0;
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_ROOTS; i++) {
|
||||||
|
|
||||||
|
if (roots[i]) {
|
||||||
|
struct hentry * rp = roots[i];
|
||||||
|
int nw = pAMgr->expand_rootword(glst, MAX_WORDS, rp->word, rp->wlen,
|
||||||
|
rp->astr, rp->alen);
|
||||||
|
for (int k = 0; k < nw; k++) {
|
||||||
|
sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH);
|
||||||
|
if (sc > thresh) {
|
||||||
|
if (sc > gscore[lp]) {
|
||||||
|
if (guess[lp]) free (guess[lp]);
|
||||||
|
gscore[lp] = sc;
|
||||||
|
guess[lp] = glst[k].word;
|
||||||
|
lval = sc;
|
||||||
|
for (j=0; j < MAX_GUESS; j++)
|
||||||
|
if (gscore[j] < lval) {
|
||||||
|
lp = j;
|
||||||
|
lval = gscore[j];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
free (glst[k].word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (glst) free(glst);
|
||||||
|
|
||||||
|
// now we are done generating guesses
|
||||||
|
// sort in order of decreasing score and copy over
|
||||||
|
|
||||||
|
bubblesort(&guess[0], &gscore[0], MAX_GUESS);
|
||||||
|
int ns = 0;
|
||||||
|
for (i=0; i < MAX_GUESS; i++) {
|
||||||
|
if (guess[i]) {
|
||||||
|
int unique = 1;
|
||||||
|
for (j=i+1; j < MAX_GUESS; j++)
|
||||||
|
if (guess[j])
|
||||||
|
if (!strcmp(guess[i], guess[j])) unique = 0;
|
||||||
|
if (unique) {
|
||||||
|
wlst[ns++] = guess[i];
|
||||||
|
} else {
|
||||||
|
free(guess[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// see if a candidate suggestion is spelled correctly
|
||||||
|
// needs to check both root words and words with affixes
|
||||||
|
int SuggestMgr::check(const char * word, int len)
|
||||||
|
{
|
||||||
|
struct hentry * rv=NULL;
|
||||||
|
if (pAMgr) {
|
||||||
|
rv = pAMgr->lookup(word);
|
||||||
|
if (rv == NULL) rv = pAMgr->affix_check(word,len);
|
||||||
|
}
|
||||||
|
if (rv) return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// generate an n-gram score comparing s1 and s2
|
||||||
|
int SuggestMgr::ngram(int n, char * s1, const char * s2, int uselen)
|
||||||
|
{
|
||||||
|
int nscore = 0;
|
||||||
|
int l1 = strlen(s1);
|
||||||
|
int l2 = strlen(s2);
|
||||||
|
int ns;
|
||||||
|
for (int j=1;j<=n;j++) {
|
||||||
|
ns = 0;
|
||||||
|
for (int i=0;i<=(l1-j);i++) {
|
||||||
|
char c = *(s1 + i + j);
|
||||||
|
*(s1 + i + j) = '\0';
|
||||||
|
if (strstr(s2,(s1+i))) ns++;
|
||||||
|
*(s1 + i + j ) = c;
|
||||||
|
}
|
||||||
|
nscore = nscore + ns;
|
||||||
|
if (ns < 2) break;
|
||||||
|
}
|
||||||
|
ns = 0;
|
||||||
|
if (uselen == NGRAM_LONGER_WORSE) ns = (l2-l1)-2;
|
||||||
|
if (uselen == NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2;
|
||||||
|
return (nscore - ((ns > 0) ? ns : 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// sort in decreasing order of score
|
||||||
|
void SuggestMgr::bubblesort(char** rword, int* rsc, int n )
|
||||||
|
{
|
||||||
|
int m = 1;
|
||||||
|
while (m < n) {
|
||||||
|
int j = m;
|
||||||
|
while (j > 0) {
|
||||||
|
if (rsc[j-1] < rsc[j]) {
|
||||||
|
int sctmp = rsc[j-1];
|
||||||
|
char * wdtmp = rword[j-1];
|
||||||
|
rsc[j-1] = rsc[j];
|
||||||
|
rword[j-1] = rword[j];
|
||||||
|
rsc[j] = sctmp;
|
||||||
|
rword[j] = wdtmp;
|
||||||
|
j--;
|
||||||
|
} else break;
|
||||||
|
}
|
||||||
|
m++;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
48
goldlib/myspell/suggestmgr.hxx
Normal file
48
goldlib/myspell/suggestmgr.hxx
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#ifndef _SUGGESTMGR_HXX_
|
||||||
|
#define _SUGGESTMGR_HXX_
|
||||||
|
|
||||||
|
#define MAXSWL 100
|
||||||
|
#define MAX_ROOTS 10
|
||||||
|
#define MAX_WORDS 500
|
||||||
|
#define MAX_GUESS 10
|
||||||
|
|
||||||
|
#define NGRAM_IGNORE_LENGTH 0
|
||||||
|
#define NGRAM_LONGER_WORSE 1
|
||||||
|
#define NGRAM_ANY_MISMATCH 2
|
||||||
|
|
||||||
|
|
||||||
|
#include "atypes.hxx"
|
||||||
|
#include "affixmgr.hxx"
|
||||||
|
#include "hashmgr.hxx"
|
||||||
|
|
||||||
|
class SuggestMgr
|
||||||
|
{
|
||||||
|
char * ctry;
|
||||||
|
int ctryl;
|
||||||
|
AffixMgr* pAMgr;
|
||||||
|
int maxSug;
|
||||||
|
bool nosplitsugs;
|
||||||
|
|
||||||
|
public:
|
||||||
|
SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
|
||||||
|
~SuggestMgr();
|
||||||
|
|
||||||
|
int suggest(char** wlst, int ns, const char * word);
|
||||||
|
int check(const char *, int);
|
||||||
|
int ngsuggest(char ** wlst, char * word, HashMgr* pHMgr);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int replchars(char**, const char *, int);
|
||||||
|
int mapchars(char**, const char *, int);
|
||||||
|
int map_related(const char *, int, char ** wlst, int, const mapentry*, int);
|
||||||
|
int forgotchar(char **, const char *, int);
|
||||||
|
int swapchar(char **, const char *, int);
|
||||||
|
int extrachar(char **, const char *, int);
|
||||||
|
int badchar(char **, const char *, int);
|
||||||
|
int twowords(char **, const char *, int);
|
||||||
|
int ngram(int n, char * s1, const char * s2, int uselen);
|
||||||
|
void bubblesort( char ** rwd, int * rsc, int n);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
Reference in New Issue
Block a user