GCC build with Myspell support (mingw-cygwin)

This commit is contained in:
Stas Degteff 2006-03-12 07:57:03 +00:00
parent 99aedb1707
commit df1529fd6a
19 changed files with 280 additions and 163 deletions

View File

@ -76,6 +76,6 @@
# Uncomment to disable Synchronet BBS support
#CPPFLAGS+=-DGCFG_NOSYNCHRONET
# Uncomment to disable MS Office spellchecker support (win32 only)
CPPFLAGS+=-DGCFG_NO_MSSPELL
#CPPFLAGS+=-DGCFG_NO_MSSPELL
# Uncomment to disable MySpell spellchecker support (multiplatform)
CPPFLAGS+=-DGCFG_NO_MYSPELL
#CPPFLAGS+=-DGCFG_NO_MYSPELL

View File

@ -16,7 +16,7 @@ FOBJPATH=$(TOP)/$(OBJPATH)/$(PLATFORM)/$(TARGET)
FDEPPATH=$(TOP)/$(OBJPATH)/$(PLATFORM)/$(TARGET)
FLIBPATH=$(TOP)/$(LIBPATH)/$(PLATFORM)
.SUFFIXES: .c .cpp .all .rc
.SUFFIXES: .c .cpp .all .rc .cxx
sourcelist: bld$(PLATFORM).inc
@ -33,6 +33,14 @@ $(FOBJPATH)/%$(OBJEXT): %.cpp
$(CAT) $(patsubst %.cpp,%.d,$<)>>$(FDEPPATH)/$(patsubst %.cpp,%.d,$<) &&\
rm $(patsubst %.cpp,%.d,$<) || true"
# Pattern rule: compile a .cxx source into $(FOBJPATH)/<name>$(OBJEXT) with
# the C++ compiler, using the same flags variable ($(CPPFLAGS)) as this rule
# passes for all C++ compilation here.
#
# -MD asks the compiler to emit a <name>.d dependency file as a side effect.
# The shell snippet that follows runs only if a non-empty <name>.d is found
# under the source's name: it writes the "$(FOBJPATH)/" prefix into
# $(FDEPPATH)/<name>.d (echo -n, so no newline), appends the generated
# dependency text right after that prefix, and removes the original .d file.
# The trailing "|| true" keeps make from failing when any step of that chain
# does not succeed (e.g. no .d file was produced).
$(FOBJPATH)/%$(OBJEXT): %.cxx
@echo building $(basename $<)$(OBJEXT)
$(CXX) -c -MD $(CPPFLAGS) -o $@ $<
@$(SHELL) -c "[ -s $(patsubst %.cxx,%.d,$<) ] && \
echo -n $(FOBJPATH)/>$(FDEPPATH)/$(patsubst %.cxx,%.d,$<) && \
$(CAT) $(patsubst %.cxx,%.d,$<)>>$(FDEPPATH)/$(patsubst %.cxx,%.d,$<) &&\
rm $(patsubst %.cxx,%.d,$<) || true"
$(FOBJPATH)/%$(OBJEXT): %.c
@echo building $(basename $<)$(OBJEXT)
$(CC) -c -MD $(CFLAGS) -o $@ $<
@ -59,12 +67,12 @@ include bld$(PLATFORM).inc
endif
ifeq ($(PLATFORM),cyg)
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%$(OBJEXT),$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(filter %.c %.cpp %.rc,$(SOURCES))))))
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%$(OBJEXT),$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(patsubst %.cxx,%$(OBJEXT),$(filter %.c %.cpp %.cxx %.rc,$(SOURCES)))))))
else
ifeq ($(PLATFORM),emx)
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%.res,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(filter %.c %.cpp %.rc,$(SOURCES))))))
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%.res,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(patsubst %.cxx,%$(OBJEXT),$(filter %.c %.cpp %.cxx %.rc,$(SOURCES)))))))
else
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(filter %.c %.cpp %.rc,$(SOURCES)))))
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(patsubst %.cxx,%$(OBJEXT),$(filter %.c %.cpp %.cxx %.rc,$(SOURCES))))))
endif
endif
DEPS = $(wildcard $(FDEPPATH)/*.d)

View File

@ -1,10 +1,15 @@
# -*- makefile -*-
include GNUmakef.def
include Config.def
.PHONY: all clean distclean dirs sourcelists deps docs
LIBS=gall gcfg gmb3 glibc uulib smblib msgidlib myspell
LIBS=gall gcfg gmb3 glibc uulib smblib msgidlib
# Append myspell to LIBS unless CPPFLAGS contains the text GCFG_NO_MYSPELL
# (findstring is a substring match, so -DGCFG_NO_MYSPELL counts).
ifneq ($(findstring GCFG_NO_MYSPELL, $(CPPFLAGS)), GCFG_NO_MYSPELL)
LIBS+=myspell
endif
EXECUTABLES=golded3 goldnode rddt
all: sourcelists

View File

@ -300,6 +300,7 @@ REPLYLINKLIST
REPLYLINKSHOWALWAYS
ROBOTNAME
SCHECKERDEFLANG
SCHECKERDICPATH
SCHECKERENABLED
SCHECKERUSERDIC
SCREENBLANKER
@ -354,6 +355,7 @@ TIMEOUTSAVEMSG
TIMESLICE
TIMESREAD
TITLESTATUS
TRANSLATE
TWITMODE
TWITNAME
TWITSUBJ

View File

@ -3,6 +3,7 @@
* These macros may be used in:
* template file, externutil command line, tearline and tagline.
==========================================================================
@align
@areaname
@areapath
@areatype
@ -57,7 +58,9 @@
@otime
@oto
@otzoffset
@pad
@pid
@pipe
@pseudo
@rev
@subject
@ -69,6 +72,7 @@
@tlname
@tname
@tpseudo
@tr
@ver
@version
@widepid

View File

@ -9,9 +9,14 @@ endif
endif
TOP=..
include $(TOP)/Config.def
SHORTTARGET=ged
TARGET=golded3
GLIBS=gmb3 gall gcfg uulib smblib msgidlib
# Append myspell to GLIBS unless CPPFLAGS contains the text GCFG_NO_MYSPELL
# (findstring is a substring match, so -DGCFG_NO_MYSPELL counts).
ifneq ($(findstring GCFG_NO_MYSPELL, $(CPPFLAGS)), GCFG_NO_MYSPELL)
GLIBS+=myspell
endif
INCS=-I. -I$(TOP)/goldlib/gall -I$(TOP)/goldlib/gcfg -I$(TOP)/goldlib/gmb3 -I$(TOP)/goldlib/uulib -I$(TOP)/goldlib/smblib -I$(TOP)/goldlib/msgidlib
ifeq ($(findstring EMX, $(PATH)), EMX)

View File

@ -19,6 +19,11 @@ INCS+=-I$(TOP)/goldlib/glibc
endif
endif
include $(TOP)/Config.def
# Add the myspell header directory to the include path unless CPPFLAGS
# contains the text GCFG_NO_MYSPELL (substring match via findstring).
ifneq ($(findstring GCFG_NO_MYSPELL, $(CPPFLAGS)), GCFG_NO_MYSPELL)
INCS+=-I$(TOP)/goldlib/myspell
endif
include $(TOP)/GNUmakef.inc
ifeq ($(PLATFORM),emx)

View File

@ -35,7 +35,9 @@
#include <gdirposx.h>
#include <gstrall.h>
#if !defined(GCFG_NO_MYSPELL)
#include <myspell.hxx>
#endif
#include <gespell.h>
typedef char XlatName[17];
@ -455,7 +457,7 @@ bool CMSSpellLang::AddWord(const char *text)
bool CMYSpellLang::Init(const gdirentry *entry)
{
gposixdir dir(entry->dirname);
std::string affname = entry->name.substr(0, entry->name.length()-4);
strcpy(mLangCode, affname.c_str());

9
goldlib/myspell/Makefile Normal file
View File

@ -0,0 +1,9 @@
# -*- makefile -*-
# Build description for the myspell library directory.

# Path from this directory to the top of the source tree.
TOP=../..
# Name of the target built here; the shared include files below use
# $(TARGET) in their object/dependency paths.
TARGET=myspell
# Header search path: this directory plus goldlib/gall.
INCS=-I$(TOP)/goldlib/myspell -I$(TOP)/goldlib/gall
# Start with empty C compiler flags.
# NOTE(review): presumably because this directory contains only .cxx
# sources, which are compiled with $(CPPFLAGS) -- confirm.
CFLAGS=

# Shared build rules (pattern rules, object list) ...
include $(TOP)/GNUmakef.inc
# ... and, presumably, the library-specific link rules -- TODO confirm
# against GNUmakef.lib, which is not visible here.
include $(TOP)/GNUmakef.lib

View File

@ -1,4 +1,4 @@
#include "license.readme"
#include "license.rea"
#include <cctype>
@ -21,7 +21,7 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
pmyMgr = pmgr;
// set up its initial values
achar = dp->achar; // char flag
achar = dp->achar; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
@ -43,7 +43,7 @@ PfxEntry::~PfxEntry()
if (strip)free(strip);
pmyMgr = NULL;
appnd = NULL;
strip = NULL;
strip = NULL;
}
@ -67,19 +67,19 @@ char * PfxEntry::add(const char * word, int len)
if (appndl) {
strcpy(tword,appnd);
tlen += appndl;
}
}
char * pp = tword + tlen;
strcpy(pp, (word + stripl));
return mystrdup(tword);
}
}
return NULL;
return NULL;
}
// check if this prefix entry matches
// check if this prefix entry matches
struct hentry * PfxEntry::check(const char * word, int len)
{
int cond; // condition number being examined
@ -123,8 +123,8 @@ struct hentry * PfxEntry::check(const char * word, int len)
if (TESTAFF(he->astr, achar, he->alen)) return he;
}
// prefix matched but no root word was found
// if XPRODUCT is allowed, try again but now
// prefix matched but no root word was found
// if XPRODUCT is allowed, try again but now
// cross checked combined with a suffix
if (xpflg & XPRODUCT) {
@ -144,7 +144,7 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
pmyMgr = pmgr;
// set up its initial values
achar = dp->achar; // char flag
achar = dp->achar; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
@ -167,7 +167,7 @@ SfxEntry::~SfxEntry()
if (strip) free(strip);
pmyMgr = NULL;
appnd = NULL;
strip = NULL;
strip = NULL;
}
@ -205,10 +205,10 @@ char * SfxEntry::add(const char * word, int len)
// see if this suffix is present in the word
// see if this suffix is present in the word
struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEntry* ppfx)
{
int tmpl; // length of tmpword
int tmpl; // length of tmpword
int cond; // condition beng examined
struct hentry * he; // hash entry pointer
unsigned char * cp;
@ -257,10 +257,10 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEnt
if (cond < 0) {
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
if (TESTAFF(he->astr, achar , he->alen) &&
((optflags & XPRODUCT) == 0 ||
if (TESTAFF(he->astr, achar , he->alen) &&
((optflags & XPRODUCT) == 0 ||
TESTAFF(he->astr, ep->getFlag(), he->alen))) return he;
}
}
}
}
return NULL;
@ -274,12 +274,12 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEnt
Appendix: Understanding Affix Code
An affix is either a prefix or a suffix attached to root words to make
An affix is either a prefix or a suffix attached to root words to make
other words.
Basically a Prefix or a Suffix is set of AffEntry objects
which store information about the prefix or suffix along
with supporting routines to check if a word has a particular
which store information about the prefix or suffix along
with supporting routines to check if a word has a particular
prefix or suffix or a combination.
The structure affentry is defined as follows:
@ -292,15 +292,15 @@ struct affentry
short stripl; // length of the strip string
short appndl; // length of the affix string
short numconds; // the number of conditions that must be met
short xpflg; // flag: XPRODUCT- combine both prefix and suffix
short xpflg; // flag: XPRODUCT- combine both prefix and suffix
char conds[SETSIZE]; // array which encodes the conditions to be met
};
Here is a suffix borrowed from the en_US.aff file. This file
Here is a suffix borrowed from the en_US.aff file. This file
is whitespace delimited.
SFX D Y 4
SFX D Y 4
SFX D 0 e d
SFX D y ied [^aeiou]y
SFX D 0 ed [^ey]
@ -318,7 +318,7 @@ Field
4 4 - indicates that sequence of 4 affentry structures are needed to
properly store the affix information
The remaining lines describe the unique information for the 4 SfxEntry
The remaining lines describe the unique information for the 4 SfxEntry
objects that make up this affix. Each line can be interpreted
as follows: (note fields 1 and 2 are as a check against line 1 info)
@ -333,57 +333,57 @@ Field
can be applied
Field 5 is interesting. Since this is a suffix, field 5 tells us that
there are 2 conditions that must be met. The first condition is that
the next to the last character in the word must *NOT* be any of the
there are 2 conditions that must be met. The first condition is that
the next to the last character in the word must *NOT* be any of the
following "a", "e", "i", "o" or "u". The second condition is that
the last character of the word must end in "y".
So how can we encode this information concisely and be able to
So how can we encode this information concisely and be able to
test for both conditions in a fast manner? The answer is found
by studying the wonderful ispell code of Geoff Kuenning, et al.
by studying the wonderful ispell code of Geoff Kuenning, et al.
(now available under a normal BSD license).
If we set up a conds array of 256 bytes indexed (0 to 255) and access it
using a character (cast to an unsigned char) of a string, we have 8 bits
of information we can store about that character. Specifically we
could use each bit to say if that character is allowed in any of the
could use each bit to say if that character is allowed in any of the
last (or first for prefixes) 8 characters of the word.
Basically, each character at one end of the word (up to the number
of conditions) is used to index into the conds array and the resulting
value found there says whether that character is valid for a
specific character position in the word.
Basically, each character at one end of the word (up to the number
of conditions) is used to index into the conds array and the resulting
value found there says whether that character is valid for a
specific character position in the word.
For prefixes, it does this by setting bit 0 if that char is valid
in the first position, bit 1 if valid in the second position, and so on.
For prefixes, it does this by setting bit 0 if that char is valid
in the first position, bit 1 if valid in the second position, and so on.
If a bit is not set, then that char is not valid for that position in the
word.
If working with suffixes bit 0 is used for the character closest
to the front, bit 1 for the next character towards the end, ...,
with bit numconds-1 representing the last char at the end of the string.
If working with suffixes bit 0 is used for the character closest
to the front, bit 1 for the next character towards the end, ...,
with bit numconds-1 representing the last char at the end of the string.
Note: since entries in the conds[] are 8 bits, only 8 conditions
Note: since entries in the conds[] are 8 bits, only 8 conditions
(read that only 8 character positions) can be examined at one
end of a word (the beginning for prefixes and the end for suffixes).
So to make this clearer, lets encode the conds array values for the
So to make this clearer, lets encode the conds array values for the
first two affentries for the suffix D described earlier.
For the first affentry:
For the first affentry:
numconds = 1 (only examine the last character)
conds['e'] = (1 << 0) (the word must end in an E)
all others are all 0
For the second affentry:
numconds = 2 (only examine the last two characters)
numconds = 2 (only examine the last two characters)
conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
where X is all characters *but* a, e, i, o, or u
conds['y'] = (1 << 1) (the last char must be a y)
all other bits for all other entries in the conds array are zero

View File

@ -2,7 +2,7 @@
#define _AFFIX_HXX_
#include "atypes.hxx"
#include "baseaffix.hxx"
#include "baseaffi.hxx"
#include "affixmgr.hxx"
@ -26,7 +26,7 @@ public:
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
inline unsigned char getFlag() { return achar; }
inline const char * getKey() { return appnd; }
inline const char * getKey() { return appnd; }
char * add(const char * word, int len);
inline PfxEntry * getNext() { return next; }
@ -60,12 +60,12 @@ public:
SfxEntry(AffixMgr* pmgr, affentry* dp );
~SfxEntry();
struct hentry * check(const char * word, int len, int optflags,
struct hentry * check(const char * word, int len, int optflags,
AffEntry* ppfx);
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
inline unsigned char getFlag() { return achar; }
inline const char * getKey() { return rappnd; }
inline const char * getKey() { return rappnd; }
char * add(const char * word, int len);
inline SfxEntry * getNext() { return next; }

View File

@ -1,4 +1,4 @@
#include "license.readme"
#include "license.rea"
#include <cstdlib>
#include <cstring>
@ -17,10 +17,10 @@ extern void mychomp(char * s);
extern char * mystrdup(const char * s);
extern char * myrevstrdup(const char * s);
extern char * mystrsep(char ** sptr, const char delim);
extern int isSubset(const char * s1, const char * s2);
extern int isSubset(const char * s1, const char * s2);
AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
{
// register hash manager and load affix data from aff file
pHMgr = ptr;
@ -47,9 +47,9 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
}
AffixMgr::~AffixMgr()
AffixMgr::~AffixMgr()
{
// pass through linked prefix entries and clean up
for (int i=0; i < SETSIZE ;i++) {
pFlag[i] = NULL;
@ -60,7 +60,7 @@ AffixMgr::~AffixMgr()
delete(ptr);
ptr = nptr;
nptr = NULL;
}
}
}
// pass through linked suffix entries and clean up
@ -73,31 +73,31 @@ AffixMgr::~AffixMgr()
delete(ptr);
ptr = nptr;
nptr = NULL;
}
}
}
if (trystring) free(trystring);
trystring=NULL;
if (encoding) free(encoding);
encoding=NULL;
if (maptable) {
if (maptable) {
for (int j=0; j < nummap; j++) {
free(maptable[j].set);
maptable[j].set = NULL;
maptable[j].len = 0;
}
free(maptable);
free(maptable);
maptable = NULL;
}
nummap = 0;
if (reptable) {
if (reptable) {
for (int j=0; j < numrep; j++) {
free(reptable[j].pattern);
free(reptable[j].replacement);
reptable[j].pattern = NULL;
reptable[j].replacement = NULL;
}
free(reptable);
free(reptable);
reptable = NULL;
}
numrep = 0;
@ -108,13 +108,13 @@ AffixMgr::~AffixMgr()
}
// read in aff file and build up prefix and suffix entry objects
// read in aff file and build up prefix and suffix entry objects
int AffixMgr::parse_file(const char * affpath)
{
// io buffers
char line[MAXLNLEN+1];
// affix type
char ft;
@ -195,7 +195,7 @@ int AffixMgr::parse_file(const char * affpath)
}
fclose(afflst);
// now we can speed up performance greatly taking advantage of the
// now we can speed up performance greatly taking advantage of the
// relationship between the affixes and the idea of "subsets".
// View each prefix as a potential leading subset of another and view
@ -208,14 +208,14 @@ int AffixMgr::parse_file(const char * affpath)
// The same argument goes for suffix string that are reversed.
// Then to top this off why not examine the first char of the word to quickly
// limit the set of prefixes to examine (i.e. the prefixes to examine must
// limit the set of prefixes to examine (i.e. the prefixes to examine must
// be leading supersets of the first character of the word (if they exist)
// To take advantage of this "subset" relationship, we need to add two links
// from entry. One to take next if the current prefix is found (call it nexteq)
// and one to take next if the current prefix is not found (call it nextne).
// Since we have built ordered lists, all that remains is to properly initialize
// Since we have built ordered lists, all that remains is to properly initialize
// the nextne and nexteq pointers that relate them
process_pfx_order();
@ -225,7 +225,7 @@ int AffixMgr::parse_file(const char * affpath)
}
// we want to be able to quickly access prefix information
// both by prefix flag, and sorted by prefix string itself
// both by prefix flag, and sorted by prefix string itself
// so we need to set up two indexes
int AffixMgr::build_pfxlist(AffEntry* pfxptr)
@ -258,7 +258,7 @@ int AffixMgr::build_pfxlist(AffEntry* pfxptr)
// now handle the general case
unsigned char sp = *((const unsigned char *)key);
ptr = (PfxEntry*)pStart[sp];
/* handle the insert at top of list case */
if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) {
ep->setNext(ptr);
@ -312,7 +312,7 @@ int AffixMgr::build_sfxlist(AffEntry* sfxptr)
// now handle the normal case
unsigned char sp = *((const unsigned char *)key);
ptr = (SfxEntry*)sStart[sp];
/* handle the insert at top of list case */
if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) {
ep->setNext(ptr);
@ -344,7 +344,7 @@ int AffixMgr::process_pfx_order()
ptr = (PfxEntry*)pStart[i];
// look through the remainder of the list
// and find next entry with affix that
// and find next entry with affix that
// the current one is not a subset of
// mark that as destination for NextNE
// use next in list that you are a subset
@ -358,7 +358,7 @@ int AffixMgr::process_pfx_order()
}
ptr->setNextNE(nptr);
ptr->setNextEQ(NULL);
if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey()))
if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey()))
ptr->setNextEQ(ptr->getNext());
}
@ -394,7 +394,7 @@ int AffixMgr::process_sfx_order()
ptr = (SfxEntry *) sStart[i];
// look through the remainder of the list
// and find next entry with affix that
// and find next entry with affix that
// the current one is not a subset of
// mark that as destination for NextNE
// use next in list that you are a subset
@ -407,7 +407,7 @@ int AffixMgr::process_sfx_order()
}
ptr->setNextNE(nptr);
ptr->setNextEQ(NULL);
if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey()))
if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey()))
ptr->setNextEQ(ptr->getNext());
}
@ -490,12 +490,12 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
c = 0;
}
// end of condition
// end of condition
if (c != 0) {
ec = 1;
}
if (ec) {
if (grp == 1) {
if (neg == 0) {
@ -513,7 +513,7 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
}
}
neg = 0;
grp = 0;
grp = 0;
nm = 0;
} else {
// not a group so just set the proper bit for this char
@ -521,7 +521,7 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
if (c == '.') {
// wild card character so set them all
for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
} else {
} else {
ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n);
}
}
@ -541,7 +541,7 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
struct hentry * AffixMgr::prefix_check (const char * word, int len)
{
struct hentry * rv= NULL;
// first handle the special case of 0 length prefixes
PfxEntry * pe = (PfxEntry *) pStart[0];
while (pe) {
@ -549,7 +549,7 @@ struct hentry * AffixMgr::prefix_check (const char * word, int len)
if (rv) return rv;
pe = pe->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)word);
PfxEntry * pptr = (PfxEntry *)pStart[sp];
@ -563,7 +563,7 @@ struct hentry * AffixMgr::prefix_check (const char * word, int len)
pptr = pptr->getNextNE();
}
}
return NULL;
}
@ -574,12 +574,12 @@ struct hentry * AffixMgr::compound_check (const char * word, int len, char compo
struct hentry * rv= NULL;
char * st;
char ch;
// handle case of string too short to be a piece of a compound word
// handle case of string too short to be a piece of a compound word
if (len < cpdmin) return NULL;
st = mystrdup(word);
for (i=cpdmin; i < (len - (cpdmin-1)); i++) {
ch = st[i];
@ -599,23 +599,23 @@ struct hentry * AffixMgr::compound_check (const char * word, int len, char compo
free(st);
return rv;
}
rv = compound_check((word+i),strlen(word+i),compound_flag);
rv = compound_check((word+i),strlen(word+i),compound_flag);
if (rv) {
free(st);
return rv;
}
}
st[i] = ch;
}
free(st);
return NULL;
}
}
// check word for suffixes
struct hentry * AffixMgr::suffix_check (const char * word, int len,
struct hentry * AffixMgr::suffix_check (const char * word, int len,
int sfxopts, AffEntry * ppfx)
{
struct hentry * rv = NULL;
@ -627,7 +627,7 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
if (rv) return rv;
se = se->getNext();
}
// now handle the general case
char * tmpword = myrevstrdup(word);
unsigned char sp = *((const unsigned char *)tmpword);
@ -645,7 +645,7 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
sptr = sptr->getNextNE();
}
}
free(tmpword);
return NULL;
}
@ -657,7 +657,7 @@ struct hentry * AffixMgr::affix_check (const char * word, int len)
{
struct hentry * rv= NULL;
// check all prefixes (also crossed with suffixes if allowed)
// check all prefixes (also crossed with suffixes if allowed)
rv = prefix_check(word, len);
if (rv) return rv;
@ -667,7 +667,7 @@ struct hentry * AffixMgr::affix_check (const char * word, int len)
}
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
const char * ts, int wl, const char * ap, int al)
{
@ -741,7 +741,7 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
nh++;
} else {
free(newword);
}
}
}
ptr = (PfxEntry *)ptr ->getFlgNxt();
}
@ -840,7 +840,7 @@ int AffixMgr::parse_try(char * line)
if (np != 2) {
fprintf(stderr,"error: missing TRY information\n");
return 1;
}
}
return 0;
}
@ -870,7 +870,7 @@ int AffixMgr::parse_set(char * line)
if (np != 2) {
fprintf(stderr,"error: missing SET information\n");
return 1;
}
}
return 0;
}
@ -926,7 +926,7 @@ int AffixMgr::parse_cpdmin(char * line)
if (np != 2) {
fprintf(stderr,"error: missing compound min information\n");
return 1;
}
}
if ((cpdmin < 1) || (cpdmin > 50)) cpdmin = 3;
return 0;
}
@ -947,7 +947,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
if (*piece != '\0') {
switch(i) {
case 0: { np++; break; }
case 1: {
case 1: {
numrep = atoi(piece);
if (numrep < 1) {
fprintf(stderr,"incorrect number of entries in replacement table\n");
@ -967,8 +967,8 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
if (np != 2) {
fprintf(stderr,"error: missing replacement table information\n");
return 1;
}
}
/* now parse the numrep lines to read in the remainder of the table */
char * nl = line;
for (int j=0; j < numrep; j++) {
@ -1022,7 +1022,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
if (*piece != '\0') {
switch(i) {
case 0: { np++; break; }
case 1: {
case 1: {
nummap = atoi(piece);
if (nummap < 1) {
fprintf(stderr,"incorrect number of entries in map table\n");
@ -1042,8 +1042,8 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
if (np != 2) {
fprintf(stderr,"error: missing map table information\n");
return 1;
}
}
/* now parse the nummap lines to read in the remainder of the table */
char * nl = line;
for (int j=0; j < nummap; j++) {
@ -1064,7 +1064,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
}
break;
}
case 1: { maptable[j].set = mystrdup(piece);
case 1: { maptable[j].set = mystrdup(piece);
maptable[j].len = strlen(maptable[j].set);
break; }
default: break;
@ -1105,17 +1105,17 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
switch(i) {
// piece 1 - is type of affix
case 0: { np++; break; }
// piece 2 - is affix char
case 1: { np++; achar = *piece; break; }
// piece 3 - is cross product indicator
// piece 3 - is cross product indicator
case 2: { np++; if (*piece == 'Y') ff = XPRODUCT; break; }
// piece 4 - is number of affentries
case 3: {
case 3: {
np++;
numents = atoi(piece);
numents = atoi(piece);
ptr = (struct affentry *) malloc(numents * sizeof(struct affentry));
ptr->xpflg = ff;
ptr->achar = achar;
@ -1134,7 +1134,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
free(ptr);
return 1;
}
// store away ptr to first affentry
nptr = ptr;
@ -1152,14 +1152,14 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
switch(i) {
// piece 1 - is type
case 0: {
case 0: {
np++;
if (nptr != ptr) nptr->xpflg = ptr->xpflg;
break;
}
// piece 2 - is affix char
case 1: {
case 1: {
np++;
if (*piece != achar) {
fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
@ -1171,8 +1171,8 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
break;
}
// piece 3 - is string to strip or 0 for null
case 2: {
// piece 3 - is string to strip or 0 for null
case 2: {
np++;
nptr->strip = mystrdup(piece);
nptr->stripl = strlen(nptr->strip);
@ -1180,12 +1180,12 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
free(nptr->strip);
nptr->strip=mystrdup("");
nptr->stripl = 0;
}
break;
}
break;
}
// piece 4 - is affix string or 0 for null
case 3: {
case 3: {
np++;
nptr->appnd = mystrdup(piece);
nptr->appndl = strlen(nptr->appnd);
@ -1193,8 +1193,8 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
free(nptr->appnd);
nptr->appnd=mystrdup("");
nptr->appndl = 0;
}
break;
}
break;
}
// piece 5 - is the conditions descriptions
@ -1214,7 +1214,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
}
nptr++;
}
// now create SfxEntry or PfxEntry objects and use links to
// build an ordered (sorted by affix string) list
nptr = ptr;
@ -1224,10 +1224,10 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
build_pfxlist((AffEntry *)pfxptr);
} else {
SfxEntry * sfxptr = new SfxEntry(this,nptr);
build_sfxlist((AffEntry *)sfxptr);
build_sfxlist((AffEntry *)sfxptr);
}
nptr++;
}
}
free(ptr);
return 0;
}

View File

@ -2,7 +2,7 @@
#define _AFFIXMGR_HXX_
#include "atypes.hxx"
#include "baseaffix.hxx"
#include "baseaffi.hxx"
#include "hashmgr.hxx"
#include <cstdio>
@ -26,13 +26,13 @@ class AffixMgr
public:
AffixMgr(const char * affpath, HashMgr * ptr);
~AffixMgr();
struct hentry * affix_check(const char * word, int len);
struct hentry * prefix_check(const char * word, int len);
struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx);
int expand_rootword(struct guessword * wlst, int maxn,
int expand_rootword(struct guessword * wlst, int maxn,
const char * ts, int wl, const char * ap, int al);
struct hentry * compound_check(const char * word, int len, char compound_flag);
struct hentry * lookup(const char * word);
@ -44,7 +44,7 @@ public:
char * get_try_string();
char * get_compound();
bool get_nosplitsugs();
private:
int parse_file(const char * affpath);
int parse_try(char * line);

View File

@ -0,0 +1,17 @@
#ifndef _BASEAFF_HXX_
#define _BASEAFF_HXX_

// Common data shared by affix entry objects. The prefix/suffix entry
// constructors fill these fields from a parsed affentry record.
// NOTE(review): PfxEntry and SfxEntry presumably derive from this class
// (their pointers are cast to AffEntry*) -- confirm in affix.hxx.
//
// SETSIZE is not defined in this header; it must already be defined when
// this file is included. The visible includers pull in "atypes.hxx" first
// -- presumably that is where SETSIZE comes from; confirm.
class AffEntry
{
protected:
// string to append (the affix text)
char * appnd;
// string to strip from the word
char * strip;
// length of the appnd string
short appndl;
// length of the strip string
short stripl;
// number of conditions that must be met
short numconds;
// flag bits; XPRODUCT means prefix and suffix may be combined
short xpflg;
// the affix flag character
char achar;
// per-character condition bitmask table, indexed by character value
char conds[SETSIZE];
};
#endif

View File

@ -1,4 +1,4 @@
#include "license.readme"
#include "license.rea"
#if !defined(_MSC_VER)
#include <unistd.h>
@ -104,14 +104,14 @@ int HashMgr::add_word(const char * word, int wl, const char * aff, int al)
hp->alen = al;
hp->word = mystrdup(word);
hp->astr = mystrdup(aff);
hp->next = NULL;
while (dp->next != NULL) dp=dp->next;
hp->next = NULL;
while (dp->next != NULL) dp=dp->next;
dp->next = hp;
if ((wl) && (hp->word == NULL)) return 1;
if ((al) && (hp->astr == NULL)) return 1;
}
return 0;
}
}
@ -159,7 +159,7 @@ int HashMgr::load_tables(const char * tpath)
if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;
mychomp(ts);
tablesize = atoi(ts);
if (!tablesize) return 4;
if (!tablesize) return 4;
tablesize = tablesize + 5;
if ((tablesize %2) == 0) tablesize++;
@ -186,7 +186,7 @@ int HashMgr::load_tables(const char * tpath)
wl = strlen(ts);
// add the word and its index
if (add_word(ts,wl,ap,al))
if (add_word(ts,wl,ap,al))
return 5;;
}

View File

@ -0,0 +1,61 @@
/*
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
* And Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All modifications to the source code must be clearly marked as
* such. Binary redistributions based on modified source code
* must be clearly marked as modified versions in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* NOTE: A special thanks and credit goes to Geoff Kuenning
* the creator of ispell. MySpell's affix algorithms were
* based on those of ispell which should be noted is
* copyright Geoff Kuenning et.al. and now available
* under a BSD style license. For more information on ispell
* and affix compression in general, please see:
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
* (the home page for ispell)
*
* An almost complete rewrite of MySpell for use by
* the Mozilla project has been developed by David Einstein
* (Deinst@world.std.com). David and I are now
* working on parallel development tracks to help
* our respective projects (Mozilla and OpenOffice.org
* and we will maintain full affix file and dictionary
* file compatibility and work on merging our versions
* of MySpell back into a single tree. David has been
* a significant help in improving MySpell.
*
* Special thanks also go to La'szlo' Ne'meth
* <nemethl@gyorsposta.hu> who is the author of the
* Hungarian dictionary and who developed and contributed
* the code to support compound words in MySpell
* and fixed numerous problems with the encoding
* case conversion tables.
*
*/

View File

@ -2,7 +2,6 @@ affentry cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
affixmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
csutil cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
dictmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
example cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
hashmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
hashmgr hxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
myspell cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun

View File

@ -1,4 +1,4 @@
#include "license.readme"
#include "license.rea"
#include <cstring>
#include <cstdlib>
@ -53,12 +53,12 @@ MySpell::~MySpell()
// make a copy of src at destination while removing all leading
// blanks and removing any trailing periods after recording
// their presence with the abbreviation flag
// also since already going through character by character,
// also since already going through character by character,
// set the capitalization type
// return the length of the "cleaned" word
int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabbrev)
{
{
// with the new breakiterator code this should not be needed anymore
const char * special_chars = "._#$%&()* +,-/:;<=>[]\\^`{|}~\t \x0a\x0d\x01\'\"";
@ -68,8 +68,8 @@ int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabb
// first skip over any leading special characters
while ((*q != '\0') && (strchr(special_chars,(int)(*q)))) q++;
// now strip off any trailing special characters
// now strip off any trailing special characters
// if a period comes after a normal char record its presence
*pabbrev = 0;
int nl = strlen((const char *)q);
@ -77,9 +77,9 @@ int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabb
nl--;
}
if ( *(q+nl) == '.' ) *pabbrev = 1;
// if no characters are left it can't be an abbreviation and can't be capitalized
if (nl <= 0) {
if (nl <= 0) {
*pcaptype = NOCAP;
*pabbrev = 0;
*p = '\0';
@ -111,8 +111,8 @@ int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabb
*pcaptype = HUHCAP;
}
return nc;
}
}
int MySpell::spell(const char * word)
{
@ -129,8 +129,8 @@ int MySpell::spell(const char * word)
switch(captype) {
case HUHCAP:
case NOCAP: {
rv = check(cw);
case NOCAP: {
rv = check(cw);
if ((abbv) && !(rv)) {
memcpy(wspace,cw,wl);
*(wspace+wl) = '.';
@ -155,9 +155,9 @@ int MySpell::spell(const char * word)
*(wspace+wl+1) = '\0';
rv = check(wspace);
}
break;
break;
}
case INITCAP: {
case INITCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv);
rv = check(wspace);
@ -168,7 +168,7 @@ int MySpell::spell(const char * word)
*(wspace+wl+1) = '\0';
rv = check(wspace);
}
break;
break;
}
}
if (rv) return 1;
@ -216,12 +216,12 @@ int MySpell::suggest(char*** slst, const char * word)
if (wlst == NULL) return 0;
switch(captype) {
case NOCAP: {
ns = pSMgr->suggest(wlst, ns, cw);
case NOCAP: {
ns = pSMgr->suggest(wlst, ns, cw);
break;
}
case INITCAP: {
case INITCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv);
@ -230,29 +230,29 @@ int MySpell::suggest(char*** slst, const char * word)
for (int j=0; j < ns; j++)
mkinitcap(wlst[j], csconv);
}
ns = pSMgr->suggest(wlst,ns,cw);
ns = pSMgr->suggest(wlst,ns,cw);
break;
}
case HUHCAP: {
case HUHCAP: {
ns = pSMgr->suggest(wlst, ns, cw);
if (ns != -1) {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv);
ns = pSMgr->suggest(wlst, ns, wspace);
}
}
break;
}
case ALLCAP: {
case ALLCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv);
ns = pSMgr->suggest(wlst, ns, wspace);
if (ns > 0) {
for (int j=0; j < ns; j++)
mkallcap(wlst[j], csconv);
}
if (ns != -1)
}
if (ns != -1)
ns = pSMgr->suggest(wlst, ns , cw);
break;
}
@ -262,22 +262,22 @@ int MySpell::suggest(char*** slst, const char * word)
return ns;
}
// try ngram approach since found nothing
if (ns == 0) {
if (ns == 0) {
ns = pSMgr->ngsuggest(wlst, cw, pHMgr);
if (ns) {
switch(captype) {
case NOCAP: break;
case HUHCAP: break;
case INITCAP: {
case HUHCAP: break;
case INITCAP: {
for (int j=0; j < ns; j++)
mkinitcap(wlst[j], csconv);
}
break;
case ALLCAP: {
case ALLCAP: {
for (int j=0; j < ns; j++)
mkallcap(wlst[j], csconv);
}
}
break;
}
*slst = wlst;

View File

@ -1,4 +1,4 @@
#include "license.readme"
#include "license.rea"
#include <cstdlib>
#include <cctype>