Added HunSpell library to project

2006-04-05 17:20:12 +00:00 · 2006-04-05 17:20:12 +00:00 · b31a923d9d
commit b31a923d9d
parent dd7b2256f8
22 changed files with 23609 additions and 0 deletions
--- a/goldlib/hunspell/README
+++ b/goldlib/hunspell/README
@ -0,0 +1,21 @@
+Hunspell spell checker and morphological analyser library
+
+Documentation, tests, examples: http://hunspell.sourceforge.net
+
+Author of Hunspell:
+László Németh (nemethl (at) gyorsposta.hu)
+
+Hunspell is based on OpenOffice.org's MySpell. MySpell's author:
+Kevin Hendricks (kevin.hendricks (at) sympatico.ca)
+
+License: GPL 2.0/LGPL 2.1/MPL 1.1 tri-license
+
+The contents of this library may be used under the terms of
+the GNU General Public License Version 2 or later (the "GPL"), or
+the GNU Lesser General Public License Version 2.1 or later (the "LGPL",
+see http://gnu.org/copyleft/lesser.html) or the Mozilla Public License
+Version 1.1 or later (the "MPL", see http://mozilla.org/MPL/MPL-1.1.html).
+
+Software distributed under these licenses is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences
+for the specific language governing rights and limitations under the licenses.
--- a/goldlib/hunspell/affentry.cxx
+++ b/goldlib/hunspell/affentry.cxx
@ -0,0 +1,845 @@
+#include "license.hun"
+#include "license.mys"
+
+#include <cctype>
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+
+#include "affentry.hxx"
+#include "csutil.hxx"
+
+#if !defined(_MSC_VER)
+using namespace std;
+#endif
+
+
+// Build a prefix entry from the parsed affix record `dp`.
+// The strip/append strings, morphcode and contclass pointers are taken over
+// as they are (no copy is made here); ~PfxEntry() releases them.
+PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
+{
+  // register affix manager
+  pmyMgr = pmgr;
+
+  // set up its initial values
+
+  aflag = dp->aflag;         // flag
+  strip = dp->strip;         // string to strip
+  appnd = dp->appnd;         // string to append
+  stripl = dp->stripl;       // length of strip string
+  appndl = dp->appndl;       // length of append string
+  numconds = dp->numconds;   // number of conditions to match
+  opts = dp->opts;           // cross product flag
+  // then copy over all of the conditions
+  memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
+
+  // start with every link cleared; flgnxt used to be left indeterminate
+  // until setFlgNxt() was called
+  next = NULL;
+  nextne = NULL;
+  nexteq = NULL;
+  flgnxt = NULL;
+
+  morphcode = dp->morphcode;
+  contclass = dp->contclass;
+  contclasslen = dp->contclasslen;
+}
+
+
+// Release everything this prefix entry owns.
+PfxEntry::~PfxEntry()
+{
+    // the affix strings are always released by the entry
+    // (free() accepts a null pointer, so no separate test is needed)
+    free(appnd);
+    appnd = NULL;
+    free(strip);
+    strip = NULL;
+
+    // in UTF-8 mode each of the 8 condition slots may carry a character list
+    if (opts & aeUTF8) {
+        int slot = 0;
+        while (slot < 8) {
+            free(conds.utf8.wchars[slot]);
+            ++slot;
+        }
+    }
+
+    // data marked as aliased (aeALIASM / aeALIASF) is not freed here
+    if (!(opts & aeALIASM)) free(morphcode);
+    if (!(opts & aeALIASF)) free(contclass);
+
+    aflag = 0;
+    pmyMgr = NULL;
+}
+
+// Apply this prefix to `word` (of length `len`): the strip string is dropped
+// from the front and the append string is put there instead.
+// Returns a string obtained from mystrdup(), or NULL when the entry does
+// not apply to the word.
+char * PfxEntry::add(const char * word, int len)
+{
+    // the word must be longer than the stripped part and long enough
+    // to carry every condition
+    if (len <= stripl || len < numconds) return NULL;
+    if (!test_condition(word)) return NULL;
+
+    // when something is stripped, the word has to start with it
+    if (stripl && strncmp(word, strip, stripl) != 0) return NULL;
+
+    // the derived word has to fit into the local buffer
+    if ((len + appndl - stripl) >= (MAXWORDUTF8LEN + 4)) return NULL;
+
+    /* we have a match so add prefix */
+    char derived[MAXWORDUTF8LEN + 4];
+    char * tail = derived;
+    if (appndl) {
+        strcpy(derived, appnd);
+        tail = derived + appndl;
+    }
+    strcpy(tail, word + stripl);
+    return mystrdup(derived);
+}
+
+
+inline int PfxEntry::test_condition(const char * st)
+{   /* Test the leading characters of st against this entry's condition
+     * table, one condition per character, walking front to back.
+     * Returns 1 when all numconds conditions hold, 0 otherwise.
+     * NOTE(review): the 8-bit branch has no end-of-string check of its own;
+     * it appears to rely on callers comparing the length with numconds.
+     */
+    int cond;
+    unsigned char * cp = (unsigned char *)st;
+    if (!(opts & aeUTF8)) { // 256-character codepage
+        for (cond = 0;  cond < numconds;  cond++) {
+	    if ((conds.base[*cp++] & (1 << cond)) == 0) return 0; // bit `cond` of the byte's table entry must be set
+        }
+    } else { // UTF-8 encoding
+      unsigned short wc;
+      for (cond = 0;  cond < numconds;  cond++) {
+        // a simple 7-bit ASCII character in UTF-8
+        if ((*cp >> 7) == 0) {
+            // also check limit (end of word)
+	    if ((!*cp) || ((conds.utf8.ascii[*cp++] & (1 << cond)) == 0)) return 0;
+        // UTF-8 multibyte character
+        } else {
+            // not dot wildcard in rule
+            if (!conds.utf8.all[cond]) {
+                if (conds.utf8.neg[cond]) { // negated set: fail when flag_bsearch finds the character
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (conds.utf8.wchars[cond] && 
+                        flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                            wc, (short) conds.utf8.wlen[cond])) return 0;
+                } else { // plain set: fail when there is no list or the search misses
+                    if (!conds.utf8.wchars[cond]) return 0;
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                         wc, (short)conds.utf8.wlen[cond])) return 0;
+                }
+            }
+            // jump to next UTF-8 character (skip the 10xxxxxx continuation bytes)
+            for(cp++; (*cp & 0xc0) == 0x80; cp++);
+        }
+      }
+    }
+    return 1;
+}
+
+
+// Check whether `word` (of length `len`) can be derived with this prefix.
+//
+// The candidate root is rebuilt by dropping the append string from the front
+// of the word and putting the strip string back.  The root is looked up
+// directly and, when the entry allows cross products, also checked in
+// combination with a suffix.
+//
+// word        - word to analyse; the prefix is expected to match already
+// len         - length of word
+// in_compound - passed through to AffixMgr::suffix_check()
+// needflag    - when non-zero, a flag that the root or this entry's
+//               contclass must carry
+//
+// Returns the matching dictionary entry, or NULL.
+struct hentry * PfxEntry::check(const char * word, int len, char in_compound, const FLAG needflag)
+{
+    int                 tmpl;   // length of tmpword
+    struct hentry *     he;     // hash entry of root word or NULL
+    char                tmpword[MAXWORDUTF8LEN + 4];
+
+    // on entry prefix is 0 length or already matches the beginning of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+
+    // The rebuilt root (strip string + rest of the word + terminator) must
+    // also fit into tmpword; without this bound the strcpy() calls below
+    // could write past the end of the buffer.
+    if ((tmpl > 0) && (tmpl + stripl >= numconds) &&
+        (tmpl + stripl < MAXWORDUTF8LEN + 4)) {
+
+        // generate new root word by removing prefix and adding
+        // back any characters that would have been stripped
+
+        if (stripl) strcpy (tmpword, strip);
+        strcpy ((tmpword + stripl), (word + appndl));
+
+        // now make sure all of the conditions on characters
+        // are met.  Please see the appendix at the end of
+        // this file for more info on exactly what is being
+        // tested
+
+        // if all conditions are met then check if resulting
+        // root word in the dictionary
+
+        if (test_condition(tmpword)) {
+            tmpl += stripl;
+            if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+                do {
+                    if (TESTAFF(he->astr, aflag, he->alen) &&
+                        // forbid single prefixes with pseudoroot flag
+                        ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+                        // needflag
+                        ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+                         (contclass && TESTAFF(contclass, needflag, contclasslen))))
+                        return he;
+                } while ((he = he->next_homonym)); // check homonyms
+            }
+
+            // prefix matched but no root word was found
+            // if aeXPRODUCT is allowed, try again but now
+            // cross checked combined with a suffix
+
+            //if ((opts & aeXPRODUCT) && in_compound) {
+            if ((opts & aeXPRODUCT)) {
+                he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
+                        0, NULL, FLAG_NULL, needflag, in_compound);
+                if (he) return he;
+            }
+        }
+    }
+    return NULL;
+}
+
+// Check whether `word` can be derived with this prefix combined with a
+// two-level suffix.
+//
+// The candidate root is rebuilt as in check(); when the conditions hold, the
+// entry allows cross products and in_compound is not IN_CPD_BEGIN, the root
+// is handed to AffixMgr::suffix_check_twosfx().
+//
+// Returns the dictionary entry found that way, or NULL.
+struct hentry * PfxEntry::check_twosfx(const char * word, int len,
+    char in_compound, const FLAG needflag)
+{
+    int                 tmpl;   // length of tmpword
+    struct hentry *     he;     // hash entry of root word or NULL
+    char                tmpword[MAXWORDUTF8LEN + 4];
+
+    // on entry prefix is 0 length or already matches the beginning of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+
+    // The rebuilt root (strip string + rest of the word + terminator) must
+    // also fit into tmpword; without this bound the strcpy() calls below
+    // could write past the end of the buffer.
+    if ((tmpl > 0) && (tmpl + stripl >= numconds) &&
+        (tmpl + stripl < MAXWORDUTF8LEN + 4)) {
+
+        // generate new root word by removing prefix and adding
+        // back any characters that would have been stripped
+
+        if (stripl) strcpy (tmpword, strip);
+        strcpy ((tmpword + stripl), (word + appndl));
+
+        // now make sure all of the conditions on characters
+        // are met.  Please see the appendix at the end of
+        // this file for more info on exactly what is being
+        // tested
+
+        if (test_condition(tmpword)) {
+            tmpl += stripl;
+
+            // the root itself is not looked up here;
+            // if aeXPRODUCT is allowed, it is
+            // cross checked combined with a suffix
+
+            if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+                he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, needflag);
+                if (he) return he;
+            }
+        }
+    }
+    return NULL;
+}
+
+
+// Morphological-analysis counterpart of check_twosfx().
+//
+// The candidate root is rebuilt as in check(); when the conditions hold, the
+// entry allows cross products and in_compound is not IN_CPD_BEGIN, the
+// result of AffixMgr::suffix_check_twosfx_morph() is returned unchanged.
+//
+// Returns that string, or NULL when the prefix does not apply.
+char * PfxEntry::check_twosfx_morph(const char * word, int len,
+         char in_compound, const FLAG needflag)
+{
+    int                 tmpl;   // length of tmpword
+    char                tmpword[MAXWORDUTF8LEN + 4];
+
+    // on entry prefix is 0 length or already matches the beginning of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+
+    // The rebuilt root (strip string + rest of the word + terminator) must
+    // also fit into tmpword; without this bound the strcpy() calls below
+    // could write past the end of the buffer.
+    if ((tmpl > 0) && (tmpl + stripl >= numconds) &&
+        (tmpl + stripl < MAXWORDUTF8LEN + 4)) {
+
+        // generate new root word by removing prefix and adding
+        // back any characters that would have been stripped
+
+        if (stripl) strcpy (tmpword, strip);
+        strcpy ((tmpword + stripl), (word + appndl));
+
+        // now make sure all of the conditions on characters
+        // are met.  Please see the appendix at the end of
+        // this file for more info on exactly what is being
+        // tested
+
+        if (test_condition(tmpword)) {
+            tmpl += stripl;
+
+            // the root itself is not looked up here;
+            // if aeXPRODUCT is allowed, it is
+            // cross checked combined with a suffix
+
+            if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+                return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl,
+                         aeXPRODUCT, (AffEntry *)this, needflag);
+            }
+        }
+    }
+    return NULL;
+}
+
+// Morphological-analysis counterpart of check().
+//
+// For every homonym of the rebuilt root that passes the flag tests, one line
+// is appended to the result: morphcode (or the prefix key when there is no
+// morphcode), then the entry's description, then "\n".  When the entry
+// allows cross products and in_compound is not IN_CPD_BEGIN, the output of
+// AffixMgr::suffix_check_morph() is appended as well.
+//
+// Returns a mystrdup() copy of the collected text, or NULL when nothing
+// was collected.
+// NOTE(review): result is filled with strcat() without a length check
+// against MAXLNLEN - confirm the inputs cannot exceed it.
+char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
+{
+    int                 tmpl;   // length of tmpword
+    struct hentry *     he;     // hash entry of root word or NULL
+    char                tmpword[MAXWORDUTF8LEN + 4];
+    char                result[MAXLNLEN];
+    char * st;
+
+    *result = '\0';
+
+    // on entry prefix is 0 length or already matches the beginning of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+
+    // The rebuilt root (strip string + rest of the word + terminator) must
+    // also fit into tmpword; without this bound the strcpy() calls below
+    // could write past the end of the buffer.
+    if ((tmpl > 0) && (tmpl + stripl >= numconds) &&
+        (tmpl + stripl < MAXWORDUTF8LEN + 4)) {
+
+        // generate new root word by removing prefix and adding
+        // back any characters that would have been stripped
+
+        if (stripl) strcpy (tmpword, strip);
+        strcpy ((tmpword + stripl), (word + appndl));
+
+        // now make sure all of the conditions on characters
+        // are met.  Please see the appendix at the end of
+        // this file for more info on exactly what is being
+        // tested
+
+        // if all conditions are met then check if resulting
+        // root word in the dictionary
+
+        if (test_condition(tmpword)) {
+            tmpl += stripl;
+            if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+                do {
+                    if (TESTAFF(he->astr, aflag, he->alen) &&
+                        // forbid single prefixes with pseudoroot flag
+                        ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+                        // needflag
+                        ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+                         (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
+                        if (morphcode) strcat(result, morphcode); else strcat(result,getKey());
+                        if (he->description) {
+                            // descriptions starting with '[' or '<' get the word put in front
+                            if ((*(he->description)=='[')||(*(he->description)=='<')) strcat(result,he->word);
+                            strcat(result,he->description);
+                        }
+                        strcat(result, "\n");
+                    }
+                } while ((he = he->next_homonym));
+            }
+
+            // after the direct lookup,
+            // if aeXPRODUCT is allowed, try again but now
+            // cross checked combined with a suffix
+
+            if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+                st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
+                        FLAG_NULL, needflag);
+                if (st) {
+                    strcat(result, st);
+                    free(st);
+                }
+            }
+        }
+    }
+
+    if (*result) return mystrdup(result);
+    return NULL;
+}
+
+
+// Build a suffix entry from the parsed affix record `dp`.
+// The pointers in `dp` are taken over as they are; only rappnd is a new
+// string, obtained from myrevstrdup().
+SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
+{
+  // identification and option bits
+  aflag = dp->aflag;
+  opts = dp->opts;
+  numconds = dp->numconds;
+
+  // text removed from / added to the root, together with the lengths
+  strip = dp->strip;
+  stripl = dp->stripl;
+  appnd = dp->appnd;
+  appndl = dp->appndl;
+
+  // condition table
+  memcpy(&conds.base[0], &dp->conds.base[0], SETSIZE * sizeof(conds.base[0]));
+
+  // second form of the append string (presumably reversed, going by the
+  // helper's name)
+  rappnd = myrevstrdup(appnd);
+
+  // morphological description and continuation classes
+  morphcode = dp->morphcode;
+  contclass = dp->contclass;
+  contclasslen = dp->contclasslen;
+
+  // register affix manager
+  pmyMgr = pmgr;
+}
+
+
+// Release everything this suffix entry owns.
+SfxEntry::~SfxEntry()
+{
+    // the affix strings are always released by the entry
+    // (free() accepts a null pointer, so no separate test is needed)
+    free(appnd);
+    appnd = NULL;
+    free(rappnd);
+    free(strip);
+    strip = NULL;
+
+    // in UTF-8 mode each of the 8 condition slots may carry a character list
+    if (opts & aeUTF8) {
+        int slot = 0;
+        while (slot < 8) {
+            free(conds.utf8.wchars[slot]);
+            ++slot;
+        }
+    }
+
+    // data marked as aliased (aeALIASM / aeALIASF) is not freed here
+    if (!(opts & aeALIASM)) free(morphcode);
+    if (!(opts & aeALIASF)) free(contclass);
+
+    aflag = 0;
+    pmyMgr = NULL;
+}
+
+// Apply this suffix to `word` (of length `len`): the strip string is dropped
+// from the end and the append string is put there instead.
+// Returns a string obtained from mystrdup(), or NULL when the entry does
+// not apply to the word.
+char * SfxEntry::add(const char * word, int len)
+{
+    char tword[MAXWORDUTF8LEN + 4];
+
+    /* make sure all conditions match */
+    if ((len > stripl) && (len >= numconds) && test_condition(word + len, word) &&
+        (!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
+        ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
+        /* we have a match so add suffix */
+
+        // Copy only the part of the word that is kept.  Copying the whole
+        // word first could overrun tword when stripl > appndl, because the
+        // size test above only covers the length of the derived word.
+        const int keep = len - stripl;
+        memcpy(tword, word, keep);
+        if (appndl) {
+            strcpy(tword + keep, appnd);
+        } else {
+            tword[keep] = '\0';
+        }
+        return mystrdup(tword);
+    }
+    return NULL;
+}
+
+
+inline int SfxEntry::test_condition(const char * st, const char * beg)
+{   /* Test the trailing characters of a word against this entry's condition
+     * table, walking backwards from st (which points just past the last
+     * character).  beg is the start of the word and is only consulted in
+     * the UTF-8 branch.  Returns 1 when all numconds conditions hold,
+     * 0 otherwise.
+     * NOTE(review): the 8-bit branch never compares against beg; it appears
+     * to rely on callers checking the length against numconds.
+     */
+    int cond;
+    unsigned char * cp = (unsigned char *) st;
+    if (!(opts & aeUTF8)) { // 256-character codepage
+        // Dömölki affix algorithm
+	for (cond = numconds;  --cond >= 0; ) { // last condition is matched with the last character
+	    if ((conds.base[*--cp] & (1 << cond)) == 0) return 0;
+	}
+    } else { // UTF-8 encoding
+      unsigned short wc;
+      for (cond = numconds;  --cond >= 0; ) {
+        // go to next character position and check limit
+        if ((char *) --cp < beg) return 0;
+        // a simple 7-bit ASCII character in UTF-8
+        if ((*cp >> 7) == 0) {
+	    if ((conds.utf8.ascii[*cp] & (1 << cond)) == 0) return 0;
+        // UTF-8 multibyte character
+        } else {
+            // go to first character of UTF-8 multibyte character
+            for (; (*cp & 0xc0) == 0x80; cp--);
+            // not dot wildcard in rule
+            if (!conds.utf8.all[cond]) {
+                if (conds.utf8.neg[cond]) { // negated set: fail when flag_bsearch finds the character
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (conds.utf8.wchars[cond] && 
+                        flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                            wc, (short) conds.utf8.wlen[cond])) return 0;
+                } else { // plain set: fail when there is no list or the search misses
+                    if (!conds.utf8.wchars[cond]) return 0;
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                         wc, (short)conds.utf8.wlen[cond])) return 0;
+                }
+            }
+        }
+      }
+    }
+    return 1;
+}
+
+
+
+/* see if this suffix is present in the word
+ *
+ * The candidate root is rebuilt in tmpword (the word without the append
+ * string, with the strip string put back), the conditions are tested and
+ * the root is looked up with pmyMgr->lookup().  The first homonym that
+ * passes all flag tests is returned; otherwise NULL.
+ *
+ * optflags/ppfx - set when the suffix is cross checked with a prefix
+ * cclass        - when non-zero, must be present in this entry's contclass
+ * needflag      - when non-zero, must be on the root or in contclass
+ * wlst/maxSug/ns - when the lookup fails and wlst is given, the candidate
+ *                 root is appended to wlst (no duplicates, while *ns is
+ *                 below maxSug); if mystrdup() fails the stored entries are
+ *                 freed and *ns is set to -1
+ *
+ * NOTE(review): tmpword is filled with strcpy() without a length check;
+ * this looks like it relies on callers bounding the word length - confirm.
+ */
+struct hentry * SfxEntry::check(const char * word, int len, int optflags,
+    AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag)
+{
+    int	                tmpl;		 // length of tmpword 
+    struct hentry *     he;              // hash entry pointer
+    unsigned char *	cp;
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+    PfxEntry* ep = (PfxEntry *) ppfx;
+
+    // if this suffix is being cross checked with a prefix
+    // but it does not support cross products skip it
+
+    if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
+        return NULL;
+
+    // upon entry suffix is 0 length or already matches the end of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+    // the second condition is not enough for UTF-8 strings
+    // it is checked in test_condition()
+    
+    if ((tmpl > 0)  &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing suffix and adding
+	    // back any characters that would have been stripped,
+	    // or null terminating the shorter string
+
+	    strcpy (tmpword, word);
+	    cp = (unsigned char *)(tmpword + tmpl);
+	    if (stripl) {
+		strcpy ((char *)cp, strip);
+		tmpl += stripl;
+		cp = (unsigned char *)(tmpword + tmpl); // cp now points at the terminator of the rebuilt root
+	    } else *cp = '\0';
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being tested
+
+            // if all conditions are met then check if resulting
+            // root word in the dictionary
+
+	    if (test_condition((char *) cp, (char *) tmpword)) {
+
+#ifdef SZOSZABLYA_POSSIBLE_ROOTS
+		fprintf(stdout,"%s %s %c\n", word, tmpword, aflag);
+#endif
+	        if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+                    do {
+                        // check conditional suffix (enabled by prefix)
+                        if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
+                                    TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && 
+                            (((optflags & aeXPRODUCT) == 0) || 
+                            TESTAFF(he->astr, ep->getFlag(), he->alen) ||
+                             // enabled by prefix
+                            ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+                            ) &&
+                            // handle cont. class
+                            ((!cclass) || 
+                                ((contclass) && TESTAFF(contclass, cclass, contclasslen))
+                            ) &&
+                            // handle required flag
+                            ((!needflag) || 
+                              (TESTAFF(he->astr, needflag, he->alen) ||
+                              ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
+                            )
+                        ) return he;
+                    } while ((he = he->next_homonym)); // check homonyms
+
+                // obsolete stemming code (used only by the 
+                // experimental SuffixMgr:suggest_pos_stems)
+	        // store resulting root in wlst
+		} else if (wlst && (*ns < maxSug)) {
+		    int cwrd = 1; // stays 1 while tmpword is not yet in wlst
+        	    for (int k=0; k < *ns; k++) 
+			if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
+        	    if (cwrd) {
+			wlst[*ns] = mystrdup(tmpword);
+			if (wlst[*ns] == NULL) {
+			    for (int j=0; j<*ns; j++) free(wlst[j]);
+			    *ns = -1;
+			    return NULL;
+			}
+			(*ns)++;
+		    }
+		}
+	    }
+    }
+    return NULL;
+}
+
+/* see if two-level suffix is present in the word
+ *
+ * This entry is treated as the outer suffix: it is removed from the word
+ * (append string off, strip string back) and, when the conditions hold, the
+ * remainder is passed to pmyMgr->suffix_check() together with this entry's
+ * flag (aflag).  The prefix is passed on as well, unless this entry's
+ * contclass contains the prefix flag, in which case the inner check runs
+ * without prefix and without option flags.
+ *
+ * Returns the entry found by suffix_check(), or NULL.
+ *
+ * NOTE(review): tmpword is filled with strcpy() without a length check;
+ * this looks like it relies on callers bounding the word length - confirm.
+ */
+struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
+    AffEntry* ppfx, const FLAG needflag)
+{
+    int	                tmpl;		 // length of tmpword 
+    struct hentry *     he;              // hash entry pointer
+    unsigned char *	cp;
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+    PfxEntry* ep = (PfxEntry *) ppfx;
+
+
+    // if this suffix is being cross checked with a prefix
+    // but it does not support cross products skip it
+
+    if ((optflags & aeXPRODUCT) != 0 &&  (opts & aeXPRODUCT) == 0)
+        return NULL;
+
+    // upon entry suffix is 0 length or already matches the end of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+
+    if ((tmpl > 0)  &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing suffix and adding
+	    // back any characters that would have been stripped,
+	    // or null terminating the shorter string
+
+	    strcpy (tmpword, word);
+	    cp = (unsigned char *)(tmpword + tmpl);
+	    if (stripl) {
+		strcpy ((char *)cp, strip);
+		tmpl += stripl;
+		cp = (unsigned char *)(tmpword + tmpl); // cp now points at the terminator of the rebuilt word
+	    } else *cp = '\0';
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being
+            // tested
+
+            // if all conditions are met then recall suffix_check
+
+	    if (test_condition((char *) cp, (char *) tmpword)) {
+                if (ppfx) {
+                    // handle conditional suffix
+                    if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) 
+                        he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
+                    else
+                        he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, NULL, (FLAG) aflag, needflag);
+                } else {
+                    he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
+                }
+                if (he) return he;
+            }
+    }
+    return NULL;
+}
+
+
+/* see if two-level suffix is present in the word (morphological variant)
+ *
+ * This entry is removed from the word as in check_twosfx() and, when the
+ * conditions hold, the remainder is passed to pmyMgr->suffix_check_morph()
+ * together with this entry's flag (aflag).  The returned text is collected
+ * in result, passed through mychomp(), and a mystrdup() copy is returned.
+ * When this entry's contclass contains the prefix flag, the prefix's
+ * morphological string (getMorph()) is put in front of the text.
+ *
+ * Returns NULL when nothing was collected.
+ *
+ * NOTE(review): tmpword and result are filled with strcpy()/strcat()
+ * without length checks - confirm that callers bound the input sizes.
+ */
+char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
+    AffEntry* ppfx, const FLAG needflag)
+{
+    int	                tmpl;		 // length of tmpword 
+    unsigned char *	cp;
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+    PfxEntry* ep = (PfxEntry *) ppfx;
+    char * st;
+
+    char result[MAXLNLEN];
+    
+    *result = '\0';
+
+    // if this suffix is being cross checked with a prefix
+    // but it does not support cross products skip it
+
+    if ((optflags & aeXPRODUCT) != 0 &&  (opts & aeXPRODUCT) == 0)
+        return NULL;
+
+    // upon entry suffix is 0 length or already matches the end of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+
+    if ((tmpl > 0)  &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing suffix and adding
+	    // back any characters that would have been stripped,
+	    // or null terminating the shorter string
+
+	    strcpy (tmpword, word);
+	    cp = (unsigned char *)(tmpword + tmpl);
+	    if (stripl) {
+		strcpy ((char *)cp, strip);
+		tmpl += stripl;
+		cp = (unsigned char *)(tmpword + tmpl); // cp now points at the terminator of the rebuilt word
+	    } else *cp = '\0';
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being
+            // tested
+
+            // if all conditions are met then recall suffix_check
+
+	    if (test_condition((char *) cp, (char *) tmpword)) {
+                if (ppfx) {
+                    // handle conditional suffix
+                    if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
+                        st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
+                        if (st) {
+                            if (((PfxEntry *) ppfx)->getMorph()) {
+                                strcat(result, ((PfxEntry *) ppfx)->getMorph());
+                            }
+                            strcat(result,st);
+                            free(st);
+                            mychomp(result);
+                        }
+                    } else {
+                        st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
+                        if (st) {
+                            strcat(result, st);
+                            free(st);
+                            mychomp(result);
+                        }
+                    }
+                } else {
+                        st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
+                        if (st) {
+                            strcat(result, st);
+                            free(st);
+                            mychomp(result);
+                        }
+                }
+                if (*result) return mystrdup(result);
+            }
+    }
+    return NULL;
+}
+
+// Walk the homonym chain that follows `he` and return the next entry that
+// passes the same flag tests as SfxEntry::check(), or NULL when the chain
+// ends first.
+struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx, 
+    const FLAG cclass, const FLAG needflag)
+{
+    PfxEntry* pfx = (PfxEntry *) ppfx;
+    const bool crossed = (optflags & aeXPRODUCT) != 0;
+
+    for (he = he->next_homonym; he; he = he->next_homonym) {
+        // the suffix flag must be on the word or in the prefix's contclass
+        if (!TESTAFF(he->astr, aflag, he->alen) &&
+            !(pfx && pfx->getCont() &&
+              TESTAFF(pfx->getCont(), aflag, pfx->getContLen()))) continue;
+
+        // in a cross product the prefix flag must be on the word,
+        // or in this entry's contclass (conditional suffix)
+        if (crossed &&
+            !TESTAFF(he->astr, pfx->getFlag(), he->alen) &&
+            !((contclass) && TESTAFF(contclass, pfx->getFlag(), contclasslen))) continue;
+
+        // handle cont. class
+        if (cclass &&
+            !((contclass) && TESTAFF(contclass, cclass, contclasslen))) continue;
+
+        // handle required flag
+        if (needflag &&
+            !TESTAFF(he->astr, needflag, he->alen) &&
+            !((contclass) && TESTAFF(contclass, needflag, contclasslen))) continue;
+
+        return he;
+    }
+    return NULL;
+}
+
+
+#if 0
+
+Appendix:  Understanding Affix Code
+
+
+An affix is either a  prefix or a suffix attached to root words to make 
+other words.
+
+Basically a Prefix or a Suffix is a set of AffEntry objects
+which store information about the prefix or suffix along 
+with supporting routines to check if a word has a particular 
+prefix or suffix or a combination.
+
+The structure affentry is defined as follows:
+
+struct affentry
+{
+   unsigned short aflag;    // ID used to represent the affix
+   char * strip;            // string to strip before adding affix
+   char * appnd;            // the affix string to add
+   unsigned char stripl;    // length of the strip string
+   unsigned char appndl;    // length of the affix string
+   char numconds;           // the number of conditions that must be met
+   char opts;               // flag: aeXPRODUCT- combine both prefix and suffix 
+   char   conds[SETSIZE];   // array which encodes the conditions to be met
+};
+
+
+Here is a suffix borrowed from the en_US.aff file.  This file 
+is whitespace delimited.
+
+SFX D Y 4 
+SFX D   0     e          d
+SFX D   y     ied        [^aeiou]y
+SFX D   0     ed         [^ey]
+SFX D   0     ed         [aeiou]y
+
+This information can be interpreted as follows:
+
+The first line has 4 fields
+
+Field
+-----
+1     SFX - indicates this is a suffix
+2     D   - is the name of the character flag which represents this suffix
+3     Y   - indicates it can be combined with prefixes (cross product)
+4     4   - indicates that sequence of 4 affentry structures are needed to
+               properly store the affix information
+
+The remaining lines describe the unique information for the 4 SfxEntry 
+objects that make up this affix.  Each line can be interpreted
+as follows: (note fields 1 and 2 are as a check against line 1 info)
+
+Field
+-----
+1     SFX         - indicates this is a suffix
+2     D           - is the name of the character flag for this affix
+3     y           - the string of chars to strip off before adding affix
+                         (a 0 here indicates the NULL string)
+4     ied         - the string of affix characters to add
+5     [^aeiou]y   - the conditions which must be met before the affix
+                    can be applied
+
+Field 5 is interesting.  Since this is a suffix, field 5 tells us that
+there are 2 conditions that must be met.  The first condition is that 
+the next to the last character in the word must *NOT* be any of the 
+following "a", "e", "i", "o" or "u".  The second condition is that
+the last character of the word must end in "y".
+
+So how can we encode this information concisely and be able to 
+test for both conditions in a fast manner?  The answer is found
+by studying the wonderful ispell code of Geoff Kuenning, et al.
+(now available under a normal BSD license).
+
+If we set up a conds array of 256 bytes indexed (0 to 255) and access it
+using a character (cast to an unsigned char) of a string, we have 8 bits
+of information we can store about that character.  Specifically we
+could use each bit to say if that character is allowed in any of the 
+last (or first for prefixes) 8 characters of the word.
+
+Basically, each character at one end of the word (up to the number 
+of conditions) is used to index into the conds array and the resulting 
+value found there says whether that character is valid for a 
+specific character position in the word.  
+
+For prefixes, it does this by setting bit 0 if that char is valid 
+in the first position, bit 1 if valid in the second position, and so on. 
+
+If a bit is not set, then that char is not valid for that position in the
+word.
+
+If working with suffixes bit 0 is used for the character closest 
+to the front, bit 1 for the next character towards the end, ..., 
+with bit numconds-1 representing the last char at the end of the string. 
+
+Note: since entries in the conds[] are 8 bits, only 8 conditions 
+(read that only 8 character positions) can be examined at one
+end of a word (the beginning for prefixes and the end for suffixes).
+
+So to make this clearer, lets encode the conds array values for the 
+first two affentries for the suffix D described earlier.
+
+
+  For the first affentry:    
+     numconds = 1             (only examine the last character)
+
+     conds['e'] =  (1 << 0)   (the word must end in an E)
+     all others are all 0
+
+  For the second affentry:
+     numconds = 2             (only examine the last two characters)     
+
+     conds[X] = conds[X] | (1 << 0)     (aeiou are not allowed)
+         where X is all characters *but* a, e, i, o, or u
+         
+
+     conds['y'] = (1 << 1)     (the last char must be a y)
+     all other bits for all other entries in the conds array are zero
+
+
+#endif
+
--- a/goldlib/hunspell/affentry.hxx
+++ b/goldlib/hunspell/affentry.hxx
@ -0,0 +1,130 @@
+#ifndef _AFFIX_HXX_
+#define _AFFIX_HXX_
+
+#include "atypes.hxx"
+#include "baseaffi.hxx"
+#include "affixmgr.hxx"
+
+/* A Prefix Entry  */
+
+// One prefix rule.  The rule data itself (strip/append strings, flag,
+// options, conditions) lives in the AffEntry base; this class adds the
+// owning manager, the links to other prefix entries and the check API.
+class PfxEntry : public AffEntry
+{
+       // affix manager registered at construction
+       AffixMgr*    pmyMgr;
+
+       // links to other prefix entries; only read and written through the
+       // get*/set* accessors below
+       PfxEntry * next;
+       PfxEntry * nexteq;
+       PfxEntry * nextne;
+       PfxEntry * flgnxt;
+
+public:
+
+  // builds the entry from the parsed affix description dp and remembers pmgr
+  PfxEntry(AffixMgr* pmgr, affentry* dp );
+  ~PfxEntry();
+
+  // true when the aeXPRODUCT option bit is set for this entry
+  inline bool          allowCross() { return ((opts & aeXPRODUCT) != 0); }
+
+  // word checks (implemented in affentry.cxx); needflag defaults to
+  // FLAG_NULL in every one of them
+  struct hentry *      check(const char * word, int len, char in_compound,
+                            const FLAG needflag = FLAG_NULL);
+
+  // default was the pointer constant NULL; FLAG is an integer type, so use
+  // FLAG_NULL like the sibling declarations
+  struct hentry *      check_twosfx(const char * word, int len, char in_compound,
+                            const FLAG needflag = FLAG_NULL);
+
+  char *      check_morph(const char * word, int len, char in_compound,
+                            const FLAG needflag = FLAG_NULL);
+
+  char *      check_twosfx_morph(const char * word, int len,
+                  char in_compound, const FLAG needflag = FLAG_NULL);
+
+  // plain accessors for the rule data stored in the AffEntry base
+  inline FLAG getFlag()   { return aflag;   }
+  inline const char *  getKey()    { return appnd;  }
+  char *               add(const char * word, int len);
+
+  inline short getKeyLen() { return appndl; }
+
+  inline const char *  getMorph()    { return morphcode;  }
+
+  inline const unsigned short * getCont()    { return contclass;  }
+  inline short           getContLen()    { return contclasslen;  }
+
+  // accessors for the entry links
+  inline PfxEntry *    getNext()   { return next;   }
+  inline PfxEntry *    getNextNE() { return nextne; }
+  inline PfxEntry *    getNextEQ() { return nexteq; }
+  inline PfxEntry *    getFlgNxt() { return flgnxt; }
+
+  inline void   setNext(PfxEntry * ptr)   { next = ptr;   }
+  inline void   setNextNE(PfxEntry * ptr) { nextne = ptr; }
+  inline void   setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
+  inline void   setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
+
+  inline int    test_condition(const char * st);
+};
+
+
+
+
+/* A Suffix Entry */
+
+// One suffix rule.  The rule data itself lives in the AffEntry base; this
+// class adds the owning manager, a second key string (rappnd), the links
+// to other suffix entries and the check API.
+class SfxEntry : public AffEntry
+{
+       // affix manager registered at construction
+       AffixMgr*    pmyMgr;
+       // key returned by getKey(); NOTE(review): presumably the reversed
+       // append string - confirm in affentry.cxx
+       char *       rappnd;
+
+       // links to other suffix entries; only read and written through the
+       // get*/set* accessors below
+       SfxEntry *   next;
+       SfxEntry *   nexteq;
+       SfxEntry *   nextne;
+       SfxEntry *   flgnxt;
+
+       // further links exposed read-only through getLM/getRM/getEQM
+       SfxEntry *   l_morph;
+       SfxEntry *   r_morph;
+       SfxEntry *   eq_morph;
+
+public:
+
+  // builds the entry from the parsed affix description dp and remembers pmgr
+  SfxEntry(AffixMgr* pmgr, affentry* dp );
+  ~SfxEntry();
+
+  // true when the aeXPRODUCT option bit is set for this entry
+  inline bool          allowCross() { return ((opts & aeXPRODUCT) != 0); }
+
+  // word checks (implemented in affentry.cxx); flag arguments default to
+  // FLAG_NULL wherever a default is given
+  struct hentry *   check(const char * word, int len, int optflags,
+                    AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
+                    const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL);
+
+  // default was the pointer constant NULL; FLAG is an integer type, so use
+  // FLAG_NULL like the sibling declarations
+  struct hentry *   check_twosfx(const char * word, int len, int optflags,
+                    AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
+
+  char *      check_twosfx_morph(const char * word, int len, int optflags,
+                 AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
+  struct hentry * get_next_homonym(struct hentry * he);
+  struct hentry * get_next_homonym(struct hentry * word, int optflags, AffEntry* ppfx,
+    const FLAG cclass, const FLAG needflag);
+
+
+  // plain accessors for the rule data
+  inline FLAG getFlag()   { return aflag;   }
+  inline const char *  getKey()    { return rappnd; }
+  char *               add(const char * word, int len);
+
+
+  inline const char *  getMorph()    { return morphcode;  }
+
+  inline const unsigned short * getCont()    { return contclass;  }
+  inline short           getContLen()    { return contclasslen;  }
+  inline const char *  getAffix()    { return appnd; }
+
+  inline short getKeyLen() { return appndl; }
+
+  // accessors for the entry links
+  inline SfxEntry *    getNext()   { return next;   }
+  inline SfxEntry *    getNextNE() { return nextne; }
+  inline SfxEntry *    getNextEQ() { return nexteq; }
+
+  inline SfxEntry *    getLM() { return l_morph; }
+  inline SfxEntry *    getRM() { return r_morph; }
+  inline SfxEntry *    getEQM() { return eq_morph; }
+  inline SfxEntry *    getFlgNxt() { return flgnxt; }
+
+  inline void   setNext(SfxEntry * ptr)   { next = ptr;   }
+  inline void   setNextNE(SfxEntry * ptr) { nextne = ptr; }
+  inline void   setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
+  inline void   setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
+
+  inline int    test_condition(const char * st, const char * begin);
+};
+
+#endif
+
+
--- a/goldlib/hunspell/affixmgr.cxx
+++ b/goldlib/hunspell/affixmgr.cxx
--- a/goldlib/hunspell/affixmgr.hxx
+++ b/goldlib/hunspell/affixmgr.hxx
@ -0,0 +1,206 @@
+#ifndef _AFFIXMGR_HXX_
+#define _AFFIXMGR_HXX_
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+
+#include "atypes.hxx"
+#include "baseaffi.hxx"
+#include "hashmgr.hxx"
+
+// check flag duplication
+// (bit values; parse_affix() below receives a dupflags buffer)
+#define dupSFX        (1 << 0)
+#define dupPFX        (1 << 1)
+
+// Affix manager: holds the prefix/suffix entry tables, the settings read
+// from the affix file given to the constructor, and the word-checking API
+// built on top of them.  Only the declaration is visible in this header;
+// the behaviour of each member function is defined in affixmgr.cxx.
+class AffixMgr
+{
+
+  // entry tables: pStart/sStart have SETSIZE slots, pFlag/sFlag have
+  // CONTSIZE slots (p = prefix, s = suffix)
+  AffEntry *          pStart[SETSIZE];
+  AffEntry *          sStart[SETSIZE];
+  AffEntry *          pFlag[CONTSIZE];
+  AffEntry *          sFlag[CONTSIZE];
+  // hash manager handed to the constructor
+  HashMgr *           pHMgr;
+  char *              trystring;
+  char *              encoding;
+  struct cs_info *    csconv;
+  int                 utf8;
+  struct unicode_info2 * utf_tbl;
+  int                 complexprefixes;
+  // compounding related flags and switches
+  FLAG                compoundflag;  
+  FLAG                compoundbegin;
+  FLAG                compoundmiddle;
+  FLAG                compoundend;
+  FLAG                compoundroot;
+  FLAG                compoundforbidflag;
+  FLAG                compoundpermitflag;
+  int                 checkcompounddup;
+  int                 checkcompoundrep;
+  int                 checkcompoundcase;
+  int                 checkcompoundtriple;
+  FLAG                forbiddenword;
+  FLAG                nosuggest;
+  FLAG                pseudoroot;
+  int                 cpdmin;
+  // tables, each stored as an element count plus an array
+  int                 numrep;
+  replentry *         reptable;
+  int                 nummap;
+  mapentry *          maptable;
+  int                 numbreak;
+  char **             breaktable;
+  int                 numcheckcpd;
+  replentry *         checkcpdtable;
+  int                 numdefcpd;
+  flagentry *         defcpdtable;
+  int                 maxngramsugs;
+  int                 nosplitsugs;
+  int                 sugswithdots;
+  int                 cpdwordmax;
+  int                 cpdmaxsyllable;
+  char *              cpdvowels;
+  w_char *            cpdvowels_utf16;
+  int                 cpdvowels_utf16_len;
+  char *              cpdsyllablenum;
+  // the members marked "not stateless" are flagged by the original author
+  // as state that makes the checking API stateful
+  const char *        pfxappnd; // BUG: not stateless
+  const char *        sfxappnd; // BUG: not stateless
+  FLAG                sfxflag;  // BUG: not stateless
+  char *              derived;  // BUG: not stateless
+  AffEntry *          sfx;      // BUG: not stateless
+  AffEntry *          pfx;      // BUG: not stateless
+  int                 checknum;
+  char *              wordchars;
+  unsigned short *    wordchars_utf16;
+  int                 wordchars_utf16_len;
+  char *              version;
+  char *              lang;
+  int                 langnum;
+  FLAG                lemma_present;
+  FLAG                circumfix;
+  FLAG                onlyincompound;
+  FLAG                keepcase;
+  int                 checksharps;
+
+  int                 havecontclass; // boolean variable
+  char                contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
+  flag                flag_mode;
+  
+public:
+ 
+  // affpath: path of the affix file; ptr: hash manager to use
+  AffixMgr(const char * affpath, HashMgr * ptr);
+  ~AffixMgr();
+  // affix checks returning a dictionary entry
+  struct hentry *     affix_check(const char * word, int len,
+            const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT);
+  struct hentry *     prefix_check(const char * word, int len,
+            char in_compound, const FLAG needflag = FLAG_NULL);
+  inline int isSubset(const char * s1, const char * s2);
+  struct hentry *     prefix_check_twosfx(const char * word, int len,
+            char in_compound, const FLAG needflag = FLAG_NULL);
+  inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
+  struct hentry *     suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx,
+			char ** wlst, int maxSug, int * ns, const FLAG cclass = FLAG_NULL,
+                        const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+  struct hentry *     suffix_check_twosfx(const char * word, int len,
+            int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
+
+  // "_morph" variants of the checks above; they return a char * instead
+  // of a dictionary entry
+  char * affix_check_morph(const char * word, int len,
+                    const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+  char * prefix_check_morph(const char * word, int len,
+                    char in_compound, const FLAG needflag = FLAG_NULL);
+  char * suffix_check_morph (const char * word, int len, int sfxopts, AffEntry * ppfx,
+            const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+
+  char * prefix_check_twosfx_morph(const char * word, int len,
+            char in_compound, const FLAG needflag = FLAG_NULL);
+  char * suffix_check_twosfx_morph(const char * word, int len,
+            int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL);
+
+  int                 expand_rootword(struct guessword * wlst, int maxn, const char * ts,
+                        int wl, const unsigned short * ap, unsigned short al, char * bad, int);
+
+  // compound word checks
+  int                 get_syllable (const char * word, int wlen);
+  int                 cpdrep_check(const char * word, int len);
+  int                 cpdpat_check(const char * word, int len);
+  int                 defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** rwords, char all);
+  int                 cpdcase_check(const char * word, int len);
+  int                 candidate_check(const char * word, int len);
+  struct hentry *     compound_check(const char * word, int len,
+                              short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
+                              char hu_mov_rule, int * cmpdstemnum, int * cmpdstem, char is_sug);
+
+  int compound_check_morph(const char * word, int len,
+                              short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
+                              char hu_mov_rule, char ** result, char * partresult);
+
+  struct hentry *     lookup(const char * word);
+  // getters; most names match a private data member declared above
+  int                 get_numrep();
+  struct replentry *  get_reptable();
+  int                 get_nummap();
+  struct mapentry *   get_maptable();
+  int                 get_numbreak();
+  char **             get_breaktable();
+  char *              get_encoding();
+  int                 get_langnum();
+  struct unicode_info2 * get_utf_conv();
+  char *              get_try_string();
+  const char *        get_wordchars();
+  unsigned short * get_wordchars_utf16(int * len);
+  int                 get_compound();
+  FLAG                get_compoundflag();
+  FLAG                get_compoundbegin();
+  FLAG                get_forbiddenword();
+  FLAG                get_nosuggest();
+  FLAG                get_pseudoroot();
+  FLAG                get_onlyincompound();
+  FLAG                get_compoundroot();
+  FLAG                get_lemma_present();
+  int                 get_checknum();
+  char *              get_possible_root();
+  const char *        get_prefix();
+  const char *        get_suffix();
+  const char *        get_derived();
+  const char *        get_version();
+  const int           have_contclass();
+  int                 get_utf8();
+  int                 get_complexprefixes();
+  char *              get_suffixed(char );
+  int                 get_maxngramsugs();
+  int                 get_nosplitsugs();
+  int                 get_sugswithdots(void);
+  FLAG                get_keepcase(void);
+  int                 get_checksharps(void);
+
+private:
+  // affix file parsing: parse_file() takes the file path, the other
+  // parse_* helpers take one line (plus the open file for table parsers)
+  int  parse_file(const char * affpath);
+  int  parse_try(char * line);
+  int  parse_set(char * line);
+  int  parse_flag(char * line, unsigned short * out, char * name);
+  int  parse_num(char * line, int * out, char * name);
+  int  parse_cpdflag(char * line);
+  int  parse_cpdforbid(char * line);
+  int  parse_forbid(char * line);
+  int  parse_cpdsyllable(char * line);
+  int  parse_syllablenum(char * line);
+  int  parse_reptable(char * line, FILE * af);
+  int  parse_maptable(char * line, FILE * af);
+  int  parse_breaktable(char * line, FILE * af);
+  int  parse_checkcpdtable(char * line, FILE * af);
+  int  parse_defcpdtable(char * line, FILE * af);
+  int  parse_affix(char * line, const char at, FILE * af, char * dupflags);
+  int  parse_wordchars(char * line);
+  int  parse_lang(char * line);
+  int  parse_version(char * line);
+
+  // construction of the prefix/suffix lookup structures
+  int encodeit(struct affentry * ptr, char * cs);
+  int build_pfxtree(AffEntry* pfxptr);
+  int build_sfxtree(AffEntry* sfxptr);
+  int process_pfx_order();
+  int process_sfx_order();
+  AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr);
+  AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr);
+  int process_pfx_tree_to_list();
+  int process_sfx_tree_to_list();
+  void set_spec_utf8_encoding();
+  int redundant_condition(char, char * strip, int stripl, const char * cond, char *);
+};
+
+#endif
+
--- a/goldlib/hunspell/atypes.hxx
+++ b/goldlib/hunspell/atypes.hxx
@ -0,0 +1,85 @@
+#ifndef _ATYPES_HXX_
+#define _ATYPES_HXX_
+
+// HUNSTEM def.
+#define HUNSTEM
+
+#include "csutil.hxx"
+#include "hashmgr.hxx"
+
+// table and buffer sizes
+#define SETSIZE         256
+#define CONTSIZE        65536
+#define MAXWORDLEN      100
+#define MAXWORDUTF8LEN  (MAXWORDLEN * 4)
+
+// affentry options
+#define aeXPRODUCT      (1 << 0)
+#define aeUTF8          (1 << 1)
+#define aeALIASF        (1 << 2)
+#define aeALIASM        (1 << 3)
+
+// values passed as the in_compound argument of the check functions
+enum {IN_CPD_NOT, IN_CPD_BEGIN, IN_CPD_END, IN_CPD_OTHER};
+
+// parenthesized so the macro stays one value inside a larger expression
+// (e.g. x % MAXLNLEN or x / MAXLNLEN)
+#define MAXLNLEN        (8192 * 4)
+
+#define MAXCOMPOUND	10
+
+#define MAXACC          1000
+
+// a flag is a 16-bit unsigned value; FLAG_NULL means "no flag"
+#define FLAG unsigned short
+#define FLAG_NULL 0x00
+// macro arguments are parenthesized so expression arguments expand safely
+#define FREE_FLAG(a) ((a) = 0)
+
+// search flag b in the flag vector a of length c (see flag_bsearch)
+#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) (a), (unsigned short) (b), (c))
+
+// Parsed description of one affix rule.  PfxEntry/SfxEntry constructors
+// receive a pointer to this struct and copy its fields.
+struct affentry
+{
+   char * strip;             // string to strip
+   char * appnd;             // string to append
+   unsigned char stripl;     // length of strip string
+   unsigned char appndl;     // length of append string
+   char  numconds;           // number of conditions to match
+   char  opts;               // option bits (aeXPRODUCT, ...)
+   unsigned short aflag;     // flag of the rule
+   // condition storage, two layouts sharing the same memory
+   union {
+   	char   base[SETSIZE];   // one byte per character value
+	struct {
+		char ascii[SETSIZE/2];
+                char neg[8];
+                char all[8];
+                w_char * wchars[8];
+		int wlen[8];
+	} utf8;                 // NOTE(review): presumably used when aeUTF8 is set - confirm
+   } conds;
+   char *       morphcode;
+   unsigned short * contclass;
+   short        contclasslen;
+};
+
+// a pair of strings; element type of AffixMgr's reptable and checkcpdtable
+struct replentry {
+  char * pattern;
+  char * pattern2;
+};
+
+// element type of AffixMgr's maptable: a character set kept both as a
+// char string and as w_char data, plus a length
+struct mapentry {
+  char * set;
+  w_char * set_utf16;
+  int len;
+};
+
+// element type of AffixMgr's defcpdtable: a flag array and its length
+struct flagentry {
+  FLAG * def;
+  int len;
+};
+
+// element type of the word list filled by AffixMgr::expand_rootword
+struct guessword {
+  char * word;
+  bool allow;
+};
+
+#endif
+
+
+
+
+
--- a/goldlib/hunspell/baseaffi.hxx
+++ b/goldlib/hunspell/baseaffi.hxx
@ -0,0 +1,31 @@
+#ifndef _BASEAFF_HXX_
+#define _BASEAFF_HXX_
+
+// Common data of prefix and suffix entries.  The fields mirror
+// struct affentry (atypes.hxx).  This header uses SETSIZE and w_char
+// without including their headers, so those must be included first.
+class AffEntry
+{
+public:
+
+protected:
+       char *       appnd;       // string to append
+       char *       strip;       // string to strip
+       unsigned char  appndl;    // length of append string
+       unsigned char  stripl;    // length of strip string
+       char         numconds;    // number of conditions to match
+       char  opts;               // option bits (aeXPRODUCT, ...)
+       unsigned short aflag;     // flag of the rule
+       // condition storage, two layouts sharing the same memory
+       union {
+   	 char   base[SETSIZE];
+	 struct {
+		char  ascii[SETSIZE/2];
+                char neg[8];
+                char all[8];
+                w_char * wchars[8];
+		int wlen[8];
+	 } utf8;
+       } conds;
+       char *       morphcode;
+       unsigned short * contclass;
+       short        contclasslen;
+};
+
+#endif
--- a/goldlib/hunspell/csutil.cxx
+++ b/goldlib/hunspell/csutil.cxx
--- a/goldlib/hunspell/csutil.hxx
+++ b/goldlib/hunspell/csutil.hxx
@ -0,0 +1,125 @@
+#ifndef __CSUTILHXX__
+#define __CSUTILHXX__
+
+// First some base level utility routines
+
+// one two-byte character, stored as separate bytes l and h
+// (presumably low and high byte - confirm in csutil.cxx)
+typedef struct {
+    unsigned char l;
+    unsigned char h;
+} w_char;
+
+// convert UTF-16 characters to UTF-8
+char * u16_u8(char * dest, int size, const w_char * src, int srclen);
+
+// convert UTF-8 characters to UTF-16
+int u8_u16(w_char * dest, int size, const char * src);
+
+// sort 2-byte vector
+void flag_qsort(unsigned short flags[], int begin, int end);
+
+// binary search in 2-byte vector
+int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
+
+// remove end of line char(s)
+void   mychomp(char * s);
+
+// duplicate string
+char * mystrdup(const char * s);
+
+// duplicate reverse of string
+char * myrevstrdup(const char * s);
+
+// parse into tokens with char delimiter
+char * mystrsep(char ** sptr, const char delim);
+// parse into tokens with char delimiter
+// NOTE(review): how this differs from mystrsep is not visible in this header
+char * mystrsep2(char ** sptr, const char delim);
+
+// NOTE(review): presumably replaces one string with another inside the
+// first argument (the previous comment was a copy of the mystrsep one) -
+// confirm in csutil.cxx
+char * mystrrep(char *, const char *, const char *);
+
+// append s to the end of every line in text
+void strlinecat(char * lines, const char * s);
+
+// tokenize into lines at new line characters
+   int line_tok(const char * text, char *** lines);
+
+// tokenize into lines at new line characters and uniq in place
+   char * line_uniq(char * text);
+
+// change \n to c in place
+   char * line_join(char * text, char c);
+
+// leave only last {[^}]*} pattern in string
+   char * delete_zeros(char * morphout);
+
+// reverse word
+   void reverseword(char *);
+
+// reverse word; the callers use this variant when the text is UTF-8
+   void reverseword_utf(char *);
+
+// character encoding information: one record of case data
+// (ccase, clower, cupper) for a single-byte character
+struct cs_info {
+  unsigned char ccase;
+  unsigned char clower;
+  unsigned char cupper;
+};
+
+// Unicode character encoding information: a 16-bit character c with its
+// upper and lower case counterparts
+struct unicode_info {
+  unsigned short c;
+  unsigned short cupper;
+  unsigned short clower;
+};
+
+// second form of the Unicode information: no character code field, a
+// cletter field instead (NOTE(review): presumably a table indexed by the
+// character code - confirm in csutil.cxx)
+struct unicode_info2 {
+  char cletter;
+  unsigned short cupper;
+  unsigned short clower;
+};
+
+// pairs an encoding name with its cs_info table
+struct enc_entry {
+  const char * enc_name;
+  struct cs_info * cs_table;
+};
+
+// language to encoding default map
+
+struct lang_map {
+  const char * lang;
+  const char * def_enc;
+  int num;
+};
+
+// look up the cs_info table for the encoding named es
+struct cs_info * get_current_cs(const char * es);
+
+// Unicode table and its length
+struct unicode_info * get_utf_cs();
+
+int get_utf_cs_len();
+
+// default encoding name for the language lang
+const char * get_default_enc(const char * lang);
+
+// numeric id for the language lang
+int get_lang_num(const char * lang);
+
+// convert null terminated string to all caps using encoding
+void enmkallcap(char * d, const char * p, const char * encoding);
+
+// convert null terminated string to all little using encoding
+void enmkallsmall(char * d, const char * p, const char * encoding);
+
+// convert null terminated string to have initial capital using encoding
+void enmkinitcap(char * d, const char * p, const char * encoding);
+
+// convert null terminated string to all caps
+void mkallcap(char * p, const struct cs_info * csconv);
+
+// convert null terminated string to all little
+void mkallsmall(char * p, const struct cs_info * csconv);
+
+// convert null terminated string to have initial capital
+void mkinitcap(char * p, const struct cs_info * csconv);
+
+// convert first nc characters of the w_char string u to little
+void mkallsmall_utf(w_char * u, int nc, struct unicode_info2 * utfconv);
+
+#endif
--- a/goldlib/hunspell/dictmgr.cxx
+++ b/goldlib/hunspell/dictmgr.cxx
@ -0,0 +1,175 @@
+
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+#include <cstdio>
+
+#include "dictmgr.hxx"
+
+#if !defined(_MSC_VER)
+using namespace std;
+#endif
+
+// Allocates room for MAXDICTIONARIES entries and fills it from the list
+// file at dictpath, keeping only lines that start with etype.
+DictMgr::DictMgr(const char * dictpath, const char * etype)
+  : numdict(0),
+    pdentry((dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry)))
+{
+  // without storage there is nothing to load
+  if (!pdentry) {
+     numdict = 0;
+     return;
+  }
+
+  // no dictionary.lst found is okay: the manager simply stays empty
+  if (parse_file(dictpath, etype)) numdict = 0;
+}
+
+
+// Releases the strings of every loaded entry, then the entry array itself.
+DictMgr::~DictMgr()
+{
+  if (pdentry) {
+     for (int idx = 0; idx < numdict; idx++) {
+        dictentry & entry = pdentry[idx];
+        if (entry.lang) {
+            free(entry.lang);
+            entry.lang = NULL;
+        }
+        if (entry.region) {
+            free(entry.region);
+            entry.region = NULL;
+        }
+        if (entry.filename) {
+            free(entry.filename);
+            entry.filename = NULL;
+        }
+     }
+     free(pdentry);
+     pdentry = NULL;
+  }
+  numdict = 0;
+}
+
+
+// read in list of etype entries and build up structure to describe them
+// Reads the dictionary list at dictpath and stores one dictentry per line
+// whose first 4 characters match etype.  A line holds four space separated
+// fields: type, language, region ("ANY" is stored as "") and file name.
+// Returns 1 when the file cannot be opened, 0 otherwise.  At most
+// MAXDICTIONARIES entries are stored; malformed lines are reported on
+// stderr and skipped.
+int  DictMgr::parse_file(const char * dictpath, const char * etype)
+{
+    char line[MAXDICTENTRYLEN+1];
+    dictentry * pdict = pdentry;
+
+    // open the dictionary list file
+    FILE * dictlst = fopen(dictpath,"r");
+    if (!dictlst) {
+      return 1;
+    }
+
+    // read in each line ignoring any that dont start with etype
+    while (fgets(line,MAXDICTENTRYLEN,dictlst)) {
+       mychomp(line);
+
+       if (strncmp(line,etype,4) != 0) continue;
+       if (numdict >= MAXDICTIONARIES) continue;
+
+       // Collect the fields in locals first.  The entry array comes from
+       // malloc and is uninitialized, so writing fields of a line that is
+       // later rejected would leak them when the slot is reused.
+       char * lang = NULL;
+       char * region = NULL;
+       char * filename = NULL;
+
+       char * tp = line;
+       char * piece;
+       int i = 0;
+       while ((piece=mystrsep(&tp,' '))) {
+          // empty pieces come from consecutive delimiters and are ignored
+          if (*piece != '\0') {
+              switch(i) {
+                 case 0: break;
+                 case 1: lang = mystrdup(piece); break;
+                 case 2: if (strcmp (piece, "ANY") == 0)
+                           region = mystrdup("");
+                         else
+                           region = mystrdup(piece);
+                         break;
+                 case 3: filename = mystrdup(piece); break;
+                 default: break;
+              }
+              i++;
+          }
+          free(piece);
+       }
+
+       if (i == 4) {
+           // well formed line: commit the entry
+           pdict->lang = lang;
+           pdict->region = region;
+           pdict->filename = filename;
+           numdict++;
+           pdict++;
+       } else {
+           // too few or too many fields: drop what was collected
+           free(lang);
+           free(region);
+           free(filename);
+           fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
+           fflush(stderr);
+       }
+    }
+    fclose(dictlst);
+    return 0;
+}
+
+// hand the entry array back through ppentry and return the entry count
+int DictMgr::get_list(dictentry ** ppentry)
+{
+  const int count = numdict;
+  *ppentry = pdentry;
+  return count;
+}
+
+
+
+// strip strings into token based on single char delimiter
+// acts like strsep() but only uses a delim char and not
+// a delim string
+//
+// Returns a newly malloc'ed copy of the text up to the next delim (or up
+// to the end of the string) and advances *stringp past it; the caller
+// frees the result.  Returns NULL when nothing is left to parse, when
+// stringp or *stringp is NULL, or when the allocation fails (in that case
+// *stringp is left untouched).
+char * DictMgr::mystrsep(char ** stringp, const char delim)
+{
+  if (!stringp || !*stringp) return NULL;
+
+  char * mp = *stringp;
+  size_t n = strlen(mp);
+  if (n == 0) return NULL;
+
+  // token length: distance to the delimiter, or the whole remainder
+  char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
+  size_t nc = dp ? (size_t)(dp - mp) : n;
+
+  char * rv = (char *) malloc(nc+1);
+  if (!rv) return NULL;
+  memcpy(rv,mp,nc);
+  *(rv+nc) = '\0';
+
+  // continue after the delimiter, or park on the terminating NUL
+  *stringp = dp ? dp+1 : mp+n;
+  return rv;
+}
+
+
+// replaces strdup with ansi version
+// Returns a malloc'ed copy of s including its terminator, or NULL when s
+// is NULL or the allocation fails.
+char * DictMgr::mystrdup(const char * s)
+{
+  if (!s) return NULL;
+
+  const int sl = strlen(s);
+  const size_t bytes = (sl+1) * sizeof(char);
+  char * copy = (char *) malloc(bytes);
+  if (copy) memcpy(copy,s,bytes);
+  return copy;
+}
+
+
+// remove cross-platform text line end characters
+// Cuts the string at a trailing '\r' or '\n', and also at a '\r' found in
+// the second to last position.
+void DictMgr:: mychomp(char * s)
+{
+  const int len = strlen(s);
+  if (len > 0) {
+     char & last = s[len-1];
+     if (last == '\r' || last == '\n') last = '\0';
+  }
+  if (len > 1 && s[len-2] == '\r') s[len-2] = '\0';
+}
+
--- a/goldlib/hunspell/dictmgr.hxx
+++ b/goldlib/hunspell/dictmgr.hxx
@ -0,0 +1,34 @@
+#ifndef _DICTMGR_HXX_
+#define _DICTMGR_HXX_
+
+#define MAXDICTIONARIES 100
+#define MAXDICTENTRYLEN 1024
+
+// one line of the dictionary list file; the strings are heap copies owned
+// by DictMgr, which frees them in its destructor
+struct dictentry {
+  char * filename;   // fourth field of the line
+  char * lang;       // second field of the line
+  char * region;     // third field of the line; "ANY" is stored as ""
+};
+
+
+// Loads the entries of one type from a dictionary list file and exposes
+// them as an array of dictentry.
+// NOTE(review): the class owns pdentry (malloc'ed in the constructor, freed
+// in the destructor) but declares no copy constructor or assignment
+// operator, so copying a DictMgr would free the array twice.
+class DictMgr
+{
+
+  int                 numdict;   // number of valid entries in pdentry
+  dictentry *         pdentry;   // array with room for MAXDICTIONARIES entries
+
+public:
+ 
+  // dictpath: path of the list file; etype: line type to keep
+  DictMgr(const char * dictpath, const char * etype);
+  ~DictMgr();
+  // stores the entry array in *ppentry and returns the entry count
+  int get_list(dictentry** ppentry);
+            
+private:
+  int  parse_file(const char * dictpath, const char * etype);
+  // private string helpers used while parsing
+  char * mystrsep(char ** stringp, const char delim);
+  char * mystrdup(const char * s);
+  void mychomp(char * s);
+
+};
+
+#endif
--- a/goldlib/hunspell/hashmgr.cxx
+++ b/goldlib/hunspell/hashmgr.cxx
@ -0,0 +1,679 @@
+#include "license.hun"
+#include "license.mys"
+
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#include <cstdio>
+
+#include "hashmgr.hxx"
+#include "csutil.hxx"
+
+#if !defined(_MSC_VER)
+#include <unistd.h>
+using namespace std;
+#endif
+
+// build a hash table from a munched word list
+
+// Builds the hash table from the word list at tpath after reading the
+// configuration from the affix file at apath.  When loading fails the error
+// code is reported on stderr and the manager is left without a table
+// (tableptr == NULL, tablesize == 0), so lookup() simply finds nothing.
+HashMgr::HashMgr(const char * tpath, const char * apath)
+{
+  tablesize = 0;
+  tableptr = NULL;
+  flag_mode = FLAG_CHAR;
+  complexprefixes = 0;
+  utf8 = 0;
+  numaliasf = 0;
+  aliasf = NULL;
+  numaliasm = 0;
+  aliasm = NULL;
+  load_config(apath);  
+  int ec = load_tables(tpath);
+  if (ec) {
+    /* error condition - what should we do here */
+    fprintf(stderr,"Hash Manager Error : %d\n",ec);
+    fflush(stderr);
+    if (tableptr) {
+      free(tableptr);
+      // reset the pointer: the destructor frees tableptr again when it is
+      // non-NULL, and lookup() would otherwise read the freed table
+      tableptr = NULL;
+    }
+    tablesize = 0;
+  }
+}
+
+
+// Frees the hash table with all chained entries, then the alias tables.
+// Flag vectors and descriptions are only freed per entry when no alias
+// table (aliasf / aliasm) is in use.
+HashMgr::~HashMgr()
+{
+  if (tableptr) {
+    // go through the table column by column
+    for (int col = 0; col < tablesize; col++) {
+      // the first cell of a column lives inside the table array itself,
+      // so only its payload is released
+      struct hentry * head = &tableptr[col];
+      if (head->astr && !aliasf) free(head->astr);
+      if (head->word) free(head->word);
+      if (head->description && !aliasm) free(head->description);
+
+      // the cells chained behind it were allocated one by one
+      struct hentry * cell = head->next;
+      while (cell) {
+        struct hentry * following = cell->next;
+        if (cell->astr && !aliasf) free(cell->astr);
+        if (cell->word) free(cell->word);
+        if (cell->description && !aliasm) free(cell->description);
+        free(cell);
+        cell = following;
+      }
+    }
+    free(tableptr);
+  }
+  tablesize = 0;
+
+  if (aliasf) {
+    for (int j = 0; j < numaliasf; j++) free(aliasf[j]);
+    free(aliasf);
+    aliasf = NULL;
+    if (aliasflen) {
+      free(aliasflen);
+      aliasflen = NULL;
+    }
+  }
+  if (aliasm) {
+    for (int k = 0; k < numaliasm; k++) free(aliasm[k]);
+    free(aliasm);
+    aliasm = NULL;
+  }
+}
+
+// lookup a root word in the hashtable
+
+// Returns the first entry whose word equals the argument, or NULL when the
+// table is missing or holds no such word.
+struct hentry * HashMgr::lookup(const char *word) const
+{
+    if (!tableptr) return NULL;
+
+    struct hentry * cell = &tableptr[hash(word)];
+    // a column whose first cell has no word is empty
+    if (cell->word == NULL) return NULL;
+
+    while (cell != NULL) {
+       if (strcmp(word,cell->word) == 0) return cell;
+       cell = cell->next;
+    }
+    return NULL;
+}
+
+// add a word to the hash table (private)
+
+// Stores a copy of word (length wl) in the hash table together with the
+// flag vector aff (al flags, pointer stored as is) and the description
+// desc.  With complexprefixes set, word and description are stored
+// reversed.  With an alias table for descriptions (aliasm), desc is read
+// as the decimal index of the alias.  Returns 0 on success, 1 when memory
+// runs out.
+int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc)
+{
+    char * st = mystrdup(word);
+    // without a copy there is nothing to hash or to store
+    if (!st) return 1;
+    if (complexprefixes) {
+        if (utf8) reverseword_utf(st); else reverseword(st);
+    }
+    int i = hash(st);
+    struct hentry * dp = &tableptr[i];
+
+    // an empty column is filled in place, otherwise a new cell is chained
+    bool chained = (dp->word != NULL);
+    struct hentry * hp = dp;
+    if (chained) {
+       hp = (struct hentry *) malloc (sizeof(struct hentry));
+       if (!hp) {
+           free(st);
+           return 1;
+       }
+    }
+
+    hp->wlen = wl;
+    hp->alen = al;
+    hp->word = st;
+    hp->astr = aff;
+    hp->next = NULL;
+    hp->next_homonym = NULL;
+    if (aliasm) {
+         hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
+    } else {
+         hp->description = mystrdup(desc);
+         if (desc && !hp->description) {
+             // a cell that was never linked is released again; a table
+             // slot keeps its word and is cleaned up with the table
+             if (chained) {
+                 free(st);
+                 free(hp);
+             }
+             return 1;
+         }
+         // test the new cell's own description (the chained case used to
+         // test the column head and could reverse a NULL string)
+         if (hp->description && complexprefixes) {
+             if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);
+         }
+    }
+
+    if (chained) {
+       // append at the end of the column; the first cell with the same
+       // word and no homonym link yet gets the new cell as its homonym
+       while (dp->next != NULL) {
+         if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
+         dp=dp->next;
+       }
+       if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
+       dp->next = hp;
+    }
+    return 0;
+}     
+
+// add a custom dic. word to the hash table (public)
+
+// Adds word (length wl) to the table.  When aff is given it is decoded
+// into a sorted flag vector first; otherwise the word gets no flags.
+// Always returns 0.
+int HashMgr::put_word(const char * word, int wl, char * aff)
+{
+    unsigned short * flagvec = NULL;
+    int nflags = 0;
+    if (aff) {
+	nflags = decode_flags(&flagvec, aff);
+	flag_qsort(flagvec, 0, nflags);
+    }
+    add_word(word, wl, flagvec, nflags, NULL);
+    return 0;
+}
+
+// Adds word (length wl) to the table with a private copy of the flag
+// vector of the existing entry pattern.  Returns 1 when pattern is not in
+// the table, has no flags, or memory runs out; 0 on success.
+int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern)
+{
+    struct hentry * dp = lookup(pattern);
+    if (!dp || !dp->astr) return 1;
+
+    unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
+    if (!flags && dp->alen > 0) return 1;
+    if (flags) memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
+
+    // with a NULL description add_word only fails before it has stored the
+    // flag pointer anywhere, so the copy is still ours to release
+    if (add_word(word, wl, flags, dp->alen, NULL)) {
+        free(flags);
+        return 1;
+    }
+    return 0;
+}
+
+// walk the hash table entry by entry - null at end
+// Iterates over all entries: pass col < 0 or hp == NULL to start, then
+// feed back the returned entry together with col.  Returns NULL and sets
+// col to -1 once the table is exhausted.
+struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
+{
+  // reset to start
+  if ((col < 0) || (hp == NULL)) {
+    col = -1;
+    hp = NULL;
+  }
+
+  // stay in the current column while it has further cells
+  if (hp && hp->next != NULL) return hp->next;
+
+  // otherwise move on to the next column whose first cell holds a word
+  for (col++; col < tablesize; col++) {
+    struct hentry * head = &tableptr[col];
+    if (head->word != NULL) return head;
+  }
+
+  col = -1;
+  return NULL;
+}
+
+// load a munched word list and build a hash table on the fly
+// Reads the word list at tpath and builds the hash table.  The first line
+// of the file carries the word count, which sizes the table; every further
+// line is "word[/flags][<TAB>description]".
+// Returns 0 on success, otherwise: 1 file cannot be opened, 2 file is
+// empty, 3 table cannot be allocated, 4 word count is missing or zero,
+// 5 a word could not be added.  The file is closed on every path.
+int HashMgr::load_tables(const char * tpath)
+{
+  int wl, al;
+  char * ap;
+  char * dp;
+  unsigned short * flags;
+
+  // raw dictionary - munched file
+  FILE * rawdict = fopen(tpath, "r");
+  if (rawdict == NULL) return 1;
+
+  // first read the first line of file to get hash table size
+  char ts[MAXDELEN];
+  if (! fgets(ts, MAXDELEN-1,rawdict)) {
+    fclose(rawdict);
+    return 2;
+  }
+  mychomp(ts);
+  if ((*ts < '1') || (*ts > '9')) fprintf(stderr, "error - missing word count in dictionary file\n");
+  tablesize = atoi(ts);
+  if (!tablesize) {
+    fclose(rawdict);
+    return 4;
+  }
+  tablesize = tablesize + 5 + USERWORD;
+  if ((tablesize %2) == 0) tablesize++;
+
+  // allocate the hash table
+  tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
+  if (! tableptr) {
+    fclose(rawdict);
+    return 3;
+  }
+  for (int i=0; i<tablesize; i++) tableptr[i].word = NULL;
+
+  // loop through all words on much list and add to hash
+  // table and create word and affix strings
+
+  while (fgets(ts,MAXDELEN-1,rawdict)) {
+    mychomp(ts);
+    // split each line into word and morphological description
+    dp = strchr(ts,'\t');
+
+    if (dp) {
+      *dp = '\0';
+      dp++;
+    }
+
+    // split each line into word and affix char strings
+    // "\/" signs slash in words (not affix separator)
+    // "/" at beginning of the line is word character (not affix separator)
+    ap = ts;
+    while ((ap = strchr(ap,'/')) != NULL) {
+        if (ap == ts) {
+            ap++;
+            continue;
+        } else if (*(ap - 1) != '\\') break;
+        // replace "\/" with "/" by shifting the rest of the string left
+        for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
+        
+    }
+
+    if (ap) {
+      // cut the word off at the separator; the flags follow it
+      *ap = '\0';
+      if (aliasf) {
+        int index = atoi(ap + 1);
+        al = get_aliasf(index, &flags);
+        if (!al) {
+            fprintf(stderr, "error - bad flag vector alias: %s\n", ts);
+            *ap = '\0';
+        }
+      } else {
+        al = decode_flags(&flags, ap + 1);
+        flag_qsort(flags, 0, al);
+      }
+    } else {
+      al = 0;
+      ap = NULL;
+      flags = NULL;
+    }
+
+    wl = strlen(ts);
+
+    // add the word and its index
+    if (add_word(ts,wl,flags,al,dp)) {
+      fclose(rawdict);
+      return 5;
+    }
+
+  }
+ 
+  fclose(rawdict);
+  return 0;
+}
+
+
+// Hash a NUL-terminated word to a bucket index in the range [0, tablesize).
+//
+// The first (up to) four characters are packed into the accumulator eight
+// bits at a time; every further character rotates the accumulator by
+// ROTATE_LEN bits (ROTATE macro, htypes.hxx) and is then XOR-ed in.
+//
+// The accumulator is unsigned on purpose: left-shifting a signed long into
+// or past its sign bit is undefined behaviour, whereas the unsigned
+// arithmetic is well defined and yields the bit pattern the signed code
+// produced in practice.  Characters are still widened from plain char the
+// same way as before, so bucket numbers do not change.
+
+int HashMgr::hash(const char * word) const
+{
+    unsigned long hv = 0;
+    for (int i = 0; i < 4 && *word != 0; i++)
+        hv = (hv << 8) | (unsigned long) (long) (*word++);
+    while (*word != 0) {
+      ROTATE(hv,ROTATE_LEN);
+      hv ^= (unsigned long) (long) (*word++);
+    }
+    return (int) (hv % tablesize);
+}
+
+// Allocate storage for n decoded flags.  At least one element is always
+// requested so that an empty flag vector still gets a non-NULL pointer
+// (malloc(0) is allowed to return NULL).  Returns NULL when out of memory.
+static unsigned short * alloc_flag_vector(int n) {
+    if (n < 1) n = 1;
+    return (unsigned short *) malloc(n * sizeof(unsigned short));
+}
+
+// Decode a flag-vector string into a newly malloc()ed array of 16-bit flags.
+//
+//   result - receives the array; the caller owns it and must free() it.
+//            It is set to NULL when the array could not be allocated.
+//   flags  - NUL-terminated flag string written in the notation selected
+//            by flag_mode.
+//
+// Returns the number of flags stored in *result; 0 when *result is NULL.
+int HashMgr::decode_flags(unsigned short ** result, char * flags) {
+    int len;
+    *result = NULL;
+    switch (flag_mode) {
+      case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
+        len = strlen(flags);
+        if (len%2 == 1) fprintf(stderr,"error: length of FLAG_LONG flagvector is odd: %s\n", flags);
+        len = len/2;
+        *result = alloc_flag_vector(len);
+        if (!*result) return 0;
+        for (int i = 0; i < len; i++) {
+            // go through unsigned char: a byte >= 0x80 must not be
+            // sign-extended, or it would spill into the other half of the flag
+            unsigned short hi = (unsigned char) flags[i * 2];
+            unsigned short lo = (unsigned char) flags[i * 2 + 1];
+            (*result)[i] = (unsigned short) ((hi << 8) | lo);
+        }
+        break;
+      }
+      case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
+        len = 1;
+        char * src = flags;
+        unsigned short * dest;
+        char * p;
+        // one flag per comma, plus the last one
+        for (p = flags; *p; p++) {
+          if (*p == ',') len++;
+        }
+        *result = alloc_flag_vector(len);
+        if (!*result) return 0;
+        dest = *result;
+        for (p = flags; *p; p++) {
+          if (*p == ',') {
+            *dest = (unsigned short) atoi(src);
+            if (*dest == 0) fprintf(stderr, "error: 0 is wrong flag id\n");
+            src = p + 1;
+            dest++;
+          }
+        }
+        *dest = (unsigned short) atoi(src);
+        if (*dest == 0) fprintf(stderr, "error: 0 is wrong flag id\n");
+        break;
+      }
+      case FLAG_UNI: { // UTF-8 characters
+        w_char w[MAXDELEN/2];
+        len = u8_u16(w, MAXDELEN/2, flags);
+        // NOTE(review): defensive guard in case u8_u16() can report failure
+        // with a negative count - confirm against its contract
+        if (len < 0) len = 0;
+        *result = alloc_flag_vector(len);
+        if (!*result) return 0;
+        memcpy(*result, w, len * sizeof(short));
+        break;
+      }
+      default: { // Ispell's one-character flags (erfg -> e r f g)
+        unsigned short * dest;
+        len = strlen(flags);
+        *result = alloc_flag_vector(len);
+        if (!*result) return 0;
+        dest = *result;
+        for (unsigned char * p = (unsigned char *) flags; *p; p++) {
+          *dest = (unsigned short) *p;
+          dest++;
+        }
+        break;
+      }
+    }
+    return len;
+}
+
+// Decode a single flag written in the notation selected by flag_mode.
+//
+// Returns the 16-bit flag value.  A result of 0 is not a valid flag id; it
+// is reported on stderr and still returned to the caller.
+unsigned short HashMgr::decode_flag(const char * f) {
+    unsigned short s = 0;
+    switch (flag_mode) {
+      case FLAG_LONG:
+        // same byte composition as decode_flags(); f[1] is only read when
+        // f[0] is not the terminator, so an empty string is not over-read
+        if (f[0] != '\0') {
+            s = (unsigned short) ((((unsigned char) f[0]) << 8) | (unsigned char) f[1]);
+        }
+        break;
+      case FLAG_NUM:
+        s = (unsigned short) atoi(f);
+        break;
+      case FLAG_UNI:
+        u8_u16((w_char *) &s, 1, f);
+        break;
+      default:
+        s = (unsigned short) *((unsigned char *)f);
+    }
+    if (!s) fprintf(stderr, "error: 0 is wrong flag id\n");
+    return s;
+}
+
+// Render one 16-bit flag as text in the notation chosen by flag_mode and
+// return it through mystrdup().  Flag 0 is rendered as the literal "(NULL)".
+char * HashMgr::encode_flag(unsigned short f) {
+    if (f == 0) return mystrdup("(NULL)");
+
+    unsigned char text[10];
+    switch (flag_mode) {
+      case FLAG_LONG:   // two bytes: high byte first, then low byte
+        text[0] = (unsigned char) (f >> 8);
+        text[1] = (unsigned char) (f & 0xFF);
+        text[2] = '\0';
+        break;
+      case FLAG_NUM:    // decimal number
+        sprintf((char *) text, "%d", f);
+        break;
+      case FLAG_UNI:    // converted by u16_u8() from the single 16-bit unit
+        u16_u8((char *) &text, 10, (w_char *) &f, 1);
+        break;
+      default:          // one-character flag
+        text[0] = (unsigned char) (f);
+        text[1] = '\0';
+        break;
+    }
+    return mystrdup((char *) text);
+}
+
+// Read the head of the affix file and configure the hash manager from it.
+//
+// Recognised lines:
+//   FLAG <long|num|UTF-8>  - selects flag_mode
+//   SET ... UTF-8          - sets utf8
+//   AF <n> / AM <n>        - alias tables, handed to parse_aliasf() /
+//                            parse_aliasm(), which read the following lines
+//   COMPLEXPREFIXES        - sets complexprefixes
+// Reading stops at the first SFX or PFX line.
+//
+// Returns 0 on success, 1 when the file cannot be opened or an alias table
+// fails to parse.  The file is closed on every path.
+int  HashMgr::load_config(const char * affpath)
+{
+
+  // io buffers
+  char line[MAXDELEN+1];
+
+  // open the affix file
+  FILE * afflst;
+  afflst = fopen(affpath,"r");
+  if (!afflst) {
+    fprintf(stderr,"Error - could not open affix description file %s\n",affpath);
+    return 1;
+  }
+
+    // read in each line ignoring any that do not
+    // start with a known line type indicator
+
+    int status = 0;
+    while (fgets(line,MAXDELEN,afflst)) {
+        mychomp(line);
+
+        /* parse in the flag notation; isspace() needs an unsigned char value */
+        if ((strncmp(line,"FLAG",4) == 0) && isspace((unsigned char) line[4])) {
+            if (flag_mode != FLAG_CHAR) {
+                fprintf(stderr,"error: duplicate FLAG parameter\n");
+            }
+            if (strstr(line, "long")) flag_mode = FLAG_LONG;
+            if (strstr(line, "num")) flag_mode = FLAG_NUM;
+            if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
+            if (flag_mode == FLAG_CHAR) {
+                fprintf(stderr,"error: FLAG need `num', `long' or `UTF-8' parameter: %s\n", line);
+            }
+        }
+        if ((strncmp(line,"SET",3) == 0) && isspace((unsigned char) line[3]) && strstr(line, "UTF-8")) utf8 = 1;
+
+        /* alias tables: a parse failure aborts the load, but the file is
+           still closed below instead of being leaked */
+        if ((strncmp(line,"AF",2) == 0) && isspace((unsigned char) line[2])) {
+            if (parse_aliasf(line, afflst)) {
+                status = 1;
+                break;
+            }
+        }
+
+        if ((strncmp(line,"AM",2) == 0) && isspace((unsigned char) line[2])) {
+            if (parse_aliasm(line, afflst)) {
+                status = 1;
+                break;
+            }
+        }
+
+        if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
+        if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace((unsigned char) line[3])) break;
+    }
+    fclose(afflst);
+    return status;
+}
+
+/* Parse the AF (flag-vector alias) table.
+ *
+ *   line - the "AF <count>" header line already read by the caller; the
+ *          same buffer is reused to read the table lines (fgets reads at
+ *          most MAXDELEN characters into it)
+ *   af   - the affix file, positioned just after the header line
+ *
+ * Every table line has the form "AF <flags>"; <flags> is decoded with
+ * decode_flags() and sorted with flag_qsort().
+ *
+ * Returns 0 on success.  On any failure returns 1 after releasing whatever
+ * was built so far and resetting numaliasf/aliasf/aliasflen to 0/NULL/NULL,
+ * so a half-initialised table is never left behind.
+ */
+int  HashMgr::parse_aliasf(char * line, FILE * af)
+{
+   if (numaliasf != 0) {
+      fprintf(stderr,"error: duplicate AF (alias for flag vector) tables used\n");
+      return 1;
+   }
+
+   /* header line: the keyword followed by the number of entries */
+   char * tp = line;
+   char * piece;
+   int field = 0;
+   int seen = 0;
+   int count = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+       if (*piece != '\0') {
+          if (field == 0) {
+             seen++;
+          } else if (field == 1) {
+             count = atoi(piece);
+             seen++;
+          }
+          field++;
+       }
+       free(piece);
+   }
+   if (seen != 2) {
+      aliasf = NULL;
+      aliasflen = NULL;
+      fprintf(stderr,"error: missing AF table information\n");
+      return 1;
+   }
+   if (count < 1) {
+      aliasf = NULL;
+      aliasflen = NULL;
+      fprintf(stderr,"incorrect number of entries in AF table\n");
+      return 1;
+   }
+
+   aliasf = (unsigned short **) malloc((size_t) count * sizeof(unsigned short *));
+   aliasflen = (unsigned short *) malloc((size_t) count * sizeof(unsigned short));
+   if (!aliasf || !aliasflen) {
+      free(aliasf);
+      free(aliasflen);
+      aliasf = NULL;
+      aliasflen = NULL;
+      return 1;
+   }
+   /* clear every slot first, so cleanup is safe wherever parsing stops */
+   for (int k = 0; k < count; k++) {
+      aliasf[k] = NULL;
+      aliasflen[k] = 0;
+   }
+   numaliasf = count;
+
+   /* now parse the numaliasf lines to read in the remainder of the table */
+   int ok = 1;
+   for (int j = 0; ok && j < numaliasf; j++) {
+        if (!fgets(line,MAXDELEN,af)) {
+           ok = 0;   /* file ended before the table was complete */
+           break;
+        }
+        mychomp(line);
+        tp = line;
+        field = 0;
+        while (ok && (piece=mystrsep(&tp, 0))) {
+           if (*piece != '\0') {
+               if (field == 0) {
+                  if (strncmp(piece,"AF",2) != 0) ok = 0;
+               } else if (field == 1) {
+                  aliasflen[j] = decode_flags(&(aliasf[j]), piece);
+                  if (aliasf[j]) flag_qsort(aliasf[j], 0, aliasflen[j]);
+               }
+               field++;
+           }
+           free(piece);
+        }
+        /* a line without a flag field (or a failed decode) is an error */
+        if (!aliasf[j]) ok = 0;
+   }
+
+   if (!ok) {
+      for (int k = 0; k < numaliasf; k++) free(aliasf[k]);
+      free(aliasf);
+      free(aliasflen);
+      aliasf = NULL;
+      aliasflen = NULL;
+      numaliasf = 0;
+      fprintf(stderr,"error: AF table is corrupt\n");
+      return 1;
+   }
+   return 0;
+}
+
+/* Parse the AM (morphological-description alias) table.
+ *
+ *   line - the "AM <count>" header line already read by the caller; the
+ *          same buffer is reused to read the table lines (fgets reads at
+ *          most MAXDELEN characters into it)
+ *   af   - the affix file, positioned just after the header line
+ *
+ * Every table line has the form "AM <description>".  The description is
+ * reversed first when complexprefixes is set (reverseword_utf() if utf8 is
+ * set, reverseword() otherwise) and then stored as a mystrdup() copy.
+ *
+ * Returns 0 on success.  On any failure returns 1 after releasing whatever
+ * was built so far and resetting numaliasm/aliasm to 0/NULL, so a
+ * half-initialised table is never left behind.
+ */
+int  HashMgr::parse_aliasm(char * line, FILE * af)
+{
+   if (numaliasm != 0) {
+      fprintf(stderr,"error: duplicate AM (aliases for morphological descriptions) tables used\n");
+      return 1;
+   }
+
+   /* header line: the keyword followed by the number of entries */
+   char * tp = line;
+   char * piece;
+   int field = 0;
+   int seen = 0;
+   int count = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+       if (*piece != '\0') {
+          if (field == 0) {
+             seen++;
+          } else if (field == 1) {
+             count = atoi(piece);
+             seen++;
+          }
+          field++;
+       }
+       free(piece);
+   }
+   if (seen != 2) {
+      aliasm = NULL;
+      fprintf(stderr,"error: missing AM alias information\n");
+      return 1;
+   }
+   if (count < 1) {
+      aliasm = NULL;
+      fprintf(stderr,"incorrect number of entries in AM table\n");
+      return 1;
+   }
+
+   aliasm = (char **) malloc((size_t) count * sizeof(char *));
+   if (!aliasm) return 1;
+   /* clear every slot first, so cleanup is safe wherever parsing stops */
+   for (int k = 0; k < count; k++) aliasm[k] = NULL;
+   numaliasm = count;
+
+   /* now parse the numaliasm lines to read in the remainder of the table */
+   int ok = 1;
+   for (int j = 0; ok && j < numaliasm; j++) {
+        if (!fgets(line,MAXDELEN,af)) {
+           ok = 0;   /* file ended before the table was complete */
+           break;
+        }
+        mychomp(line);
+        tp = line;
+        field = 0;
+        while (ok && (piece=mystrsep(&tp, 0))) {
+           if (*piece != '\0') {
+               if (field == 0) {
+                  if (strncmp(piece,"AM",2) != 0) ok = 0;
+               } else if (field == 1) {
+                  if (complexprefixes) {
+                      if (utf8) reverseword_utf(piece);
+                      else reverseword(piece);
+                  }
+                  aliasm[j] = mystrdup(piece);
+               }
+               field++;
+           }
+           free(piece);
+        }
+        /* a line without a description (or a failed copy) is an error */
+        if (!aliasm[j]) ok = 0;
+   }
+
+   if (!ok) {
+      for (int k = 0; k < numaliasm; k++) free(aliasm[k]);
+      free(aliasm);
+      aliasm = NULL;
+      numaliasm = 0;
+      fprintf(stderr,"error: AM table is corrupt\n");
+      return 1;
+   }
+   return 0;
+}
+
+// Tell whether a flag-vector alias table is present:
+// 1 when aliasf is non-NULL, 0 otherwise.
+int HashMgr::is_aliasf() {
+    return aliasf ? 1 : 0;
+}
+
+// Tell whether a morphological-description alias table is present:
+// 1 when aliasm is non-NULL, 0 otherwise.
+int HashMgr::is_aliasm() {
+    return aliasm ? 1 : 0;
+}
+
+// Look up flag-vector alias number `index` (1-based).
+//
+// On success *fvec is set to the stored vector itself (it stays owned by
+// the alias table, the caller must not free it) and its length is returned.
+// For an index outside 1..numaliasf the error is reported once on stderr,
+// *fvec is set to NULL and 0 is returned.
+int HashMgr::get_aliasf(int index, unsigned short ** fvec) {
+    if ((index > 0) && (index <= numaliasf)) {
+        *fvec = aliasf[index - 1];
+        return aliasflen[index - 1];
+    }
+    fprintf(stderr,"error: bad flag alias index: %d\n", index);
+    *fvec = NULL;
+    return 0;
+}
+
+// Return the morphological description stored under alias number `index`
+// (1-based); the pointer is the table's own entry.  For an index outside
+// 1..numaliasm an error is printed to stderr and NULL is returned.
+char * HashMgr::get_aliasm(int index) {
+    const int inRange = (index >= 1) && (index <= numaliasm);
+    if (!inRange) {
+        fprintf(stderr,"error: bad morph. alias index: %d\n", index);
+        return NULL;
+    }
+    return aliasm[index - 1];
+}
--- a/goldlib/hunspell/hashmgr.hxx
+++ b/goldlib/hunspell/hashmgr.hxx
@ -0,0 +1,51 @@
+#ifndef _HASHMGR_HXX_
+#define _HASHMGR_HXX_
+
+#include <cstdio>
+#include "htypes.hxx"
+
+enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; // flag notations: one character (default), two characters (`FLAG long'), decimal numbers (`FLAG num'), UTF-8 characters (`FLAG UTF-8')
+
+class HashMgr // dictionary hash table plus decoding of affix flags and of the AF/AM alias tables
+{
+  int             tablesize;       // number of slots in tableptr; also the modulus used by hash()
+  struct hentry * tableptr;        // calloc()ed array of tablesize hentry slots
+  int	          userword;
+  flag            flag_mode;       // notation used by decode_flags()/decode_flag()/encode_flag(); set from the FLAG line in load_config()
+  int             complexprefixes; // set to 1 when the affix file contains COMPLEXPREFIXES
+  int             utf8;            // set to 1 by a `SET ... UTF-8' line in the affix file
+  int                 numaliasf; // flag vector `compression' with aliases: number of entries in aliasf/aliasflen
+  unsigned short **   aliasf;    // aliasf[i]: decoded and sorted flag vector of alias number i+1
+  unsigned short *    aliasflen; // aliasflen[i]: number of flags in aliasf[i]
+  int                 numaliasm; // morphological description `compression' with aliases: number of entries in aliasm
+  char **             aliasm;    // aliasm[i]: description string of alias number i+1
+
+
+public:
+  HashMgr(const char * tpath, const char * apath);
+  ~HashMgr();
+
+  struct hentry * lookup(const char *) const;
+  int hash(const char *) const; // bucket index of the word, in [0, tablesize)
+  struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
+
+  int put_word(const char * word, int wl, char * ap);
+  int put_word_pattern(const char * word, int wl, const char * pattern);
+  int decode_flags(unsigned short ** result, char * flags); // *result: malloc()ed flag array; returns its length
+  unsigned short        decode_flag(const char * flag);     // single flag; a result of 0 is reported on stderr as a wrong flag id
+  char *                encode_flag(unsigned short flag);   // text form of a flag, returned through mystrdup()
+  int is_aliasf(); // non-zero when aliasf is set
+  int is_aliasm(); // non-zero when aliasm is set
+  int get_aliasf(int index, unsigned short ** fvec); // index is 1-based; returns the vector length, or 0 with *fvec == NULL for a bad index
+  char * get_aliasm(int index);                      // index is 1-based; NULL for a bad index
+  
+private:
+  int load_tables(const char * tpath);
+  int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc);
+  int load_config(const char * affpath);    // reads FLAG, SET, AF, AM and COMPLEXPREFIXES lines up to the first SFX/PFX line; 0 on success, 1 on error
+  int parse_aliasf(char * line, FILE * af); // reads the AF table that follows the header held in line; 0 on success, 1 on error
+  int parse_aliasm(char * line, FILE * af); // reads the AM table that follows the header held in line; 0 on success, 1 on error
+
+};
+
+#endif
--- a/goldlib/hunspell/htypes.hxx
+++ b/goldlib/hunspell/htypes.hxx
@ -0,0 +1,25 @@
+#ifndef _HTYPES_HXX_
+#define _HTYPES_HXX_
+
+#define MAXDELEN    8192   // size used for the line buffers when reading dictionary and affix file lines
+
+#define ROTATE_LEN   5     // rotation step in bits passed to ROTATE by HashMgr::hash
+/* ROTATE(v,q): shift v left by q bits and OR bits (32-q)..31 of the old
+ * value into the low q bits, i.e. a left rotation written for a 32-bit v.
+ * NOTE(review): the expansion already ends in ';', and q is used
+ * unparenthesised in (32 - q), so pass a plain constant or identifier.
+ */
+#define ROTATE(v,q) \
+   (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
+
+// approximate number of user defined words (extra slots added to the word count read from the dictionary)
+#define USERWORD 1000
+
+struct hentry   // element type of the HashMgr table (tableptr)
+{
+  short    wlen;                   // presumably the length of word - TODO confirm against add_word
+  short    alen;                   // presumably the number of flags in astr - TODO confirm
+  char *   word;                   // set to NULL for every slot right after the table is allocated
+  unsigned short * astr;           // presumably the decoded affix flag vector - TODO confirm
+  struct   hentry * next;          // NOTE(review): looks like the collision-chain link - confirm
+  struct   hentry * next_homonym;  // NOTE(review): looks like a link between entries with the same spelling - confirm
+  char *   description;            // presumably the morphological description - TODO confirm
+};
+
+#endif
--- a/goldlib/hunspell/hunspell.cxx
+++ b/goldlib/hunspell/hunspell.cxx
--- a/goldlib/hunspell/hunspell.hxx
+++ b/goldlib/hunspell/hunspell.hxx
@ -0,0 +1,143 @@
+#include "hashmgr.hxx"
+#include "affixmgr.hxx"
+#include "suggmgr.hxx"
+#include "csutil.hxx"
+#include "langnum.hxx"
+
+#define NOCAP   0  // capitalisation-type codes; presumably the values reported through pcaptype by cleanword() - TODO confirm
+#define INITCAP 1
+#define ALLCAP  2
+#define HUHCAP  3
+#define HUHINITCAP  4
+
+#define MAXSUGGESTION 15
+#define MAXSHARPS 5
+
+#ifdef W32
+#define DLLTEST2_API __declspec(dllexport)
+#endif
+
+#ifndef _MYSPELLMGR_HXX_
+#define _MYSPELLMGR_HXX_
+
+#ifdef W32
+class DLLTEST2_API Hunspell
+#else
+class Hunspell
+#endif
+{
+  AffixMgr*       pAMgr;   // affix manager
+  HashMgr*        pHMgr;   // dictionary hash manager
+  SuggestMgr*     pSMgr;   // suggestion manager
+  char *          encoding;
+  struct cs_info * csconv;
+  struct unicode_info2 * utfconv;
+  int             langnum;
+  int             utf8;
+  int             complexprefixes;
+  char**          wordbreak;
+
+/* XXX not stateless variables for compound handling */
+  char *	  prevroot;
+  int             prevcompound;
+
+/* forbidden_compound:
+ * 0 = not forbidden
+ * 1 = forbidden
+ * 2 = forbidden compound (written without dash in Hungarian)
+ */
+  int		  forbidden_compound;
+  
+
+public:
+
+  /* Hunspell(aff, dic) - constructor of Hunspell class
+   * input: path of affix file and dictionary file
+   */
+  
+  Hunspell(const char * affpath, const char * dpath);
+
+  ~Hunspell();
+
+  /* spell(word) - spellcheck word
+   * output: 0 = bad word, not 0 = good word
+   */
+   
+  int spell(const char *);
+
+  /* suggest(suggestions, word) - search suggestions
+   * input: pointer to an array of string pointers and the (bad) word;
+   *   the array of string pointers (here *slst) need not be initialized
+   * output: number of suggestions in the string array, and the suggestions
+   *   in a newly allocated array of strings (*slst will be NULL when the
+   *   number of suggestions equals 0.)
+   */
+
+  int suggest(char*** slst, const char * word);
+
+  /* handling custom dictionary */
+
+  int put_word(const char * word);
+
+  /* suffix is an affix flag string, written as in dictionary files */
+  
+  int put_word_suffix(const char * word, const char * suffix);
+  
+  /* pattern is a sample dictionary word:
+   * put word into the custom dictionary with the affix flags of the pattern word
+   */
+  
+  int put_word_pattern(const char * word, const char * pattern);
+
+  /* other */
+
+  char * get_dic_encoding();
+  const char * get_wordchars();
+  unsigned short * get_wordchars_utf16(int * len);
+  struct cs_info * get_csconv();
+  struct unicode_info2 * get_utf_conv();
+  const char * get_version();
+
+  /* experimental functions */
+
+  /* morphological analysis */
+  
+  char * morph(const char * word);
+  int analyze(char*** out, const char *word);
+
+  char * morph_with_correction(const char * word);
+
+  /* stemmer function */
+  
+  int stem(char*** slst, const char * word);
+
+  /* special suggestions */
+  int suggest_auto(char*** slst, const char * word);
+  int suggest_pos_stems(char*** slst, const char * word);
+  char * get_possible_root();
+
+  /* functions for the Hunspell command line API; not thread-safe */
+  
+  char * get_prevroot();
+  int get_prevcompound();
+  int get_forbidden_compound();
+
+private:
+   int    cleanword(char *, const char *, int * pcaptype, int * pabbrev);
+   int    cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
+   void   mkinitcap(char *);
+   int    mkinitcap2(char * p, w_char * u, int nc);
+   int    mkinitsmall2(char * p, w_char * u, int nc);
+   void   mkallcap(char *);
+   int    mkallcap2(char * p, w_char * u, int nc);
+   void   mkallsmall(char *);
+   int    mkallsmall2(char * p, w_char * u, int nc);
+   struct hentry * check(const char *);
+   char * sharps_u8_l1(char * dest, char * source);
+   hentry * spellsharps(char * base, char *, int, int, char * tmp);
+   int    is_keepcase(const hentry * rv);
+   int    insert_sug(char ***slst, char * word, int *ns);
+
+};
+
+#endif
--- a/goldlib/hunspell/langnum.hxx
+++ b/goldlib/hunspell/langnum.hxx
@ -0,0 +1,37 @@
+#ifndef _LANGNUM_HXX_
+#define _LANGNUM_HXX_
+
+/*
+ language numbers for language specific codes
+ see http://l10n.openoffice.org/languages.html
+*/
+
+enum { // NOTE: the values written with a leading zero (01, 02, 03, 07) are octal literals; each equals the same decimal number
+LANG_az=100, // custom number
+LANG_bg=41,
+LANG_ca=37,
+LANG_cs=42,
+LANG_da=45,
+LANG_de=49,
+LANG_el=30,
+LANG_en=01,
+LANG_es=34,
+LANG_eu=10,
+LANG_fr=02,
+LANG_gl=38,
+LANG_hr=78,
+LANG_hu=36,
+LANG_it=39,
+LANG_la=99, // custom number
+LANG_lv=101, // custom number
+LANG_nl=31,
+LANG_pl=48,
+LANG_pt=03,
+LANG_ru=07,
+LANG_sv=50,
+LANG_tr=90,
+LANG_uk=80,
+LANG_xx=999 // NOTE(review): presumably the "other / unknown language" value - confirm
+};
+
+#endif
--- a/goldlib/hunspell/license.hun
+++ b/goldlib/hunspell/license.hun
@ -0,0 +1,57 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s):
+ * David Einstein 
+ * Davide Prina
+ * Giuseppe Modugno 
+ * Gianluca Turconi
+ * Simon Brouwer
+ * Noll János
+ * Bíró Árpád
+ * Goldman Eleonóra
+ * Sarlós Tamás
+ * Bencsáth Boldizsár
+ * Halácsy Péter
+ * Dvornik László
+ * Gefferth András
+ * Nagy Viktor
+ * Varga Dániel
+ * Chris Halls
+ * Rene Engelhard
+ * Bram Moolenaar
+ * Dafydd Jones
+ * Harri Pitkänen
+ * András Tímár
+ * Tor Lillqvist
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
--- a/goldlib/hunspell/license.mys
+++ b/goldlib/hunspell/license.mys
@ -0,0 +1,61 @@
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ *    such.  Binary redistributions based on modified source code
+ *    must be clearly marked as modified versions in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *
+ * NOTE: A special thanks and credit goes to Geoff Kuenning
+ * the creator of ispell.  MySpell's affix algorithms were
+ * based on those of ispell which should be noted is
+ * copyright Geoff Kuenning et.al. and now available
+ * under a BSD style license. For more information on ispell
+ * and affix compression in general, please see:
+ * http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
+ * (the home page for ispell)
+ *
+ * An almost complete rewrite  of MySpell for use by  
+ * the Mozilla project has been developed by David Einstein 
+ * (Deinst@world.std.com).  David and I are now 
+ * working on parallel development tracks to help 
+ * our respective projects (Mozilla and OpenOffice.org 
+ * and we will maintain full affix file and dictionary 
+ * file compatibility and work on merging our versions 
+ * of MySpell back into a single tree. David has been 
+ * a significant help in improving MySpell.
+ * 
+ * Special thanks also go to La'szlo' Ne'meth 
+ * <nemethl@gyorsposta.hu> who is the author of the 
+ * Hungarian dictionary and who developed and contributed 
+ * the code to support compound words in MySpell 
+ * and fixed numerous problems with the encoding 
+ * case conversion tables.
+ *
+ */
--- a/goldlib/hunspell/suggmgr.cxx
+++ b/goldlib/hunspell/suggmgr.cxx
--- a/goldlib/hunspell/suggmgr.hxx
+++ b/goldlib/hunspell/suggmgr.hxx
@ -0,0 +1,87 @@
+#ifndef _SUGGESTMGR_HXX_
+#define _SUGGESTMGR_HXX_
+
+#define MAXSWL 100
+#define MAXSWUTF8L (MAXSWL * 4) // four times MAXSWL; presumably room for up to 4 UTF-8 bytes per character - TODO confirm
+#define MAX_ROOTS 50
+#define MAX_WORDS 200
+#define MAX_GUESS 200
+#define MAXNGRAMSUGS 5
+
+#define MINTIMER 500
+#define MAXPLUSTIMER 500
+
+#define NGRAM_IGNORE_LENGTH 0 // NGRAM_* values: presumably the choices for the uselen argument of ngram() - TODO confirm
+#define NGRAM_LONGER_WORSE  1
+#define NGRAM_ANY_MISMATCH  2
+
+#include "atypes.hxx"
+#include "affixmgr.hxx"
+#include "hashmgr.hxx"
+#include "langnum.hxx"
+#include <time.h>
+
+enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
+
+class SuggestMgr
+{
+  char *          ctry;
+  int             ctryl;
+  w_char *        ctry_utf;
+
+  AffixMgr*       pAMgr;   // affix manager handed to the constructor (aptr)
+  int             maxSug;
+  struct cs_info * csconv;
+  struct unicode_info2 * utfconv;
+  int             utf8;
+  int             nosplitsugs;
+  int             maxngramsugs;
+  int             complexprefixes;
+
+
+public:
+  SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
+  ~SuggestMgr();
+
+  int suggest(char*** slst, const char * word, int nsug);
+  int ngsuggest(char ** wlst, char * word, HashMgr* pHMgr);
+  int suggest_auto(char*** slst, const char * word, int nsug);
+  int suggest_stems(char*** slst, const char * word, int nsug);
+  int suggest_pos_stems(char*** slst, const char * word, int nsug);
+
+  char * suggest_morph(const char * word);
+  char * suggest_morph_for_spelling_error(const char * word);
+
+private:
+   int check(const char *, int, int, int *, time_t *);
+   int check_forbidden(const char *, int);
+
+   int replchars(char**, const char *, int, int);
+   int doubledsyllable(char**, const char *, int, int);
+   int forgotchar(char **, const char *, int, int);
+   int swapchar(char **, const char *, int, int);
+   int extrachar(char **, const char *, int, int);
+   int badchar(char **, const char *, int, int);
+   int twowords(char **, const char *, int, int);
+   int fixstems(char **, const char *, int);
+
+   int forgotchar_utf(char**, const w_char *, int wl, int, int);   // the *_utf variants take the word as a w_char array with its length wl
+   int extrachar_utf(char**, const w_char *, int wl, int, int);
+   int badchar_utf(char **, const w_char *, int wl, int, int);
+   int swapchar_utf(char **, const w_char *, int wl, int, int);
+
+   int mapchars(char**, const char *, int, int);
+   int map_related(const char *, int, char ** wlst, int, const mapentry*, int, int *, time_t *);
+   int map_related_utf(w_char *, int, int, char ** wlst, int, const mapentry*, int, int *, time_t *);
+   int ngram(int n, char * s1, const char * s2, int uselen);
+   int mystrlen(const char * word);
+   int equalfirstletter(char * s1, const char * s2);
+   int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
+   void bubblesort( char ** rwd, int * rsc, int n);
+   void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
+   int lcslen(const char * s, const char* s2);
+
+};
+
+#endif
+
--- a/goldlib/hunspell/utf_info.cxx
+++ b/goldlib/hunspell/utf_info.cxx