diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/unit/liang.cc | 165 |
1 files changed, 96 insertions, 69 deletions
diff --git a/test/unit/liang.cc b/test/unit/liang.cc
index ee3f8e9e..f25ce23e 100644
--- a/test/unit/liang.cc
+++ b/test/unit/liang.cc
@@ -1,79 +1,106 @@
+/*
+ * File: liang.cc
+ *
+ * Copyright 2012-2016 Sebastian Geerken <sgeerken@dillo.org>
+ * Copyright 2012-2013 Johannes Hofmann <Johannes.Hofmann@gmx.de>
+ * Copyright 2023 Rodrigo Arias Mallo <rodarima@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Tests the hyphenation of words in different languages with the Liang
+ * algorithm. The hyphenator requires the .pat pattern files which can
+ * be downloaded from CTAN. */
+
 #include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include "dw/fltkcore.hh"
+#include "dw/hyphenator.hh"
+
+dw::fltk::FltkPlatform *platform;
+
+void hyph(dw::Hyphenator *h, const char *word, const char *parts)
+{
+   int p = 0;
+   char buf[1024];
+   int numBreaks;
+   int *breakPos = h->hyphenateWord(platform, word, &numBreaks);
+   memset(buf, 0, 1024);
+   for (int i = 0; i < numBreaks + 1; i++) {
+      int start = (i == 0 ? 0 : breakPos[i - 1]);
+      int end = (i == numBreaks ? strlen (word) : breakPos[i]);
+      if (i != 0)
+         buf[p++] = '-';
+      for (int j = start; j < end; j++)
+         buf[p++] = word[j];
+   }
+
+   if (strcmp(parts, buf) != 0) {
+      fprintf(stderr, "mismatch input=%s output=%s expected=%s\n",
+            word, buf, parts);
+      exit(1);
+   }
 
-#include "../dw/fltkcore.hh"
-#include "../dw/hyphenator.hh"
+   printf("%s\n", buf);
 
-void hyphenateWord (dw::core::Platform *p, const char *lang, const char *word)
+   if (breakPos)
+      free(breakPos);
+}
+
+dw::Hyphenator get_hyphenator(const char *path)
 {
-   dw::Hyphenator *h = dw::Hyphenator::getHyphenator (lang);
-
-   int numBreaks;
-   int *breakPos = h->hyphenateWord (p, word, &numBreaks);
-   for (int i = 0; i < numBreaks + 1; i++) {
-      if (i != 0)
-         printf (" \xc2\xad ");
-      int start = (i == 0 ? 0 : breakPos[i - 1]);
-      int end = (i == numBreaks ? strlen (word) : breakPos[i]);
-      for (int j = start; j < end; j++)
-         putchar (word[j]);
-   }
-   putchar ('\n');
-   if (breakPos)
-      free (breakPos);
+   if (access(path, F_OK) != 0) {
+      fprintf(stderr, "cannot access %s file: %s", path,
+            strerror(errno));
+      exit(1);
+   }
+
+   return dw::Hyphenator(path, "", 512);
 }
 
-int main (int argc, char *argv[])
+void hyph_en_us()
 {
-   dw::fltk::FltkPlatform p;
-
-   if (argc > 1) {
-      // Usage: liang [-l LANG] WORD ...
-
-      const char *lang = "de";
-      char opt;
-
-      while ((opt = getopt(argc, argv, "l:")) != -1) {
-         switch (opt) {
-         case 'l':
-            lang = optarg;
-            break;
-         }
-      }
-
-      for (int i = optind; i < argc; i++)
-         hyphenateWord (&p, lang, argv[i]);
-
-   } else {
-      hyphenateWord (&p, "de", "...");
-      hyphenateWord (&p, "de", "Jahrhundertroman");
-      hyphenateWord (&p, "de", "JAHRHUNDERTROMAN");
-      hyphenateWord (&p, "de", "„Jahrhundertroman“");
-      hyphenateWord (&p, "de", "währenddessen");
-      hyphenateWord (&p, "de", "„währenddessen“");
-      hyphenateWord (&p, "de", "Ückendorf");
-      hyphenateWord (&p, "de", "über");
-      hyphenateWord (&p, "de", "aber");
-      hyphenateWord (&p, "de", "Ackermann");
-      hyphenateWord (&p, "de", "„Ackermann“");
-      hyphenateWord (&p, "de", "entscheidet.");
-      hyphenateWord (&p, "de", "Grundstücksverkehrsgenehmigungszuständigkeits"
-                     "übertragungsverordnung");
-      hyphenateWord (&p, "de", "„Grundstücksverkehrsgenehmigungszuständigkeits"
-                     "übertragungsverordnung“");
-      hyphenateWord (&p, "de", "Grundstücksverkehrsgenehmigungszuständigkeit");
-      hyphenateWord (&p, "de",
-                     "„Grundstücksverkehrsgenehmigungszuständigkeit“");
-      hyphenateWord (&p, "de",
-                     "(6R,7R)-7-[2-(2-Amino-4-thiazolyl)-glyoxylamido]-3-"
-                     "(2,5-dihydro-6-hydroxy-2-methyl-5-oxo-1,2,4-triazin-3-yl-"
-                     "thiomethyl)-8-oxo-5-thia-1-azabicyclo[4.2.0]oct-2-en-2-"
-                     "carbonsäure-7²-(Z)-(O-methyloxim)");
-      hyphenateWord (&p, "de", "Abtei-Stadt");
-      hyphenateWord (&p, "de", "Nordrhein-Westfalen");
-      hyphenateWord (&p, "de", "kurz\xc2\xa0und\xc2\xa0knapp");
-      hyphenateWord (&p, "de", "weiß");
-      hyphenateWord (&p, "de", "www.dillo.org");
-   }
+   dw::Hyphenator h = get_hyphenator(CUR_SRC_DIR "/hyph-en-us.pat");
+
+   hyph(&h, "supercalifragilisticexpialidocious", "su-per-cal-ifrag-ilis-tic-ex-pi-ali-do-cious");
+   hyph(&h, "incredible", "in-cred-i-ble");
+   hyph(&h, "hyphenation", "hy-phen-ation");
+   hyph(&h, "...", "...");
+}
+
+void hyph_de()
+{
+   dw::Hyphenator h = get_hyphenator(CUR_SRC_DIR "/hyph-de.pat");
+
+   hyph(&h, "...", "...");
+   hyph(&h, "weiß", "weiß");
+   hyph(&h, "Ackermann", "Acker-mann");
+   hyph(&h, "Grundstücksverkehrsgenehmigungszuständigkeits",
+         "Grund-stücks-ver-kehrs-ge-neh-mi-gungs-zu-stän-dig-keits");
+   hyph(&h, "Donaudampfschifffahrtskapitänsmützenknopf",
+         "Do-nau-dampf-schiff-fahrts-ka-pi-täns-müt-zen-knopf");
+   hyph(&h, "www.dillo.org", "www.dil-lo.org");
+}
+
+int main(void)
+{
+   platform = new dw::fltk::FltkPlatform();
+
+   hyph_en_us();
+   hyph_de();
 
    return 0;
 }