xref: /Lucene/gradle/generation/snowball/snowball.patch (revision 8ac26737913d0c1555019e93bc6bf7db1ab9047e)
1diff --git a/algorithms/armenian.sbl b/algorithms/armenian.sbl
2new file mode 100644
3index 0000000..3a9a926
4--- /dev/null
5+++ b/algorithms/armenian.sbl
6@@ -0,0 +1,301 @@
7+stringescapes {}
8+
9+stringdef a    '{U+0561}' // 531
10+stringdef b    '{U+0562}' // 532
11+stringdef g    '{U+0563}' // 533
12+stringdef d    '{U+0564}' // 534
13+stringdef ye   '{U+0565}' // 535
14+stringdef z    '{U+0566}' // 536
15+stringdef e    '{U+0567}' // 537
16+stringdef y    '{U+0568}' // 538
17+stringdef dt   '{U+0569}' // 539
18+stringdef zh   '{U+056A}' // 53A
19+stringdef i    '{U+056B}' // 53B
20+stringdef l    '{U+056C}' // 53C
21+stringdef kh   '{U+056D}' // 53D
22+stringdef ts   '{U+056E}' // 53E
23+stringdef k    '{U+056F}' // 53F
24+stringdef h    '{U+0570}' // 540
25+stringdef dz   '{U+0571}' // 541
26+stringdef gh   '{U+0572}' // 542
27+stringdef djch '{U+0573}' // 543
28+stringdef m    '{U+0574}' // 544
29+stringdef j    '{U+0575}' // 545
30+stringdef n    '{U+0576}' // 546
31+stringdef sh   '{U+0577}' // 547
32+stringdef vo   '{U+0578}' // 548
33+stringdef ch   '{U+0579}' // 549
34+stringdef p    '{U+057A}' // 54A
35+stringdef dj   '{U+057B}' // 54B
36+stringdef r    '{U+057C}' // 54C
37+stringdef s    '{U+057D}' // 54D
38+stringdef v    '{U+057E}' // 54E
39+stringdef t    '{U+057F}' // 54F
40+stringdef r'   '{U+0580}' // 550
41+stringdef c    '{U+0581}' // 551
42+stringdef u    '{U+0582}' // 552                  //vjun
43+stringdef bp   '{U+0583}' // 553
44+stringdef q    '{U+0584}' // 554
45+stringdef ev   '{U+0587}'
46+stringdef o    '{U+0585}' // 555
47+stringdef f    '{U+0586}' // 556
48+
49+routines ( mark_regions R2
50+           adjective
51+           verb
52+           noun
53+           ending
54+)
55+
56+externals ( stem )
57+
58+integers ( pV p2 )
59+
60+groupings ( v )
61+
62+define v '{a}{e}{i}{o}{u}{ye}{vo}{y}'
63+
64+define mark_regions as (
65+
66+    $pV = limit
67+    $p2 = limit
68+    do (
69+        gopast v  setmark pV  gopast non-v
70+        gopast v  gopast non-v  setmark p2
71+       )
72+)
73+
74+backwardmode (
75+
76+    define R2 as $p2 <= cursor
77+
78+    define adjective as (
79+        [substring] among (
80+            '{b}{a}{r'}'
81+            '{p}{ye}{s}'
82+            '{vo}{r'}{e}{n}'
83+            '{vo}{v}{i}{n}'
84+            '{a}{k}{i}'
85+            '{l}{a}{j}{n}'
86+            '{r'}{vo}{r'}{d}'
87+            '{ye}{r'}{vo}{r'}{d}'
88+            '{a}{k}{a}{n}'
89+            '{a}{l}{i}'
90+            '{k}{vo}{t}'
91+            '{ye}{k}{ye}{n}'
92+            '{vo}{r'}{a}{k}'
93+            '{ye}{gh}'
94+            '{v}{vo}{u}{n}'
95+            '{ye}{r'}{ye}{n}'
96+            '{a}{r'}{a}{n}'
97+            '{ye}{n}'
98+            '{a}{v}{ye}{t}'
99+            '{g}{i}{n}'
100+            '{i}{v}'
101+            '{a}{t}'
102+            '{i}{n}'
103+
104+              (delete)
105+        )
106+    )
107+
108+    define verb as (
109+        [substring] among (
110+            '{vo}{u}{m}'
111+            '{v}{vo}{u}{m}'
112+            '{a}{l}{vo}{u}'
113+            '{ye}{l}{vo}{u}'
114+            '{v}{ye}{l}'
115+            '{a}{n}{a}{l}'
116+            '{ye}{l}{vo}{u}{c}'
117+            '{a}{l}{vo}{u}{c}'
118+            '{y}{a}{l}'
119+            '{y}{ye}{l}'
120+            '{a}{l}{vo}{v}'
121+            '{ye}{l}{vo}{v}'
122+            '{a}{l}{i}{s}'
123+            '{ye}{l}{i}{s}'
124+            '{ye}{n}{a}{l}'
125+            '{a}{c}{n}{a}{l}'
126+            '{ye}{c}{n}{ye}{l}'
127+            '{c}{n}{ye}{l}'
128+            '{n}{ye}{l}'
129+            '{a}{t}{ye}{l}'
130+            '{vo}{t}{ye}{l}'
131+            '{k}{vo}{t}{ye}{l}'
132+            '{t}{ye}{l}'
133+            '{v}{a}{ts}'
134+            '{ye}{c}{v}{ye}{l}'
135+            '{a}{c}{v}{ye}{l}'
136+            '{ye}{c}{i}{r'}'
137+            '{a}{c}{i}{r'}'
138+            '{ye}{c}{i}{n}{q}'
139+            '{a}{c}{i}{n}{q}'
140+            '{v}{ye}{c}{i}{r'}'
141+            '{v}{ye}{c}{i}{n}{q}'
142+            '{v}{ye}{c}{i}{q}'
143+            '{v}{ye}{c}{i}{n}'
144+            '{a}{c}{r'}{i}{r'}'
145+            '{a}{c}{r'}{ye}{c}'
146+            '{a}{c}{r'}{i}{n}{q}'
147+            '{a}{c}{r'}{i}{q}'
148+            '{a}{c}{r'}{i}{n}'
149+            '{ye}{c}{i}{q}'
150+            '{a}{c}{i}{q}'
151+            '{ye}{c}{i}{n}'
152+            '{a}{c}{i}{n}'
153+            '{a}{c}{a}{r'}'
154+            '{a}{c}{a}{v}'
155+            '{a}{c}{a}{n}{q}'
156+            '{a}{c}{a}{q}'
157+            '{a}{c}{a}{n}'
158+            '{v}{ye}{c}{i}'
159+            '{a}{c}{r'}{i}'
160+            '{ye}{c}{a}{r'}'
161+            '{ye}{c}{a}{v}'
162+            '{c}{a}{n}{q}'
163+            '{c}{a}{q}'
164+            '{c}{a}{n}'
165+            '{a}{c}{a}'
166+            '{a}{c}{i}'
167+            '{ye}{c}{a}'
168+            '{ch}{ye}{l}'
169+            '{ye}{c}{i}'
170+            '{a}{r'}'
171+            '{a}{v}'
172+            '{a}{n}{q}'
173+            '{a}{q}'
174+            '{a}{n}'
175+            '{a}{l}'
176+            '{ye}{l}'
177+            '{ye}{c}'
178+            '{a}{c}'
179+            '{v}{ye}'
180+            '{a}'
181+
182+                (delete)
183+        )
184+    )
185+
186+    define noun as (
187+        [substring] among (
188+            '{a}{ts}{vo}'
189+            '{a}{n}{a}{k}'
190+            '{a}{n}{o}{c}'
191+            '{a}{r'}{a}{n}'
192+            '{a}{r'}{q}'
193+            '{p}{a}{n}'
194+            '{s}{t}{a}{n}'
195+            '{ye}{gh}{e}{n}'
196+            '{ye}{n}{q}'
197+            '{i}{k}'
198+            '{i}{ch}'
199+            '{i}{q}'
200+            '{m}{vo}{u}{n}{q}'
201+            '{j}{a}{k}'
202+            '{j}{vo}{u}{n}'
203+            '{vo}{n}{q}'
204+            '{vo}{r'}{d}'
205+            '{vo}{c}'
206+            '{ch}{ye}{q}'
207+            '{v}{a}{ts}{q}'
208+            '{v}{vo}{r'}'
209+            '{a}{v}{vo}{r'}'
210+            '{vo}{u}{dt}{j}{vo}{u}{n}'
211+            '{vo}{u}{k}'
212+            '{vo}{u}{h}{i}'
213+            '{vo}{u}{j}{dt}'
214+            '{vo}{u}{j}{q}'
215+            '{vo}{u}{s}{t}'
216+            '{vo}{u}{s}'
217+            '{c}{i}'
218+            '{a}{l}{i}{q}'
219+            '{a}{n}{i}{q}'
220+            '{i}{l}'
221+            '{i}{ch}{q}'
222+            '{vo}{u}{n}{q}'
223+            '{g}{a}{r'}'
224+            '{vo}{u}'
225+            '{a}{k}'
226+            '{a}{n}'
227+            '{q}'
228+
229+                (delete)
230+        )
231+    )
232+
233+    define ending as (
234+        [substring] R2 among (
235+            '{n}{ye}{r'}{y}'
236+            '{n}{ye}{r'}{n}'
237+            '{n}{ye}{r'}{i}'
238+            '{n}{ye}{r'}{d}'
239+            '{ye}{r'}{i}{c}'
240+            '{n}{ye}{r'}{i}{c}'
241+            '{ye}{r'}{i}'
242+            '{ye}{r'}{d}'
243+            '{ye}{r'}{n}'
244+            '{ye}{r'}{y}'
245+            '{n}{ye}{r'}{i}{n}'
246+            '{vo}{u}{dt}{j}{a}{n}{n}'
247+            '{vo}{u}{dt}{j}{a}{n}{y}'
248+            '{vo}{u}{dt}{j}{a}{n}{s}'
249+            '{vo}{u}{dt}{j}{a}{n}{d}'
250+            '{vo}{u}{dt}{j}{a}{n}'
251+            '{ye}{r'}{i}{n}'
252+            '{i}{n}'
253+            '{s}{a}'
254+            '{vo}{dj}'
255+            '{i}{c}'
256+            '{ye}{r'}{vo}{v}'
257+            '{n}{ye}{r'}{vo}{v}'
258+            '{ye}{r'}{vo}{u}{m}'
259+            '{n}{ye}{r'}{vo}{u}{m}'
260+            '{vo}{u}{n}'
261+            '{vo}{u}{d}'
262+            '{v}{a}{n}{s}'
263+            '{v}{a}{n}{y}'
264+            '{v}{a}{n}{d}'
265+            '{a}{n}{y}'
266+            '{a}{n}{d}'
267+            '{v}{a}{n}'
268+            '{vo}{dj}{y}'
269+            '{vo}{dj}{s}'
270+            '{vo}{dj}{d}'
271+            '{vo}{c}'
272+            '{vo}{u}{c}'
273+            '{vo}{dj}{i}{c}'
274+            '{c}{i}{c}'
275+            '{v}{i}{c}'
276+            '{v}{i}'
277+            '{v}{vo}{v}'
278+            '{vo}{v}'
279+            '{a}{n}{vo}{v}'
280+            '{a}{n}{vo}{u}{m}'
281+            '{v}{a}{n}{i}{c}'
282+            '{a}{m}{b}'
283+            '{a}{n}'
284+            '{n}{ye}{r'}'
285+            '{ye}{r'}'
286+            '{v}{a}'
287+            '{y}'
288+            '{n}'
289+            '{d}'
290+            '{c}'
291+            '{i}'
292+
293+                (delete)
294+        )
295+    )
296+)
297+
298+define stem as (
299+
300+    do mark_regions
301+    backwards setlimit tomark pV for (
302+        do ending
303+        do verb
304+        do adjective
305+        do noun
306+    )
307+)
308diff --git a/algorithms/estonian.sbl b/algorithms/estonian.sbl
309new file mode 100644
310index 0000000..0cc2b60
311--- /dev/null
312+++ b/algorithms/estonian.sbl
313@@ -0,0 +1,258 @@
314+/* Estonian stemmer version 1.3
315+
316+Made by Linda Freienthal in January 2019.
317+
318+*/
319+
320+routines (
321+	mark_regions
322+	LONGV
323+	special_noun_endings
324+	case_ending
325+	emphasis
326+	plural_three_first_cases
327+	remove_double_kpt
328+	double
329+	undouble
330+	i_plural
331+	degrees
332+	substantive
333+	verb_exceptions
334+	verb
335+	nu
336+)
337+
338+stringescapes {}
339+stringdef a" '{U+00E4}' //a-umlaut ä
340+stringdef o" '{U+00F6}' //o-umlaut ö
341+stringdef o' '{U+00F5}' //o with tilde õ
342+stringdef u" '{U+00FC}' //u-umlaut ü
343+stringdef s" '{U+0161}' //s with caron š
344+stringdef z" '{U+017E}' //z with caron ž
345+
346+externals ( stem )
347+booleans ( is_verb )
348+integers ( p1 )
349+groupings ( V1 RV KI GI)
350+
351+define V1 'aeiou{o'}{a"}{o"}{u"}'
352+define RV 'aeiuo'
353+define KI 'kptgbdshf{s"}z{z"}'
354+define GI 'cjlmnqrvwxaeiou{o'}{a"}{o"}{u"}'
355+define mark_regions as (
356+
357+    $p1 = limit
358+
359+    goto V1 gopast non-V1  setmark p1
360+)
361+
362+
363+backwardmode (
364+
365+	define emphasis as (
366+	    setlimit tomark p1 for ([substring])
367+	    test hop 4 //kingi -> kingi
368+            among(
369+		'gi' ((GI and not LONGV) delete) //jooksemegi -> jookseme, bioloogi -> bioloogi
370+		'ki' (KI delete) //kookki -> kook
371+	    )
372+
373+	)
374+
375+	define verb as (
376+	    setlimit tomark p1 for ([substring])
377+            among(
378+		'nuksin' 'nuksime' 'nuksid' 'nuksite' (delete) //seleta-nuksite
379+		'ksin' 'ksid' 'ksime' 'ksite' (delete) //personal conditional: rõõmusta-ksin
380+		'mata' (delete)
381+		'takse' 'dakse' (delete) //impersonal: laul-dakse, luba-takse
382+		'taks' 'daks' (delete) 	//impersonal conditional: laul-daks, saade-taks
383+		'akse' (<-'a') 		//impersonal: tulla-kse, süüa-kse, teha-kse, püüt-akse, leita-kse
384+		'sime' (delete) 	//pl1pst: saat-sime
385+		'site' (delete) 	//pl2pst: saat-site
386+		'sin' (delete) 		//sg1pst: laul-sin, saat-sin
387+		'me' (V1 delete) 	//pl1prs: laula-me, tule-me
388+		'da' (V1 delete) 	//da-infinitive: luba-da
389+		'n' (V1 delete) 	//sg1prs: kirjuta-n
390+		'b' (V1 delete) 	//sg3prs: laula-b
391+	    )
392+	    set is_verb
393+	)
394+
395+	define LONGV as
396+		among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}' '{u"}{u"}' '{o'}{o'}')
397+
398+	define i_plural as (
399+	    setlimit tomark p1 for ([substring])
400+	    among(
401+		'i' (RV) //raamatui -> raamatu, lapsikui -> lapsiku
402+	    )
403+	    delete
404+	)
405+
406+	define special_noun_endings as (
407+	    setlimit tomark p1 for ([substring])
408+	    among(
409+		'lasse' (<- 'lase') //teadlasse -> teadlase
410+		'last' (<- 'lase') //teadlast -> teadlase
411+		'lane' (<- 'lase') //teadlane -> teadlase
412+		'lasi'(<- 'lase') //teadlasi -> teadlase
413+		'misse' (<- 'mise') //tegemisse -> tegemise
414+		'mist' (<- 'mise') //kasutamist -> kasutamise
415+		'mine' (<- 'mise') //tegemine -> tegemise
416+		'misi' (<- 'mise') //kasutamisi -> kasutamise
417+		'lisse' (<- 'lise') //rohelisse -> rohelise
418+		'list' (<- 'lise') //tavalist -> tavalise
419+		'line' (<- 'lise') //roheline -> rohelise
420+		'lisi' (<- 'lise') //tavalisi -> tavalise
421+	     )
422+
423+	)
424+	define case_ending as ( // strip one singular case ending found inside the p1 region
425+	    setlimit tomark p1 for ([substring])
426+            among(
427+		'sse' (RV or LONGV) //illative: saapa-sse, tegemisse -> tegemisse
428+		'st' (RV or LONGV) //elative: saapa-st, rohelist -> rohelist
429+		'le' (RV or LONGV) //allative: raamatu-le
430+		'lt' (RV or LONGV) //ablative: raamatu-lt
431+		'ga' (RV or LONGV) //comitative: õpetaja-ga
432+		'ks' (RV or LONGV) //translative: õpetaja-ks
433+		'ta' (RV or LONGV) //abessive and da-infinitive: õpetaja-ta and hüpa-ta
434+		't' //partitive: raamatu-t and kapsas-t (the only ending with no RV/LONGV condition)
435+		's'  (RV or LONGV)//inessive and sg3pst: raamatu-s and sõiti-s
436+		'l'  (RV or LONGV) //adessive: raamatu-l and kapsa-l.
437+	    )
438+	    delete // reached only when the matched ending's condition (if any) succeeded
439+	)
440+
441+
442+	define plural_three_first_cases as ( // plural nominative/genitive/partitive endings, searched inside the p1 region
443+	    setlimit tomark p1 for ([substring])
444+            among(
445+		'ikkude' (<-'iku') //plural genitive: õnnelikkude -> õnneliku
446+		'ikke' (<-'iku') //plural partitive: rahulikke -> rahuliku
447+		'ike' (<-'iku') //plural genitive: ohtlike -> ohtliku
448+		'sid' (not LONGV delete) //plural partitive and sg2pst and pl3pst: auto-sid and laul-sid (excludes plural nominative with words like gaasid, roosid)
449+		'te' ((test hop 4 (('mis' <- 'e') or ('las' <- 'e') or ('lis' <- 'e') or (not 't' delete))) or (not 't' <-'t')) //plural genitive and pl2: ministri-te, olulis-te and saada-te, laula-te; also torte -> tort (if not in compound word) and kokkuvõtte -> kokkuvõte and roheliste -> rohelise, tegemiste -> tegemise, teadlaste -> teadlase
450+		'de' ((RV or LONGV) delete) //plural genitive: lauda-de
451+		'd' ((RV or LONGV) delete) //plural nominative: voodid -> voodi, rattaid -> rattai, lapsikuid -> lapsiku
452+	    )
453+	)
454+
455+	define double as (
456+            test among('kk' 'tt' 'pp')
457+	)
458+
459+	define undouble as ( // drop one character of a doubled consonant (used by remove_double_kpt)
460+            next [hop 1] delete // backward mode: step over one character, then slice and delete the single character before it
461+	)
462+
463+	define nu as (
464+	    setlimit tomark p1 for ([substring])
465+            among(
466+		'nu' //haka-nu(-te-ga)
467+		'tu' //luba-tu(-d)
468+		'du' //laul-du(-te-st)
469+		'va' //laul-va(-te-le)
470+	    )
471+	    delete
472+	)
473+
474+	define remove_double_kpt as (// undouble kpt consonant if 'C1C1V': mõtte(-le) -> mõte, hakka(-n) -> haka, haka(-nu-d) -> haka
475+	    (V1) (double)
476+	    and undouble
477+	)
478+
479+	define degrees as (
480+	    setlimit tomark p1 for ([substring])
481+            among(
482+		'mai' (RV delete) //heleda-mai(-le)
483+		'ma' (delete)  //tugeva-ma(-le) and ma-infinitive: sõit-ma
484+		'm' (RV delete) //kauge-i-m, rõõmsa-m
485+	    )
486+	)
487+
488+	define substantive as (
489+	    do special_noun_endings
490+	    do case_ending
491+	    do plural_three_first_cases
492+	    do degrees
493+	    do i_plural
494+	    do nu
495+	)
496+)
497+
498+
499+define verb_exceptions as (
500+	 [substring] atlimit
501+	among(
502+		'joon' 'jood' 'joob' 'joote' 'joome' 'joovad' (<-'joo')
503+		'j{o'}in' 'j{o'}id' 'j{o'}i' 'j{o'}ime' 'j{o'}ite'  (<-'joo')
504+		'joomata' 'juuakse' 'joodakse' 'juua' 'jooma' (<- 'joo')
505+		'saan' 'saad' 'saab' 'saate' 'saame' 'saavad' (<-'saa')
506+		'saaksin' 'saaksid' 'saaks' 'saaksite' 'saaksime' (<-'saa')
507+		'sain' 'said' 'sai' 'saite' 'saime' (<-'saa')
508+		'saamata' 'saadakse' 'saadi' 'saama' 'saada' (<-'saa')
509+		'viin' 'viid' 'viib' 'viite' 'viime' 'viivad' (<-'viima')
510+		'viiksin' 'viiksid' 'viiks' 'viiksite' 'viiksime' (<-'viima')
511+		'viisin' 'viisite' 'viisime' (<-'viima')
512+		'viimata' 'viiakse' 'viidi' 'viima' 'viia' (<-'viima')
513+		'keen' 'keeb' 'keed' 'kees' 'keeme' 'keete' 'keevad' (<-'keesi')
514+		'keeksin' 'keeks' 'keeksid' 'keeksime' 'keeksite' (<-'keesi')
515+		'keemata' 'keema' 'keeta' 'keedakse' (<-'keesi')
516+		'l{o"}{o"}n' 'l{o"}{o"}d' 'l{o"}{o"}b' 'l{o"}{o"}me' 'l{o"}{o"}te' 'l{o"}{o"}vad'  (<-'l{o"}{o"}')
517+		'l{o"}{o"}ksin' 'l{o"}{o"}ksid' 'l{o"}{o"}ks' 'l{o"}{o"}ksime' 'l{o"}{o"}ksite' (<-'l{o"}{o"}')
518+		'l{o"}{o"}mata' 'l{u"}{u"}akse' 'l{o"}{o"}dakse' 'l{o"}{o"}di' 'l{o"}{o"}ma' 'l{u"}{u"}a' (<-'l{o"}{o"}')
519+		'l{o'}in' 'l{o'}id' 'l{o'}i' 'l{o'}ime' 'l{o'}ite' (<-'l{o"}i') //looma-lõi, lööma-lõi
520+		'loon' 'lood' 'loob' 'loome' 'loote' 'loovad' (<-'loo')
521+		'looksin' 'looksid' 'looks' 'looksime' 'looksite' (<-'loo')
522+		'loomata' 'luuakse' 'loodi' 'luua' 'looma' (<-'loo')
523+		'k{a"}in' 'k{a"}ib' 'k{a"}id' 'k{a"}is'  'k{a"}ime' 'k{a"}ite' 'k{a"}ivad' (<-'k{a"}isi')
524+		'k{a"}iksin' 'k{a"}iks' 'k{a"}iksid' 'k{a"}iksime' 'k{a"}iksite' (<-'k{a"}isi')
525+		'k{a"}imata' 'k{a"}iakse' 'k{a"}idi' 'k{a"}ia' 'k{a"}ima' (<-'k{a"}isi')
526+		's{o"}{o"}n' 's{o"}{o"}b' 's{o"}{o"}d' 's{o"}{o"}me' 's{o"}{o"}te' 's{o"}{o"}vad' (<-'s{o"}{o"}')
527+		's{o"}{o"}ksin' 's{o"}{o"}ks' 's{o"}{o"}ksid' 's{o"}{o"}ksime' 's{o"}{o"}ksite' (<-'s{o"}{o"}')
528+		's{o'}in' 's{o'}i' 's{o'}id' 's{o'}ime' 's{o'}ite' (<-'s{o"}{o"}')
529+		's{o"}{o"}mata' 's{u"}{u"}akse' 's{o"}{o"}dakse' 's{o"}{o"}di' 's{o"}{o"}ma' 's{u"}{u"}a' (<-'s{o"}{o"}')
530+		'toon' 'tood' 'toob' 'toote' 'toome' 'toovad' (<-'too')
531+		'tooksin' 'tooksid' 'tooks' 'tooksite' 'tooksime' (<-'too')
532+		't{o'}in' 't{o'}id' 't{o'}i' 't{o'}ime' 't{o'}ite' (<-'too')
533+		'toomata' 'tuuakse' 'toodi' 'tooma' 'tuua' (<-'too')
534+		'v{o'}in' 'v{o'}id' 'v{o'}ib' 'v{o'}ime' 'v{o'}is' 'v{o'}ite' 'v{o'}ivad' (<-'v{o'}isi')
535+		'v{o'}iksin' 'v{o'}iksid' 'v{o'}iks' 'v{o'}iksime' 'v{o'}iksite' (<-'v{o'}isi')
536+		'v{o'}imata' 'v{o'}idakse' 'v{o'}idi' 'v{o'}ida' 'v{o'}ima' (<-'v{o'}isi')
537+		'j{a"}{a"}n' 'j{a"}{a"}d' 'j{a"}{a"}b' 'j{a"}{a"}me' 'j{a"}{a"}te' 'j{a"}{a"}vad' (<-'j{a"}{a"}ma')
538+		'j{a"}{a"}ksin' 'j{a"}{a"}ksid' 'j{a"}{a"}ks' 'j{a"}{a"}ksime' 'j{a"}{a"}ksite' (<-'j{a"}{a"}ma')
539+		'j{a"}ime' 'j{a"}ite' 'j{a"}in' 'j{a"}id' 'j{a"}i' (<-'j{a"}{a"}ma')
540+		'j{a"}{a"}mata' 'j{a"}{a"}dakse' 'j{a"}{a"}da' 'j{a"}{a"}ma' 'j{a"}{a"}di' (<-'j{a"}{a"}ma')
541+		'm{u"}{u"}n' 'm{u"}{u"}d' 'm{u"}{u"}b' 'm{u"}{u"}s' 'm{u"}{u"}me' 'm{u"}{u"}te' 'm{u"}{u"}vad' (<-'m{u"}{u"}si')
542+		'm{u"}{u"}ksin' 'm{u"}{u"}ksid' 'm{u"}{u"}ks' 'm{u"}{u"}ksime' 'm{u"}{u"}ksite' (<-'m{u"}{u"}si')
543+		'm{u"}{u"}mata' 'm{u"}{u"}akse' 'm{u"}{u"}di' 'm{u"}{u"}a' 'm{u"}{u"}ma' (<-'m{u"}{u"}si')
544+		'loeb' 'loen' 'loed' 'loeme' 'loete' 'loevad' (<- 'luge')
545+		'loeks' 'loeksin' 'loeksid' 'loeksime' 'loeksite' (<- 'luge')
546+		'p{o'}en' 'p{o'}eb' 'p{o'}ed' 'p{o'}eme' 'p{o'}ete' 'p{o'}evad' (<- 'p{o'}de')
547+		'p{o'}eksin' 'p{o'}eks' 'p{o'}eksid' 'p{o'}eksime' 'p{o'}eksite' (<- 'p{o'}de')
548+		'laon' 'laob' 'laod' 'laome' 'laote' 'laovad' (<- 'ladu')
549+		'laoksin' 'laoks' 'laoksid' 'laoksime' 'laoksite' (<- 'ladu')
550+		'teeksin' 'teeks' 'teeksid' 'teeksime' 'teeksite' (<- 'tegi')
551+		'teen' 'teeb' 'teed' 'teeme' 'teete' 'teevad' (<- 'tegi')
552+		'tegemata' 'tehakse' 'tehti' 'tegema' 'teha' (<-'tegi')
553+		'n{a"}en' 'n{a"}eb' 'n{a"}ed' 'n{a"}eme' 'n{a"}ete' 'n{a"}evad' (<-'n{a"}gi')
554+		'n{a"}eksin' 'n{a"}eks' 'n{a"}eksid' 'n{a"}eksime' 'n{a"}eksite' (<-'n{a"}gi')
555+		'n{a"}gemata' 'n{a"}hakse' 'n{a"}hti' 'n{a"}ha' 'n{a"}gema' (<-'n{a"}gi')
556+	)
557+)
558+
559+
560+define stem as (
561+	do mark_regions
562+	not verb_exceptions
563+	unset is_verb
564+	backwards (
565+	    do emphasis
566+	    do verb
567+	    try (not is_verb do substantive)
568+	    do remove_double_kpt
569+
570+	)
571+)
572diff --git a/compiler/generator_java.c b/compiler/generator_java.c
573index 2958452..c254dbe 100644
574--- a/compiler/generator_java.c
575+++ b/compiler/generator_java.c
576@@ -272,7 +272,7 @@ static void generate_AE(struct generator * g, struct node * p) {
577             break;
578         case c_len: /* Same as size() for Java. */
579         case c_size:
580-            w(g, "current.length()");
581+            w(g, "limit");
582             break;
583     }
584 }
585@@ -941,9 +941,12 @@ static void generate_define(struct generator * g, struct node * p) {
586      * be required to allow the SnowballProgram base class to invoke them.
587      * FIXME: Is this avoidable?
588      */
589-    if (q->type == t_routine && !q->used_in_among) {
590+    if (q->used_in_among) {
591+        g->S[0] = "public";
592+    } else if (q->type == t_routine) {
593         g->S[0] = "private";
594     } else {
595+        w(g, "~N~M@Override");
596         g->S[0] = "public";
597     }
598     g->V[0] = q;
599@@ -1140,6 +1143,7 @@ static void generate_class_begin(struct generator * g) {
600     w(g, " {~+~N"
601          "~N"
602          "~Mprivate static final long serialVersionUID = 1L;~N"
603+         "~Mprivate static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();~N"
604          "~N");
605 }
606
607@@ -1186,7 +1190,7 @@ static void generate_among_table(struct generator * g, struct among * x) {
608             if (v->function != 0) {
609                 w(g, ", \"");
610                 write_varname(g, v->function);
611-                w(g, "\", ~n.class");
612+                w(g, "\", methodObject");
613             }
614             w(g, ")~S0~N");
615             v++;
616diff --git a/java/org/tartarus/snowball/Among.java b/java/org/tartarus/snowball/Among.java
617index 8261503..abb8685 100644
618--- a/java/org/tartarus/snowball/Among.java
619+++ b/java/org/tartarus/snowball/Among.java
620@@ -1,7 +1,13 @@
621 package org.tartarus.snowball;
622
623-import java.lang.reflect.Method;
624+import java.lang.invoke.MethodHandle;
625+import java.lang.invoke.MethodHandles;
626+import java.lang.invoke.MethodType;
627+import java.util.Locale;
628
629+/**
630+ * Internal class used by Snowball stemmers
631+ */
632 public class Among {
633     public Among (String s, int substring_i, int result) {
634         this.s = s.toCharArray();
635@@ -11,19 +17,30 @@ public class Among {
636     }
637
638     public Among (String s, int substring_i, int result, String methodname,
639-		  Class<? extends SnowballProgram> programclass) {
640+		  MethodHandles.Lookup methodobject) {
641         this.s = s.toCharArray();
642         this.substring_i = substring_i;
643 	this.result = result;
644-	try {
645-	    this.method = programclass.getDeclaredMethod(methodname);
646-	} catch (NoSuchMethodException e) {
647-	    throw new RuntimeException(e);
648-	}
649+	final Class<? extends SnowballProgram> clazz = methodobject.lookupClass().asSubclass(SnowballProgram.class);
650+	if (methodname.length() > 0) {
651+	    try {
652+	        this.method = methodobject.findVirtual(clazz, methodname, MethodType.methodType(boolean.class))
653+	            .asType(MethodType.methodType(boolean.class, SnowballProgram.class));
654+	    } catch (NoSuchMethodException | IllegalAccessException e) {
655+	        throw new RuntimeException(String.format(Locale.ENGLISH,
656+	            "Snowball program '%s' is broken, cannot access method: boolean %s()",
657+	            clazz.getSimpleName(), methodname
658+	        ), e);
659+	    }
660+	} else {
661+	    this.method = null;
662+        }
663     }
664
665-    public final char[] s; /* search string */
666-    public final int substring_i; /* index to longest matching substring */
667-    public final int result; /* result of the lookup */
668-    public final Method method; /* method to use if substring matches */
669+    final char[] s; /* search string */
670+    final int substring_i; /* index to longest matching substring */
671+    final int result; /* result of the lookup */
672+
673+    // Make sure this is not accessible outside package for Java security reasons!
674+    final MethodHandle method; /* method to use if substring matches */
675 };
676diff --git a/java/org/tartarus/snowball/SnowballProgram.java b/java/org/tartarus/snowball/SnowballProgram.java
677index 1b27b96..94f2d4b 100644
678--- a/java/org/tartarus/snowball/SnowballProgram.java
679+++ b/java/org/tartarus/snowball/SnowballProgram.java
680@@ -1,50 +1,84 @@
681
682 package org.tartarus.snowball;
683-import java.lang.reflect.InvocationTargetException;
684+import java.lang.reflect.UndeclaredThrowableException;
685 import java.io.Serializable;
686
687+/**
688+ * Base class for a snowball stemmer
689+ */
690 public class SnowballProgram implements Serializable {
691     protected SnowballProgram()
692     {
693-	current = new StringBuilder();
694-	init();
695+	current = new char[8];
696+	setCurrent("");
697     }
698
699     static final long serialVersionUID = 2016072500L;
700
701-    private void init() {
702+    /**
703+     * Set the current string.
704+     */
705+    public void setCurrent(String value)
706+    {
707+	current = value.toCharArray();
708 	cursor = 0;
709-	limit = current.length();
710+	limit = value.length();
711 	limit_backward = 0;
712 	bra = cursor;
713 	ket = limit;
714     }
715
716     /**
717-     * Set the current string.
718+     * Get the current string.
719      */
720-    public void setCurrent(String value)
721+    public String getCurrent()
722     {
723-        // Make a new StringBuilder.  If we reuse the old one, and a user of
724-        // the library keeps a reference to the buffer returned (for example,
725-        // by converting it to a String in a way which doesn't force a copy),
726-        // the buffer size will not decrease, and we will risk wasting a large
727-        // amount of memory.
728-        // Thanks to Wolfram Esser for spotting this problem.
729-        current = new StringBuilder(value);
730-	init();
731+        return new String(current, 0, limit);
732     }
733
734     /**
735-     * Get the current string.
736+     * Set the current string.
737+     * @param text character array containing input
738+     * @param length valid length of text.
739      */
740-    public String getCurrent()
741-    {
742-        return current.toString();
743+    public void setCurrent(char[] text, int length) {
744+        current = text;
745+        cursor = 0;
746+        limit = length;
747+        limit_backward = 0;
748+        bra = cursor;
749+        ket = limit;
750+    }
751+
752+    /**
753+     * Get the current buffer containing the stem.
754+     * <p>
755+     * NOTE: this may be a reference to a different character array than the
756+     * one originally provided with setCurrent, in the exceptional case that
757+     * stemming produced a longer intermediate or result string.
758+     * </p>
759+     * <p>
760+     * It is necessary to use {@link #getCurrentBufferLength()} to determine
761+     * the valid length of the returned buffer. For example, many words are
762+     * stemmed simply by subtracting from the length to remove suffixes.
763+     * </p>
764+     * @see #getCurrentBufferLength()
765+     */
766+    public char[] getCurrentBuffer() {
767+        return current;
768+    }
769+
770+    /**
771+     * Get the valid length of the character array in
772+     * {@link #getCurrentBuffer()}.
773+     * @return valid length of the array.
774+     */
775+    public int getCurrentBufferLength() {
776+        return limit;
777     }
778
779     // current string
780-    protected StringBuilder current;
781+    private char[] current;
782
783     protected int cursor;
784     protected int limit;
785@@ -74,7 +108,7 @@ public class SnowballProgram implements Serializable {
786     protected boolean in_grouping(char [] s, int min, int max)
787     {
788 	if (cursor >= limit) return false;
789-	char ch = current.charAt(cursor);
790+	char ch = current[cursor];
791 	if (ch > max || ch < min) return false;
792 	ch -= min;
793 	if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
794@@ -85,7 +119,7 @@ public class SnowballProgram implements Serializable {
795     protected boolean in_grouping_b(char [] s, int min, int max)
796     {
797 	if (cursor <= limit_backward) return false;
798-	char ch = current.charAt(cursor - 1);
799+	char ch = current[cursor - 1];
800 	if (ch > max || ch < min) return false;
801 	ch -= min;
802 	if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
803@@ -96,7 +130,7 @@ public class SnowballProgram implements Serializable {
804     protected boolean out_grouping(char [] s, int min, int max)
805     {
806 	if (cursor >= limit) return false;
807-	char ch = current.charAt(cursor);
808+	char ch = current[cursor];
809 	if (ch > max || ch < min) {
810 	    cursor++;
811 	    return true;
812@@ -112,7 +146,7 @@ public class SnowballProgram implements Serializable {
813     protected boolean out_grouping_b(char [] s, int min, int max)
814     {
815 	if (cursor <= limit_backward) return false;
816-	char ch = current.charAt(cursor - 1);
817+	char ch = current[cursor - 1];
818 	if (ch > max || ch < min) {
819 	    cursor--;
820 	    return true;
821@@ -130,7 +164,7 @@ public class SnowballProgram implements Serializable {
822 	if (limit - cursor < s.length()) return false;
823 	int i;
824 	for (i = 0; i != s.length(); i++) {
825-	    if (current.charAt(cursor + i) != s.charAt(i)) return false;
826+	    if (current[cursor + i] != s.charAt(i)) return false;
827 	}
828 	cursor += s.length();
829 	return true;
830@@ -141,7 +175,7 @@ public class SnowballProgram implements Serializable {
831 	if (cursor - limit_backward < s.length()) return false;
832 	int i;
833 	for (i = 0; i != s.length(); i++) {
834-	    if (current.charAt(cursor - s.length() + i) != s.charAt(i)) return false;
835+	    if (current[cursor - s.length() + i] != s.charAt(i)) return false;
836 	}
837 	cursor -= s.length();
838 	return true;
839@@ -171,7 +205,7 @@ public class SnowballProgram implements Serializable {
840 		    diff = -1;
841 		    break;
842 		}
843-		diff = current.charAt(c + common) - w.s[i2];
844+		diff = current[c + common] - w.s[i2];
845 		if (diff != 0) break;
846 		common++;
847 	    }
848@@ -199,16 +233,13 @@ public class SnowballProgram implements Serializable {
849 	    if (common_i >= w.s.length) {
850 		cursor = c + w.s.length;
851 		if (w.method == null) return w.result;
852-		boolean res;
853+		boolean res = false;
854 		try {
855-		    Object resobj = w.method.invoke(this);
856-		    res = resobj.toString().equals("true");
857-		} catch (InvocationTargetException e) {
858-		    res = false;
859-		    // FIXME - debug message
860-		} catch (IllegalAccessException e) {
861-		    res = false;
862-		    // FIXME - debug message
863+		    res = (boolean) w.method.invokeExact(this);
864+		} catch (Error | RuntimeException e) {
865+		    throw e;
866+		} catch (Throwable e) {
867+		    throw new UndeclaredThrowableException(e);
868 		}
869 		cursor = c + w.s.length;
870 		if (res) return w.result;
871@@ -243,7 +274,7 @@ public class SnowballProgram implements Serializable {
872 		    diff = -1;
873 		    break;
874 		}
875-		diff = current.charAt(c - 1 - common) - w.s[i2];
876+		diff = current[c - 1 - common] - w.s[i2];
877 		if (diff != 0) break;
878 		common++;
879 	    }
880@@ -267,16 +298,13 @@ public class SnowballProgram implements Serializable {
881 		cursor = c - w.s.length;
882 		if (w.method == null) return w.result;
883
884-		boolean res;
885+		boolean res = false;
886 		try {
887-		    Object resobj = w.method.invoke(this);
888-		    res = resobj.toString().equals("true");
889-		} catch (InvocationTargetException e) {
890-		    res = false;
891-		    // FIXME - debug message
892-		} catch (IllegalAccessException e) {
893-		    res = false;
894-		    // FIXME - debug message
895+		    res = (boolean) w.method.invokeExact(this);
896+		} catch (Error | RuntimeException e) {
897+		    throw e;
898+		} catch (Throwable e) {
899+		    throw new UndeclaredThrowableException(e);
900 		}
901 		cursor = c - w.s.length;
902 		if (res) return w.result;
903@@ -286,13 +314,41 @@ public class SnowballProgram implements Serializable {
904 	}
905     }
906
907+    // mini version of ArrayUtil.oversize from lucene, specialized to chars
908+    static int oversize(int minTargetSize) {
909+	int extra = minTargetSize >> 3;
910+	if (extra < 3) {
911+	    extra = 3;
912+	}
913+	int newSize = minTargetSize + extra;
914+	return (newSize + 3) & 0x7ffffffc;
915+    }
916+
917     /* to replace chars between c_bra and c_ket in current by the
918      * chars in s.
919      */
920-    protected int replace_s(int c_bra, int c_ket, String s)
921+    protected int replace_s(int c_bra, int c_ket, CharSequence s)
922     {
923-	int adjustment = s.length() - (c_ket - c_bra);
924-	current.replace(c_bra, c_ket, s);
925+	final int adjustment = s.length() - (c_ket - c_bra);
926+	final int newLength = limit + adjustment;
927+	//resize if necessary
928+	if (newLength > current.length) {
929+	    char[] newBuffer = new char[oversize(newLength)];
930+	    System.arraycopy(current, 0, newBuffer, 0, limit);
931+	    current = newBuffer;
932+	}
933+	// if the substring being replaced is longer or shorter than the
934+	// replacement, need to shift things around
935+	if (adjustment != 0 && c_ket < limit) {
936+	    System.arraycopy(current, c_ket, current, c_bra + s.length(),
937+	        limit - c_ket);
938+	}
939+	// insert the replacement text
940+	// Note: s.getChars(0, s.length(), current, c_bra) would be faster, but
941+	// this method would then have to be duplicated for both String and StringBuilder
942+	for (int i = 0; i < s.length(); i++)
943+	    current[c_bra + i] = s.charAt(i);
944+
945 	limit += adjustment;
946 	if (cursor >= c_ket) cursor += adjustment;
947 	else if (cursor > c_bra) cursor = c_bra;
948@@ -303,57 +359,43 @@ public class SnowballProgram implements Serializable {
949     {
950 	if (bra < 0 ||
951 	    bra > ket ||
952-	    ket > limit ||
953-	    limit > current.length())   // this line could be removed
954+	    ket > limit)
955 	{
956-	    System.err.println("faulty slice operation");
957-	// FIXME: report error somehow.
958-	/*
959-	    fprintf(stderr, "faulty slice operation:\n");
960-	    debug(z, -1, 0);
961-	    exit(1);
962-	    */
963+	     throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
964 	}
965     }
966
967-    protected void slice_from(String s)
968+    protected void slice_from(CharSequence s)
969     {
970 	slice_check();
971 	replace_s(bra, ket, s);
972     }
973
974-    protected void slice_from(CharSequence s)
975-    {
976-        slice_from(s.toString());
977-    }
978-
979     protected void slice_del()
980     {
981 	slice_from("");
982     }
983
984-    protected void insert(int c_bra, int c_ket, String s)
985+    protected void insert(int c_bra, int c_ket, CharSequence s)
986     {
987 	int adjustment = replace_s(c_bra, c_ket, s);
988 	if (c_bra <= bra) bra += adjustment;
989 	if (c_bra <= ket) ket += adjustment;
990     }
991
992-    protected void insert(int c_bra, int c_ket, CharSequence s)
993-    {
994-	insert(c_bra, c_ket, s.toString());
995-    }
996-
997     /* Copy the slice into the supplied StringBuilder */
998     protected void slice_to(StringBuilder s)
999     {
1000 	slice_check();
1001-	s.replace(0, s.length(), current.substring(bra, ket));
1002+	int len = ket - bra;
1003+	s.setLength(0);
1004+	s.append(current, bra, len);
1005     }
1006
1007     protected void assign_to(StringBuilder s)
1008     {
1009-	s.replace(0, s.length(), current.substring(0, limit));
1010+	s.setLength(0);
1011+	s.append(current, 0, limit);
1012     }
1013
1014 /*
1015diff --git a/java/org/tartarus/snowball/SnowballStemmer.java b/java/org/tartarus/snowball/SnowballStemmer.java
1016index 73a81a9..f7772d3 100644
1017--- a/java/org/tartarus/snowball/SnowballStemmer.java
1018+++ b/java/org/tartarus/snowball/SnowballStemmer.java
1019@@ -1,6 +1,9 @@
1020
1021 package org.tartarus.snowball;
1022
1023+/**
1024+ * Parent class of all Snowball stemmers; subclasses must implement <code>stem</code>.
1025+ */
1026 public abstract class SnowballStemmer extends SnowballProgram {
1027     public abstract boolean stem();
1028
1029diff --git a/libstemmer/modules.txt b/libstemmer/modules.txt
1030index b8ec17a..d2c8e61 100644
1031--- a/libstemmer/modules.txt
1032+++ b/libstemmer/modules.txt
1033@@ -10,11 +10,13 @@
1034 # the most commonly used encoding.
1035
1036 arabic          UTF_8                   arabic,ar,ara
1037+armenian        UTF_8                   armenian,hy,arm,hye
1038 basque          UTF_8,ISO_8859_1        basque,eu,eus,baq
1039 catalan         UTF_8,ISO_8859_1        catalan,ca,cat
1040 danish          UTF_8,ISO_8859_1        danish,da,dan
1041 dutch           UTF_8,ISO_8859_1        dutch,nl,dut,nld
1042 english         UTF_8,ISO_8859_1        english,en,eng
1043+estonian        UTF_8                   estonian,et,est
1044 finnish         UTF_8,ISO_8859_1        finnish,fi,fin
1045 french          UTF_8,ISO_8859_1        french,fr,fre,fra
1046 german          UTF_8,ISO_8859_1        german,de,ger,deu
1047@@ -51,12 +53,12 @@ porter          UTF_8,ISO_8859_1        porter			english
1048 # algorithms are:
1049 #
1050 # german2          - This is a slight modification of the german stemmer.
1051-#german2          UTF_8,ISO_8859_1        german2		german
1052+german2          UTF_8,ISO_8859_1        german2		german
1053 #
1054 # kraaij_pohlmann  - This is a different dutch stemmer.
1055-#kraaij_pohlmann  UTF_8,ISO_8859_1        kraaij_pohlmann	dutch
1056+kraaij_pohlmann  UTF_8,ISO_8859_1        kraaij_pohlmann	dutch
1057 #
1058 # lovins           - This is an english stemmer, but fairly outdated, and
1059 #                    only really applicable to a restricted type of input text
1060 #                    (keywords in academic publications).
1061-#lovins           UTF_8,ISO_8859_1        lovins		english
1062+lovins           UTF_8,ISO_8859_1        lovins		english
1063