1 /* 2 Egothor Software License version 1.00 3 Copyright (C) 1997-2004 Leo Galambos. 4 Copyright (C) 2002-2004 "Egothor developers" 5 on behalf of the Egothor Project. 6 All rights reserved. 7 8 This software is copyrighted by the "Egothor developers". If this 9 license applies to a single file or document, the "Egothor developers" 10 are the people or entities mentioned as copyright holders in that file 11 or document. If this license applies to the Egothor project as a 12 whole, the copyright holders are the people or entities mentioned in 13 the file CREDITS. This file can be found in the same location as this 14 license in the distribution. 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions are 18 met: 19 1. Redistributions of source code must retain the above copyright 20 notice, the list of contributors, this list of conditions, and the 21 following disclaimer. 22 2. Redistributions in binary form must reproduce the above copyright 23 notice, the list of contributors, this list of conditions, and the 24 disclaimer that follows these conditions in the documentation 25 and/or other materials provided with the distribution. 26 3. The name "Egothor" must not be used to endorse or promote products 27 derived from this software without prior written permission. For 28 written permission, please contact Leo.G@seznam.cz 29 4. Products derived from this software may not be called "Egothor", 30 nor may "Egothor" appear in their name, without prior written 31 permission from Leo.G@seznam.cz. 32 33 In addition, we request that you include in the end-user documentation 34 provided with the redistribution and/or in the software itself an 35 acknowledgement equivalent to the following: 36 "This product includes software developed by the Egothor Project. 37 http://egothor.sf.net/" 38 39 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 40 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 41 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 42 IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE 43 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 44 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 45 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 46 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 47 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 48 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 49 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 51 This software consists of voluntary contributions made by many 52 individuals on behalf of the Egothor Project and was originally 53 created by Leo Galambos (Leo.G@seznam.cz). 54 */ 55 package org.egothor.stemmer; 56 57 /** 58 * The Diff object generates a patch string. 59 * 60 * <p>A patch string is actually a command to a stemmer telling it how to reduce a word to its root. 61 * For example, to reduce the word teacher to its root teach the patch string Db would be generated. 62 * This command tells the stemmer to delete the last 2 characters from the word teacher to reach the 63 * stem (the patch commands are applied starting from the last character in order to save 64 */ 65 public class Diff { 66 int sizex = 0; 67 int sizey = 0; 68 int[][] net; 69 int[][] way; 70 71 int INSERT; 72 int DELETE; 73 int REPLACE; 74 int NOOP; 75 76 /** Constructor for the Diff object. */ Diff()77 public Diff() { 78 this(1, 1, 1, 0); 79 } 80 81 /** 82 * Constructor for the Diff object 83 * 84 * @param ins Description of the Parameter 85 * @param del Description of the Parameter 86 * @param rep Description of the Parameter 87 * @param noop Description of the Parameter 88 */ Diff(int ins, int del, int rep, int noop)89 public Diff(int ins, int del, int rep, int noop) { 90 INSERT = ins; 91 DELETE = del; 92 REPLACE = rep; 93 NOOP = noop; 94 } 95 96 /** 97 * Apply the given patch string <code>diff</code> to the given string <code> 98 * dest</code>. 99 * 100 * @param dest Destination string 101 * @param diff Patch string 102 */ apply(StringBuilder dest, CharSequence diff)103 public static void apply(StringBuilder dest, CharSequence diff) { 104 try { 105 106 if (diff == null) { 107 return; 108 } 109 110 int pos = dest.length() - 1; 111 if (pos < 0) { 112 return; 113 } 114 // orig == "" 115 for (int i = 0; i < diff.length() / 2; i++) { 116 char cmd = diff.charAt(2 * i); 117 char param = diff.charAt(2 * i + 1); 118 int par_num = (param - 'a' + 1); 119 switch (cmd) { 120 case '-': 121 pos = pos - par_num + 1; 122 break; 123 case 'R': 124 dest.setCharAt(pos, param); 125 break; 126 case 'D': 127 int o = pos; 128 pos -= par_num - 1; 129 /* 130 * delete par_num chars from index pos 131 */ 132 // String s = orig.toString(); 133 // s = s.substring( 0, pos ) + s.substring( o + 1 ); 134 // orig = new StringBuffer( s ); 135 dest.delete(pos, o + 1); 136 break; 137 case 'I': 138 dest.insert(pos += 1, param); 139 break; 140 } 141 pos--; 142 } 143 } catch ( 144 @SuppressWarnings("unused") 145 StringIndexOutOfBoundsException x) { 146 // x.printStackTrace(); 147 } catch ( 148 @SuppressWarnings("unused") 149 ArrayIndexOutOfBoundsException x) { 150 // x.printStackTrace(); 151 } 152 } 153 154 /** 155 * Construct a patch string that transforms a to b. 156 * 157 * @param a String 1st string 158 * @param b String 2nd string 159 * @return String 160 */ exec(String a, String b)161 public synchronized String exec(String a, String b) { 162 if (a == null || b == null) { 163 return null; 164 } 165 166 int x; 167 int y; 168 int maxx; 169 int maxy; 170 int[] go = new int[4]; 171 final int X = 1; 172 final int Y = 2; 173 final int R = 3; 174 final int D = 0; 175 176 /* 177 * setup memory if needed => processing speed up 178 */ 179 maxx = a.length() + 1; 180 maxy = b.length() + 1; 181 if ((maxx >= sizex) || (maxy >= sizey)) { 182 sizex = maxx + 8; 183 sizey = maxy + 8; 184 net = new int[sizex][sizey]; 185 way = new int[sizex][sizey]; 186 } 187 188 /* 189 * clear the network 190 */ 191 for (x = 0; x < maxx; x++) { 192 for (y = 0; y < maxy; y++) { 193 net[x][y] = 0; 194 } 195 } 196 197 /* 198 * set known persistent values 199 */ 200 for (x = 1; x < maxx; x++) { 201 net[x][0] = x; 202 way[x][0] = X; 203 } 204 for (y = 1; y < maxy; y++) { 205 net[0][y] = y; 206 way[0][y] = Y; 207 } 208 209 for (x = 1; x < maxx; x++) { 210 for (y = 1; y < maxy; y++) { 211 go[X] = net[x - 1][y] + DELETE; 212 // way on x costs 1 unit 213 go[Y] = net[x][y - 1] + INSERT; 214 // way on y costs 1 unit 215 go[R] = net[x - 1][y - 1] + REPLACE; 216 go[D] = net[x - 1][y - 1] + ((a.charAt(x - 1) == b.charAt(y - 1)) ? NOOP : 100); 217 // diagonal costs 0, when no change 218 short min = D; 219 if (go[min] >= go[X]) { 220 min = X; 221 } 222 if (go[min] > go[Y]) { 223 min = Y; 224 } 225 if (go[min] > go[R]) { 226 min = R; 227 } 228 way[x][y] = min; 229 net[x][y] = (short) go[min]; 230 } 231 } 232 233 // read the patch string 234 StringBuilder result = new StringBuilder(); 235 final char base = 'a' - 1; 236 char deletes = base; 237 char equals = base; 238 for (x = maxx - 1, y = maxy - 1; x + y != 0; ) { 239 switch (way[x][y]) { 240 case X: 241 if (equals != base) { 242 result.append('-').append(equals); 243 equals = base; 244 } 245 deletes++; 246 x--; 247 break; 248 // delete 249 case Y: 250 if (deletes != base) { 251 result.append('D').append(deletes); 252 deletes = base; 253 } 254 if (equals != base) { 255 result.append('-').append(equals); 256 equals = base; 257 } 258 result.append('I'); 259 result.append(b.charAt(--y)); 260 break; 261 // insert 262 case R: 263 if (deletes != base) { 264 result.append('D').append(deletes); 265 deletes = base; 266 } 267 if (equals != base) { 268 result.append('-').append(equals); 269 equals = base; 270 } 271 result.append('R'); 272 result.append(b.charAt(--y)); 273 x--; 274 break; 275 // replace 276 case D: 277 if (deletes != base) { 278 result.append('D').append(deletes); 279 deletes = base; 280 } 281 equals++; 282 x--; 283 y--; 284 break; 285 // no change 286 } 287 } 288 if (deletes != base) { 289 result.append('D').append(deletes); 290 deletes = base; 291 } 292 293 return result.toString(); 294 } 295 } 296