1 /* 2 Egothor Software License version 1.00 3 Copyright (C) 1997-2004 Leo Galambos. 4 Copyright (C) 2002-2004 "Egothor developers" 5 on behalf of the Egothor Project. 6 All rights reserved. 7 8 This software is copyrighted by the "Egothor developers". If this 9 license applies to a single file or document, the "Egothor developers" 10 are the people or entities mentioned as copyright holders in that file 11 or document. If this license applies to the Egothor project as a 12 whole, the copyright holders are the people or entities mentioned in 13 the file CREDITS. This file can be found in the same location as this 14 license in the distribution. 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions are 18 met: 19 1. Redistributions of source code must retain the above copyright 20 notice, the list of contributors, this list of conditions, and the 21 following disclaimer. 22 2. Redistributions in binary form must reproduce the above copyright 23 notice, the list of contributors, this list of conditions, and the 24 disclaimer that follows these conditions in the documentation 25 and/or other materials provided with the distribution. 26 3. The name "Egothor" must not be used to endorse or promote products 27 derived from this software without prior written permission. For 28 written permission, please contact Leo.G@seznam.cz 29 4. Products derived from this software may not be called "Egothor", 30 nor may "Egothor" appear in their name, without prior written 31 permission from Leo.G@seznam.cz. 32 33 In addition, we request that you include in the end-user documentation 34 provided with the redistribution and/or in the software itself an 35 acknowledgement equivalent to the following: 36 "This product includes software developed by the Egothor Project. 37 http://egothor.sf.net/" 38 39 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 40 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 41 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 42 IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE 43 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 44 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 45 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 46 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 47 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 48 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 49 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 51 This software consists of voluntary contributions made by many 52 individuals on behalf of the Egothor Project and was originally 53 created by Leo Galambos (Leo.G@seznam.cz). 54 */ 55 package org.egothor.stemmer; 56 57 import java.io.DataInput; 58 import java.io.DataOutput; 59 import java.io.IOException; 60 import java.io.PrintStream; 61 import java.util.Iterator; 62 import java.util.TreeMap; 63 64 /** The Row class represents a row in a matrix representation of a trie. */ 65 public class Row { 66 TreeMap<Character, Cell> cells = new TreeMap<>(); 67 int uniformCnt = 0; 68 int uniformSkip = 0; 69 70 /** 71 * Construct a Row object from input carried in via the given input stream. 72 * 73 * @param is the input stream 74 * @exception IOException if an I/O error occurs 75 */ Row(DataInput is)76 public Row(DataInput is) throws IOException { 77 for (int i = is.readInt(); i > 0; i--) { 78 char ch = is.readChar(); 79 Cell c = new Cell(); 80 c.cmd = is.readInt(); 81 c.cnt = is.readInt(); 82 c.ref = is.readInt(); 83 c.skip = is.readInt(); 84 cells.put(ch, c); 85 } 86 } 87 88 /** The default constructor for the Row object. */ Row()89 public Row() {} 90 91 /** 92 * Construct a Row using the cells of the given Row. 93 * 94 * @param old the Row to copy 95 */ Row(Row old)96 public Row(Row old) { 97 cells = old.cells; 98 } 99 100 /** 101 * Set the command in the Cell of the given Character to the given integer. 102 * 103 * @param way the Character defining the Cell 104 * @param cmd the new command 105 */ setCmd(Character way, int cmd)106 public void setCmd(Character way, int cmd) { 107 Cell c = at(way); 108 if (c == null) { 109 c = new Cell(); 110 c.cmd = cmd; 111 cells.put(way, c); 112 } else { 113 c.cmd = cmd; 114 } 115 c.cnt = (cmd >= 0) ? 1 : 0; 116 } 117 118 /** 119 * Set the reference to the next row in the Cell of the given Character to the given integer. 120 * 121 * @param way the Character defining the Cell 122 * @param ref The new ref value 123 */ setRef(Character way, int ref)124 public void setRef(Character way, int ref) { 125 Cell c = at(way); 126 if (c == null) { 127 c = new Cell(); 128 c.ref = ref; 129 cells.put(way, c); 130 } else { 131 c.ref = ref; 132 } 133 } 134 135 /** 136 * Return the number of cells in use. 137 * 138 * @return the number of cells in use 139 */ getCells()140 public int getCells() { 141 Iterator<Character> i = cells.keySet().iterator(); 142 int size = 0; 143 for (; i.hasNext(); ) { 144 Character c = i.next(); 145 Cell e = at(c); 146 if (e.cmd >= 0 || e.ref >= 0) { 147 size++; 148 } 149 } 150 return size; 151 } 152 153 /** 154 * Return the number of references (how many transitions) to other rows. 155 * 156 * @return the number of references 157 */ getCellsPnt()158 public int getCellsPnt() { 159 Iterator<Character> i = cells.keySet().iterator(); 160 int size = 0; 161 for (; i.hasNext(); ) { 162 Character c = i.next(); 163 Cell e = at(c); 164 if (e.ref >= 0) { 165 size++; 166 } 167 } 168 return size; 169 } 170 171 /** 172 * Return the number of patch commands saved in this Row. 173 * 174 * @return the number of patch commands 175 */ getCellsVal()176 public int getCellsVal() { 177 Iterator<Character> i = cells.keySet().iterator(); 178 int size = 0; 179 for (; i.hasNext(); ) { 180 Character c = i.next(); 181 Cell e = at(c); 182 if (e.cmd >= 0) { 183 size++; 184 } 185 } 186 return size; 187 } 188 189 /** 190 * Return the command in the Cell associated with the given Character. 191 * 192 * @param way the Character associated with the Cell holding the desired command 193 * @return the command 194 */ getCmd(Character way)195 public int getCmd(Character way) { 196 Cell c = at(way); 197 return (c == null) ? -1 : c.cmd; 198 } 199 200 /** 201 * Return the number of patch commands were in the Cell associated with the given Character before 202 * the Trie containing this Row was reduced. 203 * 204 * @param way the Character associated with the desired Cell 205 * @return the number of patch commands before reduction 206 */ getCnt(Character way)207 public int getCnt(Character way) { 208 Cell c = at(way); 209 return (c == null) ? -1 : c.cnt; 210 } 211 212 /** 213 * Return the reference to the next Row in the Cell associated with the given Character. 214 * 215 * @param way the Character associated with the desired Cell 216 * @return the reference, or -1 if the Cell is <code>null</code> 217 */ getRef(Character way)218 public int getRef(Character way) { 219 Cell c = at(way); 220 return (c == null) ? -1 : c.ref; 221 } 222 223 /** 224 * Write the contents of this Row to the given output stream. 225 * 226 * @param os the output stream 227 * @exception IOException if an I/O error occurs 228 */ store(DataOutput os)229 public void store(DataOutput os) throws IOException { 230 os.writeInt(cells.size()); 231 Iterator<Character> i = cells.keySet().iterator(); 232 for (; i.hasNext(); ) { 233 Character c = i.next(); 234 Cell e = at(c); 235 if (e.cmd < 0 && e.ref < 0) { 236 continue; 237 } 238 239 os.writeChar(c.charValue()); 240 os.writeInt(e.cmd); 241 os.writeInt(e.cnt); 242 os.writeInt(e.ref); 243 os.writeInt(e.skip); 244 } 245 } 246 247 /** 248 * Return the number of identical Cells (containing patch commands) in this Row. 249 * 250 * @param eqSkip when set to <code>false</code> the removed patch commands are considered 251 * @return the number of identical Cells, or -1 if there are (at least) two different cells 252 */ uniformCmd(boolean eqSkip)253 public int uniformCmd(boolean eqSkip) { 254 Iterator<Cell> i = cells.values().iterator(); 255 int ret = -1; 256 uniformCnt = 1; 257 uniformSkip = 0; 258 for (; i.hasNext(); ) { 259 Cell c = i.next(); 260 if (c.ref >= 0) { 261 return -1; 262 } 263 if (c.cmd >= 0) { 264 if (ret < 0) { 265 ret = c.cmd; 266 uniformSkip = c.skip; 267 } else if (ret == c.cmd) { 268 if (eqSkip) { 269 if (uniformSkip == c.skip) { 270 uniformCnt++; 271 } else { 272 return -1; 273 } 274 } else { 275 uniformCnt++; 276 } 277 } else { 278 return -1; 279 } 280 } 281 } 282 return ret; 283 } 284 285 /** Write the contents of this Row to the printstream. */ print(PrintStream out)286 public void print(PrintStream out) { 287 for (Iterator<Character> i = cells.keySet().iterator(); i.hasNext(); ) { 288 Character ch = i.next(); 289 Cell c = at(ch); 290 out.print("[" + ch + ":" + c + "]"); 291 } 292 out.println(); 293 } 294 at(Character index)295 Cell at(Character index) { 296 return cells.get(index); 297 } 298 } 299