xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/web/EftarFileReader.java (revision edf8e58e8be5a0bcb9281ff841102d55f59d002d)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
 */
239805b761SAdam Hornáček package org.opengrok.indexer.web;
24b5840353SAdam Hornáček 
25ebb754a9SVladimir Kotal import java.io.Closeable;
26b5840353SAdam Hornáček import java.io.EOFException;
27b5840353SAdam Hornáček import java.io.File;
28b5840353SAdam Hornáček import java.io.FileNotFoundException;
29b5840353SAdam Hornáček import java.io.IOException;
30b5840353SAdam Hornáček import java.io.RandomAccessFile;
31b5840353SAdam Hornáček import java.util.StringTokenizer;
32b5840353SAdam Hornáček import java.util.logging.Level;
33b5840353SAdam Hornáček import java.util.logging.Logger;
34b5840353SAdam Hornáček 
359805b761SAdam Hornáček import org.opengrok.indexer.logger.LoggerFactory;
369805b761SAdam Hornáček import org.opengrok.indexer.util.IOUtils;
37b5840353SAdam Hornáček 
38b5840353SAdam Hornáček /**
39ff44f24aSAdam Hornáček  * An Extremely Fast Tagged Attribute Read-only File Reader.
40b5840353SAdam Hornáček  * Created on October 12, 2005
41b5840353SAdam Hornáček  *
42b5840353SAdam Hornáček  * @author Chandan
43b5840353SAdam Hornáček  */
44ebb754a9SVladimir Kotal public class EftarFileReader implements Closeable {
45b5840353SAdam Hornáček 
46b5840353SAdam Hornáček     private static final Logger LOGGER = LoggerFactory.getLogger(EftarFileReader.class);
47b5840353SAdam Hornáček 
48b5840353SAdam Hornáček     private final RandomAccessFile f;
49b5840353SAdam Hornáček     private boolean isOpen;
50b5840353SAdam Hornáček 
519805b761SAdam Hornáček     public class FNode {
52b5840353SAdam Hornáček 
53c6f0939bSAdam Hornacek         private final long offset;
5487c193f7SVladimir Kotal         private long hash;
5587c193f7SVladimir Kotal         private int childOffset;
5687c193f7SVladimir Kotal         private int numChildren;
5787c193f7SVladimir Kotal         private int tagOffset;
58b5840353SAdam Hornáček 
FNode()59b5840353SAdam Hornáček         public FNode() throws IOException {
60b5840353SAdam Hornáček             offset = f.getFilePointer();
6187c193f7SVladimir Kotal 
62b5840353SAdam Hornáček             try {
63b5840353SAdam Hornáček                 hash = f.readLong();
64b5840353SAdam Hornáček                 childOffset = f.readUnsignedShort();
65b5840353SAdam Hornáček                 numChildren = f.readUnsignedShort();
66b5840353SAdam Hornáček                 tagOffset = f.readUnsignedShort();
67b5840353SAdam Hornáček             } catch (EOFException e) {
68b5840353SAdam Hornáček                 numChildren = 0;
69b5840353SAdam Hornáček                 tagOffset = 0;
70b5840353SAdam Hornáček             }
71b5840353SAdam Hornáček         }
72b5840353SAdam Hornáček 
FNode(long hash, long offset, int childOffset, int num, int tagOffset)73b5840353SAdam Hornáček         public FNode(long hash, long offset, int childOffset, int num, int tagOffset) {
74b5840353SAdam Hornáček             this.hash = hash;
75b5840353SAdam Hornáček             this.offset = offset;
76b5840353SAdam Hornáček             this.childOffset = childOffset;
77b5840353SAdam Hornáček             this.numChildren = num;
78b5840353SAdam Hornáček             this.tagOffset = tagOffset;
79b5840353SAdam Hornáček         }
80b5840353SAdam Hornáček 
get(long hash)81b5840353SAdam Hornáček         public FNode get(long hash) throws IOException {
82b5840353SAdam Hornáček             if (childOffset == 0 || numChildren == 0) {
83b5840353SAdam Hornáček                 return null;
84b5840353SAdam Hornáček             }
85b5840353SAdam Hornáček             return binarySearch(offset + childOffset, numChildren, hash);
86b5840353SAdam Hornáček         }
87b5840353SAdam Hornáček 
binarySearch(long start, int len, long hash)88b5840353SAdam Hornáček         private FNode binarySearch(long start, int len, long hash) throws IOException {
89b5840353SAdam Hornáček             int b = 0;
90b5840353SAdam Hornáček             int e = len;
91b5840353SAdam Hornáček             while (b <= e) {
92b5840353SAdam Hornáček                 int m = (b + e) / 2;
93*edf8e58eSVladimir Kotal                 f.seek(start + (long) m * EftarFile.RECORD_LENGTH);
94b5840353SAdam Hornáček                 long mhash = f.readLong();
95b5840353SAdam Hornáček                 if (hash > mhash) {
96b5840353SAdam Hornáček                     b = m + 1;
97b5840353SAdam Hornáček                 } else if (hash < mhash) {
98b5840353SAdam Hornáček                     e = m - 1;
99b5840353SAdam Hornáček                 } else {
100*edf8e58eSVladimir Kotal                     return new FNode(mhash, f.getFilePointer() - 8L, f.readUnsignedShort(), f.readUnsignedShort(),
101*edf8e58eSVladimir Kotal                             f.readUnsignedShort());
102b5840353SAdam Hornáček                 }
103b5840353SAdam Hornáček             }
104b5840353SAdam Hornáček             return null;
105b5840353SAdam Hornáček         }
106b5840353SAdam Hornáček 
getTag()107b5840353SAdam Hornáček         public String getTag() throws IOException {
108b5840353SAdam Hornáček             if (tagOffset == 0) {
109b5840353SAdam Hornáček                 return null;
110b5840353SAdam Hornáček             }
111b5840353SAdam Hornáček             f.seek(offset + tagOffset);
112b5840353SAdam Hornáček             byte[] tagString;
113b5840353SAdam Hornáček             if (childOffset == 0) {
114b5840353SAdam Hornáček                 tagString = new byte[numChildren];
115b5840353SAdam Hornáček             } else {
116b5840353SAdam Hornáček                 tagString = new byte[childOffset - tagOffset];
117b5840353SAdam Hornáček             }
118b5840353SAdam Hornáček             int len = f.read(tagString);
119b5840353SAdam Hornáček             if (len == -1) {
120b5840353SAdam Hornáček                 throw new EOFException();
121b5840353SAdam Hornáček             }
122b5840353SAdam Hornáček             return new String(tagString, 0, len);
123b5840353SAdam Hornáček         }
124b5840353SAdam Hornáček 
125b5840353SAdam Hornáček         @Override
toString()126b5840353SAdam Hornáček         public String toString() {
127b5840353SAdam Hornáček             String tagString = null;
128b5840353SAdam Hornáček             try {
129b5840353SAdam Hornáček                 tagString = getTag();
130b5840353SAdam Hornáček             } catch (EOFException e) { // NOPMD
131b5840353SAdam Hornáček                 // ignore
132b5840353SAdam Hornáček             } catch (IOException e) {
133b5840353SAdam Hornáček                 LOGGER.log(Level.WARNING, "Got exception while getting the tag: ", e);
134b5840353SAdam Hornáček             }
135b5840353SAdam Hornáček             return "H[" + hash + "] num = " + numChildren + " tag = " + tagString;
136b5840353SAdam Hornáček         }
13787c193f7SVladimir Kotal 
getChildOffset()13887c193f7SVladimir Kotal         public int getChildOffset() {
13987c193f7SVladimir Kotal             return childOffset;
14087c193f7SVladimir Kotal         }
141b5840353SAdam Hornáček     }
142b5840353SAdam Hornáček 
EftarFileReader(String file)143b5840353SAdam Hornáček     public EftarFileReader(String file) throws FileNotFoundException {
144b5840353SAdam Hornáček         this(new File(file));
145b5840353SAdam Hornáček     }
146f197cd77SVladimir Kotal 
EftarFileReader(File file)147b5840353SAdam Hornáček     public EftarFileReader(File file) throws FileNotFoundException {
148b5840353SAdam Hornáček         f = new RandomAccessFile(file, "r");
149b5840353SAdam Hornáček         isOpen = true;
150b5840353SAdam Hornáček     }
151b5840353SAdam Hornáček 
getNode(String path)152b5840353SAdam Hornáček     public FNode getNode(String path) throws IOException {
153b5840353SAdam Hornáček         StringTokenizer toks = new StringTokenizer(path, "/");
154b5840353SAdam Hornáček         f.seek(0);
155b5840353SAdam Hornáček         FNode n = new FNode();
156b5840353SAdam Hornáček         if (File.separator.equals(path) || path.length() == 0) {
157b5840353SAdam Hornáček             return n;
158b5840353SAdam Hornáček         }
159b5840353SAdam Hornáček         FNode next = null;
160b5840353SAdam Hornáček         while (toks.hasMoreTokens() && ((next = n.get(EftarFile.myHash(toks.nextToken()))) != null)) {
161b5840353SAdam Hornáček             n = next;
162b5840353SAdam Hornáček         }
163b5840353SAdam Hornáček         if (!toks.hasMoreElements()) {
164b5840353SAdam Hornáček             return next;
165b5840353SAdam Hornáček         }
166b5840353SAdam Hornáček         return null;
167b5840353SAdam Hornáček     }
168b5840353SAdam Hornáček 
getChildTag(FNode fn, String name)1699805b761SAdam Hornáček     public String getChildTag(FNode fn, String name) throws IOException {
170b5840353SAdam Hornáček         if (fn != null && fn.childOffset != 0 && fn.numChildren != 0) {
171b5840353SAdam Hornáček             FNode ch = fn.binarySearch(fn.offset + fn.childOffset, fn.numChildren, EftarFile.myHash(name));
172b5840353SAdam Hornáček             if (ch != null) {
173b5840353SAdam Hornáček                 return ch.getTag();
174b5840353SAdam Hornáček             }
175b5840353SAdam Hornáček         }
176b5840353SAdam Hornáček         return null;
177b5840353SAdam Hornáček     }
178b5840353SAdam Hornáček 
179f197cd77SVladimir Kotal     /**
180ff44f24aSAdam Hornáček      * Get description for path.
181f197cd77SVladimir Kotal      * @param path path relative to source root
182f197cd77SVladimir Kotal      * @return path description string
18381b586e6SVladimir Kotal      * @throws IOException I/O
184f197cd77SVladimir Kotal      */
get(String path)185b5840353SAdam Hornáček     public String get(String path) throws IOException {
186b5840353SAdam Hornáček         StringTokenizer toks = new StringTokenizer(path, "/");
187b5840353SAdam Hornáček         f.seek(0);
188b5840353SAdam Hornáček         FNode n = new FNode();
189b5840353SAdam Hornáček         FNode next;
190b5840353SAdam Hornáček         long tagOffset = 0;
191b5840353SAdam Hornáček         int tagLength = 0;
192b5840353SAdam Hornáček         while (toks.hasMoreTokens()) {
193b5840353SAdam Hornáček             String tok = toks.nextToken();
194b5840353SAdam Hornáček             if (tok == null || tok.length() == 0) {
195b5840353SAdam Hornáček                 continue;
196b5840353SAdam Hornáček             }
197b5840353SAdam Hornáček             next = n.get(EftarFile.myHash(tok));
198b5840353SAdam Hornáček             if (next == null) {
199b5840353SAdam Hornáček                 break;
200b5840353SAdam Hornáček             }
201b5840353SAdam Hornáček             if (next.tagOffset != 0) {
202b5840353SAdam Hornáček                 tagOffset = next.offset + next.tagOffset;
203b5840353SAdam Hornáček                 if (next.childOffset == 0) {
204b5840353SAdam Hornáček                     tagLength = next.numChildren;
205b5840353SAdam Hornáček                 } else {
206b5840353SAdam Hornáček                     tagLength = next.childOffset - next.tagOffset;
207b5840353SAdam Hornáček                 }
208b5840353SAdam Hornáček             }
209b5840353SAdam Hornáček             n = next;
210b5840353SAdam Hornáček         }
211b5840353SAdam Hornáček         if (tagOffset != 0) {
212b5840353SAdam Hornáček             f.seek(tagOffset);
213b5840353SAdam Hornáček             byte[] desc = new byte[tagLength];
214b5840353SAdam Hornáček             int len = f.read(desc);
215b5840353SAdam Hornáček             if (len == -1) {
216b5840353SAdam Hornáček                 throw new EOFException();
217b5840353SAdam Hornáček             }
218b5840353SAdam Hornáček             return new String(desc, 0, len);
219b5840353SAdam Hornáček         }
220b5840353SAdam Hornáček         return "";
221b5840353SAdam Hornáček     }
222b5840353SAdam Hornáček 
223b5840353SAdam Hornáček     /**
224b5840353SAdam Hornáček      * Check, whether this instance has been already closed.
225b5840353SAdam Hornáček      * @return {@code true} if closed.
226b5840353SAdam Hornáček      */
isClosed()227b5840353SAdam Hornáček     public boolean isClosed() {
228b5840353SAdam Hornáček         return !isOpen;
229b5840353SAdam Hornáček     }
230b5840353SAdam Hornáček 
231c6f0939bSAdam Hornacek     @Override
close()232b5840353SAdam Hornáček     public void close() {
233b5840353SAdam Hornáček         if (isOpen) {
234b5840353SAdam Hornáček             IOUtils.close(f);
235b5840353SAdam Hornáček             isOpen = false;
236b5840353SAdam Hornáček         }
237b5840353SAdam Hornáček     }
238b5840353SAdam Hornáček }
239