xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/web/EftarFile.java (revision 7483fa4f6ea5b37f9dc2e3053394dfc04d7212e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
22  */
23 package org.opengrok.indexer.web;
24 
25 import java.io.BufferedOutputStream;
26 import java.io.DataOutputStream;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.util.Map;
30 import java.util.Set;
31 import java.util.StringTokenizer;
32 import java.util.TreeMap;
33 
34 /**
35  * An Extremely Fast Tagged Attribute Read-only File System.
36  * Created on October 12, 2005
37  *
38  * <i>Eftar</i> File has the following format
39  * <code>
40  * FILE --&gt; Record  ( Record | tagString ) *
41  * <br>
42  * Record --&gt; 64bit:Hash 16bit:childrenOffset  16bit:(numberChildren|lenthOfTag) 16bit:tagOffset
43  * </code>
44  *
45  * It is a tree of tagged names, doing binary search in sorted list of children
46  *
47  * @author Chandan
48  */
49 public class EftarFile {
50 
51     public static final int RECORD_LENGTH = 14;
52     private long offset;
53     private Node root;
54 
55     static class Node {
56 
57         private final long hash;
58         private String tag;
59         private final Map<Long, Node> children;
60         private long tagOffset;
61         private long childOffset;
62 
Node(long hash, String tag)63         Node(long hash, String tag) {
64             this.hash = hash;
65             this.tag = tag;
66             children = new TreeMap<>();
67         }
68 
put(long hash, String desc)69         public Node put(long hash, String desc) {
70             return children.computeIfAbsent(hash, newNode -> new Node(hash, desc));
71         }
72 
get(long hash)73         public Node get(long hash) {
74             return children.get(hash);
75         }
76     }
77 
myHash(String name)78     public static long myHash(String name) {
79         if (name == null || name.length() == 0) {
80             return 0;
81         }
82         long hash = 2861;
83         int n = name.length();
84         if (n > 100) {
85             n = 100;
86         }
87         for (int i = 0; i < n; i++) {
88             hash = (hash * 641L + name.charAt(i) * 2969 + hash << 6) % 9322397;
89         }
90         return hash;
91     }
92 
write(Node n, DataOutputStream out)93     private void write(Node n, DataOutputStream out) throws IOException {
94         if (n.tag != null) {
95             out.write(n.tag.getBytes());
96             offset += n.tag.length();
97         }
98         for (Node childNode : n.children.values()) {
99             out.writeLong(childNode.hash);
100             if (childNode.children.size() > 0) {
101                 out.writeShort((short) (childNode.childOffset - offset));
102                 out.writeShort((short) childNode.children.size());
103             } else {
104                 out.writeShort(0);
105                 if (childNode.tag == null) {
106                     out.writeShort((short) 0);
107                 } else {
108                     out.writeShort((short) childNode.tag.length());
109                 }
110             }
111             if (childNode.tag == null) {
112                 out.writeShort(0);
113             } else {
114                 out.writeShort((short) (childNode.tagOffset - offset));
115             }
116             offset += RECORD_LENGTH;
117         }
118         for (Node childNode : n.children.values()) {
119             write(childNode, out);
120         }
121     }
122 
traverse(Node n)123     private void traverse(Node n) {
124         if (n.tag == null) {
125             n.tagOffset = 0;
126         } else {
127             n.tagOffset = offset;
128             offset += n.tag.length();
129         }
130         if (n.children.size() > 0) {
131             n.childOffset = offset;
132             offset += ((long) RECORD_LENGTH * n.children.size());
133         } else {
134             n.childOffset = 0;
135         }
136         for (Node childnode : n.children.values()) {
137             traverse(childnode);
138         }
139     }
140 
141     /**
142      * Reads the input into interim representation. Can be called multiple times.
143      * @param descriptions set of PathDescription
144      */
readInput(Set<PathDescription> descriptions)145     private void readInput(Set<PathDescription> descriptions) {
146         if (root == null) {
147             root = new Node(1, null);
148         }
149 
150         for (PathDescription desc : descriptions) {
151             StringTokenizer toks = new StringTokenizer(desc.getPath(), "\\/");
152             Node n = root;
153             while (toks.hasMoreTokens()) {
154                 n = n.put(myHash(toks.nextToken()), null);
155             }
156             n.tag = desc.getDescription();
157         }
158     }
159 
write(String outPath)160     public void write(String outPath) throws IOException {
161         offset = RECORD_LENGTH;
162         traverse(root);
163         try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outPath)))) {
164             out.writeLong(0x5e33);
165             out.writeShort(RECORD_LENGTH);
166             out.writeShort(root.children.size());
167             out.writeShort(0);
168             offset = RECORD_LENGTH;
169             write(root, out);
170         }
171     }
172 
create(Set<PathDescription> descriptions, String outputPath)173     public void create(Set<PathDescription> descriptions, String outputPath) throws IOException {
174         readInput(descriptions);
175         write(outputPath);
176     }
177 }
178