1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. 22 */ 23 package org.opengrok.indexer.web; 24 25 import java.io.BufferedOutputStream; 26 import java.io.DataOutputStream; 27 import java.io.FileOutputStream; 28 import java.io.IOException; 29 import java.util.Map; 30 import java.util.Set; 31 import java.util.StringTokenizer; 32 import java.util.TreeMap; 33 34 /** 35 * An Extremely Fast Tagged Attribute Read-only File System. 36 * Created on October 12, 2005 37 * 38 * <i>Eftar</i> File has the following format 39 * <code> 40 * FILE --> Record ( Record | tagString ) * 41 * <br> 42 * Record --> 64bit:Hash 16bit:childrenOffset 16bit:(numberChildren|lenthOfTag) 16bit:tagOffset 43 * </code> 44 * 45 * It is a tree of tagged names, doing binary search in sorted list of children 46 * 47 * @author Chandan 48 */ 49 public class EftarFile { 50 51 public static final int RECORD_LENGTH = 14; 52 private long offset; 53 private Node root; 54 55 static class Node { 56 57 private final long hash; 58 private String tag; 59 private final Map<Long, Node> children; 60 private long tagOffset; 61 private long childOffset; 62 Node(long hash, String tag)63 Node(long hash, String tag) { 64 this.hash = hash; 65 this.tag = tag; 66 children = new TreeMap<>(); 67 } 68 put(long hash, String desc)69 public Node put(long hash, String desc) { 70 return children.computeIfAbsent(hash, newNode -> new Node(hash, desc)); 71 } 72 get(long hash)73 public Node get(long hash) { 74 return children.get(hash); 75 } 76 } 77 myHash(String name)78 public static long myHash(String name) { 79 if (name == null || name.length() == 0) { 80 return 0; 81 } 82 long hash = 2861; 83 int n = name.length(); 84 if (n > 100) { 85 n = 100; 86 } 87 for (int i = 0; i < n; i++) { 88 hash = (hash * 641L + name.charAt(i) * 2969 + hash << 6) % 9322397; 89 } 90 return hash; 91 } 92 write(Node n, DataOutputStream out)93 private void write(Node n, DataOutputStream out) throws IOException { 94 if (n.tag != null) { 95 out.write(n.tag.getBytes()); 96 offset += n.tag.length(); 97 } 98 for (Node childNode : n.children.values()) { 99 out.writeLong(childNode.hash); 100 if (childNode.children.size() > 0) { 101 out.writeShort((short) (childNode.childOffset - offset)); 102 out.writeShort((short) childNode.children.size()); 103 } else { 104 out.writeShort(0); 105 if (childNode.tag == null) { 106 out.writeShort((short) 0); 107 } else { 108 out.writeShort((short) childNode.tag.length()); 109 } 110 } 111 if (childNode.tag == null) { 112 out.writeShort(0); 113 } else { 114 out.writeShort((short) (childNode.tagOffset - offset)); 115 } 116 offset += RECORD_LENGTH; 117 } 118 for (Node childNode : n.children.values()) { 119 write(childNode, out); 120 } 121 } 122 traverse(Node n)123 private void traverse(Node n) { 124 if (n.tag == null) { 125 n.tagOffset = 0; 126 } else { 127 n.tagOffset = offset; 128 offset += n.tag.length(); 129 } 130 if (n.children.size() > 0) { 131 n.childOffset = offset; 132 offset += ((long) RECORD_LENGTH * n.children.size()); 133 } else { 134 n.childOffset = 0; 135 } 136 for (Node childnode : n.children.values()) { 137 traverse(childnode); 138 } 139 } 140 141 /** 142 * Reads the input into interim representation. Can be called multiple times. 143 * @param descriptions set of PathDescription 144 */ readInput(Set<PathDescription> descriptions)145 private void readInput(Set<PathDescription> descriptions) { 146 if (root == null) { 147 root = new Node(1, null); 148 } 149 150 for (PathDescription desc : descriptions) { 151 StringTokenizer toks = new StringTokenizer(desc.getPath(), "\\/"); 152 Node n = root; 153 while (toks.hasMoreTokens()) { 154 n = n.put(myHash(toks.nextToken()), null); 155 } 156 n.tag = desc.getDescription(); 157 } 158 } 159 write(String outPath)160 public void write(String outPath) throws IOException { 161 offset = RECORD_LENGTH; 162 traverse(root); 163 try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outPath)))) { 164 out.writeLong(0x5e33); 165 out.writeShort(RECORD_LENGTH); 166 out.writeShort(root.children.size()); 167 out.writeShort(0); 168 offset = RECORD_LENGTH; 169 write(root, out); 170 } 171 } 172 create(Set<PathDescription> descriptions, String outputPath)173 public void create(Set<PathDescription> descriptions, String outputPath) throws IOException { 174 readInput(descriptions); 175 write(outputPath); 176 } 177 } 178