1b5840353SAdam Hornáček /* 2b5840353SAdam Hornáček * CDDL HEADER START 3b5840353SAdam Hornáček * 4b5840353SAdam Hornáček * The contents of this file are subject to the terms of the 5b5840353SAdam Hornáček * Common Development and Distribution License (the "License"). 6b5840353SAdam Hornáček * You may not use this file except in compliance with the License. 7b5840353SAdam Hornáček * 8b5840353SAdam Hornáček * See LICENSE.txt included in this distribution for the specific 9b5840353SAdam Hornáček * language governing permissions and limitations under the License. 10b5840353SAdam Hornáček * 11b5840353SAdam Hornáček * When distributing Covered Code, include this CDDL HEADER in each 12b5840353SAdam Hornáček * file and include the License file at LICENSE.txt. 13b5840353SAdam Hornáček * If applicable, add the following below this CDDL HEADER, with the 14b5840353SAdam Hornáček * fields enclosed by brackets "[]" replaced with your own identifying 15b5840353SAdam Hornáček * information: Portions Copyright [yyyy] [name of copyright owner] 16b5840353SAdam Hornáček * 17b5840353SAdam Hornáček * CDDL HEADER END 18b5840353SAdam Hornáček */ 19b5840353SAdam Hornáček 20b5840353SAdam Hornáček /* 21c6f0939bSAdam Hornacek * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. 22b5840353SAdam Hornáček */ 239805b761SAdam Hornáček package org.opengrok.indexer.web; 24b5840353SAdam Hornáček 25b5840353SAdam Hornáček import java.io.BufferedOutputStream; 26b5840353SAdam Hornáček import java.io.DataOutputStream; 27b5840353SAdam Hornáček import java.io.FileOutputStream; 28b5840353SAdam Hornáček import java.io.IOException; 29b5840353SAdam Hornáček import java.util.Map; 30a11b766cSVladimir Kotal import java.util.Set; 31b5840353SAdam Hornáček import java.util.StringTokenizer; 32b5840353SAdam Hornáček import java.util.TreeMap; 33b5840353SAdam Hornáček 34b5840353SAdam Hornáček /** 35ff44f24aSAdam Hornáček * An Extremely Fast Tagged Attribute Read-only File System. 36b5840353SAdam Hornáček * Created on October 12, 2005 37b5840353SAdam Hornáček * 38*7483fa4fSVladimir Kotal * <i>Eftar</i> File has the following format 39*7483fa4fSVladimir Kotal * <code> 40b5840353SAdam Hornáček * FILE --> Record ( Record | tagString ) * 41*7483fa4fSVladimir Kotal * <br> 42b5840353SAdam Hornáček * Record --> 64bit:Hash 16bit:childrenOffset 16bit:(numberChildren|lenthOfTag) 16bit:tagOffset 43*7483fa4fSVladimir Kotal * </code> 44b5840353SAdam Hornáček * 45*7483fa4fSVladimir Kotal * It is a tree of tagged names, doing binary search in sorted list of children 46b5840353SAdam Hornáček * 47b5840353SAdam Hornáček * @author Chandan 48b5840353SAdam Hornáček */ 49b5840353SAdam Hornáček public class EftarFile { 50b5840353SAdam Hornáček 51b5840353SAdam Hornáček public static final int RECORD_LENGTH = 14; 52b5840353SAdam Hornáček private long offset; 53a11b766cSVladimir Kotal private Node root; 54b5840353SAdam Hornáček 55c6f0939bSAdam Hornacek static class Node { 56b5840353SAdam Hornáček 57*7483fa4fSVladimir Kotal private final long hash; 58*7483fa4fSVladimir Kotal private String tag; 59*7483fa4fSVladimir Kotal private final Map<Long, Node> children; 60*7483fa4fSVladimir Kotal private long tagOffset; 61*7483fa4fSVladimir Kotal private long childOffset; 62b5840353SAdam Hornáček Node(long hash, String tag)63d1e826faSAdam Hornáček Node(long hash, String tag) { 64b5840353SAdam Hornáček this.hash = hash; 65b5840353SAdam Hornáček this.tag = tag; 66b5840353SAdam Hornáček children = new TreeMap<>(); 67b5840353SAdam Hornáček } 68b5840353SAdam Hornáček put(long hash, String desc)69b5840353SAdam Hornáček public Node put(long hash, String desc) { 70*7483fa4fSVladimir Kotal return children.computeIfAbsent(hash, newNode -> new Node(hash, desc)); 71b5840353SAdam Hornáček } 72b5840353SAdam Hornáček get(long hash)73b5840353SAdam Hornáček public Node get(long hash) { 74b5840353SAdam Hornáček return children.get(hash); 75b5840353SAdam Hornáček } 76b5840353SAdam Hornáček } 77b5840353SAdam Hornáček myHash(String name)78b5840353SAdam Hornáček public static long myHash(String name) { 79b5840353SAdam Hornáček if (name == null || name.length() == 0) { 80b5840353SAdam Hornáček return 0; 81b5840353SAdam Hornáček } 82b5840353SAdam Hornáček long hash = 2861; 83b5840353SAdam Hornáček int n = name.length(); 84b5840353SAdam Hornáček if (n > 100) { 85b5840353SAdam Hornáček n = 100; 86b5840353SAdam Hornáček } 87b5840353SAdam Hornáček for (int i = 0; i < n; i++) { 88edf8e58eSVladimir Kotal hash = (hash * 641L + name.charAt(i) * 2969 + hash << 6) % 9322397; 89b5840353SAdam Hornáček } 90b5840353SAdam Hornáček return hash; 91b5840353SAdam Hornáček } 92b5840353SAdam Hornáček write(Node n, DataOutputStream out)93b5840353SAdam Hornáček private void write(Node n, DataOutputStream out) throws IOException { 94b5840353SAdam Hornáček if (n.tag != null) { 95b5840353SAdam Hornáček out.write(n.tag.getBytes()); 96b5840353SAdam Hornáček offset += n.tag.length(); 97b5840353SAdam Hornáček } 98edf8e58eSVladimir Kotal for (Node childNode : n.children.values()) { 99edf8e58eSVladimir Kotal out.writeLong(childNode.hash); 100edf8e58eSVladimir Kotal if (childNode.children.size() > 0) { 101edf8e58eSVladimir Kotal out.writeShort((short) (childNode.childOffset - offset)); 102edf8e58eSVladimir Kotal out.writeShort((short) childNode.children.size()); 103b5840353SAdam Hornáček } else { 104b5840353SAdam Hornáček out.writeShort(0); 105edf8e58eSVladimir Kotal if (childNode.tag == null) { 106b5840353SAdam Hornáček out.writeShort((short) 0); 107b5840353SAdam Hornáček } else { 108edf8e58eSVladimir Kotal out.writeShort((short) childNode.tag.length()); 109b5840353SAdam Hornáček } 110b5840353SAdam Hornáček } 111edf8e58eSVladimir Kotal if (childNode.tag == null) { 112b5840353SAdam Hornáček out.writeShort(0); 113b5840353SAdam Hornáček } else { 114edf8e58eSVladimir Kotal out.writeShort((short) (childNode.tagOffset - offset)); 115b5840353SAdam Hornáček } 116b5840353SAdam Hornáček offset += RECORD_LENGTH; 117b5840353SAdam Hornáček } 118edf8e58eSVladimir Kotal for (Node childNode : n.children.values()) { 119edf8e58eSVladimir Kotal write(childNode, out); 120b5840353SAdam Hornáček } 121b5840353SAdam Hornáček } 122b5840353SAdam Hornáček traverse(Node n)123b5840353SAdam Hornáček private void traverse(Node n) { 124b5840353SAdam Hornáček if (n.tag == null) { 125b5840353SAdam Hornáček n.tagOffset = 0; 126b5840353SAdam Hornáček } else { 127b5840353SAdam Hornáček n.tagOffset = offset; 128b5840353SAdam Hornáček offset += n.tag.length(); 129b5840353SAdam Hornáček } 130b5840353SAdam Hornáček if (n.children.size() > 0) { 131b5840353SAdam Hornáček n.childOffset = offset; 132edf8e58eSVladimir Kotal offset += ((long) RECORD_LENGTH * n.children.size()); 133b5840353SAdam Hornáček } else { 134b5840353SAdam Hornáček n.childOffset = 0; 135b5840353SAdam Hornáček } 136b5840353SAdam Hornáček for (Node childnode : n.children.values()) { 137b5840353SAdam Hornáček traverse(childnode); 138b5840353SAdam Hornáček } 139b5840353SAdam Hornáček } 140f197cd77SVladimir Kotal 141f197cd77SVladimir Kotal /** 142f197cd77SVladimir Kotal * Reads the input into interim representation. Can be called multiple times. 143a11b766cSVladimir Kotal * @param descriptions set of PathDescription 144f197cd77SVladimir Kotal */ readInput(Set<PathDescription> descriptions)145edf8e58eSVladimir Kotal private void readInput(Set<PathDescription> descriptions) { 146b5840353SAdam Hornáček if (root == null) { 147b5840353SAdam Hornáček root = new Node(1, null); 148b5840353SAdam Hornáček } 149a11b766cSVladimir Kotal 150a11b766cSVladimir Kotal for (PathDescription desc : descriptions) { 151a11b766cSVladimir Kotal StringTokenizer toks = new StringTokenizer(desc.getPath(), "\\/"); 152b5840353SAdam Hornáček Node n = root; 153b5840353SAdam Hornáček while (toks.hasMoreTokens()) { 154b5840353SAdam Hornáček n = n.put(myHash(toks.nextToken()), null); 155b5840353SAdam Hornáček } 156a11b766cSVladimir Kotal n.tag = desc.getDescription(); 157b5840353SAdam Hornáček } 158b5840353SAdam Hornáček } 159b5840353SAdam Hornáček write(String outPath)160a11b766cSVladimir Kotal public void write(String outPath) throws IOException { 161b5840353SAdam Hornáček offset = RECORD_LENGTH; 162b5840353SAdam Hornáček traverse(root); 163edf8e58eSVladimir Kotal try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outPath)))) { 164b5840353SAdam Hornáček out.writeLong(0x5e33); 165b5840353SAdam Hornáček out.writeShort(RECORD_LENGTH); 166b5840353SAdam Hornáček out.writeShort(root.children.size()); 167b5840353SAdam Hornáček out.writeShort(0); 168b5840353SAdam Hornáček offset = RECORD_LENGTH; 169b5840353SAdam Hornáček write(root, out); 170b5840353SAdam Hornáček } 171b5840353SAdam Hornáček } 172b5840353SAdam Hornáček create(Set<PathDescription> descriptions, String outputPath)173a11b766cSVladimir Kotal public void create(Set<PathDescription> descriptions, String outputPath) throws IOException { 174a11b766cSVladimir Kotal readInput(descriptions); 175f197cd77SVladimir Kotal write(outputPath); 176b5840353SAdam Hornáček } 177b5840353SAdam Hornáček } 178