/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, Trond Norbye.
 * Portions Copyright (c) 2017, 2021, Chris Fraire <cfraire@me.com>.
24b5840353SAdam Hornáček */ 259805b761SAdam Hornáček package org.opengrok.indexer.util; 26b5840353SAdam Hornáček 27b5840353SAdam Hornáček import java.io.BufferedInputStream; 28b5840353SAdam Hornáček import java.io.Closeable; 29b5840353SAdam Hornáček import java.io.File; 30b5840353SAdam Hornáček import java.io.IOException; 31b5840353SAdam Hornáček import java.io.InputStream; 32b5840353SAdam Hornáček import java.io.InputStreamReader; 33b5840353SAdam Hornáček import java.io.Reader; 34b5840353SAdam Hornáček import java.nio.charset.Charset; 35b5840353SAdam Hornáček import java.nio.charset.StandardCharsets; 36b5840353SAdam Hornáček import java.nio.file.FileVisitResult; 37b5840353SAdam Hornáček import java.nio.file.Files; 38b5840353SAdam Hornáček import java.nio.file.Path; 39b5840353SAdam Hornáček import java.nio.file.SimpleFileVisitor; 40b5840353SAdam Hornáček import java.nio.file.attribute.BasicFileAttributes; 41b5840353SAdam Hornáček import java.util.ArrayList; 42b5840353SAdam Hornáček import java.util.Arrays; 43b5840353SAdam Hornáček import java.util.List; 44b5840353SAdam Hornáček import java.util.Map; 45b5840353SAdam Hornáček import java.util.logging.Level; 46b5840353SAdam Hornáček import java.util.logging.Logger; 479805b761SAdam Hornáček import org.opengrok.indexer.logger.LoggerFactory; 48b5840353SAdam Hornáček 49b5840353SAdam Hornáček /** 50b5840353SAdam Hornáček * A small utility class to provide common functionality related to 51b5840353SAdam Hornáček * IO so that we don't need to duplicate the logic all over the place. 
52b5840353SAdam Hornáček * 53b5840353SAdam Hornáček * @author Trond Norbye <trond.norbye@gmail.com> 54b5840353SAdam Hornáček */ 55b5840353SAdam Hornáček public final class IOUtils { 56b5840353SAdam Hornáček 57b5840353SAdam Hornáček private static final Logger LOGGER = LoggerFactory.getLogger(IOUtils.class); 58b5840353SAdam Hornáček 59750b3115SChris Fraire // private to enforce static IOUtils()60b5840353SAdam Hornáček private IOUtils() { 61b5840353SAdam Hornáček } 62b5840353SAdam Hornáček 63b5840353SAdam Hornáček /** 64b5840353SAdam Hornáček * If {@code c} is not null, tries to {@code close}, catching and logging 65b5840353SAdam Hornáček * any {@link IOException}. 66b5840353SAdam Hornáček * @param c null or a defined instance 67b5840353SAdam Hornáček */ close(Closeable c)68b5840353SAdam Hornáček public static void close(Closeable c) { 69b5840353SAdam Hornáček if (c != null) { 70b5840353SAdam Hornáček try { 71b5840353SAdam Hornáček c.close(); 72b5840353SAdam Hornáček } catch (IOException e) { 73750b3115SChris Fraire LOGGER.log(Level.WARNING, "Failed to close resource", e); 74b5840353SAdam Hornáček } 75b5840353SAdam Hornáček } 76b5840353SAdam Hornáček } 77b5840353SAdam Hornáček 78b5840353SAdam Hornáček /** 79b5840353SAdam Hornáček * Delete directory recursively. This method does not follow symlinks. 
80b5840353SAdam Hornáček * @param path directory to delete 81b5840353SAdam Hornáček * @throws IOException if any read error 82b5840353SAdam Hornáček */ removeRecursive(Path path)83ff44f24aSAdam Hornáček public static void removeRecursive(Path path) throws IOException { 84c6f0939bSAdam Hornacek Files.walkFileTree(path, new SimpleFileVisitor<>() { 85b5840353SAdam Hornáček @Override 86b5840353SAdam Hornáček public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) 87b5840353SAdam Hornáček throws IOException { 88b5840353SAdam Hornáček Files.delete(file); 89b5840353SAdam Hornáček return FileVisitResult.CONTINUE; 90b5840353SAdam Hornáček } 91b5840353SAdam Hornáček 92b5840353SAdam Hornáček @Override 93b5840353SAdam Hornáček public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { 94b5840353SAdam Hornáček // Try to delete the file anyway. 95b5840353SAdam Hornáček Files.delete(file); 96b5840353SAdam Hornáček return FileVisitResult.CONTINUE; 97b5840353SAdam Hornáček } 98b5840353SAdam Hornáček 99b5840353SAdam Hornáček @Override 100b5840353SAdam Hornáček public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { 101b5840353SAdam Hornáček if (exc == null) { 102b5840353SAdam Hornáček Files.delete(dir); 103b5840353SAdam Hornáček return FileVisitResult.CONTINUE; 104b5840353SAdam Hornáček } else { 105b5840353SAdam Hornáček // Directory traversal failed. 106b5840353SAdam Hornáček throw exc; 107b5840353SAdam Hornáček } 108b5840353SAdam Hornáček } 109b5840353SAdam Hornáček }); 110b5840353SAdam Hornáček } 111b5840353SAdam Hornáček 112b5840353SAdam Hornáček /** 113b5840353SAdam Hornáček * List files in the directory recursively when looking for files only 114b5840353SAdam Hornáček * ending with suffix. 
115b5840353SAdam Hornáček * 116b5840353SAdam Hornáček * @param root starting directory 117b5840353SAdam Hornáček * @param suffix suffix for the files 118b5840353SAdam Hornáček * @return recursively traversed list of files with given suffix 119b5840353SAdam Hornáček */ listFilesRec(File root, String suffix)120b5840353SAdam Hornáček public static List<File> listFilesRec(File root, String suffix) { 121b5840353SAdam Hornáček List<File> results = new ArrayList<>(); 122b5840353SAdam Hornáček List<File> files = listFiles(root); 123b5840353SAdam Hornáček for (File f : files) { 124b5840353SAdam Hornáček if (f.isDirectory() && f.canRead() && !f.getName().equals(".") && !f.getName().equals("..")) { 125b5840353SAdam Hornáček results.addAll(listFilesRec(f, suffix)); 126b5840353SAdam Hornáček } else if (suffix != null && !suffix.isEmpty() && f.getName().endsWith(suffix)) { 127b5840353SAdam Hornáček results.add(f); 128b5840353SAdam Hornáček } else if (suffix == null || suffix.isEmpty()) { 129b5840353SAdam Hornáček results.add(f); 130b5840353SAdam Hornáček } 131b5840353SAdam Hornáček } 132b5840353SAdam Hornáček return results; 133b5840353SAdam Hornáček } 134b5840353SAdam Hornáček 135b5840353SAdam Hornáček /** 136b5840353SAdam Hornáček * List files in the directory. 137b5840353SAdam Hornáček * 138b5840353SAdam Hornáček * @param root starting directory 139b5840353SAdam Hornáček * @return list of file with suffix 140b5840353SAdam Hornáček */ listFiles(File root)141b5840353SAdam Hornáček public static List<File> listFiles(File root) { 142b5840353SAdam Hornáček return listFiles(root, null); 143b5840353SAdam Hornáček } 144b5840353SAdam Hornáček 145b5840353SAdam Hornáček /** 146b5840353SAdam Hornáček * List files in the directory when looking for files only ending with 147b5840353SAdam Hornáček * suffix. 
148b5840353SAdam Hornáček * 149b5840353SAdam Hornáček * @param root starting directory 150b5840353SAdam Hornáček * @param suffix suffix for the files 151b5840353SAdam Hornáček * @return list of file with suffix 152b5840353SAdam Hornáček */ listFiles(File root, String suffix)153b5840353SAdam Hornáček public static List<File> listFiles(File root, String suffix) { 1541161d3e8SAdam Hornacek File[] files = root.listFiles((dir, name) -> { 155b5840353SAdam Hornáček if (suffix != null && !suffix.isEmpty()) { 1564b613dedSAdam Hornacek return name.endsWith(suffix); 157b5840353SAdam Hornáček } else { 158b5840353SAdam Hornáček return true; 159b5840353SAdam Hornáček } 160b5840353SAdam Hornáček }); 161b5840353SAdam Hornáček if (files == null) { 162b5840353SAdam Hornáček return new ArrayList<>(); 163b5840353SAdam Hornáček } 164b5840353SAdam Hornáček return Arrays.asList(files); 165b5840353SAdam Hornáček } 166b5840353SAdam Hornáček 167b5840353SAdam Hornáček /** 168b5840353SAdam Hornáček * Create BOM stripped reader from the stream. 169b5840353SAdam Hornáček * Charset of the reader is set to UTF-8, UTF-16 or system's default. 170b5840353SAdam Hornáček * @param stream input stream 171b5840353SAdam Hornáček * @return reader for the stream without BOM 172b5840353SAdam Hornáček * @throws IOException if I/O exception occurred 173b5840353SAdam Hornáček */ createBOMStrippedReader(InputStream stream)174b5840353SAdam Hornáček public static Reader createBOMStrippedReader(InputStream stream) throws IOException { 175b5840353SAdam Hornáček return createBOMStrippedReader(stream, Charset.defaultCharset().name()); 176b5840353SAdam Hornáček } 177b5840353SAdam Hornáček 178b5840353SAdam Hornáček /** 179b5840353SAdam Hornáček * Create BOM stripped reader from the stream. 180b5840353SAdam Hornáček * Charset of the reader is set to UTF-8, UTF-16 or default. 
181b5840353SAdam Hornáček * @param stream input stream 182b5840353SAdam Hornáček * @param defaultCharset default charset 183b5840353SAdam Hornáček * @return reader for the stream without BOM 184b5840353SAdam Hornáček * @throws IOException if I/O exception occurred 185b5840353SAdam Hornáček */ createBOMStrippedReader(InputStream stream, String defaultCharset)186b5840353SAdam Hornáček public static Reader createBOMStrippedReader(InputStream stream, String defaultCharset) throws IOException { 187b5840353SAdam Hornáček InputStream in = stream.markSupported() ? 188b5840353SAdam Hornáček stream : new BufferedInputStream(stream); 189b5840353SAdam Hornáček 190b5840353SAdam Hornáček String charset = null; 191b5840353SAdam Hornáček 192b5840353SAdam Hornáček in.mark(3); 193b5840353SAdam Hornáček 194b5840353SAdam Hornáček byte[] head = new byte[3]; 195b5840353SAdam Hornáček int br = in.read(head, 0, 3); 196b5840353SAdam Hornáček 197b5840353SAdam Hornáček if (br >= 2 198b5840353SAdam Hornáček && (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) 199b5840353SAdam Hornáček || (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)) { 200750b3115SChris Fraire charset = StandardCharsets.UTF_16.name(); 201b5840353SAdam Hornáček in.reset(); 202b5840353SAdam Hornáček } else if (br >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB 203b5840353SAdam Hornáček && head[2] == (byte) 0xBF) { 204b5840353SAdam Hornáček // InputStreamReader does not properly discard BOM on UTF8 streams, 205b5840353SAdam Hornáček // so don't reset the stream. 
206b5840353SAdam Hornáček charset = StandardCharsets.UTF_8.name(); 207b5840353SAdam Hornáček } 208b5840353SAdam Hornáček 209b5840353SAdam Hornáček if (charset == null) { 210b5840353SAdam Hornáček in.reset(); 211b5840353SAdam Hornáček charset = defaultCharset; 212b5840353SAdam Hornáček } 213b5840353SAdam Hornáček 214b5840353SAdam Hornáček return new InputStreamReader(in, charset); 215b5840353SAdam Hornáček } 216b5840353SAdam Hornáček 217b5840353SAdam Hornáček /** 218b5840353SAdam Hornáček * Byte-order markers. 219b5840353SAdam Hornáček */ 220a3065a28SAdam Hornacek private static final Map<String, byte[]> BOMS = Map.of( 221*d6df19e1SAdam Hornacek StandardCharsets.UTF_8.name(), utf8Bom(), 222*d6df19e1SAdam Hornacek StandardCharsets.UTF_16BE.name(), utf16BeBom(), 223*d6df19e1SAdam Hornacek StandardCharsets.UTF_16LE.name(), utf16LeBom() 224a3065a28SAdam Hornacek ); 225b5840353SAdam Hornáček 226b5840353SAdam Hornáček /** 227750b3115SChris Fraire * Gets a new array containing the UTF-8 BOM. 228750b3115SChris Fraire */ utf8Bom()229*d6df19e1SAdam Hornacek public static byte[] utf8Bom() { 230750b3115SChris Fraire return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}; 231750b3115SChris Fraire } 232750b3115SChris Fraire 233750b3115SChris Fraire /** 234750b3115SChris Fraire * Gets a new array containing the UTF-16BE BOM (Big-Endian). 235750b3115SChris Fraire */ utf16BeBom()236*d6df19e1SAdam Hornacek public static byte[] utf16BeBom() { 237750b3115SChris Fraire return new byte[]{(byte) 0xFE, (byte) 0xFF}; 238750b3115SChris Fraire } 239750b3115SChris Fraire 240750b3115SChris Fraire /** 241750b3115SChris Fraire * Gets a new array containing the UTF-16LE BOM (Little-Endian). 
242750b3115SChris Fraire */ utf16LeBom()243*d6df19e1SAdam Hornacek public static byte[] utf16LeBom() { 244750b3115SChris Fraire return new byte[]{(byte) 0xFF, (byte) 0xFE}; 245750b3115SChris Fraire } 246750b3115SChris Fraire 247750b3115SChris Fraire /** 248b5840353SAdam Hornáček * Gets a value indicating a UTF encoding if the array starts with a 249b5840353SAdam Hornáček * known byte sequence. 250b5840353SAdam Hornáček * 251b5840353SAdam Hornáček * @param sig a sequence of bytes to inspect for a BOM 252b5840353SAdam Hornáček * @return null if no BOM was identified; otherwise a defined charset name 253b5840353SAdam Hornáček */ findBOMEncoding(byte[] sig)254b5840353SAdam Hornáček public static String findBOMEncoding(byte[] sig) { 255b5840353SAdam Hornáček for (Map.Entry<String, byte[]> entry : BOMS.entrySet()) { 256b5840353SAdam Hornáček String encoding = entry.getKey(); 257b5840353SAdam Hornáček byte[] bom = entry.getValue(); 258b5840353SAdam Hornáček if (sig.length > bom.length) { 259b5840353SAdam Hornáček int i = 0; 260b5840353SAdam Hornáček while (i < bom.length && sig[i] == bom[i]) { 261b5840353SAdam Hornáček i++; 262b5840353SAdam Hornáček } 263a72324b1SAdam Hornáček if (i == bom.length) { 264a72324b1SAdam Hornáček return encoding; 265a72324b1SAdam Hornáček } 266b5840353SAdam Hornáček } 267b5840353SAdam Hornáček } 268b5840353SAdam Hornáček return null; 269b5840353SAdam Hornáček } 270b5840353SAdam Hornáček 271b5840353SAdam Hornáček /** 272b5840353SAdam Hornáček * Gets a value indicating the number of UTF BOM bytes at the start of an 273b5840353SAdam Hornáček * array. 
274b5840353SAdam Hornáček * 275b5840353SAdam Hornáček * @param sig a sequence of bytes to inspect for a BOM 276b5840353SAdam Hornáček * @return 0 if the array doesn't start with a BOM; otherwise the number of 277b5840353SAdam Hornáček * BOM bytes 278b5840353SAdam Hornáček */ skipForBOM(byte[] sig)279b5840353SAdam Hornáček public static int skipForBOM(byte[] sig) { 280b5840353SAdam Hornáček String encoding = findBOMEncoding(sig); 281b5840353SAdam Hornáček if (encoding != null) { 282b5840353SAdam Hornáček byte[] bom = BOMS.get(encoding); 283b5840353SAdam Hornáček return bom.length; 284b5840353SAdam Hornáček } 285b5840353SAdam Hornáček return 0; 286b5840353SAdam Hornáček } 2872ffbb0cfSVladimir Kotal 2882ffbb0cfSVladimir Kotal /** 2892ffbb0cfSVladimir Kotal * Get the contents of a file or empty string if the file cannot be read. 29081b586e6SVladimir Kotal * @param file file object 29181b586e6SVladimir Kotal * @return string with the file contents 2922ffbb0cfSVladimir Kotal */ getFileContent(File file)2932ffbb0cfSVladimir Kotal public static String getFileContent(File file) { 2942ffbb0cfSVladimir Kotal if (file == null || !file.canRead()) { 2952ffbb0cfSVladimir Kotal return ""; 2962ffbb0cfSVladimir Kotal } 2972ffbb0cfSVladimir Kotal try { 298a3065a28SAdam Hornacek return Files.readString(file.toPath(), Charset.defaultCharset()); 299a3065a28SAdam Hornacek } catch (IOException e) { 300a3065a28SAdam Hornacek LOGGER.log(Level.WARNING, "failed to read file: {0}", e.getMessage()); 3012ffbb0cfSVladimir Kotal } 3022ffbb0cfSVladimir Kotal return ""; 3032ffbb0cfSVladimir Kotal } 304b5840353SAdam Hornáček } 305