1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. 22 * Copyright (c) 2011, Trond Norbye. 23 * Portions Copyright (c) 2017, 2021, Chris Fraire <cfraire@me.com>. 24 */ 25 package org.opengrok.indexer.util; 26 27 import java.io.BufferedInputStream; 28 import java.io.Closeable; 29 import java.io.File; 30 import java.io.IOException; 31 import java.io.InputStream; 32 import java.io.InputStreamReader; 33 import java.io.Reader; 34 import java.nio.charset.Charset; 35 import java.nio.charset.StandardCharsets; 36 import java.nio.file.FileVisitResult; 37 import java.nio.file.Files; 38 import java.nio.file.Path; 39 import java.nio.file.SimpleFileVisitor; 40 import java.nio.file.attribute.BasicFileAttributes; 41 import java.util.ArrayList; 42 import java.util.Arrays; 43 import java.util.List; 44 import java.util.Map; 45 import java.util.logging.Level; 46 import java.util.logging.Logger; 47 import org.opengrok.indexer.logger.LoggerFactory; 48 49 /** 50 * A small utility class to provide common functionality related to 51 * IO so that we don't need to duplicate the logic all over the place. 52 * 53 * @author Trond Norbye <trond.norbye@gmail.com> 54 */ 55 public final class IOUtils { 56 57 private static final Logger LOGGER = LoggerFactory.getLogger(IOUtils.class); 58 59 // private to enforce static IOUtils()60 private IOUtils() { 61 } 62 63 /** 64 * If {@code c} is not null, tries to {@code close}, catching and logging 65 * any {@link IOException}. 66 * @param c null or a defined instance 67 */ close(Closeable c)68 public static void close(Closeable c) { 69 if (c != null) { 70 try { 71 c.close(); 72 } catch (IOException e) { 73 LOGGER.log(Level.WARNING, "Failed to close resource", e); 74 } 75 } 76 } 77 78 /** 79 * Delete directory recursively. This method does not follow symlinks. 80 * @param path directory to delete 81 * @throws IOException if any read error 82 */ removeRecursive(Path path)83 public static void removeRecursive(Path path) throws IOException { 84 Files.walkFileTree(path, new SimpleFileVisitor<>() { 85 @Override 86 public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) 87 throws IOException { 88 Files.delete(file); 89 return FileVisitResult.CONTINUE; 90 } 91 92 @Override 93 public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { 94 // Try to delete the file anyway. 95 Files.delete(file); 96 return FileVisitResult.CONTINUE; 97 } 98 99 @Override 100 public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { 101 if (exc == null) { 102 Files.delete(dir); 103 return FileVisitResult.CONTINUE; 104 } else { 105 // Directory traversal failed. 106 throw exc; 107 } 108 } 109 }); 110 } 111 112 /** 113 * List files in the directory recursively when looking for files only 114 * ending with suffix. 115 * 116 * @param root starting directory 117 * @param suffix suffix for the files 118 * @return recursively traversed list of files with given suffix 119 */ listFilesRec(File root, String suffix)120 public static List<File> listFilesRec(File root, String suffix) { 121 List<File> results = new ArrayList<>(); 122 List<File> files = listFiles(root); 123 for (File f : files) { 124 if (f.isDirectory() && f.canRead() && !f.getName().equals(".") && !f.getName().equals("..")) { 125 results.addAll(listFilesRec(f, suffix)); 126 } else if (suffix != null && !suffix.isEmpty() && f.getName().endsWith(suffix)) { 127 results.add(f); 128 } else if (suffix == null || suffix.isEmpty()) { 129 results.add(f); 130 } 131 } 132 return results; 133 } 134 135 /** 136 * List files in the directory. 137 * 138 * @param root starting directory 139 * @return list of file with suffix 140 */ listFiles(File root)141 public static List<File> listFiles(File root) { 142 return listFiles(root, null); 143 } 144 145 /** 146 * List files in the directory when looking for files only ending with 147 * suffix. 148 * 149 * @param root starting directory 150 * @param suffix suffix for the files 151 * @return list of file with suffix 152 */ listFiles(File root, String suffix)153 public static List<File> listFiles(File root, String suffix) { 154 File[] files = root.listFiles((dir, name) -> { 155 if (suffix != null && !suffix.isEmpty()) { 156 return name.endsWith(suffix); 157 } else { 158 return true; 159 } 160 }); 161 if (files == null) { 162 return new ArrayList<>(); 163 } 164 return Arrays.asList(files); 165 } 166 167 /** 168 * Create BOM stripped reader from the stream. 169 * Charset of the reader is set to UTF-8, UTF-16 or system's default. 170 * @param stream input stream 171 * @return reader for the stream without BOM 172 * @throws IOException if I/O exception occurred 173 */ createBOMStrippedReader(InputStream stream)174 public static Reader createBOMStrippedReader(InputStream stream) throws IOException { 175 return createBOMStrippedReader(stream, Charset.defaultCharset().name()); 176 } 177 178 /** 179 * Create BOM stripped reader from the stream. 180 * Charset of the reader is set to UTF-8, UTF-16 or default. 181 * @param stream input stream 182 * @param defaultCharset default charset 183 * @return reader for the stream without BOM 184 * @throws IOException if I/O exception occurred 185 */ createBOMStrippedReader(InputStream stream, String defaultCharset)186 public static Reader createBOMStrippedReader(InputStream stream, String defaultCharset) throws IOException { 187 InputStream in = stream.markSupported() ? 188 stream : new BufferedInputStream(stream); 189 190 String charset = null; 191 192 in.mark(3); 193 194 byte[] head = new byte[3]; 195 int br = in.read(head, 0, 3); 196 197 if (br >= 2 198 && (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) 199 || (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)) { 200 charset = StandardCharsets.UTF_16.name(); 201 in.reset(); 202 } else if (br >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB 203 && head[2] == (byte) 0xBF) { 204 // InputStreamReader does not properly discard BOM on UTF8 streams, 205 // so don't reset the stream. 206 charset = StandardCharsets.UTF_8.name(); 207 } 208 209 if (charset == null) { 210 in.reset(); 211 charset = defaultCharset; 212 } 213 214 return new InputStreamReader(in, charset); 215 } 216 217 /** 218 * Byte-order markers. 219 */ 220 private static final Map<String, byte[]> BOMS = Map.of( 221 StandardCharsets.UTF_8.name(), utf8Bom(), 222 StandardCharsets.UTF_16BE.name(), utf16BeBom(), 223 StandardCharsets.UTF_16LE.name(), utf16LeBom() 224 ); 225 226 /** 227 * Gets a new array containing the UTF-8 BOM. 228 */ utf8Bom()229 public static byte[] utf8Bom() { 230 return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}; 231 } 232 233 /** 234 * Gets a new array containing the UTF-16BE BOM (Big-Endian). 235 */ utf16BeBom()236 public static byte[] utf16BeBom() { 237 return new byte[]{(byte) 0xFE, (byte) 0xFF}; 238 } 239 240 /** 241 * Gets a new array containing the UTF-16LE BOM (Little-Endian). 242 */ utf16LeBom()243 public static byte[] utf16LeBom() { 244 return new byte[]{(byte) 0xFF, (byte) 0xFE}; 245 } 246 247 /** 248 * Gets a value indicating a UTF encoding if the array starts with a 249 * known byte sequence. 250 * 251 * @param sig a sequence of bytes to inspect for a BOM 252 * @return null if no BOM was identified; otherwise a defined charset name 253 */ findBOMEncoding(byte[] sig)254 public static String findBOMEncoding(byte[] sig) { 255 for (Map.Entry<String, byte[]> entry : BOMS.entrySet()) { 256 String encoding = entry.getKey(); 257 byte[] bom = entry.getValue(); 258 if (sig.length > bom.length) { 259 int i = 0; 260 while (i < bom.length && sig[i] == bom[i]) { 261 i++; 262 } 263 if (i == bom.length) { 264 return encoding; 265 } 266 } 267 } 268 return null; 269 } 270 271 /** 272 * Gets a value indicating the number of UTF BOM bytes at the start of an 273 * array. 274 * 275 * @param sig a sequence of bytes to inspect for a BOM 276 * @return 0 if the array doesn't start with a BOM; otherwise the number of 277 * BOM bytes 278 */ skipForBOM(byte[] sig)279 public static int skipForBOM(byte[] sig) { 280 String encoding = findBOMEncoding(sig); 281 if (encoding != null) { 282 byte[] bom = BOMS.get(encoding); 283 return bom.length; 284 } 285 return 0; 286 } 287 288 /** 289 * Get the contents of a file or empty string if the file cannot be read. 290 * @param file file object 291 * @return string with the file contents 292 */ getFileContent(File file)293 public static String getFileContent(File file) { 294 if (file == null || !file.canRead()) { 295 return ""; 296 } 297 try { 298 return Files.readString(file.toPath(), Charset.defaultCharset()); 299 } catch (IOException e) { 300 LOGGER.log(Level.WARNING, "failed to read file: {0}", e.getMessage()); 301 } 302 return ""; 303 } 304 } 305