1b5840353SAdam Hornáček /* 2b5840353SAdam Hornáček * CDDL HEADER START 3b5840353SAdam Hornáček * 4b5840353SAdam Hornáček * The contents of this file are subject to the terms of the 5b5840353SAdam Hornáček * Common Development and Distribution License (the "License"). 6b5840353SAdam Hornáček * You may not use this file except in compliance with the License. 7b5840353SAdam Hornáček * 8b5840353SAdam Hornáček * See LICENSE.txt included in this distribution for the specific 9b5840353SAdam Hornáček * language governing permissions and limitations under the License. 10b5840353SAdam Hornáček * 11b5840353SAdam Hornáček * When distributing Covered Code, include this CDDL HEADER in each 12b5840353SAdam Hornáček * file and include the License file at LICENSE.txt. 13b5840353SAdam Hornáček * If applicable, add the following below this CDDL HEADER, with the 14b5840353SAdam Hornáček * fields enclosed by brackets "[]" replaced with your own identifying 15b5840353SAdam Hornáček * information: Portions Copyright [yyyy] [name of copyright owner] 16b5840353SAdam Hornáček * 17b5840353SAdam Hornáček * CDDL HEADER END 18b5840353SAdam Hornáček */ 19b5840353SAdam Hornáček 20b5840353SAdam Hornáček /* 219f5ca421SChris Fraire * Copyright (c) 2017, 2019, Chris Fraire <cfraire@me.com>. 22b5840353SAdam Hornáček */ 239805b761SAdam Hornáček package org.opengrok.indexer.util; 24b5840353SAdam Hornáček 256d9d3df9SAnatoly Akkerman import org.opengrok.indexer.logger.LoggerFactory; 266d9d3df9SAnatoly Akkerman 27b5840353SAdam Hornáček import java.io.IOException; 286d9d3df9SAnatoly Akkerman import java.nio.file.FileSystem; 29b5840353SAdam Hornáček import java.nio.file.Files; 30b5840353SAdam Hornáček import java.nio.file.InvalidPathException; 316d9d3df9SAnatoly Akkerman import java.nio.file.Path; 32b5840353SAdam Hornáček import java.nio.file.Paths; 33b5840353SAdam Hornáček import java.util.Deque; 34b5840353SAdam Hornáček import java.util.LinkedList; 356d9d3df9SAnatoly Akkerman import java.util.Optional; 36b5840353SAdam Hornáček import java.util.Set; 37b5840353SAdam Hornáček import java.util.logging.Level; 38b5840353SAdam Hornáček import java.util.logging.Logger; 392ffbb0cfSVladimir Kotal 40b5840353SAdam Hornáček /** 41b5840353SAdam Hornáček * Represents a container for file system paths-related utility methods. 42b5840353SAdam Hornáček */ 43b5840353SAdam Hornáček public class PathUtils { 44b5840353SAdam Hornáček private static final Logger LOGGER = 45b5840353SAdam Hornáček LoggerFactory.getLogger(PathUtils.class); 46b5840353SAdam Hornáček 47b5840353SAdam Hornáček /** 486d9d3df9SAnatoly Akkerman * Calls {@link #getRelativeToCanonical(Path, Path, Set, Set)} 490e0ac58dSChris Fraire * with {@code path}, {@code canonical}, {@code allowedSymlinks=null}, and 500e0ac58dSChris Fraire * {@code canonicalRoots=null} (to disable validation of links). 51b5840353SAdam Hornáček * @param path a non-canonical (or canonical) path to compare 52b5840353SAdam Hornáček * @param canonical a canonical path to compare against 530e0ac58dSChris Fraire * @return a relative path determined as described -- or {@code path} if no 540e0ac58dSChris Fraire * canonical relativity is found. 55b5840353SAdam Hornáček * @throws IOException if an error occurs determining canonical paths 56b5840353SAdam Hornáček * for portions of {@code path} 57b5840353SAdam Hornáček */ getRelativeToCanonical(Path path, Path canonical)586d9d3df9SAnatoly Akkerman public static String getRelativeToCanonical(Path path, Path canonical) 59b5840353SAdam Hornáček throws IOException { 60b5840353SAdam Hornáček try { 610e0ac58dSChris Fraire return getRelativeToCanonical(path, canonical, null, null); 62b5840353SAdam Hornáček } catch (ForbiddenSymlinkException e) { 63b5840353SAdam Hornáček // should not get here with allowedSymlinks==null 646d9d3df9SAnatoly Akkerman return path.toString(); 65b5840353SAdam Hornáček } 66b5840353SAdam Hornáček } 67b5840353SAdam Hornáček 68b5840353SAdam Hornáček /** 69b5840353SAdam Hornáček * Determine a relative path comparing {@code path} to {@code canonical}, 70b5840353SAdam Hornáček * with an algorithm that can handle the possibility of one or more 71b5840353SAdam Hornáček * symbolic links as components of {@code path}. 72b5840353SAdam Hornáček * <p> 73b5840353SAdam Hornáček * When {@code allowedSymlinks} is not null, any symbolic links as 74b5840353SAdam Hornáček * components of {@code path} (below {@code canonical}) are required to 758eaf39ddSChris Fraire * match an element of {@code allowedSymlinks} or target a canonical child 768eaf39ddSChris Fraire * of an element of {@code allowedSymlinks}. 77b5840353SAdam Hornáček * <p> 78b5840353SAdam Hornáček * E.g., with {@code path="/var/opengrok/src/proj_a"} and 79b5840353SAdam Hornáček * {@code canonical="/private/var/opengrok/src"} where /var is linked to 80b5840353SAdam Hornáček * /private/var and where /var/opengrok/src/proj_a is linked to /proj/a, 81b5840353SAdam Hornáček * the function will return {@code "proj_a"} as a relative path. 82b5840353SAdam Hornáček * <p> 83b5840353SAdam Hornáček * The algorithm will have evaluated canonical paths upward from 84b5840353SAdam Hornáček * (non-canonical) /var/opengrok/src/proj_a (a.k.a. /proj/a) to find a 85b5840353SAdam Hornáček * canonical similarity at /var/opengrok/src (a.k.a. 86b5840353SAdam Hornáček * /private/var/opengrok/src). 87b5840353SAdam Hornáček * @param path a non-canonical (or canonical) path to compare 88b5840353SAdam Hornáček * @param canonical a canonical path to compare against 89b5840353SAdam Hornáček * @param allowedSymlinks optional set of allowed symbolic links, so that 900e0ac58dSChris Fraire * any links encountered within {@code path} and not covered by the set (or 910e0ac58dSChris Fraire * whitelisted in a defined {@code canonicalRoots}) will abort the algorithm 920e0ac58dSChris Fraire * @param canonicalRoots optional set of allowed canonicalRoots, so that 930e0ac58dSChris Fraire * any checks done because of a defined {@code allowedSymlinks} will first 940e0ac58dSChris Fraire * check against the whitelist of canonical roots and possibly short-circuit 950e0ac58dSChris Fraire * the explicit validation against {@code allowedSymlinks}. 96b5840353SAdam Hornáček * @return a relative path determined as described above -- or {@code path} 97b5840353SAdam Hornáček * if no canonical relativity is found 98b5840353SAdam Hornáček * @throws ForbiddenSymlinkException if symbolic-link checking is active 99b5840353SAdam Hornáček * and it encounters an ineligible link 100b5840353SAdam Hornáček * @throws InvalidPathException if path cannot be decoded 101b5840353SAdam Hornáček */ getRelativeToCanonical(Path path, Path canonical, Set<String> allowedSymlinks, Set<String> canonicalRoots)1026d9d3df9SAnatoly Akkerman public static String getRelativeToCanonical(Path path, Path canonical, 1030e0ac58dSChris Fraire Set<String> allowedSymlinks, Set<String> canonicalRoots) 104b5840353SAdam Hornáček throws IOException, ForbiddenSymlinkException, InvalidPathException { 105b5840353SAdam Hornáček 106a72324b1SAdam Hornáček if (path.equals(canonical)) { 107a72324b1SAdam Hornáček return ""; 108a72324b1SAdam Hornáček } 109b5840353SAdam Hornáček 110b5840353SAdam Hornáček // The following fixup of \\ is really to allow 111b5840353SAdam Hornáček // IndexDatabaseTest.testGetDefinitions() to succeed on Linux or macOS. 112b5840353SAdam Hornáček // That test has an assertion that operation is the "same for windows 113b5840353SAdam Hornáček // delimiters" and passes a path with backslashes. On Windows, the 114b5840353SAdam Hornáček // following fixup would not be needed, since File and Paths recognize 115b5840353SAdam Hornáček // backslash as a delimiter. On Linux and macOS, any backslash needs to 116b5840353SAdam Hornáček // be normalized. 1176d9d3df9SAnatoly Akkerman final FileSystem fileSystem = path.getFileSystem(); 1186d9d3df9SAnatoly Akkerman final String separator = fileSystem.getSeparator(); 1196d9d3df9SAnatoly Akkerman String strPath = path.toString(); 1206d9d3df9SAnatoly Akkerman strPath = strPath.replace("\\", separator); 1216d9d3df9SAnatoly Akkerman 1226d9d3df9SAnatoly Akkerman String strCanonical = canonical.toString(); 1236d9d3df9SAnatoly Akkerman strCanonical = strCanonical.replace("\\", separator); 1246d9d3df9SAnatoly Akkerman String normCanonical = strCanonical.endsWith(separator) ? 1256d9d3df9SAnatoly Akkerman strCanonical : strCanonical + separator; 126b5840353SAdam Hornáček Deque<String> tail = null; 127b5840353SAdam Hornáček 1286d9d3df9SAnatoly Akkerman Path iterPath = fileSystem.getPath(strPath); 129b5840353SAdam Hornáček while (iterPath != null) { 1306d9d3df9SAnatoly Akkerman Path iterCanon; 1316d9d3df9SAnatoly Akkerman try { 1326d9d3df9SAnatoly Akkerman iterCanon = iterPath.toRealPath(); 133*5e19181cSanatoly } catch (IOException e) { 1346d9d3df9SAnatoly Akkerman iterCanon = iterPath.normalize().toAbsolutePath(); 1356d9d3df9SAnatoly Akkerman } 136b5840353SAdam Hornáček 137b5840353SAdam Hornáček // optional symbolic-link check 138b5840353SAdam Hornáček if (allowedSymlinks != null) { 1396d9d3df9SAnatoly Akkerman if (Files.isSymbolicLink(iterPath) && 1406d9d3df9SAnatoly Akkerman !isWhitelisted(iterCanon.toString(), canonicalRoots) && 141b5840353SAdam Hornáček !isAllowedSymlink(iterCanon, allowedSymlinks)) { 1426d9d3df9SAnatoly Akkerman String format = String.format("%1$s is prohibited symlink", iterPath); 143b5840353SAdam Hornáček LOGGER.finest(format); 144b5840353SAdam Hornáček throw new ForbiddenSymlinkException(format); 145b5840353SAdam Hornáček } 146b5840353SAdam Hornáček } 147b5840353SAdam Hornáček 148b5840353SAdam Hornáček String rel = null; 149b5840353SAdam Hornáček if (iterCanon.startsWith(normCanonical)) { 1506d9d3df9SAnatoly Akkerman rel = fileSystem.getPath(normCanonical).relativize(iterCanon).toString(); 1516d9d3df9SAnatoly Akkerman } else if (normCanonical.equals(iterCanon + separator)) { 152b5840353SAdam Hornáček rel = ""; 153b5840353SAdam Hornáček } 154b5840353SAdam Hornáček if (rel != null) { 155b5840353SAdam Hornáček if (tail != null) { 156b5840353SAdam Hornáček while (tail.size() > 0) { 157b5840353SAdam Hornáček rel = Paths.get(rel, tail.pop()).toString(); 158b5840353SAdam Hornáček } 159b5840353SAdam Hornáček } 160b5840353SAdam Hornáček return rel; 161b5840353SAdam Hornáček } 162b5840353SAdam Hornáček 163a72324b1SAdam Hornáček if (tail == null) { 164a72324b1SAdam Hornáček tail = new LinkedList<>(); 165a72324b1SAdam Hornáček } 1666d9d3df9SAnatoly Akkerman tail.push(Optional.ofNullable(iterPath.getFileName()).map(Path::toString).orElse("")); 1676d9d3df9SAnatoly Akkerman iterPath = iterPath.getParent(); 168b5840353SAdam Hornáček } 169b5840353SAdam Hornáček 170b5840353SAdam Hornáček // `path' is not found to be relative to `canonical', so return as is. 1716d9d3df9SAnatoly Akkerman return path.toString(); 172b5840353SAdam Hornáček } 173b5840353SAdam Hornáček isAllowedSymlink(Path canonicalFile, Set<String> allowedSymlinks)1746d9d3df9SAnatoly Akkerman private static boolean isAllowedSymlink(Path canonicalFile, 175b5840353SAdam Hornáček Set<String> allowedSymlinks) { 1766d9d3df9SAnatoly Akkerman final FileSystem fileSystem = canonicalFile.getFileSystem(); 1776d9d3df9SAnatoly Akkerman String canonicalFileStr = canonicalFile.toString(); 178b5840353SAdam Hornáček for (String allowedSymlink : allowedSymlinks) { 179b5840353SAdam Hornáček String canonicalLink; 180b5840353SAdam Hornáček try { 1816d9d3df9SAnatoly Akkerman canonicalLink = fileSystem.getPath(allowedSymlink).toRealPath().toString(); 182b5840353SAdam Hornáček } catch (IOException e) { 183b5840353SAdam Hornáček if (LOGGER.isLoggable(Level.FINE)) { 184b5840353SAdam Hornáček LOGGER.fine(String.format("unresolvable symlink: %s", 185b5840353SAdam Hornáček allowedSymlink)); 186b5840353SAdam Hornáček } 187b5840353SAdam Hornáček continue; 188b5840353SAdam Hornáček } 1896d9d3df9SAnatoly Akkerman if (canonicalFileStr.equals(canonicalLink) || 1906d9d3df9SAnatoly Akkerman canonicalFile.startsWith(canonicalLink + fileSystem.getSeparator())) { 191a72324b1SAdam Hornáček return true; 192a72324b1SAdam Hornáček } 193b5840353SAdam Hornáček } 194b5840353SAdam Hornáček return false; 195b5840353SAdam Hornáček } 196b5840353SAdam Hornáček isWhitelisted(String canonical, Set<String> canonicalRoots)1970e0ac58dSChris Fraire private static boolean isWhitelisted(String canonical, Set<String> canonicalRoots) { 1980e0ac58dSChris Fraire if (canonicalRoots != null) { 1990e0ac58dSChris Fraire for (String canonicalRoot : canonicalRoots) { 2000e0ac58dSChris Fraire if (canonical.startsWith(canonicalRoot)) { 2010e0ac58dSChris Fraire return true; 2020e0ac58dSChris Fraire } 2030e0ac58dSChris Fraire } 2040e0ac58dSChris Fraire } 2050e0ac58dSChris Fraire return false; 2060e0ac58dSChris Fraire } 2070e0ac58dSChris Fraire 208ff44f24aSAdam Hornáček /** Private to enforce static. */ PathUtils()209b5840353SAdam Hornáček private PathUtils() { 210b5840353SAdam Hornáček } 211b5840353SAdam Hornáček } 212