xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/util/PathUtils.java (revision 5e19181c54478b4bbe70b9bdf72e94e49df1b1c7)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
19b5840353SAdam Hornáček 
/*
 * Copyright (c) 2017, 2019, Chris Fraire <cfraire@me.com>.
 */
239805b761SAdam Hornáček package org.opengrok.indexer.util;
24b5840353SAdam Hornáček 
256d9d3df9SAnatoly Akkerman import org.opengrok.indexer.logger.LoggerFactory;
266d9d3df9SAnatoly Akkerman 
27b5840353SAdam Hornáček import java.io.IOException;
286d9d3df9SAnatoly Akkerman import java.nio.file.FileSystem;
29b5840353SAdam Hornáček import java.nio.file.Files;
30b5840353SAdam Hornáček import java.nio.file.InvalidPathException;
316d9d3df9SAnatoly Akkerman import java.nio.file.Path;
32b5840353SAdam Hornáček import java.nio.file.Paths;
33b5840353SAdam Hornáček import java.util.Deque;
34b5840353SAdam Hornáček import java.util.LinkedList;
356d9d3df9SAnatoly Akkerman import java.util.Optional;
36b5840353SAdam Hornáček import java.util.Set;
37b5840353SAdam Hornáček import java.util.logging.Level;
38b5840353SAdam Hornáček import java.util.logging.Logger;
392ffbb0cfSVladimir Kotal 
40b5840353SAdam Hornáček /**
41b5840353SAdam Hornáček  * Represents a container for file system paths-related utility methods.
42b5840353SAdam Hornáček  */
43b5840353SAdam Hornáček public class PathUtils {
44b5840353SAdam Hornáček     private static final Logger LOGGER =
45b5840353SAdam Hornáček         LoggerFactory.getLogger(PathUtils.class);
46b5840353SAdam Hornáček 
47b5840353SAdam Hornáček     /**
486d9d3df9SAnatoly Akkerman      * Calls {@link #getRelativeToCanonical(Path, Path, Set, Set)}
490e0ac58dSChris Fraire      * with {@code path}, {@code canonical}, {@code allowedSymlinks=null}, and
500e0ac58dSChris Fraire      * {@code canonicalRoots=null} (to disable validation of links).
51b5840353SAdam Hornáček      * @param path a non-canonical (or canonical) path to compare
52b5840353SAdam Hornáček      * @param canonical a canonical path to compare against
530e0ac58dSChris Fraire      * @return a relative path determined as described -- or {@code path} if no
540e0ac58dSChris Fraire      * canonical relativity is found.
55b5840353SAdam Hornáček      * @throws IOException if an error occurs determining canonical paths
56b5840353SAdam Hornáček      * for portions of {@code path}
57b5840353SAdam Hornáček      */
getRelativeToCanonical(Path path, Path canonical)586d9d3df9SAnatoly Akkerman     public static String getRelativeToCanonical(Path path, Path canonical)
59b5840353SAdam Hornáček         throws IOException {
60b5840353SAdam Hornáček         try {
610e0ac58dSChris Fraire             return getRelativeToCanonical(path, canonical, null, null);
62b5840353SAdam Hornáček         } catch (ForbiddenSymlinkException e) {
63b5840353SAdam Hornáček             // should not get here with allowedSymlinks==null
646d9d3df9SAnatoly Akkerman             return path.toString();
65b5840353SAdam Hornáček         }
66b5840353SAdam Hornáček     }
67b5840353SAdam Hornáček 
68b5840353SAdam Hornáček     /**
69b5840353SAdam Hornáček      * Determine a relative path comparing {@code path} to {@code canonical},
70b5840353SAdam Hornáček      * with an algorithm that can handle the possibility of one or more
71b5840353SAdam Hornáček      * symbolic links as components of {@code path}.
72b5840353SAdam Hornáček      * <p>
73b5840353SAdam Hornáček      * When {@code allowedSymlinks} is not null, any symbolic links as
74b5840353SAdam Hornáček      * components of {@code path} (below {@code canonical}) are required to
758eaf39ddSChris Fraire      * match an element of {@code allowedSymlinks} or target a canonical child
768eaf39ddSChris Fraire      * of an element of {@code allowedSymlinks}.
77b5840353SAdam Hornáček      * <p>
78b5840353SAdam Hornáček      * E.g., with {@code path="/var/opengrok/src/proj_a"} and
79b5840353SAdam Hornáček      * {@code canonical="/private/var/opengrok/src"} where /var is linked to
80b5840353SAdam Hornáček      * /private/var and where /var/opengrok/src/proj_a is linked to /proj/a,
81b5840353SAdam Hornáček      * the function will return {@code "proj_a"} as a relative path.
82b5840353SAdam Hornáček      * <p>
83b5840353SAdam Hornáček      * The algorithm will have evaluated canonical paths upward from
84b5840353SAdam Hornáček      * (non-canonical) /var/opengrok/src/proj_a (a.k.a. /proj/a) to find a
85b5840353SAdam Hornáček      * canonical similarity at /var/opengrok/src (a.k.a.
86b5840353SAdam Hornáček      * /private/var/opengrok/src).
87b5840353SAdam Hornáček      * @param path a non-canonical (or canonical) path to compare
88b5840353SAdam Hornáček      * @param canonical a canonical path to compare against
89b5840353SAdam Hornáček      * @param allowedSymlinks optional set of allowed symbolic links, so that
900e0ac58dSChris Fraire      * any links encountered within {@code path} and not covered by the set (or
910e0ac58dSChris Fraire      * whitelisted in a defined {@code canonicalRoots}) will abort the algorithm
920e0ac58dSChris Fraire      * @param canonicalRoots optional set of allowed canonicalRoots, so that
930e0ac58dSChris Fraire      * any checks done because of a defined {@code allowedSymlinks} will first
940e0ac58dSChris Fraire      * check against the whitelist of canonical roots and possibly short-circuit
950e0ac58dSChris Fraire      * the explicit validation against {@code allowedSymlinks}.
96b5840353SAdam Hornáček      * @return a relative path determined as described above -- or {@code path}
97b5840353SAdam Hornáček      * if no canonical relativity is found
98b5840353SAdam Hornáček      * @throws ForbiddenSymlinkException if symbolic-link checking is active
99b5840353SAdam Hornáček      * and it encounters an ineligible link
100b5840353SAdam Hornáček      * @throws InvalidPathException if path cannot be decoded
101b5840353SAdam Hornáček      */
getRelativeToCanonical(Path path, Path canonical, Set<String> allowedSymlinks, Set<String> canonicalRoots)1026d9d3df9SAnatoly Akkerman     public static String getRelativeToCanonical(Path path, Path canonical,
1030e0ac58dSChris Fraire             Set<String> allowedSymlinks, Set<String> canonicalRoots)
104b5840353SAdam Hornáček             throws IOException, ForbiddenSymlinkException, InvalidPathException {
105b5840353SAdam Hornáček 
106a72324b1SAdam Hornáček         if (path.equals(canonical)) {
107a72324b1SAdam Hornáček             return "";
108a72324b1SAdam Hornáček         }
109b5840353SAdam Hornáček 
110b5840353SAdam Hornáček         // The following fixup of \\ is really to allow
111b5840353SAdam Hornáček         // IndexDatabaseTest.testGetDefinitions() to succeed on Linux or macOS.
112b5840353SAdam Hornáček         // That test has an assertion that operation is the "same for windows
113b5840353SAdam Hornáček         // delimiters" and passes a path with backslashes. On Windows, the
114b5840353SAdam Hornáček         // following fixup would not be needed, since File and Paths recognize
115b5840353SAdam Hornáček         // backslash as a delimiter. On Linux and macOS, any backslash needs to
116b5840353SAdam Hornáček         // be normalized.
1176d9d3df9SAnatoly Akkerman         final FileSystem fileSystem = path.getFileSystem();
1186d9d3df9SAnatoly Akkerman         final String separator = fileSystem.getSeparator();
1196d9d3df9SAnatoly Akkerman         String strPath = path.toString();
1206d9d3df9SAnatoly Akkerman         strPath = strPath.replace("\\", separator);
1216d9d3df9SAnatoly Akkerman 
1226d9d3df9SAnatoly Akkerman         String strCanonical = canonical.toString();
1236d9d3df9SAnatoly Akkerman         strCanonical = strCanonical.replace("\\", separator);
1246d9d3df9SAnatoly Akkerman         String normCanonical = strCanonical.endsWith(separator) ?
1256d9d3df9SAnatoly Akkerman             strCanonical : strCanonical + separator;
126b5840353SAdam Hornáček         Deque<String> tail = null;
127b5840353SAdam Hornáček 
1286d9d3df9SAnatoly Akkerman         Path iterPath = fileSystem.getPath(strPath);
129b5840353SAdam Hornáček         while (iterPath != null) {
1306d9d3df9SAnatoly Akkerman             Path iterCanon;
1316d9d3df9SAnatoly Akkerman             try {
1326d9d3df9SAnatoly Akkerman                 iterCanon = iterPath.toRealPath();
133*5e19181cSanatoly             } catch (IOException e) {
1346d9d3df9SAnatoly Akkerman                 iterCanon = iterPath.normalize().toAbsolutePath();
1356d9d3df9SAnatoly Akkerman             }
136b5840353SAdam Hornáček 
137b5840353SAdam Hornáček             // optional symbolic-link check
138b5840353SAdam Hornáček             if (allowedSymlinks != null) {
1396d9d3df9SAnatoly Akkerman                 if (Files.isSymbolicLink(iterPath) &&
1406d9d3df9SAnatoly Akkerman                     !isWhitelisted(iterCanon.toString(), canonicalRoots) &&
141b5840353SAdam Hornáček                     !isAllowedSymlink(iterCanon, allowedSymlinks)) {
1426d9d3df9SAnatoly Akkerman                     String format = String.format("%1$s is prohibited symlink", iterPath);
143b5840353SAdam Hornáček                     LOGGER.finest(format);
144b5840353SAdam Hornáček                     throw new ForbiddenSymlinkException(format);
145b5840353SAdam Hornáček                 }
146b5840353SAdam Hornáček             }
147b5840353SAdam Hornáček 
148b5840353SAdam Hornáček             String rel = null;
149b5840353SAdam Hornáček             if (iterCanon.startsWith(normCanonical)) {
1506d9d3df9SAnatoly Akkerman                 rel = fileSystem.getPath(normCanonical).relativize(iterCanon).toString();
1516d9d3df9SAnatoly Akkerman             } else if (normCanonical.equals(iterCanon + separator)) {
152b5840353SAdam Hornáček                 rel = "";
153b5840353SAdam Hornáček             }
154b5840353SAdam Hornáček             if (rel != null) {
155b5840353SAdam Hornáček                 if (tail != null) {
156b5840353SAdam Hornáček                     while (tail.size() > 0) {
157b5840353SAdam Hornáček                         rel = Paths.get(rel, tail.pop()).toString();
158b5840353SAdam Hornáček                     }
159b5840353SAdam Hornáček                 }
160b5840353SAdam Hornáček                 return rel;
161b5840353SAdam Hornáček             }
162b5840353SAdam Hornáček 
163a72324b1SAdam Hornáček             if (tail == null) {
164a72324b1SAdam Hornáček                 tail = new LinkedList<>();
165a72324b1SAdam Hornáček             }
1666d9d3df9SAnatoly Akkerman             tail.push(Optional.ofNullable(iterPath.getFileName()).map(Path::toString).orElse(""));
1676d9d3df9SAnatoly Akkerman             iterPath = iterPath.getParent();
168b5840353SAdam Hornáček         }
169b5840353SAdam Hornáček 
170b5840353SAdam Hornáček         // `path' is not found to be relative to `canonical', so return as is.
1716d9d3df9SAnatoly Akkerman         return path.toString();
172b5840353SAdam Hornáček     }
173b5840353SAdam Hornáček 
isAllowedSymlink(Path canonicalFile, Set<String> allowedSymlinks)1746d9d3df9SAnatoly Akkerman     private static boolean isAllowedSymlink(Path canonicalFile,
175b5840353SAdam Hornáček         Set<String> allowedSymlinks) {
1766d9d3df9SAnatoly Akkerman         final FileSystem fileSystem = canonicalFile.getFileSystem();
1776d9d3df9SAnatoly Akkerman         String canonicalFileStr = canonicalFile.toString();
178b5840353SAdam Hornáček         for (String allowedSymlink : allowedSymlinks) {
179b5840353SAdam Hornáček             String canonicalLink;
180b5840353SAdam Hornáček             try {
1816d9d3df9SAnatoly Akkerman                 canonicalLink = fileSystem.getPath(allowedSymlink).toRealPath().toString();
182b5840353SAdam Hornáček             } catch (IOException e) {
183b5840353SAdam Hornáček                 if (LOGGER.isLoggable(Level.FINE)) {
184b5840353SAdam Hornáček                     LOGGER.fine(String.format("unresolvable symlink: %s",
185b5840353SAdam Hornáček                         allowedSymlink));
186b5840353SAdam Hornáček                 }
187b5840353SAdam Hornáček                 continue;
188b5840353SAdam Hornáček             }
1896d9d3df9SAnatoly Akkerman             if (canonicalFileStr.equals(canonicalLink) ||
1906d9d3df9SAnatoly Akkerman                 canonicalFile.startsWith(canonicalLink + fileSystem.getSeparator())) {
191a72324b1SAdam Hornáček                 return true;
192a72324b1SAdam Hornáček             }
193b5840353SAdam Hornáček         }
194b5840353SAdam Hornáček         return false;
195b5840353SAdam Hornáček     }
196b5840353SAdam Hornáček 
isWhitelisted(String canonical, Set<String> canonicalRoots)1970e0ac58dSChris Fraire     private static boolean isWhitelisted(String canonical, Set<String> canonicalRoots) {
1980e0ac58dSChris Fraire         if (canonicalRoots != null) {
1990e0ac58dSChris Fraire             for (String canonicalRoot : canonicalRoots) {
2000e0ac58dSChris Fraire                 if (canonical.startsWith(canonicalRoot)) {
2010e0ac58dSChris Fraire                     return true;
2020e0ac58dSChris Fraire                 }
2030e0ac58dSChris Fraire             }
2040e0ac58dSChris Fraire         }
2050e0ac58dSChris Fraire         return false;
2060e0ac58dSChris Fraire     }
2070e0ac58dSChris Fraire 
208ff44f24aSAdam Hornáček     /** Private to enforce static. */
PathUtils()209b5840353SAdam Hornáček     private PathUtils() {
210b5840353SAdam Hornáček     }
211b5840353SAdam Hornáček }
212