/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Various String utility methods.
 *
 * @author austvik
 */
public final class StringUtils {

    /**
     * Matches a standard end-of-line indicator, identical to Common.lexh's {EOL}.
     */
    public static final Pattern STANDARD_EOL = Pattern.compile("\\r\\n?|\\n");

    /**
     * Matches an apostrophe not following a backslash escape or following an
     * even number¹ of backslash escapes.
     * <p>
     * ¹See {@link RegexUtils#getNotFollowingEscapePattern()} for a caveat
     * about the backslash assertion.
     */
    public static final Pattern APOS_NO_BSESC =
        Pattern.compile("\\'" + RegexUtils.getNotFollowingEscapePattern());

    /**
     * Matches the close of a C comment.
     */
    public static final Pattern END_C_COMMENT = Pattern.compile("\\*\\/");

    /**
     * Matches one of the same possible characters as CommonPath.lexh's {FPath}.
     */
    private static final String FPATH_CHAR_PAT = "[a-zA-Z0-9_\\-\\./]";

    /**
     * Matches a single {@link #FPATH_CHAR_PAT} character anchored at the
     * start of the input.
     */
    private static final Pattern FPATH_CHAR_STARTSMATCH = Pattern.compile("^" + FPATH_CHAR_PAT);

    /** Private to prevent instantiation; this class only has static members. */
    private StringUtils() {
    }

    /**
     * Matches a package-qualified class name: one or more dot-terminated
     * package segments made only of letters and starting lower case, followed
     * by a class name starting upper case.
     */
    static final Pattern javaClassPattern =
        Pattern.compile("([a-z][A-Za-z]*\\.)+[A-Z][A-Za-z0-9]*");

    /**
     * Returns true if the string is possibly a Java class name, and only
     * matching a subset of possible class names to prevent false positives.
     * <p><ul>
     * <li>class must be qualified with a package name</li>
     * <li>package name must contain only letters and start lower case</li>
     * <li>class name must be in CamelCase and start upper case</li>
     * </ul>
     *
     * @param s the string to be checked
     * @return true if string could be a java class name
     */
    public static boolean isPossiblyJavaClass(String s) {
        // matches() is intended here: the whole string must be a class name.
        return javaClassPattern.matcher(s).matches();
    }

    /**
     * Convert value in milliseconds to readable time.
     * <p>
     * The result takes one of these shapes, from longest to shortest delta:
     * <ul>
     * <li>{@code "N day(s) HH:MM:SS"} &ndash; a day or more with non-zero
     * hours or minutes</li>
     * <li>{@code "N day(s)"} &ndash; a day or more with zero hours and zero
     * minutes (any seconds and milliseconds are not reported)</li>
     * <li>{@code "H:MM:SS"} &ndash; less than a day but at least a minute</li>
     * <li>{@code "S.mmm seconds"} &ndash; less than a minute but at least a
     * second ({@code "S.0 seconds"} for a whole number of seconds)</li>
     * <li>{@code "N ms"} &ndash; less than a second</li>
     * </ul>
     * Negative values are not specially handled.
     *
     * @param timeMs delta in milliseconds
     * @return human readable string
     */
    public static String getReadableTime(long timeMs) {
        StringBuilder output = new StringBuilder();
        long timeDelta = timeMs;

        int milliseconds = (int) (timeDelta % 1000);
        timeDelta /= 1000;
        int seconds = (int) (timeDelta % 60);
        timeDelta /= 60;
        int minutes = (int) (timeDelta % 60);
        timeDelta /= 60;
        int hours = (int) (timeDelta % 24);
        // Kept as long: the day count of a very large delta does not fit in an int.
        long days = timeDelta / 24;

        if (days != 0) {
            output.append(days);
            output.append(" day");
            if (days > 1) {
                output.append("s");
            }
        }

        if ((hours != 0) || (minutes != 0)) {
            if (output.length() > 0) {
                // Use zero-padded hours here as it's longer than a day.
                output.append(String.format(" %02d:%02d:%02d", hours, minutes, seconds));
            } else {
                // Don't pad hours if less than a day.
                output.append(String.format("%d:%02d:%02d", hours, minutes, seconds));
            }
        } else if (output.length() > 0) {
            /*
             * If a day+ with zero hours and zero minutes, just report the days.
             * E.g. "1 day", and not "1 day 35 ms".
             */
            return output.toString();
        } else if (seconds != 0) {
            if (milliseconds == 0) {
                // Whole seconds keep the short "N.0 seconds" form.
                output.append(String.format("%d.0 seconds", seconds));
            } else {
                /*
                 * The fraction must be zero-padded to three digits; otherwise
                 * 1005 ms would read as "1.5 seconds" instead of "1.005 seconds".
                 */
                output.append(String.format("%d.%03d seconds", seconds, milliseconds));
            }
        } else if (milliseconds != 0) {
            output.append(String.format("%d ms", milliseconds));
        }

        return (output.length() == 0 ? "0 ms" : output.toString());
    }

    /**
     * Finds n-th index of a given substring in a string.
     *
     * @param str an original string
     * @param substr a substring to match
     * @param n n-th occurrence
     * @return the index of the first character of the substring in the original
     * string where the substring occurred n-th times in the string. If the n-th
     * candidate does not exist, -1 is returned.
     */
    public static int nthIndexOf(String str, String substr, int n) {
        int pos = -1;
        while (n > 0) {
            // Nothing can be found past the end; this also bounds the loop
            // when substr is empty and indexOf keeps advancing by one.
            if (pos >= str.length()) {
                return -1;
            }
            // Resume just after the previous hit, so overlapping occurrences count.
            if ((pos = str.indexOf(substr, pos + 1)) == -1) {
                break;
            }
            n--;
        }
        return pos;
    }

    /**
     * Count the number of ending pushback characters from a matched URI.
     * <p>
     * jflex does not support negative lookbehind, so modifying a URI matcher
     * to backtrack on ending characters that are otherwise normally valid
     * (e.g. '.') is difficult. Instead, this method allows counting and
     * pushing back.
     * @param value the URI to test
     * @return the number of characters greater than or equal to zero which
     * could be pushed back.
     */
    public static int countURIEndingPushback(String value) {
        int n = 0;
        // Walk backward from the end, counting the run of trailing '.'.
        for (int i = value.length() - 1; i >= 0; --i) {
            char c = value.charAt(i);
            if (c == '.') {
                ++n;
            } else {
                break;
            }
        }
        return n;
    }

    /**
     * Determines if the specified {@code pattern} matches in the specified
     * {@code value}.
     * @param value the string to inspect
     * @param pattern the pattern to match
     * @return the index of the first occurrence of the specified pattern, or
     * -1 if there is no such occurrence
     */
    public static int patindexOf(String value, Pattern pattern) {
        Matcher m = pattern.matcher(value);
        if (!m.find()) {
            return -1;
        }
        return m.start();
    }

    /**
     * Determines if the {@code value} starts with a character in
     * CommonPath.lexh's {FPath}.
     * @param value the input to test
     * @return true if {@code value} matches at its start
     */
    public static boolean startsWithFpathChar(String value) {
        /*
         * lookingAt() tests only a prefix of the input. matches() would
         * require the whole input to be exactly one FPath character and so
         * would reject every value longer than one character.
         */
        return FPATH_CHAR_STARTSMATCH.matcher(value).lookingAt();
    }

    /**
     * Determines if the specified pattern, {@code pat}, matches the specified
     * {@code capture}, and computes an eligible pushback.
     * @param capture a defined input
     * @param pat a pattern, or null to skip computation
     * @return a positive value if {@code pat} matches in {@code capture} at or
     * after the second character to indicate the number of characters to
     * pushback including the first-matched character; otherwise 0 to indicate
     * no match or a match at the 0-th character. (The 0-th character is
     * ineligible for fear of looping non-stop upon pushing back the entire
     * {@code yytext()}.)
     */
    public static int countPushback(String capture, Pattern pat) {
        if (pat != null) {
            int o = StringUtils.patindexOf(capture, pat);
            if (o >= 0) {
                int n = capture.length() - o;
                // Push back if positive, but not if equal to the full length.
                if (n > 0 && n < capture.length()) {
                    return n;
                }
            }
        }
        return 0;
    }

    /**
     * Determine the length of the next whitespace- or ISO control
     * character-related sequence within a string.
     * @param str a defined instance
     * @param off the starting offset within {@code str}
     * @param shouldMatch a value indicating whether to match all contiguous
     *                    whitespace or ISO control characters ({@code true}) or
     *                    all contiguous non-whitespace and non-control
     *                    characters ({@code false}) starting at {@code off}
     * @return a length greater than or equal to zero
     */
    public static int whitespaceOrControlLength(String str, int off, boolean shouldMatch) {
        int i = 0;
        while (off + i < str.length()) {
            int cp = Character.codePointAt(str, off + i);
            if ((Character.isWhitespace(cp) || Character.isISOControl(cp)) != shouldMatch) {
                return i;
            }
            // Step by code point so a surrogate pair is consumed as one unit.
            i += Character.charCount(cp);
        }
        /*
         * The run reached the end of the string. Returning the counted
         * length (rather than str.length() - off) gives the same value for
         * every in-range offset and stays at zero when off is past the end.
         */
        return i;
    }
}