1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. 22 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>. 23 */ 24 package org.opengrok.indexer.util; 25 26 import java.util.regex.Matcher; 27 import java.util.regex.Pattern; 28 29 /** 30 * Various String utility methods. 31 * 32 * @author austvik 33 */ 34 public final class StringUtils { 35 36 /** 37 * Matches a standard end-of-line indicator, identical to Common.lexh's {EOL}. 38 */ 39 public static final Pattern STANDARD_EOL = Pattern.compile("\\r\\n?|\\n"); 40 41 /** 42 * Matches an apostrophe not following a backslash escape or following an 43 * even number¹ of backslash escapes. 44 * <p> 45 * ¹See {@link RegexUtils#getNotFollowingEscapePattern()} for a caveat 46 * about the backslash assertion. 47 */ 48 public static final Pattern APOS_NO_BSESC = 49 Pattern.compile("\\'" + RegexUtils.getNotFollowingEscapePattern()); 50 51 /** 52 * Matches the close of a C comment. 53 */ 54 public static final Pattern END_C_COMMENT = Pattern.compile("\\*\\/"); 55 56 /** 57 * Matches one of the same possible characters as CommonPath.lexh's {FPath}. 58 */ 59 private static final String FPATH_CHAR_PAT = "[a-zA-Z0-9_\\-\\./]"; 60 61 private static final Pattern FPATH_CHAR_STARTSMATCH = Pattern.compile("^" + FPATH_CHAR_PAT); 62 63 /** Private to enforce singleton. */ StringUtils()64 private StringUtils() { 65 } 66 67 static final Pattern javaClassPattern = 68 Pattern.compile("([a-z][A-Za-z]*\\.)+[A-Z][A-Za-z0-9]*"); 69 /** 70 * Returns true if the string is possibly a Java class name, and only 71 * matching a subset of possible class names to prevent false positives. 72 * <p><ul> 73 * <li>class must be qualified with a package name</li> 74 * <li>package name must contain only letters and start lower case</li> 75 * <li>class name must be in CamelCase and start upper case</li> 76 * </ul> 77 * 78 * @param s the string to be checked 79 * @return true if string could be a java class name 80 */ isPossiblyJavaClass(String s)81 public static boolean isPossiblyJavaClass(String s) { 82 return javaClassPattern.matcher(s).matches(); 83 } 84 85 /** 86 * Convert value in milliseconds to readable time. 87 * @param timeMs delta in milliseconds 88 * @return human readable string 89 */ getReadableTime(long timeMs)90 public static String getReadableTime(long timeMs) { 91 StringBuilder output = new StringBuilder(); 92 long timeDelta = timeMs; 93 94 int milliseconds = (int) (timeDelta % 1000); 95 timeDelta /= 1000; 96 int seconds = (int) (timeDelta % 60); 97 timeDelta /= 60; 98 int minutes = (int) (timeDelta % 60); 99 timeDelta /= 60; 100 int hours = (int) (timeDelta % 24); 101 int days = (int) (timeDelta / 24); 102 103 if (days != 0) { 104 output.append(days); 105 output.append(" day"); 106 if (days > 1) { 107 output.append("s"); 108 } 109 } 110 if ((hours != 0) || (minutes != 0)) { 111 if (output.length() > 0) { 112 // Use zero-padded hours here as it's longer than a day. 113 output.append(String.format(" %02d:%02d:%02d", hours, minutes, seconds)); 114 } else { 115 // Don't pad hours if less than a day. 116 output.append(String.format("%d:%02d:%02d", hours, minutes, seconds)); 117 } 118 } else if (output.length() > 0) { 119 /* 120 * If a day+ with zero hours and zero minutes, just report the days. 121 * E.g. "1 day", and not "1 day 35 ms". 122 */ 123 return output.toString(); 124 } else if (seconds != 0) { 125 output.append(String.format("%d.%d seconds", seconds, milliseconds)); 126 } else if (milliseconds != 0) { 127 output.append(String.format("%d ms", milliseconds)); 128 } 129 130 return (output.length() == 0 ? "0 ms" : output.toString()); 131 } 132 133 /** 134 * Finds n-th index of a given substring in a string. 135 * 136 * @param str an original string 137 * @param substr a substring to match 138 * @param n n-th occurrence 139 * @return the index of the first character of the substring in the original 140 * string where the substring occurred n-th times in the string. If the n-th 141 * candidate does not exist, -1 is returned. 142 */ nthIndexOf(String str, String substr, int n)143 public static int nthIndexOf(String str, String substr, int n) { 144 int pos = -1; 145 while (n > 0) { 146 if (pos >= str.length()) { 147 return -1; 148 } 149 if ((pos = str.indexOf(substr, pos + 1)) == -1) { 150 break; 151 } 152 n--; 153 } 154 return pos; 155 } 156 157 /** 158 * Count the number of ending pushback characters from a matched URI. 159 * <p> 160 * jflex does not support negative lookbehind, so modifying a URI matcher 161 * to backtrack on ending characters that are otherwise normally valid 162 * (e.g. '.') is difficult. Instead, this method allows counting and 163 * pushing back. 164 * @param value the URI to test 165 * @return the number of characters greater than or equal to zero which 166 * could be pushed back. 167 */ countURIEndingPushback(String value)168 public static int countURIEndingPushback(String value) { 169 int n = 0; 170 for (int i = value.length() - 1; i >= 0; --i) { 171 char c = value.charAt(i); 172 if (c == '.') { 173 ++n; 174 } else { 175 break; 176 } 177 } 178 return n; 179 } 180 181 /** 182 * Determines if the specified {@code pattern} matches in the specified 183 * {@code value}. 184 * @param value the string to inspect 185 * @param pattern the pattern to match 186 * @return the index of the first occurrence of the specified pattern, or 187 * -1 if there is no such occurrence 188 */ patindexOf(String value, Pattern pattern)189 public static int patindexOf(String value, Pattern pattern) { 190 Matcher m = pattern.matcher(value); 191 if (!m.find()) { 192 return -1; 193 } 194 return m.start(); 195 } 196 197 /** 198 * Determines if the {@code value} starts with a character in 199 * CommonPath.lexh's {FPath}. 200 * @param value the input to test 201 * @return true if {@code value} matches at its start 202 */ startsWithFpathChar(String value)203 public static boolean startsWithFpathChar(String value) { 204 return FPATH_CHAR_STARTSMATCH.matcher(value).matches(); 205 } 206 207 /** 208 * Determines if the specified pattern, {@code pat}, matches the specified 209 * {@code capture}, and computes an eligible pushback. 210 * @param capture a defined input 211 * @param pat a pattern, or null to skip computation 212 * @return a positive value if {@code pat} matches in {@code capture} at or 213 * after the second character to indicate the number of characters to 214 * pushback including the first-matched character; otherwise 0 to indicate 215 * no match or a match at the 0-th character. (The 0-th chracter is 216 * ineligible for fear of looping non-stop upon pushing back the entire 217 * {@code yytext()}.) 218 */ countPushback(String capture, Pattern pat)219 public static int countPushback(String capture, Pattern pat) { 220 if (pat != null) { 221 int o = StringUtils.patindexOf(capture, pat); 222 if (o >= 0) { 223 int n = capture.length() - o; 224 // Push back if positive, but not if equal to the full length. 225 if (n > 0 && n < capture.length()) { 226 return n; 227 } 228 } 229 } 230 return 0; 231 } 232 233 /** 234 * Determine the length of the next whitespace- or ISO control 235 * character-related sequence within a string. 236 * @param str a defined instance 237 * @param off the starting offset within {@code str} 238 * @param shouldMatch a value indicating whether to match all contiguous 239 * whitespace or ISO control characters ({@code true}) or 240 * all contiguous non-whitespace and non-control 241 * characters ({@code false}) starting at {@code off} 242 * @return a length greater than or equal to zero 243 */ whitespaceOrControlLength(String str, int off, boolean shouldMatch)244 public static int whitespaceOrControlLength(String str, int off, boolean shouldMatch) { 245 int i = 0; 246 while (off + i < str.length()) { 247 int cp = Character.codePointAt(str, off + i); 248 if ((Character.isWhitespace(cp) || Character.isISOControl(cp)) != shouldMatch) { 249 return i; 250 } 251 i += Character.charCount(cp); 252 } 253 return str.length() - off; 254 } 255 } 256