package Torello.Java;

import java.util.*;
import java.util.regex.*;
import java.util.zip.*;
import java.io.*;
import java.util.stream.*;
import java.util.function.*;

import java.text.DecimalFormat;
import java.net.URL;

import Torello.Java.Function.IntCharFunction;
import Torello.Java.Function.IntTFunction;

import Torello.Java.ReadOnly.ReadOnlyList;
import Torello.Java.ReadOnly.ReadOnlyArrayList;

import Torello.Java.Additional.Counter;

/**
 * A plethora of extensions to Java's {@code String} class.
 *
 * <EMBED CLASS='external-html' DATA-FILE-ID=STRING_PARSE>
 */
@Torello.JavaDoc.StaticFunctional
public class StringParse
{
    // Private constructor: the class only exposes static members and is never instantiated.
    private StringParse() { }


    // ********************************************************************************************
    // ********************************************************************************************
    // Constants
    // ********************************************************************************************
    // ********************************************************************************************


    // Groups digits in threes, separated by commas ("#,###").  Used by commas(long).
    // NOTE(review): java.text.DecimalFormat is documented as not synchronized, and this single
    // instance is shared by every caller - confirm whether concurrent use is expected.
    private static final DecimalFormat formatter = new DecimalFormat("#,###");

    /**
     * This regular expression matches one or more consecutive white-space characters
     * ({@code \s+}) found in a java {@code String}.
     *
     * @see #removeWhiteSpace(String)
     */
    public static final Pattern WHITE_SPACE_REGEX = Pattern.compile("\\s+");
/**
 * This {@code Predicate<String>} checks whether the contents of a {@code java.lang.String}
 * are comprised of only White-Space Characters.
 *
 * <BR /><BR />Java's {@code 'asMatchPredicate'} is very similar to appending the Reg-Ex
 * Control-Characters {@code '^'} and {@code '$'} to the beginning and ending of a
 * {@code String}.
 *
 * <BR /><BR /><B CLASS=JDDescLabel>Important:</B> Since the Regular Expression used is
 * {@link #WHITE_SPACE_REGEX}, which is {@code \s+} - <I>using the {@code '+'}, rather than the
 * {@code '*'}</I> - this {@code Predicate} will return {@code FALSE} when a Zero-Length
 * {@code String} is passed as input.
 *
 * @see #WHITE_SPACE_REGEX
 * @see #onlyWhiteSpace_OrZeroLen
 */
public static final Predicate<String> onlyWhiteSpace =
    WHITE_SPACE_REGEX.asMatchPredicate();

/**
 * This is a {@code Predicate} that works in an identical manner to {@link #onlyWhiteSpace},
 * with the minor added stipulation that a Zero-Length {@code String} will generate a
 * {@code TRUE} / pass result from the {@code Predicate.test(String)} method.
 *
 * <BR /><BR />This one is built with {@code asPredicate()} rather than
 * {@code asMatchPredicate()}, so the {@code '^'} and {@code '$'} anchors are written into the
 * pattern itself ({@code ^\s*$}).
 *
 * @see #onlyWhiteSpace
 */
public static final Predicate<String> onlyWhiteSpace_OrZeroLen =
    Pattern.compile("^\\s*$").asPredicate();

/**
 * This regular expression simply matches the comma.  The only reason for including this here
 * is because the java {@code class 'Pattern'} contains a method called
 * {@code Stream<String> 'splitAsStream(CharSequence)'} which is used for the CSV method
 * further below.
 *
 * @see StrCSV#CSV(String, boolean, boolean)
 * @see FileRW#readDoublesFromFile(String, boolean, boolean)
 * @see FileRW#readLongsFromFile(String, boolean, boolean, int)
 */
public static final Pattern COMMA_REGEX = Pattern.compile(",");

/**
 * This regular expression is used for integer and floating-point numbers that use the
 * comma ({@code ','}) between the digits that comprise the number.  It matches a comma that
 * has a digit directly on each side, capturing those two digits as groups 1 and 2.  For
 * example, it finds matches inside the {@code String} {@code "900,800,75.00"}.
 *
 * @see FileRW#readIntsFromFile(String, boolean, boolean, int)
 */
public static final Pattern NUMBER_COMMMA_REGEX = Pattern.compile("(\\d),(\\d)");
Note that the {@code '\r\n'} version 099 * comes before the single {@code '\r'} version in the regular-expression, to guarantee that 100 * if both are present, they are treated as a single newline. 101 */ 102 public static final Pattern NEWLINEP = Pattern.compile("\\r\\n|\\r|\\n"); 103 104 /** 105 * Predicate for new-line characters 106 * @see #NEWLINEP 107 */ 108 public static final Predicate<String> newLinePred = NEWLINEP.asPredicate(); 109 110 /** The months of the year, as an immutable list of {@code String's}. */ 111 public static final ReadOnlyList<String> months = new ReadOnlyArrayList<>( 112 "January", "February", "March", "April", "May", "June", 113 "July", "August", "September", "October", "November", "December" 114 ); 115 116 private static final Calendar internalCalendar = Calendar.getInstance(); 117 118 /** This is the list of characters that need to be escaped for a regular expression */ 119 public static final String REG_EX_ESCAPE_CHARS = "\\/()[]{}$^+*?-."; 120 121 /** Alpha-Numeric RegEx */ 122 public static final Pattern ALPHA_NUMERIC = Pattern.compile("^[\\d\\w]*$"); 123 124 /** 125 * Alpha-Numeric {@code String} Predicate. 126 * @see #ALPHA_NUMERIC 127 */ 128 public static final Predicate<String> alphaNumPred = ALPHA_NUMERIC.asPredicate(); 129 130 // The minimum value for the byte primitive type, without the minus sign. 131 private static final char[] BYTE_MIN_VALUE_DIGITS_AS_CHARS = { '2', '5', '6' }; 132 133 // The minimum value for the short primitive type, without the minus sign. 134 private static final char[] SHORT_MIN_VALUE_DIGITS_AS_CHARS = { '6', '5', '5', '3', '6' }; 135 136 // The minimum value for the int primitive type, without the minus sign. 137 private static final char[] INT_MIN_VALUE_DIGITS_AS_CHARS = 138 { '2', '1', '4', '7', '4', '8', '3', '6', '4', '8' }; 139 140 // The minimum value for the long primitive type, without the minus sign. 
141 private static final char[] LONG_MIN_VALUE_DIGITS_AS_CHARS = 142 { 143 '2', '1', '4', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5', '4', 144 '7', '7', '5', '8', '0', '8' 145 }; 146 147 /** An empty {@code String} array. */ 148 public static final String[] EMPTY_STR_ARRAY = {}; 149 150 151 // ******************************************************************************************** 152 // ******************************************************************************************** 153 // methods 154 // ******************************************************************************************** 155 // ******************************************************************************************** 156 157 158 /** 159 * Makes a {@code long} number like {@code 123456789} into a number-string such as: 160 * {@code "123,456,789"}. Java's {@code package java.text.*} is easy to use, and versatile, but 161 * the commands are not always so easy to remember. 162 * 163 * @param l Any {@code long} integer. Comma's will be inserted for every third power of ten 164 * 165 * @return After calling java's {@code java.text.DecimalFormat} class, a {@code String} 166 * representing this parameter will be returned. 167 */ 168 public static String commas(long l) 169 { return formatter.format(l); } 170 171 /** 172 * Trims any white-space {@code Characters} from the end of a {@code String}. 
173 * 174 * <BR /><TABLE CLASS=JDBriefTable> 175 * <TR><TH>Input String:</TH><TH>Output String:</TH></TR> 176 * <TR><TD>{@code "A Quick Brown Fox\n \t"}</TD><TD>{@code "A Quick Brown Fox"}</TD></TR> 177 * <TR><TD>{@code "\tA Lazy Dog."}</TD><TD>{@code "\tA Lazy Dog."}</TD></TR> 178 * <TR><TD>{@code " " (only white-space)}</TD><TD>{@code ""}</TD></TR> 179 * <TR><TD>{@code "" (empty-string)}</TD><TD>{@code ""}</TD></TR> 180 * <TR><TD>{@code null}</TD><TD>throws {@code NullPointerException}</TD></TR> 181 * </TABLE> 182 * 183 * @param s Any Java {@code String} 184 * 185 * @return A copy of the same {@code String} - <I>but all characters that matched Java 186 * method {@code java.lang.Character.isWhitespace(char)}</I> and were at the end of the 187 * {@code String} will not be included in the returned {@code String}. 188 * 189 * <BR /><BR />If the {@code zero-length String} is passed to parameter {@code 's'}, it 190 * shall be returned immediately. 191 * 192 * <BR /><BR />If the resultant-{@code String} has zero-length, it is returned, without 193 * exception. 194 */ 195 public static String trimRight(String s) 196 { 197 if (s.length() == 0) return s; 198 199 int pos = s.length(); 200 201 while ((pos > 0) && Character.isWhitespace(s.charAt(--pos))); 202 203 if (pos == 0) if (Character.isWhitespace(s.charAt(0))) return ""; 204 205 return s.substring(0, pos + 1); 206 } 207 208 /** 209 * Trims any white-space {@code Characters} from the beginning of a {@code String}. 210 * 211 * <TABLE CLASS=JDBriefTable> 212 * <TR><TH>Input String:</TH><TH>Output String:</TH></TR> 213 * <TR><TD>{@code "\t A Quick Brown Fox"}</TD><TD>{@code "A Quick Brown Fox"}</TD></TR> 214 * <TR><TD>{@code "A Lazy Dog. \n\r\t"}</TD><TD>{@code "A Lazy Dog. 
\n\r\t"}</TD></TR> 215 * <TR><TD>{@code " " (only white-space)}</TD><TD>{@code ""}</TD></TR> 216 * <TR><TD>{@code "" (empty-string)}</TD><TD>{@code ""}</TD></TR> 217 * <TR><TD>{@code null}</TD><TD>throws {@code NullPointerException}</TD></TR> 218 * </TABLE> 219 * 220 * @param s Any Java {@code String} 221 * 222 * @return A copy of the same {@code String} - <I>but all characters that matched Java 223 * method {@code java.lang.Character.isWhitespace(char)}</I> and were at the start of the 224 * {@code String} will not be included in the returned {@code String}. 225 * 226 * <BR /><BR />If the {@code zero-length String} is passed to parameter {@code 's'}, it 227 * shall be returned immediately. 228 * 229 * <BR /><BR />If the resultant-{@code String} has zero-length, it is returned, without 230 * exception. 231 */ 232 public static String trimLeft(String s) 233 { 234 int pos = 0; 235 int len = s.length(); 236 237 if (len == 0) return s; 238 239 while ((pos < len) && Character.isWhitespace(s.charAt(pos++))); 240 241 if (pos == len) if (Character.isWhitespace(s.charAt(len-1))) return ""; 242 243 return s.substring(pos - 1); 244 } 245 246 /** 247 * Primarily for convenience in remembering earlier C-style {@code printf(...)} formatting 248 * commands. 249 * 250 * <BR /><BR />This method will "left pad" an input {@code String} with spaces, if 251 * {@code s.length() < totalStrLength}. If input-parameter {@code 's'} is equal-to or 252 * longer-than the value in {@code 'totalStringLength'}, then the original {@code String} shall 253 * be returned. 
254 * 255 * <TABLE CLASS=JDBriefTable> 256 * <TR><TH>Input Parameters</TH><TH>Returned String</TH></TR> 257 * <TR><TD>{@code "Quick Brown Fox"}<BR />{@code 20}</TD> 258 * <TD><PRE>{@code " Quick Brown Fox"}</PRE></TD> 259 * </TR> 260 * <TR><TD>{@code "Hello World"}<BR />{@code 15}</TD> 261 * <TD><PRE>{@code " Hello World"}</PRE></TD> 262 * </TR> 263 * <TR><TD>{@code "Write Once, Run Anywhere"}<BR />{@code 10}</TD> 264 * <TD>{@code "Write Once, Run Anywhere"}</TD> 265 * </TR> 266 * <TR><TD>{@code null}</TD><TD>{@code NullPointerException}</TD></TR> 267 * </TABLE> 268 * 269 * @param s This may be any {@code java.lang.String} 270 * 271 * @param totalStringLength If {@code s.length()} is smaller than {@code 'totalStringLength'}, 272 * then as many space characters ({@code ' '}) as are needed to ensure that the returned 273 * {@code 'String'} has length equal to {@code 'totalStringLength'} will be 274 * <B><I>prepended</B></I> to the input {@code String} parameter {@code 's'}. 275 * 276 * <BR /><BR />If {@code s.length()} is greater than {@code 'totalStringLength'}, then the 277 * original input shall be returned. 278 * 279 * @throws IllegalArgumentException If {@code totalStringLength} is zero or negative. 280 * 281 * @see #rightSpacePad(String, int) 282 */ 283 public static String leftSpacePad(String s, int totalStringLength) 284 { 285 if (totalStringLength <= 0) throw new IllegalArgumentException( 286 "totalString length was '" + totalStringLength + ", " + 287 "however it is expected to be a positive integer." 288 ); 289 290 return (s.length() >= totalStringLength) 291 ? s 292 : String.format("%1$" + totalStringLength + "s", s); 293 } 294 295 /** 296 * Primarily for convenience in remembering earlier C-style {@code printf(...)} formatting 297 * commands. 298 * 299 * <BR /><BR />This method will "right pad" an input {@code String} with spaces, if 300 * {@code s.length() < totalStrLength}. 
If input-parameter {@code 's'} is equal-to or 301 * longer-than the value in {@code 'totalStringLength'}, then the original {@code String} shall 302 * be returned. 303 * 304 * <TABLE CLASS=JDBriefTable> 305 * <TR><TH>Input Parameters</TH><TH>Returned String</TH></TR> 306 * <TR><TD>{@code "Quick Brown Fox"}<BR />{@code 20}</TD> 307 * <TD><PRE>{@code "Quick Brown Fox "}</PRE></TD> 308 * </TR> 309 * <TR><TD>{@code "Hello World"}<BR />{@code 15}</TD> 310 * <TD><PRE>{@code "Hello World "}</PRE></TD> 311 * </TR> 312 * <TR><TD>{@code "Write Once, Run Anywhere"}<BR />{@code 10}</TD> 313 * <TD>{@code "Write Once, Run Anywhere"}</TD> 314 * </TR> 315 * <TR><TD>{@code null}</TD><TD>{@code NullPointerException}</TD></TR> 316 * </TABLE> 317 * 318 * @param s This may be any {@code java.lang.String} 319 * 320 * @param totalStringLength If {@code s.length()} is smaller than {@code 'totalStringLength'}, 321 * then as many space characters ({@code ' '}) as are needed to ensure that the returned 322 * {@code 'String'} has length equal to {@code 'totalStringLength'} will be 323 * <B><I>postpended</B></I> to the input {@code String} parameter {@code 's'}. 324 * 325 * <BR /><BR />If {@code s.length()} is greater than {@code 'totalStringLength'}, then the 326 * original input shall be returned. 327 * 328 * @throws IllegalArgumentException If {@code totalStringLength} is zero or negative. 329 * 330 * @see #leftSpacePad(String, int) 331 */ 332 public static String rightSpacePad(String s, int totalStringLength) 333 { 334 if (totalStringLength <= 0) throw new IllegalArgumentException( 335 "totalString length was '" + totalStringLength + "', " + 336 "however it is expected to be a positive integer." 337 ); 338 339 return (s.length() >= totalStringLength) 340 ? 
s 341 : String.format("%1$-" + totalStringLength + "s", s); 342 } 343 344 /** 345 * Runs a Regular-Expression over a {@code String} to retrieve all matches that occur between 346 * input {@code String} parameter {@code 's'} and Regular-Expression {@code 'regEx'}. 347 * 348 * @param s Any Java {@code String} 349 * @param regEx Any Java Regular-Expression 350 * 351 * @param eliminateOverlappingMatches When this parameter is passed {@code 'TRUE'}, successive 352 * matches that have portions which overlap each-other are eliminated. 353 * 354 * @return An array of all {@code MatchResult's} (from package {@code 'java.util.regex.*'}) that 355 * were produced by iterating the {@code Matcher's} {@code 'find()'} method. 356 */ 357 public static MatchResult[] getAllMatches 358 (String s, Pattern regEx, boolean eliminateOverlappingMatches) 359 { 360 Stream.Builder<MatchResult> b = Stream.builder(); 361 Matcher m = regEx.matcher(s); 362 int prevEnd = 0; 363 364 while (m.find()) 365 { 366 MatchResult matchResult = m.toMatchResult(); 367 368 // This skip any / all overlapping matches - if the user has requested it 369 if (eliminateOverlappingMatches) if (matchResult.start() < prevEnd) continue; 370 371 b.accept(matchResult); 372 373 prevEnd = matchResult.end(); 374 } 375 376 // Convert the Java-Stream into a Java-Array and return the result 377 return b.build().toArray(MatchResult[]::new); 378 } 379 380 381 // ******************************************************************************************** 382 // ******************************************************************************************** 383 // Helper set & get for strings 384 // ******************************************************************************************** 385 // ******************************************************************************************** 386 387 388 /** 389 * This sets a character in a {@code String} to a new value, and returns a result 390 * @param str Any java {@code String} 391 * 
@param i An index into the underlying character array of that {@code String}. 392 * @param c A new character to be placed at the <I>i'th position</I> of this {@code String}. 393 * 394 * @return a new java {@code String}, with the appropriate index into the {@code String} 395 * substituted using character parameter {@code 'c'}. 396 */ 397 public static String setChar(String str, int i, char c) 398 { 399 return ((i + 1) < str.length()) 400 ? (str.substring(0, i) + c + str.substring(i + 1)) 401 : (str.substring(0, i) + c); 402 } 403 404 /** 405 * This removes a character from a {@code String}, and returns a new {@code String} as a 406 * result. 407 * 408 * @param str Any Java-{@code String}. 409 * 410 * @param i This is the index into the underlying java {@code char}-array whose character will 411 * be removed from the return {@code String}. 412 * 413 * @return Since Java {@code String}'s are all immutable, this {@code String} that is returned 414 * is completely new, with the character that was originally at index 'i' removed. 415 */ 416 public static String delChar(String str, int i) 417 { 418 if ((i + 1) < str.length()) 419 return str.substring(0, i) + str.substring(i + 1); 420 else 421 return str.substring(0, i); 422 } 423 424 /** 425 * Returns the same {@code String} is input, but trims all spaces down to a single space. 426 * Each and every <I>lone / independent or contiguous</I> white-space character is reduced 427 * to a single space-character. 
428 * 429 * <TABLE CLASS=JDBriefTable> 430 * <TR><TH>Input String</TH><TH>Output String</TH></TR> 431 * <TR><TD><PRE>{@code "This has extra spaces\n"}</PRE></TD> 432 * <TD>{@code "This has extra spaces "}</TD> 433 * </TR> 434 * <TR><TD>{@code "This does not"}</TD> 435 * <TD>{@code "This does not"}</TD> 436 * </TR> 437 * <TR><TD>{@code "\tThis\nhas\ttabs\nand\tnewlines\n"}</TD> 438 * <TD>{@code " This has tabs and newlines "}</TD> 439 * </TR> 440 * </TABLE> 441 * 442 * @param s Any Java {@code String} 443 * 444 * @return A {@code String} where all white-space is compacted to a single space. This is 445 * generally how HTML works, when it is displayed in a browser. 446 */ 447 public static String removeDuplicateSpaces(String s) 448 { return StringParse.WHITE_SPACE_REGEX.matcher(s).replaceAll(" "); } 449 450 /** 451 * This string-modify method simply removes any and all white-space matches found within a 452 * java-{@code String}. 453 * 454 * <TABLE CLASS=JDBriefTable> 455 * <TR><TH>Input String</TH><TH>Output String</TH></TR> 456 * <TR><TD><PRE>{@code "This Has Extra Spaces\n"}</PRE></TD> 457 * <TD>{@code "ThisHasExtraSpaces"}</TD> 458 * </TR> 459 * <TR><TD>{@code "This Does Not"}</TD> 460 * <TD>{@code "ThisDoesNot"}</TD> 461 * </TR> 462 * <TR><TD>{@code "\tThis\nHas\tTabs\nAnd\tNewlines\n"}</TD> 463 * <TD>{@code "ThisHasTabsAndNewlines"}</TD> 464 * </TR> 465 * </TABLE> 466 * 467 * @param s Any {@code String}, but if it has any white-space (space that matches 468 * regular-expression: {@code \w+}) then those character-blocks will be removed 469 * 470 * @return A new {@code String} without any {@code \w} (RegEx for 'whitespace') 471 * 472 * @see #WHITE_SPACE_REGEX 473 */ 474 public static String removeWhiteSpace(String s) 475 { return WHITE_SPACE_REGEX.matcher(s).replaceAll(""); } 476 477 /** 478 * Generates a {@code String} that contains {@code n} copies of character {@code c}. 479 * @return {@code n} copies of {@code c}, as a {@code String}. 
480 * @throws IllegalArgumentException If the value passed to parameter {@code 'n'} is negative 481 * @see StrSource#caretBeneath(String, int) 482 */ 483 public static String nChars(char c, int n) 484 { 485 if (n < 0) throw new IllegalArgumentException("Value of parameter 'n' is negative: " + n); 486 487 char[] cArr = new char[n]; 488 Arrays.fill(cArr, c); 489 return new String(cArr); 490 } 491 492 /** 493 * Generates a {@code String} that contains {@code n} copies of {@code s}. 494 * @return {@code n} copies of {@code s} as a {@code String}. 495 * @throws NException if the value provided to parameter {@code 'n'} is negative. 496 */ 497 public static String nStrings(String s, int n) 498 { 499 if (n < 0) throw new NException("A negative value was passed to 'n' [" + n + ']'); 500 501 StringBuilder sb = new StringBuilder(); 502 503 for (int i=0; i < n; i++) sb.append(s); 504 505 return sb.toString(); 506 } 507 508 /** 509 * This method checks whether or not a java-{@code String} has white-space. 510 * 511 * @param s Any Java-{@code String}. If this {@code String} has any white-space, this method 512 * will return {@code TRUE} 513 * 514 * @return {@code TRUE} If there is any white-space in this method, and {@code FALSE} otherwise. 515 * 516 * @see #WHITE_SPACE_REGEX 517 */ 518 public static boolean hasWhiteSpace(String s) 519 { return WHITE_SPACE_REGEX.matcher(s).find(); } 520 521 /** 522 * Counts the number of instances of character input {@code char c} contained by the 523 * input {@code String s} 524 * 525 * @param s Any {@code String} containing any combination of ASCII/UniCode characters 526 * 527 * @param c Any ASCII/UniCode character. 
528 * 529 * @return The number of times {@code char c} occurs in {@code String s} 530 */ 531 public static int countCharacters(String s, char c) 532 { 533 int count = 0; 534 int pos = 0; 535 while ((pos = s.indexOf(c, pos + 1)) != -1) count++; 536 return count; 537 } 538 539 540 /** 541 * If the {@code String} passed to this method contains a single-quote on both sides of the 542 * {@code String}, or if it contains a double-quote on both sides of this {@code String}, then 543 * this method shall return a new {@code String} that is shorter in length by 2, and leaves off 544 * the first and last characters of the input parameter {@code String}. 545 * 546 * <BR /><BR /><B>HOPEFULLY,</B> The name of this method explains clearly what this method does 547 * 548 * @param s This may be any java {@code String}. Only {@code String's} whose first and last 549 * characters are not only quotation marks (single or double), but also they are <B>the same, 550 * identical, quotation marks on each side.</B> 551 * 552 * @return A new {@code String} that whose first and last quotation marks are gone - if they 553 * were there when this method began. 554 */ 555 public static String ifQuotesStripQuotes(String s) 556 { 557 if (s == null) return null; 558 if (s.length() < 2) return s; 559 560 int lenM1 = s.length() - 1; // Position of the last character in the String 561 562 if ( ((s.charAt(0) == '\"') && (s.charAt(lenM1) == '\"')) // String has Double-Quotation-Marks 563 || // ** or *** 564 ((s.charAt(0) == '\'') && (s.charAt(lenM1) == '\'')) ) // String has Single-Quotation-Marks 565 return s.substring(1, lenM1); 566 else 567 return s; 568 } 569 570 /** 571 * Counts the number of lines of text inside of a Java {@code String}. 572 * 573 * @param text This may be any text, as a {@code String}. 574 * 575 * @return Returns the number of lines of text. 
The integer returned shall be precisely 576 * equal to the number of {@code '\n'} characters <B><I>plus one!</I></B> 577 */ 578 public static int numLines(String text) 579 { 580 if (text.length() == 0) return 0; 581 582 int pos = -1; 583 int count = 0; 584 585 do 586 { 587 pos = text.indexOf('\n', pos + 1); 588 count++; 589 } 590 while (pos != -1); 591 592 return count; 593 } 594 595 596 // ******************************************************************************************** 597 // ******************************************************************************************** 598 // Misc Date String Functions 599 // ******************************************************************************************** 600 // ******************************************************************************************** 601 602 603 /** 604 * Converts an integer into a Month. I could just use the class {@code java.util.Calendar}, 605 * but it is so complicated, that using an internal list is easier. 606 * 607 * @param month The month, as a number from {@code '1'} to {@code '12'}. 608 * @return A month as a {@code String} like: {@code "January"} or {@code "August"} 609 * @see #months 610 */ 611 public static String monthStr(int month) { return months.get(month); } 612 613 /** 614 * Generates a "Date String" using the character separator {@code '.'} 615 * @return A {@code String} in the form: {@code YYYY.MM.DD} 616 */ 617 public static String dateStr() { return dateStr('.', false); } 618 619 /** 620 * Generates a "Date String" using the <I>separator</I> parameter as the separator between 621 * numbers 622 * 623 * @param separator Any ASCII or UniCode character. 624 * 625 * @return A {@code String} of the form: {@code YYYYcMMcDD} where {@code 'c'} is the passed 626 * {@code 'separator'} parameter. 
627 */ 628 public static String dateStr(char separator) { return dateStr(separator, false); } 629 630 /** 631 * Generates a "Date String" that is consistent with the directory-name file-storage locations 632 * used to store articles from {@code http://Gov.CN}. 633 * 634 * @return The {@code String's} used for the Chinese Government Web-Portal Translation Pages 635 */ 636 public static String dateStrGOVCN() { return dateStr('/', false).replaceFirst("/", "-"); } 637 // "2017-12/05" 638 639 /** 640 * This class is primary included because although Java has a pretty reasonable "general 641 * purpose" calendar class/interface, but a consistent / same {@code String} since is needed 642 * because the primary use here is for building the names of files. 643 * 644 * @param separator Any ASCII or Uni-Code character. 645 * 646 * @param includeMonthName When <I>TRUE</I>, the English-Name of the month ({@code 'January'} 647 * ... {@code 'December'}) will be appended to the month number in the returned {@code String}. 648 * 649 * @return The year, month, and day as a {@code String}. 650 */ 651 public static String dateStr(char separator, boolean includeMonthName) 652 { 653 Calendar c = internalCalendar; 654 String m = zeroPad10e2(c.get(Calendar.MONTH) + 1); // January is month zero! 655 String d = zeroPad10e2(c.get(Calendar.DAY_OF_MONTH)); 656 657 if (includeMonthName) m += " - " + c.getDisplayName(Calendar.MONTH, 2, Locale.US); 658 659 if (separator != 0) return c.get(Calendar.YEAR) + "" + separator + m + separator + d; 660 else return c.get(Calendar.YEAR) + "" + m + d; 661 } 662 663 /** 664 * Returns a {@code String} that has the year and the month (but not the day, or other time 665 * components). 666 * 667 * @return Returns the current year and month as a {@code String}. 668 */ 669 public static String ymDateStr() { return ymDateStr('.', false); } 670 671 /** 672 * Returns a {@code String} that has the year and the month (but not the day, or other time 673 * components). 
674 * 675 * @param separator The single-character separator used between year, month and day. 676 * 677 * @return The current year and month as a {@code String}. 678 */ 679 public static String ymDateStr(char separator) { return ymDateStr(separator, false); } 680 681 /** 682 * Returns a {@code String} that has the year and the month (but not the day, or other time 683 * components). 684 * 685 * @param separator The single-character separator used between year, month and day. 686 * 687 * @param includeMonthName When this is true, the name of the month, in English, is included 688 * with the return {@code String}. 689 * 690 * @return YYYYseparatorMM(? include-month-name) 691 */ 692 public static String ymDateStr(char separator, boolean includeMonthName) 693 { 694 Calendar c = internalCalendar; 695 String m = zeroPad10e2(c.get(Calendar.MONTH) + 1); // January is month zero! 696 697 if (includeMonthName) m += " - " + c.getDisplayName(Calendar.MONTH, 2, Locale.US); 698 699 if (separator != 0) return c.get(Calendar.YEAR) + "" + separator + m; 700 else return c.get(Calendar.YEAR) + "" + m; 701 } 702 703 704 // ******************************************************************************************** 705 // ******************************************************************************************** 706 // Misc Time String Functions 707 // ******************************************************************************************** 708 // ******************************************************************************************** 709 710 711 /** 712 * Returns the current time as a {@code String}. 713 * 714 * @return military time - with AM|PM (redundant) added too. 715 * Includes only Hour and Minute - separated by a colon character {@code ':'} 716 * 717 * @see #timeStr(char) 718 */ 719 public static String timeStr() { return timeStr(':'); } 720 721 /** 722 * Returns the current time as a {@code String}. 
723 * 724 * @param separator The character used to separate the minute & hour fields 725 * 726 * @return military time - with AM|PM added redundantly, and a separator of your choosing. 727 */ 728 public static String timeStr(char separator) 729 { 730 Calendar c = internalCalendar; 731 int ht = c.get(Calendar.HOUR) + ((c.get(Calendar.AM_PM) == Calendar.AM) ? 0 : 12); 732 String h = zeroPad10e2((ht == 0) ? 12 : ht); // 12:00 is represented as "0"... changes this... 733 String m = zeroPad10e2(c.get(Calendar.MINUTE)); 734 String p = (c.get(Calendar.AM_PM) == Calendar.AM) ? "AM" : "PM"; 735 736 if (separator != 0) return h + separator + m + separator + p; 737 else return h + m + p; 738 } 739 740 /** 741 * Returns the current time as a {@code String}. This method uses all time components 742 * available. 743 * 744 * @return military time - with AM|PM added redundantly. 745 */ 746 public static String timeStrComplete() 747 { 748 Calendar c = internalCalendar; 749 int ht = c.get(Calendar.HOUR) + ((c.get(Calendar.AM_PM) == Calendar.AM) ? 0 : 12); 750 String h = zeroPad10e2((ht == 0) ? 12 : ht); // 12:00 is represented as "0" 751 String m = zeroPad10e2(c.get(Calendar.MINUTE)); 752 String s = zeroPad10e2(c.get(Calendar.SECOND)); 753 String ms = zeroPad(c.get(Calendar.MILLISECOND)); 754 String p = (c.get(Calendar.AM_PM) == Calendar.AM) ? "AM" : "PM"; 755 756 return h + '-' + m + '-' + p + '-' + s + '-' + ms + "ms"; 757 } 758 759 /** 760 * The words "ordinal indicator" are referring to the little character {@code String} that is 761 * often used in English to make a number seem more a part of an english sentence. 
762 * 763 * @param i Any positive integer (greater than 0) 764 * 765 * @return This will return the following strings: 766 * 767 * <TABLE CLASS=JDBriefTable> 768 * <TR><TH>Input: </TH><TH>RETURNS:</TH></TR> 769 * <TR><TD>i = 1 </TD><TD>"st" (as in "1st","first") </TD></TR> 770 * <TR><TD>i = 2 </TD><TD>"nd" (as in "2nd", "second") </TD></TR> 771 * <TR><TD>i = 4 </TD><TD>"th" (as in "4th") </TD></TR> 772 * <TR><TD>i = 23 </TD><TD>"rd" (as in "23rd") </TD></TR> 773 * </TABLE> 774 * 775 * @throws IllegalArgumentException If i is negative, or zero 776 */ 777 public static String ordinalIndicator(int i) 778 { 779 if (i < 1) 780 throw new IllegalArgumentException("i: " + i + "\tshould be a natural number > 0."); 781 782 // Returns the last 2 digits of the number, or the number itself if it is less than 100. 783 // Any number greater than 100 - will not have the "text-ending" (1st, 2nd, 3rd..) affected 784 // by the digits after the first two digits. Just analyze the two least-significant digits 785 i = i % 100; 786 787 // All numbers between "4th" and "19th" end with "th" 788 if ((i > 3) && (i < 20)) return "th"; 789 790 // set i to be the least-significant digit of the number - if that number was 1, 2, or 3 791 i = i % 10; 792 793 // Obvious: English Rules. 794 if (i == 1) return "st"; 795 if (i == 2) return "nd"; 796 if (i == 3) return "rd"; 797 798 // Compiler is complaining. This statement should never be executed. 799 return "th"; 800 } 801 802 803 // ******************************************************************************************** 804 // ******************************************************************************************** 805 // Zero Padding stuff 806 // ******************************************************************************************** 807 // ******************************************************************************************** 808 809 810 /** 811 * This just zero-pads integers with "prepended" zero's. 
java.text has all kinds of extremely 812 * intricate zero-padding and text-formatting classes. However, here, these are generally used 813 * for <B>debug, line-number, or count</B> information that is printed to the UNIX terminal. 814 * When this is the case, a simple and easily remembered <I>'one line method'</I> is a lot more 815 * useful than all of the highly-scalable versions of the text-formatting classes in java.text. 816 * 817 * @param n Any Integer. If {@code 'n'} is negative or greater than 1,000 - then null is 818 * returned. 819 * 820 * @return A zero-padded {@code String} - <B><I>to precisely three orders of 10</I></B>, as in 821 * the example table below: 822 * 823 * <TABLE CLASS=JDBriefTable> 824 * <TR><TH>Input </TH><TH><I>RETURNS:</I></TH></TR> 825 * <TR><TD>n = 9 </TD><TD>"009"</TD></TR> 826 * <TR><TD>n = 99 </TD><TD>"099"</TD></TR> 827 * <TR><TD>n = 999 </TD><TD>"999"</TD></TR> 828 * <TR><TD>n = 9999 </TD><TD>null</TD></TR> 829 * <TR><TD>n = -10 </TD><TD>null</TD></TR> 830 * </TABLE> 831 * 832 * @see #zeroPad10e2(int) 833 * @see #zeroPad10e4(int) 834 */ 835 public static String zeroPad(int n) 836 { 837 if (n < 0) return null; 838 if (n < 10) return "00" + n; 839 if (n < 100) return "0" + n; 840 if (n < 1000) return "" + n; 841 return null; 842 } 843 844 /** 845 * Pads an integer such that it contains enough leading zero's to ensure a String-length of 846 * two. 847 * 848 * @param n Must be an integer between 0 and 99, or else null will be returned 849 * 850 * @return A zero-padded String of the integer, <B><I>to precisely two orders of 851 * 10</I></B><BR />. Null is returned if the number cannot fit within two spaces. 
Example 852 * table follows: 853 * 854 * <TABLE CLASS=JDBriefTable> 855 * <TR><TH>Input </TH><TH><I>RETURNS:</I></TH></TR> 856 * <TR><TD>n = 9 </TD><TD>"09"</TD></TR> 857 * <TR><TD>n = 99 </TD><TD>"99"</TD></TR> 858 * <TR><TD>n = 999 </TD><TD>null</TD></TR> 859 * <TR><TD>n = -10 </TD><TD>null</TD></TR> 860 * </TABLE> 861 * 862 * @see #zeroPad(int) 863 */ 864 public static String zeroPad10e2(int n) 865 { 866 if (n < 0) return null; 867 if (n < 10) return "0" + n; 868 if (n < 100) return "" + n; 869 return null; 870 } 871 872 /** 873 * Pads an integer such that it contains enough leading zero's to ensure a String-length of 874 * four. 875 * 876 * @param n Must be an integer between 0 and 9999, or else null will be returned 877 * 878 * @return A zero-padded String of the integer, <B><I>to precisely four orders of 10</I></B>. 879 * Null is returned if the number cannot fit within four spaces. Example table follows: 880 * 881 * <TABLE CLASS=JDBriefTable> 882 * <TR><TH>Input </TH><TH><I>RETURNS:</I></TH></TR> 883 * <TR><TD>n = 9 </TD><TD>"0009"</TD></TR> 884 * <TR><TD>n = 99 </TD><TD>"0099"</TD></TR> 885 * <TR><TD>n = 999 </TD><TD>"0999"</TD></TR> 886 * <TR><TD>n = 9999 </TD><TD>"9999" </TD></TR> 887 * <TR><TD>n = 99999 </TD><TD>null</TD></TR> 888 * <TR><TD>n = -10 </TD><TD>null</TD></TR> 889 * </TABLE> 890 * 891 * @see #zeroPad(int) 892 */ 893 public static String zeroPad10e4(int n) 894 { 895 if (n < 0) return null; 896 if (n < 10) return "000" + n; 897 if (n < 100) return "00" + n; 898 if (n < 1000) return "0" + n; 899 if (n < 10000) return "" + n; 900 return null; 901 } 902 903 /** 904 * Pad's an integer with leading zeroes into a {@code String}. The number of zeroes padded is 905 * equal to parameter {@code 'powerOf10'}. If {@code int 'powerOf10'} were equal to zero, then 906 * any integer passed to this function would return a {@code String} that was precisely three 907 * characters long. 
If the value of parameter {@code int 'n'} were larger than {@code 1,000} 908 * or negative, then null would be returned. 909 * 910 * @param n Must be an integer between {@code '0'} and {@code '9999'} where the number of 911 * {@code '9'} digits is equal to the value of parameter {@code int 'powerOf10'} 912 * 913 * @param powerOf10 This must be a positive integer greater than {@code '1'}. It may not be 914 * larger {@code '11'}. The largest value that any integer in Java may attain is 915 * {@code '2,147,483, 647'} 916 * 917 * @return A zero padded {@code String}. If a negative number is passed to parameter 918 * {@code 'n'}, then 'null' shall be returned. Null shall also be returned if the "Power of 10 919 * Exponent of parameter {@code n}" is greater than the integer-value of parameter 920 * {@code 'powerOf10'} 921 * 922 * <BR /><BR /><B>FOR INSTANCE:</B> a call to: {@code zeroPad(54321, 4);} would return null 923 * since the value of parameter {@code 'n'} has five-decimal-places, but {@code 'powerOf10'} is 924 * only 4! 925 * 926 * @throws IllegalArgumentException if the value parameter {@code 'powerOf10'} is less than 2, 927 * or greater than {@code 11}. 928 */ 929 public static String zeroPad(int n, int powerOf10) 930 { 931 if (n < 0) return null; // Negative Values of 'n' not allowed 932 933 char[] cArr = new char[powerOf10]; // The String's length will be equal to 'powerOf10' 934 String s = "" + n; // (or else 'null' would be returned) 935 int i = powerOf10 - 1; // Internal Loop variable 936 int j = s.length() - 1; // Internal Loop variable 937 938 Arrays.fill(cArr, '0'); // Initially, fill the output char-array with all 939 // zeros 940 941 while ((i >= 0) && (j >= 0)) // Now start filling that char array with the 942 cArr[i--] = s.charAt(j--); // actual number 943 944 if (j >= 0) return null; // if all of parameter 'n' was inserted into the 945 // output (number 'n' didn't fit) then powerOf10 946 // was insufficient, so return null. 
947 948 return new String(cArr); 949 } 950 951 952 // ******************************************************************************************** 953 // ******************************************************************************************** 954 // Find / Front Last-Front-Slash 955 // ******************************************************************************************** 956 // ******************************************************************************************** 957 958 959 /** 960 * This function finds the position of the last "front-slash" character {@code '/'} in a 961 * java-{@code String} 962 * 963 * @param urlOrDir This is any java-{@code String}, but preferably one that is a 964 * {@code URL}, or directory. 965 * 966 * @return The {@code String}-index of the last 'front-slash' {@code '/'} position in a 967 * {@code String}, or {@code -1} if there are not front-slashes. 968 */ 969 public static int findLastFrontSlashPos(String urlOrDir) 970 { return urlOrDir.lastIndexOf('/'); } 971 972 /** 973 * This returns the contents of a {@code String}, after the last front-slash found. 974 * 975 * <BR /><BR /><B>NOTE:</B> If not front-slash {@code '/'} character is found, then the 976 * original {@code String} is returned. 977 * 978 * @param urlOrDir This is any java-{@code String}, but preferably one that is a 979 * {@code URL}, or directory. 980 * 981 * @return the portion of the {@code String} after the final front-slash {@code '/'} character. 982 * If there are no front-slash characters found in this {@code String}, then the original 983 * {@code String} shall be returned. 984 */ 985 public static String fromLastFrontSlashPos(String urlOrDir) 986 { 987 int pos = urlOrDir.lastIndexOf('/'); 988 if (pos == -1) return urlOrDir; 989 return urlOrDir.substring(pos + 1); 990 } 991 992 /** 993 * This returns the contents of a {@code String}, before the last front-slash found (including 994 * the front-slash {@code '/'} itself). 
995 * 996 * <BR /><BR /><B>NOTE:</B> If no front-slash {@code '/'} character is found, then null is 997 * returned. 998 * 999 * @param urlOrDir This is any java-{@code String}, but preferably one that is a 1000 * {@code URL}, or directory. 1001 * 1002 * @return the portion of the {@code String} <I><B>before and including</B></I> the final 1003 * front-slash {@code '/'} character. If there are no front-slash characters found in this 1004 * {@code String}, then null. 1005 */ 1006 public static String beforeLastFrontSlashPos(String urlOrDir) 1007 { 1008 int pos = urlOrDir.lastIndexOf('/'); 1009 if (pos == -1) return null; 1010 return urlOrDir.substring(0, pos + 1); 1011 } 1012 1013 1014 // ******************************************************************************************** 1015 // ******************************************************************************************** 1016 // Find / From Last-File-Separator 1017 // ******************************************************************************************** 1018 // ******************************************************************************************** 1019 1020 1021 /** 1022 * This function finds the position of the last {@code 'java.io.File.separator'} character in a 1023 * java-{@code String}. In UNIX-based systems, this is a forward-slash {@code '/'} character, 1024 * but in Windows-MSDOS, this is a back-slash {@code '\'} character. Identifying which of the 1025 * two is used is obtained by "using" Java's {@code File.separator} class and field. 1026 * 1027 * @param fileOrDir This may be any Java-{@code String}, but preferably one that represents a 1028 * file or directory. 1029 * 1030 * @return The {@code String}-index of the last 'file-separator' position in a {@code String}, 1031 * or {@code -1} if there are no such file-separators. 
1032 */ 1033 public static int findLastFileSeparatorPos(String fileOrDir) 1034 { return fileOrDir.lastIndexOf(File.separator.charAt(0)); } 1035 1036 /** 1037 * This returns the contents of a {@code String}, after the last 1038 * {@code 'java.io.File.separator'} found. 1039 * 1040 * <BR /><BR /><B>NOTE:</B> If no {@code 'java.io.File.separator'} character is found, then 1041 * the original {@code String} is returned. 1042 * 1043 * @param fileOrDir This is any java-{@code String}, but preferably one that is a filename or 1044 * directory-name 1045 * 1046 * @return the portion of the {@code String} after the final {@code 'java.io.File.separator'} 1047 * character. If there are no such characters found, then the original {@code String} shall 1048 * be returned. 1049 */ 1050 public static String fromLastFileSeparatorPos(String fileOrDir) 1051 { 1052 int pos = fileOrDir.lastIndexOf(File.separator.charAt(0)); 1053 if (pos == -1) return fileOrDir; 1054 return fileOrDir.substring(pos + 1); 1055 } 1056 1057 /** 1058 * This returns the contents of a {@code String}, before the last 1059 * {@code 'java.io.File.separator'} (including the separator itself). 1060 * 1061 * <BR /><BR /><B>NOTE:</B> If no {@code 'java.io.File.separator'} character is found, 1062 * then null is returned. 1063 * 1064 * @param urlOrDir This is any java-{@code String}, but preferably one that is a 1065 * {@code URL}, or directory. 1066 * 1067 * @return the portion of the {@code String} <I><B>before and including</B></I> the final 1068 * {@code 'java.io.File.separator'} character. If there are no such characters found in this 1069 * {@code String}, then null is returned. 
1070 */ 1071 public static String beforeLastFileSeparatorPos(String urlOrDir) 1072 { 1073 int pos = urlOrDir.lastIndexOf(File.separator.charAt(0)); 1074 if (pos == -1) return null; 1075 return urlOrDir.substring(0, pos + 1); 1076 } 1077 1078 1079 // ******************************************************************************************** 1080 // ******************************************************************************************** 1081 // Find / From File-Extension 1082 // ******************************************************************************************** 1083 // ******************************************************************************************** 1084 1085 1086 /** 1087 * This method swaps the ending 'File Extension' with another, parameter-provided, 1088 * extension. 1089 * 1090 * @param fileNameOrURLWithExtension Any file-name (or {@code URL}) that has an extension. 1091 * 1092 * @param newExtension The file or {@code URL} extension used as a substitute for the old 1093 * extension. This {@code String} may begin with the dot / period character ({@code '.'}), 1094 * and if it does not, one wil be appended. 1095 * 1096 * @return The new file-name or {@code URL} having the substituted extension. 1097 * 1098 * @throws StringFormatException If the {@code String} passed does not have any 1099 * {@code '.'} (period) characters, then this exception will throw. 1100 * 1101 * <BR /><BR /><B STYLE='color:red'>CAUTION:</B> In lieu of an exhaustive check on whether 1102 * or not the input file-name is a valid name, this method will simply check for the presence 1103 * or absence of a period-character ({@code '.'}). <I>Checking the validity of the input name 1104 * is <B>far</B> beyond the scope of this method.</I> 1105 * 1106 * <BR /><BR /><B>ALSO:</B> This method shall check to ensure that the {@code 'newExtension'} 1107 * parameter does not have length zero. 
1108 * 1109 * <BR /><BR />To remove a file-extension, use {@link #removeExtension(String)} 1110 */ 1111 public static String swapExtension(String fileNameOrURLWithExtension, String newExtension) 1112 { 1113 int dotPos = fileNameOrURLWithExtension.lastIndexOf('.'); 1114 1115 if (dotPos == -1) throw new StringFormatException( 1116 "The file-name provided\n[" + fileNameOrURLWithExtension + "]\n" + 1117 "does not have a file-extension" 1118 ); 1119 1120 if (newExtension.length() == 0) throw new StringFormatException( 1121 "The new file-name extension has length 0. " + 1122 " To remove an extension, use 'StringParse.removeFileExtension(fileName)'" 1123 ); 1124 1125 return (newExtension.charAt(0) == '.') 1126 ? fileNameOrURLWithExtension.substring(0, dotPos) + newExtension 1127 : fileNameOrURLWithExtension.substring(0, dotPos) + '.' + newExtension; 1128 } 1129 1130 /** 1131 * This method simply removes all character data after the last identified period character 1132 * ({@code '.'}) found within {@code fileNameOrURL}. 1133 * 1134 * <BR /><BR />If the input-{@code String} does not have a period-character, the original 1135 * {@code String} will be returned, unmodified. 1136 * 1137 * @param fileNameOrURL Any file-name or {@code URL}, as a {@code String}. 1138 * 1139 * @return The modified file-name, or {@code URL}, as a {@code String}. 1140 * 1141 * <BR /><BR /><B STYLE='color:red'>NOTE:</B> No validity checks <I>of any kind</I> are 1142 * performed on {@code 'fileNameOrURL'}. This method merely checks for the presence or 1143 * absence of a {@code '.'} (period-character), and if it finds one, removes everything 1144 * after-and-including the last-period. 
1145 */ 1146 public static String removeExtension(String fileNameOrURL) 1147 { 1148 int dotPos = fileNameOrURL.lastIndexOf('.'); 1149 if (dotPos == -1) return fileNameOrURL; 1150 return fileNameOrURL.substring(0, dotPos); 1151 } 1152 1153 /** 1154 * This will return the location within a {@code String} where the last period ({@code '.'}) 1155 * is found. 1156 * 1157 * <BR /><BR /><B>ALSO:</B> No validity checks for valid file-system names are performed. 1158 * Rather, the portion of the input-{@code String} starting at the location of the last period 1159 * is returned, regardless of what the {@code String} contains. 1160 * 1161 * @param file This may be any Java-{@code String}, but preferably one that represents a 1162 * file. 1163 * 1164 * @param includeDot When this parameter is passed {@code TRUE}, the position-index that is 1165 * returned will be the location of the last index where a period ({@code '.'}) is found. 1166 * When {@code FALSE}, the index returned will be the location of that period {@code + 1}. 1167 * 1168 * @return This will return the location of the file-extension. If no period is found, then 1169 * {@code -1} is returned. If the period is the last {@code char} in the {@code String}, 1170 * and parameter {@code 'includeDot'} is {@code FALSE}, then {@code -1} is returned. 1171 */ 1172 public static int findExtension(String file, boolean includeDot) 1173 { 1174 int pos = file.lastIndexOf('.'); 1175 1176 if (pos == -1) return -1; 1177 if (includeDot) return pos; 1178 1179 pos++; 1180 return (pos < file.length()) ? pos : -1; 1181 } 1182 1183 /** 1184 * This returns the contents of a {@code String}, after the last period {@code '.'} in that 1185 * {@code String}. For file-system and web files, this is often referred to as the <B>file 1186 * extension.</B> 1187 * 1188 * <BR /><BR /><B>NOTE:</B> If no period {@code '.'} character is found, then null is returned. 
1189 * 1190 * <BR /><BR /><B>ALSO:</B> No validity checks for valid file-system names are performed. 1191 * Rather, the portion of the input-{@code String} starting at the location of the last period 1192 * is returned, regardless of what the {@code String} contains. 1193 * 1194 * @param file This is any java-{@code String}, but preferably one that is a filename. 1195 * 1196 * @param includeDot This determines whether the period {@code '.'} is to be included in the 1197 * returned-{@code String}. 1198 * 1199 * @return the portion of the {@code String} after the final period {@code '.'} character. 1200 * If parameter {@code includeDot} has been passed {@code FALSE}, then the portion of the 1201 * input-{@code String} beginning after the last period is returned. 1202 * 1203 * <BR /><BR />If there are no period characters found in this {@code String}, then null 1204 * is returned. 1205 */ 1206 public static String fromExtension(String file, boolean includeDot) 1207 { 1208 int pos = findExtension(file, includeDot); 1209 if (pos == -1) return null; 1210 return file.substring(pos); 1211 } 1212 1213 /** 1214 * This returns the contents of a {@code String}, before the last period {@code '.'} in that 1215 * {@code String}. For file-system and web files, this is often referred to as the <B>file 1216 * extension.</B> 1217 * 1218 * <BR /><BR /><B>NOTE:</B> If no period {@code '.'} character is found, then the original 1219 * {@code String} is returned. 1220 * 1221 * <BR /><BR /><B>ALSO:</B> No validity checks for valid file-system names are performed. 1222 * Rather, the portion of the input-{@code String} starting at the location of the last period 1223 * is returned, regardless of what the {@code String} contains. 1224 * 1225 * @param file This is any java-{@code String}, but preferably one that is a filename. 1226 * 1227 * @return the portion of the {@code String} before the final period {@code '.'} character. 
1228 * 1229 * <BR /><BR />If there are no period characters found in this {@code String}, then the 1230 * original file is returned. 1231 */ 1232 public static String beforeExtension(String file) 1233 { 1234 int pos = file.lastIndexOf('.'); 1235 if (pos == -1) return file; 1236 return file.substring(0, pos); 1237 } 1238 1239 /** 1240 * This function returns the root URL-directory of a {@code String} 1241 * 1242 * <BR /><BR /><B>SPECIFICALLY:</B> it searches for the "last forward slash" in a 1243 * {@code String}, and returns a substring from position 0 to that point. If there aren't any 1244 * forward slashes in this {@code String}, null is returned. The front-slash itself is 1245 * included in the returned {@code String}. 1246 * 1247 * <BR /><BR /><B>NOTE:</B> It is similar to the old MS-DOS call to "DIR PART" 1248 * 1249 * @param url Any {@code String} that is intended to be an "Internet URL" - usually 1250 * http://domain/directory/[file] 1251 * 1252 * @return substring(0, index of last front-slash ({@code '/'}) in {@code String}) 1253 */ 1254 public static String findURLRoot(String url) 1255 { 1256 int pos = findLastFrontSlashPos(url); 1257 1258 if (pos == -1) return null; 1259 else return url.substring(0, pos + 1); 1260 } 1261 1262 /** 1263 * 1264 * @return After breaking the {@code String} by white-space, this returns the first 'chunk' 1265 * before the first whitespace. 
1266 */ 1267 public static String firstWord(String s) 1268 { 1269 int pos = s.indexOf(" "); 1270 1271 if (pos == -1) return s; 1272 else return s.substring(0, pos); 1273 } 1274 1275 1276 // ******************************************************************************************** 1277 // ******************************************************************************************** 1278 // Removing parts of a string 1279 // ******************************************************************************************** 1280 // ******************************************************************************************** 1281 1282 1283 /** 1284 * This function will remove any pairs of Brackets within a {@code String}, and returned the 1285 * paired down {@code String} 1286 * 1287 * @param s Any {@code String}, which may or may not contain a "Bracket Pair" 1288 * 1289 * <BR /><BR /><B>For Example:</B> 1290 * 1291 * <BR /><BR /> 1292 * 1293 * <UL CLASS=JDUL> 1294 * <LI>This {@code String} does contain [a pair of brackets] within!</LI> 1295 * <LI>But this {@code String} does not.</LI> 1296 * </UL> 1297 * 1298 * @return The same {@code String}, but with any bracket-pairs removed. 1299 */ 1300 public static String removeBrackets(String s) { return remove_(s, '[', ']'); } 1301 1302 /** 1303 * Functions the same as {@code removeBrackets(String)} - but removes pairs of curly-braces, 1304 * instead<BR /> <B>NOTE:</B>These are { curly braces } that will be removed by this 1305 * {@code String}! 1306 * 1307 * @param s Any valid {@code String} { such as } - <I>(even this {@code String})</I>. 1308 * 1309 * <BR /><BR /><B>For Example:</B> 1310 * 1311 * <BR /><BR /> 1312 * 1313 * <UL CLASS=JDUL> 1314 * <LI>This {@code String} does contain {a pair of curly-braces} within!</LI> 1315 * <LI>But this {@code String} does not.</LI> 1316 * </UL> 1317 * 1318 * @return The same {@code String}, but with any curly-brace-pairs removed. 
1319 * 1320 * @see #removeBrackets(String) 1321 */ 1322 public static String removeBraces(String s) { return remove_(s, '{', '}'); } 1323 1324 /** 1325 * Removes Parenthesis, similar to other parenthetical removing functions. 1326 * 1327 * @param s Any (valid) {@code String}. Below are sample inputs: 1328 * 1329 * <BR /><BR /><UL CLASS=JDUL> 1330 * <LI>This {@code String} does contain (a pair of parenthesis) within!</LI> 1331 * <LI>But this {@code String} does not.</LI> 1332 * </UL> 1333 * 1334 * @return The same {@code String}, but with any parenthesis removed. 1335 * 1336 * @see #removeBrackets(String) 1337 */ 1338 public static String removeParens(String s) { return remove_(s, '(', ')'); } 1339 1340 /** 1341 * Removes all parenthetical notations. Calls all <I><B>remove functions</B></I> 1342 * 1343 * @param s Any valid string 1344 * 1345 * @return The same string, but with all parenthesis, curly-brace & bracket pairs removed. 1346 * 1347 * @see #removeParens(String) 1348 * @see #removeBraces(String) 1349 * @see #removeBrackets(String) 1350 */ 1351 public static String removeAllParenthetical(String s) 1352 { return removeParens(removeBraces(removeBrackets(s))); } 1353 1354 private static String remove_(String s, char left, char right) 1355 { 1356 int p = s.indexOf(left); 1357 if (p == -1) return s; 1358 1359 String ret = s.substring(0, p).trim(); 1360 1361 for (++p; (s.charAt(p) != right) && (p < s.length()); p++); 1362 1363 if (p >= (s.length() - 1)) return ret; 1364 1365 ret += " " + s.substring(p + 1).trim(); 1366 1367 if (ret.indexOf(left) != -1) return remove_(ret.trim(), left, right); 1368 else return ret.trim(); 1369 } 1370 1371 1372 1373 // ******************************************************************************************** 1374 // ******************************************************************************************** 1375 // Base-64 Encoded Java Objects 1376 // 
******************************************************************************************** 1377 // ******************************************************************************************** 1378 1379 1380 /** 1381 * This will convert any Serializable Java Object into a base-64 String. This {@code String} 1382 * may be saved, transmitted, <I>even e-mailed to another party, if you wish</I> and decoded 1383 * else-where. 1384 * 1385 * <BR /><BR /><B>REQUIREMENTS:</B> 1386 * 1387 * <BR /><BR /><OL CLASS=JDOL> 1388 * <LI> Object must implement the {@code interface java.io.Serializable}</LI> 1389 * 1390 * <LI> Receiving party or storage-device must have access to the {@code .jar file, or .class 1391 * file(s)} needed to instantiate that object! <I>(You must have shared your classes if 1392 * you intend to let other people de-serialize instances of that class)</I> 1393 * </LI> 1394 * </OL> 1395 * 1396 * @param o Any java {@code java.lang.Object}. This object must be Serializable, or else the 1397 * code will generate an exception. 1398 * 1399 * @return A {@code String} version of this object. It will be: 1400 * 1401 * <BR /><BR /><OL CLASS=JDOL> 1402 * <LI> Serialized using the {@code java.io.ObjectOutputStream(...)} <I>object-serialization 1403 * method</I> 1404 * </LI> 1405 * 1406 * <LI> Compressed using the {@code java.io.GZIPOutputStream(...)} <I>stream-compression 1407 * method</I> 1408 * </LI> 1409 * <LI>Encoded to a {@code String}, via Base-64 Encoding 1410 * {@code java.util.Base64.getEncoder()}</LI> 1411 * </OL> 1412 * 1413 * <BR /><B><SPAN STYLE="color: red">NOTE:</B></SPAN> Compression does not always make much 1414 * difference, however often times when doing web-scraping projects, there are large Java 1415 * {@code java.util.Vector<String>} filled with many lines of text, and these lists may be 1416 * instantly and easily saved using object-serialization. 
Furthermore, in these cases, the 1417 * compression will sometimes reduce file-size by an order of magnitude. 1418 * 1419 * @see #b64StrToObj(String) 1420 */ 1421 public static String objToB64Str(Object o) throws IOException 1422 { 1423 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1424 GZIPOutputStream gzip = new GZIPOutputStream(bos); 1425 ObjectOutputStream oos = new ObjectOutputStream(gzip); 1426 1427 oos.writeObject(o); oos.flush(); gzip.finish(); oos.close(); bos.close(); 1428 1429 return Base64.getEncoder().encodeToString(bos.toByteArray()); 1430 } 1431 1432 /** 1433 * This converts <B><I>to</B></I> any <I><B>java.io.Serializable</B></I> object 1434 * <I><B>from</B></I> a compressed, serialized, Base-64 Encoded {@code java.lang.String}. This 1435 * method can be thought of as one which converts objects which have been previously encoded as 1436 * a {@code String}, and possibly even transmitted across the internet, back into an Java 1437 * {@code Object}. 1438 * 1439 * <BR /><BR /><B>REQUIREMENTS:</B> The {@code Object} that is to be instantiated must have its 1440 * class files accessible to the class-loader. This is the exact-same requirement expected by 1441 * all Java "de-serializations" routines. 1442 * 1443 * @param str Any previously Base-64 encoded, serialized, compressed {@code java.lang.Object'} 1444 * that has been saved as a {@code String}. That {@code String} should have been generated 1445 * using the {@code Programming.objToB64Str(Object o)} method in this class. 
1446 * 1447 * <BR /><BR /><OL CLASS=JDOL> 1448 * <LI>Serialized using the {@code java.io.ObjectOutputStream(...)} <I>object-serialization 1449 * method</I></LI> 1450 * <LI>Compressed using the {@code java.io.GZIPOutputStream(...)} <I>sream-compression 1451 * method</I></LI> 1452 * <LI>Encoded to a {@code String}, via Base-64 Encoding 1453 * {@code java.util.Base64.getEncoder()}</LI> 1454 * </OL> 1455 * 1456 * <BR /><B><SPAN STYLE="color: red">NOTE:</B></SPAN> Compression does not always make much 1457 * difference, however often times when doing web-scraping projects, there are large Java 1458 * {@code java.util.Vector<String>} filled with many lines of text, and these lists may be 1459 * instantly and easily saved using object-serialization. Furthermore, in these cases, the 1460 * compression will sometimes reduce file-size by an order of magnitude. 1461 * 1462 * @return The de-compressed {@code java.lang.Object} converted back from a {@code String}. 1463 * 1464 * @see #objToB64Str(Object) 1465 */ 1466 public static Object b64StrToObj(String str) throws IOException 1467 { 1468 ByteArrayInputStream bis = new ByteArrayInputStream(Base64.getDecoder().decode(str)); 1469 GZIPInputStream gzip = new GZIPInputStream(bis); 1470 ObjectInputStream ois = new ObjectInputStream(gzip); 1471 Object ret = null; 1472 1473 try 1474 { ret = ois.readObject(); } 1475 catch (ClassNotFoundException e) 1476 { 1477 throw new IOException( 1478 "There were no serialized objects found in your String. See e.getCause();", 1479 e 1480 ); 1481 } 1482 1483 bis.close(); ois.close(); 1484 return ret; 1485 } 1486 1487 /** 1488 * This performs an identical operation as the method: {@code objToB64Str}, however it 1489 * generates an output {@code String} that is "MIME" compatible. 
All this means is that the 1490 * {@code String} itself - <I>which could conceivable by thousands or even hundreds of 1491 * thousands of characters long</I> - will have {@code new-line characters} inserted such that 1492 * it may be printed on paper or included in a text-file that is (slightly) more 1493 * human-readable. Base64 MIME encoded {@code String's} look like very long paragraphs of 1494 * random-text data, while regular Base64-encodings are a single, very-long, {@code String} 1495 * with no space characters. 1496 * 1497 * @param o Any {@code java.lang.Object}. This object must be Serializable, or else the code 1498 * will generate an exception. 1499 * 1500 * @return A Base-64 MIME Encoded {@code String} version of any serializable 1501 * {@code java.lang.Object}. 1502 * 1503 * @see #objToB64Str(Object) 1504 * @see #b64MimeStrToObj(String) 1505 */ 1506 public static String objToB64MimeStr(Object o) throws IOException 1507 { 1508 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1509 GZIPOutputStream gzip = new GZIPOutputStream(bos); 1510 ObjectOutputStream oos = new ObjectOutputStream(gzip); 1511 1512 oos.writeObject(o); oos.flush(); gzip.finish(); oos.close(); bos.close(); 1513 1514 return Base64.getMimeEncoder().encodeToString(bos.toByteArray()); 1515 } 1516 1517 /** 1518 * This performs an identical operation as the method: {@code b64StrToObj}, however receives a 1519 * "MIME" compatible encoded {@code String}. All this means is that the {@code String} itself 1520 * - <I>which could conceivable by thousands or even hundreds of thousands of characters 1521 * long</I> - will have {@code new-line characters} inserted such that it may be printed on 1522 * paper or included in a text-file that is (slightly) more human-readable. Base64 MIME 1523 * encoded {@code String's} look like very long paragraphs of random-text data, while regular 1524 * Base64 encodings a single, very-long, {@code String's}. 
1525 * 1526 * @return The (de-serialized) java object that was read from the input parameter 1527 * {@code String 'str'} 1528 * 1529 * <BR /><BR /><B>REQUIREMENTS:</B> The object that is to be instantiated must have its class 1530 * files accessible to the class-loader. This is the exact-same requirement expected by all 1531 * Java "de-serializations" routines. 1532 * 1533 * @see #b64StrToObj(String) 1534 * @see #objToB64MimeStr(Object) 1535 */ 1536 public static Object b64MimeStrToObj(String str) throws IOException 1537 { 1538 ByteArrayInputStream bis = new ByteArrayInputStream(Base64.getMimeDecoder().decode(str)); 1539 GZIPInputStream gzip = new GZIPInputStream(bis); 1540 ObjectInputStream ois = new ObjectInputStream(gzip); 1541 Object ret = null; 1542 1543 try 1544 { ret = ois.readObject(); } 1545 catch (ClassNotFoundException e) 1546 { 1547 throw new IOException( 1548 "There were no serialized objects found in your String. See e.getCause();", 1549 e 1550 ); 1551 } 1552 1553 bis.close(); ois.close(); 1554 return ret; 1555 } 1556 1557 1558 // ******************************************************************************************** 1559 // ******************************************************************************************** 1560 // '../' (Parent Directory) 1561 // ******************************************************************************************** 1562 // ******************************************************************************************** 1563 1564 1565 /** 1566 * Computes a "relative {@code URL String}". 1567 * 1568 * @param fileName This is a fileName whose ancestor directory needs to be 1569 * <I>'relative-ised'</I> 1570 * 1571 * @param ancestorDirectory This is an ancestor (container) directory. 1572 * 1573 * @param separator The separator character used to separate file-system directory names. 1574 * 1575 * @return This shall return the "../.." structure needed to insert a relative-{@code URL} or 1576 * link into a web-page. 
1577 * 1578 * @throws IllegalArgumentException This exception shall throw if the separator character is 1579 * not one of the standard file & directory separators: forward-slash {@code '/'} or 1580 * back-slash {@code '\'}. 1581 * 1582 * <BR /><BR />This exception also throws if the {@code String} provided to parameter 1583 * {@code 'fileName'} does not begin-with the {@code String} provided to parameter 1584 * {@code 'ancestorDirectory'}. 1585 */ 1586 public static String dotDots(String fileName, String ancestorDirectory, char separator) 1587 { 1588 if ((separator != '/') && (separator != '\\')) throw new IllegalArgumentException( 1589 "The separator character provided to this method must be either a forward-slash '/' " + 1590 "or a back-slash ('\\') character. You have provided: ['" + separator + "']." 1591 ); 1592 1593 if (! fileName.startsWith(ancestorDirectory)) throw new IllegalArgumentException( 1594 "The file-name you have provided [" + fileName + "] is a String that does " + 1595 "start with the ancestorDirectory String [" + ancestorDirectory + "]. " + 1596 "Therefore there is no relative path using the dot-dot construct to the named " + 1597 "ancestor directory fromm the directory where the named file resides." 1598 ); 1599 1600 int levelsDeep = StringParse.countCharacters(fileName, separator) - 1601 StringParse.countCharacters(ancestorDirectory, separator); 1602 1603 String dotDots = ""; 1604 1605 while (levelsDeep-- > 0) dotDots = dotDots + ".." + separator; 1606 1607 return dotDots; 1608 } 1609 1610 /** 1611 * Convenience Method. 
1612 * <BR />Invokes: {@link #dotDotParentDirectory(String, char, short)} 1613 * <BR />Converts: {@code URL} to {@code String}, eliminates non-essential 1614 * {@code URI}-information (Such as: {@code ASP, JSP, PHP Query-Strings, and others too}) 1615 * <BR />Passes: {@code char '/'}, the separator character used in {@code URL's} 1616 * <BR />Passes: {@code '1'} to parameter {@code 'nLevels'} - only going up on directory 1617 */ 1618 public static String dotDotParentDirectory(URL url) 1619 { 1620 String urlStr = url.getProtocol() + "://" + url.getHost() + url.getPath(); 1621 return dotDotParentDirectory(urlStr, '/', (short) 1); 1622 } 1623 1624 /** 1625 * Convenience Method. 1626 * <BR />Invokes: {@link #dotDotParentDirectory(String, char, short)} 1627 * <BR />Passes: {@code char '/'}, the separator character used in {@code URL's} 1628 * <BR />Passes: {@code '1'} to parameter {@code 'nLevels'} - only going up on directory 1629 */ 1630 public static String dotDotParentDirectory(String urlAsStr) 1631 { return dotDotParentDirectory(urlAsStr, '/', (short) 1); } 1632 1633 /** 1634 * Convenience Method. 1635 * <BR />Invokes: {@link #dotDotParentDirectory(String, char, short)} 1636 * <BR />Converts: {@code URL} to {@code String}, eliminates non-essential 1637 * {@code URI}-information (Such as: {@code ASP, JSP, PHP Query-Strings, and others too}) 1638 * <BR />Passes: {@code char '/'}, the separator character used in {@code URL's} 1639 */ 1640 public static String dotDotParentDirectory(URL url, short nLevels) 1641 { 1642 String urlStr = url.getProtocol() + "://" + url.getHost() + url.getPath(); 1643 return dotDotParentDirectory(urlStr, '/', nLevels); 1644 } 1645 1646 /** 1647 * Convenience Method. 
1648 * <BR />Invokes: {@link #dotDotParentDirectory(String, char, short)} 1649 * <BR />Passes: {@code char '/'}, the separator character used in {@code URL's} 1650 */ 1651 public static String dotDotParentDirectory(String urlAsStr, short nLevels) 1652 { return dotDotParentDirectory(urlAsStr, '/', nLevels); } 1653 1654 /** 1655 * Convenience Method. 1656 * <BR />Invokes: {@link #dotDotParentDirectory(String, char, short)}. 1657 * <BR />Passes: {@code '1'} to parameter {@code nLevels} - only going up one directory. 1658 */ 1659 public static String dotDotParentDirectory(String directoryStr, char dirSeparator) 1660 { return dotDotParentDirectory(directoryStr, dirSeparator, (short) 1); } 1661 1662 /** 1663 * This does traverses up a directory-tree structure, and returns a 'parent-level' directory 1664 * that is {@code 'nLevels'} up the tree. 1665 * 1666 * <BR /><BR /><B>NOTE:</B> The character used as the "File Separator" and/or "Directory 1667 * Separator" can be obtained using the field: {@code java.io.File.Separator.charAt(0).} The 1668 * class {@code java.io.File} provides access to the file-separator used by the file-system on 1669 * which the JVM is currently running, although it treats it as a multi-character 1670 * {@code String}. Just use the commonly-used java method {@code 'charAt(0)'} to obtain the 1671 * forward-slash {@code '/'} or backward-slash {@code '\'} character. 1672 * 1673 * <BR /><BR /><B><SPAN STYLE="color: red;">IMPORTANT:</B></SPAN> There is no error-checking 1674 * performed by this method regarding whether the input {@code String} represents a valid file 1675 * or directory. Instead, this method just looks for the <I><B>second from last 1676 * separator-character (usually a {@code '/'} forward-slash char)</B></I> and returns a 1677 * substring that starts at index 0, and continues to that position-plus-1 (in order to include 1678 * that second-to-last separator char). 
1679 * 1680 * @param directoryStr This may be any java-{@code String}, although it is expected to be on 1681 * which represents the file & directory structure of file on the file-system. It may also 1682 * be {@code URL} for a web-site 1683 * 1684 * @param separator This is the separator currently used by that file & directory system. 1685 * If trying to find the parent directory of a {@code URL}, this should be the forward-slash 1686 * character {@code '/'}. 1687 * 1688 * @param nLevels This is how many "parent-level directories" (how many levels up the tree) 1689 * need to be computed. This parameter must '1' or greater. If the passed parameter 1690 * {@code 'directoryStr'} does not contain enough directories to traverse up the tree, then 1691 * this method will throw an {@code IllegalArgumentException}. 1692 * 1693 * @return a {@code String} that represents 'nLevels' up the directory tree, either for 1694 * a directory on the local-file system, or on a web-server from a Uniform Resource 1695 * Locator. 1696 * 1697 * @throws IllegalArgumentException If the value of parameter {@code short 'nLevels'} is 1698 * negative, or does not identify a number consistent with the number of directories that are 1699 * contained by the input urlAsStr parameter. 1700 * 1701 * <BR /><BR />This exception shall also throw if the {@code 'separator'} character is not one 1702 * of the standard file & directory separators: forward-slash {@code '/'} or back-slash 1703 * {@code '\'}. 1704 */ 1705 public static String dotDotParentDirectory(String directoryStr, char separator, short nLevels) 1706 { 1707 if (nLevels < 1) throw new IllegalArgumentException( 1708 "The parameter nLevels may not be less than 1, nor negative. You have passed: " + nLevels 1709 ); 1710 1711 if ((separator != '/') && (separator != '\\')) throw new IllegalArgumentException( 1712 "The separator character provided to this method must be either a forward-slash '/' " + 1713 "or a back-slash ('\\') character. 
You have provided: ['" + separator + "']." 1714 ); 1715 1716 int count = 0; 1717 1718 for (int i=directoryStr.length() - 1; i >= 0; i--) 1719 if (directoryStr.charAt(i) == separator) 1720 if (++count == (nLevels + 1)) 1721 return directoryStr.substring(0, i + 1); 1722 1723 throw new IllegalArgumentException( 1724 "The parameter nLevels was: " + nLevels + ", but unfortunately there only were: " + count + 1725 "'" + separator + "' characters found in the directory-string." 1726 ); 1727 } 1728 1729 1730 // ******************************************************************************************** 1731 // ******************************************************************************************** 1732 // Quick 'isNumber' methods 1733 // ******************************************************************************************** 1734 // ******************************************************************************************** 1735 1736 1737 /** 1738 * Determines, efficiently, whether an input {@code String} is also an integer. 1739 * 1740 * <BR /><BR /><B>NOTE:</B> A leading plus-sign ({@code '+'}) will, in fact, generate a 1741 * {@code FALSE} return-value for this method. 1742 * 1743 * @param s Any java {@code String} 1744 * 1745 * @return {@code TRUE} if the input {@code String} is any integer, and false otherwise. 1746 * 1747 * <BR /><BR /><B>NOTE:</B> This method does not check whether the number, itself, will 1748 * actually fit into a field or variable of type {@code 'int'}. For example, the input 1749 * {@code String '12345678901234567890'} (a very large integer), though an integer from a 1750 * mathematical perspective, is not a valid java {@code 'int'}. In such cases, {@code TRUE} 1751 * is returned, but if Java's {@code Integer.parseInt} method were subsequently used, that 1752 * method would throw an exception. 
1753 * 1754 * <BR /><BR /><B>NOTE:</B> The primary purpose of this method is to avoid having to write 1755 * {@code try {} catch (NumberFormatException)} code-blocks. Furthermore, if only a check 1756 * is desired, and the {@code String} does not actually need to be converted to a number, 1757 * this is also more efficient than actually performing the conversion. 1758 * 1759 * @see #isInt(String) 1760 */ 1761 public static boolean isInteger(String s) 1762 { 1763 if (s == null) return false; 1764 1765 int length = s.length(); 1766 1767 if (length == 0) return false; 1768 1769 int i = 0; 1770 1771 if (s.charAt(0) == '-') 1772 { 1773 if (length == 1) return false; 1774 i = 1; 1775 } 1776 1777 while (i < length) 1778 { 1779 char c = s.charAt(i++); 1780 if (c < '0' || c > '9') return false; 1781 } 1782 1783 return true; 1784 } 1785 1786 /** 1787 * Convenience Method. 1788 * <BR />Invokes: {@link #isOfPrimitiveType(String, char[])} 1789 * <BR />Passes: The ASCII characters that comprise {@code Integer.MIN_VALUE} 1790 */ 1791 public static boolean isInt(String s) 1792 { return isOfPrimitiveType(s, INT_MIN_VALUE_DIGITS_AS_CHARS); } 1793 1794 /** 1795 * Convenience Method. 1796 * <BR />Invokes: {@link #isOfPrimitiveType(String, char[])} 1797 * <BR />Passes: The ASCII characters that comprise {@code Long.MIN_VALUE} 1798 */ 1799 public static boolean isLong(String s) 1800 { return isOfPrimitiveType(s, LONG_MIN_VALUE_DIGITS_AS_CHARS); } 1801 1802 /** 1803 * Convenience Method. 1804 * <BR />Invokes: {@link #isOfPrimitiveType(String, char[])} 1805 * <BR />Passes: ASCII characters that comprise {@code Byte.MIN_VALUE} 1806 */ 1807 public static boolean isByte(String s) 1808 { return isOfPrimitiveType(s, BYTE_MIN_VALUE_DIGITS_AS_CHARS); } 1809 1810 /** 1811 * Convenience Method. 
1812 * <BR />Invokes: {@link #isOfPrimitiveType(String, char[])} 1813 * <BR />Passes: ASCII characters that comprise {@code Short.MIN_VALUE} 1814 */ 1815 public static boolean isShort(String s) 1816 { return isOfPrimitiveType(s, SHORT_MIN_VALUE_DIGITS_AS_CHARS); } 1817 1818 1819 /** 1820 * Determines whether the input {@code String} is an integer in the range of Java's primitive 1821 * type specified by an input {@code char[]} array parameter. Specifically, if the the input 1822 * {@code String} is both a mathematical integer, and also an integer in the range of 1823 * {@code MIN_VALUE} and {@code MAX_VALUE} for that primitive-type and then (and only then) 1824 * will {@code TRUE} be returned. 1825 * 1826 * <BR /><BR /><B>NOTE:</B> The max and min values in which the range of valid integers 1827 * <B><I>must reside</I></B> (for primitive-type {@code 'int'}, for instance) are as below: 1828 * {@code -2147483648} ... {@code 2147483647}. 1829 * 1830 * <BR /><BR /><B>ALSO:</B> A leading plus-sign ({@code '+'}) will, in fact, generate a 1831 * {@code FALSE} return-value for this method. 1832 * 1833 * @param s Any Java {@code String} 1834 * 1835 * @param minArr The value of a Java Primitive {@code MIN_VALUE}, without the minus-sign, 1836 * represented as a {@code char[]} array. 
1837 * 1838 * <TABLE CLASS=JDBriefTable> 1839 * <TR> <TH>Primitive Type</TH> <TH>Integer as ASCII {@code char[]} array</TH></TR> 1840 * <TR> <TD>{@code byte}</TD> <TD>{@code '2', '5', '6'}</TD></TR> 1841 * <TR> <TD>{@code short}</TD> <TD>{@code '6', '5', '5', '3', '6'}</TD></TR> 1842 * 1843 * <TR> <TD>{@code int}</TD> 1844 * <TD>{@code '2', '1', '4', '7,' '4', '8', '3', '6', '4', '8'}</TD> 1845 * </TR> 1846 * 1847 * <TR> <TD>{@code long}</TD> 1848 * <TD>{@code '2', '1', '4', '9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5', 1849 * '4', '7', '7', '5', '8', '0', '8'}</TD> 1850 * </TR> 1851 * </TABLE> 1852 * 1853 * @return {@code TRUE} If the input {@code String} is both an integer, and also one which 1854 * falls in the range comprised by the specified Java Primitive Type. Return {@code FALSE} 1855 * otherwise. 1856 * 1857 * <BR /><BR /><B>NOTE:</B> The primary purpose of this method is to avoid having to write 1858 * {@code try {} catch (NumberFormatException)} code-blocks. Furthermore, if only a check 1859 * is desired, and the {@code String} does not actually need to be converted to a number, 1860 * this is also more efficient than actually performing the conversion. 1861 * 1862 * @see #isInteger(String) 1863 * @see #isInt(String) 1864 * @see #isByte(String) 1865 * @see #isLong(String) 1866 * @see #isShort(String) 1867 */ 1868 protected static boolean isOfPrimitiveType(String s, char[] minArr) 1869 { 1870 int length = s.length(); 1871 1872 // Zero length string's are not valid integers. 1873 if (length == 0) return false; 1874 1875 // A negative integer may begin with a minus-sign. 1876 boolean negative = s.charAt(0) == '-'; 1877 1878 // **************************************************************************************** 1879 // If the string is too short or too long, this method doesn't need to do any work. 
1880 // We either know the answer immediately (too long), or we can call the simpler method 1881 // (in the case that it is too short) 1882 // **************************************************************************************** 1883 1884 // If a string is shorter than (for type 'int', for example): 2147483647 (10 chars) 1885 // then we ought use the simplified method which just checks if the string is an integer. 1886 if (length < minArr.length) return isInteger(s); 1887 1888 // If the string is longer than (for type 'int', for example): -2147483648 (11 chars) 1889 // then it cannot be an integer that fits into primitive 'int', so return false. 1890 if (length > (minArr.length + 1)) return false; 1891 1892 // If the String is *EXACTLY* 11 characters long (for primitive-type 'int', for example), 1893 // but doesn't begin with a negative sign, we also know the answer immediately. 1894 if ((!negative) && (length == (minArr.length + 1))) return false; 1895 1896 // If the String *EXACTLY* the length of MAX_NUUMBER, but it begins with a negative sign, 1897 // we can call the simplified method, instead as well. 1898 if (negative && (length == minArr.length)) return isInteger(s); 1899 1900 // The **REST** of the code is only executed if the numeric part of the String 1901 // (Specifically: leaving out the '-' negative sign, which may or may not be present) 1902 // ... if the numeric part of the String is precisely the length of MAX_VALUE / MAX_NUMBER 1903 // as determined by the length of the array 'minArr'... If the input string is 1904 // **PRECISELY** that length, then the string must be checked in the loop below. 1905 1906 int i = negative ? 1 : 0; 1907 int j = 0; 1908 boolean guaranteedFitIfInteger = false; 1909 char c = 0; 1910 1911 while (i < length) 1912 { 1913 c = s.charAt(i); 1914 1915 if (! 
guaranteedFitIfInteger) 1916 { 1917 if (c > minArr[j]) return false; 1918 if (c < minArr[j]) guaranteedFitIfInteger = true; 1919 } 1920 1921 if (c < '0') return false; 1922 if (c > '9') return false; 1923 1924 i++; j++; 1925 } 1926 1927 // THE COMMENT BELOW DELINEATES WHAT HAPPENS FOR THE INPUT-CASE OF PRIMITIVE-TYPE 'INT' 1928 // (2147483648)... But it generalizes for byte, short, and long as well. 1929 1930 // This might seem very strange. Since the MIN_VALUE ends with an '8', but the 1931 // MAX_VALUE ends with a '7', and since we are checking each character to see that 1932 // it falls within the array above, **RATHER THAN** just returning TRUE right here, 1933 // we have to catch the **LONE** border/edge case where some joker actually passed the 1934 // String 2147483648 - which must return FALSE, since the last positive integer is 1935 // 2147483647 (see that it has an ending of '7', rather than an '8'). 1936 1937 return guaranteedFitIfInteger || negative || (c != minArr[minArr.length-1]); 1938 } 1939 1940 private static final String Digits = "(\\p{Digit}+)"; 1941 private static final String HexDigits = "(\\p{XDigit}+)"; 1942 1943 // an exponent is 'e' or 'E' followed by an optionally 1944 // signed decimal integer. 1945 1946 private static final String Exp = "[eE][+-]?"+Digits; 1947 1948 /** 1949 * A Predicate which uses a regular-expression for checking whether a {@code String} is a valid 1950 * & parseable {@code double}, which is guaranteed not to throw a 1951 * {@code NumberFormatException} when using the parser {@code Double.parseDouble}. 
/**
 * A regular-expression {@code Pattern} for checking whether a {@code String} is a valid
 * &amp; parseable {@code double}, which is guaranteed not to throw a
 * {@code NumberFormatException} when using the parser {@code Double.parseDouble}.
 *
 * <BR /><BR /><B>NOTE:</B> The expression contains no {@code '^'} or {@code '$'} anchors.
 * It only answers the question above when the <I>entire</I> input is matched against it
 * (for instance with {@code Matcher.matches()}).  A {@code Matcher.find()} search succeeds
 * for any text that merely contains a number somewhere inside of it.
 *
 * <BR /><BR /><SPAN CLASS=CopiedJDK>The Following Description is Directly Copied From:
 * {@code java.lang.Double.valueOf(String)}, <B>JDK 1.8</B></SPAN>
 *
 * <EMBED CLASS='external-html' DATA-FILE-ID=STRP_D_VALUEOF>
 *
 * @see #floatingPointPred
 * @see #isDouble(String)
 */
public static final Pattern FLOATING_POINT_REGEX = Pattern.compile(
    // NOTE: Digits, HexDigits & Exp defined ABOVE

    "[\\x00-\\x20]*"+   // Optional leading "whitespace"
    "[+-]?(" +          // Optional sign character
    "NaN|" +            // "NaN" string
    "Infinity|" +       // "Infinity" string

    // A decimal floating-point string representing a finite positive
    // number without a leading sign has at most five basic pieces:
    // Digits . Digits ExponentPart FloatTypeSuffix
    //
    // Since this method allows integer-only strings as input
    // in addition to strings of floating-point literals, the
    // two sub-patterns below are simplifications of the grammar
    // productions from section 3.10.2 of
    // The Java Language Specification.

    // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt
    "((("+Digits+"(\\.)?("+Digits+"?)("+Exp+")?)|"+

    // . Digits ExponentPart_opt FloatTypeSuffix_opt
    "(\\.("+Digits+")("+Exp+")?)|"+

    // Hexadecimal strings
    "((" +

    // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt
    "(0[xX]" + HexDigits + "(\\.)?)|" +

    // 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt
    "(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" +

    ")[pP][+-]?" + Digits + "))" +
    "[fFdD]?))" +
    "[\\x00-\\x20]*"
    // Optional trailing "whitespace";
);
2002 * @see #FLOATING_POINT_REGEX 2003 * @see #isDouble(String) 2004 */ 2005 public static final Predicate<String> floatingPointPred = FLOATING_POINT_REGEX.asPredicate(); 2006 2007 /** 2008 * Tests whether an input-{@code String} can be parsed into a {@code double}, without throwing 2009 * an exception. 2010 * 2011 * @return {@code TRUE} <I>if and only if</I> calling {@code Double.valueOf(s)} (or 2012 * {@code Double.parseDouble(s)}) is guaranteed to produce a result, without throwing a 2013 * {@code NumberFormatException}. 2014 * 2015 * <BR /><BR /><B STYLE='color: red;'>NOTE:</B> Whenever analyzing performance and 2016 * optimizations, it is important to know just "how costly" (as an order of magnitude) a 2017 * certain operation really is. Constructors, for instance, that don't allocated much memory 2018 * can be two orders of magnitude <I>less costly than</I> the JRE's costs for creating the 2019 * {@code StackTrace} object when an exception (such as {@code NumberFormatException}) is 2020 * thrown. 2021 * 2022 * <BR /><BR />Though it costs "extra" to check whether a {@code String} can be parsed by the 2023 * Double-String Parser, if the programmer expects that exceptions will occasionally occur, the 2024 * amount of time saved by checking a {@code String} before parsing it as a Double-String will 2025 * actually save time - <I>even if only 1 in 500 of those {@code String's} are invalid and 2026 * would throw the exception, causing a {@code StackTrace} constructor to be invoked.</I> 2027 * 2028 * @see #FLOATING_POINT_REGEX 2029 * @see #floatingPointPred 2030 */ 2031 public static boolean isDouble(String s) 2032 { return floatingPointPred.test(s); } 2033}