001package Torello.Java; 002 003import java.util.*; 004import java.io.*; 005import java.util.regex.*; 006import java.util.zip.*; 007 008/** 009 * A utility for saving Regular-Expressions in a text-file that may be lazily-loaded at run-time 010 * when needed. 011 * 012 * <EMBED CLASS='external-html' DATA-FILE-ID=REGEX_FILES> 013 */ 014@Torello.JavaDoc.StaticFunctional 015public class RegExFiles 016{ 017 private RegExFiles() { } 018 019 /** 020 * This loads a regular expression text file. Each line is interpreted as a new Regular 021 * Expression {@code Pattern}. 022 * 023 * <BR /><BR />This method expects the <B><I>entire regular expression to fit on a single 024 * line</I></B>, and therefore, each new line containing text-data (without a starting 025 * <B>{@code '#'}</B>) will be compile into a new regular expression. Use the {@code '\n'} 026 * within the expression to generated newlines. 027 * 028 * <BR /><BR /><B CLASS=JDDescLabel>Some Syntax Rules:</B> 029 * 030 * <BR /><UL CLASS=JDUL> 031 * 032 * <LI> <B>Comment lines</B> are lines beginning with the <I>POUND</I> <B>({@code '#'})</B> 033 * sign. 034 * <BR /><BR /> 035 * </LI> 036 * 037 * <LI> <B>Blank lines</B> are ignored by the file-parse completely. 038 * <BR /><BR /> 039 * </LI> 040 * 041 * <LI> Lines with <B>only white-space</B> are considered blank. 042 * <BR /><BR /> 043 * </LI> 044 * 045 * <LI> <B>Flag Lines</B> are lines that begin with two, successive, <I>POUND</I> 046 * <B>({@code '##'})</B> signs. 047 * <BR /><BR /> 048 * </LI> 049 * 050 * <LI> All non-comment, non-blank and non-flag lines are converted into Regular-Expression 051 * {@code Pattern's} 052 * </LI> 053 * </UL> 054 * 055 * <BR /><BR /><B CLASS=JDDescLabel>LFEC Note:</B> 056 * 057 * <BR />This method will <I>halt program execution</I> if any exceptions occur when loading a 058 * Regular-Expression text file! This is the primary-purpose of all {@code 'LFEC'} - Load File 059 * Exception Catch methods. 060 * 061 * @param f Filename for a Regular Expression 062 * 063 * @return A {@code Vector} containing one compiled regular expression per line. Comment lines 064 * & blank lines will all be ignored. 065 * 066 * @see java.util.regex.Pattern 067 * @see #generateFlags(String) 068 * @see LFEC#ERROR_EXIT(Throwable, String) 069 */ 070 public static Vector<Pattern> LFEC(String f) 071 { 072 try 073 { return parse(FileRW.loadFileToVector(f, false), f); } 074 075 catch (Throwable t) 076 { 077 LFEC.ERROR_EXIT(t, "Attempt to load Regular Expression file: [" + f + "], failed.\n"); 078 } 079 080 return null; // Should NOT be possible to reach this statement... 081 } 082 083 /** 084 * This does the <B><I>exact same thing</I></B> as {@link LFEC}, but loads the file into a 085 * {@code Vector} using the "JAR File" information included here. In this case, parameter 086 * {@code f} indicates a jar-file class-loader pointer. It will not load from the standard 087 * file-system. 088 * 089 * <BR /><BR /><B CLASS=JDDescLabel>Java's <CODE>getResourceAsStream</CODE>:</B> 090 * 091 * <BR />The JAR implies that Java's "load resource as stream" features are being used in place 092 * of standard file i/o routines. Specifically, this loads from a JAR file, as seen below: 093 * 094 * <DIV CLASS=SNIP>{@code 095 * BufferedReader br = 096 * new BufferedReader(new InputStreamReader(c.getResourceAsStream(f))); 097 * }</DIV> 098 * 099 * @param c This contains the class that is loading the file. It is not too important to use 100 * the "exact class" - since the only reason the class doing the loading is because the 101 * "Class Loader" employs the exact "Package Name" of the class for figuring out the 102 * directory / sub-directory where the data-file is stored. This variable may not be null. 103 * 104 * <BR /><BR /><DIV CLASS=JDHint> 105 * <B STYLE='color:red;'>Example:</B> If you wanted to load a "Regular Expressions.txt" file 106 * that was in the same BASH/Debian/etc... directory as the following class - the following 107 * call to {@code 'RegExFiles'} would load the text-file "Regular Expressions.txt" into memory 108 * quickly. The primary purpose being that text files are <B>much easier to read than 109 * 'double-escaped' Java {@code String's}.</B> 110 * </DIV> 111 * 112 * <BR /><DIV CLASS=JDHintAlt> 113 * <B>Note:</B> It might be important to read the Java Doc's about the 114 * {@code 'getResourceAsStream(String)'} method for retrieving data that was stored to a JAR 115 * file instead of a UNIX/BASH/MS-DOS system file. Oracle's Java 8 would help. 116 * </DIV> 117 * 118 * <EMBED CLASS='external-html' DATA-FILE-ID=REF_RAW_TYPES_NOTE> 119 * 120 * @param f This is a file-pointer to a file stored inside a Java JAR file. 121 * 122 * @return A Vector containing one compiled regular expression per line. Comment lines & 123 * blank lines will all be ignored. 124 * 125 * @see #LFEC(String) 126 * @see #parse(Vector, String) 127 * @see LFEC#ERROR_EXIT(Throwable, String) 128 */ 129 public static Vector<Pattern> LFEC_JAR(Class<?> c, String f) 130 { 131 try ( 132 InputStream is = c.getResourceAsStream(f); 133 BufferedReader br = new BufferedReader(new InputStreamReader(is)); 134 ) 135 { 136 String s = ""; 137 StringBuilder sb = new StringBuilder(); 138 Vector<String> file = new Vector<String>(); 139 140 while ((s = br.readLine()) != null) file.addElement(s); 141 142 return parse(file, f); 143 } 144 145 catch (Throwable t) 146 { 147 LFEC.ERROR_EXIT( 148 t, 149 "Attempted to load Regular Expression file: [" + f + "]\n" + 150 "From jar-file using class: [" + c.getCanonicalName() + "]\n" + 151 "Did not load successfully." 152 ); 153 } 154 155 156 // Should NOT be possible to reach this statement... 157 // Compiler does not recognize LFEC.ERROR_EXIT 158 159 return null; 160 } 161 162 /** 163 * This is identical to {@code LFEC_JAR}, except that it presumes the file was compressed 164 * before saving. 165 * 166 * @param c This contains the class that is loading the file. It is not too important to use 167 * the "exact class" - since the only reason the class doing the loading is because the "Class 168 * Loader" employs the exact "Package Name" of the class for figuring out the directory / 169 * sub-directory where the data-file is stored. This variable may not be null. Again, the 170 * class-loader looks in the directory of the package that contains this class! 171 * 172 * <BR /><BR /><DIV CLASS=JDHintAlt> 173 * <B STYLE='color:red;'>Note:</B> The method {@code public static Vector<Pattern> 174 * LFEC_JAR(Class, String;)} has a more detailed look at the particular use of this parameter. 175 * The easy way to understand is: just pass the class that is doing the actual loading of the 176 * regular-expression <B>(presuming the regex.dat file is in the same directory as the 177 * {@code '.class'} file!)</B> 178 * </DIV> 179 * 180 * <EMBED CLASS='external-html' DATA-FILE-ID=REF_RAW_TYPES_NOTE> 181 * 182 * @param f This is a file-pointer to a file stored inside a Java JAR file. 183 * 184 * @return A {@code Vector} containing one compiled regular expression per line. Comment 185 * lines & blank lines will all be ignored. 186 * 187 * @see #LFEC_JAR(Class, String) 188 * @see #parse(Vector, String) 189 * @see LFEC#ERROR_EXIT(Throwable, String) 190 */ 191 public static Vector<Pattern> LFEC_JAR_ZIP(Class<?> c, String f) 192 { 193 try ( 194 InputStream is = c.getResourceAsStream(f); 195 GZIPInputStream gzip = new GZIPInputStream(is); 196 ObjectInputStream ois = new ObjectInputStream(gzip); 197 ) 198 { 199 Object ret = ois.readObject(); 200 String fileStr = (String) ret; 201 Vector<String> file = new Vector<>(); 202 int newLinePos = 0; 203 204 while ((newLinePos = fileStr.indexOf('\n')) != -1) 205 { 206 file.addElement(fileStr.substring(0, newLinePos)); 207 fileStr = fileStr.substring(newLinePos + 1); 208 } 209 210 return parse(file, f); 211 } 212 213 catch (Throwable t) 214 { 215 LFEC.ERROR_EXIT(t, 216 "Attempted to load Regular Expression file: [" + f + "]\n" + 217 "From jar-file using class: [" + c.getCanonicalName() + "]\n" + 218 "Content was zipped, but failed to load." 219 ); 220 } 221 222 return null; // Should NOT be possible to reach this statement... 223 } 224 225 /** 226 * This does the <B><I>exact same thing</I></B> as {@link LFEC}, but takes a "pre-loaded file" 227 * as a {@code Vector}. This is an internal class - used to ensure that the methods: 228 * {@code LFEC_JAR} and {@code LFEC} do the exact same thing. 229 * 230 * @param file This presumes that the regular-expression text-file has been loaded into a 231 * {@code Vector<String>} (w/out the "include newlines" option!) 232 * 233 * @param name The name of the file loading is required so that error-printing-information is 234 * easier. 235 * 236 * @return A {@code Vector} containing one compiled regular expression per line. Comment lines 237 * & blank lines will all be ignored. 238 * 239 * @see #LFEC(String) 240 */ 241 protected static Vector<Pattern> parse(Vector<String> file, String name) 242 { 243 try 244 { 245 Vector<Pattern> ret = new Vector<Pattern>(); 246 int flags = 0; 247 248 for (String line : file) 249 { 250 if (line.trim().length() == 0) continue; 251 252 if (line.charAt(0) == '#') 253 { 254 if (line.length() > 1) if (line.charAt(1) == '#') flags = generateFlags(line); 255 continue; 256 } 257 258 if (flags != 0) ret.add(Pattern.compile(line, flags)); 259 else ret.add(Pattern.compile(line)); 260 261 flags = 0; 262 } 263 264 return ret; 265 } 266 267 catch (Throwable t) 268 { LFEC.ERROR_EXIT(t, "error parsing regular expression file: " + name); } 269 270 return null; // Should NOT be possible to reach this statement... 271 } 272 273 /** 274 * This information has been copied from Java's regular expression: {@code Pattern}. This is a 275 * Helper function as it converts the text-{@code String's} into their constants, so that a 276 * user may include these text {@code String's} in a regular expression file. 277 * 278 * <BR /><BR /><DIV CLASS=JDHint> 279 * <B STYLE='color:red;'>Note:</B> The regular expression loader will only load regular 280 * expressions that fit on a single line of text. Other than lines that begin with a comment, 281 * each line is intended/interpreted as an independent Regular Expression. 282 * </DIV> 283 * 284 * @see java.util.regex.Pattern 285 */ 286 protected static int generateFlags(String line) 287 { 288 int mask = 0; 289 290 if (line.contains("CANON_EQ")) mask |= Pattern.CANON_EQ; 291 if (line.contains("CASE_INSENSITIVE")) mask |= Pattern.CASE_INSENSITIVE; 292 if (line.contains("DOTALL")) mask |= Pattern.DOTALL; 293 if (line.contains("COMMENTS")) mask |= Pattern.COMMENTS; 294 if (line.contains("LITERAL")) mask |= Pattern.LITERAL; 295 if (line.contains("MULTILINE")) mask |= Pattern.MULTILINE; 296 if (line.contains("UNICODE_CASE")) mask |= Pattern.UNICODE_CASE; 297 298 return mask; 299 } 300}