package Torello.HTML.helper;

import java.util.regex.Pattern;
import java.util.function.Predicate;

// I have made this public, because I want to use this in:
// Torello.HTML.Tools.SyntaxHiLite
//
// It is now inside of a "Helper-Package", as per the rules specified by the Build

// ********************************************************************************************
// ********************************************************************************************
// Internally used Regular Expressions, (STATIC FIELDS INSIDE STATIC CLASS)
// ********************************************************************************************
// ********************************************************************************************

/**
 * Regular-Expressions that are used by both the parsing class {@link HTMLPage}, and class
 * {@link TagNode} for searching HTML tags for attributes and even data.
 *
 * <BR /><BR />This class only holds pre-compiled, {@code static final} constants; it has a
 * private constructor and cannot be instantiated.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_ATTR_REGEX>
 */
public class AttrRegEx
{
    // Constants-only holder: no instances.
    private AttrRegEx() { }

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_KV>
     *
     * <BR /><BR />Matches one {@code key=value} attribute preceded by white-space.  Capture
     * groups:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI><B>Group 1:</B> the complete {@code key=value} text (without the leading
     *      white-space)
     *      </LI>
     * <LI><B>Group 2:</B> the key (word-characters and dashes)</LI>
     * <LI><B>Group 3:</B> the value, <I>including</I> its surrounding quotes when it is
     *      quoted
     *      </LI>
     * </UL>
     *
     * @see TagNode#allAV(boolean, boolean)
     */
    public static final Pattern KEY_VALUE_REGEX = Pattern.compile(
        "(?:\\s+?" +                // mandatory leading white-space
        "(([\\w-]+?)=(" +           // inner-tag name (a.k.a. 'key' or 'attribute-name')
        "'[^']*?'"      + "|" +     // inner-tag value using single-quotes ... 'OR'
        "\"[^\"]*?\""   + "|" +     // inner-tag value using double-quotes ... 'OR'
        "[^\"'>\\s]*"   +           // inner-tag value without quotes
        ")))",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL
    );

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_RE_KO_DATA>
     *
     * <BR /><BR />Matches a key-only (value-less) {@code data-*} attribute.  Group 1 is the
     * literal {@code "data"} prefix in whatever letter-case it was written, and group 2 is
     * the name that follows the dash.
     *
     * <BR /><BR /><B>NOTE (review):</B> the delimiter that follows the attribute (a
     * white-space character, or a {@code '>'} at the end of the input) is <I>consumed</I> by
     * the match.  When two key-only data attributes are separated by a single white-space
     * character, successive calls to {@code find()} therefore will not match the second one.
     * This is left unchanged because callers may depend on the current match boundaries;
     * confirm against the callers before switching to a look-ahead.
     *
     * @see TagNode#toUpperCase(boolean)
     * @see TagNode#toLowerCase(boolean)
     */
    public static final Pattern KEY_ONLY_DATA_REGEX = Pattern.compile
        ("\\s([dD][aA][tT][aA])-([\\w-]+)(?:\\s|>$)");

    /**
     * This matches all valid attribute-<B STYLE='color: red;'>keys</B> <I>(not values)</I> of
     * HTML Element <B STYLE='color: red;'>key-value pairs</B>.
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> <B>PART-1:</B> {@code [A-Za-z_]} The first character must be a letter or the
     *      underscore.
     *      </LI>
     * <LI> <B>PART-2:</B> {@code [A-Za-z0-9_-]} All other characters must be alpha-numeric,
     *      the dash {@code '-'}, or the underscore {@code '_'}.
     *      </LI>
     * </UL>
     *
     * <BR /><BR />The expression is anchored with {@code ^} and {@code \z}.  The
     * absolute end-of-input anchor is used instead of {@code $}, because {@code $} would also
     * match just before a trailing line-terminator, and a key such as {@code "abc\n"} would
     * then be accepted by {@code find()}.
     *
     * @see InnerTagKeyException#check(String[])
     * @see TagNode#allKeyOnlyAttributes(boolean)
     */
    public static final Pattern ATTRIBUTE_KEY_REGEX =
        Pattern.compile("^[A-Za-z_][A-Za-z0-9_-]*\\z");

    /**
     * Predicate view of {@link #ATTRIBUTE_KEY_REGEX}.  Returns {@code TRUE} if and only if
     * the complete input {@code String} is a valid attribute-key; the empty-string, and any
     * {@code String} containing white-space or a line-terminator, are rejected.
     */
    public static final Predicate<String> ATTRIBUTE_KEY_REGEX_PRED =
        ATTRIBUTE_KEY_REGEX.asPredicate();

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_DATA>
     *
     * <BR /><BR />Matches one {@code data-name=value} attribute preceded by white-space.
     * Matching is case-insensitive.  Capture groups:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI><B>Group 1:</B> the complete {@code data-name=value} text (without the leading
     *      white-space)
     *      </LI>
     * <LI><B>Group 2:</B> the name following the {@code "data-"} prefix</LI>
     * <LI><B>Group 3:</B> the value, <I>including</I> its surrounding quotes when it is
     *      quoted
     *      </LI>
     * </UL>
     *
     * @see TagNode#getDataAN()
     * @see TagNode#getDataAV()
     */
    public static final Pattern DATA_ATTRIBUTE_REGEX = Pattern.compile(
        // regex will match, for example: data-src="https://cdn.imgur.com/MyImage.jpg"

        "(?:\\s+?" +                // mandatory leading white-space
        "(data-([\\w-]+?)=" +       // data inner-tag name
        "(" +
        "'[^']*?'"      + "|" +     // inner-tag value using single-quotes ... 'OR'
        "\"[^\"]*?\""   + "|" +     // inner-tag value using double-quotes ... 'OR'
        "[^\"'>\\s]*"   +           // inner-tag value without quotes
        ")))",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL
    );

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_CSS>
     *
     * <BR /><BR />Matches one {@code name: value} declaration of an inline CSS
     * {@code STYLE} attribute.  Group 1 is the property-name, group 2 is the property-value
     * (which may carry trailing white-space), and group 3 is whatever terminated the
     * declaration: a semi-colon, the end of the input, or trailing word-characters that run
     * to the end of the input.
     *
     * @see TagNode#cssStyle()
     */
    public static final Pattern CSS_INLINE_STYLE_REGEX = Pattern.compile(
        // regex will match, for example: font-weight: bold;

        // CSS Style Property Name - first character is a letter, underscore or dash;
        // any later character may also be a digit
        "([_\\-a-zA-Z]+" + "[_\\-a-zA-Z0-9]*)" +

        // The ":" symbol between property-name and property-value
        "\\s*?" + ":" + "\\s*?" +

        // CSS Style Property Value
        "([^;]+?\\s*)" +

        // text after the "Name : Value" definition
        "(;|$|[\\w]+$)"
    );
}