1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110 | package Torello.HTML.helper;
import java.util.regex.Pattern;
import java.util.function.Predicate;
// I have made this public, because I want to use this in:
// Torello.HTML.Tools.SyntaxHiLite
//
// It is now inside of a "Helper-Package", as per the rules specified by the Build
// ********************************************************************************************
// ********************************************************************************************
// Internally used Regular Expressions, (STATIC FIELDS INSIDE STATIC CLASS)
// ********************************************************************************************
// ********************************************************************************************
/**
 * Regular-Expressions that are used by both the parsing class {@link HTMLPage}, and class
 * {@link TagNode} for searching HTML tags for attributes and even data.
 *
 * <BR /><BR />Every pattern is compiled exactly once and exposed as a {@code public static
 * final} constant.  This class holds constants only; its constructor is private.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_ATTR_REGEX>
 */
public class AttrRegEx
{
    // Constants-only holder: the private constructor prevents instantiation.
    private AttrRegEx() { }

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_KV>
     *
     * <BR /><BR />Capture groups:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> <B>Group 1:</B> the complete {@code key=value} text, without the leading
     *      white-space.
     *      </LI>
     * <LI> <B>Group 2:</B> the attribute-name (the 'key').</LI>
     * <LI> <B>Group 3:</B> the attribute-value, <I>including</I> its surrounding quotes when
     *      the value is quoted.  The unquoted alternative may match the empty-string, so a
     *      bare {@code key=} is matched with an empty value.
     *      </LI>
     * </UL>
     *
     * @see TagNode#allAV(boolean, boolean)
     */
    public static final Pattern KEY_VALUE_REGEX = Pattern.compile(
        "(?:\\s+?" +                // mandatory leading white-space
            "(([\\w-]+?)=(" +       // inner-tag name (a.k.a. 'key' or 'attribute-name')
            "'[^']*?'"      + "|" + // inner-tag value using single-quotes ... 'OR'
            "\"[^\"]*?\""   + "|" + // inner-tag value using double-quotes ... 'OR'
            "[^\"'>\\s]*"   +       // inner-tag value without quotes
        ")))",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL
    );

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_RE_KO_DATA>
     *
     * <BR /><BR />Matches a single white-space character, the word {@code data} (any
     * letter-case), a dash, and a name - followed by either a white-space character, or by a
     * {@code '>'} that ends the input.  <B>Group 1</B> is the word {@code data} exactly as it
     * was written, and <B>Group 2</B> is the name which follows the dash.
     *
     * @see TagNode#toUpperCase(boolean)
     * @see TagNode#toLowerCase(boolean)
     */
    public static final Pattern KEY_ONLY_DATA_REGEX = Pattern.compile
        ("\\s([dD][aA][tT][aA])-([\\w-]+)(?:\\s|>$)");

    /**
     * This matches all valid attribute-<B STYLE='color: red;'>keys</B> <I>(not values)</I> of
     * HTML Element <B STYLE='color: red;'>key-value pairs</B>.
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> <B>PART-1:</B> {@code [A-Za-z_]} The first character must be a letter or the
     *      underscore.
     *      </LI>
     * <LI> <B>PART-2:</B> {@code [A-Za-z0-9_-]} All other characters must be alpha-numeric,
     *      the dash {@code '-'}, or the underscore {@code '_'}.
     *      </LI>
     * </UL>
     *
     * <BR /><BR />The pattern is anchored with {@code ^} and {@code \z}.  The end-anchor is
     * deliberately {@code \z} rather than {@code $}: in Java, {@code $} also matches just
     * before a final line-terminator, which would let a key with a trailing new-line pass a
     * {@code find()} based check.
     *
     * @see InnerTagKeyException#check(String[])
     * @see TagNode#allKeyOnlyAttributes(boolean)
     */
    public static final Pattern ATTRIBUTE_KEY_REGEX =
        Pattern.compile("^[A-Za-z_][A-Za-z0-9_-]*\\z");

    /**
     * {@link #ATTRIBUTE_KEY_REGEX} as a {@code java.util.function.Predicate}, obtained from
     * {@code Pattern.asPredicate()}.  That predicate tests with {@code find()}, but because
     * the pattern is anchored at both ends it only returns {@code TRUE} when the
     * <I>entire</I> input {@code String} is a valid attribute-key.
     */
    public static final Predicate<String> ATTRIBUTE_KEY_REGEX_PRED =
        ATTRIBUTE_KEY_REGEX.asPredicate();

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_DATA>
     *
     * <BR /><BR />Capture groups:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> <B>Group 1:</B> the complete {@code data-name=value} text, without the leading
     *      white-space.
     *      </LI>
     * <LI> <B>Group 2:</B> the portion of the attribute-name that follows {@code data-}.</LI>
     * <LI> <B>Group 3:</B> the attribute-value, <I>including</I> its surrounding quotes when
     *      the value is quoted.
     *      </LI>
     * </UL>
     *
     * @see TagNode#getDataAN()
     * @see TagNode#getDataAV()
     */
    public static final Pattern DATA_ATTRIBUTE_REGEX = Pattern.compile(
        // regex will match, for example: data-src="https://cdn.imgur.com/MyImage.jpg"
        "(?:\\s+?" +                    // mandatory leading white-space
            "(data-([\\w-]+?)=" +       // data inner-tag name
            "(" + "'[^']*?'" + "|" +    // inner-tag value using single-quotes ... 'OR'
            "\"[^\"]*?\""    + "|" +    // inner-tag value using double-quotes ... 'OR'
            "[^\"'>\\s]*"    +          // inner-tag value without quotes
        ")))",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL
    );

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_CSS>
     *
     * <BR /><BR />Capture groups:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> <B>Group 1:</B> the CSS property-name.</LI>
     * <LI> <B>Group 2:</B> the CSS property-value.  It is <I>not</I> trimmed, and may begin
     *      or end with white-space.
     *      </LI>
     * <LI> <B>Group 3:</B> the text that ends the definition: a semi-colon, the
     *      empty-string at end-of-input, or a run of word-characters reaching end-of-input.
     *      </LI>
     * </UL>
     *
     * @see TagNode#cssStyle()
     */
    public static final Pattern CSS_INLINE_STYLE_REGEX = Pattern.compile(
        // regex will match, for example: font-weight: bold;

        // CSS Style Property Name - begins with a letter, the underscore or a dash, and may
        // then continue with letters, digits, underscores and dashes.
        //
        // The leading character-class is matched exactly once.  A "+" here, directly in front
        // of the overlapping "*" class, accepts exactly the same text but forces the regex
        // engine into needless (polynomial) backtracking on long names that have no ':'.
        "([_\\-a-zA-Z]" + "[_\\-a-zA-Z0-9]*)" +

        // The ":" symbol between property-name and property-value
        "\\s*?" + ":" + "\\s*?" +

        // CSS Style Property Value
        "([^;]+?\\s*)" +

        // text after the "Name : Value" definition
        //
        // NOTE(review): because the value-group is reluctant, the "[\\w]+$" alternative can
        // end a match early when the final definition has no ';'.  With the input
        // "color: red", group 2 is " " and group 3 is "red".  Kept as-is, since callers may
        // rely on it - confirm against TagNode.cssStyle() before changing.
        "(;|$|[\\w]+$)"
    );
}
|