1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 | package Torello.HTML;
import java.util.regex.*;
/**
 * Package-local holder for the pre-compiled regular expressions declared below.
 *
 * <p>Each expression is compiled exactly once, when this class is initialized, and is then shared
 * through a {@code static final} field.  The class carries no state of its own and is not meant
 * to be instantiated.
 */
final class HTMLRegEx
{
    /** Constants-only holder: the private constructor prevents instantiation. */
    private HTMLRegEx() { }

    /**
     * Matches a single HTML element tag: opening ({@code <a href="...">}), closing
     * ({@code </a>}) or self-closing ({@code <br/>}).  It does not match comments; those are
     * handled by {@link #P2}.
     *
     * <p>Capture group 1 is the tag name (1 to 127 word characters).
     *
     * <p>The existing notes list class ParserRE and Torello.HTML.TagNode as users of this
     * pattern; those call sites are outside this file.
     *
     * <p>{@link Pattern#DOTALL} is kept as originally declared.  This expression contains no
     * {@code '.'}, and negated character classes such as {@code [^>]} already match line
     * terminators, so the flag does not change what is matched here.
     *
     * <p>NOTE(review): the catch-all branch {@code [^>]+} is itself repeated by the enclosing
     * {@code (...)*}.  A {@code '<'} that is never followed by a {@code '>'} may therefore cause
     * heavy backtracking - confirm before running this on untrusted or very large input.
     */
    static final Pattern P1 = Pattern.compile(
        // '<', an optional '/', then the tag name (capture group 1)
        "<\\/?(\\w{1,127})" +

        // Zero or more "inner tag" pieces.  The order of the alternatives matters: the quoted
        // attribute-value branches come first so that a '>' inside a quoted value is consumed
        // as part of the value instead of being mistaken for the end of the tag.
        "(?:" +
            "[\\w-]+=\"[^\"]*?\""   + "|" + // attribute="value containing no double-quote"
            "[\\w-]+='[^']*?'"      + "|" + // attribute='value containing no single-quote'
            "[\\w-]+=[\\w-]*"       + "|" + // attribute=unquoted-value (word chars and '-' only)
            "[\\w-]+"               + "|" + // attribute with no value
            "\\s+"                  + "|" + // any white-space
            "[^>]+"                 +       // anything else ("junk") except '>'
                                            // MUST stay the LAST alternative: placed earlier it
                                            // would swallow everything up to the first '>' and
                                            // defeat the quoted-value branches above.
        ")*" +

        // End of the tag: '>' or '/>'
        "\\/?>",

        Pattern.DOTALL
    );

    /**
     * (Package-local RegEx) Matches one HTML comment, from {@code <!--} up to the first
     * following {@code -->}.  {@link Pattern#DOTALL} lets {@code '.'} match line terminators, so
     * a comment may span several lines.
     *
     * <p>The existing notes list class ParserRE and ParserHM as users, for parsing
     * comment-nodes; those call sites are outside this file.
     */
    static final Pattern P2 = Pattern.compile("<!--.*?-->", Pattern.DOTALL);
}
|