1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package Torello.HTML.helper;

import java.util.regex.Pattern;
import java.util.function.Predicate;


// This class has been made public so that it may also be used in:
//      Torello.HTML.Tools.SyntaxHiLite
//
// It is now located inside of a "Helper-Package", as per the rules specified by the Build.


// ********************************************************************************************
// ********************************************************************************************
// Internally used Regular Expressions, (STATIC FIELDS INSIDE STATIC CLASS)
// ********************************************************************************************
// ********************************************************************************************


/**
 * Regular-Expressions that are used by both the parsing class {@link HTMLPage}, and class 
 * {@link TagNode} for searching HTML tags for attributes and even data.
 * 
 * <BR /><BR />All fields are pre-compiled, {@code static final} constants.  This class has a
 * private constructor and cannot be instantiated.
 * 
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_ATTR_REGEX>
 */
public final class AttrRegEx
{
    // Utility class of constants only - no instances.
    private AttrRegEx() { }

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_KV>
     * 
     * <BR /><BR />Capture groups: group 1 is the complete {@code key=value} text, group 2 is
     * the key, and group 3 is the value <I>(including its surrounding quotes, if any)</I>.
     * 
     * @see TagNode#allAV(boolean, boolean)
     */
    public static final Pattern KEY_VALUE_REGEX = Pattern.compile(
        "(?:\\s+?" +                    // mandatory leading white-space
            "(([\\w-]+?)=(" +           // inner-tag name (a.k.a. 'key' or 'attribute-name')
                "'[^']*?'"     + "|" +  // inner-tag value using single-quotes ... 'OR'
                "\"[^\"]*?\""   + "|" + // inner-tag value using double-quotes ... 'OR'
                "[^\"'>\\s]*"   +       // inner-tag value without quotes
        ")))",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL
    );

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_RE_KO_DATA>
     * 
     * <BR /><BR />Capture groups: group 1 is the text {@code "data"} <I>(in whatever case it
     * was found)</I>, and group 2 is the portion of the attribute name that follows the dash.
     * 
     * @see TagNode#toUpperCase(boolean)
     * @see TagNode#toLowerCase(boolean)
     */
    // NOTE(review): the terminating white-space character (or the final '>') is consumed by the
    // match.  Two key-only data attributes separated by exactly one space can therefore not both
    // be found by successive 'find()' calls - confirm that callers do not rely on that.
    public static final Pattern KEY_ONLY_DATA_REGEX = Pattern.compile
        ("\\s([dD][aA][tT][aA])-([\\w-]+)(?:\\s|>$)");


    /**
     * This matches all valid attribute-<B STYLE='color: red;'>keys</B> <I>(not values)</I> of
     * HTML Element <B STYLE='color: red;'>key-value pairs</B>.
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> <B>PART-1:</B> {@code [A-Za-z_]} The first character must be a letter or the
     *      underscore.
     * </LI>
     * <LI> <B>PART-2:</B> {@code [A-Za-z0-9_-]} All other characters must be alpha-numeric,
     *      the dash {@code '-'}, or the underscore {@code '_'}.
     * </LI>
     * </UL>
     * 
     * <BR /><BR />The expression is anchored at both ends, so it only matches when the
     * <I>entire</I> input is a valid key.  A trailing line-terminator is <B>not</B> accepted.
     * 
     * @see InnerTagKeyException#check(String[])
     * @see TagNode#allKeyOnlyAttributes(boolean)
     */
    // The end-anchor is '\z' rather than '$'.  Without MULTILINE, '$' also matches immediately
    // before a final line-terminator, which allowed inputs such as "href\n" to be accepted as
    // valid keys whenever this Pattern was used with 'find()' (as 'asPredicate()' does).
    public static final Pattern ATTRIBUTE_KEY_REGEX = 
        Pattern.compile("^[A-Za-z_][A-Za-z0-9_-]*\\z");

    /**
     * A {@code Predicate<String>} view of {@link #ATTRIBUTE_KEY_REGEX}.  It returns
     * {@code TRUE} if and only if the complete input {@code String} is a valid attribute-key,
     * as defined by that regular-expression.
     * 
     * @see #ATTRIBUTE_KEY_REGEX
     */
    public static final Predicate<String> ATTRIBUTE_KEY_REGEX_PRED =
        ATTRIBUTE_KEY_REGEX.asPredicate();

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_DATA>
     * 
     * <BR /><BR />Capture groups: group 1 is the complete {@code data-name=value} text, group 2
     * is the name that follows {@code "data-"}, and group 3 is the value <I>(including its
     * surrounding quotes, if any)</I>.
     * 
     * @see TagNode#getDataAN()
     * @see TagNode#getDataAV()
     */
    public static final Pattern DATA_ATTRIBUTE_REGEX = Pattern.compile(
        // regex will match, for example:   data-src="https://cdn.imgur.com/MyImage.jpg"
        "(?:\\s+?" +                            // mandatory leading white-space
            "(data-([\\w-]+?)=" +               // data inner-tag name 
                "(" +   "'[^']*?'"      + "|" + // inner-tag value using single-quotes ... 'OR'
                        "\"[^\"]*?\""   + "|" + // inner-tag value using double-quotes ... 'OR'
                        "[^\"'>\\s]*"   +       // inner-tag value without quotes
            ")))",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL  
    );

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_REGEX_CSS>
     * 
     * <BR /><BR />Capture groups: group 1 is the property-name, group 2 is the property-value
     * text, and group 3 is whatever terminated the definition.
     * 
     * @see TagNode#cssStyle()
     */
    public static final Pattern CSS_INLINE_STYLE_REGEX = Pattern.compile(
            // regex will match, for example:  font-weight: bold;

            // CSS Style Property Name - Must begin with a letter, an underscore or a dash
            "([_\\-a-zA-Z]+" + "[_\\-a-zA-Z0-9]*)" +

            // The ":" symbol between property-name and property-value
            "\\s*?" + ":" + "\\s*?" +

            // CSS Style Property Value
            "([^;]+?\\s*)" +

            // text after the "Name : Value" definition    
            "(;|$|[\\w]+$)"
    );
}