// TagNode-CaseChange.java.html

package Torello.HTML;

import Torello.HTML.helper.AttrRegEx;

import Torello.Java.StrCmpr;

import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Package-private helpers that produce a copy of a {@code TagNode} whose tag-name and/or
 * attribute-names have been run through a caller-supplied case function (for instance
 * {@code String::toUpperCase} or {@code String::toLowerCase}).
 *
 * <p>Attribute <b>values</b> are never modified.  For attributes whose name begins with
 * {@code "data-"} (compared ignoring case), only the {@code "data-"} prefix is re-cased; the
 * remainder of the name is copied verbatim, since its case may be significant to consumers.
 */
class CaseChange
{
    // ********************************************************************************************
    // ********************************************************************************************
    // Shared Helpers
    // ********************************************************************************************
    // ********************************************************************************************


    /** Length of the {@code "data-"} prefix that is re-cased separately from the rest of a name */
    private static final int DATA_PREFIX_LEN = 5;

    /**
     * Re-cases an attribute name.  When the name starts with {@code "data-"} (ignoring case),
     * only that prefix is re-cased and the suffix is preserved exactly as written.
     *
     * @param attrName the attribute name as it appears in the tag
     * @param caseFunc the case-conversion function to apply
     * @return the re-cased attribute name
     */
    private static String recaseAttrName
        (final String attrName, final Function<String, String> caseFunc)
    {
        return StrCmpr.startsWithIgnoreCase(attrName, "data-")
            ? (caseFunc.apply("data-") + attrName.substring(DATA_PREFIX_LEN))
            : caseFunc.apply(attrName);
    }

    /**
     * Builds a new {@code TagNode} in which only the tag-name has been re-cased.  Everything that
     * follows the tag-name in {@code tn.str} is copied unchanged.
     *
     * @param tn       the node to copy
     * @param caseFunc the case-conversion function to apply to {@code tn.tok}
     * @return a new node built from the rewritten string
     */
    private static TagNode recaseTagNameOnly
        (final TagNode tn, final Function<String, String> caseFunc)
    {
        // A closing tag opens with the two characters "</", an opening tag with the one "<"
        final String    opener      = tn.isClosing ? "</" : "<";
        final String    newName     = caseFunc.apply(tn.tok);
        final String    remainder   = tn.str.substring(opener.length() + tn.tok.length());

        return new TagNode(opener + newName + remainder);
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Upper/Lower ALL-ATTRIBUTES
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Appends {@code s} to {@code sb} with {@code caseFunc} applied, except that the suffix of
     * any attribute matched by {@code AttrRegEx.KEY_ONLY_DATA_REGEX} is copied verbatim.
     *
     * <p>The input is the text that lies <i>between</i> key-value attribute matches (or after the
     * last one): white-space, value-less "boolean" attributes, and the closing of the tag.
     *
     * <p>The loop is only entered when that text contains a value-less attribute that is also a
     * {@code data-xxx} attribute.  For example:
     *
     * <pre>
     *      NO:     &lt;DIV&gt;
     *      NO:     &lt;DIV CLASS='hello'&gt;
     *      NO:     &lt;DIV DATA-FILE='MyFile.txt'&gt;
     *      NO:     &lt;DIV CLASS=SomeClass DATA-FILE='MyFile.txt'&gt;
     *      NO:     &lt;DIV CLASS=SomeClass HIDDEN&gt;
     *      YES:    &lt;DIV DATA-MARKER Other="something"&gt;
     * </pre>
     *
     * In every other case, the single append after the loop is all that happens.
     *
     * @param s        the in-between text to process
     * @param sb       receives the processed text
     * @param caseFunc the case-conversion function to apply
     */
    private static void toCaseInternalDataHelper
        (String s, StringBuilder sb, Function<String, String> caseFunc)
    {
        final Matcher   dataMatcher = AttrRegEx.KEY_ONLY_DATA_REGEX.matcher(s);
        int             consumed    = 0;

        while (dataMatcher.find())
        {
            // Text between the previous match (or the start of 's') and this match.
            //
            // NOTE(review): the regex is declared elsewhere; from its use here, group(1)
            //               appears to begin at the "data-" prefix and group(2) appears to be
            //               the part of the name following that prefix - confirm against
            //               AttrRegEx.

            final String before = s.substring(consumed, dataMatcher.start(1));

            sb.append(caseFunc.apply(before));
            sb.append(caseFunc.apply("data-"));
            sb.append(dataMatcher.group(2));        // suffix is copied verbatim

            consumed = dataMatcher.end(2);
        }

        // Append whatever follows the last match (or all of 's' when nothing matched)
        sb.append(caseFunc.apply(s.substring(consumed)));
    }

    /**
     * Re-cases the tag-name of {@code tn}, and optionally every attribute-name as well.
     *
     * @param tn the node to copy
     *
     * @param justTag_Or_TagAndAttributeNames when {@code TRUE} only the tag-name is re-cased;
     * when {@code FALSE} the tag-name and all attribute-names are re-cased
     *
     * @param caseFunc the case-conversion function to apply
     *
     * @return a new {@code TagNode} containing the re-cased text
     */
    static TagNode toCaseInternal(
            final TagNode                   tn,
            final boolean                   justTag_Or_TagAndAttributeNames,
            final Function<String, String>  caseFunc
        )
    {
        // '<' + tok + '>' is exactly (tok.length() + 2) characters: nothing but the tag-name
        final boolean noAttributes = tn.str.length() == (tn.tok.length() + 2);

        if (justTag_Or_TagAndAttributeNames || tn.isClosing || noAttributes)
            return recaseTagNameOnly(tn, caseFunc);

        final StringBuilder sb          = new StringBuilder();
        final Matcher       kvMatcher   = AttrRegEx.KEY_VALUE_REGEX.matcher(tn.str);

        sb.append("<").append(caseFunc.apply(tn.tok));

        // Skip over the opening '<' and the Tag-Name
        int pos = tn.tok.length() + 1;


        // Iterate the Key-Value (Attribute-Name & Attribute-Value) pairs.  Only names are
        // modified, never values.
        //
        // As used here: group(2) is the 'key' of the pair, and group(3) is the 'value' -
        // including any surrounding quotes.

        while (kvMatcher.find())
        {
            // Any text that falls between two Key-Value matches is white-space and/or
            // value-less (boolean) attributes.  It is re-cased as a whole, with the exception
            // of the suffix of value-less "data-" attributes, which the helper preserves.

            toCaseInternalDataHelper(tn.str.substring(pos, kvMatcher.start(2)), sb, caseFunc);

            // The attribute-name (with "data-" suffix preserved), then '=' and the value
            // exactly as it was written.

            sb.append(recaseAttrName(kvMatcher.group(2), caseFunc));
            sb.append('=').append(kvMatcher.group(3));

            pos = kvMatcher.end();
        }

        // Do not forget the text that occurs after the last match
        toCaseInternalDataHelper(tn.str.substring(pos), sb, caseFunc);

        return new TagNode(tn.tok, sb.toString());
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Upper/Lower SELECTED-ATTRIBUTES
    // ********************************************************************************************
    // ********************************************************************************************


    /** Matches a run of word-characters and dashes: a candidate value-less attribute name */
    private static final Pattern BOOL_ATTR_REGEX = Pattern.compile("[\\w-]+");

    /**
     * Appends {@code s} to {@code sb}, re-casing only those value-less (boolean) attribute names
     * that are accepted by {@code attrNameTest}.  All other text is copied unchanged.
     *
     * @param s            the text between key-value attribute matches (or after the last one)
     * @param sb           receives the processed text
     * @param attrNameTest selects which attribute-names are to be re-cased
     * @param caseFunc     the case-conversion function to apply
     */
    private static void toCaseInternalBooleanAttrHelper(
            final String                    s,
            final StringBuilder             sb,
            final Predicate<String>         attrNameTest,
            final Function<String, String>  caseFunc
        )
    {
        final Matcher   nameMatcher = BOOL_ATTR_REGEX.matcher(s);
        int             consumed    = 0;

        while (nameMatcher.find())
        {
            final String name = nameMatcher.group();

            // Separator text preceding this name is never modified
            sb.append(s, consumed, nameMatcher.start());

            sb.append(attrNameTest.test(name) ? recaseAttrName(name, caseFunc) : name);

            consumed = nameMatcher.end();
        }

        // Trailing text after the last name
        sb.append(s.substring(consumed));
    }

    /**
     * Re-cases the names of those attributes accepted by {@code attrNameTest}, and optionally
     * the tag-name as well.
     *
     * @param tn             the node to copy
     * @param changeTagsCase when {@code TRUE} the tag-name is also re-cased
     * @param attrNameTest   selects which attribute-names are to be re-cased
     * @param caseFunc       the case-conversion function to apply
     *
     * @return a new {@code TagNode} containing the re-cased text, or {@code tn} itself when the
     * node has no attributes and {@code changeTagsCase} is {@code FALSE}
     */
    static TagNode toCaseInternal(
            final TagNode                   tn,
            final boolean                   changeTagsCase,
            final Predicate<String>         attrNameTest,
            final Function<String, String>  caseFunc
        )
    {
        // '<' + tok + '>' is exactly (tok.length() + 2) characters: nothing but the tag-name
        final boolean noAttributes = tn.str.length() == (tn.tok.length() + 2);

        if (tn.isClosing || noAttributes)
        {
            // With no attributes and no request to change the tag, the original is returned
            return changeTagsCase ? recaseTagNameOnly(tn, caseFunc) : tn;
        }

        // From here on 'tn' is known to be an opening tag, so it always starts with '<'
        final StringBuilder sb          = new StringBuilder();
        final Matcher       kvMatcher   = AttrRegEx.KEY_VALUE_REGEX.matcher(tn.str);
        final int           nameEnd     = tn.tok.length() + 1;

        sb.append('<');


        // When the tag-name is to be left alone, copy it exactly as written in 'tn.str' rather
        // than substituting 'tn.tok'.  If 'tok' holds a case-normalized form of the name, then
        // appending it would silently alter the tag's case even though the caller asked for it
        // to be left untouched.  (The no-attribute branch above returns 'tn' unmodified for the
        // same request - this keeps the two branches consistent.)

        sb.append(changeTagsCase ? caseFunc.apply(tn.tok) : tn.str.substring(1, nameEnd));

        // Skip over the opening '<' and the Tag-Name
        int pos = nameEnd;

        while (kvMatcher.find())
        {
            // Text between Key-Value matches: white-space and/or value-less attributes
            toCaseInternalBooleanAttrHelper
                (tn.str.substring(pos, kvMatcher.start(2)), sb, attrNameTest, caseFunc);

            final String attrName = kvMatcher.group(2);

            sb.append(attrNameTest.test(attrName) ? recaseAttrName(attrName, caseFunc) : attrName);

            // The value (including any surrounding quotes) is never modified
            sb.append('=').append(kvMatcher.group(3));

            pos = kvMatcher.end();
        }

        // Do not forget the text that occurs after the last match
        toCaseInternalBooleanAttrHelper(tn.str.substring(pos), sb, attrNameTest, caseFunc);

        return new TagNode(tn.tok, sb.toString());
    }
}