001package Torello.HTML;
002
003import Torello.HTML.NodeSearch.*;
004import Torello.Java.FileRW; // used in @see comments
005import Torello.Java.StringParse;
006import Torello.Java.Additional.Ret2;
007
008import java.util.*;
009import java.util.stream.IntStream;
010
011/**
012 * Utilities for checking that opening and closing {@link TagNode} elements match up (that the HTML
013 * is balanced).
014 * 
015 * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE>
016 */
017@Torello.JavaDoc.StaticFunctional
018public class Balance
019{
020    private Balance() { }
021
022    /**
023     * Invokes:
024     * 
025     * <BR /><BR /><UL CLASS=JDUL>
026     *  <LI>{@link #check(Vector)}</LI>
027     *  <LI>{@link #checkNonZero(Hashtable)}</LI>
028     *  <LI>{@link #toStringBalance(Hashtable)}</LI>
029     * </UL>
030     * 
031     * <DIV CLASS=EXAMPLE>{@code
032     * String b = Balance.CB(a.articleBody);
033     * System.out.println((b == null) ? "Page has Balanced HTML" : b);
034     * 
035     * // If Page has equal number of open and close tags prints:
036     * // Page Has Balanced HTML
037     * // OTHERWISE PRINTS REPORT
038     * }</DIV>
039     * 
040     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
041     * 
042     * @return Will return null if the snippet or page has 'balanced' HTML, otherwise returns the
043     * trimmed balance-report as a {@code String}.
044     */
045    public static String CB(Vector<HTMLNode> html)
046    {
047        String ret = toStringBalance(checkNonZero(check(html)));
048
049        return (ret.length() == 0) ? null : ret;
050    }
051
052    /**
053     * Creates a {@code Hashtable} that has a count of all open and closed HTML tags found on the
054     * page.
055     *
056     * <BR /><BR />This {@code Hashtable} may be regarded as maintaining "counts" on each-and-every
     * HTML tag to identify whether there is <I><B>a one-to-one balance mapping between opening and
     * closing tags</B></I> for each element.  When the {@code Hashtable} generated by
059     * this method is non-zero (for a particular HTML-Tag) it means that there are an unequal
060     * number of opening and closing elements for that tag.
061     * 
062     * <BR /><BR />Suppose this method were to produce a {@code Hashtable}, and that
063     * {@code Hashtable} queried for a count on the HTML <B CLASS=JDHTags>{@code <DIV>}</B> tag
064     * (dividers).  If that count turned out to be a non-zero positive number it would mean that
065     * the Vectorized-HTML had more opening <B CLASS=JDHTags>{@code <DIV>}</B> tags than the 
066     * number of closing <B CLASS=JDHTags>{@code </DIV>}</B> tags on that page.
067     * 
068     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
069     * 
070     * <BR /><BR />The following example will help explain the use of this method.  If an HTML page
071     * needs to be checked to see that all elements are properly opened and closed, this method can
072     * be used to return a list of any HTML element tag that does not have an equal number of
073     * opening and closing tags.
074     * 
075     * <BR /><BR />In this example, the generated Java-Doc HTML-Page for class {@code TagNode} is
076     * checked.
077     * 
078     * <DIV CLASS="EXAMPLE">{@code
079     * String                      html    = FileRW.loadFileToString(htmlFileName);
080     * Vector<HTMLNode>            v       = HTMLPage.getPageTokens(html, false);
081     * Hashtable<String, Integer>  b       = Balance.check(v);
082     * StringBuffer                sb      = new StringBuffer();
083     *
084     * // This part just prints a text-output to a string buffer, which is printed to the screen.
085     * for (String key : b.keySet())
086     * {
087     *     Integer i = b.get(key);
088     * 
089     *     // Only print keys that had a "non-zero count"
090     *     // A Non-Zero-Count implies Opening-Tag-Count and Closing-Tag-Count are not equal!
091     * 
092     *     if (i.intValue() != 0) sb.append(key + "\t" + i.intValue() + "\n");
093     * }
094     * 
095     * // This example output was: "i   -1", because of an unclosed italics element.
096     * // NOTE: To find where this unclosed element is, use method: nonNestedCheck(Vector, String)
097     * }</DIV>
098     * 
099     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
100     * 
101     * @return A {@code Hashtable} map of the count of each HTML-Tag present in the
102     * input {@code Vector}.
103     * 
104     * <BR /><BR />For instance, if this {@code Vector} had five
105     * <B CLASS=JDHTags>{@code <A HREF=...>}</B> (Anchor-Link) tags, and six
106     * <B CLASS=JDHTags>{@code </A>}</B> tags, then the returned {@code Hashtable} would have a
107     * {@code String}-key equal to {@code "A"} with an integer value of {@code -1}.
108     * 
109     * @see FileRW#loadFileToString(String)
110     * @see HTMLPage#getPageTokens(CharSequence, boolean)
111     */
112    public static Hashtable<String, Integer> check(Vector<? super TagNode> html)
113    {
114        Hashtable<String, Integer> ht = new Hashtable<>();
115
116        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
117        // not HTML Comments
118
119        for (Object o : html) if (o instanceof TagNode)
120        {
121            TagNode tn = (TagNode) o;
122
123            // Singleton tags are also known as 'self-closing' tags.  BR, HR, IMG, etc...
124            if (HTMLTags.isSingleton(tn.tok)) continue;
125
126            Integer I = ht.get(tn.tok);
127            int     i = (I != null) ? I.intValue() : 0;
128
129            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
130            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
131
132            i += tn.isClosing ? -1 : 1;
133
134            // Update the return result Hashtable for this particular HTML-Element (tn.tok)
135            ht.put(tn.tok, Integer.valueOf(i));
136        }
137
138        return ht;
139    }
140
141    /**
     * Creates an array that includes an open-and-close {@code 'count'} for each HTML-Tag
143     * that was requested via the passed input {@code String[]}-Array parameter {@code 'htmlTags'}.
144     * 
145     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
146     * 
147     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
148     * 
149     * <BR /><BR />The HTML-Element Open-Close-Counts are computed from this page.
150     * 
151     * @param htmlTags This may be one, or many, HTML-Tags whose open-close count needs to be
152     * computed.  Any HTML Element that is not present in this list - <I>will not have a count
153     * computed.</I>
154     * 
155     * <BR /><BR />The {@code count} results which are stored in an {@code int[]}-Array that should
156     * be considered "parallel" to this input Var-Args-Array.
157     * 
158     * @return An array of the count of each html-element present in the input vectorized-html
159     * parameter {@code 'html'}.
160     * For instance, If the following values were passed to this method:
161     * 
162     * <BR /><BR /><UL CLASS=JDUL>
163     * <LI> A Vectorized-HTML page that had 5 {@code '<SPAN ...>'} open-elements, and 6
164     *      {@code '</SPAN>'} closing {@code SPAN}-Tags.
165     *      </LI>
166     * 
167     * <LI> And at least one of the {@code String's} in the Var-Args parameter {@code 'htmlTags'}
168     *      was equal to the {@code String} {@code "SPAN"} (case insensitive).
169     *      </LI>
170     * 
171     * <LI> <B>==&gt;</B> Then the array-position corresponding to the position in array 
172     *      {@code 'htmlTags'} that had the {@code "SPAN"} would have a value of {@code '-1'}.
173     *      </LI>
174     * </UL>
175     * 
176     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
177     * 
     * @throws SingletonException If any of the {@code String}-Tags passed to parameter
179     * {@code 'htmlTags'} are {@code 'singleton'} (Self-Closing) Tags, then this exception throws
180     */
181    public static int[] check(Vector<? super TagNode> html, String... htmlTags)
182    {
183        // Check that these are all valid HTML Tags, throw an exception if not.
184        htmlTags = ARGCHECK.htmlTags(htmlTags);
185
186        // Temporary Hash-table, used to store the count of each htmlTag
187        Hashtable<String, Integer> ht = new Hashtable<>();
188
189        // Initialize the temporary hash-table.  This will be discarded at the end of the method,
190        // and converted into a parallel array.  (Parallel to the input String... htmlTags array).
191        // Also, check to make sure the user hasn't requested a count of Singleton HTML Elements.
192
193        for (String htmlTag : htmlTags)
194        {
195            if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
196                "One of the tags you have passed: [" + htmlTag + "] is a singleton-tag, " +
197                "and is only allowed opening versions of the tag."
198            );
199
200            ht.put(htmlTag, Integer.valueOf(0));
201        }
202
203        Integer I;
204
205        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
206        // not HTML Comments
207        for (Object o : html) if (o instanceof TagNode)
208        {
209            TagNode tn = (TagNode) o;
210
211            // Get the current count from the hash-table
212            I = ht.get(tn.tok);
213
214            // The hash-table only holds elements we are counting, if null, then skip.
215            if (I == null) continue;
216
217            // Save the new, computed count, in the hash-table
218            //
219            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
220            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
221
222            ht.put(tn.tok, Integer.valueOf(I.intValue() + (tn.isClosing ? -1 : 1)));
223        }
224
225        // Convert the hash-table to an integer-array, and return this to the user
226        int[] ret = new int[htmlTags.length];
227
228        for (int i=0; i < ret.length; i++)
229            ret[i] = 0;
230
231        for (int i=0; i < htmlTags.length; i++)
232            if ((I = ht.get(htmlTags[i])) != null) 
233                ret[i] = I.intValue();
234    
235        return ret;
236    }
237
238    /**
239     * Creates a {@code Hashtable} that has a count of all open and closed HTML-Tags found on
240     * the page - whose count-value is not equal to zero.
241     * 
242     * <BR /><BR />This method will report when there are unbalanced HTML-Tags on a page, <I><B>and
243     * strictly ignore any &amp; all tags with a count of zero</B></I>.  Specifically, if a tag has
     * a {@code 1-to-1} open-close count, then it will not have any keys available in the returned
245     * {@code Hashtable}.
246     *
247     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
248     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_CLONE> <!-- Clone Note -->
249     *
250     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
251     * available {@code check(...)} methods.
252     * 
253     * @return A {@code Hashtable} map of the count of each html-element present in this
254     * {@code Vector}.  For instance, if this {@code Vector} had 5 {@code '<A ...>'} (Anchor-Link)
255     * elements, and six {@code '</A>'} then this {@code Hashtable} would have a {@code String}-key
256     * {@code 'a'} with an integer value of {@code '-1'}.
257     */
258    public static Hashtable<String, Integer> checkNonZero(Hashtable<String, Integer> ht)
259    {
260        @SuppressWarnings("unchecked")
261        Hashtable<String, Integer>  ret     = (Hashtable<String, Integer>) ht.clone();
262        Enumeration<String>         keys    = ret.keys();
263
264        while (keys.hasMoreElements())
265        {
266            String key = keys.nextElement();
267
268            // Remove any keys (HTML element-names) that have a normal ('0') count.
269            if (ret.get(key).intValue() == 0) ret.remove(key);
270        }
271
272        return ret;
273    }
274
275
276    /**
277     * This will compute a {@code count} for just one, particular, HTML Element of whether that
278     * Element has been properly opened and closed.  An open and close {@code count} (integer
279     * value) will be returned by this method.
280     * 
281     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
282     * 
283     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
284     * 
285     * @param htmlTag This the html element whose open-close count needs to be kept.
286     * 
287     * @return The count of each html-element present in this {@code Vector}.  For instance, if the
288     * user had requested that HTML Anchor Links be counted, and if the input {@code Vector} had 5
289     * {@code '<A ...>'} (Anchor-Link) elements, and six {@code '</A>'} then this method would
290     * return {@code -1}.
291     * 
292     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
293     * 
294     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
295     * Tag, this exception will throw.
296     */
297    public static int checkTag(Vector<? super TagNode> html, String htmlTag)
298    {
299        // Check that this is a valid HTML Tag, throw an exception if invalid
300        htmlTag = ARGCHECK.htmlTag(htmlTag);
301
302        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
303            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
304            "allowed opening versions of the tag."
305        );
306
307        TagNode tn;     int i = 0;
308
309        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
310        // not HTML Comments
311
312        for (Object o : html) if (o instanceof TagNode) 
313
314            // If we encounter an HTML Element whose tag is the tag whose count we are 
315            // computing, then....
316
317            if ((tn = (TagNode) o).tok.equals(htmlTag))
318            
319                // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
320                // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
321
322                i += tn.isClosing ? -1 : 1;
323
324        return i;
325    }
326
327
328    /**
329     * This method will calculate the "Maximum" and "Minimum" depth for every HTML 5.0 Tag found on
330     * a page.  The Max-Depth is the "Maximum-Number" of Opening HTML Element Opening Tags were
331     * found for a particular element, before a matching closing version of the same Element is
332     * encountered. In the example below, the maximum "open-count" for the HTML 'divider' Element
333     * ({@code <DIV>}) is {@code '2'}.  This is because a second {@code <DIV>} element is opened
334     * before the first is closed.
335     *
336     * <DIV CLASS="HTML">{@code
337     * <DIV class="MySection"><H1>These are my ideas:</H1>
338     * <!-- Above is an outer divider, below is an inner divider -->
339     * <DIV class="MyNumbers">Here are the points:
340     * <!-- HTML Content Here -->
341     * </DIV></DIV>
342     * }</DIV>
343     *
344     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
345     *
346     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
347     * 
348     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
349     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
     * for-loop which calculates the {@code count} shall hopefully explain this computation
351     * clearly enough.  This may be viewed in this method's hilited source-code, below.
352     *
353     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
354     * 
355     * @return The returned {@code Hashtable} will contain an integer-array for each HTML Element
356     * that was found on the page.  Each of these arrays shall be of length {@code 3}.
357     * 
358     * <BR /><BR /><OL CLASS=JDUL>
359     * <LI>Minimum Depth: {@code return_array[0]}</LI>
360     * <LI>Maximum Depth: {@code return_array[1]}</LI>
361     * <LI>Total Count: {@code return_array[2]}</LI>
362     * </OL>
363     *
364     * <BR /><DIV CLASS=JDHint>
365     * <B>REDUNDANCY NOTE:</B> The third element of the returned array should be identical to the
366     * result produced by an invocation of method: {@code Balance.checkTag(html, htmlTag);}
367     * </DIV>
368     * 
369     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
370     * 
371     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
372     * Tag, this exception will throw.
373     */
374    public static Hashtable<String, int[]> depth(Vector<? super TagNode> html)
375    {
376        Hashtable<String, int[]> ht = new Hashtable<>();
377
378        // Iterate through the HTML List, we are only counting HTML Elements, not text, and not HTML Comments
379        for (Object o : html) if (o instanceof TagNode) 
380        {
381            TagNode tn = (TagNode) o;
382
383            // Don't keep a count on singleton tags.
384            if (HTMLTags.isSingleton(tn.tok)) continue;
385
386            int[] curMaxAndMinArr = ht.get(tn.tok);
387
388            // If this is the first encounter of a particular HTML Element, create a MAX/MIN
389            // integer array, and initialize it's values to zero.
390
391            if (curMaxAndMinArr == null)
392            {
393                curMaxAndMinArr = new int[3];
394
395                curMaxAndMinArr[0] = 0;     // Current Min Depth Count for Element "tn.tok" is zero
396                curMaxAndMinArr[1] = 0;     // Current Max Depth Count for Element "tn.tok" is zero
397                curMaxAndMinArr[2] = 0;     // Current Computed Depth Count for "tn.tok" is zero
398
399                ht.put(tn.tok, curMaxAndMinArr);
400            }
401
402            // curCount += tn.isClosing ? -1 : 1;
403            //
404            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
405            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
406
407            curMaxAndMinArr[2] += tn.isClosing ? -1 : 1;
408
409            // If the current depth-count is a "New Minimum" (a new low! :), then save it in the
410            // minimum pos of the output-array.
411
412            if (curMaxAndMinArr[2] < curMaxAndMinArr[0]) curMaxAndMinArr[0] = curMaxAndMinArr[2];
413
414            // If the current depth-count (for this tag) is a "New Maximum" (a new high), save it
415            // to the max-pos of the output-array.
416
417            if (curMaxAndMinArr[2] > curMaxAndMinArr[1]) curMaxAndMinArr[1] = curMaxAndMinArr[2];
418        }
419
420        return ht;
421    }
422
423
424
425    /**
426     * This method will calculate the "Maximum" and "Minimum" depth for every HTML Tag listed in
427     * the {@code var-args String[] htmlTags} parameter.  The Max-Depth is the "Maximum-Number" of
428     * Opening HTML Element Opening Tags were found for a particular element, before a matching
429     * closing version of the same Element is encountered.  In the example below, the maximum
430     * {@code 'open-count'} for the HTML 'divider' Element ({@code <DIV>}) is {@code '2'}.  This is
431     * because a second {@code <DIV>} element is opened before the first is closed.
432     *
433     * <DIV CLASS="HTML">{@code
434     * <DIV class="MySection"><H1>These are my ideas:</H1>
435     * <!-- Above is an outer divider, below is an inner divider -->
436     * <DIV class="MyNumbers">Here are the points:
437     * <!-- HTML Content Here -->
438     * </DIV></DIV>
439     * }</DIV>
440     *
441     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
442     *
443     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
444     * 
445     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
446     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
     * for-loop which calculates the {@code count} shall hopefully explain this computation
448     * clearly enough.  This may be viewed in this method's hilited source-code, below.
449     *
450     * <BR /><BR /><B CLASS=JDDescLabel>Var-Args Addition:</B>
451     * 
452     * <BR />This method differs from the method with an identical name (defined above) in that it
453     * adds a <I>{@code String}-VarArgs parameter</I> that allows a user to decide which tags he
454     * would like counted and returned in this {@code Hashtable}, and which he would like to ignore.
455     * 
     * <BR /><BR />If one of the requested HTML-Tags from this {@code String}-VarArgs parameter is not
457     * actually an HTML Element present on the page, the returned {@code Hashtable} will still
458     * contain an {@code int[]}-Array for that tag.  The values in that array will be equal to
459     * zero.
460     *
461     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
462     * 
463     * @return The returned {@code Hashtable} will contain an integer-array for each HTML Element
464     * that was found on the page.  Each of these arrays shall be of length {@code 3}.
465     * 
466     * <BR /><BR /><OL CLASS=JDUL>
467     * <LI>Minimum Depth: {@code return_array[0]}</LI>
468     * <LI>Maximum Depth: {@code return_array[1]}</LI>
469     * <LI>Total Count: {@code return_array[2]}</LI>
470     * </OL>
471     *
472     * <BR /><DIV CLASS=JDHint>
473     * <B>REDUNDANCY NOTE:</B> The third element of the returned array should be identical to the
474     * result produced by an invocation of method: {@code Balance.checkTag(html, htmlTag);}
475     * </DIV>
476     * 
477     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
478     * 
479     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'}
480     * (Self-Closing) Tag, this exception will throw.
481     */
482    public static Hashtable<String, int[]> depth(Vector<? super TagNode> html, String... htmlTags)
483    {
484        // Check that these are all valid HTML Tags, throw an exception if not.
485        htmlTags = ARGCHECK.htmlTags(htmlTags);
486
487        Hashtable<String, int[]> ht = new Hashtable<>();
488
489        // Initialize the temporary hash-table.  This will be discarded at the end of the method,
490        // and converted into a parallel array.  (Parallel to the input String... htmlTags array).
491        // Also, check to make sure the user hasn't requested a count of Singleton HTML Elements.
492
493        for (String htmlTag : htmlTags)
494        {
495            if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
496                "One of the tags you have passed: [" + htmlTag + "] is a singleton-tag, " +
497                "and is only allowed opening versions of the tag."
498            );
499
500            // Insert an initialized array (init to zero) for this HTML Tag/Token
501            int[] arr = new int[3];
502
503            arr[0] = 0;     // Current Minimum Depth Count for HTML Element "tn.tok" is zero
504            arr[1] = 0;     // Current Maximum Depth Count for HTML Element "tn.tok" is zero
505            arr[2] = 0;     // Current Computed Depth Count is HTML Element "tn.tok" is zero
506
507            ht.put(htmlTag, arr);
508        }
509
510        // Iterate through the HTML List, we are only counting HTML Elements, not text,
511        // and not HTML Comments
512
513        for (Object o: html) if (o instanceof TagNode) 
514        {
515            TagNode tn = (TagNode) o;
516
517            int[] curMaxAndMinArr = ht.get(tn.tok);
518
519            // If this is null, we are attempting to perform the count on an HTML Element that
520            // wasn't requested by the user with the var-args 'String... htmlTags' parameter.
521            // The Hashtable was initialized to only have those tags. (see about 5 lines above 
522            // where the Hashtable is initialized)
523
524            if (curMaxAndMinArr == null) continue;
525
526            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
527            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
528
529            curMaxAndMinArr[2] += tn.isClosing ? -1 : 1;
530
531            // If the current depth-count is a "New Minimum" (a new low! :), then save it in the
532            // minimum pos of the output-array.
533
534            if (curMaxAndMinArr[2] < curMaxAndMinArr[0]) curMaxAndMinArr[0] = curMaxAndMinArr[2];
535
536            // If the current depth-count (for this tag) is a "New Maximum" (a new high), save it
537            // to the max-pos of the output-array.
538
539            if (curMaxAndMinArr[2] > curMaxAndMinArr[1]) curMaxAndMinArr[1] = curMaxAndMinArr[2];
540
541            // NOTE:    No need to update the hash-table, since this is an array - changing its
542            //          values is already "reflected" into the Hashtable.
543        }
544
545        return ht;
546    }
547
548
549    /**
550     * Creates a {@code Hashtable} that has a maximum and minimum depth for all HTML tags found on
551     * the page.  Any HTML Tags that meet ALL of these criteria shall be removed from the
552     * result-set {@code Hashtable} ...
553     * 
554     * <BR /><BR /><UL CLASS=JDUL>
555     * <LI>Minimum Depth Is {@code '0'} - i.e. <I>closing tag never precedes opening.</I></LI>
     * <LI>Count is {@code '0'} - i.e. <I>there is a {@code 1-to-1} ratio of opening and closing
557     * tags</I> for the particular HTML Element.</LI>
558     * </UL>
559     * 
560     * <BR /><DIV CLASS=JDHint>
561     * This means that there is a {@code 1:1} ratio of opening and closing versions of the tag,
     * <B><I>and also</I></B> that there are no positions in the vector where a closing tag comes
     * before a tag that opens it.
564     * </DIV>
565     *
566     * <BR /><BR /><B CLASS=JDDescLabel>Cloned Input:</B>
567     * 
568     * <BR />This method clones the original input {@code Hashtable}, and removes the tags whose
569     * depth-calculations are invalid - as described above.  This allows the user to perform other
570     * operations with the original table, while this class is processing.
571     *
572     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
573     * available {@code depth(...)} methods.
574     * 
     * @return This shall return a list of HTML Tags that are <I>potentially (but not guaranteed
576     * to be)</I> invalid.
577     */
578    public static Hashtable<String, int[]> depthInvalid(Hashtable<String, int[]> ht)
579    {
580        @SuppressWarnings("unchecked")
581        Hashtable<String, int[]>    ret     = (Hashtable<String, int[]>) ht.clone();
582        Enumeration<String>         keys    = ret.keys();
583
584        // Using the "Enumeration" class allows the situation where elements can be removed from
585        // the underlying data-structure - while iterating through that data-structure.  This is
586        // not possible using a keySet Iterator.
587
588        while (keys.hasMoreElements())
589        {
590            String  key = keys.nextElement();
591            int[]   arr = ret.get(key);
592
593            if ((arr[1] >= 0) && (arr[2] == 0)) ret.remove(key);
594        }
595
596        return ret;
597    }
598
599    /**
600     * Creates a {@code Hashtable} that has a maximum and minimum depth for all HTML tags found on
601     * the page.  Any HTML Tags that meet ALL of these criteria, below, shall be removed from the
602     * result-set {@code Hashtable} ...
603     * 
604     * <BR /><BR /><UL CLASS=JDUL>
605     * <LI> Maximum Depth is precisely {@code '1'} - i.e. <I>Each element of this tag is closed
606     *      before a second is open.</I>
607     *      </LI>
608     * </UL>
609     * 
610     * <BR /><BR /><B CLASS=JDDescLabel>Cloned Input:</B>
611     * 
     * <BR />This method clones the original input {@code Hashtable}, and removes the tags whose
     * maximum-depth is exactly one.  This allows the user to perform other operations
     * with the original table, while this class is processing.
615     *
616     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
617     * available {@code depth(...)} methods.
618     * 
     * @return This shall return a list of HTML Tags that are <I>potentially (but not guaranteed
620     * to be)</I>
621     * invalid.
622     */
623    public static Hashtable<String, int[]> depthGreaterThanOne(Hashtable<String, int[]> ht)
624    {
625        @SuppressWarnings("unchecked")
626        Hashtable<String, int[]>    ret     = (Hashtable<String, int[]>) ht.clone();
627        Enumeration<String>         keys    = ret.keys();
628
629        // Using the "Enumeration" class allows the situation where elements can be removed from
630        // the underlying data-structure - while iterating through that data-structure.  This is not
631        // possible using a keySet Iterator.
632
633        while (keys.hasMoreElements())
634        {
635            String  key = keys.nextElement();
636            int[]   arr = ret.get(key);
637
638            if (arr[1] == 1) ret.remove(key);
639        }
640
641        return ret;
642    }
643
644
645    /**
646     * This method will calculate the "Maximum" and "Minimum" depth for a particular HTML Tag.
647     * The Max-Depth just means the number of Maximum-Number of Opening HTML Element Opening Tags
648     * were found, before a matching closing version of the same Element is encountered.  For
649     * instance: {@code <DIV ...><DIV ..> Some Page</DIV></DIV>} has a maximum depth of
650     * {@code '2'}.  This means there is a point in the vectorized-html where there are 2
651     * successive divider elements that are opened, before even one has been closed.
652     *
653     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
654     *
655     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
656     * 
657     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
658     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
     * for-loop which calculates the {@code count} shall hopefully explain this computation
660     * clearly enough.  This may be viewed in this method's hilited source-code, below.
661     *
662     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
663     * 
664     * @param htmlTag This the html element whose maximum and minimum depth-count needs to be
665     * computed.
666     * 
667     * @return The returned integer-array, shall be of length 3.
668     * 
669     * <BR /><BR /><OL CLASS=JDUL>
670     * <LI>Minimum Depth: {@code return_array[0]}</LI>
671     * <LI>Maximum Depth: {@code return_array[1]}</LI>
672     * <LI>Total Count: {@code return_array[2]}</LI>
673     * </OL>
674     * 
675     * <BR /><DIV CLASS=JDHint>
676     * <B>REDUNDANCY NOTE:</B> The third element of the returned array should be identical to the
677     * result produced by an invocation of method: {@code Balance.checkTag(html, htmlTag);}
678     * </DIV>
679     * 
680     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
681     * 
682     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
683     * Tag, this exception will throw.
684     */
685    public static int[] depthTag(Vector<? super TagNode> html, String htmlTag)
686    {
687        // Check that this is a valid HTML Tag, throw an exception if invalid
688        htmlTag = ARGCHECK.htmlTag(htmlTag);
689
690        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
691            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only allowed " +
692            "opening versions of the tag."
693        );
694
695        TagNode tn;     int i = 0;      int max = 0;        int min = 0;
696
697        // Iterate through the HTML List, we are only counting HTML Elements, not text, and not HTML Comments
698        for (Object o : html) if (o instanceof TagNode)
699
700            if ((tn = (TagNode) o).tok.equals(htmlTag))
701            {
702                // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
703                // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
704
705                i += tn.isClosing ? -1 : 1;
706
707                if (i > max) max = i;
708                if (i < min) min = i;
709            }
710
711        // Generate the output array, and return
712        int[] ret = new int[2];
713
714        ret[0] = min;
715        ret[1] = max;
716        ret[2] = i;
717
718        return ret;
719    }
720
721    /**
722     * This will find the (likely) places where the "non-nested HTML Elements" have become nested.
723     * For the purposes of finding mismatched elements - such as an unclosed "Italics" Element, or
724     * an "Extra" Italics Element - this method will find places where a new HTML Tag has opened
725     * before a previous one has been closed - <I>or vice-versa (where there is an 'extra'
726     * closed-tag).</I> 
727     * 
728     * <BR /><BR />Certainly, if "nesting" is usually acceptable (for instance the HTML divider
729     * {@code '<DIV>...</DIV>'} construct) <I><B>then the results of this method would not have any
730     * meaning.</I></B>  Fortunately, for the vast majority of HTML Elements {@code <I>, <B>, <A>,
731     * etc...} nesting the tags is not allowed or encouraged. 
732     *
733     * <BR /><BR />The following example use of this method should make clear the application.  If
734     * a user has identified that there is an unclosed HTML italics element ({@code <I>...</I>})
735     * somewhere on a page, for-example, and that page has numerous italics elements, this method
736     * can pinpoint the failure instantly, using this example.  Note that the file-name is a
737     * Java-Doc generated output HTML file.  The documentation for this package received a copious
738     * amount of attention due to the sheer number of method-names and class-names used throughout.
739     * 
740     * <DIV CLASS="EXAMPLE">{@code 
741     * String           fStr    = FileRW.loadFileToString("javadoc/Torello/HTML/TagNode.html");
742     * Vector<HTMLNode> v       = HTMLPage.getPageTokens(fStr, false);
743     * int[]            posArr  = Balance.nonNestedCheck(v, "i");
744     * 
745     * // Below, the class 'Debug' is used to pretty-print the vectorized-html page.  Here, the
746     * // output will find the lone, non-closed, HTML italics <I> ... </I> tag-element, and output
747     * // it to the terminal-window.  The parameter '5' means the nearest 5 elements (in either
748     * // direction) are printed, in addition to the elements at the indices in the posArr.
749     * // Parameter 'true' implies that two curly braces are printed surrounding the matched node.
750     * 
751     * System.out.println(Debug.print(v, posArr, 5, " Skip a few ", true, Debug::K));
752     * }</DIV>
753     * 
754     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
755     * 
756     * @param htmlTag This the html element whose maximum and minimum depth-count was not {@code 1}
757     * and {@code 0}, respectively.  The precise location where the depth achieved either a
758     * negative depth, or depth greater than {@code 1} will be returned in the integer array.  In
759     * English: When two opening-tags or two closing-tags are identified, successively, then the
760     * index where the second tag was found is recorded into the output array.
761     * 
762     * @return This will return an array of vectorized-html index-locations / index-pointers where
763     * the first instance of an extra opening, or an extra-closing tag, occurs.  This will
764     * facilitate finding tags that are not intended to be nested.  If "tag-nesting" (for example
765     * HTML divider, {@code 'DIV'}, elements), then the results returned by this method will not be
766     * useful.
767     * 
768     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
769     * 
770     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
771     * Tag, this exception will throw.
772     * 
773     * @see FileRW#loadFileToString(String)
774     * @see HTMLPage#getPageTokens(CharSequence, boolean)
775     * @see Debug#print(Vector, int[], int, String, boolean, BiConsumer)
776     */
777    public static int[] nonNestedCheck(Vector<? super TagNode> html, String htmlTag)
778    {
779        // Java Streams are an easier way to keep variable-length lists.  They use
780        // "builders" - and this one is for an "IntStream"
781
782        IntStream.Builder b = IntStream.builder();
783
784        // Check that this is a valid HTML Tag, throw an exception if invalid
785        htmlTag = ARGCHECK.htmlTag(htmlTag);
786
787        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
788            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
789            "allowed opening versions of the tag."
790        );
791
792        Object o;     TagNode tn;     int len = html.size();      TC last = null;
793
794        // Iterate through the HTML List, we are only counting HTML Elements, not text,
795        // and not HTML Comments
796
797        for (int i=0; i < len; i++)
798
799            if ((o = html.elementAt(i)) instanceof TagNode) 
800                if ((tn = (TagNode) o).tok.equals(htmlTag))
801                {
802                    if ((tn.isClosing)      && (last == TC.ClosingTags))    b.add(i);
803                    if ((! tn.isClosing)    && (last == TC.OpeningTags))    b.add(i);
804
805                    last = tn.isClosing ? TC.ClosingTags : TC.OpeningTags;
806                }
807
808        return b.build().toArray();
809    }
810
811    /**
812     * For likely greater than 95% of HTML tags - finding situations where that tag has 
813     * <I><B>'nested tags'</I></B> is highly unlikely.  Unfortunately, two or three of the most
814     * common tags in use, for instance {@code <DIV>, <SPAN>}, finding where a mis-match has
815     * occurred (tracking down an "Unclosed divider") is an order of magnitude more difficult than
816     * finding an unclosed anchor {@code '<A HREF...>'}.  This method shall return two parallel
817     * arrays.  The first array will contain vector indices.  The second array contains the depth
818     * (nesting level) of that tag at that position.  In this way, finding an unclosed divider is
819     * tantamount to finding where all closing-dividers seem to evaluate to a depth of '1' (one)
820     * rather than '0' (zero). 
821     * 
822     * <BR /><BR /><DIV CLASS=JDHint>
823     * This method can highly useful for {@code <SPAN>} and {@code DIV}, while the "non-standard
824     * depth locations" method can be extremely useful for simple, non-nested tags such as Anchor,
825     * Paragraph, Section, etc... - HTML Elements that are mostly never nested.
826     * </DIV>
827     * 
828     * <DIV CLASS="EXAMPLE">{@code
829     * // Load an HTML File to a String
830     * String file = LFEC.loadFile("~/HTML/MyHTMLFile.html");
831     * 
832     * // Parse, and convert to vectorized-html
833     * Vector<HTMLNode> v = HTMLPage.getPageTokens(file, false);
834     * 
835     * // Run this method
836     * Ret2<int[], int[]> r = Balance.locationsAndDepth(v, "div");
837     * 
838     * // This array has vector-indices
839     * int[] posArr = (int[]) r.a;
840     * 
841     * // This (parallel) array has the depth at that index.
842     * int[] depthArr = (int[]) r.b;
843     * 
844     * for (int i=0; i < posArr.length; i++) System.out.println(
845     *     "(" + posArr[i] + ", " + depthArr[i] + "):\t" +    // Prints the Vector-Index, and Depth
846     *     C.BRED + v.elementAt(posArr[i]).str + C.RESET      // Prints the actual HTML divider.
847     * );
848     * }</DIV>
849     * 
850     * <BR />The above code would produce a list of HTML Divider elements, along with their index
851     * in the {@code Vector}, and the exact depth (number of nested, open {@code 'DIV'} elements)
852     * at that location.  This is usually helpful when trying to find unclosed HTML Tags.
853     * 
854     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
855     * 
856     * @param htmlTag This the html element that has an imbalanced OPEN-CLOSE ratio in the tree.
857     * 
858     * @return Two parallel arrays, as follows:
859     * 
860     * <BR /><BR /><OL CLASS=JDOL>
861     * <LI> {@code Ret2.a (int[])}
862     *      <BR /><BR />
863     *      This shall be an integer array of {@code Vector}-indices where the HTML Element has
864     *      been found.
865     *      <BR /><BR />
866     * </LI>
867     * <LI> {@code Ret2.b (int[])}
868     *      <BR /><BR />
869     *      This shall contain an array of the value of the depth for the {@code 'htmlTag'}
870     *      at the particular {@code Vector}-index identified in the first-array.
871     * </LI>
872     * </OL>
873     * 
874     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
875     * 
876     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
877     * Tag, this exception will throw.
878     */
879    public static Ret2<int[], int[]> locationsAndDepth(Vector<? super TagNode> html, String htmlTag)
880    {
881        // Java Streams are an easier way to keep variable-length lists.  They use
882        // "builders" - and this one is for an "IntStream"
883
884        IntStream.Builder locations         = IntStream.builder();
885        IntStream.Builder depthAtLocation   = IntStream.builder();
886
887        // Check that this is a valid HTML Tag, throw an exception if invalid
888        htmlTag = ARGCHECK.htmlTag(htmlTag);
889
890        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
891            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
892            "allowed opening versions of the tag."
893        );
894
895        Object o;     TagNode tn;     int len = html.size();      int depth = 0;
896
897        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
898        // not HTML Comments
899
900        for (int i=0; i < len; i++) if ((o = html.elementAt(i)) instanceof TagNode) 
901
902        if ((tn = (TagNode) o).tok.equals(htmlTag))
903            {
904                depth += tn.isClosing ? -1 : 1;
905
906                locations.add(i);
907
908                depthAtLocation.add(depth);
909            }
910
911        return new Ret2<int[], int[]>
912            (locations.build().toArray(), depthAtLocation.build().toArray());
913    }
914
915    /**
916     * Converts a depth report to a {@code String}, for printing.
917     * 
918     * @param depthReport This should be a {@code Hashtable} returned by any of the depth-methods.
919     * 
920     * @return This shall return the report as a {@code String}.
921     */
922    public static String toStringDepth(Hashtable<String, int[]> depthReport)
923    {
924        StringBuilder sb = new StringBuilder();
925
926        for (String htmlTag : depthReport.keySet())
927        {
928            int[] arr = depthReport.get(htmlTag);
929
930            sb.append(
931                "HTML Element: [" + htmlTag + "]:\t" +
932                "Min-Depth: " + arr[0] + ",\tMax-Depth: " + arr[1] + ",\tCount: " + arr[2] + "\n"
933            );
934        }
935
936        return sb.toString();
937    }
938
939
940    /**
941     * Converts a balance report to a {@code String}, for printing.
942     * 
943     * @param balanceCheckReport This should be a {@code Hashtable} returned by any of the
944     * balance-check methods.
945     * 
946     * @return This shall return the report as a {@code String}.
947     */
948    public static String toStringBalance(Hashtable<String, Integer> balanceCheckReport)
949    {
950        StringBuilder   sb              = new StringBuilder();
951        int             maxTagLen       = 0;
952        int             maxValStrLen    = 0;
953        int             maxAbsValStrLen = 0;
954        int             val;
955        String          valAsStr;
956
957        // For good spacing purposes, we need the length of the longest of the tags.
958        for (String htmlTag : balanceCheckReport.keySet())
959            if (htmlTag.length() > maxTagLen)
960                maxTagLen = htmlTag.length();
961
962        // 17 is the length of the string below, 2 is the amount of extra-space needed
963        maxTagLen += 17 + 2; 
964
965        for (int v : balanceCheckReport.values())
966            if ((valAsStr = ("" + v)).length() > maxValStrLen)
967                maxValStrLen = valAsStr.length();
968
969        for (int v : balanceCheckReport.values())
970            if ((valAsStr = ("" + Math.abs(v))).length() > maxAbsValStrLen)
971                maxAbsValStrLen = valAsStr.length();
972
973        for (String htmlTag : balanceCheckReport.keySet())
974
975            sb.append(
976                StringParse.rightSpacePad("HTML Element: [" + htmlTag + "]:", maxTagLen) +
977                StringParse.rightSpacePad(
978                    ("" + (val = balanceCheckReport.get(htmlTag).intValue())),
979                    maxValStrLen
980                ) +
981                NOTE(val, htmlTag, maxAbsValStrLen) +
982                "\n"
983            );
984
985        return sb.toString();
986    }
987
988    private static String NOTE(int val, String htmlTag, int padding)
989    {
990        if (val == 0) return "";
991
992        else if (val > 0) return
993            ", which implies " + StringParse.rightSpacePad("" + Math.abs(val), padding) +
994            " unclosed <" + htmlTag + "> element(s)";
995
996        else return
997            ", which implies " + StringParse.rightSpacePad("" + Math.abs(val), padding) +
998            " extra </" + htmlTag + "> element(s)";
999    }
1000
1001    /**
1002     * Converts a balance report to a {@code String}, for printing.
1003     * 
1004     * @param balanceCheckReport This should be a {@code Hashtable} returned by any of the
1005     * balance-check methods.
1006     * 
1007     * @return This shall return the report as a {@code String}.
1008     * 
1009     * @throws IllegalArgumentException This exception throws if the length of the two input arrays
1010     * are not equal.  It is imperative that the balance report being printed was created by the
1011     * html-tags that are listed in the HTML Token var-args parameter.  If the two arrays are the
1012     * same length, but the tags used to create the report Hashtable are not the same ones being
1013     * passed to the var-args parameter {@code 'htmlTags'} - <I>the logic will not know the
1014     * difference, and no exception is thrown.</I>
1015     */
1016    public static String toStringBalance(int[] balanceCheckReport, String... htmlTags)
1017    {
1018        if (balanceCheckReport.length != htmlTags.length) throw new IllegalArgumentException(
1019            "The balance report that you are checking was not generated using the html token " +
1020            "list provided, they are different lengths.  balanceCheckReport.length: " +
1021            "[" + balanceCheckReport.length + "]\t htmlTags.length: [" + htmlTags.length + "]"
1022        );
1023
1024        StringBuilder sb = new StringBuilder();
1025
1026        for (int i=0; i < balanceCheckReport.length; i++)
1027            sb.append("HTML Element: [" + htmlTags[i] + "]:\t" + balanceCheckReport[i] + "\n");
1028
1029        return sb.toString();
1030    }
1031
1032}