001package Torello.HTML;
002
003import java.util.stream.*;
004import java.util.*;
005
006import Torello.Java.LV;
007import Torello.Java.StrCSV;
008import Torello.Java.ExceptionCheckError;
009
010/**
011 * A simple utility class that, used ubiquitously throughout Java HTML, which maintains two integer
012 * fields - <CODE><B><A HREF='#start'>DotPai&#46;start</A></B></CODE> and
013 * <CODE><B><A HREF='#end'>DotPai&#46;end</A></B></CODE> , for demarcating the begining and ending
014 * of a sub-list within an HTML web-page.
015 * 
016 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=DOT_PAIR>
017 * 
018 * @see NodeIndex
019 * @see SubSection
020 */
021public final class DotPair
022implements java.io.Serializable, Comparable<DotPair>, Cloneable, Iterable<Integer>
023{
024    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
025    public static final long serialVersionUID = 1;
026
027    /**
028     * This is intended to be the "starting index" into an sub-array of an HTML {@code Vector} of
029     * {@code HTMLNode} elements.
030     */
031    public final int start;
032
033    /**
034     * This is intended to be the "ending index" into a sub-array of an HTML {@code Vector} of 
035     * {@code HTMLNode} elements.
036     */
037    public final int end;
038
039
040    // ********************************************************************************************
041    // ********************************************************************************************
042    // Constructor
043    // ********************************************************************************************
044    // ********************************************************************************************
045
046
047    /**
048     * This constructor takes two integers and saves them into the {@code public} member fields.
049     * 
050     * @param start This is intended to store the starting position of a vectorized-webpage
051     * sub-list or subpage.
052     * 
053     * @param end This will store the ending position of a vectorized-html webpage or subpage.
054     * 
055     * @throws IndexOutOfBoundsException A negative {@code 'start'} or {@code 'end'}
056     * parameter-value will cause this exception throw.
057     * 
058     * @throws IllegalArgumentException A {@code 'start'} parameter-value that is larger than the
059     * {@code 'end'} parameter will cause this exception throw.
060     * 
061     * @see NodeIndex
062     * @see SubSection
063     */
064    public DotPair(int start, int end)
065    {
066        if (start < 0) throw new IndexOutOfBoundsException
067            ("Negative start value passed to DotPair constructor: start = " + start);
068
069        if (end < 0) throw new IndexOutOfBoundsException
070            ("Negative ending value passed to DotPair constructor: end = " + end);
071
072        if (end < start) throw new IllegalArgumentException(
073            "Start-parameter value passed to constructor is greater than ending-parameter: " +
074            "start: [" + start + "], end: [" + end + ']'
075        );
076
077        this.start  = start;
078        this.end    = end;
079    }
080
081    /**
082     * Creates a new instance that has been shifted by {@code 'delta'}.
083     * 
084     * @param delta The number of array indices to shift {@code 'this'} intance.  This parameter
085     * may be negative, and if so, {@code 'this'} will be shifted left, instead of right.
086     * 
087     * @return A new, shifted, instance of {@code 'this'}
088     */
089    public DotPair shift(int delta)
090    { return new DotPair(this.start + delta, this.end + delta); }
091
092
093    // ********************************************************************************************
094    // ********************************************************************************************
095    // Standard Java Methods
096    // ********************************************************************************************
097    // ********************************************************************************************
098
099
100    /**
101     * Implements the standard java {@code 'hashCode()'} method.  This will provide a hash-code
102     * that is likely to avoid crashes.
103     * 
104     * @return A hash-code that may be used for inserting {@code 'this'} instance into a hashed
105     * table, map or list.
106     */
107    public int hashCode()
108    { return this.start + (1000 * this.end); }
109
110    /**
111     * The purpose of this is to remind the user that the array bounds are inclusive at <B>BOTH</B>
112     * ends of the sub-list.  
113     * 
114     * <BR /><BR /><B CLASS=JDDescLabel>Inclusive &amp; Exclusive:</B>
115     * 
116     * <BR />For an instance of {@code 'DotPair'}, the intention is to include both the
117     * characters located at the {@code Vector}-index positions {@link #start} and the one at
118     * {@link #end}.  Specifically,  (and unlike many of the {@code Node-Search} package methods)
119     * both of the internal fields to this class are <B STYLE='color: red'><I>inclusive</I></B>,
120     * rather than exclusive.
121     * 
122     * <BR /><BR />For many of the search methods in package {@link Torello.HTML.NodeSearch}, the
123     * {@code 'ePos'} parameters are always <B STYLE='color: red'><I>exclusive</I></B> - meaning
124     * the character at {@code Vector}=index {@code 'ePos'} is not included in the search.
125     * 
126     * @return The length of a sub-array that would be indicated by this dotted pair.
127     */
128    public int size() { return this.end - this.start + 1; }
129
130    /**
131     * Java's {@code toString()} requirement.
132     * 
133     * @return A string representing 'this' instance of DotPair.
134     */
135    public String toString() { return "[" + start + ", " + end + "]"; }
136
137    /**
138     * Java's {@code public boolean equals(Object o)} requirements.
139     * 
140     * @param o This may be any Java {@code Object}, but only ones of {@code 'this'} type whose 
141     * internal-values are identical will force this method to return {@code TRUE}.
142     * 
143     * @return {@code TRUE} if (and only if) parameter {@code 'o'} is an {@code instanceof DotPair}
144     * and, also, both have equal start and ending field values.
145     */
146    public boolean equals(Object o)
147    {
148        if (o instanceof DotPair)
149        {
150            DotPair dp = (DotPair) o;
151            return (this.start == dp.start) && (this.end == dp.end);
152        }
153
154        else return false;
155    }
156
157    /**
158     * Java's {@code interface Cloneable} requirements.  This instantiates a new {@code DotPair}
159     * with identical {@code 'start', 'end'} fields.
160     * 
161     * @return A new {@code DotPair} whose internal fields are identical to this one.
162     */
163    public DotPair clone() { return new DotPair(this.start, this.end); }
164
165    /**
166     * Java's {@code interface Comparable<T>} requirements.  <I>This is not the only comparison4
167     * operation possible,</I> but it does satisfy one reasonable requirement -
168     * <I>SPECIFICALLY:</I> which of two separate instances of {@code DotPair} start first.
169     * 
170     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
171     * 
172     * <BR />If two {@code DotPair} instances begin at the same {@code Vector}-index, then the
173     * shorter of the two shall come first.
174     * 
175     * @param other Any other {@code DotPair} to be compared to {@code 'this' DotPair}
176     * 
177     * @return An integer that fulfils Java's
178     * {@code interface Comparable<T> public boolean compareTo(T t)} method requirements.
179     */
180    public int compareTo(DotPair other)
181    {
182        int ret = this.start - other.start;
183
184        return (ret != 0) ? ret : (this.size() - other.size());
185    }
186
187    /**
188     * This is an "alternative Comparitor" that can be used for sorting instances of this class.
189     * It should work with the {@code Collections.sort(List, Comparator)} method in the standard
190     * JDK package {@code java.util.*;}
191     * 
192     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
193     * 
194     * <BR />This "extra <CODE>Comparitor</CODE>" simply compares the size of one {@code DotPair}
195     * to a second.  The smaller shall be sorted first, and the larger (longer-in-length)
196     * {@code DotPair} shall be sorted later.  If they are of equal size, whichever of the two has
197     * an earlier {@link #start} position in the {@code Vector} is considered first.
198     * 
199     * @see CommentNode#body
200     */
201    public static Comparator<DotPair> comp2 = (DotPair dp1, DotPair dp2) ->
202    {
203        int ret = dp1.size() - dp2.size();
204
205        return (ret != 0) ? ret : (dp1.start - dp2.start);
206    };
207
208    /**
209     * This shall return an {@code int Iterator} (which is properly named
210     * {@code class java.util.PrimitiveIterator.OfInt}) that iterates integers beginning with the
211     * value in {@code this.start} and ending with the value in {@code this.end}.
212     * 
213     * @return An {@code Iterator} that iterates {@code 'this'} instance of {@code DotPair} from
214     * the beginning of the range, to the end of the range.  The {@code Iterator} returned will
215     * produce Java's primitive type {@code int}.
216     * 
217     * <BR /><BR /><DIV CLASS=JDHint>
218     * The elements returned by the {@code Iterator}
219     * are integers, and this is, in effect, nothing more than one which counts from {@link #start}
220     * to {@link #end}.
221     * </DIV>
222     */
223    public PrimitiveIterator.OfInt iterator()
224    { 
225        return new PrimitiveIterator.OfInt()
226        {
227            private int cursor = start;
228
229            public boolean hasNext()    { return this.cursor <= end; }
230
231            public int nextInt()
232            {
233                if (cursor == end) throw new NoSuchElementException
234                    ("Cursor has reached the value stored in 'end' [" + end + "]");
235
236                return cursor++;
237            }
238        };
239    }
240
241    /**
242     * A simple {@code Iterator} that will iterate elements on an input page, using {@code 'this'}
243     * intance of {@code DotPair's} indices, {@link #start}, and {@link #end}.
244     * 
245     * @param page This may be any HTML page or sub-page.  This page should correspond to 
246     * {@code 'this'} instance of {@code DotPair}.
247     * 
248     * @return An {@code Iterator} that will iterate each node in the page, beginning with the
249     * node at {@code page.elementAt(this.start)}, and ending with {@code page.elementAt(this.end)}
250     * 
251     * @throws IndexOutOfBoundsException This throws if {@code 'this'} instance does not have a
252     * range that adheres to the size of the input {@code 'page'} parameter.
253     */
254    public <T extends HTMLNode> Iterator<T> iterator(Vector<T> page)
255    {
256        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
257            "This instance of DotPair points to elements that are outside of the range of the" +
258            "input 'page' Vector.\n" +
259            "'page' parameter size: " + page.size() + ", this.start: [" + this.start + "]"
260        );
261
262        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
263            "This instance of DotPair points to elements that are outside of the range of the" +
264            "input 'page' Vector.\n" +
265            "'page' parameter size: " + page.size() + ", this.end: [" + this.end + "]"
266        );
267
268        return new Iterator<T>()
269        {
270            private int cursor          = start;    // a.k.a. 'this.start'
271            private int expectedSize    = page.size();
272            private int last            = end;      // a.k.a. 'this.end'
273
274            public boolean hasNext() { return cursor < last; }
275
276            public T next()
277            {
278                if (++cursor > last) throw new NoSuchElementException(
279                    "This iterator's cursor has run past the end of the DotPaiar instance that " +
280                    "formed this Iterator.  No more elements to iterate.  Did you call hasNext() ?"
281                );
282
283                if (page.size() != expectedSize) throw new ConcurrentModificationException(
284                    "The expected size of the underlying vector has changed." +
285                    "\nCurrent-Size " +
286                    "[" + page.size() + "], Expected-Size [" + expectedSize + "]\n" +
287                    "\nCursor location: [" + cursor + "]"
288                );
289
290                return page.elementAt(cursor);
291            }
292
293            // Removes the node from the underlying {@code Vector at the cursor's location.
294            public void remove()
295            { page.removeElementAt(cursor); expectedSize--; cursor--; last--; }
296        };
297    }
298
299
300    // ********************************************************************************************
301    // ********************************************************************************************
302    // Simple Boolean tests
303    // ********************************************************************************************
304    // ********************************************************************************************
305
306
307    /**
308     * This will test whether a specific index is contained (between {@code this.start} and
309     * {@code this.end}, inclusively.
310     * 
311     * @param index This is any integer index value.  It must be greater than zero.
312     * 
313     * @return {@code TRUE} If the value of index is greater-than-or-equal-to the value stored in
314     * field {@code 'start'} and furthermore is less-than-or-equal-to the value of field
315     * {@code 'end'}
316     * 
317     * @throws IndexOutOfBoundsException If the value is negative, this exception will throw.
318     */
319    public boolean isInside(int index)
320    {
321        if (index < 0) throw new IndexOutOfBoundsException
322            ("You have passed a negative index [" + index + "] here, but this is not allowed.");
323
324        return (index >= start) && (index <= end);
325    }
326
327    /**
328     * Tests whether {@code 'this' DotPair} is fully enclosed by {@code DotPair} parameter
329     * {@code 'other'}
330     * 
331     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
332     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
333     * comparison is likely meaningless.
334     * 
335     * @return {@code TRUE} If (and only if) parameter {@code 'other'} encloses {@code 'this'}.
336     */
337    public boolean enclosedBy(DotPair other)
338    { return (other.start <= this.start) && (other.end >= this.end); }
339
340    /**
341     * Tests whether {@code 'this' DotPair} is enclosed, completely, by parameter {@code DotPair}
342     * parameter {@code 'other'}
343     * 
344     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
345     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
346     * comparison is likely meaningless.
347     * 
348     * @return {@code TRUE} If (and only if) parameter {@code 'other'} is enclosed completely by
349     * {@code 'this'}.
350     */
351    public boolean encloses(DotPair other)
352    { return (this.start <= other.start) && (this.end >= other.end); }
353
354    /**
355     * Tests whether parameter {@code 'other'} has any overlapping {@code Vector}-indices with
356     * {@code 'this' DotPair}
357     * 
358     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
359     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
360     * comparison is likely meaningless.
361     * 
362     * @return {@code TRUE} If (and only if) parameter {@code 'other'} and {@code 'this'} have any
363     * overlap.
364     */
365    public boolean overlaps(DotPair other)
366    {
367        return
368            ((this.start >= other.start)    && (this.start <= other.end)) ||
369            ((this.end >= other.start)      && (this.end <= other.end));
370    }
371
372    /**
373     * Tests whether {@code 'this'} lays, <I>completely</I>, before {@code DotPair} parameter
374     * {@code 'other'}.
375     * 
376     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
377     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
378     * comparison is likely meaningless.
379     * 
380     * @return {@code TRUE} if <I>every index</I> of {@code 'this'} has a value that is less than
381     * every index of {@code 'other'}
382     */
383    public boolean isBefore(DotPair other)
384    { return this.end < other.start; }
385
386    /**
387     * Tests whether {@code 'this'} begins before {@code DotPair} parameter {@code 'other'}.
388     * 
389     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
390     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
391     * comparison is likely meaningless.
392     * 
393     * @return {@code TRUE} if {@code this.start} is less than {@code other.start}, and
394     * {@code FALSE} otherwise.
395     */
396    public boolean startsBefore(DotPair other)
397    { return this.start < other.start; }
398
399    /**
400     * Tests whether {@code 'this'} lays, <I>completely</I>, after {@code DotPair} parameter
401     * {@code 'other'}.
402     * 
403     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
404     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
405     * comparison is likely meaningless.
406     * 
407     * @return {@code TRUE} if <I>every index</I> of {@code 'this'} has a value that is greater
408     * than every index of {@code 'other'}
409     */
410    public boolean isAfter(DotPair other)
411    { return this.start > other.end; }
412
413    /**
414     * Tests whether {@code 'this'} ends after {@code DotPair} parameter {@code 'other'}.
415     * 
416     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
417     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
418     * comparison is likely meaningless.
419     * 
420     * @return {@code TRUE} if {@code this.end} is greater than {@code other.end}, and
421     * {@code FALSE} otherwise.
422     */
423    public boolean endsAfter(DotPair other)
424    { return this.end > other.end; }
425
426
427
428
429    // ********************************************************************************************
430    // ********************************************************************************************
431    // Exception Check
432    // ********************************************************************************************
433    // ********************************************************************************************
434
435
436    /**
437     * A method that will do a fast check that {@code 'this'} intance holds index-pointers to
438     * an opening and closing HTML-Tag pair.  Note, though these mistakes may seem trivial, when
439     * parsing Internet Web-Pages, these are exactly the type of basic mistakes that users will
440     * make when their level of 'concentration' is low.  This is no different that checking an
441     * array-index or {@code String}-index for an {@code IndexOutOfBoundsException}.
442     * 
443     * <BR /><BR />This type of detailed exception message can make analyzing web-pages more
444     * direct and less error-prone.  The 'cost' incurred includes only a few {@code if}-statement
445     * comparisons, and <I>this check should be performed immediatley <B>before a loop is
446     * entered.</B></I>
447     * 
448     * @param page Any web-page, or sub-page.  It needs to be the page from whence {@code 'this'}
449     * instance of {@code DotPair} was retrieved.
450     * 
451     * @throws TagNodeExpectedException If {@code 'this'} instance' {@link #start} or {@link #end}
452     * fields do not point to {@code TagNode} elements on the {@code 'page'}.
453     * 
454     * @throws HTMLTokException If {@link #start} or {@link #end} do not point to a {@code TagNode}
455     * whose {@link TagNode#tok} field equals the {@code String} contained by parameter
456     * {@code 'token'}.
457     * 
458     * @throws OpeningTagNodeExpectedException If {@link #start} does not point to an opening
459     * {@code TagNode}.
460     * 
461     * @throws ClosingTagNodeExpectedException If {@link #end} does not point to a closing
462     * {@code TagNode}.
463     * 
464     * @throws NullPointerException If the {@code 'page'} parameter is null.
465     * 
466     * @throws ExceptionCheckError <B STYLE='color:red;'>IMPORTANT</B> Since this method is,
467     * indubuitably, a method for performing error checking, the presumption is that the programmer
468     * is trying to check for <I>his users input</I>.  If in the processes of checking for user
469     * error, another mistake is made that would generate an exception, this must thought of as a
470     * more serious error.
471     * 
472     * <BR /><BR />The purpose of the {@code 'possibleTokens'} array is to check that those tokens
473     * match the tokens that are contained by the {@code TagNode's} on the page at index 
474     * {@code this.start}, and {@code this.end}.  If invalid HTML tokens, null tokens, or even
475     * HTML Singleton tokens are passed <B>this exception-check, itself, is flawed!</B>  If there
476     * are problems with this var-args array, this error is thrown.
477     * 
478     * <BR /><BR />It is more serious because it indicates that the programmer has made a mistake
479     * in attempting to check for user-errors.
480     */
481    public void exceptionCheck(Vector<HTMLNode> page, String... possibleTokens)
482    {
483        if (page == null) throw new NullPointerException
484            ("HTML-Vector parameter was passed a null reference.");
485
486        if (possibleTokens == null) throw new ExceptionCheckError
487            ("HTML tags string-list was passed a null reference.");
488
489        for (String token : possibleTokens)
490        {
491            if (token == null) throw new ExceptionCheckError
492                ("One of the HTML Tag's in the tag-list String-array was null.");
493
494            if (! HTMLTags.isTag(token)) throw new ExceptionCheckError
495                ("One of the passed tokens [" + token +"] is not a valid HTML token.");
496
497            if (HTMLTags.isSingleton(token)) throw new ExceptionCheckError
498                ("One of the passed tokens [" + token +"] is an HTML Singleton.");
499        }
500
501
502        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
503        // Check the DotPair.start
504        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
505
506        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
507            "DotPair's 'start' field [" + this.start + "], is greater than or equal to the " +
508            "size of the HTML-Vector [" + page.size() + "]."
509        );
510
511        if (! (page.elementAt(this.start) instanceof TagNode))
512            throw new TagNodeExpectedException(this.start);
513
514        TagNode t1 = (TagNode) page.elementAt(this.start);
515
516        if (t1.isClosing) throw new OpeningTagNodeExpectedException(
517            "The TagNode at index [" + this.start + "] was a closing " +
518            "</" + t1.tok.toUpperCase() + ">, but an opening tag was expected here."
519        );
520
521
522        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
523        // Now Check the DotPair.end
524        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
525
526        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
527            "DotPair's 'end' field [" + this.end + "], is greater than or equal to the " +
528            "size of the HTML-Vector [" + page.size() + "]."
529        );
530
531        if (! (page.elementAt(this.end) instanceof TagNode))
532            throw new TagNodeExpectedException(this.end);
533
534        TagNode t2 = (TagNode) page.elementAt(this.end);
535
536        if (! t2.isClosing) throw new ClosingTagNodeExpectedException(
537            "The TagNode at index [" + this.start + "] was an opening " +
538            "<" + t2.tok.toUpperCase() + ">, but a closing tag was expected here."
539        );
540
541
542        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
543        // Token Check
544        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
545
546        if (! t1.tok.equalsIgnoreCase(t2.tok)) throw new HTMLTokException(
547            "The opening TagNode was the [" + t1.tok.toLowerCase() + "] HTML Tag, while the " +
548            "closing Tag was the [" + t2.tok.toLowerCase() + "].  These two tag's must be an " +
549            "opening and closing pair, and therefore must match each-other."
550        );
551
552        for (String possibleToken : possibleTokens)
553            if (possibleToken.equalsIgnoreCase(t1.tok))
554                return;
555
556        String t = t1.tok.toUpperCase();
557
558        throw new HTMLTokException(
559            "The opening and closing tags were: <" + t + ">, and </" + t + ">, but " +
560            "unfortunately this Tag is not included among the list of expected tags:\n" +
561            "    [" + StrCSV.toCSV(possibleTokens, false, false, 60) + "]."
562        );
563    }
564
565    /**
566     * Performs an exception check, using {@code 'this'} instance of {@code DotPair}, and throws
567     * an {@code IndexOutOfBoundsException} if {@code 'this'} contains end-points that do not fit
568     * inside the {@code 'page'} Vector Parameter.
569     * 
570     * @param page Any HTML Page, or subpage.  {@code page.size()} must return a value that is
571     * larger than <B STYLE='color: red;'>BOTH</B> {@link #start}
572     * <B STYLE='color:red;'>AND</B> {@link #end}.
573     * 
574     * @throws IndexOutOfBoundsException A value for {@link #start} or {@link #end} which
575     * are larger than the size of the {@code Vector} parameter {@code 'page'} will cause this
576     * exception throw.
577     */
578    public void exceptionCheck(Vector<HTMLNode> page)
579    {
580        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
581            "The value of this.end [" + this.end + "] is greater than the size of Vector " +
582            "parameter 'page' [" + page.size() + "]"
583        );
584
585        // This is actually unnecessary.  If 'end' is fine, then 'start' must be fine.  If 'end' is
586        // out of bounds, then it is irrelevant whether 'start' is out of bounds.  "They" play with
587        // your brain when you are coding.
588
589        /*
590        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
591            "The value of this.start [" + this.start + "] is greater than the size of Vector " +
592            "parameter 'page' [" + page.size() + "]"
593        );
594        */
595    }
596}