001package Torello.HTML;
002
003import Torello.Java.NException;
004
005import java.util.*;
006
007import java.util.stream.IntStream;
008
009public class DPUtil
010{
011    // ********************************************************************************************
012    // ********************************************************************************************
013    // To Vector & Sublist
014    // ********************************************************************************************
015    // ********************************************************************************************
016
017
018    /**
019     * This method converts a sublist, represented by a "dotted pair", and converts it into a
020     * {@code Vector} of {@code HTMLNode}.
021     * 
022     * <BR /><BR /><B CLASS=JDDescLabel>Retrieval Heuristic:</B>
023     * 
024     * <BR />It should be obvious that the parameter @code 'dp'} contains fields named
025     * {@link DotPair#start} and {@link DotPair#end}.  These two simply represent the starting and
026     * ending indices into an HTML page {@code Vector}.  In this method, the intention is that they
027     * are indices into the HTML-{@code Vector} parameter simply-named {@code 'html'}.
028     * 
029     * <BR /><BR />This method cycles through that {@code Vector}, beginning with the field
030     * {@code dp.start} (inclusive) and ending with {@code dp.end} (inclusive, as well).  Each
031     * {@code HTMLNode} reference within this sublist is inserted into the {@code Vector} that is 
032     * ultimately returned.
033     * 
034     * @param html Any Vectorized-HTML Web-Page, or sub-page
035     * 
036     * @param dp Any sublist within that HTML page.
037     * 
038     * @return A {@code Vector} version of the original sublist that was represented by 
039     * passed parameter {@code 'dp'}
040     */
041    public static Vector<HTMLNode> toVector(Vector<? extends HTMLNode> html, DotPair dp)
042    {
043        Vector<HTMLNode> ret = new Vector<>();
044
045
046        // NOTE: Cannot return the result of "subList" because it is linked/mapped to the original
047        //       Vector.  If changes are made to "subList", those changes will be reflected in the
048        //       original HTML Vector.
049
050        ret.addAll(html.subList(dp.start, dp.end + 1));
051
052        return ret;
053    }
054
055    /**
056     * This will cycle through a "list of sublists" and call the method
057     * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp)}
058     * on each sublist in the input parameter {@code 'sublists'}  Those sublists will be collected
059     * into another {@code Vector} and returned.
060     * 
061     * @param html Any Vectorized-HTML Web-Page, or sub-page
062     * @param sublists A "List of sublists" within that HTML page.
063     * @return This method shall return a {@code Vector} containing vectors as sublists.
064     */
065    public static Vector<Vector<HTMLNode>> toVectors
066        (Vector<? extends HTMLNode> html, Iterable<DotPair> sublists)
067    {
068        Vector<Vector<HTMLNode>> ret = new Vector<>();
069
070        for (DotPair sublist : sublists) ret.addElement(toVector(html, sublist));
071
072        return ret;
073    }
074
075    /**
076     * This will cycle through a "list of sublists" and call the method
077     * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp) }
078     * on each sublist in the input parameter {@code 'sublists'}.  Those sublists will be
079     * collected into another {@code Vector} and returned.
080     * 
081     * @param html Any Vectorized-HTML Web-Page, or sub-page
082     * @param sublists A "List of sublists" within that HTML page.
083     * @return This method shall return a {@code Vector} containing vectors as sublists.
084     */
085    public static Vector<SubSection> toSubSections
086        (Vector<? extends HTMLNode> html, Iterable<DotPair> sublists)
087    {
088        Vector<SubSection> ret = new Vector<>();
089
090        for (DotPair sublist : sublists)
091            ret.addElement(new SubSection(sublist, toVector(html, sublist)));
092
093        return ret;
094    }
095
096
097    // ********************************************************************************************
098    // ********************************************************************************************
099    // List / Iterable of DotPair's to a Stream, Iterator, Array
100    // ********************************************************************************************
101    // ********************************************************************************************
102
103
104    /**
105     * Convenience Method.
106     * <BR />Invokes: {@link #toStream(Iterable, boolean)}
107     * <BR />Converts output to an {@code Iterator}
108     */
109    public static PrimitiveIterator.OfInt iterator(Iterable<DotPair> dpi, boolean leastToGreatest)
110    { return toStream(dpi, leastToGreatest).iterator(); }
111
112    /**
113     * Convenience Method.
114     * <BR />Invokes: {@link #toStream(Iterable, boolean)}
115     * <BR />Converts output to an {@code int[]} array.
116     */
117    public static int[] toPosArray(Iterable<DotPair> dpi, boolean leastToGreatest)
118    { return toStream(dpi, leastToGreatest).toArray(); }
119
120    /**
121     * This method will convert a list of {@code DotPair} instances to a Java
122     * {@code java.util.stream.IntStream}.  The generated {@code IntStream} shall contain all
123     * {@code Vector}-indices (integers) that are within the bounds of any of the {@code DotPair's}
124     * listed by parameter {@code 'dpi'}.
125     * 
126     * <BR /><BR />Stating this a second time, the returned position-index list (which is of Java
127     * Type {@code IntStream}) is built out of the contents of each and every one of the the
128     * {@code DotPair's} in {@code Iterable}-parameter {@code 'dpi'}.  This index-list which is
129     * ultimately returned from this method <I>will contain all indices that are "inside" all
130     * {@coded DotPair's}</I>.
131     * 
132     * <BR /><BR /><B CLASS=JDDescLabel>Repeated Indices &amp; Gaps:</B>
133     * 
134     * <BR />This method will never include an index more than once, and all integers in the
135     * returned {@code IntStream} will be unique.
136     *
137     * <BR /><BR />The sublists (the {@code DotPair's} of input-parameter {@code 'dpi'}) may
138     * possibly (or even 'likely') overlap each-other.  Furthermore, other {@code DotPair} /
139     * sublists may have several gaps between them.  This method shall return an {@code IntStream}
140     * of unique, integer indices all of which are guaranteed to be
141     * <B STYLE='color: red;'><I>inside</I></B> at least one of {@code dpi's} sublists.
142     * 
143     * <BR /><BR /><B CLASS=JDDescLabel>NodeSearch Find 'all' Methods:</B>
144     * 
145     * <BR />Many of the "Find" Methods available in the {@code Torello.HTML.NodeSearch} package
146     * return instances of {@code Vector<DotPair>}.  These {@code DotPair} lists are to be
147     * thought-of as "lists of sub-lists" for a Vectorized-HTML web-page.  This method can help
148     * identify each and every integer-index that is <I>inside at least one of these returned
149     * sublists</I>.
150     *
151     * <BR /><BR /><B CLASS=JDDescLabel>Stale Data Reminder:</B>
152     * 
153     * <BR />Try to keep in mind, always, that when writing code that modifies Vectorized-HTML,
154     * <I>the moment any node is inserted or deleted into a {@code Vector}</I> all references /
155     * indices to that exact {@code Vector} will become invalid (unless special care is taken to
156     * update those indices by the number of nodes that were inserted or removed!)
157     * 
158     * <BR /><BR />There are myriad ways to handle this issue, many of which are beyond the scope
159     * of this Documentation Entry.  Generally, one of the better suggestions to keep in mind, is
160     * that if you are modifying a Vectorized-HTML page, perform your updates or removals <I>in
161     * reverse order</I>, and your {@code Vector} index-list pointers <I>will never</I> become
162     * stale pointers.
163     * 
164     * <BR /><BR />The interface {@link Replaceable} is also another way to avoid making elementary
165     * mistakes involving stale {@code Vector}-indices.
166     *
167     * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements
168     * the public interface {@code java.lang.Iterable}.
169     * 
170     * @param leastToGreatest  When this parameter receives a {@code TRUE} value, the results that
171     * are returned from this {@code IntStream} will be sorted least to greatest.  To generated an
172     * {@code IntStream} that produces results that are sorted from greatest to least, pass
173     * {@code FALSE} to this parameter.
174     * 
175     * @return A java {@code java.util.stream.IntStream} of the integers in that are members of
176     * this {@code Iterable<DotPair>}
177     */
178    public static IntStream toStream(Iterable<DotPair> dpi, boolean leastToGreatest)
179    {
180        Iterator<DotPair>   iter    = dpi.iterator();
181        TreeSet<DotPair>    ts      = new TreeSet<>();
182
183
184        // The tree-set will add the "DotPair" to the tree - and keep them sorted,
185        // since that's what "TreeSet" does.
186
187        while (iter.hasNext()) ts.add(iter.next());
188
189        Iterator<DotPair>   tsIter  = leastToGreatest ? ts.iterator() : ts.descendingIterator();
190        IntStream.Builder   builder = IntStream.builder();
191        DotPair             dp      = null;
192
193        if (leastToGreatest)
194
195            // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE!
196            while (tsIter.hasNext())
197                for (int i=(dp=tsIter.next()).start; i <= dp.end; i++)
198                    builder.add(i);
199
200        else
201
202
203            // we are building a "reverse-index" stream... Make sure to add the sub-lists in
204            // reverse-order.
205
206            while (tsIter.hasNext())
207                for (int i=(dp=tsIter.next()).end; i >= dp.start; i--)
208                    builder.add(i);
209
210        if (leastToGreatest)
211
212
213            // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point... 
214            // MULTIPLE, OVERLAPPING DOTPAIRS
215            // We need to sort because the DotPair sublists have been added in "sorted order" but
216            // the overall list is not (necessarily, but possibly) sorted!
217
218            return builder.build().sorted().distinct();
219
220        else
221
222            // Here, the exact same argument holds, but also, when "re-sorting" we have to futz
223            // around with the fact that Java's 'IntStream' class does not have a specialized
224            // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug).
225
226            return builder.build().map(i -> -i).sorted().map(i -> -i).distinct();
227    }
228
229
230    // ********************************************************************************************
231    // ********************************************************************************************
232    // List / Iterable of DotPair's  into an "End-Poinnts" Stream, Iterator, Array
233    // ********************************************************************************************
234    // ********************************************************************************************
235
236
237    /**
238     * Convenience Method.
239     * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)}
240     * <BR />Converts: output to an {@code Iterator}
241     */
242    public static PrimitiveIterator.OfInt endPointsIterator
243        (Iterable<DotPair> dpi, boolean leastToGreatest)
244    { return endPointsToStream(dpi, leastToGreatest).iterator(); }
245
246    /**
247     * Convenience Method.
248     * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)}
249     * <BR />Converts: output to an {@code int[]} array.
250     */
251    public static int[] endPointsToPosArray(Iterable<DotPair> dpi, boolean leastToGreatest)
252    { return endPointsToStream(dpi, leastToGreatest).toArray(); }
253
254    /**
255     * Collates a list of dotted-pairs into an {@code IntStream}.  Here, only the starting and
256     * ending values of the {@code DotPair's} are inserted into the returned {@code IntStream}.
257     * Any indices that lay between {@code DotPair.start} and {@code DotPair.end} <I>are not
258     * placed</I> into the output-{@code IntStream}.
259     * 
260     * <BR /><BR />All other behaviors of this method are the same as
261     * {@link #toStream(Iterable, boolean)}.
262     * 
263     * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements
264     * the public interface {@code java.lang.Iterable}.
265     * 
266     * @param leastToGreatest  When this parameter receives a {@code TRUE} value, the results that
267     * are returned from this {@code IntStream} will be sorted least to greatest.  To generated an
268     * {@code IntStream} that produces results that are sorted from greatest to least, pass
269     * {@code FALSE} to this parameter.
270     * 
271     * @return A java {@code java.util.stream.IntStream} of the integers in that are members of
272     * this {@code Iterable<DotPair>}.  Only the values {@code DotPair.start}, and
273     * {@code DotPair.end} are included in the output-{@code IntStream}.  This is unlike the method
274     * {@link #toStream(Iterable, boolean)} in that, here, only the starting and ending points of
275     * the dotted-pair are placed into result.  In the other method, the start-index, end-index 
276     * <I>and all indices in between them</I> are placed into the returned-{@code Stream}.
277     * 
278     * @see #toStream(Iterable, boolean)
279     */
280    public static IntStream endPointsToStream(Iterable<DotPair> dpi, boolean leastToGreatest)
281    {
282        Iterator<DotPair>   iter    = dpi.iterator();
283        TreeSet<DotPair>    ts      = new TreeSet<>();
284
285
286        // The tree-set will add the "DotPair" to the tree - and keep them sorted,
287        // since that's what "TreeSet" does.
288
289        while (iter.hasNext()) ts.add(iter.next());
290
291        Iterator<DotPair>   tsIter  = leastToGreatest ? ts.iterator() : ts.descendingIterator();
292        IntStream.Builder   builder = IntStream.builder();
293        DotPair             dp      = null;
294
295        if (leastToGreatest)
296
297            // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE!
298            while (tsIter.hasNext())
299            {
300                dp = tsIter.next();
301
302                // In this method, only the start/end are placed into the IntStream
303                builder.add(dp.start);
304
305                // The indices BETWEEN start/end ARE NOT appened to the IntStream
306                builder.add(dp.end);
307            }
308
309        else
310
311
312            // we are building a "reverse-index" stream... Make sure to add the sub-lists in
313            // reverse-order.
314
315            while (tsIter.hasNext())
316            {
317                dp = tsIter.next();
318
319                // Only start/end are appended.
320                builder.add(dp.end);
321
322                // NOTE: This is a "reverse order" IntStream
323                builder.add(dp.start);
324            }
325
326        if (leastToGreatest)
327
328
329            // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point...
330            // MULTIPLE, OVERLAPPING DOTPAIRS
331            // We need to sort because the DotPair sublists have been added in "sorted order" but
332            // the overall list is not (necessarily, but possibly) sorted!
333
334            return builder.build().sorted().distinct();
335
336        else
337
338            // Here, the exact same argument holds, but also, when "re-sorting" we have to futz
339            // around with the fact that Java's 'IntStream' class does not have a specialized
340            // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug).
341
342            return builder.build().map(i -> -i).sorted().map(i -> -i).distinct();
343    }
344
345
346    // ********************************************************************************************
347    // ********************************************************************************************
348    // Mirror-Inverse Collated Stream, Iterator, Array
349    // ********************************************************************************************
350    // ********************************************************************************************
351
352
353    /**
354     * Convenience Method.
355     * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)}
356     * <BR />Converts: output to an {@code Iterator}
357     */
358    public static PrimitiveIterator.OfInt excludedToIterator
359        (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest)
360    { return excludedToStream(dpi, vectorSize, leastToGreatest).iterator(); }
361
362    /**
363     * Convenience Method.
364     * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)}
365     * <BR />Converts: output to an {@code int[]} array.
366     */
367    public static int[] excludedToPosArray
368        (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest)
369    { return excludedToStream(dpi, vectorSize, leastToGreatest).toArray(); }
370
371    /**
372     * This method will first collate and sort a list of input {@code 'DotPair'} instances.
373     * 
374     * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements
375     * the public interface {@code java.lang.Iterable}.
376     * 
377     * @param vectorSize This method internal-loop will begin at index {@code '0'} and proceed 
378     * until {@code 'vectorSize' - 1}.  Any value in this range that is found not be inside any of
379     * the provided {@code DotPair's} will be included inside the returned {@code IntStream}.
380     * 
381     * @param leastToGreatest  When this parameter receives a {@code TRUE} value, the results that
382     * are returned from this {@code IntStream} will be sorted least to greatest.  To generated an
383     * {@code IntStream} that produces results that are sorted from greatest to least, pass
384     * {@code FALSE} to this parameter.
385     * 
386     * @return The list of ints
387     */
388    public static IntStream excludedToStream
389        (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest)
390    {
391        Iterator<DotPair>   iter    = dpi.iterator();
392        TreeSet<DotPair>    ts      = new TreeSet<>();
393
394        if (vectorSize < 1) throw new NException(
395            "You have passed " + vectorSize + " to parameter vectorSize, but this value must be " +
396            "at least 1, or greater."
397        );
398
399        // All this is going to do is MAKE SURE that the DotPair's are ordered, least to greatest
400        while (iter.hasNext()) ts.add(iter.next());
401
402        iter = leastToGreatest ? ts.iterator() : ts.descendingIterator();
403
404        DotPair             dp  = iter.hasNext() ? iter.next() : null;
405        IntStream.Builder   b   = IntStream.builder();
406
407        if (leastToGreatest)
408
409            for (int i=0; i < vectorSize;)
410
411                if (dp == null)             b.accept(i++);
412                else if (i < dp.start)      b.accept(i++);
413                else if (dp.isInside(i))    { i++; continue; }
414                else if (iter.hasNext())    Objects.requireNonNull(dp = iter.next());
415                else                        dp = null;
416
417        else 
418
419            for (int i=(vectorSize-1); i >= 0;)
420
421                if (dp == null)             b.accept(i--);
422                else if (i < dp.start)      b.accept(i--);
423                else if (dp.isInside(i))    { i--; continue; }
424                else if (iter.hasNext())    Objects.requireNonNull(dp = iter.next());
425                else                        dp = null;
426
427        return b.build();
428    }
429}