001package Torello.HTML; 002 003import Torello.Java.NException; 004 005import java.util.*; 006 007import java.util.stream.IntStream; 008 009public class DPUtil 010{ 011 // ******************************************************************************************** 012 // ******************************************************************************************** 013 // To Vector & Sublist 014 // ******************************************************************************************** 015 // ******************************************************************************************** 016 017 018 /** 019 * This method converts a sublist, represented by a "dotted pair", and converts it into a 020 * {@code Vector} of {@code HTMLNode}. 021 * 022 * <BR /><BR /><B CLASS=JDDescLabel>Retrieval Heuristic:</B> 023 * 024 * <BR />It should be obvious that the parameter @code 'dp'} contains fields named 025 * {@link DotPair#start} and {@link DotPair#end}. These two simply represent the starting and 026 * ending indices into an HTML page {@code Vector}. In this method, the intention is that they 027 * are indices into the HTML-{@code Vector} parameter simply-named {@code 'html'}. 028 * 029 * <BR /><BR />This method cycles through that {@code Vector}, beginning with the field 030 * {@code dp.start} (inclusive) and ending with {@code dp.end} (inclusive, as well). Each 031 * {@code HTMLNode} reference within this sublist is inserted into the {@code Vector} that is 032 * ultimately returned. 033 * 034 * @param html Any Vectorized-HTML Web-Page, or sub-page 035 * 036 * @param dp Any sublist within that HTML page. 037 * 038 * @return A {@code Vector} version of the original sublist that was represented by 039 * passed parameter {@code 'dp'} 040 */ 041 public static Vector<HTMLNode> toVector(Vector<? extends HTMLNode> html, DotPair dp) 042 { 043 Vector<HTMLNode> ret = new Vector<>(); 044 045 046 // NOTE: Cannot return the result of "subList" because it is linked/mapped to the original 047 // Vector. If changes are made to "subList", those changes will be reflected in the 048 // original HTML Vector. 049 050 ret.addAll(html.subList(dp.start, dp.end + 1)); 051 052 return ret; 053 } 054 055 /** 056 * This will cycle through a "list of sublists" and call the method 057 * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp)} 058 * on each sublist in the input parameter {@code 'sublists'} Those sublists will be collected 059 * into another {@code Vector} and returned. 060 * 061 * @param html Any Vectorized-HTML Web-Page, or sub-page 062 * @param sublists A "List of sublists" within that HTML page. 063 * @return This method shall return a {@code Vector} containing vectors as sublists. 064 */ 065 public static Vector<Vector<HTMLNode>> toVectors 066 (Vector<? extends HTMLNode> html, Iterable<DotPair> sublists) 067 { 068 Vector<Vector<HTMLNode>> ret = new Vector<>(); 069 070 for (DotPair sublist : sublists) ret.addElement(toVector(html, sublist)); 071 072 return ret; 073 } 074 075 /** 076 * This will cycle through a "list of sublists" and call the method 077 * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp) } 078 * on each sublist in the input parameter {@code 'sublists'}. Those sublists will be 079 * collected into another {@code Vector} and returned. 080 * 081 * @param html Any Vectorized-HTML Web-Page, or sub-page 082 * @param sublists A "List of sublists" within that HTML page. 083 * @return This method shall return a {@code Vector} containing vectors as sublists. 084 */ 085 public static Vector<SubSection> toSubSections 086 (Vector<? extends HTMLNode> html, Iterable<DotPair> sublists) 087 { 088 Vector<SubSection> ret = new Vector<>(); 089 090 for (DotPair sublist : sublists) 091 ret.addElement(new SubSection(sublist, toVector(html, sublist))); 092 093 return ret; 094 } 095 096 097 // ******************************************************************************************** 098 // ******************************************************************************************** 099 // List / Iterable of DotPair's to a Stream, Iterator, Array 100 // ******************************************************************************************** 101 // ******************************************************************************************** 102 103 104 /** 105 * Convenience Method. 106 * <BR />Invokes: {@link #toStream(Iterable, boolean)} 107 * <BR />Converts output to an {@code Iterator} 108 */ 109 public static PrimitiveIterator.OfInt iterator(Iterable<DotPair> dpi, boolean leastToGreatest) 110 { return toStream(dpi, leastToGreatest).iterator(); } 111 112 /** 113 * Convenience Method. 114 * <BR />Invokes: {@link #toStream(Iterable, boolean)} 115 * <BR />Converts output to an {@code int[]} array. 116 */ 117 public static int[] toPosArray(Iterable<DotPair> dpi, boolean leastToGreatest) 118 { return toStream(dpi, leastToGreatest).toArray(); } 119 120 /** 121 * This method will convert a list of {@code DotPair} instances to a Java 122 * {@code java.util.stream.IntStream}. The generated {@code IntStream} shall contain all 123 * {@code Vector}-indices (integers) that are within the bounds of any of the {@code DotPair's} 124 * listed by parameter {@code 'dpi'}. 125 * 126 * <BR /><BR />Stating this a second time, the returned position-index list (which is of Java 127 * Type {@code IntStream}) is built out of the contents of each and every one of the the 128 * {@code DotPair's} in {@code Iterable}-parameter {@code 'dpi'}. This index-list which is 129 * ultimately returned from this method <I>will contain all indices that are "inside" all 130 * {@coded DotPair's}</I>. 131 * 132 * <BR /><BR /><B CLASS=JDDescLabel>Repeated Indices & Gaps:</B> 133 * 134 * <BR />This method will never include an index more than once, and all integers in the 135 * returned {@code IntStream} will be unique. 136 * 137 * <BR /><BR />The sublists (the {@code DotPair's} of input-parameter {@code 'dpi'}) may 138 * possibly (or even 'likely') overlap each-other. Furthermore, other {@code DotPair} / 139 * sublists may have several gaps between them. This method shall return an {@code IntStream} 140 * of unique, integer indices all of which are guaranteed to be 141 * <B STYLE='color: red;'><I>inside</I></B> at least one of {@code dpi's} sublists. 142 * 143 * <BR /><BR /><B CLASS=JDDescLabel>NodeSearch Find 'all' Methods:</B> 144 * 145 * <BR />Many of the "Find" Methods available in the {@code Torello.HTML.NodeSearch} package 146 * return instances of {@code Vector<DotPair>}. These {@code DotPair} lists are to be 147 * thought-of as "lists of sub-lists" for a Vectorized-HTML web-page. This method can help 148 * identify each and every integer-index that is <I>inside at least one of these returned 149 * sublists</I>. 150 * 151 * <BR /><BR /><B CLASS=JDDescLabel>Stale Data Reminder:</B> 152 * 153 * <BR />Try to keep in mind, always, that when writing code that modifies Vectorized-HTML, 154 * <I>the moment any node is inserted or deleted into a {@code Vector}</I> all references / 155 * indices to that exact {@code Vector} will become invalid (unless special care is taken to 156 * update those indices by the number of nodes that were inserted or removed!) 157 * 158 * <BR /><BR />There are myriad ways to handle this issue, many of which are beyond the scope 159 * of this Documentation Entry. Generally, one of the better suggestions to keep in mind, is 160 * that if you are modifying a Vectorized-HTML page, perform your updates or removals <I>in 161 * reverse order</I>, and your {@code Vector} index-list pointers <I>will never</I> become 162 * stale pointers. 163 * 164 * <BR /><BR />The interface {@link Replaceable} is also another way to avoid making elementary 165 * mistakes involving stale {@code Vector}-indices. 166 * 167 * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements 168 * the public interface {@code java.lang.Iterable}. 169 * 170 * @param leastToGreatest When this parameter receives a {@code TRUE} value, the results that 171 * are returned from this {@code IntStream} will be sorted least to greatest. To generated an 172 * {@code IntStream} that produces results that are sorted from greatest to least, pass 173 * {@code FALSE} to this parameter. 174 * 175 * @return A java {@code java.util.stream.IntStream} of the integers in that are members of 176 * this {@code Iterable<DotPair>} 177 */ 178 public static IntStream toStream(Iterable<DotPair> dpi, boolean leastToGreatest) 179 { 180 Iterator<DotPair> iter = dpi.iterator(); 181 TreeSet<DotPair> ts = new TreeSet<>(); 182 183 184 // The tree-set will add the "DotPair" to the tree - and keep them sorted, 185 // since that's what "TreeSet" does. 186 187 while (iter.hasNext()) ts.add(iter.next()); 188 189 Iterator<DotPair> tsIter = leastToGreatest ? ts.iterator() : ts.descendingIterator(); 190 IntStream.Builder builder = IntStream.builder(); 191 DotPair dp = null; 192 193 if (leastToGreatest) 194 195 // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE! 196 while (tsIter.hasNext()) 197 for (int i=(dp=tsIter.next()).start; i <= dp.end; i++) 198 builder.add(i); 199 200 else 201 202 203 // we are building a "reverse-index" stream... Make sure to add the sub-lists in 204 // reverse-order. 205 206 while (tsIter.hasNext()) 207 for (int i=(dp=tsIter.next()).end; i >= dp.start; i--) 208 builder.add(i); 209 210 if (leastToGreatest) 211 212 213 // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point... 214 // MULTIPLE, OVERLAPPING DOTPAIRS 215 // We need to sort because the DotPair sublists have been added in "sorted order" but 216 // the overall list is not (necessarily, but possibly) sorted! 217 218 return builder.build().sorted().distinct(); 219 220 else 221 222 // Here, the exact same argument holds, but also, when "re-sorting" we have to futz 223 // around with the fact that Java's 'IntStream' class does not have a specialized 224 // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug). 225 226 return builder.build().map(i -> -i).sorted().map(i -> -i).distinct(); 227 } 228 229 230 // ******************************************************************************************** 231 // ******************************************************************************************** 232 // List / Iterable of DotPair's into an "End-Poinnts" Stream, Iterator, Array 233 // ******************************************************************************************** 234 // ******************************************************************************************** 235 236 237 /** 238 * Convenience Method. 239 * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)} 240 * <BR />Converts: output to an {@code Iterator} 241 */ 242 public static PrimitiveIterator.OfInt endPointsIterator 243 (Iterable<DotPair> dpi, boolean leastToGreatest) 244 { return endPointsToStream(dpi, leastToGreatest).iterator(); } 245 246 /** 247 * Convenience Method. 248 * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)} 249 * <BR />Converts: output to an {@code int[]} array. 250 */ 251 public static int[] endPointsToPosArray(Iterable<DotPair> dpi, boolean leastToGreatest) 252 { return endPointsToStream(dpi, leastToGreatest).toArray(); } 253 254 /** 255 * Collates a list of dotted-pairs into an {@code IntStream}. Here, only the starting and 256 * ending values of the {@code DotPair's} are inserted into the returned {@code IntStream}. 257 * Any indices that lay between {@code DotPair.start} and {@code DotPair.end} <I>are not 258 * placed</I> into the output-{@code IntStream}. 259 * 260 * <BR /><BR />All other behaviors of this method are the same as 261 * {@link #toStream(Iterable, boolean)}. 262 * 263 * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements 264 * the public interface {@code java.lang.Iterable}. 265 * 266 * @param leastToGreatest When this parameter receives a {@code TRUE} value, the results that 267 * are returned from this {@code IntStream} will be sorted least to greatest. To generated an 268 * {@code IntStream} that produces results that are sorted from greatest to least, pass 269 * {@code FALSE} to this parameter. 270 * 271 * @return A java {@code java.util.stream.IntStream} of the integers in that are members of 272 * this {@code Iterable<DotPair>}. Only the values {@code DotPair.start}, and 273 * {@code DotPair.end} are included in the output-{@code IntStream}. This is unlike the method 274 * {@link #toStream(Iterable, boolean)} in that, here, only the starting and ending points of 275 * the dotted-pair are placed into result. In the other method, the start-index, end-index 276 * <I>and all indices in between them</I> are placed into the returned-{@code Stream}. 277 * 278 * @see #toStream(Iterable, boolean) 279 */ 280 public static IntStream endPointsToStream(Iterable<DotPair> dpi, boolean leastToGreatest) 281 { 282 Iterator<DotPair> iter = dpi.iterator(); 283 TreeSet<DotPair> ts = new TreeSet<>(); 284 285 286 // The tree-set will add the "DotPair" to the tree - and keep them sorted, 287 // since that's what "TreeSet" does. 288 289 while (iter.hasNext()) ts.add(iter.next()); 290 291 Iterator<DotPair> tsIter = leastToGreatest ? ts.iterator() : ts.descendingIterator(); 292 IntStream.Builder builder = IntStream.builder(); 293 DotPair dp = null; 294 295 if (leastToGreatest) 296 297 // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE! 298 while (tsIter.hasNext()) 299 { 300 dp = tsIter.next(); 301 302 // In this method, only the start/end are placed into the IntStream 303 builder.add(dp.start); 304 305 // The indices BETWEEN start/end ARE NOT appened to the IntStream 306 builder.add(dp.end); 307 } 308 309 else 310 311 312 // we are building a "reverse-index" stream... Make sure to add the sub-lists in 313 // reverse-order. 314 315 while (tsIter.hasNext()) 316 { 317 dp = tsIter.next(); 318 319 // Only start/end are appended. 320 builder.add(dp.end); 321 322 // NOTE: This is a "reverse order" IntStream 323 builder.add(dp.start); 324 } 325 326 if (leastToGreatest) 327 328 329 // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point... 330 // MULTIPLE, OVERLAPPING DOTPAIRS 331 // We need to sort because the DotPair sublists have been added in "sorted order" but 332 // the overall list is not (necessarily, but possibly) sorted! 333 334 return builder.build().sorted().distinct(); 335 336 else 337 338 // Here, the exact same argument holds, but also, when "re-sorting" we have to futz 339 // around with the fact that Java's 'IntStream' class does not have a specialized 340 // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug). 341 342 return builder.build().map(i -> -i).sorted().map(i -> -i).distinct(); 343 } 344 345 346 // ******************************************************************************************** 347 // ******************************************************************************************** 348 // Mirror-Inverse Collated Stream, Iterator, Array 349 // ******************************************************************************************** 350 // ******************************************************************************************** 351 352 353 /** 354 * Convenience Method. 355 * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)} 356 * <BR />Converts: output to an {@code Iterator} 357 */ 358 public static PrimitiveIterator.OfInt excludedToIterator 359 (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest) 360 { return excludedToStream(dpi, vectorSize, leastToGreatest).iterator(); } 361 362 /** 363 * Convenience Method. 364 * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)} 365 * <BR />Converts: output to an {@code int[]} array. 366 */ 367 public static int[] excludedToPosArray 368 (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest) 369 { return excludedToStream(dpi, vectorSize, leastToGreatest).toArray(); } 370 371 /** 372 * This method will first collate and sort a list of input {@code 'DotPair'} instances. 373 * 374 * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements 375 * the public interface {@code java.lang.Iterable}. 376 * 377 * @param vectorSize This method internal-loop will begin at index {@code '0'} and proceed 378 * until {@code 'vectorSize' - 1}. Any value in this range that is found not be inside any of 379 * the provided {@code DotPair's} will be included inside the returned {@code IntStream}. 380 * 381 * @param leastToGreatest When this parameter receives a {@code TRUE} value, the results that 382 * are returned from this {@code IntStream} will be sorted least to greatest. To generated an 383 * {@code IntStream} that produces results that are sorted from greatest to least, pass 384 * {@code FALSE} to this parameter. 385 * 386 * @return The list of ints 387 */ 388 public static IntStream excludedToStream 389 (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest) 390 { 391 Iterator<DotPair> iter = dpi.iterator(); 392 TreeSet<DotPair> ts = new TreeSet<>(); 393 394 if (vectorSize < 1) throw new NException( 395 "You have passed " + vectorSize + " to parameter vectorSize, but this value must be " + 396 "at least 1, or greater." 397 ); 398 399 // All this is going to do is MAKE SURE that the DotPair's are ordered, least to greatest 400 while (iter.hasNext()) ts.add(iter.next()); 401 402 iter = leastToGreatest ? ts.iterator() : ts.descendingIterator(); 403 404 DotPair dp = iter.hasNext() ? iter.next() : null; 405 IntStream.Builder b = IntStream.builder(); 406 407 if (leastToGreatest) 408 409 for (int i=0; i < vectorSize;) 410 411 if (dp == null) b.accept(i++); 412 else if (i < dp.start) b.accept(i++); 413 else if (dp.isInside(i)) { i++; continue; } 414 else if (iter.hasNext()) Objects.requireNonNull(dp = iter.next()); 415 else dp = null; 416 417 else 418 419 for (int i=(vectorSize-1); i >= 0;) 420 421 if (dp == null) b.accept(i--); 422 else if (i < dp.start) b.accept(i--); 423 else if (dp.isInside(i)) { i--; continue; } 424 else if (iter.hasNext()) Objects.requireNonNull(dp = iter.next()); 425 else dp = null; 426 427 return b.build(); 428 } 429}