001package Torello.HTML;
002
003import Torello.Java.*;
004
005import Torello.HTML.NodeSearch.InnerTagFind; // Used for an @see reference
006import Torello.HTML.NodeSearch.TagNodeFind;  // Used in getBaseURL
007
008import Torello.Java.Additional.Ret2;
009import Torello.Java.Additional.Ret3;
010
011import Torello.JavaDoc.LinkJavaSource;
012import static Torello.JavaDoc.Entity.METHOD;
013
014import java.net.URL;
015import java.net.MalformedURLException;
016
017import java.util.Vector;
018import java.util.stream.IntStream;
019
020/**
 * Utilities for de-referencing 'partially-completed' {@code URL's} in a Web-Page {@code Vector}.
022 * 
023 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LINKS>
024 * @see ReplaceNodes
025 * @see ReplaceFunction
026 * @see HTMLPage
027 * @see InnerTagFind
028 * @see Ret2
029 */
030@Torello.JavaDoc.StaticFunctional
031public class Links
032{
033    private Links() { }
034
035    /**
036     * List of documented "starter-strings" that are sometimes used in Anchor URL
037     * {@code 'HREF=...'} attributes.
038     * 
039     * @see #NON_URL_HREFS
040     */
041    protected static final String[] _NON_URL_HREFS =
042        { "tel:", "magnet:", "javascript:", "mailto:", "ftp:", "file:", "data:", "blog:", "#" };
043
044    /**
045     * This small method just returns the complete list of commonly found Anchor
046     * {@code 'HREF' String's} that do not actually constitute an HTML {@code 'URL'.}  This method
047     * actually returns a "clone" of an internally stored {@code String[]} Array.  This is to
048     * protect and make sure that the list of potential HTML Anchor-Tag {@code 'HREF'} Attributes
049     * is not changed, doctored or modified
050     * 
051     * @return A clone of the {@code String}-array {@code '_NON_URL_HREFS'}
052     * 
053     * @see #_NON_URL_HREFS
054     */
055    public static String[] NON_URL_HREFS()
056    { return _NON_URL_HREFS.clone(); }
057
058    /**
059     * The methods in this class <I><B>will not automatically extract</I></B> any HTML
060     * {@code <BASE HREF=URL>} definitions that are found on this page.  If the user wishes to
061     * dereference partial / relative {@code URL} definitions that exist on the input page, all the
062     * while respecting any {@code <BASE HREF=URL>} definitions found on the input page, then this
063     * method should be utilized.
064     *
065     * @param page This may be any HTML page or partial page.  If this page has a valid HTML
066     * {@code <BASE HREF=URL>}, it will be extracted and returned as an instance of
067     * {@code class URL}.
068     *
069     * @return This shall return the HTML {@code <BASE HREF="http://...">} element found available
070     * within the input-page parameter {@code 'page'}.  If the page provided does not contain a
071     * {@code BASE URL} definition, then null shall be returned.
072     *
073     * <BR /><BR /><B>NOTE:</B> The HTML Specification clearly states that only one {@code URL}
074     * may be defined using the HTML Element {@code <BASE>}.  Clearly, due to the browser wars,
075     * unspecified / non-deterministic behavior is possible if multiple definitions are provided.
076     * For the purposes of this class, if such a situation arises, an exception is thrown.
077     *
078     * @throws MalformedHTMLException If the HTML page provided contains multiple definitions of
079     * the element {@code <BASE HREF=URL>}, then this exception will throw.
080     *
081     * @throws MalformedURLException If the {@code <BASE HREF=URL>} found / identified within the
082     * input page, but that {@code URL} is invalid, then this exception shall throw.
083     * 
084     * @see TagNodeFind
085     * @see Attributes#retrieve(Vector, int[], String)
086     */
087    public static URL getBaseURL(Vector<? extends HTMLNode> page)
088        throws MalformedHTMLException, MalformedURLException
089    {
090        int[] posArr = TagNodeFind.all(page, TC.OpeningTags, "base");
091
092        if (posArr.length == 0) return null;
093
094        // NOTE: The cast is all right because 'posArr' only points to TagNode's
095        // Attributes expects to avoid processing Vector<TextNode>, and Vector<CommentNode>
096        // Above, there will be nothing in the 'posArr' if either of those was passed.
097
098        @SuppressWarnings("unchecked")
099        String[]    urls    = Attributes.retrieve((Vector<HTMLNode>) page, posArr, "href");
100
101        boolean     found   = false;
102        String      ret     = null;
103
104        for (String url : urls)
105            if ((url != null) && (url.length() > 0))
106                if (found)
107                    throw new MalformedHTMLException(
108                        "The page you have provided has multiple <BASE HREF=URL> definitions.  " +
109                        "However, the HTML Specifications state that pages may provide just one " +
110                        "definition.  If you wish to proceed, retrieve the definitions manually " +
111                        "using class TagNodeFind.all and Attributes.retrieve, as explained in " +
112                        "the JavaDoc pages for this class."
113                    );
114                else 
115                {
116                    found = true;
117                    ret = url;
118                }
119
120        return new URL(ret);                    
121    }
122
123
124    // ********************************************************************************************
125    // ********************************************************************************************
126    // Complete Vector-Resolve Methods - SRC-ATTRIBUTE
127    // ********************************************************************************************
128    // ********************************************************************************************
129
130
131    /**
132     * Convenience Method.
133     * <BR />Invokes: {@link #resolveAllSRC(Vector, int, int, URL, SD, boolean)}
134     */
135    public static Ret3<int[], int[], int[]> resolveAllSRC(
136            Vector<? super TagNode> html, URL sourcePage, SD quote,
137            boolean askForReturnArraysOrReturnNull
138        )
139    { return resolveAllSRC(html, 0, -1, sourcePage, quote, askForReturnArraysOrReturnNull); }
140
141    /**
142     * Convenience Method.
143     * <BR />Accepts: {@code DotPair}.
144     * <BR />Invokes: {@link #resolveAllSRC(Vector, int, int, URL, SD, boolean)}
145     */
146    public static Ret3<int[], int[], int[]> resolveAllSRC(
147            Vector<? super TagNode> html, DotPair dp, URL sourcePage, SD quote,
148            boolean askForReturnArraysOrReturnNull
149        )
150    {
151        return resolveAllSRC
152            (html, dp.start, dp.end + 1, sourcePage, quote, askForReturnArraysOrReturnNull);
153    }
154
155    /**
156     * This method shall resolve all partial {@code URL} addresses that are found within
157     * {@code TagNode} elements having {@code 'SRC=...'} attributes.  Each instance of
158     * {@code TagNode} found in the input HTML {@code Vector} that has an {@code 'SRC'}
     * attribute - if the {@code 'URL'} is only partially resolved - shall be updated and replaced
     * with a new {@code TagNode} with a fully resolved {@code URL}.
161     * 
162     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
163     * 
164     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
165     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
166     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
167     * 
168     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
169     * (possibly-relative) {@code URL's} in the HTML-{@code Vector} will be resolved.
170     * 
171     * @param quote A choice for the quotes to use.  In most cases, {@code URL} attribute
172     * <B STYLE="color: red;">values</B> do not contain quotation-marks.  So likely either
173     * choice would work just fine, without exceptions.
174     * 
175     * <BR /><BR /><B>NOTE:</B> <I>null may be passed to this parameter</I>, and if it is
176     * the original quotation marks found in the {@code TagNode's 'SRC'} attribute will be
177     * reused.  Passing null to this parameter should almost always be easiest, safest.
178     * 
179     * @param askForReturnArraysOrReturnNull This (long-named) parameter is merely here to
180     * facilitate retrieving more information from this method - <I>if necessary</I>.  When this
181     * parameter receives the following values:
182     * 
183     * <BR /><BR /><UL CLASS=JDUL>
184     * <LI> <B>TRUE:</B> Three integer {@code int[]} arrays will be returned as listed in the
185     *      <B>{@code Returns:}</B> section of this method's documentation.
186     *      </LI>
187     * 
188     * <LI><B>FALSE:</B> This method shall return null.</LI>
189     * </UL>
190     * 
191     * @return If input parameter {@code 'askForReturnArraysOrReturnNull'} has been passed 
192     * {@code FALSE}, this method shall return null.  Otherwise, (if passed {@code TRUE}), then
193     * this method shall return an instance of {@code 'Ret3<int[], int[], int[]>'} - which is
194     * <I>returning three separate integer-arrays about what was found, and what has occurred.</I>
195     *
196     * <BR /><BR />
     * Three arrays are returned as a result of this method's invocation.  Keep in mind that
     * though the information might be superfluous, discarding these arrays is easy.
     * They are provided as a matter of convenience for cases where more detailed information is
     * mandatory for ensuring that long lists of {@code HTMLNode's} were properly updated.
201     * 
202     * <BR /><BR /><OL CLASS=JDOL>
203     * <LI> {@code Ret3.a (int[])}
204     *      <BR /><BR />
205     *      The first {@code int[] array} shall contain a list of the index of every
     *      {@code TagNode} in the input-{@code Vector} parameter's range that
     *      <B><I>contained</I></B> a non-null HTML {@code 'SRC'} Attribute.
208     *      <BR /><BR />
209     *      </LI>
210     * 
211     * <LI> {@code Ret3.b (int[])}
212     *      <BR /><BR />
213     *      The second {@code int[] array} will contain an index-list of the indices
214     *      which contained {@code TagNode's} that were <B><I>replaced</I></B> by the
215     *      internal-resolve logic.
216     *      <BR /><BR />
217     *      </LI>
218     * 
219     * <LI> {@code Ret3.c (int[])}
220     *      <BR /><BR />
221     *      The third {@code int[] array} will contain an index-list of the indices
222     *      which contained {@code TagNode's} whose {@code 'SRC=...'} attribute
223     *      <I><B>failed</I></B> to be resolved by the internal-resolve logic, <I>or</I> caused a
224     *      {@code QuotesException} to throw.
225     *      </LI>
226     * </OL>
227     * 
228     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
229     * 
230     * @see #resolve(String, URL)
231     * @see TagNode#AV(String)
232     * @see TagNode#setAV(String, String, SD)
233     */
234    public static Ret3<int[], int[], int[]> resolveAllSRC(
235            Vector<? super TagNode> html, int sPos, int ePos, URL sourcePage, SD quote,
236            boolean askForReturnArraysOrReturnNull
237        )
238    {
239        // Retrieve the Vector-location of any TagNode on the page that has
240        // a "SRC=..." attribute.  These are almost always HTML <IMG> elements.
241        // NOTE: FIND Method's are "READ ONLY" - the Cast will make no difference at run-time.
242        //       The @SuppressWarnings is to overcome the cast of 'html'
243
244        @SuppressWarnings("unchecked")
245        int[] hasSrcPosArr = InnerTagFind.all((Vector<HTMLNode>) html, sPos, ePos, "src");
246
247        // Java Stream's are convenient for keeping "Growing Lists" of return values.
248        // This builder shall keep a list of all URL's that failed to update - for any reason
249        // **UNLESS** the reason is that the URL was already a fully-resolved, non-partial URL
250
251        IntStream.Builder failedUpdate = askForReturnArraysOrReturnNull
252            ? IntStream.builder() 
253            : null;
254
255        // This stream will keep a list of all URL's that were updated, and whose TagNode's
256        // were replaced inside the input HTML Vector
257
258        IntStream.Builder replaced = askForReturnArraysOrReturnNull
259            ? IntStream.builder()
260            : null;
261
262        for (int pos : hasSrcPosArr)
263        {
264            // Get the node at the index
265            TagNode tn = (TagNode) html.elementAt(pos);
266
267            // 1) Retrieve the SRC Attribute
268            // 2) if it is a partial-URL resolve it
269            // 3) Convert to a String
270
271            String  oldURL = tn.AV("src");
272            URL     newURL = resolve(oldURL, sourcePage);
273
274            // Some URL's cannot be resolved, if so, just skip this TagNode.
275            // Log the index to the stream (if requested), and continue.
276
277            if (newURL == null)
278            { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
279
280            // If the URL was already a fully-resolved-URL, continue - don't replace the TagNode;
281            // No logging needed here, the URL was *already* resolved...
282
283            if (oldURL.length() == newURL.toString().length()) continue;
284
285            // Replace the SRC Attribute in the TagNode.  This builds a new instance of TagNode
286            // If there is an exception, log the index to the stream (if requested), and continue.
287
288            try
289                { tn = tn.setAV("src", newURL.toString(), quote); }
290
291            catch (QuotesException qex)
292                { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
293
294            // Replace the index in the Vector containing the old TagNode with the new one.
295            html.setElementAt(tn , pos);
296
297            // The Vector-Index at this position had it's old TagNode removed and replaced with a
298            // new updated one.  Log this to the stream-list so to allow the user to know.
299
300            if (askForReturnArraysOrReturnNull) replaced.accept(pos);
301        }
302
303        return askForReturnArraysOrReturnNull
304
305            ? new Ret3<int[], int[], int[]>
306                (hasSrcPosArr, replaced.build().toArray(), failedUpdate.build().toArray())
307            : null;
308    }
309
310
311    // ********************************************************************************************
312    // ********************************************************************************************
313    // Complete Vector-Resolve Methods - HREF-ATTRIBUTE
314    // ********************************************************************************************
315    // ********************************************************************************************
316
317
318    /**
319     * Convenience Method.
320     * <BR />Invokes: {@link #resolveAllHREF(Vector, int, int, URL, SD, boolean)}
321     */
322    public static Ret3<int[], int[], int[]> resolveAllHREF(
323            Vector<? super TagNode> html, URL sourcePage, SD quote,
324            boolean askForReturnArraysOrReturnNull
325        )
326    { return resolveAllHREF(html, 0, -1, sourcePage, quote, askForReturnArraysOrReturnNull); }
327
328    /**
329     * Convenience Method.
330     * <BR />Accepts: {@code DotPair}.
331     * <BR />Invokes: {@link #resolveAllHREF(Vector, int, int, URL, SD, boolean)}
332     */
333    public static Ret3<int[], int[], int[]> resolveAllHREF(
334            Vector<? super TagNode> html, DotPair dp, URL sourcePage, SD quote,
335            boolean askForReturnArraysOrReturnNull
336        )
337    {
338        return resolveAllHREF
339            (html, dp.start, dp.end + 1, sourcePage, quote, askForReturnArraysOrReturnNull); 
340    }
341
342    /**
343     * This method shall resolve all partial {@code URL} addresses that are found within
344     * {@code TagNode} elements having {@code 'HREF=...'} attributes.  Each instance of
345     * {@code TagNode} found in the input HTML {@code Vector} that has an {@code 'HREF'}
     * attribute - if the {@code 'URL'} is only partially resolved - shall be updated and replaced
     * with a new {@code TagNode} with a fully resolved {@code URL}.
348     * 
349     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
350     * 
351     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
352     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
353     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
354     * 
355     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
356     * (possibly-relative) {@code URL's} in the HTML-{@code Vector} will be resolved.
357     * 
358     * @param quote A choice for the quotes to use.  In most cases, {@code URL} attribute
359     * <B STYLE="color: red;">values</B> do not contain quotation-marks.  So likely either
360     * choice would work just fine, without exceptions.
361     * 
362     * <BR /><BR /><B>NOTE:</B> <I>null may be passed to this parameter</I>, and if it is
363     * the original quotation marks found in the {@code TagNode's 'HREF'} attribute will be
364     * reused.  Passing null to this parameter should almost always be easiest, safest.
365     * 
366     * @param askForReturnArraysOrReturnNull This (long-named) parameter is merely here to
367     * facilitate retrieving more information from this method - <I>if necessary</I>.  When this
368     * parameter receives the following values:
369     * 
370     * <BR /><BR /><UL CLASS=JDUL>
371     * <LI> <B>TRUE:</B> Three integer {@code int[]} arrays will be returned as listed in the
372     *      <B>{@code Returns:}</B> section of this method's documentation.
373     *      </LI>
374     * 
375     * <LI><B>FALSE:</B> This method shall return null. </LI>
376     * </UL>
377     * 
378     * @return If input parameter {@code 'askForReturnArraysOrReturnNull'} has been passed 
379     * {@code FALSE}, this method shall return null.  Otherwise, (if passed {@code TRUE}), then
380     * this method shall return an instance of {@code 'Ret3<int[], int[], int[]>'} - which is
381     * <I>returning three separate integer-arrays about what was found, and what has occurred.</I>
382     *
383     * <BR /><BR />
     * Three arrays are returned as a result of this method's invocation.  Keep in mind that
     * though the information might be superfluous, discarding these arrays is easy.
     * They are provided as a matter of convenience for cases where more detailed information is
     * mandatory for ensuring that long lists of {@code HTMLNode's} were properly updated.
388     * 
389     * <BR /><BR /><OL CLASS=JDOL>
390     * <LI> {@code Ret3.a (int[])}
391     *      <BR /><BR />
392     *      The first {@code int[] array} shall contain a list of the index of every
     *      {@code TagNode} in the input-{@code Vector} parameter's range that
     *      <B><I>contained</I></B> a non-null HTML {@code 'HREF'} Attribute.
395     *      <BR /><BR />
396     *      </LI>
397     * 
398     * <LI> {@code Ret3.b (int[])}
399     *      <BR /><BR />
400     *      The second {@code int[] array} will contain an index-list of the indices
401     *      which contained {@code TagNode's} that were <B><I>replaced</I></B> by the
402     *      internal-resolve logic.
403     *      <BR /><BR />
404     *      </LI>
405     * 
406     * <LI> {@code Ret3.c (int[])}
407     *      <BR /><BR />
408     *      The third {@code int[] array} will contain an index-list of the indices
409     *      which contained {@code TagNode's} whose {@code 'HREF=...'} attribute
410     *      <I><B>failed</I></B> to be resolved by the internal-resolve logic, <I>or</I> caused a
411     *      {@code QuotesException} to throw.
412     *      </LI>
413     * </OL>
414     * 
415     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
416     * 
417     * @see #resolve(String, URL)
418     * @see TagNode#AV(String)
419     * @see TagNode#setAV(String, String, SD)
420     */
421    public static Ret3<int[], int[], int[]> resolveAllHREF(
422            Vector<? super TagNode> html, int sPos, int ePos, URL sourcePage, SD quote,
423            boolean askForReturnArraysOrReturnNull
424        )
425    {
426        // Retrieve the Vector-location of any TagNode on the page that has
427        // a "HREF=..." attribute.  These are almost always HTML <IMG> elements.
428        // NOTE: FIND Method's are "READ ONLY" - the Cast will make no difference at run-time.
429        //       The @SuppressWarnings is to overcome the cast of 'html'
430
431        @SuppressWarnings("unchecked")
432        int[] hasHRefPosArr = InnerTagFind.all((Vector<HTMLNode>) html, sPos, ePos, "href");
433
434        // Java Stream's are convenient for keeping "Growing Lists" of return values.
435        // This builder shall keep a list of all URL's that failed to update - for any reason
436        // **UNLESS** the reason is that the URL was already a fully-resolved, non-partial URL
437
438        IntStream.Builder failedUpdate = askForReturnArraysOrReturnNull
439            ? IntStream.builder() 
440            : null;
441
442        // This stream will keep a list of all URL's that were updated, and whose TagNode's
443        // were replaced inside the input HTML Vector
444
445        IntStream.Builder replaced = askForReturnArraysOrReturnNull
446            ? IntStream.builder()
447            : null;
448
449        for (int pos : hasHRefPosArr)
450        {
451            // Get the node at the index
452            TagNode tn = (TagNode) html.elementAt(pos);
453
454            // 1) Retrieve the HREF Attribute
455            // 2) if it is a partial-URL resolve it
456            // 3) Convert to a String
457
458            String  oldURL = tn.AV("HREF");
459            URL     newURL = resolve(oldURL, sourcePage);
460
461            // Some URL's cannot be resolved, if so, just skip this TagNode.
462            // Log the index to the stream (if requested), and continue.
463
464            if (newURL == null)
465            { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
466
467            // If the URL was already a fully-resolved-URL, continue - don't replace the TagNode;
468            // No logging needed here, the URL was *already* resolved...
469
470            if (oldURL.length() == newURL.toString().length()) continue;
471
472            // Replace the HREF Attribute in the TagNode.  This builds a new instance of TagNode
473            // If there is an exception, log the index to the stream (if requested), and continue.
474
475            try
476                { tn = tn.setAV("href", newURL.toString(), quote); }
477
478            catch (QuotesException qex)
479                { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
480
481            // Replace the index in the Vector containing the old TagNode with the new one.
482            html.setElementAt(tn , pos);
483
484            // The Vector-Index at this position had it's old TagNode removed and replaced with a
485            // new updated one.  Log this to the stream-list so to allow the user to know.
486
487            if (askForReturnArraysOrReturnNull) replaced.accept(pos);
488        }
489
490        return askForReturnArraysOrReturnNull
491
492            ? new Ret3<int[], int[], int[]>
493                (hasHRefPosArr, replaced.build().toArray(), failedUpdate.build().toArray())
494            : null;
495    }
496
497
498    // ********************************************************************************************
499    // ********************************************************************************************
500    // Resolve, Not Keep Exceptions
501    // ********************************************************************************************
502    // ********************************************************************************************
503
504
505    /**
506     * Convenience Method.
507     * <BR />Invokes: {@link #resolveHREF(TagNode, URL)}.
508     * <BR />And-Then: {@link TagNode#setAV(String, String, SD)}
509     */
510    public static TagNode resolveHREFAndUpdate(TagNode tnWithHREF, URL sourcePage)
511    { 
512        URL url = resolveHREF(tnWithHREF, sourcePage);
513
514        return (url == null)
515            ? null
516            : tnWithHREF.setAV("href", url.toString(), null);
517    }
518
519
520    /**
521     * This should be used for {@code TagNode's} that contain an {@code 'HREF'} inner-tag
522     * (attribute).
523     * 
524     * @param tnWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_HREF>
525     * 
526     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode}
527     * (possibly-relative) {@code URL} will be resolved.
528     * 
529     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
530     * directory.  Null is returned if attempting to build the {@code URL} generated a
531     * {@code MalformedURLException}.
532     * 
533     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
534     * {@code MalformedURLException's}.
535     * 
536     * @throws HREFException If the {@code TagNode} passed to parameter {@code 'tnWithHREF'} does
537     * not actually contain an {@code HREF} attribute, then this exception shall throw.
538     * 
539     * @see #resolve(String, URL)
540     * @see TagNode#AV(String)
541     */
542    public static URL resolveHREF(TagNode tnWithHREF, URL sourcePage)
543    {
544        String href = tnWithHREF.AV("href");
545
546        if (href == null) throw new HREFException(
547            "The TagNode passed to parameter tnWithHREF does not actually contain an " +
548            "HREF attribute."
549        );
550
551        return resolve(href, sourcePage);
552    }
553
554
555    /**
556     * Convenience Method.
557     * <BR />Invokes: {@link #resolveSRC(TagNode, URL)} 
558     * <BR />And-Then: {@link TagNode#setAV(String, String, SD)}
559     */
560    public static TagNode resolveSRCAndUpdate(TagNode tnWithSRC, URL sourcePage)
561    { 
562        URL url = resolveSRC(tnWithSRC, sourcePage);
563
564        return (url == null) 
565            ? null 
566            : tnWithSRC.setAV("src", url.toString(), null);
567    }
568
569
570    /**
571     * This should be used for {@code TagNode's} that contain a {@code 'SRC'} inner-tag
572     * (attribute).
573     * 
574     * @param tnWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_SRC>
575     * 
576     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode}
577     * (possibly-relative) {@code URL} will be resolved.
578     * 
579     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
580     * directory.  Null is returned if attempting to build the {@code URL} generated a
581     * {@code MalformedURLException}.
582     * 
583     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
584     * {@code MalformedURLException's}.
585     * 
586     * @throws SRCException If the {@code TagNode} passed to parameter {@code 'tnWithSRC'} does not
587     * actually contain a {@code SRC} attribute, then this exception shall throw.
588     * 
589     * @see #resolve(String, URL)
590     * @see TagNode#AV(String)
591     */
592    public static URL resolveSRC(TagNode tnWithSRC, URL sourcePage)
593    {
594        String src = tnWithSRC.AV("src");
595
596        if (src == null) throw new SRCException(
597            "The TagNode passed to parameter tnWithSRC does not actually contain a " +
598            "SRC attribute."
599        );
600
601        return resolve(src, sourcePage);
602    }
603
604    /**
605     * This should be used for lists of {@code TagNode's}, each of which contain an {@code 'HREF'}
606     * inner-tag (attribute).
607     * 
608     * @param tnListWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_HREF>
609     * 
610     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
611     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
612     * 
613     * @return A list of {@code URL's}, each of which have been completed/resolved with the 
614     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
615     * result in a null value in the {@code Vector}.
616     * 
617     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
618     * 
619     * @see #resolve(String, URL)
620     * @see TagNode#AV(String)
621     */
622    public static Vector<URL> resolveHREFs(Iterable<TagNode> tnListWithHREF, URL sourcePage)
623    {
624        Vector<URL> ret = new Vector<>();
625
626        for (TagNode tn : tnListWithHREF) ret.addElement(resolve(tn.AV("href"), sourcePage));
627
628        return ret;
629    }
630
631
632    /**
633     * This should be used for lists of {@code TagNode's}, each of which contain a {@code 'SRC'}
634     * inner-tag (attribute).
635     * 
636     * @param tnListWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_SRC>
637     * 
638     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
639     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
640     * 
641     * @return A list of {@code URL's}, each of which have been completed/resolved with the
642     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
643     * result in a null value in the {@code Vector.}
644     * 
645     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
646     * 
647     * @see #resolve(String, URL)
648     * @see TagNode#AV(String)
649     */
650    public static Vector<URL> resolveSRCs(Iterable<TagNode> tnListWithSRC, URL sourcePage)
651    {
652        Vector<URL> ret = new Vector<>();
653
654        for (TagNode tn : tnListWithSRC) ret.addElement(resolve(tn.AV("src"), sourcePage));
655
656        return ret;
657    }
658
659
660    /**
661     * This will use a "pointer array" - an array containing indexes into the downloaded page to
662     * retrieve {@code TagNode's}.  The {@code TagNode's} to which this pointer-array points -
663     * must each contain an {@code HREF} inner-tag with a {@code URL}, or a partial {@code URL}.
664     * 
665     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
666     * 
667     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
668     * 
669     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
670     * reference {@code TagNode's} that contain {@code HREF} attributes.  Integer-pointer Arrays
671     * are usually returned from the {@code package 'NodeSearch'} "Find" methods.
672     *
673     * <DIV CLASS="EXAMPLE">{@code 
674     * // Retrieve 'pointers' to all the '<A HREF=...>' TagNode's.  The term 'pointer' refers to
675     * // integer-indices into the vectorized-html variable 'page'
676     * int[] anchorPosArr = TagNodeFind.all(page, TC.OpeningTags, "a");
677     * 
678     * // Extract each HREF inner-tag, and construct a {@code URL}.  Use the 'sourcePage' parameter
679     * // if the URL is only partially-resolved
680     * Vector<URL> urls = Links.resolveHREFs(page, anchorPosArr, mySourcePage);
681     * }</DIV>
682     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
683     * {@code "<A ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
684     * {@code 'html'}, and then resolve any shortened {@code URL's}. 
685     *
686     * @param sourcePage This is the source page {@code URL} from whence the (possibly relative)
687     * {@code TagNode URL's} in the {@code Vector} are to be resolved.
688     *
689     * @return A list of {@code URL's}, each of which have been completed/resolved with the
690     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
691     * result in a null value in the {@code Vector}.  However, if any of the nodes pointed to by
692     * the {@code 'nodePosArr'} parameter do not contain opening {@code TagNode} elements, then
693     * this mistake shall generate {@code TagNodeExpectedException's}.
694     *
695     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
696     *
697     * @throws ArrayIndexOutOfBoundsException
698     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
699     * @throws OpeningTagNodeExpectedException
700     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
701     * 
702     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
703     *
704     * @see #resolve(String, URL)
705     * @see TagNode#AV(String)
706     */
707    public static Vector<URL> resolveHREFs
708        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
709    {
710        // Return Vector
711        Vector<URL> ret = new Vector<>();
712
713        for (int nodePos : nodePosArr)
714        {
715            HTMLNode n = html.elementAt(nodePos);
716
717            // Must be an HTML TagNode
718            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
719
720            TagNode tn = (TagNode) n;
721
722            // Must be an "Opening" HTML TagNode
723            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
724
725            // Resolve the 'HREF', save the URL
726            ret.addElement(resolve(tn.AV("href"), sourcePage));
727        }
728
729        return ret;
730    }
731 
732
733    /**
734     * This will use a "pointer array" - an array containing indexes into the downloaded page to
735     * retrieve {@code TagNode's}.  The {@code TagNode's} to which this pointer-array points - must
736     * each contain a {@code SRC} inner-tag with a {@code URL}, or a partial {@code URL}.
737     * 
738     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
739     *
740     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
741     * 
742     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
743     * reference {@code TagNode's} that contain {@code SRC} attributes.  Integer-pointer Arrays are
744     * usually returned from the {@code package 'NodeSearch'} "Find" methods.
745     *
746     * <DIV CLASS="EXAMPLE">{@code 
747     * // Retrieve 'pointers' to all the '<IMG SRC=...>' TagNode's.  The term 'pointer' refers to
748     * // integer-indices into the vectorized-html variable 'page'
749     * 
750     * int[] picturePosArr = TagNodeFind.all(page, TC.OpeningTags, "img");
751     * 
     * // Extract each SRC inner-tag, and construct a URL.  Use the 'sourcePage' parameter
753     * // if the URL is only partially-resolved
754     * 
755     * Vector<URL> urls = Links.resolveSRCs(page, picturePosArr, mySourcePage);
756     * }</DIV>
757     * 
758     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
759     * {@code "<IMG ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
     * {@code 'html'}, and then resolve any shortened image {@code URL's}.
761     *
762     * @param sourcePage This is the source page {@code URL} from whence the (possibly relative)
763     * {@code TagNode URL's} in the {@code Vector} are to be resolved.
764     *
765     * @return A list of {@code URL's}, each of which have been completed/resolved with the
766     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
767     * result in a null value in the {@code Vector}.  However, if any of the nodes pointed to by
768     * the {@code 'nodePosArr'} parameter do not contain opening {@code TagNode} elements, then
769     * this mistake shall generate {@code TagNodeExpectedException's}.
770     *
771     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
772     *
773     * @throws ArrayIndexOutOfBoundsException
774     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
775     * @throws OpeningTagNodeExpectedException
776     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
777     * 
778     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
779     *
780     * @see #resolve(String, URL)
781     * @see TagNode#AV(String)
782     */
783    public static Vector<URL> resolveSRCs
784        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
785    {
786        // Return Vector
787        Vector<URL> ret = new Vector<>();
788
789        for (int nodePos : nodePosArr)
790        {
791            HTMLNode n = html.elementAt(nodePos);
792
793            // Must be an HTML TagNode
794            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
795
796            TagNode tn = (TagNode) n;
797
798            // Must be an "Opening" HTML TagNode
799            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
800
801            // Resolve the "SRC", save the URL
802            ret.addElement(resolve(tn.AV("src"), sourcePage));
803        }
804
805        return ret;
806    }
807
808
809    /**
810     * This will convert <I><B>a list of </B></I> simple java {@code String's} to a
811     * list/{@code Vector} of {@code URL's}, de-referencing any missing information using the
812     * {@code 'sourcePage'} parameter.
813     * 
814     * @param src a list of strings - usually partially or totally completed Internet {@code URL's}
815     * 
816     * @param sourcePage This is the source page {@code URL} from which the {@code String's}
817     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
818     * 
819     * @return A list of {@code URL's}, each of which have been completed/resolved with the
820     * {@code 'sourcePage'} parameter.  If there were any {@code String's} that were zero-length or
     * null, then null is returned in the related {@code Vector} position.  If any
     * {@code String} causes a {@code MalformedURLException}, then that position in the
823     * {@code Vector} will be null.
824     * 
825     * @see #resolve(String, URL)
826     */
827    public static Vector<URL> resolve(Vector<String> src, URL sourcePage)
828    {
829        Vector<URL> ret = new Vector<>();
830
831        for (String s : src) ret.addElement(resolve(s, sourcePage));
832
833        return ret;
834    }
835
836    /**
837     * This will convert a simple java {@code String} to a {@code URL}, de-referencing any missing
838     * information using the {@code 'sourcePage'} parameter.
839     * 
840     * @param src Any java {@code String}, usually one which was scraped from an HTML-Page, and
841     * needs to be "completed."
842     * 
843     * @param sourcePage This is the source page {@code URL} from which the String
844     * (possibly-relative) {@code URL} will be resolved.
845     * 
846     * @return A {@code URL}, which has been completed/resolved with the {@code 'sourcePage'}
847     * parameter. If parameter {@code 'src'} is null or zero-length, then this method will also
848     * return null.  If a {@code MalformedURLException} is generated, null will also be returned.
849     */
850    public static URL resolve(String src, URL sourcePage)
851    {
852        if (sourcePage == null) throw new NullPointerException(
853            "Though you may provide null to the partial-URL to dereference parameter, null " +
854            "may not be passed to the Source-Page Parameter.  The purpose of the 'resolve' " +
855            "operation is to resolve partial-URLs against a source-page (root) URL. " +
856            "Therefore this is not allowed."
857        );
858
859        if (src == null) return null;
860
861        src = src.trim();
862
863        if (src.length() == 0) return null;
864
865        String srcLC = src.toLowerCase();
866
867        if (StrCmpr.startsWithXOR(srcLC, _NON_URL_HREFS)) return null;
868
869        if (srcLC.startsWith("http://") || srcLC.startsWith("https://"))
870
871            try
872                { return new URL(src); }
873
874            catch (MalformedURLException e) { return null; }
875
876        if (src.startsWith("//") && (src.charAt(3) != '/'))
877
878            try
879                { return new URL(sourcePage.getProtocol().toLowerCase() + ":" + src); }
880
881            catch (MalformedURLException e) { return null; }
882        
883        if (src.startsWith("/"))
884
885            try
886            { 
887                return new URL(
888                    sourcePage.getProtocol().toLowerCase() + "://" +
889                    sourcePage.getHost().toLowerCase() +
890                    src
891                );
892            }
893
894            catch (MalformedURLException e) { return null; }
895 
896        if (src.startsWith("../"))
897        {
898            String  sourcePageStr   = sourcePage.toString();
899            short   nLevels         = 0;
900
901            do      { nLevels++;  src = src.substring(3); }
902            while   (src.startsWith("../"));
903
904            String  directory = StringParse.dotDotParentDirectory(sourcePage.toString(), nLevels);
905
906            try     { return new URL(directory + src); }
907            catch   (Exception e) { return null; }
908        }
909
910        String  root =
911            sourcePage.getProtocol().toLowerCase() + "://" + 
912            sourcePage.getHost().toLowerCase();
913
914        String  path    = sourcePage.getPath().trim();
915        int     pos     = StringParse.findLastFrontSlashPos(path);
916
917        if (pos == -1) throw new StringIndexOutOfBoundsException(
918            "The URL you have provided: " + sourcePage.toString() + " does not have a '/' " +
919            "front-slash character in it's path.  Cannot proceed resolving relative-URL's " +
920            "without this."
921        );
922
923        path = path.substring(0, pos + 1);
924
925        try     { return new URL(root + path + src); }
926        catch   (MalformedURLException e) { return null; }
927    }
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949    // ********************************************************************************************
950    // ********************************************************************************************
951    // Resolve, KE - Keep Exceptions
952    // ********************************************************************************************
953    // ********************************************************************************************
954
955
956    /**
957     * This should be used for {@code TagNode's} that contain an {@code 'HREF'} inner-tag
958     * (attribute).
959     * 
960     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
961     * 
962     * @param tnWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_HREF>
963     * 
964     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
965     * (possibly-relative) {@code URL} will be resolved.
966     * 
967     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or 
     * directory.  If there is no {@code HREF} attribute, an {@code HREFException} is thrown
     * rather than null being returned.  If the {@code TagNode} causes a
     * {@code MalformedURLException}, that is returned in {@code Ret2.b}
971     * 
972     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
973     * {@code MalformedURLException's}.
974     * 
975     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
976     * 
977     * @throws HREFException If the {@code TagNode} passed to parameter {@code 'tnWithHREF'} does
978     * not actually contain an {@code HREF} attribute, then this exception shall throw.
979     * 
980     * @see #resolve_KE(String, URL)
981     * @see TagNode#AV(String)
982     * @see Ret2
983     */
984    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
985    public static Ret2<URL, MalformedURLException> resolveHREF_KE
986        (TagNode tnWithHREF, URL sourcePage)
987    {
988        String href = tnWithHREF.AV("href");
989
990        if (href == null) throw new HREFException(
991            "The TagNode passed to parameter tnWithHREF does not actually contain an " +
992            "HREF attribute."
993        );
994
995        return LinksResolve_KE.resolve(href, sourcePage);
996    }
997
998
999    /**
1000     * This should be used for {@code TagNode's} that contain a {@code 'SRC'} inner-tag
1001     * (attribute).
1002     * 
1003     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1004     * 
1005     * @param tnWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_SRC>
1006     * 
1007     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1008     * (possibly-relative) {@code URL} will be resolved.
1009     * 
1010     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
     * directory.  If there is no {@code SRC} attribute, a {@code SRCException} is thrown rather
     * than null being returned.  If the {@code TagNode} causes a
     * {@code MalformedURLException}, that is returned in {@code Ret2.b}
1013     * 
1014     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
1015     * {@code MalformedURLException's}.
1016     *
1017     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1018     * 
1019     * @throws SRCException If the {@code TagNode} passed to parameter {@code 'tnWithSRC'} does not
1020     * actually contain a {@code SRC} attribute, then this exception shall throw.
1021     * 
1022     * @see #resolve_KE(String, URL)
1023     * @see TagNode#AV(String)
1024     * @see Ret2
1025     */
1026    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1027    public static Ret2<URL, MalformedURLException> resolveSRC_KE
1028        (TagNode tnWithSRC, URL sourcePage)
1029    {
1030        String src = tnWithSRC.AV("src");
1031
1032        if (src == null) throw new SRCException(
1033            "The TagNode passed to parameter tnWithSRC does not actually contain a " +
1034            "SRC attribute."
1035        );
1036
1037        return LinksResolve_KE.resolve(src, sourcePage);
1038    }
1039
1040
1041    /**
1042     * This should be used for lists of {@code TagNode's}, each of which contain an {@code 'HREF'}
1043     * inner-tag (attribute).
1044     * 
1045     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1046     * 
1047     * @param tnListWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_HREF>
1048     * 
1049     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
1050     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
1051     * 
1052     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1053     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code HREF} tag,
1054     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1055     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1056     * exception in {@code Ret2.b}
1057     * 
1058     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
1059     *
1060     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1061     * 
1062     * @see #resolve_KE(String, URL)
1063     * @see TagNode#AV(String)
1064     * @see Ret2
1065     */
1066    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1067    public static Vector<Ret2<URL, MalformedURLException>> resolveHREFs_KE
1068        (Iterable<TagNode> tnListWithHREF, URL sourcePage)
1069    {
1070        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1071
1072        for (TagNode tn : tnListWithHREF)
1073            ret.addElement(LinksResolve_KE.resolve(tn.AV("href"), sourcePage));
1074
1075        return ret;
1076    }
1077
1078
1079    /**
1080     * This should be used for lists of {@code TagNode's}, each of which contain a {@code 'SRC'}
1081     * inner-tag (attribute).
1082     * 
1083     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1084     * 
1085     * @param tnListWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_SRC>
1086     * 
1087     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1088     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
1089     * 
1090     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1091     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code SRC} tag,
1092     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1093     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1094     * exception in {@code Ret2.b}
1095     * 
1096     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
1097     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1098     * 
1099     * @see #resolve_KE(String, URL)
1100     * @see TagNode#AV(String)
1101     * @see Ret2
1102     */
1103    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1104    public static Vector<Ret2<URL, MalformedURLException>> resolveSRCs_KE
1105        (Iterable<TagNode> tnListWithSRC, URL sourcePage)
1106    {
1107        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1108
1109        for (TagNode tn : tnListWithSRC)
1110            ret.addElement(LinksResolve_KE.resolve(tn.AV("src"), sourcePage));
1111
1112        return ret;
1113    }
1114
1115
1116    /**
1117     * This will use a "pointer array" - an array containing indexes into the downloaded page to
1118     * retrieve {@code TagNode's}.  The {@code TagNode} to which this pointer-array points - must
1119     * contain {@code HREF} inner-tags with {@code URL's}, or partial {@code URL's}.
1120     * 
1121     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
1122     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1123     * 
1124     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
1125     * 
1126     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
1127     * reference {@code TagNode's} that contain {@code HREF} attributes.  Integer-pointer Arrays
     * are usually returned from the {@code package 'NodeSearch'} "Find" methods.
1129     *
1130     * <DIV CLASS="EXAMPLE">{@code 
1131     * // Retrieve 'pointers' to all the '<A HREF=...>' TagNode's.  The term 'pointer' refers to
1132     * // integer-indices into the vectorized-html variable 'page'
1133     * 
1134     * int[] anchorPosArr = TagNodeFind.all(page, TC.OpeningTags, "a");
1135     * 
1136     * // Extract each HREF inner-tag, and construct a URL.  Use the 'sourcePage' parameter if
1137     * // the URL is only partially-resolved.  If any URL's on the original-page are invalid, the
1138     * // method shall not crash, but save the exception instead.
1139     * 
     * Vector<Ret2<URL, MalformedURLException>> urlsWithEx =
     *     Links.resolveHREFs_KE(page, anchorPosArr, mySourcePage);
1142     *
1143     * // Print out any "failed" urls
1144     * for (Ret2<URL, MalformedURLException> r : urlsWithEx)
1145     *     if (r.b != null) 
1146     *         System.out.println("There was an exception: " + r.b.toString());
1147     * }</DIV>
1148     *
1149     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
1150     * {@code "<A ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
     * {@code 'html'}, and then resolve any shortened {@code URL's}.
1152     *
1153     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1154     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1155     * 
1156     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1157     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code HREF} tag,
1158     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1159     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1160     * exception in {@code Ret2.b}
1161     *
1162     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
1163     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1164     *
1165     * @throws ArrayIndexOutOfBoundsException
1166     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1167     * @throws OpeningTagNodeExpectedException
1168     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1169     * 
1170     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1171     *
1172     * @see #resolve_KE(String, URL)
1173     * @see TagNode#AV(String)
1174     * @see Ret2
1175     */
1176    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1177    public static Vector<Ret2<URL, MalformedURLException>> resolveHREFs_KE
1178        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
1179    {
1180         // Return Vector
1181        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1182
1183        for (int nodePos : nodePosArr)
1184        {
1185            HTMLNode n = html.elementAt(nodePos);
1186
1187            // Must be an HTML TagNode
1188            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
1189
1190            TagNode tn = (TagNode) n;
1191
1192            // Must be an "Opening" HTML TagNode
1193            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
1194
1195            // Resolve the "HREF", keep the URL
1196            ret.addElement(LinksResolve_KE.resolve(tn.AV("href"), sourcePage));
1197        }
1198
1199        return ret;
1200    }
1201 
1202    /**
1203     * This will use a "pointer array" - an array containing indexes into the downloaded page to
1204     * retrieve {@code TagNode's}.  The {@code TagNode} to which this pointer-array points - must 
1205     * contain {@code SRC} inner-tags with {@code URL's}, or partial {@code URL's}.
1206     * 
1207     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
1208     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1209     *
1210     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
1211     * 
1212     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
1213     * reference {@code TagNode's} that contain {@code SRC} attributes.  Integer-pointer Arrays are
     * usually returned from the {@code package 'NodeSearch'} "Find" methods.
1215     *
1216     * <DIV CLASS="EXAMPLE">{@code 
1217     * // Retrieve 'pointers' to all the '<IMG SRC=...>' TagNode's.  The term 'pointer' refers to
1218     * // integer-indices into the vectorized-html variable 'page'
1219     * 
1220     * int[] picturePosArr = TagNodeFind.all(page, TC.OpeningTags, "img");
1221     * 
1222     * // Extract each SRC inner-tag, and construct a URL.  Use the 'sourcePage' parameter if
1223     * // the URL is only partially-resolved.  If any URL's on the original-page are invalid,
1224     * // the method shall not crash, but save the exception instead.
1225     * 
     * Vector<Ret2<URL, MalformedURLException>> urlsWithEx =
1227     *      Links.resolveSRCs_KE(page, picturePosArr, mySourcePage);
1228     *
1229     * // Print out any "failed" urls
1230     * for (Ret2<URL, MalformedURLException> r : urlsWithEx)
1231     *     if (r.b != null) 
1232     *         System.out.println("There was an exception: " + r.b.toString());
1233     * }</DIV>
1234     *
1235     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
1236     * {@code "<IMG ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
1237     * {@code 'html'}, and then resolve any shortened {@code URL's}.
1238     *
1239     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1240     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1241     *
1242     * @return A list of {@code URL's}, each of which have been completed/resolved with the 
1243     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code SRC} tag,
1244     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1245     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1246     * exception in {@code Ret2.b}
1247     *
1248     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
1249     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1250     *
1251     * @throws ArrayIndexOutOfBoundsException
1252     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1253     * @throws OpeningTagNodeExpectedException
1254     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1255     * 
1256     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1257     *
1258     * @see #resolve_KE(String, URL)
1259     * @see TagNode#AV(String)
1260     * @see Ret2
1261     */
1262    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1263    public static Vector<Ret2<URL, MalformedURLException>> resolveSRCs_KE
1264        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
1265    {
1266         // Return Vector
1267        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();                                         
1268
1269        for (int nodePos : nodePosArr)
1270        {
1271            HTMLNode n = html.elementAt(nodePos);
1272
1273            // Must be an HTML TagNode
1274            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
1275
1276            TagNode tn = (TagNode) n;
1277
1278            // Must be an "Opening" HTML TagNode
1279            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
1280
1281            // Resolve "SRC" and keep URL's
1282            ret.addElement(LinksResolve_KE.resolve(tn.AV("src"), sourcePage));
1283        }
1284
1285        return ret;
1286    }
1287
1288    /**
1289     * Resolve all {@code URL's}, represented as {@code String's}, inside of a {@code Vector}.
1290     * 
1291     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1292     * 
1293     * @param src a list of {@code String's} - usually partially or totally completed Internet
1294     * {@code URL's}
1295     * 
1296     * @param sourcePage This is the source page {@code URL} from which the {@code String's}
1297     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1298     * 
1299     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1300     * {@code 'sourcePage'} parameter.  If there were any {@code String's} that were zero-length or
     * null, then null is returned in the related {@code Vector} position.  If any {@code String}
1302     * causes a {@code MalformedURLException}, then that position in the {@code Vector} will
1303     * contain the exception in {@code Ret2.b}
1304     *
1305     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1306     * 
1307     * @see #resolve_KE(String, URL)
1308     * @see Ret2
1309     */
1310    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1311    public static Vector<Ret2<URL, MalformedURLException>> resolve_KE
1312        (Vector<String> src, URL sourcePage)
1313    {
1314        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1315
1316        for (String s : src)
1317            ret.addElement(LinksResolve_KE.resolve(s, sourcePage));
1318
1319        return ret;
1320    }
1321
1322    /**
1323     * This will convert a simple java {@code String} to a {@code URL}, de-referencing any missing
1324     * information using the {@code 'sourcePage'} parameter.
1325     * 
1326     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1327     * 
1328     * @param src Any java {@code String}, usually one which was scraped from an HTML-Page, and
1329     * needs to be "completed."
1330     * 
1331     * @param sourcePage This is the source page {@code URL} from which the String (possibly
1332     * relative) {@code URL} will be resolved.
1333     * 
1334     * @return A {@code URL}, which has been completed/resolved with the {@code 'sourcePage'}
1335     * parameter. If parameter {@code 'src'} is null or zero-length, null will be returned.  If a
1336     * {@code MalformedURLException} is thrown, that will be included with the {@code Ret2<>}
1337     * result.
1338     *
1339     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1340     * 
1341     * @see Ret2
1342     */
1343    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1344    public static Ret2<URL, MalformedURLException> resolve_KE(String src, URL sourcePage)
1345    { return LinksResolve_KE.resolve(src, sourcePage); }
1346}