001package Torello.HTML;
002
003import Torello.Java.*;
004
005import Torello.HTML.NodeSearch.InnerTagFind; // Used for an @see reference
006import Torello.HTML.NodeSearch.TagNodeFind;  // Used in getBaseURL
007
008import Torello.Java.Additional.Ret2;
009import Torello.Java.Additional.Ret3;
010
011import Torello.JavaDoc.LinkJavaSource;
012import static Torello.JavaDoc.Entity.METHOD;
013
014import java.net.URL;
015import java.net.MalformedURLException;
016
017import java.util.Vector;
018import java.util.stream.IntStream;
019
020/**
021 * Utilities for de-referencing 'partially-completed' {@code URL's} in a Web-Page {@code Vector}.
022 * 
023 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LINKS>
024 * @see ReplaceNodes
025 * @see ReplaceFunction
026 * @see HTMLPage
027 * @see InnerTagFind
028 * @see Ret2
029 */
030@Torello.JavaDoc.StaticFunctional
031public class Links
032{
033    private Links() { }
034
035    /**
036     * List of documented "starter-strings" that are sometimes used in Anchor URL
037     * {@code 'HREF=...'} attributes.
038     * 
039     * @see #NON_URL_HREFS
040     */
041    protected static final String[] _NON_URL_HREFS =
042        { "tel:", "magnet:", "javascript:", "mailto:", "ftp:", "file:", "data:", "blog:", "#" };
043
044    /**
045     * This small method just returns the complete list of commonly found Anchor
046     * {@code 'HREF' String's} that do not actually constitute an HTML {@code 'URL'.}  This method
047     * actually returns a "clone" of an internally stored {@code String[]} Array.  This is to
048     * protect and make sure that the list of potential HTML Anchor-Tag {@code 'HREF'} Attributes
049     * is not changed, doctored or modified
050     * 
051     * @return A clone of the {@code String}-array {@code '_NON_URL_HREFS'}
052     * @see #_NON_URL_HREFS
053     */
054    public static String[] NON_URL_HREFS()
055    { return _NON_URL_HREFS.clone(); }
056
057    /**
058     * The methods in this class <B><I>will not automatically extract</I></B> any HTML
059     * {@code <BASE HREF=URL>} definitions that are found on this page.  If the user wishes to
060     * dereference partial / relative {@code URL} definitions that exist on the input page, all the
061     * while respecting any {@code <BASE HREF=URL>} definitions found on the input page, then this
062     * method should be utilized.
063     *
064     * @param page This may be any HTML page or partial page.  If this page has a valid HTML
065     * {@code <BASE HREF=URL>}, it will be extracted and returned as an instance of
066     * {@code class URL}.
067     *
068     * @return This shall return the HTML {@code <BASE HREF="http://...">} element found available
069     * within the input-page parameter {@code 'page'}.  If the page provided does not contain a
070     * {@code BASE URL} definition, then null shall be returned.
071     *
072     * <BR /><BR /><DIV CLASS=JDHint>
073     * The HTML Specification clearly states that only one {@code URL} may be defined using the
074     * HTML Element {@code <BASE>}.  Clearly, due to the browser wars, unspecified /
075     * non-deterministic behavior is possible if multiple definitions are provided.  For the
076     * purposes of this class, if such a situation arises, an exception is thrown.
077     * </DIV>
078     *
079     * @throws MalformedHTMLException If the HTML page provided contains multiple definitions of
080     * the element {@code <BASE HREF=URL>}, then this exception will throw.
081     *
082     * @throws MalformedURLException If a {@code <BASE HREF=URL>} is found / identified within the
083     * input page, but that {@code URL} is invalid, then this exception shall throw.
084     * 
085     * @see TagNodeFind
086     * @see Attributes#retrieve(Vector, int[], String)
087     */
088    public static URL getBaseURL(Vector<? extends HTMLNode> page)
089        throws MalformedHTMLException, MalformedURLException
090    {
091        int[] posArr = TagNodeFind.all(page, TC.OpeningTags, "base");
092
093        if (posArr.length == 0) return null;
094
095
096        // NOTE: The cast is all right because 'posArr' only points to TagNode's
097        // Attributes expects to avoid processing Vector<TextNode>, and Vector<CommentNode>
098        // Above, there will be nothing in the 'posArr' if either of those was passed.
099
100        @SuppressWarnings("unchecked")
101        String[]    urls    = Attributes.retrieve((Vector<HTMLNode>) page, posArr, "href");
102
103        boolean     found   = false;
104        String      ret     = null;
105
106        for (String url : urls)
107            if ((url != null) && (url.length() > 0))
108                if (found)
109                    throw new MalformedHTMLException(
110                        "The page you have provided has multiple <BASE HREF=URL> definitions.  " +
111                        "However, the HTML Specifications state that pages may provide just one " +
112                        "definition.  If you wish to proceed, retrieve the definitions manually " +
113                        "using class TagNodeFind.all and Attributes.retrieve, as explained in " +
114                        "the JavaDoc pages for this class."
115                    );
116                else 
117                {
118                    found = true;
119                    ret = url;
120                }
121
122        return new URL(ret);                    
123    }
124
125
126    // ********************************************************************************************
127    // ********************************************************************************************
128    // Complete Vector-Resolve Methods - SRC-ATTRIBUTE
129    // ********************************************************************************************
130    // ********************************************************************************************
131
132
133    /**
134     * Convenience Method.
135     * <BR />Invokes: {@link #resolveAllSRC(Vector, int, int, URL, SD, boolean)}
136     */
137    public static Ret3<int[], int[], int[]> resolveAllSRC(
138            Vector<? super TagNode> html, URL sourcePage, SD quote,
139            boolean askForReturnArraysOrReturnNull
140        )
141    { return resolveAllSRC(html, 0, -1, sourcePage, quote, askForReturnArraysOrReturnNull); }
142
143    /**
144     * Convenience Method.
145     * <BR />Accepts: {@code DotPair}.
146     * <BR />Invokes: {@link #resolveAllSRC(Vector, int, int, URL, SD, boolean)}
147     */
148    public static Ret3<int[], int[], int[]> resolveAllSRC(
149            Vector<? super TagNode> html, DotPair dp, URL sourcePage, SD quote,
150            boolean askForReturnArraysOrReturnNull
151        )
152    {
153        return resolveAllSRC
154            (html, dp.start, dp.end + 1, sourcePage, quote, askForReturnArraysOrReturnNull);
155    }
156
157    /**
158     * This method shall resolve all partial {@code URL} addresses that are found within
159     * {@code TagNode} elements having {@code 'SRC=...'} attributes.  Each instance of
160     * {@code TagNode} found in the input HTML {@code Vector} that has an {@code 'SRC'}
161     * attribute - if the {@code 'URL'} is only partially resolved - shall be updated and replaced
162     * with a new {@code TagNode} with a fully resolved {@code URL}.
163     * 
164     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
165     * 
166     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
167     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
168     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
169     * 
170     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
171     * (possibly-relative) {@code URL's} in the HTML-{@code Vector} will be resolved.
172     * 
173     * @param quote A choice for the quotes to use.  In most cases, {@code URL} attribute
174     * <B STYLE="color: red;">values</B> do not contain quotation-marks.  So likely either
175     * choice would work just fine, without exceptions.
176     * 
177     * <BR /><BR /><DIV CLASS=JDHint>
178     * <B>null may be passed to this parameter</B>, and if it is, the original quotation marks
179     * found in the {@code TagNode's 'SRC'} attribute will be reused.  Passing null to this
180     * parameter should almost always be easiest, safest.
181     * </DIV>
182     * 
183     * @param askForReturnArraysOrReturnNull This (long-named) parameter is merely here to
184     * facilitate retrieving more information from this method - <I>if necessary</I>.  When this
185     * parameter receives the following values:
186     * 
187     * <BR /><BR /><UL CLASS=JDUL>
188     * 
189     * <LI> <B>TRUE:</B> Three integer {@code int[]} arrays will be returned as listed in the
190     *      <B>{@code Returns:}</B> section of this method's documentation.
191     *      </LI>
192     * 
193     * <LI><B>FALSE:</B> This method shall return null.</LI>
194     * </UL>
195     * 
196     * @return If input parameter {@code 'askForReturnArraysOrReturnNull'} has been passed 
197     * {@code FALSE}, this method shall return null.  Otherwise, (if passed {@code TRUE}), then
198     * this method shall return an instance of {@code 'Ret3<int[], int[], int[]>'} - which is
199     * <I>returning three separate integer-arrays about what was found, and what has occurred.</I>
200     *
201     * <BR /><BR />
202     * Three arrays are returned as a result of this method's invocation.  Keep in mind that
203     * though the information might be superfluous, discarding these arrays is easy.
204     * They are provided as a matter of convenience for cases where more detailed information is
205     * mandatory for ensuring that long lists of {@code HTMLNode's} were properly updated.
206     * 
207     * <BR /><BR /><OL CLASS=JDOL>
208     * 
209     * <LI> {@code Ret3.a (int[])}
210     *      <BR /><BR />
211     *      The first {@code int[] array} shall contain a list of the index of every
212     *      {@code TagNode} in the input-{@code Vector} parameter's range that <B><I>contained</I>
213     *      </B> a non-null HTML {@code 'SRC'} Attribute.
214     *      <BR /><BR />
215     *      </LI>
216     * 
217     * <LI> {@code Ret3.b (int[])}
218     *      <BR /><BR />
219     *      The second {@code int[] array} will contain an index-list of the indices
220     *      which contained {@code TagNode's} that were <B><I>replaced</I></B> by the
221     *      internal-resolve logic.
222     *      <BR /><BR />
223     *      </LI>
224     * 
225     * <LI> {@code Ret3.c (int[])}
226     *      <BR /><BR />
227     *      The third {@code int[] array} will contain an index-list of the indices
228     *      which contained {@code TagNode's} whose {@code 'SRC=...'} attribute
229     *      <B><I>failed</I></B> to be resolved by the internal-resolve logic, <I>or</I> caused a
230     *      {@code QuotesException} to throw.
231     *      </LI>
232     * 
233     * </OL>
234     * 
235     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
236     * @see #resolve(String, URL)
237     * @see TagNode#AV(String)
238     * @see TagNode#setAV(String, String, SD)
239     */
240    public static Ret3<int[], int[], int[]> resolveAllSRC(
241            Vector<? super TagNode> html, int sPos, int ePos, URL sourcePage, SD quote,
242            boolean askForReturnArraysOrReturnNull
243        )
244    {
245        // Retrieve the Vector-location of any TagNode on the page that has
246        // a "SRC=..." attribute.  These are almost always HTML <IMG> elements.
247        // NOTE: FIND Method's are "READ ONLY" - the Cast will make no difference at run-time.
248        //       The @SuppressWarnings is to overcome the cast of 'html'
249
250        @SuppressWarnings("unchecked")
251        int[] hasSrcPosArr = InnerTagFind.all((Vector<HTMLNode>) html, sPos, ePos, "src");
252
253
254        // Java Stream's are convenient for keeping "Growing Lists" of return values.
255        // This builder shall keep a list of all URL's that failed to update - for any reason
256        // **UNLESS** the reason is that the URL was already a fully-resolved, non-partial URL
257
258        IntStream.Builder failedUpdate = askForReturnArraysOrReturnNull
259            ? IntStream.builder() 
260            : null;
261
262
263        // This stream will keep a list of all URL's that were updated, and whose TagNode's
264        // were replaced inside the input HTML Vector
265
266        IntStream.Builder replaced = askForReturnArraysOrReturnNull
267            ? IntStream.builder()
268            : null;
269
270        for (int pos : hasSrcPosArr)
271        {
272            // Get the node at the index
273            TagNode tn = (TagNode) html.elementAt(pos);
274
275
276            // 1) Retrieve the SRC Attribute
277            // 2) if it is a partial-URL resolve it
278            // 3) Convert to a String
279
280            String  oldURL = tn.AV("src");
281            URL     newURL = resolve(oldURL, sourcePage);
282
283
284            // Some URL's cannot be resolved, if so, just skip this TagNode.
285            // Log the index to the stream (if requested), and continue.
286
287            if (newURL == null)
288            { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
289
290
291            // If the URL was already a fully-resolved-URL, continue - don't replace the TagNode;
292            // No logging needed here, the URL was *already* resolved...
293
294            if (oldURL.length() == newURL.toString().length()) continue;
295
296
297            // Replace the SRC Attribute in the TagNode.  This builds a new instance of TagNode
298            // If there is an exception, log the index to the stream (if requested), and continue.
299
300            try
301                { tn = tn.setAV("src", newURL.toString(), quote); }
302
303            catch (QuotesException qex)
304                { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
305
306            // Replace the index in the Vector containing the old TagNode with the new one.
307            html.setElementAt(tn , pos);
308
309
310            // The Vector-Index at this position had it's old TagNode removed and replaced with a
311            // new updated one.  Log this to the stream-list so to allow the user to know.
312
313            if (askForReturnArraysOrReturnNull) replaced.accept(pos);
314        }
315
316        return askForReturnArraysOrReturnNull
317
318            ? new Ret3<int[], int[], int[]>
319                (hasSrcPosArr, replaced.build().toArray(), failedUpdate.build().toArray())
320            : null;
321    }
322
323
324    // ********************************************************************************************
325    // ********************************************************************************************
326    // Complete Vector-Resolve Methods - HREF-ATTRIBUTE
327    // ********************************************************************************************
328    // ********************************************************************************************
329
330
331    /**
332     * Convenience Method.
333     * <BR />Invokes: {@link #resolveAllHREF(Vector, int, int, URL, SD, boolean)}
334     */
335    public static Ret3<int[], int[], int[]> resolveAllHREF(
336            Vector<? super TagNode> html, URL sourcePage, SD quote,
337            boolean askForReturnArraysOrReturnNull
338        )
339    { return resolveAllHREF(html, 0, -1, sourcePage, quote, askForReturnArraysOrReturnNull); }
340
341    /**
342     * Convenience Method.
343     * <BR />Accepts: {@code DotPair}.
344     * <BR />Invokes: {@link #resolveAllHREF(Vector, int, int, URL, SD, boolean)}
345     */
346    public static Ret3<int[], int[], int[]> resolveAllHREF(
347            Vector<? super TagNode> html, DotPair dp, URL sourcePage, SD quote,
348            boolean askForReturnArraysOrReturnNull
349        )
350    {
351        return resolveAllHREF
352            (html, dp.start, dp.end + 1, sourcePage, quote, askForReturnArraysOrReturnNull); 
353    }
354
355    /**
356     * This method shall resolve all partial {@code URL} addresses that are found within
357     * {@code TagNode} elements having {@code 'HREF=...'} attributes.  Each instance of
358     * {@code TagNode} found in the input HTML {@code Vector} that has an {@code 'HREF'}
359     * attribute - if the {@code 'URL'} is only partially resolved - shall be updated and replaced
360     * with a new {@code TagNode} with a fully resolved {@code URL}.
361     * 
362     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
363     * 
364     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
365     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
366     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
367     * 
368     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
369     * (possibly-relative) {@code URL's} in the HTML-{@code Vector} will be resolved.
370     * 
371     * @param quote A choice for the quotes to use.  In most cases, {@code URL} attribute
372     * <B STYLE="color: red;">values</B> do not contain quotation-marks.  So likely either
373     * choice would work just fine, without exceptions.
374     * 
375     * <BR /><BR /><DIV CLASS=JDHint>
376     * <B>null may be passed to this parameter</B>, and if it is, the original quotation marks
377     * found in the {@code TagNode's 'HREF'} attribute will be reused.  Passing null to this
378     * parameter should almost always be easiest, safest.
379     * </DIV>
380     * 
381     * @param askForReturnArraysOrReturnNull This (long-named) parameter is merely here to
382     * facilitate retrieving more information from this method - <I>if necessary</I>.  When this
383     * parameter receives the following values:
384     * 
385     * <BR /><BR /><UL CLASS=JDUL>
386     * 
387     * <LI> <B>TRUE:</B> Three integer {@code int[]} arrays will be returned as listed in the
388     *      <B>{@code Returns:}</B> section of this method's documentation.
389     *      </LI>
390     * 
391     * <LI><B>FALSE:</B> This method shall return null. </LI>
392     * </UL>
393     * 
394     * @return If input parameter {@code 'askForReturnArraysOrReturnNull'} has been passed 
395     * {@code FALSE}, this method shall return null.  Otherwise, (if passed {@code TRUE}), then
396     * this method shall return an instance of {@code 'Ret3<int[], int[], int[]>'} - which is
397     * <I>returning three separate integer-arrays about what was found, and what has occurred.</I>
398     *
399     * <BR /><BR />
400     * Three arrays are returned as a result of this method's invocation.  Keep in mind that
401     * though the information might be superfluous, discarding these arrays is easy.
402     * They are provided as a matter of convenience for cases where more detailed information is
403     * mandatory for ensuring that long lists of {@code HTMLNode's} were properly updated.
404     * 
405     * <BR /><BR /><OL CLASS=JDOL>
406     * 
407     * <LI> {@code Ret3.a (int[])}
408     *      <BR /><BR />
409     *      The first {@code int[] array} shall contain a list of the index of every
410     *      {@code TagNode} in the input-{@code Vector} parameter's range that <B><I>contained</I>
411     *      </B> a non-null HTML {@code 'HREF'} Attribute.
412     *      <BR /><BR />
413     *      </LI>
414     * 
415     * <LI> {@code Ret3.b (int[])}
416     *      <BR /><BR />
417     *      The second {@code int[] array} will contain an index-list of the indices
418     *      which contained {@code TagNode's} that were <B><I>replaced</I></B> by the
419     *      internal-resolve logic.
420     *      <BR /><BR />
421     *      </LI>
422     * 
423     * <LI> {@code Ret3.c (int[])}
424     *      <BR /><BR />
425     *      The third {@code int[] array} will contain an index-list of the indices
426     *      which contained {@code TagNode's} whose {@code 'HREF=...'} attribute
427     *      <B><I>failed</I></B> to be resolved by the internal-resolve logic, <I>or</I> caused a
428     *      {@code QuotesException} to throw.
429     *      </LI>
430     * 
431     * </OL>
432     * 
433     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
434     * @see #resolve(String, URL)
435     * @see TagNode#AV(String)
436     * @see TagNode#setAV(String, String, SD)
437     */
438    public static Ret3<int[], int[], int[]> resolveAllHREF(
439            Vector<? super TagNode> html, int sPos, int ePos, URL sourcePage, SD quote,
440            boolean askForReturnArraysOrReturnNull
441        )
442    {
443        // Retrieve the Vector-location of any TagNode on the page that has
444        // a "HREF=..." attribute.  These are almost always HTML <IMG> elements.
445        // NOTE: FIND Method's are "READ ONLY" - the Cast will make no difference at run-time.
446        //       The @SuppressWarnings is to overcome the cast of 'html'
447
448        @SuppressWarnings("unchecked")
449        int[] hasHRefPosArr = InnerTagFind.all((Vector<HTMLNode>) html, sPos, ePos, "href");
450
451
452        // Java Stream's are convenient for keeping "Growing Lists" of return values.
453        // This builder shall keep a list of all URL's that failed to update - for any reason
454        // **UNLESS** the reason is that the URL was already a fully-resolved, non-partial URL
455
456        IntStream.Builder failedUpdate = askForReturnArraysOrReturnNull
457            ? IntStream.builder() 
458            : null;
459
460
461        // This stream will keep a list of all URL's that were updated, and whose TagNode's
462        // were replaced inside the input HTML Vector
463
464        IntStream.Builder replaced = askForReturnArraysOrReturnNull
465            ? IntStream.builder()
466            : null;
467
468        for (int pos : hasHRefPosArr)
469        {
470            // Get the node at the index
471            TagNode tn = (TagNode) html.elementAt(pos);
472
473
474            // 1) Retrieve the HREF Attribute
475            // 2) if it is a partial-URL resolve it
476            // 3) Convert to a String
477
478            String  oldURL = tn.AV("HREF");
479            URL     newURL = resolve(oldURL, sourcePage);
480
481
482            // Some URL's cannot be resolved, if so, just skip this TagNode.
483            // Log the index to the stream (if requested), and continue.
484
485            if (newURL == null)
486            { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
487
488
489            // If the URL was already a fully-resolved-URL, continue - don't replace the TagNode;
490            // No logging needed here, the URL was *already* resolved...
491
492            if (oldURL.length() == newURL.toString().length()) continue;
493
494
495            // Replace the HREF Attribute in the TagNode.  This builds a new instance of TagNode
496            // If there is an exception, log the index to the stream (if requested), and continue.
497
498            try
499                { tn = tn.setAV("href", newURL.toString(), quote); }
500
501            catch (QuotesException qex)
502                { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
503
504            // Replace the index in the Vector containing the old TagNode with the new one.
505            html.setElementAt(tn , pos);
506
507
508            // The Vector-Index at this position had it's old TagNode removed and replaced with a
509            // new updated one.  Log this to the stream-list so to allow the user to know.
510
511            if (askForReturnArraysOrReturnNull) replaced.accept(pos);
512        }
513
514        return askForReturnArraysOrReturnNull
515
516            ? new Ret3<int[], int[], int[]>
517                (hasHRefPosArr, replaced.build().toArray(), failedUpdate.build().toArray())
518            : null;
519    }
520
521
522    // ********************************************************************************************
523    // ********************************************************************************************
524    // Resolve, Not Keep Exceptions
525    // ********************************************************************************************
526    // ********************************************************************************************
527
528
529    /**
530     * Convenience Method.
531     * <BR />Invokes: {@link #resolveHREF(TagNode, URL)}.
532     * <BR />And-Then: {@link TagNode#setAV(String, String, SD)}
533     */
534    public static TagNode resolveHREFAndUpdate(TagNode tnWithHREF, URL sourcePage)
535    { 
536        URL url = resolveHREF(tnWithHREF, sourcePage);
537
538        return (url == null)
539            ? null
540            : tnWithHREF.setAV("href", url.toString(), null);
541    }
542
543
544    /**
545     * This should be used for {@code TagNode's} that contain an {@code 'HREF'} inner-tag
546     * (attribute).
547     * 
548     * @param tnWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_HREF>
549     * 
550     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode}
551     * (possibly-relative) {@code URL} will be resolved.
552     * 
553     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
554     * directory.  Null is returned if attempting to build the {@code URL} generated a
555     * {@code MalformedURLException}.
556     * 
557     * <BR /><BR /><DIV CLASS=JDHint>
558     * <B>SPECIFICALLY:</B> This method shall catch all {@code MalformedURLException's}.
559     * </DIV>
560     * 
561     * @throws HREFException If the {@code TagNode} passed to parameter {@code 'tnWithHREF'} does
562     * not actually contain an {@code HREF} attribute, then this exception shall throw.
563     * 
564     * @see #resolve(String, URL)
565     * @see TagNode#AV(String)
566     */
567    public static URL resolveHREF(TagNode tnWithHREF, URL sourcePage)
568    {
569        String href = tnWithHREF.AV("href");
570
571        if (href == null) throw new HREFException(
572            "The TagNode passed to parameter tnWithHREF does not actually contain an " +
573            "HREF attribute."
574        );
575
576        return resolve(href, sourcePage);
577    }
578
579
580    /**
581     * Convenience Method.
582     * <BR />Invokes: {@link #resolveSRC(TagNode, URL)} 
583     * <BR />And-Then: {@link TagNode#setAV(String, String, SD)}
584     */
585    public static TagNode resolveSRCAndUpdate(TagNode tnWithSRC, URL sourcePage)
586    { 
587        URL url = resolveSRC(tnWithSRC, sourcePage);
588
589        return (url == null) 
590            ? null 
591            : tnWithSRC.setAV("src", url.toString(), null);
592    }
593
594
595    /**
596     * This should be used for {@code TagNode's} that contain a {@code 'SRC'} inner-tag
597     * (attribute).
598     * 
599     * @param tnWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_SRC>
600     * 
601     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode}
602     * (possibly-relative) {@code URL} will be resolved.
603     * 
604     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
605     * directory.  Null is returned if attempting to build the {@code URL} generated a
606     * {@code MalformedURLException}.
607     * 
608     * <BR /><BR /><DIV CLASS=JDHint>
609     * <B>SPECIFICALLY:</B> This method shall catch all {@code MalformedURLException's}.
610     * </DIV>
611     * 
612     * @throws SRCException If the {@code TagNode} passed to parameter {@code 'tnWithSRC'} does not
613     * actually contain a {@code SRC} attribute, then this exception shall throw.
614     * 
615     * @see #resolve(String, URL)
616     * @see TagNode#AV(String)
617     */
618    public static URL resolveSRC(TagNode tnWithSRC, URL sourcePage)
619    {
620        String src = tnWithSRC.AV("src");
621
622        if (src == null) throw new SRCException(
623            "The TagNode passed to parameter tnWithSRC does not actually contain a " +
624            "SRC attribute."
625        );
626
627        return resolve(src, sourcePage);
628    }
629
630    /**
631     * This should be used for lists of {@code TagNode's}, each of which contain an {@code 'HREF'}
632     * inner-tag (attribute).
633     * 
634     * @param tnListWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_HREF>
635     * 
636     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
637     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
638     * 
639     * @return A list of {@code URL's}, each of which have been completed/resolved with the 
640     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
641     * result in a null value in the {@code Vector}.
642     * 
643     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
644     * 
645     * @see #resolve(String, URL)
646     * @see TagNode#AV(String)
647     */
648    public static Vector<URL> resolveHREFs(Iterable<TagNode> tnListWithHREF, URL sourcePage)
649    {
650        Vector<URL> ret = new Vector<>();
651
652        for (TagNode tn : tnListWithHREF) ret.addElement(resolve(tn.AV("href"), sourcePage));
653
654        return ret;
655    }
656
657
658    /**
659     * This should be used for lists of {@code TagNode's}, each of which contain a {@code 'SRC'}
660     * inner-tag (attribute).
661     * 
662     * @param tnListWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_SRC>
663     * 
664     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
665     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
666     * 
667     * @return A list of {@code URL's}, each of which have been completed/resolved with the
668     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
669     * result in a null value in the {@code Vector.}
670     * 
671     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
672     * 
673     * @see #resolve(String, URL)
674     * @see TagNode#AV(String)
675     */
676    public static Vector<URL> resolveSRCs(Iterable<TagNode> tnListWithSRC, URL sourcePage)
677    {
678        Vector<URL> ret = new Vector<>();
679
680        for (TagNode tn : tnListWithSRC) ret.addElement(resolve(tn.AV("src"), sourcePage));
681
682        return ret;
683    }
684
685
686    /**
687     * This will use a "pointer array" - an array containing indexes into the downloaded page to
688     * retrieve {@code TagNode's}.  The {@code TagNode's} to which this pointer-array points -
689     * must each contain an {@code HREF} inner-tag with a {@code URL}, or a partial {@code URL}.
690     * 
691     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
692     * 
693     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
694     * 
695     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
696     * reference {@code TagNode's} that contain {@code HREF} attributes.  Integer-pointer Arrays
697     * are usually returned from the {@code package 'NodeSearch'} "Find" methods.
698     *
699     * <DIV CLASS="EXAMPLE">{@code 
700     * // Retrieve 'pointers' to all the '<A HREF=...>' TagNode's.  The term 'pointer' refers to
701     * // integer-indices into the vectorized-html variable 'page'
702     * int[] anchorPosArr = TagNodeFind.all(page, TC.OpeningTags, "a");
703     * 
     * // Extract each HREF inner-tag, and construct a URL.  Use the 'sourcePage' parameter
705     * // if the URL is only partially-resolved
706     * Vector<URL> urls = Links.resolveHREFs(page, anchorPosArr, mySourcePage);
707     * }</DIV>
708     * 
709     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
710     * {@code "<A ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
711     * {@code 'html'}, and then resolve any shortened {@code URL's}. 
712     *
713     * @param sourcePage This is the source page {@code URL} from whence the (possibly relative)
714     * {@code TagNode URL's} in the {@code Vector} are to be resolved.
715     *
716     * @return A list of {@code URL's}, each of which have been completed/resolved with the
717     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
718     * result in a null value in the {@code Vector}.  However, if any of the nodes pointed to by
719     * the {@code 'nodePosArr'} parameter do not contain opening {@code TagNode} elements, then
720     * this mistake shall generate {@code TagNodeExpectedException's}.
721     *
722     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
723     *
724     * @throws ArrayIndexOutOfBoundsException
725     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
726     * 
727     * @throws OpeningTagNodeExpectedException
728     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
729     * 
730     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
731     *
732     * @see #resolve(String, URL)
733     * @see TagNode#AV(String)
734     */
735    public static Vector<URL> resolveHREFs
736        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
737    {
738        // Return Vector
739        Vector<URL> ret = new Vector<>();
740
741        for (int nodePos : nodePosArr)
742        {
743            HTMLNode n = html.elementAt(nodePos);
744
745            // Must be an HTML TagNode
746            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
747
748            TagNode tn = (TagNode) n;
749
750            // Must be an "Opening" HTML TagNode
751            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
752
753            // Resolve the 'HREF', save the URL
754            ret.addElement(resolve(tn.AV("href"), sourcePage));
755        }
756
757        return ret;
758    }
759 
760
761    /**
762     * This will use a "pointer array" - an array containing indexes into the downloaded page to
763     * retrieve {@code TagNode's}.  The {@code TagNode's} to which this pointer-array points - must
764     * each contain a {@code SRC} inner-tag with a {@code URL}, or a partial {@code URL}.
765     * 
766     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
767     *
768     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
769     * 
770     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
771     * reference {@code TagNode's} that contain {@code SRC} attributes.  Integer-pointer Arrays are
772     * usually returned from the {@code package 'NodeSearch'} "Find" methods.
773     *
774     * <DIV CLASS="EXAMPLE">{@code 
775     * // Retrieve 'pointers' to all the '<IMG SRC=...>' TagNode's.  The term 'pointer' refers to
776     * // integer-indices into the vectorized-html variable 'page'
777     * 
778     * int[] picturePosArr = TagNodeFind.all(page, TC.OpeningTags, "img");
779     * 
     * // Extract each SRC inner-tag, and construct a URL.  Use the 'sourcePage' parameter
781     * // if the URL is only partially-resolved
782     * 
783     * Vector<URL> urls = Links.resolveSRCs(page, picturePosArr, mySourcePage);
784     * }</DIV>
785     * 
786     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
787     * {@code "<IMG ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
     * {@code 'html'}, and then resolve any shortened image {@code URL's}.
789     *
790     * @param sourcePage This is the source page {@code URL} from whence the (possibly relative)
791     * {@code TagNode URL's} in the {@code Vector} are to be resolved.
792     *
793     * @return A list of {@code URL's}, each of which have been completed/resolved with the
794     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
795     * result in a null value in the {@code Vector}.  However, if any of the nodes pointed to by
796     * the {@code 'nodePosArr'} parameter do not contain opening {@code TagNode} elements, then
797     * this mistake shall generate {@code TagNodeExpectedException's}.
798     *
799     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
800     *
801     * @throws ArrayIndexOutOfBoundsException
802     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
803     * 
804     * @throws OpeningTagNodeExpectedException
805     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
806     * 
807     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
808     *
809     * @see #resolve(String, URL)
810     * @see TagNode#AV(String)
811     */
812    public static Vector<URL> resolveSRCs
813        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
814    {
815        // Return Vector
816        Vector<URL> ret = new Vector<>();
817
818        for (int nodePos : nodePosArr)
819        {
820            HTMLNode n = html.elementAt(nodePos);
821
822            // Must be an HTML TagNode
823            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
824
825            TagNode tn = (TagNode) n;
826
827            // Must be an "Opening" HTML TagNode
828            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
829
830            // Resolve the "SRC", save the URL
831            ret.addElement(resolve(tn.AV("src"), sourcePage));
832        }
833
834        return ret;
835    }
836
837
838    /**
839     * This will convert <I><B>a list of </B></I> simple java {@code String's} to a
840     * list/{@code Vector} of {@code URL's}, de-referencing any missing information using the
841     * {@code 'sourcePage'} parameter.
842     * 
843     * @param src a list of strings - usually partially or totally completed Internet {@code URL's}
844     * 
845     * @param sourcePage This is the source page {@code URL} from which the {@code String's}
846     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
847     * 
848     * @return A list of {@code URL's}, each of which have been completed/resolved with the
849     * {@code 'sourcePage'} parameter.  If there were any {@code String's} that were zero-length or
     * null, then null is returned in the related {@code Vector} position.  If any
     * {@code String} causes a {@code MalformedURLException}, then that position in the
     * {@code Vector} will be null.
853     * 
854     * @see #resolve(String, URL)
855     */
856    public static Vector<URL> resolve(Vector<String> src, URL sourcePage)
857    {
858        Vector<URL> ret = new Vector<>();
859
860        for (String s : src) ret.addElement(resolve(s, sourcePage));
861
862        return ret;
863    }
864
865    /**
866     * This will convert a simple java {@code String} to a {@code URL}, de-referencing any missing
867     * information using the {@code 'sourcePage'} parameter.
868     * 
869     * @param src Any java {@code String}, usually one which was scraped from an HTML-Page, and
870     * needs to be "completed."
871     * 
872     * @param sourcePage This is the source page {@code URL} from which the String
873     * (possibly-relative) {@code URL} will be resolved.
874     * 
875     * @return A {@code URL}, which has been completed/resolved with the {@code 'sourcePage'}
876     * parameter. If parameter {@code 'src'} is null or zero-length, then this method will also
877     * return null.  If a {@code MalformedURLException} is generated, null will also be returned.
878     */
879    public static URL resolve(String src, URL sourcePage)
880    {
881        if (sourcePage == null) throw new NullPointerException(
882            "Though you may provide null to the partial-URL to dereference parameter, null " +
883            "may not be passed to the Source-Page Parameter.  The purpose of the 'resolve' " +
884            "operation is to resolve partial-URLs against a source-page (root) URL. " +
885            "Therefore this is not allowed."
886        );
887
888        if (src == null) return null;
889
890        src = src.trim();
891
892        if (src.length() == 0) return null;
893
894        String srcLC = src.toLowerCase();
895
896        if (StrCmpr.startsWithXOR(srcLC, _NON_URL_HREFS)) return null;
897
898        if (srcLC.startsWith("http://") || srcLC.startsWith("https://"))
899
900            try
901                { return new URL(src); }
902
903            catch (MalformedURLException e) { return null; }
904
905        if (src.startsWith("//") && (src.charAt(3) != '/'))
906
907            try
908                { return new URL(sourcePage.getProtocol().toLowerCase() + ":" + src); }
909
910            catch (MalformedURLException e) { return null; }
911        
912        if (src.startsWith("/"))
913
914            try
915            { 
916                return new URL(
917                    sourcePage.getProtocol().toLowerCase() + "://" +
918                    sourcePage.getHost().toLowerCase() +
919                    src
920                );
921            }
922
923            catch (MalformedURLException e) { return null; }
924 
925        if (src.startsWith("../"))
926        {
927            String  sourcePageStr   = sourcePage.toString();
928            short   nLevels         = 0;
929
930            do      { nLevels++;  src = src.substring(3); }
931            while   (src.startsWith("../"));
932
933            String  directory = StringParse.dotDotParentDirectory(sourcePage.toString(), nLevels);
934
935            try     { return new URL(directory + src); }
936            catch   (Exception e) { return null; }
937        }
938
939        String  root =
940            sourcePage.getProtocol().toLowerCase() + "://" + 
941            sourcePage.getHost().toLowerCase();
942
943        String  path    = sourcePage.getPath().trim();
944        int     pos     = StringParse.findLastFrontSlashPos(path);
945
946        if (pos == -1) throw new StringIndexOutOfBoundsException(
947            "The URL you have provided: " + sourcePage.toString() + " does not have a '/' " +
948            "front-slash character in it's path.  Cannot proceed resolving relative-URL's " +
949            "without this."
950        );
951
952        path = path.substring(0, pos + 1);
953
954        try     { return new URL(root + path + src); }
955        catch   (MalformedURLException e) { return null; }
956    }
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978    // ********************************************************************************************
979    // ********************************************************************************************
980    // Resolve, KE - Keep Exceptions
981    // ********************************************************************************************
982    // ********************************************************************************************
983
984
985    /**
986     * This should be used for {@code TagNode's} that contain an {@code 'HREF'} inner-tag
987     * (attribute).
988     * 
989     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
990     * 
991     * @param tnWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_HREF>
992     * 
993     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
994     * (possibly-relative) {@code URL} will be resolved.
995     * 
     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
     * directory.  If the {@code TagNode} has no {@code HREF} attribute, nothing is returned; an
     * {@code HREFException} is thrown instead.  If the {@code TagNode} causes a
     * {@code MalformedURLException}, that is returned in {@code Ret2.b}
1000     * 
1001     * <BR /><BR /><DIV CLASS=JDHint>
1002     * <B>SPECIFICALLY:</B> This method shall catch all {@code MalformedURLException's}.
1003     * </DIV>
1004     * 
1005     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1006     * 
1007     * @throws HREFException If the {@code TagNode} passed to parameter {@code 'tnWithHREF'} does
1008     * not actually contain an {@code HREF} attribute, then this exception shall throw.
1009     * 
1010     * @see #resolve_KE(String, URL)
1011     * @see TagNode#AV(String)
1012     * @see Ret2
1013     */
1014    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1015    public static Ret2<URL, MalformedURLException> resolveHREF_KE
1016        (TagNode tnWithHREF, URL sourcePage)
1017    {
1018        String href = tnWithHREF.AV("href");
1019
1020        if (href == null) throw new HREFException(
1021            "The TagNode passed to parameter tnWithHREF does not actually contain an " +
1022            "HREF attribute."
1023        );
1024
1025        return LinksResolve_KE.resolve(href, sourcePage);
1026    }
1027
1028
1029    /**
1030     * This should be used for {@code TagNode's} that contain a {@code 'SRC'} inner-tag
1031     * (attribute).
1032     * 
1033     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1034     * 
1035     * @param tnWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_SRC>
1036     * 
1037     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1038     * (possibly-relative) {@code URL} will be resolved.
1039     * 
     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
     * directory.  If the {@code TagNode} has no {@code SRC} attribute, nothing is returned; a
     * {@code SRCException} is thrown instead.  If the {@code TagNode} causes a
     * {@code MalformedURLException}, that is returned in {@code Ret2.b}
1043     * 
1044     * <BR /><BR /><DIV CLASS=JDHint>
1045     * <B>SPECIFICALLY:</B> This method shall catch all {@code MalformedURLException's}.
1046     * </DIV>
1047     * 
1048     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1049     * 
1050     * @throws SRCException If the {@code TagNode} passed to parameter {@code 'tnWithSRC'} does not
1051     * actually contain a {@code SRC} attribute, then this exception shall throw.
1052     * 
1053     * @see #resolve_KE(String, URL)
1054     * @see TagNode#AV(String)
1055     * @see Ret2
1056     */
1057    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1058    public static Ret2<URL, MalformedURLException> resolveSRC_KE
1059        (TagNode tnWithSRC, URL sourcePage)
1060    {
1061        String src = tnWithSRC.AV("src");
1062
1063        if (src == null) throw new SRCException(
1064            "The TagNode passed to parameter tnWithSRC does not actually contain a " +
1065            "SRC attribute."
1066        );
1067
1068        return LinksResolve_KE.resolve(src, sourcePage);
1069    }
1070
1071
1072    /**
1073     * This should be used for lists of {@code TagNode's}, each of which contain an {@code 'HREF'}
1074     * inner-tag (attribute).
1075     * 
1076     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1077     * 
1078     * @param tnListWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_HREF>
1079     * 
1080     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
1081     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
1082     * 
1083     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1084     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code HREF} tag,
1085     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1086     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1087     * exception in {@code Ret2.b}
1088     * 
1089     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
1090     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1091     * 
1092     * @see #resolve_KE(String, URL)
1093     * @see TagNode#AV(String)
1094     * @see Ret2
1095     */
1096    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1097    public static Vector<Ret2<URL, MalformedURLException>> resolveHREFs_KE
1098        (Iterable<TagNode> tnListWithHREF, URL sourcePage)
1099    {
1100        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1101
1102        for (TagNode tn : tnListWithHREF)
1103            ret.addElement(LinksResolve_KE.resolve(tn.AV("href"), sourcePage));
1104
1105        return ret;
1106    }
1107
1108
1109    /**
1110     * This should be used for lists of {@code TagNode's}, each of which contain a {@code 'SRC'}
1111     * inner-tag (attribute).
1112     * 
1113     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1114     * 
1115     * @param tnListWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_SRC>
1116     * 
1117     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1118     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
1119     * 
1120     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1121     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code SRC} tag,
1122     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1123     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1124     * exception in {@code Ret2.b}
1125     * 
1126     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
1127     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1128     * 
1129     * @see #resolve_KE(String, URL)
1130     * @see TagNode#AV(String)
1131     * @see Ret2
1132     */
1133    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1134    public static Vector<Ret2<URL, MalformedURLException>> resolveSRCs_KE
1135        (Iterable<TagNode> tnListWithSRC, URL sourcePage)
1136    {
1137        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1138
1139        for (TagNode tn : tnListWithSRC)
1140            ret.addElement(LinksResolve_KE.resolve(tn.AV("src"), sourcePage));
1141
1142        return ret;
1143    }
1144
1145
1146    /**
1147     * This will use a "pointer array" - an array containing indexes into the downloaded page to
1148     * retrieve {@code TagNode's}.  The {@code TagNode} to which this pointer-array points - must
1149     * contain {@code HREF} inner-tags with {@code URL's}, or partial {@code URL's}.
1150     * 
1151     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
1152     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1153     * 
1154     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
1155     * 
1156     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
1157     * reference {@code TagNode's} that contain {@code HREF} attributes.  Integer-pointer Arrays
     * are usually returned from the {@code package 'NodeSearch'} "Find" methods.
1159     *
1160     * <DIV CLASS="EXAMPLE">{@code 
1161     * // Retrieve 'pointers' to all the '<A HREF=...>' TagNode's.  The term 'pointer' refers to
1162     * // integer-indices into the vectorized-html variable 'page'
1163     * 
1164     * int[] anchorPosArr = TagNodeFind.all(page, TC.OpeningTags, "a");
1165     * 
1166     * // Extract each HREF inner-tag, and construct a URL.  Use the 'sourcePage' parameter if
1167     * // the URL is only partially-resolved.  If any URL's on the original-page are invalid, the
1168     * // method shall not crash, but save the exception instead.
1169     * 
     * Vector<Ret2<URL, MalformedURLException>> urlsWithEx =
     *     Links.resolveHREFs_KE(page, anchorPosArr, mySourcePage);
1172     *
1173     * // Print out any "failed" urls
1174     * for (Ret2<URL, MalformedURLException> r : urlsWithEx)
1175     *     if (r.b != null) 
1176     *         System.out.println("There was an exception: " + r.b.toString());
1177     * }</DIV>
1178     *
1179     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
1180     * {@code "<A ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
     * {@code 'html'}, and then resolve any shortened {@code URL's}.
1182     *
1183     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1184     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1185     * 
1186     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1187     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code HREF} tag,
1188     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1189     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1190     * exception in {@code Ret2.b}
1191     *
1192     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
1193     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1194     *
1195     * @throws ArrayIndexOutOfBoundsException
1196     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1197     * 
1198     * @throws OpeningTagNodeExpectedException
1199     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1200     * 
1201     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1202     *
1203     * @see #resolve_KE(String, URL)
1204     * @see TagNode#AV(String)
1205     * @see Ret2
1206     */
1207    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1208    public static Vector<Ret2<URL, MalformedURLException>> resolveHREFs_KE
1209        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
1210    {
1211         // Return Vector
1212        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1213
1214        for (int nodePos : nodePosArr)
1215        {
1216            HTMLNode n = html.elementAt(nodePos);
1217
1218            // Must be an HTML TagNode
1219            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
1220
1221            TagNode tn = (TagNode) n;
1222
1223            // Must be an "Opening" HTML TagNode
1224            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
1225
1226            // Resolve the "HREF", keep the URL
1227            ret.addElement(LinksResolve_KE.resolve(tn.AV("href"), sourcePage));
1228        }
1229
1230        return ret;
1231    }
1232 
1233    /**
1234     * This will use a "pointer array" - an array containing indexes into the downloaded page to
1235     * retrieve {@code TagNode's}.  The {@code TagNode} to which this pointer-array points - must 
1236     * contain {@code SRC} inner-tags with {@code URL's}, or partial {@code URL's}.
1237     * 
1238     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
1239     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1240     *
1241     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
1242     * 
1243     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
1244     * reference {@code TagNode's} that contain {@code SRC} attributes.  Integer-pointer Arrays are
     * usually returned from the {@code package 'NodeSearch'} "Find" methods.
1246     *
1247     * <DIV CLASS="EXAMPLE">{@code 
1248     * // Retrieve 'pointers' to all the '<IMG SRC=...>' TagNode's.  The term 'pointer' refers to
1249     * // integer-indices into the vectorized-html variable 'page'
1250     * 
1251     * int[] picturePosArr = TagNodeFind.all(page, TC.OpeningTags, "img");
1252     * 
1253     * // Extract each SRC inner-tag, and construct a URL.  Use the 'sourcePage' parameter if
1254     * // the URL is only partially-resolved.  If any URL's on the original-page are invalid,
1255     * // the method shall not crash, but save the exception instead.
1256     * 
     * Vector<Ret2<URL, MalformedURLException>> urlsWithEx =
1258     *      Links.resolveSRCs_KE(page, picturePosArr, mySourcePage);
1259     *
1260     * // Print out any "failed" urls
1261     * for (Ret2<URL, MalformedURLException> r : urlsWithEx)
1262     *     if (r.b != null) 
1263     *         System.out.println("There was an exception: " + r.b.toString());
1264     * }</DIV>
1265     *
1266     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
1267     * {@code "<IMG ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
1268     * {@code 'html'}, and then resolve any shortened {@code URL's}.
1269     *
1270     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1271     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1272     *
1273     * @return A list of {@code URL's}, each of which have been completed/resolved with the 
1274     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code SRC} tag,
1275     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1276     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1277     * exception in {@code Ret2.b}
1278     *
1279     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
1280     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1281     *
1282     * @throws ArrayIndexOutOfBoundsException
1283     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1284     * 
1285     * @throws OpeningTagNodeExpectedException
1286     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1287     * 
1288     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1289     *
1290     * @see #resolve_KE(String, URL)
1291     * @see TagNode#AV(String)
1292     * @see Ret2
1293     */
1294    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1295    public static Vector<Ret2<URL, MalformedURLException>> resolveSRCs_KE
1296        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
1297    {
1298         // Return Vector
1299        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();                                         
1300
1301        for (int nodePos : nodePosArr)
1302        {
1303            HTMLNode n = html.elementAt(nodePos);
1304
1305            // Must be an HTML TagNode
1306            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
1307
1308            TagNode tn = (TagNode) n;
1309
1310            // Must be an "Opening" HTML TagNode
1311            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
1312
1313            // Resolve "SRC" and keep URL's
1314            ret.addElement(LinksResolve_KE.resolve(tn.AV("src"), sourcePage));
1315        }
1316
1317        return ret;
1318    }
1319
1320    /**
1321     * Resolve all {@code URL's}, represented as {@code String's}, inside of a {@code Vector}.
1322     * 
1323     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1324     * 
1325     * @param src a list of {@code String's} - usually partially or totally completed Internet
1326     * {@code URL's}
1327     * 
1328     * @param sourcePage This is the source page {@code URL} from which the {@code String's}
1329     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1330     * 
1331     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1332     * {@code 'sourcePage'} parameter.  If there were any {@code String's} that were zero-length or
     * null, then null is returned in the related {@code Vector} position.  If any {@code String}
     * causes a {@code MalformedURLException}, then that position in the {@code Vector} will
     * contain the exception in {@code Ret2.b}
1336     *
1337     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1338     * 
1339     * @see #resolve_KE(String, URL)
1340     * @see Ret2
1341     */
1342    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1343    public static Vector<Ret2<URL, MalformedURLException>> resolve_KE
1344        (Vector<String> src, URL sourcePage)
1345    {
1346        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1347
1348        for (String s : src)
1349            ret.addElement(LinksResolve_KE.resolve(s, sourcePage));
1350
1351        return ret;
1352    }
1353
1354    /**
1355     * This will convert a simple java {@code String} to a {@code URL}, de-referencing any missing
1356     * information using the {@code 'sourcePage'} parameter.
1357     * 
1358     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1359     * 
1360     * @param src Any java {@code String}, usually one which was scraped from an HTML-Page, and
1361     * needs to be "completed."
1362     * 
1363     * @param sourcePage This is the source page {@code URL} from which the String (possibly
1364     * relative) {@code URL} will be resolved.
1365     * 
1366     * @return A {@code URL}, which has been completed/resolved with the {@code 'sourcePage'}
1367     * parameter. If parameter {@code 'src'} is null or zero-length, null will be returned.  If a
1368     * {@code MalformedURLException} is thrown, that will be included with the {@code Ret2<>}
1369     * result.
1370     *
1371     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1372     * 
1373     * @see Ret2
1374     */
1375    @LinkJavaSource(handle="LinksResolve_KE", entity=METHOD, name="resolve")
1376    public static Ret2<URL, MalformedURLException> resolve_KE(String src, URL sourcePage)
1377    { return LinksResolve_KE.resolve(src, sourcePage); }
1378}