001package Torello.HTML;
002
003import java.util.*;
004import java.io.IOException;
005import java.util.stream.IntStream;
006
007import Torello.Java.*;
008import Torello.Java.Additional.RemoveUnsupportedIterator;
009
010
011/**
012 * A basic tool for finding Java-Script Listener Attributes in the {@link TagNode} elements in a
013 * Vectorized-HTML Web-Page.
014 * 
015 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LISTENERS>
016 */
017@Torello.JavaDoc.StaticFunctional
018public class Listeners
019{
020    private Listeners() { }
021
022    @SuppressWarnings("unchecked")
023    private static final TreeSet<String> l = (TreeSet<String>) LFEC.readObjectFromFile_JAR
024        (Listeners.class, "data-files/Listeners.tsdat", true, TreeSet.class);
025
026    public static void main(String[] argv)
027    {
028        for (String s : l) System.out.print(s + ", ");
029    }
030    
031    /**
032     * This will return an {@code Iterator} of the listed java-script listeners available in this
033     * class
034     */
035    public static Iterator<String> listAllAvailable()
036    { return new RemoveUnsupportedIterator<String>(l.iterator()); }
037
038    /**
039     * This just allows the user to add a name of a new listener that was not already stored in the
040     * internal-set of known java-script listeners.  When searching a page for listeners, this
041     * class will only (obviously) be able to find ones whose names are known.
042     * 
043     * @param listenerName The name of a listener that is not already 'known-about' in by this
044     * class
045     * 
046     * @return {@code TRUE} If the internal table of listener names was not already stored in the
047     * set, {@code FALSE} if attempting to add a listener that is already in the set.
048     */
049    public static boolean addNewListenerName(String listenerName)
050    { return l.add(listenerName.toLowerCase()); }
051
052    /**
053     * This will test whether listeners are present in the {@code TagNode}, and if so - return
054     * them.
055     * 
056     * <BR /><TABLE CLASS=JDBriefTable>
057     * <TR><TH>Input {@code TagNode}</TH><TH>Output Properties:</TH></TR>
058     * 
059     * <TR><TD><CODE>&lt;frameset cols="20%,80%" title="Documentation frame"
060     *      onload="top.loadFrames()"&gt;</CODE>
061     *      </TD>
062     *      <TD><CODE>onload: top.loadFrames()</CODE></TD>
063     *      </TR>
064     * 
065     * <TR><TD><CODE>&lt;a href="javascript:void(0);" onclick="return
066     *      j2gb('http://www.gov.cn');"&gt;</CODE>
067     *      </TD>
068     * 
069     *      <TD><CODE>onclick:  return j2gb('http://www.gov.cn');</CODE></TD></TR>
070     * 
071     * </TABLE>
072     * 
073     * @param tn This may be any {@code TagNode}, but it will be tested for JavaScript listeners.
074     * 
075     * @return Will return a {@code java.util.Properties} object that contains a key-value table of
076     * any/all listeners present in the {@code TagNode.}  If there are no listeners, this method
077     * <I>will not return null</I>, it will return an <I>empty {@code Properties} object</I>.
078     * 
079     * @see TagNode#AV(String)
080     * @see StrCmpr#containsIgnoreCase(String, String)
081     */
082    public static Properties extract(TagNode tn)
083    {
084        Properties  p = new Properties();
085        String      s;
086
087        for (String listener : l)
088
089            if (StrCmpr.containsIgnoreCase(tn.str, listener))
090
091                if ((s = tn.AV(listener)) != null) 
092
093                    // This **may** seem redundant, but it is not, because what if it was phony?
094                    // What if the "listener" key-word was actually buried in some "ALT=..." text?
095                    // The initial "StrCmpr.contains..." an optimization
096
097                    p.put(listener, s);
098
099        return p;
100    }
101
102    /**
103     * If you have performed a Java-Script Listener Get, this method will cycle through the list
104     * that was returned and generate <I><B>an identical length return {@code Properties[]}</B></I>
105     * array that has called {@code extract(tn)} for-each element in the parameter {@code 'list.'}
106     * 
107     * @param list A list of {@code TagNode's} that are expected to contain Java-Script listeners.
108     * If some of the members of this input {@code Vector} have {@code TagNode's} with no
109     * listeners, the return array will <I>still remain a parallel (same-size) array</I>,
110     * however some of it's elements will have {@code Properties} with no key/value pairs in them
111     * (zero-size).
112     * 
113     * @return A list of {@code Properties} for each element in this {@code 'list.'}
114     * 
115     * @see #extract(TagNode)
116     */
117    public static Properties[] extractAll(Vector<TagNode> list)
118    {
119        Properties[] ret = new Properties[list.size()];
120
121        for (int i=0; i < list.size(); i++) ret[i] = extract(list.elementAt(i));
122
123        return ret;
124    }
125
126
127    // ********************************************************************************************
128    // ********************************************************************************************
129    // FIND
130    // ********************************************************************************************
131    // ********************************************************************************************
132
133
134    /** 
135     * Convenience Method.
136     * <BR />Invokes: {@link #find(Vector, int, int)}
137     */
138    public static int[] find(Vector<? extends HTMLNode> html)
139    { return find(html, 0, -1); }
140
141    /**
142     * Convenience Method.
143     * <BR />Receives: {@code DotPair}
144     * <BR />Invokes: {@link #find(Vector, int, int)}
145     */
146    public static int[] find(Vector<? extends HTMLNode> html, DotPair dp)
147    { return find(html, dp.start, dp.end + 1); }
148
149    /**
150     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
151     * the page to a sublist of that page, 
152     * 
153     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
154     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
155     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
156     * 
157     * @return A list of index-pointers into the underlying parameter {@code 'html'} where each
158     * node pointed to by the list contains a {@code TagNode} element with a listener attribute /
159     * inner-tag. Search results shall be limited to only considering elements between
160     * {@code sPos ... ePos.}
161     * 
162     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
163     * 
164     * @see #hasListener(TagNode)
165     * @see LV
166     */
167    public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos)
168    {
169        // Java Streams to keep lists of int's
170        IntStream.Builder   b = IntStream.builder();
171        LV                  l = new LV(html, sPos, ePos);
172        TagNode             tn;
173
174        for (int i=l.start; i < l.end; i++)
175
176            // Only check Openening TagNode's, long enought to have attributes, and then only
177            // retain TagNode's that have a listener attribute.
178
179            if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) b.add(i);
180
181        return b.build().toArray();
182    }
183
184    /**
185     * Convenience Method.
186     * <BR />Invokes: {@link #find(Vector, int, int, String[])}
187     */
188    public static int[] find(Vector<? extends HTMLNode> html, String... htmlTags)
189    { return find(html, 0, -1, htmlTags); }
190
191    /**
192     * Convenience Method.
193     * <BR />Receives: {@code DotPair}
194     * <BR />Invokes: {@link #find(Vector, int, int, String[])}
195     */
196    public static int[] find(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags)
197    { return find(html, dp.start, dp.end + 1, htmlTags); }
198
199    /**
200     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
201     * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only
202     * allow for matches where the HTML Element is among the list of elements in parameter 
203     * {@code 'htmlTags'}
204     * 
205     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
206     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
207     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
208     * 
209     * @param htmlTags A list of HTML Elements, as a varargs {@code String...} Array, that
210     * constitute a match.  Any HTML Element in the web-page that has a listener attribute, but
211     * whose HTML tag/token is not present in this list will not be considered a match, and will
212     * not be returned in this method's search results.
213     * 
214     * @return A list of index-pointers into the underlying parameter {@code 'html'} where each
215     * node pointed to by the list contains a {@code TagNode} element with a listener attribute /
216     * inner-tag. Search results shall be limited to only considering elements between
217     * {@code sPos ... ePos,} <B><I>and also</I></B> limited to HTML Elements in parameter
218     * {@code 'htmlTags'}
219     * 
220     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
221     * @see #HAS_TOK_MATCH(String, String[])
222     * @see #hasListener(TagNode)
223     * @see LV
224     */
225    public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags)
226    {
227        // Java Streams can keep lists of int's
228        IntStream.Builder   b = IntStream.builder();
229        LV                  l = new LV(html, sPos, ePos);   
230        TagNode             tn;
231
232        htmlTags = toLowerCase(htmlTags);
233
234        for (int i=l.start; i < l.end; i++)
235
236            if (
237                // Only Match Opening-Tags with internal-string's long enough to contain Attributes
238                ((tn = html.elementAt(i).openTagPWA()) != null)
239
240                // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags'
241                &&  HAS_TOK_MATCH(tn.tok, htmlTags)
242
243                // Check whethr or not that the TagNode has a listener attribute (if yes, save it)
244                &&  hasListener(tn)
245            )
246                // Save the array-index
247                b.add(i);
248
249        return b.build().toArray();
250    }
251
252
253    // ********************************************************************************************
254    // ********************************************************************************************
255    // GET
256    // ********************************************************************************************
257    // ********************************************************************************************
258
259
260    /**
261     * Convenience Method.
262     * <BR />Invokes {@link #get(Vector, int, int)}
263     */
264    public static Vector<TagNode> get(Vector<? extends HTMLNode> html)
265    { return get(html, 0, -1); }
266
267    /**
268     * Convenience Method.
269     * <BR />Receives: {@code DotPair}
270     * <BR />Invokes: {@link #get(Vector, int, int)}
271     */
272    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp)
273    { return get(html, dp.start, dp.end + 1); }
274
275    /**
276     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
277     * the page to a sublist of that page, 
278     * 
279     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
280     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
281     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
282     * 
283     * @return A list TagNode elements that have a listener attribute / inner-tag.  Search results
284     * shall be limited to only considering elements between sPos ... ePos.
285     * 
286     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
287     * @see #hasListener(TagNode)
288     * @see LV
289     */
290    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, int sPos, int ePos)
291    {
292        Vector<TagNode> ret = new Vector<>();
293        LV              l   = new LV(html, sPos, ePos);
294        TagNode         tn;
295
296        for (int i=l.start; i < l.end; i++)
297
298            // Only check Openening TagNode's, long enought to have attributes, and then only
299            // retain TagNode's that have a listener attribute.  If this TagNodes does have a 
300            // listener, place it in the return vector.
301
302            if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) ret.add(tn);
303
304        return ret;
305    }
306
307    /**
308     * Convenience Method.
309     * <BR />Invokes: {@link #get(Vector, int, int, String[])}
310     */
311    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, String... htmlTags)
312    { return get(html, 0, -1, htmlTags); }
313
314    /** Convenience Method.  (Range-Limited Method)
315     * <BR />Receives: {@code DotPair}
316     * <BR />Invokes: {@link #get(Vector, int, int, String[])}
317     */
318    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags)
319    { return get(html, dp.start, dp.end + 1, htmlTags); }
320
321    /**
322     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
323     * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only
324     * allow for matches where the HTML Element is among the list of elements in parameter
325     * {@code 'htmlTags'}
326     * 
327     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
328     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
329     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
330     * 
331     * @param htmlTags A list of HTML Elements, as a varargs {@code String} Array, that constitute
332     * a match.  Any HTML Element in the web-page that has a listener attribute, but whose HTML
333     * tag/token is not present in this list will not be considered a match, and will not be
334     * returned in this method's search results.
335     * 
336     * @return A list of TagNode elements that have a listener attribute / inner-tag.  Search
337     * results shall be limited to only considering elements between sPos ... ePos, <B><I>and
338     * also</I></B> limited to HTML Elements in parameter {@code 'htmlTags'}
339     * 
340     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
341     * @see #HAS_TOK_MATCH(String, String[])
342     * @see #hasListener(TagNode)
343     * @see LV
344     */
345    public static Vector<TagNode> get
346        (Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags)
347    {
348        Vector<TagNode> ret = new Vector<>();
349        LV              l   = new LV(html, sPos, ePos);
350        TagNode         tn;
351
352        htmlTags = toLowerCase(htmlTags);
353
354        for (int i=l.start; i < l.end; i++)
355
356            if (
357                // Only Match Opening-Tags with internal-string's long enough to contain Attributes
358                ((tn = html.elementAt(i).openTagPWA()) != null)
359
360                // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags'
361                &&  HAS_TOK_MATCH(tn.tok, htmlTags)
362
363                // Check whethr or not that the TagNode has a listener attribute (if yes, save it)
364                &&  hasListener(tn)
365            )
366
367                // All requirements have been affirmed, save this node in the return vector.
368                ret.add(tn);
369
370        return ret;
371    }
372
373
374    // ********************************************************************************************
375    // ********************************************************************************************
376    // Helpers
377    // ********************************************************************************************
378    // ********************************************************************************************
379
380
381    /**
382     * Checks if a certain {@code class TagNode} has a listener inner-tag / attribute.
383     * @param tn Any HTML Element {@code TagNode}
384     * @return {@code TRUE} If this {@code TagNode} has a listener, and {@code FALSE} otherwise.
385     * @see StrCmpr#containsIgnoreCase(String, String)
386     */
387    public static boolean hasListener(TagNode tn)
388    {
389        Properties p = new Properties();
390
391        for (String listener : l)
392
393            // This is a simple string-comparison - with no reg-ex involved
394            if (StrCmpr.containsIgnoreCase(tn.str, listener))
395
396                // Slightly slower, uses a - TagNode.AV(attribute) uses a Regular-Expression
397                if (tn.AV(listener) != null)
398
399                    // This **may** seem redundant, but it is not, because what if it was phony?
400                    // What if the "listener" key-word was actually buried in some "ALT=..." text?
401
402                    return true;
403
404        return false;
405    }
406
407    /**
408     * Converts the varargs parameter to lower-case {@code Strings.}
409     * 
410     * <BR /><BR />Note that this is <I><B>{@code "Varargs Safe"}</B></I>,
411     * because a new {@code String}-Array is created that has new {@code String}-pointers.
412     * 
413     * @param tags The varargs {@code String} parameter acquired from the search-methods in this
414     * class.
415     * 
416     * @return a lower-case version of the input.
417     */
418    protected static String[] toLowerCase(String[] tags)
419    {
420        String[] ret = new String[tags.length];
421
422        for (int i=0; i < tags.length; i++)
423
424            if (tags[i] != null) ret[i] = tags[i].toLowerCase();
425
426            else throw new HTMLTokException(
427                "One of the HTML tokens you have passed to the variable-length parameter " +
428                "'htmlTags' was null."
429            );
430
431        return ret;
432    }
433
434    /**
435     * Checks if the var-args parameter {@code String... htmlTags} matches a particular token
436     * 
437     * @param htmlTag The token to be checked against the user's requested {@code 'htmlTags'} list
438     * parameter
439     * 
440     * @param htmlTags The list of acceptable HTML Tag Elements.  This is a search specification
441     * parameter used by some of the search-methods in this class.
442     * 
443     * @return {@code TRUE} If the tested token parameter {@code 'htmlTag'} is a member of this
444     * elements in list parameter {@code 'htmlTags'}, and {@code FALSE} otherwise.
445     */
446    protected static boolean HAS_TOK_MATCH(String htmlTag, String... htmlTags)
447    { for (String s : htmlTags) if (s.equals(htmlTag)) return true; return false; }
448}