001package Torello.HTML; 002 003import java.util.*; 004import java.io.IOException; 005import java.util.stream.IntStream; 006 007import Torello.Java.*; 008import Torello.Java.Additional.RemoveUnsupportedIterator; 009 010 011/** 012 * A basic tool for finding Java-Script Listener Attributes in the {@link TagNode} elements in a 013 * Vectorized-HTML Web-Page. 014 * 015 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LISTENERS> 016 */ 017@Torello.JavaDoc.StaticFunctional 018public class Listeners 019{ 020 private Listeners() { } 021 022 @SuppressWarnings("unchecked") 023 private static final TreeSet<String> l = (TreeSet<String>) LFEC.readObjectFromFile_JAR 024 (Listeners.class, "data-files/Listeners.tsdat", true, TreeSet.class); 025 026 public static void main(String[] argv) 027 { 028 for (String s : l) System.out.print(s + ", "); 029 } 030 031 /** 032 * This will return an {@code Iterator} of the listed java-script listeners available in this 033 * class 034 */ 035 public static Iterator<String> listAllAvailable() 036 { return new RemoveUnsupportedIterator<String>(l.iterator()); } 037 038 /** 039 * This just allows the user to add a name of a new listener that was not already stored in the 040 * internal-set of known java-script listeners. When searching a page for listeners, this 041 * class will only (obviously) be able to find ones whose names are known. 042 * 043 * @param listenerName The name of a listener that is not already 'known-about' in by this 044 * class 045 * 046 * @return {@code TRUE} If the internal table of listener names was not already stored in the 047 * set, {@code FALSE} if attempting to add a listener that is already in the set. 048 */ 049 public static boolean addNewListenerName(String listenerName) 050 { return l.add(listenerName.toLowerCase()); } 051 052 /** 053 * This will test whether listeners are present in the {@code TagNode}, and if so - return 054 * them. 055 * 056 * <BR /><TABLE CLASS=JDBriefTable> 057 * <TR><TH>Input {@code TagNode}</TH><TH>Output Properties:</TH></TR> 058 * 059 * <TR><TD><CODE><frameset cols="20%,80%" title="Documentation frame" 060 * onload="top.loadFrames()"></CODE> 061 * </TD> 062 * <TD><CODE>onload: top.loadFrames()</CODE></TD> 063 * </TR> 064 * 065 * <TR><TD><CODE><a href="javascript:void(0);" onclick="return 066 * j2gb('http://www.gov.cn');"></CODE> 067 * </TD> 068 * 069 * <TD><CODE>onclick: return j2gb('http://www.gov.cn');</CODE></TD></TR> 070 * 071 * </TABLE> 072 * 073 * @param tn This may be any {@code TagNode}, but it will be tested for JavaScript listeners. 074 * 075 * @return Will return a {@code java.util.Properties} object that contains a key-value table of 076 * any/all listeners present in the {@code TagNode.} If there are no listeners, this method 077 * <I>will not return null</I>, it will return an <I>empty {@code Properties} object</I>. 078 * 079 * @see TagNode#AV(String) 080 * @see StrCmpr#containsIgnoreCase(String, String) 081 */ 082 public static Properties extract(TagNode tn) 083 { 084 Properties p = new Properties(); 085 String s; 086 087 for (String listener : l) 088 089 if (StrCmpr.containsIgnoreCase(tn.str, listener)) 090 091 if ((s = tn.AV(listener)) != null) 092 093 // This **may** seem redundant, but it is not, because what if it was phony? 094 // What if the "listener" key-word was actually buried in some "ALT=..." text? 095 // The initial "StrCmpr.contains..." an optimization 096 097 p.put(listener, s); 098 099 return p; 100 } 101 102 /** 103 * If you have performed a Java-Script Listener Get, this method will cycle through the list 104 * that was returned and generate <I><B>an identical length return {@code Properties[]}</B></I> 105 * array that has called {@code extract(tn)} for-each element in the parameter {@code 'list.'} 106 * 107 * @param list A list of {@code TagNode's} that are expected to contain Java-Script listeners. 108 * If some of the members of this input {@code Vector} have {@code TagNode's} with no 109 * listeners, the return array will <I>still remain a parallel (same-size) array</I>, 110 * however some of it's elements will have {@code Properties} with no key/value pairs in them 111 * (zero-size). 112 * 113 * @return A list of {@code Properties} for each element in this {@code 'list.'} 114 * 115 * @see #extract(TagNode) 116 */ 117 public static Properties[] extractAll(Vector<TagNode> list) 118 { 119 Properties[] ret = new Properties[list.size()]; 120 121 for (int i=0; i < list.size(); i++) ret[i] = extract(list.elementAt(i)); 122 123 return ret; 124 } 125 126 127 // ******************************************************************************************** 128 // ******************************************************************************************** 129 // FIND 130 // ******************************************************************************************** 131 // ******************************************************************************************** 132 133 134 /** 135 * Convenience Method. 136 * <BR />Invokes: {@link #find(Vector, int, int)} 137 */ 138 public static int[] find(Vector<? extends HTMLNode> html) 139 { return find(html, 0, -1); } 140 141 /** 142 * Convenience Method. 143 * <BR />Receives: {@code DotPair} 144 * <BR />Invokes: {@link #find(Vector, int, int)} 145 */ 146 public static int[] find(Vector<? extends HTMLNode> html, DotPair dp) 147 { return find(html, dp.start, dp.end + 1); } 148 149 /** 150 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 151 * the page to a sublist of that page, 152 * 153 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 154 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 155 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 156 * 157 * @return A list of index-pointers into the underlying parameter {@code 'html'} where each 158 * node pointed to by the list contains a {@code TagNode} element with a listener attribute / 159 * inner-tag. Search results shall be limited to only considering elements between 160 * {@code sPos ... ePos.} 161 * 162 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 163 * 164 * @see #hasListener(TagNode) 165 * @see LV 166 */ 167 public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos) 168 { 169 // Java Streams to keep lists of int's 170 IntStream.Builder b = IntStream.builder(); 171 LV l = new LV(html, sPos, ePos); 172 TagNode tn; 173 174 for (int i=l.start; i < l.end; i++) 175 176 // Only check Openening TagNode's, long enought to have attributes, and then only 177 // retain TagNode's that have a listener attribute. 178 179 if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) b.add(i); 180 181 return b.build().toArray(); 182 } 183 184 /** 185 * Convenience Method. 186 * <BR />Invokes: {@link #find(Vector, int, int, String[])} 187 */ 188 public static int[] find(Vector<? extends HTMLNode> html, String... htmlTags) 189 { return find(html, 0, -1, htmlTags); } 190 191 /** 192 * Convenience Method. 193 * <BR />Receives: {@code DotPair} 194 * <BR />Invokes: {@link #find(Vector, int, int, String[])} 195 */ 196 public static int[] find(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags) 197 { return find(html, dp.start, dp.end + 1, htmlTags); } 198 199 /** 200 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 201 * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only 202 * allow for matches where the HTML Element is among the list of elements in parameter 203 * {@code 'htmlTags'} 204 * 205 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 206 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 207 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 208 * 209 * @param htmlTags A list of HTML Elements, as a varargs {@code String...} Array, that 210 * constitute a match. Any HTML Element in the web-page that has a listener attribute, but 211 * whose HTML tag/token is not present in this list will not be considered a match, and will 212 * not be returned in this method's search results. 213 * 214 * @return A list of index-pointers into the underlying parameter {@code 'html'} where each 215 * node pointed to by the list contains a {@code TagNode} element with a listener attribute / 216 * inner-tag. Search results shall be limited to only considering elements between 217 * {@code sPos ... ePos,} <B><I>and also</I></B> limited to HTML Elements in parameter 218 * {@code 'htmlTags'} 219 * 220 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 221 * @see #HAS_TOK_MATCH(String, String[]) 222 * @see #hasListener(TagNode) 223 * @see LV 224 */ 225 public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags) 226 { 227 // Java Streams can keep lists of int's 228 IntStream.Builder b = IntStream.builder(); 229 LV l = new LV(html, sPos, ePos); 230 TagNode tn; 231 232 htmlTags = toLowerCase(htmlTags); 233 234 for (int i=l.start; i < l.end; i++) 235 236 if ( 237 // Only Match Opening-Tags with internal-string's long enough to contain Attributes 238 ((tn = html.elementAt(i).openTagPWA()) != null) 239 240 // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags' 241 && HAS_TOK_MATCH(tn.tok, htmlTags) 242 243 // Check whethr or not that the TagNode has a listener attribute (if yes, save it) 244 && hasListener(tn) 245 ) 246 // Save the array-index 247 b.add(i); 248 249 return b.build().toArray(); 250 } 251 252 253 // ******************************************************************************************** 254 // ******************************************************************************************** 255 // GET 256 // ******************************************************************************************** 257 // ******************************************************************************************** 258 259 260 /** 261 * Convenience Method. 262 * <BR />Invokes {@link #get(Vector, int, int)} 263 */ 264 public static Vector<TagNode> get(Vector<? extends HTMLNode> html) 265 { return get(html, 0, -1); } 266 267 /** 268 * Convenience Method. 269 * <BR />Receives: {@code DotPair} 270 * <BR />Invokes: {@link #get(Vector, int, int)} 271 */ 272 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp) 273 { return get(html, dp.start, dp.end + 1); } 274 275 /** 276 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 277 * the page to a sublist of that page, 278 * 279 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 280 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 281 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 282 * 283 * @return A list TagNode elements that have a listener attribute / inner-tag. Search results 284 * shall be limited to only considering elements between sPos ... ePos. 285 * 286 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 287 * @see #hasListener(TagNode) 288 * @see LV 289 */ 290 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, int sPos, int ePos) 291 { 292 Vector<TagNode> ret = new Vector<>(); 293 LV l = new LV(html, sPos, ePos); 294 TagNode tn; 295 296 for (int i=l.start; i < l.end; i++) 297 298 // Only check Openening TagNode's, long enought to have attributes, and then only 299 // retain TagNode's that have a listener attribute. If this TagNodes does have a 300 // listener, place it in the return vector. 301 302 if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) ret.add(tn); 303 304 return ret; 305 } 306 307 /** 308 * Convenience Method. 309 * <BR />Invokes: {@link #get(Vector, int, int, String[])} 310 */ 311 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, String... htmlTags) 312 { return get(html, 0, -1, htmlTags); } 313 314 /** Convenience Method. (Range-Limited Method) 315 * <BR />Receives: {@code DotPair} 316 * <BR />Invokes: {@link #get(Vector, int, int, String[])} 317 */ 318 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags) 319 { return get(html, dp.start, dp.end + 1, htmlTags); } 320 321 /** 322 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 323 * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only 324 * allow for matches where the HTML Element is among the list of elements in parameter 325 * {@code 'htmlTags'} 326 * 327 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 328 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 329 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 330 * 331 * @param htmlTags A list of HTML Elements, as a varargs {@code String} Array, that constitute 332 * a match. Any HTML Element in the web-page that has a listener attribute, but whose HTML 333 * tag/token is not present in this list will not be considered a match, and will not be 334 * returned in this method's search results. 335 * 336 * @return A list of TagNode elements that have a listener attribute / inner-tag. Search 337 * results shall be limited to only considering elements between sPos ... ePos, <B><I>and 338 * also</I></B> limited to HTML Elements in parameter {@code 'htmlTags'} 339 * 340 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 341 * @see #HAS_TOK_MATCH(String, String[]) 342 * @see #hasListener(TagNode) 343 * @see LV 344 */ 345 public static Vector<TagNode> get 346 (Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags) 347 { 348 Vector<TagNode> ret = new Vector<>(); 349 LV l = new LV(html, sPos, ePos); 350 TagNode tn; 351 352 htmlTags = toLowerCase(htmlTags); 353 354 for (int i=l.start; i < l.end; i++) 355 356 if ( 357 // Only Match Opening-Tags with internal-string's long enough to contain Attributes 358 ((tn = html.elementAt(i).openTagPWA()) != null) 359 360 // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags' 361 && HAS_TOK_MATCH(tn.tok, htmlTags) 362 363 // Check whethr or not that the TagNode has a listener attribute (if yes, save it) 364 && hasListener(tn) 365 ) 366 367 // All requirements have been affirmed, save this node in the return vector. 368 ret.add(tn); 369 370 return ret; 371 } 372 373 374 // ******************************************************************************************** 375 // ******************************************************************************************** 376 // Helpers 377 // ******************************************************************************************** 378 // ******************************************************************************************** 379 380 381 /** 382 * Checks if a certain {@code class TagNode} has a listener inner-tag / attribute. 383 * @param tn Any HTML Element {@code TagNode} 384 * @return {@code TRUE} If this {@code TagNode} has a listener, and {@code FALSE} otherwise. 385 * @see StrCmpr#containsIgnoreCase(String, String) 386 */ 387 public static boolean hasListener(TagNode tn) 388 { 389 Properties p = new Properties(); 390 391 for (String listener : l) 392 393 // This is a simple string-comparison - with no reg-ex involved 394 if (StrCmpr.containsIgnoreCase(tn.str, listener)) 395 396 // Slightly slower, uses a - TagNode.AV(attribute) uses a Regular-Expression 397 if (tn.AV(listener) != null) 398 399 // This **may** seem redundant, but it is not, because what if it was phony? 400 // What if the "listener" key-word was actually buried in some "ALT=..." text? 401 402 return true; 403 404 return false; 405 } 406 407 /** 408 * Converts the varargs parameter to lower-case {@code Strings.} 409 * 410 * <BR /><BR />Note that this is <I><B>{@code "Varargs Safe"}</B></I>, 411 * because a new {@code String}-Array is created that has new {@code String}-pointers. 412 * 413 * @param tags The varargs {@code String} parameter acquired from the search-methods in this 414 * class. 415 * 416 * @return a lower-case version of the input. 417 */ 418 protected static String[] toLowerCase(String[] tags) 419 { 420 String[] ret = new String[tags.length]; 421 422 for (int i=0; i < tags.length; i++) 423 424 if (tags[i] != null) ret[i] = tags[i].toLowerCase(); 425 426 else throw new HTMLTokException( 427 "One of the HTML tokens you have passed to the variable-length parameter " + 428 "'htmlTags' was null." 429 ); 430 431 return ret; 432 } 433 434 /** 435 * Checks if the var-args parameter {@code String... htmlTags} matches a particular token 436 * 437 * @param htmlTag The token to be checked against the user's requested {@code 'htmlTags'} list 438 * parameter 439 * 440 * @param htmlTags The list of acceptable HTML Tag Elements. This is a search specification 441 * parameter used by some of the search-methods in this class. 442 * 443 * @return {@code TRUE} If the tested token parameter {@code 'htmlTag'} is a member of this 444 * elements in list parameter {@code 'htmlTags'}, and {@code FALSE} otherwise. 445 */ 446 protected static boolean HAS_TOK_MATCH(String htmlTag, String... htmlTags) 447 { for (String s : htmlTags) if (s.equals(htmlTag)) return true; return false; } 448}