001package Torello.HTML; 002 003import java.util.*; 004import java.util.stream.*; 005 006import Torello.HTML.NodeSearch.*; 007import Torello.Java.*; 008 009import Torello.Java.Additional.Ret2; 010import Torello.HTML.Tools.Images.IF; 011 012/** 013 * Tools to retrieve and insert tags into the {@code <HEAD>} of a web-page. 014 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES> 015 */ 016@Torello.JavaDoc.StaticFunctional 017public class Features 018{ 019 private Features() { } 020 021 /** Error Message that is used repeatedly. */ 022 public static final String NO_HEADER_MESSAGE = 023 "You are attempting to insert an HTML INSERT-STR, but such an element belongs in the " + 024 "page's header. Unfortunately, the page or sub-page you have passed does not have a " + 025 "<HEAD>...</HEAD> sub-section. Therefore, there is no place to insert the elements."; 026 027 /** 028 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 029 * {@code <HEAD> ... </HEAD>}</B> section to add a "logo-image" at the top-left corner of the 030 * Web-Browser's tab for the page when it loads. This logo is called a {@code 'favicon'}. 031 * 032 * @see #insertFavicon(Vector, String) 033 * @see #hasFavicon(Vector) 034 */ 035 public static final String favicon = 036 "<LINK REL='icon' TYPE='image/INSERT-IMAGE-TYPE-HERE' HREF='INSERT-URL-STRING-HERE' />"; 037 038 /** 039 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 040 * {@code <HEAD> ... </HEAD>}</B> section to add a <B>Cascading Style Sheet</B> (a 041 * {@code '.css'} file) to your page. 042 * 043 * <BR /><BR />The web-browser that ultimately loads the HTML that you are exporting will 044 * render the style elements across all the HTML elements in your page that match their 045 * respective CSS-Selectors. 
Without going into a big diatribe about how CSS works, just know 046 * that the {@code String} used to build / instantiate a new {@link TagNode} with an externally 047 * linked {@code CSS}-Page is provided here, by this field. 048 * 049 * @see #insertCSSLink(Vector, String) 050 * @see #getAllCSSLinks(Vector) 051 */ 052 public static final String cssExternalSheet = 053 "<LINK REL=stylesheet TYPE='text/css' HREF='INSERT-URL-STRING-HERE' />"; 054 055 /** 056 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 057 * {@code <HEAD> ... </HEAD>}</B> section to add a <B>Cascading Style Sheet</B> (a 058 * {@code '.css'} file) to your page. This particular {@code String}-Constant Field includes / 059 * allows for a {@code MEDIA}-Attribute / Inner-Tag. 060 * 061 * @see #insertCSSLink(Vector, String) 062 * @see #insertCSSLink(Vector, String, String) 063 * @see #getAllCSSLinks(Vector) 064 */ 065 public static final String cssExternalSheetWithMediaAttribute = 066 "<LINK REL=stylesheet TYPE='text/css' HREF='INSERT-URL-STRING-HERE' " + 067 "MEDIA='INSERT-MEDIA-ATTRIBUTE-VALUE-HERE' />"; 068 069 /** 070 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 071 * {@code <HEAD> ... </HEAD>}</B> section to add an externally-linked 072 * <B>Java-Script File</B> ({@code '.js'} File) to your page. 073 * 074 * <BR /><BR />The Web-Browser will download this <B>Java-Script</B> page from the 075 * {@code URL} that you ultimately provide and (hopefully) load all your variable definitions 076 * and methods when the page loads. 077 * 078 * <BR /><BR /><B CLASS=JDDescLabel>Closing {@code </SCRIPT>} Tag:</B> 079 * 080 * <BR />Inserting an external <B>Java-Script</B> Page has one important difference vis-a-vis 081 * inserting an external CSS-Page. 
Inserting a link to a {@code '.js'} page requires 082 * <B><I>both</I></B> the opening 083 * <B STYLE='color: red;'>{@code <SCRIPT ..>}</B> <B><I>and</I></B> the closing 084 * <B STYLE='color: red;'>{@code </SCRIPT>}</B> 085 * Tags. 086 * 087 * <BR /><BR />This is expected and required even-when / especially-when there is no actual 088 * java-script code being placed on the {@code '.html'} page itself. Effectively, regardless 089 * of whether you are putting actual java-script code into / inside your HTML page, or you are 090 * just inserting a link to a {@code '.js'} File on your server - <I>you must always create 091 * both the open and the closed HTML 092 * <B STYLE='color: red;'>{@code <SCRIPT SRC='...'></SCRIPT>}</B> tags and insert them into 093 * your Vectorized-HTML Web-Page</I>. 094 * 095 * <BR /><BR />In the brief example below, it should be clear that even though the 096 * {@code SCRIPT}-Tags do not enclose any <B>Java-Script</B>, both the open and the closed 097 * versions of the tag are placed into the HTML-File. 098 * 099 * <DIV CLASS="HTML">{@code 100 * <!-- This is a short note about including the HTML SCRIPT element in your web-pages. --> 101 * <HTML> 102 * <HEAD> 103 * <!-- Version #1 Inserting a java-script 'variables & functions' external-page --> 104 * <SCRIPT TYPE='text/javascript' SRC='/script/javaScriptFiles/functions.js'> 105 * </SCRIPT> 106 * <!-- Right here (line above) we always need the closing Script-tag, even when there is no 107 * actual java-script present, and the methods/variables are going to be downloaded from 108 * the java-script file identified in by the SRC="..." attribute! --> 109 * 110 * <SCRIPT TYPE='text/javascript'> 111 * var someVar1; 112 * var someVar2; 113 * 114 * function someFunction() 115 * { return; } 116 * 117 * </SCRIPT> <!-- Either way, the closing-script tag is expected. 
--> 118 * }</DIV> 119 * 120 * @see #insertExternalJavaScriptLink(Vector, String) 121 * @see #getAllExternalJSLinks(Vector) 122 */ 123 public static final String javaScriptExternalPage = 124 "<SCRIPT TYPE='text/javascript' SRC='INSERT-URL-STRING-HERE'>"; 125 126 /** 127 * If you have pages on your site that are almost identical, then you may need to inform search 128 * engines which one to prioritize. Or you might have syndicated content on your site which was 129 * republished elsewhere. You can do both of these things without incurring a duplicate content 130 * penalty – as long as you use a {@code CANONICAL}-Tag. 131 * 132 * <BR /><BR />Instead of confusing Google and missing your ranking on the SERP's, you are 133 * guiding the crawlers as to which URL counts as the “main” one. This places the emphasis on 134 * the right URL and prevents the others from cannibalizing your SEO. 135 * 136 * <BR /><BR />Use {@code CANONICAL}-Tags to avoid having problems with duplicate content that 137 * may affect your rankings. 138 * 139 * <BR /><BR /><HR><BR /> 140 * 141 * The content of this Documentation Page was copied from a page on the web-domain 142 * {@code 'http://searchenginewatch.com'}. It was lifted on May 24th, 2019. 
143 * 144 * <BR /><BR />See link below, if still valid: 145 * 146 * <BR /><A 147 * HREF="https://searchenginewatch.com/2018/04/04/a-quick-and-easy-guide-to-meta-tags-in-seo/"> 148 * https://searchenginewatch.com/2018/04/04/a-quick-and-easy-guide-to-meta-tags-in-seo/ </A> 149 * 150 * @see #insertCanonicalURL(Vector, String) 151 * @see #hasCanonicalURL(Vector) 152 */ 153 public static final String canonicalTag = 154 "<LINK REL=canonical HREF='INSERT-URL-STRING-HERE' />"; 155 156 /** This is a new-line {@code HTMLNode} */ 157 protected static final TextNode NEWLINE = new TextNode("\n"); 158 159 /** 160 * This method checks whether the {@code String}-Parameter {@code 's'} contains a 161 * Single-Quotations Punctuation-Mark anywhere inside that {@code String}. If so, a properly 162 * formatted exception is thrown. This is used as an internal Helper-Method. 163 * 164 * @param s This may be any Java {@code String}, but generally it is one used to insert into an 165 * HTML {@code CONTENT}-Attribute. 166 * 167 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=s DATA-FILE-ID=FT_Q_EX> 168 */ 169 protected static void checkForSingleQuote(String s) 170 { 171 int pos; 172 173 if ((pos = s.indexOf("'")) != -1) throw new QuotesException( 174 "The passed string-parameter may not contain a single-quote punctuation mark. " + 175 "Yours was: [" + s + "], and has a single-quotation mark at string-position " + 176 "[" + pos + "]" 177 ); 178 } 179 180 /** 181 * This inserts a favicon HTML link element into the right location so that a particular 182 * Web-Page will render an "browser icon image" into the top-left corner of the Web-Page's 183 * Browser-Tab. 
184 * 185 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 186 * 187 * @param imageURLAsString <EMBED CLASS='external-html' DATA-FIELD=favicon 188 * DATA-FILE-ID=FT_STR_INS_PARAM> 189 * 190 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 191 * 192 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=imageURLAsString 193 * DATA-FILE-ID=FT_Q_EX> 194 * 195 * @see #favicon 196 * @see #checkForSingleQuote(String) 197 */ 198 public static void insertFavicon(Vector<HTMLNode> html, String imageURLAsString) 199 { 200 // Insert the Favicon <LINK ...> element into the <HEAD> section of the input html page. 201 // <link rel='icon' type='image/INSERT-IMAGE-TYPE-HERE' href='INSERT-URL-STRING-HERE' /> 202 203 checkForSingleQuote(imageURLAsString); 204 205 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 206 DotPair header = TagNodeFindInclusive.first(html, "head"); 207 208 if (header == null) throw new NodeNotFoundException 209 (NO_HEADER_MESSAGE.replace("INSERT-STR", "favicon <LINK> element")); 210 211 String ext = IF.getGuess(imageURLAsString).extension; 212 213 if (ext == null) throw new IllegalArgumentException( 214 "The Image-Type of the 'imageURLAsString' parameter could not be determined. " + 215 "The method IF.getGuess(faviconURL) returned null. Please provide a favicon with " + 216 "standard image file-type. This is required because the image-type is required " + 217 "to be placed inside the HTML <LINK TYPE=... HREF=...> Element 'TYPE' Attribute." 218 ); 219 220 // Build a new Favicon TagNode. 221 TagNode faviconTN = new TagNode 222 ("<LINK REL='icon' TYPE='image/" + ext + "' HREF='" + imageURLAsString + "' />"); 223 224 // Insert the Favicon into the page. 
Put it at the top of the header, just after <HEAD> 225 Util.insertNodes(html, header.start + 1, NEWLINE, faviconTN, NEWLINE); 226 } 227 228 /** 229 * This method will search for an HTML <B STYLE='color: red;'>{@code <LINK REL="icon" ...>}</B> 230 * Tag, in hopes of finding a {@code REL}-Attribute whose value is {@code 'icon'}. 231 * 232 * <BR /><BR />When this method finds such a tag, it will return the 233 * <B STYLE='color: red;'>value</B> of that Tag's {@code HREF}-Attribute. 234 * 235 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 236 * 237 * @return This method will return the {@code String}-<B STYLE='color: red;'>value</B> of the 238 * {@code HREF}-Attribute found inside the {@code LINK}-Tag. 239 * 240 * If this page or sub-page does not have such a tag with an {@code HREF}-Attribute, then null 241 * is returned. 242 * 243 * <BR /><BR /><DIV CLASS=JDHint> 244 * In the event that multiple copies of the HTML {@code LINK}-Tag are found, and more than one 245 * of these tags has a {@code REL}-Attribute with a <B STYLE='color: red;'>value</B> equal to 246 * {@code "icon"}, then this method will simple return the first of the {@code 'favicon'} tags 247 * that were found. 248 * 249 * <BR /><BR />An (albeit erroneous) page, with multiple favicon definitions, will not cause 250 * this method to throw an exception. 251 * </DIV> 252 * 253 * @see InnerTagGet 254 * @see #favicon 255 * @see TagNode#AV(String) 256 */ 257 public static String hasFavicon(Vector<? extends HTMLNode> html) 258 { 259 // InnerTagGet.all: Returns a vector of TagNode's that resemble: <LINK rel="icon" ...> 260 // 261 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 262 // String-Comparison. 263 // Trim the 'rel' Attribute-Value String of possible leading & trailing 264 // White-Space before performing the comparison. 
265 266 Vector<TagNode> list = InnerTagGet.all 267 (html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "icon"); 268 269 // If there were no HTML "<LINK ...>" elements with REL='ICON' attributes, then 270 // there was no favicon. 271 272 if (list.size() == 0) return null; 273 274 // Just in case there were multiple favicon <LINK ...> tags, just return the first 275 // one found. Inside of a <LINK REL="icon" HREF="..."> the 'HREF' Attribute contains 276 // the Image-URL. Use TagNode.AV("HREF") to retrieve that image url. 277 278 String s; 279 for (TagNode tn : list) if ((s = tn.AV("HREF")) != null) return s; 280 281 // If for some reason, none of these <LINK REL='ICON' ...> elements had an "HREF" 282 // attribute, then just return null. 283 284 return null; 285 } 286 287 /** 288 * This inserts an HTML {@code LINK}-Tag into Web-Page parameter {@code 'html'} with the 289 * purpose of linking an externally-defined <B>Cascading Style Sheet</B> (also known as a 290 * {@code CSS}-Page) into that Page-{@code Vector}. 
291 * 292 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 293 * 294 * @param externalCSSFileURLAsString <EMBED CLASS='external-html' DATA-FIELD=cssExternalSheet 295 * DATA-FILE-ID=FT_STR_INS_PARAM> 296 * 297 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 298 * 299 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=externalCSSFileURLAsString 300 * DATA-FILE-ID=FT_Q_EX> 301 * 302 * @see #cssExternalSheet 303 * @see #cssExternalSheetWithMediaAttribute 304 * @see #insertCSSLink(Vector, String, String) 305 * @see #getAllCSSLinks(Vector) 306 * @see #checkForSingleQuote(String) 307 * @see DotPair 308 * @see TagNode 309 */ 310 public static void insertCSSLink(Vector<HTMLNode> html, String externalCSSFileURLAsString) 311 { 312 // Inserts an external CSS Link into the <HEAD> section of this html page vector 313 // <link REL=stylesheet type='text/css' href='INSERT-URL-STRING-HERE' /> 314 315 checkForSingleQuote(externalCSSFileURLAsString); 316 317 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 318 DotPair header = TagNodeFindInclusive.first(html, "head"); 319 320 if (header == null) throw new NodeNotFoundException( 321 NO_HEADER_MESSAGE.replace 322 ("INSERT-STR", "externally-linked CSS page <LINK> element") 323 ); 324 325 TagNode cssTN = new TagNode 326 ("<LINK REL=stylesheet TYPE='text/css' HREF='" + externalCSSFileURLAsString + "' />"); 327 328 // Insert the Style-Sheet link into the page. 
Put it at the top of the header, 329 // just after <HEAD> 330 331 Util.insertNodes(html, header.start + 1, NEWLINE, cssTN, NEWLINE); 332 } 333 334 /** 335 * This inserts a <B>Cascading Style Sheet</B> with the extra {@code MEDIA}-Attribute using 336 * an HTML {@code LINK}-Tag into the Vectorized-HTML Web-Page parameter {@code 'html'} 337 * 338 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 339 * 340 * @param externalCSSFileURLAsString <EMBED CLASS='external-html' DATA-FIELD=cssExternalSheet 341 * DATA-FILE-ID=FT_STR_INS_PARAM> 342 * 343 * @param mediaInnerTagValue Externally linked CSS-Pages, which are included using the HTML 344 * {@code LINK}-Tag may explicitly request a {@code MEDIA}-Attribute be inserted into that 345 * Tag. That {@code MEDIA}-Attribute may take one of five values. In such a tag, the extra 346 * attribute specifies when the listed CSS-Rules are to be applied. 347 * 348 * <BR /><BR />Listed here are the most common values for the {@code MEDIA}-Attribute: 349 * 350 * <BR /><TABLE CLASS=JDBriefTable> 351 * <TR> 352 * <TH>Attribute Value</TH> 353 * <TH>Intended CSS Meaning</TH> 354 * </TR> 355 * <TR> 356 * <TD>screen</TD> 357 * <TD>indicates for use on a computer screen</TD> 358 * </TR> 359 * <TR> 360 * <TD>projection</TD> 361 * <TD>for projected presentations</TD> 362 * </TR> 363 * <TR> 364 * <TD>handheld</TD> 365 * <TD>for handheld devices (typically with small screens)</TD></TR> 366 * <TR> 367 * <TD>print</TD> 368 * <TD>to style printed Web-Pages</TD> 369 * </TR> 370 * <TR> 371 * <TD>all</TD> 372 * <TD>(default value) This is what most people choose. You can leave off the 373 * {@code MEDIA}-Attribute completely if you want your styles to be applied for all 374 * media types. 
375 * </TD> 376 * </TR> 377 * </TABLE> 378 * 379 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 380 * 381 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM1=externalCSSFileURLAsString 382 * DATA-PARAM2=mediaInnerTagValue DATA-FILE-ID=FT_Q_EX_DOUBL> 383 * 384 * @see #cssExternalSheet 385 * @see #cssExternalSheetWithMediaAttribute 386 * @see #insertCSSLink(Vector, String) 387 * @see #getAllCSSLinks(Vector) 388 * @see #checkForSingleQuote(String) 389 * @see DotPair 390 */ 391 public static void insertCSSLink 392 (Vector<HTMLNode> html, String externalCSSFileURLAsString, String mediaInnerTagValue) 393 { 394 // Inserts an external CSS Link (with 'media' attribute) into the <HEAD> section of 395 // this html page vector 396 // <link REL=stylesheet type='text/css' href='INSERT-URL-STRING-HERE' 397 // media='INSERT-MEDIA-ATTRIBUTE-VALUE-HERE' /> 398 399 checkForSingleQuote(externalCSSFileURLAsString); 400 checkForSingleQuote(mediaInnerTagValue); 401 402 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 403 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 404 405 if (header == null) throw new NodeNotFoundException( 406 NO_HEADER_MESSAGE.replace 407 ("INSERT-STR", "externally-linked CSS Style-Sheet LINK-Tag") 408 ); 409 410 // Build the TagNode 411 TagNode cssTN = new TagNode( 412 "<LINK REL=stylesheet TYPE='text/css' HREF='" + externalCSSFileURLAsString + "' " + 413 "MEDIA='" + mediaInnerTagValue + "' />" 414 ); 415 416 // Insert the Style-Sheet link into the page. Put it at the top of the header, just 417 // after <HEAD> 418 419 Util.insertNodes(html, header.start + 1, NEWLINE, cssTN, NEWLINE); 420 } 421 422 /** 423 * This will retrieve all linked CSS-Pages from Vectorized-HTML Web-Page parameter 424 * {@code 'html'}. 
425 * 426 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 427 * @return This will return the links as a list of {@link TagNode}'s' 428 * @see #insertCSSLink(Vector, String) 429 * @see #insertCSSLink(Vector, String, String) 430 * @see InnerTagGet 431 */ 432 public static Vector<TagNode> getAllCSSLinks(Vector<? extends HTMLNode> html) 433 { 434 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 435 // <LINK rel="stylesheet" ...> 436 // 437 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 438 // String-Comparison 439 // Trim the 'rel' Attribute-Value String of possible leading & trailing 440 // White-Space before performing the comparison. 441 442 return InnerTagGet.all(html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "stylesheet"); 443 } 444 445 /** 446 * This inserts an HTML <B STYLE='color: red;'>{@code '<LINK ...>'}</B> element into the proper 447 * location for linking an externally-defined <B>Java-Script</B> (a {@code '.js'} File) into 448 * the Web-Page. 449 * 450 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 451 * 452 * @param externalJSFileURLAsString 453 * <EMBED CLASS='external-html' DATA-FIELD=javaScriptExternalPage DATA-FILE-ID=FT_STR_INS_PARAM> 454 * 455 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 456 * 457 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=externalJSFileURLAsString 458 * DATA-FILE-ID=FT_Q_EX> 459 * 460 * @see #javaScriptExternalPage 461 * @see #getAllExternalJSLinks(Vector) 462 * @see #checkForSingleQuote(String) 463 * @see TagNode 464 * @see TextNode 465 * @see DotPair 466 * @see HTMLTags#hasTag(String, TC) 467 */ 468 public static void insertExternalJavaScriptLink 469 (Vector<HTMLNode> html, String externalJSFileURLAsString) 470 { 471 // Builds an external Java-Script link, and inserts it into the header portion of 472 // this html page. 
473 // <script type='text/javascript' src='INSERT-URL-STRING-HERE'> 474 475 checkForSingleQuote(externalJSFileURLAsString); 476 477 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 478 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 479 480 if (header == null) throw new NodeNotFoundException( 481 NO_HEADER_MESSAGE.replace( 482 "INSERT-STR", "externally-linked Java-Script <SCRIPT> ... </SCRIPT> elements") 483 ); 484 485 // Build an HTML <SCRIPT ...> node, and a </SCRIPT> node. 486 HTMLNode n = new TagNode 487 ("<SCRIPT TYPE='text/javascript' SRC='" + externalJSFileURLAsString + "'>"); 488 489 HTMLNode closeN = HTMLTags.hasTag("script", TC.ClosingTags); 490 491 // Insert the Java-Script link into the page. Put it at the top of the header, just 492 // after <HEAD> 493 494 Util.insertNodes(html, header.start + 1, NEWLINE, n, closeN, NEWLINE); 495 } 496 497 /** 498 * Inserting <B>Java-Script</B> directly onto an HTML-Page and including an external link to a 499 * {@code '.js'} File are extremely similar tasks. Either way, in both cases the construct is 500 * simply: 501 * 502 * <BR /><BR /><B STYLE='color: red;'>{@code <SCRIPT TYPE='text/javascript'> ... </SCRIPT>}</B> 503 * 504 * <BR /><BR />When the actual functions and methods are pasted into an HTML-Page directly, 505 * they are pasted into the {@code String} above where the ellipses {@code '...'} are. When a 506 * link is made to an external page from a directory on the same Web-Server - both the open and 507 * the close HTML {@code SCRIPT}-Tag's must be included. 508 * 509 * <BR /><BR />If just a link is being added, then the text-content of the {@code SCRIPT}-Tag 510 * should just be left blank or empty. Instead, the {@code URL} to the Java-Script Page is 511 * added as an HTML {@code SRC}-Attribute. 
512 * 513 * <BR /><BR />This method will retrieve any and all {@code 'SCRIPT'} nodes that meet the 514 * following criteria: 515 * 516 * <BR /><BR /><OL CLASS=JDOL> 517 * 518 * <LI> The <B>Script Body</B> must be empty, meaning there is no Java-Script between the 519 * opening and closing {@code SCRIPT}-Tags 520 * </LI> 521 * 522 * <LI> The HTML {@code SRC}-Attribute must contain a non-null, non-zero-length 523 * <B STYLE='color: red;'>value</B> 524 * </LI> 525 * 526 * </OL> 527 * 528 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 529 * 530 * @return This will return a list of relative {@code URL's} to externally linked 531 * <B>Java-Script</B> Pages as {@code String's} 532 * 533 * @see InnerTagGetInclusive 534 * @see #javaScriptExternalPage 535 * @see #insertExternalJavaScriptLink(Vector, String) 536 * @see TagNode 537 * @see TextNode 538 * @see TagNode#AV(String) 539 * @see HTMLNode#str 540 */ 541 public static String[] getAllExternalJSLinks(Vector<? extends HTMLNode> html) 542 { 543 // InnerTagGetInclusive.all: Returns a vector of TagNode's that resemble: 544 // <SCRIPT TYPE="javascript" ...> 545 // 546 // CN_CI: Check the 'rel' Attribute-Value using a Case-Insensitive, "Contains" 547 // String-Comparison 548 // 'contains' rather than 'equals' testing is done because this value may be 549 // "javascript", but it may also be "text/javascript" 550 // 551 // Inclusive: This means that everything between the <SCRIPT type="javascript"> ... and 552 // the closing </SCRIPT> tag are returned in a vector of vectors. 
553 554 Vector<Vector<HTMLNode>> v = InnerTagGetInclusive.all 555 (html, "SCRIPT", "TYPE", TextComparitor.CN_CI, "javascript"); 556 557 Stream.Builder<String> b = Stream.builder(); 558 559 TOP: 560 for (Vector<HTMLNode> scriptSection : v) 561 { 562 String srcValue = null; 563 564 for (HTMLNode n : scriptSection) 565 { 566 if (n.isTagNode()) 567 if ((srcValue = ((TagNode) n).AV("SRC")) != null) 568 break; 569 570 if (n.isTextNode()) 571 if (n.str.trim().length() > 0) 572 break TOP; 573 } 574 575 b.add(srcValue); 576 } 577 578 return b.build().toArray(String[]::new); 579 } 580 581 /** 582 * This section will insert a Canonical-{@code URL} into Vectorized-HTML parameter 583 * {@code 'html'}. The {@code URL} itself will be inserted into an HTML {@code LINK}-Tag as 584 * below: 585 * 586 * <BR /><BR /><B STYLE='color: red;'>{@code <LINK REL=canonical HREF='the_url'>}</B> 587 * 588 * <BR /><BR />Since HTML mandates that such elements be located in the {@code 'HEAD'} portion 589 * of an HTML-Page, if the Vectorized-HTML parameter {@code 'html'} does not have a 590 * {@code 'HEAD'} area, then this method shall throw a {@link NodeNotFoundException}. 591 * 592 * <BR /><BR />Note that this exception is an unchecked / runtime exception. 
593 * 594 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 595 * 596 * @param canonicalURLAsStr 597 * <EMBED CLASS='external-html' DATA-FIELD=canonicalTag DATA-FILE-ID=FT_STR_INS_PARAM> 598 * 599 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 600 * 601 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=canonicalURLAsStr 602 * DATA-FILE-ID=FT_Q_EX> 603 * 604 * @see #canonicalTag 605 * @see #hasCanonicalURL(Vector) 606 * @see #checkForSingleQuote(String) 607 * @see TagNode 608 * @see DotPair 609 */ 610 public static void insertCanonicalURL(Vector<HTMLNode> html, String canonicalURLAsStr) 611 { 612 // Inserts a link element into the header of this page 613 // <link REL=canonical href='INSERT-URL-STRING-HERE' /> 614 615 checkForSingleQuote(canonicalURLAsStr); 616 617 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 618 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 619 620 if (header == null) throw new NodeNotFoundException 621 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Canonical-url LINK-Tag")); 622 623 // Builds the canonical <LINK ...> element 624 TagNode linkTN = new TagNode 625 ("<LINK REL=canonical HREF='" + canonicalURLAsStr + "' />"); 626 627 // Insert the canonical-url into the page. Put it at the top of the header, just 628 // after <HEAD> 629 630 Util.insertNodes(html, header.start + 1, NEWLINE, linkTN, NEWLINE); 631 } 632 633 /** 634 * This method will check whether a Vectorized-HTML Page has an HTML 635 * <B STYLE='color: red;'>{@code <LINK REL=canonical ...>}</B> Tag. This tag is used to 636 * inform Search-Engines whether or not this page <I>surrenders</I> or <I>relays</I> to a 637 * "Canonical-{@code URL}". 638 * 639 * <BR /><BR />Canonical-Pages help Search-Engines index large web-sites by providing a root or 640 * Master-{@code URL} to which all sub-pages may point. 
Such {@code URL's} are often (but not 641 * always) like a "Table of Contents". 642 * 643 * <BR /><BR />The primary goal of having a canonical is to avoid forcing Search-Engines (and 644 * their users) from sifting through and indexing every page of a large Web-Site, and instead 645 * focusing on either an introductory T.O.C. or a Title-Page. 646 * 647 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 648 * 649 * @return This will return whatever text was placed inside the canonical-url 650 * {@code HREF='some_url'} attribute/value pair of the HTML link tag. If there were no HTML 651 * {@code <LINK REL=canonical HREF='some_url'>} tag, then this method will return null. 652 * 653 * @throws MalformedHTMLException This exception will be thrown if there are multiple html tags 654 * that match the link, and REL=canonical search criteria requirements. If an HTML element 655 * {@code <link REL=canonical>} is found, but that element does not have an 656 * {@code href='...'} attribute, or that attribute is of zero length, then this a situation 657 * that will also force this exception to throw. 658 * 659 * @see InnerTagGet 660 * @see #canonicalTag 661 * @see #insertCanonicalURL(Vector, String) 662 * @see TagNode#AV(String) 663 */ 664 public static String hasCanonicalURL(Vector<? extends HTMLNode> html) 665 throws MalformedHTMLException 666 { 667 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 668 // <LINK rel="canonical" ...> 669 // 670 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 671 // String-Comparison 672 // Trim the 'rel' Attribute-Value String of possible leading & trailing 673 // White-Space before performing the comparison. 
674 675 Vector<TagNode> v = InnerTagGet.all 676 (html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "canonical"); 677 678 if (v.size() == 0) return null; 679 680 if (v.size() > 1) throw new MalformedHTMLException( 681 "The Web-Page you have passed has precisely " + v.size() + 682 " Canonical-URL LINK-Tags, but it may not have more than 1. This is " + 683 "invalid HTML." 684 ); 685 686 String s = v.elementAt(0).AV("href"); 687 688 if (s == null) throw new MalformedHTMLException( 689 "The HTML LINK-Tag that was retrieved, contained a " + 690 "REL=canonical Attribute-Value pair, but did not have an HREF-Attribute." + 691 "This is invalid HTML." 692 ); 693 694 if (s.length() == 0) throw new MalformedHTMLException( 695 "The HTML LINK-Tag that was retrieved contained a zero-length " + 696 "String as the Attribute-Value for the HREF-Attribute. This is not " + 697 "invalid, but poorly formatted HTML." 698 ); 699 700 return s; 701 } 702 703 /** 704 * Tools made specifically for the {@code <META>} tags in the {@code <HEAD>} of a web-page. 705 * 706 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_META> 707 */ 708 @Torello.JavaDoc.StaticFunctional 709 public static class Meta 710 { 711 private Meta() { } 712 713 714 // **************************************************************************************** 715 // **************************************************************************************** 716 // Static String-Constants (the tags!) 717 // **************************************************************************************** 718 // **************************************************************************************** 719 720 721 /** 722 * This is the most common HTML <B STYLE='color: red;'>{@code <META ... >}</B> Tag. 
/**
 * This is the most common HTML <B STYLE='color: red;'>{@code <META ... >}</B> Tag.  It pairs
 * a {@code NAME}-Attribute with a {@code CONTENT}-Attribute.  The two
 * {@code INSERT-...-HERE} place-holders mark where the actual name and content text belong.
 *
 * @see #getAllMetaTagNames(Vector)
 * @see #insertMetaTagName(Vector, MetaTagName, String)
 */
public static final String metaTagName =
    "<META NAME='INSERT-NAME-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * This HTML <B STYLE='color: red;'>{@code <META ...>}</B> Tag is less frequently used, but
 * does provide some properties needed and used by various Web-Servers.  It is the
 * <B>{@code 'ITEMPROP'}</B> Meta-Tag, pairing an {@code ITEMPROP}-Attribute with a
 * {@code CONTENT}-Attribute.
 *
 * @see #getItemProp(Vector, String)
 * @see #insertItemProp(Vector, String, String)
 */
public static final String metaTagItemProp =
    "<META ITEMPROP='INSERT-ITEMPROP-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=robots DATA-FILE-ID=FEATURES_HTTP_EQUIV>
 *
 * Template for a Meta-Tag that pairs an {@code HTTP-EQUIV}-Attribute with a
 * {@code CONTENT}-Attribute.
 *
 * @see #getHTTPEquiv(Vector, String)
 * @see #insertHTTPEquiv(Vector, String, String)
 */
public static final String metaTagHTTPEquiv =
    "<META HTTP-EQUIV='INSERT-HTTP-EQUIV-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=robots DATA-FILE-ID=FEATURES_META_PROP>
 *
 * A {@code Robots}-Property Meta-Tag lets you utilize a granular, page-specific approach
 * to controlling how an individual page should be indexed and served to users in
 * Search-Engine results.
 *
 * @see #insertRobots(Vector, boolean, boolean)
 * @see #getAllRobots(Vector)
 */
public static final String robotsMetaTag =
    "<META NAME=robots CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=description DATA-FILE-ID=FEATURES_META_PROP>
 *
 * When search engines crawl Internet Web-Pages to read the provided key-words and
 * descriptions used for indexing, this particular Meta-Tag Property is one of the first
 * those crawlers will look at.
 *
 * <BR /><BR />You may include a {@code Description}-Property in the {@code 'HEAD'} portion
 * of your site's main-page.  A {@code META}-Description can influence both a
 * Search-Engine's Web-Crawlers, and ultimately the click-through rates of your readers.
 *
 * <BR /><BR />Google has stated that Meta-Tag {@code Description}-Properties are NOT used
 * to rank pages.
 *
 * @see #insertDescription(Vector, String)
 * @see #hasDescription(Vector)
 */
public static final String descriptionMetaTag =
    "<META NAME=description CONTENT='INSERT-DESCRIPTION-OR-KEYWORDS-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_UTF8>
 *
 * Unlike the other templates in this class, this {@code String} has no place-holders: it is
 * a complete {@code HTTP-EQUIV='Content-Type'} Meta-Tag declaring {@code charset=utf-8}.
 *
 * @see #insertUTF8MetaTag(Vector)
 * @see #hasUTF8MetaTag(Vector)
 */
public static final String UTF8MetaTag =
    "<META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=utf-8'>";

/**
 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_OPEN_GRAPH>
 *
 * Template for an Open-Graph Meta-Tag.  The {@code PROPERTY}-Attribute value always begins
 * with the {@code 'og:'} prefix.
 *
 * @see #insertOGMetaTag(Vector, String, String)
 * @see #getAllOGMetaTags(Vector)
 */
public static final String openGraphMetaTag =
    "<META PROPERTY='og:INSERT-OG-PROPERTY-HERE' CONTENT='INSERT-OG-VALUE-HERE'>";

/**
 * All Open-Graph Property names.
 *
 * <BR /><BR />NOTE(review): this map is created empty here; it is presumably populated by
 * an initializer elsewhere in this class - confirm.  The reference is {@code final}, but the
 * {@code TreeMap} itself remains mutable.
 */
public static final TreeMap<String, String> openGraphProperties = new TreeMap<>();
810 * 811 * @see #insertKeyWords(Vector, String[]) 812 * @see #getAllKeyWords(Vector) 813 */ 814 public static final String keyWordsMetaTag = 815 "<META NAME=keywords CONTENT='INSERT-COMMA-SEPARATED-KEYWORDS-HERE'>"; 816 817 /** 818 * <EMBED CLASS='external-html' DATA-PROP=author DATA-FILE-ID=FEATURES_META_PROP> 819 * 820 * This helps identify Web-Sites or Web-Pages "Author-Names" to Web-Indexing and Web-Search 821 * Organizations. 822 * 823 * @see #insertAuthor(Vector, String) 824 * @see #hasAuthor(Vector) 825 */ 826 public static final String authorMetaTag = 827 "<META NAME=author CONTENT='INSERT-AUTHOR-NAME-HERE'>"; 828 829 830 // **************************************************************************************** 831 // **************************************************************************************** 832 // Retrieve all Meta-Tags as a java.util.Properties instance 833 // **************************************************************************************** 834 // **************************************************************************************** 835 836 837 /** 838 * This simple method will retrieve a {@code java.util.Properties} object for each and 839 * every HTML <B STYLE='color: red'>{@code <META ...>}</B> tag found within a 840 * Vectorized-HTML Web-Page. 841 * 842 * @param page Any Vectorized-HTML page. It is expected that this page contain a few 843 * {@code META}-Tags. If not, the method will still return an empty 844 * {@code Vector<Properties>} having {@code size()} of zero. 845 * 846 * @return The Java {@code 'Properties'} object that is returned from a call to 847 * {@link TagNode#allAV()} 848 * 849 * @see TagNode#allAV() 850 * @see TagNodeGet 851 */ 852 public static Vector<Properties> getAllMeta(Vector<HTMLNode> page) 853 { 854 Vector<Properties> ret = new Vector<>(); 855 856 // Retrieve all TagNode's that are HTML <META ...> Elements. 
Invoke TagNode.allAV() 857 // on each of these nodes to retrieve a java.util.Properties instance.\ 858 // 859 // NOTE: These "Properties" could possibly be combined into a single Properties 860 // instance, but because of the ever-changing nature of Web-Page 861 // Meta-Information tags, this is not employed here. It is an exercise 862 // left to the programmer. 863 864 for (TagNode tn : TagNodeGet.all(page, TC.OpeningTags, "META")) 865 ret.add(tn.allAV()); 866 867 return ret; 868 } 869 870 871 // **************************************************************************************** 872 // **************************************************************************************** 873 // Retrieve NAME/Property Meta-Tags 874 // **************************************************************************************** 875 // **************************************************************************************** 876 877 878 /** 879 * This method will find an HTML 880 * <B STYLE='color: red;'>{@code <META NAME=... CONTENT=...>}</B> element whose 881 * {@code NAME}-Attribute has a {@code String}-value equal-to (<I>ignoring case</I>) the 882 * value of the provided {@code String}-parameter {@code 'name'}. 883 * 884 * <BR /><BR />After this HTML {@code META}-Tag has been identified, the 885 * {@code String}-value of it's {@code CONTENT}-Attribute will be extracted and returned. 886 * 887 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 888 * 889 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 890 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 891 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 892 * will return null. 
893 * 894 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 895 * 896 * <BR />Before the comparison is done with the {@code 'name'} parameter, that 897 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 898 * <I>is done while ignoring case</I>. 899 * 900 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 901 * 902 * @param name The name of the {@code <META NAME=...>} Tag. 903 * 904 * @return The {@code String}-<B STYLE='color: red;'>value</B> of the 905 * {@code CONTENT}-Attribute for a Meta-Tag whose {@code NAME}-Attribute is equal to the 906 * specified name provided by parameter {@code 'name'}. If such information is not found 907 * on the page, then this method shall return null. 908 * 909 * @see #getItemProp(Vector, String) 910 * @see #getHTTPEquiv(Vector, String) 911 */ 912 public static String getMetaTagName(Vector<HTMLNode> html, String name) 913 { 914 // Find the first <META NAME=... CONTENT=...> tag element where the name equals 915 // the string-value provided by parameter name. 916 917 TagNode tn = InnerTagGet.first 918 (html, "META", "NAME", TextComparitor.EQ_CI, name.trim()); 919 920 // If there are no <META NAME='NAME' CONTENT=...> elements found on the page, 921 // then this method returns null. 922 923 if (tn == null) return null; 924 925 // Return the string-value of the attribute 'content'. Note that if this 926 // attribute isn't available, this method shall return 'null', gracefully. 927 928 return tn.AV("CONTENT"); 929 } 930 931 932 /** 933 * This will retrieve all Meta-Tag's having {@code NAME}-Attribute and 934 * {@code CONTENT}-Attribute pairs. 935 * 936 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 937 * 938 * @return a {@code java.util.Hashtable} of all the Meta-Tag Name/Content pairs that do not 939 * have null values. 
940 * 941 * @throws IllegalArgumentException The method {@code MetaTagName.valueOf(...)} will throw 942 * an Illegal Argument Exception if any of the {@code <META NAME=...>} elements use a value 943 * of "NAME" that is not listed or identified in the Enumerated Type "MetaTagName". 944 * 945 * <BR /><BR /><DIV CLASS=JDHint> 946 * <B>ALTERNATIVE:</B> As Internet Companies come and go, pinning down a complete list of 947 * valid Meta Tag's that use the "NAME" Attribute is a possibly misguided approach. In 948 * lieu of eliminating the Enumerated-Type {@code MetaTagName}, it should be easier to just 949 * use the standard TagNode search below: 950 * </DIV> 951 * 952 * <DIV CLASS=EXAMPLE>{@code 953 * // This code should be used as an alternative to this method if there are non-standard 954 * // HTML Meta Tag Names. It uses the more fundamental InnerTagGet Method. 955 * 956 * // This will retrieve all <META ...> HTML Elements that have a "NAME" Property. 957 * Vector<TagNode> metaTags = InnerTagGet.all(page, "meta", "name"); 958 * 959 * // This will print out those results: 960 * for (TagNode metaTag : metaTags) System.out.println 961 * ("Name:\t" + metaTag.AV("name") + "\tContent:\t" + metaTag.AV("content")); 962 * }</DIV> 963 * 964 * @see MetaTagName 965 * @see #metaTagName 966 * @see #insertMetaTagName(Vector, MetaTagName, String) 967 * @see InnerTagGet 968 */ 969 public static Hashtable<MetaTagName, String> getAllMetaTagNames 970 (Vector<? extends HTMLNode> html) 971 { 972 Hashtable<MetaTagName, String> ret = new Hashtable<>(); 973 974 // Converting the output "Vector<TagNode>" to a "Stream<TagNode>" by calling the 975 // .stream() method mainly because java streams provide the very simple 976 // 'filter(Predicate)' and 'forEach(Consumer)' methods. Vector.removeIf and 977 // Vector.forEach could also have been easily used as well. 
978 979 // InnerTagGet.all returns a vector containing all <META NAME=...> TagNode's where 980 // the value of the 'name' attribute is one of the pre-defined MetaTagName 981 // EnumeratedTypes. 982 983 // NOTE: This is done via a java.util.function.Predicate<String> and a lambda 984 // expression 985 986 InnerTagGet 987 .all (html, "META", "NAME", (String nameAttributeValue) -> 988 MetaTagName.valueOf 989 (nameAttributeValue.toLowerCase().trim()) != null) 990 991 .stream() 992 .filter((TagNode tn) -> tn.AV("CONTENT") != null) 993 994 .forEach((TagNode tn) -> 995 996 ret.put( 997 MetaTagName.valueOf(tn.AV("NAME").toLowerCase().trim()), 998 tn.AV("CONTENT") 999 )); 1000 1001 return ret; 1002 } 1003 1004 1005 // **************************************************************************************** 1006 // **************************************************************************************** 1007 // Retrieve **SPECIFIC** NAME/Property Meta-Tags 1008 // **************************************************************************************** 1009 // **************************************************************************************** 1010 1011 1012 /** 1013 * This method looks for robots HTML <B STYLE='color: red;'>{@code <META NAME=robots>} 1014 * </B> tag, and returns the value of the {@code content}-Attribute. 1015 * 1016 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1017 * 1018 * @return This will return a {@code Vector} of the robots named or specified by the HTML 1019 * Meta-Tag's present on this page. 1020 * 1021 * @throws MalformedHTMLException If any invalid robot-strings are found on the page, this 1022 * method will throw an exception. The impetus behind this is to prevent accidentally 1023 * ignoring newly found tags, or incorrect tags. The extraction of the robots Meta-Tag from 1024 * an HTML page can be performed manually, if throwing an exception is causing problems. 
1025 * The code to do this is listed in the documentation of this method. 1026 * 1027 * @see #robotsMetaTag 1028 * @see #insertRobots(Vector, boolean, boolean) 1029 */ 1030 public static Vector<Robots> getAllRobots(Vector<? extends HTMLNode> html) 1031 throws MalformedHTMLException 1032 { 1033 // Here, again, using Java Streams can be sometimes useful - primarily whenever a 1034 // 'filter' operation is going to be used on a Vector. Vector.removeIf works, BUT 1035 // this also extracts attribute values, and the original TagNode are discarded, and 1036 // replaced by the the <META> attributes. 1037 // 1038 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1039 // puts each separate String in each array into the TreeSet. 1040 // 1041 // NOTE: The TreeSet also functions as a "duplicate checker" although this is also 1042 // provided by Stream.distinct() 1043 // 1044 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1045 // <META NAME="robots" ...> 1046 // 1047 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1048 // String-Comparison 1049 // Trim the 'name' Attribute-Value String of possible leading & trailing 1050 // White-Space before performing the comparison. 1051 1052 TreeSet<String> temp = InnerTagGet 1053 .all (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "robots") 1054 .stream () 1055 .map ((TagNode tn) -> tn.AV("CONTENT")) 1056 1057 .filter ((String contents) -> 1058 (contents != null) && (contents.trim().length() > 0)) 1059 1060 .map ((String contents) -> 1061 Arrays.asList(StrCSV.CSV(contents.toLowerCase()))) 1062 1063 .collect (TreeSet<String>::new, TreeSet::addAll, TreeSet::addAll); 1064 1065 1066 // I cannot use EXCEPTIONS and STREAMS together, there is no simple way. 1067 // It would be too ugly to read. 
1068 1069 Vector<Robots> ret = new Vector<>(); 1070 1071 1072 // If an invalid robot-attribute is found, this will 1073 // throw a MalformedHTMLException 1074 1075 for (String s : temp) ret.add(Robots.getRobot(s)); 1076 1077 return ret; 1078 } 1079 1080 /** 1081 * This will retrieve the {@code 'robots'} Meta-Tag 1082 * Attribute-<B STYLE='color: red;'>value</B> present on a Web-Page. 1083 * 1084 * <BR /><BR />If any of them are not in accordance with the tags listed in the 1085 * Enumerated-Type {@link Robots}, this will not cause a {@link MalformedHTMLException} to 1086 * throw. Instead, the result will just be eliminated and ignored. Take care that all of 1087 * the necessary {@code ROBOTS}-Tags are listed in the Enumerated-Type, and that there 1088 * are no "undefined, but necessary" robot elements to be found before using this method! 1089 * 1090 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1091 * @return A vector of all the valid robots attribute values found on the web-page. 1092 * @see #robotsMetaTag 1093 * @see #insertRobots(Vector, boolean, boolean) 1094 * @see TagNode#AV(String) 1095 */ 1096 public static Vector<Robots> getAllRobotsNOMHE(Vector<? extends HTMLNode> html) 1097 { 1098 // Java Streams, used here, filter out irrelevant meta tags, and also convert the 1099 // HTML Meta TagNode's into their their "CONTENT" Attribute String value. The TreeSet 1100 // provides a duplicate check elimination and sorts the {@code String's} as well. 1101 // 1102 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1103 // puts each separate String in each array into the TreeSet 1104 // 1105 // NOTE: The 'getRobotNOMHE' suppresses a possible exception, and converts such a 1106 // situation to 'null.' 
The suppressed-exception is the "MalformedHTMLException" 1107 // 1108 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1109 // <META NAME="robots" ...> 1110 // 1111 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1112 // String-Comparison 1113 // Trim the 'name' Attribute-Value String of possible leading & trailing 1114 // White-Space before performing the comparison. 1115 1116 return InnerTagGet 1117 .all (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "robots") 1118 .stream () 1119 .map ((TagNode tn) -> tn.AV("CONTENT")) 1120 1121 .filter ((String contents) -> 1122 (contents != null) && (contents.trim().length() > 0)) 1123 1124 .map ((String contents) -> 1125 Arrays.asList(StrCSV.CSV(contents.toLowerCase()))) 1126 1127 .collect (TreeSet<String>::new, TreeSet::addAll, TreeSet::addAll) 1128 .stream () 1129 .map ((String robotParam) -> Robots.getRobotNOMHE(robotParam)) 1130 .filter ((Robots robot) -> robot != null) 1131 .collect (Collectors.toCollection(Vector<Robots>::new)); 1132 } 1133 1134 /** 1135 * This method will extract any / all HTML 1136 * <B STYLE='color: red;'>{@code <META NAME='keywords' ...>}</B> Meta-Tags, and then extract 1137 * the relevant page key-words. These key-words will be returned as a Java 1138 * {@code String-Vector}. 1139 * 1140 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1141 * 1142 * @return The list of words that were stored in the 'keywords' HTML Meta-Tags. If there 1143 * were no keywords in any {@code 'KEYWORDS'} Meta-Tags, then an empty Java 1144 * {@code String[]}-Array is returned. 1145 * 1146 * <BR /><BR /><B CLASS=JDDescLabel>Java Stream's Utility:</B> 1147 * 1148 * <BR />If the code below looks complicated, Java's Streams-Package does have a tendency 1149 * to make <I>simple things look difficult</I>. However, once the {@code Stream}-Methods 1150 * are understood, it's usually pretty useful for actually being very concise. 
1151 * 1152 * <BR /><BR /><OL CLASS=JDOL> 1153 * <LI> Get all HTML {@code <META name="keywords" content="...">} elements</LI> 1154 * 1155 * <LI> Extracts the {@code CONTENT}-Attribute, <I>and particularly the 1156 * <B STYLE='color: red;'>value</B> stored there</I> 1157 * </LI> 1158 * 1159 * <LI> Removes blanks, and {@code nulls}</LI> 1160 * <LI> Converts a {@code String[]} to {@code List<String>}</LI> 1161 * <LI> Collects all the List<String> into a single java String-Array</LI> 1162 * </OL> 1163 * 1164 * @see #insertKeyWords(Vector, String[]) 1165 * @see #keyWordsMetaTag 1166 * @see TagNode 1167 * @see TagNode#AV(String) 1168 * @see StrCSV#CSV(String) 1169 */ 1170 public static String[] getAllKeyWords(Vector<? extends HTMLNode> html) 1171 { 1172 // Java Streams here both filter irrelevant meta tags, and also convert the type from 1173 // TagNode to String... using the 'map' function. Ultimately, those strings are 1174 // 'collected' into the returned vector. 1175 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1176 // puts each separate String into the returned Vector. 1177 1178 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1179 // <META name="keywords" ...> 1180 // 1181 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1182 // String-Comparison 1183 // Trim the 'name' Attribute-Value String of possible leading & trailing 1184 // White-Space before performing the comparison. 
1185 1186 return InnerTagGet.all(html, "META", "NAME", TextComparitor.EQ_CI_TRM, "keywords") 1187 .stream () 1188 .map ((TagNode tn) -> tn.AV("content")) 1189 1190 .filter ((String contents) -> 1191 (contents != null) && (contents.trim().length() > 0)) 1192 1193 .map ((String contents) -> Arrays.asList(StrCSV.CSV(contents))) 1194 .collect (Vector::new, Vector::addAll, Vector::addAll) 1195 .stream () 1196 .toArray (String[]::new); 1197 } 1198 1199 /** 1200 * This method attempts to retrieve a {@code 'description'}-Property Meta-Tag out of an 1201 * HTML_Page. If no such Meta-Tag is found, then null is returned. 1202 * 1203 * <BR /><BR />If a partial Meta-Tag is found, but that tag is incomplete, then a 1204 * {@link MalformedHTMLException} will be thrown. 1205 * 1206 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1207 * 1208 * @return The content-description that has been extracted from the HTML Meta-Tag 1209 * <B STYLE='color: red;'>{@code <META NAME="description" CONTENT="the-description">}</B>. 1210 * 1211 * <BR /><BR />If this tag is not found, then null is returned. If this tag is found, but 1212 * does not posses a {@code CONTENT}-Attribute, then a {@code MalformedHTMLException} is 1213 * thrown. 1214 * 1215 * @throws MalformedHTMLException This is thrown if there are multiple definitions of the 1216 * {@code 'ROBOTS'} Meta-Tag. There ought to only be a single definition, and if multiple 1217 * are found, it would be better to identify why, and do the data-extraction manually. 1218 * 1219 * This is en-lieu of randomly picking one of them, and randomly returning one of the 1220 * Meta-Tag's {@code CONTENT}-Attribute <B STYLE='color: red;'>value</B>. 1221 * 1222 * <BR />This exception will also be thrown if proper-values for {@code 'index'} or 1223 * {@code 'follow'} are not found in the {@code CONTENT}-Attribute of the 1224 * {@code 'ROBOTS'} Meta-Tag. 1225 * 1226 * <BR /><BR />These are probably unlikely occurrences. 
This exception is a 1227 * Checked-Exception and must have a {@code try-catch} block or be declared thrown in your 1228 * method-declaration. 1229 * 1230 * @see #descriptionMetaTag 1231 * @see #insertDescription(Vector, String) 1232 * @see InnerTagGet 1233 */ 1234 public static String hasDescription(Vector<? extends HTMLNode> html) 1235 throws MalformedHTMLException 1236 { 1237 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1238 // <META NAME="description" ...> 1239 // 1240 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1241 // String-Comparison 1242 // Trim the 'name' Attribute-Value String of possible leading & trailing 1243 // White-Space before performing the comparison. 1244 1245 Vector<TagNode> v = InnerTagGet.all 1246 (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "description"); 1247 1248 if (v.size() == 0) return null; 1249 1250 if (v.size() > 1) throw new MalformedHTMLException( 1251 "You have asked for the value of the HTML 'description' <META ...> Tag, but " + 1252 "unfortunately there were multiple instances of this Tag on your page. " + 1253 "This is poorly formatted HTML, and not allowed here." 1254 ); 1255 1256 String s = v.elementAt(0).AV("CONTENT"); 1257 1258 if (s == null) throw new MalformedHTMLException( 1259 "An HTML Meta-Tag was found with a NAME-Attribute whose value was " + 1260 "'description,' but unfortunately this Meta-Tag did not posses a CONTENT-Attribute" 1261 ); 1262 1263 return s; 1264 } 1265 1266 /** 1267 * This helps identify Web-Sites & Web-Pages "author-names" to Web-Indexing and 1268 * Web-Search Organizations. 1269 * 1270 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1271 * 1272 * @return This returns the author's name of a Web-Page, as delineated in the 1273 * {@code 'AUTHOR'} Meta-Tag, or null if the Web-Page parameter {@code 'html'} does not 1274 * have an {@code 'AUTHOR'} Meta-Tag. 
1275 * 1276 * @throws MalformedHTMLException If multiple {@code 'AUTHOR'} Meta-Tags are found, this 1277 * method is forced to throw an exception. It is necessary to avoid "picking a favorite 1278 * author among a list". 1279 * 1280 * <BR /><BR />HTML does not actually adhere to these exact requirements, so if there is 1281 * such a scenario with a page having multiple-authors, this method throws an exception in 1282 * order to avoid returning a {@code String[]}-Array or {@code Vector<String>} which would 1283 * be an alternative that would add unnecessary complexity. 1284 * 1285 * <BR /><BR />If this method throws this exception, it is better to know about it, and 1286 * just perform the search again, using a manual {@code 'AUTHOR'} retrieval. The code for 1287 * extracting these properties is, indeed listed directly at the bottom. 1288 * 1289 * @see #insertAuthor(Vector, String) 1290 * @see #authorMetaTag 1291 * @see TagNode#AV(String) 1292 */ 1293 public static String hasAuthor(Vector<? extends HTMLNode> html) 1294 throws MalformedHTMLException 1295 { 1296 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1297 // <META name="author" ...> 1298 // 1299 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1300 // String-Comparison 1301 // Trim the 'name' Attribute-Value String of possible leading & trailing 1302 // White-Space before performing the comparison. 1303 1304 Vector<TagNode> v = InnerTagGet.all 1305 (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "author"); 1306 1307 if (v.size() > 1) throw new MalformedHTMLException( 1308 "This method has identified multiple author Meta-Tags. To handle this " + 1309 "situation, the search should be performed manually using InnerTagGet, with " + 1310 "your code deciding what to do about the HTML Web-Page having multiple 'author' " + 1311 "Meta-Tags." 
1312 ); 1313 1314 // No HTML TagNode's were found that resembled <META NAME=author ...> 1315 if (v.size() == 0) return null; 1316 1317 // Just return the first one that was found, always check for 'null' first to 1318 // avoid the embarrassing NullPointerException. 1319 1320 String author = v.elementAt(0).AV("CONTENT"); 1321 1322 if (author == null) return null; 1323 1324 return author.trim(); 1325 } 1326 1327 1328 // **************************************************************************************** 1329 // **************************************************************************************** 1330 // Retrieve HTTP-EQUIV Meta-Tags 1331 // **************************************************************************************** 1332 // **************************************************************************************** 1333 1334 1335 /** 1336 * This method will find an HTML 1337 * <B STYLE='color: red;'>{@code <META HTTP-EQUIV=... CONTENT=...>}</B> element whose 1338 * {@code HTTP-EQUIV}-Attribute's <B STYLE='color: red;'>value</B> is equal to the 1339 * {@code String}-Parameter {@code 'httpEquiv'} (ignoring case). 1340 * 1341 * <BR /><BR />After such an HTML {@code META}-Tag has been identified, its 1342 * {@code CONTENT}-Attribute {@code String}-value will be subsequently queried, extracted 1343 * and returned by this method. 1344 * 1345 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 1346 * 1347 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 1348 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 1349 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 1350 * will return null. 
1351 * 1352 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 1353 * 1354 * <BR />Before the comparison is done with the {@code 'httpEquiv'} parameter, that 1355 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 1356 * <I>is done while ignoring case</I>. 1357 * 1358 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1359 * 1360 * @param httpEquiv The Attribute-<B STYLE='color: red;'>name</B> of the 1361 * {@code HTTP-EQUIV}-Attribute. 1362 * 1363 * @return The {@code String}-value of the {@code CONTENT}-Attribute for a 1364 * {@code META}-Tag whose {@code HTTP-EQUIV}-Attribute is equal to the specified name 1365 * provided by parameter {@code 'httpEquiv'}. 1366 * 1367 * <BR /><BR />If no such tag is found on the page, then this method shall return null. 1368 */ 1369 public static String getHTTPEquiv(Vector<HTMLNode> html, String httpEquiv) 1370 { 1371 // Find the first <META HTTP-EQUIV=... CONTENT=...> tag element where the name equals 1372 // the string-value provided by parameter 'httpEquiv'. 1373 1374 TagNode tn = InnerTagGet.first 1375 (html, "META", "HTTP-EQUIV", TextComparitor.EQ_CI, httpEquiv.trim()); 1376 1377 // If there are no <META HTTP-EQUIV='httpEquiv' CONTENT=...> elements found on the 1378 // page, then this method returns null. 1379 1380 if (tn == null) return null; 1381 1382 // Return the string-value of the attribute 'content'. Note that if this 1383 // attribute isn't available, this method shall return 'null', gracefully. 1384 1385 return tn.AV("CONTENT"); 1386 } 1387 1388 /** 1389 * This method will find all HTML {@code HTTP-EQUIV}-Directives, and return them in a Java 1390 * {@code Properties} object. 1391 * 1392 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1393 * 1394 * @return An instance of {@code java.util.Properties} containing all 1395 * {@code HTTP-EQUIV}-Directives. 
If HTML-Page paramter {@code 'html'} does not have any 1396 * such Meta-Tags, then an empty {@code Properties} instance is returned, rather than null. 1397 * 1398 * @throws MalformedHTMLException If the page provided has multiple definitions for the 1399 * exact same {@code HTTP}-Header property, then this exception will throw. 1400 */ 1401 public static Properties getAllHTTPEquiv(Vector<HTMLNode> html) 1402 throws MalformedHTMLException 1403 { 1404 Properties ret = new Properties(); 1405 String prev = null; 1406 1407 // Find the first <META HTTP-EQUIV=... CONTENT=...> tag element where the name equals 1408 // the string-value provided by parameter 'httpEquiv'. 1409 1410 for (TagNode httpEquivTN : InnerTagGet.all(html, "META", "HTTP-EQUIV")) 1411 1412 if ((prev = (String) ret.put 1413 (httpEquivTN.AV("HTTP-EQUIV"), httpEquivTN.AV("CONTENT"))) != null) 1414 1415 throw new MalformedHTMLException( 1416 "This HTML Page has multiple Meta-Tag Definitions for the HTTP-" + 1417 "EQUIVALENT Property [" + httpEquivTN.AV("HTTP-EQUIV") + "].\n" + 1418 " " + prev + "\n" + 1419 "and " + httpEquivTN.AV("CONTENT") + '\n' 1420 ); 1421 1422 return ret; 1423 } 1424 1425 /** 1426 * This will detect whether a {@code UTF-8} HTML Meta-Tag is included on this page. Below 1427 * are examples of what such tags look like. 1428 * 1429 * <DIV CLASS="HTML">{@code 1430 * <meta http-equiv="content-type" content="text/html; charset=UTF-8"> 1431 * <meta charset="UTF-8"> 1432 * }</DIV> 1433 * 1434 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1435 * 1436 * @return {@code TRUE} If an appropriate HTML Meta-Tag identifying this page as a 1437 * {@code UTF-8} Character-Set Web-Site. will {@code FALSE} otherwise. 1438 * 1439 * @see #hasUTF8MetaTag(Vector) 1440 * @see #UTF8MetaTag 1441 * @see StrCmpr#containsAND_CI(String, String[]) 1442 * @see TagNode#AV(String) 1443 */ 1444 public static boolean hasUTF8MetaTag(Vector<? 
extends HTMLNode> html) 1445 { 1446 String s; 1447 1448 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1449 // <META http-equiv="content-type" ...> 1450 // 1451 // EQ_CI_TRM: Check the 'http-equiv' Attribute-Value using a Case-Insensitive, 1452 // Equality String-Comparison 1453 // Trim the 'http-equiv' Attribute-Value String of possible leading & 1454 // trailing White-Space before performing the comparison. 1455 1456 Vector<TagNode> v = InnerTagGet.all 1457 (html, "META", "HTTP-EQUIV", TextComparitor.EQ_CI_TRM, "content-type"); 1458 1459 for (TagNode tn : v) 1460 if ((s = tn.AV("CONTENT")) != null) 1461 if (StrCmpr.containsAND_CI(s, "charset", "utf-8")) 1462 return true; 1463 1464 // InnerTagGet.aall retrieves all TagNode's that resemble <META charset="utf-8" ...> 1465 // EQ_CI_TRM: Equality-Test, Case-Insensitive, Trim any White-Space before 1466 // performing comparison. 1467 1468 v = InnerTagGet.all(html, "META", "CHARSET", TextComparitor.EQ_CI_TRM, "utf-8"); 1469 1470 for (TagNode tn : v) 1471 if ((s = tn.AV("CHARSET")) != null) 1472 if (StrCmpr.containsAND_CI(s, "utf-8")) 1473 return true; 1474 1475 return false; 1476 } 1477 1478 1479 // **************************************************************************************** 1480 // **************************************************************************************** 1481 // Insert NAME/Property Meta-Tags 1482 // **************************************************************************************** 1483 // **************************************************************************************** 1484 1485 1486 /** 1487 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 1488 * Meta-Tags that have both a {@code NAME}-Attribute and a {@code CONTENT}-Attribute 1489 * set. 
1490 * 1491 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1492 * 1493 * @param m This is any of the enumerated-types of specific Meta-Tag {@code NAME}-Attribute 1494 * & {@code CONTENT}-Attribute pair / combinations. 1495 * 1496 * @param contentAttributeValue This is the value that will be used to set the 1497 * <B STYLE='color: red;'>value</B> for the {@code CONTENT}-Attribute. 1498 * 1499 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1500 * 1501 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 1502 * DATA-FILE-ID=FT_Q_EX> 1503 * 1504 * @see #metaTagName 1505 * @see #getAllMetaTagNames(Vector) 1506 * @see DotPair 1507 * @see TagNode 1508 */ 1509 public static void insertMetaTagName 1510 (Vector<HTMLNode> html, MetaTagName m, String contentAttributeValue) 1511 { 1512 // Builds and inserts a TagNode HTML Element that looks like: 1513 // <meta name='INSERT-NAME-STRING-HERE' content='INSERT-CONTENT-STRING-HERE'> 1514 1515 // Single Quotes are used, so the attribute-value may not contain single quotes. 1516 checkForSingleQuote(contentAttributeValue); 1517 1518 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1519 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1520 1521 if (header == null) throw new NodeNotFoundException 1522 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META NAME=... CONTENT=...> tag")); 1523 1524 // Build a <META> tag, as in the comment above 1525 TagNode metaTN = new TagNode 1526 ("<META NAME='" + m.name + "' CONTENT='" + contentAttributeValue + "'>"); 1527 1528 // Insert the meta-tag into the page. Put it at the top of the header, 1529 // just after <HEAD> 1530 1531 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1532 } 1533 1534 /** 1535 * This does an insertion of a list of HTML Meta-Tags from a java Hashtable of Meta-Tag 1536 * Name-Attribute / Content-Attribute pairs. 
All name-based Meta-Tags have both a 1537 * {@code NAME}-Attribute, and also a {@code CONTENT}-Attribute. 1538 * 1539 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1540 * 1541 * @param metaTags This is a hash-table of the enumerated-types of specific Meta-Tag Name 1542 * property/content pairs. 1543 * 1544 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1545 * 1546 * @throws QuotesException If any of the <B STYLE='color: red;'>values</B> from the 1547 * <B STYLE='color:red'>key-value</B> pair hash-table contain a {@code String} that has a 1548 * single-quotation mark, anywhere inside the it. 1549 * 1550 * @see #metaTagName 1551 * @see #getAllMetaTagNames(Vector) 1552 * @see #insertMetaTagName(Vector, MetaTagName, String) 1553 * @see TagNode 1554 */ 1555 public static void insertMetaTagNames 1556 (Vector<HTMLNode> html, Hashtable<MetaTagName, String> metaTags) 1557 { 1558 // Builds and inserts a TagNode HTML Element that looks like: 1559 // "<meta name='INSERT-NAME-STRING-HERE' content='INSERT-CONTENT-STRING-HERE'"; 1560 1561 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1562 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1563 1564 if (header == null) throw new NodeNotFoundException 1565 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META NAME=... CONTENT=...> tag")); 1566 1567 // Java Stream's can be addictive... It is an easier way to build a list. 
1568 Stream.Builder<HTMLNode> b = Stream.builder(); 1569 b.accept(NEWLINE); 1570 1571 // Iterate the complete list of meta-tag names to insert 1572 for (MetaTagName m : metaTags.keySet()) 1573 { 1574 String contentAttributeValue = metaTags.get(m); 1575 checkForSingleQuote(contentAttributeValue); 1576 1577 // Build the new node 1578 TagNode metaTN = new TagNode 1579 ("<META NAME='" + m.name + "' CONTENT='" + contentAttributeValue + "'>"); 1580 1581 b.accept(metaTN); b.accept(NEWLINE); 1582 } 1583 1584 // Insert the meta-tag names into the page. Put it at the top of the header, 1585 // just after <HEAD> 1586 1587 Util.insertNodes(html, header.start + 1, b.build().toArray(HTMLNode[]::new)); 1588 } 1589 1590 1591 // **************************************************************************************** 1592 // **************************************************************************************** 1593 // Insert **SPECIFIC** NAME/Property Meta-Tags 1594 // **************************************************************************************** 1595 // **************************************************************************************** 1596 1597 1598 /** 1599 * One common HTML Meta-Tag is the one which informs Google & Yahoo (and all 1600 * search-engine sites) which of your pages you would like to be indexed by their search 1601 * engine, and which pages you would like to not be indexed. Worrying about what Google 1602 * does and does not index may seem daunting, but this meta-tag can prevent certain 1603 * behaviors. 1604 * 1605 * <BR /><BR />The {@code 'ROBOTS'} Meta-Tag informs Search-Engines which pages on your 1606 * site should be indexed. This Meta-Tag serves a similar purpose to a {@code 'robots.txt'} 1607 * File. It is generally used to prevent a Search-Engine from indexing individual pages, 1608 * while {@code 'robots.txt'} is used to prevent the search from indexing a whole site or 1609 * section of a site. 
1610 * 1611 * <BR /><BR />A {@code 'ROBOTS'} Meta-Tag which instructs the Search-Engine Crawler not to 1612 * index a page, or follow any links on it, would be written as below. 1613 * 1614 * <DIV CLASS="HTML">{@code 1615 * <meta name="robots" content="noindex, nofollow" /> 1616 * <meta name="robots" content="index, follow" /> 1617 * }</DIV> 1618 * 1619 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1620 * 1621 * @param index This is a {@code boolean}-Parameter that when set to {@code TRUE} will 1622 * force this method to place an {@code INDEX-String} into the finally-exported HTML 1623 * element. If {@code FALSE} is passed, then a {@code NOINDEX-String} will be put into the 1624 * HTML-Tag. 1625 * 1626 * @param follow This is also a {@code boolean}-Parameter. When {@code TRUE} this will 1627 * force the method to put a {@code FOLLOW-String} into the finally-exported HTML-Tag. 1628 * When {@code FALSE}, then a {@code 'NOFOLLOW'} will be inserted. 1629 * 1630 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1631 * 1632 * @see #robotsMetaTag 1633 * @see #getAllRobots(Vector) 1634 * @see #getAllRobotsNOMHE(Vector) 1635 * @see TagNode 1636 */ 1637 public static void insertRobots(Vector<HTMLNode> html, boolean index, boolean follow) 1638 { 1639 // Builds a robots meta tag. These are used by google and search engines 1640 // <meta NAME=robots content='INSERT-CONTENT-STRING-HERE' /> 1641 1642 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1643 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1644 1645 if (header == null) throw new NodeNotFoundException 1646 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Robots <META ... > Tag")); 1647 1648 // Build a 'robots' TagNode 1649 TagNode robotsTN = new TagNode( 1650 "<META NAME=robots CONTENT='" + 1651 (index ? "index" : "noindex") + ", " + (follow ? 
"follow" : "nofollow") + 1652 "' >" 1653 ); 1654 1655 // Insert the robots-tag into the page. 1656 // Put it at the top of the header, just after <HEAD> 1657 1658 Util.insertNodes(html, header.start + 1, NEWLINE, robotsTN, NEWLINE); 1659 } 1660 1661 /** 1662 * This will add an HTML Meta-Tag with a 1663 * <B STYLE='color: red;'>{@code <META NAME=robots>}</B> 1664 * 1665 * <BR /><BR /><B CLASS=JDDescLabel>Validity Check Warning:</B> 1666 * 1667 * <BR />This method avoids all presumed <I><B>validity check,</B></I> primarily because 1668 * making an attempt to identify what is absolutely correct or not-correct seems a little 1669 * far-fetched. 1670 * 1671 * <BR /><BR />Although the number of actual values the {@code ROBOTS}-Attribute may 1672 * contain is very low, throwing a {@code MalformedHTMLException} for some errors, while 1673 * ignoring others was decided to best avoid during this method's development. 1674 * 1675 * <BR /><BR /><DIV CLASS=JDHint> 1676 * If a programmer were to pass both the {@link Robots#Follow} and the 1677 * {@link Robots#NoFollow} Enum-Constants, both of these tags would be inserted into an 1678 * HTML {@code 'robots'} Meta-Tag without any kind of warning or exception throw. 1679 * </DIV> 1680 * 1681 * <BR />This, clearly, would be a faulty HTML directive, though. 1682 * 1683 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1684 * 1685 * @param rArr This is an array of the Enumerated-Type {@link Robots}. It may contain a 1686 * list of any number of the items available to add into an HTML Meta-Tag's 1687 * {@code ROBOTS}-Attribute. If any of the array elements are null, they will be skipped 1688 * and ignored. 
1689 * 1690 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1691 * 1692 * @see #robotsMetaTag 1693 * @see #getAllRobots(Vector) 1694 * @see #insertRobots(Vector, boolean, boolean) 1695 * @see StrCSV#toCSV(Object[], IntTFunction, boolean, Integer) 1696 * @see DotPair 1697 */ 1698 public static void insertRobots(Vector<HTMLNode> html, Robots... rArr) 1699 { 1700 // Builds a series-of-robots meta tag. These are used by google and search engines 1701 // <meta NAME=robots content='INSERT-CONTENT-STRING-HERE' /> 1702 1703 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1704 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1705 1706 if (header == null) throw new NodeNotFoundException 1707 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Robots <META ... > Tag")); 1708 1709 String robotsStr = StrCSV.toCSV(rArr, (int i, Robots r) -> r.name, false, null); 1710 1711 // Build the <META> TagNode 1712 TagNode robotsTN = new TagNode("<META NAME=robots CONTENT='" + robotsStr + "'>"); 1713 1714 // Insert the robots-tag into the page. Put it at the top of the header, just 1715 // after <HEAD> 1716 1717 Util.insertNodes(html, header.start + 1, NEWLINE, robotsTN, NEWLINE); 1718 } 1719 1720 /** 1721 * Another common HTML {@code META}-Tag is the one that provides a brief description of 1722 * the page in question. 
This method facilitates adding a Meta-Tag that contains two 1723 * attributes: 1724 * 1725 * <BR /><BR /><OL CLASS=JDUL> 1726 * 1727 * <LI> {@code NAME}-Attribute whose <B STYLE='color: red;'>value</B> must be 1728 * {@code 'description'} 1729 * </LI> 1730 * 1731 * <LI> {@code CONTENT}-Attribute whose <B STYLE='color: red;'>value</B> should be a brief 1732 * textual description of the content of the page 1733 * </LI> 1734 * 1735 * </OL> 1736 * 1737 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1738 * 1739 * @param description This is a textual-description of the Web-Page to which this HTML 1740 * <B STYLE='color: red;'>{@code <META NAME=description CONTENT='...'}</B> Tag is being 1741 * added. If Google or any of the other Internet Search Sites, return your Web-Page as a 1742 * part of a search-results, this description is usually used. 1743 * 1744 * <BR /><BR />Furthermore, the key-words that are listed here are some-how (in a way that 1745 * is not-knownst to this programmer) used in indexing your particular page in the 1746 * search-algorithms. 1747 * 1748 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1749 * 1750 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=description 1751 * DATA-FILE-ID=FT_Q_EX> 1752 * 1753 * @see #descriptionMetaTag 1754 * @see #hasDescription(Vector) 1755 * @see #checkForSingleQuote(String) 1756 * @see TagNode 1757 */ 1758 public static void insertDescription(Vector<HTMLNode> html, String description) 1759 { 1760 // Meta-Tag for Descriptions. This will be inserted into the HTML page. 1761 // <meta NAME=description content='INSERT-DESCRIPTION-OR-KEYWORDS-HERE'> 1762 1763 checkForSingleQuote(description); 1764 1765 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
1766 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1767 1768 if (header == null) throw new NodeNotFoundException 1769 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Description <META ... > Tag")); 1770 1771 // Build the Meta Tag for a description to google and search engines 1772 TagNode metaTN = new TagNode 1773 ("<META NAME=description CONTENT='" + description + "'>"); 1774 1775 // Insert the description-tag into the page. Put it at the top of the header, 1776 // just after <HEAD> 1777 1778 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1779 } 1780 1781 /** 1782 * This will attempt to insert key-words into an HTML Meta-Tag. This is usually used to 1783 * summarize-explain 'main-points' that a Web-Page author wants to make to any 1784 * search-engineer or any-listener on the internet about the Web-Page that includes such a 1785 * Meta-Tag. 1786 * 1787 * <BR /><BR /><B CLASS=JDDescLabel>Validity Checking:</B> 1788 * 1789 * <BR />This method does a few minor validity checks regarding the content inside of a 1790 * description keyword. All it does is look for things like White-Space and a few 1791 * punctuation rules. If either of these problems occur inside any of the key-words 1792 * provided to the {@code 'keyWords'} Var-Args Parameter, then an 1793 * {@code IllegalArgumentException} is thrown. 
1794 * 1795 * <BR /><BR /><B CLASS=JDDescLabel>Disallowed Punctuation:</B> 1796 * 1797 * <BR />This list of disallowed punctuation marks for the key-words are as processed as 1798 * follows: 1799 * 1800 * <DIV CLASS="SNIP">{@code 1801 * if (StrCmpr.containsOR 1802 * (keyWord, ";", ",", "'", "\"", "!", "#", "<", ">", 1803 * "(", ")", "*", "/", "\\") 1804 * ) 1805 * throw new IllegalArgumentException(...); 1806 * }</DIV> 1807 * 1808 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1809 * 1810 * @param keyWords This is a list of germane key-words that help identify, indicate or 1811 * describe the content of the Web-Page in which they are placed. 1812 * 1813 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1814 * 1815 * @throws IllegalArgumentException If any of the key-words provided to the Java Var-Args 1816 * {@code 'keyWords'} parameter contain invalid punctuation characters, or white-space. 1817 * 1818 * @see #keyWordsMetaTag 1819 * @see #getAllKeyWords(Vector) 1820 * @see StringParse#hasWhiteSpace(String) 1821 * @see StrCmpr#containsOR(String, String[]) 1822 * @see StrCSV#toCSV(String[], boolean, boolean, Integer) 1823 */ 1824 public static void insertKeyWords(Vector<HTMLNode> html, String... keyWords) 1825 { 1826 // The meta-tag for key-words. Search Engines look for these key-words when indexing 1827 // <meta NAME=keywords content='INSERT-COMMA-SEPARATED-KEYWORDS-HERE'> 1828 1829 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
1830 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1831 1832 if (header == null) throw new NodeNotFoundException 1833 (NO_HEADER_MESSAGE.replace("INSERT-STR", "KeyWords Meta-Tag")); 1834 1835 for (String keyWord : keyWords) if (StringParse.hasWhiteSpace(keyWord)) 1836 1837 throw new IllegalArgumentException( 1838 "You have tried to insert keywords into an HTML Meta-Tag KeyWord-{roperty, " + 1839 "but unfortunately one of the words provided [" + keyWord + "] contains " + 1840 "white-space. This is not allowed here." 1841 ); 1842 1843 1844 for (String keyWord : keyWords) 1845 1846 if (StrCmpr.containsOR 1847 (keyWord, ";", ",", "'", "\"", "!", "<", ">", "(", ")", "*", "/", "\\")) 1848 1849 throw new IllegalArgumentException( 1850 "You have tried to insert keywords into an HTML Meta-Tag KeyWords-" + 1851 "Property, but unfortunately one of the words provide [" + keyWord + "] " + 1852 "contains error-prone punctuation, and cannot be used here." 1853 ); 1854 1855 // All this does is build a list - Comma Separated values. 1856 String listAsString = StrCSV.toCSV(keyWords, true, false, null); 1857 1858 // Build the TagNode, it will contain all key-words listed in the input var-args 1859 // String array 1860 1861 TagNode metaTN = new TagNode("<META NAME=keywords CONTENT='" + listAsString + "'>"); 1862 1863 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 1864 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1865 } 1866 1867 /** 1868 * This method will insert an "author" HTML Meta-Tag into the 1869 * <B STYLE='color: red;'>{@code <HEAD> ... </HEAD>}</B> section of this page. 1870 * 1871 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1872 * @param author This is the author of this Web-Page. 
1873 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1874 * 1875 * @throws QuotesException If the author's name prevents the HTML-Engine from building any 1876 * version of an {@code AUTHOR} Meta-Tag. This will happen, certainly, if the author's 1877 * name-{@code String} contains <I><B>both</B></I> a single <I><B>and</B></I> a double 1878 * quote. 1879 * 1880 * <BR /><BR />Choose either the single-quote, or the double. Do not use both, or this 1881 * exception will throw. 1882 * 1883 * <BR /><BR /><DIV CLASS=JDHint> 1884 * Most author's names don't have any quotes at all! Checking for these things prevents 1885 * unexplainable exceptions later on. 1886 * </DIV> 1887 * 1888 * @see #authorMetaTag 1889 * @see #hasAuthor(Vector) 1890 * @see SD 1891 * @see DotPair 1892 */ 1893 public static void insertAuthor(Vector<HTMLNode> html, String author) 1894 { 1895 // The 'Author' Meta tag shall be inserted into the html page. 1896 // <meta NAME=author content='INSERT-AUTHOR-NAME-HERE'> 1897 1898 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1899 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1900 1901 if (header == null) throw new NodeNotFoundException 1902 (NO_HEADER_MESSAGE.replace("INSERT-STR", "author meta-tag")); 1903 1904 if ((author.indexOf("'") != -1) && (author.indexOf("\"") != -1)) 1905 1906 throw new QuotesException( 1907 "The author string provided here contains both a single-quote and a double-" + 1908 "quote, but this cannot be inserted into any HTML-Tag. Please remove " + 1909 "one or the other." 1910 ); 1911 1912 // Use the more complicated TagNode constructor to build the "author" tag. 1913 SD quote = (author.indexOf("'") == -1) ? SD.SingleQuotes : SD.DoubleQuotes; 1914 Properties p = new Properties(); 1915 1916 p.put("NAME", "author"); 1917 p.put("CONTENT", author); 1918 1919 // This constructor accepts a properties instance. 
1920 TagNode authorTN = new TagNode("META", p, quote, true); 1921 1922 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 1923 Util.insertNodes(html, header.start + 1, NEWLINE, authorTN, NEWLINE); 1924 } 1925 1926 1927 // **************************************************************************************** 1928 // **************************************************************************************** 1929 // Insert HTTP-EQUIV Meta-Tags 1930 // **************************************************************************************** 1931 // **************************************************************************************** 1932 1933 1934 /** 1935 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 1936 * Meta-Tags that have a {@code HTTP-EQUIV}-Attribute paired with a 1937 * {@code CONTENT}-Attribute. 1938 * 1939 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1940 * 1941 * @param httpEquiv This is the property that is passed using the 1942 * {@code HTTP-EQUIV}-Attribute. 1943 * 1944 * @param contentAttributeValue This is the value that will be used to set the 1945 * {@code CONTENT}-Attribute. 1946 * 1947 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1948 * 1949 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 1950 * DATA-FILE-ID=FT_Q_EX> 1951 * 1952 * @see #metaTagHTTPEquiv 1953 * @see #getHTTPEquiv(Vector, String) 1954 * @see DotPair 1955 * @see TagNode 1956 */ 1957 public static void insertHTTPEquiv 1958 (Vector<HTMLNode> html, String httpEquiv, String contentAttributeValue) 1959 { 1960 // Builds and inserts a TagNode HTML Element that looks like: 1961 // <meta http-equiv='INSERT-HTTP-EQUIV-STRING-HERE' 1962 // content='INSERT-CONTENT-STRING-HERE' > 1963 1964 // Single Quotes are used, so the attribute-value may not contain single quotes. 
1965 checkForSingleQuote(contentAttributeValue); 1966 1967 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1968 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1969 1970 if (header == null) throw new NodeNotFoundException 1971 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META HTTP-EQUIV=... CONTENT=...> Tag")); 1972 1973 // Build a <META> tag, as in the comment above 1974 TagNode metaTN = new TagNode 1975 ("<META HTTP-EQUIV='" + httpEquiv + "' CONTENT='" + contentAttributeValue + "'>"); 1976 1977 // Insert the meta-tag into the page. Put it at the top of the header, 1978 // just after <HEAD> 1979 1980 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1981 } 1982 1983 /** 1984 * The method will insert a {@code UTF-8} Meta-Tag that identifies the HTML-Page to any 1985 * Web-Browser that attempts to render its content as containing Foreign-Language 1986 * Characters, Emoji's & other non-{@code ASCII} Glyphs. 1987 * 1988 * <BR /><BR />{@code UTF-8} text utilizes/makes-use-of characters in a higher 1989 * {@code 'byte-range'} than the traditional <I>single-byte (256 different-characters) ASCII</I> 1990 * Character-Set. {@code UTF-8} allows for Chinese, Japanese and just about every variant of 1991 * language in the rest of the world. 1992 * 1993 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1994 * 1995 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1996 * 1997 * @see #hasUTF8MetaTag(Vector) 1998 * @see #UTF8MetaTag 1999 * @see TagNode 2000 * @see DotPair 2001 */ 2002 public static void insertUTF8MetaTag(Vector<HTMLNode> html) 2003 { 2004 // Meta-Tag to assert that the UTF-8 Charset is being used: 2005 // <meta http-equiv='Content-Type' content='text/html; charset=utf-8' /> 2006 2007 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
2008 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2009 2010 if (header == null) throw new NodeNotFoundException 2011 (NO_HEADER_MESSAGE.replace("INSERT-STR", "UTF-8 <META> Tag")); 2012 2013 // Insert the UTF-8 tag into the page. Put it at the top of the header, just 2014 // after <HEAD> 2015 2016 Util.insertNodes(html, header.start + 1, NEWLINE, new TagNode(UTF8MetaTag), NEWLINE); 2017 } 2018 2019 2020 // **************************************************************************************** 2021 // **************************************************************************************** 2022 // ITEMPROP Meta-Tags 2023 // **************************************************************************************** 2024 // **************************************************************************************** 2025 2026 2027 /** 2028 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 2029 * Meta-Tags that have an {@code ITEMPROP}-Attribute paired with a 2030 * {@code CONTENT}-Attribute set. 
2031 * 2032 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 2033 * 2034 * @param itemProp This is a property that is passed via the {@code ITEMPROP}-Attribute 2035 * 2036 * @param contentAttributeValue This is the value that will be used to set the 2037 * {@code CONTENT}-Attribute 2038 * 2039 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 2040 * 2041 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 2042 * DATA-FILE-ID=FT_Q_EX> 2043 * 2044 * @see #metaTagItemProp 2045 * @see #getItemProp(Vector, String) 2046 * @see DotPair 2047 * @see TagNode 2048 */ 2049 public static void insertItemProp 2050 (Vector<HTMLNode> html, String itemProp, String contentAttributeValue) 2051 { 2052 // Builds and inserts a TagNode HTML Element that looks like: 2053 // <meta itemprop='INSERT-ITEMPROP-STRING-HERE' content='INSERT-CONTENT-STRING-HERE' > 2054 2055 // Single Quotes are used, so the attribute-value may not contain single quotes. 2056 checkForSingleQuote(contentAttributeValue); 2057 2058 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 2059 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2060 2061 if (header == null) throw new NodeNotFoundException 2062 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META ITEMPROP=... CONTENT=...> tag")); 2063 2064 // Build a <META> tag, as in the comment above 2065 TagNode metaTN = new TagNode 2066 ("<META ITEMPROP='" + itemProp + "' CONTENT='" + contentAttributeValue + "'>"); 2067 2068 // Insert the meta-tag into the page. Put it at the top of the header, 2069 // just after <HEAD> 2070 2071 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 2072 } 2073 2074 /** 2075 * This method will find an HTML 2076 * <B STYLE='color: red;'>{@code <META ITEMPROP=... 
CONTENT=...>}</B> element whose 2077 * {@code ITEMPROP}-Attribute <B STYLE='color: red;'>value</B> is equal to the 2078 * {@code String}-parameter {@code 'itemProp'} (ignoring case). 2079 * 2080 * <BR /><BR />After such an HTML {@code META}-Tag has been identified, its 2081 * {@code CONTENT}-Attribute {@code String}-value will be subsequently queried, extracted 2082 * and returned by this method. 2083 * 2084 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 2085 * 2086 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 2087 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 2088 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 2089 * will return null. 2090 * 2091 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 2092 * 2093 * <BR />Before the comparison is done with the {@code 'itemProp'} parameter, that 2094 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 2095 * <I>is done while ignoring case</I>. 2096 * 2097 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 2098 * 2099 * @param itemProp The Attribute-<B STYLE='color: red;'>name</B> of the 2100 * {@code ITEMPROP}-Attribute. 2101 * 2102 * @return The {@code String}-value of the {@code CONTENT}-Attribute for a 2103 * {@code META}-Tag whose {@code ITEMPROP}-Attribute is equal to the specified name 2104 * provided by parameter {@code 'itemProp'}. 2105 * 2106 * <BR /><BR />If such information is not found on the page, then this method returns null. 2107 */ 2108 public static String getItemProp(Vector<HTMLNode> html, String itemProp) 2109 { 2110 // Find the first <META ITEMPROP=... CONTENT=...> tag element where the name equals 2111 // the string-value provided by parameter 'itemProp'. 
2112 2113 TagNode tn = InnerTagGet.first 2114 (html, "META", "ITEMPROP", TextComparitor.EQ_CI, itemProp.trim()); 2115 2116 // If there are no <META ITEMPROP='itemProp' CONTENT=...> elements found on the page, 2117 // then this method returns null. 2118 2119 if (tn == null) return null; 2120 2121 // Return the string-value of the attribute 'content'. Note that if this 2122 // attribute isn't available, this method shall return 'null', gracefully. 2123 2124 return tn.AV("content"); 2125 } 2126 2127 2128 // **************************************************************************************** 2129 // **************************************************************************************** 2130 // Open-Graph Meta-Tags 2131 // **************************************************************************************** 2132 // **************************************************************************************** 2133 2134 2135 /** 2136 * This will insert a single Open-Graph Meta-Tag into an HTML-Page. 2137 * 2138 * <BR /><BR /><B CLASS=JDDescLabel>Prepending <CODE>'og:'</CODE></B> 2139 * 2140 * <BR />The name of the property <I><B>MUST NOT</B></I> begin with the characters 2141 * {@code "og:"}, because they will be prepended when the HTML 2142 * <B STYLE='color: red;'>{@code <META PROPERTY='...' CONTENT='...' />}</B> Tag is 2143 * instantiated. 2144 * 2145 * <BR /><BR />Please review <I>exact</I> method body below. 2146 * 2147 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 2148 * 2149 * @param ogProperty This is the name of the Open-Graph protocol property that is being 2150 * inserted. Generally these are simple text-{@code String's} with alphanumeric-limited 2151 * names, or they are series of alphanumeric text-{@code String's}, separated by a period 2152 * {@code '.'} character. 
2153 * 2154 * @param ogValueAsStr If you look at the definition of the {@link #openGraphMetaTag} above 2155 * in this class, you may view all of the acceptable types that Open-Graph Properties may 2156 * use. 2157 * 2158 * <BR /><BR />Whichever property or field that is being inserted, mostly, the field must 2159 * be converted to a {@code String} when being passed to this method. 2160 * 2161 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 2162 * 2163 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM1=ogProperty 2164 * DATA-PARAM2=ogValueAsStr DATA-FILE-ID=FT_Q_EX_DOUBL> 2165 * 2166 * @see #openGraphMetaTag 2167 * @see #getAllOGMetaTags(Vector) 2168 * @see #checkForSingleQuote(String) 2169 * @see TagNode 2170 */ 2171 public static void insertOGMetaTag 2172 (Vector<HTMLNode> html, String ogProperty, String ogValueAsStr) 2173 { 2174 // Open graph tag looks like this: 2175 // <meta property='og:INSERT-OG-PROPERTY-HERE' content='INSERT-OG-VALUE-HERE' /> 2176 2177 checkForSingleQuote(ogProperty); 2178 checkForSingleQuote(ogValueAsStr); 2179 2180 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 2181 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2182 2183 if (header == null) throw new NodeNotFoundException( 2184 NO_HEADER_MESSAGE.replace 2185 ("INSERT-STR", "Open-Graph <META NAME='og:...' ...> Tag") 2186 ); 2187 2188 // Build the Open-Graph Meta Tag 2189 TagNode metaTN = new TagNode 2190 ("<META PROPERTY='og:" + ogProperty+ "' CONTENT='" + ogValueAsStr +"'>"); 2191 2192 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 2193 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 2194 } 2195 2196 /** 2197 * This will search any Vectorized HTML-Pge for 2198 * <B STYLE='color: red;'>{@code <META PROPERTY='og:...' CONTENT='...'>}</B> Tags, and 2199 * retrieve them for placement into a {@code java.util.Properties} table. 
2200 * 2201 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 2202 * 2203 * @return This will return a Java {@code 'Properties'} Object, with all Open-Graph 2204 * properties saved inside. 2205 * 2206 * @see #openGraphMetaTag 2207 * @see #insertOGMetaTag(Vector, String, String) 2208 * @see TagNode#AV(String) 2209 * @see InnerTagGet 2210 */ 2211 public static Properties getAllOGMetaTags(Vector<? extends HTMLNode> html) 2212 { 2213 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 2214 // <META property="og:..." ...> 2215 // 2216 // SW_CI_TRM: Check the 'property' Attribute-Value using a Case-Insensitive, 2217 // 'Starts-With' String-Comparison 2218 // Trim the 'property' Attribute-Value String of possible leading & 2219 // trailing White-Space before performing the comparison. 2220 2221 Vector<TagNode> v = InnerTagGet.all 2222 (html, "META", "PROPERTY", TextComparitor.SW_CI_TRM, "og:"); 2223 2224 Properties ret = new Properties(); 2225 2226 for (TagNode tn : v) 2227 ret.put(tn.AV("PROPERTY").substring(3), tn.AV("CONTENT")); 2228 2229 return ret; 2230 } 2231 } 2232}