1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package Torello.HTML.Tools.NewsSite;
import Torello.HTML.HTMLNode;
import Torello.HTML.HTMLTokException;
import Torello.HTML.NodeSearch.InnerTagGetInclusive;
import Torello.HTML.NodeSearch.InclusiveException;
import java.util.Locale;
import java.util.Vector;
import java.util.function.Predicate;
import java.net.URL;
/**
 * Package-private factory that builds an {@code ArticleGet} lambda which extracts an article
 * body by delegating to {@code InnerTagGetInclusive.first(page, htmlTag, innerTag, p)}.
 *
 * <p>All argument validation that does not depend on the downloaded page is performed once,
 * when the lambda is built, so that a mis-configured getter fails before any scraping starts.
 */
class Usual_p
{
    /**
     * Builds the {@code ArticleGet} instance.
     *
     * @param htmlTag  Name of the HTML element to search for. It is lower-cased (locale
     *                 independently) and then validated by {@code HTMLTokException.check} and
     *                 {@code InclusiveException.check}.
     * @param innerTag Name of the attribute ("inner-tag") passed to the search. It is lower-cased
     *                 (locale independently) before use.
     * @param p        Predicate forwarded unchanged to {@code InnerTagGetInclusive.first};
     *                 presumably it is tested against the attribute's value - confirm against
     *                 that method's documentation.
     *
     * @return A lambda accepting {@code (URL, Vector<HTMLNode>)} that returns the non-null,
     *         non-empty result of the search, or throws {@code ArticleGetException} otherwise.
     *
     * @throws NullPointerException if {@code htmlTag}, {@code innerTag} or {@code p} is null.
     */
    static ArticleGet generate(
            final String            htmlTag,
            final String            innerTag,
            final Predicate<String> p
        )
    {
        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
        // FAIL-FAST: Check user-input for possible errors BEFORE building the Lambda-Function.
        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***

        // These two would otherwise surface as a message-less NullPointerException thrown by
        // 'toLowerCase' below.  Same exception type, same ordering, but with an explanation.
        if (htmlTag == null) throw new NullPointerException
            ("Null has been passed to String parameter 'htmlTag'. This is not allowed here.");

        if (innerTag == null) throw new NullPointerException
            ("Null has been passed to String parameter 'innerTag'. This is not allowed here.");

        // Locale.ROOT keeps the case-conversion independent of the JVM's default locale.  With a
        // Turkish default locale, for instance, "DIV".toLowerCase() yields a dotless 'ı', and the
        // tag would no longer be recognised.
        final String htmlTagLC  = htmlTag.toLowerCase(Locale.ROOT);
        final String innerTagLC = innerTag.toLowerCase(Locale.ROOT);

        // This 'final' String is merely used for proper error reporting in any potential
        // exception-messages, nothing else.  It deliberately shows the tags as the user typed
        // them, not the lower-cased variants.
        final String functionNameStr =
            "InnerTagGetInclusive.first(page, \"" + htmlTag + "\", \"" + innerTag + "\", " +
            "Predicate<String>)";

        HTMLTokException.check(htmlTagLC);
        InclusiveException.check(htmlTagLC);

        if (p == null) throw new NullPointerException
            ("Null has been passed to Predicate parameter 'p'. This is not allowed here.");

        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
        // Build the instance, using a lambda-expression
        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***

        return (URL url, Vector<HTMLNode> page) ->
        {
            // This check runs on every invocation of the Lambda-Function, since 'url' and 'page'
            // are only known at that point.  What exactly is validated is decided by
            // ArticleGetException.check itself.
            ArticleGetException.check(url, page);

            final Vector<HTMLNode> ret;

            // Anything thrown by the search is wrapped (with the original as its cause) so that
            // callers only ever need to deal with ArticleGetException.
            try
                { ret = InnerTagGetInclusive.first(page, htmlTagLC, innerTagLC, p); }

            catch (Exception e)
            {
                throw new ArticleGetException
                    (ArticleGetException.GOT_EXCEPTION, functionNameStr, e);
            }

            // A null or empty result means that the user-specified means of retrieving the
            // article body FAILED for this page; report that instead of returning it.
            if (ret == null) throw new ArticleGetException
                (ArticleGetException.RET_NULL, functionNameStr, null);

            if (ret.size() == 0) throw new ArticleGetException
                (ArticleGetException.RET_EMPTY_VECTOR, functionNameStr, null);

            return ret;
        };
    }
}
|