1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109 | package Torello.HTML.Tools.NewsSite;
import Torello.Java.*;
import java.io.*;
/**
 * A Java function-pointer / lambda-target that provides a means for deciding where to save
 * downloaded article HTML, including a {@code static}-builder method for choosing to save
 * articles directly to the file-system.
 *
 * <EMBED CLASS='external-html' DATA-FILE-ID=SCRAPE_ART_REC>
 */
@FunctionalInterface
public interface ScrapedArticleReceiver
{
    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUIDFI> */
    // NOTE(review): this interface does not itself extend java.io.Serializable; confirm that
    // implementors are expected to be Serializable for this constant to be meaningful.
    public static final long serialVersionUID = 1;

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=FUNC_INTER_METH>
     *
     * @param articleBody After an article has been downloaded by the {@code ScrapeArticles}
     * class, it will build an instance of {@code class Article} and pass it to this method.  It
     * is the programmer's responsibility to ultimately decide what to do with news articles
     * after they have been successfully scraped and parsed.
     *
     * @param sectionURLNum This is a convenience parameter that informs the
     * implementing-{@code interface} <B><I>from which URL Section</I></B> of the News
     * Web-Site's main-page this article is being downloaded.
     *
     * <BR /><BR /><DIV CLASS=JDHint>
     * <B STYLE='color:red;'>Note:</B> Review the {@code class ScrapeURLs} to read more about
     * "Section {@code URL's}."  The number of "Section {@code URL's}" for a news web-site scrape
     * is just the length of the {@code Vector<URL> sectionURL's}
     * </DIV>
     *
     * @param articleNum This parameter informs the implementing-{@code interface} which article
     * number is being downloaded.  Each section-{@code URL} will have a number of different
     * articles in the section.  These numbers can be used to create unique file-names, for
     * instance.
     *
     * @throws ReceiveException This exception may be thrown by the lambda-expression or class
     * instance that implements this {@code FunctionalInterface}.  It is not mandatory that this
     * exception be used.
     */
    public void receive(Article articleBody, int sectionURLNum, int articleNum)
        throws ReceiveException;

    /**
     * <B><SPAN STYLE="color: red;">saveToFS => Save To File-System</SPAN></B>
     *
     * <BR /><BR />This is a static factory-builder method that will produce a
     * {@code 'ScrapedArticleReceiver'} that simply <B>saves downloaded articles to a
     * directory</B> on the file-system.
     *
     * <BR /><BR />The user, here, merely needs to provide a Directory-Name using {@code String}
     * parameter {@code 'dirNameStr'}.  This is the <B><I>simplest</I></B> way to create an
     * instance of this interface.
     *
     * <BR /><BR />Each received article is written to a file inside that directory whose name
     * is the section-number (formatted by {@code StrPrint.zeroPad}), a {@code '.'}, the
     * article-number (formatted by {@code StrPrint.zeroPad10e4}) and the extension
     * {@code ".dat"}.  Any exception raised while writing is wrapped in a
     * {@link ReceiveException} whose cause is the original exception.
     *
     * @param dirNameStr This is a directory on the file-system that will be used to save
     * articles that are received directly to the file-system.  A trailing
     * {@code File.separator} is appended if it is not already present.
     *
     * @return A {@code ScrapedArticleReceiver} that serializes each received {@code Article}
     * into the directory {@code 'dirNameStr'}.
     *
     * @throws WritableDirectoryException This method shall check whether it is possible to
     * write to the provided directory name.
     */
    public static ScrapedArticleReceiver saveToFS(String dirNameStr)
    {
        WritableDirectoryException.check(dirNameStr);

        // Make sure that the directory name-string ends with the system File-Separator
        // character.  This is '/' for UNIX and '\' for MS-DOS.
        final String finalDirNameStr = dirNameStr.endsWith(File.separator)
            ? dirNameStr
            : dirNameStr + File.separator;

        // Create an instance of this functional-interface using a lambda-expression.
        //
        // NOTE: This is literally just saving an object to a file using object-serialization.
        //       The exception catching / throwing is just to produce standardized error
        //       messages back to the user, if an exception occurs when saving files.
        return (Article articleBody, int sectionURLNum, int articleNum) ->
        {
            final String outFileName =
                finalDirNameStr +
                StrPrint.zeroPad(sectionURLNum) + '.' +
                StrPrint.zeroPad10e4(articleNum) + ".dat";

            // NOTE(review): the meaning of the trailing 'true' flag is defined by
            // FileRW.writeObjectToFile and is not visible from this file.
            try
                { FileRW.writeObjectToFile(articleBody, outFileName, true); }

            catch (Exception ex)
            {
                // Class.getCanonicalName() returns null for anonymous, local and hidden
                // classes.  Fall back to Class.getName() so that the message never reads
                // "A null was thrown ...".
                final Class<?>  exClass         = ex.getClass();
                final String    canonicalName   = exClass.getCanonicalName();
                final String    exClassName     = (canonicalName != null)
                    ? canonicalName
                    : exClass.getName();

                throw new ReceiveException(
                    "A " + exClassName + " was thrown while attempting to " +
                    "write a downloaded article to the file-system.\n" +
                    "Section-URL [" + sectionURLNum + "], Article Number [" + articleNum + "]\n" +
                    "Unable to save file:\n" +
                    outFileName + "\n" +
                    "Please review this exception's getCause() for more details.",
                    ex, sectionURLNum, articleNum
                );
            }
        };
    }
}
|