1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120 | package Torello.HTML.Tools.NewsSite;
import Torello.Java.*;
import Torello.Java.Additional.Ret4;
import java.io.*;
import java.util.Vector;
/**
 * Persistence hook for the news-article download loop.  While the main iteration-loop that
 * downloads news-articles is running, its loop-variables are kept current through this
 * interface.  If the programmer watching the downloader decides to take a break (and presses
 * {@code Control-C}), the 'download progress' is not lost, and there is no need to start over
 * with articles that have already been saved.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=PAUSE>
 */
public interface Pause extends Serializable {

    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
    long serialVersionUID = 1L;

    /**
     * Saves the current download state.  The three integers passed here are all that the
     * download logic needs in order to identify which newspaper article {@code URL's} have
     * already been downloaded - <I>and, therefore, where to resume the download process after
     * a pause or break.</I>  The {@code Vector} in this method's parameter list contains the
     * "Download Results" for each news-{@code Article} in the {@code URL} list.
     *
     * @param results The two dimensional {@code Vector} holding instances of
     * {@code 'DownloadResult'}.  Each news-{@code Article} in each section of a newspaper
     * website has a specific location in this 2-D {@code Vector}.  As the downloader retrieves
     * (or fails to retrieve) news-{@code Article's}, the result of each scrape (or
     * scrape-attempt) is inserted into it.
     *
     * @param outerCounter The outer-{@code Vector} index of the last {@code URL} downloaded.
     *
     * @param innerCounter The inner-{@code Vector} index of the last {@code URL} downloaded.
     *
     * @param successCounter How many of the {@code URL's} were downloaded without throwing any
     * exceptions.
     *
     * @throws PauseException May be thrown by an implementation; the exact conditions are
     * implementation-defined.
     */
    void saveState(
        Vector<Vector<DownloadResult>> results,
        int outerCounter,
        int innerCounter,
        int successCounter
    ) throws PauseException;

    /**
     * Loads the state of the downloader.  This is helpful when the user wishes to "pause" a
     * download while long lists of article {@code URL's} are being retrieved.  Likewise, if the
     * downloader exits due to an exception, the state of the download is maintained.
     *
     * @return An instance of
     * {@code Ret4<Vector<Vector<DownloadResult>>, Integer, Integer, Integer>}
     *
     * <BR /><BR /><UL CLASS=JDUL>
     *
     * <LI> {@code Ret4.a} - The current state of the "Return {@code Vector}".  This two
     *      dimensional {@code Vector} fills up with instances of enumerated-type
     *      {@code DownloadResult}.
     *      <BR /><BR />
     *      </LI>
     *
     * <LI> {@code Ret4.b} - The outer-{@code Vector} index of the last attempted newspaper
     *      article {@code URL} download.
     *      <BR /><BR />
     *      </LI>
     *
     * <LI> {@code Ret4.c} - The inner-{@code Vector} index of the last attempted newspaper
     *      article {@code URL} download.
     *      <BR /><BR />
     *      </LI>
     *
     * <LI> {@code Ret4.d} - The number of article {@code URL's} that have successfully
     *      downloaded.
     *      </LI>
     *
     * </UL>
     *
     * @throws PauseException May be thrown by an implementation; the exact conditions are
     * implementation-defined.
     */
    Ret4<Vector<Vector<DownloadResult>>, Integer, Integer, Integer> loadState()
        throws PauseException;

    /**
     * Hook for implementations of {@code Pause} that need initialization; such implementations
     * ought to do that work in this method.
     *
     * <BR /><BR /><DIV CLASS=JDHint>
     * <B STYLE='color:red;'>Important:</B>
     * The initialize process should ensure that a subsequent call to {@code loadState()}
     * returns a {@link Ret4} data-structure whose integer fields are all equal to zero.  Those
     * fields are counters, and if they are non-zero when the download begins, many
     * news-articles will not be scraped.
     * </DIV>
     *
     * <BR /><DIV CLASS=JDHintAlt>
     * <B STYLE='color:red;'>Additionally:</B>
     * On initialization, the 2-D {@code Vector} in the {@code Ret4} data-structure need only
     * be present - <B><I>it does not matter what values have been inserted into it, nor what
     * the sizes of the sub-{@code Vector's} are.</I></B>  Do note that its values will be
     * clobbered by the downloader if / when the downloader determines that the download
     * process is starting at the beginning.
     * </DIV>
     *
     * @throws PauseException This exception is thrown if the implementation of this
     * {@code interface} fails to init or load.
     */
    void initialize() throws PauseException;

    /**
     * A {@code static}-factory method returning an instance of this {@code interface Pause}
     * that uses the file-system, saving state to a user-specified file-name.
     *
     * @param saveFileName The name of the data-file where state shall be saved.
     *
     * @return A functioning instance of this interface - one that uses a flat file for saving
     * state.
     *
     * @throws PauseException Propagated from the {@code PauseFS} constructor; the exact
     * conditions are not visible from this interface.
     */
    // NOTE(review): earlier documentation described the saved file as containing "only two
    // integers", whereas saveState(...) receives three counters plus the results Vector.
    // Confirm against PauseFS what is actually written to 'saveFileName'.
    static Pause getFSInstance(String saveFileName) throws PauseException {
        return new PauseFS(saveFileName);
    }
}
|