001package Torello.HTML.Tools.NewsSite; 002 003import Torello.Java.*; 004import Torello.Java.Additional.Ret4; 005 006import java.io.*; 007import java.util.Vector; 008 009/** 010 * When the main iteration-loop for downloading news-articles is running, the loop-variables are 011 * kept current to this class; so if (while watching the downloader), the programmer has decided 012 * to go take a break (and presses {@code Control-^C}), 'download progress' won't be lost and 013 * starting over with articles that have already been saved won't be necessary. 014 * 015 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=PAUSE> 016 */ 017public interface Pause extends Serializable 018{ 019 /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */ 020 public static final long serialVersionUID = 1; 021 022 /** 023 * This method needs to save the current download state. The three integers provided are all 024 * that the download logic needs in order to identify which newspaper article {@code URL's} 025 * have already downloaded - <I>and, therefore, where to begin the download process after a 026 * pause or break.</I> The instance of {@code Vector} that is required by this method's 027 * parameter list contain the "Download Results" for each news-{@code Article} in the 028 * {@code URL} list. 029 * 030 * @param results This is the two dimensional {@code Vector} that contains instances of 031 * {@code 'DownloadResult'}. Each news-{@code Article} in each section of a newspaper 032 * website has a specific location in this two dimensional {@code Vector}. As the downloader 033 * retrieves (or fails) to scrape news-{@code Article's}, the result of the scrape (or 034 * scrape-attempt) are inserted into this 2-D {@code Vector}. 035 * 036 * @param outerCounter This is the outer-{@code Vector} index of the last {@code URL} 037 * downloaded. 038 * 039 * @param innerCounter This is the inner-{@code Vector} index of the last {@code URL} 040 * downloaded. 041 * 042 * @param successCounter This is how many of the {@code URL's} that were downloaded without 043 * throwing any exceptions. 044 */ 045 public void saveState( 046 Vector<Vector<DownloadResult>> results, 047 int outerCounter, int innerCounter, int successCounter 048 ) throws PauseException; 049 050 /** 051 * This method loads the state of the downloader. This can be helpful if the user wishes to 052 * "pause" the download when long-lists of article {@code URL's} are being retrieved. Also, if 053 * the downloader exits due to an exception, the state of download is maintained. 054 * 055 * @return An instance of {@code Ret4<Vector<Vector<DownloadResult>>, Integer, Integer, Integer>} 056 * 057 * <BR /><BR /><UL CLASS=JDUL> 058 * 059 * <LI> {@code Ret4.a} - The current state of the "Return {@code Vector}". This two dimensional 060 * {@code Vector} fills up with instances of enumerated-type {@code DownloadResult}. 061 * <BR /><BR /> 062 * </LI> 063 * 064 * <LI> {@code Ret2.b} - The outer-{@code Vector} index of the last attempted newspaper article 065 * {@code URL} download. 066 * <BR /><BR /> 067 * </LI> 068 * 069 * <LI> {@code Ret2.c} - The inner-{@code Vector} index of the last attempted newspaper article 070 * {@code URL} download. 071 * <BR /><BR /> 072 * </LI> 073 * 074 * <LI> {@code Ret2.d} - The number of article {@code URL's} that have successfully downloaded. 075 * </LI> 076 * 077 * </UL> 078 */ 079 public Ret4<Vector<Vector<DownloadResult>>, Integer, Integer, Integer> loadState() 080 throws PauseException; 081 082 /** 083 * If the {@code Pause} implementation needs initialization, it ought to implement this method. 084 * 085 * <BR /><BR /><DIV CLASS=JDHint> 086 * <B STYLE='color:red;'>Important:</B> 087 * The initialize process should ensure that a call to {@code loadState()} will return a 088 * {@link Ret4} data-structure whose integer fields are all equal to zero. These fields are 089 * counters, and when download begins, if they are not-zero, then many news-articles will not 090 * be scraped. 091 * </DIV> 092 * 093 * <BR /><DIV CLASS=JDHintAlt> 094 * <B STYLE='color:red;'>Additionally:</B> 095 * On initialization, the value for the 2-D {@code Vector} in the {@code Ret4} data-structure 096 * need only be present - <B><I>it does not matter what values have been inserted into it, nor 097 * the sizes of the sub-{@code Vector's}.</I></B> Do note that it's values will be clobbered by 098 * the downloader if / when the downloader determines that the download process is starting at 099 * the beginning. 100 * </DIV> 101 * 102 * @throws PauseException This exception is thrown if the implementation of this {@code interface} 103 * fails to init or load. 104 */ 105 public void initialize() throws PauseException; 106 107 /** 108 * This method is a {@code static}-factory method that returns an instance of this 109 * {@code interface Pause} that uses the file-system for saving the state to a user-specified 110 * file-name. 111 * 112 * @param saveFileName This is just the name of the data-file where state shall be saved. 113 * This state contains only two integers, and is, therefore, an extremely small data-file. 114 * 115 * @return A functioning instance of this interface - one that uses a flat file for saving state. 116 */ 117 public static Pause getFSInstance(String saveFileName) 118 throws PauseException 119 { return new PauseFS(saveFileName); } 120}