001package Torello.HTML.Tools.NewsSite; 002 003import Torello.Java.*; 004import java.io.*; 005 006/** 007 * A Java function-pointer / lambda-target that provides a means for deciding where to save 008 * downloaded article HTML, including a {@code static}-builder method for choosing to save articles 009 * directly to the file-system. 010 * 011 * <EMBED CLASS='external-html' DATA-FILE-ID=SCRAPE_ART_REC> 012 */ 013@FunctionalInterface 014public interface ScrapedArticleReceiver 015{ 016 /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUIDFI> */ 017 public static final long serialVersionUID = 1; 018 019 /** 020 * <EMBED CLASS='external-html' DATA-FILE-ID=FUNC_INTER_METH> 021 * 022 * @param articleBody After an article has been downloaded by the {@code ScrapeArticles} class, 023 * it will build an instance of {@code class Article} and pass it to this class. It is the 024 * programmer's responsibility to ultimately decide what to do with news articles after they 025 * have been successfully scraped and parsed. 026 * 027 * @param sectionURLNum This is a convenience parameter that informs the 028 * implementing-{@code interface} <B><I>from which URL Section</I></B> of the News Web-Sites 029 * main-page that this article is being downloaded. 030 * 031 * <BR /><BR /><DIV CLASS=JDHint> 032 * <B STYLE='color:red;'>Note:</B> Review the {@code class ScrapeURLs} to read more about 033 * "Section {@code URL's}." The number of "Section {@code URL's}" for a news web-site scrape 034 * is just the length of the {@code Vector<URL> sectionURL's} 035 * </DIV> 036 * 037 * @param articleNum This parameter informs the implementing-{@code interface} which article 038 * number is being downloaded. Each section-{@code URL} will have a number of different 039 * articles in the section. These numbers can be used to create unique file-names, for 040 * instance. 041 * 042 * @throws ReceiveException This exception may be thrown by the lambda-expression or class 043 * instance that implements this {@code FunctionalInterface}. It is not mandatory that this 044 * exception be used. 045 */ 046 public void receive(Article articleBody, int sectionURLNum, int articleNum) 047 throws ReceiveException; 048 049 /** 050 * <B><SPAN STYLE="color: red;">saveToFS => Save To File-System</SPAN></B> 051 * 052 * <BR /><BR />This is a static factory-builder method that will produce a 053 * {@code 'ScrapedArticleReceiver'} that simply <B>saves downloaded articles to a 054 * directory</B> on the file-system. 055 * 056 * <BR /><BR />The user, here, merely needs to provide a Directory-Name using {@code String} 057 * parameter {@code 'dirNameStr'}. This is the <B><I>most simple</I></B> way to create an 058 * instance of this class. 059 * 060 * @param dirNameStr This is a directory on the file-system that will be used to save articles 061 * that are received directly to the file-system. 062 * 063 * @throws WritableDirectoryException This method shall check whether it is possible to 064 * write to the provided directory name. 065 */ 066 public static ScrapedArticleReceiver saveToFS(String dirNameStr) 067 { 068 WritableDirectoryException.check(dirNameStr); 069 070 071 // Make sure that the directory name-string ends with the system File-Separator 072 // character. This '/' for UNIX and '\' for MS-DOS. 073 074 final String finalDirNameStr = dirNameStr.endsWith(File.separator) 075 ? dirNameStr 076 : dirNameStr + File.separator; 077 078 079 // Create an instance of this functional-interface using a lambda-expression. 080 // 081 // NOTE: This is literally just saving an object to a file using object-serialization. 082 // The exception catching / throwing is just to produce standardized error messages 083 // back to the user, if an exception occurs when saving files. 084 085 return (Article articleBody, int sectionURLNum, int articleNum) -> 086 { 087 String outFileName = 088 finalDirNameStr + 089 StrPrint.zeroPad(sectionURLNum) + '.' + 090 StrPrint.zeroPad10e4(articleNum) + ".dat"; 091 092 try 093 { FileRW.writeObjectToFile(articleBody, outFileName, true); } 094 095 catch (Exception ex) 096 { 097 throw new ReceiveException( 098 "A " + ex.getClass().getCanonicalName() + " was thrown while attempting to " + 099 "write a downloaded article to the file-system.\n" + 100 "Section-URL [" + sectionURLNum + "], Article Number [" + articleNum + "]\n" + 101 "Unable to save file:\n" + 102 outFileName + "\n" + 103 "Please review this exception's getCause() for more details.", 104 ex, sectionURLNum, articleNum 105 ); 106 } 107 }; 108 } 109}