001package Torello.HTML.Tools.NewsSite;
002
003import Torello.Java.*;
004import java.io.*;
005
006/**
007 * A Java function-pointer / lambda-target that provides a means for deciding where to save
008 * downloaded article HTML, including a {@code static}-builder method for choosing to save articles
009 * directly to the file-system.
010 * 
011 * <EMBED CLASS='external-html' DATA-FILE-ID=SCRAPE_ART_REC>
012 */
013@FunctionalInterface
014public interface ScrapedArticleReceiver
015{
016    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUIDFI>  */
017    public static final long serialVersionUID = 1;
018
019    /**
020     * <EMBED CLASS='external-html' DATA-FILE-ID=FUNC_INTER_METH>
021     * 
022     * @param articleBody After an article has been downloaded by the {@code ScrapeArticles} class,
023     * it will build an instance of {@code class Article} and pass it to this class.  It is the
024     * programmer's responsibility to ultimately decide what to do with news articles after they
025     * have been successfully scraped and parsed.
026     * 
027     * @param sectionURLNum This is a convenience parameter that informs the 
028     * implementing-{@code interface} <B><I>from which URL Section</I></B> of the News Web-Sites
029     * main-page that this article is being downloaded.
030     * 
031     * <BR /><BR /><DIV CLASS=JDHint>
032     * <B STYLE='color:red;'>Note:</B> Review the {@code class ScrapeURLs} to read more about
033     * "Section {@code URL's}."  The number of "Section {@code URL's}" for a news web-site scrape
034     * is just the length of the {@code Vector<URL> sectionURL's}
035     * </DIV>
036     * 
037     * @param articleNum This parameter informs the implementing-{@code interface} which article
038     * number is being downloaded.  Each section-{@code URL} will have a number of different
039     * articles in the section.  These numbers can be used to create unique file-names, for
040     * instance.
041     * 
042     * @throws ReceiveException This exception may be thrown by the lambda-expression or class
043     * instance that implements this {@code FunctionalInterface}.  It is not mandatory that this
044     * exception be used.
045     */
046    public void receive(Article articleBody, int sectionURLNum, int articleNum)
047        throws ReceiveException;
048
049    /**
050     * <B><SPAN STYLE="color: red;">saveToFS =&gt; Save To File-System</SPAN></B>
051     * 
052     * <BR /><BR />This is a static factory-builder method that will produce a
053     * {@code 'ScrapedArticleReceiver'} that simply <B>saves downloaded articles to a
054     * directory</B> on the file-system.
055     * 
056     * <BR /><BR />The user, here, merely needs to provide a Directory-Name using {@code String}
057     * parameter {@code 'dirNameStr'}.  This is the <B><I>most simple</I></B> way to create an
058     * instance of this class.
059     * 
060     * @param dirNameStr This is a directory on the file-system that will be used to save articles
061     * that are received directly to the file-system.
062     * 
063     * @throws WritableDirectoryException This method shall check whether it is possible to
064     * write to the provided directory name.
065     */
066    public static ScrapedArticleReceiver saveToFS(String dirNameStr)
067    {
068        WritableDirectoryException.check(dirNameStr);
069
070
071        // Make sure that the directory name-string ends with the system File-Separator
072        // character.  This '/' for UNIX and '\' for MS-DOS.
073
074        final String finalDirNameStr = dirNameStr.endsWith(File.separator) 
075            ? dirNameStr
076            : dirNameStr + File.separator;
077
078
079        // Create an instance of this functional-interface using a lambda-expression.
080        // 
081        // NOTE: This is literally just saving an object to a file using object-serialization.
082        //       The exception catching / throwing is just to produce standardized error messages
083        //       back to the user, if an exception occurs when saving files.
084
085        return (Article articleBody, int sectionURLNum, int articleNum) ->
086        {
087            String outFileName = 
088                finalDirNameStr +
089                StrPrint.zeroPad(sectionURLNum) + '.' +
090                StrPrint.zeroPad10e4(articleNum) + ".dat";
091
092            try
093                { FileRW.writeObjectToFile(articleBody, outFileName, true); }
094
095            catch (Exception ex)
096            {
097                throw new ReceiveException(
098                    "A " + ex.getClass().getCanonicalName() + " was thrown while attempting to " +
099                    "write a downloaded article to the file-system.\n" +
100                    "Section-URL [" + sectionURLNum + "], Article Number [" + articleNum + "]\n" +
101                    "Unable to save file:\n" +
102                    outFileName + "\n" + 
103                    "Please review this exception's getCause() for more details.",
104                    ex, sectionURLNum, articleNum
105                );
106            }
107        };
108    }
109}