Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/sales.git
src/main/java/com/whitemagicsoftware/sales/Main.java
import java.io.IOException;
import java.io.InputStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
import java.util.List;
import javax.mail.internet.AddressException;
-import static org.apache.commons.io.FilenameUtils.removeExtension;
+import org.apache.log4j.BasicConfigurator;
import org.webharvest.definition.ScraperConfiguration;
import org.webharvest.runtime.Scraper;
private void run() throws Exception {
- process(getSubscribers());
+ process( getSubscribers() );
}
private void process(
- List<Subscriber> subscribers) {
- for (Subscriber subscriber : subscribers) {
+ List<Subscriber> subscribers ) {
+ for( Subscriber subscriber : subscribers ) {
try {
- String message = process(subscriber, getVendors(subscriber));
- notify(subscriber, message);
- } catch (Exception e) {
- notify(e);
+ String message = process( subscriber, getVendors( subscriber ) );
+ notify( subscriber, message );
+ } catch( Exception e ) {
+ notify( e );
}
}
}
- private String process(Subscriber subscriber, List<Vendor> vendors)
+ private String process( Subscriber subscriber, List<Vendor> vendors )
throws Exception {
StringBuilder result = new StringBuilder();
- for (Vendor vendor : vendors) {
- String s = process(subscriber, vendor, getProducts(subscriber, vendor));
- result.append(s);
+ for( Vendor vendor : vendors ) {
+ String s = process( subscriber, vendor, getProducts( subscriber, vendor ) );
+ result.append( s );
}
return result.toString();
}
private String process(
Subscriber subscriber,
Vendor vendor,
- List<Product> products) throws Exception {
+ List<Product> products ) throws Exception {
StringBuilder result = new StringBuilder();
// Include the vendor name on the first products loop iteration.
boolean includeVendorName = true;
- for (Product product : products) {
- Scraper scraper = getScraper(vendor);
+ for( Product product : products ) {
+ Scraper scraper = getScraper( vendor );
- scraper.addVariableToContext("includeVendorName", includeVendorName);
- scraper.addVariableToContext("locationCode", subscriber.getLocationCode());
- scraper.addVariableToContext("vendorName", vendor.getName());
- scraper.addVariableToContext("productName", product.getName());
- scraper.addVariableToContext("productPath", product.getUrlPath());
+ scraper.addVariableToContext( "includeVendorName", includeVendorName );
+ scraper.addVariableToContext( "locationCode", subscriber.getLocationCode() );
+ scraper.addVariableToContext( "vendorName", vendor.getName() );
+ scraper.addVariableToContext( "productName", product.getName() );
+ scraper.addVariableToContext( "productPath", product.getUrlPath() );
scraper.execute();
- Variable message = scraper.getContext().getVar("message");
- result.append(message.toString());
+ Variable message = scraper.getContext().getVar( "message" );
+ result.append( message.toString() );
// Presumably, the template added the vendor's name to delineate its
* @param subscriber The person to notify.
* @param message
+ *
* @throws Exception
*/
- private void notify(Subscriber subscriber, String message) throws Exception {
- if (!empty(message)) {
- getNotifyService().notify(subscriber.getAddress(), "Subject", message);
+ private void notify( Subscriber subscriber, String message ) throws Exception {
+ if( !empty( message ) ) {
+ getNotifyService().notify( subscriber.getAddress(), "Subject", message );
}
}
- private void notify(Exception e) {
- getNotifyService().notify(e);
+ private void notify( Exception e ) {
+ getNotifyService().notify( e );
}
/**
* Answers whether the string is null or empty or contains only whitespace.
*
* @param s The string to validate.
+ *
* @return true if the string has no content; false otherwise.
*/
- private boolean empty(String s) {
+ private boolean empty( String s ) {
return s == null || s.trim().length() == 0;
}
* @return A Scraper that can extract data.
*/
- private Scraper getScraper(Vendor vendor) throws IOException {
- String resource = vendor.getScriptName();
- final ScraperConfiguration config = getScraperConfiguration(resource);
- return new Scraper(config, createTemporaryDirectory(resource).toString());
+ private Scraper getScraper( Vendor vendor ) throws IOException {
+ String resource = "scripts/" + vendor.getScriptName();
+ final ScraperConfiguration config = getScraperConfiguration( resource );
+ return new Scraper( config, getWorkingDirectory() );
}
- private ScraperConfiguration getScraperConfiguration(String resource) {
- final InputSource in = getResource(resource);
- return new ScraperConfiguration(in);
+ /**
+ * Root directory containing scripts/ and templates/ directories.
+ *
+ * @return A non-null String to the resources' root directory.
+ */
+ private String getWorkingDirectory() {
+ return new File( "src/main/resources" ).toString();
}
- private InputSource getResource(String resource) {
- final InputStream in = getResourceAsStream(resource);
- return new InputSource(in);
+ private ScraperConfiguration getScraperConfiguration( String resource ) {
+ final InputSource in = getResource( resource );
+ return new ScraperConfiguration( in );
}
- private InputStream getResourceAsStream(String resource) {
+ private InputSource getResource( String resource ) {
+ final InputStream in = getResourceAsStream( resource );
+ return new InputSource( in );
+ }
+
+ private InputStream getResourceAsStream( String resource ) {
final InputStream in
- = getContextClassLoader().getResourceAsStream(resource);
+ = getContextClassLoader().getResourceAsStream( resource );
- return in == null ? getClass().getResourceAsStream(resource) : in;
+ return in == null ? getClass().getResourceAsStream( resource ) : in;
}
private ClassLoader getContextClassLoader() {
return Thread.currentThread().getContextClassLoader();
- }
-
- /**
- * Creates a fully qualified path for web pages downloaded by the scraper.
- *
- * @param resource The site (with filename extension) to scrap.
- *
- * @return A path to the directory that was created (e.g.,
- * $HOME/.../sales/working/website/scrape4443161710900...).
- *
- * @throws IOException
- */
- private File createTemporaryDirectory(String resource) throws IOException {
- Path working = Paths.get(getWorkingDirectory(resource));
- working = Files.createDirectories(working);
- working = Files.createTempDirectory(working, DIRECTORY_PREFIX);
-
- File file = working.toFile();
- file.deleteOnExit();
-
- return file;
- }
-
- private String getWorkingDirectory(String resource) {
- return System.getProperty("user.dir")
- + File.separator + "working"
- + File.separator + removeExtension(resource);
}
}
- private List<Vendor> getVendors(Subscriber subscriber) {
- return getVendorService().list(subscriber);
+ private List<Vendor> getVendors( Subscriber subscriber ) {
+ return getVendorService().list( subscriber );
}
- private List<Product> getProducts(Subscriber subscriber, Vendor vendor) {
- return getProductService().list(subscriber, vendor);
+ private List<Product> getProducts( Subscriber subscriber, Vendor vendor ) {
+ return getProductService().list( subscriber, vendor );
}
- public static void main(String args[]) throws Exception {
+ public static void main( String args[] ) throws Exception {
new Main().run();
}
}
-
-/*
-
- # Filename to the message subject and body to send to the subscriber.
- FILE_MESSAGE_SUBJECT="subject.txt"
- FILE_MESSAGE_BODY="body.txt"
-
- FILE_FLYER="flyer.txt"
- FILE_NOTIFIED="notified.txt"
-
- # Significantly reduce line wrap (to help with text parsing). The width
- # is required because line items can be longer than 80 characters. Without
- # the width, long item descriptions, which contain the item cost, might
- # not parse correctly from some vendors.
- TEXT_WIDTH=8192
-
- # Human-readable date format.
- DATE_FORMAT="%A, %b %d"
-
- # Generate a hash value for the $1 parameter.
- hashed() {
- echo "$1" | sha256sum | awk {'print $1'}
- }
-
- # Determine whether products are on sale for each subscriber.
- while IFS=',' read email postal_code store product_name product_page
- do
- # Ignore blank lines.
- if [ ! "$email" ]; then continue; fi
-
- # Extract the vendor details for the subscriber's desired product.
- line=$(head -$store $DIR/stores.csv | tail -1)
- vendor_name=$(echo $line | awk -F, '{print $2}')
-
- # Drop the scrape function so it can be reloaded (fails silently).
- unset -f scrape
-
- # Load the scraper for the subscriber's vendor.
- source $DIR/vendors/$(echo $line | awk -F, '{print $3}')/scrape.sh
-
- # Hash the email for a safe directory name.
- dir_subscriber="$DIR_SUBSCRIBERS/$(hashed $email)"
-
- # Create a place to store this subscriber's information (fail silent).
- mkdir -p "$dir_subscriber"
-
- # Remove the quotes around the product name and product page.
- temp="${product_name%\"}"
- temp="${temp#\"}"
- product_name=$temp
-
- temp="${product_page%\"}"
- temp="${temp#\"}"
- product_page=$temp
-
- # Extract product information for subscriber notifications.
- # Builds the message subject and body for notifications.
- # Will delete the notification file if it is time to send a new message.
- scrape "$dir_subscriber" "$vendor_name" "$product_name" "$product_page" \
- "$postal_code"
- done < "$DIR_SUBSCRIBERS/list.csv"
-
- # Load function to notify a subscriber.
- source "$DIR/notify.sh"
-
- # Notification happens after sale determination because subscribers can
- # subscribe to notifications for multiple product items. Once all the
- # on sale products have been extracted, the message content can be sent.
- while IFS=',' read email postal_code store product_name product_page
- do
- # Ignore blank lines.
- if [ ! "$email" ]; then continue; fi
-
- dir_subscriber="$DIR_SUBSCRIBERS/$(hashed $email)"
- path_notified="$dir_subscriber/$FILE_NOTIFIED"
-
- # If the notified file doesn't exist, send a notification.
- if [ ! -f "$path_notified" ]; then
- # Full path to the message subject and body.
- path_subject="$dir_subscriber/$FILE_MESSAGE_SUBJECT"
- path_body="$dir_subscriber/$FILE_MESSAGE_BODY"
-
- # Only send the notification if both a message body and subject exist.
- # This allows for writing a body prefix (e.g., store address) to the
- # message body without sending a message if no subscriber products are
- # on sale.
- if [ -s "$path_subject" -a -s "$path_body" ]; then
- # Notify the subscriber.
- notify $DIR $email $path_subject $path_body
-
- # Allow comparing flyer expiry date against last notified date.
- date +%s > "$path_notified"
- fi
- fi
- done < "$DIR_SUBSCRIBERS/list.csv"
- */
src/main/java/com/whitemagicsoftware/sales/service/impl/NotifyServiceImpl.java
import com.whitemagicsoftware.notify.TextHtmlMultipart;
import com.whitemagicsoftware.sales.service.NotifyService;
+import java.io.PrintWriter;
+import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
public class NotifyServiceImpl extends ServiceImpl
implements NotifyService {
-
+
private Session session;
public NotifyServiceImpl() {
}
@Override
- public void notify(Address recipient, String subject, String markdown)
+ public void notify( Address recipient, String subject, String markdown )
throws MessagingException, UnsupportedEncodingException {
String textBody = markdown;
- String htmlBody = toHtml(markdown);
+ String htmlBody = toHtml( markdown );
- Message message = createMessage(recipient, subject);
- message.setContent(new TextHtmlMultipart(textBody, htmlBody));
- send(message);
+ Message message = createMessage( recipient, subject );
+ message.setContent( new TextHtmlMultipart( textBody, htmlBody ) );
+ send( message );
}
@Override
- public void notify(Exception e) {
- try {
- Message message = createMessage(ALERT_CONTACT.asAddress(), "Error sending.");
- message.setText(e.getMessage());
- send(message);
- } catch (MessagingException | UnsupportedEncodingException ex) {
+ public void notify( Exception e ) {
+ try(
+ StringWriter sw = new StringWriter();
+ PrintWriter pw = new PrintWriter( sw ) ) {
+ Message message = createMessage(
+ ALERT_CONTACT.asAddress(),
+ "[Error] " + e.getMessage() );
+
+ e.printStackTrace( pw );
+
+ message.setText( sw.toString() );
+ send( message );
+ } catch( Exception ex ) {
// TODO: Log this.
}
}
/**
* TODO: Convert from Markdown to HTML (using PegDown?).
*
* @param markdown The markdown to HTML-ify.
+ *
* @return The given markdown as an HTML fragment.
*/
- private String toHtml(String markdown) {
+ private String toHtml( String markdown ) {
return markdown;
}
/**
* Returns an authenticated transport.
*
* @param session Session to associate with the transport.
+ *
* @return A non-null instance.
*/
private Address getSender() throws UnsupportedEncodingException {
- return new InternetAddress(getSenderAddress(), getSenderName());
+ return new InternetAddress( getSenderAddress(), getSenderName() );
}
- private void send(Message message) throws MessagingException {
+ private void send( Message message ) throws MessagingException {
HttpTransport transport = getTransport();
- transport.sendMessage(message, message.getAllRecipients());
+ transport.sendMessage( message, message.getAllRecipients() );
transport.close();
}
private HttpTransport getTransport() {
- HttpTransport transport = new ElasticEmailTransport(getSession());
- transport.setAuthentication(getUsername(), getPassword());
+ HttpTransport transport = new ElasticEmailTransport( getSession() );
+ transport.setAuthentication( getUsername(), getPassword() );
// TODO: Track the result token for the HTTP request?
* @param recipient Destination address for the message.
* @param subject Subject line.
+ *
* @return A new Message instance, never null.
+ *
* @throws MessagingException Could not create the message instance.
* @throws UnsupportedEncodingException The address was not properly
* formatted.
*/
- private Message createMessage(Address recipient, String subject)
+ private Message createMessage( Address recipient, String subject )
throws MessagingException, UnsupportedEncodingException {
- Message message = new MimeMessage(getSession());
+ Message message = new MimeMessage( getSession() );
- message.setFrom(getSender());
- message.addRecipient(Message.RecipientType.TO, recipient);
- message.setSubject(subject);
+ message.setFrom( getSender() );
+ message.addRecipient( Message.RecipientType.TO, recipient );
+ message.setSubject( subject );
return message;
}
/**
* Returns a lazily initialized session instance.
- *
+ *
* @return A new Session instance, never null.
*/
private synchronized Session getSession() {
if( this.session == null ) {
- this.session = Session.getInstance(new Properties());
+ this.session = Session.getInstance( new Properties() );
}
src/main/resources/com_thriftyfoods.xml
-<?xml version="1.0" encoding="UTF-8"?>
-<config charset="UTF-8">
-
- <var-def name="vendor">https://www.thriftyfoods.com/product/</var-def>
-
- <var-def name="page">
- <html-to-xml outputtype="pretty" prunetags="script">
- <http url="${vendor}${productPath}" />
- </html-to-xml>
- </var-def>
-
- <var-def name="price">
- <xpath expression="(//span[@class='price' and @itemprop='price'])[last()]/text()">
- <var name="page" />
- </xpath>
- </var-def>
-
- <var-def name="sale">
- <xpath expression="boolean(//div[contains(@class,'on-sale') and @itemprop='offerDetails'])">
- <var name="page" />
- </xpath>
- </var-def>
-
- <var-def name="message">
- <case>
- <if condition="${includeVendorName}">
- <template>
- <file path="templates/header-vendor-name.md"/>
- </template>
- </if>
- </case>
-
- <template>
- <file path="templates/header-product-price.md"/>
- <file path="templates/product-price.md"/>
- </template>
- </var-def>
-</config>
src/main/resources/log4j.xml
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
+<log4j:configuration debug="true"
+ xmlns:log4j='http://jakarta.apache.org/log4j/'>
+
+ <appender name="console" class="org.apache.log4j.ConsoleAppender">
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern"
+ value="%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n" />
+ </layout>
+ </appender>
+
+ <root>
+ <level value="WARN" />
+ <appender-ref ref="console" />
+ </root>
+
+</log4j:configuration>
+
src/main/resources/scripts/com_thriftyfoods.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<config charset="UTF-8">
+
+ <var-def name="vendor">https://www.thriftyfoods.com/product/</var-def>
+
+ <var-def name="page">
+ <html-to-xml outputtype="pretty" prunetags="script">
+ <http url="${vendor}${productPath}" />
+ </html-to-xml>
+ </var-def>
+
+ <var-def name="price">
+ <xpath expression="(//span[@class='price' and @itemprop='price'])[last()]/text()">
+ <var name="page" />
+ </xpath>
+ </var-def>
+
+ <var-def name="sale">
+ <xpath expression="boolean(//div[contains(@class,'on-sale') and @itemprop='offerDetails'])">
+ <var name="page" />
+ </xpath>
+ </var-def>
+
+ <var-def name="message">
+ <case>
+ <if condition="${includeVendorName}">
+ <template>
+ <file path="templates/header-vendor-name.md"/>
+ </template>
+ </if>
+ </case>
+
+ <template>
+ <file path="templates/header-product-name.md"/>
+ <file path="templates/product-price.md"/>
+ </template>
+ </var-def>
+</config>

Reorg resources. Addressed logging warning. Runs bug free.

Author djarvis <email>
Date 2016-06-12 23:31:38 GMT-0700
Commit 9e9d4ec06e98c7098ac0dcbd9d43a4e1bb77b2d8
Parent 317b3fb
Delta 152 lines added, 235 lines removed, 83-line decrease