Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/sales.git
src/main/java/com/whitemagicsoftware/sales/Main.java
}
- String subject = "Sales: " + buildSubject( vendors );
-
createNotifyService().notify(
- subscriber.getAddress(), subject, message.toString() );
+ subscriber.getAddress(), buildSubject( vendors ), message.toString() );
}
private String buildSubject( Set<Vendor> vendors ) {
- return vendors.stream().map( v -> v.getName() ).collect(
+ return getSubjectPrefix() + vendors.stream().map( v -> v.getName() ).collect(
Collectors.joining( ", " ) );
+ }
+
+ /**
+ * Currently returns "Sales: " for all notices.
+ *
+ * @return A non-null string used for the notification subject line.
+ */
+ protected String getSubjectPrefix() {
+ return "Sales: ";
}
src/main/java/com/whitemagicsoftware/sales/service/impl/DefaultScraperService.java
import org.webharvest.definition.ScraperConfiguration;
import org.webharvest.runtime.Scraper;
-import org.xml.sax.InputSource;
/**
public Scraper createScraper( Vendor vendor ) throws IOException {
final String sVendor = "scripts/" + vendor.getScriptName();
- final String sLib = "scripts/lib/common.xml";
+ final String sPostfix = "scripts/lib/postfix.xml";
- final ScraperConfiguration scLib = createScraperConfiguration( sLib );
- final ScraperConfiguration scMain = createScraperConfiguration( sVendor );
+ final ScraperConfiguration scMain = createScript( sVendor );
+ final ScraperConfiguration scPostfix = createScript( sPostfix );
+
+ scMain.appendOperations( scPostfix );
Scraper scraper = new Scraper( scMain, getWorkingDirectory() );
scraper.setDebug( true );
- scraper.execute( scLib.getOperations() );
return scraper;
private String getWorkingDirectory() {
return new File( "src/main/resources" ).toString();
- }
-
- private ScraperConfiguration createScraperConfiguration( String resource ) {
- final InputSource in = getResource( resource );
- return new ScraperConfiguration( in );
}
- private InputSource getResource( String resource ) {
- final InputStream in = getResourceAsStream( resource );
- return new InputSource( in );
+ private ScraperConfiguration createScript( String resource )
+ throws IOException {
+ return new ScraperConfiguration( getResourceAsStream( resource ) );
}
src/main/resources/scripts/1_com_thriftyfoods.xml
<var-def name="vendor_url">https://www.thriftyfoods.com/product/</var-def>
<var-def name="vendor_name">Thrifty Foods</var-def>
- <var-def name="vendor_template">thriftyfoods</var-def>
-
+ <var-def name="product_text">price</var-def>
+
<var-def name="product_page">
<html-to-xml outputtype="pretty" prunetags="script">
- <http url="${vendor_url}${product_path}" />
+ <http url="${vendor_url}${product_path}"/>
</html-to-xml>
</var-def>
- <var-def name="product_price">
- <xpath expression="(//span[@class='price' and @itemprop='price'])[last()]/text()">
- <var name="product_page" />
- </xpath>
- </var-def>
-
<var-def name="product_sale">
<xpath expression="boolean(//div[contains(@on-sale,'on-sale') and @itemprop='offerDetails'])">
- <var name="product_page" />
+ <var name="product_page"/>
</xpath>
</var-def>
- <var-def name="message_body">
- <call name="vendor-name">
- <call-param name="include_vendor_name">
- <var name="include_vendor_name" />
- </call-param>
- <call-param name="vendor_name">
- <var name="vendor_name" />
- </call-param>
- </call>
- <call name="product-details">
- <call-param name="product_sale">
- <var name="product_sale" />
- </call-param>
- <call-param name="product_name">
- <var name="product_name" />
- </call-param>
- <call-param name="product_price">
- <var name="product_price" />
- </call-param>
- <call-param name="vendor_template">
- <var name="vendor_template" />
- </call-param>
- </call>
+ <var-def name="product_price">
+ <xpath expression="(//span[@class='price' and @itemprop='price'])[last()]/text()">
+ <var name="product_page"/>
+ </xpath>
</var-def>
+
+ <!-- See postfix.xml -->
</config>
src/main/resources/scripts/2_ca_safeway.xml
<var-def name="vendor_url">http://specials.safeway.ca/flyers/accessibility/safewaycanada-flyer</var-def>
<var-def name="vendor_name">Safeway</var-def>
+ <var-def name="product_text">description</var-def>
<!--
1. Find store code for nearest store.
2. Download flyer for store code.
3. Search for matching product name(s).
+
+ TODO: Save and re-use the flyer for a particular store code per week.
-->
</var-def>
- <var-def name="message_body" id="message_body">
- <call name="vendor-name">
- <call-param name="include_vendor_name">
- <var name="include_vendor_name" />
- </call-param>
- <call-param name="vendor_name">
- <var name="vendor_name" />
- </call-param>
- </call>
- <call name="product-details">
- <call-param name="product_name">
- <var name="product_name" />
- </call-param>
- <call-param name="product_price">
- <var name="product_price" />
- </call-param>
- <call-param name="product_sale">true</call-param>
- <call-param name="vendor_template">safeway</call-param>
- </call>
- </var-def>
+ <!-- See postfix.xml -->
</config>
src/main/resources/scripts/3_com_thebay.xml
<var-def name="vendor_url">http://www.thebay.com/webapp/wcs/stores/servlet/en/thebay</var-def>
<var-def name="vendor_name">Hudson's Bay</var-def>
+ <var-def name="product_text">price</var-def>
- <var-def name="message_body">
- <call name="vendor-name" />
- <call name="product-details" />
- </var-def>
+ <!-- See postfix.xml -->
</config>
src/main/resources/scripts/lib/common.xml
-<?xml version="1.0" encoding="UTF-8"?>
-<config charset="UTF-8">
- <function name="vendor-name">
- <return>
- <case>
- <if condition="${include_vendor_name}">
- <template>
- <file path="templates/header-vendor-name.md"/>
- </template>
- </if>
- </case>
- </return>
- </function>
-
- <function name="product-details">
- <return>
- <case>
- <if condition="${product_sale}">
- <template>
- <file path="templates/header-product-name.md"/>
- <file path="templates/${vendor_template}/product-listing.md"/>
- </template>
- </if>
- </case>
- </return>
- </function>
-</config>
src/main/resources/scripts/lib/postfix.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<config charset="UTF-8">
+ <!-- Instructions in this file are appended to every script. -->
+ <var-def name="message_body" id="message_body">
+ <case>
+ <if condition="${product_sale}">
+ <case>
+ <if condition="${include_vendor_name}">
+ <template>
+ <file path="templates/header-vendor-name.md"/>
+ </template>
+ </if>
+ </case>
+
+ <template>
+ <file path="templates/header-product-name.md"/>
+ <file path="templates/product-${product_text}.md"/>
+ </template>
+ </if>
+ </case>
+ </var-def>
+</config>
src/main/resources/templates/product-price.md
+Sale price is ${product_price}.
src/main/resources/templates/safeway/product-listing.md
-${product_price}
src/main/resources/templates/thriftyfoods/product-listing.md
-Sale price is ${product_price}.
+

Migrated the code common to all scrapers into postfix.xml. Defined two new templates, product price and product description; these are used when a vendor's product prices can or cannot be fully parsed, respectively.

Author djarvis <email>
Date 2016-07-23 18:57:59 GMT-0700
Commit 5176bf2af82b6d482aef5f1f7d2e1cf76f901f08
Parent 2c4e053
Delta 59 lines added, 103 lines removed, 44-line decrease