Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/sales.git
lib/web-harvest/install.sh
#!/bin/bash
-mvn install:install-file -Dfile=webharvest-2.1.jar -DgroupId=net.sourceforge.web-harvest -DartifactId=webharvest -Dversion=2.1 -Dpackaging=jar
+VERSION=2.2
+mvn install:install-file \
+ -DgroupId=net.sourceforge.web-harvest \
+ -DartifactId=webharvest \
+ -Dversion=$VERSION \
+ -Dpackaging=jar \
+ -Dfile=webharvest-$VERSION.jar
+
+mvn install:install-file \
+ -DgroupId=net.sourceforge.web-harvest \
+ -DartifactId=webharvest \
+ -Dversion=$VERSION \
+ -Dpackaging=jar \
+ -Dfile=webharvest-$VERSION-sources.jar \
+ -Dclassifier=sources
lib/web-harvest/webharvest-2.1.jar
Binary files differ
lib/web-harvest/webharvest-2.2-sources.jar
Binary files differ
lib/web-harvest/webharvest-2.2.jar
Binary files differ
pom.xml
</dependency>
<dependency>
- <groupId>net.sourceforge.web-harvest</groupId>
- <artifactId>webharvest</artifactId>
- <version>2.1</version>
- </dependency>
- <dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-c3p0</artifactId>
<artifactId>pdfbox</artifactId>
<version>2.0.2</version>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.web-harvest</groupId>
+ <artifactId>webharvest</artifactId>
+ <version>2.2</version>
</dependency>
</dependencies>
src/main/resources/scripts/2_ca_safeway.xml
<var-def name="vendor_url">http://specials.safeway.ca/flyers/accessibility/safewaycanada-flyer</var-def>
<var-def name="vendor_name">Safeway</var-def>
+
+ <!--
+ 1. Find store code for nearest store.
+ 2. Download flyer for store code.
+ 3. Search for matching product name(s).
+ -->
- <!-- Download the flyer; include the postal code. -->
- <var-def name="product_page">
+ <var-def name="store_code">
+ <xpath expression="//select[@id='store_select']/option/@value">
+ <html-to-xml outputtype="pretty" id="store_page">
+ <http method="get" url="${vendor_url}" id="vendor_url">
+ <http-param name="type">1</http-param>
+ <http-param name="postal_code" id="postal_code"><var name="location_code"/></http-param>
+ </http>
+ </html-to-xml>
+ </xpath>
+ </var-def>
+
+<!--
+ <var-def name="flyer_page">
<html-to-xml outputtype="pretty" prunetags="script">
- <http method="get" url="${vendor_url}">
+ <http method="get" url="${vendor_url}?${store_code}">
<http-param name="postal_code">
<var name="location_code"/>
</http-param>
</http>
</html-to-xml>
</var-def>
+-->
<var-def name="message_body">
src/main/shell/safeway.sh
# Get the store code and address for a postal code.
XHTML=$(curl -s "$URL?postal_code=$POSTAL_CODE" | hxnormalize -x)
-
+
STORE_ADDRESS=$(echo $XHTML | hxselect -c "select > option:first-child" | recode html..ascii)
STORE_CODE=$(echo $XHTML | hxselect -i "select > option:first-child" | sed -n -e "s/^.*value=['\"]\(.*\)['\"].*/\1/p")
src/test/java/com/whitemagicsoftware/sales/service/impl/DefaultScraperServiceTest.java
for( Vendor vendor : vendors ) {
Scraper scraper = createScraperService().createScraper( vendor );
-
+
scraper.addVariableToContext( "include_vendor_name", true );
scraper.addVariableToContext( "location_code", "V8V1Z9" );
scraper.addVariableToContext( "product_name", product.getName() );
scraper.addVariableToContext( "product_path", product.getUrlPath() );
+
+ scraper.setDebug( true );
scraper.execute();

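Fixed bug in Web-Harvest; the bundled Web-Harvest library is updated from version 2.1 to 2.2, and a sources JAR is now installed alongside it.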

Author djarvis <email>
Date 2016-06-27 23:18:45 GMT-0700
Commit 12e565e3179164c26605f2ba8c7bf467686fe861
Parent c5afcbd
Delta 45 lines added, 11 lines removed, 34-line increase