| #!/bin/bash | ||
| -mvn install:install-file -Dfile=webharvest-2.1.jar -DgroupId=net.sourceforge.web-harvest -DartifactId=webharvest -Dversion=2.1 -Dpackaging=jar | ||
| +VERSION=2.2 | ||
| +mvn install:install-file \ | ||
| + -DgroupId=net.sourceforge.web-harvest \ | ||
| + -DartifactId=webharvest \ | ||
| + -Dversion=$VERSION \ | ||
| + -Dpackaging=jar \ | ||
| + -Dfile=webharvest-$VERSION.jar | ||
| + | ||
| +mvn install:install-file \ | ||
| + -DgroupId=net.sourceforge.web-harvest \ | ||
| + -DartifactId=webharvest \ | ||
| + -Dversion=$VERSION \ | ||
| + -Dpackaging=jar \ | ||
| + -Dfile=webharvest-$VERSION-sources.jar \ | ||
| + -Dclassifier=sources | ||
| </dependency> | ||
| <dependency> | ||
| - <groupId>net.sourceforge.web-harvest</groupId> | ||
| - <artifactId>webharvest</artifactId> | ||
| - <version>2.1</version> | ||
| - </dependency> | ||
| - <dependency> | ||
| <groupId>org.hibernate</groupId> | ||
| <artifactId>hibernate-c3p0</artifactId> | ||
| <artifactId>pdfbox</artifactId> | ||
| <version>2.0.2</version> | ||
| + </dependency> | ||
| + <dependency> | ||
| + <groupId>net.sourceforge.web-harvest</groupId> | ||
| + <artifactId>webharvest</artifactId> | ||
| + <version>2.2</version> | ||
| </dependency> | ||
| </dependencies> | ||
| <var-def name="vendor_url">http://specials.safeway.ca/flyers/accessibility/safewaycanada-flyer</var-def> | ||
| <var-def name="vendor_name">Safeway</var-def> | ||
| + | ||
| + <!-- | ||
| + 1. Find store code for nearest store. | ||
| + 2. Download flyer for store code. | ||
| + 3. Search for matching product name(s). | ||
| + --> | ||
| - <!-- Download the flyer; include the postal code. --> | ||
| - <var-def name="product_page"> | ||
| + <var-def name="store_code"> | ||
| + <xpath expression="//select[@id='store_select']/option/@value"> | ||
| + <html-to-xml outputtype="pretty" id="store_page"> | ||
| + <http method="get" url="${vendor_url}" id="vendor_url"> | ||
| + <http-param name="type">1</http-param> | ||
| + <http-param name="postal_code" id="postal_code"><var name="location_code"/></http-param> | ||
| + </http> | ||
| + </html-to-xml> | ||
| + </xpath> | ||
| + </var-def> | ||
| + | ||
| +<!-- | ||
| + <var-def name="flyer_page"> | ||
| <html-to-xml outputtype="pretty" prunetags="script"> | ||
| - <http method="get" url="${vendor_url}"> | ||
| + <http method="get" url="${vendor_url}?${store_code}"> | ||
| <http-param name="postal_code"> | ||
| <var name="location_code"/> | ||
| </http-param> | ||
| </http> | ||
| </html-to-xml> | ||
| </var-def> | ||
| +--> | ||
| <var-def name="message_body"> |
| # Get the store code and address for a postal code. | ||
| XHTML=$(curl -s "$URL?postal_code=$POSTAL_CODE" | hxnormalize -x) | ||
| - | ||
| + | ||
| STORE_ADDRESS=$(echo $XHTML | hxselect -c "select > option:first-child" | recode html..ascii) | ||
| STORE_CODE=$(echo $XHTML | hxselect -i "select > option:first-child" | sed -n -e "s/^.*value=['\"]\(.*\)['\"].*/\1/p") |
| for( Vendor vendor : vendors ) { | ||
| Scraper scraper = createScraperService().createScraper( vendor ); | ||
| - | ||
| + | ||
| scraper.addVariableToContext( "include_vendor_name", true ); | ||
| scraper.addVariableToContext( "location_code", "V8V1Z9" ); | ||
| scraper.addVariableToContext( "product_name", product.getName() ); | ||
| scraper.addVariableToContext( "product_path", product.getUrlPath() ); | ||
| + | ||
| + scraper.setDebug( true ); | ||
| scraper.execute(); | ||
| Author | djarvis <email> |
|---|---|
| Date | 2016-06-27 23:18:45 GMT-0700 |
| Commit | 12e565e3179164c26605f2ba8c7bf467686fe861 |
| Parent | c5afcbd |
| Delta | 45 lines added, 11 lines removed, 34-line increase |