| Author | djarvis <email> |
|---|---|
| Date | 2016-06-08 23:24:22 GMT-0700 |
| Commit | b231cc8c096d99e0c08adc608f1916700da2624d |
| Parent | a8ffed2 |
| lib | ||
| database | ||
| +working | ||
| +#!/bin/bash | ||
| + | ||
| +# Installs the Web Harvest Java archive. | ||
| +mvn install:install-file -Dfile=lib/web-harvest/webharvest-2.1.jar -DgroupId=org.webharvest -DartifactId=webharvest -Dversion=2.1 -Dpackaging=jar | ||
| +<?xml version="1.0" encoding="UTF-8"?> | ||
| +<actions> | ||
| + <action> | ||
| + <actionName>build</actionName> | ||
| + <packagings> | ||
| + <packaging>*</packaging> | ||
| + </packagings> | ||
| + <goals> | ||
| + <goal>install</goal> | ||
| + </goals> | ||
| + </action> | ||
| + <action> | ||
| + <actionName>rebuild</actionName> | ||
| + <packagings> | ||
| + <packaging>*</packaging> | ||
| + </packagings> | ||
| + <goals> | ||
| + <goal>clean</goal> | ||
| + <goal>install</goal> | ||
| + </goals> | ||
| + <properties> | ||
| + <skipTests>true</skipTests> | ||
| + </properties> | ||
| + </action> | ||
| + <action> | ||
| + <actionName>clean</actionName> | ||
| + <packagings> | ||
| + <packaging>*</packaging> | ||
| + </packagings> | ||
| + <goals> | ||
| + <goal>clean</goal> | ||
| + </goals> | ||
| + <properties> | ||
| + <skipTests>true</skipTests> | ||
| + </properties> | ||
| + </action> | ||
| + <action> | ||
| + <actionName>run</actionName> | ||
| + <packagings> | ||
| + <packaging>jar</packaging> | ||
| + </packagings> | ||
| + <goals> | ||
| + <goal>process-classes</goal> | ||
| + <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> | ||
| + </goals> | ||
| + <properties> | ||
| + <exec.args>-classpath %classpath ${packageClassName}</exec.args> | ||
| + <exec.executable>java</exec.executable> | ||
| + | ||
| + </properties> | ||
| + </action> | ||
| + </actions> | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
| - <modelVersion>4.0.0</modelVersion> | ||
| - <groupId>com.whitemagicsoftware</groupId> | ||
| - <artifactId>sales</artifactId> | ||
| - <version>1.0-SNAPSHOT</version> | ||
| - <packaging>jar</packaging> | ||
| - <dependencies> | ||
| - <dependency> | ||
| - <groupId>junit</groupId> | ||
| - <artifactId>junit</artifactId> | ||
| - <version>4.10</version> | ||
| - <scope>test</scope> | ||
| - </dependency> | ||
| - <dependency> | ||
| - <groupId>com.atlassian.commonmark</groupId> | ||
| - <artifactId>commonmark</artifactId> | ||
| - <version>0.5.1</version> | ||
| - </dependency> | ||
| - <dependency> | ||
| - <groupId>com.sun.mail</groupId> | ||
| - <artifactId>javax.mail</artifactId> | ||
| - <version>1.5.5</version> | ||
| - </dependency> | ||
| - <dependency> | ||
| - <groupId>org.joda</groupId> | ||
| - <artifactId>joda-money</artifactId> | ||
| - <version>0.11</version> | ||
| - </dependency> | ||
| - <dependency> | ||
| - <groupId>org.webharvest</groupId> | ||
| - <artifactId>webharvest</artifactId> | ||
| - <version>2.1</version> | ||
| - </dependency> | ||
| - </dependencies> | ||
| - <properties> | ||
| - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
| - <maven.compiler.source>1.8</maven.compiler.source> | ||
| - <maven.compiler.target>1.8</maven.compiler.target> | ||
| - </properties> | ||
| + <modelVersion>4.0.0</modelVersion> | ||
| + <groupId>com.whitemagicsoftware</groupId> | ||
| + <artifactId>sales</artifactId> | ||
| + <version>1.0-SNAPSHOT</version> | ||
| + <packaging>jar</packaging> | ||
| + <dependencies> | ||
| + <dependency> | ||
| + <groupId>junit</groupId> | ||
| + <artifactId>junit</artifactId> | ||
| + <version>4.10</version> | ||
| + <scope>test</scope> | ||
| + </dependency> | ||
| + <dependency> | ||
| + <groupId>com.atlassian.commonmark</groupId> | ||
| + <artifactId>commonmark</artifactId> | ||
| + <version>0.5.1</version> | ||
| + </dependency> | ||
| + <dependency> | ||
| + <groupId>com.sun.mail</groupId> | ||
| + <artifactId>javax.mail</artifactId> | ||
| + <version>1.5.5</version> | ||
| + </dependency> | ||
| + <dependency> | ||
| + <groupId>org.joda</groupId> | ||
| + <artifactId>joda-money</artifactId> | ||
| + <version>0.11</version> | ||
| + </dependency> | ||
| + <dependency> | ||
| + <groupId>org.webharvest</groupId> | ||
| + <artifactId>webharvest</artifactId> | ||
| + <version>2.1</version> | ||
| + </dependency> | ||
| + <dependency> | ||
| + <groupId>commons-io</groupId> | ||
| + <artifactId>commons-io</artifactId> | ||
| + <version>2.4</version> | ||
| + <type>jar</type> | ||
| + </dependency> | ||
| + </dependencies> | ||
| + <properties> | ||
| + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
| + <maven.compiler.source>1.8</maven.compiler.source> | ||
| + <maven.compiler.target>1.8</maven.compiler.target> | ||
| + </properties> | ||
| + <build> | ||
| + <plugins> | ||
| + <plugin> | ||
| + <groupId>org.apache.maven.plugins</groupId> | ||
| + <artifactId>maven-jar-plugin</artifactId> | ||
| + <version>3.0.0</version> | ||
| + <configuration> | ||
| + <archive> | ||
| + <index>true</index> | ||
| + <manifest> | ||
| + <addClasspath>true</addClasspath> | ||
| + <mainClass>com.whitemagicsoftware.sales.Main</mainClass> | ||
| + </manifest> | ||
| + <manifestEntries> | ||
| + <Build-Time>${maven.build.timestamp}</Build-Time> | ||
| + <Built-By>White Magic Software, Ltd.</Built-By> | ||
| + <Build-User>White Magic Software, Ltd.</Build-User> | ||
| + <Build-Maven>Maven ${maven.version}</Build-Maven> | ||
| + <Build-Java>${java.version}</Build-Java> | ||
| + <Build-Label>${project.version}</Build-Label> | ||
| + </manifestEntries> | ||
| + </archive> | ||
| + </configuration> | ||
| + </plugin> | ||
| + </plugins> | ||
| + <resources> | ||
| + <resource> | ||
| + <directory>src/main/resources</directory> | ||
| + </resource> | ||
| + </resources> | ||
| + </build> | ||
| + <name>Sales</name> | ||
| </project> |
| +Credentials.java | ||
| package com.whitemagicsoftware.sales; | ||
| -import com.whitemagicsoftware.sales.scraper.Scraper; | ||
| import com.whitemagicsoftware.sales.service.SubscriberService; | ||
| import com.whitemagicsoftware.sales.service.VendorService; | ||
| import com.whitemagicsoftware.sales.service.impl.SubscriberServiceImpl; | ||
| import com.whitemagicsoftware.sales.service.impl.VendorServiceImpl; | ||
| -import java.io.FileNotFoundException; | ||
| +import java.io.File; | ||
| +import java.io.IOException; | ||
| import java.io.InputStream; | ||
| -import java.io.InputStreamReader; | ||
| -import java.io.Reader; | ||
| +import java.nio.file.Files; | ||
| +import java.nio.file.Path; | ||
| +import java.nio.file.Paths; | ||
| import java.util.List; | ||
| +import static org.apache.commons.io.FilenameUtils.removeExtension; | ||
| import org.webharvest.definition.ScraperConfiguration; | ||
| +import org.webharvest.runtime.Scraper; | ||
| +import org.xml.sax.InputSource; | ||
| /** | ||
| * Re-runnable entry point. | ||
| * | ||
| * @author White Magic Software, Ltd. | ||
| */ | ||
| public class Main { | ||
| + | ||
| + private final static String DIRECTORY_PREFIX = "scrape"; | ||
| private void Main() { | ||
| } | ||
| - private void run() throws FileNotFoundException { | ||
| + private void run() throws IOException { | ||
| List<Vendor> vendors = getVendors(); | ||
| List<Subscriber> subscribers = getSubscribers(); | ||
| - ScraperConfiguration config = new ScraperConfiguration( "c:/wh/configs/news.xml" ); | ||
| - Scraper scraper = new Scraper( config, "c:/wh/work/" ); | ||
| + Scraper scraper = getScraper( "com_thriftyfoods.xml" ); | ||
| + scraper.execute(); | ||
| // Jarvest jarvest = new Jarvest(); | ||
| } | ||
| - private Reader script( String resource ) { | ||
| - return new InputStreamReader( getResourceAsStream( resource ) ); | ||
| + /** | ||
| + * Returns a scraper for a given resource (configuration). | ||
| + * | ||
| + * @param resource Name of script file to execute. | ||
| + * | ||
| + * @return A Scraper that can extract data. | ||
| + */ | ||
| + private Scraper getScraper( String resource ) throws IOException { | ||
| + final ScraperConfiguration config = getScraperConfiguration( resource ); | ||
| + return new Scraper( config, getTemporaryDirectory( resource ).toString() ); | ||
| + } | ||
| + | ||
| + private ScraperConfiguration getScraperConfiguration( String resource ) { | ||
| + final InputSource in = getResource( resource ); | ||
| + return new ScraperConfiguration( in ); | ||
| + } | ||
| + | ||
| + private InputSource getResource( String resource ) { | ||
| + final InputStream in = getResourceAsStream( resource ); | ||
| + return new InputSource( in ); | ||
| + } | ||
| + | ||
| + private InputStream getResourceAsStream( String resource ) { | ||
| + final InputStream in | ||
| + = getContextClassLoader().getResourceAsStream( resource ); | ||
| + | ||
| + return in == null ? getClass().getResourceAsStream( resource ) : in; | ||
| } | ||
| private ClassLoader getContextClassLoader() { | ||
| return Thread.currentThread().getContextClassLoader(); | ||
| } | ||
| - private InputStream getResourceAsStream( String resource ) { | ||
| - return getContextClassLoader().getResourceAsStream( resource ); | ||
| + /** | ||
| + * Creates a fully qualified path for web pages downloaded by the scraper. | ||
| + * | ||
| + * @param resource The site (with filename extension) to scrape. | ||
| + * | ||
| + * @return A path to the directory that was created (e.g., | ||
| + * $HOME/.../sales/working/website/scrape4443161710900...). | ||
| + * | ||
| + * @throws IOException If the working or temporary directory cannot be created. | ||
| + */ | ||
| + private Path getTemporaryDirectory( String resource ) throws IOException { | ||
| + Path working = Paths.get( getWorkingDirectory( resource ) ); | ||
| + working = Files.createDirectories( working ); | ||
| + return Files.createTempDirectory( working, DIRECTORY_PREFIX ); | ||
| + } | ||
| + | ||
| + private String getWorkingDirectory( String resource ) { | ||
| + return System.getProperty( "user.dir" ) | ||
| + + File.separator + "working" | ||
| + + File.separator + removeExtension( resource ); | ||
| } | ||
| } | ||
| - public static void main( String args[] ) { | ||
| + public static void main( String args[] ) throws IOException { | ||
| new Main().run(); | ||
| } | ||
| +/* | ||
| + * The MIT License | ||
| + * | ||
| + * Copyright 2016 White Magic Software, Ltd. | ||
| + * | ||
| + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| + * of this software and associated documentation files (the "Software"), to deal | ||
| + * in the Software without restriction, including without limitation the rights | ||
| + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| + * copies of the Software, and to permit persons to whom the Software is | ||
| + * furnished to do so, subject to the following conditions: | ||
| + * | ||
| + * The above copyright notice and this permission notice shall be included in | ||
| + * all copies or substantial portions of the Software. | ||
| + * | ||
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| + * THE SOFTWARE. | ||
| + */ | ||
| +package com.whitemagicsoftware.sales; | ||
| + | ||
| +import org.joda.money.Money; | ||
| + | ||
| +/** | ||
| + * Holds the price of a product as a monetary amount and/or a description. | ||
| + * @author White Magic Software, Ltd. | ||
| + */ | ||
| +public final class Price extends BusinessEntity { | ||
| + | ||
| + /** | ||
| + * How much does a product cost? | ||
| + */ | ||
| + private Money amount; | ||
| + | ||
| + /** | ||
| + * Sometimes a price cannot be extracted from a product description. | ||
| + */ | ||
| + private String description; | ||
| + | ||
| + /** | ||
| + * Use the builder. | ||
| + */ | ||
| + private Price() { | ||
| + } | ||
| + | ||
| + protected void setAmount( Money amount ) { | ||
| + this.amount = amount; | ||
| + } | ||
| + | ||
| + protected void setDescription( String description ) { | ||
| + this.description = description; | ||
| + } | ||
| + | ||
| + /** | ||
| + * To construct outer class instances. | ||
| + */ | ||
| + public static final class Builder extends BusinessEntity.Builder<Price, Builder> { | ||
| + | ||
| + @Override | ||
| + protected Price createObject() { | ||
| + return new Price(); | ||
| + } | ||
| + | ||
| + @Override | ||
| + protected Builder getBuilder() { | ||
| + return this; | ||
| + } | ||
| + | ||
| + public Builder withAmount( Money amount ) { | ||
| + getObject().setAmount( amount ); | ||
| + return getBuilder(); | ||
| + } | ||
| + | ||
| + public Builder withAmount( String amount ) { | ||
| + return withAmount( Money.parse( amount ) ); | ||
| + } | ||
| + | ||
| + public Builder withDescription( String description ) { | ||
| + getObject().setDescription( description ); | ||
| + return getBuilder(); | ||
| + } | ||
| + } | ||
| +} | ||
| +/* | ||
| + * The MIT License | ||
| + * | ||
| + * Copyright 2016 White Magic Software, Ltd. | ||
| + * | ||
| + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| + * of this software and associated documentation files (the "Software"), to deal | ||
| + * in the Software without restriction, including without limitation the rights | ||
| + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| + * copies of the Software, and to permit persons to whom the Software is | ||
| + * furnished to do so, subject to the following conditions: | ||
| + * | ||
| + * The above copyright notice and this permission notice shall be included in | ||
| + * all copies or substantial portions of the Software. | ||
| + * | ||
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| + * THE SOFTWARE. | ||
| + */ | ||
| +package com.whitemagicsoftware.sales.service; | ||
| + | ||
| +/** | ||
| + * Extended by all different services. | ||
| + * | ||
| + * @author White Magic Software, Ltd. | ||
| + */ | ||
| +public interface Service { | ||
| +} | ||
| +/* | ||
| + * The MIT License | ||
| + * | ||
| + * Copyright 2016 White Magic Software, Ltd. | ||
| + * | ||
| + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| + * of this software and associated documentation files (the "Software"), to deal | ||
| + * in the Software without restriction, including without limitation the rights | ||
| + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| + * copies of the Software, and to permit persons to whom the Software is | ||
| + * furnished to do so, subject to the following conditions: | ||
| + * | ||
| + * The above copyright notice and this permission notice shall be included in | ||
| + * all copies or substantial portions of the Software. | ||
| + * | ||
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| + * THE SOFTWARE. | ||
| + */ | ||
| +package com.whitemagicsoftware.sales.service.impl; | ||
| + | ||
| +import com.whitemagicsoftware.sales.service.Loadable; | ||
| +import com.whitemagicsoftware.sales.service.Service; | ||
| + | ||
| +/** | ||
| + * Superclass to all services. | ||
| + * | ||
| + * @author White Magic Software, Ltd. | ||
| + * @param <T> | ||
| + */ | ||
| +public abstract class ServiceImpl<T> implements Service, Loadable<T> { | ||
| +} | ||
| +<?xml version="1.0" encoding="UTF-8"?> | ||
| +<config scriptlang="Javascript" charset="UTF-8"> | ||
| +<!-- | ||
| + xpath( '(//span[@class="price" and @itemprop="price"])[last()]' ) | ||
| +--> | ||
| +</config> | ||
| + | ||
| Delta | 357 lines added, 50 lines removed, 307-line increase |
|---|