Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/sales.git

Added directory creation. Added start of script to scrape web site. Updated POM to write manifest for executability.

Author djarvis <email>
Date 2016-06-08 23:24:22 GMT-0700
Commit b231cc8c096d99e0c08adc608f1916700da2624d
Parent a8ffed2
.gitignore
lib
database
+working
install.sh
+#!/bin/bash
+
+# Installs the Web Harvest Java archive.
+mvn install:install-file -Dfile=lib/web-harvest/webharvest-2.1.jar -DgroupId=org.webharvest -DartifactId=webharvest -Dversion=2.1 -Dpackaging=jar
nbactions.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<actions>
+ <action>
+ <actionName>build</actionName>
+ <packagings>
+ <packaging>*</packaging>
+ </packagings>
+ <goals>
+ <goal>install</goal>
+ </goals>
+ </action>
+ <action>
+ <actionName>rebuild</actionName>
+ <packagings>
+ <packaging>*</packaging>
+ </packagings>
+ <goals>
+ <goal>clean</goal>
+ <goal>install</goal>
+ </goals>
+ <properties>
+ <skipTests>true</skipTests>
+ </properties>
+ </action>
+ <action>
+ <actionName>clean</actionName>
+ <packagings>
+ <packaging>*</packaging>
+ </packagings>
+ <goals>
+ <goal>clean</goal>
+ </goals>
+ <properties>
+ <skipTests>true</skipTests>
+ </properties>
+ </action>
+ <action>
+ <actionName>run</actionName>
+ <packagings>
+ <packaging>jar</packaging>
+ </packagings>
+ <goals>
+ <goal>process-classes</goal>
+ <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
+ </goals>
+ <properties>
+ <exec.args>-classpath %classpath ${packageClassName}</exec.args>
+ <exec.executable>java</exec.executable>
+
+ </properties>
+ </action>
+ </actions>
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>com.whitemagicsoftware</groupId>
- <artifactId>sales</artifactId>
- <version>1.0-SNAPSHOT</version>
- <packaging>jar</packaging>
- <dependencies>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.10</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>com.atlassian.commonmark</groupId>
- <artifactId>commonmark</artifactId>
- <version>0.5.1</version>
- </dependency>
- <dependency>
- <groupId>com.sun.mail</groupId>
- <artifactId>javax.mail</artifactId>
- <version>1.5.5</version>
- </dependency>
- <dependency>
- <groupId>org.joda</groupId>
- <artifactId>joda-money</artifactId>
- <version>0.11</version>
- </dependency>
- <dependency>
- <groupId>org.webharvest</groupId>
- <artifactId>webharvest</artifactId>
- <version>2.1</version>
- </dependency>
- </dependencies>
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <maven.compiler.source>1.8</maven.compiler.source>
- <maven.compiler.target>1.8</maven.compiler.target>
- </properties>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.whitemagicsoftware</groupId>
+ <artifactId>sales</artifactId>
+ <version>1.0-SNAPSHOT</version>
+ <packaging>jar</packaging>
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.10</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.atlassian.commonmark</groupId>
+ <artifactId>commonmark</artifactId>
+ <version>0.5.1</version>
+ </dependency>
+ <dependency>
+ <groupId>com.sun.mail</groupId>
+ <artifactId>javax.mail</artifactId>
+ <version>1.5.5</version>
+ </dependency>
+ <dependency>
+ <groupId>org.joda</groupId>
+ <artifactId>joda-money</artifactId>
+ <version>0.11</version>
+ </dependency>
+ <dependency>
+ <groupId>org.webharvest</groupId>
+ <artifactId>webharvest</artifactId>
+ <version>2.1</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.4</version>
+ <type>jar</type>
+ </dependency>
+ </dependencies>
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <maven.compiler.source>1.8</maven.compiler.source>
+ <maven.compiler.target>1.8</maven.compiler.target>
+ </properties>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>3.0.0</version>
+ <configuration>
+ <archive>
+ <index>true</index>
+ <manifest>
+ <addClasspath>true</addClasspath>
+ <mainClass>com.whitemagicsoftware.sales.Main</mainClass>
+ </manifest>
+ <manifestEntries>
+ <Build-Time>${maven.build.timestamp}</Build-Time>
+ <Built-By>White Magic Software, Ltd.</Built-By>
+ <Build-User>White Magic Software, Ltd.</Build-User>
+ <Build-Maven>Maven ${maven.version}</Build-Maven>
+ <Build-Java>${java.version}</Build-Java>
+ <Build-Label>${project.version}</Build-Label>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ </plugin>
+ </plugins>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ </resource>
+ </resources>
+ </build>
+ <name>Sales</name>
</project>
src/main/java/com/whitemagicsoftware/notify/.gitignore
+Credentials.java
src/main/java/com/whitemagicsoftware/sales/Main.java
package com.whitemagicsoftware.sales;
-import com.whitemagicsoftware.sales.scraper.Scraper;
import com.whitemagicsoftware.sales.service.SubscriberService;
import com.whitemagicsoftware.sales.service.VendorService;
import com.whitemagicsoftware.sales.service.impl.SubscriberServiceImpl;
import com.whitemagicsoftware.sales.service.impl.VendorServiceImpl;
-import java.io.FileNotFoundException;
+import java.io.File;
+import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.List;
+import static org.apache.commons.io.FilenameUtils.removeExtension;
import org.webharvest.definition.ScraperConfiguration;
+import org.webharvest.runtime.Scraper;
+import org.xml.sax.InputSource;
/**
* Re-runnable entry point.
*
* @author White Magic Software, Ltd.
*/
public class Main {
+
+ private final static String DIRECTORY_PREFIX = "scrape";
private void Main() {
}
- private void run() throws FileNotFoundException {
+ private void run() throws IOException {
List<Vendor> vendors = getVendors();
List<Subscriber> subscribers = getSubscribers();
- ScraperConfiguration config = new ScraperConfiguration( "c:/wh/configs/news.xml" );
- Scraper scraper = new Scraper( config, "c:/wh/work/" );
+ Scraper scraper = getScraper( "com_thriftyfoods.xml" );
+ scraper.execute();
// Jarvest jarvest = new Jarvest();
}
- private Reader script( String resource ) {
- return new InputStreamReader( getResourceAsStream( resource ) );
+ /**
+ * Returns a scraper for a given resource (configuration).
+ *
+ * @param resource Name of script file to execute.
+ *
+ * @return A Scraper that can extract data.
+ */
+ private Scraper getScraper( String resource ) throws IOException {
+ final ScraperConfiguration config = getScraperConfiguration( resource );
+ return new Scraper( config, getTemporaryDirectory( resource ).toString() );
+ }
+
+ private ScraperConfiguration getScraperConfiguration( String resource ) {
+ final InputSource in = getResource( resource );
+ return new ScraperConfiguration( in );
+ }
+
+ private InputSource getResource( String resource ) {
+ final InputStream in = getResourceAsStream( resource );
+ return new InputSource( in );
+ }
+
+ private InputStream getResourceAsStream( String resource ) {
+ final InputStream in
+ = getContextClassLoader().getResourceAsStream( resource );
+
+ return in == null ? getClass().getResourceAsStream( resource ) : in;
}
private ClassLoader getContextClassLoader() {
return Thread.currentThread().getContextClassLoader();
}
- private InputStream getResourceAsStream( String resource ) {
- return getContextClassLoader().getResourceAsStream( resource );
+ /**
+ * Creates a fully qualified path for web pages downloaded by the scraper.
+ *
+ * @param resource The site (with filename extension) to scrape.
+ *
+ * @return A path to the directory that was created (e.g.,
+ * $HOME/.../sales/working/website/scrape4443161710900...).
+ *
+ * @throws IOException If the working directory or the temporary directory
+ * inside it could not be created.
+ */
+ private Path getTemporaryDirectory( String resource ) throws IOException {
+ Path working = Paths.get( getWorkingDirectory( resource ) );
+ working = Files.createDirectories( working );
+ return Files.createTempDirectory( working, DIRECTORY_PREFIX );
+ }
+
+ private String getWorkingDirectory( String resource ) {
+ return System.getProperty( "user.dir" )
+ + File.separator + "working"
+ + File.separator + removeExtension( resource );
}
}
- public static void main( String args[] ) {
+ public static void main( String args[] ) throws IOException {
new Main().run();
}
src/main/java/com/whitemagicsoftware/sales/Price.java
+/*
+ * The MIT License
+ *
+ * Copyright 2016 White Magic Software, Ltd..
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package com.whitemagicsoftware.sales;
+
+import org.joda.money.Money;
+
+/**
+ * Holds the price of a product as a monetary amount, a textual description,
+ * or both.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public final class Price extends BusinessEntity {
+
+ /**
+ * How much does a product cost?
+ */
+ private Money amount;
+
+ /**
+ * Sometimes a price cannot be extracted from a product description.
+ */
+ private String description;
+
+ /**
+ * Use the builder.
+ */
+ private Price() {
+ }
+
+ protected void setAmount( Money amount ) {
+ this.amount = amount;
+ }
+
+ protected void setDescription( String description ) {
+ this.description = description;
+ }
+
+ /**
+ * To construct outer class instances.
+ */
+ public static final class Builder extends BusinessEntity.Builder<Price, Builder> {
+
+ @Override
+ protected Price createObject() {
+ return new Price();
+ }
+
+ @Override
+ protected Builder getBuilder() {
+ return this;
+ }
+
+ public Builder withAmount( Money amount ) {
+ getObject().setAmount( amount );
+ return getBuilder();
+ }
+
+ public Builder withAmount( String amount ) {
+ return withAmount( Money.parse( amount ) );
+ }
+
+ public Builder withDescription( String description ) {
+ getObject().setDescription( description );
+ return getBuilder();
+ }
+ }
+}
src/main/java/com/whitemagicsoftware/sales/service/Service.java
+/*
+ * The MIT License
+ *
+ * Copyright 2016 White Magic Software, Ltd..
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package com.whitemagicsoftware.sales.service;
+
+/**
+ * Extended by all different services.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public interface Service {
+}
src/main/java/com/whitemagicsoftware/sales/service/impl/ServiceImpl.java
+/*
+ * The MIT License
+ *
+ * Copyright 2016 White Magic Software, Ltd..
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package com.whitemagicsoftware.sales.service.impl;
+
+import com.whitemagicsoftware.sales.service.Loadable;
+import com.whitemagicsoftware.sales.service.Service;
+
+/**
+ * Superclass to all services.
+ *
+ * @author White Magic Software, Ltd.
+ * @param <T>
+ */
+public abstract class ServiceImpl<T> implements Service, Loadable<T> {
+}
src/main/resources/com_thriftyfoods.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<config scriptlang="Javascript" charset="UTF-8">
+<!--
+ xpath( '(//span[@class="price" and @itemprop="price"])[last()]' )
+-->
+</config>
+
Delta 357 lines added, 50 lines removed, 307-line increase