#!/bin/bash
# ########################################################################
#
# PURPOSE
#
# Filter data from a Safeway flyer.
#
# TODO: Delete the notification files if the subscriber added more
# products after a notification was sent, so they'll always get notified
# of the newly added product.
#
# ########################################################################

# Sets DATE_EXPIRY and DATE_EXPIRY_SEC to the expiration date in the
# vendor's flyer.
#
# Globals written:
#   DATE_EXPIRY     - Last word of the flyer's first line (%Y-%m-%d).
#   DATE_EXPIRY_SEC - DATE_EXPIRY as seconds since the epoch; empty when
#                     the date is missing or cannot be parsed.
#
# $1 - Full path to the flyer file.
#
# Returns non-zero when no expiry date could be determined.
expiry() {
  # The expiry date is the last "word" on the line in %Y-%m-%d format.
  DATE_EXPIRY=$(head -n 1 -- "$1" | awk 'NF>1{print $NF}')
  DATE_EXPIRY_SEC=

  # Without a date there is nothing for date(1) to parse; fail explicitly
  # rather than handing it an empty argument.
  if [[ -z "$DATE_EXPIRY" ]]; then
    printf 'expiry: no expiry date found in %s\n' "$1" >&2
    return 1
  fi

  DATE_EXPIRY_SEC=$(date -d "$DATE_EXPIRY" '+%s')
}

# Looks for a product in the vendor's flyer (downloading the flyer when
# required) and, when the product is on sale, appends the matching flyer
# lines to the subscriber's message body and writes the message subject.
#
# Globals read:
#   FILE_MESSAGE_BODY, FILE_MESSAGE_SUBJECT, FILE_FLYER, FILE_NOTIFIED
#     - File names, relative to the subscriber directory.
#   TEXT_WIDTH  - Passed to lynx as the dump width.
#   DATE_FORMAT - date(1) format used for the subject line.
#
# Globals written (kept global, as in the original, because code outside
# this file may read them -- confirm before making them local):
#   DATE_EXPIRY, DATE_EXPIRY_SEC, DATE_NOTIFIED_SEC, XHTML, STORE_ADDRESS,
#   STORE_CODE, FIND_PRODUCT, expires
#
# $1 - Subscriber directory.
# $2 - Vendor name (used in the message body and subject).
# $3 - Product name to search for.
# $4 - Product page (accepted for interface compatibility; unused here).
# $5 - Postal code used to look up the store.
scrape() {
  local DIR_SUBSCRIBER=$1
  local VENDOR_NAME=$2
  local PRODUCT_NAME=$3
  local POSTAL_CODE=$5

  local \
    URL="http://specials.safeway.ca/flyers/accessibility/safewaycanada-flyer"

  # Write the message body and subject to these files.
  local PATH_BODY="$DIR_SUBSCRIBER/$FILE_MESSAGE_BODY"
  local PATH_SUBJECT="$DIR_SUBSCRIBER/$FILE_MESSAGE_SUBJECT"
  local PATH_FLYER="$DIR_SUBSCRIBER/$FILE_FLYER"
  local PATH_NOTIFIED="$DIR_SUBSCRIBER/$FILE_NOTIFIED"

  # Don't download the flyer unless it has expired (or doesn't exist).
  local download_flyer=false
  local flat_xhtml

  # If a flyer exists, check its expiration date against the notified date.
  if [ -s "$PATH_FLYER" ]; then
    # The flyer exists, but has a notification been sent?
    if [ -s "$PATH_NOTIFIED" ]; then
      DATE_NOTIFIED_SEC=$(cat -- "$PATH_NOTIFIED")

      # Set DATE_EXPIRY and DATE_EXPIRY_SEC.
      expiry "$PATH_FLYER"

      # If the notification date happened before the expiration date, then
      # don't send another notification.
      if [ "$DATE_NOTIFIED_SEC" -lt "$DATE_EXPIRY_SEC" ]; then
        return
      fi

      download_flyer=true
    fi
  else
    # No flyer exists; download anew and notify, regardless of notified state.
    download_flyer=true
  fi

  if [[ "$download_flyer" == true ]]; then
    # Get the store code and address for a postal code.
    XHTML=$(curl -s "$URL?postal_code=$POSTAL_CODE" | hxnormalize -x)

    # Flatten the document onto one line before selecting from it (the
    # store code is extracted with a line-based sed expression), without
    # exposing the markup to pathname expansion.
    flat_xhtml=$(printf '%s' "$XHTML" | tr -s '[:space:]' ' ')

    STORE_ADDRESS=$(printf '%s\n' "$flat_xhtml" \
      | hxselect -c "select > option:first-child" \
      | recode html..ascii)
    STORE_CODE=$(printf '%s\n' "$flat_xhtml" \
      | hxselect -i "select > option:first-child" \
      | sed -n -e "s/^.*value=['\"]\(.*\)['\"].*/\1/p")

    # Download the flyer.
    lynx -nolist -nolog -accept_all_cookies -dump -width="$TEXT_WIDTH" \
      "$URL?store_code=$STORE_CODE" > "$PATH_FLYER"

    # Ensure the notification goes out by deleting the last notified file.
    rm -f -- "$PATH_NOTIFIED"

    printf '%s\n' "$VENDOR_NAME, $STORE_ADDRESS" > "$PATH_BODY"
  fi

  # Ensure flyer's expiry date is valid.
  expiry "$PATH_FLYER"

  # Strip the quotation marks from the product string (i.e., don't match 'em).
  # If the product contains spaces, replace every space such that it matches
  # anything in between. This will match "Turkey Breast" against the vendor
  # text of "Turkey or Chicken Breast" (for example).
  #
  # Using bash internal is probably faster than sed.
  FIND_PRODUCT=${PRODUCT_NAME//\"/}
  FIND_PRODUCT=${FIND_PRODUCT// /.*}

  # Store the message body in a temporary file.
  if grep -i -e "$FIND_PRODUCT" -- "$PATH_FLYER" > "$PATH_BODY.tmp"; then
    {
      # Product header and horizontal rule.
      printf '\n%s\n' "$PRODUCT_NAME"
      printf '%s\n' "-------------------------"

      # Paste the message body after the product header and horizontal rule.
      # Remove all leading spaces, as well, so that conversion from Markdown
      # to HTML will work (e.g., via pandoc).
      #
      # Second expression fixes vendor's punctuation problem.
      sed -e 's/^[ \t]*//' -e 's/ \./\./g' -e 's/$/\./' < "$PATH_BODY.tmp"
    } >> "$PATH_BODY"

    # Put flyer expiry date in an unambiguous, human-readable format.
    expires=$(date -d "@$DATE_EXPIRY_SEC" +"$DATE_FORMAT")

    printf '%s\n' "[$VENDOR_NAME] Sale until $expires" > "$PATH_SUBJECT"
  fi

  # Clean up.
  rm -f -- "$PATH_BODY.tmp"
}