Add R functions

Author	DaveJarvis <email>
Date	2020-07-02 22:11:45 GMT-0700
Commit	2221224a39bfa1ec7d154eca274627db34a46f50
Parent	e36fac1

R/conversion.R

		+# -----------------------------------------------------------------------------
		+# Copyright 2020, White Magic Software, Ltd.
		+#
		+# Permission is hereby granted, free of charge, to any person obtaining
		+# a copy of this software and associated documentation files (the
		+# "Software"), to deal in the Software without restriction, including
		+# without limitation the rights to use, copy, modify, merge, publish,
		+# distribute, sublicense, and/or sell copies of the Software, and to
		+# permit persons to whom the Software is furnished to do so, subject to
		+# the following conditions:
		+#
		+# The above copyright notice and this permission notice shall be
		+# included in all copies or substantial portions of the Software.
		+#
		+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
		+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
		+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
		+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
		+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		+# -----------------------------------------------------------------------------
		+
		+# -----------------------------------------------------------------------------
		+# Substitute R expressions in a document with their evaluated value. The
		+# anchor variable must be set for functions that use relative dates.
		+# -----------------------------------------------------------------------------
		+
		+# -----------------------------------------------------------------------------
		+# Evaluates an expression; writes s if there is no expression.
		+# -----------------------------------------------------------------------------
		+x <- function( s ) {
		+ tryCatch( {
		+ r = eval( parse( text = s ) )
		+
		+ # If the result isn't primitive, then it was probably parsed into
		+ # an unprintable object (e.g., "gray" becomes a colour). In those
		+ # cases, return the original text string. Otherwise, an atomic
		+ # value means a primitive type (string, integer, etc.) that can be
		+ # written directly into the document.
		+ ifelse( is.atomic( r ), r, s );
		+ },
		+ warning = function( w ) { s },
		+ error = function( e ) { s } )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Returns a date offset by a given number of days, relative to the given
		+# date (d). This does not use the anchor, but is used to get the anchor's
		+# value as a date.
		+# -----------------------------------------------------------------------------
		+when <- function( d, n = 0, format = "%Y-%m-%d" ) {
		+ as.Date( d, format = format ) + x( n )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Full date (s) offset by an optional number of days before or after.
		+# This will remove leading zeros (applying leading spaces instead, which
		+# are ignored by any worthwhile typesetting engine).
		+# -----------------------------------------------------------------------------
		+annal <- function( days = 0, format = "%Y-%m-%d", oformat = "%B %d, %Y" ) {
		+ format( when( anchor, days ), format = oformat )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Extracts the year from a date string.
		+# -----------------------------------------------------------------------------
		+year <- function( days = 0, format = "%Y-%m-%d" ) {
		+ annal( days, format, "%Y" )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Day of the week (in days since the anchor date).
		+# -----------------------------------------------------------------------------
		+weekday <- function( n ) {
		+ weekdays( when( anchor, n ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# String concatenate function alias because paste0 is a terrible name.
		+# -----------------------------------------------------------------------------
		+concat <- paste0
		+
		+# -----------------------------------------------------------------------------
		+# Translates a number from digits to words using Chicago Manual of Style.
		+# This does not translate numbers greater than one hundred. If ordinal
		+# is TRUE, this will return the ordinal name. This will not produce ordinals
		+# for numbers greater than 100.
		+# -----------------------------------------------------------------------------
		+cms <- function( n, ordinal = FALSE ) {
		+ n <- x( n )
		+
		+ if( n == 0 ) {
		+ if( ordinal ) {
		+ return( "zeroth" )
		+ }
		+
		+ return( "zero" )
		+ }
		+
		+ # Concatenate this a little later.
		+ if( n < 0 ) {
		+ result = "negative "
		+ n = abs( n )
		+ }
		+
		+ # Do not spell out numbers greater than one hundred.
		+ if( n > 100 ) {
		+ # Comma-separated numbers.
		+ return( commas( n ) )
		+ }
		+
		+ # Don't go beyond 100.
		+ if( n == 100 ) {
		+ if( ordinal ) {
		+ return( "one hundredth" )
		+ }
		+
		+ return( "one hundred" )
		+ }
		+
		+ # Samuel Langhorne Clemens noted English has too many exceptions.
		+ small = c(
		+ "one", "two", "three", "four", "five",
		+ "six", "seven", "eight", "nine", "ten",
		+ "eleven", "twelve", "thirteen", "fourteen", "fifteen",
		+ "sixteen", "seventeen", "eighteen", "nineteen"
		+ )
		+
		+ ord_small = c(
		+ "first", "second", "third", "fourth", "fifth",
		+ "sixth", "seventh", "eighth", "ninth", "tenth",
		+ "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth",
		+ "sixteenth", "seventeenth", "eighteenth", "nineteenth", "twentieth"
		+ )
		+
		+ # After this, the number (n) is between 20 and 99.
		+ if( n < 20 ) {
		+ if( ordinal ) {
		+ return( .subset( ord_small, n %% 100 ) )
		+ }
		+
		+ return( .subset( small, n %% 100 ) )
		+ }
		+
		+ tens = c( "",
		+ "twenty", "thirty", "forty", "fifty",
		+ "sixty", "seventy", "eighty", "ninety"
		+ )
		+
		+ ord_tens = c( "",
		+ "twentieth", "thirtieth", "fortieth", "fiftieth",
		+ "sixtieth", "seventieth", "eightieth", "ninetieth"
		+ )
		+
		+ ones_index = n %% 10
		+ n = n %/% 10
		+
		+ # No number in the ones column, so the number must be a multiple of ten.
		+ if( ones_index == 0 ) {
		+ if( ordinal ) {
		+ return( .subset( ord_tens, n ) )
		+ }
		+
		+ return( .subset( tens, n ) )
		+ }
		+
		+ # Find the value from the ones column.
		+ if( ordinal ) {
		+ unit_1 = .subset( ord_small, ones_index )
		+ }
		+ else {
		+ unit_1 = .subset( small, ones_index )
		+ }
		+
		+ # Find the tens column.
		+ unit_10 = .subset( tens, n )
		+
		+ # Hyphenate the tens and the ones together.
		+ concat( unit_10, concat( "-", unit_1 ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Returns a number as a comma-delimited string. This is a work-around
		+# until Renjin fixes https://github.com/bedatadriven/renjin/issues/338
		+# -----------------------------------------------------------------------------
		+commas <- function( n ) {
		+ n <- x( n )
		+
		+ s <- sprintf( "%03.0f", n %% 1000 )
		+ n <- n %/% 1000
		+
		+ while( n > 0 ) {
		+ s <- concat( sprintf( "%03.0f", n %% 1000 ), ',', s )
		+ n <- n %/% 1000
		+ }
		+
		+ gsub( '^0*', '', s )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Returns a human-readable string that provides the elapsed time between
		+# two numbers in terms of years, months, and days. If any unit value is zero,
		+# the unit is not included. The words (year, month, day) are pluralised
		+# according to English grammar. The numbers are written out according to
		+# Chicago Manual of Style. This applies the serial comma.
		+#
		+# Both numbers are offsets relative to the anchor date.
		+#
		+# If all unit values are zero, this returns s ("same day" by default).
		+#
		+# If the start date (began) is greater than end date (ended), the dates are
		+# swapped before calculations are performed. This allows any two dates
		+# to be compared and positive unit values are always returned.
		+# -----------------------------------------------------------------------------
		+elapsed <- function( began, ended, s = "same day" ) {
		+ began = when( anchor, began )
		+ ended = when( anchor, ended )
		+
		+ # Swap the dates if the end date comes before the start date.
		+ if( as.integer( ended - began ) < 0 ) {
		+ tempd = began
		+ began = ended
		+ ended = tempd
		+ }
		+
		+ # Calculate number of elapsed years.
		+ years = length( seq( from = began, to = ended, by = 'year' ) ) - 1
		+
		+ # Move the start date up by the number of elapsed years.
		+ if( years > 0 ) {
		+ began = seq( began, length = 2, by = concat( years, " years" ) )[2]
		+ years = pl.numeric( "year", years )
		+ }
		+ else {
		+ # Zero years.
		+ years = ""
		+ }
		+
		+ # Calculate number of elapsed months, excluding years.
		+ months = length( seq( from = began, to = ended, by = 'month' ) ) - 1
		+
		+ # Move the start date up by the number of elapsed months
		+ if( months > 0 ) {
		+ began = seq( began, length = 2, by = concat( months, " months" ) )[2]
		+ months = pl.numeric( "month", months )
		+ }
		+ else {
		+ # Zero months
		+ months = ""
		+ }
		+
		+ # Calculate number of elapsed days, excluding months and years.
		+ days = length( seq( from = began, to = ended, by = 'day' ) ) - 1
		+
		+ if( days > 0 ) {
		+ days = pl.numeric( "day", days )
		+ }
		+ else {
		+ # Zero days
		+ days = ""
		+ }
		+
		+ if( years <= 0 && months <= 0 && days <= 0 ) {
		+ return( s )
		+ }
		+
		+ # Put them all in a vector, then remove the empty values.
		+ s <- c( years, months, days )
		+ s <- s[ s != "" ]
		+
		+ r <- paste( s, collapse = ", " )
		+
		+ # If all three items are present, replace the last comma with ", and".
		+ if( length( s ) > 2 ) {
		+ return( gsub( "(.*),", "\\1, and", r ) )
		+ }
		+
		+ # Does nothing if no commas are present.
		+ gsub( "(.*),", "\\1 and", r )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Returns the number (n) in English followed by the plural or singular
		+# form of the given string (s; resumably a noun), if applicable, according
		+# to English grammar. That is, pl.numeric( "wolf", 5 ) will return
		+# "five wolves".
		+# -----------------------------------------------------------------------------
		+pl.numeric <- function( s, n ) {
		+ concat( cms( n ), concat( " ", pluralise( s, n ) ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Pluralise s if n is not equal to 1.
		+# -----------------------------------------------------------------------------
		+pl <- function( s, n=2 ) {
		+ pluralize( s, x( n ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Name of the season, starting with an capital letter.
		+# -----------------------------------------------------------------------------
		+season <- function( n, format = "%Y-%m-%d" ) {
		+ WS <- as.Date("2016-12-15", "%Y-%m-%d") # Winter Solstice
		+ SE <- as.Date("2016-03-15", "%Y-%m-%d") # Spring Equinox
		+ SS <- as.Date("2016-06-15", "%Y-%m-%d") # Summer Solstice
		+ AE <- as.Date("2016-09-15", "%Y-%m-%d") # Autumn Equinox
		+
		+ d <- when( anchor, n )
		+ d <- as.Date( strftime( d, format="2016-%m-%d" ) )
		+
		+ ifelse( d >= WS \| d < SE, "Winter",
		+ ifelse( d >= SE & d < SS, "Spring",
		+ ifelse( d >= SS & d < AE, "Summer", "Autumn" )
		+ )
		+ )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Converts the first letter in a string to lowercase
		+# -----------------------------------------------------------------------------
		+lc <- function( s ) {
		+ concat( tolower( substr( s, 1, 1 ) ), substr( s, 2, nchar( s ) ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Converts the entire string to lowercase
		+# -----------------------------------------------------------------------------
		+lower <- tolower
		+
		+# -----------------------------------------------------------------------------
		+# Converts the first letter in a string to uppercase
		+# -----------------------------------------------------------------------------
		+uc <- function( s ) {
		+ concat( toupper( substr( s, 1, 1 ) ), substr( s, 2, nchar( s ) ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Returns the number of days between the given dates.
		+# -----------------------------------------------------------------------------
		+days <- function( d1, d2, format = "%Y-%m-%d" ) {
		+ dates = c( d1, d2 )
		+ dt = strptime( dates, format = format )
		+ as.integer( difftime( dates[2], dates[1], units = "days" ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Returns the number of years elapsed.
		+# -----------------------------------------------------------------------------
		+years <- function( began, ended ) {
		+ began = when( anchor, began )
		+ ended = when( anchor, ended )
		+
		+ # Swap the dates if the end date comes before the start date.
		+ if( as.integer( ended - began ) < 0 ) {
		+ tempd = began
		+ began = ended
		+ ended = tempd
		+ }
		+
		+ # Calculate number of elapsed years.
		+ length( seq( from = began, to = ended, by = 'year' ) ) - 1
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Full name of the month, starting with a capital letter.
		+# -----------------------------------------------------------------------------
		+month <- function( n ) {
		+ # Faster than month.name[ x( n ) ]
		+ .subset( month.name, x( n ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# -----------------------------------------------------------------------------
		+money <- function( n ) {
		+ commas( x( n ) )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# -----------------------------------------------------------------------------
		+timeline <- function( n ) {
		+ concat( weekday( n ), ", ", annal( n ), " (", season( n ), ")" )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rounds to the nearest base value (e.g., round to nearest 10).
		+#
		+# @param base The nearest value to round to.
		+# -----------------------------------------------------------------------------
		+round.up <- function( n, base = 5 ) {
		+ base * round( x( n ) / base )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Computes linear distance between two points using Haversine formula.
		+# Although Earth is an oblate spheroid, this will produce results close
		+# enough for most purposes.
		+#
		+# @param lat1/lon1 The source latitude and longitude.
		+# @param lat2/lon2 The destination latitude and longitude.
		+# @param radius The radius of the sphere.
		+#
		+# @return The distance between the two coordinates in meters.
		+# -----------------------------------------------------------------------------
		+haversine <- function( lat1, lon1, lat2, lon2, radius = 6371 ) {
		+ # Convert decimal degrees to radians
		+ lon1 = lon1 * pi / 180
		+ lon2 = lon2 * pi / 180
		+ lat1 = lat1 * pi / 180
		+ lat2 = lat2 * pi / 180
		+
		+ # Haversine formula
		+ dlon = lon2 - lon1
		+ dlat = lat2 - lat1
		+ a = sin( dlat / 2 ) ** 2 + cos( lat1 ) * cos( lat2 ) * sin( dlon / 2 ) ** 2
		+ c = 2 * atan2( sqrt( a ), sqrt( 1-a ) )
		+
		+ return( radius * c * 1000 )
		+}
		+

R/csv.R

		+# -----------------------------------------------------------------------------
		+# Copyright 2020, White Magic Software, Ltd.
		+#
		+# Permission is hereby granted, free of charge, to any person obtaining
		+# a copy of this software and associated documentation files (the
		+# "Software"), to deal in the Software without restriction, including
		+# without limitation the rights to use, copy, modify, merge, publish,
		+# distribute, sublicense, and/or sell copies of the Software, and to
		+# permit persons to whom the Software is furnished to do so, subject to
		+# the following conditions:
		+#
		+# The above copyright notice and this permission notice shall be
		+# included in all copies or substantial portions of the Software.
		+#
		+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
		+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
		+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
		+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
		+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		+# -----------------------------------------------------------------------------
		+
		+# -----------------------------------------------------------------------------
		+# Converts CSV to Markdown.
		+#
		+# Reads a CSV file and converts the contents to a Markdown table. The
		+# file must be in the working directory as specified by setwd.
		+#
		+# @param f The filename to convert.
		+# @param decimals Rounded decimal places (default 1).
		+# @param totals Include total sums (default TRUE).
		+# @param align Right-align numbers (default TRUE).
		+# -----------------------------------------------------------------------------
		+csv2md <- function( f, decimals = 2, totals = T, align = T ) {
		+ # Read the CVS data from the file; ensure strings become characters.
		+ df <- read.table( f, sep=',', header=T, stringsAsFactors=F )
		+
		+ if( totals ) {
		+ # Determine what columns can be summed.
		+ number <- which( unlist( lapply( df, is.numeric ) ) )
		+
		+ # Use colSums when more than one summable column exists.
		+ if( length( number ) > 1 ) {
		+ f.sum <- colSums
		+ }
		+ else {
		+ f.sum <- sum
		+ }
		+
		+ # Calculate the sum of all the summable columns and insert the
		+ # results back into the data frame.
		+ df[ (nrow( df ) + 1), number ] <- f.sum( df[, number], na.rm=TRUE )
		+
		+ # pluralise would be heavyweight here.
		+ if( length( number ) > 1 ) {
		+ t <- "Totals"
		+ }
		+ else {
		+ t <- "Total"
		+ }
		+
		+ # Change the first column of the last line to "Total(s)".
		+ df[ nrow( df ), 1 ] <- t
		+
		+ # Don't clutter the output with "NA" text.
		+ df[ is.na( df ) ] <- ""
		+ }
		+
		+ if( align ) {
		+ is.char <- vapply( df, is.character, logical( 1 ) )
		+ dashes <- paste( ifelse( is.char, ':---', '---:' ), collapse='\|' )
		+ }
		+ else {
		+ dashes <- paste( rep( '---', length( df ) ), collapse = '\|' )
		+ }
		+
		+ # Create a Markdown version of the data frame.
		+ paste(
		+ paste( names( df ), collapse = '\|'), '\n',
		+ dashes, '\n',
		+ paste(
		+ Reduce( function( x, y ) {
		+ paste( x, format( y, digits = decimals ), sep = '\|' )
		+ }, df
		+ ),
		+ collapse = '\|\n', sep=''
		+ )
		+ )
		+}
		+

Delta	511 lines added, 0 lines removed, 511-line increase