Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenwrite.git

Add R functions

AuthorDaveJarvis <email>
Date2020-07-02 22:11:45 GMT-0700
Commit2221224a39bfa1ec7d154eca274627db34a46f50
Parente36fac1
R/conversion.R
+# -----------------------------------------------------------------------------
+# Copyright 2020, White Magic Software, Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# Substitute R expressions in a document with their evaluated value. The
+# anchor variable must be set for functions that use relative dates.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# Evaluates an expression; writes s if there is no expression.
+# -----------------------------------------------------------------------------
+x <- function( s ) {
+ tryCatch( {
+ r = eval( parse( text = s ) )
+
+ # If the result isn't primitive, then it was probably parsed into
+ # an unprintable object (e.g., "gray" becomes a colour). In those
+ # cases, return the original text string. Otherwise, an atomic
+ # value means a primitive type (string, integer, etc.) that can be
+ # written directly into the document.
+ ifelse( is.atomic( r ), r, s );
+ },
+ warning = function( w ) { s },
+ error = function( e ) { s } )
+}
+
+# -----------------------------------------------------------------------------
+# Returns a date offset by a given number of days, relative to the given
+# date (d). This does not use the anchor, but is used to get the anchor's
+# value as a date.
+# -----------------------------------------------------------------------------
+when <- function( d, n = 0, format = "%Y-%m-%d" ) {
+ as.Date( d, format = format ) + x( n )
+}
+
+# -----------------------------------------------------------------------------
+# Full date (s) offset by an optional number of days before or after.
+# This will remove leading zeros (applying leading spaces instead, which
+# are ignored by any worthwhile typesetting engine).
+# -----------------------------------------------------------------------------
+annal <- function( days = 0, format = "%Y-%m-%d", oformat = "%B %d, %Y" ) {
+ format( when( anchor, days ), format = oformat )
+}
+
+# -----------------------------------------------------------------------------
+# Extracts the year from a date string.
+# -----------------------------------------------------------------------------
+year <- function( days = 0, format = "%Y-%m-%d" ) {
+ annal( days, format, "%Y" )
+}
+
+# -----------------------------------------------------------------------------
+# Day of the week (in days since the anchor date).
+# -----------------------------------------------------------------------------
+weekday <- function( n ) {
+ weekdays( when( anchor, n ) )
+}
+
+# -----------------------------------------------------------------------------
+# String concatenate function alias because paste0 is a terrible name.
+# -----------------------------------------------------------------------------
+concat <- paste0
+
+# -----------------------------------------------------------------------------
+# Translates a number from digits to words using Chicago Manual of Style.
+# This does not translate numbers greater than one hundred. If ordinal
+# is TRUE, this will return the ordinal name. This will not produce ordinals
+# for numbers greater than 100.
+# -----------------------------------------------------------------------------
+cms <- function( n, ordinal = FALSE ) {
+ n <- x( n )
+
+ if( n == 0 ) {
+ if( ordinal ) {
+ return( "zeroth" )
+ }
+
+ return( "zero" )
+ }
+
+ # Concatenate this a little later.
+ if( n < 0 ) {
+ result = "negative "
+ n = abs( n )
+ }
+
+ # Do not spell out numbers greater than one hundred.
+ if( n > 100 ) {
+ # Comma-separated numbers.
+ return( commas( n ) )
+ }
+
+ # Don't go beyond 100.
+ if( n == 100 ) {
+ if( ordinal ) {
+ return( "one hundredth" )
+ }
+
+ return( "one hundred" )
+ }
+
+ # Samuel Langhorne Clemens noted English has too many exceptions.
+ small = c(
+ "one", "two", "three", "four", "five",
+ "six", "seven", "eight", "nine", "ten",
+ "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+ "sixteen", "seventeen", "eighteen", "nineteen"
+ )
+
+ ord_small = c(
+ "first", "second", "third", "fourth", "fifth",
+ "sixth", "seventh", "eighth", "ninth", "tenth",
+ "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth",
+ "sixteenth", "seventeenth", "eighteenth", "nineteenth", "twentieth"
+ )
+
+ # After this, the number (n) is between 20 and 99.
+ if( n < 20 ) {
+ if( ordinal ) {
+ return( .subset( ord_small, n %% 100 ) )
+ }
+
+ return( .subset( small, n %% 100 ) )
+ }
+
+ tens = c( "",
+ "twenty", "thirty", "forty", "fifty",
+ "sixty", "seventy", "eighty", "ninety"
+ )
+
+ ord_tens = c( "",
+ "twentieth", "thirtieth", "fortieth", "fiftieth",
+ "sixtieth", "seventieth", "eightieth", "ninetieth"
+ )
+
+ ones_index = n %% 10
+ n = n %/% 10
+
+ # No number in the ones column, so the number must be a multiple of ten.
+ if( ones_index == 0 ) {
+ if( ordinal ) {
+ return( .subset( ord_tens, n ) )
+ }
+
+ return( .subset( tens, n ) )
+ }
+
+ # Find the value from the ones column.
+ if( ordinal ) {
+ unit_1 = .subset( ord_small, ones_index )
+ }
+ else {
+ unit_1 = .subset( small, ones_index )
+ }
+
+ # Find the tens column.
+ unit_10 = .subset( tens, n )
+
+ # Hyphenate the tens and the ones together.
+ concat( unit_10, concat( "-", unit_1 ) )
+}
+
+# -----------------------------------------------------------------------------
+# Returns a number as a comma-delimited string. This is a work-around
+# until Renjin fixes https://github.com/bedatadriven/renjin/issues/338
+# -----------------------------------------------------------------------------
+commas <- function( n ) {
+ n <- x( n )
+
+ s <- sprintf( "%03.0f", n %% 1000 )
+ n <- n %/% 1000
+
+ while( n > 0 ) {
+ s <- concat( sprintf( "%03.0f", n %% 1000 ), ',', s )
+ n <- n %/% 1000
+ }
+
+ gsub( '^0*', '', s )
+}
+
+# -----------------------------------------------------------------------------
+# Returns a human-readable string that provides the elapsed time between
+# two numbers in terms of years, months, and days. If any unit value is zero,
+# the unit is not included. The words (year, month, day) are pluralised
+# according to English grammar. The numbers are written out according to
+# Chicago Manual of Style. This applies the serial comma.
+#
+# Both numbers are offsets relative to the anchor date.
+#
+# If all unit values are zero, this returns s ("same day" by default).
+#
+# If the start date (began) is greater than end date (ended), the dates are
+# swapped before calculations are performed. This allows any two dates
+# to be compared and positive unit values are always returned.
+# -----------------------------------------------------------------------------
+elapsed <- function( began, ended, s = "same day" ) {
+ began = when( anchor, began )
+ ended = when( anchor, ended )
+
+ # Swap the dates if the end date comes before the start date.
+ if( as.integer( ended - began ) < 0 ) {
+ tempd = began
+ began = ended
+ ended = tempd
+ }
+
+ # Calculate number of elapsed years.
+ years = length( seq( from = began, to = ended, by = 'year' ) ) - 1
+
+ # Move the start date up by the number of elapsed years.
+ if( years > 0 ) {
+ began = seq( began, length = 2, by = concat( years, " years" ) )[2]
+ years = pl.numeric( "year", years )
+ }
+ else {
+ # Zero years.
+ years = ""
+ }
+
+ # Calculate number of elapsed months, excluding years.
+ months = length( seq( from = began, to = ended, by = 'month' ) ) - 1
+
+ # Move the start date up by the number of elapsed months
+ if( months > 0 ) {
+ began = seq( began, length = 2, by = concat( months, " months" ) )[2]
+ months = pl.numeric( "month", months )
+ }
+ else {
+ # Zero months
+ months = ""
+ }
+
+ # Calculate number of elapsed days, excluding months and years.
+ days = length( seq( from = began, to = ended, by = 'day' ) ) - 1
+
+ if( days > 0 ) {
+ days = pl.numeric( "day", days )
+ }
+ else {
+ # Zero days
+ days = ""
+ }
+
+ if( years <= 0 && months <= 0 && days <= 0 ) {
+ return( s )
+ }
+
+ # Put them all in a vector, then remove the empty values.
+ s <- c( years, months, days )
+ s <- s[ s != "" ]
+
+ r <- paste( s, collapse = ", " )
+
+ # If all three items are present, replace the last comma with ", and".
+ if( length( s ) > 2 ) {
+ return( gsub( "(.*),", "\\1, and", r ) )
+ }
+
+ # Does nothing if no commas are present.
+ gsub( "(.*),", "\\1 and", r )
+}
+
+# -----------------------------------------------------------------------------
+# Returns the number (n) in English followed by the plural or singular
+# form of the given string (s; resumably a noun), if applicable, according
+# to English grammar. That is, pl.numeric( "wolf", 5 ) will return
+# "five wolves".
+# -----------------------------------------------------------------------------
+pl.numeric <- function( s, n ) {
+ concat( cms( n ), concat( " ", pluralise( s, n ) ) )
+}
+
+# -----------------------------------------------------------------------------
+# Pluralise s if n is not equal to 1.
+# -----------------------------------------------------------------------------
+pl <- function( s, n=2 ) {
+ pluralize( s, x( n ) )
+}
+
+# -----------------------------------------------------------------------------
+# Name of the season, starting with an capital letter.
+# -----------------------------------------------------------------------------
+season <- function( n, format = "%Y-%m-%d" ) {
+ WS <- as.Date("2016-12-15", "%Y-%m-%d") # Winter Solstice
+ SE <- as.Date("2016-03-15", "%Y-%m-%d") # Spring Equinox
+ SS <- as.Date("2016-06-15", "%Y-%m-%d") # Summer Solstice
+ AE <- as.Date("2016-09-15", "%Y-%m-%d") # Autumn Equinox
+
+ d <- when( anchor, n )
+ d <- as.Date( strftime( d, format="2016-%m-%d" ) )
+
+ ifelse( d >= WS | d < SE, "Winter",
+ ifelse( d >= SE & d < SS, "Spring",
+ ifelse( d >= SS & d < AE, "Summer", "Autumn" )
+ )
+ )
+}
+
+# -----------------------------------------------------------------------------
+# Converts the first letter in a string to lowercase
+# -----------------------------------------------------------------------------
+lc <- function( s ) {
+ concat( tolower( substr( s, 1, 1 ) ), substr( s, 2, nchar( s ) ) )
+}
+
+# -----------------------------------------------------------------------------
+# Converts the entire string to lowercase
+# -----------------------------------------------------------------------------
+lower <- tolower
+
+# -----------------------------------------------------------------------------
+# Converts the first letter in a string to uppercase
+# -----------------------------------------------------------------------------
+uc <- function( s ) {
+ concat( toupper( substr( s, 1, 1 ) ), substr( s, 2, nchar( s ) ) )
+}
+
+# -----------------------------------------------------------------------------
+# Returns the number of days between the given dates.
+# -----------------------------------------------------------------------------
+days <- function( d1, d2, format = "%Y-%m-%d" ) {
+ dates = c( d1, d2 )
+ dt = strptime( dates, format = format )
+ as.integer( difftime( dates[2], dates[1], units = "days" ) )
+}
+
+# -----------------------------------------------------------------------------
+# Returns the number of years elapsed.
+# -----------------------------------------------------------------------------
+years <- function( began, ended ) {
+ began = when( anchor, began )
+ ended = when( anchor, ended )
+
+ # Swap the dates if the end date comes before the start date.
+ if( as.integer( ended - began ) < 0 ) {
+ tempd = began
+ began = ended
+ ended = tempd
+ }
+
+ # Calculate number of elapsed years.
+ length( seq( from = began, to = ended, by = 'year' ) ) - 1
+}
+
+# -----------------------------------------------------------------------------
+# Full name of the month, starting with a capital letter.
+# -----------------------------------------------------------------------------
+month <- function( n ) {
+ # Faster than month.name[ x( n ) ]
+ .subset( month.name, x( n ) )
+}
+
+# -----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
+money <- function( n ) {
+ commas( x( n ) )
+}
+
+# -----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
+timeline <- function( n ) {
+ concat( weekday( n ), ", ", annal( n ), " (", season( n ), ")" )
+}
+
+# -----------------------------------------------------------------------------
+# Rounds to the nearest base value (e.g., round to nearest 10).
+#
+# @param base The nearest value to round to.
+# -----------------------------------------------------------------------------
+round.up <- function( n, base = 5 ) {
+ base * round( x( n ) / base )
+}
+
+# -----------------------------------------------------------------------------
+# Computes linear distance between two points using Haversine formula.
+# Although Earth is an oblate spheroid, this will produce results close
+# enough for most purposes.
+#
+# @param lat1/lon1 The source latitude and longitude.
+# @param lat2/lon2 The destination latitude and longitude.
+# @param radius The radius of the sphere.
+#
+# @return The distance between the two coordinates in meters.
+# -----------------------------------------------------------------------------
+haversine <- function( lat1, lon1, lat2, lon2, radius = 6371 ) {
+ # Convert decimal degrees to radians
+ lon1 = lon1 * pi / 180
+ lon2 = lon2 * pi / 180
+ lat1 = lat1 * pi / 180
+ lat2 = lat2 * pi / 180
+
+ # Haversine formula
+ dlon = lon2 - lon1
+ dlat = lat2 - lat1
+ a = sin( dlat / 2 ) ** 2 + cos( lat1 ) * cos( lat2 ) * sin( dlon / 2 ) ** 2
+ c = 2 * atan2( sqrt( a ), sqrt( 1-a ) )
+
+ return( radius * c * 1000 )
+}
+
R/csv.R
+# -----------------------------------------------------------------------------
+# Copyright 2020, White Magic Software, Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# Converts CSV to Markdown.
+#
+# Reads a CSV file and converts the contents to a Markdown table. The
+# file must be in the working directory as specified by setwd.
+#
+# @param f The filename to convert.
+# @param decimals Rounded decimal places (default 1).
+# @param totals Include total sums (default TRUE).
+# @param align Right-align numbers (default TRUE).
+# -----------------------------------------------------------------------------
+csv2md <- function( f, decimals = 2, totals = T, align = T ) {
+ # Read the CVS data from the file; ensure strings become characters.
+ df <- read.table( f, sep=',', header=T, stringsAsFactors=F )
+
+ if( totals ) {
+ # Determine what columns can be summed.
+ number <- which( unlist( lapply( df, is.numeric ) ) )
+
+ # Use colSums when more than one summable column exists.
+ if( length( number ) > 1 ) {
+ f.sum <- colSums
+ }
+ else {
+ f.sum <- sum
+ }
+
+ # Calculate the sum of all the summable columns and insert the
+ # results back into the data frame.
+ df[ (nrow( df ) + 1), number ] <- f.sum( df[, number], na.rm=TRUE )
+
+ # pluralise would be heavyweight here.
+ if( length( number ) > 1 ) {
+ t <- "**Totals**"
+ }
+ else {
+ t <- "**Total**"
+ }
+
+ # Change the first column of the last line to "Total(s)".
+ df[ nrow( df ), 1 ] <- t
+
+ # Don't clutter the output with "NA" text.
+ df[ is.na( df ) ] <- ""
+ }
+
+ if( align ) {
+ is.char <- vapply( df, is.character, logical( 1 ) )
+ dashes <- paste( ifelse( is.char, ':---', '---:' ), collapse='|' )
+ }
+ else {
+ dashes <- paste( rep( '---', length( df ) ), collapse = '|' )
+ }
+
+ # Create a Markdown version of the data frame.
+ paste(
+ paste( names( df ), collapse = '|'), '\n',
+ dashes, '\n',
+ paste(
+ Reduce( function( x, y ) {
+ paste( x, format( y, digits = decimals ), sep = '|' )
+ }, df
+ ),
+ collapse = '|\n', sep=''
+ )
+ )
+}
+
Delta511 lines added, 0 lines removed, 511-line increase