Dave Jarvis' Repositories

Author	DaveJarvis <email>
Date	2020-05-18 17:13:47 GMT-0700
Commit	2c73b0a06ca3e1b387605be91e972a8411006652
Parent	05def79
Delta	20 lines added, 250 lines removed, 230-line decrease

R/pluralise.R

		-# -----------------------------------------------------------------------------
		-# Copyright 2020, White Magic Software, Ltd.
		-#
		-# Permission is hereby granted, free of charge, to any person obtaining
		-# a copy of this software and associated documentation files (the
		-# "Software"), to deal in the Software without restriction, including
		-# without limitation the rights to use, copy, modify, merge, publish,
		-# distribute, sublicense, and/or sell copies of the Software, and to
		-# permit persons to whom the Software is furnished to do so, subject to
		-# the following conditions:
		-#
		-# The above copyright notice and this permission notice shall be
		-# included in all copies or substantial portions of the Software.
		-#
		-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
		-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
		-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
		-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
		-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		-# -----------------------------------------------------------------------------
		-
		-# -----------------------------------------------------------------------------
		-# See Damian Conway's "An Algorithmic Approach to English Pluralization":
		-# http://goo.gl/oRL4MP
		-# See Oliver Glerke's Evo Inflector: https://github.com/atteo/evo-inflector/
		-# See Shevek's Pluralizer: https://github.com/shevek/linguistics/
		-# See also: http://www.freevectors.net/assets/files/plural.txt
		-# -----------------------------------------------------------------------------
		-pluralise <- function( s, n ) {
		- result <- s
		-
		- # Partial implementation of Conway's algorithm for nouns.
		- if( n != 1 ) {
		- if( pl.noninflective( s ) \|\|
		- pl.suffix( "es", s ) \|\|
		- pl.suffix( "fish", s ) \|\|
		- pl.suffix( "ois", s ) \|\|
		- pl.suffix( "sheep", s ) \|\|
		- pl.suffix( "deer", s ) \|\|
		- pl.suffix( "pox", s ) \|\|
		- pl.suffix( "[A-Z].*ese", s ) \|\|
		- pl.suffix( "itis", s ) ) {
		- # 1. Retain non-inflective user-mapped noun as is.
		- # 2. Retain non-inflective plural as is.
		- result <- s
		- }
		- else if( pl.is.irregular.pl( s ) ) {
		- # 4. Change irregular plurals based on mapping.
		- result <- pl.irregular.pl( s )
		- }
		- else if( pl.is.irregular.es( s ) ) {
		- # x. From Shevek's
		- result <- pl.inflect( s, "", "es" )
		- }
		- else if( pl.suffix( "man", s ) ) {
		- # 5. For -man, change -an to -en
		- result <- pl.inflect( s, "an", "en" )
		- }
		- else if( pl.suffix( "[lm]ouse", s ) ) {
		- # 5. For [lm]ouse, change -ouse to -ice
		- result <- pl.inflect( s, "ouse", "ice" )
		- }
		- else if( pl.suffix( "tooth", s ) ) {
		- # 5. For -tooth, change -ooth to -eeth
		- result <- pl.inflect( s, "ooth", "eeth" )
		- }
		- else if( pl.suffix( "goose", s ) ) {
		- # 5. For -goose, change -oose to -eese
		- result <- pl.inflect( s, "oose", "eese" )
		- }
		- else if( pl.suffix( "foot", s ) ) {
		- # 5. For -foot, change -oot to -eet
		- result <- pl.inflect( s, "oot", "eet" )
		- }
		- else if( pl.suffix( "zoon", s ) ) {
		- # 5. For -zoon, change -on to -a
		- result <- pl.inflect( s, "on", "a" )
		- }
		- else if( pl.suffix( "[csx]is", s ) ) {
		- # 5. Change -cis, -sis, -xis to -es
		- result <- pl.inflect( s, "is", "es" )
		- }
		- else if( pl.suffix( "([cs]h\|ss\|zz\|x\|s)", s ) ) {
		- # 8. Change -ch, -sh, -ss, -zz, -x, -s to -es
		- result <- pl.inflect( s, "", "es" )
		- }
		- else if( pl.suffix( "([aeo]lf\|[^d]eaf\|arf)", s ) ) {
		- # 9. Change -f to -ves
		- result <- pl.inflect( s, "f", "ves" )
		- }
		- else if( pl.suffix( "[nlw]ife", s ) ) {
		- # 10. Change -fe to -ves
		- result <- pl.inflect( s, "fe", "ves" )
		- }
		- else if( pl.suffix( "[aeiou]y", s ) ) {
		- # 11. Change -[aeiou]y to -ys
		- result <- pl.inflect( s, "", "s" )
		- }
		- else if( pl.suffix( "y", s ) ) {
		- # 12. Change -y to -ies
		- result <- pl.inflect( s, "y", "ies" )
		- }
		- else if( pl.suffix( "z", s ) ) {
		- # x. Change -z to -zzes
		- result <- pl.inflect( s, "", "zes" )
		- }
		- else {
		- # 13. Default plural: add -s
		- result <- pl.inflect( s, "", "s" )
		- }
		- }
		-
		- result
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Returns the given string (s) with its suffix replaced by r.
		-# -----------------------------------------------------------------------------
		-pl.inflect <- function( s, suffix, r ) {
		- gsub( paste( suffix, "$", sep="" ), r, s )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Answers whether the given string (s) has the given ending.
		-# -----------------------------------------------------------------------------
		-pl.suffix <- function( ending, s ) {
		- grepl( paste( ending, "$", sep="" ), s )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Answers whether the given string (s) is a noninflective noun.
		-# -----------------------------------------------------------------------------
		-pl.noninflective <- function( s ) {
		- v <- c(
		- "aircraft",
		- "Bhutanese",
		- "bison",
		- "bream",
		- "Burmese",
		- "carp",
		- "chassis",
		- "Chinese",
		- "clippers",
		- "cod",
		- "contretemps",
		- "corps",
		- "debris",
		- "djinn",
		- "eland",
		- "elk",
		- "flounder",
		- "fracas",
		- "gallows",
		- "graffiti",
		- "headquarters",
		- "high-jinks",
		- "homework",
		- "hovercraft",
		- "innings",
		- "Japanese",
		- "Lebanese",
		- "mackerel",
		- "means",
		- "mews",
		- "mice",
		- "mumps",
		- "news",
		- "pincers",
		- "pliers",
		- "Portuguese",
		- "proceedings",
		- "salmon",
		- "scissors",
		- "sea-bass",
		- "Senegalese",
		- "shears",
		- "Siamese",
		- "Sinhalese",
		- "spacecraft",
		- "swine",
		- "trout",
		- "tuna",
		- "Vietnamese",
		- "watercraft",
		- "whiting",
		- "wildebeest"
		- )
		-
		- is.element( s, v )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Answers whether the given string (s) is an irregular plural.
		-# -----------------------------------------------------------------------------
		-pl.is.irregular.pl <- function( s ) {
		- # Could be refactored with pl.irregular.pl...
		- v <- c(
		- "beef", "brother", "child", "cow", "ephemeris", "genie", "money",
		- "mongoose", "mythos", "octopus", "ox", "soliloquy", "trilby"
		- )
		-
		- is.element( s, v )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Call to pluralise an irregular noun. Only call after confirming
		-# the noun is irregular via pl.is.irregular.pl.
		-# -----------------------------------------------------------------------------
		-pl.irregular.pl <- function( s ) {
		- v <- list(
		- "beef" = "beefs",
		- "brother" = "brothers",
		- "child" = "children",
		- "cow" = "cows",
		- "ephemeris" = "ephemerides",
		- "genie" = "genies",
		- "money" = "moneys",
		- "mongoose" = "mongooses",
		- "mythos" = "mythoi",
		- "octopus" = "octopuses",
		- "ox" = "oxen",
		- "soliloquy" = "soliloquies",
		- "trilby" = "trilbys"
		- )
		-
		- # Faster version of v[[ s ]]
		- .subset2( v, s )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Answers whether the given string (s) pluralises with -es.
		-# -----------------------------------------------------------------------------
		-pl.is.irregular.es <- function( s ) {
		- v <- c(
		- "acropolis", "aegis", "alias", "asbestos", "bathos", "bias", "bronchitis",
		- "bursitis", "caddis", "cannabis", "canvas", "chaos", "cosmos", "dais",
		- "digitalis", "epidermis", "ethos", "eyas", "gas", "glottis", "hubris",
		- "ibis", "lens", "mantis", "marquis", "metropolis", "pathos", "pelvis",
		- "polis", "rhinoceros", "sassafrass", "trellis"
		- )
		-
		- is.element( s, v )
		-}
		-


		// Tell the user that there was a problem.
		- getNotifier().notify( get( STATUS_PARSE_ERROR,
		- e.getMessage(), currIndex )
		+ getNotifier().notify(
		+ get( STATUS_PARSE_ERROR, e.getMessage(), currIndex )
		);
		}

		# -----------------------------------------------------------------------------
		rstr <- function( s, n = 1 ) {
		- l = nchar( s )
		+ l <- nchar( s )
		substr( s, l - n + 1, l )
		}

		# -----------------------------------------------------------------------------
		# Returns the possessive form of the given word, s.
		# -----------------------------------------------------------------------------
		pos <- function( s ) {
		- paste0( s, ifelse( rstr( s, 1 ) == 's', "'" ,"'s" ) )
		+ lcs <- tolower( s )
		+ pronouns <- c( 'your', 'our', 'her', 'it', 'their' )
		+
		+ if( lcs == 'my' ) {
		+ # Change "[Mm]y" to "[Mm]ine".
		+ s <- paste0( lstr( s, 1 ), "ine" )
		+ }
		+ else if( lcs %in% pronouns ) {
		+ # Append an s to most pronouns.
		+ s <- paste0( s, 's' )
		+ }
		+ else if( lcs != 'his' ) {
		+ # Possessive for all other words except 'his'.
		+ s <- paste0( s, ifelse( rstr( s, 1 ) == 's', "'" ,"'s" ) )
		+ }
		+
		+ s
		}

Pluralise to pluralize