| | +# ----------------------------------------------------------------------------- |
| | +# Copyright 2020, White Magic Software, Ltd. |
| | +# |
| | +# Permission is hereby granted, free of charge, to any person obtaining |
| | +# a copy of this software and associated documentation files (the |
| | +# "Software"), to deal in the Software without restriction, including |
| | +# without limitation the rights to use, copy, modify, merge, publish, |
| | +# distribute, sublicense, and/or sell copies of the Software, and to |
| | +# permit persons to whom the Software is furnished to do so, subject to |
| | +# the following conditions: |
| | +# |
| | +# The above copyright notice and this permission notice shall be |
| | +# included in all copies or substantial portions of the Software. |
| | +# |
| | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| | +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| | +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| | +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
| | +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| | +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| | +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| | +# ----------------------------------------------------------------------------- |
| | + |
| | +# ----------------------------------------------------------------------------- |
| | +# See Damian Conway's "An Algorithmic Approach to English Pluralization": |
| | +# http://goo.gl/oRL4MP |
| | +# See Oliver Glerke's Evo Inflector: https://github.com/atteo/evo-inflector/ |
| | +# See Shevek's Pluralizer: https://github.com/shevek/linguistics/ |
| | +# See also: http://www.freevectors.net/assets/files/plural.txt |
| | +# ----------------------------------------------------------------------------- |
| | +pluralize <- function( s, n ) { |
| | + result <- s |
| | + |
| | + # Partial implementation of Conway's algorithm for nouns. |
| | + if( n != 1 ) { |
| | + if( pl.noninflective( s ) || |
| | + pl.suffix( "es", s ) || |
| | + pl.suffix( "fish", s ) || |
| | + pl.suffix( "ois", s ) || |
| | + pl.suffix( "sheep", s ) || |
| | + pl.suffix( "deer", s ) || |
| | + pl.suffix( "pox", s ) || |
| | + pl.suffix( "[A-Z].*ese", s ) || |
| | + pl.suffix( "itis", s ) ) { |
| | + # 1. Retain non-inflective user-mapped noun as is. |
| | + # 2. Retain non-inflective plural as is. |
| | + result <- s |
| | + } |
| | + else if( pl.is.irregular.pl( s ) ) { |
| | + # 4. Change irregular plurals based on mapping. |
| | + result <- pl.irregular.pl( s ) |
| | + } |
| | + else if( pl.is.irregular.es( s ) ) { |
| | + # x. From Shevek's |
| | + result <- pl.inflect( s, "", "es" ) |
| | + } |
| | + else if( pl.suffix( "man", s ) ) { |
| | + # 5. For -man, change -an to -en |
| | + result <- pl.inflect( s, "an", "en" ) |
| | + } |
| | + else if( pl.suffix( "[lm]ouse", s ) ) { |
| | + # 5. For [lm]ouse, change -ouse to -ice |
| | + result <- pl.inflect( s, "ouse", "ice" ) |
| | + } |
| | + else if( pl.suffix( "tooth", s ) ) { |
| | + # 5. For -tooth, change -ooth to -eeth |
| | + result <- pl.inflect( s, "ooth", "eeth" ) |
| | + } |
| | + else if( pl.suffix( "goose", s ) ) { |
| | + # 5. For -goose, change -oose to -eese |
| | + result <- pl.inflect( s, "oose", "eese" ) |
| | + } |
| | + else if( pl.suffix( "foot", s ) ) { |
| | + # 5. For -foot, change -oot to -eet |
| | + result <- pl.inflect( s, "oot", "eet" ) |
| | + } |
| | + else if( pl.suffix( "zoon", s ) ) { |
| | + # 5. For -zoon, change -on to -a |
| | + result <- pl.inflect( s, "on", "a" ) |
| | + } |
| | + else if( pl.suffix( "[csx]is", s ) ) { |
| | + # 5. Change -cis, -sis, -xis to -es |
| | + result <- pl.inflect( s, "is", "es" ) |
| | + } |
| | + else if( pl.suffix( "([cs]h|ss|zz|x|s)", s ) ) { |
| | + # 8. Change -ch, -sh, -ss, -zz, -x, -s to -es |
| | + result <- pl.inflect( s, "", "es" ) |
| | + } |
| | + else if( pl.suffix( "([aeo]lf|[^d]eaf|arf)", s ) ) { |
| | + # 9. Change -f to -ves |
| | + result <- pl.inflect( s, "f", "ves" ) |
| | + } |
| | + else if( pl.suffix( "[nlw]ife", s ) ) { |
| | + # 10. Change -fe to -ves |
| | + result <- pl.inflect( s, "fe", "ves" ) |
| | + } |
| | + else if( pl.suffix( "[aeiou]y", s ) ) { |
| | + # 11. Change -[aeiou]y to -ys |
| | + result <- pl.inflect( s, "", "s" ) |
| | + } |
| | + else if( pl.suffix( "y", s ) ) { |
| | + # 12. Change -y to -ies |
| | + result <- pl.inflect( s, "y", "ies" ) |
| | + } |
| | + else if( pl.suffix( "z", s ) ) { |
| | + # x. Change -z to -zzes |
| | + result <- pl.inflect( s, "", "zes" ) |
| | + } |
| | + else { |
| | + # 13. Default plural: add -s |
| | + result <- pl.inflect( s, "", "s" ) |
| | + } |
| | + } |
| | + |
| | + result |
| | +} |
| | + |
| | +# ----------------------------------------------------------------------------- |
| | +# Returns the given string (s) with its suffix replaced by r. |
| | +# ----------------------------------------------------------------------------- |
| | +pl.inflect <- function( s, suffix, r ) { |
| | + gsub( paste( suffix, "$", sep="" ), r, s ) |
| | +} |
| | + |
| | +# ----------------------------------------------------------------------------- |
| | +# Answers whether the given string (s) has the given ending. |
| | +# ----------------------------------------------------------------------------- |
| | +pl.suffix <- function( ending, s ) { |
| | + grepl( paste( ending, "$", sep="" ), s ) |
| | +} |
| | + |
| | +# ----------------------------------------------------------------------------- |
| | +# Answers whether the given string (s) is a noninflective noun. |
| | +# ----------------------------------------------------------------------------- |
| | +pl.noninflective <- function( s ) { |
| | + v <- c( |
| | + "aircraft", |
| | + "Bhutanese", |
| | + "bison", |
| | + "bream", |
| | + "Burmese", |
| | + "carp", |
| | + "chassis", |
| | + "Chinese", |
| | + "clippers", |
| | + "cod", |
| | + "contretemps", |
| | + "corps", |
| | + "debris", |
| | + "djinn", |
| | + "eland", |
| | + "elk", |
| | + "flounder", |
| | + "fracas", |
| | + "gallows", |
| | + "graffiti", |
| | + "headquarters", |
| | + "high-jinks", |
| | + "homework", |
| | + "hovercraft", |
| | + "innings", |
| | + "Japanese", |
| | + "Lebanese", |
| | + "mackerel", |
| | + "means", |
| | + "mews", |
| | + "mice", |
| | + "mumps", |
| | + "news", |
| | + "pincers", |
| | + "pliers", |
| | + "Portuguese", |
| | + "proceedings", |
| | + "salmon", |
| | + "scissors", |
| | + "sea-bass", |
| | + "Senegalese", |
| | + "shears", |
| | + "Siamese", |
| | + "Sinhalese", |
| | + "spacecraft", |
| | + "swine", |
| | + "trout", |
| | + "tuna", |
| | + "Vietnamese", |
| | + "watercraft", |
| | + "whiting", |
| | + "wildebeest" |
| | + ) |
| | + |
| | + is.element( s, v ) |
| | +} |
| | + |
| | +# ----------------------------------------------------------------------------- |
| | +# Answers whether the given string (s) is an irregular plural. |
| | +# ----------------------------------------------------------------------------- |
| | +pl.is.irregular.pl <- function( s ) { |
| | + # Could be refactored with pl.irregular.pl... |
| | + v <- c( |
| | + "beef", "brother", "child", "cow", "ephemeris", "genie", "money", |
| | + "mongoose", "mythos", "octopus", "ox", "soliloquy", "trilby" |
| | + ) |
| | + |
| | + is.element( s, v ) |
| | +} |
| | + |
| | +# ----------------------------------------------------------------------------- |
| | +# Call to pluralize an irregular noun. Only call after confirming |
| | +# the noun is irregular via pl.is.irregular.pl. |
| | +# ----------------------------------------------------------------------------- |
| | +pl.irregular.pl <- function( s ) { |
| | + v <- list( |
| | + "beef" = "beefs", |
| | + "brother" = "brothers", |
| | + "child" = "children", |
| | + "cow" = "cows", |
| | + "ephemeris" = "ephemerides", |
| | + "genie" = "genies", |
| | + "money" = "moneys", |
| | + "mongoose" = "mongooses", |
| | + "mythos" = "mythoi", |
| | + "octopus" = "octopuses", |
| | + "ox" = "oxen", |
| | + "soliloquy" = "soliloquies", |
| | + "trilby" = "trilbys" |
| | + ) |
| | + |
| | + # Faster version of v[[ s ]] |
| | + .subset2( v, s ) |
| | +} |
| | + |
| | +# ----------------------------------------------------------------------------- |
| | +# Answers whether the given string (s) pluralizes with -es. |
| | +# ----------------------------------------------------------------------------- |
| | +pl.is.irregular.es <- function( s ) { |
| | + v <- c( |
| | + "acropolis", "aegis", "alias", "asbestos", "bathos", "bias", "bronchitis", |
| | + "bursitis", "caddis", "cannabis", "canvas", "chaos", "cosmos", "dais", |
| | + "digitalis", "epidermis", "ethos", "eyas", "gas", "glottis", "hubris", |
| | + "ibis", "lens", "mantis", "marquis", "metropolis", "pathos", "pelvis", |
| | + "polis", "rhinoceros", "sassafrass", "trellis" |
| | + ) |
| | + |
| | + is.element( s, v ) |
| | +} |
| | + |
| | |