# ----------------------------------------------------------------------------- # Copyright 2020, White Magic Software, Ltd. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- # See Damian Conway's "An Algorithmic Approach to English Pluralization": # http://goo.gl/oRL4MP # See Oliver Glerke's Evo Inflector: https://github.com/atteo/evo-inflector/ # See Shevek's Pluralizer: https://github.com/shevek/linguistics/ # See also: http://www.freevectors.net/assets/files/plural.txt # ----------------------------------------------------------------------------- pluralize <- function( s, n ) { result <- s # Partial implementation of Conway's algorithm for nouns. if( n != 1 ) { if( pl.noninflective( s ) || pl.suffix( "es", s ) || pl.suffix( "fish", s ) || pl.suffix( "ois", s ) || pl.suffix( "sheep", s ) || pl.suffix( "deer", s ) || pl.suffix( "pox", s ) || pl.suffix( "[A-Z].*ese", s ) || pl.suffix( "itis", s ) ) { # 1. Retain non-inflective user-mapped noun as is. # 2. Retain non-inflective plural as is. result <- s } else if( pl.is.irregular.pl( s ) ) { # 4. Change irregular plurals based on mapping. result <- pl.irregular.pl( s ) } else if( pl.is.irregular.es( s ) ) { # x. From Shevek's result <- pl.inflect( s, "", "es" ) } else if( pl.suffix( "man", s ) ) { # 5. For -man, change -an to -en result <- pl.inflect( s, "an", "en" ) } else if( pl.suffix( "[lm]ouse", s ) ) { # 5. For [lm]ouse, change -ouse to -ice result <- pl.inflect( s, "ouse", "ice" ) } else if( pl.suffix( "tooth", s ) ) { # 5. For -tooth, change -ooth to -eeth result <- pl.inflect( s, "ooth", "eeth" ) } else if( pl.suffix( "goose", s ) ) { # 5. For -goose, change -oose to -eese result <- pl.inflect( s, "oose", "eese" ) } else if( pl.suffix( "foot", s ) ) { # 5. For -foot, change -oot to -eet result <- pl.inflect( s, "oot", "eet" ) } else if( pl.suffix( "zoon", s ) ) { # 5. For -zoon, change -on to -a result <- pl.inflect( s, "on", "a" ) } else if( pl.suffix( "[csx]is", s ) ) { # 5. Change -cis, -sis, -xis to -es result <- pl.inflect( s, "is", "es" ) } else if( pl.suffix( "([cs]h|ss|zz|x|s)", s ) ) { # 8. Change -ch, -sh, -ss, -zz, -x, -s to -es result <- pl.inflect( s, "", "es" ) } else if( pl.suffix( "([aeo]lf|[^d]eaf|arf)", s ) ) { # 9. Change -f to -ves result <- pl.inflect( s, "f", "ves" ) } else if( pl.suffix( "[nlw]ife", s ) ) { # 10. Change -fe to -ves result <- pl.inflect( s, "fe", "ves" ) } else if( pl.suffix( "[aeiou]y", s ) ) { # 11. Change -[aeiou]y to -ys result <- pl.inflect( s, "", "s" ) } else if( pl.suffix( "y", s ) ) { # 12. Change -y to -ies result <- pl.inflect( s, "y", "ies" ) } else if( pl.suffix( "z", s ) ) { # x. Change -z to -zzes result <- pl.inflect( s, "", "zes" ) } else { # 13. Default plural: add -s result <- pl.inflect( s, "", "s" ) } } result } # ----------------------------------------------------------------------------- # Returns the given string (s) with its suffix replaced by r. # ----------------------------------------------------------------------------- pl.inflect <- function( s, suffix, r ) { gsub( paste( suffix, "$", sep="" ), r, s ) } # ----------------------------------------------------------------------------- # Answers whether the given string (s) has the given ending. # ----------------------------------------------------------------------------- pl.suffix <- function( ending, s ) { grepl( paste( ending, "$", sep="" ), s ) } # ----------------------------------------------------------------------------- # Answers whether the given string (s) is a noninflective noun. # ----------------------------------------------------------------------------- pl.noninflective <- function( s ) { v <- c( "aircraft", "Bhutanese", "bison", "bream", "Burmese", "carp", "chassis", "Chinese", "clippers", "cod", "contretemps", "corps", "debris", "djinn", "eland", "elk", "flounder", "fracas", "gallows", "graffiti", "headquarters", "high-jinks", "homework", "hovercraft", "innings", "Japanese", "Lebanese", "mackerel", "means", "mews", "mice", "mumps", "news", "pincers", "pliers", "Portuguese", "proceedings", "salmon", "scissors", "sea-bass", "Senegalese", "shears", "Siamese", "Sinhalese", "spacecraft", "swine", "trout", "tuna", "Vietnamese", "watercraft", "whiting", "wildebeest" ) is.element( s, v ) } # ----------------------------------------------------------------------------- # Answers whether the given string (s) is an irregular plural. # ----------------------------------------------------------------------------- pl.is.irregular.pl <- function( s ) { # Could be refactored with pl.irregular.pl... v <- c( "beef", "brother", "child", "cow", "ephemeris", "genie", "money", "mongoose", "mythos", "octopus", "ox", "soliloquy", "trilby" ) is.element( s, v ) } # ----------------------------------------------------------------------------- # Call to pluralize an irregular noun. Only call after confirming # the noun is irregular via pl.is.irregular.pl. # ----------------------------------------------------------------------------- pl.irregular.pl <- function( s ) { v <- list( "beef" = "beefs", "brother" = "brothers", "child" = "children", "cow" = "cows", "ephemeris" = "ephemerides", "genie" = "genies", "money" = "moneys", "mongoose" = "mongooses", "mythos" = "mythoi", "octopus" = "octopuses", "ox" = "oxen", "passerby" = "passersby", "soliloquy" = "soliloquies", "trilby" = "trilbys" ) # Faster version of v[[ s ]] .subset2( v, s ) } # ----------------------------------------------------------------------------- # Answers whether the given string (s) pluralizes with -es. # ----------------------------------------------------------------------------- pl.is.irregular.es <- function( s ) { v <- c( "acropolis", "aegis", "alias", "asbestos", "bathos", "bias", "bronchitis", "bursitis", "caddis", "cannabis", "canvas", "chaos", "cosmos", "dais", "digitalis", "epidermis", "ethos", "eyas", "gas", "glottis", "hubris", "ibis", "lens", "mantis", "marquis", "metropolis", "pathos", "pelvis", "polis", "rhinoceros", "sassafrass", "trellis" ) is.element( s, v ) }