Dave Jarvis' Repositories

Author	DaveJarvis <email>
Date	2022-11-06 15:40:34 GMT-0800
Commit	e68ea19bc0a12f39b5a47e7b431fdf4b2eaade46
Parent	6e94bb2
Delta	577 lines added, 341 lines removed, 236-line increase

R/pluralize.R


		.uninflected_nouns <- c(
		- "bison", "flounder", "pliers", "bream", "gallows", "proceedings", "breeches",
		- "graffiti", "rabies", "britches", "headquarters", "salmon", "carp", "herpes",
		- "scissors", "chassis", "high-jinks", "sea-bass", "clippers", "homework",
		- "series", "cod", "innings", "shears", "contretemps", "jackanapes", "species",
		- "corps", "mackerel", "swine", "debris", "measles", "trout", "diabetes",
		- "mews", "tuna", "djinn", "mumps", "whiting", "eland", "news", "wildebeest",
		- "elk", "pincers"
		-)
		-
		-.singular_nouns <- c(
		- "acropolis", "chaos", "lens", "aegis", "cosmos", "mantis", "alias", "dais",
		- "marquis", "asbestos", "digitalis", "metropolis", "atlas", "epidermis",
		- "pathos", "bathos", "ethos", "pelvis", "bias", "gas", "polis", "caddis",
		- "glottis", "rhinoceros", "cannabis", "glottis", "sassafras", "canvas", "ibis",
		- "trellis"
		-)
		-
		-.irregular_patterns <- c(
		- "fish$", "ois$", "-sheep$", "deer$", "pox$", "[A-Z].*ese$", "itis$"
		-)
		-
		-.prepositions <- c(
		- "about", "before", "during", "of", "till", "above", "behind", "except", "off",
		- "to", "across", "below", "for", "on", "under", "after", "beneath", "from",
		- "onto", "until", "among", "beside", "in", "out", "unto", "around", "besides",
		- "into", "over", "upon", "at", "between", "near", "since", "with", "athwart",
		- "betwixt", "beyond", "but", "by"
		-)
		-
		-# -----------------------------------------------------------------------------
		-# Rule 3
		-#
		-# Handle pronouns in the nominative, accusative, and dative and propositional
		-# phrases.
		-# -----------------------------------------------------------------------------
		-pluralize_pronoun <- function( word ) {
		- as.vector( .pluralized_pronouns[word] )
		-}
		-
		-.pluralized_pronouns <- c(
		- "I" = "we", "me" = "us", "myself" = "ourselves",
		- "you" = "you", "thou" = "ye", "thee" = "ye", "yourself" = "yourself",
		- "thyself" = "yourself",
		- "she" = "they", "he" = "they", "it" = "they", "they" = "they",
		- "her" = "them", "him" = "them", "it" = "them", "them" = "them",
		- "herself" = "themselves", "himself" = "themselves", "itself" = "themselves",
		- "themself" = "themselves", "oneself" = "oneselves"
		-)
		-
		-# -----------------------------------------------------------------------------
		-# Rule 4
		-#
		-# Change irregular plurals based on mapping.
		-# -----------------------------------------------------------------------------
		-pluralize_irregular <- function( word, method = c( "ac", "ca", "a", "c" ) ) {
		- method <- match.arg( method )
		- plurals <- .irregular_nouns[word]
		-
		- extract_plural <- function( plurals ) {
		- if( is.null( plurals ) ) {
		- return( NA_character_ )
		- }
		-
		- return( switch(
		- method,
		- "a" = plurals["a"],
		- "c" = plurals["c"],
		- "ac" = if.na( plurals["a"], plurals["c"] ),
		- "ca" = if.na( plurals["c"], plurals["a"] )
		- ) )
		- }
		-
		- as.character( lapply( plurals, extract_plural ) )
		-}
		-
		-.irregular_nouns <- list(
		- "beef" = c( "a" = "beefs", "c" = "beeves" ),
		- "brother" = c( "a" = "brothers", "c" = "brethren" ),
		- "child" = c( "a" = NA_character_, "c" = "children" ),
		- "cow" = c( "a" = "cows", "c" = "kine" ),
		- "ephemeris" = c( "a" = NA_character_, "c" = "ephemerides" ),
		- "genie" = c( "a" = "genies", "c" = "genii" ),
		- "money" = c( "a" = "moneys", "c" = "monies" ),
		- "mongoose" = c( "a" = "mongooses", "c" = NA_character_ ),
		- "mythos" = c( "a" = NA_character_, "c" = "mythoi" ),
		- "octopus" = c( "a" = "octopuses", "c" = "octopodes" ),
		- "ox" = c( "a" = NA_character_, "c" = "oxen" ),
		- "soliloquy" = c( "a" = "soliloquies", "c" = NA_character_ ),
		- "trilby" = c( "a" = "trilbys", "c" = NA_character_ )
		-)
		-
		-# -----------------------------------------------------------------------------
		-# Rule 5
		-#
		-# Handle irregular inflections for common suffixes.
		-# -----------------------------------------------------------------------------
		-pluralize_irregular_inflection_for_common_suffixes <- function( word ) {
		- output <- sub( "man$", "men", word )
		- output <- sub( "([ml])(ouse)$", "\\1ice", output )
		- output <- sub( "tooth$", "teeth", output )
		- output <- sub( "goose$", "geese", output )
		- output <- sub( "foot$", "feet", output )
		- output <- sub( "zoon$", "zoa", output )
		- output <- sub( "([csx])(is)$", "\\1es", output )
		-
		- ifelse( output == word, NA_character_, output )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 6
		-#
		-# Handle fully assimilated classical inflections.
		-# -----------------------------------------------------------------------------
		-pluralize_fully_assimilated_classical_inflections <- function( word ) {
		- output <- replace_suffix(
		- word, "", "e", c( "alumna", "alga", "vertebra" ) )
		- output <- replace_suffix(
		- output, "ex", "ices", c( "codex", "murex", "silex" ) )
		- output <- replace_suffix(
		- output, "on", "a", c(
		- "aphelion", "hyperbaton", "perihelion", "asyndeton", "noumenon",
		- "phenomenon", "criterion", "organon", "prolegomenon"
		- )
		- )
		- output <- replace_suffix(
		- output, "um", "a", c(
		- "agendum", "datum", "extremum", "bacterium", "desideratum", "stratum",
		- "candelabrum", "erratum", "ovum"
		- )
		- )
		-
		- ifelse( output == word, NA_character_, output )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 7
		-#
		-# Classical variants of modern inflections (e.g., stigmata, soprani).
		-#
		-# See tables A.11 to A.13, A.15, A.16, A.18, A.21 to A.25.
		-# -----------------------------------------------------------------------------
		-pluralize_classical_variants_of_modern_inflections <- function(
		- word, method = c( "ac", "ca", "a", "c" ) ) {
		- method <- match.arg( method )
		-
		- # -a to -as ( anglicized ) or -ae ( classical )
		- a11 <- c(
		- "abscissa", "formula", "medusa", "amoeba", "hydra", "nebula", "antenna",
		- "hyperbola", "nova", "aurora", "lacuna", "parabola"
		- )
		-
		- # Table A.12: -a to -as ( anglicized ) or -ata ( classical )
		- a12 <- c(
		- "anathema", "enema", "oedema", "bema", "enigma", "sarcoma", "carcinoma",
		- "gumma", "schema", "charisma", "lemma", "soma", "diploma", "lymphoma",
		- "stigma", "dogma", "magma", "stoma", "drama", "melisma", "trauma", "edema",
		- "miasma"
		- )
		-
		- # Table A.13: -en to -ens ( anglicized ) or -ina ( classical )
		- a13 <- c( "stamen", "foramen", "lumen" )
		-
		- # Table A.15: -ex to -exes ( anglicized ) or -ices ( classical )
		- a15 <- c(
		- "apex", "latex", "vertex", "cortex", "pontifex", "vortex", "index",
		- "simplex"
		- )
		-
		- # Table A.16: -is to -ises ( anglicized ) or -ides ( classical )
		- a16 <- c( "iris", "clitoris" )
		-
		- # Table A.18: -o to -os ( anglicized ) or -i ( classical )
		- a18 <- c(
		- "alto", "contralto", "soprano", "basso", "crescendo", "tempo", "canto",
		- "solo"
		- )
		-
		- # Table A.21: -um to -ums ( anglicized ) or -a ( classical )
		- a21 <- c(
		- "aquarium", "interregnum", "quantum", "compendium", "lustrum", "rostrum",
		- "consortium", "maximum", "spectrum", "cranium", "medium", "speculum",
		- "curriculum", "memorandum", "stadium", "dictum", "millenium", "trapezium",
		- "emporium", "minimum", "ultimatum", "enconium", "momentum", "vacuum",
		- "gymnasium", "optimum", "velum", "honorarium", "phylum"
		- )
		-
		- # Table A.22: -us to -uses ( anglicized ) or -i ( classical )
		- a22 <- c(
		- "focus", "nimbus", "succubus", "fungus", "nucleolus", "torus", "genius",
		- "radius", "umbilicus", "incubus", "stylus", "uterus"
		- )
		-
		- # Table A.23: -us to -uses ( anglicized ) or -us ( classical )
		- a23 <- c(
		- "apparatus", "impetus", "prospectus", "cantus", "nexus", "sinus", "coitus",
		- "plexus", "status", "hiatus"
		- )
		-
		- output <- replace_suffix( word, "", "im", c( "cherub", "goy", "seraph" ) )
		- output <- replace_suffix( output, "", "i", c( "afreet", "afrit", "efreet" ) )
		-
		- if( method %in% c( "a", "ac" ) ) {
		- output <- replace_suffix( output, "us", "uses", a23 )
		- output <- replace_suffix( output, "us", "uses", a22 )
		- output <- replace_suffix( output, "um", "ums", a21 )
		- output <- replace_suffix( output, "o", "os", a18 )
		- output <- replace_suffix( output, "is", "ises", a16 )
		- output <- replace_suffix( output, "ex", "exes", a15 )
		- output <- replace_suffix( output, "en", "ens", a13 )
		- output <- replace_suffix( output, "a", "as", a12 )
		- output <- replace_suffix( output, "a", "as", a11 )
		- } else {
		- output <- replace_suffix( output, "us", "us", a23 )
		- output <- replace_suffix( output, "us", "i", a22 )
		- output <- replace_suffix( output, "um", "a", a21 )
		- output <- replace_suffix( output, "o", "i", a18 )
		- output <- replace_suffix( output, "is", "ides", a16 )
		- output <- replace_suffix( output, "ex", "ices", a15 )
		- output <- replace_suffix( output, "en", "ina", a13 )
		- output <- replace_suffix( output, "a", "ata", a12 )
		- output <- replace_suffix( output, "a", "ae", a11 )
		- }
		-
		- ifelse(
		- output == word & ( method %in% c( "a", "ac" ) \| !word %in% a23 ),
		- NA_character_,
		- output
		- )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 8
		-#
		-# The suffixes -ch, -sh, and -ss all take -es in the plural (e.g., churches,
		-# classes).
		-# -----------------------------------------------------------------------------
		-pluralize_ch_sh_ss_suffixes <- function( word ) {
		- output <- sub( "([cs]h)$", "\\1es", word )
		- output <- replace_suffix( output, "ss", "sses" )
		-
		- ifelse( output == word, NA_character_, output )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 9
		-#
		-# Certain words ending in -f or -fe take -ves in the plural.
		-# -----------------------------------------------------------------------------
		-pluralize_f_and_fe_suffix <- function( word ) {
		- output <- sub( "([aeo]l\|[^d]ea\|ar)f$", "\\1ves", word )
		- output <- sub( "([nlw]i)fe$", "\\1ves", output )
		-
		- ifelse( output == word, NA_character_, output )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 10
		-#
		-# Words ending in -y take -ies.
		-# -----------------------------------------------------------------------------
		-pluralize_y_suffix <- function( word ) {
		- output <- sub( "([aeiou]y)$", "\\1s", word )
		- output <- sub( "([A-Z].*y)$", "\\1s", output )
		- output <- replace_suffix( output, "y", "ies" )
		-
		- ifelse( output == word, NA_character_, output )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 11
		-#
		-# Some words ending in -o take -os (lassos, solos). See tables A.17 and A.18.
		-# Others take -oes (potatoes, dominoes).
		-# When -o is preceded by a vowel always take -os (folios, bamboos).
		-# -----------------------------------------------------------------------------
		-pluralize_o_suffix <- function( word, method = c( "ac", "ca", "a", "c" ) ) {
		- method <- match.arg( method )
		-
		- # Table A.17: -o to -os
		- a17 <- c(
		- "albino", "generalissimo", "manifesto", "archipelago", "ghetto", "medico",
		- "armadillo", "guano", "octavo", "commando", "inferno", "photo", "ditto",
		- "jumbo", "pro", "dynamo", "lingo", "quarto", "embryo", "lumbago", "rhino",
		- "fiasco", "magneto", "stylo"
		- )
		-
		- # Table A.18: -o to -os (anglicized) or -i (classical)
		- a18 <- c(
		- "alto", "contralto", "soprano", "basso", "crescendo", "tempo", "canto",
		- "solo"
		- )
		-
		- output <- replace_suffix( word, "o", "os", a17 )
		- replacement <- if( method %in% c( "c", "ca" ) ) "i" else "os"
		- output <- replace_suffix( output, "o", replacement, a18 )
		-
		- ifelse( output == word, NA_character_, output )
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 12
		-#
		-# Compound word pluralization.
		-# -----------------------------------------------------------------------------
		-pluralize_compound_words <- function(
		- word, method = c( "ac", "ca", "a", "c" ) ) {
		- method <- match.arg( method )
		- military <- c(
		- "Adjutant", "Lieutenant", "Quartermaster", "Brigadier", "Major"
		- )
		-
		- pluralize_cw <- Vectorize(
		- function( cw, seps ) {
		- if( cw[length( cw )] %in% c( "General", "general" ) &&
		- (!cw[length( cw )] %in% military) ) {
		- cw[1] <- pluralize( cw[1], method )
		- } else {
		- cw[1] <- pluralize( cw[1], method )
		- }
		-
		- paste( paste0( seps, cw ), collapse = "" )
		- }
		- )
		-
		- parts <- strsplit( word, "[- ]+" )
		- seps <- strsplit( word, "[^ -]+" )
		- is_compound <- grepl( "[- ]", word )
		- output <- word
		- output[!is_compound] <- NA_character_
		- output[is_compound] <- pluralize_cw( parts[is_compound], seps[is_compound] )
		-
		- output
		-}
		-
		-# -----------------------------------------------------------------------------
		-# Rule 13
		-#
		-# Otherwise add -s.
		-# -----------------------------------------------------------------------------
		-pluralize_regular <- function( word ) {
		- paste0( word, "s" )
		+ "adonis",
		+ "anis",
		+ "bison",
		+ "bream",
		+ "breeches",
		+ "britches",
		+ "carp",
		+ "chassis",
		+ "clippers",
		+ "cod",
		+ "contretemps",
		+ "corps",
		+ "debris",
		+ "diabetes",
		+ "djinn",
		+ "eland",
		+ "elk",
		+ "flounder",
		+ "gallows",
		+ "graffiti",
		+ "headquarters",
		+ "herpes",
		+ "high-jinks",
		+ "homework",
		+ "innings",
		+ "jackanapes",
		+ "mackerel",
		+ "measles",
		+ "mews",
		+ "mumps",
		+ "news",
		+ "pants",
		+ "physics",
		+ "pincers",
		+ "pliers",
		+ "proceedings",
		+ "rabies",
		+ "salmon",
		+ "scissors",
		+ "sea-bass",
		+ "series",
		+ "shears",
		+ "species",
		+ "swine",
		+ "trout",
		+ "tuna",
		+ "whiting",
		+ "wildebeest",
		+)
		+
		+.singular_nouns <- c(
		+ "bathos",
		+ "caddis",
		+ "cannabis",
		+ "dais",
		+ "digitalis",
		+ "ethos",
		+ "glottis",
		+ "marquis",
		+ "pathos",
		+ "polis"
		+)
		+
		+.irregular_patterns <- c(
		+ "fish$", "ois$", "-sheep$", "deer$", "pox$", "[A-Z].*ese$", "itis$"
		+)
		+
		+.prepositions <- c(
		+ "about",
		+ "above",
		+ "across",
		+ "after",
		+ "among",
		+ "around",
		+ "at",
		+ "athwart",
		+ "before",
		+ "behind",
		+ "below",
		+ "beneath",
		+ "beside",
		+ "besides",
		+ "between",
		+ "betwixt",
		+ "beyond",
		+ "but",
		+ "by",
		+ "during",
		+ "except",
		+ "for",
		+ "from",
		+ "in",
		+ "into",
		+ "near",
		+ "of",
		+ "off",
		+ "on",
		+ "onto",
		+ "out",
		+ "over",
		+ "since",
		+ "till",
		+ "to",
		+ "under",
		+ "until",
		+ "unto",
		+ "upon",
		+ "with"
		+)
		+
		+# -----------------------------------------------------------------------------
		+# Rule 3
		+#
		+# Handle pronouns in the nominative, accusative, and dative and propositional
		+# phrases.
		+# -----------------------------------------------------------------------------
		+pluralize_pronoun <- function( word ) {
		+ as.vector( .pluralized_pronouns[word] )
		+}
		+
		+.pluralized_pronouns <- c(
		+ "I" = "we",
		+ "me" = "us",
		+ "myself" = "ourselves",
		+
		+ "you" = "you",
		+ "thou" = "ye",
		+ "thee" = "ye",
		+ "yourself" = "yourself",
		+ "thyself" = "yourself",
		+
		+ "she" = "they",
		+ "he" = "they",
		+ "it" = "they",
		+ "they" = "they",
		+
		+ "her" = "them",
		+ "him" = "them",
		+ "it" = "them",
		+ "them" = "them",
		+
		+ "herself" = "themselves",
		+ "himself" = "themselves",
		+ "itself" = "themselves",
		+
		+ "themself" = "themselves",
		+ "oneself" = "oneselves"
		+)
		+
		+# -----------------------------------------------------------------------------
		+# Rule 4
		+#
		+# Change irregular plurals based on mapping.
		+# -----------------------------------------------------------------------------
		+pluralize_irregular <- function( word, method = c( "ac", "ca", "a", "c" ) ) {
		+ method <- match.arg( method )
		+ plurals <- .irregular_nouns[word]
		+
		+ extract_plural <- function( plurals ) {
		+ if( is.null( plurals ) ) {
		+ return( NA_character_ )
		+ }
		+
		+ return(
		+ switch(
		+ method,
		+ "a" = plurals["a"],
		+ "c" = plurals["c"],
		+ "ac" = if.na( plurals["a"], plurals["c"] ),
		+ "ca" = if.na( plurals["c"], plurals["a"] )
		+ )
		+ )
		+ }
		+
		+ as.character( lapply( plurals, extract_plural ) )
		+}
		+
		+.irregular_nouns <- list(
		+ "beef" = c( "a" = "beefs", "c" = "beeves" ),
		+ "brother" = c( "a" = "brothers", "c" = "brethren" ),
		+ "child" = c( "a" = NA_character_, "c" = "children" ),
		+ "cow" = c( "a" = "cows", "c" = "kine" ),
		+ "ephemeris" = c( "a" = NA_character_, "c" = "ephemerides" ),
		+ "genie" = c( "a" = "genies", "c" = "genii" ),
		+ "money" = c( "a" = "moneys", "c" = "monies" ),
		+ "mongoose" = c( "a" = "mongooses", "c" = NA_character_ ),
		+ "mythos" = c( "a" = NA_character_, "c" = "mythoi" ),
		+ "octopus" = c( "a" = "octopuses", "c" = "octopodes" ),
		+ "ox" = c( "a" = NA_character_, "c" = "oxen" ),
		+ "soliloquy" = c( "a" = "soliloquies", "c" = NA_character_ ),
		+ "trilby" = c( "a" = "trilbys", "c" = NA_character_ )
		+)
		+
		+# -----------------------------------------------------------------------------
		+# Rule 5
		+#
		+# Handle irregular inflections for common suffixes.
		+# -----------------------------------------------------------------------------
		+pluralize_irregular_inflection_for_common_suffixes <- function( word ) {
		+ output <- sub( "man$", "men", word )
		+ output <- sub( "([ml])(ouse)$", "\\1ice", output )
		+ output <- sub( "tooth$", "teeth", output )
		+ output <- sub( "goose$", "geese", output )
		+ output <- sub( "foot$", "feet", output )
		+ output <- sub( "zoon$", "zoa", output )
		+ output <- sub( "([csx])(is)$", "\\1es", output )
		+
		+ ifelse( output == word, NA_character_, output )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 6
		+#
		+# Handle fully assimilated classical inflections.
		+# -----------------------------------------------------------------------------
		+pluralize_fully_assimilated_classical_inflections <- function( word ) {
		+ output <- replace_suffix(
		+ word, "", "e", c( "alumna", "alga", "vertebra" ) )
		+ output <- replace_suffix(
		+ output, "ex", "ices", c( "codex", "murex", "silex" ) )
		+ output <- replace_suffix(
		+ output, "on", "a", c(
		+ "aphelion",
		+ "asyndeton",
		+ "criterion",
		+ "hyperbaton",
		+ "noumenon",
		+ "organon",
		+ "perihelion",
		+ "phenomenon",
		+ "prolegomenon"
		+ )
		+ )
		+ output <- replace_suffix(
		+ output, "um", "a", c(
		+ "agendum",
		+ "bacterium",
		+ "candelabrum",
		+ "datum",
		+ "desideratum",
		+ "erratum",
		+ "extremum",
		+ "ovum",
		+ "stratum"
		+ )
		+ )
		+
		+ ifelse( output == word, NA_character_, output )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 7
		+#
		+# Classical variants of modern inflections (e.g., stigmata, soprani).
		+#
		+# See tables A.11 to A.13, A.15, A.16, A.18, A.21 to A.25.
		+# -----------------------------------------------------------------------------
		+pluralize_classical_variants_of_modern_inflections <- function(
		+ word, method = c( "ac", "ca", "a", "c" ) ) {
		+ method <- match.arg( method )
		+
		+ # -a to -as (anglicized) or -ae (classical)
		+ a11 <- c(
		+ "abscissa",
		+ "amoeba",
		+ "antenna",
		+ "aurora",
		+ "formula",
		+ "hydra",
		+ "hyperbola",
		+ "lacuna",
		+ "medusa",
		+ "nebula",
		+ "nova",
		+ "parabola"
		+ )
		+
		+ # Table A.12: -a to -as (anglicized) or -ata (classical)
		+ a12 <- c(
		+ "anathema",
		+ "bema",
		+ "carcinoma",
		+ "charisma",
		+ "diploma",
		+ "dogma",
		+ "drama",
		+ "edema",
		+ "enema",
		+ "enigma",
		+ "gumma",
		+ "lemma",
		+ "lymphoma",
		+ "magma",
		+ "melisma",
		+ "miasma",
		+ "oedema",
		+ "sarcoma",
		+ "schema",
		+ "soma",
		+ "stigma",
		+ "stoma",
		+ "trauma"
		+ )
		+
		+ # Table A.13: -en to -ens (anglicized) or -ina (classical)
		+ a13 <- c( "stamen", "foramen", "lumen" )
		+
		+ # Table A.15: -ex to -exes (anglicized) or -ices (classical)
		+ a15 <- c(
		+ "apex",
		+ "cortex",
		+ "index",
		+ "latex",
		+ "pontifex",
		+ "simplex",
		+ "vertex",
		+ "vortex"
		+ )
		+
		+ # Table A.16: -is to -ises (anglicized) or -ides (classical)
		+ a16 <- c( "iris", "clitoris" )
		+
		+ # Table A.18: -o to -os (anglicized) or -i (classical)
		+ a18 <- c(
		+ "alto",
		+ "basso",
		+ "canto",
		+ "contralto",
		+ "crescendo",
		+ "solo",
		+ "soprano",
		+ "tempo"
		+ )
		+
		+ # Table A.21: -um to -ums (anglicized) or -a (classical)
		+ a21 <- c(
		+ "aquarium",
		+ "compendium",
		+ "consortium",
		+ "cranium",
		+ "curriculum",
		+ "dictum",
		+ "emporium",
		+ "enconium",
		+ "gymnasium",
		+ "honorarium",
		+ "interregnum",
		+ "lustrum",
		+ "maximum",
		+ "medium",
		+ "memorandum",
		+ "millenium",
		+ "minimum",
		+ "momentum",
		+ "optimum",
		+ "phylum",
		+ "quantum",
		+ "rostrum",
		+ "spectrum",
		+ "speculum",
		+ "stadium",
		+ "trapezium",
		+ "ultimatum",
		+ "vacuum",
		+ "velum"
		+ )
		+
		+ # Table A.22: -us to -uses (anglicized) or -i (classical)
		+ a22 <- c(
		+ "focus",
		+ "fungus",
		+ "genius",
		+ "incubus",
		+ "nimbus",
		+ "nucleolus",
		+ "radius",
		+ "stylus",
		+ "succubus",
		+ "torus",
		+ "umbilicus",
		+ "uterus"
		+ )
		+
		+ # Table A.23: -us to -uses (anglicized) or -us (classical)
		+ a23 <- c(
		+ "apparatus",
		+ "cantus",
		+ "coitus",
		+ "hiatus",
		+ "impetus",
		+ "nexus",
		+ "plexus",
		+ "prospectus",
		+ "sinus",
		+ "status"
		+ )
		+
		+ output <- replace_suffix( word, "", "im", c( "cherub", "goy", "seraph" ) )
		+ output <- replace_suffix( output, "", "i", c( "afreet", "afrit", "efreet" ) )
		+
		+ if( method %in% c( "a", "ac" ) ) {
		+ output <- replace_suffix( output, "us", "uses", a23 )
		+ output <- replace_suffix( output, "us", "uses", a22 )
		+ output <- replace_suffix( output, "um", "ums", a21 )
		+ output <- replace_suffix( output, "o", "os", a18 )
		+ output <- replace_suffix( output, "is", "ises", a16 )
		+ output <- replace_suffix( output, "ex", "exes", a15 )
		+ output <- replace_suffix( output, "en", "ens", a13 )
		+ output <- replace_suffix( output, "a", "as", a12 )
		+ output <- replace_suffix( output, "a", "as", a11 )
		+ } else {
		+ output <- replace_suffix( output, "us", "us", a23 )
		+ output <- replace_suffix( output, "us", "i", a22 )
		+ output <- replace_suffix( output, "um", "a", a21 )
		+ output <- replace_suffix( output, "o", "i", a18 )
		+ output <- replace_suffix( output, "is", "ides", a16 )
		+ output <- replace_suffix( output, "ex", "ices", a15 )
		+ output <- replace_suffix( output, "en", "ina", a13 )
		+ output <- replace_suffix( output, "a", "ata", a12 )
		+ output <- replace_suffix( output, "a", "ae", a11 )
		+ }
		+
		+ ifelse(
		+ output == word & (method %in% c( "a", "ac" ) \| !word %in% a23),
		+ NA_character_,
		+ output
		+ )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 8
		+#
		+# The suffixes -ch, -sh, and -ss all take -es in the plural (e.g., churches,
		+# classes).
		+# -----------------------------------------------------------------------------
		+pluralize_ch_sh_ss_suffixes <- function( word ) {
		+ output <- sub( "([cs]h)$", "\\1es", word )
		+ output <- replace_suffix( output, "ss", "sses" )
		+
		+ ifelse( output == word, NA_character_, output )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 9
		+#
		+# Certain words ending in -f or -fe take -ves in the plural.
		+# -----------------------------------------------------------------------------
		+pluralize_f_and_fe_suffix <- function( word ) {
		+ output <- sub( "([aeo]l\|[^d]ea\|ar)f$", "\\1ves", word )
		+ output <- sub( "([nlw]i)fe$", "\\1ves", output )
		+
		+ ifelse( output == word, NA_character_, output )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 10
		+#
		+# Words ending in -y take -ies.
		+# -----------------------------------------------------------------------------
		+pluralize_y_suffix <- function( word ) {
		+ output <- sub( "([aeiou]y)$", "\\1s", word )
		+ output <- sub( "([A-Z].*y)$", "\\1s", output )
		+ output <- replace_suffix( output, "y", "ies" )
		+
		+ ifelse( output == word, NA_character_, output )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 11
		+#
		+# Some words ending in -o take -os (lassos, solos). See tables A.17 and A.18.
		+# Others take -oes (potatoes, dominoes).
		+# When -o is preceded by a vowel always take -os (folios, bamboos).
		+# -----------------------------------------------------------------------------
		+pluralize_o_suffix <- function( word, method = c( "ac", "ca", "a", "c" ) ) {
		+ method <- match.arg( method )
		+
		+ # Table A.17: -o to -os
		+ a17 <- c(
		+ "albino",
		+ "archipelago",
		+ "armadillo",
		+ "commando",
		+ "ditto",
		+ "dynamo",
		+ "embryo",
		+ "fiasco",
		+ "generalissimo",
		+ "ghetto",
		+ "guano",
		+ "inferno",
		+ "jumbo",
		+ "lingo",
		+ "lumbago",
		+ "magneto",
		+ "manifesto",
		+ "medico",
		+ "octavo",
		+ "photo",
		+ "pro",
		+ "quarto",
		+ "rhino",
		+ "stylo"
		+ )
		+
		+ # Table A.18: -o to -os (anglicized) or -i (classical)
		+ a18 <- c(
		+ "alto",
		+ "basso",
		+ "canto",
		+ "contralto",
		+ "crescendo",
		+ "solo",
		+ "soprano",
		+ "tempo"
		+ )
		+
		+ output <- replace_suffix( word, "o", "os", a17 )
		+ replacement <- if( method %in% c( "c", "ca" ) ) "i" else "os"
		+ output <- replace_suffix( output, "o", replacement, a18 )
		+
		+ ifelse( output == word, NA_character_, output )
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 12
		+#
		+# Compound word pluralization.
		+# -----------------------------------------------------------------------------
		+pluralize_compound_words <- function(
		+ word, method = c( "ac", "ca", "a", "c" ) ) {
		+ method <- match.arg( method )
		+ military <- c(
		+ "Adjutant",
		+ "Brigadier",
		+ "Lieutenant",
		+ "Major"
		+ "Quartermaster"
		+ )
		+
		+ pluralize_cw <- Vectorize(
		+ function( cw, seps ) {
		+ if( cw[length( cw )] %in% c( "General", "general" ) &&
		+ (!cw[length( cw )] %in% military) ) {
		+ cw[1] <- pluralize( cw[1], method )
		+ } else {
		+ cw[1] <- pluralize( cw[1], method )
		+ }
		+
		+ paste( paste0( seps, cw ), collapse = "" )
		+ }
		+ )
		+
		+ parts <- strsplit( word, "[- ]+" )
		+ seps <- strsplit( word, "[^ -]+" )
		+ is_compound <- grepl( "[- ]", word )
		+ output <- word
		+ output[!is_compound] <- NA_character_
		+ output[is_compound] <- pluralize_cw( parts[is_compound], seps[is_compound] )
		+
		+ output
		+}
		+
		+# -----------------------------------------------------------------------------
		+# Rule 13
		+#
		+# Otherwise add -es if ending in -s; otherwise, append -s (e.g., tennis,
		+# lychnis, penis, and other singular forms).
		+# -----------------------------------------------------------------------------
		+pluralize_regular <- function( word ) {
		+ ending <- 's'
		+
		+ if( endsWith( word, ending ) ) {
		+ ending <- "es"
		+ }
		+
		+ paste0( word, ending )
		}

Fix pluralization of some words