|
|
@@ -0,0 +1,78 @@
|
|
|
+import re
|
|
|
+
|
|
|
+
|
|
|
# Suffix rewrite rules as (regex, replacement) pairs, plural -> singular.
# apply_rules() anchors each pattern at the end of the word and stops at the
# first rule that matches, so the ORDER of the entries is significant:
# specific suffixes must come before the generic ones that would shadow them.
SINGULARS = (
    (r'ives', 'ife'),
    (r'ves', 'f'),
    (r'zes', ''),
    (r'men', 'man'),
    (r'matrices', 'matrix'),
    (r'([aeiou])ys', '\\1y'),
    (r'([^aeiou])ies', '\\1y'),
    (r'([^aeiou])ices$', '\\1ice'),
    (r'ices', 'ex'),
    (r'([sxz]|[cs]h)es$', '\\1'),
    (r'eet', 'oot'),
    (r'ies', 'y'),
    (r'i', 'us'),
    # Catch-all: drop a trailing 's'.
    (r's', ''),
)

# Suffix rewrite rules as (regex, replacement) pairs, singular -> plural.
# Same first-match-wins semantics as SINGULARS.
PLURALS = (
    (r'man$', 'men'),
    (r'([aeiou]y)', '\\1s'),
    (r'([^aeiou])y', '\\1ies'),
    (r'([sxz]|[cs]h)', '\\1es'),
    # Words ending in 'o' / 'f' that take a plain 's' instead of the
    # 'oes' / 'ves' forms produced by the rules below.
    (r'(cello|disco|hairdo|logo|patio|photo|piano|radio|chef|cliff|ref|roof)',
     '\\1s'),
    (r'o', 'oes'),
    (r'fe', 'ves'),
    (r'eaf', 'eaves'),
    # NOTE(review): the next two rules are shadowed by the sibilant rule
    # above (every word ending in 'x' or 's' matches that one first), so
    # under first-match-wins they never fire.  Kept as-is to preserve the
    # current output ('index' -> 'indexes', 'cactus' -> 'cactuses').
    (r'[ei]x', 'ices'),
    (r'us', 'i'),
    # Catch-all: the empty pattern matches at end of word and appends 's'.
    (r'', 's'),
)

# Irregular nouns, keyed by singular form.
EXCEPTIONS_SINGULAR = {
    'person': 'people',
    'child': 'children',
    'ox': 'oxen',
    'foot': 'feet',
    'tooth': 'teeth',
    'goose': 'geese',
    'mouse': 'mice',
    'louse': 'lice',
    'man': 'men',
    'woman': 'women',
}

# Inverse of EXCEPTIONS_SINGULAR, keyed by plural form.  Uses items() rather
# than the Python 2-only iteritems() so the module imports on Python 3 too.
EXCEPTIONS_PLURAL = {
    plural: singular for singular, plural in EXCEPTIONS_SINGULAR.items()
}

# Nouns whose singular and plural forms are identical; apply_rules() returns
# these unchanged.
SAME = ('equipment', 'information', 'rice', 'money', 'species', 'series',
        'fish', 'sheep', 'deer', 'tuna', 'salmon', 'trout')
|
|
|
+
|
|
|
+
|
|
|
def apply_rules(word, rules):
    """Rewrite the suffix of ``word`` using the first matching rule.

    ``word`` is stripped of surrounding whitespace first.  An empty result
    is returned as-is, and words listed in ``SAME`` are returned unchanged.
    Otherwise each ``(pattern, replacement)`` pair in ``rules`` is tried in
    order with the pattern anchored to the end of the word; the first pair
    that produces a substitution wins.

    :param word: the word to inflect.
    :param rules: iterable of ``(regex, replacement)`` string pairs.
    :returns: the rewritten word, or the stripped word if no rule matched.
    """
    word = word.strip()

    # Nothing to inflect.  Without this guard the catch-all plural rule
    # (empty pattern -> 's') would turn '' into 's'.
    if not word:
        return word

    if word in SAME:
        return word

    for pattern, replacement in rules:
        # Anchor at end of word: rules describe suffixes only.
        word, count = re.subn(pattern + '$', replacement, word)
        if count:
            break

    return word
|
|
|
+
|
|
|
+
|
|
|
def singularize(word):
    """Return the singular form of ``word``.

    Irregular plurals are looked up in ``EXCEPTIONS_PLURAL`` first; any
    other word is passed through the ``SINGULARS`` suffix rules by
    ``apply_rules``.

    :param word: a plural noun; surrounding whitespace is ignored.
    :returns: the singular form as a string.
    """
    # Strip before the lookup so padded input such as ' mice ' still hits
    # the exceptions table; apply_rules() strips its input as well.
    word = word.strip()

    try:
        return EXCEPTIONS_PLURAL[word]
    except KeyError:
        return apply_rules(word, SINGULARS)
|
|
|
+
|
|
|
+
|
|
|
def pluralize(word):
    """Return the plural form of ``word``.

    Irregular nouns are looked up in ``EXCEPTIONS_SINGULAR`` first; any
    other word is passed through the ``PLURALS`` suffix rules by
    ``apply_rules``.

    :param word: a singular noun; surrounding whitespace is ignored.
    :returns: the plural form as a string.
    """
    # Strip before the lookup so padded input such as ' child ' still hits
    # the exceptions table; apply_rules() strips its input as well.
    word = word.strip()

    try:
        return EXCEPTIONS_SINGULAR[word]
    except KeyError:
        return apply_rules(word, PLURALS)
|