فهرست منبع

Added English inflector.

Taddeus Kroes 13 سال پیش
والد
کامیت
6e90f2f160
2 فایلهای تغییر یافته به همراه 107 افزوده شده و 0 حذف شده
  1. 78 0
      singplur.py
  2. 29 0
      tests/test_singplur.py

+ 78 - 0
singplur.py

@@ -0,0 +1,78 @@
+import re
+
+
+SINGULARS = (  # ordered (suffix regex, replacement) pairs: plural -> singular
+    (r'ives', 'ife'),  # e.g. knives -> knife
+    (r'ves', 'f'),  # e.g. leaves -> leaf
+    (r'zes', ''),  # removes the whole 'zes' suffix
+    (r'men', 'man'),
+    (r'matrices', 'matrix'),
+    (r'([aeiou])ys', '\\1y'),  # vowel + 'ys': only the 's' is dropped
+    (r'([^aeiou])ies', '\\1y'),  # e.g. parties -> party
+    (r'([^aeiou])ices$','\\1ice'),  # e.g. dices -> dice
+    (r'ices', 'ex'),
+    (r'([sxz]|[cs]h)es$', '\\1'),  # e.g. branches -> branch, boxes -> box
+    (r'eet', 'oot'),
+    (r'ies', 'y'),
+    (r'i', 'us'),
+    (r's', ''),  # fallback: drop a trailing 's' (e.g. shoes -> shoe)
+)  # apply_rules() appends '$' to each pattern and stops at the first match
+PLURALS = (  # ordered (suffix regex, replacement) pairs: singular -> plural
+    (r'man$', 'men'),
+    (r'([aeiou]y)', '\\1s'),  # vowel + 'y' just takes an 's'
+    (r'([^aeiou])y', '\\1ies'),  # e.g. party -> parties
+    (r'([sxz]|[cs]h)', '\\1es'),  # e.g. branch -> branches, box -> boxes
+    (r'(cello|disco|hairdo|logo|patio|photo|piano|radio|chef|cliff|ref|roof)',
+     '\\1s'),  # listed words simply take 's' (e.g. roof -> roofs)
+    (r'o', 'oes'),
+    (r'fe', 'ves'),  # e.g. knife -> knives
+    (r'eaf', 'eaves'),  # e.g. leaf -> leaves
+    (r'[ei]x', 'ices'),  # NOTE(review): looks unreachable, [sxz] rule above matches 'x' first
+    (r'us', 'i'),  # NOTE(review): looks unreachable, [sxz] rule above matches 's' first
+    (r'', 's'),  # fallback: append 's' (e.g. shoe -> shoes)
+)  # apply_rules() appends '$' to each pattern and stops at the first match
+# Irregular nouns, keyed by singular form, that the suffix rules cannot
+# produce. pluralize() consults this table before applying any rule.
+EXCEPTIONS_SINGULAR = {
+    'person': 'people',
+    'child': 'children',
+    'ox': 'oxen',
+    'foot': 'feet',
+    'tooth': 'teeth',
+    'goose': 'geese',
+    'mouse': 'mice',
+    'louse': 'lice',
+    'man': 'men',
+    'woman': 'women',
+}
+# Inverse mapping (plural -> singular) used by singularize(). items() is
+# used instead of iteritems() so the module imports on Python 2 and 3.
+EXCEPTIONS_PLURAL = {
+    plural: singular for singular, plural in EXCEPTIONS_SINGULAR.items()
+}
+# Nouns whose singular and plural forms are identical; apply_rules()
+# returns these unchanged.
+SAME = (
+    'equipment', 'information', 'rice', 'money',
+    'species', 'series',
+    'fish', 'sheep', 'deer', 'tuna', 'salmon', 'trout',
+)
+
+
+def apply_rules(word, rules):
+    """Rewrite the ending of *word* using the first matching rule.
+
+    *word* is stripped of surrounding whitespace. Words listed in SAME are
+    returned as-is. Otherwise each ``(pattern, replacement)`` pair in
+    *rules* is tried in order, with the pattern anchored to the end of the
+    word; the result of the first pair that matches is returned. If no
+    pair matches, the stripped word is returned unchanged.
+    """
+    stripped = word.strip()
+
+    if stripped in SAME:
+        return stripped
+
+    for suffix, substitute in rules:
+        # Appending '$' restricts every rule to the end of the word.
+        rewritten, hits = re.subn(suffix + '$', substitute, stripped)
+
+        if hits:
+            return rewritten
+
+    return stripped
+
+
+def singularize(word):
+    """Return the singular form of the English noun *word*.
+
+    Surrounding whitespace is stripped before the irregular-plural lookup,
+    so that padded input such as ``' feet '`` is still recognised. Plurals
+    found in EXCEPTIONS_PLURAL are returned from that table; any other
+    word is passed to apply_rules() with the SINGULARS rules.
+    """
+    word = word.strip()
+
+    if word in EXCEPTIONS_PLURAL:
+        return EXCEPTIONS_PLURAL[word]
+
+    return apply_rules(word, SINGULARS)
+
+
+def pluralize(word):
+    """Return the plural form of the English noun *word*.
+
+    Surrounding whitespace is stripped before the irregular-noun lookup,
+    so that padded input such as ``' foot '`` is still recognised. Nouns
+    found in EXCEPTIONS_SINGULAR are returned from that table; any other
+    word is passed to apply_rules() with the PLURALS rules.
+    """
+    word = word.strip()
+
+    if word in EXCEPTIONS_SINGULAR:
+        return EXCEPTIONS_SINGULAR[word]
+
+    return apply_rules(word, PLURALS)

+ 29 - 0
tests/test_singplur.py

@@ -0,0 +1,29 @@
+from unittest import TestCase
+from singplur import singularize, pluralize
+
+
+class SingplurTest(TestCase):
+    """Check singularize() and pluralize() against known word pairs."""
+
+    # Each entry is (singular, plural); both tests walk the same table.
+    pairs = (
+        ('branch', 'branches'),
+        ('payment', 'payments'),
+        ('order', 'orders'),
+        ('party', 'parties'),
+        ('knife', 'knives'),
+        ('roof', 'roofs'),
+        ('leaf', 'leaves'),
+        ('box', 'boxes'),
+        ('sheep', 'sheep'),
+        ('salmon', 'salmon'),
+        ('person', 'people'),
+        ('shoe', 'shoes'),
+        ('foot', 'feet'),
+        ('dice', 'dices'),
+    )
+
+    def test_singularize(self):
+        """Every plural in the table maps back to its singular."""
+        for expected, plural_form in self.pairs:
+            actual = singularize(plural_form)
+            self.assertEqual(actual, expected)
+
+    def test_pluralize(self):
+        """Every singular in the table maps to its plural."""
+        for singular_form, expected in self.pairs:
+            actual = pluralize(singular_form)
+            self.assertEqual(actual, expected)