diff --git a/.gitignore b/.gitignore index 9b48abe..ea2287d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .pytest* -*__pycache__* \ No newline at end of file +*__pycache__* +*.egg* +venv \ No newline at end of file diff --git a/soundchanger/change.py b/soundchanger/change.py index 64cab90..e19126a 100644 --- a/soundchanger/change.py +++ b/soundchanger/change.py @@ -7,23 +7,6 @@ def apply(change, string, categories=None, apply=True): if not apply: return string - # Set up default categories - if not categories: - categories = { - 'V': 'aeiou' - } - - # Prepare change for regex - for k, v in {'{': '[', '}': ']', ',': '', ' ': ''}.items(): - change = change.replace(k, v) - - # Replacements for categories - for k, v in categories.items(): - # Replacement of categories inside brackets (before or after a comma) - change = re.sub(f'((?<=,){k}|{k}(?=,))', v, change) - # Replacements of categories anywhere else - change = change.replace(k, f"[{v}]") - # Check validity of change if change.count('>') != 1: raise ValueError(f"Change {change} is not a valid sound change. (Missing character '>')") @@ -31,17 +14,31 @@ def apply(change, string, categories=None, apply=True): if change.count('/') > 1: raise ValueError(f"Change {change} is not a valid sound change. (More than one '/' character)") + # Prepare change for regex + for k, v in {'{': '(', '}': ')', ',': '|'}.items(): + change = change.replace(k, v) + + # Replacements for categories + for k, v in categories.items(): + # Replacement of categories inside brackets (before or after a comma) + change = re.sub(f'((?<=,){k}|{k}(?=,))', '|'.join(v), change) + # Replacements of categories anywhere else + change = change.replace(k, '('+'|'.join(v)+')') + if '/' in change: change, environment = change.split('/') else: - environment = '._.' + environment = '_' if environment.count('_') != 1: raise ValueError(f"Environment {environment} is not a valid environment. (Character '_' should exist exactly once)") - original, repl = change.split('>') + original, change_to = change.split('>') before, after = environment.split('_') - pattern = f"(?<={before}){original}(?={after})" - print(pattern) + n = before.count('(') + original.count('(') + 3 + pattern = f"({before})({original})({after})" + repl = f"\\1{change_to}\\{n}" + return re.sub(pattern, repl, f"#{string}#").strip('#') + diff --git a/tests/test_change.py b/tests/test_change.py index 0821e04..8d594ad 100644 --- a/tests/test_change.py +++ b/tests/test_change.py @@ -23,3 +23,14 @@ def test_apply_with_complex_group_and_categories(): 'P': 'ptk', 'N': 'mn', }) == 'pani' + +def test_apply_with_complex_group_and_categories_and_digraphs(): + assert apply('u>o/#V{ts,pf,t}_k{V,e,o}#', 'atsuka', categories={ + 'V': ['a', 'i', 'u'] + }) == 'atsoka' + assert apply('u>o/#V{ts,pf,t}_k{V,e,o}#', 'atuka', categories={ + 'V': ['a', 'i', 'u'] + }) == 'atoka' + assert apply('u>o/#V{ts,pf,t}_k{V,e,o}#', 'matsuka', categories={ + 'V': ['a', 'i', 'u'] + }) == 'matsuka'