From 418a318b29f31749f61c5d57dfae726459a39aaf Mon Sep 17 00:00:00 2001
From: Patrick
Date: Sun, 29 Jan 2023 21:34:47 +0900
Subject: [PATCH] Initial implementation of regex sound changer

---
Review note: the added files were revised after this patch was generated, so
the blob ids on their "index" lines no longer describe the new content.

 .gitignore                  |  3 +-
 soundchanger.py             | 32 --------------
 soundchanger/__init__.py    |  0
 soundchanger/change.py      | 83 +++++++++++++++++++++++++++++++++++++
 tests/test_apply.py         | 24 -----------
 tests/test_change.py        | 33 +++++++++++++++
 tests/test_change_errors.py | 19 +++++++++
 7 files changed, 137 insertions(+), 57 deletions(-)
 delete mode 100644 soundchanger.py
 create mode 100644 soundchanger/__init__.py
 create mode 100644 soundchanger/change.py
 delete mode 100644 tests/test_apply.py
 create mode 100644 tests/test_change.py
 create mode 100644 tests/test_change_errors.py

diff --git a/.gitignore b/.gitignore
index b0f8a61..9b48abe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-.pytest*
\ No newline at end of file
+.pytest*
+*__pycache__*
\ No newline at end of file
diff --git a/soundchanger.py b/soundchanger.py
deleted file mode 100644
index add1d02..0000000
--- a/soundchanger.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import re
-
-
-class Change:
-    """
-    Instantiates a string representation of a sound change.
-    """
-
-    def __init__(self, change):
-        self.parse(change)
-
-    def __repr__(self):
-        return f"{self.b}>{self.a}/{self.f}_{self.t}"
-
-    def parse(self, change):
-        if '/' in change:
-            _change, _env = change.split('/')
-        else:
-            _change = change
-            _env = '_'
-        self.b, self.a = _change.split('>')
-        self.f, self.t = _env.split('_')
-
-        if self.f.startswith("{") and self.f.endswith("}"):
-            self.f = self.f[1:-1].split(',')
-            self.f = f"[{''.join(self.f)}]"
-        if self.t.startswith("{") and self.t.endswith("}"):
-            self.t = self.t[1:-1].split(',')
-            self.t = f"[{''.join(self.t)}]"
-
-    def sub(self, string):
-        return re.sub(f"(?<={self.f}){self.b}(?={self.t})", f"{self.a}", f"#{string}#").strip('#')
diff --git a/soundchanger/__init__.py b/soundchanger/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/soundchanger/change.py b/soundchanger/change.py
new file mode 100644
index 0000000..64cab90
--- /dev/null
+++ b/soundchanger/change.py
@@ -0,0 +1,83 @@
+import re
+
+
+def _to_regex(fragment, categories):
+    """Translate one piece of a sound change into regex source.
+
+    ``{a,b}`` sets become character classes, a category name inside a set
+    contributes its bare members, and a category name anywhere else becomes
+    a character class of its own.
+    """
+    # Longest names first so a name that is a prefix of another cannot win;
+    # '(?!)' never matches and stands in when there is no usable name.
+    names = '|'.join(
+        re.escape(name)
+        for name in sorted(categories, key=len, reverse=True)
+        if name
+    ) or '(?!)'
+
+    def translate(match):
+        members = match.group(1)
+        if members is None:
+            # A bare category name outside of any set
+            return f"[{categories[match.group()]}]"
+        # A set: drop the separators and flatten categories into it
+        members = members.replace(',', '')
+        members = re.sub(names, lambda name: categories[name.group()], members)
+        return f"[{members}]"
+
+    return re.sub(r'\{([^{}]*)\}|' + names, translate, fragment)
+
+
+def apply(change, string, categories=None, apply=True):
+    """Apply a sound change to a given string.
+
+    A change is written ``original>replacement``, optionally followed by
+    ``/before_after`` to restrict where it applies. ``#`` stands for a word
+    boundary, ``{a,b}`` for any one of the listed sounds and a category
+    name for any one of that category's members. Spaces are ignored.
+
+    :param change: the sound change, e.g. ``'p>f/#_{a,u}'``
+    :param string: the word to change; ``#`` is reserved for the word
+        boundary, so any leading or trailing ``#`` is stripped from the result
+    :param categories: mapping of category name to a string of its members,
+        ``{'V': 'aeiou'}`` when omitted or empty
+    :param apply: when false, ``string`` is returned untouched
+    :return: the changed word
+    :raises ValueError: unless ``change`` has exactly one ``>``, at most one
+        ``/`` and, when an environment is given, exactly one ``_`` in it
+    """
+    if not apply:
+        return string
+
+    # Set up default categories
+    if not categories:
+        categories = {'V': 'aeiou'}
+
+    rule = change.replace(' ', '')
+
+    # Check validity of change
+    arrows = rule.count('>')
+    if arrows == 0:
+        raise ValueError(f"Change {rule} is not a valid sound change. (Missing character '>')")
+    if arrows > 1:
+        raise ValueError(f"Change {rule} is not a valid sound change. (More than one '>' character)")
+    if rule.count('/') > 1:
+        raise ValueError(f"Change {rule} is not a valid sound change. (More than one '/' character)")
+
+    if '/' in rule:
+        rule, environment = rule.split('/')
+    else:
+        # No environment given: the change applies next to any character
+        environment = '._.'
+
+    if environment.count('_') != 1:
+        raise ValueError(f"Environment {environment} is not a valid environment. (Character '_' should exist exactly once)")
+
+    # Only the matching side is translated; the replacement is used as written
+    original, repl = rule.split('>')
+    before, after = (_to_regex(part, categories) for part in environment.split('_'))
+    pattern = f"(?<={before}){_to_regex(original, categories)}(?={after})"
+
+    # '#' pads the word so that rules can refer to its boundaries
+    return re.sub(pattern, repl, f"#{string}#").strip('#')
diff --git a/tests/test_apply.py b/tests/test_apply.py
deleted file mode 100644
index e1b0042..0000000
--- a/tests/test_apply.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from soundchanger import Change
-
-
-def test_simple_change_without_environment():
-    c = Change('p>f')
-    assert c.sub('pana') == 'fana'
-    assert c.sub('kapa') == 'kafa'
-
-def test_simple_change_with_environment():
-    c = Change('p>f/#_a')
-    assert c.sub('pana') == 'fana'
-    assert c.sub('pune') == 'pune'
-    assert c.sub('kapa') == 'kapa'
-
-def test_complex_change_with_environment():
-    c = Change('p>f/{#,a}_{a,u}')
-    assert c.sub('pana') == 'fana'
-    assert c.sub('pune') == 'fune'
-    assert c.sub('kapa') == 'kafa'
-
-# def test_simple_change_with_category():
-#     c = Change('w>h/V_V')
-#     assert c.sub('kawa') == 'kaha'
-#     assert c.sub('tuwo') == 'tuho'
\ No newline at end of file
diff --git a/tests/test_change.py b/tests/test_change.py
new file mode 100644
index 0000000..0821e04
--- /dev/null
+++ b/tests/test_change.py
@@ -0,0 +1,33 @@
+from soundchanger.change import apply
+
+
+def test_apply_without_environment():
+    assert apply('p>h', 'pana') == 'hana'
+
+def test_apply_with_environment():
+    assert apply('p>f/#_u', 'pune') == 'fune'
+
+def test_apply_with_complex_environment():
+    inputs = ['pana', 'pina', 'puna', 'pama', 'pima', 'puma']
+    outputs = ['pana', 'hina', 'huna', 'pama', 'hima', 'huma']
+    for string, output in zip(inputs, outputs):
+        assert apply(
+            change='p>h/#_{u,i}Na#',
+            string=string,
+            categories={'N': 'nm'}
+        ) == output
+
+def test_apply_with_complex_group_and_categories():
+    assert apply('e>i/{#,p,t,k}V{m,n,h}_#', 'pane', categories={
+        'V': 'aiu',
+        'P': 'ptk',
+        'N': 'mn',
+    }) == 'pani'
+
+def test_apply_with_category_inside_group():
+    for string, output in [('ansa', 'anza'), ('asa', 'aza'), ('atsa', 'atsa')]:
+        assert apply('s>z/{V,n}_V', string) == output
+
+def test_apply_does_not_print(capsys):
+    apply('p>h', 'pana')
+    assert capsys.readouterr().out == ''
diff --git a/tests/test_change_errors.py b/tests/test_change_errors.py
new file mode 100644
index 0000000..14fb14a
--- /dev/null
+++ b/tests/test_change_errors.py
@@ -0,0 +1,19 @@
+import pytest
+import re
+from soundchanger.change import apply
+
+def test_parse_change_error_change():
+    with pytest.raises(ValueError, match=re.escape("Change pf is not a valid sound change. (Missing character '>')")):
+        apply('pf', '')
+
+def test_apply_error_split():
+    with pytest.raises(ValueError, match=re.escape("Change a>b/c/d is not a valid sound change. (More than one '/' character)")):
+        apply('a>b/c/d', '')
+
+def test_apply_error_environment():
+    with pytest.raises(ValueError, match=re.escape("Environment c is not a valid environment. (Character '_' should exist exactly once)")):
+        apply('a>b/c', '')
+
+def test_apply_error_too_many_arrows():
+    with pytest.raises(ValueError, match=re.escape("Change a>b>c is not a valid sound change. (More than one '>' character)")):
+        apply('a>b>c', '')