Initial implementation of regex sound changer
This commit is contained in:
parent
dd5b8ea1d5
commit
418a318b29
3
.gitignore
vendored
3
.gitignore
vendored
@ -1 +1,2 @@
|
||||
.pytest*
|
||||
.pytest*
|
||||
*__pycache__*
|
||||
@ -1,32 +0,0 @@
|
||||
import re
|
||||
|
||||
|
||||
class Change:
    """
    Parsed form of a sound change written as ``before>after/left_right``.

    The ``/left_right`` environment is optional; without it the change has
    empty context on both sides.  A context side written as ``{x,y}`` is
    stored as the regex character set ``[xy]``.
    """

    def __init__(self, change):
        self.parse(change)

    def __repr__(self):
        return f"{self.b}>{self.a}/{self.f}_{self.t}"

    def parse(self, change):
        """Split *change* into ``b``/``a`` (source/result) and ``f``/``t`` (context)."""
        # No '/' means no environment: fall back to an empty context pair.
        rule, context = change.split('/') if '/' in change else (change, '_')
        self.b, self.a = rule.split('>')
        self.f, self.t = context.split('_')

        # Rewrite a brace group such as ``{#,a}`` into the set ``[#a]``.
        for side in ('f', 't'):
            group = getattr(self, side)
            if group.startswith("{") and group.endswith("}"):
                members = group[1:-1].split(',')
                setattr(self, side, f"[{''.join(members)}]")

    def sub(self, string):
        """Return *string* with the change applied wherever the context matches."""
        # The word is wrapped in '#' so that '#' in a context can stand for a
        # word boundary; the markers are stripped again from the result.
        padded = f"#{string}#"
        pattern = f"(?<={self.f}){self.b}(?={self.t})"
        return re.sub(pattern, f"{self.a}", padded).strip('#')
|
||||
0
soundchanger/__init__.py
Normal file
0
soundchanger/__init__.py
Normal file
47
soundchanger/change.py
Normal file
47
soundchanger/change.py
Normal file
@ -0,0 +1,47 @@
|
||||
import re
|
||||
|
||||
|
||||
def _expand_categories(change, categories):
    """Replace every category name in *change* by its members, in one pass.

    Inside a ``[...]`` set the members are inserted bare, so ``[Vn]`` becomes
    ``[aeioun]``; anywhere else they are wrapped in a set of their own.
    """
    # Longest names first so a multi-character name wins over a shorter prefix.
    names = sorted((name for name in categories if name), key=len, reverse=True)
    token = re.compile("|".join(re.escape(t) for t in ["[", "]", *names]))
    inside_set = False

    def expand(match):
        nonlocal inside_set
        text = match.group(0)
        if text == "[":
            inside_set = True
        elif text == "]":
            inside_set = False
        else:
            members = categories[text]
            return members if inside_set else f"[{members}]"
        return text

    return token.sub(expand, change)


def apply(change, string, categories=None, apply=True):
    """Apply a sound change to a given string.

    A change is written ``original>replacement`` with an optional
    ``/before_after`` environment.  ``{a,b}`` denotes a set of alternatives,
    ``#`` a word boundary, and category names stand for any of their members.

    Args:
        change: The sound change, e.g. ``'p>h/#_{u,i}Na#'``.
        string: The word to transform.
        categories: Mapping of category name to a string of its members.
            When omitted or empty, ``{'V': 'aeiou'}`` is used.
        apply: When falsy, *string* is returned unchanged.

    Returns:
        The transformed word.

    Raises:
        ValueError: If the change does not contain exactly one ``>``, contains
            more than one ``/``, or its environment does not contain exactly
            one ``_``.

    Note:
        The ``before`` context is placed in a regex lookbehind, which Python's
        ``re`` only accepts when it matches strings of a fixed length.
    """
    if not apply:
        return string

    # Set up default categories
    if not categories:
        categories = {'V': 'aeiou'}

    # Turn the notation into regex syntax: braces become sets, separators go.
    change = change.translate(
        str.maketrans({'{': '[', '}': ']', ',': None, ' ': None})
    )
    change = _expand_categories(change, categories)

    # Check validity of change
    arrows = change.count('>')
    if arrows == 0:
        raise ValueError(f"Change {change} is not a valid sound change. (Missing character '>')")
    if arrows > 1:
        raise ValueError(f"Change {change} is not a valid sound change. (More than one '>' character)")

    if change.count('/') > 1:
        raise ValueError(f"Change {change} is not a valid sound change. (More than one '/' character)")

    if '/' in change:
        change, environment = change.split('/')
    else:
        # No environment given: any single character on either side.
        environment = '._.'

    if environment.count('_') != 1:
        raise ValueError(f"Environment {environment} is not a valid environment. (Character '_' should exist exactly once)")

    original, repl = change.split('>')
    before, after = environment.split('_')

    pattern = f"(?<={before}){original}(?={after})"
    # The word is padded with '#' so the environment can refer to word
    # boundaries; strip() removes the markers (and any '#' at the word's ends).
    return re.sub(pattern, repl, f"#{string}#").strip('#')
|
||||
@ -1,24 +0,0 @@
|
||||
from soundchanger import Change
|
||||
|
||||
|
||||
def test_simple_change_without_environment():
    """A bare ``p>f`` rule rewrites ``p`` at the start and inside a word."""
    rule = Change('p>f')
    for word, expected in (('pana', 'fana'), ('kapa', 'kafa')):
        assert rule.sub(word) == expected
|
||||
|
||||
def test_simple_change_with_environment():
    """``p>f/#_a`` only fires on a word-initial ``p`` followed by ``a``."""
    rule = Change('p>f/#_a')
    expectations = {'pana': 'fana', 'pune': 'pune', 'kapa': 'kapa'}
    for word, expected in expectations.items():
        assert rule.sub(word) == expected
|
||||
|
||||
def test_complex_change_with_environment():
    """Brace groups on both sides of ``_`` act as sets of alternatives."""
    rule = Change('p>f/{#,a}_{a,u}')
    expectations = {'pana': 'fana', 'pune': 'fune', 'kapa': 'kafa'}
    for word, expected in expectations.items():
        assert rule.sub(word) == expected
|
||||
|
||||
# def test_simple_change_with_category():
|
||||
# c = Change('w>h/V_V')
|
||||
# assert c.sub('kawa') == 'kaha'
|
||||
# assert c.sub('tuwo') == 'tuho'
|
||||
25
tests/test_change.py
Normal file
25
tests/test_change.py
Normal file
@ -0,0 +1,25 @@
|
||||
from soundchanger.change import apply
|
||||
|
||||
|
||||
def test_apply_without_environment():
    """A change without an environment applies to a matching sound."""
    result = apply('p>h', 'pana')
    assert result == 'hana'
|
||||
|
||||
def test_apply_with_environment():
    """``#_u`` restricts the change to word-initial position before ``u``."""
    result = apply('p>f/#_u', 'pune')
    assert result == 'fune'
|
||||
|
||||
def test_apply_with_complex_environment():
    """Sets, a custom category and both word boundaries combine in one rule."""
    expectations = {
        'pana': 'pana',
        'pina': 'hina',
        'puna': 'huna',
        'pama': 'pama',
        'pima': 'hima',
        'puma': 'huma',
    }
    for word, expected in expectations.items():
        result = apply(
            change='p>h/#_{u,i}Na#',
            string=word,
            categories={'N': 'nm'},
        )
        assert result == expected
|
||||
|
||||
def test_apply_with_complex_group_and_categories():
    """A multi-part left context mixing brace groups and a category matches."""
    categories = {
        'V': 'aiu',
        'P': 'ptk',
        'N': 'mn',
    }
    result = apply('e>i/{#,p,t,k}V{m,n,h}_#', 'pane', categories=categories)
    assert result == 'pani'
|
||||
15
tests/test_change_errors.py
Normal file
15
tests/test_change_errors.py
Normal file
@ -0,0 +1,15 @@
|
||||
import pytest
|
||||
import re
|
||||
from soundchanger.change import apply
|
||||
|
||||
def test_parse_change_error_change():
    """An empty change is rejected because it has no ``>`` separator."""
    # The message interpolates the (empty) change between "Change" and "is",
    # leaving two consecutive spaces, so "Change is not ..." never matched.
    # Match the fixed tail of the message instead of relying on that spacing.
    expected = re.escape("is not a valid sound change. (Missing character '>')")
    with pytest.raises(ValueError, match=expected):
        apply('', '')
|
||||
|
||||
def test_apply_error_split():
    """A change with two ``/`` separators is rejected with a ValueError."""
    expected = re.escape("Change a>b/c/d is not a valid sound change. (More than one '/' character)")
    with pytest.raises(ValueError, match=expected):
        apply('a>b/c/d', '')
|
||||
|
||||
def test_apply_error_environment():
    """An environment without the ``_`` placeholder is rejected."""
    expected = re.escape("Environment c is not a valid environment. (Character '_' should exist exactly once)")
    with pytest.raises(ValueError, match=expected):
        apply('a>b/c', '')
|
||||
Loading…
x
Reference in New Issue
Block a user