Initial implementation of regex sound changer

This commit is contained in:
Patrick Elmer 2023-01-29 21:34:47 +09:00
parent dd5b8ea1d5
commit 418a318b29
7 changed files with 89 additions and 57 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
.pytest*
*__pycache__*

View File

@ -1,32 +0,0 @@
import re
class Change:
    """
    A sound change parsed from its string notation.

    The notation is ``before>after`` optionally followed by
    ``/left_right``, where ``left`` and ``right`` describe what must
    precede and follow the changed sound. A side written entirely as
    ``{a,b,c}`` is turned into the regex character class ``[abc]``.
    """

    def __init__(self, change):
        self.parse(change)

    def __repr__(self):
        return "{}>{}/{}_{}".format(self.b, self.a, self.f, self.t)

    def parse(self, change):
        """Split the notation into the attributes ``b``, ``a``, ``f`` and ``t``."""

        def as_char_class(side):
            # Only a side that is wrapped in braces as a whole is rewritten.
            if side.startswith("{") and side.endswith("}"):
                return "[" + "".join(side[1:-1].split(',')) + "]"
            return side

        # Without an explicit environment, both sides are left empty.
        rule, env = change.split('/') if '/' in change else (change, '_')
        self.b, self.a = rule.split('>')
        left, right = env.split('_')
        self.f = as_char_class(left)
        self.t = as_char_class(right)

    def sub(self, string):
        """Return ``string`` with the change applied wherever its environment matches."""
        pattern = f"(?<={self.f}){self.b}(?={self.t})"
        replacement = f"{self.a}"
        # '#' marks the word boundaries so environments can refer to them.
        padded = f"#{string}#"
        return re.sub(pattern, replacement, padded).strip('#')

0
soundchanger/__init__.py Normal file
View File

47
soundchanger/change.py Normal file
View File

@ -0,0 +1,47 @@
import re
def _to_regex(change, categories):
    """Translate sound-change notation into regular-expression syntax.

    Spaces and commas are dropped, ``{``/``}`` become ``[``/``]``, and
    every category name is expanded in a single pass: to its bare
    members inside a brace group (so ``{V,n}`` becomes ``[aeioun]``) and
    to a bracketed character class everywhere else (``V`` becomes
    ``[aeiou]``). Already expanded text is never scanned again, so a
    category whose members contain another category's name is left
    intact.
    """
    # Spaces and commas are purely visual separators in the notation.
    change = change.translate(str.maketrans('', '', ' ,'))
    # Longest names first, so a longer category name is preferred over a
    # shorter one that is its prefix; empty names can never match.
    names = sorted((name for name in categories if name), key=len, reverse=True)
    tokenizer = re.compile(
        '|'.join([re.escape(name) for name in names] + ['.']),
        re.DOTALL,
    )
    pieces = []
    inside_group = False
    for match in tokenizer.finditer(change):
        token = match.group()
        if token == '{':
            inside_group = True
            pieces.append('[')
        elif token == '}':
            inside_group = False
            pieces.append(']')
        elif token in categories:
            members = str(categories[token])
            # Inside a group the surrounding brackets already exist.
            pieces.append(members if inside_group else f"[{members}]")
        else:
            pieces.append(token)
    return ''.join(pieces)


def apply(change, string, categories=None, apply=True):
    """Apply a sound change to a given string.

    Args:
        change: The sound change, written as ``original>replacement`` with
            an optional ``/before_after`` environment, e.g.
            ``'p>f/#_{u,i}'``. ``#`` stands for a word boundary and
            ``{a,b}`` for "any of a, b".
        string: The word to transform.
        categories: Mapping from category name to a string of member
            sounds. When empty or omitted, ``{'V': 'aeiou'}`` is used.
        apply: When false, ``string`` is returned untouched. (The
            parameter shadows the function name inside the body; the name
            is kept for backward compatibility.)

    Returns:
        The transformed string.

    Raises:
        ValueError: If the change does not contain exactly one ``>`` (in
            its non-environment part), contains more than one ``/``, or
            its environment does not contain exactly one ``_``.
    """
    if not apply:
        return string
    # Set up default categories
    if not categories:
        categories = {'V': 'aeiou'}
    change = _to_regex(change, categories)
    # Check validity of change
    if change.count('>') != 1:
        raise ValueError(f"Change {change} is not a valid sound change. (Missing character '>')")
    if change.count('/') > 1:
        raise ValueError(f"Change {change} is not a valid sound change. (More than one '/' character)")
    if '/' in change:
        change, environment = change.split('/')
    else:
        # Any single character on either side; the '#' padding added
        # below guarantees one exists even at the edges of the word.
        environment = '._.'
    if '>' not in change:
        # The only '>' sat in the environment; report it instead of
        # failing with an opaque unpacking error below.
        raise ValueError(f"Change {change} is not a valid sound change. (Missing character '>')")
    if environment.count('_') != 1:
        raise ValueError(f"Environment {environment} is not a valid environment. (Character '_' should exist exactly once)")
    original, repl = change.split('>')
    before, after = environment.split('_')
    pattern = f"(?<={before}){original}(?={after})"
    # '#' marks the word boundaries so environments can refer to them.
    return re.sub(pattern, repl, f"#{string}#").strip('#')

View File

@ -1,24 +0,0 @@
from soundchanger import Change
def test_simple_change_without_environment():
    """'p>f' without an environment rewrites p at the start and in the middle."""
    change = Change('p>f')
    for word, expected in (('pana', 'fana'), ('kapa', 'kafa')):
        assert change.sub(word) == expected
def test_simple_change_with_environment():
    """'p>f/#_a' only rewrites a word-initial p that is followed by a."""
    change = Change('p>f/#_a')
    cases = (
        ('pana', 'fana'),
        ('pune', 'pune'),
        ('kapa', 'kapa'),
    )
    for word, expected in cases:
        assert change.sub(word) == expected
def test_complex_change_with_environment():
    """Brace groups on both sides of the environment are honoured."""
    change = Change('p>f/{#,a}_{a,u}')
    cases = (
        ('pana', 'fana'),
        ('pune', 'fune'),
        ('kapa', 'kafa'),
    )
    for word, expected in cases:
        assert change.sub(word) == expected
# def test_simple_change_with_category():
# c = Change('w>h/V_V')
# assert c.sub('kawa') == 'kaha'
# assert c.sub('tuwo') == 'tuho'

25
tests/test_change.py Normal file
View File

@ -0,0 +1,25 @@
from soundchanger.change import apply
def test_apply_without_environment():
    """A change with no environment part turns 'pana' into 'hana'."""
    result = apply('p>h', 'pana')
    assert result == 'hana'
def test_apply_with_environment():
    """A word-initial p followed by u is rewritten to f."""
    result = apply('p>f/#_u', 'pune')
    assert result == 'fune'
def test_apply_with_complex_environment():
    """A brace group combined with a custom category in the environment."""
    expected_by_word = {
        'pana': 'pana',
        'pina': 'hina',
        'puna': 'huna',
        'pama': 'pama',
        'pima': 'hima',
        'puma': 'huma',
    }
    for word, expected in expected_by_word.items():
        result = apply(
            change='p>h/#_{u,i}Na#',
            string=word,
            categories={'N': 'nm'},
        )
        assert result == expected
def test_apply_with_complex_group_and_categories():
    """Several brace groups and a category together in the left environment."""
    categories = {
        'V': 'aiu',
        'P': 'ptk',
        'N': 'mn',
    }
    result = apply('e>i/{#,p,t,k}V{m,n,h}_#', 'pane', categories=categories)
    assert result == 'pani'

View File

@ -0,0 +1,15 @@
import pytest
import re
from soundchanger.change import apply
def test_parse_change_error_change():
    """An empty change contains no '>' and must be rejected."""
    change = ''
    # apply() interpolates the change into its message, so an empty change
    # leaves two adjacent spaces after "Change". Build the expectation from
    # the same template instead of hand-typing that whitespace, which is
    # easy to get wrong and invisible in review.
    expected = f"Change {change} is not a valid sound change. (Missing character '>')"
    with pytest.raises(ValueError, match=re.escape(expected)):
        apply(change, '')
def test_apply_error_split():
    """A change with two '/' separators is rejected."""
    expected = "Change a>b/c/d is not a valid sound change. (More than one '/' character)"
    with pytest.raises(ValueError, match=re.escape(expected)):
        apply('a>b/c/d', '')
def test_apply_error_environment():
    """An environment without the '_' placeholder is rejected."""
    expected = "Environment c is not a valid environment. (Character '_' should exist exactly once)"
    with pytest.raises(ValueError, match=re.escape(expected)):
        apply('a>b/c', '')