Change regex pattern to include digraphs
This commit is contained in:
parent
418a318b29
commit
c5251cc881
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,2 +1,4 @@
|
|||||||
.pytest*
|
.pytest*
|
||||||
*__pycache__*
|
*__pycache__*
|
||||||
|
*.egg*
|
||||||
|
venv
|
||||||
@@ -7,23 +7,6 @@ def apply(change, string, categories=None, apply=True):
|
|||||||
if not apply:
|
if not apply:
|
||||||
return string
|
return string
|
||||||
|
|
||||||
# Set up default categories
|
|
||||||
if not categories:
|
|
||||||
categories = {
|
|
||||||
'V': 'aeiou'
|
|
||||||
}
|
|
||||||
|
|
||||||
# Prepare change for regex
|
|
||||||
for k, v in {'{': '[', '}': ']', ',': '', ' ': ''}.items():
|
|
||||||
change = change.replace(k, v)
|
|
||||||
|
|
||||||
# Replacements for categories
|
|
||||||
for k, v in categories.items():
|
|
||||||
# Replacement of categories inside brackets (before or after a comma)
|
|
||||||
change = re.sub(f'((?<=,){k}|{k}(?=,))', v, change)
|
|
||||||
# Replacements of categories anywhere else
|
|
||||||
change = change.replace(k, f"[{v}]")
|
|
||||||
|
|
||||||
# Check validity of change
|
# Check validity of change
|
||||||
if change.count('>') != 1:
|
if change.count('>') != 1:
|
||||||
raise ValueError(f"Change {change} is not a valid sound change. (Missing character '>')")
|
raise ValueError(f"Change {change} is not a valid sound change. (Missing character '>')")
|
||||||
@@ -31,17 +14,31 @@ def apply(change, string, categories=None, apply=True):
|
|||||||
if change.count('/') > 1:
|
if change.count('/') > 1:
|
||||||
raise ValueError(f"Change {change} is not a valid sound change. (More than one '/' character)")
|
raise ValueError(f"Change {change} is not a valid sound change. (More than one '/' character)")
|
||||||
|
|
||||||
|
# Prepare change for regex
|
||||||
|
for k, v in {'{': '(', '}': ')', ',': '|'}.items():
|
||||||
|
change = change.replace(k, v)
|
||||||
|
|
||||||
|
# Replacements for categories
|
||||||
|
for k, v in categories.items():
|
||||||
|
# Replacement of categories inside brackets (before or after a comma)
|
||||||
|
change = re.sub(f'((?<=,){k}|{k}(?=,))', '|'.join(v), change)
|
||||||
|
# Replacements of categories anywhere else
|
||||||
|
change = change.replace(k, '('+'|'.join(v)+')')
|
||||||
|
|
||||||
if '/' in change:
|
if '/' in change:
|
||||||
change, environment = change.split('/')
|
change, environment = change.split('/')
|
||||||
else:
|
else:
|
||||||
environment = '._.'
|
environment = '_'
|
||||||
|
|
||||||
if environment.count('_') != 1:
|
if environment.count('_') != 1:
|
||||||
raise ValueError(f"Environment {environment} is not a valid environment. (Character '_' should exist exactly once)")
|
raise ValueError(f"Environment {environment} is not a valid environment. (Character '_' should exist exactly once)")
|
||||||
|
|
||||||
original, repl = change.split('>')
|
original, change_to = change.split('>')
|
||||||
before, after = environment.split('_')
|
before, after = environment.split('_')
|
||||||
|
|
||||||
pattern = f"(?<={before}){original}(?={after})"
|
n = before.count('(') + original.count('(') + 3
|
||||||
print(pattern)
|
pattern = f"({before})({original})({after})"
|
||||||
|
repl = f"\\1{change_to}\\{n}"
|
||||||
|
|
||||||
return re.sub(pattern, repl, f"#{string}#").strip('#')
|
return re.sub(pattern, repl, f"#{string}#").strip('#')
|
||||||
|
|
||||||
|
|||||||
@@ -23,3 +23,14 @@ def test_apply_with_complex_group_and_categories():
|
|||||||
'P': 'ptk',
|
'P': 'ptk',
|
||||||
'N': 'mn',
|
'N': 'mn',
|
||||||
}) == 'pani'
|
}) == 'pani'
|
||||||
|
|
||||||
|
def test_apply_with_complex_group_and_categories_and_digraphs():
|
||||||
|
assert apply('u>o/#V{ts,pf,t}_k{V,e,o}#', 'atsuka', categories={
|
||||||
|
'V': ['a', 'i', 'u']
|
||||||
|
}) == 'atsoka'
|
||||||
|
assert apply('u>o/#V{ts,pf,t}_k{V,e,o}#', 'atuka', categories={
|
||||||
|
'V': ['a', 'i', 'u']
|
||||||
|
}) == 'atoka'
|
||||||
|
assert apply('u>o/#V{ts,pf,t}_k{V,e,o}#', 'matsuka', categories={
|
||||||
|
'V': ['a', 'i', 'u']
|
||||||
|
}) == 'matsuka'
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user