77 lines
2.3 KiB
Python

import re
def apply(changes, strings, categories=None, ignore_errors=True, apply=True, zero_characters=None):
    """Apply one or more sound changes to one or more strings.

    A change is written ``original>change_to`` with an optional environment
    ``/before_after``.  Every string is wrapped in ``#`` while substituting,
    so a ``#`` in the environment matches the start or end of the string.

    Example: ``apply("a>e/_#", "banana")`` returns ``"banane"``.

    Args:
        changes: A single change string, or an iterable of change strings
            applied in order.
        strings: A single string, or a list of strings.  A list is updated
            in place and the same list object is returned.
        categories: Mapping of category name to a list of alternatives,
            forwarded to ``convert_change_to_regex``.  ``None`` means no
            categories.
        ignore_errors: Forwarded to ``validate_change``.  When true, changes
            that fail validation are skipped; when false, validation raises
            ``ValueError``.
        apply: When false, ``strings`` is returned untouched.
        zero_characters: Substrings removed from each change before it is
            split.  ``None`` means ``['']`` (nothing is removed).

    Returns:
        A string if ``strings`` was a string, otherwise the list of strings.
    """
    if not apply:
        return strings
    # None sentinels instead of mutable default arguments.
    if categories is None:
        categories = {}
    if zero_characters is None:
        zero_characters = ['']
    if isinstance(changes, str):
        changes = [changes]
    return_str = isinstance(strings, str)
    if return_str:
        strings = [strings]
    for change in changes:
        if not validate_change(change, ignore_errors=ignore_errors):
            continue
        regex_change = convert_change_to_regex(
            change, categories=categories, zero_characters=zero_characters
        )
        original, change_to, before, after = split_change(regex_change)
        pattern = f"({before})({original})({after})"
        # Index of the group wrapping `after`: the three wrapper groups plus
        # every group opened inside `before` and `original`.
        after_group = 3 + before.count('(') + original.count('(')
        # \g<N> keeps the group number separate from change_to; a plain
        # "\1" followed by a digit in change_to would be read as group 1x.
        replacement = f"\\g<1>{change_to}\\g<{after_group}>"
        for i, string in enumerate(strings):
            # NOTE: strip('#') also removes any '#' the input itself had at
            # either end.
            strings[i] = re.sub(pattern, replacement, f"#{string}#").strip('#')
    if return_str:
        return strings[0]
    return strings
def validate_change(change, ignore_errors):
    """Check that *change* is shaped like ``original>change_to[/before_after]``.

    ``original`` must be non-empty, ``change_to`` may be empty, and none of
    the parts may contain ``>``, ``_`` or ``/``.  The optional environment
    starts with ``/`` and contains one run of one or more underscores.

    Args:
        change: The sound change string to check.
        ignore_errors: When true, report an invalid change by returning
            ``False``; when false, raise instead.

    Returns:
        ``True`` if the change is valid, ``False`` if it is invalid and
        ``ignore_errors`` is true.

    Raises:
        ValueError: If the change is invalid and ``ignore_errors`` is false.
    """
    # (?:...) is a non-capturing group for the optional environment part.
    valid = re.search(r'^[^>_/]+?>[^>_/]*?(?:/[^>_/]*?_+[^>_/]*)?$', change)
    if valid is not None:
        return True
    if ignore_errors:
        return False
    raise ValueError(f"Change {change} is not a valid sound change.")
def convert_change_to_regex(change, categories, zero_characters):
    """Rewrite sound-change notation into regular-expression syntax.

    Steps, in order:
      1. Remove spaces and map ``{`` -> ``(``, ``}`` -> ``)``, ``,`` -> ``|``.
      2. Replace each category name with a parenthesised alternation of its
         members, e.g. ``V`` -> ``(a|e|i)``.  Categories are expanded in the
         mapping's iteration order, on the already-expanded string.
      3. Remove every zero character.

    Args:
        change: The sound change string.
        categories: Mapping of category name to an iterable of strings.
        zero_characters: Iterable of substrings to delete from the result.

    Returns:
        The converted change string (still containing ``>``, ``/`` and ``_``).
    """
    # One pass for the fixed single-character substitutions.
    change = change.translate(
        str.maketrans({' ': None, '{': '(', '}': ')', ',': '|'})
    )
    # Category names are substituted literally and always wrapped in their
    # own group, including inside a brace list.  A former regex step that
    # tried to inline categories next to a comma never matched, because the
    # commas are already '|' by this point, and it raised re.error for names
    # containing regex metacharacters.
    for name, members in categories.items():
        change = change.replace(name, '(' + '|'.join(members) + ')')
    for char in zero_characters:
        change = change.replace(char, '')
    return change
def split_change(change):
    """Break a change string into its four components.

    The input has the form ``original>change_to`` optionally followed by
    ``/before_after``.  When no environment is given it is treated as ``_``,
    so ``before`` and ``after`` are both empty.  A run of several underscores
    in the environment counts as a single one.

    Returns:
        A tuple ``(original, change_to, before, after)``.
    """
    if '/' not in change:
        rule, environment = change, '_'
    else:
        rule, environment = change.split('/')
    # Collapse repeated underscores so the environment splits into two parts.
    collapsed = re.sub('_+', '_', environment)
    original, change_to = rule.split('>')
    before, after = collapsed.split('_')
    return (original, change_to, before, after)