104 lines
3.1 KiB
Python
104 lines
3.1 KiB
Python
import re
|
|
|
|
|
|
def apply(
        changes,
        strings,
        apply=True,
        categories={},
        ignore_errors=True,
        zero_characters=['∅']):
    """
    Apply a sound change or a list of sound changes to a string or
    a list of given strings.

    Accepts inputs of type str or list (any iterable of strings works).
    If the input value is of type str, the output will also be of type str;
    otherwise a new list is returned and the input is left untouched.

    Changes are applied in order, each one to the result of the previous
    one. Every word is wrapped in `#` while a change is applied, so `#` can
    be used in an environment to refer to a word boundary.

    Options:
    - apply (default: True)
        Whether or not the changes should be applied. If false, `strings`
        is returned as is.
    - categories (default: {})
        Which categories will be detected.
        For vowels it would be {'V': 'aeiou'} or
        {'V': ['a', 'e', 'i', 'o', 'u']}
    - ignore_errors (default: True)
        If this option is set to `True`, any erroneous sound change will be
        skipped. If set to `False`, a ValueError will be raised.
    - zero_characters (default: ['∅'])
        These characters will be removed in the changed words.
        For example, `apply('h>∅', 'aha')` will return 'aa', not 'a∅a'.
    """
    # NOTE: the mutable defaults are kept for interface compatibility; they
    # are only read here and handed to convert_change_to_regex.
    if not apply:
        return strings

    if isinstance(changes, str):
        changes = [changes]

    return_str = isinstance(strings, str)
    # Work on a fresh list so the caller's sequence is never modified.
    words = [strings] if return_str else list(strings)

    for change in changes:
        if not validate_change(change, ignore_errors=ignore_errors):
            continue

        rule = convert_change_to_regex(
            change, categories=categories, zero_characters=zero_characters)
        original, change_to, before, after = split_change(rule)

        # The following environment is a lookahead so it is not consumed:
        # it can then serve as the preceding environment of the next match
        # (e.g. 's>z/a_a' turns 'asasa' into 'azaza', not 'azasa').
        pattern = re.compile(f"({before})({original})(?={after})")
        # \g<1> instead of \1: a replacement that starts with a digit would
        # otherwise be read as part of the group number (\1 + '1' -> \11).
        replacement = f"\\g<1>{change_to}"

        words = [
            pattern.sub(replacement, f"#{word}#").strip('#')
            for word in words
        ]

    if return_str:
        return words[0]
    return words
|
|
|
|
|
|
def validate_change(change, ignore_errors):
    """
    Check whether `change` has the shape of a sound change.

    A valid change looks like `original>replacement`, optionally followed by
    `/before_after`. `original` must not be empty, and none of the parts may
    contain `>`, `_` or `/`. The environment may use one run of one or more
    underscores.

    Returns True for a valid change. For an invalid change, returns False
    if `ignore_errors` is true and raises ValueError otherwise.
    """
    # `(?:...)` is a non-capturing group; the environment part is optional.
    valid = re.search(
        r'^[^>_/]+?>[^>_/]*?(?:/[^>_/]*?_+[^>_/]*)?$', change) is not None

    if not valid and not ignore_errors:
        raise ValueError(f"Change {change} is not a valid sound change.")
    return valid
|
|
|
|
|
|
def convert_change_to_regex(change, categories, zero_characters):
    """
    Rewrite a sound change in rule notation into regex notation.

    - Spaces are dropped, `{a,b}` lists become `(a|b)` groups.
    - Every category name in `categories` is replaced by an alternation of
      its members (`categories` maps a name to a string or a list of
      strings).
    - Every character in `zero_characters` is removed.

    The `>`, `/` and `_` separators are left in place; the returned string
    is still a single rule.
    """
    # Prepare change for regex
    change = change.translate(
        str.maketrans({' ': '', '{': '(', '}': ')', ',': '|'}))

    # Replacements for categories
    for name, members in categories.items():
        alternatives = '|'.join(members)
        # A category that is a whole alternative of a bracket list, e.g. the
        # V in (V|b), is spliced in without an extra group -> (a|e|b).
        # Commas are already '|' at this point, so the list delimiters to
        # look for are '(' / '|' before and '|' / ')' after. The name is
        # escaped and the replacement is a callable so that neither is
        # interpreted as regex syntax.
        change = re.sub(
            rf'(?<=[(|]){re.escape(name)}(?=[|)])',
            lambda _match: alternatives,
            change)
        # Replacements of categories anywhere else
        change = change.replace(name, f'({alternatives})')

    # Remove zero characters
    for char in zero_characters:
        change = change.replace(char, '')

    return change
|
|
|
|
|
|
def split_change(change):
    """
    Break a rule of the form `original>change_to[/before_after]` into its
    four parts and return them as a tuple
    `(original, change_to, before, after)`.

    A rule without an environment gets empty `before` and `after` parts.
    """
    # No '/' means no environment: fall back to the empty environment '_'.
    rule, context = change.split('/') if '/' in change else (change, '_')

    original, change_to = rule.split('>')

    # Collapse multiple underscores into one before splitting around it.
    before, after = re.sub('_+', '_', context).split('_')

    return original, change_to, before, after
|