Skip to content

Instantly share code, notes, and snippets.

@wbolster
Last active November 22, 2022 09:30
Show Gist options
  • Save wbolster/7fead7a261b461a4dd7c8ae680d1452c to your computer and use it in GitHub Desktop.
Save wbolster/7fead7a261b461a4dd7c8ae680d1452c to your computer and use it in GitHub Desktop.
shuffle dots: text manıṗulatıȯn tool to move dots above letters to another ṗosıṫıon
"""
Text manıṗulatıȯn tool to move dots above letters to another ṗosıṫıon.
"""
import operator
import random
import re
import unicodedata
import sys
# What is left of each dot-carrying character once its dot is taken away:
# "i" and "j" turn into their dotless forms, and a bare combining dot
# disappears entirely.
DOT_MAPPING = {"i": "ı", "j": "ȷ", "\N{COMBINING DOT ABOVE}": ""}

# Score of each (dotless) lower-case letter as a host for a dot; letters with
# a higher score are preferred when dots are handed out. shuffle_dots() looks
# letters up here and falls back to 0 for any letter that is not listed.
LOWER_CASE_LETTERS_SCORES = {
    letter: score
    for letters, score in (
        ("aecıȷouy", 3),  # vowel-like
        ("mnrsvwxz", 3),  # no ascenders and no descenders
        ("gpq", 3),  # descenders
        ("bdfhklt", 1),  # ascenders
    )
    for letter in letters
}
def shuffle_dots(s: str) -> str:
    """Shuffle existing dots above letters to random locations.

    Every "i", "j" and combining dot above found in the decomposed text
    counts as one dot. All of them are removed, and the same number of dots
    is then handed out to randomly chosen letters, favouring letters with a
    high score in ``LOWER_CASE_LETTERS_SCORES``. If there are fewer letters
    than dots, each letter receives one dot and the surplus is dropped.

    Args:
        s: The text whose dots should be moved.

    Returns:
        The text with its dots relocated, in NFKC-normalized form. Because
        compatibility normalization is applied, compatibility characters in
        *s* (ligatures, for example) come back in their normalized form
        rather than as they were given.
    """
    dot = "\N{COMBINING DOT ABOVE}"

    # Decompose to ensure ‘U+0307 ◌̇ COMBINING DOT ABOVE’ characters
    s = unicodedata.normalize("NFKD", s)
    n_dots = sum(s.count(char) for char in DOT_MAPPING)

    # Remove existing dots
    for needle, replacement in DOT_MAPPING.items():
        s = s.replace(needle, replacement)

    # Determine new positions, preferring good spots if possible. Shuffling
    # first and then applying a stable sort keeps the order random among
    # letters that share the same score.
    letter_positions_and_scores = [
        (pos, LOWER_CASE_LETTERS_SCORES.get(c, 0))
        for pos, c in enumerate(s)
        if unicodedata.category(c).startswith("L")
    ]
    random.shuffle(letter_positions_and_scores)
    letter_positions_and_scores.sort(key=operator.itemgetter(1), reverse=True)
    new_dot_positions = {
        pos for pos, _score in letter_positions_and_scores[:n_dots]
    }

    # Add new dots
    s = "".join(
        c + dot if pos in new_dot_positions else c
        for pos, c in enumerate(s)
    )

    # Normalize: a dotless letter that received a dot is simply the regular
    # dotted letter again.
    s = s.replace("ı" + dot, "i")
    s = s.replace("ȷ" + dot, "j")
    return unicodedata.normalize("NFKC", s)
def main() -> int:
    """Shuffle the dots in the text given as arguments, or read from stdin.

    The command line arguments are joined with single spaces; when that
    yields no text, standard input is read instead. The result is written
    to standard output.

    Returns:
        The process exit status, which is always 0.
    """
    # Use command line args as input, or stdin if none given.
    text = " ".join(sys.argv[1:])
    if not text:
        text = sys.stdin.read()

    # Shuffle dots within each ‘word’
    result = re.sub(
        r"\w+",
        lambda m: shuffle_dots(m.group()),
        text,
    )

    # Only add a trailing newline when the text does not already end with
    # one, so that input piped in via stdin does not gain a blank line.
    print(result, end="" if result.endswith("\n") else "\n")
    return 0
if __name__ == "__main__":
    # Run the command line tool and use its return value as the exit status.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment