Skip to content

Instantly share code, notes, and snippets.

@planetis-m
Created September 21, 2024 23:13
Show Gist options
  • Save planetis-m/ad078e0e184439e2712f3b853c192d01 to your computer and use it in GitHub Desktop.
Save planetis-m/ad078e0e184439e2712f3b853c192d01 to your computer and use it in GitHub Desktop.
import hashlib
import re
from collections.abc import Iterable
from functools import lru_cache

from prompt_toolkit import PromptSession
from prompt_toolkit.application.current import get_app
from prompt_toolkit.completion import Completer, Completion
from prompt_toolkit.document import Document
from prompt_toolkit.filters import Condition
from prompt_toolkit.formatted_text import FormattedText
from prompt_toolkit.layout.processors import Processor, Transformation, TransformationInput, ConditionalProcessor
from spellchecker import SpellChecker
# Initialize the spellchecker
spell = SpellChecker()
# Custom Completer for spellcheck suggestions
class SpellCheckCompleter(Completer):
    """Offer spelling corrections for the word immediately before the cursor.

    Suggestions come from the module-level ``spell`` checker and are memoised
    per instance, so asking again for the same word does not repeat the
    candidate search.
    """

    def __init__(self, cache_size=128):
        """Create the completer.

        Args:
            cache_size: Maximum number of distinct words whose suggestion
                lists are kept in the LRU cache.
        """
        # Wrap the bound method so every instance owns its own bounded cache.
        self.cache = lru_cache(maxsize=cache_size)(self._get_suggestions)

    def _get_suggestions(self, word: str) -> list[str]:
        """Return the checker's candidates for *word* in a stable order.

        ``spell.candidates`` may return ``None``; that case yields an empty
        list. The candidates are sorted so the completion menu does not
        change order from one run to the next.
        """
        candidates = spell.candidates(word)
        return sorted(candidates) if candidates else []

    def get_completions(self, document: Document, complete_event) -> Iterable[Completion]:
        """Yield a ``Completion`` for each suggested replacement.

        Nothing is yielded when the text before the cursor is empty or is not
        purely alphabetic. A suggestion that equals the typed word (ignoring
        case) is skipped, because offering the word back to the user is noise.
        """
        word = document.get_word_before_cursor()
        if not word or not word.isalpha():
            return
        typed = word.casefold()
        for suggestion in self.cache(word):
            if suggestion.casefold() == typed:
                continue
            # A negative start position makes the completion replace the
            # whole word rather than being appended after it.
            yield Completion(suggestion, start_position=-len(word))
# Custom Processor to underline misspelled words
class UnderlineMisspelledProcessor(Processor):
    """Input processor that underlines words unknown to the spell checker.

    HTML tags, URLs and e-mail addresses are matched first and passed through
    unstyled, as are tokens that are not purely alphabetic. Spell-check
    results are memoised per instance.
    """

    def __init__(self, cache_size=128):
        """Create the processor.

        Args:
            cache_size: Maximum number of distinct words whose spell-check
                verdict is kept in the LRU cache.
        """
        # The non-word alternatives come first so that tags, URLs and e-mail
        # addresses are consumed whole; only the last alternative captures a
        # group, which is how a real word is told apart from the rest.
        self.word_pattern = re.compile(r'''
            (?:<[^>]+>)|                 # HTML tags
            (?:https?://|www\.)\S+|      # Simplified URL pattern
            (?:\S+@\S+\.\S+)|            # Simplified email pattern
            (\b\w+\b)                    # Words
        ''', re.VERBOSE)
        self.cache = lru_cache(maxsize=cache_size)(self._is_misspelled)

    def _is_misspelled(self, word: str) -> bool:
        """Return True when the spell checker reports *word* as unknown."""
        return bool(spell.unknown([word]))

    def apply_transformation(self, ti: TransformationInput) -> Transformation:
        """Return the current line with misspelled words styled ``underline``.

        The transformation is applied to one line at a time, so only the line
        identified by ``ti.lineno`` is processed. Using the whole document
        text here would repeat the full buffer on every row of a multi-line
        input. The fragments in ``ti.fragments`` are not consulted; the
        result is rebuilt from the plain line text.
        """
        text = ti.document.lines[ti.lineno]
        fragments = []
        last_end = 0
        for match in self.word_pattern.finditer(text):
            start, end = match.span()
            # Keep whatever sits between two matches (spaces, punctuation).
            if start > last_end:
                fragments.append(('', text[last_end:start]))
            # group(1) is None for tags, URLs and e-mail addresses.
            word = match.group(1)
            if word and word.isalpha() and self.cache(word):
                style = 'underline'
            else:
                style = ''
            fragments.append((style, match.group(0)))
            last_end = end
        # Trailing text after the last match.
        if last_end < len(text):
            fragments.append(('', text[last_end:]))
        return Transformation(FormattedText(fragments))
# Module-level state: digest of the buffer text seen by the most recent
# buffer_has_changed() call; stays None until that function has run once.
last_text_hash = None
# Function to hash the document text
def hash_text(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8.

    The digest serves only as a cheap fingerprint for change detection, so
    the hash is requested with ``usedforsecurity=False``. That keeps the call
    working on builds that restrict MD5 for security purposes. The resulting
    digest is the same as for a plain ``hashlib.md5`` call.
    """
    return hashlib.md5(text.encode("utf-8"), usedforsecurity=False).hexdigest()
def buffer_has_changed():
    """Report whether the session buffer text differs from the previous call.

    The digest of ``session.default_buffer.text`` is compared with the one
    stored in ``last_text_hash``, and the stored value is then refreshed.
    The result is therefore True at most once per distinct text; every
    further call returns False until the text changes again.

    NOTE: Works but breaks underlining completely when used as the filter of
    a conditional processor, because of that once-per-change behaviour.
    """
    global last_text_hash
    digest = hash_text(session.default_buffer.text)
    unchanged = digest == last_text_hash
    # Re-storing an identical digest is a no-op, so no branch is needed.
    last_text_hash = digest
    return not unchanged
# Example usage
# The underline is part of how the line is drawn, so the processor has to run
# every time the line is rendered. It is therefore installed unconditionally:
# gating it on a "text changed since last check" condition such as
# buffer_has_changed() leaves the line unstyled whenever it is redrawn
# without a text change. Repeated spell lookups are already avoided by the
# processor's own per-word cache.
session = PromptSession(
    completer=SpellCheckCompleter(),
    input_processors=[UnderlineMisspelledProcessor()],
)

while True:
    try:
        text = session.prompt('Enter text: ', complete_while_typing=True)
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or Ctrl-D ends the demo loop instead of raising a traceback.
        break
    print(f"You entered: {text}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment