planetis-m · September 21, 2024 23:13
diff --git a/spellcheck.py b/spellcheck.py
 from spellchecker import SpellChecker
 from prompt_toolkit import PromptSession
 from prompt_toolkit.completion import Completer, Completion
 from prompt_toolkit.document import Document
 from prompt_toolkit.formatted_text import FormattedText
 from prompt_toolkit.layout.processors import Processor, Transformation, TransformationInput, ConditionalProcessor
 from prompt_toolkit.application.current import get_app
 from prompt_toolkit.filters import Condition
 from functools import lru_cache
 import hashlib
 import re

 # Initialize the spellchecker
 spell = SpellChecker()

 # Custom Completer for spellcheck suggestions
 class SpellCheckCompleter(Completer):
     """Completer that offers spelling suggestions for the word before the cursor.

     Suggestions come from the module-level ``spell`` checker and are memoised
     per instance in an LRU cache holding up to ``cache_size`` words.
     """

     def __init__(self, cache_size=128):
         # Wrap the bound lookup so repeated requests for the same word do not
         # hit the spell checker again.
         self.cache = lru_cache(maxsize=cache_size)(self._get_suggestions)

     def _get_suggestions(self, word: str) -> list[str]:
         """Return the spell checker's candidates for *word*, sorted.

         A ``None`` result from ``spell.candidates`` becomes an empty list.
         The candidates are sorted so the completion menu lists them in the
         same order on every run instead of in arbitrary collection order.
         """
         candidates = spell.candidates(word)
         if candidates is None:
             return []
         return sorted(candidates)

     def get_completions(self, document: Document, complete_event) -> Completion:
         """Yield one ``Completion`` per suggestion for the word before the cursor.

         This is a generator. Nothing is yielded when there is no word before
         the cursor or when that word is not purely alphabetic.
         """
         word = document.get_word_before_cursor()
         if not word or not word.isalpha():
             return
         # A negative start position makes the completion replace the whole
         # word rather than being inserted at the cursor.
         for suggestion in self.cache(word):
             yield Completion(suggestion, start_position=-len(word))

 # Custom Processor to underline misspelled words
 class UnderlineMisspelledProcessor(Processor):
     """Input processor that underlines misspelled words in a rendered line.

     Words are looked up with the module-level ``spell`` checker; the verdict
     for each word is memoised in an LRU cache of up to ``cache_size`` entries.
     HTML tags, URLs and e-mail addresses are passed through unchecked.
     """

     def __init__(self, cache_size=128):
         # Alternation order matters: tags, URLs and e-mail addresses are
         # consumed whole by the non-capturing branches, so only plain words
         # end up in group 1 and get spell-checked.
         self.word_pattern = re.compile(r'''
             (?:<[^>]+>)|             # HTML tags
             (?:https?://|www\.)\S+|  # Simplified URL pattern
             (?:\S+@\S+\.\S+)|        # Simplified email pattern
             (\b\w+\b)                # Words
         ''', re.VERBOSE)
         self.cache = lru_cache(maxsize=cache_size)(self._is_misspelled)

     def _is_misspelled(self, word: str) -> bool:
         """Return True when the spell checker reports *word* as unknown."""
         return bool(spell.unknown([word]))

     def apply_transformation(self, ti: TransformationInput) -> Transformation:
         """Return the current line with misspelled words styled ``underline``.

         The line text is taken from ``ti.fragments`` (the fragments of the
         line being processed) rather than from ``ti.document.text``, which is
         the text of the whole buffer and would be repeated on every line of a
         multi-line input. All emitted fragments other than misspelled words
         carry an empty style.
         """
         # Fragments are (style, text, ...) tuples; only the text is needed.
         text = ''.join(fragment[1] for fragment in ti.fragments)
         if not text:
             return Transformation(FormattedText([]))

         fragments = []
         last_end = 0
         for match in self.word_pattern.finditer(text):
             start, end = match.span()
             # Copy through whatever sits between the previous match and this one.
             if start > last_end:
                 fragments.append(('', text[last_end:start]))
             word = match.group(1)  # None for tag / URL / e-mail matches
             if word and word.isalpha() and self.cache(word):
                 fragments.append(('underline', word))
             else:
                 # Correctly spelled word, non-alphabetic token, tag, URL or
                 # e-mail address: emit unchanged.
                 fragments.append(('', match.group(0)))
             last_end = end
         # Trailing text after the final match.
         if last_end < len(text):
             fragments.append(('', text[last_end:]))
         return Transformation(FormattedText(fragments))

 # Hex digest of the buffer text seen by the most recent buffer_has_changed()
 # call; None until that function has run for the first time.
 last_text_hash = None

 # Function to hash the document text
 def hash_text(text):
     """Return the hexadecimal MD5 digest of *text*.

     The string is encoded with ``str.encode()``'s default encoding before
     hashing.
     """
     digest = hashlib.md5()
     digest.update(text.encode())
     return digest.hexdigest()

 def buffer_has_changed():
     """Report whether the session buffer text differs from the last call.

     Hashes ``session.default_buffer.text`` and compares it with the
     module-level ``last_text_hash``, which is then updated. The function is
     therefore stateful: it returns True only on the first call after the text
     changes and False on every later call with the same text. The original
     author noted that using it as a processor condition "works but breaks
     underlining completely".
     """
     global last_text_hash
     previous_hash = last_text_hash
     last_text_hash = hash_text(session.default_buffer.text)
     return last_text_hash != previous_hash

 # Example usage
 session = PromptSession(
     completer=SpellCheckCompleter(),
     # The processor is attached unconditionally. buffer_has_changed answers
     # True only on the first call after an edit, so a ConditionalProcessor
     # built on it disables the processor for every later evaluation of the
     # same text and the underline disappears.
     input_processors=[UnderlineMisspelledProcessor()],
 )

 while True:
     try:
         text = session.prompt('Enter text: ', complete_while_typing=True)
     except (KeyboardInterrupt, EOFError):
         # Ctrl-C or Ctrl-D ends the loop instead of surfacing a traceback.
         break
     else:
         print(f"You entered: {text}")
	from spellchecker import SpellChecker
	from prompt_toolkit import PromptSession
	from prompt_toolkit.completion import Completer, Completion
	from prompt_toolkit.document import Document
	from prompt_toolkit.formatted_text import FormattedText
	from prompt_toolkit.layout.processors import Processor, Transformation, TransformationInput, ConditionalProcessor
	from prompt_toolkit.application.current import get_app
	from prompt_toolkit.filters import Condition
	from functools import lru_cache
	import hashlib
	import re

	# Initialize the spellchecker
	spell = SpellChecker()

	# Custom Completer for spellcheck suggestions
	class SpellCheckCompleter(Completer):
	    """Completer that offers spelling suggestions for the word before the cursor.

	    Suggestions come from the module-level ``spell`` checker and are memoised
	    per instance in an LRU cache holding up to ``cache_size`` words.
	    """

	    def __init__(self, cache_size=128):
	        # Wrap the bound lookup so repeated requests for the same word do not
	        # hit the spell checker again.
	        self.cache = lru_cache(maxsize=cache_size)(self._get_suggestions)

	    def _get_suggestions(self, word: str) -> list[str]:
	        """Return the spell checker's candidates for *word*, sorted.

	        A ``None`` result from ``spell.candidates`` becomes an empty list.
	        The candidates are sorted so the completion menu lists them in the
	        same order on every run instead of in arbitrary collection order.
	        """
	        candidates = spell.candidates(word)
	        if candidates is None:
	            return []
	        return sorted(candidates)

	    def get_completions(self, document: Document, complete_event) -> Completion:
	        """Yield one ``Completion`` per suggestion for the word before the cursor.

	        This is a generator. Nothing is yielded when there is no word before
	        the cursor or when that word is not purely alphabetic.
	        """
	        word = document.get_word_before_cursor()
	        if not word or not word.isalpha():
	            return
	        # A negative start position makes the completion replace the whole
	        # word rather than being inserted at the cursor.
	        for suggestion in self.cache(word):
	            yield Completion(suggestion, start_position=-len(word))

	# Custom Processor to underline misspelled words
	class UnderlineMisspelledProcessor(Processor):
	    """Input processor that underlines misspelled words in a rendered line.

	    Words are looked up with the module-level ``spell`` checker; the verdict
	    for each word is memoised in an LRU cache of up to ``cache_size`` entries.
	    HTML tags, URLs and e-mail addresses are passed through unchecked.
	    """

	    def __init__(self, cache_size=128):
	        # The branches are separated by plain ``|``; an escaped ``\|`` would
	        # match a literal pipe and collapse the alternation into one sequence.
	        # Order matters: tags, URLs and e-mail addresses are consumed whole by
	        # the non-capturing branches, so only plain words land in group 1.
	        self.word_pattern = re.compile(r'''
	            (?:<[^>]+>)|             # HTML tags
	            (?:https?://|www\.)\S+|  # Simplified URL pattern
	            (?:\S+@\S+\.\S+)|        # Simplified email pattern
	            (\b\w+\b)                # Words
	        ''', re.VERBOSE)
	        self.cache = lru_cache(maxsize=cache_size)(self._is_misspelled)

	    def _is_misspelled(self, word: str) -> bool:
	        """Return True when the spell checker reports *word* as unknown."""
	        return bool(spell.unknown([word]))

	    def apply_transformation(self, ti: TransformationInput) -> Transformation:
	        """Return the current line with misspelled words styled ``underline``.

	        The line text is taken from ``ti.fragments`` (the fragments of the
	        line being processed) rather than from ``ti.document.text``, which is
	        the text of the whole buffer and would be repeated on every line of a
	        multi-line input. All emitted fragments other than misspelled words
	        carry an empty style.
	        """
	        # Fragments are (style, text, ...) tuples; only the text is needed.
	        text = ''.join(fragment[1] for fragment in ti.fragments)
	        if not text:
	            return Transformation(FormattedText([]))

	        fragments = []
	        last_end = 0
	        for match in self.word_pattern.finditer(text):
	            start, end = match.span()
	            # Copy through whatever sits between the previous match and this one.
	            if start > last_end:
	                fragments.append(('', text[last_end:start]))
	            word = match.group(1)  # None for tag / URL / e-mail matches
	            if word and word.isalpha() and self.cache(word):
	                fragments.append(('underline', word))
	            else:
	                # Correctly spelled word, non-alphabetic token, tag, URL or
	                # e-mail address: emit unchanged.
	                fragments.append(('', match.group(0)))
	            last_end = end
	        # Trailing text after the final match.
	        if last_end < len(text):
	            fragments.append(('', text[last_end:]))
	        return Transformation(FormattedText(fragments))

	# Hex digest of the buffer text seen by the most recent buffer_has_changed()
	# call; None until that function has run for the first time.
	last_text_hash = None

	# Function to hash the document text
	def hash_text(text):
	    """Return the hexadecimal MD5 digest of *text*.

	    The string is encoded with ``str.encode()``'s default encoding before
	    hashing.
	    """
	    return hashlib.md5(text.encode()).hexdigest()

	def buffer_has_changed():
	    """Report whether the session buffer text differs from the last call.

	    Hashes ``session.default_buffer.text`` and compares it with the
	    module-level ``last_text_hash``, updating it when the hash differs. The
	    function is therefore stateful: it returns True only on the first call
	    after the text changes and False on every later call with the same text.
	    The original author noted that using it as a processor condition "works
	    but breaks underlining completely".
	    """
	    global last_text_hash
	    current_hash = hash_text(session.default_buffer.text)
	    if current_hash != last_text_hash:
	        last_text_hash = current_hash
	        return True
	    return False

	# Example usage
	session = PromptSession(
	    completer=SpellCheckCompleter(),
	    # The processor is attached unconditionally. buffer_has_changed answers
	    # True only on the first call after an edit, so a ConditionalProcessor
	    # built on it disables the processor for every later evaluation of the
	    # same text and the underline disappears.
	    input_processors=[UnderlineMisspelledProcessor()],
	)

	while True:
	    try:
	        text = session.prompt('Enter text: ', complete_while_typing=True)
	    except (KeyboardInterrupt, EOFError):
	        # Ctrl-C or Ctrl-D ends the loop instead of surfacing a traceback.
	        break
	    else:
	        print(f"You entered: {text}")