Skip to content

Instantly share code, notes, and snippets.

@CurtisAccelerate
Created September 3, 2023 16:01
Show Gist options
  • Save CurtisAccelerate/b240a0088b1baeb588dd7056791b0eb9 to your computer and use it in GitHub Desktop.
Save CurtisAccelerate/b240a0088b1baeb588dd7056791b0eb9 to your computer and use it in GitHub Desktop.
Search Helper for GPT-4 Advanced Data Analysis
import re
from difflib import SequenceMatcher
# Text-based Case-Insensitive Search Function with Snippet Extraction
def text_search_focused(query: str, text: str, snippet_size: int = 1000, max_results: int = 20) -> list:
    """Find case-insensitive occurrences of *query* in *text* and return snippets.

    Matches are literal (regex metacharacters in *query* are escaped),
    non-overlapping, and reported in order of appearance. For every match a
    window of roughly ``snippet_size`` characters is cut around the position
    where the match starts: ``snippet_size // 2`` characters before it and
    ``snippet_size // 2`` characters from it onward, clamped to the bounds of
    *text*.

    Snippets are sliced from the original *text*, so their casing is
    preserved and the reported indices refer to positions in the text the
    caller passed in.

    Args:
        query: Substring to look for. An empty query yields no results.
        text: Text to search.
        snippet_size: Approximate total width of each snippet window.
        max_results: Maximum number of matches to report.

    Returns:
        A list of dicts, one per match, with keys ``'snippet'`` (the
        extracted text), ``'start_idx'`` and ``'end_idx'`` (the snippet's
        slice bounds within *text*).
    """
    results = []
    # An empty query matches at every position; report nothing instead of
    # returning max_results meaningless snippets.
    if not query or max_results <= 0:
        return results

    half_window = snippet_size // 2
    text_len = len(text)
    # Matching against the original text (rather than a lowercased copy)
    # keeps indices valid even when lowercasing would change string length.
    pattern = re.compile(re.escape(query), re.IGNORECASE)

    for match in pattern.finditer(text):
        if len(results) >= max_results:
            break
        match_start = match.start()
        snippet_start_idx = max(0, match_start - half_window)
        snippet_end_idx = min(match_start + half_window, text_len)
        results.append({
            'snippet': text[snippet_start_idx:snippet_end_idx],
            'start_idx': snippet_start_idx,
            'end_idx': snippet_end_idx,
        })
    return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment