davidmezzetti · September 17, 2024 14:15 · Staffyeahh · Sep 17, 2024
diff --git a/txtai-reflection.py b/txtai-reflection.py
 import re

 from txtai import Embeddings, LLM

 # Prompt courtesy of the following link: https://github.com/codelion/optillm/blob/main/cot_reflection.py
 def cot(system, user):
    """
    Answers a query using a Chain of Thought (CoT) with self-reflection system prompt.

    Args:
        system: base system prompt, placed ahead of the CoT + reflection instructions
        user: content of the user message

    Returns:
        stripped text that follows the <output> tag (up to </output> or the end of the
        response), or the unmodified response when no <output> tag is found
    """

    # Base system prompt followed by the CoT + reflection instructions
    instructions = f"""
        {system}

        You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to answer queries. Follow these steps:

        1. Think through the problem step by step within the <thinking> tags.
        2. Reflect on your thinking to check for any errors or improvements within the <reflection> tags.
        3. Make any necessary adjustments based on your reflection.
        4. Provide your final, concise answer within the <output> tags.

        Important: The <thinking> and <reflection> sections are for your internal reasoning process only. 
        Do not include any part of the final answer in these sections. 
        The actual response to the query must be entirely contained within the <output> tags.

        Use the following format for your response:
        <thinking>
        [Your step-by-step reasoning goes here. This is your internal thought process, not the final answer.]
        <reflection>
        [Your reflection on your reasoning, checking for errors or improvements]
        </reflection>
        [Any adjustments to your thinking based on your reflection]
        </thinking>
        <output>
        [Your final, concise answer to the query. This is the only part that will be shown to the user.]
        </output>
        """

    # Chat messages: system instructions first, then the user query
    messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": user},
    ]

    # Run LLM inference
    response = llm(messages, maxlength=4096)

    # The closing tag is optional in the pattern, a response cut off mid-answer still matches
    found = re.search(r"<output>(.*?)(?:</output>|$)", response, re.DOTALL)
    if not found:
        # No <output> tag at all, fall back to the full response
        return response

    return found.group(1).strip()

 def rag(question):
    """
    Answers a question with retrieval augmented generation (RAG).

    Searches the embeddings index for the question, joins the text of the results into
    a context string and runs the filled-in prompt through cot.

    Args:
        question: question to answer, also used as the search query

    Returns:
        result of cot for the generated prompt
    """

    # Prompt template
    template = """
    Answer the following question using only the context below. Only include information
    specifically discussed.

    question: {question}
    context: {context}
    """

    # RAG context: text of each search result, one result per line
    results = embeddings.search(question)
    context = "\n".join(row["text"] for row in results)

    # RAG with CoT + Self-Reflection
    return cot(
        "You are a friendly assistant. You answer questions from users.",
        template.format(question=question, context=context),
    )

 # Load the Wikipedia embeddings index (read by rag as a module-level name)
 embeddings = Embeddings()
 embeddings.load(
     provider="huggingface-hub",
     container="neuml/txtai-wikipedia",
 )

 # LLM instance (read by cot as a module-level name)
 llm = LLM(
     "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
 )

 # Run RAG + CoT with Self-Reflection and print the answer
 print(
     rag("Tell me about how jet engines work")
 )
	import re

	from txtai import Embeddings, LLM

	# Prompt courtesy of the following link: https://github.com/codelion/optillm/blob/main/cot_reflection.py
	def cot(system, user):
	    """
	    Answers a query using a Chain of Thought (CoT) with self-reflection system prompt.

	    Args:
	        system: base system prompt, placed ahead of the CoT + reflection instructions
	        user: content of the user message

	    Returns:
	        stripped text that follows the <output> tag (up to </output> or the end of the
	        response), or the unmodified response when no <output> tag is found
	    """

	    # Base system prompt followed by the CoT + reflection instructions
	    instructions = f"""
	{system}

	You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to answer queries. Follow these steps:

	1. Think through the problem step by step within the <thinking> tags.
	2. Reflect on your thinking to check for any errors or improvements within the <reflection> tags.
	3. Make any necessary adjustments based on your reflection.
	4. Provide your final, concise answer within the <output> tags.

	Important: The <thinking> and <reflection> sections are for your internal reasoning process only.
	Do not include any part of the final answer in these sections.
	The actual response to the query must be entirely contained within the <output> tags.

	Use the following format for your response:
	<thinking>
	[Your step-by-step reasoning goes here. This is your internal thought process, not the final answer.]
	<reflection>
	[Your reflection on your reasoning, checking for errors or improvements]
	</reflection>
	[Any adjustments to your thinking based on your reflection]
	</thinking>
	<output>
	[Your final, concise answer to the query. This is the only part that will be shown to the user.]
	</output>
	"""

	    # Run LLM inference
	    response = llm(
	        [
	            {"role": "system", "content": instructions},
	            {"role": "user", "content": user},
	        ],
	        maxlength=4096,
	    )

	    # Extract and return output. The alternation must be an unescaped "|": with "\|" the
	    # pattern demands a literal pipe after </output>, so well-formed responses never match.
	    # Ending on "</output>" OR end-of-string also covers responses cut off mid-answer.
	    match = re.search(r"<output>(.*?)(?:</output>|$)", response, re.DOTALL)
	    return match.group(1).strip() if match else response

	def rag(question):
	    """
	    Answers a question with retrieval augmented generation (RAG).

	    Searches the embeddings index for the question, joins the text of the results into
	    a context string and runs the filled-in prompt through cot.

	    Args:
	        question: question to answer, also used as the search query

	    Returns:
	        result of cot for the generated prompt
	    """

	    prompt = """
	Answer the following question using only the context below. Only include information
	specifically discussed.

	question: {question}
	context: {context}
	"""

	    # System prompt
	    system = "You are a friendly assistant. You answer questions from users."

	    # RAG context: text of each search result, one result per line
	    context = "\n".join(x["text"] for x in embeddings.search(question))

	    # RAG with CoT + Self-Reflection
	    return cot(system, prompt.format(question=question, context=context))

	# Load the Wikipedia embeddings index (read by rag as a module-level name)
	embeddings = Embeddings()
	embeddings.load(
	    provider="huggingface-hub",
	    container="neuml/txtai-wikipedia",
	)

	# LLM instance (read by cot as a module-level name)
	llm = LLM(
	    "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
	)

	# Run RAG + CoT with Self-Reflection and print the answer
	print(
	    rag("Tell me about how jet engines work")
	)