Created: March 26, 2024, 15:24
-
-
Save mrmaheshrajput/42078fed5c1d9524b4eb8b303bcef9bb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Delimiter that separates the "Question N:" marker from the question text
# in the expected quiz format.
delimiter = "####"

# System prompt for the evaluator model: its job is to judge whether another
# assistant's output is a validly *formatted* quiz (not whether it is correct).
eval_system_prompt = f"""You are an assistant that evaluates \
whether or not an assistant is producing valid quizzes.
The assistant should be producing output in the \
format of Question N:{delimiter} <question N>?"""

# Example of a well-formed quiz response; this is the text under evaluation.
llm_response = """
Question 1:#### What is the largest telescope in space called and what material is its mirror made of?
Question 2:#### True or False: Water slows down the speed of light.
Question 3:#### What did Marie and Pierre Curie discover in Paris?
"""

# Human-turn prompt: wraps the response between [BEGIN DATA]/[END DATA] and
# asks for a single-letter verdict — Y (looks like a quiz) or N (does not).
eval_user_message = f"""You are evaluating a generated quiz \
based on the context that the assistant uses to create the quiz.
Here is the data:
[BEGIN DATA]
************
[Response]: {llm_response}
************
[END DATA]
Read the response carefully and determine if it looks like \
a quiz or test. Do not evaluate if the information is correct
only evaluate if the data is in the expected format.
Output Y if the response is a quiz, \
output N if the response does not look like a quiz.
"""
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser

# Assemble the evaluation prompt from the system/user messages defined above.
eval_prompt = ChatPromptTemplate.from_messages([
    ("system", eval_system_prompt),
    ("human", eval_user_message),
])

# temperature=0 so the Y/N verdict is as deterministic as the API allows.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
output_parser = StrOutputParser()

# LCEL pipeline: prompt -> chat model -> plain string.
eval_chain = eval_prompt | llm | output_parser

# The response under test was already interpolated into eval_user_message,
# so the chain needs no input variables.
eval_chain.invoke({})
def create_eval_chain(
    agent_response,
    llm=None,
    output_parser=None,
):
    """Build an LCEL chain that judges whether *agent_response* is a
    validly formatted quiz.

    Args:
        agent_response: Text to evaluate; interpolated into the human prompt.
        llm: Chat model to run the evaluation. Defaults to gpt-3.5-turbo at
            temperature 0, constructed lazily per call — the original code
            built ChatOpenAI(...) as a default argument, which is evaluated
            once at function-definition time (client created on import, one
            instance shared by every call).
        output_parser: Parser for the model output; defaults to
            StrOutputParser(), also constructed lazily.

    Returns:
        A runnable chain (prompt | llm | output_parser). Invoke it with {}
        (the prompt is fully self-contained) and expect "Y" or "N".
    """
    # Lazy defaults instead of stateful default arguments.
    if llm is None:
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    if output_parser is None:
        output_parser = StrOutputParser()

    delimiter = "####"
    eval_system_prompt = f"""You are an assistant that evaluates whether or not an assistant is producing valid quizzes.
The assistant should be producing output in the format of Question N:{delimiter} <question N>?"""

    eval_user_message = f"""You are evaluating a generated quiz based on the context that the assistant uses to create the quiz.
Here is the data:
[BEGIN DATA]
************
[Response]: {agent_response}
************
[END DATA]
Read the response carefully and determine if it looks like a quiz or test. Do not evaluate if the information is correct
only evaluate if the data is in the expected format.
Output Y if the response is a quiz, output N if the response does not look like a quiz.
"""
    eval_prompt = ChatPromptTemplate.from_messages([
        ("system", eval_system_prompt),
        ("human", eval_user_message),
    ])
    return eval_prompt | llm | output_parser
# A response that should FAIL the format check — it is conversational text,
# not "Question N:#### ..." lines.
known_bad_result = "There are lots of interesting facts. Tell me more about what you'd like to know"

bad_eval_chain = create_eval_chain(known_bad_result)

# Expect the evaluator to answer "N" for this wrongly formatted response.
bad_eval_chain.invoke({})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.