ahasha · July 15, 2024 23:56
diff --git a/actions.py b/actions.py
 class Action(BaseModel):
    """Information about an action item described in the document."""

    # NOTE(review): the Field descriptions below are presumably surfaced to
    # the model as part of the structured-output schema, so their wording is
    # prompt text rather than plain documentation - confirm before editing.

    # Identifier the source document assigns to the action, when it has one.
    # The explicit default=None keeps the field optional (as `owner` already
    # is); an Optional annotation alone leaves it required under Pydantic v2.
    id: Optional[str] = Field(
        default=None,
        description="If a Unique Identifier for the action is given in the text, record it here.",
    )
    # Category of the action. The wording talks about actions (not goals) and
    # lists the same category codes that Goal.action_category lists.
    action_category: str = Field(
        description="""
        The Action Category the action is associated with, e.g. Buildings, Transportation, Waste, Governance, Conservation, or Energy.
        Select Energy only if no other more specific emission category is mentioned, or if the action pertains specifically to electricity.
        Governance is for actions related to municipal staffing, policies, or processes to support execution of the Climate Action Plan.
        Should be a valid ActionCategory value: G, Z, B, E, T, W, C
        """
    )
    # Responsible parties; None when the text names nobody.
    owner: Optional[List[str]] = Field(
        default=None,
        description="The entities or individuals responsible for the action, if mentioned in the text",
    )
    # Short summary of the action.
    description: str = Field(description="A summary description of the action")
    # Verbatim source passage the summary is based on.
    context: str = Field(
        description="Verbatim text from the provided document on which the Action description is based"
    )
    # Page of the document the context passage comes from.
    context_page: int = Field(
        description="The page number of the document that the context string was drawn from."
    )
diff --git a/extract_with_backoff.py b/extract_with_backoff.py
 def get_extraction_results_with_backoff(chain, text):
    """Invoke ``chain`` on ``text``, halving the text and retrying when parsing fails.

    Args:
        chain: Object whose ``invoke({"text": ...})`` produces the extraction
            result.
        text: The document text to run the extraction on.

    Returns:
        Whatever ``chain.invoke`` returns when it succeeds. After a parse
        failure, a ``Results`` holding the goals and actions of both halves,
        first half first.

    Raises:
        OutputParserException: When parsing fails for a text of fewer than two
            characters, which cannot be split into smaller pieces.
    """
    try:
        return chain.invoke({"text": text})
    except OutputParserException as e:
        logger.error("A validation error occurred: %s", e)
        # With fewer than two characters the "second half" is the whole text
        # again, so recursing would never terminate. Surface the failure.
        if len(text) < 2:
            raise
        logger.error("Retrying with content split in half")
        midpoint = len(text) // 2
        result1 = get_extraction_results_with_backoff(chain, text[:midpoint])
        result2 = get_extraction_results_with_backoff(chain, text[midpoint:])
        # Results declares both lists as Optional, so treat None as empty
        # before concatenating.
        return Results(
            goals=(result1.goals or []) + (result2.goals or []),
            actions=(result1.actions or []) + (result2.actions or []),
        )
diff --git a/goals.py b/goals.py
 class Goal(BaseModel):
    """Information about a strategic planning Goal.

    Goals are quantified outcomes necessary to meet emissions targets and resilience goals.
    If the goal does not mention a quantitative target and a target year, you should skip it or classify it as an Action instead.
    """

    # NOTE(review): the docstring and Field descriptions are presumably
    # surfaced to the model as part of the structured-output schema, so they
    # are kept verbatim - confirm before rewording.

    # Identifier the source document assigns to the goal, when it has one.
    # The explicit default=None keeps the field optional (as `year` already
    # is); an Optional annotation alone leaves it required under Pydantic v2.
    document_goal_id: Optional[str] = Field(
        default=None,
        description="If a unique identifier for the Goal ID is given in the text record it here.",
    )
    # Category the goal belongs to, expressed as one of the listed codes.
    action_category: str = Field(
        description="""
        The Action Category the goal is associated with, e.g. Buildings, Transportation, Waste, Governance, Conservation, or Energy.
        Select Energy only if no other more specific emission category is mentioned, or if the goal pertains specifically to electricity.
        Governance is for goals related to municipal staffing, policies, or processes to support execution of the Climate Action Plan.
        Should be a valid ActionCategory value: G, Z, B, E, T, W, C
        """
    )
    # Target year, validated to lie within 1990..2100 when provided.
    year: Optional[int] = Field(
        default=None,
        description="The year by which the goal should be achieved.",
        ge=1990,
        le=2100,
    )
    # Short summary of the goal.
    description: str = Field(
        description="A summary description of the goal, which must include a quantitative target and a target year.",
    )
    # Verbatim source passage the summary is based on.
    context: str = Field(
        description="Verbatim text from the provided document on which the Goal description is based"
    )
    # Page of the document the context passage comes from.
    context_page: int = Field(
        description="The page number of the document that the context string was drawn from."
    )
diff --git a/results.py b/results.py
 from langchain_openai import ChatOpenAI

 # Container model pairing the goals and the actions extracted from a text.
 # It is the schema handed to llm.with_structured_output() below, which is why
 # it is documented with comments instead of a class docstring.
 class Results(BaseModel):
    # Extracted Goal entries; None is accepted, the default is an empty list.
    goals: Optional[List[Goal]] = []
    # Extracted Action entries; None is accepted, the default is an empty list.
    actions: Optional[List[Action]] = []
        
 # Chat model created with temperature=0; `llm_model` is defined outside this
 # excerpt.
 llm = ChatOpenAI(model=llm_model, temperature=0)
 # Prompt template returned by get_prompt_template() (defined elsewhere).
 prompt = get_prompt_template()
 # Pipe the prompt into the model configured for structured output with the
 # Results schema.
 chain = prompt | llm.with_structured_output(schema=Results)
	class Action(BaseModel):
		"""Information about an action item described in the document."""

		# NOTE(review): the Field descriptions below are presumably surfaced to
		# the model as part of the structured-output schema, so their wording is
		# prompt text rather than plain documentation - confirm before editing.

		# Identifier the source document assigns to the action, when it has one.
		# The explicit default=None keeps the field optional (as `owner` already
		# is); an Optional annotation alone leaves it required under Pydantic v2.
		id: Optional[str] = Field(
			default=None,
			description="If a Unique Identifier for the action is given in the text, record it here.",
		)
		# Category of the action. The wording talks about actions (not goals)
		# and lists the same category codes that Goal.action_category lists.
		action_category: str = Field(
			description="""
	The Action Category the action is associated with, e.g. Buildings, Transportation, Waste, Governance, Conservation, or Energy.
	Select Energy only if no other more specific emission category is mentioned, or if the action pertains specifically to electricity.
	Governance is for actions related to municipal staffing, policies, or processes to support execution of the Climate Action Plan.
	Should be a valid ActionCategory value: G, Z, B, E, T, W, C
	"""
		)
		# Responsible parties; None when the text names nobody.
		owner: Optional[List[str]] = Field(
			default=None,
			description="The entities or individuals responsible for the action, if mentioned in the text",
		)
		# Short summary of the action.
		description: str = Field(description="A summary description of the action")
		# Verbatim source passage the summary is based on.
		context: str = Field(
			description="Verbatim text from the provided document on which the Action description is based"
		)
		# Page of the document the context passage comes from.
		context_page: int = Field(
			description="The page number of the document that the context string was drawn from."
		)
	def get_extraction_results_with_backoff(chain, text):
		"""Invoke ``chain`` on ``text``, halving the text and retrying when parsing fails.

		Args:
			chain: Object whose ``invoke({"text": ...})`` produces the
				extraction result.
			text: The document text to run the extraction on.

		Returns:
			Whatever ``chain.invoke`` returns when it succeeds. After a parse
			failure, a ``Results`` holding the goals and actions of both
			halves, first half first.

		Raises:
			OutputParserException: When parsing fails for a text of fewer than
				two characters, which cannot be split into smaller pieces.
		"""
		try:
			return chain.invoke({"text": text})
		except OutputParserException as e:
			logger.error("A validation error occurred: %s", e)
			# With fewer than two characters the "second half" is the whole
			# text again, so recursing would never terminate. Surface the
			# failure instead.
			if len(text) < 2:
				raise
			logger.error("Retrying with content split in half")
			midpoint = len(text) // 2
			result1 = get_extraction_results_with_backoff(chain, text[:midpoint])
			result2 = get_extraction_results_with_backoff(chain, text[midpoint:])
			# Results declares both lists as Optional, so treat None as empty
			# before concatenating.
			return Results(
				goals=(result1.goals or []) + (result2.goals or []),
				actions=(result1.actions or []) + (result2.actions or []),
			)
	class Goal(BaseModel):
		"""Information about a strategic planning Goal.

	Goals are quantified outcomes necessary to meet emissions targets and resilience goals.
	If the goal does not mention a quantitative target and a target year, you should skip it or classify it as an Action instead.
	"""

		# NOTE(review): the docstring and Field descriptions are presumably
		# surfaced to the model as part of the structured-output schema, so
		# their text is kept verbatim - confirm before rewording.

		# Identifier the source document assigns to the goal, when it has one.
		# The explicit default=None keeps the field optional (as `year` already
		# is); an Optional annotation alone leaves it required under Pydantic v2.
		document_goal_id: Optional[str] = Field(
			default=None,
			description="If a unique identifier for the Goal ID is given in the text record it here.",
		)
		# Category the goal belongs to, expressed as one of the listed codes.
		action_category: str = Field(
			description="""
	The Action Category the goal is associated with, e.g. Buildings, Transportation, Waste, Governance, Conservation, or Energy.
	Select Energy only if no other more specific emission category is mentioned, or if the goal pertains specifically to electricity.
	Governance is for goals related to municipal staffing, policies, or processes to support execution of the Climate Action Plan.
	Should be a valid ActionCategory value: G, Z, B, E, T, W, C
	"""
		)
		# Target year, validated to lie within 1990..2100 when provided.
		year: Optional[int] = Field(
			default=None,
			description="The year by which the goal should be achieved.",
			ge=1990,
			le=2100,
		)
		# Short summary of the goal.
		description: str = Field(
			description="A summary description of the goal, which must include a quantitative target and a target year.",
		)
		# Verbatim source passage the summary is based on.
		context: str = Field(
			description="Verbatim text from the provided document on which the Goal description is based"
		)
		# Page of the document the context passage comes from.
		context_page: int = Field(
			description="The page number of the document that the context string was drawn from."
		)
	from langchain_openai import ChatOpenAI

	# Container model pairing the goals and the actions extracted from a text.
	# Documented with comments rather than a class docstring so the model's
	# generated schema stays as it was.
	class Results(BaseModel):
		# Extracted Goal entries; None is accepted, the default is an empty list.
		goals: Optional[List[Goal]] = []
		# Extracted Action entries; None is accepted, the default is an empty list.
		actions: Optional[List[Action]] = []

	# Chat model created with temperature=0; `llm_model` is defined outside
	# this excerpt.
	llm = ChatOpenAI(model=llm_model, temperature=0)
	# Prompt template returned by get_prompt_template() (defined elsewhere).
	prompt = get_prompt_template()
	# Pipe the prompt into the model configured for structured output with the
	# Results schema. The operator is a bare `|`: a backslash in front of it
	# is not valid Python.
	chain = prompt | llm.with_structured_output(schema=Results)