"""Abduction demo pipeline.

Three-stage LLM pipeline for deciding whether an image shows a cat or a dog:

1. ``reasoning``  -- a text LLM produces a comparative cat/dog trait analysis.
2. ``coding``     -- a code LLM turns that analysis into a Prolog program.
3. ``grounding``  -- a vision LLM matches heatmap-highlighted image features
                     against the features used by the Prolog program.

The final step (executing the Prolog program for a verdict) is still a TODO.
"""

from pathlib import Path

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

from src.data import LabeledImage, load_data
from src.img_utils import encode_base64_resized

# When 1, skip the two slow LLM calls (reasoning + coding) and reuse the
# Prolog program cached on disk from a previous run.
TESTING = 1


def reasoning() -> str:
    """Ask a text LLM for a comparative cat-vs-dog trait analysis.

    Returns:
        The model's free-text description of image-extractable traits and
        characteristics distinguishing cats from dogs.
    """
    template_reasoning = ChatPromptTemplate.from_messages([
        ("system", "{role_reasoning}"),
        ("human", "Question: {question_reasoning}"),
    ])
    model_reasoning = OllamaLLM(model="hf.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Q4_K_M")
    reasoning_chain = template_reasoning | model_reasoning | StrOutputParser()
    description = reasoning_chain.invoke({
        "role_reasoning": "You are an expert in the classification of whether an animal is a cat or a dog",
        "question_reasoning": "I want you to do a comparative analysis of cats and dogs. Your analysis must use the inherent traits and biological characteristics of each species. You should list each of these characteristics so that an informed decision can be made about whether a given animal, e.g., in the form of an image, is a cat or a dog. Please provide a detailed analysis, focusing on traits and characteristics that can be extracted from a given image.",
    })
    return description


def coding(description: str) -> str:
    """Turn a trait analysis into a Prolog program via a code LLM.

    Args:
        description: Free-text trait analysis (output of :func:`reasoning`).

    Returns:
        The model's response, expected to contain a Prolog program.
    """
    template_coding = ChatPromptTemplate.from_messages([
        ("system", "{role_coding}"),
        ("human", "Instructions: {instruction}\n Description: {description}"),
    ])
    model_coding = OllamaLLM(model="hf.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q4_K_XL")
    coding_chain = template_coding | model_coding | StrOutputParser()
    coding_description = coding_chain.invoke({
        "role_coding": "You are an expert Prolog programmer with extended knowledge in reasoning and probabilities. Given instructions and a description, you can write a correct Prolog program that expresses the given task as a suitable logical program in Prolog",
        "instruction": "Write a logical program for the following description",
        "description": description,
    })
    return coding_description


def grounding(coding_description: str, labeled_image: LabeledImage) -> str:
    """Match heatmap-highlighted image features against the Prolog program.

    Args:
        coding_description: Text containing the Prolog program
            (output of :func:`coding`).
        labeled_image: The labeled sample under analysis.

    Returns:
        The vision model's feature-matching report with likelihoods.
    """
    # NOTE(review): `labeled_image` is currently unused — the image sent to the
    # model is the hard-coded heatmap file below. Presumably a placeholder
    # until per-sample heatmaps are generated; confirm before relying on it.
    model_vl = OllamaLLM(model="qwen2.5vl:7b")
    # Downscale/re-encode the heatmap so the base64 payload stays small.
    model_vl_ctx = model_vl.bind(
        images=[
            encode_base64_resized(
                Path(".tmp-data/highlight_spotlight_heatmap.jpg"),
                max_width=512,
                max_height=512,
                quality=70,
            )
        ]
    )
    template_grounding = ChatPromptTemplate.from_messages([
        ("system", "{role_vl}"),
        (
            "human",
            """Instructions: {instruction}
Description: {description}
""",
        ),
    ])
    grounding_chain = template_grounding | model_vl_ctx | StrOutputParser()
    return grounding_chain.invoke(
        {
            "role_vl": "You are an expert in analyzing an image to extract and match features of a given list.",
            # NOTE(review): the output-format template below ("- : - ...")
            # appears to have lost its placeholder tokens (e.g. angle-bracket
            # markers) somewhere upstream — verify against the original prompt.
            "instruction": """You are given a logic program in the following description and an image with a heatmap as input. Your task is to do the following steps:
1. Extract the list of features from the given Prolog program that contribute to deciding whether the image is a cat or a dog.
2. Match only the features highlighted by the heatmap in the given image with the features you retrieved from the Prolog program. Give a likelihood of how sure you are with your matching.
Print your result in this format: - : - ...
""",
            "description": coding_description,
        },
    )


def main() -> None:
    """Run the demo end-to-end and print the grounding result."""
    print("Starting Abduction Demo")
    labeled_images = load_data()
    labeled_image = labeled_images[1]
    # image = labeled_image.image
    # mask = labeled_image.create_mask([labeled_image.labels[0]])

    cache_path = Path(".tmp-data/coding_description")
    if TESTING == 1:
        # Reuse the cached Prolog program instead of two slow LLM round-trips.
        # read_text() replaces the original .open("r").read(), which leaked
        # the file handle.
        coding_description = cache_path.read_text()
    else:
        coding_description = coding(reasoning())
        cache_path.write_text(coding_description)

    result = grounding(coding_description, labeled_image)
    # TODO: Feed this into the Prolog program and execute to reach final verdict
    print(result)


if __name__ == "__main__":
    main()