"""Abduction demo: LLM-described traits -> ProbLog program -> image grounding.

The pipeline has four stages:

1. ``reasoning``: a text model lists visual traits that separate cats from dogs.
2. ``coding``: a code model turns that description into a Prolog program.
3. ``grounding``: a vision model reports which facts it sees in an image.
4. ``execute_logic_program``: the facts are annotated with probabilities and
   the resulting ProbLog program is evaluated for ``cat`` and ``dog``.
"""

from pathlib import Path

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from problog import get_evaluatable
from problog.program import PrologString

from src.data import LabeledImage, load_data
from src.img_utils import encode_base64_resized

# When equal to 1, ``main`` reads cached stage outputs from ``.tmp-data``
# instead of calling the models.
TESTING = 1
# Probability attached to program facts that no grounding entry mentions.
EPSILON_PROB = 0.01


def reasoning() -> str:
    """Ask the reasoning model for a cat-vs-dog trait comparison.

    Returns:
        The model's answer as plain text.
    """
    template_reasoning = ChatPromptTemplate.from_messages([
        ("system", "{role_reasoning}"),
        ("human", "Question: {question_reasoning}"),
    ])
    model_reasoning = ChatOpenAI(
        model="qwen3-thinking",
        base_url="http://localhost:8080/v1",
    )
    reasoning_chain = template_reasoning | model_reasoning | StrOutputParser()

    description = reasoning_chain.invoke({
        "role_reasoning": (
            "You are a scientific expert in the classification of whether an "
            "animal is a cat or a dog. If tasked to answer questions, you "
            "shall adhere to scientific facts, think step-by-step, and "
            "explain your decision-making process. Focus on 'why' something "
            "is done, especially for complex logic, rather than *what* is "
            "done. Your answer should be concise and direct, and avoid "
            "conversational fillers. Format your answer appropriately for "
            "better understanding."
        ),
        "question_reasoning": (
            "I want you to do a comparative analysis of cats and dogs. Your "
            "analysis must use the inherent traits and biological "
            "characteristics of each species. You should list each of these "
            "characteristics so that an informed decision can be made about "
            "whether a given animal depicted in an image is a cat or a dog. "
            "Please provide a detailed analysis, focusing on traits and "
            "characteristics that can be extracted from a given image. "
            "For formatting please use a list-like fashion."
        ),
    })
    return description


def coding(description: str) -> str:
    """Ask the coding model to express ``description`` as a Prolog program.

    Args:
        description: Trait comparison text, e.g. the output of ``reasoning``.

    Returns:
        The model's answer as plain text.
    """
    template_coding = ChatPromptTemplate.from_messages([
        ("system", "{role_coding}"),
        ("human", "Instructions: {instruction}\n Description: {description}"),
    ])
    model_coding = ChatOpenAI(
        model="qwen3-coder",
        base_url="http://localhost:8080/v1",
    )
    coding_chain = template_coding | model_coding | StrOutputParser()

    # NOTE(review): the fact/rule templates below ("- .", "dog :- ; ; ...; .")
    # look like they lost their placeholders at some point - confirm the
    # intended wording before relying on this prompt.
    coding_description = coding_chain.invoke({
        "role_coding": (
            "You are an expert Prolog programmer with extended knowledge in "
            "reasoning and probabilities. Given instructions and a "
            "description, you can write a correct Prolog program that "
            "expresses the given question as a suitable logical program in "
            "Prolog. You shall format your answer so that it can be directly "
            "used as an input for a Prolog interpreter. Do not incorporate "
            "example facts or queries into the knowledge base; these will be "
            "added later by the user. If necessary, add comments to your "
            "program to provide explanations to the user. The proposed facts "
            "should follow the form: - . There shall only be two rules of the "
            "following form present at the end: - dog :- ; ; ...; . "
            "- cat :- ; ; ...; . Make sure to use ';' and not ',' for these "
            "two rules! Lastly, the following two queries should be added: "
            "- query(dog). - query(cat). "
        ),
        "instruction": "Write a logical program for the following description:",
        "description": description,
    })
    return coding_description


def grounding(coding_description: str, labeled_image: LabeledImage) -> str:
    """Ask the vision model which program facts are visible in an image.

    Args:
        coding_description: Prolog program text, e.g. the output of ``coding``.
        labeled_image: Currently unused, see the note in the body.

    Returns:
        The model's answer as plain text.
    """
    model_vl = ChatOpenAI(
        model="minicpm-v-45",
        base_url="http://localhost:8080/v1",
    )
    template_grounding = ChatPromptTemplate.from_messages([
        ("system", "{role_vl}"),
        ("human", "Instructions: {instruction} Description: {description} "),
        ("placeholder", "{image}"),
    ])
    grounding_chain = template_grounding | model_vl | StrOutputParser()

    # NOTE(review): ``labeled_image`` is ignored and a fixed file is always
    # sent to the model. Presumably the image should come from the argument;
    # LabeledImage's attributes are not visible here, so this is left as is.
    encoded_image = encode_base64_resized(
        Path(".tmp-data/dog.jpg"), max_width=512, max_height=512, quality=70
    )

    # NOTE(review): the output format at the end of the instruction reads
    # "- : - : - ..."; the parser in ``execute_logic_program`` expects lines
    # such as "- 0.95:some_fact" - confirm the intended wording.
    return grounding_chain.invoke({
        "role_vl": (
            "You are an expert in analyzing images to extract and match "
            "features of a given list. First, you look at the list of given "
            "features (facts written in Prolog), and then you analyze the "
            "given image for these features. If you are uncertain whether a "
            "feature matches, please acknowledge this and inform the user, "
            "but do not add the feature to the list of matched features. "
            "Please follow the user's instructions precisely."
        ),
        "instruction": (
            "You are given a logic program in the following description and "
            "an image. Your task is to do the following steps: 1. Extract the "
            "list of features/facts from the given Prolog program that "
            "contribute to deciding whether the image is a cat or a dog. "
            "2. Match only the features that are highlighted in the given "
            "image with the features (Prolog facts) you retrieved from the "
            "Prolog program. If no highlighting is visible consider the whole "
            "image. Give a likelihood, as decimal number, of how sure you are "
            "of your match. Print your result in this format: - : - : - ... "
        ),
        "description": coding_description,
        "image": [
            (
                "human",
                [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpg;base64,{encoded_image}"
                        },
                    }
                ],
            )
        ],
    })


def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a surrounding Markdown code fence, if any."""
    lines = text.strip().splitlines()
    # Only drop lines that really are fences, so an unfenced program or a
    # fenced one without a trailing newline keeps all of its clauses.
    if lines and lines[0].lstrip().startswith("```"):
        lines = lines[1:]
    if lines and lines[-1].lstrip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)


def _parse_evidence(grounding_results: str) -> list[tuple[str, str]]:
    """Extract ``(fact, probability_text)`` pairs from the grounding output.

    Lines are expected to look like ``- 0.95:visible_nose_ride``. Lines that
    have no colon, a non-numeric probability, or an empty fact are skipped.
    """
    evidence = []
    for raw_line in grounding_results.splitlines():
        entry = raw_line.strip().lstrip("-* \t")
        prob_text, separator, fact = entry.partition(":")
        prob_text = prob_text.strip()
        fact = fact.strip()
        # An empty fact would be a substring of every program line.
        if not separator or not fact:
            continue
        try:
            float(prob_text)
        except ValueError:
            # Free-text remarks from the model, not a likelihood entry.
            continue
        evidence.append((fact, prob_text))
    return evidence


def execute_logic_program(
    coding_description: str, grounding_results: str
) -> tuple[float, float]:
    """Annotate the program's facts with probabilities and evaluate it.

    Every program line that is not empty, a ``%`` comment, a ``cat :-`` or
    ``dog :-`` rule, or a query is prefixed with ``<probability>::``. The
    probability comes from the last grounding entry whose fact text occurs in
    the line, or ``EPSILON_PROB`` when no entry matches.

    Args:
        coding_description: Prolog program text, optionally wrapped in a
            Markdown code fence.
        grounding_results: Grounding output, one ``- <probability>:<fact>``
            entry per line.

    Returns:
        ``(p_cat, p_dog)``; a value stays ``0.0`` if the evaluation result
        has no entry for it.

    Raises:
        KeyError: If the evaluation returns a term other than ``cat``/``dog``.
    """
    program = _strip_code_fence(coding_description)
    evidence = _parse_evidence(grounding_results)

    program_lines = program.splitlines()
    for idx, line in enumerate(program_lines):
        if len(line) <= 1 or line.startswith("%"):
            continue
        if line.startswith(("cat :-", "dog :-")) or "query" in line:
            continue
        probability = EPSILON_PROB
        for fact, prob in evidence:
            if fact in line:
                probability = prob
        program_lines[idx] = f"{probability}::" + line
    program_sanitized = "\n".join(program_lines)

    # Show the exact program handed to ProbLog.
    print(program_sanitized)
    result = get_evaluatable().create_from(PrologString(program_sanitized)).evaluate()

    p_cat, p_dog = (0.0, 0.0)
    for term, probability in result.items():
        if str(term) == "dog":
            p_dog = probability
        elif str(term) == "cat":
            p_cat = probability
        else:
            raise KeyError("Unknown key encountered!")
    return p_cat, p_dog


def main():
    """Run the demo once and print the cat and dog probabilities.

    With ``TESTING == 1`` the three stage outputs are read from ``.tmp-data``;
    otherwise they are generated by the models and written there.
    """
    print("Starting Abduction Demo")
    labeled_images = load_data()
    labeled_image = labeled_images[1]

    reasoning_path = Path(".tmp-data/reasoning_description")
    coding_path = Path(".tmp-data/coding_description")
    grounding_path = Path(".tmp-data/grounding_results")

    if TESTING == 1:
        # read_text() closes the file, unlike a bare open().read().
        reasoning_description = reasoning_path.read_text()
        coding_description = coding_path.read_text()
        grounding_results = grounding_path.read_text()
    else:
        reasoning_description = reasoning()
        reasoning_path.write_text(reasoning_description)
        coding_description = coding(reasoning_description)
        coding_path.write_text(coding_description)
        grounding_results = grounding(coding_description, labeled_image)
        grounding_path.write_text(grounding_results)

    print(grounding_results)
    p_cat, p_dog = execute_logic_program(coding_description, grounding_results)
    print(f"Cat Probability: {p_cat}")
    print(f"Dog Probability: {p_dog}")
    print("End Abduction Demo")


if __name__ == "__main__":
    main()