# abduction-demo/main_xai.py

import re
from pathlib import Path

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from problog import get_evaluatable
from problog.program import PrologString

from src.data import LabeledImage, load_data
from src.img_utils import encode_base64_resized

# When set to 1, main() skips the model calls and instead reads previously
# saved intermediate outputs from the .tmp-data/ directory.
TESTING = 1

# Probability attached to program facts for which the grounding step
# reported no matching evidence.
EPSILON_PROB = 0.01


def reasoning():
    """Query the reasoning model for a cat-vs-dog trait comparison.

    Builds a system/human prompt pair, sends it to the ``qwen3-thinking``
    model served at ``http://localhost:8080/v1`` and returns the model's
    answer as a plain string.
    """
    system_role = "You are a scientific expert in the classification of whether an animal is a cat or a dog. If tasked to answer questions, you shall adhere to scientific facts, think step-by-step, and explain your decision-making process. Focus on 'why' something is done, especially for complex logic, rather than *what* is done. Your answer should be concise and direct, and avoid conversational fillers. Format your answer appropriately for better understanding."
    question = "I want you to do a comparative analysis of cats and dogs. Your analysis must use the inherent traits and biological characteristics of each species. You should list each of these characteristics so that an informed decision can be made about whether a given animal depicted in an image is a cat or a dog. Please provide a detailed analysis, focusing on traits and characteristics that can be extracted from a given image. For formatting please use a list-like fashion."

    prompt = ChatPromptTemplate.from_messages([
        ("system", "{role_reasoning}"),
        ("human", "Question: {question_reasoning}"),
    ])
    llm = ChatOpenAI(model="qwen3-thinking", base_url="http://localhost:8080/v1")

    # prompt -> model -> plain-text answer
    pipeline = prompt | llm | StrOutputParser()
    return pipeline.invoke({
        "role_reasoning": system_role,
        "question_reasoning": question,
    })


def coding(description: str):
    """Turn a textual trait description into a Prolog program.

    Sends *description* together with fixed formatting instructions to the
    ``qwen3-coder`` model served at ``http://localhost:8080/v1`` and returns
    the model's answer as a plain string.
    """
    # The indentation inside this literal is part of the prompt text.
    system_role = """You are an expert Prolog programmer with extended knowledge in reasoning and probabilities. Given instructions and a description, you can write a correct Prolog program that expresses the given question as a suitable logical program in Prolog. You shall format your answer so that it can be directly used as an input for a Prolog interpreter. Do not incorporate example facts or queries into the knowledge base; these will be added later by the user. If necessary, add comments to your program to provide explanations to the user. The proposed facts should follow the form:
            - <trait>.
        There shall only be two rules of the following form present at the end:
            - dog :- <trait1>; <trait2>; ...; <traitN>.
            - cat :- <traitA>; <traitB>; ...; <traitX>.
        Make sure to use ';' and not ',' for these two rules!
        Lastly, the following two queries should be added:
            - query(dog).
            - query(cat).
        """

    prompt = ChatPromptTemplate.from_messages([
        ("system", "{role_coding}"),
        ("human", "Instructions: {instruction}\n Description: {description}"),
    ])
    llm = ChatOpenAI(model="qwen3-coder", base_url="http://localhost:8080/v1")

    # prompt -> model -> plain-text answer
    pipeline = prompt | llm | StrOutputParser()
    return pipeline.invoke({
        "role_coding": system_role,
        "instruction": "Write a logical program for the following description:",
        "description": description,
    })


def grounding(coding_description: str, labeled_image: LabeledImage):
    """Ask the vision-language model which Prolog facts are visible in an image.

    Args:
        coding_description: Prolog program text; it is passed to the model as
            the list of candidate features.
        labeled_image: NOTE(review): currently unused — the image is always
            read from ``.tmp-data/dog.jpg``. Confirm whether the image should
            be taken from this argument instead.

    Returns:
        The model's answer as a plain string. The prompt asks for one
        ``- <likelihood>:<trait>`` line per matched feature.
    """
    model_vl = ChatOpenAI(
        model="minicpm-v-45",
        base_url="http://localhost:8080/v1",
    )

    template_grounding = ChatPromptTemplate.from_messages([
        ("system", "{role_vl}"),
        (
            "human",
            """Instructions: {instruction}

            Description: {description}
            """,
        ),
        # Filled at invoke time with an extra human message carrying the image.
        ("placeholder", "{image}"),
    ])
    grounding_chain = template_grounding | model_vl | StrOutputParser()

    image_b64 = encode_base64_resized(
        Path(".tmp-data/dog.jpg"), max_width=512, max_height=512, quality=70
    )

    return grounding_chain.invoke(
        {
            "role_vl": "You are an expert in analyzing images to extract and match features of a given list. First, you look at the list of given features (facts written in Prolog), and then you analyze the given image for these features. If you are uncertain whether a feature matches, please acknowledge this and inform the user, but do not add the feature to the list of matched features. Please follow the user's instructions precisely.",
            "instruction": """You are given a logic program in the following description and an image. Your task is to do the following steps:
                1. Extract the list of features/facts from the given Prolog program that contribute to deciding whether the image is a cat or a dog.
                2. Match only the features that are highlighted in the given image with the features (Prolog facts) you retrieved from the Prolog program. If no highlighting is visible consider the whole image. Give a likelihood, as decimal number, of how sure you are of your match. Print your result in this format:
                    - <likelihood>:<trait1>
                    - <likelihood>:<trait2>
                    - ...
            """,
            "description": coding_description,
            "image": [
                (
                    "human",
                    [
                        {
                            "type": "image_url",
                            "image_url": {
                                # "image/jpeg" is the registered media type;
                                # "image/jpg" is not.
                                "url": f"data:image/jpeg;base64,{image_b64}"
                            },
                        }
                    ],
                )
            ],
        },
    )


# Clauses that must never receive a probability annotation: the two
# classification rules, directives, and queries.
_NON_FACT_CLAUSE = re.compile(r"(?:cat|dog)\s*:-|:-|query\s*\(")


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if one is present."""
    lines = text.strip().splitlines()
    if lines and lines[0].lstrip().startswith("```"):
        lines = lines[1:]
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)


def _parse_evidence(grounding_results: str) -> list[tuple[str, str]]:
    """Extract ``(fact, probability)`` pairs from ``- <likelihood>:<trait>`` lines.

    Lines that do not follow that shape (blank lines, free-text remarks,
    likelihoods that are not numbers in [0, 1], empty traits) are ignored.
    """
    evidence = []
    for raw_line in grounding_results.splitlines():
        entry = raw_line.strip().lstrip("-*").strip()
        prob, separator, fact = entry.partition(":")
        prob = prob.strip()
        fact = fact.strip().rstrip(".").strip()
        # An empty fact would match every program line, so it is rejected.
        if not separator or not fact:
            continue
        try:
            value = float(prob)
        except ValueError:
            continue
        if not 0.0 <= value <= 1.0:
            continue
        evidence.append((fact, prob))
    return evidence


def _annotate_facts(program: str, evidence: list[tuple[str, str]], default_prob: float) -> str:
    """Prefix every fact line of *program* with ``<probability>::``.

    A fact gets the probability of the evidence entry naming it (the last
    such entry wins) or *default_prob* when no entry names it. Comments,
    blank lines, the cat/dog rules (including rule bodies spread over several
    lines), directives and queries are left untouched.
    """
    # Whole-word patterns so that e.g. "ears" does not match "pointed_ears".
    matchers = [
        (re.compile(rf"(?<!\w){re.escape(fact)}(?!\w)"), prob)
        for fact, prob in evidence
    ]

    annotated = []
    inside_rule = False
    for line in program.splitlines():
        # Only the part before a '%' comment takes part in classification
        # and matching.
        code = line.split("%", 1)[0].strip()

        if inside_rule:
            # Continuation of a multi-line rule; it ends with the closing '.'.
            inside_rule = not code.endswith(".")
            annotated.append(line)
            continue
        if len(code) <= 1:
            annotated.append(line)
            continue
        if _NON_FACT_CLAUSE.match(code):
            inside_rule = not code.endswith(".")
            annotated.append(line)
            continue

        prob = default_prob
        for pattern, fact_prob in matchers:
            if pattern.search(code):
                prob = fact_prob
        annotated.append(f"{prob}::{line.lstrip()}")
    return "\n".join(annotated)


def execute_logic_program(coding_description: str, grounding_results: str) -> tuple[float, float]:
    """Evaluate the generated program with ProbLog using the grounded likelihoods.

    The program text is stripped of a surrounding code fence (if any), each
    fact is annotated with the likelihood reported in *grounding_results* (or
    ``EPSILON_PROB`` when none was reported), the annotated program is
    printed, and the result is evaluated with ProbLog.

    Args:
        coding_description: Prolog program text, optionally wrapped in a
            Markdown code fence.
        grounding_results: Text containing ``- <likelihood>:<trait>`` lines.

    Returns:
        ``(p_cat, p_dog)``; a probability stays ``0.0`` when the
        corresponding query is absent from the evaluation result.

    Raises:
        KeyError: If the evaluation result contains a query term other than
            ``cat`` or ``dog``.
    """
    program = _strip_code_fence(coding_description)
    evidence = _parse_evidence(grounding_results)
    program_sanitized = _annotate_facts(program, evidence, EPSILON_PROB)

    # evaluate logical program
    print(program_sanitized)
    result = get_evaluatable().create_from(PrologString(program_sanitized)).evaluate()

    # get final probabilities
    p_cat, p_dog = (0.0, 0.0)
    for term, probability in result.items():
        name = str(term)
        if name == "dog":
            p_dog = probability
        elif name == "cat":
            p_cat = probability
        else:
            raise KeyError("Unknown key encountered!")
    return p_cat, p_dog


def main():
    """Run the demo pipeline and print the resulting cat/dog probabilities.

    With ``TESTING == 1`` the intermediate outputs are read from files in
    ``.tmp-data/``. Otherwise the reasoning, coding and grounding steps are
    executed in order and each output is written to ``.tmp-data/`` before the
    next step runs.
    """
    print("Starting Abduction Demo")
    labeled_images = load_data()
    labeled_image = labeled_images[1]

    cache_dir = Path(".tmp-data")
    reasoning_path = cache_dir / "reasoning_description"
    coding_path = cache_dir / "coding_description"
    grounding_path = cache_dir / "grounding_results"

    if TESTING == 1:
        # read_text() closes the file itself; a bare open().read() leaves the
        # handle open until garbage collection.
        reasoning_description = reasoning_path.read_text()
        coding_description = coding_path.read_text()
        grounding_results = grounding_path.read_text()
    else:
        reasoning_description = reasoning()
        reasoning_path.write_text(reasoning_description)

        coding_description = coding(reasoning_description)
        coding_path.write_text(coding_description)

        grounding_results = grounding(coding_description, labeled_image)
        grounding_path.write_text(grounding_results)
        print(grounding_results)

    p_cat, p_dog = execute_logic_program(coding_description, grounding_results)
    print(f"Cat Probability: {p_cat}")
    print(f"Dog Probability: {p_dog}")
    print("End Abduction Demo")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()