189 lines
8.3 KiB
Python
189 lines
8.3 KiB
Python
from pathlib import Path
|
|
|
|
from langchain_core.output_parsers import StrOutputParser
|
|
from langchain_core.prompts import ChatPromptTemplate
|
|
from langchain_openai import ChatOpenAI
|
|
from problog import get_evaluatable
|
|
from problog.program import PrologString
|
|
|
|
from src.data import LabeledImage, load_data
|
|
from src.img_utils import encode_base64_resized
|
|
|
|
# When set to 1, main() reads previously saved stage outputs from .tmp-data
# instead of calling the reasoning, coding and grounding models.
TESTING = 1
|
|
|
|
# Probability attached to a fact of the generated program when the grounding
# output contains no matching entry for it (see execute_logic_program).
EPSILON_PROB = 0.01
|
|
|
|
|
|
def reasoning():
    """Ask the reasoning model for a comparative list of cat and dog traits.

    A system/human chat prompt is sent to the ``qwen3-thinking`` model at
    ``http://localhost:8080/v1`` and the reply is passed through
    ``StrOutputParser``.

    Returns:
        The parsed model reply.
    """
    system_role = (
        "You are a scientific expert in the classification of whether an animal is a cat or a dog. "
        "If tasked to answer questions, you shall adhere to scientific facts, think step-by-step, and explain your decision-making process. "
        "Focus on 'why' something is done, especially for complex logic, rather than *what* is done. "
        "Your answer should be concise and direct, and avoid conversational fillers. "
        "Format your answer appropriately for better understanding."
    )
    question = (
        "I want you to do a comparative analysis of cats and dogs. "
        "Your analysis must use the inherent traits and biological characteristics of each species. "
        "You should list each of these characteristics so that an informed decision can be made about whether a given animal depicted in an image is a cat or a dog. "
        "Please provide a detailed analysis, focusing on traits and characteristics that can be extracted from a given image. "
        "For formatting please use a list-like fashion."
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", "{role_reasoning}"),
        ("human", "Question: {question_reasoning}"),
    ])
    llm = ChatOpenAI(model="qwen3-thinking", base_url="http://localhost:8080/v1")
    chain = prompt | llm | StrOutputParser()

    return chain.invoke({
        "role_reasoning": system_role,
        "question_reasoning": question,
    })
|
|
|
|
|
|
def coding(description: str):
    """Turn a textual trait description into a Prolog program via the coder model.

    The prompt asks for plain ``<trait>.`` facts, exactly two disjunctive
    rules (``dog :- ...`` and ``cat :- ...``) and the two queries
    ``query(dog).`` / ``query(cat).``. The request goes to the
    ``qwen3-coder`` model at ``http://localhost:8080/v1``.

    Args:
        description: Trait analysis text to be encoded as a logic program.

    Returns:
        The parsed model reply (expected to contain the Prolog program).
    """
    system_role = (
        "You are an expert Prolog programmer with extended knowledge in reasoning and probabilities. "
        "Given instructions and a description, you can write a correct Prolog program that expresses the given question as a suitable logical program in Prolog. "
        "You shall format your answer so that it can be directly used as an input for a Prolog interpreter. "
        "Do not incorporate example facts or queries into the knowledge base; these will be added later by the user. "
        "If necessary, add comments to your program to provide explanations to the user. "
        "The proposed facts should follow the form:\n"
        "- <trait>.\n"
        "There shall only be two rules of the following form present at the end:\n"
        "- dog :- <trait1>; <trait2>; ...; <traitN>.\n"
        "- cat :- <traitA>; <traitB>; ...; <traitX>.\n"
        "Make sure to use ';' and not ',' for these two rules!\n"
        "Lastly, the following two queries should be added:\n"
        "- query(dog).\n"
        "- query(cat).\n"
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", "{role_coding}"),
        ("human", "Instructions: {instruction}\n Description: {description}"),
    ])
    llm = ChatOpenAI(model="qwen3-coder", base_url="http://localhost:8080/v1")
    chain = prompt | llm | StrOutputParser()

    return chain.invoke({
        "role_coding": system_role,
        "instruction": "Write a logical program for the following description:",
        "description": description,
    })
|
|
|
|
|
|
def grounding(coding_description: str, labeled_image: LabeledImage):
    """Ask the vision model which facts of the Prolog program appear in an image.

    The prompt contains the generated program plus one base64-encoded image
    and requests one ``- <likelihood>:<trait>`` line per matched fact. The
    request goes to the ``minicpm-v-45`` model at
    ``http://localhost:8080/v1``.

    NOTE(review): ``labeled_image`` is currently not used; the image is always
    read from ``.tmp-data/dog.jpg``. Confirm whether that is intended.

    Args:
        coding_description: The Prolog program whose facts should be matched.
        labeled_image: Accepted for the caller's sake but not read here.

    Returns:
        The parsed model reply.
    """
    vision_llm = ChatOpenAI(model="minicpm-v-45", base_url="http://localhost:8080/v1")

    prompt = ChatPromptTemplate.from_messages([
        ("system", "{role_vl}"),
        ("human", "Instructions: {instruction}\n\nDescription: {description}\n"),
        # The image message is injected at invoke time through this slot.
        ("placeholder", "{image}"),
    ])
    chain = prompt | vision_llm | StrOutputParser()

    system_role = (
        "You are an expert in analyzing images to extract and match features of a given list. "
        "First, you look at the list of given features (facts written in Prolog), and then you analyze the given image for these features. "
        "If you are uncertain whether a feature matches, please acknowledge this and inform the user, but do not add the feature to the list of matched features. "
        "Please follow the user's instructions precisely."
    )
    task = (
        "You are given a logic program in the following description and an image. Your task is to do the following steps:\n"
        "1. Extract the list of features/facts from the given Prolog program that contribute to deciding whether the image is a cat or a dog.\n"
        "2. Match only the features that are highlighted in the given image with the features (Prolog facts) you retrieved from the Prolog program. If no highlighting is visible consider the whole image. Give a likelihood, as decimal number, of how sure you are of your match. Print your result in this format:\n"
        "- <likelihood>:<trait1>\n"
        "- <likelihood>:<trait2>\n"
        "- ...\n"
    )

    encoded_image = encode_base64_resized(
        Path(".tmp-data/dog.jpg"),
        max_width=512,
        max_height=512,
        quality=70,
    )
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpg;base64,{encoded_image}"},
    }
    image_messages = [("human", [image_part])]

    return chain.invoke({
        "role_vl": system_role,
        "instruction": task,
        "description": coding_description,
        "image": image_messages,
    })
|
|
|
|
|
|
def _strip_code_fence(text: str) -> str:
    """Return the lines inside the first Markdown code fence of *text*.

    Text without any fence line is returned unchanged, so an unfenced program
    is not truncated.
    """
    lines = text.splitlines()
    fences = [i for i, line in enumerate(lines) if line.strip().startswith("```")]
    if not fences:
        return text
    start = fences[0] + 1
    # An unterminated fence runs to the end of the text.
    end = fences[1] if len(fences) > 1 else len(lines)
    return "\n".join(lines[start:end])


def _parse_evidence(grounding_results: str) -> dict[str, float]:
    """Map each grounded fact name to its likelihood.

    Only lines shaped like ``- 0.95:visible_nose_ridge`` are used. Blank
    lines, commentary and likelihoods outside ``[0, 1]`` are ignored. When a
    fact is listed several times, the last entry wins.
    """
    evidence: dict[str, float] = {}
    for raw in grounding_results.splitlines():
        prob_text, sep, fact = raw.strip().lstrip("-* \t").partition(":")
        if not sep:
            continue
        try:
            prob = float(prob_text)
        except ValueError:
            continue
        # Also rejects NaN, because every comparison with NaN is false.
        if not 0.0 <= prob <= 1.0:
            continue
        fact = fact.strip().strip("`").rstrip(".").strip()
        if fact:
            evidence[fact] = prob
    return evidence


def execute_logic_program(coding_description: str, grounding_results: str) -> tuple[float, float]:
    """Attach grounded probabilities to the program's facts and evaluate it.

    Steps:
      1. Remove the Markdown code fence around the generated program, if any.
      2. Parse ``<likelihood>:<trait>`` lines from the grounding output.
      3. Prefix every fact line with ``<p>::``. ``<p>`` is the grounded
         likelihood when the fact name matches exactly, else ``EPSILON_PROB``.
         Comments, blank lines, queries, rules and directives (anything
         containing ``:-``) are left untouched.
      4. Print the resulting program and evaluate it with ProbLog.

    Args:
        coding_description: Model output containing the Prolog program.
        grounding_results: Model output listing matched facts and likelihoods.

    Returns:
        ``(p_cat, p_dog)``; a value stays ``0.0`` when the corresponding
        query is absent from the evaluation result.

    Raises:
        KeyError: If the evaluation result contains a term other than
            ``cat`` or ``dog``.
    """
    program_lines = _strip_code_fence(coding_description).splitlines()
    evidence = _parse_evidence(grounding_results)

    for idx, line in enumerate(program_lines):
        # Ignore a trailing Prolog comment when classifying the line.
        code = line.split("%", 1)[0].strip()
        if not code or ":-" in code or "query" in code:
            continue
        fact = code.rstrip(".").strip()
        # Exact-name lookup: a substring test would let "ears" hit "pointed_ears".
        prob = evidence.get(fact, EPSILON_PROB)
        program_lines[idx] = f"{prob}::{line.lstrip()}"

    program_sanitized = "\n".join(program_lines)

    # evaluate logical program
    print(program_sanitized)
    result = get_evaluatable().create_from(PrologString(program_sanitized)).evaluate()

    # get final probabilities
    p_cat, p_dog = 0.0, 0.0
    for term, probability in result.items():
        name = str(term)
        if name == "dog":
            p_dog = probability
        elif name == "cat":
            p_cat = probability
        else:
            raise KeyError("Unknown key encountered!")
    return p_cat, p_dog
|
|
|
|
|
|
def main():
    """Run the abduction demo and print the cat/dog probabilities.

    With ``TESTING == 1`` the coding and grounding outputs are read from the
    files in ``.tmp-data``. Otherwise the three model stages (reasoning,
    coding, grounding) are executed and each result is saved to
    ``.tmp-data`` before the logic program is evaluated.
    """
    print("Starting Abduction Demo")
    labeled_images = load_data()
    # NOTE(review): the demo always uses the second loaded image.
    labeled_image = labeled_images[1]

    cache_dir = Path(".tmp-data")
    reasoning_path = cache_dir / "reasoning_description"
    coding_path = cache_dir / "coding_description"
    grounding_path = cache_dir / "grounding_results"

    if TESTING == 1:
        # read_text() closes the file itself, unlike a bare open().read().
        # The cached reasoning text is not needed when the later stages are
        # loaded from disk, so it is no longer read here.
        coding_description = coding_path.read_text()
        grounding_results = grounding_path.read_text()
    else:
        # Make sure the cache directory exists before the first write.
        cache_dir.mkdir(parents=True, exist_ok=True)

        reasoning_description = reasoning()
        reasoning_path.write_text(reasoning_description)

        coding_description = coding(reasoning_description)
        coding_path.write_text(coding_description)

        grounding_results = grounding(coding_description, labeled_image)
        grounding_path.write_text(grounding_results)
        print(grounding_results)

    p_cat, p_dog = execute_logic_program(coding_description, grounding_results)
    print(f"Cat Probability: {p_cat}")
    print(f"Dog Probability: {p_dog}")
    print("End Abduction Demo")
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|