Schema Idea
All checks were successful
Build and push Docker image at git tag / build (push) Successful in 44s

This commit is contained in:
2024-10-30 21:07:54 +01:00
parent 786a230e78
commit f2f9819d27
10 changed files with 1452 additions and 674 deletions

View File

@ -1,13 +1,25 @@
# Agenten Plattform
#
# (c) 2024 Magnus Bender
# Institute of Humanities-Centered Artificial Intelligence (CHAI)
# Universitaet Hamburg
# https://www.chai.uni-hamburg.de/~bender
#
# source code released under the terms of GNU Public License Version 3
# https://www.gnu.org/licenses/gpl-3.0.txt
import random
import random, os, json
from abc import abstractmethod, ABC
from enum import Enum
from typing import List, Callable
from pydantic import validate_call
from ums.utils import (
RiddleInformation, AgentMessage, RiddleDataType, RiddleData, Riddle,
RiddleStatus, RiddleSolution,
ExtractedData,
logger
)
@ -34,6 +46,7 @@ class BasicAgent(ABC):
"""
pass
@validate_call
def __init__(self, message:AgentMessage, send_message:Callable[[AgentMessage], bool]):
self._send_message = send_message
self._sub_cnt = 0
@ -57,7 +70,8 @@ class BasicAgent(ABC):
logger.debug(f"Response sent {self._response.id}")
else:
logger.debug(f"Stopped response {self._response.id}")
@validate_call
def before_response(self, response:AgentMessage, send_it:Callable[[], None]) -> bool:
"""
This method is called before the response is sent.
@ -69,12 +83,14 @@ class BasicAgent(ABC):
"""
return True
@validate_call
def message(self) -> AgentMessage:
    """
    Get the message this agent object is working on.

    Returns the `AgentMessage` stored in `self._message`.
    """
    return self._message
@validate_call
def sub_riddle(self,
riddle:Riddle, data:List[RiddleData]=[], status:RiddleStatus=None
) -> AgentMessage|bool:
@ -122,6 +138,21 @@ class BasicAgent(ABC):
"""
pass
@validate_call
def get_extracted(self, data:RiddleData) -> ExtractedData|None:
    """
    Loads the extracted data from the `data` item (i.e., from the file `data.file_extracted`).
    Returns None if no extracted data found (i.e., `data.file_extracted` is None).

    The file content is parsed as JSON and validated against `ExtractedData`.
    """
    if data.file_extracted is None:
        return None

    # Context manager makes sure the file handle is closed again,
    # also if parsing the JSON fails.
    with open(data.file_extracted, 'r') as f:
        return ExtractedData.model_validate(json.load(f))
class ExtractAgent(BasicAgent):
"""
An extraction agent.
@ -154,11 +185,33 @@ class ExtractAgent(BasicAgent):
self._response.status.extract.finished = True
@abstractmethod
@validate_call
def handle(self, data:RiddleData) -> RiddleData:
    """
    Process the item `data`, create extraction file and return `data` with populated `data.file_extracted`.

    Abstract: has no implementation here, a subclass has to provide it.

    `store_extracted` can be used to write the extraction file, it returns the
    filename to assign to `data.file_extracted`.
    """
@validate_call
def store_extracted(self, data:RiddleData, extracted:ExtractedData) -> str:
    """
    Stores the newly extracted data (in `extracted`) from `data` (i.e., `data.file_plain`)
    and returns the filename to use in `data.file_extracted`.

    The file is placed next to `data.file_plain`, the extension is replaced by `.json`.
    If a file with this name already exists, a counter is appended (`-1`, `-2`, ...)
    until an unused name is found, so existing files are not overwritten.
    """
    # splitext only looks for the extension in the last path component.
    # Slicing at rfind('.') would drop the last character if there is no dot at all
    # and would cut inside a directory name for paths like "v1.0/file".
    path_name, _ = os.path.splitext(data.file_plain)

    candidate = "{}.json".format(path_name)
    cnt = 0
    while os.path.isfile(candidate):
        cnt += 1
        candidate = "{}-{}.json".format(path_name, cnt)

    # The file is only written, never read back here.
    with open(candidate, 'w') as f:
        f.write(extracted.model_dump_json())

    return candidate
class ExtractTextAgent(ExtractAgent):
"""
An extraction agent for text, create a subclass for your agent.
@ -206,7 +259,8 @@ class SolveAgent(BasicAgent):
self._do_response = True
@abstractmethod
def handle(self, riddle:Riddle, data:RiddleData) -> RiddleSolution:
@validate_call
def handle(self, riddle: Riddle, data: List[RiddleData]) -> RiddleSolution:
"""
Solve the `riddle` using `data` and return a solution.
"""
@ -237,6 +291,7 @@ class GatekeeperAgent(BasicAgent):
self._do_response = True
@abstractmethod
@validate_call
def handle(self, solution:RiddleSolution, riddle:Riddle) -> RiddleSolution:
"""
Check the `solution` of `riddle` and return solution with populated `solution.accepted` and `solution.review`.

View File

@ -8,10 +8,10 @@
# source code released under the terms of GNU Public License Version 3
# https://www.gnu.org/licenses/gpl-3.0.txt
from typing import Callable
from typing import Callable, List
from ums.agent import ExtractAudioAgent, ExtractImageAgent, ExtractTextAgent, SolveAgent, GatekeeperAgent
from ums.utils.types import AgentMessage, Riddle, RiddleData, RiddleSolution, RiddleStatus
from ums.utils import AgentMessage, Riddle, RiddleData, RiddleSolution, RiddleStatus, ExtractedData
"""
Examples for simple agents.
@ -31,6 +31,9 @@ class MyExtractImageAgent(ExtractImageAgent):
def handle(self, data: RiddleData) -> RiddleData:
    """
    Example image extraction: prints the plain file name, stores a dummy
    extraction result and returns `data` with `data.file_extracted` set.
    """
    print("Image Process:", data.file_plain)

    # store the (dummy) extraction and remember where it was written
    data.file_extracted = self.store_extracted(
        data,
        ExtractedData(other={"info":"just a test"})
    )
    return data
class MyExtractTextAgent(ExtractTextAgent):
@ -46,7 +49,10 @@ class MyExtractTextAgent(ExtractTextAgent):
class MySolveAgent(SolveAgent):
def handle(self, riddle: Riddle, data: RiddleData) -> RiddleSolution:
def handle(self, riddle: Riddle, data: List[RiddleData]) -> RiddleSolution:
for d in data:
print(self.get_extracted(d))
if self.message().id == "test":
status = RiddleStatus()

View File

@ -38,3 +38,8 @@ from ums.utils.types import (
from ums.utils.request import ManagementRequest
from ums.utils.functions import list_shared_data, list_shared_schema
from ums.utils.schema import (
ExtractionSchema,
ExtractedData
)

View File

@ -13,15 +13,73 @@
The types are implemented using [pydantic](https://docs.pydantic.dev/).
It provides validation, allows JSON serialization and works well with [FastAPI](https://fastapi.tiangolo.com/) which is used internally for the HTTP requests between the agents and the management.
**This is work in progress!**
"""
from enum import Enum
from typing import List, Any
from typing import List, Any, Dict
from pydantic import BaseModel
class ExtractionSchema(BaseModel):
"""
This is the basic class used as superclass for all extracted information from data items.
"""
For every `ExtractionSchema` it is required that the data can be serialized to JSON.
Thus, mostly only default data types like `int, str, bool, list, dict, tuple` also including `ExtractionSchema` and `RiddleInformation` can be used here!
"""
class ExtractedContent(ExtractionSchema):
"""
An extracted content item.
"""
type : str
"""
The type, as a string, the actual string will depend on the extraction agent.
"""
content : str | Any
"""
The extracted content
"""
class ExtractedPositions(ExtractionSchema):
"""
A position (like time, coordinates, ...) where something was extracted (each position should belong to a content item).
"""
type : str
"""
The type, as a string, the actual string will depend on the extraction agent.
"""
position : str | int | Any
"""
The position, will also depend on the extraction agent.
"""
description : str | Any = None
"""
An optional description for more details.
"""
class ExtractedData(ExtractionSchema):
"""
Contains the extracted items from a data file.
"""
contents : List[ExtractedContent] = []
"""
The extracted contents (i.e., transcriptions etc.), each item here should belong to a position item at the same index.
"""
positions : List[ExtractedPositions] = []
"""
The positions of extracted contents, each item here should belong to a content item at the same index.
"""
other : Dict[str, Any] = {}
"""
Possibly more data. Use a keyword (depending on agent) and store the data there.
"""