Fix #1
All checks were successful
Build and push Docker image at git tag / build (push) Successful in 55s

This commit is contained in:
2024-10-31 16:25:37 +01:00
parent e7910021dd
commit cc4bb9a7e8
13 changed files with 1157 additions and 989 deletions

View File

@ -120,6 +120,6 @@ class WebMain():
return msg
if __name__ == "ums.management.main" and os.environ.get('SERVE', 'false') == 'true':
if __name__ == "ums.management.main" and os.environ.get('SERVE', 'false').lower() == 'true':
main = WebMain()
app = main.app

View File

@ -15,13 +15,15 @@ import requests
from fastapi import BackgroundTasks
from ums.management.db import DB
from ums.utils import AgentMessage, AgentResponse, logger
from ums.utils import AgentMessage, AgentResponse, logger, RiddleData
class MessageProcessor():
SOLUTION_MAX_TRIALS = int(os.environ.get('SOLUTION_MAX_TRIALS', 5))
MESSAGE_MAX_CONTACTS = int(os.environ.get('MESSAGE_MAX_CONTACTS', 100))
REQUIRE_FULL_EXTRACT = os.environ.get('REQUIRE_FULL_EXTRACT', 'false').lower() == 'true'
MANAGEMENT_URL = os.environ.get('MANAGEMENT_URL', 'http://127.0.0.1:80').strip().strip('/')
AGENTS_PROCESS = tuple(map(
@ -79,12 +81,23 @@ class MessageProcessor():
# do not process processed messages again
return
# now message processed!
self.db.set_processed(count=count, processed=True)
# increment contacts counter
db_message.message.contacts += 1
if db_message.message.contacts > self.MESSAGE_MAX_CONTACTS:
logger.warning(f"Message reached max number of contacts! {db_message.message.id}, {count}")
return
# combine different extractions in data items
# will update items in `db_message.message.data`
fully_extracted = self._add_extractions(db_message.message.id, db_message.message.data)
if (self.REQUIRE_FULL_EXTRACT and not fully_extracted) \
and not (db_message.message.status.extract.required and not db_message.message.status.extract.finished):
logger.warning(f"Postpone message, wait for full extract of items! {db_message.message.id}, {count}")
return
# check which step/ state the message requires the management to do
if db_message.message.status.extract.required and not db_message.message.status.extract.finished:
# send to extract agents
@ -114,8 +127,41 @@ class MessageProcessor():
# try again
self._do_again(db_message.message)
# now message processed!
self.db.set_processed(count=count, processed=True)
def _hash_data(self, d:RiddleData) -> int:
    """
    Return a hash identifying the data item `d`.

    Only `file_plain`, `type` and `prompt` go into the hash, so two items
    differing in other attributes only (e.g., `file_extracted`) get the
    same value.
    """
    identity = (d.file_plain, d.type, d.prompt)
    return hash(identity)
def _add_extractions(self, riddle_id:str, data:List[RiddleData]) -> bool:
    """
    Fill in missing extractions of `data` from messages stored for the riddle.

    Every item of `data` whose `file_extracted` is None is compared (via
    `self._hash_data`) with the data items of the rows returned by
    `self.db.iterate` for `riddle_id`. If a stored item carries a usable
    extraction (not None and not starting with "missing:"), its
    `file_extracted` is copied to the matching item(s) of `data`.
    `data` is modified in place.

    Returns True if afterwards no item of `data` is left without extraction.
    """
    # map the hash of each item without extraction to *all* of its positions,
    # identical items share a hash and must all receive the extraction
    pending = {}
    for index, item in enumerate(data):
        if item.file_extracted is None:
            pending.setdefault(self._hash_data(item), []).append(index)

    # nothing to look up, spare the database queries
    if not pending:
        return True

    # search db for extractions already available
    for row in self.db.iterate(
        id=riddle_id,
        limit=min(self.db.len(id=riddle_id), 250)
    ):
        for candidate in row.message.data:
            extracted = candidate.file_extracted
            # not extracted yet or extraction file flagged as missing ?
            if extracted is None or extracted.startswith("missing:"):
                continue

            # one of the items we still need an extraction for ?
            # (pop also removes it from the items we are waiting for)
            indices = pending.pop(self._hash_data(candidate), None)
            if indices is None:
                continue

            # copy the reference to the extracted data
            for index in indices:
                data[index].file_extracted = extracted

            # stop as soon as all extractions are found
            if not pending:
                return True

    return False # not fully extracted
def _do_again(self, message:AgentMessage):
if message.status.trial < self.SOLUTION_MAX_TRIALS: