This commit is contained in:
2024-10-31 15:14:52 +01:00
parent 784488e05b
commit e7910021dd
4 changed files with 331 additions and 235 deletions

View File

@ -192,22 +192,45 @@ class ExtractAgent(BasicAgent):
"""
@validate_call
def store_extracted(self, data:RiddleData, extracted:ExtractedData, allow_overwrite:bool=True) -> str:
    """
    Stores the newly extracted data (in `extracted`) from `data` (i.e., `data.file_plain`)
    and returns the filename to use in `data.file_extracted`.

    The target name is derived from `data.file_plain` by replacing its extension with `.json`.
    If a file with that name already exists:

    - and its content equals the content to write, nothing is written and its name is returned;
    - and its content differs and `allow_overwrite=True`, the file is overwritten;
    - and its content differs and `allow_overwrite=False`, numbered names
      (`<name>-1.json`, `<name>-2.json`, ...) are tried in order. The first one that
      already holds equal content is returned as is; otherwise the first unused name is written.

    :param data: riddle whose `file_plain` path determines where the extracted file is stored
    :param extracted: extracted data; serialized with `model_dump_json()`
    :param allow_overwrite: whether an existing file with different content may be replaced
    :return: path of the file that holds the serialized `extracted` data
    """
    # splitext only strips a real extension of the last path component; slicing at
    # rfind('.') would cut off a character when there is no dot at all (rfind -> -1)
    # or truncate the path at a dot inside a directory name
    path_name, _ = os.path.splitext(data.file_plain)
    # serialize once; kept in its own name so the `data` parameter is not shadowed
    payload = extracted.model_dump_json()

    candidate = "{}.json".format(path_name)
    cnt = 0
    while os.path.isfile(candidate):
        # errors='replace': a legacy file in another encoding simply compares as
        # "different" instead of aborting with a decode error
        with open(candidate, 'r', encoding='utf-8', errors='replace') as f:
            if f.read() == payload:
                # equal content is already on disk -> no need to write again
                return candidate
        if allow_overwrite:
            # content differs, but replacing the existing file is permitted
            break
        # content differs and must be kept -> try the next numbered name
        cnt += 1
        candidate = "{}-{}.json".format(path_name, cnt)

    # write file (explicit UTF-8 so the result does not depend on the platform locale)
    with open(candidate, 'w', encoding='utf-8') as f:
        f.write(payload)
    return candidate