Here is the documentation: https://geoplateforme.pages.gpf-tech.ign.fr/documentation

@@ -15,6 +15,15 @@ Unreleased
### Removed
-->
## 2.6.0 - 2024-01-19
### Changed
- Use Toolbelt 1.8.1, which handles uploads directly on FILESYSTEM
- Specific handling to support both S3 and FILESYSTEM uploads
- Adapt some logs (French + user/admin)
## 2.5.1 - 2023-12-21
### Added
@@ -27,6 +36,12 @@ Unreleased
- Use Toolbelt 1.7.1 for filesystem storage
## 2.4.2 - 2023-12-07
- Use Toolbelt 1.7.0, which handles uploads directly on FILESYSTEM
- Specific handling to support both S3 and FILESYSTEM uploads
- Adapt some logs (French + user/admin)
## 2.4.1 - 2023-07-26
- Use Toolbelt 0.19.6 for OpenIO fix
......
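The S3/FILESYSTEM distinction in these entries follows the storage backend each upload declares (see the storage.type field in the fixtures at the end of this diff). A minimal sketch of such a dispatch, assuming a hypothetical retrieve_upload helper; this is illustrative, not the gpf-entrepot-toolbelt API:

import shutil
from pathlib import Path

def retrieve_upload(storage_type: str, source: str, upload_dir: Path) -> Path:
    # Hypothetical helper: fetch one upload's data into upload_dir.
    upload_dir.mkdir(parents=True, exist_ok=True)
    if storage_type == "FILESYSTEM":
        # Data already sits on a mounted filesystem: a plain copy suffices.
        destination = upload_dir / Path(source).name
        shutil.copy2(source, destination)
        return destination
    if storage_type == "S3":
        # Object storage: would download the object with an S3 client (e.g. boto3).
        raise NotImplementedError("S3 download left out of this sketch")
    raise ValueError(f"Unsupported storage type: {storage_type}")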
@@ -40,7 +40,7 @@ __uri_repository__ = (
__uri_tracker__ = f"{__uri_repository__}issues/"
__uri__ = __uri_repository__
__version__ = "2.5.1"
__version__ = "2.6.0"
__version_info__ = tuple(
[
int(num) if num.isdigit() else num
......
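For context, a __version_info__ expression of this shape conventionally completes by splitting the version string; this completion is a sketch, not necessarily the file's exact code:

__version__ = "2.6.0"
__version_info__ = tuple(
    [
        int(num) if num.isdigit() else num
        for num in __version__.split(".")
    ]
)
# __version_info__ == (2, 6, 0)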
@@ -33,13 +33,31 @@ from gpf_check_md5.core import run
# ################################
def fail_and_exit_with_message(work_dir: Path, message: str):
"""
Fail and exit the process with a specified message.
Args:
work_dir (Path): the current work directory
message (str): The specific message to send
"""
fail_and_exit(
work_dir=work_dir,
error_message=message,
package_title=__title_clean__,
package_version=__version__,
package_exec_name=__executable_name__,
)
def main(argv: list[str] = None):
"""Main CLI entrypoint."""
# create the top-level parser
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=f"{__cli_usage__}\n\n"
f"Développé avec \u2764\uFE0F par {__author__}\n"
f"Développé par {__author__}\n"
f"Documentation : {__uri_homepage__}",
description=f"{__title__} {__version__} - {__summary__}",
)
@@ -140,9 +158,6 @@ def main(argv: list[str] = None):
# -- PARSE PASSED ARGUMENTS --
# get passed args and force print help if none
# args = parser.parse_args(None if sys.argv[1:] else ["-h"])
# just get passed args
args = parser.parse_args(argv)
@@ -174,17 +189,12 @@ def main(argv: list[str] = None):
in_json_file_name=args.input_configuration_file_name,
upload_dir_required=False,
):
error_message = (
"Upload (aka livraison) doesn't complies with the expected structure."
f"{args.work_dir_path}. Check the job logs for more details."
)
fail_and_exit(
work_dir=args.work_dir_path,
error_message=error_message,
package_title=__title_clean__,
package_version=__version__,
package_exec_name=__executable_name__,
fail_and_exit_with_message(
args.work_dir_path,
"La livraison ne respecte pas la structure attendue. "
f"Lire le logs du job pour plus d'informations ({args.work_dir_path})",
)
# open configuration file
logger.info("Ouverture du fichier de configuration")
try:
@@ -192,45 +202,42 @@ def main(argv: list[str] = None):
Path(args.work_dir_path, args.input_configuration_file_name)
)
except Exception as error:
error_message = (
"Reading the input configuration file "
f"({args.work_dir_path}/{args.input_configuration_file_name} failed. "
f"Trace: {error}"
)
fail_and_exit(
work_dir=args.work_dir_path,
error_message=error_message,
package_title=__title_clean__,
package_version=__version__,
package_exec_name=__executable_name__,
fail_and_exit_with_message(
args.work_dir_path,
"Lecture du fichier de configuration "
f"({args.work_dir_path}/{args.input_configuration_file_name} en erreur. "
f"Trace: {error}",
)
# Download files in upload folder
paths: dict = {}
failures: list[str] = []
try:
upload_dir_path: Path = args.work_dir_path / args.upload_dir_name
logger.info(f"Téléchargement des fichiers dans le dossier {upload_dir_path}")
parameters.import_input_files(upload_dir_path)
logger.user_info("Récupère les fichiers de la livraison")
upload_dir_path: Path = args.work_dir_path.joinpath(args.upload_dir_name)
paths, failures = parameters.retrieve_input_files(upload_dir_path)
except Exception as error:
error_message = "Download files into upload folder failed." f"Trace: {error}"
fail_and_exit(
work_dir=args.work_dir_path,
error_message=error_message,
package_title=__title_clean__,
package_version=__version__,
package_exec_name=__executable_name__,
fail_and_exit_with_message(
args.work_dir_path,
f"Récupération des fichiers de la livraison impossible. Trace: {error}",
)
if len(failures) != 0:
fail_and_exit_with_message(
args.work_dir_path,
f"Récupération des fichiers de la livraison impossible. Trace: {failures}",
)
# Run
try:
run_result = run(
work_dir=args.work_dir_path,
parameters=parameters,
upload_dir_name=args.upload_dir_name,
upload_dir_paths=paths,
)
parameters.set_pipeline_status(__executable_name__, run_result)
except Exception as error:
logger.user_error(
f"Running {__title__} (version {__version__}) failed. Trace: {error}"
logger.error(
f"Exécution {__title__} (version {__version__}) en erreur. Trace: {error}"
)
parameters.set_pipeline_status(__executable_name__, Status.TECHNICAL_ERROR)
......
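With the refactor above, every failure path goes through fail_and_exit_with_message, so a call site shrinks to a work directory plus a message. A usage sketch, assuming an illustrative work directory; the exact side effects (report and process exit) come from the Toolbelt's fail_and_exit:

from pathlib import Path

work_dir = Path("/tmp/job-workdir")  # illustrative path
fail_and_exit_with_message(
    work_dir,
    "La livraison ne respecte pas la structure attendue.",
)
# Forwards the package title/version/executable name to fail_and_exit,
# which reports the error and terminates the job.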
@@ -2,7 +2,6 @@
"""Main module logic."""
# -- IMPORTS
# standard lib
@@ -11,11 +10,14 @@ import logging
import os
from pathlib import Path
# package
# 3rd party
from gpf_entrepot_toolbelt.orchestrator.models import GpfOrchestratorParameters
from gpf_entrepot_toolbelt.orchestrator.status import Status
from gpf_entrepot_toolbelt.utils.check_path import check_path
# package
from gpf_check_md5.__about__ import __title_clean__, __version__
# -- GLOBALS
# logs
@@ -60,9 +62,7 @@ def validate(filename: str, md5digest: str, chunksize: int = 8192) -> Status:
if not check_path(
input_path=filename, must_be_a_file=True, must_exists=True, raise_error=False
):
logger.user_error(
f"{filename}: TECHNICAL ERROR: le fichier {filename} n'existe pas."
)
logger.user_error(f"Le fichier {filename} n'existe pas.")
return Status.TECHNICAL_ERROR
result = generate_md5_sum(filename, chunksize).lower() == md5digest.lower()
@@ -129,46 +129,62 @@ def check_md5_file(filename: Path, chunksize: int = 8192) -> int:
def run(
work_dir: Path,
parameters: GpfOrchestratorParameters,
upload_dir_name: str = "upload",
upload_dir_paths: dict[str, Path],
chunk_size: int = 8192,
) -> Status:
"""Main function running the logic.
Args:
work_dir (Path): Input working directory. The folder must exist.
parameters (GpfOrchestratorParameters): parameters read from input configuration\
file.
upload_dir_name (str, optional): Name (not the path) of the upload directory. \
Defaults to "upload".
parameters (GpfOrchestratorParameters): parameters read from input configuration file.
upload_dir_paths (dict): Dict mapping each upload _id to the path where that upload's data is stored.
chunk_size (int, optional): Size of the chunk-data in octets to load in memory. \
Defaults to 8192.
Returns:
Status: SUCCESS \
- FAILURE: at least one md5 error calculation \
- TECHNICAL_ERROR: at least one technical error.
Status: status of the upload check (SUCCESS, FAILURE or TECHNICAL_ERROR).
"""
# variables
upload_dir_path: Path = work_dir / upload_dir_name
result: int = 0
for upload in parameters.input_uploads:
with os.scandir(upload_dir_path / upload._id) as it:
for entry in it:
if entry.name.endswith(".md5") and entry.is_file():
result_check = check_md5_file(
filename=Path(upload_dir_path) / upload._id / entry.name,
chunksize=chunk_size,
)
logger.user_info(
f"Vérification de {entry.name} : {Status(result_check).name}"
)
result |= result_check
# getting upload directory
upload = parameters.input_uploads.pop(0)
upload_id: str = upload._id
if upload_id not in upload_dir_paths:
logger.error(f"Impossible d'accéder au donnée de l'upload {upload_id}")
return Status.TECHNICAL_ERROR
upload_dir: Path = upload_dir_paths[upload_id]
# checks
if not check_path(
input_path=upload_dir,
must_be_a_folder=True,
must_exists=True,
must_be_readable=True,
raise_error=False,
):
raise ValueError(
f"Le repertoire cible doit être valide. {upload_dir} ne l'est pas."
)
# RUN
logger.user_info(
f"Vérification {__title_clean__} ({__version__}) pour la livraison {upload_id} "
)
with os.scandir(upload_dir) as it:
for entry in it:
if entry.name.endswith(".md5") and entry.is_file():
result_check = check_md5_file(
filename=upload_dir.joinpath(entry.name),
chunksize=chunk_size,
)
logger.user_info(
f"Vérification de {entry.name} : {Status(result_check).name}"
)
result |= result_check
result = result if result < 2 else 2 # Le résultat vaut 2 au maximum
result_status = Status(result)
result_status = Status(result) if result < 2 else Status.TECHNICAL_ERROR
logger.user_info(f"Résultat global de la vérification : {result_status.name}")
return result_status
......
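Both validate and check_md5_file hash files chunk by chunk (chunksize bytes at a time) so large uploads never load fully into memory. A self-contained sketch of that technique; generate_md5_sum's actual body is not shown in this diff:

import hashlib
from pathlib import Path

def md5_of_file(path: Path, chunksize: int = 8192) -> str:
    digest = hashlib.md5()
    with path.open("rb") as stream:
        # iter() keeps calling read(chunksize) until it returns b"" at EOF.
        for chunk in iter(lambda: stream.read(chunksize), b""):
            digest.update(chunk)
    return digest.hexdigest()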
# change index-url for next lines
--extra-index-url https://gitlab.gpf-tech.ign.fr/api/v4/groups/55/-/packages/pypi/simple
gpf-entrepot-toolbelt==1.7.1
gpf-entrepot-toolbelt==1.8.1
872ac1a55fab1122f3b369ee9cd31549 md5.txt
b5871a318190397c5878ff2bd9f326d3 oslandia.tx
b5871a318190397c5878ff2bd9f326d3 oslandia.txt
772ac1a55fab1122f3b369ee9cd31549 subdir/md5.txt
b5871a318190397c5878ff2bd9f326d3 subdir/oslandia.txt
772ac1a55fab1122f3b369ee9cd31549 subdir/md5.txt
b5871a318190397c5878ff2bd9f326d3 subdir/oslandia.txt
772ac1a55fab1122f3b369ee9cd31549 md5.txt
b5871a318190397c5878ff2bd9f326d3 oslandia.txt
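The fixture lines above follow the classic md5sum layout: a 32-character hex digest, whitespace, then a path relative to the .md5 file. A parsing sketch under that assumption; check_md5_file's real parsing is not visible in this diff:

def parse_md5_line(line: str) -> tuple[str, str]:
    # "b5871a318190397c5878ff2bd9f326d3 oslandia.txt" -> (digest, relative path)
    digest, _, relative_path = line.strip().partition(" ")
    return digest.lower(), relative_path.strip()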
{
"executionId": "1231544456-1546546-164565",
"userId": "1546546231-1231544-164565",
"inputParameters": {
"parameter1_str": "string_value",
"parameter2_int": 10000
},
"targetParameters": {
"databaseInfo": {
"url": "",
"user": "gpf-ro-user",
"password": "use-me-i-m-read-only",
"schema": ""
},
"objectStorageInfo": {
"url": "s3://gpf/datasets/",
"token": "AQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT+FvwqnKwRcOIfrRh3c/LTo6UDdyJwOOvEVPvLXCrrrUtdnniCEXAMPLE/IvU1dYUg2RVAJBanLiHb4IgRmpRV3zZ3CYWFXG8C5zqx37wnOE49mRl/+OtkIKGO7fAE"
}
},
"technicalParameters": {
"databaseInfo": {
"url": "",
"user": "",
"password": "",
"schema": ""
}
}
}
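This older-style input document keeps the endpoint and credentials under targetParameters.objectStorageInfo. A standard-library sketch for reading those fields; the file name is illustrative and no Toolbelt helper is implied:

import json
from pathlib import Path

config = json.loads(Path("input.json").read_text(encoding="utf-8"))
storage_info = config["targetParameters"]["objectStorageInfo"]
endpoint, token = storage_info["url"], storage_info["token"]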
c292924b183497010f77e1d5bfb4c4b1 file.md5
\ No newline at end of file
{"_id": "1231544456-1546546-164565", "global_variables": {"postgresql": {"pass": "", "user": ""}, "swift": {"auth_url": "", "identity_api_version": "", "password": "", "project_domain_name": "", "region_name": "", "tenant_id": "", "tenant_name": "", "user_domain_name": "", "username": ""}}, "inputs": {"stored_datas": [], "uploads": [{"_id": "invalid_upload", "extent": {"east": 0, "north": 0, "south": 0, "west": 0}, "name": "", "size": 0, "srs": "", "storage": {"_id": "string", "name": "string", "type": "S3", "type_infos": {"pot_name": "upload-test-check-md5"}}, "type": "stringEnum(uploadType)", "type_infos": {}}]}, "job_name": "", "output": null, "parameters": [{"name": "", "value": ""}], "pipeline_status": {"gpf-md5-checker": "SUCCESS", "job_name1": "SUCCESS", "job_name2": "FAILURE"}}
\ No newline at end of file
{"_id": "1231544456-1546546-164565", "global_variables": {"postgresql": {"pass": "", "user": ""}, "swift": {"auth_url": "", "identity_api_version": "", "password": "", "project_domain_name": "", "region_name": "", "tenant_id": "", "tenant_name": "", "user_domain_name": "", "username": ""}}, "inputs": {"stored_datas": [], "uploads": [{"_id": "valid_upload", "extent": null, "name": "", "size": 0, "srs": "", "storage": {"_id": "string", "name": "string", "type": "S3", "type_infos": {"pot_name": "upload-test-check-md5"}}, "type": "stringEnum(uploadType)", "type_infos": {}}]}, "job_name": "", "output": null, "parameters": [{"name": "", "value": ""}], "pipeline_status": {"gpf-md5-checker": "SUCCESS", "job_name1": "SUCCESS", "job_name2": "FAILURE"}}
\ No newline at end of file
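In these new-style configurations the backend of each upload is declared at inputs.uploads[].storage.type ("S3" here; "FILESYSTEM" for the direct-disk case the changelog mentions). A standard-library sketch of reading that field; the file name is illustrative, and in practice the Toolbelt parses this document itself:

import json
from pathlib import Path

parameters = json.loads(Path("parameters.json").read_text(encoding="utf-8"))
upload = parameters["inputs"]["uploads"][0]
print(upload["_id"], upload["storage"]["type"])  # e.g. valid_upload S3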