Documentation: https://geoplateforme.pages.gpf-tech.ign.fr/documentation

......@@ -14,19 +14,22 @@ include:
- project: geoplateforme/templates
ref: main
file:
- "/ci/docker-v2.yml"
- "/ci/docker-v4.yml"
- "/ci/dependency-track.yml"
- "/ci/pre-commit-v1.yml"
- "/ci/sonarqube.yml"
# Change pip's cache directory to be inside the project directory since we can
# only cache local items.
variables:
DOCKERHUB_PROXY: "690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub"
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
PIP_DISABLE_PIP_VERSION_CHECK: 1
PIP_EXTRA_INDEX_URL: https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_API_V4_URL}/groups/55/-/packages/pypi/
PIP_PROGRESS_BAR: "off"
PIP_RETRIES: 0
PROJECT_FOLDER: "gpf_check_md5"
PYTHON_MIN_VERSION: "3.10"
# Pip's cache doesn't store the python packages
# https://pip.pypa.io/en/stable/reference/pip_install/#caching
......@@ -41,31 +44,9 @@ cache:
- .cache/pip
# -- LINT JOBS -------------------------------------------------------------------------
git-hooks:
stage: lint
image:
name: 690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub/library/python:3.9
variables:
PRE_COMMIT_HOME: ${CI_PROJECT_DIR}/.cache/pre-commit
cache:
paths:
- ${PRE_COMMIT_HOME}
# only:
# refs:
# - merge_requests
before_script:
- apt-get update && apt-get install -y git
- python3 -m pip install -U pip
- python3 -m pip install -U setuptools wheel
- python3 -m pip install -U -r requirements/development.txt
- pre-commit install
- git fetch origin
script:
- pre-commit run --from-ref "origin/$CI_DEFAULT_BRANCH" --to-ref "$CI_COMMIT_SHA";
flake8:
stage: lint
- image: 690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub/library/python:3.9-slim-bullseye
+ image: ${DOCKERHUB_PROXY}/library/python:${PYTHON_MIN_VERSION}-slim-bullseye
only:
changes:
- "**/*.py"
......@@ -82,17 +63,7 @@ sast:
# -- TEST JOBS --------------------------------------------------------------------------
test:
stage: test
- image: 690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub/library/python:3.9-slim-bullseye
- # filter disabled because sonar job requires it and always runs...
- # only:
- # changes:
- # - "**/*.py"
- # - ".gitlab-ci.yml"
- # - sonar-project.properties
- # refs:
- # - main
- # - merge_requests
- # - tags
+ image: ${DOCKERHUB_PROXY}/library/python:${PYTHON_MIN_VERSION}-slim-bullseye
before_script:
- python3 -m pip install -U -r requirements.txt
- python3 -m pip install -U -r requirements/testing.txt
......@@ -113,7 +84,7 @@ test:
# -- BUILD JOBS -------------------------------------------------------------------------
build:pip-wheel:
stage: build
- image: 690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub/library/python:3.9-slim-bullseye
+ image: ${DOCKERHUB_PROXY}/library/python:${PYTHON_MIN_VERSION}-slim-bullseye
only:
refs:
- main
......@@ -130,7 +101,7 @@ build:pip-wheel:
build:documentation:licenses:
stage: build
- image: 690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub/library/python:3.9-slim-bullseye
+ image: ${DOCKERHUB_PROXY}/library/python:${PYTHON_MIN_VERSION}-slim-bullseye
only:
refs:
- main
......@@ -150,7 +121,7 @@ build:documentation:licenses:
build:documentation:
stage: build
- image: 690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub/library/python:3.9-slim-bullseye
+ image: ${DOCKERHUB_PROXY}/library/python:${PYTHON_MIN_VERSION}-slim-bullseye
only:
refs:
- main
......@@ -174,7 +145,7 @@ build:documentation:
# -- DEPLOYMENT JOBS -------------------------------------------------------------------
gitlab:pypi:
stage: deploy
- image: 690uto8c.gra7.container-registry.ovh.net/gpf-dockerhub/library/python:3.9-slim-bullseye
+ image: ${DOCKERHUB_PROXY}/library/python:${PYTHON_MIN_VERSION}-slim-bullseye
variables:
GIT_STRATEGY: none
TWINE_PASSWORD: "${CI_JOB_TOKEN}"
......@@ -193,7 +164,26 @@ gitlab:pypi:
- python -m twine check dist/*
- python -m twine upload --verbose dist/*
- release_job:
+ release:prepare:
stage: deploy
allow_failure: true
image:
name: ${DOCKERHUB_PROXY}/alpine/git:latest
entrypoint: [""]
script:
- echo -e '## Changelog\n' >> RELEASE_DESCRIPTION.md
- git tag -l -n9 $CI_COMMIT_TAG >> RELEASE_DESCRIPTION.md
- echo -e '\n### Merges\n' >> RELEASE_DESCRIPTION.md
# compare the previous tag with the most recent one (head -2 | tail -1 picks
# the second-most-recent tag; plain `head -2` would inject two refs here)
- git log --merges --pretty="- %s (%h)" $(git tag --sort=-creatordate | head -2 | tail -1)...$(git tag --sort=-creatordate | head -1) >> RELEASE_DESCRIPTION.md
- echo -e '\n### AUTHORS\n' >> RELEASE_DESCRIPTION.md
- git log --pretty="- %an%n- %cn" $(git tag --sort=-creatordate | head -2 | tail -1)...$(git tag --sort=-creatordate | head -1) | sort | uniq >> RELEASE_DESCRIPTION.md
artifacts:
paths:
- RELEASE_DESCRIPTION.md
only:
- tags
release:publish:
stage: deploy
image: registry.gitlab.com/gitlab-org/release-cli:latest
variables:
......@@ -205,11 +195,14 @@ release_job:
artifacts: true
- job: gitlab:pypi
artifacts: false
- job: release:prepare
artifacts: true
script:
- echo "Creating release from $CI_COMMIT_TAG"
release: # See https://docs.gitlab.com/ee/ci/yaml/#release for available properties
+ description: RELEASE_DESCRIPTION.md
+ name: $CI_COMMIT_TAG
tag_name: "$CI_COMMIT_TAG"
- description: "$CI_COMMIT_TAG"
pages:
stage: deploy
......
......@@ -6,6 +6,8 @@ repos:
hooks:
- id: check-added-large-files
args: ["--maxkb=500"]
- id: check-ast
- id: check-builtin-literals
- id: check-case-conflict
- id: check-toml
- id: check-yaml
......@@ -14,13 +16,29 @@ repos:
- id: fix-byte-order-marker
- id: fix-encoding-pragma
args: [--remove]
- id: name-tests-test
args: [--pytest-test-first]
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.275"
hooks:
- id: ruff
args: ["--fix-only", "--target-version=py310"]
- repo: https://github.com/asottile/pyupgrade
rev: v3.7.0
hooks:
- id: pyupgrade
args:
- "--py310-plus"
- repo: https://github.com/psf/black
- rev: 22.12.0
+ rev: 23.3.0
hooks:
- id: black
args: ["--target-version=py310"]
- repo: https://github.com/pycqa/isort
rev: 5.12.0
......@@ -32,13 +50,15 @@ repos:
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies: ["flake8-docstrings<2"]
args:
[
"--config=setup.cfg",
"--select=E9,F63,F7,F82,D103",
"--select=E9,F401,F63,F7,F82,D103",
"--docstring-convention=google",
]
ci:
autoupdate_schedule: quarterly
skip: []
submodules: false
......@@ -15,6 +15,13 @@ Unreleased
### Removed
-->
## 2.3.0 - 2023-06-27
- increase verbosity for end-users
- bump dependencies and dev tooling
- modernize CI
## 2.2.0 - 2023-05-12
- Update toolbelt
......@@ -24,12 +31,11 @@ Unreleased
### Added
- S3 parameters in CLI
### Changed
- Files are downloadable from S3 bucket
### Removed
## 2.0.1 - 2023-03-30
- fix uppercase letters in MD5 hashes
......@@ -56,7 +62,6 @@ Unreleased
- Wrong version, do not use
## 0.6.0 - 2022-12-20
- Rename main folder into package name to avoid conflict
......
......@@ -177,6 +177,7 @@ ogp_custom_meta_tags = [
f'<meta property="twitter:title" content="{project}" />',
]
# -- Options for Sphinx API doc ----------------------------------------------
# run api doc
def run_apidoc(_):
......
......@@ -4,7 +4,7 @@
### Requirements
- - Python >= 3.9
+ - Python >= 3.10
- Network access granted to:
- the GitLab instance
- <https://pypi.org>
......
......@@ -19,7 +19,7 @@ __all__ = [
]
__author__ = "Oslandia"
__copyright__ = "2022 - {0}, {1}".format(date.today().year, __author__)
__copyright__ = f"2022 - {date.today().year}, {__author__}"
__email__ = "info@oslandia.com"
__executable_name__ = "gpf-md5-checker"
__package_name__ = "gpf_md5_checker"
......@@ -40,7 +40,7 @@ __uri_repository__ = (
__uri_tracker__ = f"{__uri_repository__}issues/"
__uri__ = __uri_repository__
__version__ = "2.2.0"
__version__ = "2.3.0"
__version_info__ = tuple(
[
int(num) if num.isdigit() else num
......
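# A worked sketch of the version-tuple construction above; the tuple() call is
# truncated by the diff, and the `.replace("-", ".", 1).split(".")` iteration
# is an assumption about the elided lines:
__version_info__ = tuple(
    int(num) if num.isdigit() else num
    for num in "2.3.0".replace("-", ".", 1).split(".")
)
# -> (2, 3, 0); a pre-release tag such as "2.3.0-beta1" would yield (2, 3, 0, "beta1")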
......@@ -6,7 +6,6 @@
import argparse
from os import environ, getenv
from pathlib import Path
- from typing import List
# 3rd party
from gpf_entrepot_toolbelt.orchestrator.check_livraison import check_livraison_structure
......@@ -34,7 +33,7 @@ from gpf_check_md5.md5sum import run
# ################################
- def main(argv: List[str] = None):
+ def main(argv: list[str] | None = None):
"""Main CLI entrypoint."""
# create the top-level parser
parser = argparse.ArgumentParser(
......@@ -168,10 +167,12 @@ def main(argv: List[str] = None):
# -- RUN LOGIC --
# check livraison
logger.info("Vérification de la structure de la livraison")
if not check_livraison_structure(
work_dir_path=args.work_dir_path,
upload_dir_name=args.upload_dir_name,
in_json_file_name=args.input_configuration_file_name,
upload_dir_required=False,
):
error_message = (
"Upload (aka livraison) doesn't complies with the expected structure."
......@@ -185,6 +186,7 @@ def main(argv: List[str] = None):
package_exec_name=__executable_name__,
)
# open configuration file
logger.info("Ouverture du fichier de configuration")
try:
parameters = GpfOrchestratorParameters.from_json(
Path(args.work_dir_path, args.input_configuration_file_name)
......@@ -206,7 +208,7 @@ def main(argv: List[str] = None):
# Download files in upload folder
try:
upload_dir_path: Path = args.work_dir_path / args.upload_dir_name
logger.debug(f"Téléchargement des fichiers dans le dossier {upload_dir_path}")
logger.info(f"Téléchargement des fichiers dans le dossier {upload_dir_path}")
parameters.import_input_files(upload_dir_path)
except Exception as error:
error_message = "Download files into upload folder failed." f"Trace: {error}"
......@@ -225,18 +227,15 @@ def main(argv: List[str] = None):
parameters=parameters,
upload_dir_name=args.upload_dir_name,
)
- parameters.set_pipeline_status(__executable_name__, run_result[0])
- if run_result[0] != Status.SUCCESS:
- logger.error(
- f"Running {__title__} (version {__version__}) failed. Trace: {run_result[1]}"
- )
+ parameters.set_pipeline_status(__executable_name__, run_result)
except Exception as error:
- logger.error(
+ logger.user_error(
f"Running {__title__} (version {__version__}) failed. Trace: {error}"
)
parameters.set_pipeline_status(__executable_name__, Status.TECHNICAL_ERROR)
# create output file
logger.info(f"Ecriture d'un fichier de sortie dans {args.work_dir_path}")
parameters.create_output_file(args.work_dir_path)
......
......@@ -5,13 +5,12 @@
# standard lib
from argparse import ArgumentTypeError
from pathlib import Path
- from typing import Union
# 3rd party
from gpf_entrepot_toolbelt.utils.check_path import check_path
- def arg_type_path_folder(input_path: Union[Path, str]) -> Path:
+ def arg_type_path_folder(input_path: Path | str) -> Path:
"""Check an argparse argument type, expecting a valid folder path.
Args:
......
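# A minimal sketch of how the truncated checker above plausibly completes,
# assuming check_path's keyword API as it is used in md5sum.py below; the
# function name and error message are hypothetical:
from argparse import ArgumentTypeError
from pathlib import Path

from gpf_entrepot_toolbelt.utils.check_path import check_path


def arg_type_path_folder_sketch(input_path: Path | str) -> Path:
    """Argparse type: require an existing folder and return it as a Path."""
    if not check_path(
        input_path=input_path,
        must_be_a_folder=True,
        must_exists=True,
        raise_error=False,
    ):
        raise ArgumentTypeError(f"{input_path} is not a valid folder path.")
    return Path(input_path)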
......@@ -11,7 +11,6 @@ import logging
import os
from collections import namedtuple
from pathlib import Path
- from typing import Tuple
# package
from gpf_entrepot_toolbelt.orchestrator.models import GpfOrchestratorParameters
......@@ -41,6 +40,7 @@ def generate_md5_sum(filename: str, chunksize: int = 8192) -> str:
Postconditions:
Retourne un hash md5 (chaîne hexadécimale de 32 caractères)
"""
logger.user_info(f"Génère un hash md5 pour le fichier : {filename}")
with open(filename, "rb") as f:
file_hash = hashlib.md5()
while chunk := f.read(chunksize):
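# Self-contained sketch of the chunked hashing above, assuming the elided
# lines feed each chunk to the hash and return the hex digest:
import hashlib


def md5_sketch(filename: str, chunksize: int = 8192) -> str:
    file_hash = hashlib.md5()
    with open(filename, "rb") as f:
        # the walrus operator loops until read() returns b""
        while chunk := f.read(chunksize):
            file_hash.update(chunk)
    return file_hash.hexdigest()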
......@@ -62,7 +62,7 @@ def validate(filename: str, md5digest: str, chunksize: int = 8192) -> Status:
if not check_path(
input_path=filename, must_be_a_file=True, must_exists=True, raise_error=False
):
- logger.error(
+ logger.user_error(
f"{filename}: TECHNICAL ERROR: le fichier {filename} n'existe pas."
)
return Status.TECHNICAL_ERROR
......@@ -73,7 +73,7 @@ def validate(filename: str, md5digest: str, chunksize: int = 8192) -> Status:
return status_return
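# Sketch of validate()'s comparison step, which the hunks elide; it assumes
# generate_md5_sum as shown above and a case-insensitive compare (the 2.0.1
# changelog entry mentions fixing uppercase letters in MD5 hashes):
def validate_sketch(filename: str, md5digest: str, chunksize: int = 8192) -> Status:
    if generate_md5_sum(filename, chunksize) == md5digest.lower():
        return Status.SUCCESS
    return Status.FAILURE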
- def check_md5_file(filename: Path, status: dict, chunksize: int = 8192) -> int:
+ def check_md5_file(filename: Path, chunksize: int = 8192) -> int:
"""Vérifie un fichier *.md5.
Ce genre de fichier est classiquement géneré par l'utilitaire
......@@ -91,17 +91,17 @@ def check_md5_file(filename: Path, status: dict, chunksize: int = 8192) -> int:
result = 0
try:
with open(filename, "r") as checksum_file:
resultList = list()
with open(filename) as checksum_file:
li_results = []
for line in checksum_file:
line = line.strip()
if len(line) <= 32:
- # TODO: Quel log on utilise pour cela ?
- logger.error(
- f"TECHNICAL ERROR: la ligne {line} n'est pas conforme."
+ logger.user_error(
+ f"FAILURE : la longueur de la ligne {line} du fichier "
+ f"{filename} n'est pas conforme : {len(line)}<=32"
)
- result |= Status.TECHNICAL_ERROR.value
+ result |= Status.FAILURE.value
continue
checksum = line[:32]
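# Illustration of the md5sum-style line parsed above (hypothetical values):
# 32 hex characters, a two-character separator, then the source file path,
# assuming the elided code reads the file name from offset 34:
#   line = "d41d8cd98f00b204e9800998ecf8427e  data/oslandia.txt"
#   line[:32] -> "d41d8cd98f00b204e9800998ecf8427e"
#   line[34:] -> "data/oslandia.txt"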
......@@ -110,14 +110,13 @@ def check_md5_file(filename: Path, status: dict, chunksize: int = 8192) -> int:
ret = validate(
filename=sourceFilename, md5digest=checksum, chunksize=chunksize
)
- resultDict = namedtuple("Result", ["filename", "status"])
+ result_obj = namedtuple("Result", ["filename", "status"])
result |= ret.value
- resultList.append(resultDict(sourceFilename, ret.name))
- status[str(filename)] = resultList
- except EnvironmentError as e:
- # TODO: gestion erreurs si on ne peut pas :
- # où que ça va ?
- logger.error(f"TECHNICAL ERROR: {e}")
+ li_results.append(result_obj(sourceFilename, ret.name))
+ except OSError as err:
+ logger.user_error(
+ f"TECHNICAL ERROR : impossible de traiter les fichiers. Trace : {err}"
+ )
exit(os.EX_IOERR)
return result if result < 2 else 2
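# Worked example of the bitwise aggregation above, assuming Status maps
# SUCCESS=0, FAILURE=1, TECHNICAL_ERROR=2 (inferred from the OR-ing and the
# cap at 2; the enum values themselves are not shown in this diff):
result = 0
result |= 1  # one checksum mismatch -> FAILURE
result |= 2  # one unreadable file -> TECHNICAL_ERROR
result = result if result < 2 else 2  # 1 | 2 == 3 is not a valid Status; cap at 2
# Status(result) -> Status.TECHNICAL_ERROR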
......@@ -128,7 +127,7 @@ def run(
parameters: GpfOrchestratorParameters,
upload_dir_name: str = "upload",
chunk_size: int = 8192,
- ) -> Tuple[Status, dict]:
+ ) -> Status:
"""Main function running the logic.
Args:
......@@ -151,49 +150,28 @@ def run(
Status: SUCCESS \
- FAILURE: at least one md5 error calculation \
- TECHNICAL_ERROR: at least one technical error.
- dict : check descriptions for each file
"""
- # checks
- if not check_path(
- input_path=work_dir,
- must_be_a_folder=True,
- must_exists=True,
- must_be_readable=True,
- raise_error=False,
- ):
- raise ValueError(f"work_dir must be a valid folder path. {work_dir} is not.")
- if not isinstance(upload_dir_name, str):
- raise TypeError(f"work_dir must be a str instance, not {type(work_dir)}.")
# variables
upload_dir_path: Path = work_dir / upload_dir_name
result: int = 0
- status = dict()
- # check upload
- if not upload_dir_path.is_dir():
- raise ValueError(
- f"The upload folder must be a valid folder path. {upload_dir_path.resolve()} is not."
- )
- if not upload_dir_path.exists():
- raise FileExistsError(
- f"The upload folder doesn't exist: {upload_dir_path.resolve()}."
- )
for upload in parameters.input_uploads:
with os.scandir(upload_dir_path / upload._id) as it:
for entry in it:
if entry.name.endswith(".md5") and entry.is_file():
logger.debug(f"Traitement de {entry.name} :")
- result |= check_md5_file(
+ result_check = check_md5_file(
filename=Path(upload_dir_path) / upload._id / entry.name,
- status=status,
chunksize=chunk_size,
)
logger.user_info(
f"Vérification de {entry.name} : {Status(result_check).name}"
)
result |= result_check
result = result if result < 2 else 2 # Le résultat vaut 2 au maximum
- return Status(result), status
+ result_status = Status(result)
+ logger.user_info(f"Résultat global de la vérification : {result_status.name}")
+ return result_status
# -- Stand alone execution
......
......@@ -3,9 +3,9 @@
black
flake8>=5,<5.1
- flake8-bugbear>=22,<23
+ flake8-bugbear>=23.3,<24
flake8-builtins>=2,<3
- flake8-docstrings>=1.6,<1.7
+ flake8-docstrings>=1.6,<2
flake8-eradicate>=1,<2
- flake8-isort>=5,<6
- pre-commit>=2.20,<3.1
+ flake8-isort>=5,<7
+ pre-commit>=3,<4
......@@ -60,7 +60,6 @@ setup(
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Operating System :: OS Independent",
......
......@@ -7,7 +7,7 @@ sonar.projectKey=geoplateforme_scripts-verification_check-md5
# only=main
# Python versions
- sonar.python.version=3.9, 3.10, 3.11
+ sonar.python.version=3.10, 3.11
# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.
sonar.sources=gpf_check_md5
......
......@@ -35,6 +35,11 @@ bucket_name = "upload-test-check-md5"
@mock_s3
def s3_upload_stored_data(id):
"""Mock upload to stored data.
Args:
id (str): upload identifier used to locate the fixture data folder.
"""
s3 = boto3.client("s3")
s3.create_bucket(Bucket=bucket_name)
input_data_path = f"tests/fixtures/data/{id}"
......@@ -66,7 +71,7 @@ def test_cli_run_simple(capsys):
out, err = capsys.readouterr()
assert out == ""
assert err == ""
assert err != ""
@mock_s3
......@@ -113,7 +118,7 @@ def test_cli_run_env_vars(capsys):
out, err = capsys.readouterr()
assert out == ""
assert err == ""
assert err != ""
@pytest.mark.skip(reason="Ignorer les tests sur le modèle V1")
......
......@@ -17,14 +17,13 @@ from collections import namedtuple
from pathlib import Path
from gpf_entrepot_toolbelt.orchestrator.models import GpfOrchestratorParameters
from gpf_entrepot_toolbelt.orchestrator.status import Status
from gpf_entrepot_toolbelt.utils.gpf_logger import gpf_logger_script
# project
from gpf_check_md5 import md5sum
from gpf_check_md5.__about__ import __title_clean__
# ############################################################################
# ########## Classes #############
# ################################
logger = gpf_logger_script(verbosity=0, title=__title_clean__)
class TestMD5(unittest.TestCase):
......@@ -73,27 +72,10 @@ class TestMD5(unittest.TestCase):
def test_check_md5_file(self):
"""Test validate md5 file."""
- status = dict()
- resultDict = namedtuple("Result", ["filename", "status"])
+ namedtuple("Result", ["filename", "status"])
ret = md5sum.check_md5_file(
Path("tests/fixtures/data/valid_upload/all.md5"),
- status,
)
- ret_status = {
- Path("tests/fixtures/data/valid_upload/all.md5"): [
- resultDict(
- filename=Path("./tests/fixtures/livraisons/good/default/md5.txt"),
- status="SUCCESS",
- ),
- resultDict(
- filename=Path(
- "./tests/fixtures/livraisons/good/default/oslandia.txt"
- ),
- status="SUCCESS",
- ),
- ]
- }
- self.assertTrue(status, ret_status)
self.assertEqual(ret, md5sum.Status.SUCCESS.value)
def test_script_run_ok(self):
......@@ -107,9 +89,7 @@ class TestMD5(unittest.TestCase):
upload_dir_name="upload",
parameters=parameters,
)
- self.assertIsInstance(ret, tuple)
- self.assertEqual(ret[0], md5sum.Status.SUCCESS)
- self.assertIsInstance(ret[1], dict)
+ self.assertEqual(ret, md5sum.Status.SUCCESS)
def test_script_run_ko(self):
"""Test case where main script should raise an error."""
......@@ -124,30 +104,8 @@ class TestMD5(unittest.TestCase):
with self.assertRaises(TypeError):
md5sum.run(parameters=parameters, work_dir="./tests")
- # work_dir is not a valid folder path
- with self.assertRaises(ValueError):
- md5sum.run(parameters=parameters, work_dir=Path("/imaginary/folder/path/"))
- # -- Upload dir name
- # upload_dir_name is not a str instance
- with self.assertRaises(TypeError):
- md5sum.run(
- parameters=parameters,
- work_dir=Path("./tests"),
- upload_dir_name=Path("./tests/assets"),
- )
- # upload_dir_name is not a valid subfolder of workdir
- with self.assertRaises(ValueError):
- md5sum.run(
- parameters=parameters,
- work_dir=Path("./tests/"),
- upload_dir_name="hohoho",
- )
# Technical error: file not found
- status = dict()
+ status = {}
with self.assertRaises(SystemExit) as exc:
md5sum.check_md5_file(
Path("./tests/fixtures/livraisons/good/default/failed_all"), status
......