[go: nahoru, domu]

Skip to content

Commit

Permalink
format and more typings [utils][force ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
JaMe76 committed Jun 26, 2024
1 parent 1577c8a commit e2c416a
Show file tree
Hide file tree
Showing 33 changed files with 85 additions and 75 deletions.
2 changes: 0 additions & 2 deletions deepdoctection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,6 @@
logger.debug(LoggingRecord(msg=env_info))




# Direct imports for type-checking
if TYPE_CHECKING:
from .analyzer import *
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/dataflow/custom_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
from tabulate import tabulate
from termcolor import colored

from ..utils.context import timed_operation
from ..utils._types import JsonDict, Pathlike
from ..utils.context import timed_operation
from ..utils.error import FileExtensionError
from ..utils.identifier import get_uuid_from_str
from ..utils.pdf_utils import PDFStreamer
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/datapoint/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import numpy as np
from numpy import uint8

from ..utils._types import PixelValues, JsonDict, Pathlike
from ..utils._types import JsonDict, Pathlike, PixelValues
from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
from ..utils.identifier import get_uuid, is_uuid_like
from ..utils.settings import ObjectTypes, get_type
Expand Down
6 changes: 3 additions & 3 deletions deepdoctection/datapoint/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

import numpy as np

from ..utils._types import PixelValues, JsonDict, Pathlike
from ..utils._types import JsonDict, Pathlike, PixelValues
from ..utils.error import AnnotationError, ImageError
from ..utils.logger import LoggingRecord, logger
from ..utils.settings import (
Expand Down Expand Up @@ -646,11 +646,11 @@ def from_image(
if image_dict:
image = Image.from_dict(**image_dict)
layout_ann.image = cls.from_image(
image_orig= image,
image_orig=image,
text_container=text_container,
floating_text_block_categories=floating_text_block_categories,
include_residual_text_container=include_residual_text_container,
base_page=page
base_page=page,
)
layout_ann.base_page = base_page if base_page is not None else page
page.dump(layout_ann)
Expand Down
3 changes: 1 addition & 2 deletions deepdoctection/datasets/instances/publaynet.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,7 @@ def build(self, **kwargs: Union[str, int]) -> DataFlow:
df = SerializerCoco.load(path, max_datapoints=max_datapoints)

# Map
df = MapDataComponent(df, lambda dp: (self.get_workdir() / self.get_split(split) / dp).as_posix(),
"file_name")
df = MapDataComponent(df, lambda dp: (self.get_workdir() / self.get_split(split) / dp).as_posix(), "file_name")
coco_mapper = coco_to_image( # pylint: disable=E1120 # 259
self.categories.get_categories(init=True),
load_image,
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/extern/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union

from ..utils._types import PixelValues, JsonDict, Requirement
from ..utils._types import JsonDict, PixelValues, Requirement
from ..utils.identifier import get_uuid_from_str
from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type

Expand Down
9 changes: 5 additions & 4 deletions deepdoctection/extern/doctrocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
from lazy_imports import try_import

from ..utils._types import PixelValues, Requirement
from ..utils.error import DependencyError
from ..utils.env_info import ENV_VARS_TRUE
from ..utils.error import DependencyError
from ..utils.file_utils import (
get_doctr_requirement,
get_pytorch_requirement,
Expand Down Expand Up @@ -253,7 +253,7 @@ def predict(self, np_img: PixelValues) -> List[DetectionResult]:
:param np_img: image as numpy array
:return: A list of DetectionResult
"""
return doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)
return doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)

@classmethod
def get_requirements(cls) -> List[Requirement]:
Expand Down Expand Up @@ -394,8 +394,9 @@ def load_model(
_load_model(path_weights, doctr_predictor, device, lib)

@staticmethod
def build_model(architecture: str, lib: Literal["TF", "PT"],
path_config_json: Optional[str] = None) -> "RecognitionPredictor":
def build_model(
architecture: str, lib: Literal["TF", "PT"], path_config_json: Optional[str] = None
) -> "RecognitionPredictor":
"""Building the model"""

# inspired and adapted from https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/extern/pdftext.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

from lazy_imports import try_import

from ..utils.context import save_tmp_file
from ..utils._types import Requirement
from ..utils.context import save_tmp_file
from ..utils.file_utils import get_pdfplumber_requirement
from ..utils.settings import LayoutType, ObjectTypes
from .base import DetectionResult, PdfMiner
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/extern/tessocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@

from packaging.version import InvalidVersion, Version, parse

from ..utils.context import save_tmp_file, timeout_manager
from ..utils._types import PixelValues, Requirement
from ..utils.context import save_tmp_file, timeout_manager
from ..utils.error import DependencyError, TesseractError
from ..utils.file_utils import _TESS_PATH, get_tesseract_requirement
from ..utils.metacfg import config_to_cli_str, set_config_by_yaml
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/extern/texocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from lazy_imports import try_import

from ..datapoint.convert import convert_np_array_to_b64_b
from ..utils._types import PixelValues, JsonDict, Requirement
from ..utils._types import JsonDict, PixelValues, Requirement
from ..utils.file_utils import get_boto3_requirement
from ..utils.logger import LoggingRecord, logger
from ..utils.settings import LayoutType, ObjectTypes
Expand Down
18 changes: 11 additions & 7 deletions deepdoctection/extern/tp/tfutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from __future__ import annotations

import os
from typing import Optional, Union, ContextManager
from typing import ContextManager, Optional, Union

from lazy_imports import try_import

Expand Down Expand Up @@ -89,13 +89,17 @@ def get_tf_device(device: Optional[Union[str, tf.device]] = None) -> tf.device:
if os.environ.get("USE_CUDA", "False") in ENV_VARS_TRUE:
device_names = [device.name for device in tf.config.list_logical_devices(device_type="GPU")]
if not device_names:
raise EnvironmentError("USE_CUDA is set but tf.config.list_logical_devices cannot find anyx device. "
"It looks like there is an issue with your Tensorlfow installation. "
"You can LOG_LEVEL='DEBUG' to get more information about installation.")
raise EnvironmentError(
"USE_CUDA is set but tf.config.list_logical_devices cannot find anyx device. "
"It looks like there is an issue with your Tensorlfow installation. "
"You can LOG_LEVEL='DEBUG' to get more information about installation."
)
return tf.device(device_names[0])
device_names = [device.name for device in tf.config.list_logical_devices(device_type="CPU")]
if not device_names:
raise EnvironmentError("Cannot find any CPU device. It looks like there is an issue with your "
"Tensorflow installation. You can LOG_LEVEL='DEBUG' to get more information about "
"installation.")
raise EnvironmentError(
"Cannot find any CPU device. It looks like there is an issue with your "
"Tensorflow installation. You can LOG_LEVEL='DEBUG' to get more information about "
"installation."
)
return tf.device(device_names[0])
2 changes: 1 addition & 1 deletion deepdoctection/extern/tp/tpfrcnn/preproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from lazy_imports import try_import

from ....datapoint.convert import box_to_point4, point4_to_box
from ....utils._types import PixelValues, JsonDict
from ....utils._types import JsonDict, PixelValues
from ....utils.error import MalformedData
from ....utils.logger import log_once
from .common import filter_boxes_inside_shape, np_iou
Expand Down
5 changes: 2 additions & 3 deletions deepdoctection/mapper/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from __future__ import annotations

import ast
import os
import json
import os
from typing import List, Mapping, Optional, Sequence, Union

from lazy_imports import try_import
Expand Down Expand Up @@ -197,7 +197,6 @@ def xml_to_dict(dp: JsonDict, xslt_obj: etree.XSLT) -> JsonDict:
"""

output = str(xslt_obj(dp["xml"]))
output = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
dp.pop("xml")
dp["json"] = json.loads(output)
dp["json"] = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
return dp
2 changes: 1 addition & 1 deletion deepdoctection/pipe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
from ..dataflow import DataFlow, MapData
from ..datapoint.image import Image
from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner, TextRecognizer
from ..utils.context import timed_operation
from ..utils._types import JsonDict
from ..utils.context import timed_operation
from ..utils.identifier import get_uuid_from_str
from .anngen import DatapointManager

Expand Down
1 change: 0 additions & 1 deletion deepdoctection/pipe/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from __future__ import annotations

import os

from copy import copy, deepcopy
from typing import List, Literal, Mapping, Optional, Sequence, Union

Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/pipe/concurrency.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@

from ..dataflow import DataFlow, MapData
from ..datapoint.image import Image
from ..utils.context import timed_operation
from ..utils._types import JsonDict, QueueType, TqdmType
from ..utils.context import timed_operation
from ..utils.tqdm import get_tqdm
from .base import PipelineComponent
from .common import ImageParsingService, PageParsingService
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/pipe/sub_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from ..datapoint.box import crop_box_from_image
from ..datapoint.image import Image
from ..extern.base import DetectionResult, ObjectDetector, PdfMiner
from ..utils._types import PixelValues, JsonDict
from ..utils._types import JsonDict, PixelValues
from ..utils.settings import ObjectTypes, Relationships
from ..utils.transform import PadTransform
from .base import PredictorPipelineComponent
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/pipe/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from ..datapoint.image import Image
from ..extern.base import ObjectDetector, PdfMiner, TextRecognizer
from ..extern.tessocr import TesseractOcrDetector
from ..utils._types import PixelValues, JsonDict
from ..utils._types import JsonDict, PixelValues
from ..utils.error import ImageError
from ..utils.settings import PageType, TypeOrStr, WordType, get_type
from .base import PredictorPipelineComponent
Expand Down
17 changes: 11 additions & 6 deletions deepdoctection/utils/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@
Typing sheet for the whole package
"""

from pathlib import Path
import queue

from pathlib import Path
from typing import TYPE_CHECKING, Any, Protocol, Type, TypeVar, Union
from typing_extensions import TypeAlias

import numpy.typing as npt
import tqdm
from numpy import uint8
from typing_extensions import TypeAlias


# Type for a general dataclass
Expand All @@ -41,7 +41,7 @@ class IsDataclass(Protocol): # pylint: disable=R0903
# Numpy image type
PixelValues = npt.NDArray[uint8]

# typing for curry decorator
# Typing for curry decorator
DP = TypeVar("DP")
S = TypeVar("S")
T = TypeVar("T")
Expand All @@ -57,12 +57,17 @@ class IsDataclass(Protocol): # pylint: disable=R0903
QueueType = queue.Queue
TqdmType = tqdm.tqdm

JsonDict = dict[str, Any]
# A dict converted from a generic JSON object
JsonDict = dict[str, object]


# mainly used in utils
# Type for requirements. A requirement is a tuple of a string, a bool that is True if the package is available and
# an error message
PackageAvailable: TypeAlias = bool
ErrorMsg: TypeAlias = str
Requirement = tuple[str, PackageAvailable, ErrorMsg]

Pathlike = Union[str,Path]
# A type to collect key-value pairs of environment information. Mainly used in env_info.py
KeyValEnvInfos: TypeAlias = list[tuple[str, str]]
Pathlike = Union[str, Path]
17 changes: 12 additions & 5 deletions deepdoctection/utils/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from os import path, remove
from tempfile import NamedTemporaryFile
from time import perf_counter as timer
from typing import Any, Generator, Iterator, Optional, Tuple, Union
from typing import Any, Generator, Iterator, Optional, Union

import numpy as np

Expand Down Expand Up @@ -72,7 +72,7 @@ def timeout_manager(proc, seconds: Optional[int] = None) -> Iterator[str]: # ty


@contextmanager
def save_tmp_file(image: Union[str, PixelValues, bytes], prefix: str) -> Iterator[Tuple[str, str]]:
def save_tmp_file(image: Union[str, PixelValues, bytes], prefix: str) -> Iterator[tuple[str, str]]:
"""
Save image temporarily and handle the clean-up once not necessary anymore
Expand Down Expand Up @@ -112,13 +112,20 @@ def save_tmp_file(image: Union[str, PixelValues, bytes], prefix: str) -> Iterato
@contextmanager
def timed_operation(message: str, log_start: bool = False) -> Generator[Any, None, None]:
"""
Contextmanager with a timer. Can therefore be used in a with statement.
Contextmanager with a timer.
:param message: a log to print
.. code-block:: python
with timed_operation(message="Your stdout message", log_start=True):
with open("log.txt", "a") as file:
...
:param message: a log to stdout
:param log_start: whether to print also the beginning
"""

assert len(message)
if log_start:
logger.info(LoggingRecord(f"start task: {message} ..."))
start = timer()
Expand Down
2 changes: 1 addition & 1 deletion deepdoctection/utils/develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
_DEPRECATED_LOG_NUM = defaultdict(int) # type: ignore


def log_deprecated(name: str = "", text: str = "", eos: str = "", max_num_warnings: Optional[int] = None) -> None:
def log_deprecated(name: str, text: str, eos: str = "", max_num_warnings: Optional[int] = None) -> None:
"""
Log deprecation warning.
Expand Down
Loading

0 comments on commit e2c416a

Please sign in to comment.