Then I used the following script to fine-tune the given layout detection model:
\nimport os\nimport deepdoctection as dd\n\n_NAME = \"dataset\"\n_DESCRIPTION = \"Labeled images\"\n_SPLITS = {\"train\": \"/train\"}\n_CATEGORIES = [\"figure\", \"list\", \"table\", \"text\", \"title\"]\n_LOCATION = \"dataset\"\n_ANNOTATION_FILES = {\"train\": \"train.json\"}\n\n\nclass CustomDataFlowBuilder(dd.DataFlowBaseBuilder):\n def build(self, **kwargs):\n path = self.get_workdir() / _ANNOTATION_FILES[\"train\"]\n df = dd.SerializerCoco.load(path)\n coco_mapper = dd.coco_to_image(\n self.categories.get_categories(init=True), load_image=True,\n filter_empty_image=True, fake_score=False)\n df = dd.MapData(df, coco_mapper)\n return df\n\n\nclass CustomDataset(dd.DatasetBase):\n @classmethod\n def _info(cls):\n return dd.DatasetInfo(name=_NAME, description=_DESCRIPTION, splits=_SPLITS)\n\n def _categories(self):\n return dd.DatasetCategories(init_categories=_CATEGORIES)\n\n def _builder(self):\n return CustomDataFlowBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)\n\n\ncfg = dd.set_config_by_yaml(\"path/to/conf_dd_one.yaml\")\n\ndataset = CustomDataset()\n\nconfig_yaml_path = dd.ModelCatalog.get_full_path_configs(cfg.CONFIG.D2LAYOUT)\nweights_path = dd.ModelCatalog.get_full_path_weights(cfg.WEIGHTS.D2LAYOUT)\ncategories = dd.ModelCatalog.get_profile(cfg.WEIGHTS.D2LAYOUT).categories\nlayout_detector = dd.D2FrcnnDetector(config_yaml_path, weights_path, categories, device=cfg.DEVICE)\nlayout_service = dd.ImageLayoutService(layout_detector)\n\ncoco_metric = dd.get_metric(\"coco\")\n\nconfig_overwrite=[\"SOLVER.MAX_ITER=100000\",\n \"TEST.EVAL_PERIOD=20000\",\n \"SOLVER.CHECKPOINT_PERIOD=20000\",\n \"MODEL.BACKBONE.FREEZE_AT=0\",\n \"SOLVER.BASE_LR=1e-3\"]\n\nbuild_train_config = [\"max_datapoints=86000\"]\n\ndd.train_d2_faster_rcnn(\n path_config_yaml=config_yaml_path,\n dataset_train=dataset,\n path_weights=weights_path,\n config_overwrite=config_overwrite,\n log_dir=\"train_log\",\n build_train_config=build_train_config,\n dataset_val=dataset,\n 
build_val_config=None,\n metric=coco_metric,\n pipeline_component_name=\"ImageLayoutService\")
Here is my conf_dd_one.yaml:
CONFIG:\n D2LAYOUT: dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml\n D2CELL: dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml\n D2ITEM: dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml\nWEIGHTS:\n D2LAYOUT: layout/d2_model_0829999_layout_inf_only.pt\n D2CELL: cell/d2_model_1849999_cell_inf_only.pt\n D2ITEM: item/d2_model_1639999_item_inf_only.pt\nLAYOUT_NMS_PAIRS:\n COMBINATIONS:\n - - text\n - table\n - - title\n - table\n - - text\n - list\n - - title\n - list\n - - text\n - title\n - - list\n - table\n THRESHOLDS:\n - 0.005\n - 0.005\n - 0.542\n - 0.1\n - 0.699\n - 0.01\nSEGMENTATION:\n ASSIGNMENT_RULE: ioa\n IOA_THRESHOLD_ROWS: 0.4\n IOA_THRESHOLD_COLS: 0.4\n IOU_THRESHOLD_ROWS: 0.01\n IOU_THRESHOLD_COLS: 0.001\n REMOVE_IOU_THRESHOLD_ROWS: 0.001\n REMOVE_IOU_THRESHOLD_COLS: 0.001\n FULL_TABLE_TILING: True\n STRETCH_RULE: left\n USE_REFINEMENT: False\nWORD_MATCHING:\n PARENTAL_CATEGORIES:\n - text\n - title\n - cell\n - list\n - figure\n CHILD_CATEGORIES:\n - word\n RULE: ioa\n IOA_THRESHOLD: 0.6\n IOU_THRESHOLD: 0.001\n MAX_PARENT_ONLY: False\nTEXT_ORDERING:\n TEXT_CONTAINER: word\n FLOATING_TEXT_BLOCK:\n - title\n - text\n - list\n - figure\n TEXT_BLOCK:\n - title\n - text\n - list\n - cell\n - figure\n - header\n - body\n TEXT_CONTAINER_TO_TEXT_BLOCK: True\nDEVICE: cuda
However, when I run the script, it kind of freezes at `Starting training from iteration 0`. I have 2 Nvidia Quadros. `watch -n 1 nvidia-smi` shows that the GPUs are not utilized, though `top` shows that CPU usage is at 100%. Is this performance normal?
Check #162 for CUDA OOM
","upvoteCount":0,"url":"https://github.com/deepdoctection/deepdoctection/discussions/160#discussioncomment-6239210"}}}-
Hi. I am a newbie to this library. I have a dataset of formal letters and would like to use deepdoctection to extract different kinds of texts (like address, sender, date, main paragraphs). I used Label Studio to label 20 images in COCO format (I used pdf2image and opencv to convert the PDFs to grayscale and binary image) for testing purposes. The labels are simply
Then I used the following script to fine-tune the given layout detection model:
import os
import deepdoctection as dd
# Dataset name and description; both are passed to dd.DatasetInfo in
# CustomDataset._info.
_NAME = "dataset"
_DESCRIPTION = "Labeled images"
# Split name -> path-like value; passed to dd.DatasetInfo as `splits`.
_SPLITS = {"train": "/train"}
# Category names handed to dd.DatasetCategories as `init_categories`.
_CATEGORIES = ["figure", "list", "table", "text", "title"]
# Passed to the dataflow builder as `location`.
# NOTE(review): presumably a directory relative to the deepdoctection dataset
# cache -- confirm against the library documentation.
_LOCATION = "dataset"
# Annotation file per split; CustomDataFlowBuilder.build reads the "train"
# entry and joins it onto get_workdir().
_ANNOTATION_FILES = {"train": "train.json"}
class CustomDataFlowBuilder(dd.DataFlowBaseBuilder):
    """Dataflow builder for the COCO-style annotation file of the "train" split."""

    def build(self, **kwargs):
        """Return a dataflow of the annotated datapoints mapped through ``dd.coco_to_image``.

        The annotation file registered under ``"train"`` in
        ``_ANNOTATION_FILES`` is loaded from the builder's work directory and
        every record is passed through the mapper created by
        ``dd.coco_to_image``.

        NOTE(review): ``**kwargs`` is accepted but never read, so build
        options supplied by the caller (for example ``max_datapoints``) are
        ignored here -- confirm that this is intended.
        """
        annotation_path = self.get_workdir() / _ANNOTATION_FILES["train"]
        records = dd.SerializerCoco.load(annotation_path)

        category_map = self.categories.get_categories(init=True)
        to_image = dd.coco_to_image(
            category_map,
            load_image=True,
            filter_empty_image=True,
            fake_score=False,
        )
        return dd.MapData(records, to_image)
class CustomDataset(dd.DatasetBase):
    """Dataset definition assembled from the module-level constants."""

    @classmethod
    def _info(cls):
        """Return the dataset info built from ``_NAME``, ``_DESCRIPTION`` and ``_SPLITS``."""
        return dd.DatasetInfo(
            name=_NAME,
            description=_DESCRIPTION,
            splits=_SPLITS,
        )

    def _categories(self):
        """Return the dataset categories initialised from ``_CATEGORIES``."""
        return dd.DatasetCategories(init_categories=_CATEGORIES)

    def _builder(self):
        """Return the dataflow builder configured with ``_LOCATION`` and ``_ANNOTATION_FILES``."""
        return CustomDataFlowBuilder(
            location=_LOCATION,
            annotation_files=_ANNOTATION_FILES,
        )
# Load the pipeline configuration. Only CONFIG.D2LAYOUT, WEIGHTS.D2LAYOUT and
# DEVICE are read from it in the statements below.
cfg = dd.set_config_by_yaml("path/to/conf_dd_one.yaml")
# The same dataset instance is passed to the training call as both
# dataset_train and dataset_val.
dataset = CustomDataset()
# Resolve the layout model's config file, weights and categories through the
# model catalog.
config_yaml_path = dd.ModelCatalog.get_full_path_configs(cfg.CONFIG.D2LAYOUT)
weights_path = dd.ModelCatalog.get_full_path_weights(cfg.WEIGHTS.D2LAYOUT)
categories = dd.ModelCatalog.get_profile(cfg.WEIGHTS.D2LAYOUT).categories
# NOTE(review): layout_detector and layout_service are not referenced again in
# the visible script (the training call names the component by the string
# "ImageLayoutService") -- confirm whether building them here is needed.
layout_detector = dd.D2FrcnnDetector(config_yaml_path, weights_path, categories, device=cfg.DEVICE)
layout_service = dd.ImageLayoutService(layout_detector)
# Metric handed to the training call as `metric`.
coco_metric = dd.get_metric("coco")
# KEY=VALUE overrides handed to the training call as `config_overwrite`.
config_overwrite=["SOLVER.MAX_ITER=100000",
"TEST.EVAL_PERIOD=20000",
"SOLVER.CHECKPOINT_PERIOD=20000",
"MODEL.BACKBONE.FREEZE_AT=0",
"SOLVER.BASE_LR=1e-3"]
# NOTE(review): CustomDataFlowBuilder.build accepts **kwargs but never reads
# them, so this setting presumably has no effect on the dataflow -- confirm.
build_train_config = ["max_datapoints=86000"]
dd.train_d2_faster_rcnn(
path_config_yaml=config_yaml_path,
dataset_train=dataset,
path_weights=weights_path,
config_overwrite=config_overwrite,
log_dir="train_log",
build_train_config=build_train_config,
dataset_val=dataset,
build_val_config=None,
metric=coco_metric,
pipeline_component_name="ImageLayoutService")
Here is my conf_dd_one.yaml:
CONFIG:
  D2LAYOUT: dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml
  D2CELL: dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml
  D2ITEM: dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml
WEIGHTS:
  D2LAYOUT: layout/d2_model_0829999_layout_inf_only.pt
  D2CELL: cell/d2_model_1849999_cell_inf_only.pt
  D2ITEM: item/d2_model_1639999_item_inf_only.pt
LAYOUT_NMS_PAIRS:
  COMBINATIONS:
    - - text
      - table
    - - title
      - table
    - - text
      - list
    - - title
      - list
    - - text
      - title
    - - list
      - table
  THRESHOLDS:
    - 0.005
    - 0.005
    - 0.542
    - 0.1
    - 0.699
    - 0.01
SEGMENTATION:
  ASSIGNMENT_RULE: ioa
  IOA_THRESHOLD_ROWS: 0.4
  IOA_THRESHOLD_COLS: 0.4
  IOU_THRESHOLD_ROWS: 0.01
  IOU_THRESHOLD_COLS: 0.001
  REMOVE_IOU_THRESHOLD_ROWS: 0.001
  REMOVE_IOU_THRESHOLD_COLS: 0.001
  FULL_TABLE_TILING: True
  STRETCH_RULE: left
  USE_REFINEMENT: False
WORD_MATCHING:
  PARENTAL_CATEGORIES:
    - text
    - title
    - cell
    - list
    - figure
  CHILD_CATEGORIES:
    - word
  RULE: ioa
  IOA_THRESHOLD: 0.6
  IOU_THRESHOLD: 0.001
  MAX_PARENT_ONLY: False
TEXT_ORDERING:
  TEXT_CONTAINER: word
  FLOATING_TEXT_BLOCK:
    - title
    - text
    - list
    - figure
  TEXT_BLOCK:
    - title
    - text
    - list
    - cell
    - figure
    - header
    - body
  TEXT_CONTAINER_TO_TEXT_BLOCK: True
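DEVICE: cuda
However, when I run the script, it kind of freezes at `Starting training from iteration 0`. I have 2 Nvidia Quadros. `watch -n 1 nvidia-smi` shows that the GPUs are not utilized, though `top` shows that CPU usage is at 100%. Is this performance normal?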
Beta Was this translation helpful? Give feedback.
-
Thank you for your answers. The performance is not normal, and it sounds as if PyTorch does not recognize that GPUs are available. Try:
from deepdoctection.extern.pt.ptutils import get_num_gpu
print(get_num_gpu())
and if this returns 0, it means that PyTorch does not connect to your GPU. Most of the time this issue comes from the fact that the PyTorch version does not align with CUDA. One other thing worth checking is whether your dataset actually streams data from its dataflow:
This should stream through all your datapoints. Finally, the config file is only needed in your training script here
and you can replace the What really matters while training is the config of the layout model specified by |
Beta Was this translation helpful? Give feedback.
-
Beta Was this translation helpful? Give feedback.
Check #162 for CUDA OOM