Spaces:
Runtime error
Runtime error
| # Copyright (c) OpenMMLab. All rights reserved. | |
| import os.path as osp | |
| from typing import List | |
| from mmocr.registry import DATA_PARSERS | |
| from .icdar_txt_parser import ICDARTxtTextRecogAnnParser | |
class MJSynthAnnParser(ICDARTxtTextRecogAnnParser):
    """Annotation parser for the MJSynth text recognition dataset.

    The dataset ships its annotations as txt files whose lines look like:

        img_path, transcription

    Only the ``img`` field of each parsed line is used by this parser: the
    transcription is recovered from the image file name (the second
    ``'_'``-separated piece of its basename) rather than from the text
    column of the annotation line.

    Args:
        separator (str): Character(s) separating the elements of a line.
            Defaults to ','.
        ignore (str): Transcription value marking a sample to be skipped.
            Defaults to '#'.
        format (str): Layout of one annotation line. Defaults to
            'img, text'.
        encoding (str): Encoding used to read the annotation file.
            Defaults to 'utf-8-sig'.
        nproc (int): Number of processes used for parsing. Defaults to 1.
        base_name (bool): Whether to use the basename of the image path as
            the image name. Defaults to False.
        remove_strs (List[str], Optional): Substrings stripped from every
            transcription. Defaults to ['"'].

    NOTE(review): ``parse_files`` below never reads ``base_name``; confirm
    whether that is intentional for this dataset.
    """

    def parse_files(self, img_dir: str, ann_path: str) -> List:
        """Turn an MJSynth annotation file into ``(image path, text)`` pairs.

        Args:
            img_dir (str): Directory that each annotated image path is
                joined onto.
            ann_path (str): Path of the annotation txt file to load.

        Returns:
            List: One ``(osp.join(img_dir, img), text)`` tuple per kept
            sample, in the order the loader yields them.

        Raises:
            IndexError: If an image basename contains no ``'_'``, so there
                is no second piece to use as the transcription.
        """
        assert isinstance(ann_path, str)

        entries = self.loader(
            file_path=ann_path,
            format=self.format,
            encoding=self.encoding,
            separator=self.sep)

        parsed = []
        for entry in entries:
            # The label lives in the file name, between the first and the
            # second underscore (presumably `<index>_<word>_<id>.<ext>` --
            # confirm against the dataset layout).
            word = osp.basename(entry['img']).split('_')[1]

            # Fall back to an empty tuple so the cleanup loop is a no-op
            # when no removal list has been configured.
            unwanted = (self.remove_strs
                        if self.remove_strs is not None else ())
            for fragment in unwanted:
                word = word.replace(fragment, '')

            # Samples whose cleaned label equals the ignore marker are
            # dropped from the result.
            if word != self.ignore:
                rel_path = entry['img']
                parsed.append((osp.join(img_dir, rel_path), word))

        return parsed