img_scale = (640, 640)  # height, width

# model settings
model = dict(
    type='YOLOX',
    input_size=img_scale,
    random_size_range=(15, 25),
    random_size_interval=10,
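    # Multi-scale training: every `random_size_interval` iterations the detector
    # resamples its input size as 32 * randint(*random_size_range), i.e. 480 to
    # 800 px in steps of 32.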
    backbone=dict(type='CSPDarknet', deepen_factor=0.33, widen_factor=0.5),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[128, 256, 512],
        out_channels=128,
        num_csp_blocks=1),
    bbox_head=dict(
        type='YOLOXHead', num_classes=80, in_channels=128, feat_channels=128),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    # To align with the official implementation, the score threshold is 0.01
    # for the val phase and 0.001 for the test phase.
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
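
The model section above is enough to instantiate the detector on its own. The
snippet below is a minimal sketch, not part of the config: it assumes MMDetection
2.x with mmcv 1.x installed, and the config path is an assumption (adjust it to
wherever this file is saved in your checkout).

from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/yolox/yolox_s_8x8_300e_coco.py')  # assumed path
model = build_detector(cfg.model)  # train_cfg/test_cfg are nested in cfg.model
model.init_weights()
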
# dataset settings
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
train_pipeline = [
    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
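    # Mosaic stitches four images onto a canvas of 2 * img_scale (1280 x 1280
    # here); the negative border in RandomAffine below crops it back to 640 x 640.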
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
    dict(
        type='MixUp',
        img_scale=img_scale,
        ratio_range=(0.8, 1.6),
        pad_val=114.0),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', flip_ratio=0.5),
    # Following the official implementation, multi-scale training is not
    # handled by this Resize transform but in
    # 'mmdet/models/detectors/yolox.py'.
    dict(type='Resize', img_scale=img_scale, keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        # For a three-channel image, the pad value must be set for each
        # channel separately.
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
train_dataset = dict(
    type='MultiImageMixDataset',
    dataset=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True)
        ],
        filter_empty_gt=False,
    ),
    pipeline=train_pipeline)
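
Mosaic and MixUp need several source images per training sample, which is why the
COCO dataset is wrapped in MultiImageMixDataset with only the loading steps in the
inner pipeline. A small sketch of building this wrapper (same assumptions as above:
MMDetection 2.x, COCO under data/coco/, and an assumed config path):

from mmcv import Config
from mmdet.datasets import build_dataset

cfg = Config.fromfile('configs/yolox/yolox_s_8x8_300e_coco.py')  # assumed path
dataset = build_dataset(cfg.data.train)
print(type(dataset).__name__, len(dataset))  # MultiImageMixDataset, ~118k images
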
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0))),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img'])
        ])
]
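
With a single img_scale and flip=False, testing is single-scale at 640 x 640: the
image is resized with its aspect ratio kept, then padded to a square with value 114.
For inference, MMDetection's high-level API applies this test pipeline automatically;
a minimal sketch, where the config and checkpoint paths are assumptions:

from mmdet.apis import init_detector, inference_detector

model = init_detector('configs/yolox/yolox_s_8x8_300e_coco.py',  # assumed path
                      'checkpoints/yolox_s.pth',                 # assumed checkpoint
                      device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')  # any test image; per-class bbox arrays
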
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    persistent_workers=True,
    train=train_dataset,
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
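
samples_per_gpu=8 is the per-GPU batch size; the upstream YOLOX configs in
MMDetection pair this with 8 GPUs, for a total batch size of 64, and bbox mAP is
evaluated on val2017 at interval=1 (every epoch with an epoch-based runner). The
sketch below builds the corresponding single-GPU training dataloader by hand (same
assumptions as above: MMDetection 2.x and an assumed config path); for actual
training, the standard entry points are tools/train.py and tools/dist_train.sh in
the MMDetection repository.

from mmcv import Config
from mmdet.datasets import build_dataloader, build_dataset

cfg = Config.fromfile('configs/yolox/yolox_s_8x8_300e_coco.py')  # assumed path
dataset = build_dataset(cfg.data.train)
loader = build_dataloader(
    dataset,
    samples_per_gpu=cfg.data.samples_per_gpu,   # 8 images per GPU
    workers_per_gpu=cfg.data.workers_per_gpu,   # 4 dataloader workers per GPU
    num_gpus=1,
    dist=False,
    shuffle=True)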