Skip to content

Commit 3816689

Browse files
authored
Autogluon Integration (#1355)
* move rcnn forward backward task to model zoo * revert #1249 * fix * fix * docstring * fix style * add docs * faster rcnn estimator * refactor * move dataset to init * lint * merge * disable sacred config for now * logger fix * fix fit * autogluon integration * fix small bug. training working * lint
1 parent 5ba7ac2 commit 3816689

File tree

5 files changed

+360
-10
lines changed

5 files changed

+360
-10
lines changed

gluoncv/model_zoo/rcnn/rcnn.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -326,10 +326,10 @@ def custom_rcnn_fpn(pretrained_base=True, base_network_name='resnet18_v1b', norm
326326
fpn_inputs_names = ['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu68_fwd',
327327
'layers4_relu8_fwd']
328328
elif base_network_name == 'resnest50':
329-
from ...model_zoo.resnest import resnet50
330-
base_network = resnet50(pretrained=pretrained_base, dilated=False,
331-
use_global_stats=use_global_stats, norm_layer=norm_layer,
332-
norm_kwargs=norm_kwargs)
329+
from ...model_zoo.resnest import resnest50
330+
base_network = resnest50(pretrained=pretrained_base, dilated=False,
331+
use_global_stats=use_global_stats, norm_layer=norm_layer,
332+
norm_kwargs=norm_kwargs)
333333
fpn_inputs_names = ['layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu23_fwd',
334334
'layers4_relu11_fwd']
335335
elif base_network_name == 'resnest101':

gluoncv/pipelines/estimators/rcnn/faster_rcnn.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,25 @@ def _get_dataloader(net, train_dataset, val_dataset, train_transform, val_transf
8989
return train_loader, val_loader
9090

9191

92+
def _get_testloader(net, test_dataset, num_devices, config):
93+
"""Get faster rcnn test dataloader."""
94+
if config.meta_arch == 'faster_rcnn':
95+
test_bfn = Tuple(*[Append() for _ in range(3)])
96+
short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
97+
# validation use 1 sample per device
98+
test_loader = gluon.data.DataLoader(
99+
test_dataset.transform(FasterRCNNDefaultValTransform(short, net.max_size)),
100+
num_devices,
101+
False,
102+
batchify_fn=test_bfn,
103+
last_batch='keep',
104+
num_workers=config.num_workers
105+
)
106+
return test_loader
107+
else:
108+
raise NotImplementedError('%s not implemented.' % config.meta_arch)
109+
110+
92111
def _get_dataset(dataset, args):
93112
if dataset.lower() == 'voc':
94113
train_dataset = gdata.VOCDetection(
@@ -120,7 +139,7 @@ class FasterRCNNEstimator:
120139
TODO: use base estimators.
121140
"""
122141

123-
def __init__(self, config, logger=None):
142+
def __init__(self, config, logger=None, reporter=None):
124143
"""
125144
Constructs Faster R-CNN estimators.
126145
@@ -130,9 +149,13 @@ def __init__(self, config, logger=None):
130149
Configuration object containing information for constructing Faster R-CNN estimators.
131150
logger : logger object, default is None
132151
If `None`, a default logger obtained from `logging.getLogger(__name__)` is used.
152+
reporter : reporter object, default is None
If not `None`, it is called from `fit` with the keyword arguments `epoch` and `map_reward`.
153+
133154
"""
134155
super(FasterRCNNEstimator, self).__init__()
135156
self._logger = logger if logger is not None else logging.getLogger(__name__)
157+
self._reporter = reporter
158+
136159
self._cfg = config
137160
# training contexts
138161
if self._cfg.horovod:
@@ -173,7 +196,8 @@ def __init__(self, config, logger=None):
173196
norm_kwargs = None
174197
sym_norm_layer = None
175198
sym_norm_kwargs = None
176-
classes = self._cfg.classes
199+
classes = self.train_dataset.CLASSES
200+
177201
# TODO: maybe refactor this to pass configuration into the model instead.
178202
self.net = get_model('custom_faster_rcnn_fpn', classes=classes, transfer=None,
179203
dataset=self._cfg.dataset,
@@ -184,7 +208,7 @@ def __init__(self, config, logger=None):
184208
num_fpn_filters=self._cfg.num_fpn_filters,
185209
num_box_head_conv=self._cfg.num_box_head_conv,
186210
num_box_head_conv_filters=self._cfg.num_box_head_conv_filters,
187-
num_box_head_dense_filters=self.cfg.num_box_head_dense_filters,
211+
num_box_head_dense_filters=self._cfg.num_box_head_dense_filters,
188212
short=self._cfg.image_short, max_size=self._cfg.image_max_size,
189213
min_stage=2, max_stage=6, nms_thresh=self._cfg.nms_thresh,
190214
nms_topk=self._cfg.nms_topk, post_nms=self._cfg.post_nms,
@@ -226,7 +250,6 @@ def __init__(self, config, logger=None):
226250
self.net.collect_params('.*batchnorm.*').setattr('dtype', 'float32')
227251
self.net.collect_params('.*normalizedperclassboxcenterencoder.*').setattr('dtype',
228252
'float32')
229-
self._cfg.save_prefix += net_name
230253
if self._cfg.resume.strip():
231254
self.net.load_parameters(self._cfg.resume.strip())
232255
else:
@@ -433,16 +456,19 @@ def fit(self):
433456
current_map = 0.
434457
_save_params(self.net, self._logger, best_map, current_map, epoch,
435458
self._cfg.save_interval, self._cfg.save_prefix)
459+
if self._reporter:
460+
self._reporter(epoch=epoch, map_reward=current_map)
436461

437-
def evaluate(self):
462+
def evaluate(self, dataset):
438463
"""Evaluate the current model on dataset.
439464
440465
Parameters
441466
----------
442467
dataset : mxnet.gluon.data.Dataset
443468
Dataset to evaluate on. It is transformed and wrapped in a test DataLoader internally.
444469
"""
445-
return self._validate(self.val_dataset, self.ctx, self.eval_metric)
470+
dataloader = _get_testloader(self.net, dataset, len(self.ctx), self._cfg)
471+
return self._validate(dataloader, self.ctx, self.eval_metric)
446472

447473
def predict(self, x):
448474
"""Predict an individual example.
@@ -456,3 +482,15 @@ def predict(self, x):
456482
x = x.as_in_context(self.ctx[0])
457483
ids, scores, bboxes = [xx[0].asnumpy() for xx in self.net(x)]
458484
return ids, scores, bboxes
485+
486+
def load_parameters(self, parameters, multi_precision=False):
    """Load saved parameters into the model.

    Parameters
    ----------
    parameters : dict-like
        Maps each prefixed parameter name, as returned by
        ``self.net._collect_params_with_prefix()``, to its saved value.
        Every parameter of the network is looked up in it.
    multi_precision : bool, default is False
        Forwarded as ``cast_dtype`` on every MXNet version except 1.4.x.
    """
    # MXNet 1.4.x is called without ``cast_dtype``. Compare whole version
    # components rather than a string prefix so "1.40" is not taken for 1.4.
    is_mxnet_1_4 = mx.__version__.split('.')[:2] == ['1', '4']
    kwargs = {'ctx': None}
    if not is_mxnet_1_4:
        kwargs['cast_dtype'] = multi_precision
    for name, param in self.net._collect_params_with_prefix().items():
        param._load_init(parameters[name], **kwargs)
493+
494+
def get_parameters(self):
    """Return the result of ``self.net._collect_params_with_prefix()``."""
    prefixed_params = self.net._collect_params_with_prefix()
    return prefixed_params

gluoncv/pipelines/tasks/__init__.py

Whitespace-only changes.
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import logging
2+
3+
import autogluon as ag
4+
from autogluon.core.decorator import sample_config
5+
from autogluon.scheduler.resource import get_cpu_count, get_gpu_count
6+
from autogluon.task import BaseTask
7+
from autogluon.utils import collect_params
8+
9+
from ..estimators.rcnn import FasterRCNNEstimator
10+
from ... import utils as gutils
11+
12+
__all__ = ['ObjectDetection']
13+
14+
15+
@ag.args()
def _train_object_detection(args, reporter):
    """Run a single object-detection training trial.

    Parameters
    ----------
    args : argument namespace
        Trial arguments registered through ``register_args``. The fixed
        Faster R-CNN settings below are merged into it in place before the
        estimator is built.
    reporter : reporter object
        Passed to `FasterRCNNEstimator` as its ``reporter`` argument.

    Returns
    -------
    dict or None
        ``{'model_params': ...}`` when ``args.final_fit`` is truthy, otherwise `None`.

    Raises
    ------
    NotImplementedError
        If ``args.meta_arch`` is anything other than ``'faster_rcnn'``.
    """
    # fix seed for mxnet, numpy and python builtin random generator.
    gutils.random.seed(args.seed)

    if args.meta_arch != 'faster_rcnn':
        raise NotImplementedError('%s is not implemented.' % args.meta_arch)

    # A GPU count of zero must not break the per-device batch size computation.
    num_devices = max(1, args.num_gpus)
    kwargs = {'network': args.net, 'base_network_name': args.net,
              'image_short': args.data_shape, 'max_size': 1000, 'nms_thresh': 0.5,
              'nms_topk': -1, 'min_stage': 2, 'max_stage': 6, 'post_nms': -1,
              'roi_mode': 'align', 'roi_size': (7, 7), 'strides': (4, 8, 16, 32, 64),
              'clip': 4.14, 'rpn_channel': 256, 'anchor_scales': (2, 4, 8, 16, 32),
              'anchor_aspect_ratio': (0.5, 1, 2), 'anchor_alloc_size': (384, 384),
              'rpn_nms_thresh': 0.7, 'rpn_train_pre_nms': 12000, 'rpn_train_post_nms': 2000,
              'rpn_test_pre_nms': 6000, 'rpn_test_post_nms': 1000, 'rpn_min_size': 1,
              'per_device_batch_size': args.batch_size // num_devices, 'num_sample': 512,
              'rcnn_pos_iou_thresh': 0.5, 'rcnn_pos_ratio': 0.25, 'max_num_gt': 100,
              'custom_model': True, 'no_pretrained_base': True, 'num_fpn_filters': 256,
              'num_box_head_conv': 4, 'num_box_head_conv_filters': 256, 'amp': False,
              'num_box_head_dense_filters': 1024, 'image_max_size': 1333, 'kv_store': 'nccl',
              'anchor_base_size': 16, 'rcnn_num_samples': 512, 'rpn_smoothl1_rho': 0.001,
              'rcnn_smoothl1_rho': 0.001, 'lr_warmup_factor': 1. / 3., 'lr_warmup': 500,
              'executor_threads': 4, 'disable_hybridization': False, 'static_alloc': False}
    vars(args).update(kwargs)

    estimator = FasterRCNNEstimator(args, reporter=reporter)

    # training
    estimator.fit()

    if args.final_fit:
        return {'model_params': collect_params(estimator.net)}
    return None


class ObjectDetection(BaseTask):
    """Task that runs a hyper-parameter search over object-detection training.

    Parameters
    ----------
    config : configuration object
        Supplies the training arguments and the scheduler settings. Its
        ``scheduler_options`` attribute is overwritten by the constructor.
    logger : logger object, default is None
        If `None`, a default logger obtained from `logging.getLogger(__name__)` is used.
    """

    def __init__(self, config, logger=None):
        super(ObjectDetection, self).__init__()
        self._logger = logger if logger is not None else logging.getLogger(__name__)
        self._config = config
        cfg = self._config

        # Never ask for more CPUs or GPUs than the machine reports.
        available_gpus = get_gpu_count()
        if cfg.ngpus_per_trial > available_gpus:
            self._logger.warning(
                "The number of requested GPUs is greater than the number of available GPUs.")
        nthreads_per_trial = min(cfg.nthreads_per_trial, get_cpu_count())
        ngpus_per_trial = min(cfg.ngpus_per_trial, available_gpus)

        # The trial receives the clamped resource counts, so it agrees with
        # what the scheduler is told to allocate below.
        # NOTE(review): ``gpus`` is a fixed eight-device list regardless of
        # ``ngpus_per_trial`` - confirm how the estimator consumes it.
        _train_object_detection.register_args(
            meta_arch=cfg.meta_arch, dataset=cfg.dataset, net=cfg.net,
            lr=cfg.lr, loss=cfg.loss, num_gpus=ngpus_per_trial,
            batch_size=cfg.batch_size, split_ratio=cfg.split_ratio,
            epochs=cfg.epochs, num_workers=nthreads_per_trial,
            hybridize=cfg.hybridize, verbose=cfg.verbose, final_fit=False,
            seed=cfg.seed, data_shape=cfg.data_shape, start_epoch=0,
            transfer=cfg.transfer, lr_mode=cfg.lr_mode,
            lr_decay=cfg.lr_decay, lr_decay_period=cfg.lr_decay_period,
            lr_decay_epoch=cfg.lr_decay_epoch, warmup_lr=cfg.warmup_lr,
            warmup_epochs=cfg.warmup_epochs, warmup_iters=cfg.warmup_iters,
            warmup_factor=cfg.warmup_factor, momentum=cfg.momentum,
            wd=cfg.wd, log_interval=cfg.log_interval,
            save_prefix=cfg.save_prefix, save_interval=cfg.save_interval,
            val_interval=cfg.val_interval, num_samples=cfg.num_samples,
            no_random_shape=cfg.no_random_shape, no_wd=cfg.no_wd,
            mixup=cfg.mixup, no_mixup_epochs=cfg.no_mixup_epochs,
            label_smooth=cfg.label_smooth, resume=cfg.resume,
            syncbn=cfg.syncbn, reuse_pred_weights=cfg.reuse_pred_weights,
            horovod=cfg.horovod, gpus='0,1,2,3,4,5,6,7', use_fpn=True,
            norm_layer='syncbn' if cfg.syncbn else None,
        )

        cfg.scheduler_options = {
            'resource': {'num_cpus': nthreads_per_trial, 'num_gpus': ngpus_per_trial},
            'checkpoint': cfg.checkpoint,
            'num_trials': cfg.num_trials,
            'time_out': cfg.time_limits,
            'resume': cfg.resume,
            'visualizer': cfg.visualizer,
            'time_attr': 'epoch',
            'reward_attr': 'map_reward',
            'dist_ip_addrs': cfg.dist_ip_addrs,
            'searcher': cfg.search_strategy,
            'search_options': cfg.search_options,
        }
        if cfg.search_strategy == 'hyperband':
            # Hyperband uses the random searcher; the grace period falls back
            # to a quarter of the epochs when none is configured.
            cfg.scheduler_options.update({
                'searcher': 'random',
                'max_t': cfg.epochs,
                'grace_period': cfg.grace_period or cfg.epochs // 4})

    def fit(self):
        """Run the search and return an estimator loaded with the resulting parameters.

        Returns
        -------
        FasterRCNNEstimator
            Estimator built from the best configuration, with the
            ``'model_params'`` entry of the search results loaded into it.
        """
        results = self.run_fit(_train_object_detection, self._config.search_strategy,
                               self._config.scheduler_options)
        self._logger.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> finish model fitting")
        best_config = sample_config(_train_object_detection.args, results['best_config'])
        self._logger.info('The best config: %s', results['best_config'])

        estimator = FasterRCNNEstimator(best_config)
        estimator.load_parameters(results.pop('model_params'))
        return estimator

0 commit comments

Comments
 (0)