CenterNet Network Model Code (3): pose_dla_dcn.py

末蓝、 2022-12-23 07:30
This post analyzes the modified DLA network used in CenterNet, which performs quite well there, mainly by walking through its code. The code is as follows:
# imports required by this file (not shown in the original excerpt)
import math
import logging
import numpy as np
from os.path import join
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from .DCNv2.dcn_v2 import DCN  # compiled DCNv2 op shipped with the CenterNet repo

# BatchNorm momentum parameter
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)

# URL of the pretrained DLA weights
def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
    return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))

# 3x3 convolution with padding
def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)

# Basic residual block (as used in ResNet-18/34):
# 3x3 conv-BN-ReLU -> 3x3 conv-BN, then add the shortcut and apply ReLU
class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes, stride=1, dilation=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
                               stride=stride, padding=dilation,
                               bias=False, dilation=dilation)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=dilation,
                               bias=False, dilation=dilation)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)
        return out

# ResNet-style bottleneck block (used for the deeper variants, ResNet-50 and up).
# Same idea as the ResNet bottleneck, but the channel reduction factor is 2 instead of 4,
# so the internal 3x3 convolution works on a wider representation.
class Bottleneck(nn.Module):
    expansion = 2  # reduction factor of 2, i.e. a wider bottleneck than ResNet's

    def __init__(self, inplanes, planes, stride=1, dilation=1):
        super(Bottleneck, self).__init__()
        expansion = Bottleneck.expansion
        bottle_planes = planes // expansion
        self.conv1 = nn.Conv2d(inplanes, bottle_planes,
                               kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
                               stride=stride, padding=dilation,
                               bias=False, dilation=dilation)
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(bottle_planes, planes,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out += residual
        out = self.relu(out)
        return out

# ResNeXt-style bottleneck block (grouped 3x3 convolution)
class BottleneckX(nn.Module):
    expansion = 2
    cardinality = 32

    def __init__(self, inplanes, planes, stride=1, dilation=1):
        super(BottleneckX, self).__init__()
        cardinality = BottleneckX.cardinality
        # dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
        # bottle_planes = dim * cardinality
        bottle_planes = planes * cardinality // 32
        self.conv1 = nn.Conv2d(inplanes, bottle_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        # Grouped convolution with 32 groups: e.g. with 128 input and 256 output channels,
        # the weight tensor becomes 32 groups of (8 out x 4 in x 3 x 3), i.e. each group only
        # convolves 4 of the input channels into 8 output channels, and the 32 group outputs
        # are concatenated back into 256 channels.
        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
                               stride=stride, padding=dilation, bias=False,
                               dilation=dilation, groups=cardinality)
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(bottle_planes, planes,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out += residual
        out = self.relu(out)
        return out

# Root node: concatenates its children along the channel axis and fuses them with a
# 1x1 convolution that adjusts the channel dimension; optionally adds a residual
# connection to the first child.
class Root(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, residual):
        super(Root, self).__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, 1,
            stride=1, bias=False, padding=(kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.residual = residual

    def forward(self, *x):
        children = x
        x = self.conv(torch.cat(x, 1))
        x = self.bn(x)
        if self.residual:
            x += children[0]
        x = self.relu(x)
        return x

# Tree: the recursive aggregation structure of DLA. A Tree of depth `levels` stacks
# blocks (BasicBlock / Bottleneck / BottleneckX) and fuses their outputs through a
# Root node; deeper trees are built by nesting Trees.
class Tree(nn.Module):
    def __init__(self, levels, block, in_channels, out_channels, stride=1,
                 level_root=False, root_dim=0, root_kernel_size=1,
                 dilation=1, root_residual=False):
        super(Tree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels
        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride,
                               dilation=dilation)
            self.tree2 = block(out_channels, out_channels, 1,
                               dilation=dilation)
        else:
            # recursively build sub-trees of stacked blocks
            self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
                              stride, root_dim=0,
                              root_kernel_size=root_kernel_size,
                              dilation=dilation, root_residual=root_residual)
            self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
                              root_dim=root_dim + out_channels,
                              root_kernel_size=root_kernel_size,
                              dilation=dilation, root_residual=root_residual)
        if levels == 1:
            self.root = Root(root_dim, out_channels, root_kernel_size,
                             root_residual)
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = None
        self.project = None
        self.levels = levels
        if stride > 1:
            self.downsample = nn.MaxPool2d(stride, stride=stride)
        if in_channels != out_channels:
            self.project = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
            )

    def forward(self, x, residual=None, children=None):
        children = [] if children is None else children
        # downsample if a pooling layer is defined, otherwise keep x as is
        bottom = self.downsample(x) if self.downsample else x
        # project with a 1x1 conv when the channel counts differ (Cin != Cout)
        residual = self.project(bottom) if self.project else bottom
        if self.level_root:
            children.append(bottom)
        x1 = self.tree1(x, residual)  # pass through the first block / sub-tree
        if self.levels == 1:
            x2 = self.tree2(x1)  # pass through the second block
            # fuse x2, x1 (and any extra children) with the 1x1 Root conv
            x = self.root(x2, x1, *children)
        else:
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x

# DLA network backbone
class DLA(nn.Module):
    def __init__(self, levels, channels, num_classes=1000,
                 block=BasicBlock, residual_root=False, linear_root=False):
        super(DLA, self).__init__()
        self.channels = channels  # [16, 32, 64, 128, 256, 512]
        self.num_classes = num_classes
        # stem: 7x7 convolution
        self.base_layer = nn.Sequential(
            nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
                      padding=3, bias=False),
            nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True))
        self.level0 = self._make_conv_level(
            channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(
            channels[0], channels[1], levels[1], stride=2)
        self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
                           level_root=False, root_residual=residual_root)
        self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
                           level_root=True, root_residual=residual_root)
        self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
                           level_root=True, root_residual=residual_root)
        self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
                           level_root=True, root_residual=residual_root)
        # for m in self.modules():
        #     if isinstance(m, nn.Conv2d):
        #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        #         m.weight.data.normal_(0, math.sqrt(2. / n))
        #     elif isinstance(m, nn.BatchNorm2d):
        #         m.weight.data.fill_(1)
        #         m.bias.data.zero_()

    # build a residual stage (kept from the original DLA code; not used by this variant)
    def _make_level(self, block, inplanes, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or inplanes != planes:
            downsample = nn.Sequential(
                nn.MaxPool2d(stride, stride=stride),
                nn.Conv2d(inplanes, planes,
                          kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
            )
        layers = []
        layers.append(block(inplanes, planes, stride, downsample=downsample))
        for i in range(1, blocks):
            layers.append(block(inplanes, planes))
        return nn.Sequential(*layers)

    # stack of `convs` 3x3 conv-BN-ReLU layers
    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2d(inplanes, planes, kernel_size=3,
                          stride=stride if i == 0 else 1,
                          padding=dilation, bias=False, dilation=dilation),
                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)])
            inplanes = planes
        return nn.Sequential(*modules)

    def forward(self, x):
        y = []
        x = self.base_layer(x)
        for i in range(6):
            x = getattr(self, 'level{}'.format(i))(x)
            y.append(x)
        return y  # outputs of level0 .. level5

    # load pretrained weights (either a local .pth file or the published ImageNet weights)
    def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
        # fc = self.fc
        if name.endswith('.pth'):
            model_weights = torch.load(data + name)
        else:
            model_url = get_model_url(data, name, hash)
            model_weights = model_zoo.load_url(model_url)
        num_classes = len(model_weights[list(model_weights.keys())[-1]])
        self.fc = nn.Conv2d(
            self.channels[-1], num_classes,
            kernel_size=1, stride=1, padding=0, bias=True)
        self.load_state_dict(model_weights)
        # self.fc = fc


# DLA-34 configuration: number of blocks and channel width per level
def dla34(pretrained=True, **kwargs):  # DLA-34
    model = DLA([1, 1, 1, 2, 2, 1],
                [16, 32, 64, 128, 256, 512],
                block=BasicBlock, **kwargs)
    if pretrained:
        model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
    return model

# identity mapping: returns its input unchanged
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x


# initialize the biases of the head conv layers to zero
def fill_fc_weights(layers):
    for m in layers.modules():
        if isinstance(m, nn.Conv2d):
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


# initialize a transposed convolution with a fixed bilinear upsampling kernel
def fill_up_weights(up):
    w = up.weight.data
    f = math.ceil(w.size(2) / 2)
    c = (2 * f - 1 - f % 2) / (2. * f)
    for i in range(w.size(2)):
        for j in range(w.size(3)):
            w[0, 0, i, j] = \
                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
    for c in range(1, w.size(0)):
        w[c, 0, :, :] = w[0, 0, :, :]

# deformable convolution (DCNv2) followed by BN + ReLU
class DeformConv(nn.Module):
    def __init__(self, chi, cho):
        super(DeformConv, self).__init__()
        self.actf = nn.Sequential(
            nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True)
        )
        self.conv = DCN(chi, cho, kernel_size=(3, 3), stride=1,
                        padding=1, dilation=1, deformable_groups=1)

    def forward(self, x):
        x = self.conv(x)
        x = self.actf(x)
        return x

# IDA (Iterative Deep Aggregation) upsampling
class IDAUp(nn.Module):
    def __init__(self, o, channels, up_f):
        super(IDAUp, self).__init__()
        for i in range(1, len(channels)):
            c = channels[i]
            f = int(up_f[i])
            proj = DeformConv(c, o)  # deformable conv to project the channels down to o
            node = DeformConv(o, o)  # deformable conv to refine the fused features
            # nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
            #                    output_padding=0, groups=1, bias=True, dilation=1)
            up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
                                    padding=f // 2, output_padding=0,
                                    groups=o, bias=False)
            fill_up_weights(up)
            setattr(self, 'proj_' + str(i), proj)
            setattr(self, 'up_' + str(i), up)
            setattr(self, 'node_' + str(i), node)

    def forward(self, layers, startp, endp):
        for i in range(startp + 1, endp):
            upsample = getattr(self, 'up_' + str(i - startp))
            project = getattr(self, 'proj_' + str(i - startp))
            layers[i] = upsample(project(layers[i]))
            node = getattr(self, 'node_' + str(i - startp))
            layers[i] = node(layers[i] + layers[i - 1])

# DLA upsampling: chains IDAUp modules to iteratively merge the backbone levels
class DLAUp(nn.Module):
    def __init__(self, startp, channels, scales, in_channels=None):
        super(DLAUp, self).__init__()
        self.startp = startp  # 2
        if in_channels is None:
            in_channels = channels
        self.channels = channels              # [64, 128, 256, 512]
        channels = list(channels)             # [64, 128, 256, 512]
        scales = np.array(scales, dtype=int)  # [1, 2, 4, 8]
        for i in range(len(channels) - 1):    # i in [0, 1, 2]
            j = -i - 2                        # j in [-2, -3, -4]
            # register ida_i = IDAUp(channels[j], in_channels[j:], scales[j:] // scales[j])
            setattr(self, 'ida_{}'.format(i),
                    IDAUp(channels[j], in_channels[j:],
                          scales[j:] // scales[j]))
            scales[j + 1:] = scales[j]
            in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]

    def forward(self, layers):
        out = [layers[-1]]  # start with the deepest feature map
        for i in range(len(layers) - self.startp - 1):
            ida = getattr(self, 'ida_{}'.format(i))
            ida(layers, len(layers) - i - 2, len(layers))
            out.insert(0, layers[-1])
        return out

# thin wrapper around F.interpolate (bilinear/nearest resizing)
class Interpolate(nn.Module):
    def __init__(self, scale, mode):
        super(Interpolate, self).__init__()
        self.scale = scale
        self.mode = mode

    def forward(self, x):
        x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, align_corners=False)
        return x

# the full modified DLA structure: DLA backbone + DLAUp/IDAUp decoder + output heads
class DLASeg(nn.Module):
    def __init__(self, base_name, heads, pretrained, down_ratio, final_kernel,
                 last_level, head_conv, out_channel=0):
        super(DLASeg, self).__init__()
        assert down_ratio in [2, 4, 8, 16]           # down_ratio is 4 by default
        self.first_level = int(np.log2(down_ratio))  # first_level = 2
        self.last_level = last_level                 # 5 by default
        self.base = globals()[base_name](pretrained=pretrained)  # base_name, e.g. 'dla34'
        channels = self.base.channels                # [16, 32, 64, 128, 256, 512]
        scales = [2 ** i for i in range(len(channels[self.first_level:]))]  # [1, 2, 4, 8]
        self.dla_up = DLAUp(self.first_level, channels[self.first_level:], scales)
        if out_channel == 0:
            out_channel = channels[self.first_level]
        self.ida_up = IDAUp(out_channel, channels[self.first_level:self.last_level],
                            [2 ** i for i in range(self.last_level - self.first_level)])

        self.heads = heads  # e.g. {'hm': 94, 'reg': 2, 'wh': 2}
        for head in self.heads:
            classes = self.heads[head]
            if head_conv > 0:  # head_conv is 256 by default
                # channels[self.first_level] = 64; each head is 3x3 conv -> ReLU -> 1x1 conv
                fc = nn.Sequential(
                    nn.Conv2d(channels[self.first_level], head_conv,
                              kernel_size=3, padding=1, bias=True),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(head_conv, classes,
                              kernel_size=final_kernel, stride=1,
                              padding=final_kernel // 2, bias=True))
                if 'hm' in head:
                    # bias init for the heatmap head (low initial foreground probability)
                    fc[-1].bias.data.fill_(-2.19)
                else:
                    fill_fc_weights(fc)
            else:
                fc = nn.Conv2d(channels[self.first_level], classes,
                               kernel_size=final_kernel, stride=1,
                               padding=final_kernel // 2, bias=True)
                if 'hm' in head:
                    fc.bias.data.fill_(-2.19)
                else:
                    fill_fc_weights(fc)
            self.__setattr__(head, fc)

    def forward(self, x):
        x = self.base(x)    # DLA-34 backbone outputs (level0 .. level5)
        x = self.dla_up(x)  # DLA upsampling / aggregation
        y = []
        for i in range(self.last_level - self.first_level):
            y.append(x[i].clone())
        self.ida_up(y, 0, len(y))  # fuse the first_level..last_level maps with IDAUp
        z = {}
        for head in self.heads:
            z[head] = self.__getattr__(head)(y[-1])
        return [z]

# model factory: builds the DLA-{num_layers} backbone with the requested output heads
def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4):
    model = DLASeg('dla{}'.format(num_layers), heads,
                   pretrained=True,
                   down_ratio=down_ratio,
                   final_kernel=1,
                   last_level=5,
                   head_conv=head_conv)
    return model
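A quick side note on fill_up_weights: it initializes each transposed convolution in IDAUp as a fixed bilinear upsampling kernel. Below is a minimal sketch (assuming fill_up_weights defined above is importable) that prints the resulting 4x4 kernel for a 2x upsampling layer:

from torch import nn

# a 2x upsampling layer as used in IDAUp (kernel_size = 2 * f, stride = f, padding = f // 2)
up = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1, bias=False)
fill_up_weights(up)
print(up.weight.data[0, 0])
# expected: the outer product of [0.25, 0.75, 0.75, 0.25] with itself, i.e.
# tensor([[0.0625, 0.1875, 0.1875, 0.0625],
#         [0.1875, 0.5625, 0.5625, 0.1875],
#         [0.1875, 0.5625, 0.5625, 0.1875],
#         [0.0625, 0.1875, 0.1875, 0.0625]])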
The code is not easy to follow at first because of the recursion in the Tree module. Printing the network structure shows two stacked block structures:

(Figure: printed module structure showing two stacked Tree blocks)
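To see the recursion concretely, here is a minimal sketch (assuming Tree and BasicBlock defined above are importable) that builds a level-2 tree with the same configuration as level3 of DLA-34 and prints it:

import torch

# same configuration as self.level3 in DLA-34: Tree(2, BasicBlock, 64, 128, stride=2, level_root=True)
tree = Tree(2, BasicBlock, 64, 128, stride=2, level_root=True)
print(tree)  # shows the nested tree1 / tree2 sub-trees and the Root fusion node

x = torch.randn(1, 64, 56, 56)
print(tree(x).shape)  # expected: torch.Size([1, 128, 28, 28])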

The above is the code of the improved DLA-34 used in CenterNet; a more detailed article analyzing this network structure will follow later.
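As a rough end-to-end sanity check, here is a minimal usage sketch. It assumes the DCNv2 extension used by DeformConv has been compiled; the head dictionary below is only an illustrative example, and pretrained=False is passed so no ImageNet weights are downloaded:

import torch

heads = {'hm': 80, 'wh': 2, 'reg': 2}  # illustrative head configuration
model = DLASeg('dla34', heads, pretrained=False, down_ratio=4,
               final_kernel=1, last_level=5, head_conv=256)

x = torch.randn(1, 3, 512, 512)
out = model(x)[0]  # forward returns a one-element list holding a dict of head outputs
for name, tensor in out.items():
    print(name, tensor.shape)  # each head predicts at 1/4 resolution, e.g. (1, C, 128, 128)

get_pose_net wraps exactly this construction, except that it passes pretrained=True, so it will first try to download the DLA-34 ImageNet weights.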
