@@ -65,41 +65,6 @@ def decompress(self, tensor, ctx, *args, **kwargs):
         return tensor_decompressed
 
 
-class WeightDecayMomentum(Compressor):
-    """For 1bit compression."""
-
-    def __init__(self, compressor, mu, wd, *args, **kwargs):
-        self.compressor = compressor
-        self.mom = None
-        self.cache = None
-        self.mu = mu
-        self.wd = wd
-
-    def compress(self, tensor, *args, **kwargs):
-        """Returns the tensor unmodified."""
-        return self.compressor.compress(tensor)
-
-    def decompress(self, tensor, ctx, *args, **kwargs):
-        """Returns the tensor added with additional momentum for wd
-        m_t = \mu * m_{t-1} + wd * x_t
-        x_{t+1} = x_t - \eta_t (tensor + \mu m_t + wd * x_t)
-        """
-        if "x" not in kwargs:
-            return self.compressor.decompress(tensor, ctx)
-
-        x = kwargs["x"]
-
-        if self.mom is None:
-            self.mom = nd.zeros_like(tensor)
-            self.cache = nd.zeros_like(tensor)
-
-        nd._internal._mul_scalar(x, self.wd, out=self.cache)
-        self.mom += self.cache
-        nd._internal._mul_scalar(self.mom, self.mu, out=self.mom)
-        tensor += self.mom + self.cache
-        return self.compressor.decompress(tensor, ctx)
-
-
 class Compression(object):
     """Optional gradient compression algorithm used during push_pull."""
 
@@ -109,9 +74,6 @@ class Compression(object):
     """Compress all floating point gradients to 16-bit."""
     fp16 = FP16Compressor()
 
-    """Additional Momentum for weight decay. This is only for 1bit. This is a wrapper."""
-    wdmom = WeightDecayMomentum
-
 
 # if __name__ == "__main__":
 #     x = WeightDecayMomentum(Compression.none, 0.9, 1e-4)
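
For reference, below is a minimal, self-contained sketch of the update rule the removed WeightDecayMomentum wrapper applied in decompress(), rewritten with public mxnet.nd operations instead of nd._internal._mul_scalar. The NoneCompressor stand-in and the WeightDecayMomentumSketch name are illustrative additions, not part of the original file; the real wrapper took one of the module's Compressor subclasses.

# A minimal sketch, assuming MXNet's public NDArray API; NoneCompressor and
# the *Sketch class name are illustrative, not part of the original file.
from mxnet import nd


class NoneCompressor:
    """Hypothetical pass-through compressor, used only to make the sketch runnable."""

    def compress(self, tensor, *args, **kwargs):
        return tensor, None

    def decompress(self, tensor, ctx, *args, **kwargs):
        return tensor


class WeightDecayMomentumSketch:
    """Mirrors the arithmetic of the removed decompress():
    cache = wd * x;  m = mu * (m + cache);  grad += m + cache."""

    def __init__(self, compressor, mu, wd):
        self.compressor = compressor
        self.mom = None   # momentum buffer, lazily allocated on first use
        self.mu = mu      # momentum coefficient
        self.wd = wd      # weight decay coefficient

    def compress(self, tensor, *args, **kwargs):
        # Delegate compression unchanged, as the removed wrapper did.
        return self.compressor.compress(tensor)

    def decompress(self, tensor, ctx, *args, **kwargs):
        x = kwargs.get("x")
        if x is None:
            return self.compressor.decompress(tensor, ctx)
        if self.mom is None:
            self.mom = nd.zeros_like(tensor)
        cache = self.wd * x                      # wd * x_t
        self.mom = self.mu * (self.mom + cache)  # same ordering as the removed code
        tensor = tensor + self.mom + cache       # gradient plus wd-momentum terms
        return self.compressor.decompress(tensor, ctx)


# Usage with arbitrary shapes:
grad, weight = nd.ones((4,)), nd.ones((4,))
wdmom = WeightDecayMomentumSketch(NoneCompressor(), mu=0.9, wd=1e-4)
compressed, ctx = wdmom.compress(grad)
updated = wdmom.decompress(compressed, ctx, x=weight)

Note that the removed implementation scales the momentum after adding the weight-decay term, i.e. it computes mu * (m_{t-1} + wd * x_t) rather than the mu * m_{t-1} + wd * x_t written in its docstring; the sketch keeps the implemented behavior.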