r3gm committed bb7c154 (verified) · Parent: 3f29218

Upload 4 files
model/loss.py ADDED
@@ -0,0 +1,128 @@
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class EPE(nn.Module):
    """End-point error: per-pixel Euclidean distance between flow and ground truth."""

    def __init__(self):
        super(EPE, self).__init__()

    def forward(self, flow, gt, loss_mask):
        loss_map = (flow - gt.detach()) ** 2
        loss_map = (loss_map.sum(1, True) + 1e-6) ** 0.5
        return loss_map * loss_mask


class Ternary(nn.Module):
    """Ternary census loss: compares soft census transforms of two images."""

    def __init__(self):
        super(Ternary, self).__init__()
        patch_size = 7
        out_channels = patch_size * patch_size
        self.w = np.eye(out_channels).reshape(
            (patch_size, patch_size, 1, out_channels))
        self.w = np.transpose(self.w, (3, 2, 0, 1))
        self.w = torch.tensor(self.w).float().to(device)

    def transform(self, img):
        patches = F.conv2d(img, self.w, padding=3, bias=None)
        transf = patches - img
        transf_norm = transf / torch.sqrt(0.81 + transf ** 2)
        return transf_norm

    def rgb2gray(self, rgb):
        r, g, b = rgb[:, 0:1, :, :], rgb[:, 1:2, :, :], rgb[:, 2:3, :, :]
        gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return gray

    def hamming(self, t1, t2):
        dist = (t1 - t2) ** 2
        dist_norm = torch.mean(dist / (0.1 + dist), 1, True)
        return dist_norm

    def valid_mask(self, t, padding):
        n, _, h, w = t.size()
        inner = torch.ones(n, 1, h - 2 * padding, w - 2 * padding).type_as(t)
        mask = F.pad(inner, [padding] * 4)
        return mask

    def forward(self, img0, img1):
        img0 = self.transform(self.rgb2gray(img0))
        img1 = self.transform(self.rgb2gray(img1))
        return self.hamming(img0, img1) * self.valid_mask(img0, 1)


class SOBEL(nn.Module):
    """L1 distance between Sobel gradient maps of prediction and ground truth."""

    def __init__(self):
        super(SOBEL, self).__init__()
        self.kernelX = torch.tensor([
            [1, 0, -1],
            [2, 0, -2],
            [1, 0, -1],
        ]).float()
        self.kernelY = self.kernelX.clone().T
        self.kernelX = self.kernelX.unsqueeze(0).unsqueeze(0).to(device)
        self.kernelY = self.kernelY.unsqueeze(0).unsqueeze(0).to(device)

    def forward(self, pred, gt):
        N, C, H, W = pred.shape
        img_stack = torch.cat(
            [pred.reshape(N * C, 1, H, W), gt.reshape(N * C, 1, H, W)], 0)
        sobel_stack_x = F.conv2d(img_stack, self.kernelX, padding=1)
        sobel_stack_y = F.conv2d(img_stack, self.kernelY, padding=1)
        pred_X, gt_X = sobel_stack_x[:N * C], sobel_stack_x[N * C:]
        pred_Y, gt_Y = sobel_stack_y[:N * C], sobel_stack_y[N * C:]

        L1X, L1Y = torch.abs(pred_X - gt_X), torch.abs(pred_Y - gt_Y)
        loss = L1X + L1Y
        return loss


class MeanShift(nn.Conv2d):
    """Fixed 1x1 convolution that normalizes (or de-normalizes) images by per-channel mean/std."""

    def __init__(self, data_mean, data_std, data_range=1, norm=True):
        c = len(data_mean)
        super(MeanShift, self).__init__(c, c, kernel_size=1)
        std = torch.Tensor(data_std)
        self.weight.data = torch.eye(c).view(c, c, 1, 1)
        if norm:
            self.weight.data.div_(std.view(c, 1, 1, 1))
            self.bias.data = -1 * data_range * torch.Tensor(data_mean)
            self.bias.data.div_(std)
        else:
            self.weight.data.mul_(std.view(c, 1, 1, 1))
            self.bias.data = data_range * torch.Tensor(data_mean)
        # Freeze the parameters themselves; setting `self.requires_grad = False`
        # on the module would only create a plain attribute and would not stop
        # gradients from flowing into weight/bias.
        for p in self.parameters():
            p.requires_grad = False


class VGGPerceptualLoss(torch.nn.Module):
    """Weighted L1 distance between VGG-19 feature maps of X and Y."""

    def __init__(self, rank=0):
        super(VGGPerceptualLoss, self).__init__()
        self.vgg_pretrained_features = models.vgg19(pretrained=True).features
        # .to(device) rather than .cuda(), so the loss also works on CPU-only hosts.
        self.normalize = MeanShift(
            [0.485, 0.456, 0.406], [0.229, 0.224, 0.225], norm=True).to(device)
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, X, Y, indices=None):
        X = self.normalize(X)
        Y = self.normalize(Y)
        if indices is None:
            # Default VGG-19 feature layers used for the comparison.
            indices = [2, 7, 12, 21, 30]
        weights = [1.0 / 2.6, 1.0 / 4.8, 1.0 / 3.7, 1.0 / 5.6, 10 / 1.5]
        k = 0
        loss = 0
        for i in range(indices[-1]):
            X = self.vgg_pretrained_features[i](X)
            Y = self.vgg_pretrained_features[i](Y)
            if (i + 1) in indices:
                loss += weights[k] * (X - Y.detach()).abs().mean() * 0.1
                k += 1
        return loss


if __name__ == '__main__':
    img0 = torch.zeros(3, 3, 256, 256).float().to(device)
    img1 = torch.tensor(np.random.normal(
        0, 1, (3, 3, 256, 256))).float().to(device)
    ternary_loss = Ternary()
    print(ternary_loss(img0, img1).shape)
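
A minimal sketch of how these losses might be combined in a training step. This usage is not part of the commit: the tensors and the 0.5/0.1 term weights are illustrative assumptions, and the names refer to the classes defined above.

    sobel = SOBEL()
    census = Ternary()
    perceptual = VGGPerceptualLoss().to(device)  # downloads VGG-19 weights on first use

    pred = torch.rand(2, 3, 256, 256, device=device)  # hypothetical network output in [0, 1]
    gt = torch.rand(2, 3, 256, 256, device=device)    # hypothetical ground-truth frames

    loss = (census(pred, gt).mean()
            + 0.5 * sobel(pred, gt).mean()   # assumed weight
            + 0.1 * perceptual(pred, gt))    # assumed weight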
model/pytorch_msssim/__init__.py ADDED
@@ -0,0 +1,198 @@
import torch
import torch.nn.functional as F
from math import exp
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def gaussian(window_size, sigma):
    gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)])
    return gauss / gauss.sum()


def create_window(window_size, channel=1):
    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0).to(device)
    window = _2D_window.expand(channel, 1, window_size, window_size).contiguous()
    return window


def create_window_3d(window_size, channel=1):
    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t())
    _3D_window = _2D_window.unsqueeze(2) @ (_1D_window.t())
    window = _3D_window.expand(1, channel, window_size, window_size, window_size).contiguous().to(device)
    return window


def ssim(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None):
    # The value range can differ from 255. Other common ranges are 1 (sigmoid) and 2 (tanh).
    if val_range is None:
        if torch.max(img1) > 128:
            max_val = 255
        else:
            max_val = 1

        if torch.min(img1) < -0.5:
            min_val = -1
        else:
            min_val = 0
        L = max_val - min_val
    else:
        L = val_range

    padd = 0
    (_, channel, height, width) = img1.size()
    if window is None:
        real_size = min(window_size, height, width)
        window = create_window(real_size, channel=channel).to(img1.device).type_as(img1)

    # Note: the replicate padding of 5 on each side assumes the default window_size of 11.
    mu1 = F.conv2d(F.pad(img1, (5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=channel)
    mu2 = F.conv2d(F.pad(img2, (5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    sigma1_sq = F.conv2d(F.pad(img1 * img1, (5, 5, 5, 5), 'replicate'), window, padding=padd, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(F.pad(img2 * img2, (5, 5, 5, 5), 'replicate'), window, padding=padd, groups=channel) - mu2_sq
    sigma12 = F.conv2d(F.pad(img1 * img2, (5, 5, 5, 5), 'replicate'), window, padding=padd, groups=channel) - mu1_mu2

    C1 = (0.01 * L) ** 2
    C2 = (0.03 * L) ** 2

    v1 = 2.0 * sigma12 + C2
    v2 = sigma1_sq + sigma2_sq + C2
    cs = torch.mean(v1 / v2)  # contrast sensitivity

    ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)

    if size_average:
        ret = ssim_map.mean()
    else:
        ret = ssim_map.mean(1).mean(1).mean(1)

    if full:
        return ret, cs
    return ret


def ssim_matlab(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None):
    # The value range can differ from 255. Other common ranges are 1 (sigmoid) and 2 (tanh).
    if val_range is None:
        if torch.max(img1) > 128:
            max_val = 255
        else:
            max_val = 1

        if torch.min(img1) < -0.5:
            min_val = -1
        else:
            min_val = 0
        L = max_val - min_val
    else:
        L = val_range

    padd = 0
    (_, _, height, width) = img1.size()
    if window is None:
        real_size = min(window_size, height, width)
        window = create_window_3d(real_size, channel=1).to(img1.device).type_as(img1)
        # Channel is set to 1 since we treat color images as volumetric images.

    img1 = img1.unsqueeze(1)
    img2 = img2.unsqueeze(1)

    mu1 = F.conv3d(F.pad(img1, (5, 5, 5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=1)
    mu2 = F.conv3d(F.pad(img2, (5, 5, 5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=1)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    sigma1_sq = F.conv3d(F.pad(img1 * img1, (5, 5, 5, 5, 5, 5), 'replicate'), window, padding=padd, groups=1) - mu1_sq
    sigma2_sq = F.conv3d(F.pad(img2 * img2, (5, 5, 5, 5, 5, 5), 'replicate'), window, padding=padd, groups=1) - mu2_sq
    sigma12 = F.conv3d(F.pad(img1 * img2, (5, 5, 5, 5, 5, 5), 'replicate'), window, padding=padd, groups=1) - mu1_mu2

    C1 = (0.01 * L) ** 2
    C2 = (0.03 * L) ** 2

    v1 = 2.0 * sigma12 + C2
    v2 = sigma1_sq + sigma2_sq + C2
    cs = torch.mean(v1 / v2)  # contrast sensitivity

    ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)

    if size_average:
        ret = ssim_map.mean()
    else:
        ret = ssim_map.mean(1).mean(1).mean(1)

    if full:
        return ret, cs
    return ret


def msssim(img1, img2, window_size=11, size_average=True, val_range=None, normalize=False):
    device = img1.device
    weights = torch.FloatTensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).to(device).type_as(img1)
    levels = weights.size()[0]
    mssim = []
    mcs = []
    for _ in range(levels):
        sim, cs = ssim(img1, img2, window_size=window_size, size_average=size_average, full=True, val_range=val_range)
        mssim.append(sim)
        mcs.append(cs)

        img1 = F.avg_pool2d(img1, (2, 2))
        img2 = F.avg_pool2d(img2, (2, 2))

    mssim = torch.stack(mssim)
    mcs = torch.stack(mcs)

    # Normalize to avoid NaNs when training unstable models
    # (not compliant with the original definition).
    if normalize:
        mssim = (mssim + 1) / 2
        mcs = (mcs + 1) / 2

    pow1 = mcs ** weights
    pow2 = mssim ** weights
    # From the Matlab implementation https://ece.uwaterloo.ca/~z70wang/research/iwssim/:
    # the product of contrast sensitivity over all but the last scale, times SSIM
    # at the last scale. (The broadcast form `torch.prod(pow1[:-1] * pow2[-1])`
    # would raise the last SSIM term to the (levels - 1)-th power.)
    output = torch.prod(pow1[:-1]) * pow2[-1]
    return output


# Classes to reuse the window
class SSIM(torch.nn.Module):
    def __init__(self, window_size=11, size_average=True, val_range=None):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.val_range = val_range

        # Assume 3 channels for SSIM
        self.channel = 3
        self.window = create_window(window_size, channel=self.channel)

    def forward(self, img1, img2):
        (_, channel, _, _) = img1.size()

        if channel == self.channel and self.window.dtype == img1.dtype:
            window = self.window
        else:
            window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype)
            self.window = window
            self.channel = channel

        _ssim = ssim(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average)
        # Return DSSIM, a distance in [0, 1], rather than the raw similarity score.
        dssim = (1 - _ssim) / 2
        return dssim


class MSSSIM(torch.nn.Module):
    def __init__(self, window_size=11, size_average=True, channel=3):
        super(MSSSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = channel

    def forward(self, img1, img2):
        return msssim(img1, img2, window_size=self.window_size, size_average=self.size_average)
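
A quick usage sketch, not part of the commit; it assumes images scaled to [0, 1] and the names defined above. The 256-px size comfortably covers the five MS-SSIM scales (256 down to 16, still larger than the 11x11 window).

    img1 = torch.rand(1, 3, 256, 256, device=device)
    img2 = torch.rand(1, 3, 256, 256, device=device)

    score = ssim(img1, img2)                 # raw similarity; val_range inferred as 1
    loss = SSIM(window_size=11)(img1, img2)  # note: returns DSSIM = (1 - ssim) / 2
    ms = msssim(img1, img2, normalize=True)  # multi-scale; normalize guards against NaNs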
model/warplayer.py ADDED
@@ -0,0 +1,24 @@
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Cache of identity sampling grids, keyed by (device, flow size), so each
# resolution builds its grid only once.
backwarp_tenGrid = {}


def warp(tenInput, tenFlow):
    k = (str(tenFlow.device), str(tenFlow.size()))
    if k not in backwarp_tenGrid:
        # Base grid of normalized [-1, 1] coordinates, matching grid_sample's convention.
        tenHorizontal = torch.linspace(-1.0, 1.0, tenFlow.shape[3], device=tenFlow.device).view(
            1, 1, 1, tenFlow.shape[3]).expand(tenFlow.shape[0], -1, tenFlow.shape[2], -1)
        tenVertical = torch.linspace(-1.0, 1.0, tenFlow.shape[2], device=tenFlow.device).view(
            1, 1, tenFlow.shape[2], 1).expand(tenFlow.shape[0], -1, -1, tenFlow.shape[3])
        backwarp_tenGrid[k] = torch.cat(
            [tenHorizontal, tenVertical], 1).to(tenFlow.device)

    # Rescale the flow from pixel units to normalized coordinates.
    tenFlow = torch.cat([tenFlow[:, 0:1, :, :] / ((tenInput.shape[3] - 1.0) / 2.0),
                         tenFlow[:, 1:2, :, :] / ((tenInput.shape[2] - 1.0) / 2.0)], 1)

    grid = backwarp_tenGrid[k].type_as(tenFlow)

    g = (grid + tenFlow).permute(0, 2, 3, 1)
    return torch.nn.functional.grid_sample(input=tenInput, grid=g, mode='bilinear', padding_mode='border', align_corners=True)
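
A small sanity-check sketch, not part of the commit: with an all-zero flow, backward warping reduces to sampling on the base grid, so it should return the input unchanged.

    img = torch.rand(1, 3, 64, 128, device=device)
    flow = torch.zeros(1, 2, 64, 128, device=device)  # per-pixel (dx, dy) in pixels

    out = warp(img, flow)
    print(torch.allclose(out, img, atol=1e-5))  # expected: True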
requirements.txt CHANGED
@@ -10,3 +10,10 @@ imageio
 imageio-ffmpeg
 opencv-python
 torchao==0.11.0
+
+ numpy>=1.16, <=1.23.5
+ # tqdm>=4.35.0
+ # sk-video>=1.1.10
+ # opencv-python>=4.1.2
+ # moviepy>=1.0.3
+ torchvision
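
Assuming this file sits at the repository root, the usual install is:

    pip install -r requirements.txt

The numpy cap at 1.23.5 presumably avoids NumPy 1.24's removal of long-deprecated aliases; that rationale is an assumption, not stated in the commit.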