⚗️ | More architectural changes
generator.py (95 changed lines)
@@ -2,25 +2,23 @@ import torch
 import torch.nn as nn
 
 
-def conv_block(in_channels, out_channels, kernel_size=3, dilation=1):
+def GeneratorBlock(in_channels, out_channels, kernel_size=3, stride=1, dilation=1):
+    padding = (kernel_size - 1) // 2 * dilation
     return nn.Sequential(
         nn.Conv1d(
             in_channels,
             out_channels,
             kernel_size=kernel_size,
+            stride=stride,
             dilation=dilation,
-            padding=(kernel_size // 2) * dilation,
+            padding=padding
         ),
         nn.InstanceNorm1d(out_channels),
-        nn.PReLU(),
+        nn.PReLU(num_parameters=1, init=0.1),
     )
 
 
 class AttentionBlock(nn.Module):
     """
     Simple Channel Attention Block. Learns to weight channels based on their importance.
     """
 
     def __init__(self, channels):
         super(AttentionBlock, self).__init__()
         self.attention = nn.Sequential(
@@ -32,7 +30,7 @@ class AttentionBlock(nn.Module):
 
     def forward(self, x):
         attention_weights = self.attention(x)
-        return x * attention_weights
+        return x + (x * attention_weights)
 
 
 class ResidualInResidualBlock(nn.Module):
@@ -40,7 +38,7 @@ class ResidualInResidualBlock(nn.Module):
         super(ResidualInResidualBlock, self).__init__()
 
         self.conv_layers = nn.Sequential(
-            *[conv_block(channels, channels) for _ in range(num_convs)]
+            *[GeneratorBlock(channels, channels) for _ in range(num_convs)]
         )
 
         self.attention = AttentionBlock(channels)
@@ -51,31 +49,74 @@ class ResidualInResidualBlock(nn.Module):
         x = self.attention(x)
         return x + residual
 
+def UpsampleBlock(in_channels, out_channels):
+    return nn.Sequential(
+        nn.ConvTranspose1d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=4,
+            stride=2,
+            padding=1
+        ),
+        nn.InstanceNorm1d(out_channels),
+        nn.PReLU(num_parameters=1, init=0.1)
+    )
 
 class SISUGenerator(nn.Module):
-    def __init__(self, channels=16, num_rirb=4, alpha=1):
+    def __init__(self, channels=32, num_rirb=1):
         super(SISUGenerator, self).__init__()
-        self.alpha = alpha
 
-        self.conv1 = nn.Sequential(
-            nn.Conv1d(1, channels, kernel_size=7, padding=3),
-            nn.InstanceNorm1d(channels),
-            nn.PReLU(),
-        )
+        self.first_conv = GeneratorBlock(1, channels)
 
-        self.rir_blocks = nn.Sequential(
-            *[ResidualInResidualBlock(channels) for _ in range(num_rirb)]
-        )
+        self.downsample = GeneratorBlock(channels, channels * 2, stride=2)
+        self.downsample_attn = AttentionBlock(channels * 2)
+        self.downsample_2 = GeneratorBlock(channels * 2, channels * 4, stride=2)
+        self.downsample_2_attn = AttentionBlock(channels * 4)
 
-        self.final_layer = nn.Sequential(
-            nn.Conv1d(channels, 1, kernel_size=3, padding=1), nn.Tanh()
-        )
+        self.rirb = ResidualInResidualBlock(channels * 4)
+        # self.rirb = nn.Sequential(
+        #     *[ResidualInResidualBlock(channels * 4) for _ in range(num_rirb)]
+        # )
+
+        self.upsample = UpsampleBlock(channels * 4, channels * 2)
+        self.upsample_attn = AttentionBlock(channels * 2)
+        self.compress_1 = GeneratorBlock(channels * 4, channels * 2)
+
+        self.upsample_2 = UpsampleBlock(channels * 2, channels)
+        self.upsample_2_attn = AttentionBlock(channels)
+        self.compress_2 = GeneratorBlock(channels * 2, channels)
+
+        self.final_conv = nn.Sequential(
+            nn.Conv1d(channels, 1, kernel_size=7, padding=3),
+            nn.Tanh()
+        )
 
     def forward(self, x):
         residual_input = x
-        x = self.conv1(x)
-        x_rirb_out = self.rir_blocks(x)
-        learned_residual = self.final_layer(x_rirb_out)
-        output = residual_input + self.alpha * learned_residual
-
-        return torch.tanh(output)
+        x1 = self.first_conv(x)
+
+        x2 = self.downsample(x1)
+        x2 = self.downsample_attn(x2)
+
+        x3 = self.downsample_2(x2)
+        x3 = self.downsample_2_attn(x3)
+
+        x_rirb = self.rirb(x3)
+
+        up1 = self.upsample(x_rirb)
+        up1 = self.upsample_attn(up1)
+
+        cat1 = torch.cat((up1, x2), dim=1)
+        comp1 = self.compress_1(cat1)
+
+        up2 = self.upsample_2(comp1)
+        up2 = self.upsample_2_attn(up2)
+
+        cat2 = torch.cat((up2, x1), dim=1)
+        comp2 = self.compress_2(cat2)
+
+        learned_residual = self.final_conv(comp2)
+
+        output = residual_input + learned_residual
+        return output
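Two details worth calling out in the new GeneratorBlock: the explicit padding = (kernel_size - 1) // 2 * dilation preserves sequence length for odd kernels at stride=1 (it matches the old (kernel_size // 2) * dilation there; the two rules only differ for even kernel sizes) and halves the length at stride=2. A minimal check of that arithmetic against nn.Conv1d, with illustrative shapes:

import torch
import torch.nn as nn

def same_pad(kernel_size, dilation=1):
    # The commit's padding rule from GeneratorBlock.
    return (kernel_size - 1) // 2 * dilation

x = torch.randn(1, 8, 1024)  # (batch, channels, length); sizes are hypothetical

# stride=1 keeps the length, stride=2 halves it, and dilation still gives "same".
assert nn.Conv1d(8, 8, 3, stride=1, padding=same_pad(3))(x).shape[-1] == 1024
assert nn.Conv1d(8, 8, 3, stride=2, padding=same_pad(3))(x).shape[-1] == 512
assert nn.Conv1d(8, 8, 3, dilation=4, padding=same_pad(3, 4))(x).shape[-1] == 1024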
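The AttentionBlock change is also more than cosmetic: the old forward returned x * attention_weights, which can only gate a channel down, while the new x + (x * attention_weights) keeps an identity path and rescales each channel by (1 + w). A small sketch of that equivalence; the gate values here are hypothetical stand-ins, since the layers inside self.attention fall outside this diff's hunks:

import torch

x = torch.randn(1, 4, 8)   # (batch, channels, time)
w = torch.rand(1, 4, 1)    # hypothetical per-channel gate in [0, 1]

old = x * w                # pre-commit: pure gating; a channel can be zeroed out
new = x + (x * w)          # post-commit: identity path plus the gated term
assert torch.allclose(new, x * (1 + w))  # channels now scale by 1x to 2x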
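Finally, a quick end-to-end sanity check of the new U-Net-style path. This is a sketch, assuming the post-commit file is importable as generator.py; because of the two stride-2 downsamples and the torch.cat skip connections, the input length must be a multiple of 4 for up1/x2 and up2/x1 to line up:

import torch
from generator import SISUGenerator  # assumes the post-commit generator.py

model = SISUGenerator(channels=32)
x = torch.randn(2, 1, 1024)  # (batch, 1, samples); length divisible by 4

with torch.no_grad():
    y = model(x)

print(y.shape)  # torch.Size([2, 1, 1024]) -- residual_input + learned_residual

Note that unlike the pre-commit forward, torch.tanh is now applied only inside final_conv to the learned residual, so the summed output itself is no longer clamped to [-1, 1].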