⚗️ | Added additional GAN training utilities and minor training-loop cleanups
This commit is contained in:
@@ -8,8 +8,9 @@ import torchaudio.transforms as T
|
||||
|
||||
class MultiResolutionSTFTLoss(nn.Module):
|
||||
"""
|
||||
Computes a loss based on multiple STFT resolutions, including both
|
||||
spectral convergence and log STFT magnitude components.
|
||||
Multi-resolution STFT loss.
|
||||
Combines spectral convergence loss and log-magnitude loss
|
||||
across multiple STFT resolutions.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -20,43 +21,67 @@ class MultiResolutionSTFTLoss(nn.Module):
|
||||
eps: float = 1e-7,
|
||||
):
|
||||
super().__init__()
|
||||
self.stft_transforms = nn.ModuleList(
|
||||
[
|
||||
T.Spectrogram(
|
||||
n_fft=n_fft, win_length=win_len, hop_length=hop_len, power=None
|
||||
)
|
||||
for n_fft, hop_len, win_len in zip(fft_sizes, hop_sizes, win_lengths)
|
||||
]
|
||||
)
|
||||
|
||||
self.eps = eps
|
||||
self.n_resolutions = len(fft_sizes)
|
||||
|
||||
self.stft_transforms = nn.ModuleList()
|
||||
for n_fft, hop_len, win_len in zip(fft_sizes, hop_sizes, win_lengths):
|
||||
window = torch.hann_window(win_len)
|
||||
stft = T.Spectrogram(
|
||||
n_fft=n_fft,
|
||||
hop_length=hop_len,
|
||||
win_length=win_len,
|
||||
window_fn=lambda _: window,
|
||||
power=None, # Keep complex output
|
||||
center=True,
|
||||
pad_mode="reflect",
|
||||
normalized=False,
|
||||
)
|
||||
self.stft_transforms.append(stft)
|
||||
|
||||
def forward(
    self, y_true: torch.Tensor, y_pred: torch.Tensor
) -> Dict[str, torch.Tensor]:
    """
    Compute the multi-resolution STFT loss between two waveforms.

    Args:
        y_true: (B, T) or (B, 1, T) reference waveform.
        y_pred: (B, T) or (B, 1, T) predicted waveform.

    Returns:
        Dict with:
            "total": sc + mag loss, averaged over resolutions.
            "sc": spectral-convergence component.
            "mag": log-STFT-magnitude (L1) component.
    """
    # Normalize shape to (B, T): drop a singleton channel dimension.
    if y_true.dim() == 3 and y_true.size(1) == 1:
        y_true = y_true.squeeze(1)
    if y_pred.dim() == 3 and y_pred.size(1) == 1:
        y_pred = y_pred.squeeze(1)

    sc_loss = 0.0
    mag_loss = 0.0

    for stft in self.stft_transforms:
        # Ensure the transform (and its window buffer) lives on the
        # same device as the inputs.
        stft = stft.to(y_pred.device)

        # Complex STFTs (power=None in the transforms keeps complex output).
        stft_true = stft(y_true)
        stft_pred = stft(y_pred)

        # Magnitude spectrograms.
        stft_mag_true = torch.abs(stft_true)
        stft_mag_pred = torch.abs(stft_pred)

        # --- Spectral convergence: || |S_true| - |S_pred| ||_F / || |S_true| ||_F ---
        norm_true = torch.linalg.norm(stft_mag_true, dim=(-2, -1))
        norm_diff = torch.linalg.norm(stft_mag_true - stft_mag_pred, dim=(-2, -1))
        sc_loss += torch.mean(norm_diff / (norm_true + self.eps))

        # --- Log STFT magnitude loss (eps guards log(0)) ---
        mag_loss += F.l1_loss(
            torch.log(stft_mag_pred + self.eps),
            torch.log(stft_mag_true + self.eps),
        )

    # Average across resolutions so the loss scale is independent of
    # how many STFT configurations are used.
    sc_loss /= self.n_resolutions
    mag_loss /= self.n_resolutions
    total_loss = sc_loss + mag_loss

    return {"total": total_loss, "sc": sc_loss, "mag": mag_loss}
|
||||
|
60
utils/TrainingTools.py
Normal file
60
utils/TrainingTools.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import torch
|
||||
|
||||
# In case if needed again...
|
||||
# from utils.MultiResolutionSTFTLoss import MultiResolutionSTFTLoss
|
||||
#
|
||||
# stft_loss_fn = MultiResolutionSTFTLoss(
|
||||
# fft_sizes=[1024, 2048, 512], hop_sizes=[120, 240, 50], win_lengths=[600, 1200, 240]
|
||||
# )
|
||||
|
||||
|
||||
def signal_mae(input_one: torch.Tensor, input_two: torch.Tensor) -> torch.Tensor:
    """Return the mean absolute error between two signals as a scalar tensor."""
    return torch.mean(torch.abs(input_one - input_two))
|
||||
|
||||
|
||||
def discriminator_train(
    high_quality,
    low_quality,
    high_labels,
    low_labels,
    discriminator,
    generator,
    criterion,
):
    """
    Compute one discriminator loss for a GAN audio-enhancement setup.

    Args:
        high_quality: batch of real high-quality signals.
        low_quality: batch of degraded signals (generator input).
        high_labels: target labels for "real" decisions.
        low_labels: target labels for "fake" decisions.
        discriminator: callable mapping a signal batch to decisions.
        generator: callable mapping low-quality signals to enhanced ones.
        criterion: loss comparing decisions to labels.

    Returns:
        Scalar loss: mean of the four component losses.
    """
    # Real samples should be classified as real.
    decision_high = discriminator(high_quality)
    d_loss_high = criterion(decision_high, high_labels)

    # Raw low-quality samples should be classified as fake.
    decision_low = discriminator(low_quality)
    d_loss_low = criterion(decision_low, low_labels)

    # Generated samples: skip gradients through the generator, but keep
    # the discriminator forward differentiable so it can learn from them.
    with torch.no_grad():
        generated = generator(low_quality)
    decision_gen = discriminator(generated)
    d_loss_gen = criterion(decision_gen, low_labels)

    # Slightly noised real samples are treated as fake, as a regularizer.
    # NOTE(review): torch.rand_like is uniform in [0, 0.08), i.e. a small
    # positive bias — confirm whether zero-mean noise (randn_like) was intended.
    noise = torch.rand_like(high_quality) * 0.08
    decision_noise = discriminator(high_quality + noise)
    d_loss_noise = criterion(decision_noise, low_labels)

    # Average the four components so the loss scale stays comparable.
    return (d_loss_high + d_loss_low + d_loss_gen + d_loss_noise) / 4.0
|
||||
|
||||
|
||||
def generator_train(
    low_quality,
    high_quality,
    real_labels,
    generator,
    discriminator,
    adv_criterion,
    similarity_weight: float = 100.0,
):
    """
    Compute one generator loss for a GAN audio-enhancement setup.

    Args:
        low_quality: batch of degraded signals (generator input).
        high_quality: batch of target high-quality signals.
        real_labels: labels the generator wants the discriminator to emit.
        generator: callable mapping low-quality signals to enhanced ones.
        discriminator: callable mapping a signal batch to decisions.
        adv_criterion: adversarial loss comparing decisions to labels.
        similarity_weight: weight on the signal-similarity (MAE) term
            relative to the adversarial term (default 100.0, matching the
            previously hard-coded value).

    Returns:
        Tuple of (combined_loss, adversarial_loss).
    """
    generator_output = generator(low_quality)

    # Adversarial term: the generator is rewarded when the discriminator
    # labels its output as real.
    discriminator_decision = discriminator(generator_output)
    adversarial_loss = adv_criterion(discriminator_decision, real_labels)

    # Signal-similarity term anchoring the output to the target waveform.
    similarity_loss = signal_mae(generator_output, high_quality)

    combined_loss = adversarial_loss + similarity_loss * similarity_weight

    return combined_loss, adversarial_loss
|
Reference in New Issue
Block a user