✨ | Made training a bit... spicier.

2025-09-10 19:52:53 +03:00
parent ff38cefdd3
commit 0bc8fc2792
8 changed files with 581 additions and 303 deletions
--- a/utils/MultiResolutionSTFTLoss.py
+++ b/utils/MultiResolutionSTFTLoss.py
@@ -0,0 +1,102 @@
+from typing import Dict, List, Optional
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchaudio.transforms as T
+
+
+class MultiResolutionSTFTLoss(nn.Module):
+    """Sum of spectral-convergence and log-magnitude losses over several STFTs.
+
+    One ``torchaudio`` ``Spectrogram`` (complex output, ``power=None``) is built
+    per ``(fft_size, hop_size, win_length)`` triple. For each of them
+    ``forward`` adds two terms computed on the STFT magnitudes:
+
+    * spectral convergence: Frobenius norm of the magnitude difference divided
+      by the Frobenius norm of the reference magnitude;
+    * log STFT magnitude: mean absolute difference of the log-magnitudes.
+
+    Both terms are summed (not averaged) across resolutions.
+    """
+
+    def __init__(
+        self,
+        fft_sizes: Optional[List[int]] = None,
+        hop_sizes: Optional[List[int]] = None,
+        win_lengths: Optional[List[int]] = None,
+        eps: float = 1e-7,
+    ):
+        """Build one STFT transform per resolution.
+
+        Args:
+            fft_sizes: FFT size per resolution; defaults to [1024, 2048, 512].
+            hop_sizes: Hop length per resolution; defaults to [120, 240, 50].
+            win_lengths: Window length per resolution; defaults to
+                [600, 1200, 240].
+            eps: Small constant added to the spectral-convergence denominator
+                and to the magnitudes before taking the log.
+
+        Raises:
+            ValueError: If the three lists do not have the same length.
+        """
+        super().__init__()
+        # None sentinels instead of list literals: no shared mutable defaults.
+        fft_sizes = [1024, 2048, 512] if fft_sizes is None else list(fft_sizes)
+        hop_sizes = [120, 240, 50] if hop_sizes is None else list(hop_sizes)
+        win_lengths = [600, 1200, 240] if win_lengths is None else list(win_lengths)
+        # zip() would silently drop the unmatched tail, so reject it up front.
+        if not len(fft_sizes) == len(hop_sizes) == len(win_lengths):
+            raise ValueError(
+                "fft_sizes, hop_sizes and win_lengths must have the same length, "
+                f"got {len(fft_sizes)}, {len(hop_sizes)} and {len(win_lengths)}"
+            )
+        self.stft_transforms = nn.ModuleList(
+            [
+                T.Spectrogram(
+                    n_fft=n_fft, win_length=win_len, hop_length=hop_len, power=None
+                )
+                for n_fft, hop_len, win_len in zip(fft_sizes, hop_sizes, win_lengths)
+            ]
+        )
+        self.eps = eps
+
+    def forward(
+        self, y_true: torch.Tensor, y_pred: torch.Tensor
+    ) -> Dict[str, torch.Tensor]:
+        """Compute the loss between a reference and a predicted signal.
+
+        Args:
+            y_true: Reference signal fed to each STFT transform.
+            y_pred: Predicted signal; presumably the same shape as ``y_true``
+                (TODO confirm against callers).
+
+        Returns:
+            Dict with 0-dim tensors under ``"sc"`` (spectral convergence),
+            ``"mag"`` (log magnitude) and ``"total"`` (their sum).
+        """
+        # Tensor accumulators keep the return type a tensor even when no
+        # resolution is configured (a plain 0.0 would leak out as a float).
+        sc_loss = y_pred.new_zeros(())  # Spectral Convergence Loss
+        mag_loss = y_pred.new_zeros(())  # Log STFT Magnitude Loss
+
+        # Ensure every transform is on the prediction's device.
+        self.stft_transforms.to(y_pred.device)
+
+        for stft in self.stft_transforms:
+            # Magnitudes of the complex STFTs
+            stft_mag_true = torch.abs(stft(y_true))
+            stft_mag_pred = torch.abs(stft(y_pred))
+
+            # --- Spectral Convergence Loss ---
+            # || |S_true| - |S_pred| ||_F  /  || |S_true| ||_F
+            norm_true = torch.linalg.norm(stft_mag_true, dim=(-2, -1))
+            norm_diff = torch.linalg.norm(stft_mag_true - stft_mag_pred, dim=(-2, -1))
+            sc_loss = sc_loss + torch.mean(norm_diff / (norm_true + self.eps))
+
+            # --- Log STFT Magnitude Loss ---
+            mag_loss = mag_loss + F.l1_loss(
+                torch.log(stft_mag_pred + self.eps), torch.log(stft_mag_true + self.eps)
+            )
+
+        total_loss = sc_loss + mag_loss
+        return {"total": total_loss, "sc": sc_loss, "mag": mag_loss}
--- a/utils/init.py
+++ b/utils/init.py