✨ Implemented MFCC and STFT.
training.py | 22 (14 additions, 8 deletions)
@@ -34,7 +34,7 @@ parser.add_argument("--discriminator", type=str, default=None,
 parser.add_argument("--device", type=str, default="cpu", help="Select device")
 parser.add_argument("--epoch", type=int, default=0, help="Current epoch for model versioning")
 parser.add_argument("--debug", action="store_true", help="Print debug logs")
-parser.add_argument("--continue_training", type=bool, default=False, help="Continue training using temp_generator and temp_discriminator models")
+parser.add_argument("--continue_training", action="store_true", help="Continue training using temp_generator and temp_discriminator models")
 
 args = parser.parse_args()
 
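The switch from type=bool to action="store_true" matters because argparse applies bool() to the raw string, so any non-empty value, including "False", parses as True. A minimal standalone check of that behavior:

import argparse

p = argparse.ArgumentParser()
p.add_argument("--broken", type=bool, default=False)  # bool("False") == True
p.add_argument("--fixed", action="store_true")        # proper on/off flag

args = p.parse_args(["--broken", "False", "--fixed"])
print(args.broken)  # True -- any non-empty string is truthy
print(args.fixed)   # True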
@@ -60,6 +60,10 @@ mel_transform = T.MelSpectrogram(
     win_length=win_length, n_mels=n_mels, power=1.0 # Magnitude Mel
 ).to(device)
 
+stft_transform = T.Spectrogram(
+    n_fft=n_fft, win_length=win_length, hop_length=hop_length
+).to(device)
+
 debug = args.debug
 
 # Initialize dataset and dataloader
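mfcc_transform is passed to generator_train further down but its definition is not part of these hunks. A minimal sketch of how it might be constructed with torchaudio, reusing the STFT parameters already defined in training.py; n_mfcc=40 is illustrative and sample_rate is assumed to exist elsewhere in the script:

import torchaudio.transforms as T

# Hypothetical construction -- the actual mfcc_transform definition is not
# shown in this commit. n_mfcc and the melkwargs values are illustrative.
mfcc_transform = T.MFCC(
    sample_rate=sample_rate,
    n_mfcc=40,
    melkwargs={"n_fft": n_fft, "hop_length": hop_length,
               "win_length": win_length, "n_mels": n_mels},
).to(device)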
@@ -72,7 +76,7 @@ os.makedirs(audio_output_dir, exist_ok=True)
 
 # ========= SINGLE =========
 
-train_data_loader = DataLoader(dataset, batch_size=12, shuffle=True)
+train_data_loader = DataLoader(dataset, batch_size=64, shuffle=True)
 
 
 # ========= MODELS =========
@@ -143,7 +147,7 @@ def start_training():
 
         # ========= GENERATOR =========
         generator.train()
-        generator_output, combined_loss, adversarial_loss, mel_l1_tensor = generator_train(
+        generator_output, combined_loss, adversarial_loss, mel_l1_tensor, log_stft_l1_tensor, mfcc_l_tensor = generator_train(
            low_quality_sample,
            high_quality_sample,
            real_labels,
@@ -152,11 +156,13 @@ def start_training():
            criterion_d,
            optimizer_g,
            device,
-           mel_transform
+           mel_transform,
+           stft_transform,
+           mfcc_transform
        )
 
        if debug:
-           print(combined_loss, adversarial_loss, mel_l1_tensor)
+           print(f"D_LOSS: {d_loss.item():.4f}, COMBINED_LOSS: {combined_loss.item():.4f}, ADVERSARIAL_LOSS: {adversarial_loss.item():.4f}, MEL_L1_LOSS: {mel_l1_tensor.item():.4f}, LOG_STFT_L1_LOSS: {log_stft_l1_tensor.item():.4f}, MFCC_LOSS: {mfcc_l_tensor.item():.4f}")
        scheduler_d.step(d_loss.detach())
        scheduler_g.step(adversarial_loss.detach())
 
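generator_train itself is not part of this commit, so the exact loss computation is an assumption. A minimal sketch of how the extra tensors it now returns might be derived from the transforms passed in, using plain L1 distances; the helper name spectral_losses is hypothetical:

import torch
import torch.nn.functional as F

def spectral_losses(generated, target, mel_transform, stft_transform, mfcc_transform):
    # Hypothetical helper: the real computation lives inside generator_train,
    # whose body this diff does not show.
    mel_l1 = F.l1_loss(mel_transform(generated), mel_transform(target))

    # stft_transform (added above) defaults to a power spectrogram, so its
    # output is real and non-negative; the epsilon keeps log() finite.
    eps = 1e-7
    log_stft_l1 = F.l1_loss(
        torch.log(stft_transform(generated) + eps),
        torch.log(stft_transform(target) + eps),
    )

    mfcc_l1 = F.l1_loss(mfcc_transform(generated), mfcc_transform(target))
    return mel_l1, log_stft_l1, mfcc_l1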
@@ -173,9 +179,9 @@ def start_training():
        torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-ai.wav", ai_enhanced_audio[0].cpu().detach(), ai_enhanced_audio[1])
        torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-orig.wav", high_quality_audio[0].cpu().detach(), high_quality_audio[1])
 
-       if debug:
-           print(generator.state_dict().keys())
-           print(discriminator.state_dict().keys())
+       #if debug:
+       #    print(generator.state_dict().keys())
+       #    print(discriminator.state_dict().keys())
        torch.save(discriminator.state_dict(), f"{models_dir}/temp_discriminator.pt")
        torch.save(generator.state_dict(), f"{models_dir}/temp_generator.pt")
        Data.write_data(f"{models_dir}/epoch_data.json", {"epoch": new_epoch})
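How --continue_training consumes the temp checkpoints saved above is not shown in this diff. A minimal sketch of the resume path, assuming the flag simply restores both state dicts before training starts:

import os
import torch

# Hypothetical resume logic -- the actual handling of --continue_training in
# training.py is not part of this commit.
if args.continue_training:
    gen_path = f"{models_dir}/temp_generator.pt"
    disc_path = f"{models_dir}/temp_discriminator.pt"
    if os.path.exists(gen_path) and os.path.exists(disc_path):
        generator.load_state_dict(torch.load(gen_path, map_location=device))
        discriminator.load_state_dict(torch.load(disc_path, map_location=device))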