✨ | Added support for .mp3 and .flac loading...
This commit is contained in:
99
training.py
99
training.py
@@ -76,7 +76,7 @@ os.makedirs(audio_output_dir, exist_ok=True)
|
||||
|
||||
# ========= SINGLE =========
|
||||
|
||||
train_data_loader = DataLoader(dataset, batch_size=1, shuffle=True)
|
||||
train_data_loader = DataLoader(dataset, batch_size=256, shuffle=True)
|
||||
|
||||
|
||||
# ========= MODELS =========
|
||||
@@ -122,70 +122,69 @@ def start_training():
|
||||
times_correct = 0
|
||||
|
||||
# ========= TRAINING =========
|
||||
for high_quality_data, low_quality_data in tqdm.tqdm(train_data_loader, desc=f"Training epoch {generator_epoch+1}/{generator_epochs}, Current epoch {epoch+1}"):
|
||||
for training_data in tqdm.tqdm(train_data_loader, desc=f"Training epoch {generator_epoch+1}/{generator_epochs}, Current epoch {epoch+1}"):
|
||||
## Data structure:
|
||||
# [[float..., float..., float...], sample_rate]
|
||||
# [[[float..., float..., float...], [float..., float..., float...]], [original_sample_rate, mangled_sample_rate]]
|
||||
|
||||
# ========= LABELS =========
|
||||
good_quality_data = training_data[0][0].to(device)
|
||||
bad_quality_data = training_data[0][1].to(device)
|
||||
original_sample_rate = training_data[1][0]
|
||||
mangled_sample_rate = training_data[1][1]
|
||||
|
||||
batch_size = high_quality_data[0][0].size(0)
|
||||
batch_size = good_quality_data.size(0)
|
||||
real_labels = torch.ones(batch_size, 1).to(device)
|
||||
fake_labels = torch.zeros(batch_size, 1).to(device)
|
||||
|
||||
high_quality_audio = high_quality_data
|
||||
low_quality_audio = low_quality_data
|
||||
high_quality_audio = (good_quality_data, original_sample_rate)
|
||||
low_quality_audio = (bad_quality_data, mangled_sample_rate)
|
||||
|
||||
ai_enhanced_outputs = []
|
||||
# ========= DISCRIMINATOR =========
|
||||
discriminator.train()
|
||||
d_loss = discriminator_train(
|
||||
good_quality_data,
|
||||
bad_quality_data,
|
||||
real_labels,
|
||||
fake_labels,
|
||||
discriminator,
|
||||
generator,
|
||||
criterion_d,
|
||||
optimizer_d
|
||||
)
|
||||
|
||||
for high_quality_sample, low_quality_sample in tqdm.tqdm(zip(high_quality_data[0], low_quality_data[0]), desc=f"Processing audio clip.. Length: {len(high_quality_data[0])}"):
|
||||
# ========= DISCRIMINATOR =========
|
||||
discriminator.train()
|
||||
d_loss = discriminator_train(
|
||||
high_quality_sample,
|
||||
low_quality_sample,
|
||||
real_labels,
|
||||
fake_labels,
|
||||
discriminator,
|
||||
generator,
|
||||
criterion_d,
|
||||
optimizer_d
|
||||
)
|
||||
# ========= GENERATOR =========
|
||||
generator.train()
|
||||
generator_output, combined_loss, adversarial_loss, mel_l1_tensor, log_stft_l1_tensor, mfcc_l_tensor = generator_train(
|
||||
bad_quality_data,
|
||||
good_quality_data,
|
||||
real_labels,
|
||||
generator,
|
||||
discriminator,
|
||||
criterion_d,
|
||||
optimizer_g,
|
||||
device,
|
||||
mel_transform,
|
||||
stft_transform,
|
||||
mfcc_transform
|
||||
)
|
||||
|
||||
# ========= GENERATOR =========
|
||||
generator.train()
|
||||
generator_output, combined_loss, adversarial_loss, mel_l1_tensor, log_stft_l1_tensor, mfcc_l_tensor = generator_train(
|
||||
low_quality_sample,
|
||||
high_quality_sample,
|
||||
real_labels,
|
||||
generator,
|
||||
discriminator,
|
||||
criterion_d,
|
||||
optimizer_g,
|
||||
device,
|
||||
mel_transform,
|
||||
stft_transform,
|
||||
mfcc_transform
|
||||
)
|
||||
|
||||
ai_enhanced_outputs.append(generator_output)
|
||||
|
||||
if debug:
|
||||
print(f"D_LOSS: {d_loss.item():.4f}, COMBINED_LOSS: {combined_loss.item():.4f}, ADVERSARIAL_LOSS: {adversarial_loss.item():.4f}, MEL_L1_LOSS: {mel_l1_tensor.item():.4f}, LOG_STFT_L1_LOSS: {log_stft_l1_tensor.item():.4f}, MFCC_LOSS: {mfcc_l_tensor.item():.4f}")
|
||||
scheduler_d.step(d_loss.detach())
|
||||
scheduler_g.step(adversarial_loss.detach())
|
||||
if debug:
|
||||
print(f"D_LOSS: {d_loss.item():.4f}, COMBINED_LOSS: {combined_loss.item():.4f}, ADVERSARIAL_LOSS: {adversarial_loss.item():.4f}, MEL_L1_LOSS: {mel_l1_tensor.item():.4f}, LOG_STFT_L1_LOSS: {log_stft_l1_tensor.item():.4f}, MFCC_LOSS: {mfcc_l_tensor.item():.4f}")
|
||||
scheduler_d.step(d_loss.detach())
|
||||
scheduler_g.step(adversarial_loss.detach())
|
||||
|
||||
# ========= SAVE LATEST AUDIO =========
|
||||
high_quality_audio = (torch.cat(high_quality_data[0]), high_quality_data[1])
|
||||
low_quality_audio = (torch.cat(low_quality_data[0]), low_quality_data[1])
|
||||
ai_enhanced_audio = (torch.cat(ai_enhanced_outputs), high_quality_data[1])
|
||||
high_quality_audio = (good_quality_data, original_sample_rate)
|
||||
low_quality_audio = (bad_quality_data, original_sample_rate)
|
||||
ai_enhanced_audio = (generator_output, original_sample_rate)
|
||||
|
||||
new_epoch = generator_epoch+epoch
|
||||
|
||||
if generator_epoch % 25 == 0:
|
||||
print(f"Saved epoch {new_epoch}!")
|
||||
torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-crap.wav", low_quality_audio[0].cpu().detach(), high_quality_audio[1]) # <-- Because audio clip was resampled in data.py from original to crap and to original again.
|
||||
torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-ai.wav", ai_enhanced_audio[0].cpu().detach(), ai_enhanced_audio[1])
|
||||
torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-orig.wav", high_quality_audio[0].cpu().detach(), high_quality_audio[1])
|
||||
# if generator_epoch % 25 == 0:
|
||||
# print(f"Saved epoch {new_epoch}!")
|
||||
# torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-orig.wav", high_quality_audio[0][-1].cpu().detach(), high_quality_audio[1][-1])
|
||||
# torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-crap.wav", low_quality_audio[0][-1].cpu().detach(), high_quality_audio[1][-1]) # <-- Because audio clip was resampled in data.py from original to crap and to original again.
|
||||
# torchaudio.save(f"{audio_output_dir}/epoch-{new_epoch}-audio-ai.wav", ai_enhanced_audio[0][-1].cpu().detach(), high_quality_audio[1][-1])
|
||||
|
||||
#if debug:
|
||||
# print(generator.state_dict().keys())
|
||||
|
Reference in New Issue
Block a user