3
具体思路:1. 随机生成说话人向量 rand_spk,并把它(weight)保存到 CSV 文件:
import torch
import csv
def writeToCsv(csv_file_path, data):
    """Write ``data`` to ``csv_file_path`` as a single CSV row.

    Args:
        csv_file_path: Destination path; an existing file is overwritten
            because the file is opened in ``'w'`` mode.
        data: Object exposing ``tolist()`` (for example a 1-D NumPy array);
            the resulting list becomes the only row of the file.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(csv_file_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        # Write the data as one row.
        writer.writerow(data.tolist())


# The helper is defined before it is used, and the sampling step runs only
# when this file is executed directly (script or notebook), not on import.
if __name__ == "__main__":
    # NOTE(review): assumes spk_stat.pt stores the std and mean statistics
    # concatenated in one tensor, so chunk(2) splits them apart -- confirm.
    std, mean = torch.load("spk_stat.pt").chunk(2)
    # Sample a random speaker embedding from those statistics.
    rand_spk = torch.randn(768) * std + mean
    writeToCsv("saved.csv", rand_spk.detach().numpy())
2. 生成语音时,从 CSV 文件加载之前保存的 rand_spk(weight);注意 temperature 要设成一个极小的值:
import pandas as pd
import scipy.io.wavfile  # the submodule must be imported explicitly before use
import torch

# Load the speaker embedding saved in step 1. header=None keeps the single
# CSV row as data instead of treating it as column names.
data = pd.read_csv("./saved.csv", header=None)
# Convert the row to a NumPy array first so torch.tensor receives a plain
# array rather than a pandas Series.
rand_spk = torch.tensor(data.iloc[0].to_numpy(), dtype=torch.float32)

params_infer_code = {
    'spk_emb': rand_spk,  # add sampled speaker
    'temperature': 1e-12,  # near-zero custom temperature (same value as .000000000001)
    'top_P': 0.7,  # top P decode
    'top_K': 20,  # top K decode
}
params_refine_text = {
    'prompt': '[break_2]'
}

# NOTE(review): `chat` is not defined in this snippet; presumably an already
# initialised ChatTTS object created elsewhere -- confirm before running.
wavs = chat.infer(
    "你的文本",
    params_refine_text=params_refine_text,
    params_infer_code=params_infer_code,
    use_decoder=True,
)
# Save the first returned waveform as a 24 kHz WAV file.
scipy.io.wavfile.write(filename="./chattts_download.wav", rate=24_000, data=wavs[0].T)