但是,我使用以下的代码又是可以的
import ChatTTS
from IPython.display import Audio
import torch
import torchaudio
from dotenv import load_dotenv
load_dotenv()
chat = ChatTTS.Chat()
chat.load_models(compile=False) # Set to True for better performance
###################################
# Sample a speaker from Gaussian.
rand_spk = chat.sample_random_speaker()
params_infer_code = {
'spk_emb': rand_spk, # add sampled speaker
'temperature': .3, # using custom temperature
'top_P': 0.7, # top P decode
'top_K': 20, # top K decode
}
inputs_en = """
chat T T S is a text to speech model designed for dialogue applications.
[uv_break]it supports mixed language input [uv_break]and offers multi speaker
capabilities with precise control over prosodic elements [laugh]like like
[uv_break]laughter[laugh], [uv_break]pauses, [uv_break]and intonation.
[uv_break]it delivers natural and expressive speech,[uv_break]so please
[uv_break] use the project responsibly at your own risk.[uv_break]
""".replace('\n', '') # English is still experimental.
params_refine_text = {
'prompt': '[oral_2][laugh_0][break_4]'
}
# audio_array_cn = chat.infer(inputs_cn, params_refine_text=params_refine_text)
audio_array_en = chat.infer(inputs_en, params_refine_text=params_refine_text)
torchaudio.save("output3.wav", torch.from_numpy(audio_array_en[0]), 24000)