[2noise/ChatTTS]如何固定音色？

由于我的语料较长，需要分多次输入，这样在转语音时，不同段落转出来的音色不同，声音不一致。

Shengrun2020

使用 zeroshot 并固定 seed 可较好还原音色。

fumiama

感谢作者，对于一些叠词输出的语音不太清晰，请问这是分词的原因吗？

Shengrun2020

zeroshot 是什么？作者您好，想请教一下 @fumiama：下面的代码是我写的，可以固定音色，但是要怎么改变它的音色呢？是不断修改那 3 个种子吗？

# generate_audio.py

import os
import torch
import numpy as np
import torchaudio
import ChatTTS

# Point all three Hugging Face cache variables at the same local directory.
# NOTE(review): this runs after `import ChatTTS` above; libraries that read
# these variables at import time may not pick them up — confirm, and move the
# assignment ahead of that import if needed.
os.environ.update(
    dict.fromkeys(
        ("HF_HOME", "HUGGINGFACE_HUB_CACHE", "TRANSFORMERS_CACHE"),
        r"D:\ChatTTS\cache",
    )
)

# Default seeds, used as the default arguments of generate_audio_file().
DEFAULT_AUDIO_SEED_INPUT = 47  # default for `audio_seed`
DEFAULT_TEXT_SEED_INPUT = 24   # default for `text_seed`
DEFAULT_SEED = 425             # default for `seed`

def generate_audio_file(texts=None, output_path="word_level_output.wav",
                        audio_seed=DEFAULT_AUDIO_SEED_INPUT,
                        text_seed=DEFAULT_TEXT_SEED_INPUT,
                        seed=DEFAULT_SEED):
    """Synthesize ``texts`` with ChatTTS and save the first waveform to a WAV file.

    A speaker embedding is sampled right after seeding torch with
    ``audio_seed``, so the same ``audio_seed`` selects the same sampled
    speaker; pass a different ``audio_seed`` to get a different one.

    Args:
        texts: Input forwarded to ``chat.infer`` (presumably a string or a
            list of strings — confirm against the installed ChatTTS version).
        output_path: Destination WAV path. Missing parent directories are
            created; an existing file at this path is replaced.
        audio_seed: Torch seed applied immediately before sampling the speaker.
        text_seed: Torch seed applied immediately before text refinement.
        seed: Torch seed applied once after the model is loaded.
            NOTE(review): the RNG is reseeded with ``audio_seed`` before
            anything is sampled in this function, so this value currently has
            no visible effect here.

    Raises:
        ValueError: If ``texts`` is ``None``.
    """
    # Fail fast, before paying for the model load.
    if texts is None:
        raise ValueError("texts must be provided")

    chat = ChatTTS.Chat()
    chat.load(compile=True, source='huggingface')  # compile=True for faster inference

    torch.manual_seed(seed)

    temperature = 0.3
    top_P = 0.7
    top_K = 20
    refine_text_flag = True

    # Seed right before sampling so the speaker embedding is reproducible.
    torch.manual_seed(audio_seed)
    rand_spk = chat.sample_random_speaker()
    params_infer_code = ChatTTS.Chat.InferCodeParams(
        spk_emb=rand_spk,
        temperature=temperature,
        top_P=top_P,
        top_K=top_K)
    params_refine_text = ChatTTS.Chat.RefineTextParams(
        prompt='[oral_2][laugh_0][break_6]',)

    # Seed again so the text-refinement pass is reproducible on its own.
    torch.manual_seed(text_seed)
    if refine_text_flag:
        text = chat.infer(texts,
                          skip_refine_text=False,
                          refine_text_only=True,
                          params_refine_text=params_refine_text,
                          params_infer_code=params_infer_code
                          )
    else:
        text = texts

    # The text was already refined (or deliberately left as-is) above, so the
    # synthesis pass skips refinement.
    wavs = chat.infer(text,
                      skip_refine_text=True,
                      params_refine_text=params_refine_text,
                      params_infer_code=params_infer_code,)

    # dirname() is "" for a bare filename such as the default output_path;
    # os.makedirs("") raises, so only create a directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    if os.path.exists(output_path):
        os.remove(output_path)
    # Only the first returned waveform is written; 24000 is the sample rate
    # handed to torchaudio.save.
    torchaudio.save(output_path, torch.from_numpy(wavs[0]).unsqueeze(0), 24000)

    print(f"Audio file saved to {output_path}")

uvtong

用最新dev版本，有manual_seed参数。

fumiama

[2noise/ChatTTS]如何固定音色？

回答