[2noise/ChatTTS]根据音色种子设定音色的标准方法的疑问

为方便测试，写了一个kivy小框架：

import torch
import ChatTTS
from kivy.app import App
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.textinput import TextInput
from kivy.core.audio import SoundLoader
import soundfile as sf
import tempfile
seeds = {
    "旁白": {"seed": 2222},
    "中年女性": {"seed": 7869},
    "年轻女性": {"seed": 6615},
    "中年男性": {"seed": 4099},
    "年轻男性": {"seed": 6653},
}
class ChatApp(App):
    def build(self):
        self.chat = ChatTTS.Chat()
        self.chat.load_models(source='local', local_path='models')
        self.std, self.mean = torch.load('models/asset/spk_stat.pt').chunk(2)

        layout = BoxLayout(orientation='vertical')
        self.input_text = TextInput(size_hint=(1, 0.8), multiline=False)
        submit_button = Button(text='Submit', size_hint=(1, 0.2))
        submit_button.bind(on_press=self.infer_and_play)
        layout.add_widget(self.input_text)
        layout.add_widget(submit_button)
        return layout

    def infer_and_play(self, instance):
        torch.manual_seed(seeds["年轻女性"]["seed"])
        rnd_spk_emb = self.chat.sample_random_speaker()
        params_infer_code = {
            'spk_emb': rnd_spk_emb,
            #'temperature': .1,
            #'top_P': 0.7,
            #'top_K': 20,
        }
        params_refine_text = {
        'prompt': '[oral_2][laugh_0][break_6]'
        } 
        text = self.input_text.text
        wav = self.chat.infer(text, params_infer_code=params_infer_code, use_decoder=True)[0][0]#params_refine_text=params_refine_text
        # 保存音频数据到临时文件
        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        sf.write(temp_audio_file, wav, 24000, format='WAV', subtype='PCM_24')
        temp_audio_file.close()
        print("temp_audio_file.name:", temp_audio_file.name)
        # 加载并播放音频文件
        sound = SoundLoader.load(temp_audio_file.name)
        if sound:
            sound.volume = 1.0
            sound.play()

if __name__ == '__main__':
    ChatApp().run()

NowLoadY

对于这个写法，

torch.manual_seed(seeds["年轻女性"]["seed"])
rnd_spk_emb = self.chat.sample_random_speaker()
params_infer_code = {
 'spk_emb': rnd_spk_emb,
}
wav = self.chat.infer(text, params_infer_code=params_infer_code, use_decoder=True)[0][0]

相当于

torch.manual_seed(6615)
rnd_spk_emb = self.chat.sample_random_speaker()
params_infer_code = {
 'spk_emb': rnd_spk_emb,
 }
 wav = self.chat.infer(text, params_infer_code=params_infer_code, use_decoder=True)[0][0]

生成的音色没有和预期一样，并且也不固定。请问有人能提供一点修改建议吗？非常感谢！ The resulting timbre is not as expected and is not fixed. Can anyone suggest any changes? Thank you very much!

NowLoadY

同疑问，好奇这里随机是在哪里随机，我找了好几个地方都不太对。

ligenxun

用这个

def deterministic(seed=0):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

Jackiexiao

用这个

def deterministic(seed=0):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

请问这个加在哪里？

AllanChen

用这个

def deterministic(seed=0):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

请问这个加在哪里？

torch.manual_seed(seeds["年轻女性"]["seed"]) 改为 deterministic(seeds["年轻女性"]["seed"])

Jackiexiao

具体用法你可以参考： https://github.com/Jackiexiao/ChatTTS-api-ui-docker/blob/main/api.py

Jackiexiao

用这个

def deterministic(seed=0):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

如果没有cuda环境，cpu上计算时，这种方法不是会出错吗

jianchang512

用这个

def deterministic(seed=0):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

如果没有cuda环境，cpu上计算时，这种方法不是会出错吗

不会，只要不把tensor真的放到cuda上就不会报错。刚刚开了个新环境试了一下。

conda create -n test python=3.12
conda activate test
conda install pytorch cpuonly -c pytorch

python
>>> import torch
>>> torch.cuda.manual_seed(123)
>>> torch.backends.cudnn.deterministic = True
>>> torch.backends.cudnn.benchmark = False
>>>

ain-soph

Duplicate with #15 #72 #80 #97

ain-soph

[2noise/ChatTTS]根据音色种子设定音色的标准方法的疑问

回答