Skip to content

Commit a6c444c

Browse files
committed
Fix ssml id
1 parent 5aa3e92 commit a6c444c

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

manager/TTSManager.py

+10 -10
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,18 @@
11
import logging
22
import os
3+
import re
34
import traceback
5+
import xml.etree.ElementTree as ET
6+
from io import BytesIO
47

58
import librosa
6-
import re
79
import numpy as np
8-
import xml.etree.ElementTree as ET
9-
10-
from contants import config
1110
import soundfile as sf
12-
from io import BytesIO
1311
from graiax import silkcoder
14-
from contants import ModelType
1512
from scipy.signal import resample_poly
1613

14+
from contants import ModelType
15+
from contants import config
1716
from logger import logger
1817
from manager.observer import Observer
1918
from utils.data_utils import check_is_none
@@ -248,9 +247,10 @@ def process_ssml_infer_task(self, tasks, format):
248247
raise ValueError(f"Unsupported model type: {task.get('model_type')}")
249248
model_type = ModelType(model_type_str)
250249
model = self.get_model(model_type, task.get("id"))
251-
task["id"] = self.get_real_id(model_type, task.get("id"))
252250
sampling_rates.append(model.sampling_rate)
253251
last_sampling_rate = model.sampling_rate
252+
253+
# self.logger.debug(model, model.sampling_rate, task)
254254
audio = self.infer_map[model_type](task, encode=False)
255255
audios.append(audio)
256256
# 得到最高的采样率
@@ -394,7 +394,7 @@ def bert_vits2_infer(self, state, encode=True):
394394
state["text"] = re.sub(r'\s+', ' ', state["text"]).strip()
395395
sampling_rate = model.sampling_rate
396396
sentences_list = sentence_split(state["text"], state["segment_size"])
397-
397+
398398
if model.zh_bert_extra:
399399
infer_func = model.infer
400400
state["lang"] = "zh"
@@ -411,7 +411,7 @@ def bert_vits2_infer(self, state, encode=True):
411411
state["text"] = sentences
412412
audio = infer_func(**state)
413413
audios.append(audio)
414-
414+
415415
audio = np.concatenate(audios)
416416

417417
return self.encode(sampling_rate, audio, state["format"]) if encode else audio
@@ -430,7 +430,7 @@ def stream_bert_vits2_infer(self, state, encode=True):
430430
infer_func = model.infer_multilang
431431
else:
432432
infer_func = model.infer
433-
433+
434434
for sentences in sentences_list:
435435
state["text"] = sentences
436436
audio = infer_func(**state)

0 commit comments

Comments
 (0)