"""Quickly generate a narrated video (快速生成语音视频).

Builds a video with spoken narration from a still image and lines of text
(根据图片、文本生成语音视频).
"""

from moviepy import ImageClip, AudioFileClip, concatenate_audioclips, AudioArrayClip
import numpy as np
from gtts import gTTS
import os
import time

# Narrated still-image video generator.
#
# Each entry of `text_lines` is synthesized to speech with gTTS, the spoken
# segments are joined with a fixed pause between them, and the resulting
# audio track is laid over a single still image to produce `output_video`.

# 1. Configuration
image_path = "input.png"  # still image shown for the whole video; replace with your own path
output_video = "output.mp4"  # where the finished video is written
text_lines = [
    "缺陷警示卡之物料绕反,问题描述:料带绕反,导致生产时,物料反向。",
    "根本原因:物料脱离料盘,料带两边开孔的物料无法区分正反,导致料带绕反。",
    "工位上对策:物料脱离料盘,必须找质量确认,料带低于1米的物料必须用料带膜将物料与料盘系上。",
]

# 2. Synthesize one audio segment per text line, separated by pauses
audio_clips = []
silence_duration = 2  # seconds of silence inserted between consecutive lines
# Sample rate in Hz. It is passed explicitly to every decode step below so the
# decoded samples, the silence buffers and the AudioArrayClip wrappers always
# agree; relabelling samples decoded at another rate would change pitch/speed.
sample_rate = 44100

# Scratch directory for intermediate audio files.
temp_dir = "temp_audio"
os.makedirs(temp_dir, exist_ok=True)
# Only files recorded here are deleted during cleanup, so anything else that
# already lives in `temp_dir` is left alone.
temp_files = []

try:
    # Skip blank entries: there is nothing to speak for them.
    spoken_lines = [line for line in text_lines if line.strip()]
    last_index = len(spoken_lines) - 1

    # The pause is identical every time, so build its sample buffer once
    # (two columns = stereo, matching the clips appended alongside it).
    silence_array = np.zeros((int(silence_duration * sample_rate), 2))

    for i, line in enumerate(spoken_lines):
        # Synthesize the current line to an MP3 file. No wait is needed
        # afterwards: the file is read right away by the next statement.
        temp_audio_path = os.path.join(temp_dir, f"temp_line_{i}.mp3")
        temp_files.append(temp_audio_path)
        tts = gTTS(line, lang='zh-cn')
        tts.save(temp_audio_path)

        # Pull the samples into memory so the file handle is released as soon
        # as the `with` block exits; the in-memory clip is what gets joined.
        with AudioFileClip(temp_audio_path, fps=sample_rate) as clip:
            audio_data = clip.to_soundarray(fps=sample_rate)
        audio_clips.append(AudioArrayClip(audio_data, fps=sample_rate))

        # Insert the pause after every line except the last one.
        if i != last_index:
            audio_clips.append(AudioArrayClip(silence_array, fps=sample_rate))

    # 3. Join the segments and render the video (skipped when there is no audio)
    if audio_clips:
        final_audio = concatenate_audioclips(audio_clips)
        final_audio_path = os.path.join(temp_dir, "final_audio.mp3")
        temp_files.append(final_audio_path)
        final_audio.write_audiofile(final_audio_path, logger=None)

        # The image is held on screen for exactly as long as the narration.
        with AudioFileClip(final_audio_path) as audio:
            video = ImageClip(image_path).with_duration(audio.duration)
            video = video.with_audio(audio)
            video.write_videofile(output_video, fps=24, codec='libx264', audio_codec='aac')

finally:
    # Best-effort cleanup of the files created above. Only OSError is
    # ignored (file already gone or still locked), so KeyboardInterrupt and
    # genuine programming errors are no longer swallowed.
    for path in temp_files:
        try:
            os.remove(path)
        except OSError:
            pass
    try:
        # Succeeds only when the directory is empty; otherwise leave it.
        os.rmdir(temp_dir)
    except OSError:
        pass