通过音频文件生成字幕文件

```

def _format_srt_timestamp(seconds):
    """Format an offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Work in integer milliseconds so the sub-second part is kept and the
    # hour field is always zero-padded correctly (also beyond 9 hours).
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{millis:03d}"


def step_4_generate_srt_by_whisper(wav_dict_path):
    """Generate one SRT subtitle file per audio file using OpenAI's whisper.

    The text is extracted by speech recognition, so the output is prone to
    wrongly recognised characters.

    :param wav_dict_path: iterable of dicts; each dict's ``"path"`` value is
        the audio file handed to ``model.transcribe``.
    :return: list of the SRT file paths, one per input item, in input order.
        An empty (or ``None``) input returns ``[]`` without loading the model.
    """
    if not wav_dict_path:
        return []

    model = whisper.load_model("tiny")
    srt_list = []

    for index, item in enumerate(wav_dict_path):
        wav_path = item.get("path")
        result = model.transcribe(wav_path)
        # result["text"] holds the plain transcript; only the timed segments
        # are needed to build subtitles.
        segments = result['segments']

        save_srt_path = os.path.join(project_path, "srt_temp", f"{get_now_time()}_{index}.srt")

        entries = []
        for segment in segments:
            start_time = _format_srt_timestamp(segment['start'])
            end_time = _format_srt_timestamp(segment['end'])

            # The recognised text may be Traditional Chinese; convert it to
            # Simplified Chinese.
            text = zhconv.convert(segment['text'], 'zh-cn')
            # Drop the single leading space, if present; startswith() is
            # safe on empty text.
            if text.startswith(' '):
                text = text[1:]

            # Segment ids are shifted by one so SRT cue numbers start at 1
            # (assumes the ids are zero-based - confirm against whisper).
            segment_id = segment['id'] + 1
            entries.append(f"{segment_id}\n{start_time} --> {end_time}\n{text}\n\n")

        # Open the file once per audio file instead of once per segment.
        # Append mode is kept from the original; the file is now created
        # even when no segments were recognised, so the returned path exists.
        with open(save_srt_path, 'a', encoding='utf-8') as srt_file:
            srt_file.writelines(entries)

        srt_list.append(save_srt_path)

    return srt_list

```

```

def step_4_generate_srt_by_char_calculation(wav_dict_path):
    """Generate one SRT file per audio item by splitting its known text.

    The text is split into clauses on the full-width comma and full-width
    period. Each clause gets a share of the audio duration proportional to
    its character count, and the clauses are laid out back to back from 0.

    :param wav_dict_path: iterable of dicts with the keys ``"path"`` (only
        printed here), ``"duration"`` (total audio length, presumably in
        seconds - confirm against the caller) and ``"text"`` (the spoken
        text).
    :return: list of the SRT file paths, one per input item, in input order.
    """
    # Clause delimiters: full-width comma and full-width period.
    clause_splitter = re.compile("[，。]")

    result_srt_list = []

    for index, item in enumerate(wav_dict_path):
        path = item.get("path")
        duration = item.get("duration")
        text = item.get("text")
        print(f"path:{path};duration:{duration};text:{text}")

        total_count = len(text)
        print(f"word_count:{total_count}")

        words_list = clause_splitter.split(text)
        print(f"words_list:{words_list}")

        # Drop the empty pieces produced by trailing or repeated delimiters.
        words_list = [word for word in words_list if word]

        # Count only the characters that end up in a subtitle. Deriving this
        # from the total length minus the clause count is wrong whenever the
        # text lacks a trailing delimiter, and is zero for a one-character
        # text; summing the clause lengths makes the shares add up to
        # ``duration`` exactly.
        word_count = sum(len(word) for word in words_list)

        srt_list = []
        # End time of the previous clause, used as the next start time.
        end_second = 0
        # Where this item's subtitles are saved.
        save_srt_path = os.path.join(project_path, "srt_temp", f"{get_now_time()}_{index}.srt")

        for w_index, summarize in enumerate(words_list):
            # This clause's share of the total duration.
            audio_duration = len(summarize) / word_count * duration
            # SRT cue numbers start at 1.
            segment_id = w_index + 1
            start_time = second_to_time(end_second)
            end_second = end_second + audio_duration
            end_time = second_to_time(end_second)

            srt_list.append(f"{segment_id}\n{start_time} --> {end_time}\n{summarize}\n\n")

        # The with-block closes the file; no explicit close() is needed.
        with open(save_srt_path, 'a', encoding='UTF-8') as f:
            f.writelines(srt_list)

        result_srt_list.append(save_srt_path)

    return result_srt_list

```

打野技术记录

通过音频文件生成字幕文件