~~~
import os.path
import subprocess
import numpy as np
import torch
import xlsxwriter
from PIL import Image
import cn_clip.clip as clip
from cn_clip.clip import load_from_name, available_models
# Print the model names reported by cn_clip; this runs whenever the module is loaded.
print("Available models:", available_models())
# Sample output noted by the original author:
# Available models: ['ViT-B-16', 'ViT-L-14', 'ViT-L-14-336', 'ViT-H-14', 'RN50']
def test_chinese_clip():
    """
    Run the Chinese-CLIP demo on a single local image.

    Loads the "ViT-B-16" model, scores ``pokemon.jpeg`` against four
    candidate Chinese labels and prints the softmax probabilities of the
    image-to-text logits.

    Reference: https://github.com/OFA-Sys/Chinese-CLIP
    :return: None
    """
    run_device = "cpu"
    if torch.cuda.is_available():
        run_device = "cuda"

    clip_model, transform = load_from_name("ViT-B-16", device=run_device, download_root='./')
    clip_model.eval()

    picture = Image.open("pokemon.jpeg")
    image_batch = transform(picture).unsqueeze(0).to(run_device)
    candidate_labels = ["杰尼龟", "妙蛙种子", "小火龙", "皮卡丘"]
    token_batch = clip.tokenize(candidate_labels).to(run_device)

    with torch.no_grad():
        img_feats = clip_model.encode_image(image_batch)
        txt_feats = clip_model.encode_text(token_batch)
        # Normalise the features; the normalised image/text features are the
        # ones that should be used for downstream tasks.
        img_feats /= img_feats.norm(dim=-1, keepdim=True)
        txt_feats /= txt_feats.norm(dim=-1, keepdim=True)

        similarity = clip_model.get_similarity(image_batch, token_batch)
        per_image_logits = similarity[0]
        probabilities = per_image_logits.softmax(dim=-1).cpu().numpy()

    print(probabilities)
def test_videos_frame(is_debug=True):
    """
    Extract frames from each configured video with ffmpeg.

    For every video, a directory named after the file (extension removed) is
    created under the clip_demo project directory, and ffmpeg is invoked with
    the ``fps=1/2`` filter, writing frames with the ``%05d.jpg`` pattern.

    :param is_debug: when true, process ``start.mp4`` and ``end.mp4``;
        otherwise process the four full-length video files listed below.
    :return: None
    :raises OSError: if the ffmpeg executable cannot be launched.
    """
    if is_debug:
        video_list_path = [r"F:\视频介质\start.mp4", r"F:\视频介质\end.mp4"]
    else:
        video_list_path = [
            r"F:\视频介质\战狼1.mp4",
            r"F:\视频介质\叶问.mp4",
            r"F:\视频介质\七月与安生.mp4",
            r"F:\视频介质\独行月球.mp4",
        ]

    frames_root = r"F:\PycharmProjects\clip_demo"
    for video_path in video_list_path:
        # splitext strips only the final extension, so a dotted file name
        # such as "a.b.mp4" keeps its full stem instead of collapsing to "a".
        frame_dir_name = os.path.splitext(os.path.basename(video_path))[0]
        frame_dir_path = os.path.join(frames_root, frame_dir_name)
        # makedirs also creates a missing parent and tolerates an existing dir.
        os.makedirs(frame_dir_path, exist_ok=True)
        output_img_path = os.path.join(frame_dir_path, "%05d.jpg")

        # Pass the arguments as a list (no shell) so that paths containing
        # spaces or shell metacharacters reach ffmpeg unmodified.
        cmd_args = [
            "ffmpeg", "-i", video_path,
            "-filter:v", "fps=1/2",
            output_img_path,
            "-loglevel", "quiet",
        ]
        cmd_frames = " ".join(cmd_args)
        print(f"执行的ffmpeg命令:{cmd_frames}")
        ffmpeg_code = subprocess.call(cmd_args)
        print(f"电影:{frame_dir_name},抽帧状态:{ffmpeg_code},抽帧完成")
def test_videos_pic_tags(is_debug=True):
    """
    Score every frame of each video against that video's tag list.

    One workbook per frame directory is written to the clip_demo project
    directory. Column 0 holds the frame file name; each following column
    holds the softmax probability (over the tags) of one tag for that frame.

    :param is_debug: when true, use the ``end``/``start`` frame directories;
        otherwise use the four full-length video frame directories.
    :return: None
    """
    if is_debug:
        frame_list_path = [r"F:\PycharmProjects\clip_demo\end", r"F:\PycharmProjects\clip_demo\start"]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"]]
    else:
        frame_list_path = [
            r"F:\PycharmProjects\clip_demo\战狼1",
            r"F:\PycharmProjects\clip_demo\叶问",
            r"F:\PycharmProjects\clip_demo\七月与安生",
            r"F:\PycharmProjects\clip_demo\独行月球",
        ]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"], ["拥抱", "哭泣"], ["摔倒", "大笑"]]

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = load_from_name("ViT-B-16", device=device, download_root='./')
    model.eval()

    for frame_dir, tags in zip(frame_list_path, tags_list):
        # The tag tokens are the same for every frame of this video.
        text = clip.tokenize(tags).to(device)
        print(f"tags:{tags}")
        print(f"frame_dir:{frame_dir}")
        xlsx_name = os.path.basename(frame_dir)
        xlsx_path = os.path.join(r"F:\PycharmProjects\clip_demo", f"{xlsx_name}.xlsx")

        # The context manager closes (and saves) the workbook even if a frame
        # fails to load part-way through.
        with xlsxwriter.Workbook(xlsx_path) as wb_write:
            # Create the worksheet and its header row (one column per tag).
            sheet = wb_write.add_worksheet(xlsx_name)
            sheet.write(0, 0, "图片")
            for tag_index, tag in enumerate(tags):
                sheet.write(0, tag_index + 1, tag)

            # os.listdir order is arbitrary; sort so rows follow frame numbers.
            for frame_index, path in enumerate(sorted(os.listdir(frame_dir))):
                image_path = os.path.join(frame_dir, path)
                print(image_path)
                with Image.open(image_path) as frame:
                    image = preprocess(frame).unsqueeze(0).to(device)
                with torch.no_grad():
                    # get_similarity encodes both inputs itself, so no
                    # separate encode_image/encode_text pass is needed.
                    logits_per_image, _ = model.get_similarity(image, text)
                    probs = logits_per_image.softmax(dim=-1).cpu().numpy()
                print(probs)

                row = frame_index + 1
                sheet.write(row, 0, path)
                for tag_index, score in enumerate(probs[0]):
                    sheet.write(row, tag_index + 1, float(score))
def test_videos_tag_pics(is_debug=True):
    """
    For each tag, distribute a softmax probability over all frames of a video.

    One workbook per frame directory is written to the clip_demo project
    directory. Column 0 holds the frame file name; each following column
    holds, for one tag, the softmax (taken across the frames) of the
    text-to-image logits.

    :param is_debug: when true, use the ``end``/``start`` frame directories;
        otherwise use the four full-length video frame directories.
    :return: None
    """
    if is_debug:
        frame_list_path = [r"F:\PycharmProjects\clip_demo\end", r"F:\PycharmProjects\clip_demo\start"]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"]]
    else:
        frame_list_path = [
            r"F:\PycharmProjects\clip_demo\战狼1",
            r"F:\PycharmProjects\clip_demo\叶问",
            r"F:\PycharmProjects\clip_demo\七月与安生",
            r"F:\PycharmProjects\clip_demo\独行月球",
        ]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"], ["拥抱", "哭泣"], ["摔倒", "大笑"]]

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = load_from_name("ViT-B-16", device=device, download_root='./')
    model.eval()

    for frame_dir, tags in zip(frame_list_path, tags_list):
        print(f"frame_dir:{frame_dir}")
        xlsx_name = os.path.basename(frame_dir)
        xlsx_path = os.path.join(r"F:\PycharmProjects\clip_demo", f"{xlsx_name}.xlsx")

        # The context manager closes (and saves) the workbook on every exit
        # path, including the early "continue" below and exceptions.
        with xlsxwriter.Workbook(xlsx_path) as wb_write:
            # Create the worksheet and its header row (one column per tag).
            sheet = wb_write.add_worksheet(xlsx_name)
            sheet.write(0, 0, "图片")
            for tag_index, tag in enumerate(tags):
                sheet.write(0, tag_index + 1, tag)

            image_list = []
            # os.listdir order is arbitrary; sort so rows follow frame numbers.
            for frame_index, path in enumerate(sorted(os.listdir(frame_dir))):
                image_path = os.path.join(frame_dir, path)
                print(image_path)
                with Image.open(image_path) as frame:
                    image_list.append(preprocess(frame))
                sheet.write(frame_index + 1, 0, path)

            if not image_list or not tags:
                # torch.stack rejects an empty list; keep the header-only sheet.
                print(f"frame_dir:{frame_dir} has no frames or tags, skipped")
                continue

            # NOTE(review): every frame of the video is stacked into a single
            # batch; this may need chunking for long videos - confirm.
            image = torch.stack(image_list).to(device)
            text = clip.tokenize(tags).to(device)
            with torch.no_grad():
                # One pass for all tags: row i of logits_per_text holds the
                # logits of tag i against every frame, so the per-row softmax
                # matches scoring each tag separately without re-encoding
                # the image batch once per tag.
                _, logits_per_text = model.get_similarity(image, text)
                probs = logits_per_text.softmax(dim=-1).cpu().numpy()

            for tag_index, tag in enumerate(tags):
                tag_probs = probs[tag_index:tag_index + 1]
                for pro_index, score in enumerate(tag_probs[0]):
                    sheet.write(pro_index + 1, tag_index + 1, float(score))
                print(f"{tag} probs:", tag_probs)  # e.g. [[0.3333 0.3333 0.3333]]
if __name__ == '__main__':
    # Script entry point. The guard must test the module attribute __name__;
    # a bare "name" is undefined and raises NameError.
    # Step 1: extract frames from the full video list.
    test_videos_frame(False)
    # One image scored against several tags (currently disabled).
    # test_videos_pic_tags(False)
    # One tag scored against many images.
    test_videos_tag_pics(False)
~~~