~~~

import os.path

import subprocess

import numpy as np

import torch

import xlsxwriter

from PIL import Image

import cn_clip.clip as clip

from cn_clip.clip import load_from_name, available_models

# Print the model names that cn_clip reports via available_models().
# NOTE: this runs at module import time, before any function is called.
print("Available models:", available_models())

# Example output:
# Available models: ['ViT-B-16', 'ViT-L-14', 'ViT-L-14-336', 'ViT-H-14', 'RN50']

def test_chinese_clip():
    """Run the Chinese-CLIP quick-start demo on a single image.

    Loads the ``ViT-B-16`` model (passing ``./`` as ``download_root``),
    scores ``pokemon.jpeg`` against four candidate captions and prints the
    softmax of the per-image logits over those captions.

    Reference: https://github.com/OFA-Sys/Chinese-CLIP

    :return: None
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = load_from_name("ViT-B-16", device=device, download_root='./')
    model.eval()

    picture = Image.open("pokemon.jpeg")
    image = preprocess(picture).unsqueeze(0).to(device)

    captions = ["杰尼龟", "妙蛙种子", "小火龙", "皮卡丘"]
    text = clip.tokenize(captions).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text)

        # Normalise the features; use the normalised image/text features
        # for downstream tasks. (They are not consumed further in this demo.)
        image_norm = image_features.norm(dim=-1, keepdim=True)
        text_norm = text_features.norm(dim=-1, keepdim=True)
        image_features /= image_norm
        text_features /= text_norm

        similarity = model.get_similarity(image, text)
        logits_per_image = similarity[0]
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()

    print(probs)

def test_videos_frame(is_debug=True):
    """Extract still frames from a fixed list of videos with ffmpeg.

    For every video a directory named after the video file (extension
    removed) is created under ``F:\\PycharmProjects\\clip_demo`` and ffmpeg
    writes numbered ``%05d.jpg`` frames into it using the ``fps=1/2`` filter.

    :param is_debug: when true, process the two short sample clips; otherwise
        process the four full-length films.
    :return: None
    """
    if is_debug:
        video_list_path = [r"F:\视频介质\start.mp4", r"F:\视频介质\end.mp4"]
    else:
        video_list_path = [
            r"F:\视频介质\战狼1.mp4",
            r"F:\视频介质\叶问.mp4",
            r"F:\视频介质\七月与安生.mp4",
            r"F:\视频介质\独行月球.mp4",
        ]

    for video_path in video_list_path:
        # splitext keeps names that contain extra dots intact.
        frame_dir_name = os.path.splitext(os.path.basename(video_path))[0]
        frame_dir_path = os.path.join(r"F:\PycharmProjects\clip_demo", frame_dir_name)
        # Also creates missing parent directories and tolerates reruns.
        os.makedirs(frame_dir_path, exist_ok=True)

        output_img_path = os.path.join(frame_dir_path, "%05d.jpg")
        # Pass an argument list instead of a shell string so paths containing
        # spaces or shell metacharacters reach ffmpeg unmodified.
        cmd_args = [
            "ffmpeg",
            "-i", video_path,
            "-filter:v", "fps=1/2",
            output_img_path,
            "-loglevel", "quiet",
        ]
        cmd_frames = " ".join(cmd_args)
        print(f"执行的ffmpeg命令:{cmd_frames}")

        try:
            ffmpeg_code = subprocess.call(cmd_args)
        except FileNotFoundError:
            # Without a shell, a missing executable raises instead of
            # returning a non-zero status; no video can be processed then.
            print("未找到ffmpeg可执行文件,无法抽帧")
            return
        print(f"电影:{frame_dir_name},抽帧状态:{ffmpeg_code},抽帧完成")

def test_videos_pic_tags(is_debug=True):
    """Score every extracted frame against that video's tags (image -> tags).

    For each frame directory a workbook ``<dir name>.xlsx`` is written under
    ``F:\\PycharmProjects\\clip_demo``. Row 0 holds the header (image column
    followed by one column per tag); each following row holds a frame file
    name and the softmax of ``logits_per_image`` over the tags.

    :param is_debug: when true, use the two sample frame directories;
        otherwise use the four film frame directories.
    :return: None
    """
    if is_debug:
        frame_list_path = [
            r"F:\PycharmProjects\clip_demo\end",
            r"F:\PycharmProjects\clip_demo\start",
        ]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"]]
    else:
        frame_list_path = [
            r"F:\PycharmProjects\clip_demo\战狼1",
            r"F:\PycharmProjects\clip_demo\叶问",
            r"F:\PycharmProjects\clip_demo\七月与安生",
            r"F:\PycharmProjects\clip_demo\独行月球",
        ]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"], ["拥抱", "哭泣"], ["摔倒", "大笑"]]

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = load_from_name("ViT-B-16", device=device, download_root='./')
    model.eval()

    for frame_dir, tags in zip(frame_list_path, tags_list):
        # The tokenised tags are the same for every frame of this video.
        text = clip.tokenize(tags).to(device)
        print(f"tags:{tags}")
        print(f"frame_dir:{frame_dir}")

        xlsx_name = os.path.basename(frame_dir)
        xlsx_path = os.path.join(r"F:\PycharmProjects\clip_demo", f"{xlsx_name}.xlsx")

        # The context manager closes (and saves) the workbook even if a frame
        # fails to load part-way through.
        with xlsxwriter.Workbook(xlsx_path) as wb_write:
            # Create the worksheet and its header row.
            sheet = wb_write.add_worksheet(xlsx_name)
            sheet.write(0, 0, "图片")
            for col, tag in enumerate(tags, start=1):
                sheet.write(0, col, tag)

            # Sort so rows appear in frame order; os.listdir order is arbitrary.
            for row, file_name in enumerate(sorted(os.listdir(frame_dir)), start=1):
                image_path = os.path.join(frame_dir, file_name)
                print(image_path)

                # Close the image file handle as soon as it is preprocessed.
                with Image.open(image_path) as frame:
                    image = preprocess(frame).unsqueeze(0).to(device)

                with torch.no_grad():
                    # The separately encoded/normalised features were never
                    # read in the original, so only the similarity call stays.
                    logits_per_image, _ = model.get_similarity(image, text)
                    probs = logits_per_image.softmax(dim=-1).cpu().numpy()
                print(probs)

                sheet.write(row, 0, file_name)
                # One score column per tag (the batch holds a single image).
                for col, score in enumerate(probs[0], start=1):
                    sheet.write(row, col, float(score))

def test_videos_tag_pics(is_debug=True):
    """Score each tag against all frames of its video (tag -> images).

    For each frame directory a workbook ``<dir name>.xlsx`` is written under
    ``F:\\PycharmProjects\\clip_demo``. Column 0 lists the frame file names;
    each further column holds, for one tag, the softmax of
    ``logits_per_text`` taken across all frames of that directory.

    :param is_debug: when true, use the two sample frame directories;
        otherwise use the four film frame directories.
    :return: None
    """
    if is_debug:
        frame_list_path = [
            r"F:\PycharmProjects\clip_demo\end",
            r"F:\PycharmProjects\clip_demo\start",
        ]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"]]
    else:
        frame_list_path = [
            r"F:\PycharmProjects\clip_demo\战狼1",
            r"F:\PycharmProjects\clip_demo\叶问",
            r"F:\PycharmProjects\clip_demo\七月与安生",
            r"F:\PycharmProjects\clip_demo\独行月球",
        ]
        tags_list = [["爆炸", "枪战"], ["打斗", "打拳"], ["拥抱", "哭泣"], ["摔倒", "大笑"]]

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = load_from_name("ViT-B-16", device=device, download_root='./')
    model.eval()

    for frame_dir, tags in zip(frame_list_path, tags_list):
        print(f"frame_dir:{frame_dir}")

        xlsx_name = os.path.basename(frame_dir)
        xlsx_path = os.path.join(r"F:\PycharmProjects\clip_demo", f"{xlsx_name}.xlsx")

        # The context manager closes (and saves) the workbook on every exit
        # path, including the early ``continue`` and exceptions.
        with xlsxwriter.Workbook(xlsx_path) as wb_write:
            # Create the worksheet and the first header cell.
            sheet = wb_write.add_worksheet(xlsx_name)
            sheet.write(0, 0, "图片")

            image_list = []
            # Sort so rows appear in frame order; os.listdir order is arbitrary.
            for row, file_name in enumerate(sorted(os.listdir(frame_dir)), start=1):
                image_path = os.path.join(frame_dir, file_name)
                print(image_path)
                # Close the image file handle as soon as it is preprocessed.
                with Image.open(image_path) as frame:
                    image_list.append(preprocess(frame))
                sheet.write(row, 0, file_name)

            if not image_list:
                # torch.stack raises on an empty list; nothing to score here.
                print(f"frame_dir:{frame_dir} 中没有图片,跳过")
                continue

            # NOTE(review): every frame goes into one batch; very long videos
            # may exceed device memory — confirm for full-length films.
            image = torch.stack(image_list).to(device)

            # Score all tags in one call so the image batch is encoded once
            # instead of once per tag.
            text = clip.tokenize(tags).to(device)
            with torch.no_grad():
                _, logits_per_text = model.get_similarity(image, text)
                # Assumes logits_per_text has one row per tag and one column
                # per image, so the softmax runs across the images of each
                # tag — TODO confirm against cn_clip.
                all_probs = logits_per_text.softmax(dim=-1).cpu().numpy()

            for tag_index, tag in enumerate(tags):
                col = tag_index + 1
                sheet.write(0, col, tag)
                # Keep the (1, num_images) shape the per-tag call produced.
                probs = all_probs[tag_index:tag_index + 1]
                for row, score in enumerate(probs[0], start=1):
                    sheet.write(row, col, float(score))
                print(f"{tag} probs:", probs)  # [[0.3333 0.3333 0.3333]]

# Script entry point. The guard must test ``__name__``; the bare ``name`` in
# the original is undefined and raises NameError when the file is run.
if __name__ == '__main__':
    # Extract frames from the videos.
    test_videos_frame(False)

    # One image scored against several tags.
    # test_videos_pic_tags(False)

    # One tag scored against many images.
    test_videos_tag_pics(False)

~~~