AI Content Safety Fast PoC

You're welcome to follow my GitHub repo and give it a star: https://github.com/xinyuwei-david/david-share.git. Lots of useful code is there!

AI Content Safety

AI content safety supports four types of content filtering by default, as shown in the figure below.

In this article, I will demonstrate how to use a Python program to call AI content safety to filter videos (split into images), images, and text. I will also demonstrate how to train a category.

Prepare environment

This repo uses code from https://github.com/Azure-Samples/AzureAIContentSafety.git, with minor modifications for a fast PoC.

Sample data of this PoC is in my repo: https://github.com/xinyuwei-david/david-share/tree/master/LLMs/AI-Content-Safety

#git clone https://github.com/Azure-Samples/AzureAIContentSafety.git
#cd AzureAIContentSafety/python/1.0.0

Create an AI Content Safety endpoint in the Azure portal, then:

#export CONTENT_SAFETY_KEY="***821"
#export CONTENT_SAFETY_ENDPOINT="https://**cognitiveservices.azure.com/"

Video filter

#cat sample_analyze_video.py

import os
import imageio.v3 as iio
import numpy as np
from PIL import Image
from io import BytesIO
import datetime
from tqdm import tqdm
from azure.ai.contentsafety import ContentSafetyClient
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
from azure.ai.contentsafety.models import AnalyzeImageOptions, ImageData, ImageCategory

def analyze_video(video_path=None, sampling_fps=1, fps=30, output_dir="./video-results"):
    """Sample frames from a video and analyze each one with Azure AI Content Safety.

    The video is decoded with imageio's ``pyav`` plugin and one frame out of
    every ``int(fps / sampling_fps)`` is kept. Each kept frame is saved as a
    PNG in ``output_dir`` and sent to ``ContentSafetyClient.analyze_image``.
    The per-frame severities are printed once every frame has been processed.

    Credentials are read from the ``CONTENT_SAFETY_KEY`` and
    ``CONTENT_SAFETY_ENDPOINT`` environment variables.

    Args:
        video_path: Path of the video to analyze. Defaults to
            ``sample_data/2.mp4`` next to this script.
        sampling_fps: How many frames per second of video to analyze.
        fps: Frame rate of the video. This is an assumption supplied by the
            caller, not read from the file; adjust it if the video differs.
        output_dir: Directory that receives the sampled frames as PNG files.

    Returns:
        A list with one dict per analyzed frame, holding the frame file name,
        its timestamp and the hate / self-harm / sexual / violence severities
        (``None`` when the service did not report a category).

    Raises:
        ValueError: If ``sampling_fps`` or ``fps`` is not positive.
        KeyError: If a required environment variable is missing.
        HttpResponseError: If the service rejects a frame; re-raised after the
            error details have been printed.
    """
    if sampling_fps <= 0 or fps <= 0:
        raise ValueError("sampling_fps and fps must be positive")

    key = os.environ["CONTENT_SAFETY_KEY"]
    endpoint = os.environ["CONTENT_SAFETY_ENDPOINT"]
    if video_path is None:
        video_path = os.path.abspath(
            os.path.join(os.path.abspath(__file__), "..", "./sample_data/2.mp4"))
    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))

    # Keep every `stride`-th frame. Clamp to 1 so that sampling faster than
    # the video's frame rate analyzes every frame instead of dividing by zero.
    stride = max(1, int(fps / sampling_fps))
    video = iio.imread(video_path, plugin="pyav")
    key_frames = video[::stride]

    # Result key -> category to look up in the service response.
    severity_fields = (
        ("hate_severity", ImageCategory.HATE),
        ("self_harm_severity", ImageCategory.SELF_HARM),
        ("sexual_severity", ImageCategory.SEXUAL),
        ("violence_severity", ImageCategory.VIOLENCE),
    )

    results = []  # Analysis result of every sampled frame.
    os.makedirs(output_dir, exist_ok=True)

    for key_frame_idx, frame_array in enumerate(
            tqdm(key_frames, desc="Processing video", total=len(key_frames))):
        # Encode the frame as PNG once and reuse the bytes for both the local
        # copy and the request payload.
        frame = Image.fromarray(frame_array)
        frame_bytes = BytesIO()
        frame.save(frame_bytes, format="PNG")
        png_bytes = frame_bytes.getvalue()

        # Save the frame locally.
        frame_filename = f"frame_{key_frame_idx}.png"
        frame_path = os.path.join(output_dir, frame_filename)
        with open(frame_path, "wb") as frame_file:
            frame_file.write(png_bytes)

        request = AnalyzeImageOptions(image=ImageData(content=png_bytes))

        # Position of this frame in the video, derived from the real stride so
        # it stays correct when fps is not a multiple of sampling_fps.
        frame_time_ms = key_frame_idx * stride * 1000 / fps
        frame_timestamp = datetime.timedelta(milliseconds=frame_time_ms)
        print(f"Analyzing video at {frame_timestamp}")
        try:
            response = client.analyze_image(request)
        except HttpResponseError as e:
            print(f"Analyze video failed at {frame_timestamp}")
            if e.error:
                print(f"Error code: {e.error.code}")
                print(f"Error message: {e.error.message}")
            # Always re-raise: there is no response to continue with.
            raise

        frame_result = {
            "frame": frame_filename,
            "timestamp": str(frame_timestamp),
        }
        for result_key, category in severity_fields:
            found = next(
                (item for item in response.categories_analysis if item.category == category),
                None)
            frame_result[result_key] = found.severity if found else None
        results.append(frame_result)

    # Print the analysis results of all frames.
    for result in results:
        print(result)

    return results

# Run the sample only when this file is executed as a script, not on import.
if __name__ == "__main__":
    analyze_video()

See sample_data/2.mp4; the following is one frame from the video:

Run the python file:

python3 sample_analyze_video.py

The process is as follows:

The results are:

We can see which frames have issues.

Image filter

We could also use other scripts:

(base) root@davidwei:/mnt/c/david-share/AzureAIContentSafety/python/1.0.0# cat sample_analyze_image.py

# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
import os

from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeImageOptions, ImageData, ImageCategory
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError

# Sample: Analyze image in sync request
def analyze_image(image_path=None):
    """Analyze one image with Azure AI Content Safety and print its severities.

    Credentials are read from the ``CONTENT_SAFETY_KEY`` and
    ``CONTENT_SAFETY_ENDPOINT`` environment variables. The image is sent in a
    single synchronous ``analyze_image`` request.

    Args:
        image_path: Path of the image to analyze. Defaults to
            ``sample_data/2.jpg`` next to this script.

    Raises:
        KeyError: If a required environment variable is missing.
        HttpResponseError: If the service rejects the request; re-raised after
            the error details have been printed.
    """
    key = os.environ["CONTENT_SAFETY_KEY"]
    endpoint = os.environ["CONTENT_SAFETY_ENDPOINT"]
    if image_path is None:
        image_path = os.path.abspath(
            os.path.join(os.path.abspath(__file__), "..", "./sample_data/2.jpg"))

    # Create a Content Safety client
    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))

    # Build request
    with open(image_path, "rb") as file:
        request = AnalyzeImageOptions(image=ImageData(content=file.read()))

    # Analyze image
    try:
        response = client.analyze_image(request)
    except HttpResponseError as e:
        print("Analyze image failed.")
        if e.error:
            print(f"Error code: {e.error.code}")
            print(f"Error message: {e.error.message}")
            raise
        # No structured error payload: fall back to printing the exception.
        print(e)
        raise

    # Printed label -> category to look up in the service response.
    labelled_categories = (
        ("Hate", ImageCategory.HATE),
        ("SelfHarm", ImageCategory.SELF_HARM),
        ("Sexual", ImageCategory.SEXUAL),
        ("Violence", ImageCategory.VIOLENCE),
    )
    for label, category in labelled_categories:
        # Default to None so a category missing from the response is skipped
        # instead of raising StopIteration.
        found = next(
            (item for item in response.categories_analysis if item.category == category),
            None)
        if found:
            print(f"{label} severity: {found.severity}")

# Run the sample only when this file is executed as a script, not on import.
if __name__ == "__main__":
    analyze_image()

(base) root@davidwei:/mnt/c/david-share/AzureAIContentSafety/python/1.0.0# python sample_analyze_image.py

Hate severity: 0
SelfHarm severity: 0
Sexual severity: 2
Violence severity: 0

Text filter

When using the text content filter, we usually need to customize a blocklist of words.

(base) root@davidwei:/mnt/c/david-share/AzureAIContentSafety/python/1.0.0# cat sample_manage_blocklist.py

# coding: utf-8

# Sample: Create or modify a blocklist
def create_or_update_text_blocklist():
# [START create_or_update_text_blocklist]

import os
from azure.ai.contentsafety import BlocklistClient
from azure.ai.contentsafety.models import TextBlocklist
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError