检测与标注
Supervision 提供了一个无缝的流程,用于标注由各种物体检测和分割模型生成的预测结果。本指南展示了如何使用 Inference、Ultralytics 或 Transformers 等包进行推理。接着,你将学习如何将这些预测导入 Supervision 并用于标注原始图像。
运行检测
首先,你需要从你的物体检测或分割模型中获取预测结果。
“Inference”
import cv2
from inference import get_model
model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
“Ultralytics”
import cv2
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
“Transformers”
import torch
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
将预测结果加载到 Supervision
现在我们有了模型的预测结果,可以将它们加载到 Supervision 中。
“Inference”
我们可以使用 sv.Detections.from_inference
方法加载,该方法支持检测和分割模型的结果。
import cv2
import supervision as sv
from inference import get_model
model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)
“Ultralytics”
使用 sv.Detections.from_ultralytics
方法加载,同样支持检测和分割模型结果。
import cv2
import supervision as sv
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)
“Transformers”
使用 sv.Detections.from_transformers
方法加载,支持检测和分割模型结果。
import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
transformers_results=results,
id2label=model.config.id2label)
你也可以使用以下方法加载来自其他计算机视觉框架和库的预测:
from_deepsparse
(Deepsparse)from_detectron2
(Detectron2)from_mmdetection
(MMDetection)from_sam
(Segment Anything Model)from_yolo_nas
(YOLO-NAS)
使用检测结果标注图像
最后,我们用预测结果对图像进行标注。因为我们使用的是物体检测模型,我们将用到 sv.BoxAnnotator
和 sv.LabelAnnotator
这两个类。
“Inference”
import cv2
import supervision as sv
from inference import get_model
model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
annotated_image = box_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections)
“Ultralytics”
import cv2
import supervision as sv
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
annotated_image = box_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections)
“Transformers”
import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
transformers_results=results,
id2label=model.config.id2label)
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
annotated_image = box_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections)
显示自定义标签
默认情况下,sv.LabelAnnotator
会使用检测结果的 class_name
(如果有)或 class_id
作为标签。你可以通过向 annotate
方法传入自定义的 labels
列表来覆盖这一行为。
“Inference”
import cv2
import supervision as sv
from inference import get_model
model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
labels = [
f"{class_name} {confidence:.2f}"
for class_name, confidence
in zip(detections['class_name'], detections.confidence)
]
annotated_image = box_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections, labels=labels)
“Ultralytics”
import cv2
import supervision as sv
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
labels = [
f"{class_name} {confidence:.2f}"
for class_name, confidence
in zip(detections['class_name'], detections.confidence)
]
annotated_image = box_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections, labels=labels)
“Transformers”
import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
transformers_results=results,
id2label=model.config.id2label)
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
labels = [
f"{class_name} {confidence:.2f}"
for class_name, confidence
in zip(detections['class_name'], detections.confidence)
]
annotated_image = box_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections, labels=labels)
使用分割结果标注图像
如果你使用的是分割模型,sv.MaskAnnotator
可以替代 sv.BoxAnnotator
,用来绘制掩码而不是边框。
“Inference”
import cv2
import supervision as sv
from inference import get_model
model = get_model(model_id="yolov8n-seg-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)
annotated_image = mask_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections)
“Ultralytics”
import cv2
import supervision as sv
from ultralytics import YOLO
model = YOLO("yolov8n-seg.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)
annotated_image = mask_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections)
“Transformers”
import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForSegmentation
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50-panoptic")
model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")
image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_segmentation(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
transformers_results=results,
id2label=model.config.id2label)
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)
labels = [
f"{class_name} {confidence:.2f}"
for class_name, confidence
in zip(detections['class_name'], detections.confidence)
]
annotated_image = mask_annotator.annotate(
scene=image, detections=detections)
annotated_image = label_annotator.annotate(
scene=annotated_image, detections=detections, labels=labels)