Perception Encoder
Use Meta's Perception Encoder to compute image and text embeddings on a Dedicated Deployment or a self-hosted Inference server.
Code sample
pip install requests opencv-python
import base64
import os
import urllib.request
import cv2
import requests
# Base URL of the server that exposes the Perception Encoder endpoint.
# Replace the placeholder host with your own deployment's address.
URL = "https://your-deployment.roboflow.cloud"

# Download a sample image into the current working directory.
image_url = "https://media.roboflow.com/notebooks/examples/dog.jpeg"
image_path = "dog.jpeg"
urllib.request.urlretrieve(image_url, image_path)

# cv2.imread returns None (it does not raise) when the file cannot be
# read or decoded, so fail early with a clear message.
image = cv2.imread(image_path)
if image is None:
    raise RuntimeError(f"Could not read image file: {image_path}")

# Re-encode the image as JPEG and base64-encode the bytes so they can be
# embedded in the JSON request body.
encoded_ok, buffer = cv2.imencode(".jpg", image)
if not encoded_ok:
    raise RuntimeError(f"Could not encode image as JPEG: {image_path}")
image_base64 = base64.b64encode(buffer).decode("utf-8")

# The API key is read from the API_KEY environment variable.
response = requests.post(
    f"{URL}/perception_encoder/embed_image",
    json={
        "api_key": os.getenv("API_KEY"),
        "image": {"type": "base64", "value": image_base64},
    },
    # Without a timeout, requests can wait forever on an unresponsive server.
    timeout=60,
)
# Surface HTTP errors (bad key, wrong URL, ...) here instead of as a
# confusing KeyError or JSON decode error below.
response.raise_for_status()
result = response.json()

# Take the first entry of the returned "embeddings" list.
embedding = result["embeddings"][0]
print(f"Embedding length: {len(embedding)}")
print(f"First values: {embedding[:5]}")
Last updated
Was this helpful?