Query by Text

1 Script to Query by Text

1.1 Set up the OpenSearch client, the S3 client, and the embedding model.

# Test configuration: replace the placeholders with the real OpenSearch
# domain and master-user credentials before running.
host, port = "<your opensearch domain>", 443
user, password = "<your master user>", "<your master password>"

# OpenSearch client over SSL with certificate verification and basic auth.
# max_retries and retry_on_timeout are not set.
client = OpenSearch(
    hosts=[dict(host=host, port=port)],
    http_auth=(user, password),
    connection_class=RequestsHttpConnection,
    use_ssl=True,
    verify_certs=True,
    timeout=60,
)

# S3 location of the keyframe images: s3://<BUCKET_NAME>/<FOLDER_NAME>/...
BUCKET_NAME = "ai-challenge-2024"
FOLDER_NAME = "keyframes"
s3 = boto3.client("s3")

# Model used to turn query text into the vector sent to OpenSearch.
model = SentenceTransformer("clip-ViT-B-32")

1.2 Query by Text

def query(client, model, text, k=5, *, index_name="semantic-index",
          vector_field="image_embedding"):
    """Run a k-NN search for *text* and return the matching keyframes.

    The text is encoded with ``model.encode`` and the resulting vector is
    sent to ``client.search`` as a ``knn`` query against ``vector_field``.

    Args:
        client: Object exposing ``search(index=..., body=...)`` that returns
            a mapping with the hits under ``["hits"]["hits"]``.
        model: Object whose ``encode(text)`` result provides ``tolist()``.
        text: Query text to embed.
        k: Number of neighbours requested; also used as the response size.
        index_name: Index to search. Defaults to ``"semantic-index"``.
        vector_field: Name of the vector field queried. Defaults to
            ``"image_embedding"``.

    Returns:
        A list of dicts, one per hit and in the order the hits were
        returned, each shaped like
        ``{"image": <_source.image>, "video": <_source.video>, "score": <_score>}``.

    Raises:
        KeyError: If the response or a hit lacks one of the expected keys.
    """
    query_vector = model.encode(text)
    search_body = {
        "size": k,
        "query": {
            "knn": {
                vector_field: {
                    # tolist() turns the encoded vector into a plain list
                    # before it is placed in the request body.
                    "vector": query_vector.tolist(),
                    "k": k,
                }
            }
        },
    }

    response = client.search(index=index_name, body=search_body)

    return [
        {
            "image": hit["_source"]["image"],
            "video": hit["_source"]["video"],
            "score": hit["_score"],
        }
        for hit in response["hits"]["hits"]
    ]


# Example run: fetch the five best matches for a sample description and
# print the raw result list.
result_query = query(
    client=client,
    model=model,
    text=(
        "A drowning prevention drill is taking place. "
        "Flooding occurs, and soldiers are deployed to schools "
        "to clean up the floodwaters"
    ),
    k=5,
)
print(result_query)

1.3 Visualize the Result

def convert_to_s3_key(video, image):
    """Build the ``<video>/<image>.jpg`` key fragment for a keyframe.

    Everything from the first ``.`` onward is dropped from ``video``, and
    ``image`` is zero-padded to at least three characters.

    Examples:
        ``("L03_V025.npy", 18)`` -> ``"L03_V025/018.jpg"``
        ``("L03_V025.npy", 1)``  -> ``"L03_V025/001.jpg"``
    """
    folder, _, _ = video.partition(".")
    file_name = f"{str(image).zfill(3)}.jpg"
    return "/".join((folder, file_name))

# Show every hit side by side in a single row, in the order returned by query().
fig = plt.figure(figsize=(20, 10))

# One column per hit, so the grid matches whatever k was requested
# (a fixed 5-column grid makes add_subplot fail once there are more than 5 hits).
n_cols = max(len(result_query), 1)

# enumerate() supplies the 1-based subplot position directly; looking the
# position up with list.index() would send duplicate hits to the same subplot.
for position, res in enumerate(result_query, start=1):
    ax = fig.add_subplot(1, n_cols, position)
    s3_key = f"{FOLDER_NAME}/{convert_to_s3_key(res['video'], res['image'])}"
    img_bytes = s3.get_object(Bucket=BUCKET_NAME, Key=s3_key)["Body"].read()
    ax.imshow(Image.open(io.BytesIO(img_bytes)))

plt.show()

2 Test some queries and visualize the results

  1. A drowning prevention drill is taking place. Flooding occurs, and soldiers are deployed to schools to clean up the floodwaters.

  2. A video clip shows mothers taking care of children with chickenpox. Chickenpox causes blisters to appear all over the children’s bodies, accompanied by uncomfortable itching.

  3. A video clip of an apple orchard owned by a man. The orchard is vast, filled with many ripe red apples, and visitors happily pick apples, putting them into bags to take home.