Admin Guide › Dataset Registration

Dataset Registration

Dataset registration is typically the first step after setting up the workspace. Users upload their files or images to the S3 bucket, and those files must then be registered as a dataset before they appear on V7.

This is the admin reference. The user-facing walkthrough lives under User Guide → Dataset Registration.

Single file registration

Use the sample below to register a single file that has already been uploaded to the S3 bucket.

import os

import requests

# Generate this key on the V7 portal and export it as the V7_API_KEY
# environment variable. Reading it from the environment keeps the secret out
# of scripts, notebooks and version control.
api_key = os.environ["V7_API_KEY"]

team_slug = "gene-gred-ace-nlp"          # Workspace name in hyphenated (slug) form
dataset_slug = "data"                    # Any name you want to display on V7
storage_name = "v7-gene-gred-ace-nlp-prod"  # S3 bucket name

headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": f"ApiKey {api_key}",
}

# One item with a single slot that points at the object already stored in S3.
payload = {
    "items": [
        {
            "path": "/",
            "slots": [
                {
                    "as_frames": "false",
                    "slot_name": "1",
                    "storage_key": "data/000000000.png",  # S3 object key (folder/filename)
                    "file_name": "000000000.png",
                }
            ],
            "name": "000000000.png",
        }
    ],
    "dataset_slug": dataset_slug,
    "storage_slug": storage_name,
}

response = requests.post(
    f"https://darwin.v7labs.com/api/v2/teams/{team_slug}/items/register_existing",
    headers=headers,
    json=payload,
    timeout=60,  # fail instead of hanging forever if the API is unreachable
)

if response.status_code != 200:
    # Error responses are not guaranteed to be JSON, so report the raw text
    # and do not attempt to parse the body.
    print("request failed", response.text)
else:
    body = response.json()
    blocked_items = body.get("blocked_items") or []
    registered_items = body.get("items") or []

    if blocked_items:
        print("failed to register items:")
        for item in blocked_items:
            print("\t - ", item)
        # A request can be partially successful: list whatever did register.
        if registered_items:
            print("successfully registered items:")
            for item in registered_items:
                print("\t - ", item)
    else:
        print("success")

Multi-file registration

If you need multi-file registration, use the sample below.

import os

import boto3
import requests

dev = boto3.session.Session(profile_name="default")
# Connect to the S3 bucket
s3 = dev.client("s3")

# Your AWS bucket name
# NOTE(review): this sample lists a different bucket than the one named in
# storage_name below - confirm both refer to the bucket you are registering.
bucket_name = "v7-roche-pred-opm-prod"

# Subfolder to register. Use prefix = "" to list the whole bucket.
prefix = "data/acdc_batch2_dcm"

# A single list_objects_v2 call returns at most 1000 keys, so use the
# paginator to walk every page of results.
paginator = s3.get_paginator("list_objects_v2")
pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)

# V7 API setup
# Generate this key on the V7 portal and export it as the V7_API_KEY
# environment variable. Reading it from the environment keeps the secret out
# of scripts, notebooks and version control.
api_key = os.environ["V7_API_KEY"]

team_slug = "gene-gred-ace-nlp"          # Workspace name in hyphenated (slug) form
dataset_slug = "data"                    # Any name you want to display on V7
storage_name = "v7-gene-gred-ace-nlp-prod"  # S3 bucket name

headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": f"ApiKey {api_key}",
}

# Initialize payload
payload = {
    "items": [],
    "dataset_slug": dataset_slug,
    "storage_slug": storage_name,
}

# Iterate over each object in the bucket
for page in pages:
    for obj in page.get("Contents", []):
        storage_key = obj["Key"]
        file_name = storage_key.split("/")[-1]
        # Keys ending in "/" are folder placeholders with no file name; skip them.
        if not file_name:
            continue
        payload["items"].append(
            {
                "path": "/",
                "slots": [
                    {
                        "as_frames": "false",
                        "slot_name": "1",
                        "storage_key": storage_key,
                        "file_name": file_name,
                    }
                ],
                "name": file_name,
            }
        )

# To test before loading to V7, uncomment the print and comment everything below it.
# print(payload)

# Nothing matched the prefix: stop here rather than posting an empty request.
if not payload["items"]:
    raise SystemExit(f"no objects found in {bucket_name} under prefix {prefix!r}")

# NOTE(review): every listed object is sent in one request; confirm V7's
# per-request item limit and split into batches if the listing is large.

# Send request to V7
response = requests.post(
    f"https://darwin.v7labs.com/api/v2/teams/{team_slug}/items/register_existing",
    headers=headers,
    json=payload,
    verify=True,
    timeout=300,  # fail instead of hanging forever if the API is unreachable
)

# Process response
if response.status_code != 200:
    # Error responses are not guaranteed to be JSON, so report the raw text
    # and do not attempt to parse the body.
    print("request failed", response.text)
else:
    body = response.json()
    blocked_items = body.get("blocked_items") or []
    registered_items = body.get("items") or []

    if blocked_items:
        print("failed to register items:")
        for item in blocked_items:
            print("\t - ", item)
        # A request can be partially successful: list whatever did register.
        if registered_items:
            print("successfully registered items:")
            for item in registered_items:
                print("\t - ", item)
    else:
        print("success")
⚠️ If you hit ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED], see SSL Certificate Error.