Dataset Registration
Dataset registration is typically the first step after setting up the workspace. Once users have uploaded their files or images to the S3 bucket, they must register them as a dataset.
This is the admin reference. The user-facing walkthrough lives under User Guide → Dataset Registration.
Single file registration
The sample below registers a single file.
# Register one object that already exists in the S3 bucket as a V7 dataset item.
import requests

# Replace this with your own key. Do not commit a real key to source control;
# prefer an environment variable or a secret store.
api_key = "dfdf3r4cg.8SFv1FTV0BfRvKky_VU0MlmIPxJ4t6Sm"  # Generate this key on the V7 portal
team_slug = "gene-gred-ace-nlp"  # Workspace name with - added to it
dataset_slug = "data"  # Any name you want to display on V7
storage_name = "v7-gene-gred-ace-nlp-prod"  # S3 bucket name

headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": f"ApiKey {api_key}",
}

payload = {
    "items": [
        {
            "path": "/",
            "slots": [
                {
                    "as_frames": "false",
                    "slot_name": "1",
                    "storage_key": "data/000000000.png",  # provide the s3 folder/filename
                    "file_name": "000000000.png",
                }
            ],
            "name": "000000000.png",
        }
    ],
    "dataset_slug": dataset_slug,
    "storage_slug": storage_name,
}

response = requests.post(
    f"https://darwin.v7labs.com/api/v2/teams/{team_slug}/items/register_existing",
    headers=headers,
    json=payload,
)

# Check the status before decoding: an error response is not guaranteed to
# carry a JSON body, and response.json() would raise before anything is printed.
if response.status_code != 200:
    print("request failed", response.text)
    raise SystemExit(1)

body = response.json()

# Treat a missing or null list the same as an empty one.
blocked_items = body.get("blocked_items") or []
if blocked_items:
    print("failed to register items:")
    for item in blocked_items:
        print("\t - ", item)
    registered_items = body.get("items") or []
    if registered_items:
        print("successfully registered items:")
        for item in registered_items:
            print("\t - ", item)
    # Stop with a non-zero exit code so callers can detect the partial failure.
    raise SystemExit(1)

# Nothing was blocked: execution continues to the final success message below.
print("success")
Multi-file registration
If you need multi-file registration, use the sample below.
# Register every object under an S3 prefix as V7 dataset items in one request.
import boto3
import requests

dev = boto3.session.Session(profile_name="default")
# Connect to the S3 bucket
s3 = dev.client("s3")
# Your AWS bucket name
# NOTE(review): this differs from storage_name below, which is also described
# as the S3 bucket name. Confirm both refer to the same bucket.
bucket_name = "v7-roche-pred-opm-prod"

# A single list_objects_v2 call returns at most 1,000 keys, so walk every page
# with a paginator. To list the whole bucket, drop the Prefix argument.
pages = s3.get_paginator("list_objects_v2").paginate(
    Bucket=bucket_name,
    Prefix="data/acdc_batch2_dcm",
)

# V7 API setup
# Replace this with your own key. Do not commit a real key to source control;
# prefer an environment variable or a secret store.
api_key = "dfdf3r4cg.8SFv1FTV0BfRvKky_VU0MlmIPxJ4t6Sm"  # Generate this key on the V7 portal
team_slug = "gene-gred-ace-nlp"  # Workspace name with - added to it
dataset_slug = "data"  # Any name you want to display on V7
storage_name = "v7-gene-gred-ace-nlp-prod"  # S3 bucket name

headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": f"ApiKey {api_key}",
}

# Initialize payload
payload = {
    "items": [],
    "dataset_slug": dataset_slug,
    "storage_slug": storage_name,
}

# Iterate over each object in the bucket
for page in pages:
    for obj in page.get("Contents", []):
        storage_key = obj["Key"]
        file_name = storage_key.rsplit("/", 1)[-1]
        # Keys ending in "/" have no file name after the last slash; skip them.
        if not file_name:
            continue
        payload["items"].append(
            {
                "path": "/",
                "slots": [
                    {
                        "as_frames": "false",
                        "slot_name": "1",
                        "storage_key": storage_key,
                        "file_name": file_name,
                    }
                ],
                "name": file_name,
            }
        )

# To test before loading to V7, uncomment the print and comment everything below it.
# print(payload)

# Send request to V7
response = requests.post(
    f"https://darwin.v7labs.com/api/v2/teams/{team_slug}/items/register_existing",
    headers=headers,
    json=payload,
    verify=True,
)

# Process response
# Check the status before decoding: an error response is not guaranteed to
# carry a JSON body, and response.json() would raise before anything is printed.
if response.status_code != 200:
    print("request failed", response.text)
    raise SystemExit(1)

body = response.json()

# Treat a missing or null list the same as an empty one.
blocked_items = body.get("blocked_items") or []
if blocked_items:
    print("failed to register items:")
    for item in blocked_items:
        print("\t - ", item)
    registered_items = body.get("items") or []
    if registered_items:
        print("successfully registered items:")
        for item in registered_items:
            print("\t - ", item)
    # Stop with a non-zero exit code so callers can detect the partial failure.
    raise SystemExit(1)

# Nothing was blocked: execution continues to the final success message below.
print("success")
⚠️ Hitting ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED]? See SSL Certificate Error.