TileDB Backend for xarray

About this Example

What it shows

This example shows some of the basic usage for opening a TileDB array in xarray using the TileDB backend.

Set-up Requirements

This example requires tiledb-cf to be installed and uses the tiledb, xarray, and numpy libraries.

import tiledb
import xarray as xr
import numpy as np
# Set names for the output generated by the example.
output_dir = "output/tiledb-xarray-basics"
array_uri = f"{output_dir}/array"
group_uri = f"{output_dir}/group"

# Reset the output folder so every run of the example starts from a clean state.
import os
import shutil

# Remove output left over from a previous run; a missing folder is not an error.
shutil.rmtree(output_dir, ignore_errors=True)
# `output_dir` is a nested path, so missing parent folders must be created too:
# a plain `os.mkdir` raises FileNotFoundError when "output/" does not exist yet.
# `exist_ok` is deliberately left off so that a folder which could not be
# removed above is still reported here instead of being silently reused.
os.makedirs(output_dir)

Data for examples

The examples below store data for the following functions:

\[ \text{ripple1}(x, y) = \cos(x^2 + y^2), \]

\[ \text{ripple2}(x, y) = \cos(x^2 - y^2) \]

and

\[ \text{z}(x, y) = x \exp\left(-\dfrac{1}{2} y \right) \]

for \(0 \leq x \leq 4\) and \(-4 \leq y \leq 4\).

# Number of sample points along each axis.
x_size = 250
y_size = 500

# Evenly spaced sample points covering 0 <= x <= 4 and -4 <= y <= 4.
x_data = np.linspace(0.0, 4.0, num=x_size)
y_data = np.linspace(-4.0, 4.0, num=y_size)

# "ij" indexing puts x on the first axis, so both grids have shape (x_size, y_size).
xx, yy = np.meshgrid(x_data, y_data, indexing="ij")

# Both ripple functions share the squared terms and differ only in the sign between them.
x_squared = xx**2
y_squared = yy**2
ripple1_data = np.cos(x_squared + y_squared)
ripple2_data = np.cos(x_squared - y_squared)

# z(x, y) = x * exp(-y / 2)
y_decay = np.exp(-yy / 2.0)
z_data = xx * y_decay

Part 1. Open TileDB group in xarray

We create an empty group and add some basic metadata to it.

# Create the TileDB group and add initial metadata.
group_description = "Small example of an xarray-compatible group"
tiledb.Group.create(group_uri)
# Reopen the freshly created (still empty) group in write mode to attach the description.
with tiledb.Group(group_uri, mode="w") as new_group:
    new_group.meta["description"] = group_description

Next we add a TileDB array to the group and write data to the array.

# Create the data array and add it to the group.
array_name = "ripples"
# The array is stored inside the group folder, which is what allows it to be
# registered below with a URI relative to the group. Deriving the location from
# `array_name` keeps the on-disk folder and the group member entry in sync.
ripples_uri = f"{group_uri}/{array_name}"

# Two float64 attributes defined over the same integer-indexed (x, y) domain.
schema = tiledb.ArraySchema(
    domain=tiledb.Domain(
        tiledb.Dim("x", domain=(0, x_size - 1), dtype=np.uint32),
        tiledb.Dim("y", domain=(0, y_size - 1), dtype=np.uint32),
    ),
    attrs=(
        tiledb.Attr("ripple1", np.float64),
        tiledb.Attr("ripple2", np.float64),
    ),
)
tiledb.Array.create(ripples_uri, schema)
with tiledb.Group(group_uri, mode="w") as group:
    group.add(uri=array_name, name=array_name, relative=True)

# Write data to the array.
with tiledb.open(ripples_uri, mode="w") as array:
    # One write fills both attributes over the full domain.
    array[:, :] = {
        "ripple1": ripple1_data,
        "ripple2": ripple2_data,
    }
    # Keys prefixed with `__tiledb_attr.<attribute name>.` carry per-attribute
    # metadata (here a description of each function); the unprefixed key
    # describes the array as a whole.
    array.meta["__tiledb_attr.ripple1.description"] = "cos(x^2 + y^2)"
    array.meta["__tiledb_attr.ripple2.description"] = "cos(x^2 - y^2)"
    array.meta["description"] = "Small example dense array"

The TileDB group is opened with xarray using the tiledb engine. This allows xarray to access the data using its standard lazy loading. Once we’ve created the dataset, we can access and slice the data using standard xarray capabilities.

# Open the TileDB group as an xarray dataset through the "tiledb" backend engine.
# The call is left as a bare expression so that, in a notebook, the dataset is
# displayed as the cell output.
xr.open_dataset(group_uri, engine="tiledb")

Part 2. Adding coordinates

This example shows how to use a TileDB group to open multiple arrays with xarray. We want to include coordinate values for the xarray dimensions. For arrays with only one attribute, the xarray backend will use the name of the array (if provided) for the variable name instead of the attribute name.

# Both coordinate arrays are stored inside the group folder.
x_coord_uri = f"{group_uri}/x_coord"
y_coord_uri = f"{group_uri}/y_coord"

# Create array and add data for the x-coordinate.
x_dim = tiledb.Dim("x", domain=(0, x_size - 1), dtype=np.uint32)
x_schema = tiledb.ArraySchema(
    domain=tiledb.Domain(x_dim),
    attrs=[tiledb.Attr("x_coordinate", np.float64)],
)
tiledb.Array.create(x_coord_uri, x_schema)
with tiledb.open(x_coord_uri, mode="w") as x_array:
    x_array[:] = x_data

# Create array and add data for the y-coordinate.
y_dim = tiledb.Dim("y", domain=(0, y_size - 1), dtype=np.uint32)
y_schema = tiledb.ArraySchema(
    domain=tiledb.Domain(y_dim),
    attrs=[tiledb.Attr("y_coordinate", np.float64)],
)
tiledb.Array.create(y_coord_uri, y_schema)
with tiledb.open(y_coord_uri, mode="w") as y_array:
    y_array[:] = y_data

# Add both arrays to the TileDB group. Each one is registered under the name of
# the dimension it describes ("x" / "y"), using a URI relative to the group.
with tiledb.Group(group_uri, mode="w") as group:
    for member_name, member_uri in (("x", "x_coord"), ("y", "y_coord")):
        group.add(uri=member_uri, name=member_name, relative=True)

# Reopen the group to see the dataset with the newly added arrays.
xr.open_dataset(group_uri, engine="tiledb")

Part 3. Adding additional arrays

A TileDB group can contain arrays that already exist. We add another array to our TileDB group.

We will add a new array to the group that has the same x and y dimensions. Since the array only has 1 TileDB attribute, the xarray backend will use the name of the array rather than the attribute for the variable name.

# Array for z data: a single float64 attribute over the same (x, y) index
# domain used by the ripples array.
z_domain = tiledb.Domain(
    tiledb.Dim("x", domain=(0, x_size - 1), dtype=np.uint32),
    tiledb.Dim("y", domain=(0, y_size - 1), dtype=np.uint32),
)
z_attr = tiledb.Attr("values", np.float64)
schema = tiledb.ArraySchema(domain=z_domain, attrs=[z_attr])

# This array is created at `array_uri`, i.e. outside the group folder.
tiledb.Array.create(array_uri, schema)
with tiledb.open(array_uri, mode="w") as z_array:
    z_array[:] = z_data
    z_array.meta["__tiledb_attr.values.description"] = "x * exp(-y/2)"

# Add to the group. The array is not stored under the group folder, so it is
# registered with its own URI (relative=False) and given the member name "z".
with tiledb.Group(group_uri, mode="w") as group:
    group.add(uri=array_uri, name="z", relative=False)

# Open the dataset
xr.open_dataset(group_uri, engine="tiledb")