import tiledb
import xarray as xr
import numpy as np
TileDB Backend for xarray
About this Example
What it shows
This example shows some of the basic usage for opening a TileDB array in xarray using the TileDB backend.
Set-up Requirements
This example requires `tiledb-cf` to be installed and uses the `tiledb`, `xarray`, and `numpy` libraries.
# Set names for the output generated by the example.
output_dir = "output/tiledb-xarray-basics"
array_uri = f"{output_dir}/array"
group_uri = f"{output_dir}/group"

# Reset output folder
import os
import shutil

# Remove anything left over from a previous run, then recreate the folder.
# `makedirs` also creates the parent `output/` directory when it is missing,
# so the example runs from a clean checkout.
shutil.rmtree(output_dir, ignore_errors=True)
os.makedirs(output_dir, exist_ok=True)
Data for examples
The following examples will show storing data for the following functions:
\[ \text{ripple1}(x, y) = \cos(x^2 + y^2), \]
\[ \text{ripple2}(x, y) = \cos(x^2 - y^2) \]
and
\[ \text{z}(x, y) = x \exp\left(-\dfrac{1}{2} y \right) \]
for \(0 \leq x \leq 4\), \(-4 \leq y \leq 4\), and \(t = 0, 0.25, 0.5, \ldots\).
# Number of grid points along each dimension.
x_size = 250
y_size = 500

# Evenly spaced coordinate values: 0 <= x <= 4 and -4 <= y <= 4.
x_data = np.linspace(0.0, 4.0, x_size)
y_data = np.linspace(-4.0, 4.0, y_size)

# "ij" indexing keeps x as the first axis, so every grid has shape
# (x_size, y_size).
xx, yy = np.meshgrid(x_data, y_data, indexing="ij")

# ripple1(x, y) = cos(x^2 + y^2) and ripple2(x, y) = cos(x^2 - y^2).
ripple1_data = np.cos(xx**2 + yy**2)
ripple2_data = np.cos(xx**2 - yy**2)

# z(x, y) = x * exp(-y / 2).
z_data = xx * np.exp(-yy / 2.0)
Part 1. Open TileDB group in xarray
We create an empty group and add some basic metadata to it.
# Create the TileDB group and add initial metadata.
tiledb.Group.create(group_uri)
with tiledb.Group(group_uri, mode="w") as group:
    group.meta["description"] = "Small example of an xarray-compatible group"
Next we add a TileDB array to the group and write data to the array.
# Create the data array and add it to the group.
# The array is dense and 2D, indexed by integer positions along x and y,
# with one float64 attribute per ripple function.
schema = tiledb.ArraySchema(
    domain=tiledb.Domain(
        tiledb.Dim("x", domain=(0, x_size - 1), dtype=np.uint32),
        tiledb.Dim("y", domain=(0, y_size - 1), dtype=np.uint32),
    ),
    attrs=(
        tiledb.Attr("ripple1", np.float64),
        tiledb.Attr("ripple2", np.float64),
    ),
)
array_name = "ripples"
tiledb.Array.create(f"{group_uri}/{array_name}", schema)
# The array is stored inside the group folder, so it is registered with a
# URI relative to the group.
with tiledb.Group(group_uri, mode="w") as group:
    group.add(uri=array_name, name=array_name, relative=True)
# Write data to the array.
with tiledb.open(f"{group_uri}/ripples", mode="w") as array:
    # Write both attributes over the full domain in a single call.
    array[:, :] = {
        "ripple1": ripple1_data,
        "ripple2": ripple2_data,
    }
    # Metadata keys prefixed with `__tiledb_attr.<attribute name>.` describe
    # a single attribute; the unprefixed key describes the whole array.
    array.meta["__tiledb_attr.ripple1.description"] = "cos(x^2 + y^2)"
    array.meta["__tiledb_attr.ripple2.description"] = "cos(x^2 - y^2)"
    array.meta["description"] = "Small example dense array"
The TileDB array is opened with xarray using the `tiledb` engine. This allows xarray to access the data using its standard lazy loading. Once we’ve opened the dataset, we can access and slice the data using standard xarray capabilities.
# Open the TileDB group as an xarray dataset using the TileDB backend.
xr.open_dataset(group_uri, engine="tiledb")
Part 2. Adding coordinates
This example shows how to use a TileDB group to open multiple arrays with xarray. We want to include coordinate values for the xarray dimensions. For arrays with only one attribute, the xarray backend will use the name of the array (if provided) for the variable name instead of the attribute name.
# Create array and add data for the x-coordinate.
# The array is 1D over the same `x` index dimension used by the data array.
x_schema = tiledb.ArraySchema(
    domain=tiledb.Domain(tiledb.Dim("x", domain=(0, x_size - 1), dtype=np.uint32)),
    attrs=[tiledb.Attr("x_coordinate", np.float64)],
)
tiledb.Array.create(f"{group_uri}/x_coord", x_schema)
with tiledb.open(f"{group_uri}/x_coord", mode="w") as array:
    array[:] = x_data
# Create array and add data for the y-coordinate.
# The array is 1D over the same `y` index dimension used by the data array.
y_schema = tiledb.ArraySchema(
    domain=tiledb.Domain(tiledb.Dim("y", domain=(0, y_size - 1), dtype=np.uint32)),
    attrs=[tiledb.Attr("y_coordinate", np.float64)],
)
tiledb.Array.create(f"{group_uri}/y_coord", y_schema)
with tiledb.open(f"{group_uri}/y_coord", mode="w") as array:
    array[:] = y_data
# Add both arrays to the TileDB group.
# Each array is registered under the name of the dimension it describes
# ("x" and "y"), with a URI relative to the group.
with tiledb.Group(group_uri, mode="w") as group:
    group.add(uri="x_coord", name="x", relative=True)
    group.add(uri="y_coord", name="y", relative=True)
# Re-open the group with the coordinate arrays added.
xr.open_dataset(group_uri, engine="tiledb")
Part 3. Adding additional arrays
A TileDB group can contain arrays that already exist. We add another array to our TileDB group.
We will add a new array to the group that has the same `x` and `y` dimensions. Since the array has only one TileDB attribute, the xarray backend will use the name of the array rather than the attribute for the variable name.
# Array for z data.
# This array is created at `array_uri`, outside the group folder, and has the
# same `x` and `y` index dimensions as the ripples array.
schema = tiledb.ArraySchema(
    domain=tiledb.Domain(
        tiledb.Dim("x", domain=(0, x_size - 1), dtype=np.uint32),
        tiledb.Dim("y", domain=(0, y_size - 1), dtype=np.uint32),
    ),
    attrs=[tiledb.Attr("values", np.float64)],
)
tiledb.Array.create(array_uri, schema)
with tiledb.open(array_uri, mode="w") as array:
    array[:] = z_data
    array.meta["__tiledb_attr.values.description"] = "x * exp(-y/2)"
# Add to the group.
# The array lives outside the group folder, so its URI is passed as-is
# (`relative=False`) rather than relative to the group.
with tiledb.Group(group_uri, mode="w") as group:
    group.add(uri=array_uri, name="z", relative=False)
# Open the dataset
xr.open_dataset(group_uri, engine="tiledb")