import time
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import tiledb
from tiledb.cf import AttrMetadata, ArrayMetadata, create_group, open_group_array

# Working with Arrays in a TileDB Group
This notebook contains an example of how to use TileDB and TileDB-CF to create, inspect, open, read, and write data to arrays in a TileDB group.
Data
The following arrays will be created that share some of their dimensions:
- `dense_3d` is a dense array with dimensions `dense_x`, `dense_y` and `dense_t`
- `dense_2d` is a dense array with dimensions `dense_x` and `dense_y`
- `dense_1d` is a dense array with dimension `dense_t`
- `sparse_4d` is a sparse array with dimensions `sparse_x`, `sparse_y`, `sparse_z` and `sparse_t`
- `dense_axes_xy` is a dense array that contains the values of the `dense_x` and `dense_y` dimensions as `dense_x_data` and `dense_y_data`
- `dense_axes_t` is a dense array that contains the values of the `dense_t` dimension as `dense_t_data`
- `sparse_axes` is a sparse array that contains the values of the `sparse_x`, `sparse_y`, `sparse_z` and `sparse_t` dimensions as `sparse_x_data`, `sparse_y_data`, `sparse_z_data` and `sparse_t_data`
Packages
Import the libraries used in this notebook:
Create numpy arrays
Variables to set the size of the arrays:
# Sizes that control how much example data is generated.
dense_size = 100  # extent of the dense x and y axes; also the upper bound of sparse x/y/z values
sparse_size = 2000  # number of randomly generated sparse points
t_size = 365  # extent of the t axis

# Functions used to create a dataset:
def ripple(x, y, t):
    """Return ``sin(t * (x**2 + y**2)) / (t + 1)``.

    Works element-wise on scalars or NumPy arrays (standard NumPy
    broadcasting applies).

    Args:
        x: First spatial coordinate(s).
        y: Second spatial coordinate(s).
        t: Time coordinate(s); the result is scaled by ``1 / (t + 1)``.

    Returns:
        The ripple value(s) as produced by ``np.sin`` and the division.
    """
    return np.sin(t * (x**2 + y**2)) / (t + 1)


def ripple2(x, y, z, t):
    """Return ``ripple(x, y, t)`` shifted by ``z``.

    Args:
        x: First spatial coordinate(s).
        y: Second spatial coordinate(s).
        z: Offset(s) added to the ripple value.
        t: Time coordinate(s).

    Returns:
        ``ripple(x, y, t) + z``.
    """
    # Delegate to ripple() so the formula is defined in exactly one place.
    return ripple(x, y, t) + z


# Data for the dense arrays
# Coordinate values along each dense axis: 1..dense_size for x and y,
# 1..t_size for t.
dense_x_values = np.arange(1, dense_size + 1)
dense_y_values = np.arange(1, dense_size + 1)
dense_t_values = np.arange(1, t_size + 1)

# np.fromfunction calls its function with one index grid per axis, so ripple
# can be passed directly (no lambda wrapper needed). The grids are zero-based
# indices, not the 1-based coordinate values above.
dense_3d_values = np.fromfunction(ripple, (dense_size, dense_size, t_size))

# Reduce over t (axis 2) for the 2D array and over x, y (axes 0, 1) for the
# 1D array. Computed once; the values are reused for the writes below.
dense_2d_values = np.nanmean(dense_3d_values, axis=2)
dense_1d_values = np.mean(dense_3d_values, axis=(0, 1))

# Data for the sparse arrays
# Draw sparse_size random integer coordinates per axis. np.random.randint's
# upper bound is exclusive, so x/y/z fall in 1..dense_size and t in 1..t_size.
sparse_x_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_y_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_z_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_t_values = np.random.randint(1, t_size + 1, size=sparse_size)

# One data value per sparse point.
sparse_4d_values = ripple2(
    sparse_x_values, sparse_y_values, sparse_z_values, sparse_t_values
)

# Create the TileDB Group
# Dimensions shared by the dense arrays. All use unsigned 64-bit coordinates
# with a tile extent of 10.
dense_x = tiledb.Dim(name="dense_x", domain=(1, dense_size), tile=10, dtype=np.uint64)
dense_y = tiledb.Dim(name="dense_y", domain=(1, dense_size), tile=10, dtype=np.uint64)
dense_t = tiledb.Dim(name="dense_t", domain=(1, t_size), tile=10, dtype=np.uint64)

# Dimensions shared by the sparse arrays. x, y and z span 1..sparse_size;
# t spans 1..t_size.
sparse_x = tiledb.Dim(
    name="sparse_x", domain=(1, sparse_size), tile=10, dtype=np.uint64
)
sparse_y = tiledb.Dim(
    name="sparse_y", domain=(1, sparse_size), tile=10, dtype=np.uint64
)
sparse_z = tiledb.Dim(
    name="sparse_z", domain=(1, sparse_size), tile=10, dtype=np.uint64
)
sparse_t = tiledb.Dim(name="sparse_t", domain=(1, t_size), tile=10, dtype=np.uint64)

# Schema for every array in the group, keyed by array name.
array_schemas = {
    "dense_3d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x, dense_y, dense_t),
        attrs=[
            tiledb.Attr(name="dense_3d_data", dtype=np.float64),
        ],
    ),
    "dense_2d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x, dense_y),
        attrs=[
            tiledb.Attr(name="dense_2d_data", dtype=np.float64),
        ],
    ),
    "dense_1d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_t),
        attrs=[
            tiledb.Attr(name="dense_1d_data", dtype=np.float64),
        ],
    ),
    "sparse_4d": tiledb.ArraySchema(
        domain=tiledb.Domain(sparse_x, sparse_y, sparse_z, sparse_t),
        attrs=[
            tiledb.Attr(name="sparse_4d_data", dtype=np.float64),
        ],
        sparse=True,
        allows_duplicates=True,
    ),
    # Indexed by dense_x only; the y values share that index, which relies on
    # dense_x and dense_y having the same extent (both are 1..dense_size).
    "dense_axes_xy": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x),
        attrs=[
            tiledb.Attr(name="dense_x_data", dtype=np.uint64),
            tiledb.Attr(name="dense_y_data", dtype=np.uint64),
        ],
    ),
    "dense_axes_t": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_t),
        attrs=[
            tiledb.Attr(name="dense_t_data", dtype=np.uint64),
        ],
    ),
    # Indexed by sparse_x only; each cell holds the x/y/z/t coordinates of
    # one sparse point.
    "sparse_axes": tiledb.ArraySchema(
        domain=tiledb.Domain(sparse_x),
        attrs=[
            tiledb.Attr(name="sparse_x_data", dtype=np.uint64),
            tiledb.Attr(name="sparse_y_data", dtype=np.uint64),
            tiledb.Attr(name="sparse_z_data", dtype=np.uint64),
            tiledb.Attr(name="sparse_t_data", dtype=np.uint64),
        ],
        sparse=True,
        allows_duplicates=True,
    ),
}

# Create the TileDB Group on disk
group_uri = "output/example_group"

# Only create the group when nothing exists at the URI yet, so the code can
# be re-run. Pass group_uri (not a repeated literal) so the check and the
# creation always refer to the same location.
if tiledb.object_type(group_uri) is None:
    create_group(group_uri, array_schemas)

# Write data to the TileDB Group
Writing to dense and sparse arrays uses standard TileDB write operations. Arrays can be opened one-by-one or multiple arrays can be opened at once.
# The group itself is opened in read mode; the member arrays are opened in
# write mode through open_group_array.
with tiledb.Group(group_uri, mode="r") as group:
    # Dense data arrays, each opened by the name of its single attribute so
    # the values can be assigned directly.
    with (
        open_group_array(group, attr="dense_3d_data", mode="w") as dense_3d_array,
        open_group_array(group, attr="dense_2d_data", mode="w") as dense_2d_array,
        open_group_array(group, attr="dense_1d_data", mode="w") as dense_1d_array,
    ):
        dense_3d_array[:] = dense_3d_values
        dense_2d_array[:] = dense_2d_values
        dense_1d_array[:] = dense_1d_values

    # Sparse arrays: the index on the left-hand side supplies the coordinates
    # of each written cell.
    with (
        open_group_array(group, attr="sparse_4d_data", mode="w") as sparse_data_array,
        open_group_array(group, array="sparse_axes", mode="w") as sparse_axes_array,
    ):
        sparse_data_array[
            sparse_x_values, sparse_y_values, sparse_z_values, sparse_t_values
        ] = sparse_4d_values
        # Cells 1..sparse_size; multiple attributes are written as a dict
        # keyed by attribute name.
        sparse_axes_array[np.arange(sparse_size) + 1] = {
            "sparse_x_data": sparse_x_values,
            "sparse_y_data": sparse_y_values,
            "sparse_z_data": sparse_z_values,
            "sparse_t_data": sparse_t_values,
        }

    # Axis arrays for the dense dimensions.
    with (
        open_group_array(group, array="dense_axes_xy", mode="w") as xy_array,
        open_group_array(
            group, array="dense_axes_t", attr="dense_t_data", mode="w"
        ) as t_array,
    ):
        xy_array[:] = {"dense_x_data": dense_x_values, "dense_y_data": dense_y_values}
        t_array[:] = dense_t_values

# Metadata
Write Group metadata:
# Group-level metadata requires the group to be opened in write mode.
with tiledb.Group(group_uri, mode="w") as group:
    group.meta["description"] = "Example TileDB Group"
    group.meta["version"] = "1.0"
    # NOTE(review): the key below ends with a trailing space; kept unchanged
    # so the stored key is the same as before -- confirm whether intended.
    group.meta["created on "] = time.ctime()  # ctime() defaults to the current time

# Write Array metadata:
# Array metadata is written on the arrays, so the group only needs read mode
# while each array is opened in write mode.
with tiledb.Group(group_uri, mode="r") as group:
    with open_group_array(group, array="dense_3d", mode="w") as dense_3d_array:
        dense_3d_array.meta["description"] = (
            "Example 3D dense array with dimensions x, y and t"
        )
    with open_group_array(group, array="dense_axes_xy", mode="w") as xy_array:
        xy_array.meta["description"] = (
            "Values for the x and y dimensions of the 3D dense array"
        )

# Write Attribute metadata using the AttrMetadata class:
with tiledb.Group(group_uri, mode="r") as group:
    with open_group_array(group, array="dense_axes_xy", mode="w") as array:
        # AttrMetadata wraps the array's metadata for the named attribute
        # ("dense_x_data"); the entry is set through that wrapper.
        x_attr_meta = AttrMetadata(array.meta, "dense_x_data")
        x_attr_meta["description"] = "Values of x"

# Read data from the TileDB Group
Read the metadata
Read the Group metadata keys and their values:
# Print every key/value pair stored in the group's metadata.
with tiledb.Group(group_uri) as group:
    for key, value in group.meta.items():
        print(f"{key}: {value}")

# Read the metadata for the dense_axes_xy array. Optionally, you can filter the array attribute data using the AttrMetadata and ArrayMetadata.
with tiledb.Group(group_uri) as group:
    with open_group_array(group, array="dense_axes_xy") as array:
        # Unfiltered view: everything stored in the array's metadata.
        print("All metadata in the array:")
        for key, value in array.meta.items():
            print(f" * {key}: {value}")

        # Filtered through ArrayMetadata.
        print("Only array metadata:")
        for key, value in ArrayMetadata(array.meta).items():
            print(f" * {key}: {value}")

        # Filtered through AttrMetadata for a single attribute.
        print("Only attribute metadata for 'dense_x_data':")
        for key, value in AttrMetadata(array.meta, "dense_x_data").items():
            print(f" * {key}: {value}")

# Read and visualise the data
# Read the full 3D dense array together with the x/y axis values.
with tiledb.Group(group_uri) as group:
    with (
        open_group_array(group, array="dense_3d", attr="dense_3d_data") as data_array,
        open_group_array(group, array="dense_axes_xy") as axes_array,
    ):
        dense_3d_data = data_array[:]
        axes_data = axes_array[...]
        dense_x_data = axes_data["dense_x_data"]
        dense_y_data = axes_data["dense_y_data"]

# Contour plots of four time slices, filled row by row in the 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
for ax, t_index in zip(axes.flat, (33, 66, 99, 95)):
    # contourf(X, Y, Z) expects Z with rows along Y and columns along X; the
    # stored data is indexed [x, y, t], so each slice is transposed.
    ax.contourf(dense_x_data, dense_y_data, dense_3d_data[:, :, t_index].T)

# Load the sparse 4D array into a DataFrame.
with tiledb.Group(group_uri) as group:
    with open_group_array(group, array="sparse_4d") as array:
        df = pd.DataFrame(array[...])

# The bare expressions below only display a result when run as the last
# statement of a notebook cell.
df.head()
df.describe().transpose()
df.plot.scatter(x="sparse_x", y="sparse_4d_data");

# Load the 1D dense array into a DataFrame and plot it.
with tiledb.Group(group_uri) as group:
    with open_group_array(group, array="dense_1d") as array:
        df2 = pd.DataFrame(array[...])

df2.head()
df2.plot();