import time
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import tiledb
from tiledb.cf import AttrMetadata, ArrayMetadata, create_group, open_group_array
Working with Arrays in a TileDB Group
This notebook contains an example of how to use TileDB and TileDB-CF to create, inspect, open, read, and write data to arrays in a TileDB group.
Data
The following arrays will be created that share some of their dimensions:
- `dense_3d` is a dense array with dimensions `dense_x`, `dense_y` and `dense_t`
- `dense_2d` is a dense array with dimensions `dense_x` and `dense_y`
- `dense_1d` is a dense array with dimension `dense_t`
- `sparse_4d` is a sparse array with dimensions `sparse_x`, `sparse_y`, `sparse_z` and `sparse_t`
- `dense_axes_xy` is a dense array that contains the values of the `dense_x` and `dense_y` dimensions as `dense_x_data` and `dense_y_data`
- `dense_axes_t` is a dense array that contains the values of the `dense_t` dimension as `dense_t_data`
- `sparse_axes` is a sparse array that contains the values of the `sparse_x`, `sparse_y`, `sparse_z` and `sparse_t` dimensions as `sparse_x_data`, `sparse_y_data`, `sparse_z_data` and `sparse_t_data`
Packages
Import the libraries used in this notebook:
Create numpy arrays
Variables to set the size of the arrays:
# Edge length of the dense x and y dimensions.
dense_size = 100
# Number of cells written to the sparse arrays; also the upper bound of the
# sparse x, y and z dimension domains.
sparse_size = 2000
# Length of the time dimension.
t_size = 365
Functions used to create a dataset:
def ripple(x, y, t):
    """Return ``sin(t * (x**2 + y**2)) / (t + 1)``.

    Args:
        x: x coordinate(s); a scalar or a NumPy array.
        y: y coordinate(s); a scalar or a NumPy array.
        t: time coordinate(s); a scalar or a NumPy array.

    Returns:
        The ripple value, evaluated element-wise for array inputs.
    """
    # Dividing by (t + 1) rather than t keeps the expression finite at t == 0.
    return np.sin(t * (x**2 + y**2)) / (t + 1)
def ripple2(x, y, z, t):
    """Return ``sin(t * (x**2 + y**2)) / (t + 1) + z``.

    Args:
        x: x coordinate(s); a scalar or a NumPy array.
        y: y coordinate(s); a scalar or a NumPy array.
        z: offset added to the ripple value; a scalar or a NumPy array.
        t: time coordinate(s); a scalar or a NumPy array.

    Returns:
        The ripple value shifted by ``z``, evaluated element-wise for array
        inputs.
    """
    return (np.sin(t * (x**2 + y**2)) / (t + 1)) + z
Data for the dense arrays
# Coordinate values for the dense axes: 1..dense_size for x and y,
# 1..t_size for t.
dense_x_values = np.arange(1, dense_size + 1)
dense_y_values = np.arange(1, dense_size + 1)
dense_t_values = np.arange(1, t_size + 1)
# Evaluate ripple on the full (x, y, t) index grid. np.fromfunction passes the
# index arrays straight to the callable, so ripple needs no lambda wrapper.
# The values are computed once; repeating the computation would only
# reproduce the same arrays.
dense_3d_values = np.fromfunction(ripple, (dense_size, dense_size, t_size))

# Collapse the time axis (ignoring NaNs) for the 2D data, and the two spatial
# axes for the 1D time series.
dense_2d_values = np.nanmean(dense_3d_values, axis=2)
dense_1d_values = np.mean(dense_3d_values, axis=(0, 1))
Data for the sparse arrays
# Random integer coordinates for sparse_size cells. x, y and z are drawn from
# 1..dense_size and t from 1..t_size (randint's upper bound is exclusive).
sparse_x_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_y_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_z_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_t_values = np.random.randint(1, t_size + 1, size=sparse_size)

# One data value per sparse cell.
sparse_4d_values = ripple2(
    sparse_x_values, sparse_y_values, sparse_z_values, sparse_t_values
)
Create the TileDB Group
# Dimensions used by the dense array schemas.
dense_x = tiledb.Dim(name="dense_x", domain=(1, dense_size), tile=10, dtype=np.uint64)
dense_y = tiledb.Dim(name="dense_y", domain=(1, dense_size), tile=10, dtype=np.uint64)
dense_t = tiledb.Dim(name="dense_t", domain=(1, t_size), tile=10, dtype=np.uint64)

# Dimensions used by the sparse array schemas.
sparse_x = tiledb.Dim(
    name="sparse_x", domain=(1, sparse_size), tile=10, dtype=np.uint64
)
sparse_y = tiledb.Dim(
    name="sparse_y", domain=(1, sparse_size), tile=10, dtype=np.uint64
)
sparse_z = tiledb.Dim(
    name="sparse_z", domain=(1, sparse_size), tile=10, dtype=np.uint64
)
sparse_t = tiledb.Dim(name="sparse_t", domain=(1, t_size), tile=10, dtype=np.uint64)
# Map each array name to its schema. Arrays that share a dimension reuse the
# same tiledb.Dim object.
array_schemas = {
    "dense_3d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x, dense_y, dense_t),
        attrs=[
            tiledb.Attr(name="dense_3d_data", dtype=np.float64),
        ],
    ),
    "dense_2d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x, dense_y),
        attrs=[
            tiledb.Attr(name="dense_2d_data", dtype=np.float64),
        ],
    ),
    "dense_1d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_t),
        attrs=[
            tiledb.Attr(name="dense_1d_data", dtype=np.float64),
        ],
    ),
    "sparse_4d": tiledb.ArraySchema(
        domain=tiledb.Domain(sparse_x, sparse_y, sparse_z, sparse_t),
        attrs=[
            tiledb.Attr(name="sparse_4d_data", dtype=np.float64),
        ],
        sparse=True,
        allows_duplicates=True,
    ),
    # Axis-value arrays: the coordinate values are stored as attributes.
    "dense_axes_xy": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x),
        attrs=[
            tiledb.Attr(name="dense_x_data", dtype=np.uint64),
            tiledb.Attr(name="dense_y_data", dtype=np.uint64),
        ],
    ),
    "dense_axes_t": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_t),
        attrs=[
            tiledb.Attr(name="dense_t_data", dtype=np.uint64),
        ],
    ),
    "sparse_axes": tiledb.ArraySchema(
        domain=tiledb.Domain(sparse_x),
        attrs=[
            tiledb.Attr(name="sparse_x_data", dtype=np.uint64),
            tiledb.Attr(name="sparse_y_data", dtype=np.uint64),
            tiledb.Attr(name="sparse_z_data", dtype=np.uint64),
            tiledb.Attr(name="sparse_t_data", dtype=np.uint64),
        ],
        sparse=True,
        allows_duplicates=True,
    ),
}
Create the TileDB Group on disk
group_uri = "output/example_group"
# Create the group only when nothing exists at the URI yet, so this can be
# re-run without failing on an existing group.
if tiledb.object_type(group_uri) is None:
    create_group(group_uri, array_schemas)
Write data to the TileDB Group
Writing to dense and sparse arrays uses standard TileDB write operations. Arrays can be opened one-by-one or multiple arrays can be opened at once.
# The group is opened read-only; only the member arrays are opened for writing.
with tiledb.Group(group_uri, mode="r") as group:
    # Dense data arrays, each opened by attribute name only.
    with (
        open_group_array(group, attr="dense_3d_data", mode="w") as dense_3d_array,
        open_group_array(group, attr="dense_2d_data", mode="w") as dense_2d_array,
        open_group_array(group, attr="dense_1d_data", mode="w") as dense_1d_array,
    ):
        dense_3d_array[:] = dense_3d_values
        dense_2d_array[:] = dense_2d_values
        dense_1d_array[:] = dense_1d_values

    # Sparse arrays: cells are addressed by explicit coordinate arrays.
    with (
        open_group_array(group, attr="sparse_4d_data", mode="w") as sparse_data_array,
        open_group_array(group, array="sparse_axes", mode="w") as sparse_axes_array,
    ):
        sparse_data_array[
            sparse_x_values, sparse_y_values, sparse_z_values, sparse_t_values
        ] = sparse_4d_values
        # Coordinates 1..sparse_size, one cell per generated sparse point.
        sparse_axes_array[np.arange(sparse_size) + 1] = {
            "sparse_x_data": sparse_x_values,
            "sparse_y_data": sparse_y_values,
            "sparse_z_data": sparse_z_values,
            "sparse_t_data": sparse_t_values,
        }

    # Axis-value arrays for the dense dimensions.
    with (
        open_group_array(group, array="dense_axes_xy", mode="w") as xy_array,
        open_group_array(
            group, array="dense_axes_t", attr="dense_t_data", mode="w"
        ) as t_array,
    ):
        xy_array[:] = {"dense_x_data": dense_x_values, "dense_y_data": dense_y_values}
        t_array[:] = dense_t_values
Metadata
Write Group metadata:
# Group-level metadata requires the group itself to be opened in write mode.
with tiledb.Group(group_uri, mode="w") as group:
    group.meta["description"] = "Example TileDB Group"
    group.meta["version"] = "1.0"
    # time.ctime() with no argument formats the current time.
    group.meta["created on "] = time.ctime()
Write Array metadata:
# Array metadata is written through each array's own handle, opened for writing.
with tiledb.Group(group_uri, mode="r") as group:
    with open_group_array(group, array="dense_3d", mode="w") as dense_3d_array:
        dense_3d_array.meta[
            "description"
        ] = "Example 3D dense array with dimensions x, y and t"
    with open_group_array(group, array="dense_axes_xy", mode="w") as xy_array:
        xy_array.meta[
            "description"
        ] = "Values for the x and y dimensions of the 3D dense array"
Write Attribute metadata using the `AttrMetadata` class:
with tiledb.Group(group_uri, mode="r") as group:
    with open_group_array(group, array="dense_axes_xy", mode="w") as array:
        # AttrMetadata wraps the array's metadata for the named attribute.
        x_attr_meta = AttrMetadata(array.meta, "dense_x_data")
        x_attr_meta["description"] = "Values of x"
Read data from the TileDB Group
Read the metadata
Read the Group metadata keys and their values:
# Print every key/value pair stored in the group's metadata.
with tiledb.Group(group_uri) as group:
    for key, value in group.meta.items():
        print(f"{key}: {value}")
Read the metadata for the `dense_axes_xy` array. Optionally, you can filter the metadata down to array-level or attribute-level entries using the `ArrayMetadata` and `AttrMetadata` classes.
with tiledb.Group(group_uri) as group:
    with open_group_array(group, array="dense_axes_xy") as array:
        # Everything stored in the array's metadata.
        print("All metadata in the array:")
        for key, value in array.meta.items():
            print(f" * {key}: {value}")
        # The same metadata viewed through ArrayMetadata.
        print("Only array metadata:")
        for key, value in ArrayMetadata(array.meta).items():
            print(f" * {key}: {value}")
        # The same metadata viewed through AttrMetadata for one attribute.
        print("Only attribute metadata for 'dense_x_data':")
        for key, value in AttrMetadata(array.meta, "dense_x_data").items():
            print(f" * {key}: {value}")
Read and visualise the data
# Read the 3D data and its x/y axis values while the arrays are open; the
# results are used after the arrays are closed.
with tiledb.Group(group_uri) as group:
    with (
        open_group_array(group, array="dense_3d", attr="dense_3d_data") as data_array,
        open_group_array(group, array="dense_axes_xy") as axes_array,
    ):
        dense_3d_data = data_array[:]
        axes_data = axes_array[...]
        dense_x_data = axes_data["dense_x_data"]
        dense_y_data = axes_data["dense_y_data"]
# Draw one filled contour plot per selected time index on a 2x2 grid.
# axes.flat walks the grid row by row, so the indices land on
# [0, 0], [0, 1], [1, 0] and [1, 1] in that order.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
for ax, t_index in zip(axes.flat, (33, 66, 99, 95)):
    ax.contourf(dense_x_data, dense_y_data, dense_3d_data[:, :, t_index])
# Load the whole sparse array into a DataFrame.
with tiledb.Group(group_uri) as group:
    with open_group_array(group, array="sparse_4d") as array:
        df = pd.DataFrame(array[...])

df.head()
df.describe().transpose()
# The trailing semicolon suppresses the notebook's echo of the returned Axes.
df.plot.scatter(x="sparse_x", y="sparse_4d_data");
# Load the 1D time series into a DataFrame and plot it.
with tiledb.Group(group_uri) as group:
    with open_group_array(group, array="dense_1d") as array:
        df2 = pd.DataFrame(array[...])

df2.head()
# The trailing semicolon suppresses the notebook's echo of the returned Axes.
df2.plot();