NetCDF-to-TileDB: How to set the max fragment size for copying data

About this Example

What it Shows

This example shows how to copy a NetCDF file to TileDB in multiple chunks by setting the maximum fragment shape (`max_fragment_shape`) for an array in the NetCDF4ConverterEngine.

Example dataset

  • Dimensions:
    • x: size=8
    • y: size=8
    • z: size=8
  • Variables:
    • f(x, y, z) = [0, …, 511]

Set-up Requirements

This example requires that the following Python packages be installed: netCDF4, numpy, tiledb, and tiledb-cf.

import netCDF4
import numpy as np
import tiledb
import tiledb.cf
# Set names for the output generated by the example.
# Everything the example writes lives under `output_dir`, so resetting that one
# folder is enough to start from a clean state.
output_dir = "output/netcdf-to-tiledb-set-max-fragment-size"
netcdf_file = f"{output_dir}/simple1.nc"
array_uri = f"{output_dir}/simple_copy_chunks"
# Reset output folder
import os
import shutil

# Remove any output left over from a previous run (a missing folder is fine).
shutil.rmtree(output_dir, ignore_errors=True)
# `output_dir` is nested under `output/`. `os.makedirs` creates that parent
# directory as well, whereas `os.mkdir` raises FileNotFoundError when the
# parent does not exist yet (e.g. on the very first run).
os.makedirs(output_dir)
# Write the example NetCDF file: three dimensions (x, y, z) of size 8 and a
# single int64 variable `f` over all three, filled with the values 0..511.
with netCDF4.Dataset(netcdf_file, mode="w") as dataset:
    dataset.setncatts({"title": "Simple dataset for examples"})
    for dim_name in ("x", "y", "z"):
        dataset.createDimension(dim_name, 8)
    f = dataset.createVariable("f", np.int64, ("x", "y", "z"))
    # 512 consecutive integers laid out as an 8 x 8 x 8 cube.
    f[:, :, :] = np.arange(512).reshape(8, 8, 8)
print(f"Created example NetCDF file `{netcdf_file}`.")
# Create the NetCDF4 converter from the example file and show it
# (the bare `converter` expression is the notebook cell output).
converter = tiledb.cf.NetCDF4ConverterEngine.from_file(netcdf_file)
converter
# Set max_fragment_shape on the domain of the array named "array0",
# then show the converter again to see the updated setting.
array_creator = converter.get_array_creator("array0")
array_creator.domain_creator.max_fragment_shape = (4, 8, 2)
converter
# Run conversion (using `convert_to_array` since there is only 1 array in the group)
converter.convert_to_array(array_uri)
# Consolidate fragment metadata (recommended for copying multiple fragments)
fragment_meta_config = tiledb.Config({"sm.consolidation.mode": "fragment_meta"})
tiledb.consolidate(array_uri, config=fragment_meta_config)
# Inspect the fragments written to the array to confirm the data was copied
# as multiple separate chunks rather than in a single write.
fragment_info = tiledb.FragmentInfoList(array_uri)
print(f"Number of fragments: {len(fragment_info)}")
for frag in fragment_info:
    # One line per fragment: its number, the region it covers, and whether
    # its metadata has been consolidated.
    print(
        f"Fragment {frag.num}: nonempty_domain={frag.nonempty_domain}, "
        f"has_consolidated_metadata={frag.has_consolidated_metadata}"
    )