# Cap console printing at 500 entries so that printing a full data frame
# further down is truncated instead of dumped in its entirety.
options(max.print = 500)
library(tiledbsc)
#> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
#> which was just loaded, will retire in October 2023.
#> Please refer to R-spatial evolution reports for details, especially
#> https://r-spatial.org/r/2023/05/15/evolution4.html.
#> It may be desirable to make the sf package available;
#> package maintainers should consider adding sf to Suggests:.
#> The sp package is now running under evolution status 2
#> (status 2 uses the sf package in place of rgdal)
library(fs)
library(tiledb)
library(SeuratObject)
#> Loading required package: sp
#>
#> Attaching package: 'sp'
#> The following object is masked from 'package:tiledb':
#>
#> dimensions
# Build a scratch path under the session's temporary directory.
# NOTE(review): data_dir is created on the next line but is not referenced
# again in the visible portion of this document -- confirm it is still needed.
data_dir <- file.path(tempdir(), "pbmc_small")
dir.create(data_dir, showWarnings = FALSE)
Load the subsetted 10X Genomics PBMC dataset provided by SeuratObject.
# Load the pbmc_small example object that ships with SeuratObject, then
# print its summary.
data("pbmc_small", package = "SeuratObject")
pbmc_small
#> An object of class Seurat
#> 230 features across 80 samples within 1 assay
#> Active assay: RNA (230 features, 20 variable features)
#> 2 dimensional reductions calculated: pca, tsne

Convert a Seurat object to a TileDB-backed SOMACollection

The SOMACollection class provides a method for
converting an entire Seurat object to a
SOMACollection. This is the recommended way to perform the
conversion since it can handle multiple Assay objects and
will (eventually) convert all of the standard slots that comprise a
Seurat object.
The first step is to create a new SOMACollection object
and provide a URI where the dataset should be created:
# Create a SOMACollection handle at a URI under the temporary directory; the
# log below shows it being created on disk because none exists there yet.
soco <- SOMACollection$new(uri = file.path(tempdir(), "soco"))
#> No SOMACollection currently exists at '/tmp/RtmpqnzHsY/soco'
#> Creating new SOMACollection at '/tmp/RtmpqnzHsY/soco'
#> No TileDBGroup currently exists at '/tmp/RtmpqnzHsY/soco/uns'
#> Creating new TileDBGroup at '/tmp/RtmpqnzHsY/soco/uns'
Next, we’ll pass the entire pbmc_small object directly
to from_seurat() and one SOMA will be created
for each Assay object:
# Ingest the whole Seurat object into the collection. Per the log below this
# covers the assay matrices, obs/var metadata, dimensional reductions, the
# SNN graph, and the command history.
soco$from_seurat(object = pbmc_small)
#> No SOMA currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA'
#> Creating new SOMA at '/tmp/RtmpqnzHsY/soco/soma_RNA'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> No AssayMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/X'
#> Creating new AssayMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/X'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/varp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/varp'
#> No TileDBGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/uns'
#> Creating new TileDBGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/uns'
#> Creating new AnnotationDataframe array with index [obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obs
#> Creating new AnnotationDataframe array with index [var_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soco/soma_RNA/var
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/counts'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/counts'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/X/counts
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/X/data
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data
#> Finished converting Seurat Assay with key [rna_] to SOMA
#> No AnnotationMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca'
#> Creating new AnnotationMatrix array with index [var_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca
#> Adding 2 metadata keys to array
#> No AnnotationMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca'
#> Creating new AnnotationMatrix array with index [obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca
#> Adding 2 metadata keys to array
#> No AnnotationMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne'
#> Creating new AnnotationMatrix array with index [obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne
#> Adding 2 metadata keys to array
#> No AnnotationPairwiseMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn'
#> Creating new AnnotationPairwiseMatrix array with index [obs_id_i,obs_id_j] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationPairwiseMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn
#> Adding 2 metadata keys to array
#> No CommandsArray found at '/tmp/RtmpqnzHsY/soco/uns/commands'
#> Creating new CommandsArray array with index [index] at '/tmp/RtmpqnzHsY/soco/uns/commands'
#> Adding 3 metadata keys to array
#> Ingesting CommandsArray data into: /tmp/RtmpqnzHsY/soco/uns/commands
#> Finished converting Seurat object to SOMACollection
Examining the directory structure, you can see the top-level
SOMACollection directory now contains a single SOMA
sub-directory, soma_RNA, corresponding to
pbmc_small’s only assay, "RNA" (alongside the collection-level
uns group):
# recurse = 1 descends a single level into each top-level entry.
fs::dir_tree(soco$uri, recurse = 1)
#> /tmp/RtmpqnzHsY/soco
#> ├── __group
#> │ ├── __1691776241832_1691776241832_3a529bc39b414d1d8cb7e73cdb7aea9a_2
#> │ └── __1691776242132_1691776242132_26d6137163be461593b8ca3809c8012d_2
#> ├── __meta
#> │ └── __1691776241807_1691776241807_f7171b493b8040b28ce3d8dd46e2f0e4
#> ├── __tiledb_group.tdb
#> ├── soma_RNA
#> │ ├── X
#> │ ├── __group
#> │ ├── __meta
#> │ ├── __tiledb_group.tdb
#> │ ├── obs
#> │ ├── obsm
#> │ ├── obsp
#> │ ├── uns
#> │ ├── var
#> │ ├── varm
#> │ └── varp
#> └── uns
#> ├── __group
#> ├── __meta
#> ├── __tiledb_group.tdb
#> └── commands
Internally, the SOMACollection class is used to convert
each Seurat Assay object to a SOMA, which
creates and populates the various sub-components, including:
- the counts, data, and scale.data matrices, each stored as a
  separate array in the X group
- the data.frame containing feature-level metadata, which is
  ingested into the var array

Separately, any dimensional reductions are extracted and stored in
corresponding obsm/varm arrays.
To close the loop, we can then read the on-disk
SOMACollection back into memory as a Seurat
object.
# Read the on-disk collection back into memory as a Seurat object and print
# its summary.
soco$to_seurat(project = "SOCO Example")
#> Reading AssayMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/X/counts'
#> Reading AssayMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/X/data'
#> Reading AssayMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data'
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Found 2 dim reduction arrays
#> Reading AnnotationMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca'
#> Reading AnnotationMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca'
#> Found 1 dim reduction arrays
#> Reading AnnotationMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne'
#> Reading AnnotationPairwiseMatrix into dataframe from '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn'
#> Reading command history into memory
#> An object of class Seurat
#> 230 features across 80 samples within 1 assay
#> Active assay: RNA (230 features, 20 variable features)
#> 2 dimensional reductions calculated: pca, tsne

Convert a Seurat Assay to a TileDB-backed SOMA

Conversions can happen at multiple levels of the API. For example, we
can operate directly on a Seurat Assay using
SOMA. The workflow is largely the same:
# Create a standalone SOMA at its own URI; per the log below this also
# creates its empty X, obsm, varm, obsp, varp, and uns groups.
soma <- SOMA$new(uri = file.path(tempdir(), "soma"))
#> No SOMA currently exists at '/tmp/RtmpqnzHsY/soma'
#> Creating new SOMA at '/tmp/RtmpqnzHsY/soma'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soma/obs'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soma/var'
#> No AssayMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/X'
#> Creating new AssayMatrixGroup at '/tmp/RtmpqnzHsY/soma/X'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/obsm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soma/obsm'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/varm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soma/varm'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/obsp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soma/obsp'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/varp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soma/varp'
#> No TileDBGroup currently exists at '/tmp/RtmpqnzHsY/soma/uns'
#> Creating new TileDBGroup at '/tmp/RtmpqnzHsY/soma/uns'
# No recurse argument is given here, so the entire directory tree is listed.
fs::dir_tree(soma$uri)
#> /tmp/RtmpqnzHsY/soma
#> ├── X
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243908_1691776243908_59845e08751c4c4488f59dfeaef457eb
#> │ └── __tiledb_group.tdb
#> ├── __group
#> │ ├── __1691776243914_1691776243914_048b3a1143514aa096a544773d8b8854_2
#> │ ├── __1691776243926_1691776243926_82f92b6672cf4dd5852d56ff68fe0e21_2
#> │ ├── __1691776243938_1691776243938_eb5020eb1ed343039d51f3abf88f68b6_2
#> │ ├── __1691776243949_1691776243949_0cac1066ddb0453cbae687374a32ecd9_2
#> │ ├── __1691776243961_1691776243961_7df70651c26d481498e794fa8b0b349f_2
#> │ └── __1691776243972_1691776243972_6932f6a10d6b4891b1514f9435b91f3e_2
#> ├── __meta
#> │ └── __1691776243896_1691776243896_ff5bc32080f94159979cffcce1194c0a
#> ├── __tiledb_group.tdb
#> ├── obsm
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243919_1691776243919_bcfc8415f2984349913dce0e5b783928
#> │ └── __tiledb_group.tdb
#> ├── obsp
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243942_1691776243942_ff4565c756c9404eb05f8e999a618489
#> │ └── __tiledb_group.tdb
#> ├── uns
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243965_1691776243965_293f655300e342b7a5dc0e129453fb50
#> │ └── __tiledb_group.tdb
#> ├── varm
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243931_1691776243931_09bd1cdc962a4bd48ad7e3d53bdaa720
#> │ └── __tiledb_group.tdb
#> └── varp
#> ├── __group
#> ├── __meta
#> │ └── __1691776243954_1691776243954_4241dcd71fb84619b7721ae7a45e5733
#> └── __tiledb_group.tdb
Then we’ll pass the RNA assay from pbmc_small
to the from_seurat_assay() method of the SOMA
class.
Note: Because cell-level metadata is stored in the parent
Seurat object, we need to provide this data
separately via the obs argument.
# Pull the RNA assay and the cell-level metadata out of the Seurat object,
# then ingest both into the SOMA.
rna_assay <- pbmc_small[["RNA"]]
cell_metadata <- pbmc_small[[]]
soma$from_seurat_assay(object = rna_assay, obs = cell_metadata)
#> Creating new AnnotationDataframe array with index [obs_id] at '/tmp/RtmpqnzHsY/soma/obs'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soma/obs'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soma/obs
#> Creating new AnnotationDataframe array with index [var_id] at '/tmp/RtmpqnzHsY/soma/var'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soma/var'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soma/var
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soma/X/counts'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soma/X/counts'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soma/X/counts
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soma/X/data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soma/X/data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soma/X/data
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soma/X/scale.data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soma/X/scale.data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soma/X/scale.data
#> Finished converting Seurat Assay with key [rna_] to SOMA
Examining the directory structure of the soma, we can see
that the obs and var arrays have now been created alongside
the X group, which holds the assay matrices.
# recurse = FALSE lists only the SOMA's top-level entries.
fs::dir_tree(soma$uri, recurse = FALSE)
#> /tmp/RtmpqnzHsY/soma
#> ├── X
#> ├── __group
#> ├── __meta
#> ├── __tiledb_group.tdb
#> ├── obs
#> ├── obsm
#> ├── obsp
#> ├── uns
#> ├── var
#> ├── varm
#> └── varp
Any of the underlying TileDB arrays can be accessed directly from a
SOMACollection object by navigating its internal
classes.
As an example, let’s access the cell-level metadata. Recall from the
SOMA spec that cell-level metadata is stored in the obs
array of a SOMA. Therefore, we must first access a
specific SOMA within the SOMACollection’s
somas slot. Let’s generate a list of the available
SOMAs:
# List the names of the SOMAs held by the collection.
names(soco$somas)
#> [1] "RNA"
Easy choice. "RNA" can then be used to index the
corresponding SOMA:
# Index the collection's members by name and print the "RNA" SOMA.
soco$members$RNA
#> <SOMA>
#> uri: /tmp/RtmpqnzHsY/soco/soma_RNA
#> arrays: obs, var
#> groups: obsm, obsp, uns, varm, varp, X
We can see we have access to a variety of fields and methods, but
obs is the one we’re after.
# Read the full obs array into a data.frame and print it; the printed rows
# are cut off by the max.print option.
soco$members$RNA$obs$to_dataframe()
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8
#> AAATTCGAATCACG SeuratProject 327 62 1
#> AAGCAAGAGCTTAG SeuratProject 126 48 0
#> AAGCGACTTTGACG SeuratProject 443 77 1
#> AATGCGTGGACGGA SeuratProject 389 73 1
#> AATGTTGACAGTCA SeuratProject 100 41 0
#> ACAGGTACTGGTGT SeuratProject 151 59 0
#> ACCAGTGAATACCG SeuratProject 417 75 0
#> ACGTGATGCCATGA SeuratProject 709 94 1
#> ACTCGCACGAAAGT SeuratProject 231 49 1
#> AGAGATGATCTCGC SeuratProject 191 61 0
#> AGATATACCCGTAA SeuratProject 187 61 0
#> AGGTCATGAGTGTC SeuratProject 62 31 0
#> AGTCAGACTGCACA SeuratProject 173 53 0
#> AGTCTTACTTCGGA SeuratProject 157 29 0
#> ATAAGTTGGTACGT SeuratProject 99 42 1
#> ATACCACTCTAAGC SeuratProject 612 69 1
#> ATAGGAGAAACAGA SeuratProject 406 74 1
#> ATCATCTGACACCA SeuratProject 168 37 0
#> ATGCCAGAACGACT SeuratProject 70 47 0
#> ATTACCTGCCTTAT SeuratProject 463 77 1
#> ATTCAGCTCATTGG SeuratProject 212 38 0
#> ATTGCACTTGCTTT SeuratProject 502 81 1
#> ATTGTAGATTCCCG SeuratProject 745 84 1
#> CATATAGACTAAGC SeuratProject 286 68 0
#> CATCAGGATGCACA SeuratProject 353 80 1
#> CATCATACGGAGCA SeuratProject 79 43 0
#> CATGAGACACGGGA SeuratProject 51 26 0
#> CATGCGCTAGTCAC SeuratProject 443 81 0
#> CATGGCCTGTGCAT SeuratProject 85 52 0
#> CATTACACCAACTG SeuratProject 316 65 0
#> CCATCCGATTCGCC SeuratProject 224 50 1
#> CCCAACTGCAATCG SeuratProject 87 42 1
#> CCTATAACGAGACG SeuratProject 139 50 1
#> CGGCACGAACTCAG SeuratProject 94 55 0
#> CGTAGCCTGTATGC SeuratProject 371 75 1
#> CTAAACCTCTGACA SeuratProject 246 59 0
#> CTAAACCTGTGCAT SeuratProject 168 44 0
#> CTAACGGAACCGAT SeuratProject 189 53 0
#> CTAGGTGATGGTTG SeuratProject 324 76 1
#> CTGCCAACAGGAGC SeuratProject 146 47 0
#> CTTCATGACCGAAT SeuratProject 41 32 0
#> CTTGATTGATCTTC SeuratProject 233 76 1
#> GAACCTGATGAACC SeuratProject 87 50 1
#> GACATTCTCCACCT SeuratProject 872 96 1
#> GACGCTCTCTCTCG SeuratProject 202 30 0
#> GAGTTGTGGTAGCT SeuratProject 527 47 0
#> GATAGAGAAGGGTG SeuratProject 115 51 0
#> GATAGAGATCACGA SeuratProject 328 72 1
#> GATATAACACGCAT SeuratProject 52 36 0
#> GCACTAGACCTTTA SeuratProject 292 71 1
#> GCAGCTCTGTTTCT SeuratProject 72 45 0
#> GCGCACGACTTTAC SeuratProject 213 48 1
#> GCGCATCTTGCTCC SeuratProject 164 47 0
#> GCGTAAACACGGTT SeuratProject 754 83 0
#> GCTCCATGAGAAGT SeuratProject 139 61 0
#> GGAACACTTCAGAC SeuratProject 150 30 0
#> GGCATATGCTTATC SeuratProject 126 53 0
#> GGCATATGGGGAGT SeuratProject 172 29 0
#> GGCCGATGTACTCT SeuratProject 99 45 0
#> GGGTAACTCTAGTG SeuratProject 101 41 0
#> GGTGGAGATTACTC SeuratProject 204 52 0
#> GTAAGCACTCATTC SeuratProject 67 33 0
#> letter.idents groups RNA_snn_res.1 active_ident
#> AAATTCGAATCACG B g2 1 1
#> AAGCAAGAGCTTAG A g1 0 0
#> AAGCGACTTTGACG B g1 1 1
#> AATGCGTGGACGGA B g1 1 1
#> AATGTTGACAGTCA A g1 0 0
#> ACAGGTACTGGTGT A g1 0 0
#> ACCAGTGAATACCG A g1 1 1
#> ACGTGATGCCATGA B g2 1 1
#> ACTCGCACGAAAGT B g2 1 1
#> AGAGATGATCTCGC A g1 2 2
#> AGATATACCCGTAA A g2 0 0
#> AGGTCATGAGTGTC A g2 2 2
#> AGTCAGACTGCACA A g2 0 0
#> AGTCTTACTTCGGA A g1 0 0
#> ATAAGTTGGTACGT B g2 1 1
#> ATACCACTCTAAGC B g1 1 1
#> ATAGGAGAAACAGA B g1 1 1
#> ATCATCTGACACCA A g2 0 0
#> ATGCCAGAACGACT A g2 0 0
#> ATTACCTGCCTTAT B g1 1 1
#> ATTCAGCTCATTGG A g2 0 0
#> ATTGCACTTGCTTT B g1 1 1
#> ATTGTAGATTCCCG B g2 1 1
#> CATATAGACTAAGC A g1 2 2
#> CATCAGGATGCACA B g1 1 1
#> CATCATACGGAGCA A g1 2 2
#> CATGAGACACGGGA A g2 2 2
#> CATGCGCTAGTCAC A g1 0 0
#> CATGGCCTGTGCAT A g1 0 0
#> CATTACACCAACTG A g2 0 0
#> CCATCCGATTCGCC B g2 1 1
#> CCCAACTGCAATCG B g2 1 1
#> CCTATAACGAGACG B g2 2 2
#> CGGCACGAACTCAG A g2 0 0
#> CGTAGCCTGTATGC B g1 1 1
#> CTAAACCTCTGACA A g1 0 0
#> CTAAACCTGTGCAT A g1 2 2
#> CTAACGGAACCGAT A g1 0 0
#> CTAGGTGATGGTTG B g1 1 1
#> CTGCCAACAGGAGC A g1 2 2
#> CTTCATGACCGAAT A g2 0 0
#> CTTGATTGATCTTC B g1 1 1
#> GAACCTGATGAACC B g2 0 0
#> GACATTCTCCACCT B g1 2 2
#> GACGCTCTCTCTCG A g2 0 0
#> GAGTTGTGGTAGCT A g1 0 0
#> GATAGAGAAGGGTG A g1 2 2
#> GATAGAGATCACGA B g1 1 1
#> GATATAACACGCAT A g1 0 0
#> GCACTAGACCTTTA B g2 1 1
#> GCAGCTCTGTTTCT A g1 0 0
#> GCGCACGACTTTAC B g2 1 1
#> GCGCATCTTGCTCC A g1 0 0
#> GCGTAAACACGGTT A g1 2 2
#> GCTCCATGAGAAGT A g2 2 2
#> GGAACACTTCAGAC A g2 0 0
#> GGCATATGCTTATC A g1 0 0
#> GGCATATGGGGAGT A g1 0 0
#> GGCCGATGTACTCT A g2 0 0
#> GGGTAACTCTAGTG A g2 2 2
#> GGTGGAGATTACTC A g1 0 0
#> GTAAGCACTCATTC A g2 2 2
#> [ reached 'max' / getOption("max.print") -- omitted 18 rows ]
obs is an AnnotationDataframe object, which includes a to_dataframe()
method for reading the data into R as a data.frame. Wrap the call in
head() to preview only the first few rows:
# Preview only the first six rows of the cell-level metadata.
head(soco$members$RNA$obs$to_dataframe())
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8
#> AAATTCGAATCACG SeuratProject 327 62 1
#> AAGCAAGAGCTTAG SeuratProject 126 48 0
#> AAGCGACTTTGACG SeuratProject 443 77 1
#> AATGCGTGGACGGA SeuratProject 389 73 1
#> AATGTTGACAGTCA SeuratProject 100 41 0
#> ACAGGTACTGGTGT SeuratProject 151 59 0
#> letter.idents groups RNA_snn_res.1 active_ident
#> AAATTCGAATCACG B g2 1 1
#> AAGCAAGAGCTTAG A g1 0 0
#> AAGCGACTTTGACG B g1 1 1
#> AATGCGTGGACGGA B g1 1 1
#> AATGTTGACAGTCA A g1 0 0
#> ACAGGTACTGGTGT A g1 0 0
All of the array-based classes include a number of helper functions for interacting with the underlying arrays.
Print the schema of an array:
# Keep a handle to the obs array, then print its TileDB schema.
soma_obs <- soco$members$RNA$obs
soma_obs$schema()
#> tiledb_array_schema(
#> domain=tiledb_domain(c(
#> tiledb_dim(name="obs_id", domain=c(NULL,NULL), tile=NULL, type="ASCII")
#> )),
#> attrs=c(
#> tiledb_attr(name="orig.ident", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="nCount_RNA", type="FLOAT64", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="nFeature_RNA", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="RNA_snn_res.0.8", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="letter.idents", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="groups", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="RNA_snn_res.1", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="active_ident", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
#> ),
#> cell_order="ROW_MAJOR", tile_order="ROW_MAJOR", capacity=256, sparse=TRUE, allows_dups=FALSE,
#> coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
#> offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
#> validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
#> )
List the names of the array’s dimensions (i.e., indexed columns):
# Names of the array's dimensions (indexed columns).
soma_obs$dimnames()
#> [1] "obs_id"
and attributes (i.e., non-indexed columns):
# Names of the array's attributes (non-indexed columns).
soma_obs$attrnames()
#> [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "RNA_snn_res.0.8"
#> [5] "letter.idents" "groups" "RNA_snn_res.1" "active_ident"
You can also use the tiledb_array() method to directly
access the underlying arrays using the standard TileDB API, providing
the full functionality of the tiledb
package. For example, let’s query the obs array and
retrieve a subset of cells that match our QC criteria (in this small
dataset every cell has nFeature_RNA < 2500, so all 80 rows are returned):
# Open the obs array through the tiledb package, returning tibbles and keeping
# only the two QC columns for cells whose nFeature_RNA is below 2500.
qc_filter <- parse_query_condition(nFeature_RNA < 2500)
qc_columns <- c("nCount_RNA", "nFeature_RNA")
obs_array <- soma_obs$tiledb_array(
  return_as = "tibble",
  attrs = qc_columns,
  query_condition = qc_filter
)
# Empty brackets run the query and return every matching row.
obs_array[]
#> # A tibble: 80 × 3
#> obs_id nCount_RNA nFeature_RNA
#> <chr> <dbl> <int>
#> 1 AAATTCGAATCACG 327 62
#> 2 AAGCAAGAGCTTAG 126 48
#> 3 AAGCGACTTTGACG 443 77
#> 4 AATGCGTGGACGGA 389 73
#> 5 AATGTTGACAGTCA 100 41
#> 6 ACAGGTACTGGTGT 151 59
#> 7 ACCAGTGAATACCG 417 75
#> 8 ACGTGATGCCATGA 709 94
#> 9 ACTCGCACGAAAGT 231 49
#> 10 AGAGATGATCTCGC 191 61
#> # ℹ 70 more rows
# Record the R version and package versions used to produce the output above.
sessionInfo()
#> R version 4.3.1 (2023-06-16)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] RcppSpdlog_0.0.14 SeuratObject_4.1.3 sp_2.0-0
#> [4] tiledb_0.20.3 fs_1.6.3 tiledbsc_0.1.5.9002
#>
#> loaded via a namespace (and not attached):
#> [1] utf8_1.2.3 sass_0.4.7 future_1.33.0
#> [4] spdl_0.0.5 stringi_1.7.12 lattice_0.21-8
#> [7] listenv_0.9.0 digest_0.6.33 magrittr_2.0.3
#> [10] evaluate_0.21 grid_4.3.1 fastmap_1.1.1
#> [13] rprojroot_2.0.3 jsonlite_1.8.7 Matrix_1.5-4.1
#> [16] urltools_1.7.3 fansi_1.0.4 purrr_1.0.1
#> [19] codetools_0.2-19 textshaping_0.3.6 jquerylib_0.1.4
#> [22] cli_3.6.1 crayon_1.5.2 rlang_1.1.1
#> [25] triebeard_0.4.1 parallelly_1.36.0 future.apply_1.11.0
#> [28] bit64_4.0.5 cachem_1.0.8 yaml_2.3.7
#> [31] tools_4.3.1 parallel_4.3.1 nanotime_0.3.7
#> [34] memoise_2.0.1 globals_0.16.2 vctrs_0.6.3
#> [37] R6_2.5.1 zoo_1.8-12 lifecycle_1.0.3
#> [40] stringr_1.5.0 bit_4.0.5 ragg_1.2.5
#> [43] pkgconfig_2.0.3 desc_1.4.2 pillar_1.9.0
#> [46] pkgdown_2.0.7 progressr_0.13.0 bslib_0.5.0
#> [49] glue_1.6.2 Rcpp_1.0.11 systemfonts_1.0.4
#> [52] tibble_3.2.1 xfun_0.40 knitr_1.43
#> [55] htmltools_0.5.5 rmarkdown_2.23 compiler_4.3.1
#> [58] RcppCCTZ_0.2.12