options(max.print = 500)
library(tiledbsc)
#> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
#> which was just loaded, will retire in October 2023.
#> Please refer to R-spatial evolution reports for details, especially
#> https://r-spatial.org/r/2023/05/15/evolution4.html.
#> It may be desirable to make the sf package available;
#> package maintainers should consider adding sf to Suggests:.
#> The sp package is now running under evolution status 2
#> (status 2 uses the sf package in place of rgdal)
library(fs)
library(tiledb)
library(SeuratObject)
#> Loading required package: sp
#>
#> Attaching package: 'sp'
#> The following object is masked from 'package:tiledb':
#>
#> dimensions
data_dir <- file.path(tempdir(), "pbmc_small")
dir.create(data_dir, showWarnings = FALSE)
Load the subsetted 10x Genomics PBMC dataset provided by SeuratObject.
data("pbmc_small", package = "SeuratObject")
pbmc_small
#> An object of class Seurat
#> 230 features across 80 samples within 1 assay
#> Active assay: RNA (230 features, 20 variable features)
#> 2 dimensional reductions calculated: pca, tsne
Seurat
object to a TileDB-backed
SOMACollection
The SOMACollection
class provides a method for
converting an entire Seurat
object to a
SOMACollection
. This is the recommended way to perform the
conversion since it can handle multiple Assay
objects and
will (eventually) convert all of the standard slots that comprise a
Seurat
object.
The first step is to create a new SOMACollection
object
and provide a URI where the dataset should be created:
soco <- SOMACollection$new(uri = file.path(tempdir(), "soco"))
#> No SOMACollection currently exists at '/tmp/RtmpqnzHsY/soco'
#> Creating new SOMACollection at '/tmp/RtmpqnzHsY/soco'
#> No TileDBGroup currently exists at '/tmp/RtmpqnzHsY/soco/uns'
#> Creating new TileDBGroup at '/tmp/RtmpqnzHsY/soco/uns'
Next, we’ll pass the entire pbmc_small
object directly
to from_seurat()
and one SOMA
will be created
for each Assay
object:
soco$from_seurat(object = pbmc_small)
#> No SOMA currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA'
#> Creating new SOMA at '/tmp/RtmpqnzHsY/soco/soma_RNA'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> No AssayMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/X'
#> Creating new AssayMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/X'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/varp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/varp'
#> No TileDBGroup currently exists at '/tmp/RtmpqnzHsY/soco/soma_RNA/uns'
#> Creating new TileDBGroup at '/tmp/RtmpqnzHsY/soco/soma_RNA/uns'
#> Creating new AnnotationDataframe array with index [obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obs
#> Creating new AnnotationDataframe array with index [var_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soco/soma_RNA/var
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/counts'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/counts'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/X/counts
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/X/data
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data
#> Finished converting Seurat Assay with key [rna_] to SOMA
#> No AnnotationMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca'
#> Creating new AnnotationMatrix array with index [var_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca
#> Adding 2 metadata keys to array
#> No AnnotationMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca'
#> Creating new AnnotationMatrix array with index [obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca
#> Adding 2 metadata keys to array
#> No AnnotationMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne'
#> Creating new AnnotationMatrix array with index [obs_id] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne
#> Adding 2 metadata keys to array
#> No AnnotationPairwiseMatrix found at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn'
#> Creating new AnnotationPairwiseMatrix array with index [obs_id_i,obs_id_j] at '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn'
#> Adding 3 metadata keys to array
#> Ingesting AnnotationPairwiseMatrix data into: /tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn
#> Adding 2 metadata keys to array
#> No CommandsArray found at '/tmp/RtmpqnzHsY/soco/uns/commands'
#> Creating new CommandsArray array with index [index] at '/tmp/RtmpqnzHsY/soco/uns/commands'
#> Adding 3 metadata keys to array
#> Ingesting CommandsArray data into: /tmp/RtmpqnzHsY/soco/uns/commands
#> Finished converting Seurat object to SOMACollection
Examining the directory structure, you can see the top-level
SOMACollection
directory now contains a single
soma_RNA
sub-directory, corresponding to
pbmc_small
’s only assay, "RNA"
:
fs::dir_tree(soco$uri, recurse = 1)
#> /tmp/RtmpqnzHsY/soco
#> ├── __group
#> │ ├── __1691776241832_1691776241832_3a529bc39b414d1d8cb7e73cdb7aea9a_2
#> │ └── __1691776242132_1691776242132_26d6137163be461593b8ca3809c8012d_2
#> ├── __meta
#> │ └── __1691776241807_1691776241807_f7171b493b8040b28ce3d8dd46e2f0e4
#> ├── __tiledb_group.tdb
#> ├── soma_RNA
#> │ ├── X
#> │ ├── __group
#> │ ├── __meta
#> │ ├── __tiledb_group.tdb
#> │ ├── obs
#> │ ├── obsm
#> │ ├── obsp
#> │ ├── uns
#> │ ├── var
#> │ ├── varm
#> │ └── varp
#> └── uns
#> ├── __group
#> ├── __meta
#> ├── __tiledb_group.tdb
#> └── commands
Internally, the SOMACollection
class is used to convert
each Seurat Assay
object to a SOMA
, which
creates and populates the various sub-components, including:
- The counts, data, and scale.data matrices are each stored as a separate array within the X group.
- The data.frame containing feature-level metadata is ingested into the var array.

Separately, any dimensional reductions are extracted and stored as arrays in
the corresponding obsm/varm groups.
To close the loop we can then read the on-disk
SOMACollection
back into memory as a Seurat
object.
soco$to_seurat(project = "SOCO Example")
#> Reading AssayMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/X/counts'
#> Reading AssayMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/X/data'
#> Reading AssayMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/X/scale.data'
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/var'
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Found 2 dim reduction arrays
#> Reading AnnotationMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_pca'
#> Reading AnnotationMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/varm/dimreduction_pca'
#> Found 1 dim reduction arrays
#> Reading AnnotationMatrix into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obsm/dimreduction_tsne'
#> Reading AnnotationPairwiseMatrix into dataframe from '/tmp/RtmpqnzHsY/soco/soma_RNA/obsp/graph_snn'
#> Reading command history into memory
#> An object of class Seurat
#> 230 features across 80 samples within 1 assay
#> Active assay: RNA (230 features, 20 variable features)
#> 2 dimensional reductions calculated: pca, tsne
Assay
to TileDB-backed
SOMA
Conversions can happen at multiple levels of the API. For example, we
can operate directly on a Seurat Assay
using
SOMA
. The workflow is largely the same:
soma <- SOMA$new(uri = file.path(tempdir(), "soma"))
#> No SOMA currently exists at '/tmp/RtmpqnzHsY/soma'
#> Creating new SOMA at '/tmp/RtmpqnzHsY/soma'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soma/obs'
#> No AnnotationDataframe found at '/tmp/RtmpqnzHsY/soma/var'
#> No AssayMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/X'
#> Creating new AssayMatrixGroup at '/tmp/RtmpqnzHsY/soma/X'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/obsm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soma/obsm'
#> No AnnotationMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/varm'
#> Creating new AnnotationMatrixGroup at '/tmp/RtmpqnzHsY/soma/varm'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/obsp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soma/obsp'
#> No AnnotationPairwiseMatrixGroup currently exists at '/tmp/RtmpqnzHsY/soma/varp'
#> Creating new AnnotationPairwiseMatrixGroup at '/tmp/RtmpqnzHsY/soma/varp'
#> No TileDBGroup currently exists at '/tmp/RtmpqnzHsY/soma/uns'
#> Creating new TileDBGroup at '/tmp/RtmpqnzHsY/soma/uns'
fs::dir_tree(soma$uri)
#> /tmp/RtmpqnzHsY/soma
#> ├── X
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243908_1691776243908_59845e08751c4c4488f59dfeaef457eb
#> │ └── __tiledb_group.tdb
#> ├── __group
#> │ ├── __1691776243914_1691776243914_048b3a1143514aa096a544773d8b8854_2
#> │ ├── __1691776243926_1691776243926_82f92b6672cf4dd5852d56ff68fe0e21_2
#> │ ├── __1691776243938_1691776243938_eb5020eb1ed343039d51f3abf88f68b6_2
#> │ ├── __1691776243949_1691776243949_0cac1066ddb0453cbae687374a32ecd9_2
#> │ ├── __1691776243961_1691776243961_7df70651c26d481498e794fa8b0b349f_2
#> │ └── __1691776243972_1691776243972_6932f6a10d6b4891b1514f9435b91f3e_2
#> ├── __meta
#> │ └── __1691776243896_1691776243896_ff5bc32080f94159979cffcce1194c0a
#> ├── __tiledb_group.tdb
#> ├── obsm
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243919_1691776243919_bcfc8415f2984349913dce0e5b783928
#> │ └── __tiledb_group.tdb
#> ├── obsp
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243942_1691776243942_ff4565c756c9404eb05f8e999a618489
#> │ └── __tiledb_group.tdb
#> ├── uns
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243965_1691776243965_293f655300e342b7a5dc0e129453fb50
#> │ └── __tiledb_group.tdb
#> ├── varm
#> │ ├── __group
#> │ ├── __meta
#> │ │ └── __1691776243931_1691776243931_09bd1cdc962a4bd48ad7e3d53bdaa720
#> │ └── __tiledb_group.tdb
#> └── varp
#> ├── __group
#> ├── __meta
#> │ └── __1691776243954_1691776243954_4241dcd71fb84619b7721ae7a45e5733
#> └── __tiledb_group.tdb
Then we’ll pass the RNA
assay from pbmc_small
to the from_seurat_assay()
method of the SOMA
class.
Note: Because cell-level metadata is stored in the parent
Seurat
object, we need to provide this data
separately.
soma$from_seurat_assay(
object = pbmc_small[["RNA"]],
obs = pbmc_small[[]]
)
#> Creating new AnnotationDataframe array with index [obs_id] at '/tmp/RtmpqnzHsY/soma/obs'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soma/obs'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soma/obs
#> Creating new AnnotationDataframe array with index [var_id] at '/tmp/RtmpqnzHsY/soma/var'
#> Adding 3 metadata keys to array
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soma/var'
#> Ingesting AnnotationDataframe data into: /tmp/RtmpqnzHsY/soma/var
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soma/X/counts'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soma/X/counts'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soma/X/counts
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soma/X/data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soma/X/data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soma/X/data
#> No AssayMatrix found at '/tmp/RtmpqnzHsY/soma/X/scale.data'
#> Creating new AssayMatrix array with index [var_id,obs_id] at '/tmp/RtmpqnzHsY/soma/X/scale.data'
#> Adding 3 metadata keys to array
#> Ingesting AssayMatrix data into: /tmp/RtmpqnzHsY/soma/X/scale.data
#> Finished converting Seurat Assay with key [rna_] to SOMA
Examining the directory structure of the soma, we can see that the X group and the var and obs arrays have all been created.
fs::dir_tree(soma$uri, recurse = FALSE)
#> /tmp/RtmpqnzHsY/soma
#> ├── X
#> ├── __group
#> ├── __meta
#> ├── __tiledb_group.tdb
#> ├── obs
#> ├── obsm
#> ├── obsp
#> ├── uns
#> ├── var
#> ├── varm
#> └── varp
Any of the underlying TileDB arrays can be accessed directly from a
SOMACollection
object by navigating its internal
classes.
As an example, let’s access the cell-level metadata. Recall from the
SOMA
spec that cell-level metadata is stored in the obs
array of a SOMA
. Therefore, we must first access a
specific SOMA
within the SOMACollection
’s
somas
slot. Let’s generate a list of the available
SOMA
s:
names(soco$somas)
#> [1] "RNA"
Easy choice. "RNA"
can then be used to index the
corresponding SOMA
:
soco$members$RNA
#> <SOMA>
#> uri: /tmp/RtmpqnzHsY/soco/soma_RNA
#> arrays: obs, var
#> groups: obsm, obsp, uns, varm, varp, X
We can see we have access to a variety of fields and methods, but
obs
is the one we’re after.
soco$members$RNA$obs$to_dataframe()
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8
#> AAATTCGAATCACG SeuratProject 327 62 1
#> AAGCAAGAGCTTAG SeuratProject 126 48 0
#> AAGCGACTTTGACG SeuratProject 443 77 1
#> AATGCGTGGACGGA SeuratProject 389 73 1
#> AATGTTGACAGTCA SeuratProject 100 41 0
#> ACAGGTACTGGTGT SeuratProject 151 59 0
#> ACCAGTGAATACCG SeuratProject 417 75 0
#> ACGTGATGCCATGA SeuratProject 709 94 1
#> ACTCGCACGAAAGT SeuratProject 231 49 1
#> AGAGATGATCTCGC SeuratProject 191 61 0
#> AGATATACCCGTAA SeuratProject 187 61 0
#> AGGTCATGAGTGTC SeuratProject 62 31 0
#> AGTCAGACTGCACA SeuratProject 173 53 0
#> AGTCTTACTTCGGA SeuratProject 157 29 0
#> ATAAGTTGGTACGT SeuratProject 99 42 1
#> ATACCACTCTAAGC SeuratProject 612 69 1
#> ATAGGAGAAACAGA SeuratProject 406 74 1
#> ATCATCTGACACCA SeuratProject 168 37 0
#> ATGCCAGAACGACT SeuratProject 70 47 0
#> ATTACCTGCCTTAT SeuratProject 463 77 1
#> ATTCAGCTCATTGG SeuratProject 212 38 0
#> ATTGCACTTGCTTT SeuratProject 502 81 1
#> ATTGTAGATTCCCG SeuratProject 745 84 1
#> CATATAGACTAAGC SeuratProject 286 68 0
#> CATCAGGATGCACA SeuratProject 353 80 1
#> CATCATACGGAGCA SeuratProject 79 43 0
#> CATGAGACACGGGA SeuratProject 51 26 0
#> CATGCGCTAGTCAC SeuratProject 443 81 0
#> CATGGCCTGTGCAT SeuratProject 85 52 0
#> CATTACACCAACTG SeuratProject 316 65 0
#> CCATCCGATTCGCC SeuratProject 224 50 1
#> CCCAACTGCAATCG SeuratProject 87 42 1
#> CCTATAACGAGACG SeuratProject 139 50 1
#> CGGCACGAACTCAG SeuratProject 94 55 0
#> CGTAGCCTGTATGC SeuratProject 371 75 1
#> CTAAACCTCTGACA SeuratProject 246 59 0
#> CTAAACCTGTGCAT SeuratProject 168 44 0
#> CTAACGGAACCGAT SeuratProject 189 53 0
#> CTAGGTGATGGTTG SeuratProject 324 76 1
#> CTGCCAACAGGAGC SeuratProject 146 47 0
#> CTTCATGACCGAAT SeuratProject 41 32 0
#> CTTGATTGATCTTC SeuratProject 233 76 1
#> GAACCTGATGAACC SeuratProject 87 50 1
#> GACATTCTCCACCT SeuratProject 872 96 1
#> GACGCTCTCTCTCG SeuratProject 202 30 0
#> GAGTTGTGGTAGCT SeuratProject 527 47 0
#> GATAGAGAAGGGTG SeuratProject 115 51 0
#> GATAGAGATCACGA SeuratProject 328 72 1
#> GATATAACACGCAT SeuratProject 52 36 0
#> GCACTAGACCTTTA SeuratProject 292 71 1
#> GCAGCTCTGTTTCT SeuratProject 72 45 0
#> GCGCACGACTTTAC SeuratProject 213 48 1
#> GCGCATCTTGCTCC SeuratProject 164 47 0
#> GCGTAAACACGGTT SeuratProject 754 83 0
#> GCTCCATGAGAAGT SeuratProject 139 61 0
#> GGAACACTTCAGAC SeuratProject 150 30 0
#> GGCATATGCTTATC SeuratProject 126 53 0
#> GGCATATGGGGAGT SeuratProject 172 29 0
#> GGCCGATGTACTCT SeuratProject 99 45 0
#> GGGTAACTCTAGTG SeuratProject 101 41 0
#> GGTGGAGATTACTC SeuratProject 204 52 0
#> GTAAGCACTCATTC SeuratProject 67 33 0
#> letter.idents groups RNA_snn_res.1 active_ident
#> AAATTCGAATCACG B g2 1 1
#> AAGCAAGAGCTTAG A g1 0 0
#> AAGCGACTTTGACG B g1 1 1
#> AATGCGTGGACGGA B g1 1 1
#> AATGTTGACAGTCA A g1 0 0
#> ACAGGTACTGGTGT A g1 0 0
#> ACCAGTGAATACCG A g1 1 1
#> ACGTGATGCCATGA B g2 1 1
#> ACTCGCACGAAAGT B g2 1 1
#> AGAGATGATCTCGC A g1 2 2
#> AGATATACCCGTAA A g2 0 0
#> AGGTCATGAGTGTC A g2 2 2
#> AGTCAGACTGCACA A g2 0 0
#> AGTCTTACTTCGGA A g1 0 0
#> ATAAGTTGGTACGT B g2 1 1
#> ATACCACTCTAAGC B g1 1 1
#> ATAGGAGAAACAGA B g1 1 1
#> ATCATCTGACACCA A g2 0 0
#> ATGCCAGAACGACT A g2 0 0
#> ATTACCTGCCTTAT B g1 1 1
#> ATTCAGCTCATTGG A g2 0 0
#> ATTGCACTTGCTTT B g1 1 1
#> ATTGTAGATTCCCG B g2 1 1
#> CATATAGACTAAGC A g1 2 2
#> CATCAGGATGCACA B g1 1 1
#> CATCATACGGAGCA A g1 2 2
#> CATGAGACACGGGA A g2 2 2
#> CATGCGCTAGTCAC A g1 0 0
#> CATGGCCTGTGCAT A g1 0 0
#> CATTACACCAACTG A g2 0 0
#> CCATCCGATTCGCC B g2 1 1
#> CCCAACTGCAATCG B g2 1 1
#> CCTATAACGAGACG B g2 2 2
#> CGGCACGAACTCAG A g2 0 0
#> CGTAGCCTGTATGC B g1 1 1
#> CTAAACCTCTGACA A g1 0 0
#> CTAAACCTGTGCAT A g1 2 2
#> CTAACGGAACCGAT A g1 0 0
#> CTAGGTGATGGTTG B g1 1 1
#> CTGCCAACAGGAGC A g1 2 2
#> CTTCATGACCGAAT A g2 0 0
#> CTTGATTGATCTTC B g1 1 1
#> GAACCTGATGAACC B g2 0 0
#> GACATTCTCCACCT B g1 2 2
#> GACGCTCTCTCTCG A g2 0 0
#> GAGTTGTGGTAGCT A g1 0 0
#> GATAGAGAAGGGTG A g1 2 2
#> GATAGAGATCACGA B g1 1 1
#> GATATAACACGCAT A g1 0 0
#> GCACTAGACCTTTA B g2 1 1
#> GCAGCTCTGTTTCT A g1 0 0
#> GCGCACGACTTTAC B g2 1 1
#> GCGCATCTTGCTCC A g1 0 0
#> GCGTAAACACGGTT A g1 2 2
#> GCTCCATGAGAAGT A g2 2 2
#> GGAACACTTCAGAC A g2 0 0
#> GGCATATGCTTATC A g1 0 0
#> GGCATATGGGGAGT A g1 0 0
#> GGCCGATGTACTCT A g2 0 0
#> GGGTAACTCTAGTG A g2 2 2
#> GGTGGAGATTACTC A g1 0 0
#> GTAAGCACTCATTC A g2 2 2
#> [ reached 'max' / getOption("max.print") -- omitted 18 rows ]
obs is an AnnotationDataframe object, which includes a to_dataframe() method for reading the data into R as a data.frame:
head(soco$members$RNA$obs$to_dataframe())
#> Reading AnnotationDataframe into memory from '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> Checking legacy validity mode for array: '/tmp/RtmpqnzHsY/soco/soma_RNA/obs'
#> orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8
#> AAATTCGAATCACG SeuratProject 327 62 1
#> AAGCAAGAGCTTAG SeuratProject 126 48 0
#> AAGCGACTTTGACG SeuratProject 443 77 1
#> AATGCGTGGACGGA SeuratProject 389 73 1
#> AATGTTGACAGTCA SeuratProject 100 41 0
#> ACAGGTACTGGTGT SeuratProject 151 59 0
#> letter.idents groups RNA_snn_res.1 active_ident
#> AAATTCGAATCACG B g2 1 1
#> AAGCAAGAGCTTAG A g1 0 0
#> AAGCGACTTTGACG B g1 1 1
#> AATGCGTGGACGGA B g1 1 1
#> AATGTTGACAGTCA A g1 0 0
#> ACAGGTACTGGTGT A g1 0 0
All of the array-based classes include a number of helper functions for interacting with the underlying arrays.
Print the schema of an array:
soma_obs <- soco$members$RNA$obs
soma_obs$schema()
#> tiledb_array_schema(
#> domain=tiledb_domain(c(
#> tiledb_dim(name="obs_id", domain=c(NULL,NULL), tile=NULL, type="ASCII")
#> )),
#> attrs=c(
#> tiledb_attr(name="orig.ident", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="nCount_RNA", type="FLOAT64", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="nFeature_RNA", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="RNA_snn_res.0.8", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="letter.idents", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="groups", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="RNA_snn_res.1", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
#> tiledb_attr(name="active_ident", type="ASCII", ncells=NA, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
#> ),
#> cell_order="ROW_MAJOR", tile_order="ROW_MAJOR", capacity=256, sparse=TRUE, allows_dups=FALSE,
#> coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
#> offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
#> validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
#> )
List the names of the array’s dimensions (i.e., indexed columns):
soma_obs$dimnames()
#> [1] "obs_id"
and attributes (i.e., non-indexed columns):
soma_obs$attrnames()
#> [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "RNA_snn_res.0.8"
#> [5] "letter.idents" "groups" "RNA_snn_res.1" "active_ident"
You can also use the tiledb_array()
method to directly
access the underlying arrays using the standard TileDB API, providing
the full functionality of the tiledb
package. For example, let’s query the obs
array and
retrieve a subset of cells that match our QC criteria:
obs_array <- soma_obs$tiledb_array(
return_as = "tibble",
attrs = c("nCount_RNA", "nFeature_RNA"),
query_condition = parse_query_condition(nFeature_RNA < 2500)
)
obs_array[]
#> # A tibble: 80 × 3
#> obs_id nCount_RNA nFeature_RNA
#> <chr> <dbl> <int>
#> 1 AAATTCGAATCACG 327 62
#> 2 AAGCAAGAGCTTAG 126 48
#> 3 AAGCGACTTTGACG 443 77
#> 4 AATGCGTGGACGGA 389 73
#> 5 AATGTTGACAGTCA 100 41
#> 6 ACAGGTACTGGTGT 151 59
#> 7 ACCAGTGAATACCG 417 75
#> 8 ACGTGATGCCATGA 709 94
#> 9 ACTCGCACGAAAGT 231 49
#> 10 AGAGATGATCTCGC 191 61
#> # ℹ 70 more rows
sessionInfo()
#> R version 4.3.1 (2023-06-16)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] RcppSpdlog_0.0.14 SeuratObject_4.1.3 sp_2.0-0
#> [4] tiledb_0.20.3 fs_1.6.3 tiledbsc_0.1.5.9002
#>
#> loaded via a namespace (and not attached):
#> [1] utf8_1.2.3 sass_0.4.7 future_1.33.0
#> [4] spdl_0.0.5 stringi_1.7.12 lattice_0.21-8
#> [7] listenv_0.9.0 digest_0.6.33 magrittr_2.0.3
#> [10] evaluate_0.21 grid_4.3.1 fastmap_1.1.1
#> [13] rprojroot_2.0.3 jsonlite_1.8.7 Matrix_1.5-4.1
#> [16] urltools_1.7.3 fansi_1.0.4 purrr_1.0.1
#> [19] codetools_0.2-19 textshaping_0.3.6 jquerylib_0.1.4
#> [22] cli_3.6.1 crayon_1.5.2 rlang_1.1.1
#> [25] triebeard_0.4.1 parallelly_1.36.0 future.apply_1.11.0
#> [28] bit64_4.0.5 cachem_1.0.8 yaml_2.3.7
#> [31] tools_4.3.1 parallel_4.3.1 nanotime_0.3.7
#> [34] memoise_2.0.1 globals_0.16.2 vctrs_0.6.3
#> [37] R6_2.5.1 zoo_1.8-12 lifecycle_1.0.3
#> [40] stringr_1.5.0 bit_4.0.5 ragg_1.2.5
#> [43] pkgconfig_2.0.3 desc_1.4.2 pillar_1.9.0
#> [46] pkgdown_2.0.7 progressr_0.13.0 bslib_0.5.0
#> [49] glue_1.6.2 Rcpp_1.0.11 systemfonts_1.0.4
#> [52] tibble_3.2.1 xfun_0.40 knitr_1.43
#> [55] htmltools_0.5.5 rmarkdown_2.23 compiler_4.3.1
#> [58] RcppCCTZ_0.2.12