See the Login page for login information.
Generic UDFs
These are functions you write, invoked server-side.
# Namespace that is charged for the execution
namespace <- "demo"

# The UDF: element-wise sum of two vectors. Both arguments carry defaults,
# so the function can be called with no arguments at all.
myfunc <- function(x = 50:54, y = 70:74) {
  x + y
}
# Run myfunc on TileDB Cloud and show the value it returned
result <- tiledbcloud::execute_generic_udf(
  udf = myfunc,
  namespace = namespace
)
print(result)
[1] 120 122 124 126 128
Using arguments:
namespace <- "demo"

# This UDF has no defaults; its inputs are supplied through `myargs`
myfunc <- function(x, y) {
  x + y
}

# Unnamed list holding one entry per UDF argument
myargs <- list(100:104, 200:204)
# Same call as before, this time forwarding the argument list to the UDF
result <- tiledbcloud::execute_generic_udf(
  udf = myfunc,
  args = myargs,
  namespace = namespace
)
print(result)
[1] 300 302 304 306 308
Single-array UDFs
# Summarise column "a" of `df`: returns a list holding its minimum,
# median and maximum under the names min, med and max.
myfunc <- function(df) {
  values <- as.vector(df[["a"]])
  list(
    min = min(values),
    med = median(values),
    max = max(values)
  )
}
# Run myfunc against a slice of the dense quickstart array and print the
# list it returns. Only attribute "a" is requested.
result <- tiledbcloud::execute_array_udf(
  array = "tiledb://TileDB-Inc/quickstart_dense",
  udf = myfunc,
  selectedRanges = list(cbind(1, 2), cbind(1, 2)), # Upper-left 2x2
  attrs = c("a"),
  namespace = "demo" # namespace to charge (R comments start with `#`, not `//`)
)
print(result)
$min
[1] 1
$med
[1] 3.5
$max
[1] 6
# Run a linear regression of bill length on body mass.
# Returns the fitted coefficients: the intercept first, then the slope,
# which is labelled "vec2" after the predictor variable below.
myfunc <- function(df) {
  response <- as.vector(df$bill_length_mm)
  # Keep the name `vec2`: cbind() turns it into the column name, and that
  # column name becomes the label of the slope coefficient in the result.
  vec2 <- as.vector(df$body_mass_g)
  design <- cbind(1, vec2)
  lm.fit(design, response)$coefficients
}
# No namespace argument is passed here: a NULL namespace means the
# username namespace is charged.
result <- tiledbcloud::execute_array_udf(
  array = "tiledb://demo/palmer_penguins2",
  udf = myfunc,
  selectedRanges = list(cbind("A", "Z"), cbind(2007, 2009)),
  attrs = list("bill_length_mm", "body_mass_g")
)
print(result)
vec2
27.15072200 0.00400329
Multi-array UDFs
# Combine column "a" from two inputs: returns a list with the summed
# lengths, minima, medians and maxima of the two columns.
myfunc <- function(df1, df2) {
  first <- as.vector(df1[["a"]])
  second <- as.vector(df2[["a"]])

  total_len <- length(first) + length(second)
  total_min <- min(first) + min(second)
  total_med <- median(first) + median(second)
  total_max <- max(first) + max(second)

  list(len = total_len, min = total_min, med = total_med, max = total_max)
}
# Describe the two arrays the multi-array UDF reads from. Each description
# carries the array URI, the ranges to select (with a row-major layout) and
# the attribute buffers to fetch.
# Every class is namespace-qualified so the snippet does not depend on
# library(tiledbcloud) having been attached beforehand.
details1 <- tiledbcloud::UDFArrayDetails$new(
  uri = "tiledb://TileDB-Inc/quickstart_dense",
  ranges = tiledbcloud::QueryRanges$new(
    layout = tiledbcloud::Layout$new("row-major"),
    ranges = list(cbind(1, 4), cbind(1, 4))
  ),
  buffers = list("a")
)
details2 <- tiledbcloud::UDFArrayDetails$new(
  uri = "tiledb://TileDB-Inc/quickstart_sparse",
  ranges = tiledbcloud::QueryRanges$new(
    layout = tiledbcloud::Layout$new("row-major"),
    ranges = list(cbind(1, 2), cbind(1, 4))
  ),
  buffers = list("a")
)
# Run myfunc over both array descriptions, charging the "demo" namespace
result <- tiledbcloud::execute_multi_array_udf(
  array_list = list(details1, details2),
  udf = myfunc,
  namespace = "demo"
)
print(result)
$len
[1] 19
$min
[1] 2
$med
[1] 10.5
$max
[1] 19
UDF registration
Known issue: Tags can be successfully set using the functions shown below, but they are not read back by get_udf_info. This is a server-side bug that affects UDF-info readback regardless of client (Python or R).
Register a UDF and read back UDF info
Generic UDFs
library(tiledbcloud)
# Namespace the UDF is registered under
namespace <- 'testuser'
# Name given to the registered UDF
udfname <- 'udf-registration-test-generic'
# UDF to register: raises each element of `vec` to `exponent` and sums the
# results. (`**` is R's alternative spelling of `^`.)
# The body is left exactly as written because the registered source text
# is echoed back in the `exec_raw` field of the UDF info.
myfunc <- function(vec, exponent) {
sum(vec ** exponent)
}
# Register myfunc as a generic UDF, then read its metadata back and show it.
# `type` is passed by name instead of as a lone positional value placed
# between named arguments, so the call does not rely on parameter order.
register_udf(
  namespace = namespace,
  name = udfname,
  type = "generic",
  func = myfunc
)
info <- get_udf_info(namespace = namespace, name = udfname)
str(info)
$ ./cloud-register-generic-udf.r
List of 5
$ name : chr "udf-registration-test-generic"
$ exec : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
$ exec_raw: chr "function (vec, exponent) \n{\n sum(vec ** exponent)\n}"
$ language: chr "\"r\""
$ tags : NULL
Single-array UDFs
library(tiledbcloud)
# Namespace the UDF is registered under
namespace <- "testuser"
# Name given to the registered UDF
udfname <- "udf-registration-test-single-array-with-arg"
# UDF to register: takes column `attrname` of `df`, raises each element to
# `exponent` and sums the results.
# The body is left exactly as written because the registered source text
# is echoed back in the `exec_raw` field of the UDF info.
myfunc <- function(df, attrname, exponent) {
vec <- as.vector(df[[attrname]])
sum(vec ** exponent)
}
# Register myfunc as a single-array UDF, then fetch and display its metadata
tiledbcloud::register_udf(
  namespace = namespace,
  name = udfname,
  type = "single_array",
  func = myfunc
)
info <- tiledbcloud::get_udf_info(
  namespace = namespace,
  name = udfname
)
str(info)
List of 5
$ name : chr "udf-registration-test-single-array-with-arg"
$ exec : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
$ exec_raw: chr "function (df, attrname, exponent) \n{\n vec <- as.vector(df[[attrname]])\n sum(vec ** exponent)\n}"
$ language: chr "\"r\""
$ tags : list()
Multi-array UDFs
library(tiledbcloud)
# Namespace the UDF is registered under
namespace <- "testuser"
# Name given to the registered UDF
udfname <- "udf-registration-test-multi-array-with-arg"
# UDF to register: reads column `attrname` from both inputs and returns a
# list with the summed lengths, minima, medians and maxima of the two columns.
# The body is left exactly as written because the registered source text
# is echoed back in the `exec_raw` field of the UDF info.
myfunc <- function(df1, df2, attrname) {
vec1 <- as.vector(df1[[attrname]])
vec2 <- as.vector(df2[[attrname]])
list(
len=length(vec1) + length(vec2),
min=min(vec1) + min(vec2),
med=median(vec1) + median(vec2),
max=max(vec1) + max(vec2)
)
}
# Register myfunc as a multi-array UDF, then fetch and display its metadata
tiledbcloud::register_udf(
  namespace = namespace,
  name = udfname,
  type = "multi_array",
  func = myfunc
)
info <- tiledbcloud::get_udf_info(
  namespace = namespace,
  name = udfname
)
str(info)
[1] "GET INFO"
List of 5
$ name : chr "udf-registration-test-multi-array-with-arg"
$ exec : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
$ exec_raw: chr "function (df1, df2, attrname) \n{\n vec1 <- as.vector(df1[[attrname]])\n vec2 <- as.vector(df2[[attrname]"| __truncated__
$ language: chr "\"r\""
$ tags : list()
Invoke registered UDF
Generic UDFs
library(tiledbcloud)
# A registered UDF is addressed as "<namespace>/<udf name>"
namespace <- "testuser"
udfname <- "udf-registration-test-generic"
registered_udf_name <- paste0(namespace, "/", udfname)
# Invoke the registered generic UDF by name, passing its arguments by name
udf_args <- list(vec = 1:10, exponent = 3)
result <- execute_generic_udf(
  registered_udf_name = registered_udf_name,
  args = udf_args,
  namespace = namespace
)
print(result)
$ ./cloud-execute-registered-generic-udf.r
[1] 3025
[1] 166375
Single-array UDFs
library(tiledbcloud)
# A registered UDF is addressed as "<namespace>/<udf name>"
namespace_to_charge <- "testuser"
udf_name <- "udf-registration-test-single-array-with-arg"
registered_udf_name <- paste0(namespace_to_charge, "/", udf_name)
# Invoke the registered single-array UDF on a slice of the dense quickstart
# array, requesting only attribute "a".
# The array is given as a full tiledb:// URI, and the namespace to charge
# comes from `namespace_to_charge` rather than a repeated string literal.
result <- tiledbcloud::execute_array_udf(
  array = "tiledb://TileDB-Inc/quickstart_dense",
  registered_udf_name = registered_udf_name,
  args = list(attrname = "a", exponent = 2),
  selectedRanges = list(cbind(1, 2), cbind(1, 2)),
  attrs = c("a"),
  namespace = namespace_to_charge # namespace to charge
)
print(result)
[1] 66
Multi-array UDFs
#!/usr/bin/env Rscript
library(tiledbcloud)
# Namespace billed for the run, and the registered UDF to invoke
namespace_to_charge <- "testuser"
registered_udf_name <- "testuser/udf-registration-test-multi-array-with-arg"

# First input: the dense quickstart array
details1 <- tiledbcloud::UDFArrayDetails$new(
  uri = "tiledb://TileDB-Inc/quickstart_dense",
  ranges = QueryRanges$new(
    layout = Layout$new("row-major"),
    ranges = list(cbind(1, 4), cbind(1, 4))
  ),
  buffers = list("a")
)

# Second input: the sparse quickstart array
details2 <- tiledbcloud::UDFArrayDetails$new(
  uri = "tiledb://TileDB-Inc/quickstart_sparse",
  ranges = QueryRanges$new(
    layout = Layout$new("row-major"),
    ranges = list(cbind(1, 2), cbind(1, 4))
  ),
  buffers = list("a")
)
# Invoke the registered multi-array UDF over both array descriptions
input_arrays <- list(details1, details2)
result <- tiledbcloud::execute_multi_array_udf(
  array_list = input_arrays,
  registered_udf_name = registered_udf_name,
  args = list(attrname = "a"),
  namespace = namespace_to_charge
)
print(result)
$len
[1] 19
$min
[1] 2
$med
[1] 10.5
$max
[1] 19
Unregister UDF
This is the same for generic, single_array, and multi_array registered UDFs.
library(tiledbcloud)
# Identify the registered UDF by namespace and name, then remove it
namespace <- "testuser"
udfname <- "udf-registration-test-generic"
deregister_udf(
  namespace = namespace,
  name = udfname
)