Basic Reading

To read either a dense or a sparse array, the user typically opens the array in read mode and provides a subarray, any subset of the attributes (optionally including the coordinates), and the layout in which the results should be returned (see Reading for more details). You can read from an array as follows:

C
C++
Python
R
Java
Go
C
// Reads attribute "a" together with the coordinates "d1" and "d2" for a
// slice of a 2D array, returning the cells in row-major order.
// NOTE(review): error handling is omitted in this snippet.
// Create TileDB context
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
// Open a 2D array for reading
tiledb_array_t* array;
tiledb_array_alloc(ctx, "<array-uri>", &array);
tiledb_array_open(ctx, array, TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4, given as one inclusive
// {low, high} pair per dimension
int subarray[] = {1, 2, 2, 4};
// Prepare the buffers that will hold the results. Each *_size variable
// starts out as the capacity of its buffer in bytes.
int d1[20];
uint64_t d1_size = sizeof(d1);
int d2[20];
uint64_t d2_size = sizeof(d2);
int a[20];
uint64_t a_size = sizeof(a);
// Create query
tiledb_query_t* query;
tiledb_query_alloc(ctx, array, TILEDB_READ, &query);
tiledb_query_set_subarray(ctx, query, subarray);
tiledb_query_set_layout(ctx, query, TILEDB_ROW_MAJOR);
tiledb_query_set_buffer(ctx, query, "a", a, &a_size);
tiledb_query_set_buffer(ctx, query, "d1", d1, &d1_size);
tiledb_query_set_buffer(ctx, query, "d2", d2, &d2_size);
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// NOTE: a_size, d1_size and d2_size now reflect the result size,
// i.e., TileDB changes those values so that you know how many
// results were retrieved (in bytes). Divide by sizeof(int) to get
// the number of elements.
// Clean up
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
C++
// Create TileDB context
Context ctx;

// Open the array in read mode
Array array(ctx, "<array-uri>", TILEDB_READ);

// Rows 1, 2 and cols 2, 3, 4: one inclusive {low, high} pair per dimension
const std::vector<int> slice_ranges{1, 2, 2, 4};

// Result buffers, 20 elements each
std::vector<int> d1(20);
std::vector<int> d2(20);
std::vector<int> a(20);

// Configure the read query, one setting per statement
Query query(ctx, array, TILEDB_READ);
query.set_subarray(slice_ranges);
query.set_layout(TILEDB_ROW_MAJOR);
query.set_buffer("a", a);
query.set_buffer("d1", d1);
query.set_buffer("d2", d2);
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.

// Submit the query
query.submit();

// Close the array
array.close();

// Get the number of elements in the result vectors; the count for a
// fixed-length buffer is the `.second` member of its entry
auto result_counts = query.result_buffer_elements();
auto d1_num = result_counts["d1"].second;
auto d2_num = result_counts["d2"].second;
auto a_num = result_counts["a"].second;
Python
# tiledb.open returns a DenseArray or SparseArray, as per the array schema
with tiledb.open(array_uri) as A:
    # note that array indexes are half-open like NumPy,
    # so this selects rows 1, 2 and cols 2, 3, 4
    data = A[1:3, 2:5]
    a = data['a']
    d1 = data['d1']
    d2 = data['d2']
# using `with` (context manager) ensures that A.close() is called

d1_num = len(d1)
d2_num = len(d2)
a_num = len(a)

# to select only a single attribute, use the `query` method
# with `attrs` argument, which returns an indexable object
with tiledb.open(array_uri) as A:
    q = A.query(attrs=('a',))
    # indexing the Query object will only retrieve the
    # selected attribute(s)
    q[1:3, 2:5]

# If you wish to return the coordinate vectors as well
with tiledb.open(array_uri) as A:
    q = A.query(attrs=('a',), coords=True)
    q[1:3, 2:5]

# NOTE: Indexing of the query object follows numpy semantics,
# therefore, q[1:10] stands for range [1,9]
R
# Two ways to read the same slice: the high-level `[` operator on a
# tiledb_array object, and the lower-level libtiledb_* functions.
# `uridense` is assumed to hold the array URI; it is defined outside
# this snippet.
# Create a TileDB context
ctx <- tiledb_ctx()
# Open a dense array
A <- tiledb_array(uri = uridense)
# Or, open a sparse array
# A <- tiledb_array(uri = "<array-uri>", is.sparse = TRUE)
# Slice only rows 1, 2 and cols 2, 3, 4
a <- A[1:2, 2:4]
show(a)
# we can also read using lower-level code
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "READ")
## subarray of rows 1,2 and cols 2,3,4
subarr <- c(1L,2L, 2L,4L)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "ROW_MAJOR")
# 2 rows x 3 cols = 6 cells in the slice
a <- integer(6) # reserve space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", a)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
print(a) # unformed array, no coordinates
res <- tiledb:::libtiledb_array_close(arrptr)
Java
// Create TileDB context and open the array
try(Context ctx = new Context(),
Array array = new Array(ctx, "<array-uri>", TILEDB_READ)) {
// Slice only rows 1, 2 and cols 2, 3, 4
NativeArray subarray = new NativeArray(ctx, new long[] {1, 2, 2, 4}, Integer.class);
// Prepare the query
Query query = new Query(ctx, array, TILEDB_READ);
// Prepare the vectors that will hold the results
query.setBuffer(
"d1", new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"d2", new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"a1", new NativeArray(ctx, 20, Integer.class));
query.setSubarray(subarray)
.setLayout(TILEDB_ROW_MAJOR);
// Submit the query and close the array.
query.submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Get the results in native java arrays
int[] d1 = (int[]) query.getBuffer("d1");
int[] d2 = (int[]) query.getBuffer("d2");
int[] a1 = (int[]) query.getBuffer("a1");
// Close the query
query.close();
}
Go
// Reads attribute "a" together with the coordinates "d1" and "d2" for a
// slice of a 2D array in row-major order.
// NOTE(review): the second return values are discarded with `_` and the
// results of Open/Submit/Close are not checked in this snippet.
// Create TileDB context
ctx, _ := tiledb.NewContext(nil)
// Open a 2D array for reading
array, _ := tiledb.NewArray(ctx, "<array-uri>")
array.Open(tiledb.TILEDB_READ)
// Slice only rows 1, 2 and cols 2, 3, 4
subArray := []int32{1, 2, 2, 4}
// Prepare the vectors that will hold the results
d1 := make([]int32, 20)
d2 := make([]int32, 20)
a := make([]int32, 20)
// Create query
query, _ := tiledb.NewQuery(ctx, array)
query.SetSubArray(subArray)
query.SetLayout(tiledb.TILEDB_ROW_MAJOR)
query.SetBuffer("a", a)
query.SetBuffer("d1", d1)
query.SetBuffer("d2", d2)
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
query.Submit()
// Close array
array.Close()
// NOTE(review): the slices a, d1 and d2 keep their allocated length (20)
// after the query, so len() does not tell you how many results were
// retrieved. query.ResultBufferElements() appears to report the number
// of elements written per buffer - confirm against the TileDB-Go docs.

Variable-length Attributes

You can read variable-length attributes as follows:

C
C++
Python
R
Java
Go
C
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute:
// one for the values and one for the offsets
char b_val[100];
unsigned long long b_val_size = sizeof(b_val);
unsigned long long b_off[20];
unsigned long long b_off_size = sizeof(b_off);
// Set buffers for the variable-length attributes
// (offsets buffer first, then the values buffer)
tiledb_query_set_buffer_var(ctx, query, "b", b_off, &b_off_size, b_val, &b_val_size);
// NOTE: once the query has been submitted, b_off_size and b_val_size
// reflect the result size (in bytes) for the offsets and values of the
// results on this attribute, i.e., TileDB changes those values so that
// you know how many results were retrieved
C++
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute:
// one for the offsets and one for the values
std::vector<char> b_val(100);
std::vector<uint64_t> b_off(20);
// Set buffers for the variable-length attribute. set_buffer is a member
// of Query, so only the attribute name and the buffers are passed.
query.set_buffer("b", b_off, b_val);
// ... submit the query
// Get the number of elements in the result vectors:
// `.first` is the offsets count, `.second` is the values count
auto b_off_num = query.result_buffer_elements()["b"].first;
auto b_val_num = query.result_buffer_elements()["b"].second;
Python
# Variable-length arrays may be sliced as usual in Python.
# The API handles unpacking and type conversion, and returns
# a NumPy object array-of-arrays.
# For example, given the var-length array created in the
# Writing Arrays section, the result will be returned as:
with tiledb.SparseArray(array_name) as A:
    print(A[:][attr_name])
# Returns:
# array([array([1, 1], dtype=int32), array([2], dtype=int32),
#        array([3, 3, 3], dtype=int32), array([4], dtype=int32)],
#       dtype=object)
R
# Read the variable-length character attribute "b" with the
# lower-level libtiledb_* functions.
# `uridensevar` is assumed to hold the array URI; it is defined
# outside this snippet.
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensevar,
"READ")
# subarray of rows 1..4 and cols 1..4
subarr <- c(1L,4L, 1L,4L)
# allocate the buffer for "b"
# NOTE(review): 16 and 100 look like the offsets and data capacities - confirm
bufptr <-
tiledb:::libtiledb_query_buffer_var_char_alloc(arrptr, subarr,
"b", 16, 100)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr,
"ROW_MAJOR")
qryptr <-
tiledb:::libtiledb_query_set_buffer_var_char(qryptr, "b",
bufptr)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
# NOTE(review): expect_equal is presumably testthat's assertion; outside
# a test, compare the query status to "COMPLETE" directly
expect_equal(tiledb:::libtiledb_query_status(qryptr), "COMPLETE")
tiledb:::libtiledb_array_close(arrptr)
# extract the values from the buffer and print them
mat <- tiledb:::libtiledb_query_get_buffer_var_char(bufptr)
print(mat, quote=FALSE)
Java
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute:
// the offsets (20 entries) first, then the data values (100 bytes)
query.setBuffer(
    "b", new NativeArray(ctx, 20, Long.class),
    new NativeArray(ctx, 100, String.class));
// ... submit the query
// Get the results in native java arrays
long[] b_offsets = (long[]) query.getVarBuffer("b");
byte[] b_data = (byte[]) query.getBuffer("b");
// Strings can be constructed by copying the bytes between two consecutive
// offsets; Arrays.copyOfRange takes int indices, so both offsets are cast
String firstBString =
    new String(Arrays.copyOfRange(b_data, (int) b_offsets[0], (int) b_offsets[1]));
Go
// ... create contect ctx
// ... create query
bufferElements, _ := query.EstimateBufferElements()
a1Off := make([]uint64, bufferElements["a1"][0])
a1Data := make([]byte, bufferElements["a1"][1]*rowsVariableLengthTileExtent)
query.SetLayout(tiledb.TILEDB_ROW_MAJOR)
query.SetBufferVar("a1", a1Off, a1Data)
// Submit the query
err = query.Submit()
elements, _ := query.ResultBufferElements()
// Get the string sizes
resultElA1Off := elements["a1"][0]
var a1StrSizes []uint64
for i := 0; i < int(resultElA1Off)-1; i++ {
a1StrSizes = append(a1StrSizes, a1Off[i+1]-a1Off[i])
}
resultA1DataSize := resultElMap["a1"][1] *
uint64(unsafe.Sizeof(byte(0)))
a1StrSizes = append(a1StrSizes,
resultA1DataSize-a1Off[resultElA1Off-1])
// Get the strings
a1Str := make([][]byte, resultElA1Off)
for i := 0; i < int(resultElA1Off); i++ {
a1Str[i] = make([]byte, 0)
for j := 0; j < int(a1StrSizes[i]); j++ {
a1Str[i] = append(a1Str[i], a1Data[a1Off[i]])
}
}
// Print the results
for i := 0; i < int(resultElA1Off); i++ {
fmt.Printf("a1: %s\n", string(a1Str[i]))
}

Fixed-length, Nullable Attributes

You can read fixed-length, nullable attributes as follows:

C
C++
Python
R
Java
Go
C
// Reads the nullable attribute "a" (values plus validity buffer)
// together with the coordinates "d1" and "d2" for a slice of a 2D array.
// NOTE(review): error handling is omitted in this snippet.
// Create TileDB context
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
// Open a 2D array for reading
tiledb_array_t* array;
tiledb_array_alloc(ctx, "<array-uri>", &array);
tiledb_array_open(ctx, array, TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4, given as one inclusive
// {low, high} pair per dimension
int subarray[] = {1, 2, 2, 4};
// Prepare the buffers that will hold the results. Each *_size variable
// starts out as the capacity of its buffer in bytes.
int d1[20];
uint64_t d1_size = sizeof(d1);
int d2[20];
uint64_t d2_size = sizeof(d2);
int a[20];
uint64_t a_size = sizeof(a);
// Validity buffer for "a", with as many entries as the value buffer
// NOTE(review): presumably one byte per cell, nonzero meaning non-null - confirm
uint8_t a_validity[20];
uint64_t a_validity_size = sizeof(a_validity);
// Create query
tiledb_query_t* query;
tiledb_query_alloc(ctx, array, TILEDB_READ, &query);
tiledb_query_set_subarray(ctx, query, subarray);
tiledb_query_set_layout(ctx, query, TILEDB_ROW_MAJOR);
tiledb_query_set_buffer(ctx, query, "d1", d1, &d1_size);
tiledb_query_set_buffer(ctx, query, "d2", d2, &d2_size);
// The nullable attribute takes its value buffer and its validity buffer
tiledb_query_set_buffer_nullable(
ctx, query, "a", a, &a_size, a_validity, &a_validity_size);
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// NOTE: a_size, a_validity_size, d1_size and d2_size now reflect
// the result size, i.e., TileDB changes those values so that you
// know how many results were retrieved (in bytes)
// Clean up
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
C++
// Create TileDB context
Context ctx;

// Open the array in read mode
Array array(ctx, "<array-uri>", TILEDB_READ);

// Rows 1, 2 and cols 2, 3, 4: one inclusive {low, high} pair per dimension
const std::vector<int> slice_ranges{1, 2, 2, 4};

// Result buffers, 20 elements each; the nullable attribute "a" also
// gets a validity buffer of the same length
std::vector<int> d1(20);
std::vector<int> d2(20);
std::vector<int> a(20);
std::vector<uint8_t> a_validity(20);

// Configure the read query, one setting per statement
Query query(ctx, array, TILEDB_READ);
query.set_subarray(slice_ranges);
query.set_layout(TILEDB_ROW_MAJOR);
query.set_buffer_nullable("a", a, a_validity);
query.set_buffer("d1", d1);
query.set_buffer("d2", d2);
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.

// Submit the query
query.submit();

// Close the array
array.close();

// Get the number of elements in the result vectors
auto plain_counts = query.result_buffer_elements();
auto d1_num = plain_counts["d1"].second;
auto d2_num = plain_counts["d2"].second;
// For the nullable attribute, tuple slot 1 is the value count and
// slot 2 is the validity count
auto nullable_counts = query.result_buffer_elements_nullable();
auto a_num = std::get<1>(nullable_counts["a"]);
auto a_validity_num = std::get<2>(nullable_counts["a"]);
Python
# TODO
R
## TODO
Java
// TODO
Go
// TODO

Variable-length, Nullable Attributes

You can read variable-length, nullable attributes as follows:

C
C++
Python
R
Java
Go
C
// ... create context ctx
// ... create query
// You need three buffers per variable-length, nullable attribute:
// the values, the offsets and the validity values
char b_val[100];
unsigned long long b_val_size = sizeof(b_val);
unsigned long long b_off[20];
unsigned long long b_off_size = sizeof(b_off);
uint8_t b_validity[20];
unsigned long long b_validity_size = sizeof(b_validity);
// Set buffers for the variable-length, nullable attribute
// (offsets first, then values, then validity)
tiledb_query_set_buffer_var_nullable(
ctx,
query,
"b",
b_off,
&b_off_size,
b_val,
&b_val_size,
b_validity,
&b_validity_size);
// NOTE: once the query has been submitted, b_off_size, b_val_size, and
// b_validity_size reflect the result size (in bytes) for the offsets,
// data values, and validity values of the results on this attribute,
// i.e., TileDB changes those values so that you know how many results
// were retrieved
C++
// ... create context ctx
// ... create query
// You need three buffers per variable-length, nullable attribute:
// the offsets, the values and the validity values
std::vector<char> b_val(100);
std::vector<uint64_t> b_off(20);
std::vector<uint8_t> b_validity(20);
// Set buffers for the variable-length, nullable attribute.
// set_buffer_nullable is a member of Query, so only the attribute name
// and the buffers are passed.
query.set_buffer_nullable("b", b_off, b_val, b_validity);
// ... submit the query
// Get the number of elements in the result vectors: tuple slot 0 is the
// offsets count, slot 1 the values count, slot 2 the validity count
auto b_off_num = std::get<0>(query.result_buffer_elements_nullable()["b"]);
auto b_val_num = std::get<1>(query.result_buffer_elements_nullable()["b"]);
auto b_validity_num = std::get<2>(query.result_buffer_elements_nullable()["b"]);
Python
# TODO
R
## TODO
Java
// TODO
Go
// TODO

Getting the Non-empty Domain

You can get the non-empty domain of an array as follows:

C
C++
Python
R
Java
Go
C
// ... open array for reading
// Get non-empty domain for a dimension based on its index.
// `domain` receives the [start, end] pair; `is_empty` is set when the
// array has no data.
int domain[2];
int is_empty;
tiledb_array_get_non_empty_domain_from_index(ctx, array, 0, domain, &is_empty);
// Or by name
tiledb_array_get_non_empty_domain_from_name(ctx, array, "dim", domain, &is_empty);
// For string dimensions, we need to first get the size of the
// start and end of the domain range, using the dimension index
uint64_t start_size, end_size;
tiledb_array_get_non_empty_domain_var_size_from_index(
    ctx, array, 0, &start_size, &end_size, &is_empty);
// Or by dimension name
tiledb_array_get_non_empty_domain_var_size_from_name(
    ctx, array, "dim", &start_size, &end_size, &is_empty);
// Then we can allocate strings of the right size to hold the start and end.
// NOTE(review): check is_empty first - the sizes may be 0 for an empty
// array, and a zero-length array is not valid C. The buffers hold exactly
// start_size / end_size bytes, with no room for a terminating '\0'.
char start[start_size];
char end[end_size];
tiledb_array_get_non_empty_domain_var_from_index(
    ctx, array, 0, start, end, &is_empty);
// Or by dimension name
tiledb_array_get_non_empty_domain_var_from_name(
    ctx, array, "dim", start, end, &is_empty);
C++
// ... open array for reading
// The four calls below are alternatives; each result has its own name
// so the snippet compiles as a single scope.
// Returns a pair for the [start, end] of the non-empty domain of
// the dimension with the given index
auto non_empty_domain_by_index = array.non_empty_domain<int>(0);
// Or by name
auto non_empty_domain_by_name = array.non_empty_domain<int>("dim");
// For var-sized dimensions, the following returns a pair of strings
auto non_empty_domain_var_by_index = array.non_empty_domain_var(0);
// Or by name
auto non_empty_domain_var_by_name = array.non_empty_domain_var("dim");
Python
# ... open `array` for reading
# returns a tuple of the non-empty domain for each
# dimension:
# NOTE(review): presumably one (start, end) entry per dimension - confirm
non_empty_domain = array.nonempty_domain()
R
# example with one fixed- and one variable-sized domain
# ("d1" is INT32 with domain 1..4; "d2" is ASCII and therefore variable-sized)
dom <- tiledb_domain(dims = c(tiledb_dim("d1", c(1L, 4L), 4L, "INT32"),
tiledb_dim("d2", NULL, NULL, "ASCII")))
# ... add attribute(s), write content, ...
# ... arr is the array opened
# retrieve non-empty domain for fixed-sized dimension
# (by index 1, or by its name "d1")
tiledb_array_get_non_empty_domain_from_index(arr, 1)
tiledb_array_get_non_empty_domain_from_name(arr, "d1")
# retrieve non-empty domain for variable-sized dimension
# (by index 2, or by its name "d2")
tiledb_array_get_non_empty_domain_from_index(arr, 2)
tiledb_array_get_non_empty_domain_from_name(arr, "d2")
Java
// ... open array for reading
// The four calls below are alternatives; each result has its own name
// so the snippet compiles as a single scope.
// Returns a pair for the [start, end] of the non-empty domain of
// the dimension with the given index
Pair<Object, Object> nonEmptyDomainByIndex = array.getNonEmptyDomainFromIndex(0);
// Or by name
Pair<Object, Object> nonEmptyDomainByName = array.getNonEmptyDomainFromName("dim");
// For var-sized dimensions, the following returns a pair of strings
Pair<String, String> nonEmptyDomainVarByIndex = array.getNonEmptyDomainVarFromIndex(0);
// Or by name
Pair<String, String> nonEmptyDomainVarByName = array.getNonEmptyDomainVarFromName("dim");
array.close();
Go
// ... open array for reading
// Each call returns the bounds and an isEmpty flag; the third return
// value (presumably an error) is discarded here.
// Contains the non empty dimension bounds, by index
nonEmptyDomainFromIndex, isEmpty, _ := array.NonEmptyDomainFromIndex(0)
// Or by name
nonEmptyDomainFromName, isEmpty, _ := array.NonEmptyDomainFromName("dim")
// For var-sized dimensions, contains the non empty dimension bounds, by index
nonEmptyDomainVarFromIndex, isEmpty, _ := array.NonEmptyDomainVarFromIndex(0)
// Or by name
nonEmptyDomainVarFromName, isEmpty, _ := array.NonEmptyDomainVarFromName("dim")

Reopening Arrays

Assuming an already open array, you can reopen the array at the current timestamp. This is useful when potential writes happened since you last opened the array, and you wish to reopen it to get the most up-to-date view of the array. Also note that this is more efficient than closing and opening the array, as it will prevent refetching already loaded fragment metadata. You can reopen an array as follows:

C
C++
Python
R
Java
Go
C
// ... create context ctx
// ... open an array for reading
// Reopen the already-open array to get the most up-to-date view of it
tiledb_array_reopen(ctx, array);
C++
// ... open an array for reading
// Reopen the already-open array to get the most up-to-date view of it
array.reopen();
Python
# ... create context ctx
# ... open an array for reading
# reopen the already-open array to get the most up-to-date view of it
array.reopen()
# optionally, specify a timestamp:
# array.reopen(timestamp=...)
R
# Arrays are reopened automatically for you based on
# read or write being performed. For direct pointer-based
# access you can also explicitly reopen
# (the reopened pointer is stored back into the array object)
arr@ptr <- tiledb:::libtiledb_array_reopen(arr@ptr)
Java
// ... open an array for reading
// Reopen the already-open array to get the most up-to-date view of it
array.reopen();
Go
// ... open an array for reading
// Reopen the already-open array to get the most up-to-date view of it
// NOTE(review): any value returned by Reopen is not checked here
array.Reopen()

Slicing Negative Domains

You can slice negative domains in Python as follows:

Python
Python
# NOTE: In `multi_index`, all ranges are inclusive,
# so -3:3 selects coordinates -3 through 3
with tiledb.SparseArray(path) as A:
    print(A.multi_index[-3:3])