Basic Reading

To read either a dense or a sparse array, the user typically opens the array in read mode and provides a subarray, any subset of the attributes (potentially including the coordinates) and the layout to get the results into (see Reading for more details). You can read from an array as follows:

C
C++
Python
R
Java
Go
C
// Create TileDB context
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
// Open a 2D array for reading
tiledb_array_t* array;
tiledb_array_alloc(ctx, "<array-uri>", &array);
tiledb_array_open(ctx, array, TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4. The subarray holds one
// inclusive [start, end] pair per dimension:
// {row_start, row_end, col_start, col_end}
int subarray[] = {1, 2, 2, 4};
// Prepare the buffers that will hold the results; each *_size
// variable starts out as the capacity of its buffer in bytes
int coords[20];
uint64_t coords_size = sizeof(coords);
int a[20];
uint64_t a_size = sizeof(a);
// Create the read query and attach the subarray, layout and buffers
tiledb_query_t* query;
tiledb_query_alloc(ctx, array, TILEDB_READ, &query);
tiledb_query_set_subarray(ctx, query, subarray);
tiledb_query_set_layout(ctx, query, TILEDB_ROW_MAJOR);
tiledb_query_set_buffer(ctx, query, "a", a, &a_size);
tiledb_query_set_buffer(ctx, query, TILEDB_COORDS, coords, &coords_size);
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// NOTE: a_size and coords_size now reflect the result size,
// i.e., TileDB changes those values so that you know how many
// results were retrieved (in bytes)
// Clean up: release the array, query and context handles
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
C++
// Create TileDB context
Context ctx;
// Open the array for reading
Array array(ctx, "<array-uri>", TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4. The subarray holds one
// inclusive [start, end] pair per dimension.
const std::vector<int> subarray = {1, 2, 2, 4};
// Prepare the vectors that will hold the results
std::vector<int> coords(20);
std::vector<int> a(20);
// Prepare the query: subarray, result layout and result buffers
Query query(ctx, array, TILEDB_READ);
query.set_subarray(subarray)
.set_layout(TILEDB_ROW_MAJOR)
.set_buffer("a", a)
.set_coordinates(coords);
// Submit the query (the array is closed in a separate step below)
query.submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Close the array
array.close();
// Get the number of elements in the result vectors
auto coords_num = query.result_buffer_elements()[TILEDB_COORDS].second;
auto a_num = query.result_buffer_elements()["a"].second;
Python
# with tiledb.DenseArray(array_uri) as A:
# or, with tiledb.SparseArray(array_uri) as A:
# or, tiledb.open will return DenseArray or SparseArray as per schema
with tiledb.open(array_uri) as A:
    # note that array indexes are half-open like NumPy,
    # so this selects rows 1, 2 and cols 2, 3, 4
    data = A[1:3, 2:5]
    a = data['a']
    coords = data['coords']

# using `with` (context manager) ensures the call to A.close()

# number of results retrieved
coords_num = len(coords)
a_num = len(a)

# to select only a single attribute, use the `query` method
# with `attrs` argument, which returns an indexable object
with tiledb.open(array_uri) as A:
    q = A.query(attrs=('a',))
    # indexing the Query object will only retrieve the
    # selected attribute(s)
    q[1:3, 2:5]
R
# Create a TileDB context
ctx <- tiledb_ctx()
# Open a dense array
A <- tiledb_dense(uri = "<array-uri>", ctx=ctx)
# Or, open a sparse array
# A <- tiledb_sparse(uri = "<array-uri>", ctx=ctx)
# Slice only rows 1, 2 and cols 2, 3, 4
# (both ends of each range are included)
data <- A[1:2, 2:4]
# Display the sliced result
show(data)
Java
// Create TileDB context and open the array
try(Context ctx = new Context(),
Array array = new Array(ctx, "<array-uri>", TILEDB_READ)) {
// Slice only rows 1, 2 and cols 2, 3, 4
NativeArray subarray = new NativeArray(ctx, new long[] {1, 2, 2, 4}, Integer.class);
// Prepare the query
Query query = new Query(ctx, array, TILEDB_READ);
// Prepare the vectors that will hold the results
query.setCoordinates(
new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"a1", new NativeArray(ctx, 20, Integer.class));
query.setSubarray(subarray)
.setLayout(TILEDB_ROW_MAJOR);
// Submit the query and close the array.
query.submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Get the results in native java arrays
int[] coords = (int[]) query.getBuffer(TILEDB_COORDS);
int[] a1 = (int[]) query.getBuffer("a1");
// Close the query
query.close();
}
Go
// Create TileDB context
ctx, _ := tiledb.NewContext(nil)
// Open a 2D array for reading
array, _ := tiledb.NewArray(ctx, "<array-uri>")
array.Open(tiledb.TILEDB_READ)
// Slice only rows 1, 2 and cols 2, 3, 4
subArray := []int32{1, 2, 2, 4}
// Prepare the buffers that will hold the results, sized from the
// maximum number of elements the subarray can return.
// Attribute "a" is read as int32, matching the other language examples
// and the size_of(int32) note below.
maxElements, _ := array.MaxBufferElements(subArray)
data := make([]int32, maxElements["a"][1])
coords := make([]int32, maxElements[tiledb.TILEDB_COORDS][1])
// Create query
query, _ := tiledb.NewQuery(ctx, array)
query.SetSubArray(subArray)
query.SetLayout(tiledb.TILEDB_ROW_MAJOR)
query.SetBuffer("a", data)
query.SetCoordinates(coords)
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
query.Submit()
// Close array
array.Close()
// NOTE: len(data)*size_of(int32) and len(coords)*size_of(int32)
// now reflect the result size,
// i.e., TileDB changes those values so that you know how many
// results were retrieved (in bytes), MaxBufferElements uses
// this behaviour to return number of elements per datatype

Variable-length Attributes

You can read variable-length attributes as follows:

C
C++
Python
R
Java
Go
C
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute:
// one for the cell offsets and one for the cell values.
// Sizes are uint64_t, as in the fixed-length example.
char b_val[100];
uint64_t b_val_size = sizeof(b_val);
uint64_t b_off[20];
uint64_t b_off_size = sizeof(b_off);
// Set buffers for the variable-length attribute
tiledb_query_set_buffer_var(ctx, query, "b", b_off, &b_off_size, b_val, &b_val_size);
// ... submit query
// NOTE: b_off_size and b_val_size now reflect the result size (in bytes)
// for the offsets and values of the results on this attribute,
// i.e., TileDB changes those values so that you know how many
// results were retrieved
C++
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute:
// one for the cell offsets and one for the cell values
std::vector<char> b_val(100);
std::vector<uint64_t> b_off(20);
// Set buffers for the variable-length attribute. set_buffer is a
// member of Query, so neither the context nor the query is passed.
query.set_buffer("b", b_off, b_val);
// ... submit query
// Get the number of elements in the result vectors:
// .first counts offsets, .second counts values
auto b_off_num = query.result_buffer_elements()["b"].first;
auto b_val_num = query.result_buffer_elements()["b"].second;
Python
# Variable-length arrays may be sliced as usual in Python.
# The API handles unpacking and type conversion, and returns
# a NumPy object array-of-arrays.
# For example, given the var-length array created in the
# Writing Arrays section, the result will be returned as:
with tiledb.SparseArray(array_name) as A:
    print(A[:][attr_name])
# Returns:
# array([array([1, 1], dtype=int32), array([2], dtype=int32),
#        array([3, 3, 3], dtype=int32), array([4], dtype=int32)],
#       dtype=object)
R
# TODO: R does not support variable length attributes currently
Java
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute:
// the offsets buffer first, then the values buffer
// (20 offsets and 100 value bytes, as in the C and C++ examples)
query.setBuffer(
    "b",
    new NativeArray(ctx, 20, Long.class),
    new NativeArray(ctx, 100, String.class));
// ... submit query
// Get the results in native java arrays
long[] b_offsets = (long[]) query.getVarBuffer("b");
byte[] b_data = (byte[]) query.getBuffer("b");
// Strings can be constructed by copying the bytes between two
// consecutive offsets (copyOfRange takes int indices, hence the casts)
String firstBString =
    new String(Arrays.copyOfRange(b_data, (int) b_offsets[0], (int) b_offsets[1]));
Go
// ... create contect ctx
// ... create query
// You need two buffer per variable-length attribute
std::vector<char> b_val(100);
maxElMap, err := array.MaxBufferElements(subArray)
bOff := make([]uint64, maxElMap["b"][0])
bData := make([]byte, maxElMap["b"][1])
uery.SetLayout(tiledb.TILEDB_ROW_MAJOR)
// Set buffers for the variable-length attributes
query.SetBufferVar("b", a1Off, a1Data)
// Submit the query
query.Submit()
// Get the number of elements in the result vectors
// Get the number of elements per cell value
var bElOff []uint64
resultElbOff := maxElMap["b"][0]
for i := 0; i < int(resultElbOff); i++ {
ElOff = append(bElOff, bOff[i]/uint64(unsafe.Sizeof(int32(0))))
}
var bCellEl []uint64
for i := 0; i < int(resultElbOff)-1; i++ {
bCellEl = append(bCellEl, bElOff[i+1]-bElOff[i])
}
resultElbData := maxElMap["b"][1]
bCellEl = append(bCellEl, resultElbData-bElOff[len(bElOff)-1])
// Close the array
array.Close()

Getting the Non-empty Domain

You can get the non-empty domain of an array as follows:

C
C++
Python
R
Java
Go
C
// ... open array for reading
// NOTE(review): is_empty presumably reports whether the array has no
// data, in which case domain would not be meaningful - confirm.
int is_empty;
// NOTE(review): the element type of domain presumably has to match the
// datatype of the array dimensions - confirm against the schema.
uint64_t domain[4]; // Assuming a 2D array, two [start, end] pairs
tiledb_array_get_non_empty_domain(ctx, array, domain, &is_empty);
C++
// ... open array for reading
// Retrieve the non-empty domain of the array
auto non_empty_domain = array.non_empty_domain<int>();
// non_empty_domain is a vector containing the non-empty
// domain on each dimension, i.e., it contains one element
// of the form (<dim-name>, (start, end)) per dimension
Python
# ... open `array` for reading
# nonempty_domain() returns a tuple holding the non-empty
# domain of each dimension:
non_empty_domain = array.nonempty_domain()
R
# TODO: Non empty domain is not supported in R
Java
// ... open array for reading
// Retrieve the non-empty domain of the array
HashMap<String, Pair> nonEmptyDomain = array.nonEmptyDomain();
// The result is a HashMap from each dimension name to its
// (lower, upper) inclusive bounding coordinate range pair.
// The HashMap is empty if the array has no data.
Go
// ... open array for reading
// Retrieve the non-empty domain of the array.
// NOTE(review): isEmpty presumably reports an array with no data and
// err a failed call - confirm against the TileDB-Go reference.
nonEmptyDomain, isEmpty, err := array.NonEmptyDomain()

Reopening Arrays

Assuming an already open array, you can reopen the array at the current timestamp. This is useful when potential writes happened since you last opened the array, and you wish to reopen it to get the most up-to-date view of the array. Also note that this is more efficient than closing and opening the array, as it will prevent refetching already loaded fragment metadata. You can reopen an array as follows:

C
C++
Python
R
Java
Go
C
// ... create context ctx
// ... open an array for reading
// Reopen the already open array to get its most up-to-date view
tiledb_array_reopen(ctx, array);
C++
// ... open an array for reading
// Reopen the already open array to get its most up-to-date view
array.reopen();
Python
# ... create context ctx
# ... open an array for reading
# reopen the already open array to get its most up-to-date view
array.reopen()
# optionally, specify a timestamp:
# array.reopen(timestamp=...)
R
# Arrays are reopened automatically for you based on
# read or write being performed
Java
// TODO: reopening an array is not supported in java
Go
// ... open an array for reading
// Reopen the already open array to get its most up-to-date view
array.Reopen()

Slicing Negative Domains

You can slice negative domains in Python as follows:

Python
Python
# NOTE: In `domain_index`, all ranges are inclusive,
# so this selects coordinates -3 through 3
with tiledb.SparseArray(path) as A:
    print(A.domain_index[-3:3])