Comment on page
Basic Reading
To read either a dense or a sparse array, the user typically opens the array in read mode and provides a subarray, any subset of the attributes (potentially including the coordinates) and the layout to get the results into (see Reading for more details). You can read from an array as follows:
C
C++
Python
R
Java
Go
C#
// Create TileDB context
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
// Open a 2D array for reading
tiledb_array_t* array;
tiledb_array_alloc(ctx, "<array-uri>", &array);
tiledb_array_open(ctx, array, TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4
int subarray[] = {1, 2, 2, 4};
// Prepare the vectors that will hold the results
int d1[20];
uint64_t d1_size = sizeof(d1);
int d2[20];
uint64_t d2_size = sizeof(d2);
int a[20];
uint64_t a_size = sizeof(a);
// Create query
tiledb_query_t* query;
tiledb_query_alloc(ctx, array, TILEDB_READ, &query);
tiledb_query_set_subarray(ctx, query, subarray);
tiledb_query_set_layout(ctx, query, TILEDB_ROW_MAJOR);
tiledb_query_set_data_buffer(ctx, query, "a", a, &a_size);
tiledb_query_set_data_buffer(ctx, query, "d1", d1, &d1_size);
tiledb_query_set_data_buffer(ctx, query, "d2", d2, &d2_size);
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// NOTE: a_size, d1_size and d2_size now reflect the result size,
// i.e., TileDB changes those values so that you know how many
// results were retrieved (in bytes)
// Clean up
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
// Create TileDB context
Context ctx;
// Prepare the array for reading
Array array(ctx, "<array-uri>", TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4
const std::vector<int> subarray = {1, 2, 2, 4};
// Prepare the vectors that will hold the results
std::vector<int> d1(20);
std::vector<int> d2(20);
std::vector<int> a(20);
// Prepare the query
Query query(ctx, array, TILEDB_READ);
query.set_subarray(subarray)
.set_layout(TILEDB_ROW_MAJOR)
.set_data_buffer("a", a)
.set_data_buffer("d1", d1)
.set_data_buffer("d2", d2);
// Submit the query
query.submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Close the array
array.close();
// Get the number of elements in the result vectors
auto d1_num = query.result_buffer_elements()["d1"].second;
auto d2_num = query.result_buffer_elements()["d2"].second;
auto a_num = query.result_buffer_elements()["a"].second;
# tiledb.open returns a DenseArray or SparseArray as per the schema
with tiledb.open(array_uri) as A:
    # note that array indexes are half-open like NumPy
    data = A[1:3, 2:5]
    a = data['a']
    d1 = data['d1']
    d2 = data['d2']
# using `with` (context manager) ensures A.close() is called
d1_num = len(d1)
d2_num = len(d2)
a_num = len(a)
# to select only a single attribute, use the `query` method
# with `attrs` argument, which returns an indexable object
with tiledb.open(array_uri) as A:
    q = A.query(attrs=('a',))
    # indexing the Query object will only retrieve the
    # selected attribute(s)
    q[1:3, 2:5]
# If you wish to return the coordinate vectors as well
with tiledb.open(array_uri) as A:
    q = A.query(attrs=('a',), coords=True)
    q[1:3, 2:5]
# NOTE: Indexing of the query object follows numpy semantics,
# therefore, q[1:10] stands for range [1,9]
# Create a TileDB context
ctx <- tiledb_ctx()
# Open a dense array
A <- tiledb_array(uri = uridense)
# Or, open a sparse array
# A <- tiledb_array(uri = "<array-uri>", is.sparse = TRUE)
# Slice only rows 1, 2 and cols 2, 3, 4
a <- A[1:2, 2:4]
show(a)
# we can also read using lower-level code
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "READ")
## subarray of rows 1,2 and cols 2,3,4
subarr <- c(1L,2L, 2L,4L)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "ROW_MAJOR")
a <- integer(6) # reserve space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", a)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
print(a) # unformed array, no coordinates
res <- tiledb:::libtiledb_array_close(arrptr)
// Create TileDB context and open the array
try(Context ctx = new Context(),
Array array = new Array(ctx, "<array-uri>", TILEDB_READ)) {
// Slice only rows 1, 2 and cols 2, 3, 4
NativeArray subarray = new NativeArray(ctx, new long[] {1, 2, 2, 4}, Integer.class);
// Prepare the query
Query query = new Query(ctx, array, TILEDB_READ);
// Prepare the vectors that will hold the results
query.setBuffer(
"d1", new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"d2", new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"a1", new NativeArray(ctx, 20, Integer.class));
query.setSubarray(subarray)
.setLayout(TILEDB_ROW_MAJOR);
// Submit the query and close the array.
query.submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Get the results in native java arrays
int[] d1 = (int[]) query.getBuffer("d1");
int[] d2 = (int[]) query.getBuffer("d2");
int[] a1 = (int[]) query.getBuffer("a1");
// Close the query
query.close();
}
// Create TileDB context
ctx, _ := tiledb.NewContext(nil)
// Open a 2D array for reading
array, _ := tiledb.NewArray(ctx, "<array-uri>")
array.Open(tiledb.TILEDB_READ)
// Slice only rows 1, 2 and cols 2, 3, 4
subArray := []int32{1, 2, 2, 4}
// Prepare the vectors that will hold the results
d1 := make([]int32, 20)
d2 := make([]int32, 20)
a := make([]int32, 20)
// Create query
query, _ := tiledb.NewQuery(ctx, array)
query.SetSubArray(subArray)
query.SetLayout(tiledb.TILEDB_ROW_MAJOR)
query.SetBuffer("a", a)
query.SetBuffer("d1", d1)
query.SetBuffer("d2", d2)
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
query.Submit()
// Close array
array.Close()
// NOTE: len(a)*size_of(int32), len(d1)*size_of(int32) and
// len(d2)*size_of(int32) now reflect the result size,
// i.e., TileDB changes those values so that you know how many
// results were retrieved (in bytes)
using System.Collections.Generic;
using TileDB.CSharp;
// Create TileDB context
using Context ctx = new Context();
// Prepare the array for reading
using Array array = new Array(ctx, "<array-uri>");
array.Open(QueryType.Read);
// Slice only rows 1, 2 and cols 2, 3, 4
using Subarray subarray = new Subarray(array);
subarray.SetSubarray(1, 2, 2, 4);
int[] d1 = new int[20];
int[] d2 = new int[20];
int[] a = new int[20];
using Query query = new Query(ctx, array, QueryType.Read);
query.SetSubarray(subarray);
query.SetLayout(LayoutType.RowMajor);
query.SetDataBuffer("a", a);
query.SetDataBuffer("d1", d1);
query.SetDataBuffer("d2", d2);
// Submit the query
query.Submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been ColumnMajor or GlobalOrder.
// Close the array
array.Close();
// Get the number of elements read to the buffers
ulong d1Num = query.GetResultDataElements("d1");
ulong d2Num = query.GetResultDataElements("d2");
ulong aNum = query.GetResultDataElements("a");
You can read variable-length attributes as follows:
C
C++
Python
R
Java
Go
C#
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute
char b_val[100];
unsigned long long b_val_size = sizeof(b_val);
unsigned long long b_off[20];
unsigned long long b_off_size = sizeof(b_off);
// Set buffers for the variable-length attributes
tiledb_query_set_data_buffer(ctx, query, "b", b_val, &b_val_size);
tiledb_query_set_offsets_buffer(ctx, query, "b", b_off, &b_off_size);
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// NOTE: b_off_size and b_val_size now reflect the result size (in bytes)
// for the offsets and values of the results on this attribute,
// i.e., TileDB changes those values so that you know how many
// results were retrieved
// Clean up
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute:
// one for the values and one for the offsets
std::vector<char> b_val(100);
std::vector<uint64_t> b_off(20);
// Set buffers for the variable-length attribute.
// The C++ setters are members of Query, so they take only the
// attribute name and the buffer (no ctx/query arguments).
query
    .set_data_buffer("b", b_val)
    .set_offsets_buffer("b", b_off);
// Submit the query
query.submit();
// Get the number of elements in the result vectors
// (first: offsets, second: values)
auto b_off_num = query.result_buffer_elements()["b"].first;
auto b_val_num = query.result_buffer_elements()["b"].second;
# Variable-length arrays may be sliced as usual in Python.
# The API handles unpacking and type conversion, and returns
# a NumPy object array-of-arrays.
# For example, given the var-length array created in the
# Writing Arrays section, the result will be returned as:
with tiledb.SparseArray(array_name) as A:
    print(A[:][attr_name])
# Returns:
# array([array([1, 1], dtype=int32), array([2], dtype=int32),
#        array([3, 3, 3], dtype=int32), array([4], dtype=int32)],
#       dtype=object)
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensevar,
"READ")
subarr <- c(1L,4L, 1L,4L)
bufptr <-
tiledb:::libtiledb_query_buffer_var_char_alloc(arrptr, subarr,
"b", 16, 100)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr,
"ROW_MAJOR")
qryptr <-
tiledb:::libtiledb_query_set_buffer_var_char(qryptr, "b",
bufptr)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
expect_equal(tiledb:::libtiledb_query_status(qryptr), "COMPLETE")
tiledb:::libtiledb_array_close(arrptr)
mat <- tiledb:::libtiledb_query_get_buffer_var_char(bufptr)
print(mat, quote=FALSE)
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute
query.setBuffer(
    "b", new NativeArray(ctx, 100, Long.class),
    new NativeArray(ctx, 20, String.class));
// Submit the query
query.submit();
// Get the results in native java arrays
long[] b_offsets = (long[]) query.getVarBuffer("b");
byte[] b_data = (byte[]) query.getBuffer("b");
// Strings can be constructed by copying the bytes between two
// consecutive offsets (Arrays.copyOfRange takes int indices)
String firstBString =
    new String(Arrays.copyOfRange(b_data, (int) b_offsets[0], (int) b_offsets[1]));
// ... create context ctx
// ... create query
// Size the offsets and data buffers from the estimated element counts
bufferElements, _ := query.EstimateBufferElements()
a1Off := make([]uint64, bufferElements["a1"][0])
a1Data := make([]byte, bufferElements["a1"][1]*rowsVariableLengthTileExtent)
query.SetLayout(tiledb.TILEDB_ROW_MAJOR)
query.SetBufferVar("a1", a1Off, a1Data)
// Submit the query
err = query.Submit()
elements, _ := query.ResultBufferElements()
// Get the string sizes: every string but the last ends where the
// next offset begins
resultElA1Off := elements["a1"][0]
var a1StrSizes []uint64
for i := 0; i < int(resultElA1Off)-1; i++ {
	a1StrSizes = append(a1StrSizes, a1Off[i+1]-a1Off[i])
}
// The last string runs to the end of the returned data
resultA1DataSize := elements["a1"][1] *
	uint64(unsafe.Sizeof(byte(0)))
a1StrSizes = append(a1StrSizes,
	resultA1DataSize-a1Off[resultElA1Off-1])
// Get the strings
a1Str := make([][]byte, resultElA1Off)
for i := 0; i < int(resultElA1Off); i++ {
	// Copy the bytes [start, start+size) that belong to cell i
	start := a1Off[i]
	a1Str[i] = append([]byte(nil), a1Data[start:start+a1StrSizes[i]]...)
}
// Print the results
for i := 0; i < int(resultElA1Off); i++ {
	fmt.Printf("a1: %s\n", string(a1Str[i]))
}
// ... create context ctx
// ... create query
// You need two buffers per variable-length attribute
byte[] bValues = new byte[100];
ulong[] bOffsets = new ulong[20];
// Set buffers for the variable-length attribute "b"
// (each setter takes the attribute name followed by the buffer)
query.SetDataBuffer("b", bValues);
query.SetOffsetsBuffer("b", bOffsets);
query.Submit();
// Get the number of elements read to the buffers
ulong bOffsetsNum = query.GetResultOffsets("b");
ulong bNum = query.GetResultDataElements("b");
You can read fixed-length, nullable attributes as follows:
C
C++
Python
R
Java
Go
C#
// Create TileDB context
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
// Open a 2D array for reading
tiledb_array_t* array;
tiledb_array_alloc(ctx, "<array-uri>", &array);
tiledb_array_open(ctx, array, TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4
int subarray[] = {1, 2, 2, 4};
// Prepare the vectors that will hold the results
int d1[20];
uint64_t d1_size = sizeof(d1);
int d2[20];
uint64_t d2_size = sizeof(d2);
int a[20];
uint64_t a_size = sizeof(a);
uint8_t a_validity[20];
uint64_t a_validity_size = sizeof(a_validity);
// Create query
tiledb_query_t* query;
tiledb_query_alloc(ctx, array, TILEDB_READ, &query);
tiledb_query_set_subarray(ctx, query, subarray);
tiledb_query_set_layout(ctx, query, TILEDB_ROW_MAJOR);
tiledb_query_set_data_buffer(ctx, query, "d1", d1, &d1_size);
tiledb_query_set_data_buffer(ctx, query, "d2", d2, &d2_size);
tiledb_query_set_data_buffer(ctx, query, "a", a, &a_size);
tiledb_query_set_validity_buffer(
ctx, query, "a", a_validity, &a_validity_size);
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// NOTE: a_size, a_validity_size, d1_size and d2_size now reflect
// the result size, i.e., TileDB changes those values so that you
// know how many results were retrieved (in bytes)
// Clean up
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
// Create TileDB context
Context ctx;
// Prepare the array for reading
Array array(ctx, "<array-uri>", TILEDB_READ);
// Slice only rows 1, 2 and cols 2, 3, 4
const std::vector<int> subarray = {1, 2, 2, 4};
// Prepare the vectors that will hold the results
std::vector<int> d1(20);
std::vector<int> d2(20);
std::vector<int> a(20);
std::vector<uint8_t> a_validity(20);
// Prepare the query
Query query(ctx, array, TILEDB_READ);
query.set_subarray(subarray)
.set_layout(TILEDB_ROW_MAJOR)
.set_data_buffer("a", a)
.set_validity_buffer("a", a_validity)
.set_data_buffer("d1", d1)
.set_data_buffer("d2", d2);
// Submit the query
query.submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Close the array
array.close();
// Get the number of elements in the result vectors
auto d1_num = query.result_buffer_elements()["d1"].second;
auto d2_num = query.result_buffer_elements()["d2"].second;
auto a_num = std::get<1>(query.result_buffer_elements_nullable()["a"]);
auto a_validity_num = std::get<2>(query.result_buffer_elements_nullable()["a"]);
# TODO
## TODO
// Create array and query
try (Array array = new Array(ctx, arrayURI, TILEDB_READ);
ArraySchema schema = array.getSchema();
Query query = new Query(array, TILEDB_READ)) {
// Fetch all cells
query.addRange(0, 1, 2);
query.addRange(1, 1, 2);
query.setLayout(TILEDB_ROW_MAJOR);
//create buffers for the query
NativeArray dim1Array = new NativeArray(ctx, 100, Integer.class);
NativeArray dim2Array = new NativeArray(ctx, 100, Integer.class);
NativeArray a1Array = new NativeArray(ctx, 100, Character.class);
NativeArray a1byteMap = new NativeArray(ctx, 100, Datatype.TILEDB_UINT8);
NativeArray a2Array = new NativeArray(ctx, 100, Float.class);
NativeArray a2byteMap = new NativeArray(ctx, 100, Datatype.TILEDB_UINT8);
//set buffers
query.setBuffer("rows", dim1Array);
query.setBuffer("cols", dim2Array);
query.setBufferNullable("a1", a1Array, a1byteMap);
query.setBufferNullable("a2", a2Array, a2byteMap);
Pair<Long, Long> estimated = query.getEstResultSizeNullable(ctx, "a1");
Assert.assertEquals((long) estimated.getFirst(), 4);
Assert.assertEquals((long) estimated.getSecond(), 4);
// Submit query
query.submit();
HashMap<String, Pair<Long, Long>> resultElements = query.resultBufferElements();
//get the populated buffers after query submission
int[] dim1 = (int[]) query.getBuffer("rows");
int[] dim2 = (int[]) query.getBuffer("cols");
byte[] a1 = (byte[]) query.getBuffer("a1");
float[] a2 = (float[]) query.getBuffer("a2");
//get the validity buffers
short[] a1ValidityByteMap = query.getValidityByteMap("a1");
short[] a2ValidityByteMap = query.getValidityByteMap("a2");
}
// TODO
using TileDB.CSharp;
// Create TileDB context
using Context ctx = new Context();
// Prepare the array for reading
using Array array = new Array(ctx, "<array-uri>");
array.Open(QueryType.Read);
// Slice only rows 1, 2 and cols 2, 3, 4
using Subarray subarray = new Subarray(array);
subarray.SetSubarray(1, 2, 2, 4);
int[] d1 = new int[20];
int[] d2 = new int[20];
int[] a = new int[20];
byte[] aValidity = new byte[20];
using Query query = new Query(ctx, array, QueryType.Read);
query.SetSubarray(subarray);
query.SetLayout(LayoutType.RowMajor);
query.SetDataBuffer("a", a);
query.SetValidityBuffer("a", aValidity);
query.SetDataBuffer("d1", d1);
query.SetDataBuffer("d2", d2);
// Submit the query
query.Submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been ColumnMajor or GlobalOrder.
// Close the array
array.Close();
// Get the number of elements read to the buffers
ulong d1Num = query.GetResultDataElements("d1");
ulong d2Num = query.GetResultDataElements("d2");
ulong aNum = query.GetResultDataElements("a");
ulong aValidityNum = query.GetResultValidities("a");
You can read variable-length, nullable attributes as follows:
C
C++
Python
R
Java
Go
C#
// ... create context ctx
// ... create query
// You need three buffers per variable-length, nullable attribute
char b_val[100];
unsigned long long b_val_size = sizeof(b_val);
unsigned long long b_off[20];
unsigned long long b_off_size = sizeof(b_off);
uint8_t b_validity[20];
unsigned long long b_validity_size = sizeof(b_validity);
// Set buffers for the variable-length, nullable attribute
tiledb_query_set_data_buffer(ctx, query, "b", b_val, &b_val_size);
tiledb_query_set_offsets_buffer(ctx, query, "b", b_off, &b_off_size);
tiledb_query_set_validity_buffer(
ctx, query, "b", b_validity, &b_validity_size);
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// NOTE: b_off_size, b_val_size, and b_validity_size now reflect
// the result size (in bytes) for the offsets, data values, and validity
// values of the results on this attribute, i.e., TileDB changes those
// values so that you know how many results were retrieved
// Clean up
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
// ... create context ctx
// ... create query
// You need three buffers per variable-length, nullable attribute:
// values, offsets and validity
std::vector<char> b_val(100);
std::vector<uint64_t> b_off(20);
std::vector<uint8_t> b_validity(20);
// Set buffers for the variable-length, nullable attribute.
// The C++ setters are members of Query, so they take only the
// attribute name and the buffer (no ctx/query arguments).
query
    .set_data_buffer("b", b_val)
    .set_offsets_buffer("b", b_off)
    .set_validity_buffer("b", b_validity);
// Submit the query
query.submit();
// Get the number of elements in the result vectors
// (tuple order: offsets, values, validity)
auto b_off_num = std::get<0>(query.result_buffer_elements_nullable()["b"]);
auto b_val_num = std::get<1>(query.result_buffer_elements_nullable()["b"]);
auto b_validity_num = std::get<2>(query.result_buffer_elements_nullable()["b"]);
# TODO
## TODO
// TODO
// TODO
// ... create context ctx
// ... create query
// You need three buffers per variable-length, nullable attribute:
// values, offsets and validity
byte[] b = new byte[100];
ulong[] bOffsets = new ulong[20];
byte[] bValidity = new byte[20];
// Set buffers for the variable-length, nullable attribute
query.SetDataBuffer("b", b);
query.SetOffsetsBuffer("b", bOffsets);
query.SetValidityBuffer("b", bValidity);
// Submit the query
query.Submit();
// Get the number of elements read to the buffers
ulong bNum = query.GetResultDataElements("b");
ulong bOffsetsNum = query.GetResultOffsets("b");
ulong bValiditiesNum = query.GetResultValidities("b");
You can get the non-empty domain of an array as follows:
C
C++
Python
R
Java
Go
C#
// ... open array for reading
// Get non-empty domain for a dimension based on its index.
// The two-element buffer declared here is the one passed to both calls
// below; is_empty is written by the call.
int domain[2];
int is_empty;
tiledb_array_get_non_empty_domain_from_index(ctx, array, 0, domain, &is_empty);
// Or by name
tiledb_array_get_non_empty_domain_from_name(ctx, array, "dim", domain, &is_empty);
// For string dimensions, we need to first get the size of the
// start and end of the domain range, using the dimension index
unsigned long long start_size, end_size;
tiledb_array_get_non_empty_domain_var_size_from_index(
ctx, array, 0, &start_size, &end_size, &