Virtual Filesystem

TileDB is designed such that all IO to/from the storage backends is abstracted behind a Virtual Filesystem (VFS) module. This module supports simple operations, such as creating a file/directory, reading/writing to a file, etc. This abstraction enables us to easily plug in more storage backends in the future, effectively making the storage backend opaque to the user.

A nice by-product of this architecture is that the basic virtual filesystem functionality can be exposed via the TileDB APIs. This provides a simplified interface for file IO and directory management (i.e., operations not related to TileDB objects such as arrays) on all the storage backends that TileDB supports.

This page covers most of the TileDB VFS functionality.

Writing

C
C++
Python
R
Java
Go
// Set up a context and a VFS instance (NULL selects the default configuration)
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
tiledb_vfs_t* vfs;
tiledb_vfs_alloc(ctx, NULL, &vfs);

// A single handle variable is reused for every open/close cycle below
tiledb_vfs_fh_t* fh;

// Pass 1: write a float followed by a short string
float f1 = 153.0;
const char* s1 = "abcd";
tiledb_vfs_open(ctx, vfs, "tiledb_vfs.bin", TILEDB_VFS_WRITE, &fh);
tiledb_vfs_write(ctx, fh, &f1, sizeof(float));
tiledb_vfs_write(ctx, fh, s1, strlen(s1));
tiledb_vfs_close(ctx, fh);
tiledb_vfs_fh_free(&fh);

// Pass 2: opening in write mode again overwrites the previous file
const char* s2 = "abcdef";
f1 = 153.1;
tiledb_vfs_open(ctx, vfs, "tiledb_vfs.bin", TILEDB_VFS_WRITE, &fh);
tiledb_vfs_write(ctx, fh, &f1, sizeof(float));
tiledb_vfs_write(ctx, fh, s2, strlen(s2));
tiledb_vfs_close(ctx, fh);
tiledb_vfs_fh_free(&fh);

// Pass 3: append mode adds bytes to the end of the existing file
const char* s3 = "ghijkl";
tiledb_vfs_open(ctx, vfs, "tiledb_vfs.bin", TILEDB_VFS_APPEND, &fh);
tiledb_vfs_write(ctx, fh, s3, strlen(s3));
tiledb_vfs_close(ctx, fh);
tiledb_vfs_fh_free(&fh);

// Release the VFS before the context it was created from
tiledb_vfs_free(&vfs);
tiledb_ctx_free(&ctx);
// Create TileDB context
Context ctx;
// Create TileDB VFS
VFS vfs(ctx);
// Create VFS file buffer; the std::ostream below writes through it
VFS::filebuf fbuf(vfs);
// Write binary data
fbuf.open("tiledb_vfs.bin", std::ios::out);
std::ostream os(&fbuf);
if (!os.good()) {
  std::cerr << "Error opening file 'tiledb_vfs.bin' for write.\n";
  return;
}
float f1 = 153.0;
std::string s1 = "abcd";
// The float is written as its raw in-memory bytes, followed by the string
// bytes (no length prefix and no terminator)
os.write(reinterpret_cast<const char*>(&f1), sizeof(f1));
os.write(s1.data(), s1.size());
// Write binary data again - this will overwrite the previous file
fbuf.open("tiledb_vfs.bin", std::ios::out);
if (!os.good()) {
  std::cerr << "Error opening file 'tiledb_vfs.bin' for write.\n";
  return;
}
f1 = 153.1;
s1 = "abcdef";
os.write(reinterpret_cast<const char*>(&f1), sizeof(f1));
os.write(s1.data(), s1.size());
// Append binary data to existing file (this will NOT work on S3)
fbuf.open("tiledb_vfs.bin", std::ios::app);
if (!os.good()) {
  std::cerr << "Error opening file 'tiledb_vfs.bin' for append.\n";
  return;
}
s1 = "ghijkl";
os.write(s1.data(), s1.size());
# Note: the Python VFS API currently supports bytes only (no automatic
# conversion), therefore encoding must be handled manually (see the calls
# to 'encode' and 'struct.pack' below)
import struct
import tiledb

ctx = tiledb.Ctx()
vfs = tiledb.VFS(ctx=ctx)

# Create and open a writable handle, then write a float and a string
fh = vfs.open("tiledb_vfs.bin", 'w')
vfs.write(fh, struct.pack('<f', 153.0))
vfs.write(fh, "abcd".encode('UTF-8'))
# Close each handle before the file is opened again
vfs.close(fh)

# Write data again - this will overwrite the previous file
fh = vfs.open("tiledb_vfs.bin", 'w')
vfs.write(fh, struct.pack('<f', 153.1))
vfs.write(fh, "abcdef".encode("UTF-8"))
vfs.close(fh)

# Append data to existing file (this will NOT work on S3)
fh = vfs.open("tiledb_vfs.bin", 'a')
vfs.write(fh, "ghijkl".encode("UTF-8"))
# Close the handle
vfs.close(fh)
# TODO: VFS is currently not supported in the R API
// Create TileDB context and VFS
try (Context ctx = new Context(); VFS vfs = new VFS(ctx)) {
  String sourcePath = "tiledb_vfs.bin";
  // Write binary data. The float is encoded little-endian because
  // ByteBuffer defaults to big-endian and the read example decodes
  // with ByteOrder.LITTLE_ENDIAN.
  vfs.write(
      sourcePath,
      ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN).putFloat(153.0f).array());
  byte[] resultBytes = vfs.readAllBytes(sourcePath);
  // append bytes
  String s1 = "abcd";
  vfs.write(sourcePath, s1.getBytes(), VFSMode.TILEDB_VFS_APPEND);
  // Write binary data again - this will overwrite the previous file
  vfs.write(
      sourcePath,
      ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN).putFloat(153.1f).array(),
      VFSMode.TILEDB_VFS_WRITE);
  s1 = "abcdef";
  vfs.write(sourcePath, s1.getBytes(), VFSMode.TILEDB_VFS_APPEND);
  // Append binary data to existing file (this will NOT work on S3)
  s1 = "ghijkl";
  vfs.write(sourcePath, s1.getBytes(), VFSMode.TILEDB_VFS_APPEND);
  resultBytes = vfs.readAllBytes(sourcePath);
}
// Create TileDB context
config, _ := tiledb.NewConfig()
ctx, _ := tiledb.NewContext(config)
// Create TileDB VFS
vfs, _ := tiledb.NewVFS(ctx, config)
// Write binary data
fh1, _ := vfs.Open("tiledb_vfs.bin", tiledb.TILEDB_VFS_WRITE)
var f1 float32 = 153.0
s1 := "abcd"
vfs.Write(fh1, float32ToBytes(f1))
vfs.Write(fh1, []byte(s1))
vfs.Close(fh1)
// Write binary data again - this will overwrite the previous file
fh2, _ := vfs.Open("tiledb_vfs.bin", tiledb.TILEDB_VFS_WRITE)
var f2 float32 = 153.1
s2 := "abcdef"
vfs.Write(fh2, float32ToBytes(f2))
vfs.Write(fh2, []byte(s2))
vfs.Close(fh2)
// Append binary data to existing file
fh3, _ := vfs.Open("tiledb_vfs.bin", tiledb.TILEDB_VFS_APPEND)
s3 := "ghijkl"
vfs.Write(fh3, []byte(s3))
vfs.Close(fh3)
// Clean up
vfs.Free()
ctx.Free()
config.Free()
// float32ToBytes returns the IEEE 754 bit pattern of float as a 4-byte
// little-endian slice.
func float32ToBytes(float float32) []byte {
	var encoded [4]byte
	binary.LittleEndian.PutUint32(encoded[:], math.Float32bits(float))
	return encoded[:]
}

Reading

C
C++
Python
R
Java
Go
// Create TileDB context
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
// Create TileDB VFS
tiledb_vfs_t* vfs;
tiledb_vfs_alloc(ctx, NULL, &vfs);
// Read binary data
tiledb_vfs_fh_t* fh;
tiledb_vfs_open(ctx, vfs, "tiledb_vfs.bin", TILEDB_VFS_READ, &fh);
float f1;
// 12 string bytes plus one byte for the terminating NUL
char s1[13];
s1[12] = '\0';
// The float sits at offset 0; the string bytes start right after it
tiledb_vfs_read(ctx, fh, 0, &f1, sizeof(float));
tiledb_vfs_read(ctx, fh, sizeof(float), s1, 12);
printf("Binary read:\n%.1f\n%s\n", f1, s1);
// Clean up: close the handle before freeing it
tiledb_vfs_close(ctx, fh);
tiledb_vfs_fh_free(&fh);
tiledb_vfs_free(&vfs);
tiledb_ctx_free(&ctx);
// Create TileDB context
Context ctx;
// Create TileDB VFS
VFS vfs(ctx);
// Read binary data
VFS::filebuf sbuf(vfs);
sbuf.open("tiledb_vfs.bin", std::ios::in);
std::istream is(&sbuf);
if (!is.good()) {
  std::cerr << "Error opening file 'tiledb_vfs.bin'.\n";
  return;
}
float f1;
std::string s1;
// Everything after the leading float is string data
auto s1_size = vfs.file_size("tiledb_vfs.bin") - sizeof(float);
s1.resize(s1_size);
is.read(reinterpret_cast<char*>(&f1), sizeof(f1));
// Read exactly as many bytes as the string was resized to hold, rather
// than a hard-coded length that could overrun or underfill the buffer
is.read(&s1[0], s1_size);
std::cout << "Binary read:\n" << f1 << '\n' << s1 << '\n';
import struct
import tiledb

ctx = tiledb.Ctx()
vfs = tiledb.VFS(ctx=ctx)

# Create and open readable handle
fh = vfs.open("tiledb_vfs.bin", "r")
float_struct = struct.Struct('<f')
# The float sits at offset 0; everything after it is string data
float_data = vfs.read(fh, 0, float_struct.size)
string_size = vfs.file_size("tiledb_vfs.bin") - float_struct.size
string_data = vfs.read(fh, float_struct.size, string_size)
print(float_struct.unpack(float_data)[0])
print(string_data.decode("UTF-8"))
vfs.close(fh)
# TODO: VFS is currently not supported in the R API
// Create TileDB context and VFS
try (Context ctx = new Context(); VFS vfs = new VFS(ctx)) {
  String sourcePath = "tiledb_vfs.bin";
  // Read the whole file, then split it into the leading float and the string
  byte[] resultBytes = vfs.readAllBytes(sourcePath);
  float f1 = ByteBuffer.wrap(resultBytes).order(ByteOrder.LITTLE_ENDIAN).getFloat();
  // Skip the 4 float bytes; the remainder is the string payload
  resultBytes = java.util.Arrays.copyOfRange(resultBytes, 4, resultBytes.length);
  String s1 = new String(resultBytes, StandardCharsets.UTF_8);
  System.out.printf("Binary read:\n%f\n%s\n", f1, s1);
}
// Create TileDB context
config, _ := tiledb.NewConfig()
ctx, _ := tiledb.NewContext(config)
// Create TileDB VFS
vfs, _ := tiledb.NewVFS(ctx, config)
// Read binary data; keep the handle returned by Open for the reads below
fh, _ := vfs.Open("tiledb_vfs.bin", tiledb.TILEDB_VFS_READ)
sizeOfFile, _ := vfs.FileSize("tiledb_vfs.bin")
var f float32 = 0.0
sizeOfFloat32 := uint64(unsafe.Sizeof(f))
// The float sits at offset 0; everything after it is string data
f1, _ := vfs.Read(fh, 0, sizeOfFloat32)
s1, _ := vfs.Read(fh, sizeOfFloat32, sizeOfFile-sizeOfFloat32)
fmt.Println("Binary read:")
fmt.Println(float32FromBytes(f1))
fmt.Println(string(s1))
// Clean up
vfs.Close(fh)
vfs.Free()
ctx.Free()
config.Free()
// float32FromBytes decodes the first 4 bytes of bytes as a little-endian
// IEEE 754 bit pattern and returns the corresponding float32.
func float32FromBytes(bytes []byte) float32 {
	return math.Float32frombits(binary.LittleEndian.Uint32(bytes))
}

Managing

C
C++
Python
R
Java
Go
// Set up a context and a VFS instance (NULL selects the default configuration)
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
tiledb_vfs_t* vfs;
tiledb_vfs_alloc(ctx, NULL, &vfs);

// Create the directory only if it is not there yet
int dir_exists = 0;
tiledb_vfs_is_dir(ctx, vfs, "dir_A", &dir_exists);
if (dir_exists) {
  printf("'dir_A' already exists\n");
} else {
  tiledb_vfs_create_dir(ctx, vfs, "dir_A");
  printf("Created 'dir_A'\n");
}

// Create an (empty) file only if it is not there yet
int file_exists = 0;
tiledb_vfs_is_file(ctx, vfs, "dir_A/file_A", &file_exists);
if (file_exists) {
  printf("'dir_A/file_A' already exists\n");
} else {
  tiledb_vfs_touch(ctx, vfs, "dir_A/file_A");
  printf("Created empty file 'dir_A/file_A'\n");
}

// Query the file size (written into the output argument)
uint64_t size_in_bytes;
tiledb_vfs_file_size(ctx, vfs, "dir_A/file_A", &size_in_bytes);

// Rename the file (moving directories is similar)
tiledb_vfs_move_file(ctx, vfs, "dir_A/file_A", "dir_A/file_B");

// Delete the file, then the directory. Note that, in the case of
// directories, the function will delete all the contents of the directory
// (i.e., it works even for non-empty directories).
tiledb_vfs_remove_file(ctx, vfs, "dir_A/file_B");
tiledb_vfs_remove_dir(ctx, vfs, "dir_A");

// Release the VFS before the context it was created from
tiledb_vfs_free(&vfs);
tiledb_ctx_free(&ctx);
// Context and VFS with default configuration
Context ctx;
VFS vfs(ctx);

// Make sure the directory exists, creating it if needed
if (vfs.is_dir("dir_A")) {
  std::cout << "'dir_A' already exists\n";
} else {
  vfs.create_dir("dir_A");
  std::cout << "Created 'dir_A'\n";
}

// Make sure the (empty) file exists, creating it if needed
if (vfs.is_file("dir_A/file_A")) {
  std::cout << "'dir_A/file_A' already exists\n";
} else {
  vfs.touch("dir_A/file_A");
  std::cout << "Created empty file 'dir_A/file_A'\n";
}

// Report the file size
std::cout << "Size of file 'dir_A/file_A': "
          << vfs.file_size("dir_A/file_A") << "\n";

// Rename the file (moving directories is similar)
vfs.move_file("dir_A/file_A", "dir_A/file_B");

// Remove the file, then the directory
vfs.remove_file("dir_A/file_B");
vfs.remove_dir("dir_A");
import tiledb

ctx = tiledb.Ctx()
vfs = tiledb.VFS(ctx=ctx)

# Creating a directory
if not vfs.is_dir("dir_A"):
    vfs.create_dir("dir_A")
    print("Created 'dir_A'")
else:
    print("'dir_A' already exists")

# Creating an (empty) file
if not vfs.is_file("dir_A/file_A"):
    vfs.touch("dir_A/file_A")
    print("Created empty file 'dir_A/file_A'")
else:
    print("'dir_A/file_A' already exists")

# Getting the file size
print("Size of file 'dir_A/file_A': ", vfs.file_size("dir_A/file_A"))

# Moving files (moving directories is similar)
vfs.move_file("dir_A/file_A", "dir_A/file_B")

# Deleting files and directories
vfs.remove_file("dir_A/file_B")
vfs.remove_dir("dir_A")
# TODO: VFS is currently not supported in the R API
// Context and VFS are closed automatically at the end of the try block
try (Context ctx = new Context(); VFS vfs = new VFS(ctx)) {
  // Make sure the directory exists, creating it if needed
  if (vfs.isDirectory("dir_A")) {
    System.out.println("'dir_A' already exists");
  } else {
    vfs.createDirectory("dir_A");
    System.out.println("Created 'dir_A'");
  }

  // Make sure the (empty) file exists, creating it if needed
  if (vfs.isFile("dir_A/file_A")) {
    System.out.println("'dir_A/file_A' already exists");
  } else {
    vfs.createFile("dir_A/file_A");
    System.out.println("Created empty file 'dir_A/file_A'");
  }

  // Report the file size
  System.out.printf("Size of file 'dir_A/file_A': %d\n", vfs.fileSize("dir_A/file_A"));

  // Rename the file (moving directories is similar)
  vfs.moveFile("dir_A/file_A", "dir_A/file_B");

  // Remove the file, then the directory
  vfs.removeFile("dir_A/file_B");
  vfs.removeDirectory("dir_A");
}
// Create TileDB context
config, _ := tiledb.NewConfig()
ctx, _ := tiledb.NewContext(config)
// Create TileDB VFS
vfs, _ := tiledb.NewVFS(ctx, config)
// Create directory (only if it does not exist yet)
isDir, _ := vfs.IsDir("dir_A")
if !isDir {
	vfs.CreateDir("dir_A")
	fmt.Println("Created 'dir_A'")
} else {
	fmt.Println("'dir_A' already exists")
}
// Creating an (empty) file
isFile, _ := vfs.IsFile("dir_A/file_A")
if !isFile {
	vfs.Touch("dir_A/file_A")
	fmt.Println("Created empty file 'dir_A/file_A'")
} else {
	fmt.Println("'dir_A/file_A' already exists")
}
// Getting the file size
fileSize, _ := vfs.FileSize("dir_A/file_A")
fmt.Println("Size of file 'dir_A/file_A':", fileSize)
// Moving files (moving directories is similar)
vfs.MoveFile("dir_A/file_A", "dir_A/file_B")
// Deleting files and directories. Note that, in the case of directories,
// the function will delete all the contents of the directory (i.e., it
// works even for non-empty directories).
vfs.RemoveFile("dir_A/file_B")
vfs.RemoveDir("dir_A")
// Clean up
vfs.Free()
ctx.Free()
config.Free()

TileDB allows you to create/delete S3 buckets via its VFS functionality:

C
C++
Python
R
Java
Go
// ... create context ctx
// ... create VFS vfs
// Create the bucket, then remove it again
const char* bucket_uri = "s3://my_bucket";
tiledb_vfs_create_bucket(ctx, vfs, bucket_uri);
tiledb_vfs_remove_bucket(ctx, vfs, bucket_uri);
// ... create context ctx
// ... create VFS vfs
// Create the bucket, then remove it again
const std::string bucket_uri = "s3://my_bucket";
vfs.create_bucket(bucket_uri);
vfs.remove_bucket(bucket_uri);
# ... create context ctx
# ... create VFS vfs
# Create the bucket, then remove it again
bucket_uri = "s3://my_bucket"
vfs.create_bucket(bucket_uri)
vfs.remove_bucket(bucket_uri)
# TODO: VFS is currently not supported in the R API
// Context and VFS are closed automatically at the end of the try block
try (Context ctx = new Context(); VFS vfs = new VFS(ctx)) {
  // Create the bucket, then remove it again
  final String bucketUri = "s3://my_bucket";
  vfs.createBucket(bucketUri);
  vfs.removeBucket(bucketUri);
}
// ... create VFS vfs
vfs.CreateBucket("s3://my_bucket")
vfs.RemoveBucket("s3://my_bucket")

However, extreme care must be taken when creating/deleting buckets on AWS S3. After its creation, a bucket may take some time to “appear” in the system. This will cause problems if the user creates the bucket and immediately tries to write a file in it. Similarly, deleting a bucket may not take effect immediately and, therefore, it may continue to “exist” for some time.

Configuring VFS

You can configure VFS by passing a configuration object upon its creation.

C
C++
Python
R
Java
Go
// Context with the default configuration (NULL config)
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);

// Build a configuration object carrying the VFS thread count
tiledb_config_t* config;
tiledb_config_alloc(&config, NULL);
tiledb_config_set(config, "vfs.num_threads", "16", NULL);

// Hand the configuration to the VFS at creation time
tiledb_vfs_t* vfs;
tiledb_vfs_alloc(ctx, config, &vfs);

// Release everything: config, then VFS, then context
tiledb_config_free(&config);
tiledb_vfs_free(&vfs);
tiledb_ctx_free(&ctx);
// Context with the default configuration
Context ctx;
// Configuration object carrying the VFS thread count
Config config;
config.set("vfs.num_threads", "16");
// Hand the configuration to the VFS at construction time
VFS vfs(ctx, config);
# Pass a Config object directly to the VFS constructor
ctx = tiledb.Ctx()
config = tiledb.Config()
config["vfs.num_threads"] = 16
vfs = tiledb.VFS(ctx=ctx, config=config)
# Or create the Config first and pass it to the Ctx constructor; a VFS
# created without its own config uses the configuration of its context
ctx = tiledb.Ctx(config)
vfs = tiledb.VFS(ctx=ctx)
# TODO: VFS is currently not supported in the R API
// Create TileDB context
try(Context ctx = new Context()) {
HashMap<String, String> settings = new HashMap<>();
// Set values
settings.put("vfs.num_threads", "16");
// Create TileDB VFS with a config object
try (Config config = new Config(settings), VFS vfs = new VFS(ctx, config);) {
}
}
// Create a configuration object and set the VFS parameter on it, so the
// option is present in the config that is passed to NewVFS below
config, _ := tiledb.NewConfig()
config.Set("vfs.num_threads", "16")
// Create TileDB context
ctx, _ := tiledb.NewContext(config)
// Create TileDB VFS with a config object
vfs, _ := tiledb.NewVFS(ctx, config)
// Clean up
config.Free()
vfs.Free()
ctx.Free()

If you do not pass a configuration object to VFS, then VFS will inherit the (default or set) configuration of the context. Otherwise, the options set in the passed configuration object will override those of the context, but the rest of the options will still be inherited from the context's configuration.