Skip to content

feature: Add Remote (HTTP(S)) Support for SQLite Databases #154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ include extension-ci-tools/makefiles/duckdb_extension.Makefile

# Setup the sqlite3 tpch database
data/db/tpch.db: release
command -v sqlite3 || (command -v brew && brew install sqlite) || (command -v choco && choco install sqlite -y) || (command -v apt-get && apt-get install -y sqlite3) || (command -v apk && apk add sqlite) || echo "no sqlite3"
command -v sqlite3 || (command -v brew && brew install sqlite) || (command -v choco && choco install sqlite -y) || (command -v apt-get && apt-get install -y sqlite3) || (command -v yum && yum install -y sqlite) || (command -v apk && apk add sqlite) || echo "no sqlite3"
./build/release/$(DUCKDB_PATH) < data/sql/tpch-export.duckdb || tree ./build/release || echo "neither tree not duck"
sqlite3 data/db/tpch.db < data/sql/tpch-create.sqlite

Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ add_subdirectory(storage)

add_library(
sqlite_ext_library OBJECT
sqlite_db.cpp sqlite_extension.cpp sqlite_scanner.cpp sqlite_stmt.cpp
sqlite_db.cpp sqlite_duckdb_vfs_cache.cpp sqlite_extension.cpp sqlite_scanner.cpp sqlite_stmt.cpp
sqlite_storage.cpp sqlite_utils.cpp)
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:sqlite_ext_library>
Expand Down
16 changes: 16 additions & 0 deletions src/include/sqlite_db.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
namespace duckdb {
class SQLiteStatement;
struct IndexInfo;
class ClientContext;

class SQLiteDB {
public:
Expand All @@ -30,7 +31,11 @@ class SQLiteDB {
sqlite3 *db;

public:
//! Open a SQLite database (local files only)
static SQLiteDB Open(const string &path, const SQLiteOpenOptions &options, bool is_shared = false);
//! Open a SQLite database with support for both local and remote files (HTTP/HTTPS)
//! @param context Required for remote file access via DuckDB's VFS
static SQLiteDB Open(const string &path, const SQLiteOpenOptions &options, ClientContext &context, bool is_shared = false);
bool TryPrepare(const string &query, SQLiteStatement &result);
SQLiteStatement Prepare(const string &query);
void Execute(const string &query);
Expand All @@ -53,6 +58,17 @@ class SQLiteDB {

bool IsOpen();
void Close();

private:
//! Internal implementation methods for opening SQLite databases
static int GetOpenFlags(const SQLiteOpenOptions &options, bool is_shared, bool is_remote = false);
static void ApplyBusyTimeout(sqlite3 *db, const SQLiteOpenOptions &options);
static void HandleOpenError(const string &path, int rc, ClientContext *context = nullptr);
static SQLiteDB OpenWithVFS(const string &path, const SQLiteOpenOptions &options, ClientContext &context, bool is_shared);
//! Open a local SQLite database file (no remote support)
static SQLiteDB OpenLocal(const string &path, const SQLiteOpenOptions &options, bool is_shared = false);

static void CheckDBValid(sqlite3 *db);
};

} // namespace duckdb
114 changes: 114 additions & 0 deletions src/include/sqlite_duckdb_vfs_cache.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// sqlite_duckdb_vfs_cache.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/mutex.hpp"
#include "duckdb/storage/buffer/buffer_handle.hpp"
#include "duckdb/storage/buffer_manager.hpp"
#include "duckdb/storage/caching_file_system.hpp"

#include "sqlite3.h"

namespace duckdb {

class ClientContext;

// Wrapper around DuckDB's CachingFileSystem for remote SQLite file access.
// Uses DuckDB's caching infrastructure to efficiently handle remote file I/O.
//
// The public surface is read-only: only Read() and get_file_size() are declared,
// there is no write/truncate API on this class.
//
// Lifetime: the object stores a *reference* to the ClientContext it was created
// with, so that context must outlive the DuckDBCachedFile.
//
// NOTE(review): no locking is declared in this class; whether concurrent Read()
// calls on the same instance are safe cannot be determined from the header —
// confirm against the implementation before sharing an instance across threads.
class DuckDBCachedFile {
public:
// Binds the file to `context` and remembers `path`. Per the lazy-initialization
// note below, opening the underlying handle is expected to be deferred until
// first use rather than done here — confirm in the .cpp.
DuckDBCachedFile(ClientContext &context, const string &path);
~DuckDBCachedFile();

// Read data from the file at the specified offset
// @param buffer  destination buffer supplied by the caller
// @param amount  number of bytes requested
// @param offset  byte offset in the file at which to start reading
// NOTE(review): the int return is presumably a SQLite result code
// (e.g. SQLITE_OK / SQLITE_IOERR_*) since this backs the VFS Read callback —
// confirm against the implementation.
int Read(void *buffer, int amount, sqlite3_int64 offset);
// Get the cached file size
sqlite3_int64 get_file_size();

private:
// Lazy initialization - defer DuckDB operations until first use
void ensure_initialized();

// Adaptive read-ahead constants
// MIN/MAX bound the value of `current_readahead_size`; SEQUENTIAL_THRESHOLD is
// the gap tolerance used when classifying a read as sequential.
static constexpr idx_t MIN_READAHEAD_SIZE = static_cast<idx_t>(1024) * 1024; // 1MB
static constexpr idx_t MAX_READAHEAD_SIZE = static_cast<idx_t>(128) * 1024 * 1024; // 128MB
static constexpr idx_t SEQUENTIAL_THRESHOLD = static_cast<idx_t>(64) * 1024; // 64KB gap tolerance

// Non-owning reference to the context used for file access (see lifetime note above)
ClientContext &context;
// Path of the file this object reads; fixed at construction
const string path;
// Owned handle into DuckDB's caching file system; presumably null until
// ensure_initialized() has run — TODO confirm
unique_ptr<CachingFileHandle> caching_handle;
// Set once lazy initialization has completed
bool initialized = false;

// Adaptive read-ahead state
// Both offsets start at -1, i.e. "no read has happened yet".
sqlite3_int64 last_read_offset = -1; // Track last read position
sqlite3_int64 last_read_end = -1; // End of last read (offset + amount)
idx_t current_readahead_size = MIN_READAHEAD_SIZE; // Current read-ahead block size

// Helper methods for adaptive read-ahead
// calculate_read_ahead_size: how many bytes to fetch for a read of `amount` at `offset`
//   (const: does not modify the read-ahead state).
// is_sequential_read: whether `offset` continues the previous read
//   (presumably within SEQUENTIAL_THRESHOLD of last_read_end — TODO confirm).
// update_read_ahead_state: records the read in the state members above.
idx_t calculate_read_ahead_size(sqlite3_int64 offset, int amount) const;
bool is_sequential_read(sqlite3_int64 offset) const;
void update_read_ahead_state(sqlite3_int64 offset, int amount);
};

// SQLite Virtual File System (VFS) implementation that uses DuckDB's
// CachingFileSystem for efficient remote SQLite database access.
//
// Every member is static: the class is a namespace-like holder for the C-style
// callbacks SQLite invokes through its sqlite3_vfs / sqlite3_io_methods tables,
// plus the registration helpers. Signatures therefore mirror the SQLite VFS API
// and must not be changed independently of it.
class SQLiteDuckDBCacheVFS {
public:
// Register the VFS with SQLite (thread-safe, idempotent)
static void Register(ClientContext &context);
// Unregister the VFS when context is destroyed
static void Unregister(ClientContext &context);
// Check if this path should be handled by our VFS (i.e., is it remote?)
static bool CanHandlePath(ClientContext &context, const string &path);
// Get the VFS registration name for a context
// NOTE(review): returns a raw const char*; the owner/lifetime of the returned
// string is not visible in this header — confirm it outlives the sqlite3_open call.
static const char *GetVFSNameForContext(ClientContext &context);
// Get the default VFS registration name (for compatibility)
// Always returns the string literal "duckdb_cache_fs".
static const char *GetVFSName() { return "duckdb_cache_fs"; }

// SQLite VFS interface methods (must be public for C callback registration)
// These correspond to the function-pointer slots of SQLite's sqlite3_vfs object
// (xOpen, xDelete, xAccess, xFullPathname, xDlOpen, ... xGetLastError).
static int Open(sqlite3_vfs *vfs, const char *filename, sqlite3_file *file, int flags, int *out_flags);
static int Delete(sqlite3_vfs *vfs, const char *filename, int sync_dir);
static int Access(sqlite3_vfs *vfs, const char *filename, int flags, int *result);
static int FullPathname(sqlite3_vfs *vfs, const char *filename, int out_size, char *out_buf);
static void *DlOpen(sqlite3_vfs *vfs, const char *filename);
static void DlError(sqlite3_vfs *vfs, int bytes, char *err_msg);
static void (*DlSym(sqlite3_vfs *vfs, void *handle, const char *symbol))(void);
static void DlClose(sqlite3_vfs *vfs, void *handle);
static int Randomness(sqlite3_vfs *vfs, int bytes, char *out);
static int Sleep(sqlite3_vfs *vfs, int microseconds);
static int CurrentTime(sqlite3_vfs *vfs, double *time);
static int GetLastError(sqlite3_vfs *vfs, int bytes, char *err_msg);

// SQLite file I/O methods (must be public for C callback registration)
// These correspond to the slots of SQLite's sqlite3_io_methods object and
// operate on the sqlite3_file handed out by Open().
// NOTE(review): the wrapped DuckDBCachedFile declares no write API, so Write /
// Truncate / Sync presumably reject the operation or are no-ops — confirm in
// the implementation.
static int Close(sqlite3_file *file);
static int Read(sqlite3_file *file, void *buffer, int amount, sqlite3_int64 offset);
static int Write(sqlite3_file *file, const void *buffer, int amount, sqlite3_int64 offset);
static int Truncate(sqlite3_file *file, sqlite3_int64 size);
static int Sync(sqlite3_file *file, int flags);
static int FileSize(sqlite3_file *file, sqlite3_int64 *size);
static int Lock(sqlite3_file *file, int level);
static int Unlock(sqlite3_file *file, int level);
static int CheckReservedLock(sqlite3_file *file, int *result);
static int FileControl(sqlite3_file *file, int op, void *arg);
static int SectorSize(sqlite3_file *file);
static int DeviceCharacteristics(sqlite3_file *file);
};

// SQLite file handle structure that wraps our DuckDBCachedFile.
// Memory layout must be compatible with SQLite's expectations.
// This structure is allocated by SQLite and may cross module boundaries.
// We use raw pointers with explicit ownership rules to avoid DLL issues.
//
// Because the storage is allocated by SQLite rather than constructed by C++,
// the struct is kept to plain members only (a C struct plus a raw pointer);
// nothing here relies on a constructor or destructor being run.
struct SQLiteDuckDBCachedFile {
// SQLite passes this object around as a sqlite3_file*; keeping `base` at
// offset 0 is what lets the callbacks cast that pointer back to this struct.
sqlite3_file base; // Must be first member for C compatibility
// Owning pointer to the C++ file object; ownership is manual (no smart pointer
// or destructor), so a handle that is never closed leaks the DuckDBCachedFile.
DuckDBCachedFile *duckdb_file; // Raw pointer - explicitly deleted in Close()
};

} // namespace duckdb
4 changes: 3 additions & 1 deletion src/include/storage/sqlite_catalog.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class SQLiteCatalog : public Catalog {
string GetDBPath() override;

//! Returns a reference to the in-memory database (if any)
SQLiteDB *GetInMemoryDatabase();
SQLiteDB *GetInMemoryDatabase(ClientContext &context);
//! Release the in-memory database (if there is any)
void ReleaseInMemoryDatabase();

Expand All @@ -76,6 +76,8 @@ class SQLiteCatalog : public Catalog {
mutex in_memory_lock;
//! Whether or not there is any active transaction on the in-memory database
bool active_in_memory;
//! Whether the in-memory database has been initialized
bool in_memory_db_initialized;
};

} // namespace duckdb
6 changes: 6 additions & 0 deletions src/include/storage/sqlite_transaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include "duckdb/transaction/transaction.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/common/mutex.hpp"
#include "duckdb/common/atomic.hpp"
#include "sqlite_db.hpp"

namespace duckdb {
Expand Down Expand Up @@ -37,6 +39,10 @@ class SQLiteTransaction : public Transaction {
SQLiteDB *db;
SQLiteDB owned_db;
case_insensitive_map_t<unique_ptr<CatalogEntry>> catalog_entries;

// Atomic flags for thread-safe initialization
atomic<bool> started{false};
atomic<bool> db_initialized{false};
};

} // namespace duckdb
4 changes: 3 additions & 1 deletion src/include/storage/sqlite_transaction_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ class SQLiteTransactionManager : public TransactionManager {

private:
SQLiteCatalog &sqlite_catalog;
mutex transaction_lock;
reference_map_t<Transaction, unique_ptr<SQLiteTransaction>> transactions;

// Function-local static mutex to avoid Windows DLL initialization issues
static mutex& GetTransactionLock();
};

} // namespace duckdb
Loading