Skip to content

Commit 063a5bd

Browse files
committed
feat: Add HTTP/HTTPS support for remote SQLite databases
Implements seamless support for querying remote SQLite databases over HTTP/HTTPS using DuckDB's CachingFileSystem infrastructure. This enables direct SQL queries against SQLite files hosted on web servers without downloading the entire database.

Key Features:
- Custom SQLite VFS (Virtual File System) that integrates with DuckDB's external file cache
- Adaptive read-ahead optimization (1MB-128MB) for efficient remote file access
- Per-ClientContext VFS registration for thread-safe concurrent access
- Automatic VFS lifecycle management tied to ClientContext lifetime
- Full support for sqlite_scan, sqlite_attach, and SQLite storage engine operations

Implementation Details:
- SQLiteDuckDBCacheVFS: Custom VFS that delegates file I/O to DuckDB's CachingFileSystem
- DuckDBCachedFile: Wrapper around DuckDB's CachingFileHandle with adaptive read-ahead
- Uses DuckDB's FileSystem::IsRemoteFile() for automatic HTTP/HTTPS detection
- Leverages DuckDB's httpfs extension for HTTP client functionality
- Modified SQLiteDB::Open to automatically register VFS for remote files
- Added comprehensive test suite covering VFS registration, basic scans, joins, CTEs, etc.
Performance Optimizations:
- Adaptive read-ahead reduces HTTP round trips for sequential scans
- DuckDB's CachingFileSystem provides intelligent block caching and prefetching
- Read-only access mode for remote files ensures optimal caching behavior

This change enables use cases like:
- Querying SQLite databases hosted on CDNs or web servers
- Analyzing remote SQLite files without local storage requirements
- Building data pipelines that directly access HTTP-hosted SQLite databases

Example usage:
```sql
-- Direct scan
SELECT * FROM sqlite_scan('https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite', 'Artist');

-- Attach remote database
ATTACH 'https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite' AS http_db (TYPE sqlite);

-- Query attached database
SELECT COUNT(*) FROM http_db.Track;
```
1 parent d6dee57 commit 063a5bd

30 files changed

+1443
-107
lines changed

.github/workflows/MainDistributionPipeline.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
name: Build extension binaries
1717
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0
1818
with:
19-
duckdb_version: main
19+
duckdb_version: v1.3.0
2020
extension_name: sqlite_scanner
2121
ci_tools_version: v1.3.0
2222

@@ -26,7 +26,7 @@ jobs:
2626
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.3.0
2727
secrets: inherit
2828
with:
29-
duckdb_version: main
29+
duckdb_version: v1.3.0
3030
extension_name: sqlite_scanner
3131
ci_tools_version: v1.3.0
3232
deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}

src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ add_subdirectory(storage)
66
add_library(
77
sqlite_ext_library OBJECT
88
sqlite_db.cpp sqlite_extension.cpp sqlite_scanner.cpp sqlite_stmt.cpp
9-
sqlite_storage.cpp sqlite_utils.cpp)
9+
sqlite_storage.cpp sqlite_utils.cpp sqlite_duckdb_vfs_cache.cpp)
1010
set(ALL_OBJECT_FILES
1111
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:sqlite_ext_library>
1212
PARENT_SCOPE)

src/include/sqlite_db.hpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
namespace duckdb {
1515
class SQLiteStatement;
1616
struct IndexInfo;
17+
class ClientContext;
1718

1819
class SQLiteDB {
1920
public:
@@ -30,7 +31,9 @@ class SQLiteDB {
3031
sqlite3 *db;
3132

3233
public:
33-
static SQLiteDB Open(const string &path, const SQLiteOpenOptions &options, bool is_shared = false);
34+
//! Open a SQLite database with support for both local and remote files (HTTP/HTTPS)
35+
//! @param context Required for remote file access via DuckDB's VFS
36+
static SQLiteDB Open(const string &path, const SQLiteOpenOptions &options, ClientContext &context, bool is_shared = false);
3437
bool TryPrepare(const string &query, SQLiteStatement &result);
3538
SQLiteStatement Prepare(const string &query);
3639
void Execute(const string &query);
@@ -53,6 +56,18 @@ class SQLiteDB {
5356

5457
bool IsOpen();
5558
void Close();
59+
60+
private:
61+
//! Helper functions for Open methods
62+
static int GetOpenFlags(const SQLiteOpenOptions &options, bool is_shared, bool is_remote = false);
63+
static void ApplyBusyTimeout(sqlite3 *db, const SQLiteOpenOptions &options);
64+
static void HandleOpenError(const string &path, int rc, ClientContext *context = nullptr);
65+
static SQLiteDB OpenWithVFS(const string &path, const SQLiteOpenOptions &options, ClientContext &context, bool is_shared);
66+
//! Open a local SQLite database file (no remote support)
67+
static SQLiteDB OpenLocal(const string &path, const SQLiteOpenOptions &options, bool is_shared = false);
68+
69+
//! Verify database handle is valid
70+
static void CheckDBValid(sqlite3 *db);
5671
};
5772

5873
} // namespace duckdb
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// sqlite_duckdb_vfs_cache.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "duckdb.hpp"
12+
#include "duckdb/common/file_system.hpp"
13+
#include "duckdb/common/mutex.hpp"
14+
#include "duckdb/storage/buffer/buffer_handle.hpp"
15+
#include "duckdb/storage/buffer_manager.hpp"
16+
#include "duckdb/storage/caching_file_system.hpp"
17+
18+
#include "sqlite3.h"
19+
20+
namespace duckdb {
21+
22+
class ClientContext;
23+
24+
// Wrapper around DuckDB's CachingFileSystem for remote SQLite file access.
25+
// Uses DuckDB's caching infrastructure to efficiently handle remote file I/O.
26+
class DuckDBCachedFile {
27+
public:
28+
DuckDBCachedFile(ClientContext &context, const string &path);
29+
~DuckDBCachedFile() = default;
30+
31+
// Read data from the file at the specified offset
32+
int Read(void *buffer, int amount, sqlite3_int64 offset);
33+
// Get the cached file size
34+
sqlite3_int64 GetFileSize();
35+
// Get the file path
36+
const string &GetPath() const { return path; }
37+
38+
private:
39+
// Lazy initialization - defer DuckDB operations until first use
40+
void EnsureInitialized();
41+
42+
// Adaptive read-ahead constants
43+
static constexpr uint64_t MIN_READAHEAD_SIZE = static_cast<uint64_t>(1024) * 1024; // 1MB
44+
static constexpr uint64_t MAX_READAHEAD_SIZE = static_cast<uint64_t>(128) * 1024 * 1024; // 128MB
45+
static constexpr uint64_t SEQUENTIAL_THRESHOLD = static_cast<uint64_t>(64) * 1024; // 64KB gap tolerance
46+
47+
ClientContext &context;
48+
string path;
49+
unique_ptr<CachingFileHandle> caching_handle;
50+
sqlite3_int64 cached_file_size = -1; // Cached to avoid repeated remote calls
51+
bool initialized = false;
52+
53+
// Adaptive read-ahead state
54+
sqlite3_int64 last_read_offset = -1; // Track last read position
55+
sqlite3_int64 last_read_end = -1; // End of last read (offset + amount)
56+
uint64_t current_readahead_size = MIN_READAHEAD_SIZE; // Current read-ahead block size
57+
58+
// Helper methods for adaptive read-ahead
59+
uint64_t CalculateReadAheadSize(sqlite3_int64 offset, int amount) const;
60+
bool IsSequentialRead(sqlite3_int64 offset) const;
61+
void UpdateReadAheadState(sqlite3_int64 offset, int amount);
62+
};
63+
64+
// SQLite Virtual File System (VFS) implementation that uses DuckDB's
65+
// CachingFileSystem for efficient remote SQLite database access.
66+
class SQLiteDuckDBCacheVFS {
67+
public:
68+
// Register the VFS with SQLite (thread-safe, idempotent)
69+
static void Register(ClientContext &context);
70+
// Unregister the VFS when context is destroyed
71+
static void Unregister(ClientContext &context);
72+
// Check if this path should be handled by our VFS (i.e., is it remote?)
73+
static bool CanHandlePath(ClientContext &context, const string &path);
74+
// Get the VFS registration name for a context
75+
static const char *GetVFSNameForContext(ClientContext &context);
76+
// Get the default VFS registration name (for compatibility)
77+
static const char *GetVFSName() { return "duckdb_cache_fs"; }
78+
79+
// SQLite VFS interface methods (must be public for C callback registration)
80+
// Note: SQLite expects these to use the C calling convention
81+
#ifndef SQLITE_CALLBACK
82+
#ifdef _WIN32
83+
#define SQLITE_CALLBACK __cdecl
84+
#else
85+
#define SQLITE_CALLBACK
86+
#endif
87+
#endif
88+
89+
static int SQLITE_CALLBACK Open(sqlite3_vfs *vfs, const char *filename, sqlite3_file *file, int flags, int *out_flags);
90+
static int SQLITE_CALLBACK Delete(sqlite3_vfs *vfs, const char *filename, int sync_dir);
91+
static int SQLITE_CALLBACK Access(sqlite3_vfs *vfs, const char *filename, int flags, int *result);
92+
static int SQLITE_CALLBACK FullPathname(sqlite3_vfs *vfs, const char *filename, int out_size, char *out_buf);
93+
static void * SQLITE_CALLBACK DlOpen(sqlite3_vfs *vfs, const char *filename);
94+
static void SQLITE_CALLBACK DlError(sqlite3_vfs *vfs, int bytes, char *err_msg);
95+
static void (* SQLITE_CALLBACK DlSym(sqlite3_vfs *vfs, void *handle, const char *symbol))(void);
96+
static void SQLITE_CALLBACK DlClose(sqlite3_vfs *vfs, void *handle);
97+
static int SQLITE_CALLBACK Randomness(sqlite3_vfs *vfs, int bytes, char *out);
98+
static int SQLITE_CALLBACK Sleep(sqlite3_vfs *vfs, int microseconds);
99+
static int SQLITE_CALLBACK CurrentTime(sqlite3_vfs *vfs, double *time);
100+
static int SQLITE_CALLBACK GetLastError(sqlite3_vfs *vfs, int bytes, char *err_msg);
101+
102+
// SQLite file I/O methods (must be public for C callback registration)
103+
static int SQLITE_CALLBACK Close(sqlite3_file *file);
104+
static int SQLITE_CALLBACK Read(sqlite3_file *file, void *buffer, int amount, sqlite3_int64 offset);
105+
static int SQLITE_CALLBACK Write(sqlite3_file *file, const void *buffer, int amount, sqlite3_int64 offset);
106+
static int SQLITE_CALLBACK Truncate(sqlite3_file *file, sqlite3_int64 size);
107+
static int SQLITE_CALLBACK Sync(sqlite3_file *file, int flags);
108+
static int SQLITE_CALLBACK FileSize(sqlite3_file *file, sqlite3_int64 *size);
109+
static int SQLITE_CALLBACK Lock(sqlite3_file *file, int level);
110+
static int SQLITE_CALLBACK Unlock(sqlite3_file *file, int level);
111+
static int SQLITE_CALLBACK CheckReservedLock(sqlite3_file *file, int *result);
112+
static int SQLITE_CALLBACK FileControl(sqlite3_file *file, int op, void *arg);
113+
static int SQLITE_CALLBACK SectorSize(sqlite3_file *file);
114+
static int SQLITE_CALLBACK DeviceCharacteristics(sqlite3_file *file);
115+
116+
private:
117+
// No private members - all state is managed through static methods
118+
};
119+
120+
// SQLite file handle structure that wraps our DuckDBCachedFile.
121+
// Memory layout must be compatible with SQLite's expectations.
122+
#ifdef _WIN32
123+
#pragma pack(push, 8)
124+
struct SQLiteDuckDBCachedFile {
125+
sqlite3_file base; // Must be first member for C compatibility
126+
unique_ptr<DuckDBCachedFile> duckdb_file; // The actual file implementation
127+
ClientContext *context; // DuckDB context for this file
128+
};
129+
#pragma pack(pop)
130+
#else
131+
struct SQLiteDuckDBCachedFile {
132+
sqlite3_file base; // Must be first member for C compatibility
133+
unique_ptr<DuckDBCachedFile> duckdb_file; // The actual file implementation
134+
ClientContext *context; // DuckDB context for this file
135+
};
136+
#endif
137+
138+
} // namespace duckdb

src/include/sqlite_scanner.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ struct SqliteBindData : public TableFunctionData {
3030
SQLiteDB *global_db;
3131

3232
optional_ptr<TableCatalogEntry> table;
33+
34+
// Override virtual methods from FunctionData
35+
unique_ptr<FunctionData> Copy() const override;
36+
bool Equals(const FunctionData &other) const override;
3337
};
3438

3539
class SqliteScanFunction : public TableFunction {

src/include/storage/sqlite_catalog.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88

99
#pragma once
1010

11-
#include "duckdb/catalog/catalog.hpp"
12-
#include "sqlite_options.hpp"
1311
#include "sqlite_db.hpp"
12+
#include "sqlite_options.hpp"
13+
#include "duckdb/catalog/catalog.hpp"
1414

1515
namespace duckdb {
1616
class SQLiteSchemaEntry;
@@ -59,7 +59,7 @@ class SQLiteCatalog : public Catalog {
5959
string GetDBPath() override;
6060

6161
//! Returns a reference to the in-memory database (if any)
62-
SQLiteDB *GetInMemoryDatabase();
62+
SQLiteDB *GetInMemoryDatabase(ClientContext &context);
6363
//! Release the in-memory database (if there is any)
6464
void ReleaseInMemoryDatabase();
6565

@@ -76,6 +76,8 @@ class SQLiteCatalog : public Catalog {
7676
mutex in_memory_lock;
7777
//! Whether or not there is any active transaction on the in-memory database
7878
bool active_in_memory;
79+
//! Whether the in-memory database has been initialized
80+
bool in_memory_db_initialized;
7981
};
8082

8183
} // namespace duckdb

src/include/storage/sqlite_transaction.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88

99
#pragma once
1010

11-
#include "duckdb/transaction/transaction.hpp"
12-
#include "duckdb/common/case_insensitive_map.hpp"
1311
#include "sqlite_db.hpp"
12+
#include "duckdb/common/case_insensitive_map.hpp"
13+
#include "duckdb/common/mutex.hpp"
14+
#include "duckdb/transaction/transaction.hpp"
1415

1516
namespace duckdb {
1617
class SQLiteCatalog;
@@ -37,6 +38,10 @@ class SQLiteTransaction : public Transaction {
3738
SQLiteDB *db;
3839
SQLiteDB owned_db;
3940
case_insensitive_map_t<unique_ptr<CatalogEntry>> catalog_entries;
41+
bool started;
42+
43+
// Function-local static mutex to avoid Windows DLL initialization issues
44+
static mutex& GetInitializationMutex();
4045
};
4146

4247
} // namespace duckdb

src/include/storage/sqlite_transaction_manager.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88

99
#pragma once
1010

11-
#include "duckdb/transaction/transaction_manager.hpp"
1211
#include "storage/sqlite_catalog.hpp"
1312
#include "storage/sqlite_transaction.hpp"
1413
#include "duckdb/common/reference_map.hpp"
14+
#include "duckdb/transaction/transaction_manager.hpp"
1515

1616
namespace duckdb {
1717

@@ -27,8 +27,10 @@ class SQLiteTransactionManager : public TransactionManager {
2727

2828
private:
2929
SQLiteCatalog &sqlite_catalog;
30-
mutex transaction_lock;
3130
reference_map_t<Transaction, unique_ptr<SQLiteTransaction>> transactions;
31+
32+
// Function-local static mutex to avoid Windows DLL initialization issues
33+
static mutex& GetTransactionLock();
3234
};
3335

3436
} // namespace duckdb

0 commit comments

Comments
 (0)