Skip to content

Commit 9cc202e

Browse files
committed
feat: Add HTTP/HTTPS support for remote SQLite databases
Implements seamless support for querying remote SQLite databases over HTTP/HTTPS using DuckDB's CachingFileSystem infrastructure. This enables direct SQL queries against SQLite files hosted on web servers without downloading the entire database.

Key Features:
- Custom SQLite VFS (Virtual File System) that integrates with DuckDB's external file cache
- Adaptive read-ahead optimization (1MB-128MB) for efficient remote file access
- Per-ClientContext VFS registration for thread-safe concurrent access
- Automatic VFS lifecycle management tied to ClientContext lifetime
- Full support for sqlite_scan, sqlite_attach, and SQLite storage engine operations

Implementation Details:
- SQLiteDuckDBCacheVFS: Custom VFS that delegates file I/O to DuckDB's CachingFileSystem
- DuckDBCachedFile: Wrapper around DuckDB's CachingFileHandle with adaptive read-ahead
- Uses DuckDB's FileSystem::IsRemoteFile() for automatic HTTP/HTTPS detection
- Leverages DuckDB's httpfs extension for HTTP client functionality
- Modified SQLiteDB::Open to automatically register VFS for remote files
- Added comprehensive test suite covering VFS registration, basic scans, joins, CTEs, etc.
Performance Optimizations:
- Adaptive read-ahead reduces HTTP round trips for sequential scans
- DuckDB's CachingFileSystem provides intelligent block caching and prefetching
- Read-only access mode for remote files ensures optimal caching behavior

This change enables use cases like:
- Querying SQLite databases hosted on CDNs or web servers
- Analyzing remote SQLite files without local storage requirements
- Building data pipelines that directly access HTTP-hosted SQLite databases

Example usage:
```sql
-- Direct scan
SELECT * FROM sqlite_scan('https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite', 'Artist');

-- Attach remote database
ATTACH 'https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite' AS http_db (TYPE sqlite);

-- Query attached database
SELECT COUNT(*) FROM http_db.Track;
```
1 parent cf13fa1 commit 9cc202e

35 files changed

+1470
-855
lines changed

Makefile

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,40 @@ test_debug_internal: data/db/tpch.db
2323

2424
test_reldebug_internal: data/db/tpch.db
2525
SQLITE_TPCH_GENERATED=1 ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*"
26+
27+
# Tidy check target for static analysis
# - Depends on `release`; clang-tidy is pointed at build/release via -p.
# - Aborts with exit status 1 if clang-tidy is not found on PATH.
# - Checks every *.cpp under src/ except paths matching src/sqlite/*.
# - Stops at the first file for which clang-tidy fails (`|| exit 1`).
# NOTE(review): `read` is used without -r and the file variable is unquoted,
# so paths containing whitespace or backslashes would be mis-split - confirm
# no such paths exist under src/.
28+
.PHONY: tidy-check
29+
tidy-check: release
30+
@command -v clang-tidy >/dev/null 2>&1 || { \
31+
echo "Error: clang-tidy is not installed."; \
32+
exit 1; \
33+
}
34+
@echo "Running clang-tidy on source files..."
35+
@find src/ -name '*.cpp' -not -path 'src/sqlite/*' | while read file; do \
36+
echo "Checking $$file..."; \
37+
clang-tidy $$file \
38+
-p build/release \
39+
--header-filter='$(PROJ_DIR)src/include/.*' \
40+
--config-file=.clang-tidy \
41+
|| exit 1; \
42+
done
43+
@echo "Clang-tidy check completed successfully!"
44+
45+
# Tidy fix target to automatically fix issues
# - Depends on `release`; clang-tidy is pointed at build/release via -p.
# - Aborts with exit status 1 if clang-tidy is not found on PATH.
# - Runs clang-tidy with --fix --fix-errors on every *.cpp under src/ except
#   paths matching src/sqlite/*, rewriting files in place.
# - Unlike tidy-check, the loop body has no `|| exit 1`, so a failing file
#   does not stop the loop and the final message is printed regardless.
# NOTE(review): `read` is used without -r and the file variable is unquoted,
# so paths containing whitespace or backslashes would be mis-split - confirm
# no such paths exist under src/.
46+
.PHONY: tidy-fix
47+
tidy-fix: release
48+
@command -v clang-tidy >/dev/null 2>&1 || { \
49+
echo "Error: clang-tidy is not installed."; \
50+
exit 1; \
51+
}
52+
@echo "Running clang-tidy with fixes on source files..."
53+
@find src/ -name '*.cpp' -not -path 'src/sqlite/*' | while read file; do \
54+
echo "Fixing $$file..."; \
55+
clang-tidy $$file \
56+
-p build/release \
57+
--header-filter='$(PROJ_DIR)src/include/.*' \
58+
--config-file=.clang-tidy \
59+
--fix \
60+
--fix-errors; \
61+
done
62+
@echo "Clang-tidy fixes applied!"

src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ add_subdirectory(storage)
66
add_library(
77
sqlite_ext_library OBJECT
88
sqlite_db.cpp sqlite_extension.cpp sqlite_scanner.cpp sqlite_stmt.cpp
9-
sqlite_storage.cpp sqlite_utils.cpp sqlite_http_vfs.cpp)
9+
sqlite_storage.cpp sqlite_utils.cpp sqlite_duckdb_vfs_cache.cpp)
1010
set(ALL_OBJECT_FILES
1111
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:sqlite_ext_library>
1212
PARENT_SCOPE)

src/include/sqlite_db.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ class SQLiteDB {
3131
sqlite3 *db;
3232

3333
public:
34-
static SQLiteDB Open(const string &path, const SQLiteOpenOptions &options, bool is_shared = false);
34+
//! Open a SQLite database with support for both local and remote files (HTTP/HTTPS)
35+
//! @param context Required for remote file access via DuckDB's VFS
3536
static SQLiteDB Open(const string &path, const SQLiteOpenOptions &options, ClientContext &context, bool is_shared = false);
3637
bool TryPrepare(const string &query, SQLiteStatement &result);
3738
SQLiteStatement Prepare(const string &query);
@@ -55,6 +56,18 @@ class SQLiteDB {
5556

5657
bool IsOpen();
5758
void Close();
59+
60+
private:
61+
//! Helper functions for Open methods
62+
static int GetOpenFlags(const SQLiteOpenOptions &options, bool is_shared, bool is_remote = false);
63+
static void ApplyBusyTimeout(sqlite3 *db, const SQLiteOpenOptions &options);
64+
static void HandleOpenError(const string &path, int rc, ClientContext *context = nullptr);
65+
static SQLiteDB OpenWithVFS(const string &path, const SQLiteOpenOptions &options, ClientContext &context, bool is_shared);
66+
//! Open a local SQLite database file (no remote support)
67+
static SQLiteDB OpenLocal(const string &path, const SQLiteOpenOptions &options, bool is_shared = false);
68+
69+
//! Verify database handle is valid
70+
static void CheckDBValid(sqlite3 *db);
5871
};
5972

6073
} // namespace duckdb
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// sqlite_duckdb_vfs_cache.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "duckdb.hpp"
12+
#include "duckdb/common/file_system.hpp"
13+
#include "duckdb/common/mutex.hpp"
14+
#include "duckdb/storage/buffer/buffer_handle.hpp"
15+
#include "duckdb/storage/buffer_manager.hpp"
16+
#include "duckdb/storage/caching_file_system.hpp"
17+
#include "sqlite3.h"
18+
19+
namespace duckdb {
20+
21+
class ClientContext;
22+
23+
// Wrapper around DuckDB's CachingFileSystem for remote SQLite file access.
24+
// Uses DuckDB's caching infrastructure to efficiently handle remote file I/O. The public interface is read-only: it exposes Read, GetFileSize and GetPath, and no write operation.
25+
class DuckDBCachedFile {
26+
public:
27+
DuckDBCachedFile(ClientContext &context, const string &path); // NOTE(review): `context` is held as a reference member, so the ClientContext must outlive this object
28+
~DuckDBCachedFile() = default;
29+
30+
// Read `amount` bytes starting at byte `offset` into `buffer`. Returns an int status - presumably a SQLite result code (SQLITE_OK / SQLITE_IOERR_*); confirm in the .cpp
31+
int Read(void *buffer, int amount, sqlite3_int64 offset);
32+
// Get the cached file size (non-const: presumably may trigger lazy initialization - confirm in the .cpp)
33+
sqlite3_int64 GetFileSize();
34+
// Get the file path this object was constructed for
35+
const string &GetPath() const { return path; }
36+
37+
private:
38+
// Lazy initialization - defer DuckDB operations until first use
39+
void EnsureInitialized();
40+
41+
// Adaptive read-ahead constants (all values are byte counts)
42+
static constexpr uint64_t MIN_READAHEAD_SIZE = static_cast<uint64_t>(1024) * 1024; // 1MB; also the initial value of current_readahead_size
43+
static constexpr uint64_t MAX_READAHEAD_SIZE = static_cast<uint64_t>(128) * 1024 * 1024; // 128MB
44+
static constexpr uint64_t SEQUENTIAL_THRESHOLD = static_cast<uint64_t>(64) * 1024; // 64KB gap tolerance
45+
46+
ClientContext &context; // Non-owning reference to the DuckDB context
47+
string path; // Path passed to the constructor; returned by GetPath()
48+
unique_ptr<CachingFileHandle> caching_handle; // Owned DuckDB caching handle; null until assigned
49+
sqlite3_int64 cached_file_size = -1; // Cached to avoid repeated remote calls; starts at -1
50+
bool initialized = false; // Starts false; presumably set by EnsureInitialized() - confirm in the .cpp
51+
52+
// Adaptive read-ahead state (no mutex needed - SQLite ensures single-threaded access per file handle). NOTE(review): that guarantee depends on SQLite's threading mode and on the handle not being shared - confirm
53+
sqlite3_int64 last_read_offset = -1; // Track last read position; -1 before any read
54+
sqlite3_int64 last_read_end = -1; // End of last read (offset + amount); -1 before any read
55+
uint64_t current_readahead_size = MIN_READAHEAD_SIZE; // Current read-ahead block size
56+
57+
// Helper methods for adaptive read-ahead
58+
uint64_t CalculateReadAheadSize(sqlite3_int64 offset, int amount) const;
59+
bool IsSequentialRead(sqlite3_int64 offset) const;
60+
void UpdateReadAheadState(sqlite3_int64 offset, int amount);
61+
};
62+
63+
// SQLite Virtual File System (VFS) implementation that uses DuckDB's
64+
// CachingFileSystem for efficient remote SQLite database access. The class has only static members; the callback signatures below correspond to the function-pointer slots of sqlite3_vfs and sqlite3_io_methods.
65+
class SQLiteDuckDBCacheVFS {
66+
public:
67+
// Register the VFS with SQLite (thread-safe, idempotent) - NOTE(review): these guarantees are stated here but the implementation is not visible in this header; confirm in the .cpp
68+
static void Register(ClientContext &context);
69+
// Unregister the VFS when context is destroyed
70+
static void Unregister(ClientContext &context);
71+
// Check if this path should be handled by our VFS (i.e., is it remote?)
72+
static bool CanHandlePath(ClientContext &context, const string &path);
73+
// Get the VFS registration name for a context. NOTE(review): returns a raw `const char *`; the backing storage must outlive every use of the name - confirm ownership in the .cpp
74+
static const char *GetVFSNameForContext(ClientContext &context);
75+
// Get the default VFS registration name (for compatibility); always the literal "duckdb_cache_fs"
76+
static const char *GetVFSName() { return "duckdb_cache_fs"; }
77+
78+
// SQLite VFS interface methods (must be public for C callback registration)
79+
// Note: SQLite expects these to use the C calling convention. SQLITE_CALLBACK expands to __cdecl on _WIN32 and to nothing elsewhere; being a preprocessor macro it is not scoped to this class and stays defined for any code that includes this header.
80+
#ifndef SQLITE_CALLBACK
81+
#ifdef _WIN32
82+
#define SQLITE_CALLBACK __cdecl
83+
#else
84+
#define SQLITE_CALLBACK
85+
#endif
86+
#endif
87+
88+
static int SQLITE_CALLBACK Open(sqlite3_vfs *vfs, const char *filename, sqlite3_file *file, int flags, int *out_flags);
89+
static int SQLITE_CALLBACK Delete(sqlite3_vfs *vfs, const char *filename, int sync_dir);
90+
static int SQLITE_CALLBACK Access(sqlite3_vfs *vfs, const char *filename, int flags, int *result);
91+
static int SQLITE_CALLBACK FullPathname(sqlite3_vfs *vfs, const char *filename, int out_size, char *out_buf);
92+
static void * SQLITE_CALLBACK DlOpen(sqlite3_vfs *vfs, const char *filename);
93+
static void SQLITE_CALLBACK DlError(sqlite3_vfs *vfs, int bytes, char *err_msg);
94+
static void (* SQLITE_CALLBACK DlSym(sqlite3_vfs *vfs, void *handle, const char *symbol))(void); // Returns a pointer to a `void(void)` function. NOTE(review): confirm the calling-convention position compiles as intended on MSVC
95+
static void SQLITE_CALLBACK DlClose(sqlite3_vfs *vfs, void *handle);
96+
static int SQLITE_CALLBACK Randomness(sqlite3_vfs *vfs, int bytes, char *out);
97+
static int SQLITE_CALLBACK Sleep(sqlite3_vfs *vfs, int microseconds);
98+
static int SQLITE_CALLBACK CurrentTime(sqlite3_vfs *vfs, double *time);
99+
static int SQLITE_CALLBACK GetLastError(sqlite3_vfs *vfs, int bytes, char *err_msg);
100+
101+
// SQLite file I/O methods (must be public for C callback registration). NOTE(review): Write/Truncate/Sync are declared although DuckDBCachedFile exposes no write path - presumably they reject or no-op; confirm in the .cpp
102+
static int SQLITE_CALLBACK Close(sqlite3_file *file);
103+
static int SQLITE_CALLBACK Read(sqlite3_file *file, void *buffer, int amount, sqlite3_int64 offset);
104+
static int SQLITE_CALLBACK Write(sqlite3_file *file, const void *buffer, int amount, sqlite3_int64 offset);
105+
static int SQLITE_CALLBACK Truncate(sqlite3_file *file, sqlite3_int64 size);
106+
static int SQLITE_CALLBACK Sync(sqlite3_file *file, int flags);
107+
static int SQLITE_CALLBACK FileSize(sqlite3_file *file, sqlite3_int64 *size);
108+
static int SQLITE_CALLBACK Lock(sqlite3_file *file, int level);
109+
static int SQLITE_CALLBACK Unlock(sqlite3_file *file, int level);
110+
static int SQLITE_CALLBACK CheckReservedLock(sqlite3_file *file, int *result);
111+
static int SQLITE_CALLBACK FileControl(sqlite3_file *file, int op, void *arg);
112+
static int SQLITE_CALLBACK SectorSize(sqlite3_file *file);
113+
static int SQLITE_CALLBACK DeviceCharacteristics(sqlite3_file *file);
114+
115+
private:
116+
// No private members - all state is managed through static methods
117+
};
118+
119+
// SQLite file handle structure that wraps our DuckDBCachedFile.
120+
// Memory layout must be compatible with SQLite's expectations. The two branches below declare identical members; only the packing pragma differs. NOTE(review): the struct holds a non-trivial unique_ptr member - if SQLite supplies the storage as raw bytes, the VFS presumably has to construct and destroy it explicitly; confirm in the .cpp
121+
#ifdef _WIN32
122+
#pragma pack(push, 8) // Windows only: cap member alignment at 8 bytes for this struct
123+
struct SQLiteDuckDBCachedFile {
124+
sqlite3_file base; // Must be first member for C compatibility, so a sqlite3_file* can be cast to this struct
125+
unique_ptr<DuckDBCachedFile> duckdb_file; // The actual file implementation (owned)
126+
ClientContext *context; // DuckDB context for this file (non-owning raw pointer)
127+
};
128+
#pragma pack(pop) // Restore the previous packing setting
129+
#else
130+
struct SQLiteDuckDBCachedFile { // Same members as the _WIN32 branch, default packing
131+
sqlite3_file base; // Must be first member for C compatibility, so a sqlite3_file* can be cast to this struct
132+
unique_ptr<DuckDBCachedFile> duckdb_file; // The actual file implementation (owned)
133+
ClientContext *context; // DuckDB context for this file (non-owning raw pointer)
134+
};
135+
#endif
136+
137+
} // namespace duckdb

src/include/sqlite_http_vfs.hpp

Lines changed: 0 additions & 101 deletions
This file was deleted.

src/include/sqlite_scanner.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,20 @@ struct SqliteBindData : public TableFunctionData {
3030
SQLiteDB *global_db;
3131

3232
optional_ptr<TableCatalogEntry> table;
33+
34+
// Override virtual methods from FunctionData
35+
unique_ptr<FunctionData> Copy() const override;
36+
bool Equals(const FunctionData &other) const override;
3337
};
3438

3539
class SqliteScanFunction : public TableFunction {
3640
public:
3741
SqliteScanFunction();
42+
43+
// Static methods to access scan function pointers
44+
static table_function_init_global_t GetInitGlobal();
45+
static table_function_init_local_t GetInitLocal();
46+
static table_function_t GetFunction();
3847
};
3948

4049
class SqliteAttachFunction : public TableFunction {

src/include/storage/sqlite_catalog.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88

99
#pragma once
1010

11-
#include "duckdb/catalog/catalog.hpp"
12-
#include "sqlite_options.hpp"
1311
#include "sqlite_db.hpp"
12+
#include "sqlite_options.hpp"
13+
#include "duckdb/catalog/catalog.hpp"
1414

1515
namespace duckdb {
1616
class SQLiteSchemaEntry;

src/include/storage/sqlite_transaction.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88

99
#pragma once
1010

11-
#include "duckdb/transaction/transaction.hpp"
12-
#include "duckdb/common/case_insensitive_map.hpp"
1311
#include "sqlite_db.hpp"
12+
#include "duckdb/common/case_insensitive_map.hpp"
13+
#include "duckdb/common/mutex.hpp"
14+
#include "duckdb/transaction/transaction.hpp"
1415

1516
namespace duckdb {
1617
class SQLiteCatalog;
@@ -37,6 +38,10 @@ class SQLiteTransaction : public Transaction {
3738
SQLiteDB *db;
3839
SQLiteDB owned_db;
3940
case_insensitive_map_t<unique_ptr<CatalogEntry>> catalog_entries;
41+
bool started;
42+
43+
// Function-local static mutex to avoid Windows DLL initialization issues
44+
static mutex& GetInitializationMutex();
4045
};
4146

4247
} // namespace duckdb

0 commit comments

Comments
 (0)