Skip to content

Commit 4d11156

Browse files
committed
snapshots: restructure config options
1 parent 8c85f7d commit 4d11156

File tree

14 files changed

+230
-311
lines changed

14 files changed

+230
-311
lines changed

src/app/firedancer-dev/commands/snapshot_load.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,8 @@ extern int * fd_log_private_shared_lock;
138138
static void
139139
snapshot_load_cmd_fn( args_t * args,
140140
config_t * config ) {
141-
if( FD_UNLIKELY( config->firedancer.snapshots.sources.gossip.enabled || config->firedancer.snapshots.sources.entrypoints.enabled ) ) {
142-
FD_LOG_ERR(( "snapshot-load command is incompatible with gossip or entrypoint snapshot sources" ));
141+
if( FD_UNLIKELY( config->firedancer.snapshots.sources.gossip.allow_any || 0UL!=config->firedancer.snapshots.sources.gossip.allow_list_cnt ) ) {
142+
FD_LOG_ERR(( "snapshot-load command is incompatible with gossip snapshot sources" ));
143143
}
144144
snapshot_load_topo( config, args );
145145
fd_topo_t * topo = &config->topo;

src/app/firedancer/config/default.toml

Lines changed: 89 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ user = ""
267267
# specified in `[net.interface]`.
268268
host = ""
269269

270+
#TODO-AM: Tweak all the wording
270271
# Snapshots are a periodic view of the ledger at a point in time. They
271272
# are used to enable validators to join the network quickly, as they do
272273
# not need to replay all transactions since genesis, just those since a
@@ -283,20 +284,6 @@ user = ""
283284
# a validator to catch up more quickly.
284285
incremental_snapshots = true
285286

286-
# How old a local incremental snapshot on disk can be, in slots, and
287-
# still be loaded on startup. The age of a snapshot is determined
288-
# by looking at the highest incremental snapshot slot available for
289-
# download from cluster peers.
290-
#
291-
# If a local snapshot exists on disk, within the given age, other
292-
# options are ignored and no snapshot will be downloaded.
293-
#
294-
# If the local snapshot is older than this, we will instead download
295-
# a new snapshot. If the local snapshot is too old, and we cannot
296-
# download a snapshot because `download` is `false`, the validator
297-
# will exit with an error.
298-
maximum_local_snapshot_age = 2500
299-
300287
# Whether to allow downloading a new genesis file from a peer when
301288
# booting. If set to false, the validator will only reuse a locally
302289
# stored genesis file, and if one does not exist, the validator will
@@ -318,64 +305,6 @@ user = ""
318305
# TODO: Not yet implemented.
319306
genesis_download = true
320307

321-
# Whether to allow downloading a new snapshot from a peer when
322-
# booting.
323-
#
324-
# If set to true, the validator will contact peers from the gossip
325-
# network to locate both a full and incremental snapshot and
326-
# download it, assuming there is no loadable local snapshot. If
327-
# false, the validator will attempt to load the most recently
328-
# downloaded snapshot that exists locally on the disk. If there is
329-
# no local snapshot, the validator will abort with an error.
330-
#
331-
# When downloading a snapshot, it will optionally be stored on disk
332-
# if `[paths.snapshots]` is specified. The number of downloaded
333-
# snapshots that will be stored on disk is controlled below by the
334-
# max_*_snapshots to keep options.
335-
#
336-
# TODO: implement the "at most one downloaded" logic. "
337-
download = true
338-
339-
# A snapshot hash must be published in gossip by one of the
340-
# validator identities listed here for it to be accepted. The list
341-
# should be a list of identity public keys, specified as base58
342-
# strings.
343-
#
344-
# If a snapshot hash is received that is not published by a known
345-
# validator the validator will exit with an error.
346-
#
347-
# If no known validators are specified here, any snapshot hash will
348-
# be accepted.
349-
#
350-
# TODO: Implement this once gossip is wired up.
351-
known_validators = []
352-
353-
# The minimum acceptable speed for snapshot download from a peer, in
354-
# megabits per second. If the initial download speed falls below
355-
# this threshold, the validator will abandon the download, find a
356-
# new peer and start downloading the snapshot from them.
357-
#
358-
# The initial download speed is measured over every 10 mib for the
359-
# first 400 mib of downloaded content. The validator continues
360-
# to measure download speed over every 10 mib until the download is
361-
# complete, but only the initial download speed is used to decide
362-
# whether to abort and retry the download.
363-
# TODO: refine the initial download speed measurement and threshold.
364-
#
365-
# If set to zero, no minimum threshold will be applied.
366-
minimum_download_speed_mib = 0
367-
368-
# The maximum number of times to abort and retry when encountering a
369-
# slow snapshot download. If the maximum attempts is reached while
370-
# downloading a snapshot the validator will exit with an error.
371-
#
372-
# The number of attempts is reset when starting a download for a
373-
# new full or incremental snapshot.
374-
#
375-
# If set to zero, the validator will continually retry until
376-
# completing snapshot download.
377-
maximum_download_retry_abort = 5
378-
379308
# Controls how many full snapshots are allowed to be kept in the
380309
# snapshots directory on disk.
381310
#
@@ -385,6 +314,15 @@ user = ""
385314
max_full_snapshots_to_keep = 1
386315
max_incremental_snapshots_to_keep = 1
387316

317+
# If an in-progress loading (local, or downloaded) of a full
318+
# snapshot has its effective age rise above this threshold, we will
319+
# cancel the load and start over. The most common reason this would
320+
# happen is if the network is all using the same full snapshot
321+
# interval (e.g. 100_000) and while we are downloading a full
322+
# snapshot from interval N, the network moves on to N+1, removing
323+
# availability of all incremental snapshots that build on N.
324+
full_effective_age_cancel_threshold = 20_000
325+
388326
# Controls for where we look to find snapshots. If multiple sources
389327
# and peers are available, the one with the lowest latency and
390328
# fastest download will be automatically selected among them.
@@ -405,11 +343,47 @@ user = ""
405343
# you highly trust, and this is the default behavior of the
406344
# configuration.
407345
[snapshots.sources]
408-
# Allow fetching of snapshots from peers specified in the gossip
409-
# entrypoints. Gossip entrypoints must be peers that are highly
410-
# trusted as they are used to bootstrap your node.
411-
[snapshots.sources.entrypoints]
412-
enabled = false
346+
# How old a local incremental snapshot on disk can be, in slots, and
347+
# still be loaded on startup. The age of a snapshot is determined
348+
# by looking at the highest incremental snapshot slot available for
349+
# download from cluster peers.
350+
#
351+
# If a local snapshot exists on disk, within the given age, other
352+
# options are ignored and no snapshot will be downloaded.
353+
#
354+
# If the local snapshot is older than this, we will instead download
355+
# a new snapshot. If the local snapshot is too old, and no new
356+
# snapshot can be downloaded from the configured sources, the
357+
# validator will exit with an error.
358+
#
359+
# The local age parameters control whether or not we use locally
360+
# stored full / incremental snapshots, if they exist. Slot age
361+
# is measured against the highest slot we could currently arrive at
362+
# by downloading new full and incremental snapshots from any of
363+
# our trusted sources.
364+
#
365+
# The effective usable slot of the local full snapshot is
366+
# defined as the max slot of all trusted incremental snapshots
367+
# that we could possibly use (local, or downloaded) which apply
368+
# on top of this full snapshot.
369+
max_local_full_effective_age = 1000
370+
max_local_incremental_age = 200
371+
372+
# TODO-AM
373+
# If any HTTP peers are listed, the following paths will be
374+
# fetched from these peers to determine if they have a snapshot
375+
# available:
376+
#
377+
# /snapshot.tar.bz2
378+
# /incremental-snapshot.tar.bz2
379+
#
380+
# If these peers have snapshots at the given paths, they will be
381+
# considered as additional sources for downloading snapshots
382+
# alongside gossip sources.
383+
# The URLs to fetch snapshots from. The paths /snapshot.tar.bz2
384+
# and /incremental-snapshot.tar.bz2 will be queried from each
385+
# URL.
386+
servers = []
413387

414388
# Allow fetching of snapshots from arbitrary gossip peers that
415389
# are hosting a snapshot server. This may allow faster snapshot
@@ -418,27 +392,48 @@ user = ""
418392
# serve you a specially crafted malicious snapshot with no way
419393
# to tell.
420394
[snapshots.sources.gossip]
421-
enabled = false
395+
# TODO-AM
396+
allow_any = true
397+
398+
# TODO-AM
399+
allow_list = []
400+
401+
# TODO-AM
402+
# Do not attempt downloads from the given gossip pubkeys. This can
403+
# be useful if a peer is frequently selected by the automatic logic
404+
# but does not yield successful / usable snapshots.
405+
block_list = []
406+
407+
# TODO-AM
408+
[snapshots.download]
409+
# The minimum acceptable speed for snapshot download from a peer, in
410+
# megabits per second. If the initial download speed falls below
411+
# this threshold, the validator will abandon the download, find a
412+
# new peer and start downloading the snapshot from them.
413+
#
414+
# The initial download speed is measured over every 10 mib for the
415+
# first 400 mib of downloaded content. The validator continues
416+
# to measure download speed over every 10 mib until the download is
417+
# complete, but only the initial download speed is used to decide
418+
# whether to abort and retry the download.
419+
# TODO: refine the initial download speed measurement and threshold.
420+
#
421+
# If set to zero, no minimum threshold will be applied.
422+
min_speed_mib = 0
422423

423-
# If any HTTP peers are listed, the following paths will be
424-
# fetched from these peers to determine if they have a snapshot
425-
# available:
424+
# The maximum number of times to abort and retry when encountering a
425+
# slow snapshot download. If the maximum attempts is reached while
426+
# downloading a snapshot the validator will exit with an error.
426427
#
427-
# /snapshot.tar.bz2
428-
# /incremental-snapshot.tar.bz2
428+
# The number of attempts is reset when starting a download for a
429+
# new full or incremental snapshot.
429430
#
430-
# If these peers have snapshots at the given paths, they will be
431-
# considered as additional sources for downloading snapshots
432-
# when enabled alongside gossip and entrypoint sources.
433-
[[snapshots.sources.http]]
434-
# Whether to enable fetching snapshots from a list of
435-
# HTTP peers.
436-
enabled = false
431+
# If set to zero, the validator will continually retry until
432+
# completing snapshot download.
433+
max_retry_abort = 5
437434

438-
# The url to fetch snapshots from. The paths /snapshot.tar.bz2
439-
# and /incremental-snapshot.tar.bz2 will be queried from this
440-
# URL.
441-
url = "http://example.com:1234"
435+
# TODO-AM
436+
[snapshots.timeouts]
442437

443438
[rpc]
444439
# If nonzero, enable JSON RPC on this port, and use the next port
@@ -544,10 +539,6 @@ user = ""
544539
# is not recommended to change this setting.
545540
mean_cache_entry_size = 131072
546541

547-
# This section configures the "groove" persistent account database.
548-
# [groove]
549-
# ...
550-
551542
[store]
552543
# Similar to max_pending_shred_sets, this parameter configures the
553544
# maximum number of shred sets that can be buffered. However, this

src/app/firedancer/config/testnet.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,3 @@
88
max_account_records = 200_000_000
99
[layout]
1010
gossvf_tile_count = 4
11-
[snapshots]
12-
[[snapshots.sources.http]]
13-
enabled = true
14-
url = "http://solana-testnet-rpc.jumpisolated.com:8899"

src/app/firedancer/topology.c

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -901,35 +901,33 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,
901901
} else if( FD_UNLIKELY( !strcmp( tile->name, "snapct" ) ) ) {
902902

903903
fd_memcpy( tile->snapct.snapshots_path, config->paths.snapshots, PATH_MAX );
904-
tile->snapct.incremental_snapshot_fetch = config->firedancer.snapshots.incremental_snapshots;
905-
tile->snapct.do_download = config->firedancer.snapshots.download;
906-
tile->snapct.maximum_local_snapshot_age = config->firedancer.snapshots.maximum_local_snapshot_age;
907-
tile->snapct.minimum_download_speed_mib = config->firedancer.snapshots.minimum_download_speed_mib;
908-
tile->snapct.maximum_download_retry_abort = config->firedancer.snapshots.maximum_download_retry_abort;
909-
tile->snapct.max_full_snapshots_to_keep = config->firedancer.snapshots.max_full_snapshots_to_keep;
910-
tile->snapct.max_incremental_snapshots_to_keep = config->firedancer.snapshots.max_incremental_snapshots_to_keep;
911-
912-
tile->snapct.entrypoints_enabled = config->firedancer.snapshots.sources.entrypoints.enabled;
913-
tile->snapct.gossip_peers_enabled = config->firedancer.snapshots.sources.gossip.enabled;
914-
tile->snapct.gossip_entrypoints_cnt = config->gossip.entrypoints_cnt;
915-
916-
for( ulong i=0UL; i<tile->snapct.gossip_entrypoints_cnt; i++ ) tile->snapct.gossip_entrypoints[ i ] = config->gossip.resolved_entrypoints[ i ];
917-
918-
ulong peers_cnt = config->firedancer.snapshots.sources.http.peers_cnt;
919-
ulong resolved_peers_cnt = 0UL;
920-
921-
for( ulong j=0UL; j<peers_cnt; j++ ) {
922-
if( FD_UNLIKELY( !config->firedancer.snapshots.sources.http.peers[ j ].enabled ) ) continue;
923-
924-
if( FD_UNLIKELY( 0==resolve_peer( config->firedancer.snapshots.sources.http.peers[ j ].url, &tile->snapct.http.peers[ resolved_peers_cnt ] ) ) ) {
925-
FD_LOG_ERR(( "failed to resolve address of [snapshots.sources.http.peers] entry \"%s\"", config->firedancer.snapshots.sources.http.peers[ j ].url ));
926-
} else {
927-
resolved_peers_cnt++;
904+
tile->snapct.sources.max_local_full_effective_age = config->firedancer.snapshots.sources.max_local_full_effective_age;
905+
tile->snapct.sources.max_local_incremental_age = config->firedancer.snapshots.sources.max_local_incremental_age;
906+
tile->snapct.incremental_snapshots = config->firedancer.snapshots.incremental_snapshots;
907+
tile->snapct.max_full_snapshots_to_keep = config->firedancer.snapshots.max_full_snapshots_to_keep;
908+
tile->snapct.max_incremental_snapshots_to_keep = config->firedancer.snapshots.max_incremental_snapshots_to_keep;
909+
tile->snapct.full_effective_age_cancel_threshold = config->firedancer.snapshots.full_effective_age_cancel_threshold;
910+
tile->snapct.download.min_speed_mib = config->firedancer.snapshots.download.min_speed_mib;
911+
tile->snapct.download.max_retry_abort = config->firedancer.snapshots.download.max_retry_abort;
912+
tile->snapct.sources.gossip.allow_any = config->firedancer.snapshots.sources.gossip.allow_any;
913+
tile->snapct.sources.gossip.allow_list_cnt = config->firedancer.snapshots.sources.gossip.allow_list_cnt;
914+
tile->snapct.sources.gossip.block_list_cnt = config->firedancer.snapshots.sources.gossip.block_list_cnt;
915+
tile->snapct.sources.servers_cnt = config->firedancer.snapshots.sources.servers_cnt;
916+
for( ulong i=0UL; i<tile->snapct.sources.gossip.allow_list_cnt; i++ ) {
917+
if( FD_UNLIKELY( !fd_base58_decode_32( config->firedancer.snapshots.sources.gossip.allow_list[ i ], tile->snapct.sources.gossip.allow_list[ i ].uc ) ) ) {
918+
FD_LOG_ERR(( "[snapshots.sources.gossip.allow_list[%lu] invalid (%s)", i, config->firedancer.snapshots.sources.gossip.allow_list[ i ] ));
919+
}
920+
}
921+
for( ulong i=0UL; i<tile->snapct.sources.gossip.block_list_cnt; i++ ) {
922+
if( FD_UNLIKELY( !fd_base58_decode_32( config->firedancer.snapshots.sources.gossip.block_list[ i ], tile->snapct.sources.gossip.block_list[ i ].uc ) ) ) {
923+
FD_LOG_ERR(( "[snapshots.sources.gossip.block_list[%lu] invalid (%s)", i, config->firedancer.snapshots.sources.gossip.block_list[ i ] ));
924+
}
925+
}
926+
for( ulong i=0UL; i<tile->snapct.sources.servers_cnt; i++ ) {
927+
if( FD_UNLIKELY( !resolve_peer( config->firedancer.snapshots.sources.servers[ i ], &tile->snapct.sources.servers[ i ] ) ) ) {
928+
FD_LOG_ERR(( "[snapshots.sources.servers[%lu] invalid (%s)", i, config->firedancer.snapshots.sources.servers[ i ] ));
928929
}
929930
}
930-
931-
tile->snapct.http.peers_cnt = resolved_peers_cnt;
932-
/* TODO: set up known validators and known validators cnt */
933931

934932
} else if( FD_UNLIKELY( !strcmp( tile->name, "snapld" ) ) ) {
935933

src/app/shared/fd_config.h

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#define AFFINITY_SZ (256UL)
1212
#define CONFIGURE_STAGE_COUNT ( 12UL)
1313
#define GOSSIP_TILE_ENTRYPOINTS_MAX ( 16UL)
14-
#define SNAPSHOT_TILE_HTTP_PEERS_MAX ( 16UL)
1514
#define IP4_PORT_STR_MAX ( 22UL)
1615

1716
struct fd_configh {
@@ -124,37 +123,33 @@ struct fd_configf {
124123
} gossip;
125124

126125
struct {
127-
128126
struct {
127+
uint max_local_full_effective_age;
128+
uint max_local_incremental_age;
129129

130130
struct {
131-
int enabled;
132-
} entrypoints;
133-
134-
struct {
135-
int enabled;
131+
int allow_any;
132+
ulong allow_list_cnt;
133+
char allow_list[ 16 ][ FD_BASE58_ENCODED_32_SZ ];
134+
ulong block_list_cnt;
135+
char block_list[ 16 ][ FD_BASE58_ENCODED_32_SZ ];
136136
} gossip;
137137

138-
struct {
139-
ulong peers_cnt;
140-
struct {
141-
int enabled;
142-
char url[ PATH_MAX ];
143-
} peers[ SNAPSHOT_TILE_HTTP_PEERS_MAX ];
144-
} http;
145-
138+
ulong servers_cnt;
139+
char servers[ 16 ][ 128 ];
146140
} sources;
147141

148-
int incremental_snapshots;
149-
uint maximum_local_snapshot_age;
150-
int genesis_download;
151-
int download;
152-
ulong known_validators_cnt;
153-
char known_validators[ 16 ][ 256 ];
154-
uint minimum_download_speed_mib;
155-
uint maximum_download_retry_abort;
156-
uint max_full_snapshots_to_keep;
157-
uint max_incremental_snapshots_to_keep;
142+
int incremental_snapshots;
143+
int genesis_download;
144+
uint max_full_snapshots_to_keep;
145+
uint max_incremental_snapshots_to_keep;
146+
uint full_effective_age_cancel_threshold;
147+
148+
struct {
149+
uint min_speed_mib;
150+
uint max_retry_abort;
151+
} download;
152+
158153
} snapshots;
159154

160155
struct {

0 commit comments

Comments
 (0)