Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions book/api/metrics-generated.md
Original file line number Diff line number Diff line change
Expand Up @@ -1152,3 +1152,23 @@
| <span class="metrics-name">snapls_&#8203;incremental_&#8203;accounts_&#8203;hashed</span> | gauge | Number of accounts hashed for the incremental snapshot during snapshot loading. Might decrease if snapshot load is aborted and restarted |

</div>

## Tower Tile

<div class="metrics">

| Metric | Type | Description |
|--------|------|-------------|
| <span class="metrics-name">tower_&#8203;vote_&#8203;txn_&#8203;invalid</span> | counter | Number of times we dropped a vote txn because it was invalid (malformed, bad signature, etc.) |
| <span class="metrics-name">tower_&#8203;vote_&#8203;txn_&#8203;ignored</span> | counter | Number of times we ignored all or part of a vote txn because we didn't recognize a slot (e.g. our replay was behind) |
| <span class="metrics-name">tower_&#8203;vote_&#8203;txn_&#8203;mismatch</span> | counter | Number of times a vote txn mismatched our own block id |
| <span class="metrics-name">tower_&#8203;ancestor_&#8203;rollback</span> | counter | Rollback to an ancestor of our prev vote (can't vote) |
| <span class="metrics-name">tower_&#8203;sibling_&#8203;confirmed</span> | counter | Duplicate sibling got confirmed (can't vote) |
| <span class="metrics-name">tower_&#8203;same_&#8203;fork</span> | counter | Same fork as prev vote (can vote) |
| <span class="metrics-name">tower_&#8203;switch_&#8203;pass</span> | counter | Prev vote was on a different fork, but we are allowed to switch (can vote) |
| <span class="metrics-name">tower_&#8203;switch_&#8203;fail</span> | counter | Prev vote was on a different fork, and we are not allowed to switch (can't vote) |
| <span class="metrics-name">tower_&#8203;lockout_&#8203;fail</span> | counter | Locked out (can't vote) |
| <span class="metrics-name">tower_&#8203;threshold_&#8203;fail</span> | counter | Did not pass threshold check (can't vote) |
| <span class="metrics-name">tower_&#8203;propagated_&#8203;fail</span> | counter | Prev leader block did not propagate (can't vote) |

</div>
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ encode_vote( send_test_ctx_t * ctx, fd_tower_slot_done_t * slot_done ) {
/* Create minimal mock tower with one vote */
uchar tower_mem[ FD_TOWER_FOOTPRINT ] __attribute__((aligned(FD_TOWER_ALIGN)));
fd_tower_t * tower = fd_tower_join( fd_tower_new( tower_mem ) );
fd_tower_votes_push_tail( tower, (fd_tower_vote_t){ .slot = vote_slot, .conf = 1 } );
fd_tower_push_tail( tower, (fd_tower_vote_t){ .slot = vote_slot, .conf = 1 } );

/* Mock values */
fd_lockout_offset_t lockouts_scratch[1];
Expand Down
21 changes: 21 additions & 0 deletions src/app/firedancer/config/default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1369,6 +1369,27 @@ user = ""
# TODO: What is this ... needs to be deleted
cluster_version = "1.18.0"

# The tower tile runs the fork choice and tower rules to determine
# both what block to vote on and what block to build our own leader
# blocks on top of.
[tiles.tower]
# Firedancer can process at most this many slots without rooting
# in the consensus rules before it must begin evicting.
#
# This is an estimate and should be set as generously as
# possible to allow for temporary outages such as network
# partitions. For example, the validator might get disconnected
# from part of the cluster due to data center issues. Roughly,
# the default of 4096 allows for 30 minutes without rooting.
#
# Specifically, tower will ignore gossip votes for slots more than
# max unrooted slots ahead of the current root. Additionally, both
# fork choice and tower structures will run out of memory and cause
# Firedancer to exit if they need to maintain more than max unrooted
# slots of tower forks (TODO in the future Firedancer will instead
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is operator facing so don't leave TODOs unless they are immediate.

# gracefully degrade by evicting forks).
max_unrooted_slots = 4096
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still really don't buy that this limit needs to exist.


[tiles.send]
# The port the send tile uses for QUIC, to send votes and other
# transactions. It also uses this as the UDP src port.
Expand Down
18 changes: 12 additions & 6 deletions src/app/firedancer/topology.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "topology.h"

#include "../../ballet/lthash/fd_lthash.h"
#include "../../choreo/fd_choreo_base.h"
#include "../../discof/reasm/fd_reasm.h"
#include "../../discof/poh/fd_poh.h"
#include "../../discof/replay/fd_exec.h"
Expand Down Expand Up @@ -408,6 +409,7 @@ fd_topo_initialize( config_t * config ) {

/* TODO: Explain this .... USHORT_MAX is not dcache max */
ulong pending_fec_shreds_depth = fd_ulong_min( fd_ulong_pow2_up( config->tiles.shred.max_pending_shred_sets * FD_REEDSOL_DATA_SHREDS_MAX ), USHORT_MAX + 1 /* dcache max */ );
ulong max_unrooted_slots = config->tiles.tower.max_unrooted_slots;

/* topo, link_name, wksp_name, depth, mtu, burst */
/**/ fd_topob_link( topo, "gossip_net", "net_gossip", 32768UL, FD_NET_MTU, 1UL );
Expand Down Expand Up @@ -481,7 +483,7 @@ fd_topo_initialize( config_t * config ) {

FOR(shred_tile_cnt) fd_topob_link( topo, "shred_out", "shred_out", pending_fec_shreds_depth, FD_SHRED_OUT_MTU, 3UL ); /* TODO: Pretty sure burst of 3 is incorrect here */
FOR(shred_tile_cnt) fd_topob_link( topo, "repair_shred", "shred_out", pending_fec_shreds_depth, sizeof(fd_ed25519_sig_t), 1UL ); /* TODO: Also pending_fec_shreds_depth? Seems wrong */
/**/ fd_topob_link( topo, "tower_out", "tower_out", 1024UL, sizeof(fd_tower_slot_done_t), 1UL );
/**/ fd_topob_link( topo, "tower_out", "tower_out", max_unrooted_slots, sizeof(fd_tower_msg_t), 1UL );
/**/ fd_topob_link( topo, "send_out", "send_out", 128UL, FD_TPU_RAW_MTU, 1UL );

fd_topob_link( topo, "replay_exec", "replay_exec", 16384UL, sizeof(fd_exec_task_msg_t), 1UL );
Expand Down Expand Up @@ -665,14 +667,17 @@ fd_topo_initialize( config_t * config ) {
fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "snapin_manif", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
}

/**/ fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "poh_replay", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
FOR(exec_tile_cnt) fd_topob_tile_in( topo, "exec", i, "metric_in", "replay_exec", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
/**/ fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "poh_replay", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
FOR(exec_tile_cnt) fd_topob_tile_in ( topo, "exec", i, "metric_in", "replay_exec", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );

/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "genesi_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "gossip_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "replay_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
if( snapshots_enabled ) {
fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "snapin_manif", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
}
/**/ fd_topob_tile_out( topo, "tower", 0UL, "tower_out", 0UL );

/**/ fd_topob_tile_in ( topo, "send", 0UL, "metric_in", "replay_stake", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
/**/ fd_topob_tile_in ( topo, "send", 0UL, "metric_in", "gossip_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
/**/ fd_topob_tile_in ( topo, "send", 0UL, "metric_in", "tower_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
Expand Down Expand Up @@ -1205,9 +1210,10 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,

} else if( FD_UNLIKELY( !strcmp( tile->name, "tower" ) ) ) {

strncpy( tile->tower.identity_key_path, config->paths.identity_key, sizeof(tile->tower.identity_key_path) );
strncpy( tile->tower.vote_acc_path, config->paths.vote_account, sizeof(tile->tower.vote_acc_path) );
strncpy( tile->tower.ledger_path, config->paths.base, sizeof(tile->tower.ledger_path) );
tile->tower.slot_max = config->tiles.tower.max_unrooted_slots;
strncpy( tile->tower.identity_key, config->paths.identity_key, sizeof(tile->tower.identity_key) );
strncpy( tile->tower.vote_account, config->paths.vote_account, sizeof(tile->tower.vote_account) );
strncpy( tile->tower.base_path, config->paths.base, sizeof(tile->tower.base_path) );

} else if( FD_UNLIKELY( !strcmp( tile->name, "send" ) ) ) {

Expand Down
4 changes: 4 additions & 0 deletions src/app/shared/fd_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,10 @@ struct fd_config {
ulong write_buffer_size;
} shredcap;

struct {
ulong max_unrooted_slots;
} tower;

} tiles;
struct {
ulong capture_start_slot;
Expand Down
7 changes: 2 additions & 5 deletions src/app/shared/fd_config_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,10 @@ fd_config_extract_pod( uchar * pod,
CFG_POP ( cstr, tiles.replay.cluster_version );
CFG_POP_ARRAY( cstr, tiles.replay.enable_features );

CFG_POP ( cstr, tiles.store_int.slots_pending );
CFG_POP ( cstr, tiles.store_int.shred_cap_archive );
CFG_POP ( cstr, tiles.store_int.shred_cap_replay );
CFG_POP ( ulong, tiles.store_int.shred_cap_end_slot );

CFG_POP ( ushort, tiles.send.send_src_port );

CFG_POP ( ulong, tiles.tower.max_unrooted_slots );

CFG_POP ( bool, tiles.archiver.enabled );
CFG_POP ( ulong, tiles.archiver.end_slot );
CFG_POP ( cstr, tiles.archiver.rocksdb_path );
Expand Down
7 changes: 0 additions & 7 deletions src/app/shared_dev/commands/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,6 @@ update_config_for_dev( fd_config_t * config ) {
shred->shred.expected_shred_version = shred_version;
}
}
ulong store_id = fd_topo_find_tile( &config->topo, "storei", 0 );
if( FD_UNLIKELY( store_id!=ULONG_MAX ) ) {
fd_topo_tile_t * storei = &config->topo.tiles[ store_id ];
if( FD_LIKELY( storei->store_int.expected_shred_version==(ushort)0 ) ) {
storei->store_int.expected_shred_version = shred_version;
}
}
}

/* Run Firedancer entirely in a single process for development and
Expand Down
2 changes: 0 additions & 2 deletions src/choreo/epoch/Local.mk

This file was deleted.

146 changes: 0 additions & 146 deletions src/choreo/epoch/fd_epoch.c

This file was deleted.

Loading
Loading