Skip to content

Commit 297b856

Browse files
[AP][Solver][3D] 3D AP Solver Support
Upgraded the solver in the AP flow to support a "Z" dimension representing the layer. This code only kicks in when the architecture has more than one layer, since in the single-layer case there is no point in computing the Z dimension.
1 parent 7f476db commit 297b856

File tree

6 files changed

+314
-82
lines changed

6 files changed

+314
-82
lines changed

vpr/src/analytical_place/analytical_solver.cpp

Lines changed: 145 additions & 46 deletions
Large diffs are not rendered by default.

vpr/src/analytical_place/analytical_solver.h

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,16 @@ class B2BSolver : public AnalyticalSolver {
531531
/// number, the solver will focus more on timing and less on wirelength.
532532
static constexpr double timing_slope_fac_ = 0.75;
533533

534+
/// @brief For most FPGA architectures, the cost of moving horizontally is
535+
/// equivalent to the cost of moving vertically (i.e. moving in increasing
536+
/// x-dimension has the same cost as moving the same amount in the
537+
/// y-dimension). However, for 3D FPGAs, moving between layers is
538+
/// much more expensive than moving in the x or y dimension. We account
539+
/// for this by adding a cost penalty factor to the "z"-dimension.
540+
/// TODO: This cost factor was chosen arbitrarily because it felt ok. Should
541+
/// determine a better factor empirically.
542+
static constexpr double layer_distance_cost_fac_ = 10.0;
543+
534544
public:
535545
B2BSolver(const APNetlist& ap_netlist,
536546
const DeviceGrid& device_grid,
@@ -699,15 +709,41 @@ class B2BSolver : public AnalyticalSolver {
699709
void update_linear_system_with_anchors(unsigned iteration);
700710

701711
/**
702-
* @brief Store the x and y solutions in Eigen's vectors into the partial
703-
* placement object.
704-
*
705-
* Note: The x_soln and y_soln may be modified if it is found that the
706-
* solution is imposible (i.e. has negative positions).
712+
* @brief Solves the linear system of equations using the connectivity
713+
* matrix (A), the constant vector (b), and a guess for the solution.
707714
*/
708-
void store_solution_into_placement(Eigen::VectorXd& x_soln,
709-
Eigen::VectorXd& y_soln,
710-
PartialPlacement& p_placement);
715+
Eigen::VectorXd solve_linear_system(Eigen::SparseMatrix<double> &A,
716+
Eigen::VectorXd &b,
717+
Eigen::VectorXd &guess);
718+
719+
/**
720+
* @brief Store the solutions from the linear system into the partial
721+
* placement object for the given dimension.
722+
*
723+
* Note: The dim_soln may be modified if it is found that the solution is
724+
* impossible (e.g. has negative positions).
725+
*
726+
* @param dim_soln
727+
* The solution of the linear system for a given dimension.
728+
* @param block_dim_locs
729+
* The block locations in the partial placement for the dimension.
730+
* @param dim_max_pos
731+
* The maximum position allowed for the dimension. For example, for the
732+
* x-dimension, this would be the width of the device. This is used to
733+
* ensure that the positions do not go off device.
734+
*/
735+
void store_solution_into_placement(Eigen::VectorXd &dim_soln,
736+
vtr::vector<APBlockId, double> &block_dim_locs,
737+
double dim_max_pos);
738+
739+
/**
740+
* @brief Does the FPGA that the AP flow is currently targeting have more
741+
* than one die? Having multiple dies would imply that the solver
742+
* needs to add another dimension to solve for.
743+
*/
744+
inline bool is_multi_die() const {
745+
return device_grid_num_layers_ > 1;
746+
}
711747

712748
// The following are variables used to store the system of equations to be
713749
// solved in the x, y, and z (layer) dimensions. The equations are of the form:
@@ -720,22 +756,28 @@ class B2BSolver : public AnalyticalSolver {
720756
Eigen::SparseMatrix<double> A_sparse_x;
721757
/// @brief The coefficient / connectivity matrix for the y dimension.
722758
Eigen::SparseMatrix<double> A_sparse_y;
759+
/// @brief The coefficient / connectivity matrix for the z dimension (layer dimension).
760+
Eigen::SparseMatrix<double> A_sparse_z;
723761
/// @brief The constant vector in the x dimension.
724762
Eigen::VectorXd b_x;
725763
/// @brief The constant vector in the y dimension.
726764
Eigen::VectorXd b_y;
765+
/// @brief The constant vector in the z dimension (layer dimension).
766+
Eigen::VectorXd b_z;
727767

728768
// The following are the solutions of the previous iteration of this solver.
729769
// They are updated at the end of solve() and are used as the starting point
730770
// for the next call to solve.
731771
vtr::vector<APBlockId, double> block_x_locs_solved;
732772
vtr::vector<APBlockId, double> block_y_locs_solved;
773+
vtr::vector<APBlockId, double> block_z_locs_solved;
733774

734775
// The following are the legalized solutions coming into the analytical solver
735776
// (other than the first iteration). These are stored to be used as anchor
736777
// blocks during the solver.
737778
vtr::vector<APBlockId, double> block_x_locs_legalized;
738779
vtr::vector<APBlockId, double> block_y_locs_legalized;
780+
vtr::vector<APBlockId, double> block_z_locs_legalized;
739781

740782
/// @brief The total number of CG iterations that this solver has performed
741783
/// so far. This can be a useful metric for the amount of work the

vpr/src/place/initial_placement.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -653,21 +653,23 @@ static t_flat_pl_loc find_centroid_loc_from_flat_placement(const t_pl_macro& pl_
653653
// and save the closest of all regions.
654654
t_flat_pl_loc best_projected_pos = centroid;
655655
float best_distance = std::numeric_limits<float>::max();
656-
VTR_ASSERT_MSG(centroid.layer == 0,
657-
"3D FPGAs not supported for this part of the code yet");
658656
for (const Region& region : head_pr.get_regions()) {
659657
const vtr::Rect<int>& rect = region.get_rect();
660658
// Note: We add 0.999 here since the partition region is in grid
661659
// space, so it treats tile positions as having size 0x0 when
662660
// they really are 1x1.
663661
float proj_x = std::clamp<float>(centroid.x, rect.xmin(), rect.xmax() + 0.999);
664662
float proj_y = std::clamp<float>(centroid.y, rect.ymin(), rect.ymax() + 0.999);
663+
float proj_layer = std::clamp<float>(centroid.layer, region.get_layer_range().first,
664+
region.get_layer_range().second + 0.999);
665665
float dx = std::abs(proj_x - centroid.x);
666666
float dy = std::abs(proj_y - centroid.y);
667-
float dist = dx + dy;
667+
float dlayer = std::abs(proj_layer - centroid.layer);
668+
float dist = dx + dy + dlayer;
668669
if (dist < best_distance) {
669670
best_projected_pos.x = proj_x;
670671
best_projected_pos.y = proj_y;
672+
best_projected_pos.layer = proj_layer;
671673
best_distance = dist;
672674
}
673675
}

vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/config.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ circuit_list_add=mm9b.blif
2727
circuit_list_add=styr.blif
2828
circuit_list_add=s953.blif
2929

30+
# Constrain the IOs
31+
# TODO: Should create a unique config file that tests fixed blocks for 3D AP.
32+
# - For now, just add one so we can test the solver effectively.
33+
circuit_constraint_list_add=(mm9a.blif, constraints=../../../../constraints/mm9a_io_constraint.xml)
34+
3035
# Parse info and how to parse
3136
parse_file=vpr_fixed_chan_width.txt
3237

@@ -42,4 +47,6 @@ script_params_common=-starting_stage vpr -track_memory_usage --analytical_place
4247
script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none
4348
# Force unrelated clustering on.
4449
script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none --allow_unrelated_clustering on
50+
# Test that the solver will work with 3D
51+
script_params_list_add=--ap_partial_legalizer none --allow_unrelated_clustering on
4552

0 commit comments

Comments
 (0)