Skip to content

Commit

Permalink
refs #3, #6: Unify system parameter values and their limits.
Browse files Browse the repository at this point in the history
 * This prevents potential bugs caused by a mismatch between the actual
   system parameter values (given by the configs) and their limits (used
   by the data structures).  I had a problem with the COPROC_PPDEPTH value:
   it was set to 64 while its limit was 32, and thus offloading never
   happened!
  • Loading branch information
achimnol committed May 21, 2015
1 parent a91b0fd commit d2c8184
Show file tree
Hide file tree
Showing 10 changed files with 62 additions and 60 deletions.
4 changes: 2 additions & 2 deletions elements/ipsec/IPsecDatablocks.hh
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ public:
assert(!has_pending_data);

#ifdef DEBUG
memset(&block_info[0], 0xcc, sizeof(struct aes_block_info) * NBA_MAX_COMPBATCH_SIZE * (NBA_MAX_PACKET_SIZE / AES_BLOCK_SIZE));
memset(&block_info[0], 0xcc, sizeof(struct aes_block_info) * NBA_MAX_COMP_BATCH_SIZE * (NBA_MAX_PACKET_SIZE / AES_BLOCK_SIZE));
#endif

for (unsigned p = 0; p < batch->count; ++p) {
Expand Down Expand Up @@ -247,7 +247,7 @@ private:
bool has_pending_data;

size_t global_block_cnt;
struct aes_block_info block_info[NBA_MAX_COMPBATCH_SIZE * (NBA_MAX_PACKET_SIZE / AES_BLOCK_SIZE)];
struct aes_block_info block_info[NBA_MAX_COMP_BATCH_SIZE * (NBA_MAX_PACKET_SIZE / AES_BLOCK_SIZE)];
};

}
Expand Down
3 changes: 0 additions & 3 deletions lib/computation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,7 @@ comp_thread_context::comp_thread_context() {
num_combatch_size = 0;
num_batchpool_size = 0;
num_taskpool_size = 0;
num_comp_ppdepth = 0;
num_coproc_ppdepth = 0;
rx_queue_size = 0;
rx_wakeup_threshold = 0;

batch_pool = nullptr;
task_pool = nullptr;
Expand Down
37 changes: 19 additions & 18 deletions lib/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -537,24 +537,25 @@ bool load_config(const char *pyfilename)
if (p_sys_params == NULL)
goto exit_load_config;

#define LOAD_PARAM(name, defval) system_params.insert({{name, pymap_getlong(p_sys_params, name, defval)}})
LOAD_PARAM("IO_BATCH_SIZE", 64); // to use vPMD
LOAD_PARAM("IO_RXDESC_PER_HWRXQ", 1024); // to use vPMD
LOAD_PARAM("IO_TXDESC_PER_HWTXQ", 1024);

LOAD_PARAM("COMP_BATCH_SIZE", 64);
LOAD_PARAM("COMP_PPDEPTH", 32); // unused
LOAD_PARAM("COMP_RXQ_LENGTH", 2048); // unused
LOAD_PARAM("COMP_RXQ_THRES", 256); // unused
LOAD_PARAM("COMP_PREPKTQ_LENGTH", 32);

LOAD_PARAM("COPROC_PPDEPTH", 64);
LOAD_PARAM("COPROC_INPUTQ_LENGTH", 64);
LOAD_PARAM("COPROC_COMPLETIONQ_LENGTH", 64);
LOAD_PARAM("COPROC_CTX_PER_COMPTHREAD", 1);

LOAD_PARAM("TASKPOOL_SIZE", 256);
LOAD_PARAM("BATCHPOOL_SIZE", 512);
/* LOAD_PARAM(name, defval)
 *
 * Reads the system parameter `name` from p_sys_params via pymap_getlong()
 * (defval is passed through as its third argument -- presumably the value
 * used when the key is absent; confirm against pymap_getlong), validates it
 * against the compile-time limit NBA_MAX_<name>, and records it in
 * system_params under the stringified parameter name.
 *
 * `name` must be a bare identifier: it is both stringified (#name) and
 * token-pasted onto NBA_MAX_, so every parameter loaded here needs a
 * matching NBA_MAX_<name> macro.
 *
 * The body is wrapped in do { ... } while (0) so that `LOAD_PARAM(...);`
 * behaves as a single statement, including inside an unbraced if/else.
 * Negative values are rejected as well: they would satisfy the upper-bound
 * check but turn into huge numbers once stored into unsigned fields.
 *
 * NOTE(review): both checks are assert()s and therefore vanish when NDEBUG
 * is defined; release builds perform no validation here.
 */
#define LOAD_PARAM(name, defval) do { \
    const long param_val_ = pymap_getlong(p_sys_params, #name, defval); \
    assert(param_val_ >= 0); \
    assert(param_val_ <= NBA_MAX_ ## name); \
    system_params.insert({{#name, param_val_}}); \
} while (0)
LOAD_PARAM(IO_BATCH_SIZE, 64);
LOAD_PARAM(IO_DESC_PER_HWRXQ, 1024);
LOAD_PARAM(IO_DESC_PER_HWTXQ, 1024);

LOAD_PARAM(COMP_BATCH_SIZE, 64);
LOAD_PARAM(COMP_PREPKTQ_LENGTH, 32);

LOAD_PARAM(COPROC_PPDEPTH, 64);
LOAD_PARAM(COPROC_INPUTQ_LENGTH, 64);
LOAD_PARAM(COPROC_COMPLETIONQ_LENGTH, 64);
LOAD_PARAM(COPROC_CTX_PER_COMPTHREAD, 1);

LOAD_PARAM(TASKPOOL_SIZE, 256);
LOAD_PARAM(BATCHPOOL_SIZE, 512);
#undef LOAD_PARAM

/* Retrieve io thread configurations. */
Expand Down
20 changes: 15 additions & 5 deletions lib/config.hh
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,32 @@
#define NBA_MAX_QUEUES_PER_PORT (128)
#define NBA_MAX_COPROCESSORS (2) // Max number of coprocessor devices
#define NBA_MAX_COPROCESSOR_TYPES (1) // Max number of device types

#define NBA_MAX_PACKET_SIZE (2048)
/* Upper bounds for the IO and computation batch sizes.
 * The names follow the NBA_MAX_<PARAM> scheme so that they line up with the
 * IO_BATCH_SIZE / COMP_BATCH_SIZE system parameters.  Both branches must
 * define the same set of names; otherwise builds with NBA_NO_HUGE fail
 * wherever the unified spellings are used. */
#ifdef NBA_NO_HUGE
#define NBA_MAX_IO_BATCH_SIZE (4u)
#define NBA_MAX_COMP_BATCH_SIZE (4u)
#else
#define NBA_MAX_IO_BATCH_SIZE (256u)
#define NBA_MAX_COMP_BATCH_SIZE (256u)
#endif
/* Legacy spellings, kept as aliases so that code not yet converted to the
 * unified names keeps compiling with identical values. */
#define NBA_MAX_IOBATCH_SIZE NBA_MAX_IO_BATCH_SIZE
#define NBA_MAX_COMPBATCH_SIZE NBA_MAX_COMP_BATCH_SIZE
#define NBA_MAX_SW_RXRING_LENGTH (2048u)
#define NBA_MAX_COMP_PPDEPTH (256u)
#define NBA_MAX_COPROC_PPDEPTH (32u)
#define NBA_MAX_COMP_PREPKTQ_LENGTH (256u)
#define NBA_MAX_IO_DESC_PER_HWRXQ (1024)
#define NBA_MAX_IO_DESC_PER_HWTXQ (1024)

#define NBA_MAX_COPROC_PPDEPTH (64u)
#define NBA_MAX_COPROC_INPUTQ_LENGTH (64)
#define NBA_MAX_COPROC_COMPLETIONQ_LENGTH (64)
#define NBA_MAX_COPROC_CTX_PER_COMPTHREAD (1)

#define NBA_MAX_TASKPOOL_SIZE (2048u)
#define NBA_MAX_BATCHPOOL_SIZE (2048u)

#define NBA_MAX_ANNOTATION_SET_SIZE (7)
#define NBA_MAX_NODELOCALSTORAGE_ENTRIES (16)
#define NBA_MAX_KERNEL_OVERLAP (8)
#define NBA_MAX_DATABLOCKS (12) // If too large (e.g., 64), batch_pool can not be allocated.

#define NBA_OQ (true) // Use output-queuing semantics when possible.
#define NBA_CPU_MICROBENCH // Enable support for PAPI library for microbenchmarks.

Expand Down
8 changes: 4 additions & 4 deletions lib/datablock.hh
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,17 @@ struct write_roi_info {
struct item_size_info {
union {
uint16_t size;
uint16_t sizes[NBA_MAX_COMPBATCH_SIZE * 12];
uint16_t sizes[NBA_MAX_COMP_BATCH_SIZE * 12];
};
uint16_t offsets[NBA_MAX_COMPBATCH_SIZE * 12];
uint16_t offsets[NBA_MAX_COMP_BATCH_SIZE * 12];
};
#else
struct item_size_info {
union {
uint16_t size;
uint16_t sizes[NBA_MAX_COMPBATCH_SIZE * 96];
uint16_t sizes[NBA_MAX_COMP_BATCH_SIZE * 96];
};
uint16_t offsets[NBA_MAX_COMPBATCH_SIZE * 96];
uint16_t offsets[NBA_MAX_COMP_BATCH_SIZE * 96];
};
#endif

Expand Down
2 changes: 0 additions & 2 deletions lib/elementgraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)
OffloadableElement *offloadable = dynamic_cast<OffloadableElement*>(current_elem);
assert(offloadable != nullptr);
if (lb_decision != -1) {

/* Get or initialize the task object.
* This step is always executed for every input batch
* passing every offloadable element. */
Expand Down Expand Up @@ -276,7 +275,6 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)

} else {
/* If not offloaded, run the element's CPU-version handler. */

batch_disposition = current_elem->_process_batch(input_port, batch);
double _cpu_end = rte_rdtsc();
batch->compute_time += (_cpu_end - _cpu_start);
Expand Down
16 changes: 9 additions & 7 deletions lib/io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ struct rx_state {
#ifdef TEST_MINIMAL_L2FWD
struct packet_batch {
unsigned count;
struct rte_mbuf *pkts[NBA_MAX_COMPBATCH_SIZE];
struct rte_mbuf *pkts[NBA_MAX_COMP_BATCH_SIZE];
};
#endif

Expand Down Expand Up @@ -371,6 +371,7 @@ static void io_local_stat_timer_cb(struct ev_loop *loop, struct ev_timer *watche
ctx->tx_pkt_thruput += ctx->port_stats[j].num_sent_pkts;
memset(&ctx->port_stats[j], 0, sizeof(struct io_port_stat));
}
#ifdef NBA_CPU_MICROBENCH
char buf[2048];
char *bufp = &buf[0];
for (int e = 0; e < 5; e++) {
Expand All @@ -380,6 +381,7 @@ static void io_local_stat_timer_cb(struct ev_loop *loop, struct ev_timer *watche
memset(ctx->papi_ctr_rx, 0, sizeof(long long) * 5);
memset(ctx->papi_ctr_tx, 0, sizeof(long long) * 5);
memset(ctx->papi_ctr_comp, 0, sizeof(long long) * 5);
#endif
/* Inform the master to check updates. */
rte_atomic16_inc(ctx->node_master_flag);
ev_async_send(ctx->node_master_ctx->loop, ctx->node_stat_watcher);
Expand Down Expand Up @@ -693,8 +695,8 @@ int io_loop(void *arg)

// the way numa index numbered for each cpu core is checked in main(). (see 'is_numa_idx_grouped' in main())
const unsigned num_nodes = numa_num_configured_nodes();
struct rte_mbuf *pkts[NBA_MAX_IOBATCH_SIZE * NBA_MAX_QUEUES_PER_PORT];
struct rte_mbuf *drop_pkts[NBA_MAX_IOBATCH_SIZE];
struct rte_mbuf *pkts[NBA_MAX_IO_BATCH_SIZE * NBA_MAX_QUEUES_PER_PORT];
struct rte_mbuf *drop_pkts[NBA_MAX_IO_BATCH_SIZE];
struct timespec sleep_ts;
unsigned i, j;
char temp[1024];
Expand Down Expand Up @@ -764,7 +766,7 @@ int io_loop(void *arg)
snprintf(temp, RTE_MEMPOOL_NAMESIZE,
"comp.batch.%u:%u@%u", ctx->loc.node_id, ctx->loc.local_thread_idx, ctx->loc.core_id);
ctx->comp_ctx->batch_pool = rte_mempool_create(temp, ctx->comp_ctx->num_batchpool_size + 1,
sizeof(PacketBatch), CACHE_LINE_SIZE,
sizeof(PacketBatch), 0, //(unsigned) (ctx->comp_ctx->num_batchpool_size / 1.5),
0, nullptr, nullptr,
comp_packetbatch_init, nullptr,
ctx->loc.node_id, 0);
Expand All @@ -776,7 +778,7 @@ int io_loop(void *arg)
size_t dbstate_pool_size = NBA_MAX_COPROC_PPDEPTH;
size_t dbstate_item_size = sizeof(struct datablock_tracker) * NBA_MAX_DATABLOCKS;
ctx->comp_ctx->dbstate_pool = rte_mempool_create(temp, dbstate_pool_size + 1,
dbstate_item_size, CACHE_LINE_SIZE,
dbstate_item_size, 0, //(unsigned) (dbstate_pool_size / 1.5),
0, nullptr, nullptr,
comp_dbstate_init, nullptr,
ctx->loc.node_id, 0);
Expand All @@ -788,7 +790,7 @@ int io_loop(void *arg)
snprintf(temp, RTE_MEMPOOL_NAMESIZE,
"comp.task.%u:%u@%u", ctx->loc.node_id, ctx->loc.local_thread_idx, ctx->loc.core_id);
ctx->comp_ctx->task_pool = rte_mempool_create(temp, ctx->comp_ctx->num_taskpool_size + 1,
sizeof(OffloadTask), CACHE_LINE_SIZE,
sizeof(OffloadTask), 0, //(unsigned) (ctx->comp_ctx->num_taskpool_size / 1.5),
0, nullptr, nullptr,
comp_task_init, nullptr,
ctx->loc.node_id, 0);
Expand Down Expand Up @@ -1054,7 +1056,7 @@ int io_loop(void *arg)
prev_tsc = cur_tsc;

} // end of rxq scanning
assert(total_recv_cnt <= NBA_MAX_IOBATCH_SIZE * NBA_MAX_COMP_PPDEPTH);
assert(total_recv_cnt <= NBA_MAX_IO_BATCH_SIZE * ctx->num_hw_rx_queues);
#ifdef NBA_CPU_MICROBENCH
{
long long ctr[5];
Expand Down
12 changes: 6 additions & 6 deletions lib/packetbatch.hh
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ public:
delay_start(0), compute_time(0)
{
#ifdef DEBUG
memset(&results[0], 0xdd, sizeof(int) * NBA_MAX_COMPBATCH_SIZE);
memset(&excluded[0], 0xcc, sizeof(bool) * NBA_MAX_COMPBATCH_SIZE);
memset(&packets[0], 0xbb, sizeof(struct rte_mbuf*) * NBA_MAX_COMPBATCH_SIZE);
memset(&results[0], 0xdd, sizeof(int) * NBA_MAX_COMP_BATCH_SIZE);
memset(&excluded[0], 0xcc, sizeof(bool) * NBA_MAX_COMP_BATCH_SIZE);
memset(&packets[0], 0xbb, sizeof(struct rte_mbuf*) * NBA_MAX_COMP_BATCH_SIZE);
#endif
}

Expand All @@ -50,9 +50,9 @@ public:
double compute_time;

struct annotation_set banno __rte_cache_aligned; /** Batch-level annotations. */
bool excluded[NBA_MAX_COMPBATCH_SIZE] __rte_cache_aligned;
struct rte_mbuf *packets[NBA_MAX_COMPBATCH_SIZE] __rte_cache_aligned;
int results[NBA_MAX_COMPBATCH_SIZE] __rte_cache_aligned;
bool excluded[NBA_MAX_COMP_BATCH_SIZE] __rte_cache_aligned;
struct rte_mbuf *packets[NBA_MAX_COMP_BATCH_SIZE] __rte_cache_aligned;
int results[NBA_MAX_COMP_BATCH_SIZE] __rte_cache_aligned;
};

}
Expand Down
3 changes: 0 additions & 3 deletions lib/types.hh
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,11 @@ public:
struct core_location loc;
unsigned num_tx_ports;
unsigned num_nodes;
unsigned num_comp_ppdepth;
unsigned num_coproc_ppdepth;
unsigned num_combatch_size;
unsigned num_batchpool_size;
unsigned num_taskpool_size;
unsigned task_completion_queue_size;
unsigned rx_queue_size;
unsigned rx_wakeup_threshold;

struct rte_mempool *batch_pool;
struct rte_mempool *dbstate_pool;
Expand Down
17 changes: 7 additions & 10 deletions main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ int main(int argc, char **argv)
rx_conf.rx_thresh.wthresh = 4;
rx_conf.rx_free_thresh = 32;
rx_conf.rx_drop_en = 0; /* when enabled, drop packets if no descriptors are available */
const unsigned num_rx_desc = system_params["IO_RXDESC_PER_HWRXQ"];
const unsigned num_rx_desc = system_params["IO_DESC_PER_HWRXQ"];

/* Per TX-queue configuration */
struct rte_eth_txconf tx_conf;
Expand All @@ -370,7 +370,7 @@ int main(int argc, char **argv)
tx_conf.tx_rs_thresh = 32;
tx_conf.tx_free_thresh = 0; /* use PMD default value */
tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS;
const unsigned num_tx_desc = system_params["IO_TXDESC_PER_HWTXQ"];
const unsigned num_tx_desc = system_params["IO_DESC_PER_HWTXQ"];

/* According to dpdk-dev mailing list,
* num_mbufs for the whole system should be greater than:
Expand Down Expand Up @@ -499,7 +499,7 @@ int main(int argc, char **argv)
unsigned queue_length = 0;
switch (conf.template_) {
case SWRXQ:
queue_length = system_params["COMP_RXQ_LENGTH"];
queue_length = 32; // FIXME: unused
break;
case TASKINQ:
queue_length = system_params["COPROC_INPUTQ_LENGTH"];
Expand Down Expand Up @@ -536,7 +536,7 @@ int main(int argc, char **argv)

/* Some sanity checks... */
if (emulate_io) {
long expected_inflight_batches = num_mbufs / num_io_threads / system_params["COMP_PPDEPTH"];
long expected_inflight_batches = num_mbufs / num_io_threads / system_params["COMP_BATCH_SIZE"];
RTE_LOG(DEBUG, MAIN, "coproc_ppdepth = %ld, max.# in-flight batches per IO thread = %ld\n",
system_params["COPROC_PPDEPTH"], expected_inflight_batches);
//if (system_params["COPROC_PPDEPTH"] > expected_inflight_batches) {
Expand Down Expand Up @@ -717,9 +717,6 @@ int main(int argc, char **argv)
ctx->inspector = NULL;

ctx->num_combatch_size = system_params["COMP_BATCH_SIZE"];
ctx->rx_queue_size = system_params["COMP_RXQ_LENGTH"];
ctx->rx_wakeup_threshold = system_params["COMP_RXQ_THRES"];
ctx->num_comp_ppdepth = system_params["COMP_PPDEPTH"];
ctx->num_coproc_ppdepth = system_params["COPROC_PPDEPTH"];
ctx->num_batchpool_size = system_params["BATCHPOOL_SIZE"];
ctx->num_taskpool_size = system_params["TASKPOOL_SIZE"];
Expand Down Expand Up @@ -947,18 +944,18 @@ int main(int argc, char **argv)
*/
snprintf(ring_name, RTE_RING_NAMESIZE, "dropq.%u:%u@%u",
ctx->loc.node_id, ctx->loc.local_thread_idx, ctx->loc.core_id);
ctx->drop_queue = rte_ring_create(ring_name, 8 * NBA_MAX_COMPBATCH_SIZE,
ctx->drop_queue = rte_ring_create(ring_name, 8 * NBA_MAX_COMP_BATCH_SIZE,
node_id, RING_F_SC_DEQ);
assert(NULL != ctx->drop_queue);

ctx->num_tx_ports = num_ports;
for (k = 0; k < num_ports; k++) {
snprintf(ring_name, RTE_RING_NAMESIZE, "txq%u.%u:%u@%u",
k, ctx->loc.node_id, ctx->loc.local_thread_idx, ctx->loc.core_id);
ctx->tx_queues[k] = rte_ring_create(ring_name, 8 * NBA_MAX_COMPBATCH_SIZE,
ctx->tx_queues[k] = rte_ring_create(ring_name, 8 * NBA_MAX_COMP_BATCH_SIZE,
node_id, RING_F_SC_DEQ);
assert(NULL != ctx->tx_queues[k]);
assert(0 == rte_ring_set_water_mark(ctx->tx_queues[k], (8 * NBA_MAX_COMPBATCH_SIZE) - 16));
assert(0 == rte_ring_set_water_mark(ctx->tx_queues[k], (8 * NBA_MAX_COMP_BATCH_SIZE) - 16));
}

snprintf(ring_name, RTE_RING_NAMESIZE, "reqring.%u:%u@%u",
Expand Down

0 comments on commit d2c8184

Please sign in to comment.