diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt
index eea8bd70a..45f0b2d0a 100644
--- a/parsec/CMakeLists.txt
+++ b/parsec/CMakeLists.txt
@@ -17,6 +17,7 @@ set(BASE_SOURCES
   class/parsec_value_array.c
   class/parsec_hash_table.c
   class/parsec_rwlock.c
+  class/parsec_rbtree.c
   class/parsec_future.c
   class/parsec_datacopy_future.c
   class/info.c
@@ -323,6 +324,7 @@ install(FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/class/list.h
   ${CMAKE_CURRENT_SOURCE_DIR}/class/info.h
   ${CMAKE_CURRENT_SOURCE_DIR}/class/parsec_future.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/class/parsec_rbtree.h
   DESTINATION ${PARSEC_INSTALL_INCLUDEDIR}/parsec/class)
 
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/scheduling.h
diff --git a/parsec/class/parsec_rbtree.c b/parsec/class/parsec_rbtree.c
new file mode 100644
index 000000000..2b4ad5def
--- /dev/null
+++ b/parsec/class/parsec_rbtree.c
@@ -0,0 +1,398 @@
+#include <stdbool.h>
+
+#include "parsec/parsec_config.h"
+
+#include "parsec/class/parsec_rbtree.h"
+#include "parsec/constants.h"
+
+/* Slot holding the left child of node (the list item's list_prev pointer). */
+static inline parsec_rbtree_node_t** left_node(parsec_rbtree_node_t* node) {
+    return (parsec_rbtree_node_t**)&node->super.list_prev;
+}
+
+/* Slot holding the right child of node (the list item's list_next pointer). */
+static inline parsec_rbtree_node_t** right_node(parsec_rbtree_node_t* node) {
+    return (parsec_rbtree_node_t**)&node->super.list_next;
+}
+
+/* left and right children, using list pointers */
+#define LEFT(node) (*left_node(node))
+#define RIGHT(node) (*right_node(node))
+
+/**
+ * The rbtree node object instance: newly constructed nodes are black.
+ */
+static inline void
+parsec_rbtree_node_construct( parsec_rbtree_node_t* item )
+{
+    item->color = PARSEC_RBTREE_BLACK;
+}
+
+PARSEC_OBJ_CLASS_INSTANCE(parsec_rbtree_node_t, parsec_list_item_t,
+                          parsec_rbtree_node_construct, NULL);
+
+/* Initialize an empty tree; offset is what COMPARISON_VAL uses to reach a node's key. */
+void parsec_rbtree_init(parsec_rbtree_t* tree, size_t offset) {
+    PARSEC_OBJ_CONSTRUCT(&tree->nil_element, parsec_rbtree_node_t);
+    tree->nil = &tree->nil_element;
+    tree->root = tree->nil;
+    tree->comp_offset = offset;
+}
+
+/* Destruct the sentinel and clear the tree fields; nodes still linked are left untouched. */
+void parsec_rbtree_fini(parsec_rbtree_t* tree) {
+    PARSEC_OBJ_DESTRUCT(&tree->nil_element);
+    tree->nil = NULL;
+    tree->root = NULL;
+    tree->comp_offset = 0;
+}
+
+/* Rotate left around x: x's right child takes x's place and x becomes its left child. */
+static inline
+void parsec_rbtree_left_rotate(parsec_rbtree_t *tree, parsec_rbtree_node_t *x) {
+    parsec_rbtree_node_t *y = RIGHT(x);
+    RIGHT(x) = LEFT(y);
+    if (LEFT(y) != tree->nil) {
+        LEFT(y)->parent = x;
+    }
+    y->parent = x->parent;
+    if (x->parent == tree->nil) {
+        tree->root = y;
+    } else if (x == LEFT(x->parent)) {
+        LEFT(x->parent) = y;
+    } else {
+        RIGHT(x->parent) = y;
+    }
+    LEFT(y) = x;
+    x->parent = y;
+}
+
+/* Rotate right around y: y's left child takes y's place and y becomes its right child. */
+static inline
+void parsec_rbtree_right_rotate(parsec_rbtree_t *tree, parsec_rbtree_node_t *y) {
+    parsec_rbtree_node_t *x = LEFT(y);
+    LEFT(y) = RIGHT(x);
+    if (RIGHT(x) != tree->nil) {
+        RIGHT(x)->parent = y;
+    }
+    x->parent = y->parent;
+    if (y->parent == tree->nil) {
+        tree->root = x;
+    } else if (y == LEFT(y->parent)) {
+        LEFT(y->parent) = x;
+    } else {
+        RIGHT(y->parent) = x;
+    }
+    RIGHT(x) = y;
+    y->parent = x;
+}
+
+/* Recolor and rotate upwards from the freshly inserted red node z until no red node has a red parent. */
+static void parsec_rbtree_insert_fixup(parsec_rbtree_t *tree, parsec_rbtree_node_t *z) {
+    while (z->parent->color == PARSEC_RBTREE_RED) {
+        if (z->parent == LEFT(z->parent->parent)) {
+            parsec_rbtree_node_t *y = RIGHT(z->parent->parent);
+            if (y->color == PARSEC_RBTREE_RED) {
+                z->parent->color = PARSEC_RBTREE_BLACK;
+                y->color = PARSEC_RBTREE_BLACK;
+                z->parent->parent->color = PARSEC_RBTREE_RED;
+                z = z->parent->parent;
+            } else {
+                if (z == RIGHT(z->parent)) {
+                    z = z->parent;
+                    parsec_rbtree_left_rotate(tree, z);
+                }
+                z->parent->color = PARSEC_RBTREE_BLACK;
+                z->parent->parent->color = PARSEC_RBTREE_RED;
+                parsec_rbtree_right_rotate(tree, z->parent->parent);
+            }
+        } else {
+            parsec_rbtree_node_t *y = LEFT(z->parent->parent);
+            if (y->color == PARSEC_RBTREE_RED) {
+                z->parent->color = PARSEC_RBTREE_BLACK;
+                y->color = PARSEC_RBTREE_BLACK;
+                z->parent->parent->color = PARSEC_RBTREE_RED;
+                z = z->parent->parent;
+            } else {
+                if (z == LEFT(z->parent)) {
+                    z = z->parent;
+                    parsec_rbtree_right_rotate(tree, z);
+                }
+                z->parent->color = PARSEC_RBTREE_BLACK;
+                z->parent->parent->color = PARSEC_RBTREE_RED;
+                parsec_rbtree_left_rotate(tree, z->parent->parent);
+            }
+        }
+    }
+    tree->root->color = PARSEC_RBTREE_BLACK;
+}
+
+/* Insert node at the leaf position given by its key, color it red and rebalance. */
+void parsec_rbtree_insert(parsec_rbtree_t *tree, parsec_rbtree_node_t *node) {
+    node->color = PARSEC_RBTREE_RED;
+    node->parent = tree->nil;
+    parsec_rbtree_node_t *z = node;
+    parsec_rbtree_node_t *y = tree->nil;
+    parsec_rbtree_node_t *x = tree->root;
+    while (x != tree->nil) {
+        y = x;
+        if (A_LOWER_PRIORITY_THAN_B(z, x, tree->comp_offset)) {
+            x = LEFT(x);
+        } else {
+            x = RIGHT(x);
+        }
+    }
+    z->parent = y;
+    if (y == tree->nil) {
+        tree->root = z;
+    } else if (A_LOWER_PRIORITY_THAN_B(z, y, tree->comp_offset)) {
+        LEFT(y) = z;
+    } else {
+        RIGHT(y) = z;
+    }
+    LEFT(z) = tree->nil;
+    RIGHT(z) = tree->nil;
+    z->color = PARSEC_RBTREE_RED;
+    parsec_rbtree_insert_fixup(tree, z);
+}
+
+/* Rebalance after a black node was unlinked; x is the node that moved into its place (possibly the sentinel). */
+static void parsec_rbtree_delete_fixup(parsec_rbtree_t *tree, parsec_rbtree_node_t *x) {
+    while (x != tree->root && x->color == PARSEC_RBTREE_BLACK) {
+        if (x == LEFT(x->parent)) {
+            parsec_rbtree_node_t *w = RIGHT(x->parent);
+            if (w->color == PARSEC_RBTREE_RED) {
+                w->color = PARSEC_RBTREE_BLACK;
+                x->parent->color = PARSEC_RBTREE_RED;
+                parsec_rbtree_left_rotate(tree, x->parent);
+                w = RIGHT(x->parent);
+            }
+            if (LEFT(w)->color == PARSEC_RBTREE_BLACK && RIGHT(w)->color == PARSEC_RBTREE_BLACK) {
+                w->color = PARSEC_RBTREE_RED;
+                x = x->parent;
+            } else {
+                if (RIGHT(w)->color == PARSEC_RBTREE_BLACK) {
+                    LEFT(w)->color = PARSEC_RBTREE_BLACK;
+                    w->color = PARSEC_RBTREE_RED;
+                    parsec_rbtree_right_rotate(tree, w);
+                    w = RIGHT(x->parent);
+                }
+                w->color = x->parent->color;
+                x->parent->color = PARSEC_RBTREE_BLACK;
+                RIGHT(w)->color = PARSEC_RBTREE_BLACK;
+                parsec_rbtree_left_rotate(tree, x->parent);
+                x = tree->root;
+            }
+        } else {
+            parsec_rbtree_node_t *w = LEFT(x->parent);
+            if (w->color == PARSEC_RBTREE_RED) {
+                w->color = PARSEC_RBTREE_BLACK;
+                x->parent->color = PARSEC_RBTREE_RED;
+                parsec_rbtree_right_rotate(tree, x->parent);
+                w = LEFT(x->parent);
+            }
+            if (RIGHT(w)->color == PARSEC_RBTREE_BLACK && LEFT(w)->color == PARSEC_RBTREE_BLACK) {
+                w->color = PARSEC_RBTREE_RED;
+                x = x->parent;
+            } else {
+                if (LEFT(w)->color == PARSEC_RBTREE_BLACK) {
+                    RIGHT(w)->color = PARSEC_RBTREE_BLACK;
+                    w->color = PARSEC_RBTREE_RED;
+                    parsec_rbtree_left_rotate(tree, w);
+                    w = LEFT(x->parent);
+                }
+                w->color = x->parent->color;
+                x->parent->color = PARSEC_RBTREE_BLACK;
+                LEFT(w)->color = PARSEC_RBTREE_BLACK;
+                parsec_rbtree_right_rotate(tree, x->parent);
+                x = tree->root;
+            }
+        }
+    }
+    x->color = PARSEC_RBTREE_BLACK;
+}
+
+/* Make v take u's place below u's parent; v may be the sentinel, whose parent is updated too. */
+static inline void parsec_rbtree_transplant(parsec_rbtree_t *tree, parsec_rbtree_node_t *u, parsec_rbtree_node_t *v) {
+    if (u->parent == tree->nil) {
+        tree->root = v;
+    } else if (u == LEFT(u->parent)) {
+        LEFT(u->parent) = v;
+    } else {
+        RIGHT(u->parent) = v;
+    }
+    v->parent = u->parent;
+}
+
+/* Leftmost node of the subtree rooted at x. */
+parsec_rbtree_node_t* parsec_rbtree_minimum(parsec_rbtree_t *tree, parsec_rbtree_node_t *x) {
+    while (LEFT(x) != tree->nil) {
+        x = LEFT(x);
+    }
+    return x;
+}
+
+/* Unlink z from the tree and rebalance; z itself is neither destructed nor released. */
+void parsec_rbtree_remove(parsec_rbtree_t *tree, parsec_rbtree_node_t *z) {
+    parsec_rbtree_node_t *y = z;
+    parsec_rbtree_node_t *x;
+    parsec_rbtree_color_e y_original_color = y->color;
+    if (LEFT(z) == tree->nil) {
+        x = RIGHT(z);
+        parsec_rbtree_transplant(tree, z, RIGHT(z));
+    } else if (RIGHT(z) == tree->nil) {
+        x = LEFT(z);
+        parsec_rbtree_transplant(tree, z, LEFT(z));
+    } else {
+        y = parsec_rbtree_minimum(tree, RIGHT(z));
+        y_original_color = y->color;
+        x = RIGHT(y);
+        if (y->parent == z) {
+            x->parent = y;
+        } else {
+            parsec_rbtree_transplant(tree, y, RIGHT(y));
+            RIGHT(y) = RIGHT(z);
+            RIGHT(y)->parent = y;
+        }
+        parsec_rbtree_transplant(tree, z, y);
+        LEFT(y) = LEFT(z);
+        LEFT(y)->parent = y;
+        y->color = z->color;
+    }
+    if (y_original_color == PARSEC_RBTREE_BLACK) {
+        parsec_rbtree_delete_fixup(tree, x);
+    }
+}
+
+/* Return the node whose key equals data, or NULL if there is none. */
+parsec_rbtree_node_t* parsec_rbtree_find(parsec_rbtree_t *tree, int data) {
+    parsec_rbtree_node_t *current = tree->root;
+    while (current != tree->nil) {
+        int compval = COMPARISON_VAL(current, tree->comp_offset);
+        if (compval == data) {
+            return current;
+        } else if (compval < data) {
+            current = RIGHT(current);
+        } else {
+            current = LEFT(current);
+        }
+    }
+    return NULL; // data not found
+}
+
+/* Return the node whose key equals data, else the node with the smallest key above data, else NULL. */
+parsec_rbtree_node_t* parsec_rbtree_find_or_larger(parsec_rbtree_t *tree, int data) {
+    parsec_rbtree_node_t *current = tree->root;
+    parsec_rbtree_node_t *larger = tree->nil;
+    while (current != tree->nil) {
+        int compval = COMPARISON_VAL(current, tree->comp_offset);
+        if (compval == data) {
+            return current;
+        } else if (compval < data) {
+            current = RIGHT(current);
+        } else {
+            larger = current;
+            current = LEFT(current);
+        }
+    }
+    if (larger == tree->nil) return NULL;
+    return larger; // data not found
+}
+
+
+/* Rightmost node of the subtree rooted at x. */
+static parsec_rbtree_node_t* parsec_rbtree_maximum(parsec_rbtree_t *tree, parsec_rbtree_node_t *x) {
+    while (RIGHT(x) != tree->nil) {
+        x = RIGHT(x);
+    }
+    return x;
+}
+
+/* In-order predecessor of node, or tree->nil when node is the first in-order element. */
+static parsec_rbtree_node_t* parsec_rbtree_predecessor(parsec_rbtree_t *tree, parsec_rbtree_node_t *node) {
+    if (LEFT(node) != tree->nil) {
+        return parsec_rbtree_maximum(tree, LEFT(node));
+    }
+    /* climb until we arrive from a right child */
+    parsec_rbtree_node_t *up = node->parent;
+    while (up != tree->nil && node == LEFT(up)) {
+        node = up;
+        up = up->parent;
+    }
+    return up;
+}
+
+/* In-order successor of node, or tree->nil when node is the last in-order element. */
+static parsec_rbtree_node_t* parsec_rbtree_successor(parsec_rbtree_t *tree, parsec_rbtree_node_t *node) {
+    if (RIGHT(node) != tree->nil) {
+        return parsec_rbtree_minimum(tree, RIGHT(node));
+    }
+    /* climb until we arrive from a left child */
+    parsec_rbtree_node_t *up = node->parent;
+    while (up != tree->nil && node == RIGHT(up)) {
+        node = up;
+        up = up->parent;
+    }
+    return up;
+}
+
+/**
+ * Change the key of a node that is already part of the tree.
+ *
+ * The key is rewritten in place when it still lies strictly between the
+ * keys of the node's in-order neighbours; otherwise the node is removed
+ * and inserted again so that ordering and balance are restored. Comparing
+ * against the parent and the children only is not enough (an ancestor
+ * further up may bound the node), and the root has no parent at all.
+ *
+ * @return PARSEC_SUCCESS once the node carries newdata, PARSEC_ERR_EXISTS
+ *         (tree untouched) if another node already carries newdata.
+ */
+int parsec_rbtree_update_node(parsec_rbtree_t *tree, parsec_rbtree_node_t *node, int newdata)
+{
+    if (COMPARISON_VAL(node, tree->comp_offset) == newdata) {
+        return PARSEC_SUCCESS; /* nothing to change */
+    }
+
+    parsec_rbtree_node_t *prev = parsec_rbtree_predecessor(tree, node);
+    parsec_rbtree_node_t *next = parsec_rbtree_successor(tree, node);
+    bool fits_in_place =
+        (prev == tree->nil || COMPARISON_VAL(prev, tree->comp_offset) < newdata) &&
+        (next == tree->nil || newdata < COMPARISON_VAL(next, tree->comp_offset));
+    if (fits_in_place) {
+        /* ordering is preserved, no relinking needed */
+        COMPARISON_VAL(node, tree->comp_offset) = newdata;
+        return PARSEC_SUCCESS;
+    }
+
+    if (NULL != parsec_rbtree_find(tree, newdata)) {
+        return PARSEC_ERR_EXISTS;
+    }
+
+    /* remove and reinsert to ensure ordering and balancing */
+    parsec_rbtree_remove(tree, node);
+    COMPARISON_VAL(node, tree->comp_offset) = newdata;
+    parsec_rbtree_insert(tree, node);
+    return PARSEC_SUCCESS;
+}
+
+/* In-order traversal (left subtree, node, right subtree) of the subtree rooted at node. */
+static void parsec_rbtree_foreach_node(
+    parsec_rbtree_t* tree,
+    parsec_rbtree_node_t* node,
+    parsec_rbtree_visitor_cb *fn,
+    void *cbdata)
+{
+    if (tree->nil != node) {
+        parsec_rbtree_foreach_node(tree, LEFT(node), fn, cbdata);
+        fn(node, cbdata);
+        parsec_rbtree_foreach_node(tree, RIGHT(node), fn, cbdata);
+    }
+}
+
+/* Call fn(node, cbdata) on every node, in-order; a NULL tree is ignored. */
+void parsec_rbtree_foreach(parsec_rbtree_t *tree, parsec_rbtree_visitor_cb *fn, void *cbdata) {
+    if (NULL != tree) {
+        parsec_rbtree_foreach_node(tree, tree->root, fn, cbdata);
+    }
+}
diff --git a/parsec/class/parsec_rbtree.h b/parsec/class/parsec_rbtree.h
new file mode 100644
index 000000000..c25a0f9b9
--- /dev/null
+++ b/parsec/class/parsec_rbtree.h
@@ -0,0 +1,53 @@
+#ifndef PARSEC_RBTREE_H
+#define PARSEC_RBTREE_H
+
+#include "parsec/class/list_item.h"
+
+
+typedef enum parsec_rbtree_color_e { PARSEC_RBTREE_RED, PARSEC_RBTREE_BLACK } parsec_rbtree_color_e;
+
+typedef struct parsec_rbtree_node_t {
+    parsec_list_item_t super; // use prev/next for left/right
+    parsec_rbtree_color_e color;
+    struct parsec_rbtree_node_t *parent;
+} parsec_rbtree_node_t;
+
+PARSEC_DECLSPEC PARSEC_OBJ_CLASS_DECLARATION(parsec_rbtree_node_t);
+
+typedef struct parsec_rbtree_t {
+    parsec_rbtree_node_t nil_element; /* sentinel standing in for every missing child/parent */
+    parsec_rbtree_node_t *root;
+    parsec_rbtree_node_t *nil;        /* points at nil_element once initialized */
+    size_t comp_offset;               /* passed to the key comparison macros */
+} parsec_rbtree_t;
+
+typedef void (parsec_rbtree_visitor_cb)(parsec_rbtree_node_t*, void*);
+
+/* Initialize an empty tree whose node keys are read through compare_offset. */
+void parsec_rbtree_init(parsec_rbtree_t *tree, size_t compare_offset);
+
+/* Reset the tree; nodes still linked into it are not released. */
+void parsec_rbtree_fini(parsec_rbtree_t* tree);
+
+/* Link node into the tree according to its key. */
+void parsec_rbtree_insert(parsec_rbtree_t *tree, parsec_rbtree_node_t *node);
+
+/* Leftmost node of the subtree rooted at x. */
+parsec_rbtree_node_t* parsec_rbtree_minimum(parsec_rbtree_t *tree, parsec_rbtree_node_t *x);
+
+/* Unlink z from the tree; z is not released. */
+void parsec_rbtree_remove(parsec_rbtree_t *tree, parsec_rbtree_node_t *z);
+
+/* Node whose key equals data, or NULL. */
+parsec_rbtree_node_t* parsec_rbtree_find(parsec_rbtree_t *tree, int data);
+
+/* Node whose key equals data, else the one with the smallest larger key, else NULL. */
+parsec_rbtree_node_t* parsec_rbtree_find_or_larger(parsec_rbtree_t *tree, int data);
+
+/* Change node's key to newdata; PARSEC_ERR_EXISTS if another node already has that key. */
+int parsec_rbtree_update_node(parsec_rbtree_t *tree, parsec_rbtree_node_t *node, int newdata);
+
+/* Visit every node in-order (left subtree, node, right subtree). */
+void parsec_rbtree_foreach(parsec_rbtree_t *tree, parsec_rbtree_visitor_cb *fn, void *cbdata);
+
+#endif // PARSEC_RBTREE_H
diff --git a/parsec/utils/zone_malloc.c b/parsec/utils/zone_malloc.c
index 81dad5ccd..3c6f3d00f 100644
--- a/parsec/utils/zone_malloc.c
+++ b/parsec/utils/zone_malloc.c
@@ -8,8 +8,41 @@
 #include "parsec/utils/zone_malloc.h"
 #include "parsec/utils/debug.h"
 
+#include "parsec/class/list.h"
+#include "parsec/class/parsec_rbtree.h"
+
 #include
 
+typedef struct zone_malloc_chunk_list_t {
+    parsec_rbtree_node_t super;
+    parsec_list_t list; /* list of free segments of specific size */
+    int nb_units; /* size of free segments */
+} zone_malloc_chunk_list_t;
+
+static inline void
+zone_malloc_chunk_list_construct( zone_malloc_chunk_list_t* item )
+{
+    item->nb_units = 0;
+    PARSEC_OBJ_CONSTRUCT(&item->list, parsec_list_t);
+}
+
+PARSEC_OBJ_CLASS_INSTANCE(zone_malloc_chunk_list_t, parsec_rbtree_node_t,
+                          zone_malloc_chunk_list_construct, NULL);
+
+static zone_malloc_chunk_list_t* allocate_chunk_list(zone_malloc_t *gdata, int nb_units) {
+    zone_malloc_chunk_list_t* fl;
+    /* reuse a recycled chunk list when one is available */
+    if (!parsec_lifo_is_empty(&gdata->rbtree_free_list)) {
+        fl = (zone_malloc_chunk_list_t*)parsec_lifo_pop(&gdata->rbtree_free_list);
+    } else {
+        /* allocate new */
+        fl = PARSEC_OBJ_NEW(zone_malloc_chunk_list_t);
+    }
+    PARSEC_LIST_ITEM_SINGLETON(&fl->super.super);
+    fl->nb_units = nb_units;
+    return fl;
+}
+
 static inline void zone_malloc_error(const char *msg)
 {
     fprintf(stderr, "%s", msg);
@@ -41,16 +74,22 @@ zone_malloc_t* zone_malloc_init(void* base_ptr, int _max_segment, size_t _unit_s
     gdata->next_tid = 0;
     gdata->segments = (segment_t *)malloc(sizeof(segment_t) * _max_segment);
     parsec_atomic_lock_init(&gdata->lock);
-#if defined(PARSEC_DEBUG)
+    parsec_rbtree_init(&gdata->rbtree, offsetof(zone_malloc_chunk_list_t, nb_units));
+    PARSEC_OBJ_CONSTRUCT(&gdata->rbtree_free_list, parsec_lifo_t);
     for(int i = 0; i < _max_segment; i++) {
         SEGMENT_AT_TID(gdata, i)->status = SEGMENT_UNDEFINED;
+        PARSEC_OBJ_CONSTRUCT(&SEGMENT_AT_TID(gdata, i)->super, parsec_list_item_t);
     }
-#endif /* defined(PARSEC_DEBUG) */
     head = SEGMENT_AT_TID(gdata, 0);
     head->status = SEGMENT_EMPTY;
     head->nb_units = _max_segment;
     head->nb_prev = 1; /**< This is to force SEGMENT_OF_TID( 0 - prev ) to return NULL */
 
+    zone_malloc_chunk_list_t *fl = PARSEC_OBJ_NEW(zone_malloc_chunk_list_t);
+    fl->nb_units = head->nb_units;
+    parsec_list_push_back(&fl->list, &head->super);
+    parsec_rbtree_insert(&gdata->rbtree, &fl->super);
+
     return gdata;
 }
 
@@ -63,6 +102,11 @@ void* zone_malloc_fini(zone_malloc_t** gdata)
     (*gdata)->max_segment = 0;
     (*gdata)->unit_size = 0;
     (*gdata)->base = NULL;
+
+    /* NOTE(review): in the lines visible in this change, chunk lists still
+     * linked in the tree or parked in rbtree_free_list are not released, and
+     * rbtree_free_list is never destructed -- confirm this is handled. */
+    parsec_rbtree_fini(&(*gdata)->rbtree);
     free(*gdata);
     *gdata = NULL;
     return base_ptr;
@@ -72,52 +116,86 @@ void *zone_malloc(zone_malloc_t *gdata, size_t size)
 {
     segment_t *current_segment, *next_segment, *new_segment;
     int next_tid, current_tid, new_tid;
-    int cycled_through = 0, nb_units;
+    int nb_units;
+    zone_malloc_chunk_list_t* fl;
 
-    parsec_atomic_lock(&gdata->lock);
-    /* Let's start with the last remembered free slot */
-    current_tid = gdata->next_tid;
     nb_units = (size + gdata->unit_size - 1) / gdata->unit_size;
 
-    do {
-        current_segment = SEGMENT_AT_TID(gdata, current_tid);
-        if( NULL == current_segment ) {
-            /* Maybe there is a free slot in the beginning. Let's cycle at least once before we bail out */
-            if( 0 != cycled_through ) break;
-            current_tid = 0;
-            cycled_through = 1;
-            current_segment = SEGMENT_AT_TID(gdata, current_tid);
-        }
-
-        if( current_segment->status == SEGMENT_EMPTY && current_segment->nb_units >= nb_units ) {
-            current_segment->status = SEGMENT_FULL;
-            if( current_segment->nb_units > nb_units ) {
-                next_tid = current_tid + current_segment->nb_units;
+    if (nb_units == 0) {
+        return NULL;
+    }
 
-                next_segment = SEGMENT_AT_TID(gdata, next_tid);
-                if( NULL != next_segment )
-                    next_segment->nb_prev -= nb_units;
+    parsec_atomic_lock(&gdata->lock);
+    /* try to find the smallest possible element, or one size larger */
+    fl = (zone_malloc_chunk_list_t*) parsec_rbtree_find_or_larger(&gdata->rbtree, nb_units);
 
-                new_tid = current_tid + nb_units;
-                new_segment = SEGMENT_AT_TID(gdata, new_tid);
-                new_segment->status = SEGMENT_EMPTY;
-                new_segment->nb_prev = nb_units;
-                new_segment->nb_units = current_segment->nb_units - nb_units;
+    if (NULL == fl) {
+        /* no segment found */
+        parsec_atomic_unlock(&gdata->lock);
+        return NULL;
+    }
 
-                /* new_tid is a free slot, remember for next malloc */
-                gdata->next_tid = new_tid;
+    current_segment = (segment_t *)parsec_list_nolock_pop_front(&fl->list);
+    assert(current_segment->nb_units >= nb_units);
+    current_segment->status = SEGMENT_FULL;
+    if (parsec_list_nolock_is_empty(&fl->list)) {
+        /* empty chunk list, remove */
+        parsec_rbtree_remove(&gdata->rbtree, &fl->super);
+        PARSEC_LIST_ITEM_SINGLETON(&fl->super.super);
+        /* put into free list */
+        parsec_lifo_nolock_push(&gdata->rbtree_free_list, &fl->super.super);
+    }
+    current_tid = (current_segment - gdata->segments);
+    if (current_segment->nb_units > nb_units) {
+        /* segment must be split */
+        next_tid = current_tid + current_segment->nb_units;
 
-                current_segment->nb_units = nb_units;
-            }
-            parsec_atomic_unlock(&gdata->lock);
-            return (void*)(gdata->base + (current_tid * gdata->unit_size));
+        /* the following segment is now preceded by the (smaller) remainder */
+        next_segment = SEGMENT_AT_TID(gdata, next_tid);
+        if( NULL != next_segment ) {
+            next_segment->nb_prev -= nb_units;
         }
 
-        current_tid += current_segment->nb_units;
-    } while( current_tid != gdata->next_tid );
+        new_tid = current_tid + nb_units;
+        new_segment = SEGMENT_AT_TID(gdata, new_tid);
+        new_segment->status = SEGMENT_EMPTY;
+        new_segment->nb_prev = nb_units;
+        new_segment->nb_units = current_segment->nb_units - nb_units;
+
+        fl = (zone_malloc_chunk_list_t*)parsec_rbtree_find(&gdata->rbtree, new_segment->nb_units);
+        if (fl == NULL) {
+            /* create new chunk list and insert into rbtree */
+            fl = allocate_chunk_list(gdata, new_segment->nb_units);
+            parsec_rbtree_insert(&gdata->rbtree, &fl->super);
+        }
+        parsec_list_nolock_push_front(&fl->list, &new_segment->super);
 
+        /* reduce size of current segment */
+        current_segment->nb_units = nb_units;
+    }
+    /* found segment of right size, done */
     parsec_atomic_unlock(&gdata->lock);
-    return NULL;
+    return (void*)(gdata->base + (current_tid * gdata->unit_size));
+}
+
+/* Detach a free segment from the chunk list matching its size; a chunk list
+ * left empty is removed from the tree and parked on the free list for reuse.
+ * The lookup is by current_segment->nb_units, so call this before that field
+ * is changed. Uses nolock list operations: zone_free calls it under gdata->lock. */
+static void remove_segment_from_rbtree(zone_malloc_t *gdata, segment_t *current_segment) {
+    zone_malloc_chunk_list_t *fl;
+    /* find chunk list */
+    fl = (zone_malloc_chunk_list_t*)parsec_rbtree_find(&gdata->rbtree, current_segment->nb_units);
+    assert(fl != NULL);
+    /* remove from list */
+    parsec_list_nolock_remove(&fl->list, &current_segment->super);
+    if (parsec_list_nolock_is_empty(&fl->list)) {
+        /* empty chunk list, remove */
+        parsec_rbtree_remove(&gdata->rbtree, &fl->super);
+        PARSEC_LIST_ITEM_SINGLETON(&fl->super.super);
+        /* put into free list */
+        parsec_lifo_nolock_push(&gdata->rbtree_free_list, &fl->super.super);
+    }
 }
 
 void zone_free(zone_malloc_t *gdata, void *add)
@@ -125,6 +203,7 @@ void zone_free(zone_malloc_t *gdata, void *add)
     segment_t *current_segment, *next_segment, *prev_segment;
     int current_tid, next_tid, prev_tid;
     off_t offset;
+    zone_malloc_chunk_list_t *fl;
 
     parsec_atomic_lock(&gdata->lock);
     offset = (char*)add -gdata->base;
@@ -144,6 +223,7 @@ void zone_free(zone_malloc_t *gdata, void *add)
         return;
     }
 
+    /* check if we can merge segments */
     current_segment->status = SEGMENT_EMPTY;
 
     prev_tid = current_tid - current_segment->nb_prev;
@@ -152,7 +232,8 @@ void zone_free(zone_malloc_t *gdata, void *add)
     next_tid = current_tid + current_segment->nb_units;
     next_segment = SEGMENT_AT_TID(gdata, next_tid);
 
-    if( NULL != prev_segment && prev_segment->status == SEGMENT_EMPTY ) {
+    if (NULL != prev_segment && prev_segment->status == SEGMENT_EMPTY) {
+        remove_segment_from_rbtree(gdata, prev_segment);
         /* We can merge prev and current */
         if( NULL != next_segment ) {
             next_segment->nb_prev += prev_segment->nb_units;
@@ -164,10 +245,8 @@ void zone_free(zone_malloc_t *gdata, void *add)
         current_tid = prev_tid;
     }
 
-    /* current_tid is a free slot, remember for next malloc */
-    gdata->next_tid = current_tid;
-
-    if( NULL != next_segment && next_segment->status == SEGMENT_EMPTY ) {
+    if (NULL != next_segment && next_segment->status == SEGMENT_EMPTY) {
+        remove_segment_from_rbtree(gdata, next_segment);
         /* We can merge current and next */
         next_tid += next_segment->nb_units;
         current_segment->nb_units += next_segment->nb_units;
@@ -176,6 +255,16 @@ void zone_free(zone_malloc_t *gdata, void *add)
             next_segment->nb_prev = current_segment->nb_units;
         }
     }
+
+    /* add the chunk into the RB tree */
+    fl = (zone_malloc_chunk_list_t*)parsec_rbtree_find(&gdata->rbtree, current_segment->nb_units);
+    if (fl == NULL) {
+        /* no chunk list, create a new entry */
+        fl = allocate_chunk_list(gdata, current_segment->nb_units);
+        parsec_rbtree_insert(&gdata->rbtree, &fl->super);
+    }
+    assert(fl != NULL);
+    parsec_list_nolock_push_front(&fl->list, &current_segment->super);
     parsec_atomic_unlock(&gdata->lock);
 }
 
@@ -185,6 +274,7 @@ size_t zone_in_use(zone_malloc_t *gdata)
     segment_t *current_segment;
     int current_tid;
     parsec_atomic_lock(&gdata->lock);
+    /* check segments */
     for(current_tid = 0;
         (current_segment = SEGMENT_AT_TID(gdata, current_tid)) != NULL;
         current_tid += current_segment->nb_units) {
@@ -196,6 +286,20 @@ size_t zone_in_use(zone_malloc_t *gdata)
     return ret;
 }
 
+typedef struct zone_malloc_rbtree_debug_t {
+    int level, output_id;
+    const char* prefix;
+} zone_malloc_rbtree_debug_t;
+
+/* rbtree visitor: log how many segments the chunk list holds and their size in units */
+static void zone_rbtree_cb(parsec_rbtree_node_t *node, void *data) {
+    zone_malloc_chunk_list_t *fl = (zone_malloc_chunk_list_t*)node;
+    zone_malloc_rbtree_debug_t* info = (zone_malloc_rbtree_debug_t*)data;
+    int len = 0;
+    PARSEC_LIST_NOLOCK_ITERATOR(&fl->list, iter, (void)iter; ++len; );
+    parsec_debug_verbose(info->level, info->output_id, "%srbtree node: %d segments a %d units",
+                         info->prefix, len, fl->nb_units);
+}
 
 size_t zone_debug(zone_malloc_t *gdata, int level, int output_id, const char *prefix)
 {
@@ -224,6 +328,10 @@ size_t zone_debug(zone_malloc_t *gdata, int level, int output_id, const char *pr
                                  gdata->base + (current_tid+current_segment->nb_units) * gdata->unit_size - 1);
         }
     }
+    zone_malloc_rbtree_debug_t info = {level, output_id, prefix};
+    parsec_rbtree_foreach(&gdata->rbtree, zone_rbtree_cb, &info);
+    parsec_atomic_unlock(&gdata->lock);
+
     return ret;
 }
 
diff --git a/parsec/utils/zone_malloc.h b/parsec/utils/zone_malloc.h
index 3529834e0..4a4adfc0a 100644
--- a/parsec/utils/zone_malloc.h
+++ b/parsec/utils/zone_malloc.h
@@ -10,6 +10,9 @@
 #include "parsec/parsec_config.h"
 #include "parsec/sys/atomic.h"
 
+#include "parsec/class/parsec_rbtree.h"
+#include "parsec/class/lifo.h"
+
 #include
 #include
 
@@ -20,18 +23,21 @@ BEGIN_C_DECLS
 #define
SEGMENT_UNDEFINED 3
 
 typedef struct segment {
+    parsec_list_item_t super;
     int status;     /* True if this segment is full, false if it is free */
-    int32_t nb_units; /* Number of units on this segment */
-    int32_t nb_prev;  /* Number of units on the segment before */
+    int nb_units;   /* Number of units on this segment */
+    int nb_prev;    /* Number of units on the segment before */
 } segment_t;
 
 typedef struct zone_malloc_s {
     char *base;                 /* Base pointer */
-    segment_t *segments;        /* Array of available segments */
+    segment_t *segments;        /* Array of segments */
     size_t unit_size;           /* Basic Unit */
     int max_segment;            /* Maximum number of segment */
     int next_tid;               /* Next TID to look at for a malloc */
     parsec_atomic_lock_t lock;
+    parsec_rbtree_t rbtree;     /* RB tree tracking chunks of free segments */
+    parsec_lifo_t rbtree_free_list; /* chunk list nodes removed from the tree, kept for reuse */
 } zone_malloc_t;
 
 
diff --git a/tests/runtime/cuda/CMakeLists.txt b/tests/runtime/cuda/CMakeLists.txt
index fbb5a5022..a8a0f7f27 100644
--- a/tests/runtime/cuda/CMakeLists.txt
+++ b/tests/runtime/cuda/CMakeLists.txt
@@ -26,3 +26,6 @@ if(PARSEC_HAVE_CUDA)
   target_include_directories(testing_get_best_device PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>)
   target_ptg_sources(testing_get_best_device PRIVATE "get_best_device_check.jdf")
 endif(PARSEC_HAVE_CUDA)
+
+parsec_addtest_executable(C zonemalloc SOURCES zonemalloc.c)
+
diff --git a/tests/runtime/cuda/zonemalloc.c b/tests/runtime/cuda/zonemalloc.c
new file mode 100644
index 000000000..a97246d92
--- /dev/null
+++ b/tests/runtime/cuda/zonemalloc.c
@@ -0,0 +1,91 @@
+#include "parsec.h"
+#include "parsec/utils/zone_malloc.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#define NUM_SEGMENTS 128
+
+
+int main(int argc, char **argv) {
+    parsec_context_t *parsec = NULL;
+
+#if defined(DISTRIBUTED)
+    {
+        int provided;
+        MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided);
+    }
+#endif /* DISTRIBUTED */
+
+    /* Initialize PaRSEC */
+    parsec = parsec_init(1, &argc, &argv);
+    if( NULL == parsec ) {
+        /* Failed to correctly initialize. In a correct scenario report*/
+        /* upstream, but in this particular case bail out.*/
+        exit(-1);
+    }
+
+    char** segments = calloc(NUM_SEGMENTS, sizeof(char*));
+    assert(segments != NULL);
+
+    char *base = malloc(NUM_SEGMENTS*512);
+    assert(base != NULL);
+    zone_malloc_t *gdata;
+    gdata = zone_malloc_init(base, NUM_SEGMENTS, 512);
+    assert(gdata != NULL);
+
+    /* allocate and free all segments same size */
+    for (int i = 0; i < NUM_SEGMENTS; ++i) {
+        segments[i] = zone_malloc(gdata, 512);
+        assert(segments[i] != NULL);
+    }
+    zone_debug(gdata, 0, 0, "sequential alloc: ");
+
+    /* free all segments sequentially */
+    for (int i = 0; i < NUM_SEGMENTS; ++i) {
+        zone_free(gdata, segments[i]);
+        segments[i] = NULL;
+    }
+    zone_debug(gdata, 0, 0, "sequential free: ");
+
+    /* allocate all segments same size */
+    for (int i = 0; i < NUM_SEGMENTS; ++i) {
+        segments[i] = zone_malloc(gdata, 512);
+        assert(segments[i] != NULL);
+    }
+    zone_debug(gdata, 0, 0, "stride alloc: ");
+
+    /* free segments with stride */
+    for (int i = 0; i < NUM_SEGMENTS; i += 2) {
+        zone_free(gdata, segments[i]);
+        segments[i] = NULL;
+    }
+    zone_debug(gdata, 0, 0, "stried free1: ");
+    for (int i = 1; i < NUM_SEGMENTS; i += 2) {
+        zone_free(gdata, segments[i]);
+        segments[i] = NULL;
+    }
+    zone_debug(gdata, 0, 0, "stried free2: ");
+
+    /* allocate segments with 2 different sizes (256, 512) */
+    for (int i = 0; i < NUM_SEGMENTS; ++i) {
+        segments[i] = zone_malloc(gdata, 512/((i%2)+1));
+        assert(segments[i] != NULL);
+    }
+
+    /* free them in reverse order, down to and including segment 0 */
+    for (int i = NUM_SEGMENTS-1; i >= 0; i--) {
+        zone_free(gdata, segments[i]);
+        segments[i] = NULL;
+    }
+
+    zone_malloc_fini(&gdata);
+    free(base);
+    free(segments);
+
+    parsec_fini(&parsec);
+#if defined(DISTRIBUTED)
+    MPI_Finalize();
+#endif /* DISTRIBUTED */
+    return 0;
+}