Skip to content

Commit

Permalink
Merge pull request #174 from ROSS-org/release-7.2.1
Browse files Browse the repository at this point in the history
Release 7.2.1
- Includes Random Cleanup (#169)
  • Loading branch information
gonsie authored Sep 29, 2020
2 parents 5eb95cf + 80204c0 commit a2b6bfc
Show file tree
Hide file tree
Showing 12 changed files with 94 additions and 108 deletions.
17 changes: 5 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,11 @@ if (COVERALLS)
coveralls_turn_on_coverage()
endif()

# Old way to do this
# Data Structure for Unprocessed Event List
#SET(QUEUE calendar) #Calendar Queue
#SET(QUEUE heap) #Push/Down Heap
#SET(QUEUE splay) #Splay Tree
#SET(QUEUE kp_splay) #Splay Tree in KPs

# New way as of CMake 2.8
# The default value for the QUEUE variable is splay
# The other options are presented at config time by cmake-gui
SET(QUEUE splay CACHE STRING "Queue type chosen by the user at configure time")
SET_PROPERTY(CACHE QUEUE PROPERTY STRINGS splay calendar heap kp_splay)
# Priority Queue Implementation
SET(QUEUE splay)
# Other queue implementations are no longer supported.
# SET(QUEUE splay CACHE STRING "Queue type chosen by the user at configure time")
# SET_PROPERTY(CACHE QUEUE PROPERTY STRINGS splay calendar heap kp_splay)

# Random Library
SET(RAND clcg4)
Expand Down
3 changes: 1 addition & 2 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ tw-timing.c

tw-sched.c
tw-setup.c
tw-signal.c
tw-stats.c
tw-util.c

Expand Down Expand Up @@ -94,7 +93,7 @@ set(VERSION_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")

# Data Structure for remote Events
# If AVL_TREE is OFF, ROSS reverts to hashing
OPTION(AVL_TREE "Use AVL trees for optimistic mode events? (hash tabels otherwise)" ON)
OPTION(AVL_TREE "Use AVL trees for optimistic mode events? (hash tables otherwise)" ON)
IF(AVL_TREE)
SET(ross_srcs ${ross_srcs} avl_tree.h avl_tree.c)
ENDIF(AVL_TREE)
Expand Down
3 changes: 1 addition & 2 deletions core/gvt/mpi_allreduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ static const tw_optdef gvt_opts [] =
{
TWOPT_GROUP("ROSS MPI GVT"),
TWOPT_UINT("gvt-interval", g_tw_gvt_interval, "GVT Interval: Iterations through scheduling loop (synch=1,2,3,4), or ms between GVTs (synch=5)"),
TWOPT_DOUBLE("report-interval", gvt_print_interval,
"percent of runtime to print GVT"),
TWOPT_DOUBLE("report-interval", gvt_print_interval, "percent of runtime to print GVT"),
TWOPT_END()
};

Expand Down
49 changes: 19 additions & 30 deletions core/network-mpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@ int custom_communicator = 0;
/**
* @struct act_q
* @brief Keeps track of posted send or recv operations.
*
* This list structure is used *only* by the network mpi layer (this
* file). Within this file, two lists are used, for MPI Irecv and
* Isend requests. The MPI requests and statuses are linked with an
* event buffer through this struct.
*/
struct act_q
{
const char *name;
const char *name; /**< name of the list, used in error printouts */

tw_event **event_list; /**< list of event pointers in this queue */
MPI_Request *req_list; /**< list of MPI request handles */
Expand All @@ -27,8 +32,8 @@ static struct act_q posted_sends;
static struct act_q posted_recvs;
static tw_eventq outq;

static unsigned int read_buffer = 16;
static unsigned int send_buffer = 1024;
static unsigned int read_buffer = 16; /**< Number of Irecv's to buffer, length of posted_recvs queue */
static unsigned int send_buffer = 1024; /**< Number of Isend's to buffer, length of posted_sends queue */
static int world_size = 1;

static const tw_optdef mpi_opts[] = {
Expand Down Expand Up @@ -85,20 +90,13 @@ tw_net_init(int *argc, char ***argv)
* @param[in] name name of the queue
*/
static void
init_q(struct act_q *q, const char *name)
init_q(struct act_q *q, const char *name, unsigned int size)
{
unsigned int n;

if(q == &posted_sends)
n = send_buffer;
else
n = read_buffer;

q->name = name;
q->event_list = (tw_event **) tw_calloc(TW_LOC, name, sizeof(*q->event_list), n);
q->req_list = (MPI_Request *) tw_calloc(TW_LOC, name, sizeof(*q->req_list), n);
q->idx_list = (int *) tw_calloc(TW_LOC, name, sizeof(*q->idx_list), n);
q->status_list = (MPI_Status *) tw_calloc(TW_LOC, name, sizeof(*q->status_list), n);
q->event_list = (tw_event **) tw_calloc(TW_LOC, name, sizeof(tw_event *), size);
q->req_list = (MPI_Request *) tw_calloc(TW_LOC, name, sizeof(MPI_Request), size);
q->idx_list = (int *) tw_calloc(TW_LOC, name, sizeof(int), size);
q->status_list = (MPI_Status *) tw_calloc(TW_LOC, name, sizeof(MPI_Status), size);
}

unsigned int
Expand All @@ -110,6 +108,7 @@ tw_nnodes(void)
void
tw_net_start(void)
{
// sets value of tw_nnodes
if (MPI_Comm_size(MPI_COMM_ROSS, &world_size) != MPI_SUCCESS)
tw_error(TW_LOC, "Cannot get MPI_Comm_size(MPI_COMM_ROSS)");

Expand All @@ -131,22 +130,12 @@ tw_net_start(void)

tw_pe_init();

//If we're in (some variation of) optimistic mode, we need this hash
if (g_tw_synchronization_protocol == OPTIMISTIC ||
g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
g_tw_synchronization_protocol == OPTIMISTIC_REALTIME) {
g_tw_pe->hash_t = tw_hash_create();
} else {
g_tw_pe->hash_t = NULL;
}

if (send_buffer < 1)
tw_error(TW_LOC, "network send buffer must be >= 1");
if (read_buffer < 1)
tw_error(TW_LOC, "network read buffer must be >= 1");
// these values are command line options
if (send_buffer < 1) tw_error(TW_LOC, "network send buffer must be >= 1");
if (read_buffer < 1) tw_error(TW_LOC, "network read buffer must be >= 1");

init_q(&posted_sends, "MPI send queue");
init_q(&posted_recvs, "MPI recv queue");
init_q(&posted_sends, "MPI send queue", send_buffer);
init_q(&posted_recvs, "MPI recv queue", read_buffer);

g_tw_net_device_size = read_buffer;

Expand Down
1 change: 0 additions & 1 deletion core/queue/tw-queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ unsigned int tw_pq_max_size(tw_pq *);
#ifdef ROSS_QUEUE_kp_splay
tw_eventpq * tw_eventpq_create(void);
#endif

6 changes: 0 additions & 6 deletions core/ross-extern.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,6 @@ extern void tw_scheduler_optimistic(tw_pe * me);
extern void tw_scheduler_optimistic_debug(tw_pe * me);
extern void tw_scheduler_optimistic_realtime(tw_pe * me);

/*
* tw-signal.c
*/
extern void tw_sigsegv(int sig);
extern void tw_sigterm(int sig);

/*
* tw-state.c
*/
Expand Down
1 change: 0 additions & 1 deletion core/ross-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,6 @@ struct tw_pe {
tw_eventq event_q; /**< @brief Linked list of events sent to this PE */
tw_event *cancel_q; /**< @brief List of canceled events */
tw_pq *pq; /**< @brief Priority queue used to sort events */
tw_kp *kp_list; /**< @brief */

tw_eventq free_q; /**< @brief Linked list of free tw_events */
tw_event *abort_event; /**< @brief Placeholder event for when free_q is empty */
Expand Down
2 changes: 1 addition & 1 deletion core/tw-event.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void tw_event_send(tw_event * event) {
#endif

// moved from network-mpi.c in order to give all events a seq_num
event->event_id = (tw_eventid) ++send_pe->seq_num;
event->event_id = (tw_eventid) ++send_pe->seq_num;

// call LP remote mapping function to get dest_pe
dest_peid = (*src_lp->type->map) ((tw_lpid) event->dest_lp);
Expand Down
46 changes: 44 additions & 2 deletions core/tw-eventq.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

#include <ross.h>

/**
* debug assistant function
*/
static inline void
tw_eventq_debug(tw_eventq * q)
{
Expand Down Expand Up @@ -39,6 +42,9 @@ tw_eventq_debug(tw_eventq * q)
#endif
}

/**
* push the contents of one list onto another??
*/
static inline void
tw_eventq_push_list(tw_eventq * q, tw_event * h, tw_event * t, long cnt)
{
Expand Down Expand Up @@ -115,6 +121,15 @@ tw_eventq_push_list(tw_eventq * q, tw_event * h, tw_event * t, long cnt)
tw_eventq_debug(q);
}

/**
* Given a list, move the portion of its contents that is older than GVT to
* the free list.
*
* Assumptions:
* - The provided q is not the free_q
* - The head of the list has the maximum time stamp in the list. Therefore,
* if the head is older than GVT, everything in the list is as well.
*/
static inline void
tw_eventq_fossil_collect(tw_eventq *q, tw_pe *pe)
{
Expand Down Expand Up @@ -164,6 +179,9 @@ tw_eventq_fossil_collect(tw_eventq *q, tw_pe *pe)
}
}

/**
* allocate events into a given tw_eventq
*/
static inline void
tw_eventq_alloc(tw_eventq * q, unsigned int cnt)
{
Expand Down Expand Up @@ -198,7 +216,6 @@ tw_eventq_alloc(tw_eventq * q, unsigned int cnt)
g_tw_event_msg_sz = event_len;

// compute number of events needed for the network.
g_tw_gvt_threshold = (int) ceil(g_tw_net_device_size / g_tw_event_msg_sz);
g_tw_gvt_threshold = g_tw_net_device_size;
g_tw_events_per_pe += g_tw_gvt_threshold;
cnt += g_tw_gvt_threshold;
Expand Down Expand Up @@ -233,6 +250,9 @@ tw_eventq_alloc(tw_eventq * q, unsigned int cnt)
q->tail = event;
}

/**
* push to tail of list
*/
static inline void
tw_eventq_push(tw_eventq *q, tw_event *e)
{
Expand All @@ -253,12 +273,18 @@ tw_eventq_push(tw_eventq *q, tw_event *e)
tw_eventq_debug(q);
}

/**
 * Peek at the tail of the list.
 *
 * The queue is not modified; the caller receives the current value
 * of q->tail.
 *
 * @param[in] q queue to inspect
 * @return the event pointer stored in q->tail
 */
static inline tw_event *
tw_eventq_peek(tw_eventq *q)
{
    tw_event *last = q->tail;

    return last;
}

/**
* pop from tail of list
*/
static inline tw_event *
tw_eventq_pop(tw_eventq * q)
{
Expand Down Expand Up @@ -287,6 +313,9 @@ tw_eventq_pop(tw_eventq * q)
return t;
}

/**
* push to head of list
*/
static inline void
tw_eventq_unshift(tw_eventq *q, tw_event *e)
{
Expand All @@ -308,12 +337,18 @@ tw_eventq_unshift(tw_eventq *q, tw_event *e)
tw_eventq_debug(q);
}

/**
 * Peek at the head of the list.
 *
 * The queue is not modified; the caller receives the current value
 * of q->head.
 *
 * @param[in] q queue to inspect
 * @return the event pointer stored in q->head
 */
static inline tw_event *
tw_eventq_peek_head(tw_eventq *q)
{
    tw_event *first = q->head;

    return first;
}

/**
* pop from head of list
*/
static inline tw_event *
tw_eventq_shift(tw_eventq *q)
{
Expand Down Expand Up @@ -342,6 +377,9 @@ tw_eventq_shift(tw_eventq *q)
return h;
}

/**
* delete an event from anywhere in the list
*/
static inline void
tw_eventq_delete_any(tw_eventq *q, tw_event *e)
{
Expand All @@ -366,6 +404,10 @@ tw_eventq_delete_any(tw_eventq *q, tw_event *e)
tw_eventq_debug(q);
}

/**
* pop the entire list.
* After this operation, the size of the provided q is 0.
*/
static inline tw_event *
tw_eventq_pop_list(tw_eventq * q)
{
Expand All @@ -377,7 +419,7 @@ tw_eventq_pop_list(tw_eventq * q)
return h;
}

/*
/**
* The purpose of this function is to be able to remove some
* part of a list.. could be all of list, from head to some inner
* buffer, or from some inner buffer to tail. I only care about the
Expand Down
32 changes: 21 additions & 11 deletions core/tw-pe.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,29 +22,39 @@ tw_pe_settype(const tw_petype * type)
#undef copy_pef
}

/*
* tw_pe_init: initialize individual PE structs
/**
* initialize individual PE structs
*
* must be called after tw_nnodes / MPI world size is set.
*
*/
void
tw_pe_init(void)
{
if (g_tw_pe)
tw_error(TW_LOC, "PE %u already initialized", g_tw_mynode);
if (g_tw_pe) tw_error(TW_LOC, "PE %u already initialized", g_tw_mynode);

g_tw_pe = (tw_pe*)tw_calloc(TW_LOC, "PE Struct", sizeof(*g_tw_pe), 1);
tw_petype no_type;

memset(&no_type, 0, sizeof(no_type));
g_tw_pe->id = g_tw_mynode;

g_tw_pe->id = g_tw_mynode;
tw_pe_settype(&no_type);
tw_petype no_type;
memset(&no_type, 0, sizeof(no_type));
tw_pe_settype(&no_type);

g_tw_pe->trans_msg_ts = TW_STIME_MAX;
g_tw_pe->gvt_status = 0;
g_tw_pe->trans_msg_ts = DBL_MAX;
g_tw_pe->gvt_status = 0;

// TODO is the PE RNG ever actually used?
g_tw_pe->rng = tw_rand_init(31, 41);
g_tw_pe->rng = tw_rand_init(31, 41);

//If we're in (some variation of) optimistic mode, we need this hash
if (g_tw_synchronization_protocol == OPTIMISTIC ||
g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
g_tw_synchronization_protocol == OPTIMISTIC_REALTIME) {
g_tw_pe->hash_t = tw_hash_create();
} else {
g_tw_pe->hash_t = NULL;
}

}

Expand Down
4 changes: 2 additions & 2 deletions core/tw-sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ static inline void reset_bitfields(tw_event *revent)
memset(&revent->cv, 0, sizeof(revent->cv));
}

/*
/**
* Get all events out of my event queue and spin them out into
* the priority queue so they can be processed in time stamp
* order.
Expand Down Expand Up @@ -66,7 +66,7 @@ static void tw_sched_event_q(tw_pe * me) {
}
}

/*
/**
* OPT: need to link events into canq in reverse order so
* that when we rollback the 1st event, we should not
* need to do any further rollbacks.
Expand Down
Loading

0 comments on commit a2b6bfc

Please sign in to comment.