diff --git a/resrc/Makefile.am b/resrc/Makefile.am index feb97c436..bfd8b4a48 100644 --- a/resrc/Makefile.am +++ b/resrc/Makefile.am @@ -8,9 +8,9 @@ SUBDIRS = . test noinst_LTLIBRARIES = libflux-resrc.la -noinst_HEADERS = resrc.h resrc_tree.h resrc_flow.h resrc_reqst.h +noinst_HEADERS = resrc.h resrc_tree.h resrc_flow.h resrc_reqst.h planner.h -libflux_resrc_la_SOURCES = resrc.c resrc_tree.c resrc_flow.c resrc_reqst.c +libflux_resrc_la_SOURCES = resrc.c resrc_tree.c resrc_flow.c resrc_reqst.c planner.c libflux_resrc_la_CFLAGS = $(AM_CFLAGS) -I$(top_srcdir)/rdl libflux_resrc_la_LIBADD = $(top_builddir)/rdl/libflux-rdl.la \ $(top_builddir)/src/common/libutil/libutil.la \ diff --git a/resrc/planner.c b/resrc/planner.c new file mode 100644 index 000000000..dbb726ee5 --- /dev/null +++ b/resrc/planner.c @@ -0,0 +1,1389 @@ +/*****************************************************************************\ + * Copyright (c) 2014 Lawrence Livermore National Security, LLC. Produced at + * the Lawrence Livermore National Laboratory (cf, AUTHORS, DISCLAIMER.LLNS). + * LLNL-CODE-658032 All rights reserved. + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the license, or (at your option) + * any later version. + * + * Flux is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ * See also: http://www.gnu.org/licenses/ +\*****************************************************************************/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <stdbool.h> +#include <errno.h> +#include <czmq.h> + +#include "src/common/libutil/xzmalloc.h" +#include "src/common/libutil/rbtree.h" +#include "src/common/libutil/rbtree_augmented.h" +#include "src/common/libutil/interval_tree_generic.h" +#include "planner.h" + +#define DEBUG_PLANNER 0 +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) +#define FREE_NOREF_POINT(rsv) do { \ + if ((rsv)->start_p->ref_count == 0) { \ + free ((rsv)->start_p); \ + (rsv)->start_p = NULL; \ + } \ + if ((rsv)->last_p->ref_count == 0) { \ + free ((rsv)->last_p); \ + (rsv)->last_p = NULL; \ + } \ +} while (false); + +#define OUT_OF_RANGE(A,B,C) (((A)-(B)) >= (C)? 1: 0) + +typedef int64_t resrc_vector_t[MAX_RESRC_DIM]; +typedef char * rtype_vector_t[MAX_RESRC_DIM]; +typedef struct rb_root rb_root_t; +typedef struct rb_node rb_node_t; + +/* Scheduled point: a time at which resource state changes. Each point's resource + * requirements are tracked as a node in a min-time resource (MTR) binary search + * tree -- MAX_RESRC_DIM dimensional. + */ +typedef struct scheduled_point { + rb_node_t point_rb; /* BST node for scheduled point search */ + rb_node_t resrc_rb; /* Min time resource BST node */ + int64_t __subtree_min; /* Min time of the subtree of this node */ + int64_t at; /* Resource-state changing time */ + int inserted_to_resrc; /* 1 when this point is inserted in min-time tree */ + int new_point; /* 1 when this point is newly created */ + int ref_count; /* reference counter */ + resrc_vector_t scheduled_resrcs; /* scheduled resources at this point */ + resrc_vector_t remaining_resrcs; /* remaining resources (available) */ +} scheduled_point_t; + +/* Reservation: a node in a reservation tree (interval tree) to enable fast + * retrieval of intercepting reservations. 
+ */ +struct reservation { + rb_node_t resv_rb; /* RB node for reservation interval tree */ + int64_t start; /* start time of the reservation */ + int64_t last; /* end time of the reservation */ + int64_t __subtree_last; /* maximum end time of my subtree */ + int64_t resv_id; /* unique reservation id */ + resrc_vector_t reserved_resrcs; /* required resources */ + size_t resrc_dim; /* vector size of required resources */ + int added; /* added to the reservation interval tree */ + struct scheduled_point *start_p; /* scheduled point object at start */ + struct scheduled_point *last_p; /* scheduled point object at last */ +}; + +/* Planner context + */ +struct planner { + resrc_vector_t total_resrc_vector; /* total resources avail for planning */ + rtype_vector_t resrc_type_vector; /* array of resrc type strings */ + size_t resrc_dim; /* size of the above vector */ + int64_t plan_start; /* begin of the planning span */ + int64_t plan_end; /* end of the planning span */ + zhash_t *avail_time_iter; /* tracking points temporarily deleted from MTR */ + req_t *avail_time_iter_req;/* the req copy for avail time iteration */ + int avail_time_iter_set; /* iterator set */ + scheduled_point_t *p1; /* system's scheduled point at t0*/ + zhashx_t *r_lookup; /* reservation look up table */ + rb_root_t reservations_root; /* resource interval tree */ + rb_root_t scheduled_points_root; /* scheduled points red black BST */ + rb_root_t scheduled_resrcs_root; /* minimum time resource BST */ +}; + + +/******************************************************************************* + * * + * INTERNAL PLANNER API * + * * + *******************************************************************************/ + +/******************************************************************************* + * Scheduled Points Binary Search Tree * + * Efficient Searching of Scheduled Points * + *******************************************************************************/ +static scheduled_point_t 
*scheduled_point_search (int64_t t, struct rb_root *root) +{ + rb_node_t *node = root->rb_node; + while (node) { + scheduled_point_t *this_data = NULL; + this_data = container_of(node, scheduled_point_t, point_rb); + int64_t result = t - this_data->at; + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this_data; + } + return NULL; +} + +static inline scheduled_point_t *recent_state (scheduled_point_t *new_data, + scheduled_point_t *old_data) +{ + if (!old_data) + return new_data; + return (new_data->at > old_data->at)? new_data : old_data; +} + +/* while scheduled_point_search returns the exact match + * scheduled_point_state returns the most recent scheduled point + * -- which represents the resource state at the time t. + */ +static scheduled_point_t *scheduled_point_state (int64_t t, struct rb_root *root) +{ + scheduled_point_t *last_state = NULL; + rb_node_t *node = root->rb_node; + while (node) { + scheduled_point_t *this_data = NULL; + this_data = container_of(node, scheduled_point_t, point_rb); + int64_t result = t - this_data->at; + if (result < 0) + node = node->rb_left; + else if (result > 0) { + last_state = recent_state (this_data, last_state); + node = node->rb_right; + } + else + return this_data; + } + return last_state; +} + +static int scheduled_point_insert (scheduled_point_t *new_data, rb_root_t *root) +{ + rb_node_t **link = &(root->rb_node); + rb_node_t *parent = NULL; + while (*link) { + scheduled_point_t *this_data = NULL; + this_data = container_of(*link, scheduled_point_t, point_rb); + int64_t result = new_data->at - this_data->at; + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return -1; + } + rb_link_node(&(new_data->point_rb), parent, link); + rb_insert_color(&(new_data->point_rb), root); + return 0; +} + +static int scheduled_point_remove (scheduled_point_t *data, struct rb_root *root) +{ + int rc = 
-1; + scheduled_point_t *n = scheduled_point_search (data->at, root); + if (n) { + rb_erase (&(n->point_rb), root); + /* Note: this should only remove the node from the scheduled point BST + * and does NOT free memory allocated to the node + */ + rc = 0; + } + return rc; +} + +static void scheduled_points_destroy (rb_node_t *node) +{ + if (node->rb_left) + scheduled_points_destroy (node->rb_left); + if (node->rb_right) + scheduled_points_destroy (node->rb_right); + scheduled_point_t *data = container_of(node, scheduled_point_t, point_rb); + free (data); +} + + +/******************************************************************************* + * Reservation Interval Tree * + * Efficient Intersection Searching * + *******************************************************************************/ +INTERVAL_TREE_DEFINE(struct reservation, resv_rb, int64_t, __subtree_last, + START, LAST,, reservation) + +static void reservations_destroy(rb_node_t *node) +{ + if (node->rb_left) + reservations_destroy (node->rb_left); + if (node->rb_right) + reservations_destroy (node->rb_right); + reservation_t *rsv = container_of(node, reservation_t, resv_rb); + free (rsv); +} + + +/******************************************************************************* + * Min Time Resource Tree * + * Efficient Searching of Earliest Schedulable Points * + *******************************************************************************/ +static inline int64_t scheduled_resrc_subtree_min (scheduled_point_t *point) +{ + int64_t min = point->at; + int64_t subtree_min; + if (point->resrc_rb.rb_left) { + subtree_min = rb_entry(point->resrc_rb.rb_left, + scheduled_point_t, resrc_rb)->__subtree_min; + if (min > subtree_min) + min = subtree_min; + } + if (point->resrc_rb.rb_right) { + subtree_min = rb_entry(point->resrc_rb.rb_right, + scheduled_point_t, resrc_rb)->__subtree_min; + if (min > subtree_min) + min = subtree_min; + } + return min; +} + +static inline void scheduled_resrc_propagate (rb_node_t 
*rb, rb_node_t *stop) +{ + while (rb != stop) { + scheduled_point_t *point = rb_entry(rb, scheduled_point_t, resrc_rb); + int64_t subtree_min = scheduled_resrc_subtree_min (point); + if (point->__subtree_min == subtree_min) + break; + point->__subtree_min = subtree_min; + rb = rb_parent(&point->resrc_rb); + } +} + +static inline void scheduled_resrc_copy (rb_node_t *rb_old, rb_node_t *rb_new) +{ + scheduled_point_t *o = rb_entry(rb_old, scheduled_point_t, resrc_rb); + scheduled_point_t *n = rb_entry(rb_new, scheduled_point_t, resrc_rb); + n->__subtree_min = o->__subtree_min; +} + +static inline void scheduled_resrc_rotate (rb_node_t *rb_old, rb_node_t *rb_new) +{ + scheduled_point_t *o = rb_entry(rb_old, scheduled_point_t, resrc_rb); + scheduled_point_t *n = rb_entry(rb_new, scheduled_point_t, resrc_rb); + n->__subtree_min = o->__subtree_min; + o->__subtree_min = scheduled_resrc_subtree_min (o); +} + +static const struct rb_augment_callbacks scheduled_resrc_aug_cb = { + scheduled_resrc_propagate, scheduled_resrc_copy, scheduled_resrc_rotate +}; + +static inline int64_t veccmp (resrc_vector_t s1, resrc_vector_t s2, size_t len) +{ + int i = 0; + int less = 0; + int64_t r = 0; + for (i = 0; i < len; ++i) { + if ((r = (int64_t)s1[i] - (int64_t)s2[i]) > 0) + break; + less += r; + } + return (r > 0)? 
r : less; +} + +static void scheduled_resrc_insert (scheduled_point_t *new_data, rb_root_t *root) +{ + rb_node_t **link = &(root->rb_node); + scheduled_point_t *this_data = NULL; + rb_node_t *parent = NULL; + while (*link) { + this_data = rb_entry(*link, scheduled_point_t, resrc_rb); + parent = *link; + if (this_data->__subtree_min > new_data->at) + this_data->__subtree_min = new_data->at; + int64_t result = 0; + if ((result = veccmp (new_data->remaining_resrcs, + this_data->remaining_resrcs, MAX_RESRC_DIM)) < 0) + link = &(this_data->resrc_rb.rb_left); + else + link = &(this_data->resrc_rb.rb_right); + } + new_data->__subtree_min = new_data->at; + new_data->inserted_to_resrc = 1; + rb_link_node(&(new_data->resrc_rb), parent, link); + rb_insert_augmented(&(new_data->resrc_rb), root, &scheduled_resrc_aug_cb); +} + +static void scheduled_resrc_remove (scheduled_point_t *data, rb_root_t *root) +{ + rb_erase_augmented (&data->resrc_rb, root, &scheduled_resrc_aug_cb); + data->inserted_to_resrc = 0; +} + +static inline int64_t rbranch_mintm (rb_node_t *node) +{ + int64_t mn = INT64_MAX; + rb_node_t *r = node->rb_right; + mn = r? rb_entry(r, scheduled_point_t, resrc_rb)->__subtree_min : mn; + scheduled_point_t *this_data = rb_entry(node, scheduled_point_t, resrc_rb); + return (this_data->at < mn)? 
this_data->at : mn; +} + +static inline scheduled_point_t *find_mintm_point (rb_node_t *anchor, + int64_t mintm) +{ + if (!anchor) + return NULL; + + scheduled_point_t *this_data = NULL; + this_data = rb_entry(anchor, scheduled_point_t, resrc_rb); + if (this_data->at == mintm) + return this_data; + + rb_node_t *node = anchor->rb_right; + while (node) { + this_data = rb_entry(node, scheduled_point_t, resrc_rb); + if (this_data->at == mintm) + return this_data; + + if (node->rb_left + && rb_entry(node->rb_left, scheduled_point_t, + resrc_rb)->__subtree_min == mintm) + node = node->rb_left; + else + node = node->rb_right; + } + + /* this is an error condition: when an anchor was found, there must be + * a point that meets the requirements. + */ + errno = ENOTSUP; + return NULL; +} + +static inline int64_t find_mintm_anchor (int64_t *rv, rb_root_t *rt, + rb_node_t **anchor_p) +{ + rb_node_t *node = rt->rb_node; + int64_t mintm = INT64_MAX; + int64_t r_mintm = INT64_MAX; + + while (node) { + scheduled_point_t *this_data = NULL; + this_data = rb_entry(node, scheduled_point_t, resrc_rb); + int64_t result = 0; + result = veccmp (rv, this_data->remaining_resrcs, MAX_RESRC_DIM); + if (result <= 0) { + /* visiting node satisfies the resource requirements this means all + * of the nodes at its subtree also satisfies the requirements. Thus, + * rbranch_mintime is the best min time. + */ + r_mintm = rbranch_mintm (node); + if (r_mintm < mintm) { + mintm = r_mintm; + *anchor_p = node; + } + /* next, we should search the left subtree for potentially better + * then current mintm; + */ + node = node->rb_left; + } else { + /* visiting node does not satisfy the resource requirements. This + * means, nothing in its left branch will meet these requirements: + * time to search the right subtree. 
+ */ + node = node->rb_right; + } + } + return mintm; +} + +static scheduled_point_t *scheduled_resrc_mintm (int64_t *rv, rb_root_t *rt) +{ + rb_node_t *anchor = NULL; + int64_t mintm = find_mintm_anchor (rv, rt, &anchor); + return find_mintm_point (anchor, mintm); +} + +#if DEBUG_PLANNER +static void scheduled_resrc_print (rb_root_t *rt) +{ + rb_node_t *node; + int i = 0; + for (node = rb_first(rt); node; node = rb_next(node)) { + i++; + printf("..\n"); + printf("+ at=%ld\n", + rb_entry(node, scheduled_point_t, resrc_rb)->at); + printf("+ __subtree_min=%ld\n", + rb_entry(node, scheduled_point_t, resrc_rb)->__subtree_min); + printf("+ inserted_to_resrc=%d\n", + rb_entry(node, scheduled_point_t, resrc_rb)->inserted_to_resrc); + printf("+ new_point=%d\n", + rb_entry(node, scheduled_point_t, resrc_rb)->new_point); + printf("+ ref_count=%d\n", + rb_entry(node, scheduled_point_t, resrc_rb)->ref_count); + printf("+ scheduled_resrcs[0]=%jd\n", (intmax_t) rb_entry(node, + scheduled_point_t, resrc_rb)->scheduled_resrcs[0]); + printf("+ remaining_resrcs[0]=%ld\n", (intmax_t) rb_entry(node, + scheduled_point_t, resrc_rb)->remaining_resrcs[0]); + } + printf ("SIZE: %d\n", i); + printf ("===============================================================+=\n"); +} +#endif + + +/******************************************************************************* + * Scheduled Point and Resrc Update APIs * + * * + *******************************************************************************/ +static inline int track_points (zhash_t *tracker, struct scheduled_point *point) +{ + /* XXX OPTIMIZATION: Keep track of tracking status to avoid insert */ + /* XXX Use zlist or a new search tree */ + char key[32]; + sprintf (key, "%jd", (intmax_t)point->at); + /* caller will rely on the fact that rc == -1 when key already exists */ + /* don't need to register free */ + return zhash_insert (tracker, key, point); +} + +static inline void restore_track_points (planner_t *ctx, rb_root_t *root) +{ + 
scheduled_point_t *point = NULL; + zlist_t *keys = zhash_keys (ctx->avail_time_iter); + const char *k = NULL; + for (k = zlist_first (keys); k; k = zlist_next (keys)) { + point = zhash_lookup (ctx->avail_time_iter, k); + scheduled_resrc_insert (point, root); + zhash_delete (ctx->avail_time_iter, k); + } + zlist_destroy (&keys); +} + +static inline int update_scheduled_resrcs (zhash_t *tracker, rb_root_t *rt) +{ + int rc = 0; + const char *k = NULL; + scheduled_point_t *point = NULL; + zlist_t *keys = zhash_keys (tracker); + for (k = zlist_first (keys); k; k = zlist_next (keys)) { + point = zhash_lookup (tracker, k); + if (point->inserted_to_resrc) + scheduled_resrc_remove (point, rt); + if (point->ref_count && !(point->inserted_to_resrc)) + scheduled_resrc_insert (point, rt); + zhash_delete (tracker, k); + } + zlist_destroy (&keys); + return rc; +} + +static inline scheduled_point_t *add_P (planner_t *ctx, int64_t at, + reservation_t *rsv, bool up) +{ + int i = 0; + rb_root_t *rt = &(ctx->scheduled_points_root); + scheduled_point_t *point = NULL; + if (!(point = scheduled_point_search (at, rt))) { + /* If point is not found, we must create a new scheduled point obj */ + point = xzmalloc (sizeof (*point)); + point->at = at; + memset (point->scheduled_resrcs, '\0', sizeof (point->scheduled_resrcs)); + memcpy (point->remaining_resrcs, ctx->total_resrc_vector, + sizeof (point->remaining_resrcs)); + point->inserted_to_resrc = 0; /* not been inserted to resource BST */ + point->new_point = 1; + point->ref_count = 0; + if (scheduled_point_insert (point, rt) < 0) { + /* same key is rejected (should never happen) */ + errno = EKEYREJECTED; + free (point); + point = NULL; + goto done; + } + } + + for (i = 0; up && i < rsv->resrc_dim; ++i) { + point->scheduled_resrcs[i] += rsv->reserved_resrcs[i]; + point->remaining_resrcs[i] -= rsv->reserved_resrcs[i]; + if (point->scheduled_resrcs[i] > ctx->total_resrc_vector[i] + || point->remaining_resrcs[i] < 0) + errno = ERANGE; + } + 
+done: + return point; +} + +static inline int add_R (planner_t *ctx, reservation_t *rsv, zhash_t *tracker) +{ + rsv->start_p = add_P (ctx, rsv->start, rsv, true); + rsv->last_p = add_P (ctx, rsv->last, rsv, false); + if (rsv->start_p) { + rsv->start_p->ref_count++; + track_points (tracker, rsv->start_p); + } + if (rsv->last_p) { + rsv->last_p->ref_count++; + track_points (tracker, rsv->last_p); + } + return (!rsv->start_p || !rsv->last_p)? -1 : 0; +} + +static inline int sub_R (planner_t *ctx, reservation_t *rsv, zhash_t *tracker) +{ + int rc = 0; + int i = 0; + + if (rsv->start_p) { + rsv->start_p->ref_count--; + track_points (tracker, rsv->start_p); + for (i = 0; i < rsv->resrc_dim; ++i) { + rsv->start_p->scheduled_resrcs[i] -= rsv->reserved_resrcs[i]; + rsv->start_p->remaining_resrcs[i] += rsv->reserved_resrcs[i]; + if (rsv->start_p->scheduled_resrcs[i] < 0 + || rsv->start_p->remaining_resrcs[i] > ctx->total_resrc_vector[i]) { + errno = ERANGE; + rc = -1; + } + } + if (!(rsv->start_p->ref_count)) + scheduled_point_remove (rsv->start_p, &(ctx->scheduled_points_root)); + } + if (rsv->last_p) { + rsv->last_p->ref_count--; + track_points (tracker, rsv->last_p); + if (!(rsv->last_p->ref_count)) + scheduled_point_remove (rsv->last_p, &(ctx->scheduled_points_root)); + } + + return (!rsv->start_p || !rsv->last_p)? -1 : rc; +} + +static inline int add_I (planner_t *ctx, int64_t t, scheduled_point_t *p, + reservation_t *r, zhash_t *tracker, int force) +{ + int rc = 0; + /* interception due to being equal has already been taken care */ + if ((START(r) < (t) && (t) < LAST(r))) { + /* an existing point requires only one update w.r.t. new reservation + * if a new point, it needs to be updated w.r.t. 
all existing ones (force) + */ + if (track_points (tracker, p) == 0 || force) { + int i = 0; + for (i = 0; i < r->resrc_dim; ++i) { + p->scheduled_resrcs[i] += r->reserved_resrcs[i]; + p->remaining_resrcs[i] -= r->reserved_resrcs[i]; + if (p->scheduled_resrcs[i] > ctx->total_resrc_vector[i] + || p->remaining_resrcs[i] < 0) { + rc = -1; + errno = ERANGE; + } + } + } + } + return rc; +} + +static inline int sub_I (planner_t *ctx, int64_t t, scheduled_point_t *p, + reservation_t *r, zhash_t *tracker) +{ + int rc = 0; + /* interception due to being equal has already been taken care */ + if ((START(r) < (t) && (t) < LAST(r))) { + /* an existing point requires only one update w.r.t. new reservation */ + if (track_points (tracker, p) == 0) { + int i = 0; + for (i = 0; i < r->resrc_dim; ++i) { + p->scheduled_resrcs[i] -= r->reserved_resrcs[i]; + p->remaining_resrcs[i] += r->reserved_resrcs[i]; + if (p->scheduled_resrcs[i] > ctx->total_resrc_vector[i] + || p->remaining_resrcs[i] < 0) { + rc = -1; + errno = ERANGE; + } + } + } + } + return rc; +} + +static inline bool add_Is (planner_t *ctx, reservation_t *r1, + reservation_t *r2, zhash_t *tracker, int force) +{ + return ((add_I (ctx, START(r1), r1->start_p, r2, tracker, + force? r1->start_p->new_point : 0) == 0) + && (add_I (ctx, LAST(r1), r1->last_p, r2, tracker, + force? 
r1->last_p->new_point : 0) == 0)); + +} + +static inline bool sub_Is (planner_t *ctx, reservation_t *r1, + reservation_t *r2, zhash_t *tracker) +{ + return ((sub_I (ctx, START(r1), r1->start_p, r2, tracker) == 0) + && (sub_I (ctx, LAST(r1), r1->last_p, r2, tracker) == 0)); + +} + +static inline void copy_req (req_t *dest, req_t *src) +{ + dest->duration = src->duration; + dest->vector_dim = src->vector_dim; + size_t s1 = sizeof (*(dest->resrc_vector)) * MAX_RESRC_DIM; + memset (dest->resrc_vector, '\0', s1); + size_t s2 = sizeof (*(src->resrc_vector)) * src->vector_dim; + memcpy (dest->resrc_vector, src->resrc_vector, s2); +} + +static inline int64_t avail_time_internal (planner_t *ctx, req_t *req) +{ + int sat = 0; + int64_t at = -1; + int64_t *rv = NULL; + int64_t *eff_rv = NULL; + scheduled_point_t *p= NULL; + rb_root_t *r = &(ctx->scheduled_resrcs_root); + rv = (int64_t *)req->resrc_vector; + + if (veccmp (rv, ctx->total_resrc_vector, req->vector_dim) > 0) { + errno = ERANGE; + ctx->avail_time_iter_set = 0; + goto done; /* unsatisfiable */ + } + /* zero resource reservation is disallowed; a full resource check enough*/ + eff_rv = (req->exclusive)? ctx->total_resrc_vector : (int64_t *)rv; + + /* retrieve the minimum time when the requsted resources are available */ + while (!sat && (p = scheduled_resrc_mintm (eff_rv, r))) { + rb_node_t *n = rb_next(&(p->point_rb)); + scheduled_point_t *d_chk = NULL; + sat = 1; + /* retrieve the next scheduled point and see if its time overlaps + * with the request. If overlaps, check resource availability. 
+ */ + while ((d_chk = rb_entry(n, scheduled_point_t, point_rb))) { + if (OUT_OF_RANGE(d_chk->at, p->at, req->duration)) + break; + else { + int64_t result; + result = veccmp (eff_rv, d_chk->remaining_resrcs, req->vector_dim); + if (result > 0) { + scheduled_resrc_remove (p, r); + track_points (ctx->avail_time_iter, p); + sat = 0; + break; + } + } + n = rb_next (&(d_chk->point_rb)); + } + } + + if (p) { + at = p->at; + scheduled_resrc_remove (p, r); + track_points (ctx->avail_time_iter, p); + if (!OUT_OF_RANGE(ctx->plan_end, at, req->duration)) + at = -1; + } + +done: + return at; +} + +static inline int avail_resources_at_internal (planner_t *ctx, int64_t starttime, + int64_t lasttime, int64_t *rv, int vd, int exclusive) +{ + int avail = -1; + int64_t *eff_rv = NULL; + if (starttime < 0 || !rv || !ctx) { + errno = EINVAL; + goto done; + } else if (veccmp (rv, ctx->total_resrc_vector, vd) > 0) { + errno = ERANGE; + goto done; + } + + eff_rv = exclusive? ctx->total_resrc_vector : (int64_t *)rv; + rb_root_t *spr = &(ctx->scheduled_points_root); + scheduled_point_t *state_at_start = NULL; + + if ((state_at_start = scheduled_point_state (starttime, spr)) == NULL) { + errno = ENOTSUP; + goto done; + } else if (veccmp (eff_rv, state_at_start->remaining_resrcs, vd) > 0) + goto done; + + rb_node_t *n = rb_next(&(state_at_start->point_rb)); + scheduled_point_t *d_chk = NULL; + while ((d_chk = rb_entry(n, scheduled_point_t, point_rb))) { + if (OUT_OF_RANGE(d_chk->at, starttime, (lasttime - starttime))) + break; + else { + int64_t result; + result = veccmp (eff_rv, d_chk->remaining_resrcs, vd); + if (result > 0) + goto done; + } + n = rb_next (&(d_chk->point_rb)); + } + avail = 0; + +done: + return avail; +} + + +/******************************************************************************* + * Utilities * + * * + *******************************************************************************/ +static inline void planner_set_bound (planner_t *ctx, int64_t plan_starttime, + 
int64_t plan_duration) +{ + int i = 0; + + ctx->p1 = xzmalloc (sizeof (*(ctx->p1))); + ctx->p1->at = plan_starttime; + ctx->p1->ref_count = 1; + memset (ctx->p1->scheduled_resrcs, '\0', + sizeof (ctx->p1->scheduled_resrcs)); + memset (ctx->p1->remaining_resrcs, '\0', + sizeof (ctx->p1->remaining_resrcs)); + for (i = 0; i < ctx->resrc_dim; ++i) + ctx->p1->remaining_resrcs[i] = ctx->total_resrc_vector[i]; + ctx->plan_start = plan_starttime; + ctx->plan_end = plan_starttime + plan_duration; + ctx->avail_time_iter = zhash_new (); + ctx->avail_time_iter_req = xzmalloc (sizeof (*(ctx->avail_time_iter_req))); + size_t s = sizeof(*(ctx->avail_time_iter_req->resrc_vector)) * MAX_RESRC_DIM; + ctx->avail_time_iter_req->resrc_vector = xzmalloc (s); + ctx->avail_time_iter_set = 0; + ctx->reservations_root = RB_ROOT; + ctx->scheduled_points_root = RB_ROOT; + ctx->scheduled_resrcs_root = RB_ROOT; + scheduled_point_insert (ctx->p1, &(ctx->scheduled_points_root)); + scheduled_resrc_insert (ctx->p1, &(ctx->scheduled_resrcs_root)); +} + +static inline void planner_clean_internal (planner_t *ctx) +{ + if (ctx->avail_time_iter) { + zhash_destroy (&ctx->avail_time_iter); + ctx->avail_time_iter = NULL; + } + if (ctx->avail_time_iter_req) { + if (ctx->avail_time_iter_req->resrc_vector) + free (ctx->avail_time_iter_req->resrc_vector); + free (ctx->avail_time_iter_req); + ctx->avail_time_iter_req = NULL; + } + if (ctx->r_lookup) + zhashx_purge (ctx->r_lookup); + if (ctx->p1) { + scheduled_resrc_remove (ctx->p1, &(ctx->scheduled_resrcs_root)); + ctx->p1 = NULL; + } + + rb_node_t *n = NULL; + if ((n = rb_first(&(ctx->scheduled_points_root)))) + scheduled_points_destroy (n); + if ((n = rb_first(&(ctx->reservations_root)))) + reservations_destroy (n); +} + +static inline bool not_feasable (planner_t *ctx, plan_t *plan) +{ + return (plan->start < ctx->plan_start || plan->req->duration < 1 + || plan->start + (plan->req->duration - 1) > ctx->plan_end + || !plan->req->resrc_vector || 
plan->req->vector_dim > MAX_RESRC_DIM); +} + +static inline int plan_input_check (planner_t *ctx, plan_t *plan) +{ + int i = 0; + int rc = -1; + char key[32]; + if (!ctx || !plan || !plan->req || not_feasable (ctx, plan)) { + errno = EINVAL; + goto done; + } else { + int64_t sum = 0; + for (i = 0; i < plan->req->vector_dim; ++i) { + if (plan->req->resrc_vector[i] > ctx->total_resrc_vector[i]) { + errno = ERANGE; + goto done; + } + sum += plan->req->resrc_vector[i]; + } + if (sum <= 0) { + errno = ERANGE; + goto done; + } + } + + sprintf (key, "%jd", (intmax_t)plan->id); + if (zhashx_lookup (ctx->r_lookup, key) != NULL) { + errno = EINVAL; + goto done; + } + rc = 0; + +done: + return rc; +} + +static inline char *scheduled_point_to_string (scheduled_point_t *point) +{ + int i = 0; + size_t size = 0; + char *ptr = NULL; + FILE *fptr = NULL; + + if (!point) { + errno = EINVAL; + goto done; + } else if (!(fptr = open_memstream (&ptr, &size))) { + errno = ENOMEM; + goto done; + } + + if (fprintf (fptr, "\t SCHEDULED POINT INFO\n") < 0) + goto done; + else if (fprintf (fptr, "\t\t at: %jd\n", (intmax_t)point->at) < 0) + goto done; + + for (i = 0; i < MAX_RESRC_DIM; ++i) { + if (fprintf (fptr, "\t\t scheduled resources for type %d: %ju\n", i, + (intmax_t)point->scheduled_resrcs[i]) < 0) + goto done; + else if (fprintf (fptr, "\t\t remaining resources for type %d: %ju\n", i, + (intmax_t)point->remaining_resrcs[i]) < 0) + goto done; + } + +done: + if (fptr) + fclose (fptr); + return ptr; +} + +static inline int print_csv (planner_t *ctx, FILE *fptr, size_t d) +{ + rb_node_t *n = NULL; + for (n = rb_first(&(ctx->scheduled_points_root)); n; n = rb_next(n)) { + scheduled_point_t *data = container_of(n, scheduled_point_t, point_rb); + if (fprintf (fptr, "%jd %jd\n", (intmax_t)data->at, + (intmax_t)data->scheduled_resrcs[d]) < 0) + return -1; + } + return 0; +} + +static inline int print_gp (planner_t *ctx, FILE *fptr, + const char *csvfn, size_t d) +{ + int rc = 0; + if (!fptr 
|| !csvfn || d > MAX_RESRC_DIM || !ctx) { + errno = EINVAL; + return -1; + } + + rc = fprintf (fptr, "reset\n"); + rc += fprintf (fptr, "set terminal png size 1024 768\n"); + rc += fprintf (fptr, "set yrange [0:%jd]\n", (ctx->total_resrc_vector[d]+50)); + rc += fprintf (fptr, "set xlabel \"Scheduled Points in Time\"\n"); + rc += fprintf (fptr, "set ylabel \"Scheduled Resources of Type %d\"\n", (int)d); + rc += fprintf (fptr, "set title \"Scheduled Resources Over Time\"\n"); + rc += fprintf (fptr, "set key below\n"); + rc += fprintf (fptr, "plot \"%s\" using 1:2 with steps lw 2 \n", csvfn); + return rc; +} + + +/******************************************************************************* + * * + * PUBLIC PLANNER API * + * * + *******************************************************************************/ + +/******************************************************************************* + * C'Tor/D'Tor * + *******************************************************************************/ +planner_t *planner_new (int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len) +{ + int i = 0; + planner_t *ctx = NULL; + + if (plan_starttime < 0 || plan_duration < 1 + || !total_resrcs || len > MAX_RESRC_DIM) { + errno = EINVAL; + goto done; + } else { + for (i = 0; i < len; ++i) { + if (total_resrcs[i] > INT64_MAX) { + errno = ERANGE; + goto done; + } + } + } + + ctx = xzmalloc (sizeof (*ctx)); + ctx->resrc_dim = len; + ctx->r_lookup = zhashx_new (); + memset (ctx->total_resrc_vector, '\0', sizeof (ctx->total_resrc_vector)); + for (i = 0; i < len; ++i) + ctx->total_resrc_vector[i] = (int64_t)total_resrcs[i]; + for (i = 0; i < MAX_RESRC_DIM; ++i) + ctx->resrc_type_vector[i] = NULL; + planner_set_bound (ctx, plan_starttime, plan_duration); + +done: + return ctx; +} + +void planner_destroy (planner_t **ctx_p) +{ + if (ctx_p && *ctx_p) { + planner_clean_internal (*ctx_p); + zhashx_destroy (&((*ctx_p)->r_lookup)); + free (*ctx_p); + *ctx_p = NULL; + } 
+} + +int planner_reset (planner_t *ctx, int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len) +{ + int i = 0; + int rc = -1; + if (plan_starttime < 0 || plan_duration < 1 || len > MAX_RESRC_DIM) { + errno = EINVAL; + goto done; + } else if (total_resrcs && len) { + for (i = 0; i < len; ++i) { + if (total_resrcs[i] > INT64_MAX) { + errno = ERANGE; + goto done; + } + } + } + + planner_clean_internal (ctx); + if (total_resrcs && len) { + memset (ctx->total_resrc_vector, '\0', sizeof (ctx->total_resrc_vector)); + for (i = 0; i < len; ++i) + ctx->total_resrc_vector[i] = (int64_t)total_resrcs[i]; + } + planner_set_bound (ctx, plan_starttime, plan_duration); + rc = 0; + +done: + return rc; +} + +int64_t planner_plan_starttime (planner_t *ctx) +{ + return ctx? ctx->plan_start : -1; +} + +int64_t planner_plan_duration (planner_t *ctx) +{ + return ctx? (ctx->plan_end - ctx->plan_start) : -1; +} + +const uint64_t *planner_total_resrcs (planner_t *ctx) +{ + return ctx? (const uint64_t *)ctx->total_resrc_vector : NULL; +} + +size_t planner_total_resrcs_len (planner_t *ctx) +{ + return ctx? 
ctx->resrc_dim : -1; +} + +int planner_set_resrc_types (planner_t *ctx, const char **rts, size_t len) +{ + int i = 0, j = 0; + + if (rts == NULL || len > ctx->resrc_dim) + return -1; + + for (i = 0; i < len; ++i) { + if (ctx->resrc_type_vector[i] != NULL) { + free (ctx->resrc_type_vector[i]); + ctx->resrc_type_vector[i] = NULL; + } + ctx->resrc_type_vector[i] = xstrdup (rts[i]); + } + + for (j = i; j < ctx->resrc_dim; ++j) { + if (ctx->resrc_type_vector[i] != NULL) { + free (ctx->resrc_type_vector[i]); + ctx->resrc_type_vector[i] = NULL; + } + } + + return 0; +} + +const char *planner_resrc_index2type (planner_t *ctx, int i) +{ + if (i < 0 || i >= ctx->resrc_dim) + return NULL; + return ctx->resrc_type_vector[i]; +} + +int planner_resrc_type2index (planner_t *ctx, const char *t) +{ + int i = 0; + if (t == NULL) + return -1; + + for (i = 0; i < ctx->resrc_dim; ++i) { + if (strcmp (ctx->resrc_type_vector[i], t) == 0) + break; + } + return (i < ctx->resrc_dim)? i : -1; +} + +int64_t planner_avail_time_first (planner_t *ctx, req_t *req) +{ + if (!req || !ctx) { + errno = EINVAL; + return -1; + } + restore_track_points (ctx, &(ctx->scheduled_resrcs_root)); + copy_req (ctx->avail_time_iter_req, req); + ctx->avail_time_iter_set = 1; + return avail_time_internal (ctx, ctx->avail_time_iter_req); +} + +int64_t planner_avail_time_next (planner_t *ctx) +{ + if (!ctx || !ctx->avail_time_iter_set) { + errno = EINVAL; + return -1; + } + return avail_time_internal (ctx, ctx->avail_time_iter_req); +} + +int planner_avail_resources_at (planner_t *ctx, int64_t starttime, req_t *req) +{ + return avail_resources_at_internal (ctx, starttime, starttime + req->duration, + (int64_t *)req->resrc_vector, req->vector_dim, req->exclusive); +} + +reservation_t *planner_reservation_new (planner_t *ctx, plan_t *plan) +{ + int i = 0; + reservation_t *rsv = NULL; + char key[32]; + + if (plan_input_check (ctx, plan) == -1) + goto done; + + rsv = xzmalloc (sizeof (*rsv)); + rsv->start = plan->start; 
+ rsv->last = plan->start + plan->req->duration; + rsv->resv_id = plan->id; + memset (rsv->reserved_resrcs, '\0', sizeof (rsv->reserved_resrcs)); + rsv->resrc_dim = plan->req->vector_dim; + for (i = 0; i < plan->req->vector_dim; ++i) + rsv->reserved_resrcs[i] = (int64_t)plan->req->resrc_vector[i]; + rsv->added = 0; + rsv->start_p = NULL; + rsv->last_p = NULL; + sprintf (key, "%jd", (intmax_t)rsv->resv_id); + zhashx_insert (ctx->r_lookup, key, rsv); + +done: + return rsv; +} + +void planner_reservation_destroy (planner_t *ctx, reservation_t **rsv_p) +{ + char key[32]; + if (!rsv_p || !(*rsv_p)) { + errno = EINVAL; + return; + } + sprintf (key, "%jd", (intmax_t)(*rsv_p)->resv_id); + zhashx_delete (ctx->r_lookup, key); + if ((*rsv_p)->added) + planner_rem_reservation (ctx, (*rsv_p)); + + free ((*rsv_p)); + *rsv_p = NULL; +} + +int planner_add_reservation (planner_t *ctx, reservation_t *rsv, int validate) +{ + int rc = -1; + if (!rsv || !ctx) { + errno = EINVAL; + goto done2; + } else if (rsv->added) { + goto done2; + } else if (validate == 1) { + if (avail_resources_at_internal (ctx, rsv->start, + rsv->last, rsv->reserved_resrcs, rsv->resrc_dim, 0) == -1) + goto done2; + } + + rb_root_t *srr = &(ctx->scheduled_resrcs_root); + rb_root_t *rr = &(ctx->reservations_root); + restore_track_points (ctx, srr); + + /* tr is used to keep track of the scheduled points that + * need to be updated in the min-time resource tree + */ + zhash_t *tr = zhash_new (); + + /* update the specific start and last scheduled points + * if a point already exist, simply update; otherwise + * a new point object is inserted into scheduled point tree + */ + if ((rc = add_R (ctx, rsv, tr)) < 0) + goto done; + + /* + * Go through all of the reservations that each of the two scheduled + * points of the new reservation intersects and update relevant points + */ + reservation_t *i = NULL; + for (i = reservation_iter_first (rr, START(rsv), LAST(rsv)); i; + i = reservation_iter_next (i, START(rsv), 
LAST(rsv))) { + + /* The point(s) of the intercepting reservation intersects the new one. + * The point(s) of the new reservation intercept the old one. + */ + if (!add_Is (ctx, i, rsv, tr, 0) || !add_Is (ctx, rsv, i, tr, 1)) + goto done; + } + rsv->start_p->new_point = 0; + rsv->last_p->new_point = 0; + + /* Update the min-time resource tree w.r.t. tracked scheduled points */ + if ((rc = update_scheduled_resrcs (tr, srr)) < 0) + goto done; + + reservation_insert (rsv, rr); + rsv->added = 1; + rc = 0; + +done: + if (tr) + zhash_destroy (&tr); +done2: + return rc; +} + +int planner_rem_reservation (planner_t *ctx, reservation_t *rsv) +{ + int rc = -1; + if (!rsv || !ctx) { + errno = EINVAL; + goto done2; + } else if (rsv->added != 1) + goto done2; + + rb_root_t *srr = &(ctx->scheduled_resrcs_root); + rb_root_t *rr = &(ctx->reservations_root); + reservation_t *i = NULL; + restore_track_points (ctx, srr); + + /* tr is used to keep track of the scheduled points that + * need to be updated in the min-time resource tree + */ + zhash_t *tr = zhash_new (); + + /* update the specific start and last scheduled points + * if a point already exist, simply update; otherwise + * a new point object is inserted into scheduled point tree + */ + if ((rc = sub_R (ctx, rsv, tr)) < 0) + goto done; + + /* + * Go through all of the reservations that each of the two scheduled + * points of the new reservation intersects and update relevant points + */ + for (i = reservation_iter_first (rr, START(rsv), LAST(rsv)); i; + i = reservation_iter_next (i, START(rsv), LAST(rsv))) { + if (!sub_Is (ctx, i, rsv, tr)) + goto done; + } + + if ((rc = update_scheduled_resrcs (tr, srr)) < 0) + goto done; + + reservation_remove (rsv, rr); + FREE_NOREF_POINT(rsv); + rsv->added = 0; + rc = 0; + +done: + if (tr) + zhash_destroy (&tr); +done2: + return rc; +} + +reservation_t *planner_reservation_first (planner_t *ctx) +{ + int64_t s = ctx->plan_start; + int64_t e = ctx->plan_end; + return 
reservation_iter_first (&(ctx->reservations_root), s, e);
}

/* Reservation following rsv in start-time order, or NULL at the end. */
reservation_t *planner_reservation_next (planner_t *ctx, reservation_t *rsv)
{
    return reservation_iter_next (rsv, ctx->plan_start, ctx->plan_end);
}

/* Look up a reservation by its numeric id; NULL if not registered. */
reservation_t *planner_reservation_by_id (planner_t *ctx, int64_t id)
{
    char key[32];
    /* key matches the format used at registration in planner_reservation_new */
    sprintf (key, "%jd", (intmax_t)id);
    return zhashx_lookup (ctx->r_lookup, key);
}

/* Look up a reservation by its decimal-string id; NULL on NULL/missing. */
reservation_t *planner_reservation_by_id_str (planner_t *ctx, const char *str)
{
    return (str)? zhashx_lookup (ctx->r_lookup, str) : NULL;
}

/* 0 if rsv has been added to the planner, -1 otherwise (errno=EINVAL
 * on NULL arguments). */
int planner_reservation_added (planner_t *ctx, reservation_t *rsv)
{
    if (!ctx || !rsv) {
        errno = EINVAL;
        return -1;
    }
    return rsv->added? 0 : -1;
}

/* Start time of rsv, or -1 with errno=EINVAL on NULL arguments. */
int64_t planner_reservation_starttime (planner_t *ctx, reservation_t *rsv)
{
    if (!ctx || !rsv) {
        errno = EINVAL;
        return -1;
    }
    return rsv->start;
}

/* End ("last") time of rsv, or -1 with errno=EINVAL on NULL arguments. */
int64_t planner_reservation_endtime (planner_t *ctx, reservation_t *rsv)
{
    if (!ctx || !rsv) {
        errno = EINVAL;
        return -1;
    }
    return rsv->last;
}

/* Borrowed pointer to rsv's reserved-quantity vector; *len receives its
 * dimension.  NULL with errno=EINVAL on NULL arguments. */
const uint64_t *planner_reservation_reserved (planner_t *ctx, reservation_t *rsv,
                                              size_t *len)
{
    if (!ctx || !rsv) {
        errno = EINVAL;
        return NULL;
    }
    *len = rsv->resrc_dim;
    return (const uint64_t *) rsv->reserved_resrcs;
}

/* Render rsv as a malloc'd human-readable string (caller frees), built
 * with open_memstream; NULL on EINVAL/ENOMEM.  On a mid-stream fprintf
 * failure a truncated string may be returned. */
char *planner_reservation_to_string (planner_t *ctx, reservation_t *rsv)
{
    int i = 0;
    size_t size = 0;
    char *ptr = NULL;
    FILE *fptr = NULL;

    if (!rsv) {
        errno = EINVAL;
        goto done;
    } else if (!(fptr = open_memstream (&ptr, &size))) {
        errno = ENOMEM;
        goto done;
    }

    if (fprintf (fptr, "Reservation Info:\n") < 0)
        goto done;
    else if (fprintf (fptr, "\t id: %jd\n", (intmax_t)rsv->resv_id) < 0)
        goto done;
    else if (fprintf (fptr, "\t start: %jd\n", (intmax_t)rsv->start) < 0)
        goto done;
    else if (fprintf (fptr, "\t last: %jd\n", (intmax_t)rsv->last) < 0)
        goto done;

    for (i = 0; i < rsv->resrc_dim; ++i) {
        if (fprintf (fptr, " - reserved_resrcs type %d: %ju\n", i,
(intmax_t)rsv->reserved_resrcs[i]) < 0) + goto done; + } + + if (fprintf (fptr, "%s", scheduled_point_to_string (rsv->start_p)) < 0) + goto done; + else if (fprintf (fptr, "%s", scheduled_point_to_string (rsv->last_p)) < 0) + goto done; + +done: + if (fptr) + fclose (fptr); + return ptr; +} + +int planner_print_gnuplot (planner_t *ctx, const char *fname, size_t d) +{ + int rc = -1; + char *path1 = NULL; + char *path2 = NULL; + FILE *fptr1 = NULL; + FILE *fptr2 = NULL; + + if (!fname || d > MAX_RESRC_DIM || !ctx) { + errno = EINVAL; + goto done; + } + + if (!(path1 = xasprintf ("%s.csv", fname))) + goto done; + else if (!(path2 = xasprintf ("%s.gp", fname))) + goto done; + else if (!(fptr1 = fopen (path1, "w"))) + goto done; + else if (!(fptr2 = fopen (path2, "w"))) + goto done; + else if (print_csv (ctx, fptr1, d) < 0) + goto done; + else if (print_gp (ctx, fptr2, path1, d) < 0) + goto done; + + rc = 0; + +done: + if (fptr1) + fclose (fptr1); + if (fptr2) + fclose (fptr2); + if (path1) + free (path1); + if (path2) + free (path2); + return rc; +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/resrc/planner.h b/resrc/planner.h new file mode 100644 index 000000000..a7e316949 --- /dev/null +++ b/resrc/planner.h @@ -0,0 +1,330 @@ +/*****************************************************************************\ + * Copyright (c) 2014 Lawrence Livermore National Security, LLC. Produced at + * the Lawrence Livermore National Laboratory (cf, AUTHORS, DISCLAIMER.LLNS). + * LLNL-CODE-658032 All rights reserved. + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the license, or (at your option) + * any later version. 
+ * + * Flux is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * See also: http://www.gnu.org/licenses/ +\*****************************************************************************/ + +/* Planner provides a simple API and efficient mechanisms to allow + * a Flux scheduler to keep track of the state of resource aggregates + * of a composite resource. + * + * In a resource hierarchy used by flux-sched (e.g., hardware + * hierarchy), a composite resource is represented as a tree graph + * in which a higher-level vertex has essentially pointers to its + * immediate child resources, each of which also has pointers to + * its immediate children etc. With such an organization, the + * scheduler must essentially walk "all" of the vertices below any + * composite resource in order to determine if the "sub-resources" + * requirement can be met. + * + * When the scheduler performs such walks excessively in particular, + * on large graph, however, this can quickly become a performance and + * scalability bottleneck. Planner addresses this problem by allowing + * the scheduler to track the "sub-resources" summary information + * (i.e., aggregates) efficiently at each upper-level composite + * resource vertex and to use this aggregate information to prune + * unneccessary descent down into the subtree. + * + * Planner offers update and query APIs to support these schemes. 
+ * Through a planner API, the scheduler can ask a high-level composite + * a question: "given a request of x, y, z "sub-resources" in aggregate + * for d time unit, when is the earliest time t at which this request + * can be satisfied?" + * Another example would be to answer, "from time t to t+d, does + * this composite resource vertex has y, z sub-resources available + * in aggregate. By composing these queries at different levels in a + * resource hierarchy, the scheduler can significantly reduce the + * numbers of tree walks. Ultimately, planner will be integrated + * into our preorder tree-walk pruning filter in our future + * visitor-pattern-based resource matching scheme. + */ + +#ifndef PLANNER_H +#define PLANNER_H + +#include +#include +#include + +#define MAX_RESRC_DIM 5 + +typedef struct request { + uint64_t duration; + uint64_t *resrc_vector; + size_t vector_dim; + int exclusive; +} req_t; + +typedef struct plan { + int64_t id; + int64_t start; + struct request *req; +} plan_t; + +typedef struct reservation reservation_t; +typedef struct planner planner_t; + +/* Planner constructor: + * + * - plan_starttime: the earliest schedulable point (in time) + * planned by this planner. + * - plan_duration: the span of this planner--i.e., all reservations + * must end before plan_starttime + plan_duration. + * - total_resrcs: an array of size of len containing + * total numbers of available resources (of up to + * five different types) used in this planner. Each + * element of this array would often represent the + * total number of each sub-resource under the target + * composite resource. Note that nothing prevents + * one from using this to represent the numbers + * or amounts of available resources directly at + * the resource vertex itself, though. 
+ * - len: must be less than or equal to MAX_RESRC_DIM + */ +planner_t *planner_new (int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len); + +/* Reset the planner with new time bound and optionally resource quantities. + * Destroy all of the existing reservations. + * + * - plan_starttime: the earliest schedulable point (in time) + * planned by this planner. + * - plan_duration: the span of this planner--i.e., all reservations + * must end before plan_starttime + plan_duration. + * - total_resrcs: an array of size of len containing + * total numbers of available resources (of up to + * five different types) used in this planner. Each + * element of this array would often represent the + * total number of each sub-resource under the target + * composite resource. Note that nothing prevents + * one from using this to represent the numbers + * or amounts of available resources directly at + * the resource vertex itself, though. + * If NULL, the existing resource quantities will be used. + * - len: must be less than or equal to MAX_RESRC_DIM. + * pass 0, if the existing resource quantities must + * be used. + */ +int planner_reset (planner_t *ctx, int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len); + +/* Planner destructor: + * + * - ctx_p: a pointer to the opaque planner context returned + * from planner_new. + */ +void planner_destroy (planner_t **ctx_p); + +/* Getters: + */ +int64_t planner_plan_starttime (planner_t *ctx); +int64_t planner_plan_duration (planner_t *ctx); +const uint64_t *planner_total_resrcs (planner_t *ctx); +size_t planner_total_resrcs_len (planner_t *ctx); + +/* Set resource type strings corresponding to resources planned by this + * planner. rts is an array of resource type strings: the first element + * is the resource type name of the first-order resource of this planner, + * the second is the second-order, and so on and so forth. 
len is the + * size of this array, and this must not exceed the resource dimension + * set for this planner. + * + * - ctx: the opaque planner context returned from planner_new + * - rts: an array of resource type strings + * - len: the length of rts + */ +int planner_set_resrc_types (planner_t *ctx, const char **rts, size_t len); + +/* Return the name of the resource type corresponding to the i_th order + * resource. + * + * - ctx: the opaque planner context returned from planner_new + * - i: order index of the target resource + */ +const char *planner_resrc_index2type (planner_t *ctx, int i); + +/* Return the index of the resource type name, t + * + * - ctx: the opaque planner context returned from planner_new + * - t: the name string of the resource type + */ +int planner_resrc_type2index (planner_t *ctx, const char *t); + +/* Find the earliest point in time when the request can be reserved + * and return that time. Note that this only returns a point at which + * resource state changes. In other words, if the number of available + * resources change at t1 and t2, the possible returns are only t1 and + * t2, not t1+1 or t1+2 even if the latter points also satisfy the + * request. Return -1 on error and set errno. + * + * - ctx: the opaque planner context returned from planner_new + * - req: request specifying the resource amounts and duration + * duration must be greater than or equal to 1 (time units) + */ +int64_t planner_avail_time_first (planner_t *ctx, req_t *req); + +/* Find the next earliest point in time for the same request queried + * before through either planner_avail_time_first or planner_avail_time_next + * and and return that time. Note that this only returns a point at which + * resource state changes. In other words, if the number of available + * resources change at t1 and t2, the possible returns are only t1 and + * t2, not t1+1 or t1+2 even if the latter points also satisfy the + * request. Return -1 on error and set errno. 
+ *
+ * - ctx: the opaque planner context returned from planner_new
+ */
+int64_t planner_avail_time_next (planner_t *ctx);
+
+/* Return 0 if the given request consisting of numbers of resources and
+ * duration can be satisfied at starttime. Unlike planner_avail_time*
+ * functions, this works with an arbitrary time within the valid
+ * planner span. Return -1 if the request cannot be satisfied or an error
+ * is encountered in which case errno is set.
+ *
+ * - ctx: the opaque planner context returned from planner_new
+ * - starttime: start time at which the resource request must
+ * be available
+ * - req: request specifying the resource amounts and duration. duration
+ * must be greater than or equal to 1 (time unit)
+ */
+int planner_avail_resources_at (planner_t *ctx, int64_t starttime, req_t *req);
+
+/* Allocate and return an object of reservation_t (opaque) type, being built
+ * of the passed-in plan. The object must be freed using
+ * planner_reservation_destroy when it is not needed.
+ *
+ * - ctx: the opaque planner context returned from planner_new
+ * - plan: describe the resource and duration requests. The start
+ * time of this request should have been previously determined
+ * to be satisfiable by the planner_avail_time_* functions above.
+ * Duration request in the plan must be greater than or equal
+ * to 2 (time units) as a reservation is represented as two
+ * unique time points.
+ */
+reservation_t *planner_reservation_new (planner_t *ctx, plan_t *plan);
+
+/* Add a new reservation to the planner and update the planner's
+ * resource/scheduled-point state. It resets the planner's iterator
+ * so that planner_avail_time_next will be made to return the
+ * earliest schedulable point.
+ *
+ * Return -1 on error and set errno. User should check and print
+ * errno if -1. Otherwise return 0.
+ *
+ * EINVAL: invalid argument
+ * EKEYREJECTED: can't update planner's internal data structures
+ * ERANGE: resource state became out of range, e.g., reserving more than
+ * what is available (rsv wasn't created with an available time returned
+ * and validated using a planner_avail family function)
+ *
+ * - ctx: the opaque planner context returned from planner_new
+ * - rsv: new reservation.
+ * - validate: if 1 is passed, an extra check is performed to verify
+ * that rsv is a valid reservation.
+ */
+int planner_add_reservation (planner_t *ctx, reservation_t *rsv, int validate);
+
+/* Remove the existing reservation from the planner and update its
+ * state. It resets the planner's iterator such that planner_avail_time_next
+ * will be made to return the earliest schedulable point.
+ *
+ * Return -1 on error and set errno; otherwise return 0.
+ *
+ * EINVAL: invalid argument
+ * EKEYREJECTED: can't update one of planner's internal data structures
+ * ERANGE: resource state became invalid, e.g., reserving more than
+ * what is available (rsv wasn't created with an available time returned
+ * and validated using a planner_avail family function)
+ *
+ * - ctx: the opaque planner context returned from planner_new
+ * - rsv: an existing reservation
+ */
+int planner_rem_reservation (planner_t *ctx, reservation_t *rsv);
+
+/* Destroy the reservation object. If rsv has not been removed (using
+ * planner_rem_reservation), this call first removes the rsv before
+ * deallocating its memory.
+ *
+ * - ctx: the opaque planner context returned from planner_new
+ * - rsv_p: a pointer to the reservation object returned
+ * from planner_reservation_new
+ */
+void planner_reservation_destroy (planner_t *ctx, reservation_t **rsv_p);
+
+/* Return the reservation with the earliest start time. One should
+ * use this function to get the first reservation from which to iterate
+ * through subsequent reservations.
This scheme allows you to + * iterate through the reservations sorted in starting time order. + * + * - ctx: the opaque planner context returned from planner_new + */ +reservation_t *planner_reservation_first (planner_t *ctx); + +/* Return the next reservation planned in the planner. Please see the + * comments above for planner_reservation_first. planner_reservation_next + * returns the reservation that appears right after rsv in start-time + * sorted order. + * + * - ctx: the opaque planner context returned from planner_new + * - rsv: a reservation object returned previously + */ +reservation_t *planner_reservation_next (planner_t *ctx, reservation_t *rsv); + +/* Return the reservation keyed by the id. id is the id field + * of the plan_t field given to planner_reservation_new. + * Return NULL when no reservation by id exists. + */ +reservation_t *planner_reservation_by_id (planner_t *ctx, int64_t id); +reservation_t *planner_reservation_by_id_str (planner_t *ctx, const char *str); + +/* Return 0 if rsv has been added to the planner; otherwise -1 + */ +int planner_reservation_added (planner_t *ctx, reservation_t *rsv); + +/* Return a string containing the information on a reservation. The + * returned string must be deallocated by the caller using free. 
+ * + * - ctx: the opaque planner context returned from planner_new + * - rsv: a reservation object + */ +char *planner_reservation_to_string (planner_t *ctx, reservation_t *rsv); + +/* Getters for reservation_t: + */ +int64_t planner_reservation_starttime (planner_t *ctx, reservation_t *rsv); +int64_t planner_reservation_endtime (planner_t *ctx, reservation_t *rsv); +const uint64_t *planner_reservation_reserved (planner_t *ctx, + reservation_t *rsv, size_t *len); + +/* Print the planner information in the files that can be visualized using gnuplot + * + * - ctx: the opaque planner context returned from planner_new + * - base_fname: base filename (.csv and .gp) + * to render: % gnuplot .gp > planner_out.png + * - d: which resource dimension to print + */ +int planner_print_gnuplot (planner_t *ctx, const char *base_fname, size_t d); + +#endif /* PLANNER_H */ + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/resrc/resrc.c b/resrc/resrc.c index 58fc12720..ba92577d2 100644 --- a/resrc/resrc.c +++ b/resrc/resrc.c @@ -42,33 +42,6 @@ #include "src/common/libutil/xzmalloc.h" - -typedef struct window { - int64_t starttime; - int64_t endtime; - const char *job_id; -} window_t; - -/* static window_t * window_new (int64_t starttime, int64_t endtime) { */ -/* window_t *ret = malloc (sizeof *ret); */ -/* ret->starttime = starttime; */ -/* ret->endtime = endtime; */ -/* return ret; */ -/* } */ - -static void window_destructor (void **window_v) { - if (window_v) { - free(*window_v); - *window_v = NULL; - } -} - -static void *window_dup (const void *window) { - window_t * ret = malloc(sizeof *ret); - memcpy(ret, window, sizeof *ret); - return ret; -} - struct resrc { char *type; char *path; @@ -87,11 +60,47 @@ struct resrc { zhash_t *tags; zhash_t *allocs; zhash_t *reservtns; - zhashx_t *twindow; + planner_t *twindow; }; static zhash_t *resrc_hash = NULL; +static inline plan_t *plan_new (int64_t job_id, int64_t start, uint64_t duration, + int exclusive, size_t len, ...) 
+{ + plan_t *plan = NULL; + int i = 0; + + va_list ap; + va_start(ap, len); + plan = xzmalloc (sizeof (*plan)); + plan->id = job_id; + plan->start = start; + plan->req = xzmalloc (sizeof (*(plan->req))); + plan->req->resrc_vector = xzmalloc (len * sizeof (*(plan->req->resrc_vector))); + plan->req->vector_dim = len; + for (i=0; i < len; ++i) + plan->req->resrc_vector[i] = (uint64_t)va_arg(ap, int); + plan->req->duration = duration; + plan->req->exclusive = exclusive; + va_end(ap); + return plan; +} + +static inline void plan_destroy (plan_t **plan_p) +{ + if (plan_p && *plan_p) { + if ((*plan_p)->req) { + if ((*plan_p)->req->resrc_vector) + free ((*plan_p)->req->resrc_vector); + free ((*plan_p)->req); + } + free (*plan_p); + *plan_p = NULL; + } +} + + /*************************************************************************** * API ***************************************************************************/ @@ -174,228 +183,30 @@ size_t resrc_available (resrc_t *resrc) return 0; } -size_t resrc_available_at_time (resrc_t *resrc, int64_t time) -{ - const char *id_ptr = NULL; - window_t *window = NULL; - size_t *size_ptr = NULL; - - size_t available = resrc->size; - - if (time < 0) { - time = epochtime(); - } - - // Check that the time is during the resource lifetime - window = zhashx_lookup (resrc->twindow, "0"); - if (window && (time < window->starttime || time > window->endtime)) { - return 0; - } - - // Iterate over all allocation windows in resrc. We iterate using - // the hash to avoid copying the entire hash every time, using - // zhashx_cursor to retrieve the key to lookup the size in resrc->allocs. - window = zhashx_first (resrc->twindow); - while (window) { - id_ptr = zhashx_cursor(resrc->twindow); - if (!strcmp (id_ptr, "0")) { - /* This is the resource lifetime entry and should not be - * evaluated as an allocation or reservation entry */ - window = zhashx_next (resrc->twindow); - continue; - } - - // Does time intersect with window? 
- if (time >= window->starttime && time <= window->endtime) { - // Decrement available by allocation and/or reservation size - size_ptr = (size_t*)zhash_lookup (resrc->allocs, id_ptr); - if (size_ptr) { - available -= *size_ptr; - } - size_ptr = (size_t*)zhash_lookup (resrc->reservtns, id_ptr); - if (size_ptr) { - available -= *size_ptr; - } - } - - window = zhashx_next (resrc->twindow); - } - - return available; -} - -static int compare_windows_starttime (const void *item1, const void *item2) +/* Note: I think quantities should be changed to either unsigned int or int64_t */ +int resrc_available_at_time (resrc_t *resrc, int64_t time, size_t reqrd_size) { - const window_t * lhs = item1, *rhs = item2; - if (lhs->starttime < rhs->starttime) - return -1; - if (lhs->starttime == rhs->starttime) - return 0; - return 1; + req_t req; + planner_t *pl = resrc->twindow; + int64_t start = time; + req.duration = 1; + req.resrc_vector = (uint64_t *)&reqrd_size; + req.vector_dim = 1; + req.exclusive = 0; + return planner_avail_resources_at (pl, start, &req); } -static int compare_windows_endtime (const void *item1, const void *item2) +int resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, + int64_t range_endtime, size_t reqrd_size, bool exclusive) { - const window_t * lhs = item1, *rhs = item2; - if (lhs->endtime < rhs->endtime) - return -1; - if (lhs->endtime == rhs->endtime) - return 0; - return 1; -} - -size_t resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, - int64_t range_endtime, bool exclusive) -{ - window_t *window = NULL; - const char *id_ptr = NULL; - int64_t curr_endtime = 0; - int64_t curr_starttime = 0; - size_t curr_available = 0; - size_t min_available = 0; - size_t *alloc_ptr = NULL; - size_t *reservtn_ptr = NULL; - size_t *size_ptr = NULL; - zlistx_t *matching_windows = NULL; - - if (range_starttime == range_endtime) { - return resrc_available_at_time (resrc, range_starttime); - } - - matching_windows = zlistx_new (); - 
/* zlistx_set_duplicator(matching_windows, window_dup); */ - zlistx_set_destructor(matching_windows, window_destructor); - - // Check that the time is during the resource lifetime - window = zhashx_lookup (resrc->twindow, "0"); - if (window) { - curr_starttime = window->starttime; - curr_endtime = window->endtime; - if ( (range_starttime < curr_starttime) || - (range_endtime > curr_endtime) ) { - return 0; - } - } - - // Map allocation window strings to JSON objects. Filter out - // windows that don't overlap with the input range. Then add the - // job id to the JSON obj and insert the JSON obj into the - // "matching windows" list. - window = zhashx_first (resrc->twindow); - while (window) { - id_ptr = zhashx_cursor(resrc->twindow); - if (!strcmp (id_ptr, "0")) { - /* This is the resource lifetime entry and should not be - * evaluated as an allocation or reservation entry */ - window = zhashx_next (resrc->twindow); - continue; - } - curr_starttime = window->starttime; - curr_endtime = window->endtime; - - // Does input range intersect with window? - if ( !((curr_starttime < range_starttime && - curr_endtime < range_starttime) || - (curr_starttime > range_endtime && - curr_endtime > range_endtime)) ) { - - /* If the sample requires exclusive access and we are - * here, then we now know that exclusivity cannot be - * granted over the requested range. Leave now. */ - if (exclusive) - goto ret; - - alloc_ptr = (size_t*)zhash_lookup (resrc->allocs, id_ptr); - reservtn_ptr = (size_t*)zhash_lookup (resrc->reservtns, id_ptr); - if (alloc_ptr || reservtn_ptr) { - // Add the window key and insert JSON obj into the - // "matching windows" list - window_t * new_window = window_dup (window); - new_window->job_id = id_ptr; - zlistx_add_end (matching_windows, new_window); - } - } - - window = zhashx_next (resrc->twindow); - } - - // Duplicate the "matching windows" list and then sort the 2 lists - // based on start and end times. 
We will walk through these lists - // in order to find the minimum available during the input range - zlistx_t *start_windows = matching_windows; - zlistx_set_comparator(start_windows, compare_windows_starttime); - zlistx_t *end_windows = zlistx_dup (start_windows); - // Do not free items in this list, they are owned by the start_windows - // list - zlistx_set_destructor(end_windows, NULL); - zlistx_set_comparator(end_windows, compare_windows_endtime); - zlistx_sort (start_windows); - zlistx_sort (end_windows); - - window_t *curr_start_window = zlistx_first (start_windows); - window_t *curr_end_window = zlistx_first (end_windows); - - min_available = resrc->size; - curr_available = resrc->size; - - // Start iterating over the windows and calculating the min - // available - // - // OPTIMIZE: stop iterating when curr_start_window == NULL Once we - // run out of start windows, curr available cannot get any - // smaller; we have hit our min. Just need to test to verify that - // this optimziation is correct/safe. 
- while (curr_start_window) { - curr_starttime = curr_start_window->starttime; - curr_endtime = curr_end_window->endtime; - - if ((curr_start_window) && - (curr_starttime < curr_endtime)) { - // New range is starting, get its size and subtract it - // from current available - size_ptr = (size_t*)zhash_lookup (resrc->allocs, curr_start_window->job_id); - if (size_ptr) - curr_available -= *size_ptr; - size_ptr = (size_t*)zhash_lookup (resrc->reservtns, curr_start_window->job_id); - if (size_ptr) - curr_available -= *size_ptr; - curr_start_window = zlistx_next (start_windows); - if (curr_start_window) { - curr_starttime = curr_start_window->starttime; - } else { - curr_starttime = TIME_MAX; - } - } else if ((curr_end_window) && - (curr_endtime < curr_starttime)) { - // A range just ended, get its size and add it back into - // current available - id_ptr = curr_end_window->job_id; - size_ptr = (size_t*)zhash_lookup (resrc->allocs, id_ptr); - if (size_ptr) - curr_available += *size_ptr; - size_ptr = (size_t*)zhash_lookup (resrc->reservtns, id_ptr); - if (size_ptr) - curr_available += *size_ptr; - curr_end_window = zlistx_next (end_windows); - if (curr_end_window) { - curr_endtime = curr_end_window->endtime; - } else { - curr_endtime = TIME_MAX; - } - } else { - fprintf (stderr, - "%s - ERR: Both start/end windows are empty\n", - __FUNCTION__); - } - min_available = (curr_available < min_available) ? curr_available : - min_available; - } - - zlistx_destroy (&end_windows); -ret: - zlistx_destroy (&matching_windows); - - return min_available; + req_t req; + planner_t *pl = resrc->twindow; + int64_t start = range_starttime; + req.duration = (uint64_t)(range_endtime - range_starttime + 1); + req.resrc_vector = (uint64_t *)&reqrd_size; + req.vector_dim = 1; + req.exclusive = exclusive? 
1 : 0; + return planner_avail_resources_at (pl, start, &req); } char* resrc_state (resrc_t *resrc) @@ -424,6 +235,13 @@ char* resrc_state (resrc_t *resrc) return str; } +planner_t *resrc_twindow (resrc_t *resrc) +{ + if (resrc) + return resrc->twindow; + return NULL; +} + resrc_tree_t *resrc_phys_tree (resrc_t *resrc) { if (resrc) @@ -445,13 +263,6 @@ size_t resrc_size_reservtns (resrc_t *resrc) return 0; } -int resrc_twindow_insert (resrc_t *resrc, const char *key, int64_t starttime, int64_t endtime) -{ - const window_t w = {.starttime = starttime, .endtime = endtime}; - int rc = zhashx_insert (resrc->twindow, key, (void *)&w); - return rc; -} - int resrc_graph_insert (resrc_t *resrc, const char *name, resrc_flow_t *flow) { int rc = zhash_insert (resrc->graphs, name, flow); @@ -511,9 +322,7 @@ resrc_t *resrc_new_resource (const char *type, const char *path, resrc->reservtns = zhash_new (); resrc->properties = zhash_new (); resrc->tags = zhash_new (); - resrc->twindow = zhashx_new (); - zhashx_set_destructor(resrc->twindow, window_destructor); - zhashx_set_duplicator(resrc->twindow, window_dup); + resrc->twindow = planner_new (0, TIME_MAX, (uint64_t *)&size, 1); } return resrc; @@ -539,10 +348,11 @@ resrc_t *resrc_copy_resource (resrc_t *resrc) new_resrc->reservtns = zhash_dup (resrc->reservtns); new_resrc->properties = zhash_dup (resrc->properties); new_resrc->tags = zhash_dup (resrc->tags); - if (resrc->twindow) - new_resrc->twindow = zhashx_dup (resrc->twindow); - else - new_resrc->twindow = NULL; + /* Note: we don't make a deep copy of twindow in this copy constructor yet + * @lipari and @dongahn want to see user cases of this constructor + * before deciding the semantics of member copies. 
+ */ + new_resrc->twindow = NULL; } return new_resrc; @@ -574,7 +384,7 @@ void resrc_resource_destroy (void *object) zhash_destroy (&resrc->properties); zhash_destroy (&resrc->tags); if (resrc->twindow) - zhashx_destroy (&resrc->twindow); + planner_destroy (&(resrc->twindow)); free (resrc); } } @@ -670,8 +480,8 @@ resrc_t *resrc_new_from_json (json_t *o, resrc_t *parent, bool physical) else endtime = TIME_MAX; } - - resrc_twindow_insert (resrc, "0", starttime, endtime); + int64_t d = endtime - starttime + 1; + planner_reset (resrc->twindow, starttime, d, NULL, 0); } } @@ -843,7 +653,8 @@ static resrc_t *resrc_new_from_hwloc_obj (hwloc_obj_t obj, resrc_t *parent, /* add twindow */ if ((!strncmp (type, "node", 5)) || (!strncmp (type, "core", 5))) { - resrc_twindow_insert (resrc, "0", epochtime (), TIME_MAX); + int64_t e = epochtime (); + planner_reset (resrc->twindow, e, TIME_MAX - e, NULL, 0); } } ret: @@ -1061,29 +872,15 @@ resrc_t *resrc_create_cluster (char *cluster) bool resrc_walltime_match (resrc_t *resrc, resrc_reqst_t *request, size_t reqrd_size) { - bool rc = false; - window_t *window = NULL; - int64_t endtime = resrc_reqst_endtime (request); - int64_t starttime = resrc_reqst_starttime (request); - size_t available = 0; - - /* If request endtime is greater than the lifetime of the - resource, then return false */ - window = zhashx_lookup (resrc->twindow, "0"); - if (window) { - if (endtime > (window->endtime - 10)) { - return false; - } - } - - /* find the minimum available resources during the requested time - * range */ - available = resrc_available_during_range (resrc, starttime, endtime, - resrc_reqst_exclusive (request)); - - rc = (available >= reqrd_size); - - return rc; + req_t req; + planner_t *pl = resrc->twindow; + int64_t start = resrc_reqst_starttime (request); + req.duration = (uint64_t)(resrc_reqst_endtime (request) - start); + uint64_t sz = (uint64_t)reqrd_size; + req.resrc_vector = &sz; + req.vector_dim = 1; + req.exclusive = 
resrc_reqst_exclusive (request)? 1: 0; + return (planner_avail_resources_at (pl, start, &req) == 0); } bool resrc_match_resource (resrc_t *resrc, resrc_reqst_t *request, @@ -1259,14 +1056,16 @@ static int resrc_allocate_resource_in_time (resrc_t *resrc, int64_t job_id, char *id_ptr = NULL; int rc = -1; size_t *size_ptr; - size_t available; + plan_t *pin = NULL; + uint64_t d = (uint64_t)(endtime - starttime + 1); + + pin = plan_new (job_id, starttime, d, 0, 1, resrc->staged); /* Don't bother going through the exclusivity checks. We will * save cycles and assume the selected resources are * exclusively available if that was the criteria of the * search. */ - available = resrc_available_during_range (resrc, starttime, endtime, false); - if (resrc->staged > available) + if (planner_avail_resources_at (resrc->twindow, starttime, pin->req) != 0) goto ret; id_ptr = xasprintf ("%"PRId64"", job_id); @@ -1277,11 +1076,16 @@ static int resrc_allocate_resource_in_time (resrc_t *resrc, int64_t job_id, resrc->staged = 0; /* add walltime */ - resrc_twindow_insert (resrc, id_ptr, starttime, endtime); - - rc = 0; + reservation_t *rsv = NULL; + if (!(rsv = planner_reservation_new (resrc->twindow, pin))) + goto ret; + else if ((rc = planner_add_reservation (resrc->twindow, rsv, 0)) == -1) + goto ret; free (id_ptr); + ret: + if (pin) + plan_destroy (&pin); return rc; } @@ -1364,14 +1168,16 @@ static int resrc_reserve_resource_in_time (resrc_t *resrc, int64_t job_id, char *id_ptr = NULL; int rc = -1; size_t *size_ptr; - size_t available; + plan_t *pin = NULL; + reservation_t *rsv = NULL; + uint64_t d = (uint64_t)(endtime - starttime + 1); /* Don't bother going through the exclusivity checks. We will * save cycles and assume the selected resources are * exclusively available if that was the criteria of the * search. 
*/ - available = resrc_available_during_range (resrc, starttime, endtime, false); - if (resrc->staged > available) + pin = plan_new (job_id, starttime, d, 0, 1, resrc->staged); + if (planner_avail_resources_at (resrc->twindow, starttime, pin->req) != 0) goto ret; id_ptr = xasprintf ("%"PRId64"", job_id); @@ -1382,11 +1188,15 @@ static int resrc_reserve_resource_in_time (resrc_t *resrc, int64_t job_id, resrc->staged = 0; /* add walltime */ - resrc_twindow_insert (resrc, id_ptr, starttime, endtime); - - rc = 0; + if (!(rsv = planner_reservation_new (resrc->twindow, pin))) + goto ret; + else if ((rc = planner_add_reservation (resrc->twindow, rsv, 0)) == -1) + goto ret; free (id_ptr); + ret: + if (pin) + plan_destroy (&pin); return rc; } @@ -1455,8 +1265,11 @@ int resrc_release_allocation (resrc_t *resrc, int64_t rel_job) if (size_ptr) { if (resrc->state == RESOURCE_ALLOCATED) resrc->available += *size_ptr; - else - zhashx_delete (resrc->twindow, id_ptr); + else { + reservation_t *rsv = planner_reservation_by_id_str (resrc->twindow, + (const char*)id_ptr); + planner_reservation_destroy (resrc->twindow, &rsv); + } zhash_delete (resrc->allocs, id_ptr); if ((resrc->state != RESOURCE_INVALID) && !zhash_size (resrc->allocs)) { @@ -1498,7 +1311,9 @@ int resrc_release_all_reservations (resrc_t *resrc) resrc->available += *size_ptr; else { id_ptr = (char *)zhash_cursor (resrc->reservtns); - zhashx_delete (resrc->twindow, id_ptr); + reservation_t *rsv = planner_reservation_by_id_str (resrc->twindow, + (const char*)id_ptr); + planner_reservation_destroy (resrc->twindow, &rsv); } size_ptr = zhash_next (resrc->reservtns); } diff --git a/resrc/resrc.h b/resrc/resrc.h index c9c6665a6..ab73beb8c 100644 --- a/resrc/resrc.h +++ b/resrc/resrc.h @@ -6,8 +6,9 @@ */ #include +#include "planner.h" -#define TIME_MAX INT64_MAX +#define TIME_MAX INT64_MAX - 10 typedef struct hwloc_topology * TOPOLOGY; typedef struct resrc resrc_t; @@ -88,22 +89,28 @@ size_t resrc_size (resrc_t *resrc); size_t 
resrc_available (resrc_t *resrc); /* - * Return the amount of the resource available at the given time + * Return 0 if the required amount of the resource is available at the given time; + * otehr -1. */ -size_t resrc_available_at_time (resrc_t *resrc, int64_t time); +int resrc_available_at_time (resrc_t *resrc, int64_t time, size_t reqrd_size); /* - * Return the least amount of the resource available during the time + * Return 0 if the required amount of the resource is available during the time * range */ -size_t resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, - int64_t range_endtime, bool exclusive); +int resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, + int64_t range_endtime, size_t reqrd_size, bool exclusive); /* * Return the resource state as a string */ char* resrc_state (resrc_t *resrc); +/* + * Return twindow of planner_t type + */ +planner_t *resrc_twindow (resrc_t *resrc); + /* * Return the physical tree for the resouce */ diff --git a/resrc/resrc_flow.c b/resrc/resrc_flow.c index 2d3da3f2b..be73e3623 100644 --- a/resrc/resrc_flow.c +++ b/resrc/resrc_flow.c @@ -60,7 +60,7 @@ struct resrc_flow_list { * size_t staged; * zhash_t *allocs; * zhash_t *reservtns; - * zhash_t *twindow; + * planner_t *twindow; * * The resrc_flow structure therefore includes a flow_resrc resource, * independent from the associated resource, to hold all these values @@ -233,8 +233,8 @@ resrc_flow_t *resrc_flow_new_from_json (json_t *o, resrc_flow_t *parent) endtime = TIME_MAX; } - resrc_twindow_insert (resrc_flow->flow_resrc, "0", - starttime, endtime); + planner_reset (resrc_twindow (resrc_flow->flow_resrc), starttime, + endtime - starttime, NULL, 0); } } if (resrc) diff --git a/resrc/resrc_reqst.c b/resrc/resrc_reqst.c index 0e648e7da..02016d7ce 100644 --- a/resrc/resrc_reqst.c +++ b/resrc/resrc_reqst.c @@ -41,6 +41,11 @@ struct resrc_reqst_list { zlist_t *list; }; +struct subresrc_aggregate { + const char *type; + int64_t qty; +}; + 
struct resrc_reqst { resrc_reqst_t *parent; resrc_t *resrc; @@ -50,6 +55,7 @@ struct resrc_reqst { int64_t reqrd_qty; int64_t reqrd_size; int64_t nfound; + zlist_t *subresrcs; resrc_reqst_list_t *children; resrc_graph_req_t *g_reqs; }; @@ -246,6 +252,7 @@ resrc_reqst_t *resrc_reqst_new (resrc_t *resrc, int64_t qty, int64_t size, resrc_reqst->reqrd_qty = qty; resrc_reqst->reqrd_size = size; resrc_reqst->nfound = 0; + resrc_reqst->subresrcs = NULL; resrc_reqst->g_reqs = NULL; resrc_reqst->children = resrc_reqst_list_new (); } @@ -284,6 +291,35 @@ static resrc_graph_req_t *resrc_graph_req_from_json (json_t *ga) return NULL; } +static zlist_t *subresrc_aggregates_from_json (json_t *o) +{ + int64_t agg = -1; + zlist_t *zl = NULL; + struct subresrc_aggregate *subresrc = NULL; + + /* when other other resource types need to be supported + * the following needs to be extended + */ + if (Jget_int64 (o, "aggr_qty_node", &agg)) { + subresrc = xzmalloc (sizeof (*subresrc)); + subresrc->type = "node"; + subresrc->qty = agg; + zl = zlist_new (); + zlist_append (zl, subresrc); + zlist_freefn (zl, subresrc, free, false); + } + if (Jget_int64 (o, "aggr_qty_core", &agg)) { + subresrc = xzmalloc (sizeof (*subresrc)); + subresrc->type = "core"; + subresrc->qty = agg; + if (!zl) + zl = zlist_new (); + zlist_append (zl, subresrc); + zlist_freefn (zl, subresrc, free, false); + } + return zl; +} + resrc_reqst_t *resrc_reqst_from_json (json_t *o, resrc_reqst_t *parent) { bool exclusive = false; @@ -336,6 +372,8 @@ resrc_reqst_t *resrc_reqst_from_json (json_t *o, resrc_reqst_t *parent) resrc_reqst = resrc_reqst_new (resrc, qty, size, starttime, endtime, exclusive); + resrc_reqst->subresrcs = subresrc_aggregates_from_json (o); + if ((ga = Jobj_get (o, "graphs"))) resrc_reqst->g_reqs = resrc_graph_req_from_json (ga); @@ -365,6 +403,8 @@ void resrc_reqst_destroy (resrc_reqst_t *resrc_reqst) if (resrc_reqst) { if (resrc_reqst->parent) resrc_reqst_list_remove (resrc_reqst->parent->children, 
resrc_reqst); + if (resrc_reqst->subresrcs) + zlist_destroy (&(resrc_reqst->subresrcs)); resrc_reqst_list_destroy (resrc_reqst->children); resrc_resource_destroy (resrc_reqst->resrc); resrc_graph_req_destroy (resrc_reqst->g_reqs); diff --git a/resrc/resrc_version.map b/resrc/resrc_version.map index d9b194945..a78c54e72 100644 --- a/resrc/resrc_version.map +++ b/resrc/resrc_version.map @@ -1,5 +1,6 @@ { global: resrc_*; + planner_*; local: *; }; diff --git a/resrc/test/Makefile.am b/resrc/test/Makefile.am index db7e17044..8e2db3c88 100644 --- a/resrc/test/Makefile.am +++ b/resrc/test/Makefile.am @@ -8,7 +8,7 @@ TESTS_ENVIRONMENT = \ LUA_PATH="$(abs_top_srcdir)/rdl/?.lua;$(FLUX_PREFIX)/share/lua/5.1/?.lua;$(LUA_PATH);;" \ LUA_CPATH="$(abs_top_builddir)/rdl/?.so;$(FLUX_PREFIX)/lib64/lua/5.1/?.so;$(LUA_CPATH);;" -TESTS = tresrc +TESTS = tresrc tplanner check_PROGRAMS = $(TESTS) tresrc_SOURCES = tresrc.c @@ -18,3 +18,12 @@ tresrc_LDADD = $(top_builddir)/resrc/libflux-resrc.la \ $(top_builddir)/src/common/libutil/libutil.la \ $(top_builddir)/src/common/libtap/libtap.la \ $(LUA_LIB) $(JANSSON_LIBS) $(CZMQ_LIBS) + +tplanner_SOURCES = tplanner.c +tplanner_CFLAGS = $(AM_CFLAGS) -I$(top_srcdir)/resrc +tplanner_LDADD = $(top_builddir)/resrc/libflux-resrc.la \ + $(top_builddir)/src/common/liblsd/liblsd.la \ + $(top_builddir)/src/common/libutil/libutil.la \ + $(top_builddir)/src/common/libtap/libtap.la \ + $(LUA_LIB) $(JANSSON_LIBS) $(CZMQ_LIBS) + diff --git a/resrc/test/tplanner.c b/resrc/test/tplanner.c new file mode 100644 index 000000000..ce3fdb3bf --- /dev/null +++ b/resrc/test/tplanner.c @@ -0,0 +1,848 @@ +/*****************************************************************************\ + * Copyright (c) 2014 Lawrence Livermore National Security, LLC. Produced at + * the Lawrence Livermore National Laboratory (cf, AUTHORS, DISCLAIMER.LLNS). + * LLNL-CODE-658032 All rights reserved. + * + * This file is part of the Flux resource manager framework. 
+ * For details, see https://github.com/flux-framework. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the license, or (at your option) + * any later version. + * + * Flux is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * See also: http://www.gnu.org/licenses/ +\*****************************************************************************/ + +#include +#include +#include +#include "planner.h" +#include "src/common/libtap/tap.h" +#include "src/common/libutil/xzmalloc.h" + +#define EXTRA_VALIDATION 0 + +static int64_t gl_plid = 0; + +static inline plan_t *pt_plan_new (planner_t *ctx, req_t *req, int64_t start) +{ + plan_t *plan = NULL; + + if (!req || start < 0) + goto done; + + plan = xzmalloc (sizeof (*plan)); + plan->req = req; + plan->id = gl_plid; + plan->start = start; + gl_plid++; + +done: + return plan; +} + +static req_t *pt_vreq_new (uint64_t duration, size_t len, va_list ap) +{ + int i = 0; + req_t *req = NULL; + if (duration < 1 || len > MAX_RESRC_DIM) + goto done; + + req = xzmalloc (sizeof (*req)); + req->resrc_vector = xzmalloc (len * sizeof (*(req->resrc_vector))); + req->vector_dim = len; + req->duration = duration; + for (i=0; i < len; ++i) + req->resrc_vector[i] = (uint64_t)va_arg(ap, int); + +done: + return req; +} + +/* make sure to pass only integers for optional arguments */ +static req_t *pt_req_new (uint64_t duration, size_t len, ...) 
+{ + req_t *req = NULL; + + va_list ap; + va_start(ap, len); + req = pt_vreq_new (duration, len, ap); + va_end(ap); + return req; +} + +static void pt_req_free (req_t *req) +{ + if (req) { + free (req->resrc_vector); + free (req); + } +} + +int pt_make_n_valid_rsvs (planner_t *ctx, reservation_t ***ra_p, + int n, uint64_t duration, size_t len, ...) +{ + int i = 0; + int rc = -1; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t *rsv = NULL; + + va_list ap; + va_start(ap, len); + req = pt_vreq_new (duration, len, ap); + va_end(ap); + + if (!req) + goto done; + + (*ra_p) = xzmalloc (n * sizeof (**ra_p)); + for (i = 0; i < n; ++i) { + if (!(plan = pt_plan_new (ctx, req, planner_avail_time_first (ctx, req)))) + goto done; + else if (!(rsv = planner_reservation_new (ctx, plan))) + goto done; + else if (planner_add_reservation (ctx, rsv, EXTRA_VALIDATION) < 0) + goto done; + + free (plan); + plan = NULL; + (*ra_p)[i] = rsv; + } + pt_req_free (req); + req = NULL; + rc = 0; + +done: + if (plan) + free (plan); + if (req) + pt_req_free (req); + return rc; +} + + +int pt_make_n_decr_rsvs (planner_t *ctx, reservation_t ***ra_p, + int n, uint64_t start_duration, size_t len, ...) 
+{ + int i = 0; + int rc = -1; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t *rsv = NULL; + + va_list ap; + va_start(ap, len); + req = pt_vreq_new (start_duration, len, ap); + va_end(ap); + + if (!req) + goto done; + + (*ra_p) = xzmalloc (n * sizeof (**ra_p)); + for (i = 0; i < n; ++i) { + if (!(plan = pt_plan_new (ctx, req, planner_avail_time_first (ctx, req)))) + goto done; + else if (!(rsv = planner_reservation_new (ctx, plan))) + goto done; + else if (planner_add_reservation (ctx, rsv, EXTRA_VALIDATION) < 0) + goto done; + + free (plan); + plan = NULL; + (*ra_p)[i] = rsv; + req->duration--; + } + pt_req_free (req); + req = NULL; + rc = 0; + +done: + if (plan) + free (plan); + if (req) + pt_req_free (req); + return rc; +} + +static void test_1r0_10p_basic () +{ + int i = 0; + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs = 1; + req_t *req = NULL; + req_t *req2 = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + + planner_t *ctx = planner_new (0, 10, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1>, 0-9}: 1-d 1 resrc for span of 10"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 5, 2, 1, 1); + ok (rc == 0, "add the max num of reservations, each requesting {<1>, 2}"); + + rc = planner_rem_reservation (ctx, ra[1]); + ok (rc == 0, "remove a reservation at 2 for {<1>, 2}"); + + rc = planner_rem_reservation (ctx, ra[2]); + ok (rc == 0, "remove a reservation at 4 for {<1>, 2}"); + + req = pt_req_new (2, 1, 1); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 2, "find the first available time for {<1>, 2}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "should not find the next available time"); + + rc = planner_avail_resources_at (ctx, 2, req); + ok (rc == 0, "find availability at 2, for {<1>, 2}"); + + rc = planner_avail_resources_at (ctx, 3, req); + ok (rc == 0, "find availability at 3, for {<1>, 2}"); + + rc = planner_avail_resources_at (ctx, 4, req); + ok (rc == 0, "find 
availability at 4, for {<1>, 2}"); + + rc = planner_avail_resources_at (ctx, 5, req); + ok ((rc == -1) && !errno, "find no availability at 5, for {<1>, 2}"); + + req2 = pt_req_new (2, 1, 1); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find the first available time for {<1>, 3}"); + + req2 = pt_req_new (4, 1, 1); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find the first available time for {<1>, 4}"); + + req2 = pt_req_new (5, 1, 1); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == -1, "no availability for {<1>, 5}"); + + plan = pt_plan_new (ctx, req, 3); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 3, for {<1>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == -1, "no availability for {<1>, 2}"); + + planner_reservation_destroy (ctx, &new_rsv); + for (i=0; i < 5; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + + free (ra); + ra = NULL; + planner_destroy (&ctx); +} + +static void test_1kr0_10kp_larger () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 1000; + planner_t *ctx = planner_new (0, 10000, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1000>, 0-9999}: <1k> resrc for 10k"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 9, 100, 1, 100); + ok (rc == 0, "add 9 reservations, each requesting {<100>, 100}"); + + req = pt_req_new (100, 1, 99); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the first available time for {<99>, 100}"); + + req = pt_req_new (100, 1, 100); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the 
first available time for {<100>, 100}"); + + req = pt_req_new (101, 1, 100); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the first available time for {<100>, 101}"); + + req = pt_req_new (100, 1, 101); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 100, "find the first available time for {<101>, 100}"); + + req = pt_req_new (100, 1, 100); + plan = pt_plan_new (ctx, req, 0); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for {<100>, 100}"); + + req = pt_req_new (1000, 1, 1); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 100, "find the first available time for {<1>, 1000}"); + plan = pt_plan_new (ctx, req, starttime); + reservation_t *new_rsv2 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, new_rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 100, for {<100>, 100}"); + + planner_reservation_destroy (ctx, &new_rsv); + planner_reservation_destroy (ctx, &new_rsv2); + for (i=0; i < 9; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + free (ra); + ra = NULL; + planner_destroy (&ctx); +} + +void test_5r0_90p_noncontiguous () +{ + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + + int64_t starttime = -1; + uint64_t total_resrcs = 5; + planner_t *ctx = planner_new (0, 90, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<5>, 0-89}: <5> resrc for 90 span"); + + req = pt_req_new (10, 1, 5); + rc = planner_avail_resources_at (ctx, 0, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + + plan = pt_plan_new (ctx, req, 0); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for 
{<5>, 10}"); + + rc = planner_avail_resources_at (ctx, 15, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + plan = pt_plan_new (ctx, req, 15); + reservation_t *rsv2 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 15, for {<5>, 10}"); + + rc = planner_avail_resources_at (ctx, 35, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + plan = pt_plan_new (ctx, req, 35); + reservation_t *rsv3 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv3, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 35, for {<5>, 10}"); + + rc = planner_avail_resources_at (ctx, 60, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + plan = pt_plan_new (ctx, req, 60); + reservation_t *rsv4 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv4, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 60, for {<5>, 10}"); + + req = pt_req_new (5, 1, 5); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 10, "find the first available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 25, "find the next available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 45, "find the next available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 70, "find the next available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 5}"); + + req = pt_req_new (10, 1, 5); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 25, "find the first available time for {<5>, 10}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 45, "find the next available time for {<5>, 10}"); + starttime = 
planner_avail_time_next (ctx); + ok (starttime == 70, "find the next available time for {<5>, 10}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 10}"); + + req = pt_req_new (15, 1, 5); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 45, "find the next available time for {<5>, 15}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 70, "find the next available time for {<5>, 15}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 15}"); + + req = pt_req_new (20, 1, 5); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 70, "find the next available time for {<5>, 20}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 20}"); + plan = pt_plan_new (ctx, req, 70); + reservation_t *rsv5 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv5, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 70, for {<5>, 20}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == -1, "find no available time for {<5>, 20}"); + rc = planner_rem_reservation (ctx, rsv4); + ok (rc == 0, "remove reservation at 60"); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 45, "find the first available time for {<5>, 20}"); + + planner_reservation_destroy (ctx, &rsv1); + planner_reservation_destroy (ctx, &rsv2); + planner_reservation_destroy (ctx, &rsv3); + planner_reservation_destroy (ctx, &rsv4); + planner_reservation_destroy (ctx, &rsv5); + + planner_destroy (&ctx); +} + +void test_1r0_12p_midstart () +{ + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 1; + + planner_t *ctx = planner_new (0, 12, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1>, 0-9}: <1> resrc for 10 span"); + + req = 
pt_req_new (4, 1, 1); + rc = planner_avail_resources_at (ctx, 4, req); + ok (rc == 0, "find availability at 4, for {<1>, 4}"); + + plan = pt_plan_new (ctx, req, 4); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 4, for {<1>, 4}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find the first available time at 0 for {<1>, 4}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == 8, "find the next available time at 8 for {<1>, 4}"); + + rc = planner_rem_reservation (ctx, rsv1); + ok (rc == 0, "remove reservation at 4: {<1>, 4}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find the first available time at 0 for {<1>, 4}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "no other scheduled point exists for {<1>, 4}"); + + planner_reservation_destroy (ctx, &rsv1); + plan = pt_plan_new (ctx, req, 3); + rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 3, for {<1>, 4}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 7, "find the first available time at 7 for {<1>, 4}"); + pt_req_free (req); + + planner_reservation_destroy (ctx, &rsv1); + planner_destroy (&ctx); +} + +void test_100r0_5000000_long () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 100; + + planner_t *ctx = planner_new (0, 6000000, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<100>, 0-4999999}: <100> for 5000000"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 10000, 10000, 1, 5); + ok (rc == 0, "add 10000 reservations, each requesting {<100>, 10000}"); + + req = pt_req_new (10000, 1, 100); + starttime = planner_avail_time_first (ctx, req); + ok 
(starttime == 5000000, "find the first available time for {<100>, 10000}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add a reservation at 5000000 requesting {<100>, 10000}"); + + for (i=0; i < 10000; ++i) { + planner_rem_reservation (ctx, ra[i]); + planner_reservation_destroy (ctx, &(ra[i])); + } + + free (ra); + ra = NULL; + + req = pt_req_new (10000, 1, 55); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the first available time for {<55>, 10000}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == 5010000, "find the first available time for {<55>, 10000}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find the first available time for {<55>, 10000}"); + + planner_reservation_destroy (ctx, &new_rsv); + + planner_destroy (&ctx); +} + +void test_5r0_2200_short () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 5; + + planner_t *ctx = planner_new (0, 2200, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<5>, 0-2199}: <5> for 2200"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 1000, 1, 1, 3); + ok (rc == 0, "add 1000 reservations, each requesting {<3>, 1}"); + + req = pt_req_new (10, 1, 2); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find the first available time for {<2>, 10}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add a reservation at 0 requesting {<2>, 10}"); + + req = pt_req_new (10, 1, 4); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 1000, 
"find the first available time for {<4>, 10}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv2 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add a reservation at 1000 requesting {<4>, 10}"); + + req = pt_req_new (990, 1, 2); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 10, "find the first available time for {<2>, 910}"); + + req = pt_req_new (991, 1, 2); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 1010, "find the first available time for {<2>, 911}"); + + //rc = planner_print_gnuplot (ctx, "plan.out", 0); + //ok (rc == 0, "print gnuplot works"); + + for (i=0; i < 1000; ++i) { + planner_rem_reservation (ctx, ra[i]); + planner_reservation_destroy (ctx, &(ra[i])); + } + + free (ra); + ra = NULL; + + planner_reservation_destroy (ctx, &rsv1); + planner_reservation_destroy (ctx, &rsv2); + planner_destroy (&ctx); +} + +static void test_5xr0_4_10p_basic () +{ + int i = 0; + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs_a[5] = {5, 50, 500, 5000, 50000}; + req_t *req = NULL; + req_t *req2 = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + + planner_t *ctx = planner_new (0, 10, total_resrcs_a, 5); + ok (ctx != NULL, "a planner for {<5,50,500,5000,50000>, 0-9}: 5-d for 10"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 5, 2, 5, 5, 50, 500, 5000, 50000); + ok (rc == 0, "add reservations, each requesting {<5,50,500,5000,50000>, 2}"); + + rc = planner_rem_reservation (ctx, ra[1]); + ok (rc == 0, "remove a reservation at 2 for {<5,50,500,5000,50000>, 2}"); + + rc = planner_rem_reservation (ctx, ra[2]); + ok (rc == 0, "remove a reservation at 4 for {<5,50,500,5000,50000>, 2}"); + + req = pt_req_new (2, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 2, "find first availability for 
{<5,50,500,5000,50000>, 2}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "should not find the next available time"); + + rc = planner_avail_resources_at (ctx, 2, req); + ok (rc == 0, "find availability at 2, for {<5,50,500,5000,50000>, 2}"); + + rc = planner_avail_resources_at (ctx, 3, req); + ok (rc == 0, "find availability at 3, for {<5,50,500,5000,50000>, 2}"); + + rc = planner_avail_resources_at (ctx, 4, req); + ok (rc == 0, "find availability at 4, for {<5,50,500,5000,50000>, 2}"); + + rc = planner_avail_resources_at (ctx, 5, req); + ok ((rc == -1) && !errno, "find no availability at 5"); + + req2 = pt_req_new (3, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find first availability for {<5,50,500,5000,50000>, 3}"); + + req2 = pt_req_new (4, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find first availability for {<5,50,500,5000,50000>, 4}"); + + req2 = pt_req_new (5, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == -1, "no availability for {<5,50,500,5000,50000>, 5}"); + + plan = pt_plan_new (ctx, req, 3); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 3, for {<5,50,500,5000,50000>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == -1, "no availability for {<5,50,500,5000,50000>, 2}"); + + planner_reservation_destroy (ctx, &new_rsv); + for (i=0; i < 5; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + + free (ra); + ra = NULL; + planner_destroy (&ctx); +} + +void test_2xr0_4_10p_2D_unmet () +{ + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs_a[5] = {2, 20, 200, 2000, 20000}; + req_t *req = NULL; + req_t 
*req2 = NULL; + plan_t *plan = NULL; + + planner_t *ctx = planner_new (0, 10, total_resrcs_a, 5); + ok (ctx != NULL, "a planner for {<2,20,200,2000,20000>, 0-9}: 5-d for 10"); + + req = pt_req_new (2, 5, 1, 10, 100, 1000, 10000); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find first availability for {<1,10,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for {<1,10,100,1000,10000>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find first availability for {<1,10,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv2 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for {<1,10,100,1000,10000>, 2}"); + + req2 = pt_req_new (2, 5, 0, 20, 100, 1000, 10000); + starttime = planner_avail_time_first (ctx, req2); + ok (starttime == 2, "find first availability for {<0,20,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req2, starttime); + reservation_t *rsv3 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req2); + rc = planner_add_reservation (ctx, rsv3, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 2, for {<1,20,100,1000,10000>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 4, "find first availability for {<1,10,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv4 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv4, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 4, for {<1,10,100,1000,10000>, 2}"); + + req = pt_req_new (2, 5, 1, 0, 100, 1000, 10000); + starttime = planner_avail_time_first (ctx, req); + 
pt_req_free (req); + ok (starttime == 2, "find first availability for {<1,0,100,1000,10000>, 2}"); + + planner_reservation_destroy (ctx, &rsv1); + planner_reservation_destroy (ctx, &rsv2); + planner_reservation_destroy (ctx, &rsv3); + planner_reservation_destroy (ctx, &rsv4); + + planner_destroy (&ctx); +} + +void test_many_complete_times () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 2000; + + planner_t *ctx = planner_new (0, 2500, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<2000>, 0-2499}: <2000> for 2500"); + + rc = pt_make_n_decr_rsvs (ctx, &ra, 2000, 2000, 1, 1); + ok (rc == 0, "add 2000 reservations, each requesting {<1>, 2000--}"); + + for (i=1; i < 1999; ++i) { + req = pt_req_new (10, 1, i); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + } + req = pt_req_new (10, 1, i); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == i, "find the first available time for {<1000++>, 10}"); + + for (i=0; i < 2000; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + + free (ra); + ra = NULL; + + planner_destroy (&ctx); +} + +void test_misc () +{ + int i = 0; + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs = 1; + reservation_t **ra = NULL; + reservation_t *rsv = NULL; + + planner_t *ctx = planner_new (0, 20, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1>, 0-9}: 1-d 1 resrc for span of 10"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 5, 2, 1, 1); + ok (rc == 0, "add the max num of reservations, each requesting {<1>, 2}"); + + req_t *req = pt_req_new (2, 1, 1); + plan_t *plan = pt_plan_new (ctx, req, 12); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + rc = planner_add_reservation (ctx, rsv1, 1); + plan->start = 10; + reservation_t *rptr = planner_reservation_by_id (ctx, plan->id); + ok (rsv1 == rptr, "planner_reservation_by_id works"); + char key[32]; + sprintf (key, 
"%jd", (intmax_t)plan->id); + rptr = planner_reservation_by_id_str (ctx, key); + ok (rsv1 == rptr, "planner_reservation_by_id works"); + rptr = planner_reservation_new (ctx, plan); + ok ((rptr == NULL) && (errno == EINVAL), "existing id correctly rejected"); + free (plan); + pt_req_free (req); + char *str = planner_reservation_to_string (ctx, rsv1); + ok (str != NULL, "planner_reservation_to_string works"); + free (str); + + for (rsv = planner_reservation_first (ctx); rsv; + rsv = planner_reservation_next (ctx, rsv)) { + int64_t st = planner_reservation_starttime (ctx, rsv); + if (st != -1 && st < starttime) + break; + starttime = st; + i++; + } + ok ((i == 6), "planner_revervation iterator works"); + for (i=0; i < 5; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + free (ra); + ra = NULL; + planner_reservation_destroy (ctx, &rsv1); + planner_destroy (&ctx); +} + +int main (int argc, char *argv[]) +{ + + plan (NO_PLAN); + + test_1r0_10p_basic (); + + test_1kr0_10kp_larger (); + + test_5r0_90p_noncontiguous (); + + test_1r0_12p_midstart (); + + test_100r0_5000000_long (); + + test_5r0_2200_short (); + + test_5xr0_4_10p_basic (); + + test_2xr0_4_10p_2D_unmet (); + + test_many_complete_times (); + + test_misc (); + + done_testing (); + + return 0; +} + + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/resrc/test/tresrc.c b/resrc/test/tresrc.c index 089f396b5..9a0726886 100644 --- a/resrc/test/tresrc.c +++ b/resrc/test/tresrc.c @@ -95,14 +95,12 @@ static int num_temporal_allocation_tests = 10; static void test_temporal_allocation () { int rc = 0; - size_t available; + int tmp = 0; resrc_t *resource = resrc_new_resource ("custom", "/test", "test", "test1", NULL, 1, NULL, 10); - available = resrc_available_at_time (resource, 0); - rc = (rc || !(available == 10)); - available = resrc_available_during_range (resource, 0, 1000, false); - rc = (rc || !(available == 10)); + rc = resrc_available_at_time (resource, 0, 10); + rc += resrc_available_during_range (resource, 
0, 1000, 10, false); ok (!rc, "resrc_available...(time/range) on unallocated resource work"); // Setup the resource allocations for the rest of the tests @@ -128,30 +126,20 @@ static void test_temporal_allocation () // Test "available at time" // Job 1 - available = resrc_available_at_time (resource, 1); - rc = (rc || !(available == 5)); + rc = resrc_available_at_time (resource, 1, 5); // Jobs 1 & 3 - available = resrc_available_at_time (resource, 10); - rc = (rc || !(available == 4)); - available = resrc_available_at_time (resource, 500); - rc = (rc || !(available == 4)); - available = resrc_available_at_time (resource, 1000); - rc = (rc || !(available == 4)); + rc += resrc_available_at_time (resource, 10, 4); + rc += resrc_available_at_time (resource, 500, 4); + rc += resrc_available_at_time (resource, 1000, 4); // Job 3 - available = resrc_available_at_time (resource, 1500); - rc = (rc || !(available == 9)); - available = resrc_available_at_time (resource, 1999); - rc = (rc || !(available == 9)); + rc += resrc_available_at_time (resource, 1500, 9); + rc += resrc_available_at_time (resource, 1999, 9); // Job 2 - available = resrc_available_at_time (resource, 2000); - rc = (rc || !(available == 0)); - available = resrc_available_at_time (resource, 2500); - rc = (rc || !(available == 0)); - available = resrc_available_at_time (resource, 3000); - rc = (rc || !(available == 0)); + rc += (resrc_available_at_time (resource, 2000, 1) == -1)? 0: -1; + rc += (resrc_available_at_time (resource, 2500, 1) == -1)? 0: -1; + rc += (resrc_available_at_time (resource, 3000, 1) == -1)? 
0: -1; // No Jobs - available = resrc_available_at_time (resource, 3001); - rc = (rc || !(available == 10)); + rc += resrc_available_at_time (resource, 3001, 10); ok (!rc, "resrc_available_at_time works"); if (rc) { return; @@ -160,71 +148,59 @@ static void test_temporal_allocation () // Test "available during range" // Range == job window (both edges are the same) - available = resrc_available_during_range (resource, 2000, 3000, false); - rc = (rc || !(available == 0)); - available = resrc_available_during_range (resource, 0, 1000, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 10, 1999, false); - rc = (rc || !(available == 4)); + tmp = resrc_available_during_range (resource, 2000, 3000, 1, false); + rc = (tmp == -1)? 0 : -1; + rc += resrc_available_during_range (resource, 0, 1000, 4, false); + rc += resrc_available_during_range (resource, 10, 1999, 4, false); ok (!rc, "resrc_available_during_range: range == job window works"); rc = 0; // Range is a subset of job window (no edges are the same) - available = resrc_available_during_range (resource, 4, 6, false); - rc = (rc || !(available == 5)); - available = resrc_available_during_range (resource, 20, 999, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 1001, 1998, false); - rc = (rc || !(available == 9)); - available = resrc_available_during_range (resource, 2500, 2600, false); - rc = (rc || !(available == 0)); + rc = resrc_available_during_range (resource, 4, 6, 5, false); + rc += resrc_available_during_range (resource, 20, 999, 4, false); + rc += resrc_available_during_range (resource, 1001, 1998, 9, false); + tmp = resrc_available_during_range (resource, 2500, 2600, 1, false); + rc += (tmp == -1)? 
0: -1; ok (!rc, "resrc_available_during_range: range is a subset (no edges) works"); rc = 0; // Range is a subset of a job window (one edge is the same) - available = resrc_available_during_range (resource, 0, 999, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 10, 999, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 20, 1000, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 1001, 1999, false); - rc = (rc || !(available == 9)); - available = resrc_available_during_range (resource, 1001, 1999, false); - rc = (rc || !(available == 9)); + rc = resrc_available_during_range (resource, 0, 999, 4, false); + rc += resrc_available_during_range (resource, 10, 999, 4, false); + rc += resrc_available_during_range (resource, 20, 1000, 4, false); + rc += resrc_available_during_range (resource, 1001, 1999, 9, false); + rc += resrc_available_during_range (resource, 1001, 1999, 9, false); ok (!rc, "resrc_available_during_range: range is a subset (1 edge) works"); rc = 0; // Range overlaps 1 job window // (no edges are exactly equal) - available = resrc_available_during_range (resource, 2500, 4000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 2500, 4000, 1, false); + rc = (tmp == -1)? 0: -1; // (1 edge is exactly equal) - available = resrc_available_during_range (resource, 3000, 5000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 3000, 5000, 1, false); + rc += (tmp == -1)? 
0: -1; ok (!rc, "resrc_available_during_range: range overlaps 1 job works"); rc = 0; // Range overlaps multiple job windows // (no edges are exactly equal) - available = resrc_available_during_range (resource, 100, 1500, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 1500, 2500, false); - rc = (rc || !(available == 0)); + rc = resrc_available_during_range (resource, 100, 1500, 4, false); + tmp = resrc_available_during_range (resource, 1500, 2500, 1, false); + rc += (tmp == -1)? 0: -1; // (some edges are exactly equal) - available = resrc_available_during_range (resource, 1000, 2000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 1000, 2000, 1, false); + rc += (tmp == -1)? 0: -1; ok (!rc, "resrc_available_during_range: range overlaps multiple job works"); rc = 0; // Range overlaps all job windows (edges exactly equal) - available = resrc_available_during_range (resource, 0, 3000, false); - rc = (rc || !(available == 0)); - available = resrc_available_during_range (resource, 0, 2000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 0, 3000, 1, false); + rc = (tmp == -1)? 0: -1; + tmp = resrc_available_during_range (resource, 0, 2000, 1, false); + rc += (tmp == -1)? 
0: -1; // Range overlaps no job windows - available = resrc_available_during_range (resource, 3001, 5000, false); - rc = (rc || !(available == 10)); + rc += resrc_available_during_range (resource, 3001, 5000, 10, false); ok (!rc, "resrc_available_during_range: range overlaps all job works"); resrc_resource_destroy (resource); @@ -442,6 +418,7 @@ int main (int argc, char *argv[]) resrc_flow_t *power_flow = NULL; resrc_flow_t *bw_flow = NULL; + plan (26); plan (26 + num_temporal_allocation_tests); test_temporal_allocation (); diff --git a/sched/sched.c b/sched/sched.c index 05699cb9b..b2dfb48a0 100644 --- a/sched/sched.c +++ b/sched/sched.c @@ -1389,28 +1389,14 @@ static int req_tpexec_run (flux_t *h, flux_lwj_t *job) * * *******************************************************************************/ -/* - * schedule_job() searches through all of the idle resources to - * satisfy a job's requirements. If enough resources are found, it - * proceeds to allocate those resources and update the kvs's lwj entry - * in preparation for job execution. If less resources - * are found than the job requires, and if the job asks to reserve - * resources, then those resources will be reserved. 
- */ -int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) +static resrc_reqst_t *get_resrc_reqst (flux_lwj_t *job, int64_t starttime, + int64_t *nreqrd) { + int64_t cll_aggr_nnodes = 0; /* cluster-level nnodes request in aggregate */ + int64_t cll_aggr_ncores = 0; /* cluster-level ncores request in aggregate */ + json_t *req_cluster = NULL; json_t *req_res = NULL; - flux_t *h = ctx->h; - int rc = -1; - int64_t nfound = 0; - int64_t nreqrd = 0; resrc_reqst_t *resrc_reqst = NULL; - resrc_tree_t *found_tree = NULL; - resrc_tree_t *selected_tree = NULL; - struct sched_plugin *plugin = sched_plugin_get (ctx->loader); - - if (!plugin) - return rc; /* * Require at least one task per node, and @@ -1439,7 +1425,9 @@ int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) Jadd_str (req_res, "type", "node"); Jadd_int64 (req_res, "req_qty", job->req->nnodes); - nreqrd = job->req->nnodes; + *nreqrd = job->req->nnodes; + /* num of nodes required in aggregate at the cluster level */ + cll_aggr_nnodes = *nreqrd; /* Since nodes are requested, make sure we look for at * least one core on each node */ @@ -1447,6 +1435,12 @@ int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) job->req->ncores = job->req->nnodes; job->req->corespernode = (job->req->ncores + job->req->nnodes - 1) / job->req->nnodes; + + /* num of cores required in aggregate at the cluster level */ + cll_aggr_ncores = job->req->corespernode * cll_aggr_nnodes; + + /* num of cores required in aggregate at the node level */ + Jadd_int64 (req_res, "aggr_qty_core", job->req->corespernode); if (job->req->node_exclusive) { Jadd_int64 (req_res, "req_size", 1); Jadd_bool (req_res, "exclusive", true); @@ -1467,7 +1461,9 @@ int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) } else if (job->req->ncores > 0) { Jadd_str (req_res, "type", "core"); Jadd_int (req_res, "req_qty", job->req->ncores); - nreqrd = job->req->ncores; + *nreqrd = job->req->ncores; + /* num of 
cores required in aggregate at the cluster level */ + cll_aggr_ncores = *nreqrd; Jadd_int64 (req_res, "req_size", 1); /* setting exclusive to true prevents multiple jobs per core */ @@ -1477,9 +1473,48 @@ int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) Jadd_int64 (req_res, "starttime", starttime); Jadd_int64 (req_res, "endtime", starttime + job->req->walltime); + + /* Add cluster and encode encode nnodes and ncores requests in aggregate */ + req_cluster = Jnew (); + Jadd_str (req_cluster, "type", "cluster"); + Jadd_int64 (req_cluster, "req_qty", 1); + Jadd_int64 (req_cluster, "aggr_qty_node", cll_aggr_nnodes); + Jadd_int64 (req_cluster, "aggr_qty_core", cll_aggr_ncores); + Jadd_int64 (req_cluster, "starttime", starttime); + Jadd_int64 (req_cluster, "endtime", starttime + job->req->walltime); + json_object_set_new (req_cluster, "req_child", req_res); + resrc_reqst = resrc_reqst_from_json (req_res, NULL); - Jput (req_res); - if (!resrc_reqst) + +done: + if (req_res) + Jput (req_res); + return resrc_reqst; +} + +/* + * schedule_job() searches through all of the idle resources to + * satisfy a job's requirements. If enough resources are found, it + * proceeds to allocate those resources and update the kvs's lwj entry + * in preparation for job execution. If less resources + * are found than the job requires, and if the job asks to reserve + * resources, then those resources will be reserved. 
+ */ +int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) +{ + flux_t *h = ctx->h; + int rc = -1; + int64_t nfound = 0; + int64_t nreqrd = 0; + resrc_reqst_t *resrc_reqst = NULL; + resrc_tree_t *found_tree = NULL; + resrc_tree_t *selected_tree = NULL; + struct sched_plugin *plugin = sched_plugin_get (ctx->loader); + + if (!plugin) + return rc; + + if (!(resrc_reqst = get_resrc_reqst (job, starttime, &nreqrd))) goto done; if ((nfound = plugin->find_resources (h, ctx->rctx.root_resrc, diff --git a/src/common/libutil/Makefile.am b/src/common/libutil/Makefile.am index 9ce957f34..161b42ebd 100644 --- a/src/common/libutil/Makefile.am +++ b/src/common/libutil/Makefile.am @@ -16,4 +16,12 @@ libutil_la_SOURCES = \ log.c \ log.h \ oom.h \ - shortjansson.h + shortjansson.h \ + compiler.h \ + interval_tree.c \ + interval_tree.h \ + interval_tree_generic.h \ + rbtree.c \ + rbtree.h \ + rbtree_augmented.h + diff --git a/src/common/libutil/compiler.h b/src/common/libutil/compiler.h new file mode 100644 index 000000000..af2b34356 --- /dev/null +++ b/src/common/libutil/compiler.h @@ -0,0 +1,15 @@ +#ifndef __INT_COMPILER_H__ +#define __INT_COMPILER_H__ + +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. 
+ * + */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#endif /* __INT_COMPILER_H__ */ diff --git a/src/common/libutil/interval_tree.c b/src/common/libutil/interval_tree.c new file mode 100644 index 000000000..3f1940a22 --- /dev/null +++ b/src/common/libutil/interval_tree.c @@ -0,0 +1,12 @@ +#include +#include + +#include "interval_tree.h" +#include "interval_tree_generic.h" + +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, + unsigned long, __subtree_last, + START, LAST,, interval_tree) diff --git a/src/common/libutil/interval_tree.h b/src/common/libutil/interval_tree.h new file mode 100644 index 000000000..cf82ddc75 --- /dev/null +++ b/src/common/libutil/interval_tree.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_INTERVAL_TREE_H +#define _LINUX_INTERVAL_TREE_H + +#include "rbtree.h" + +struct interval_tree_node { + struct rb_node rb; + unsigned int start; + unsigned int last; + unsigned long __subtree_last; +}; + +extern void +interval_tree_insert(struct interval_tree_node *node, struct rb_root *root); + +extern void +interval_tree_remove(struct interval_tree_node *node, struct rb_root *root); + +extern struct interval_tree_node * +interval_tree_iter_first(struct rb_root *root, + unsigned long start, unsigned long last); + +extern struct interval_tree_node * +interval_tree_iter_next(struct interval_tree_node *node, + unsigned long start, unsigned long last); + +#endif /* _LINUX_INTERVAL_TREE_H */ diff --git a/src/common/libutil/interval_tree_generic.h b/src/common/libutil/interval_tree_generic.h new file mode 100644 index 000000000..e26c7322c --- /dev/null +++ b/src/common/libutil/interval_tree_generic.h @@ -0,0 +1,193 @@ +/* + Interval Trees + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General 
Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + include/linux/interval_tree_generic.h +*/ + +#include + +#include "rbtree_augmented.h" + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoint of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + * + * Note - before using this, please consider if non-generic version + * (interval_tree.h) would work for you... 
+ */ + +#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \ + ITSTART, ITLAST, ITSTATIC, ITPREFIX) \ + \ +/* Callbacks for augmented rbtree insert and remove */ \ + \ +static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node) \ +{ \ + ITTYPE max = ITLAST(node), subtree_last; \ + if (node->ITRB.rb_left) { \ + subtree_last = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + if (node->ITRB.rb_right) { \ + subtree_last = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + return max; \ +} \ + \ +RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ + ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last) \ + \ +/* Insert / remove interval nodes from the tree */ \ + \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ +{ \ + struct rb_node **link = &root->rb_node, *rb_parent = NULL; \ + ITTYPE start = ITSTART(node), last = ITLAST(node); \ + ITSTRUCT *parent; \ + \ + while (*link) { \ + rb_parent = *link; \ + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \ + if (parent->ITSUBTREE < last) \ + parent->ITSUBTREE = last; \ + if (start < ITSTART(parent)) \ + link = &parent->ITRB.rb_left; \ + else \ + link = &parent->ITRB.rb_right; \ + } \ + \ + node->ITSUBTREE = last; \ + rb_link_node(&node->ITRB, rb_parent, link); \ + rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \ +{ \ + rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +/* \ + * Iterate over intervals intersecting [start;last] \ + * \ + * Note that a node's interval intersects [start;last] iff: \ + * Cond1: ITSTART(node) <= last \ + * and \ + * Cond2: start <= ITLAST(node) \ + */ \ + \ +static ITSTRUCT * \ +ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + while 
(true) { \ + /* \ + * Loop invariant: start <= node->ITSUBTREE \ + * (Cond2 is satisfied by one of the subtree nodes) \ + */ \ + if (node->ITRB.rb_left) { \ + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB); \ + if (start <= left->ITSUBTREE) { \ + /* \ + * Some nodes in left subtree satisfy Cond2. \ + * Iterate to find the leftmost such node N. \ + * If it also satisfies Cond1, that's the \ + * match we are looking for. Otherwise, there \ + * is no matching interval as nodes to the \ + * right of N can't satisfy Cond1 either. \ + */ \ + node = left; \ + continue; \ + } \ + } \ + if (ITSTART(node) <= last) { /* Cond1 */ \ + if (start <= ITLAST(node)) /* Cond2 */ \ + return node; /* node is leftmost match */ \ + if (node->ITRB.rb_right) { \ + node = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB); \ + if (start <= node->ITSUBTREE) \ + continue; \ + } \ + } \ + return NULL; /* No match */ \ + } \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \ +{ \ + ITSTRUCT *node; \ + \ + if (!root->rb_node) \ + return NULL; \ + node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \ + if (node->ITSUBTREE < start) \ + return NULL; \ + return ITPREFIX ## _subtree_search(node, start, last); \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + struct rb_node *rb = node->ITRB.rb_right, *prev; \ + \ + while (true) { \ + /* \ + * Loop invariants: \ + * Cond1: ITSTART(node) <= last \ + * rb == node->ITRB.rb_right \ + * \ + * First, search right subtree if suitable \ + */ \ + if (rb) { \ + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \ + if (start <= right->ITSUBTREE) \ + return ITPREFIX ## _subtree_search(right, \ + start, last); \ + } \ + \ + /* Move up the tree until we come from a node's left child */ \ + do { \ + rb = rb_parent(&node->ITRB); \ + if (!rb) \ + return NULL; \ + prev = &node->ITRB; \ + node = rb_entry(rb, ITSTRUCT, ITRB); \ + rb = 
node->ITRB.rb_right; \ + } while (prev == rb); \ + \ + /* Check if the node intersects [start;last] */ \ + if (last < ITSTART(node)) /* !Cond1 */ \ + return NULL; \ + else if (start <= ITLAST(node)) /* Cond2 */ \ + return node; \ + } \ +} diff --git a/src/common/libutil/rbtree.c b/src/common/libutil/rbtree.c new file mode 100644 index 000000000..a5b0d313d --- /dev/null +++ b/src/common/libutil/rbtree.c @@ -0,0 +1,549 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include +#include "rbtree_augmented.h" + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. 
+ * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. 
+ */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * 
Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. 
+					 */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/*
+				 * Case 3 - right rotate at sibling
+				 * (p could be either color here)
+				 *
+				 *   (p)           (p)
+				 *   / \           / \
+				 *  N   S    -->  N   Sl
+				 *     / \             \
+				 *    sl  Sr            s
+				 *                       \
+				 *                        Sr
+				 */
+				sibling->rb_left = tmp1 = tmp2->rb_right;
+				tmp2->rb_right = sibling;
+				parent->rb_right = tmp2;
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/*
+			 * Case 4 - left rotate at parent + color flips
+			 * (p and sl could be either color here.
+			 *  After rotation, p becomes black, s acquires
+			 *  p's color, and sl keeps its color)
+			 *
+			 *      (p)             (s)
+			 *      / \             / \
+			 *     N   S     -->   P   Sr
+			 *        / \         / \
+			 *      (sl) sr      N  (sl)
+			 */
+			parent->rb_right = tmp2 = sibling->rb_left;
+			sibling->rb_left = parent;
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		} else {
+			sibling = parent->rb_left;
+			if (rb_is_red(sibling)) {
+				/* Case 1 - right rotate at parent */
+				parent->rb_left = tmp1 = sibling->rb_right;
+				sibling->rb_right = parent;
+				rb_set_parent_color(tmp1, parent, RB_BLACK);
+				__rb_rotate_set_parents(parent, sibling, root,
+							RB_RED);
+				augment_rotate(parent, sibling);
+				sibling = tmp1;
+			}
+			tmp1 = sibling->rb_left;
+			if (!tmp1 || rb_is_black(tmp1)) {
+				tmp2 = sibling->rb_right;
+				if (!tmp2 || rb_is_black(tmp2)) {
+					/* Case 2 - sibling color flip */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/* Case 3 - left rotate at sibling */
+				sibling->rb_right = tmp1 = tmp2->rb_left;
+				tmp2->rb_left = sibling;
+				parent->rb_left = tmp2;
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/* Case 4 - right rotate at parent + color flips */
+			parent->rb_left = tmp2 = sibling->rb_right;
+			sibling->rb_right = parent;
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		}
+	}
+}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	____rb_erase_color(parent, root, augment_rotate);
+}
+
+/*
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+ */
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+	dummy_propagate, dummy_copy, dummy_rotate
+};
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	__rb_insert(node, root, dummy_rotate);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *rebalance;
+	rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+	if (rebalance)
+		____rb_erase_color(rebalance, root, dummy_rotate);
+}
+
+/*
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. 
+ */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/src/common/libutil/rbtree.h b/src/common/libutil/rbtree.h new file mode 100644 index 000000000..da67ade04 --- /dev/null +++ b/src/common/libutil/rbtree.h @@ -0,0 +1,108 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + 
(at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + +#include +#include "compiler.h" + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node 
*); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, + struct rb_node ** rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + +/** + * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of + * given type safe against removal of rb_node entry + * + * @pos: the 'type *' to use as a loop cursor. + * @n: another 'type *' to use as temporary storage + * @root: 'rb_root *' of the rbtree. + * @field: the name of the rb_node field within 'type'. 
+ */ +#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ + for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ + pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ + typeof(*pos), field); 1; }); \ + pos = n) + +#endif /* _LINUX_RBTREE_H */ diff --git a/src/common/libutil/rbtree_augmented.h b/src/common/libutil/rbtree_augmented.h new file mode 100644 index 000000000..311abb6cd --- /dev/null +++ b/src/common/libutil/rbtree_augmented.h @@ -0,0 +1,245 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _LINUX_RBTREE_AUGMENTED_H +#define _LINUX_RBTREE_AUGMENTED_H + +#include + +#include "compiler.h" + +#include "rbtree.h" + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. 
+ */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. + */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + 
old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. 
+ */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? 
parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#endif /* _LINUX_RBTREE_AUGMENTED_H */