From bb64c2b6e5dd61bcdcef6260f93e07ea7c3463b4 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Tue, 27 Dec 2016 17:45:44 -0800 Subject: [PATCH 1/6] util: Add Linux red black tree Pull in Linux red black tree and interval tree adapted for user space use. Original repo is at https://github.com/markfasheh/interval-tree --- src/common/libutil/Makefile.am | 10 +- src/common/libutil/compiler.h | 15 + src/common/libutil/interval_tree.c | 12 + src/common/libutil/interval_tree.h | 27 + src/common/libutil/interval_tree_generic.h | 193 ++++++++ src/common/libutil/rbtree.c | 549 +++++++++++++++++++++ src/common/libutil/rbtree.h | 108 ++++ src/common/libutil/rbtree_augmented.h | 245 +++++++++ 8 files changed, 1158 insertions(+), 1 deletion(-) create mode 100644 src/common/libutil/compiler.h create mode 100644 src/common/libutil/interval_tree.c create mode 100644 src/common/libutil/interval_tree.h create mode 100644 src/common/libutil/interval_tree_generic.h create mode 100644 src/common/libutil/rbtree.c create mode 100644 src/common/libutil/rbtree.h create mode 100644 src/common/libutil/rbtree_augmented.h diff --git a/src/common/libutil/Makefile.am b/src/common/libutil/Makefile.am index 9ce957f34..161b42ebd 100644 --- a/src/common/libutil/Makefile.am +++ b/src/common/libutil/Makefile.am @@ -16,4 +16,12 @@ libutil_la_SOURCES = \ log.c \ log.h \ oom.h \ - shortjansson.h + shortjansson.h \ + compiler.h \ + interval_tree.c \ + interval_tree.h \ + interval_tree_generic.h \ + rbtree.c \ + rbtree.h \ + rbtree_augmented.h + diff --git a/src/common/libutil/compiler.h b/src/common/libutil/compiler.h new file mode 100644 index 000000000..af2b34356 --- /dev/null +++ b/src/common/libutil/compiler.h @@ -0,0 +1,15 @@ +#ifndef __INT_COMPILER_H__ +#define __INT_COMPILER_H__ + +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. 
+ * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#endif /* __INT_COMPILER_H__ */ diff --git a/src/common/libutil/interval_tree.c b/src/common/libutil/interval_tree.c new file mode 100644 index 000000000..3f1940a22 --- /dev/null +++ b/src/common/libutil/interval_tree.c @@ -0,0 +1,12 @@ +#include +#include + +#include "interval_tree.h" +#include "interval_tree_generic.h" + +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, + unsigned long, __subtree_last, + START, LAST,, interval_tree) diff --git a/src/common/libutil/interval_tree.h b/src/common/libutil/interval_tree.h new file mode 100644 index 000000000..cf82ddc75 --- /dev/null +++ b/src/common/libutil/interval_tree.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_INTERVAL_TREE_H +#define _LINUX_INTERVAL_TREE_H + +#include "rbtree.h" + +struct interval_tree_node { + struct rb_node rb; + unsigned int start; + unsigned int last; + unsigned long __subtree_last; +}; + +extern void +interval_tree_insert(struct interval_tree_node *node, struct rb_root *root); + +extern void +interval_tree_remove(struct interval_tree_node *node, struct rb_root *root); + +extern struct interval_tree_node * +interval_tree_iter_first(struct rb_root *root, + unsigned long start, unsigned long last); + +extern struct interval_tree_node * +interval_tree_iter_next(struct interval_tree_node *node, + unsigned long start, unsigned long last); + +#endif /* _LINUX_INTERVAL_TREE_H */ diff --git a/src/common/libutil/interval_tree_generic.h b/src/common/libutil/interval_tree_generic.h new file mode 100644 index 000000000..e26c7322c --- /dev/null +++ b/src/common/libutil/interval_tree_generic.h @@ -0,0 +1,193 @@ +/* + Interval Trees + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it 
and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + include/linux/interval_tree_generic.h +*/ + +#include + +#include "rbtree_augmented.h" + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoint of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + * + * Note - before using this, please consider if non-generic version + * (interval_tree.h) would work for you... 
+ */ + +#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \ + ITSTART, ITLAST, ITSTATIC, ITPREFIX) \ + \ +/* Callbacks for augmented rbtree insert and remove */ \ + \ +static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node) \ +{ \ + ITTYPE max = ITLAST(node), subtree_last; \ + if (node->ITRB.rb_left) { \ + subtree_last = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + if (node->ITRB.rb_right) { \ + subtree_last = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + return max; \ +} \ + \ +RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ + ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last) \ + \ +/* Insert / remove interval nodes from the tree */ \ + \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ +{ \ + struct rb_node **link = &root->rb_node, *rb_parent = NULL; \ + ITTYPE start = ITSTART(node), last = ITLAST(node); \ + ITSTRUCT *parent; \ + \ + while (*link) { \ + rb_parent = *link; \ + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \ + if (parent->ITSUBTREE < last) \ + parent->ITSUBTREE = last; \ + if (start < ITSTART(parent)) \ + link = &parent->ITRB.rb_left; \ + else \ + link = &parent->ITRB.rb_right; \ + } \ + \ + node->ITSUBTREE = last; \ + rb_link_node(&node->ITRB, rb_parent, link); \ + rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \ +{ \ + rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +/* \ + * Iterate over intervals intersecting [start;last] \ + * \ + * Note that a node's interval intersects [start;last] iff: \ + * Cond1: ITSTART(node) <= last \ + * and \ + * Cond2: start <= ITLAST(node) \ + */ \ + \ +static ITSTRUCT * \ +ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + while 
(true) { \ + /* \ + * Loop invariant: start <= node->ITSUBTREE \ + * (Cond2 is satisfied by one of the subtree nodes) \ + */ \ + if (node->ITRB.rb_left) { \ + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB); \ + if (start <= left->ITSUBTREE) { \ + /* \ + * Some nodes in left subtree satisfy Cond2. \ + * Iterate to find the leftmost such node N. \ + * If it also satisfies Cond1, that's the \ + * match we are looking for. Otherwise, there \ + * is no matching interval as nodes to the \ + * right of N can't satisfy Cond1 either. \ + */ \ + node = left; \ + continue; \ + } \ + } \ + if (ITSTART(node) <= last) { /* Cond1 */ \ + if (start <= ITLAST(node)) /* Cond2 */ \ + return node; /* node is leftmost match */ \ + if (node->ITRB.rb_right) { \ + node = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB); \ + if (start <= node->ITSUBTREE) \ + continue; \ + } \ + } \ + return NULL; /* No match */ \ + } \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \ +{ \ + ITSTRUCT *node; \ + \ + if (!root->rb_node) \ + return NULL; \ + node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \ + if (node->ITSUBTREE < start) \ + return NULL; \ + return ITPREFIX ## _subtree_search(node, start, last); \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + struct rb_node *rb = node->ITRB.rb_right, *prev; \ + \ + while (true) { \ + /* \ + * Loop invariants: \ + * Cond1: ITSTART(node) <= last \ + * rb == node->ITRB.rb_right \ + * \ + * First, search right subtree if suitable \ + */ \ + if (rb) { \ + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \ + if (start <= right->ITSUBTREE) \ + return ITPREFIX ## _subtree_search(right, \ + start, last); \ + } \ + \ + /* Move up the tree until we come from a node's left child */ \ + do { \ + rb = rb_parent(&node->ITRB); \ + if (!rb) \ + return NULL; \ + prev = &node->ITRB; \ + node = rb_entry(rb, ITSTRUCT, ITRB); \ + rb = 
node->ITRB.rb_right; \ + } while (prev == rb); \ + \ + /* Check if the node intersects [start;last] */ \ + if (last < ITSTART(node)) /* !Cond1 */ \ + return NULL; \ + else if (start <= ITLAST(node)) /* Cond2 */ \ + return node; \ + } \ +} diff --git a/src/common/libutil/rbtree.c b/src/common/libutil/rbtree.c new file mode 100644 index 000000000..a5b0d313d --- /dev/null +++ b/src/common/libutil/rbtree.c @@ -0,0 +1,549 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include +#include "rbtree_augmented.h" + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. 
+ * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. 
+ */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * 
Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. 
+ */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - right rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + 
rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - left rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. + */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. 
+ */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. 
+ */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/src/common/libutil/rbtree.h b/src/common/libutil/rbtree.h new file mode 100644 index 000000000..da67ade04 --- /dev/null +++ b/src/common/libutil/rbtree.h @@ -0,0 +1,108 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + 
(at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + +#include +#include "compiler.h" + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node 
*); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, + struct rb_node ** rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + +/** + * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of + * given type safe against removal of rb_node entry + * + * @pos: the 'type *' to use as a loop cursor. + * @n: another 'type *' to use as temporary storage + * @root: 'rb_root *' of the rbtree. + * @field: the name of the rb_node field within 'type'. 
+ */ +#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ + for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ + pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ + typeof(*pos), field); 1; }); \ + pos = n) + +#endif /* _LINUX_RBTREE_H */ diff --git a/src/common/libutil/rbtree_augmented.h b/src/common/libutil/rbtree_augmented.h new file mode 100644 index 000000000..311abb6cd --- /dev/null +++ b/src/common/libutil/rbtree_augmented.h @@ -0,0 +1,245 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _LINUX_RBTREE_AUGMENTED_H +#define _LINUX_RBTREE_AUGMENTED_H + +#include + +#include "compiler.h" + +#include "rbtree.h" + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. 
+ */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. + */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + 
old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. 
+ */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? 
parent : NULL;
+ }
+ tmp = successor;
+ }
+
+ augment->propagate(tmp, NULL);
+ return rebalance;
+}
+
+static inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
+ if (rebalance)
+ __rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif /* _LINUX_RBTREE_AUGMENTED_H */

From fdd5e5f8194e25eb9dadbe72af5dc5714a294b27 Mon Sep 17 00:00:00 2001
From: "Dong H. Ahn"
Date: Tue, 27 Dec 2016 17:51:26 -0800
Subject: [PATCH 2/6] planner: API to enable scheduler-driven aggregate
 updates

Add the planner class, a simple API and efficient mechanisms to allow
a Flux scheduler to keep track of the state of resource aggregates of
a composite resource.

[From its header file] Planner provides a simple API and efficient
mechanisms to allow a Flux scheduler to keep track of the state of
resource aggregates of a composite resource.

In a resource hierarchy used by flux-sched (e.g., hardware hierarchy),
a composite resource is represented as a tree graph in which a
higher-level vertex has essentially pointers to its immediate child
resources, each of which also has pointers to its immediate children
etc. With such an organization, the scheduler must essentially walk
"all" of the vertices below any composite resource in order to
determine if the "sub-resources" requirement can be met. When the
scheduler performs such walks excessively, in particular on a large
graph, this can quickly become a performance and scalability
bottleneck. Planner addresses this problem by allowing the scheduler
to track the "sub-resources" summary information (i.e., aggregates)
efficiently at each upper-level composite resource vertex and to use
this aggregate information to prune unnecessary descent down into the
subtree. Planner offers update and query APIs to support these
schemes.
Through a planner API, the scheduler can ask a high-level composite a
question: "given a request of x, y, z "sub-resources" in aggregate for
d time units, when is the earliest time t at which this request can be
satisfied?" Another example would be to answer, "from time t to t+d,
does this composite resource vertex have y, z sub-resources available
in aggregate?" By composing these queries at different levels in a
resource hierarchy, the scheduler can significantly reduce the number
of tree walks. Ultimately, planner will be integrated into our
preorder tree-walk pruning filter in our future visitor-pattern-based
resource matching scheme.
---
 resrc/Makefile.am | 4 +-
 resrc/planner.c | 1389 +++++++++++++++++++++++++++++++++++++++
 resrc/planner.h | 330 ++++++++++
 resrc/resrc_version.map | 1 +
 4 files changed, 1722 insertions(+), 2 deletions(-)
 create mode 100644 resrc/planner.c
 create mode 100644 resrc/planner.h

diff --git a/resrc/Makefile.am b/resrc/Makefile.am
index feb97c436..bfd8b4a48 100644
--- a/resrc/Makefile.am
+++ b/resrc/Makefile.am
@@ -8,9 +8,9 @@ SUBDIRS = . test

 noinst_LTLIBRARIES = libflux-resrc.la

-noinst_HEADERS = resrc.h resrc_tree.h resrc_flow.h resrc_reqst.h
+noinst_HEADERS = resrc.h resrc_tree.h resrc_flow.h resrc_reqst.h planner.h

-libflux_resrc_la_SOURCES = resrc.c resrc_tree.c resrc_flow.c resrc_reqst.c
+libflux_resrc_la_SOURCES = resrc.c resrc_tree.c resrc_flow.c resrc_reqst.c planner.c
 libflux_resrc_la_CFLAGS = $(AM_CFLAGS) -I$(top_srcdir)/rdl
 libflux_resrc_la_LIBADD = $(top_builddir)/rdl/libflux-rdl.la \
 	$(top_builddir)/src/common/libutil/libutil.la \
diff --git a/resrc/planner.c b/resrc/planner.c
new file mode 100644
index 000000000..dbb726ee5
--- /dev/null
+++ b/resrc/planner.c
@@ -0,0 +1,1389 @@
+/*****************************************************************************\
+ * Copyright (c) 2014 Lawrence Livermore National Security, LLC. Produced at
+ * the Lawrence Livermore National Laboratory (cf, AUTHORS, DISCLAIMER.LLNS).
+ * LLNL-CODE-658032 All rights reserved. + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the license, or (at your option) + * any later version. + * + * Flux is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * See also: http://www.gnu.org/licenses/ +\*****************************************************************************/ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +#include "src/common/libutil/xzmalloc.h" +#include "src/common/libutil/rbtree.h" +#include "src/common/libutil/rbtree_augmented.h" +#include "src/common/libutil/interval_tree_generic.h" +#include "planner.h" + +#define DEBUG_PLANNER 0 +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) +#define FREE_NOREF_POINT(rsv) do { \ + if ((rsv)->start_p->ref_count == 0) { \ + free ((rsv)->start_p); \ + (rsv)->start_p = NULL; \ + } \ + if ((rsv)->last_p->ref_count == 0) { \ + free ((rsv)->last_p); \ + (rsv)->last_p = NULL; \ + } \ +} while (false); + +#define OUT_OF_RANGE(A,B,C) (((A)-(B)) >= (C)? 1: 0) + +typedef int64_t resrc_vector_t[MAX_RESRC_DIM]; +typedef char * rtype_vector_t[MAX_RESRC_DIM]; +typedef struct rb_root rb_root_t; +typedef struct rb_node rb_node_t; + +/* Scheduled point: a time at which resource state changes. 
Each point's resource + * requirements are tracked as a node in a min-time resource (MTR) binary search + * tree -- MAX_RESRC_DIM dimensional. + */ +typedef struct scheduled_point { + rb_node_t point_rb; /* BST node for scheduled point search */ + rb_node_t resrc_rb; /* Min time resource BST node */ + int64_t __subtree_min; /* Min time of the subtree of this node */ + int64_t at; /* Resource-state changing time */ + int inserted_to_resrc; /* 1 when this point is inserted in min-time tree */ + int new_point; /* 1 when this point is newly created */ + int ref_count; /* reference counter */ + resrc_vector_t scheduled_resrcs; /* scheduled resources at this point */ + resrc_vector_t remaining_resrcs; /* remaining resources (available) */ +} scheduled_point_t; + +/* Reservation: a node in a reservation tree (interval tree) to enable fast + * retrieval of intercepting reservations. + */ +struct reservation { + rb_node_t resv_rb; /* RB node for reservation interval tree */ + int64_t start; /* start time of the reservation */ + int64_t last; /* end time of the reservation */ + int64_t __subtree_last; /* maximum end time of my subtree */ + int64_t resv_id; /* unique reservation id */ + resrc_vector_t reserved_resrcs; /* required resources */ + size_t resrc_dim; /* vector size of required resources */ + int added; /* added to the reservation interval tree */ + struct scheduled_point *start_p; /* scheduled point object at start */ + struct scheduled_point *last_p; /* scheduled point object at last */ +}; + +/* Planner context + */ +struct planner { + resrc_vector_t total_resrc_vector; /* total resources avail for planning */ + rtype_vector_t resrc_type_vector; /* array of resrc type strings */ + size_t resrc_dim; /* size of the above vector */ + int64_t plan_start; /* begin of the planning span */ + int64_t plan_end; /* end of the planning span */ + zhash_t *avail_time_iter; /* tracking points temporarily deleted from MTR */ + req_t *avail_time_iter_req;/* the req copy for 
avail time iteration */ + int avail_time_iter_set; /* iterator set */ + scheduled_point_t *p1; /* system's scheduled point at t0*/ + zhashx_t *r_lookup; /* reservation look up table */ + rb_root_t reservations_root; /* resource interval tree */ + rb_root_t scheduled_points_root; /* scheduled points red black BST */ + rb_root_t scheduled_resrcs_root; /* minimum time resource BST */ +}; + + +/******************************************************************************* + * * + * INTERNAL PLANNER API * + * * + *******************************************************************************/ + +/******************************************************************************* + * Scheduled Points Binary Search Tree * + * Efficient Searching of Scheduled Points * + *******************************************************************************/ +static scheduled_point_t *scheduled_point_search (int64_t t, struct rb_root *root) +{ + rb_node_t *node = root->rb_node; + while (node) { + scheduled_point_t *this_data = NULL; + this_data = container_of(node, scheduled_point_t, point_rb); + int64_t result = t - this_data->at; + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this_data; + } + return NULL; +} + +static inline scheduled_point_t *recent_state (scheduled_point_t *new_data, + scheduled_point_t *old_data) +{ + if (!old_data) + return new_data; + return (new_data->at > old_data->at)? new_data : old_data; +} + +/* while scheduled_point_search returns the exact match + * scheduled_point_state returns the most recent scheduled point + * -- which represents the resource state at the time t. 
+ */ +static scheduled_point_t *scheduled_point_state (int64_t t, struct rb_root *root) +{ + scheduled_point_t *last_state = NULL; + rb_node_t *node = root->rb_node; + while (node) { + scheduled_point_t *this_data = NULL; + this_data = container_of(node, scheduled_point_t, point_rb); + int64_t result = t - this_data->at; + if (result < 0) + node = node->rb_left; + else if (result > 0) { + last_state = recent_state (this_data, last_state); + node = node->rb_right; + } + else + return this_data; + } + return last_state; +} + +static int scheduled_point_insert (scheduled_point_t *new_data, rb_root_t *root) +{ + rb_node_t **link = &(root->rb_node); + rb_node_t *parent = NULL; + while (*link) { + scheduled_point_t *this_data = NULL; + this_data = container_of(*link, scheduled_point_t, point_rb); + int64_t result = new_data->at - this_data->at; + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return -1; + } + rb_link_node(&(new_data->point_rb), parent, link); + rb_insert_color(&(new_data->point_rb), root); + return 0; +} + +static int scheduled_point_remove (scheduled_point_t *data, struct rb_root *root) +{ + int rc = -1; + scheduled_point_t *n = scheduled_point_search (data->at, root); + if (n) { + rb_erase (&(n->point_rb), root); + /* Note: this should only remove the node from the scheduled point BST + * and does NOT free memory allocated to the node + */ + rc = 0; + } + return rc; +} + +static void scheduled_points_destroy (rb_node_t *node) +{ + if (node->rb_left) + scheduled_points_destroy (node->rb_left); + if (node->rb_right) + scheduled_points_destroy (node->rb_right); + scheduled_point_t *data = container_of(node, scheduled_point_t, point_rb); + free (data); +} + + +/******************************************************************************* + * Reservation Interval Tree * + * Efficient Intersection Searching * + 
*******************************************************************************/ +INTERVAL_TREE_DEFINE(struct reservation, resv_rb, int64_t, __subtree_last, + START, LAST,, reservation) + +static void reservations_destroy(rb_node_t *node) +{ + if (node->rb_left) + reservations_destroy (node->rb_left); + if (node->rb_right) + reservations_destroy (node->rb_right); + reservation_t *rsv = container_of(node, reservation_t, resv_rb); + free (rsv); +} + + +/******************************************************************************* + * Min Time Resource Tree * + * Efficient Searching of Earliest Schedulable Points * + *******************************************************************************/ +static inline int64_t scheduled_resrc_subtree_min (scheduled_point_t *point) +{ + int64_t min = point->at; + int64_t subtree_min; + if (point->resrc_rb.rb_left) { + subtree_min = rb_entry(point->resrc_rb.rb_left, + scheduled_point_t, resrc_rb)->__subtree_min; + if (min > subtree_min) + min = subtree_min; + } + if (point->resrc_rb.rb_right) { + subtree_min = rb_entry(point->resrc_rb.rb_right, + scheduled_point_t, resrc_rb)->__subtree_min; + if (min > subtree_min) + min = subtree_min; + } + return min; +} + +static inline void scheduled_resrc_propagate (rb_node_t *rb, rb_node_t *stop) +{ + while (rb != stop) { + scheduled_point_t *point = rb_entry(rb, scheduled_point_t, resrc_rb); + int64_t subtree_min = scheduled_resrc_subtree_min (point); + if (point->__subtree_min == subtree_min) + break; + point->__subtree_min = subtree_min; + rb = rb_parent(&point->resrc_rb); + } +} + +static inline void scheduled_resrc_copy (rb_node_t *rb_old, rb_node_t *rb_new) +{ + scheduled_point_t *o = rb_entry(rb_old, scheduled_point_t, resrc_rb); + scheduled_point_t *n = rb_entry(rb_new, scheduled_point_t, resrc_rb); + n->__subtree_min = o->__subtree_min; +} + +static inline void scheduled_resrc_rotate (rb_node_t *rb_old, rb_node_t *rb_new) +{ + scheduled_point_t *o = rb_entry(rb_old, 
scheduled_point_t, resrc_rb); + scheduled_point_t *n = rb_entry(rb_new, scheduled_point_t, resrc_rb); + n->__subtree_min = o->__subtree_min; + o->__subtree_min = scheduled_resrc_subtree_min (o); +} + +static const struct rb_augment_callbacks scheduled_resrc_aug_cb = { + scheduled_resrc_propagate, scheduled_resrc_copy, scheduled_resrc_rotate +}; + +static inline int64_t veccmp (resrc_vector_t s1, resrc_vector_t s2, size_t len) +{ + int i = 0; + int less = 0; + int64_t r = 0; + for (i = 0; i < len; ++i) { + if ((r = (int64_t)s1[i] - (int64_t)s2[i]) > 0) + break; + less += r; + } + return (r > 0)? r : less; +} + +static void scheduled_resrc_insert (scheduled_point_t *new_data, rb_root_t *root) +{ + rb_node_t **link = &(root->rb_node); + scheduled_point_t *this_data = NULL; + rb_node_t *parent = NULL; + while (*link) { + this_data = rb_entry(*link, scheduled_point_t, resrc_rb); + parent = *link; + if (this_data->__subtree_min > new_data->at) + this_data->__subtree_min = new_data->at; + int64_t result = 0; + if ((result = veccmp (new_data->remaining_resrcs, + this_data->remaining_resrcs, MAX_RESRC_DIM)) < 0) + link = &(this_data->resrc_rb.rb_left); + else + link = &(this_data->resrc_rb.rb_right); + } + new_data->__subtree_min = new_data->at; + new_data->inserted_to_resrc = 1; + rb_link_node(&(new_data->resrc_rb), parent, link); + rb_insert_augmented(&(new_data->resrc_rb), root, &scheduled_resrc_aug_cb); +} + +static void scheduled_resrc_remove (scheduled_point_t *data, rb_root_t *root) +{ + rb_erase_augmented (&data->resrc_rb, root, &scheduled_resrc_aug_cb); + data->inserted_to_resrc = 0; +} + +static inline int64_t rbranch_mintm (rb_node_t *node) +{ + int64_t mn = INT64_MAX; + rb_node_t *r = node->rb_right; + mn = r? rb_entry(r, scheduled_point_t, resrc_rb)->__subtree_min : mn; + scheduled_point_t *this_data = rb_entry(node, scheduled_point_t, resrc_rb); + return (this_data->at < mn)? 
this_data->at : mn; +} + +static inline scheduled_point_t *find_mintm_point (rb_node_t *anchor, + int64_t mintm) +{ + if (!anchor) + return NULL; + + scheduled_point_t *this_data = NULL; + this_data = rb_entry(anchor, scheduled_point_t, resrc_rb); + if (this_data->at == mintm) + return this_data; + + rb_node_t *node = anchor->rb_right; + while (node) { + this_data = rb_entry(node, scheduled_point_t, resrc_rb); + if (this_data->at == mintm) + return this_data; + + if (node->rb_left + && rb_entry(node->rb_left, scheduled_point_t, + resrc_rb)->__subtree_min == mintm) + node = node->rb_left; + else + node = node->rb_right; + } + + /* this is an error condition: when an anchor was found, there must be + * a point that meets the requirements. + */ + errno = ENOTSUP; + return NULL; +} + +static inline int64_t find_mintm_anchor (int64_t *rv, rb_root_t *rt, + rb_node_t **anchor_p) +{ + rb_node_t *node = rt->rb_node; + int64_t mintm = INT64_MAX; + int64_t r_mintm = INT64_MAX; + + while (node) { + scheduled_point_t *this_data = NULL; + this_data = rb_entry(node, scheduled_point_t, resrc_rb); + int64_t result = 0; + result = veccmp (rv, this_data->remaining_resrcs, MAX_RESRC_DIM); + if (result <= 0) { + /* visiting node satisfies the resource requirements this means all + * of the nodes at its subtree also satisfies the requirements. Thus, + * rbranch_mintime is the best min time. + */ + r_mintm = rbranch_mintm (node); + if (r_mintm < mintm) { + mintm = r_mintm; + *anchor_p = node; + } + /* next, we should search the left subtree for potentially better + * then current mintm; + */ + node = node->rb_left; + } else { + /* visiting node does not satisfy the resource requirements. This + * means, nothing in its left branch will meet these requirements: + * time to search the right subtree. 
+ */ + node = node->rb_right; + } + } + return mintm; +} + +static scheduled_point_t *scheduled_resrc_mintm (int64_t *rv, rb_root_t *rt) +{ + rb_node_t *anchor = NULL; + int64_t mintm = find_mintm_anchor (rv, rt, &anchor); + return find_mintm_point (anchor, mintm); +} + +#if DEBUG_PLANNER +static void scheduled_resrc_print (rb_root_t *rt) +{ + rb_node_t *node; + int i = 0; + for (node = rb_first(rt); node; node = rb_next(node)) { + i++; + printf("..\n"); + printf("+ at=%ld\n", + rb_entry(node, scheduled_point_t, resrc_rb)->at); + printf("+ __subtree_min=%ld\n", + rb_entry(node, scheduled_point_t, resrc_rb)->__subtree_min); + printf("+ inserted_to_resrc=%d\n", + rb_entry(node, scheduled_point_t, resrc_rb)->inserted_to_resrc); + printf("+ new_point=%d\n", + rb_entry(node, scheduled_point_t, resrc_rb)->new_point); + printf("+ ref_count=%d\n", + rb_entry(node, scheduled_point_t, resrc_rb)->ref_count); + printf("+ scheduled_resrcs[0]=%jd\n", (intmax_t) rb_entry(node, + scheduled_point_t, resrc_rb)->scheduled_resrcs[0]); + printf("+ remaining_resrcs[0]=%ld\n", (intmax_t) rb_entry(node, + scheduled_point_t, resrc_rb)->remaining_resrcs[0]); + } + printf ("SIZE: %d\n", i); + printf ("===============================================================+=\n"); +} +#endif + + +/******************************************************************************* + * Scheduled Point and Resrc Update APIs * + * * + *******************************************************************************/ +static inline int track_points (zhash_t *tracker, struct scheduled_point *point) +{ + /* XXX OPTIMIZATION: Keep track of tracking status to avoid insert */ + /* XXX Use zlist or a new search tree */ + char key[32]; + sprintf (key, "%jd", (intmax_t)point->at); + /* caller will rely on the fact that rc == -1 when key already exists */ + /* don't need to register free */ + return zhash_insert (tracker, key, point); +} + +static inline void restore_track_points (planner_t *ctx, rb_root_t *root) +{ + 
scheduled_point_t *point = NULL; + zlist_t *keys = zhash_keys (ctx->avail_time_iter); + const char *k = NULL; + for (k = zlist_first (keys); k; k = zlist_next (keys)) { + point = zhash_lookup (ctx->avail_time_iter, k); + scheduled_resrc_insert (point, root); + zhash_delete (ctx->avail_time_iter, k); + } + zlist_destroy (&keys); +} + +static inline int update_scheduled_resrcs (zhash_t *tracker, rb_root_t *rt) +{ + int rc = 0; + const char *k = NULL; + scheduled_point_t *point = NULL; + zlist_t *keys = zhash_keys (tracker); + for (k = zlist_first (keys); k; k = zlist_next (keys)) { + point = zhash_lookup (tracker, k); + if (point->inserted_to_resrc) + scheduled_resrc_remove (point, rt); + if (point->ref_count && !(point->inserted_to_resrc)) + scheduled_resrc_insert (point, rt); + zhash_delete (tracker, k); + } + zlist_destroy (&keys); + return rc; +} + +static inline scheduled_point_t *add_P (planner_t *ctx, int64_t at, + reservation_t *rsv, bool up) +{ + int i = 0; + rb_root_t *rt = &(ctx->scheduled_points_root); + scheduled_point_t *point = NULL; + if (!(point = scheduled_point_search (at, rt))) { + /* If point is not found, we must create a new scheduled point obj */ + point = xzmalloc (sizeof (*point)); + point->at = at; + memset (point->scheduled_resrcs, '\0', sizeof (point->scheduled_resrcs)); + memcpy (point->remaining_resrcs, ctx->total_resrc_vector, + sizeof (point->remaining_resrcs)); + point->inserted_to_resrc = 0; /* not been inserted to resource BST */ + point->new_point = 1; + point->ref_count = 0; + if (scheduled_point_insert (point, rt) < 0) { + /* same key is rejected (should never happen) */ + errno = EKEYREJECTED; + free (point); + point = NULL; + goto done; + } + } + + for (i = 0; up && i < rsv->resrc_dim; ++i) { + point->scheduled_resrcs[i] += rsv->reserved_resrcs[i]; + point->remaining_resrcs[i] -= rsv->reserved_resrcs[i]; + if (point->scheduled_resrcs[i] > ctx->total_resrc_vector[i] + || point->remaining_resrcs[i] < 0) + errno = ERANGE; + } + 
+done: + return point; +} + +static inline int add_R (planner_t *ctx, reservation_t *rsv, zhash_t *tracker) +{ + rsv->start_p = add_P (ctx, rsv->start, rsv, true); + rsv->last_p = add_P (ctx, rsv->last, rsv, false); + if (rsv->start_p) { + rsv->start_p->ref_count++; + track_points (tracker, rsv->start_p); + } + if (rsv->last_p) { + rsv->last_p->ref_count++; + track_points (tracker, rsv->last_p); + } + return (!rsv->start_p || !rsv->last_p)? -1 : 0; +} + +static inline int sub_R (planner_t *ctx, reservation_t *rsv, zhash_t *tracker) +{ + int rc = 0; + int i = 0; + + if (rsv->start_p) { + rsv->start_p->ref_count--; + track_points (tracker, rsv->start_p); + for (i = 0; i < rsv->resrc_dim; ++i) { + rsv->start_p->scheduled_resrcs[i] -= rsv->reserved_resrcs[i]; + rsv->start_p->remaining_resrcs[i] += rsv->reserved_resrcs[i]; + if (rsv->start_p->scheduled_resrcs[i] < 0 + || rsv->start_p->remaining_resrcs[i] > ctx->total_resrc_vector[i]) { + errno = ERANGE; + rc = -1; + } + } + if (!(rsv->start_p->ref_count)) + scheduled_point_remove (rsv->start_p, &(ctx->scheduled_points_root)); + } + if (rsv->last_p) { + rsv->last_p->ref_count--; + track_points (tracker, rsv->last_p); + if (!(rsv->last_p->ref_count)) + scheduled_point_remove (rsv->last_p, &(ctx->scheduled_points_root)); + } + + return (!rsv->start_p || !rsv->last_p)? -1 : rc; +} + +static inline int add_I (planner_t *ctx, int64_t t, scheduled_point_t *p, + reservation_t *r, zhash_t *tracker, int force) +{ + int rc = 0; + /* interception due to being equal has already been taken care */ + if ((START(r) < (t) && (t) < LAST(r))) { + /* an existing point requires only one update w.r.t. new reservation + * if a new point, it needs to be updated w.r.t. 
all existing ones (force) + */ + if (track_points (tracker, p) == 0 || force) { + int i = 0; + for (i = 0; i < r->resrc_dim; ++i) { + p->scheduled_resrcs[i] += r->reserved_resrcs[i]; + p->remaining_resrcs[i] -= r->reserved_resrcs[i]; + if (p->scheduled_resrcs[i] > ctx->total_resrc_vector[i] + || p->remaining_resrcs[i] < 0) { + rc = -1; + errno = ERANGE; + } + } + } + } + return rc; +} + +static inline int sub_I (planner_t *ctx, int64_t t, scheduled_point_t *p, + reservation_t *r, zhash_t *tracker) +{ + int rc = 0; + /* interception due to being equal has already been taken care */ + if ((START(r) < (t) && (t) < LAST(r))) { + /* an existing point requires only one update w.r.t. new reservation */ + if (track_points (tracker, p) == 0) { + int i = 0; + for (i = 0; i < r->resrc_dim; ++i) { + p->scheduled_resrcs[i] -= r->reserved_resrcs[i]; + p->remaining_resrcs[i] += r->reserved_resrcs[i]; + if (p->scheduled_resrcs[i] > ctx->total_resrc_vector[i] + || p->remaining_resrcs[i] < 0) { + rc = -1; + errno = ERANGE; + } + } + } + } + return rc; +} + +static inline bool add_Is (planner_t *ctx, reservation_t *r1, + reservation_t *r2, zhash_t *tracker, int force) +{ + return ((add_I (ctx, START(r1), r1->start_p, r2, tracker, + force? r1->start_p->new_point : 0) == 0) + && (add_I (ctx, LAST(r1), r1->last_p, r2, tracker, + force? 
r1->last_p->new_point : 0) == 0)); + +} + +static inline bool sub_Is (planner_t *ctx, reservation_t *r1, + reservation_t *r2, zhash_t *tracker) +{ + return ((sub_I (ctx, START(r1), r1->start_p, r2, tracker) == 0) + && (sub_I (ctx, LAST(r1), r1->last_p, r2, tracker) == 0)); + +} + +static inline void copy_req (req_t *dest, req_t *src) +{ + dest->duration = src->duration; + dest->vector_dim = src->vector_dim; + size_t s1 = sizeof (*(dest->resrc_vector)) * MAX_RESRC_DIM; + memset (dest->resrc_vector, '\0', s1); + size_t s2 = sizeof (*(src->resrc_vector)) * src->vector_dim; + memcpy (dest->resrc_vector, src->resrc_vector, s2); +} + +static inline int64_t avail_time_internal (planner_t *ctx, req_t *req) +{ + int sat = 0; + int64_t at = -1; + int64_t *rv = NULL; + int64_t *eff_rv = NULL; + scheduled_point_t *p= NULL; + rb_root_t *r = &(ctx->scheduled_resrcs_root); + rv = (int64_t *)req->resrc_vector; + + if (veccmp (rv, ctx->total_resrc_vector, req->vector_dim) > 0) { + errno = ERANGE; + ctx->avail_time_iter_set = 0; + goto done; /* unsatisfiable */ + } + /* zero resource reservation is disallowed; a full resource check enough*/ + eff_rv = (req->exclusive)? ctx->total_resrc_vector : (int64_t *)rv; + + /* retrieve the minimum time when the requsted resources are available */ + while (!sat && (p = scheduled_resrc_mintm (eff_rv, r))) { + rb_node_t *n = rb_next(&(p->point_rb)); + scheduled_point_t *d_chk = NULL; + sat = 1; + /* retrieve the next scheduled point and see if its time overlaps + * with the request. If overlaps, check resource availability. 
+ */ + while ((d_chk = rb_entry(n, scheduled_point_t, point_rb))) { + if (OUT_OF_RANGE(d_chk->at, p->at, req->duration)) + break; + else { + int64_t result; + result = veccmp (eff_rv, d_chk->remaining_resrcs, req->vector_dim); + if (result > 0) { + scheduled_resrc_remove (p, r); + track_points (ctx->avail_time_iter, p); + sat = 0; + break; + } + } + n = rb_next (&(d_chk->point_rb)); + } + } + + if (p) { + at = p->at; + scheduled_resrc_remove (p, r); + track_points (ctx->avail_time_iter, p); + if (!OUT_OF_RANGE(ctx->plan_end, at, req->duration)) + at = -1; + } + +done: + return at; +} + +static inline int avail_resources_at_internal (planner_t *ctx, int64_t starttime, + int64_t lasttime, int64_t *rv, int vd, int exclusive) +{ + int avail = -1; + int64_t *eff_rv = NULL; + if (starttime < 0 || !rv || !ctx) { + errno = EINVAL; + goto done; + } else if (veccmp (rv, ctx->total_resrc_vector, vd) > 0) { + errno = ERANGE; + goto done; + } + + eff_rv = exclusive? ctx->total_resrc_vector : (int64_t *)rv; + rb_root_t *spr = &(ctx->scheduled_points_root); + scheduled_point_t *state_at_start = NULL; + + if ((state_at_start = scheduled_point_state (starttime, spr)) == NULL) { + errno = ENOTSUP; + goto done; + } else if (veccmp (eff_rv, state_at_start->remaining_resrcs, vd) > 0) + goto done; + + rb_node_t *n = rb_next(&(state_at_start->point_rb)); + scheduled_point_t *d_chk = NULL; + while ((d_chk = rb_entry(n, scheduled_point_t, point_rb))) { + if (OUT_OF_RANGE(d_chk->at, starttime, (lasttime - starttime))) + break; + else { + int64_t result; + result = veccmp (eff_rv, d_chk->remaining_resrcs, vd); + if (result > 0) + goto done; + } + n = rb_next (&(d_chk->point_rb)); + } + avail = 0; + +done: + return avail; +} + + +/******************************************************************************* + * Utilities * + * * + *******************************************************************************/ +static inline void planner_set_bound (planner_t *ctx, int64_t plan_starttime, + 
int64_t plan_duration) +{ + int i = 0; + + ctx->p1 = xzmalloc (sizeof (*(ctx->p1))); + ctx->p1->at = plan_starttime; + ctx->p1->ref_count = 1; + memset (ctx->p1->scheduled_resrcs, '\0', + sizeof (ctx->p1->scheduled_resrcs)); + memset (ctx->p1->remaining_resrcs, '\0', + sizeof (ctx->p1->remaining_resrcs)); + for (i = 0; i < ctx->resrc_dim; ++i) + ctx->p1->remaining_resrcs[i] = ctx->total_resrc_vector[i]; + ctx->plan_start = plan_starttime; + ctx->plan_end = plan_starttime + plan_duration; + ctx->avail_time_iter = zhash_new (); + ctx->avail_time_iter_req = xzmalloc (sizeof (*(ctx->avail_time_iter_req))); + size_t s = sizeof(*(ctx->avail_time_iter_req->resrc_vector)) * MAX_RESRC_DIM; + ctx->avail_time_iter_req->resrc_vector = xzmalloc (s); + ctx->avail_time_iter_set = 0; + ctx->reservations_root = RB_ROOT; + ctx->scheduled_points_root = RB_ROOT; + ctx->scheduled_resrcs_root = RB_ROOT; + scheduled_point_insert (ctx->p1, &(ctx->scheduled_points_root)); + scheduled_resrc_insert (ctx->p1, &(ctx->scheduled_resrcs_root)); +} + +static inline void planner_clean_internal (planner_t *ctx) +{ + if (ctx->avail_time_iter) { + zhash_destroy (&ctx->avail_time_iter); + ctx->avail_time_iter = NULL; + } + if (ctx->avail_time_iter_req) { + if (ctx->avail_time_iter_req->resrc_vector) + free (ctx->avail_time_iter_req->resrc_vector); + free (ctx->avail_time_iter_req); + ctx->avail_time_iter_req = NULL; + } + if (ctx->r_lookup) + zhashx_purge (ctx->r_lookup); + if (ctx->p1) { + scheduled_resrc_remove (ctx->p1, &(ctx->scheduled_resrcs_root)); + ctx->p1 = NULL; + } + + rb_node_t *n = NULL; + if ((n = rb_first(&(ctx->scheduled_points_root)))) + scheduled_points_destroy (n); + if ((n = rb_first(&(ctx->reservations_root)))) + reservations_destroy (n); +} + +static inline bool not_feasable (planner_t *ctx, plan_t *plan) +{ + return (plan->start < ctx->plan_start || plan->req->duration < 1 + || plan->start + (plan->req->duration - 1) > ctx->plan_end + || !plan->req->resrc_vector || 
plan->req->vector_dim > MAX_RESRC_DIM); +} + +static inline int plan_input_check (planner_t *ctx, plan_t *plan) +{ + int i = 0; + int rc = -1; + char key[32]; + if (!ctx || !plan || !plan->req || not_feasable (ctx, plan)) { + errno = EINVAL; + goto done; + } else { + int64_t sum = 0; + for (i = 0; i < plan->req->vector_dim; ++i) { + if (plan->req->resrc_vector[i] > ctx->total_resrc_vector[i]) { + errno = ERANGE; + goto done; + } + sum += plan->req->resrc_vector[i]; + } + if (sum <= 0) { + errno = ERANGE; + goto done; + } + } + + sprintf (key, "%jd", (intmax_t)plan->id); + if (zhashx_lookup (ctx->r_lookup, key) != NULL) { + errno = EINVAL; + goto done; + } + rc = 0; + +done: + return rc; +} + +static inline char *scheduled_point_to_string (scheduled_point_t *point) +{ + int i = 0; + size_t size = 0; + char *ptr = NULL; + FILE *fptr = NULL; + + if (!point) { + errno = EINVAL; + goto done; + } else if (!(fptr = open_memstream (&ptr, &size))) { + errno = ENOMEM; + goto done; + } + + if (fprintf (fptr, "\t SCHEDULED POINT INFO\n") < 0) + goto done; + else if (fprintf (fptr, "\t\t at: %jd\n", (intmax_t)point->at) < 0) + goto done; + + for (i = 0; i < MAX_RESRC_DIM; ++i) { + if (fprintf (fptr, "\t\t scheduled resources for type %d: %ju\n", i, + (intmax_t)point->scheduled_resrcs[i]) < 0) + goto done; + else if (fprintf (fptr, "\t\t remaining resources for type %d: %ju\n", i, + (intmax_t)point->remaining_resrcs[i]) < 0) + goto done; + } + +done: + if (fptr) + fclose (fptr); + return ptr; +} + +static inline int print_csv (planner_t *ctx, FILE *fptr, size_t d) +{ + rb_node_t *n = NULL; + for (n = rb_first(&(ctx->scheduled_points_root)); n; n = rb_next(n)) { + scheduled_point_t *data = container_of(n, scheduled_point_t, point_rb); + if (fprintf (fptr, "%jd %jd\n", (intmax_t)data->at, + (intmax_t)data->scheduled_resrcs[d]) < 0) + return -1; + } + return 0; +} + +static inline int print_gp (planner_t *ctx, FILE *fptr, + const char *csvfn, size_t d) +{ + int rc = 0; + if (!fptr 
|| !csvfn || d >= MAX_RESRC_DIM || !ctx) { + errno = EINVAL; + return -1; + } + + rc = fprintf (fptr, "reset\n"); + rc += fprintf (fptr, "set terminal png size 1024 768\n"); + rc += fprintf (fptr, "set yrange [0:%jd]\n", (ctx->total_resrc_vector[d]+50)); + rc += fprintf (fptr, "set xlabel \"Scheduled Points in Time\"\n"); + rc += fprintf (fptr, "set ylabel \"Scheduled Resources of Type %d\"\n", (int)d); + rc += fprintf (fptr, "set title \"Scheduled Resources Over Time\"\n"); + rc += fprintf (fptr, "set key below\n"); + rc += fprintf (fptr, "plot \"%s\" using 1:2 with steps lw 2 \n", csvfn); + return rc; +} + + +/******************************************************************************* + * * + * PUBLIC PLANNER API * + * * + *******************************************************************************/ + +/******************************************************************************* + * C'Tor/D'Tor * + *******************************************************************************/ +planner_t *planner_new (int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len) +{ + int i = 0; + planner_t *ctx = NULL; + + if (plan_starttime < 0 || plan_duration < 1 + || !total_resrcs || len > MAX_RESRC_DIM) { + errno = EINVAL; + goto done; + } else { + for (i = 0; i < len; ++i) { + if (total_resrcs[i] > INT64_MAX) { + errno = ERANGE; + goto done; + } + } + } + + ctx = xzmalloc (sizeof (*ctx)); + ctx->resrc_dim = len; + ctx->r_lookup = zhashx_new (); + memset (ctx->total_resrc_vector, '\0', sizeof (ctx->total_resrc_vector)); + for (i = 0; i < len; ++i) + ctx->total_resrc_vector[i] = (int64_t)total_resrcs[i]; + for (i = 0; i < MAX_RESRC_DIM; ++i) + ctx->resrc_type_vector[i] = NULL; + planner_set_bound (ctx, plan_starttime, plan_duration); + +done: + return ctx; +} + +void planner_destroy (planner_t **ctx_p) +{ + if (ctx_p && *ctx_p) { + planner_clean_internal (*ctx_p); + zhashx_destroy (&((*ctx_p)->r_lookup)); + free (*ctx_p); + *ctx_p = NULL; + } 
+} + +int planner_reset (planner_t *ctx, int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len) +{ + int i = 0; + int rc = -1; + if (plan_starttime < 0 || plan_duration < 1 || len > MAX_RESRC_DIM) { + errno = EINVAL; + goto done; + } else if (total_resrcs && len) { + for (i = 0; i < len; ++i) { + if (total_resrcs[i] > INT64_MAX) { + errno = ERANGE; + goto done; + } + } + } + + planner_clean_internal (ctx); + if (total_resrcs && len) { + memset (ctx->total_resrc_vector, '\0', sizeof (ctx->total_resrc_vector)); + for (i = 0; i < len; ++i) + ctx->total_resrc_vector[i] = (int64_t)total_resrcs[i]; + } + planner_set_bound (ctx, plan_starttime, plan_duration); + rc = 0; + +done: + return rc; +} + +int64_t planner_plan_starttime (planner_t *ctx) +{ + return ctx? ctx->plan_start : -1; +} + +int64_t planner_plan_duration (planner_t *ctx) +{ + return ctx? (ctx->plan_end - ctx->plan_start) : -1; +} + +const uint64_t *planner_total_resrcs (planner_t *ctx) +{ + return ctx? (const uint64_t *)ctx->total_resrc_vector : NULL; +} + +size_t planner_total_resrcs_len (planner_t *ctx) +{ + return ctx? 
ctx->resrc_dim : -1; +} + +int planner_set_resrc_types (planner_t *ctx, const char **rts, size_t len) +{ + int i = 0, j = 0; + + if (rts == NULL || len > ctx->resrc_dim) + return -1; + + for (i = 0; i < len; ++i) { + if (ctx->resrc_type_vector[i] != NULL) { + free (ctx->resrc_type_vector[i]); + ctx->resrc_type_vector[i] = NULL; + } + ctx->resrc_type_vector[i] = xstrdup (rts[i]); + } + + for (j = i; j < ctx->resrc_dim; ++j) { + if (ctx->resrc_type_vector[j] != NULL) { + free (ctx->resrc_type_vector[j]); + ctx->resrc_type_vector[j] = NULL; + } + } + + return 0; +} + +const char *planner_resrc_index2type (planner_t *ctx, int i) +{ + if (i < 0 || i >= ctx->resrc_dim) + return NULL; + return ctx->resrc_type_vector[i]; +} + +int planner_resrc_type2index (planner_t *ctx, const char *t) +{ + int i = 0; + if (t == NULL) + return -1; + + for (i = 0; i < ctx->resrc_dim; ++i) { + if (strcmp (ctx->resrc_type_vector[i], t) == 0) + break; + } + return (i < ctx->resrc_dim)? i : -1; +} + +int64_t planner_avail_time_first (planner_t *ctx, req_t *req) +{ + if (!req || !ctx) { + errno = EINVAL; + return -1; + } + restore_track_points (ctx, &(ctx->scheduled_resrcs_root)); + copy_req (ctx->avail_time_iter_req, req); + ctx->avail_time_iter_set = 1; + return avail_time_internal (ctx, ctx->avail_time_iter_req); +} + +int64_t planner_avail_time_next (planner_t *ctx) +{ + if (!ctx || !ctx->avail_time_iter_set) { + errno = EINVAL; + return -1; + } + return avail_time_internal (ctx, ctx->avail_time_iter_req); +} + +int planner_avail_resources_at (planner_t *ctx, int64_t starttime, req_t *req) +{ + return avail_resources_at_internal (ctx, starttime, starttime + req->duration, + (int64_t *)req->resrc_vector, req->vector_dim, req->exclusive); +} + +reservation_t *planner_reservation_new (planner_t *ctx, plan_t *plan) +{ + int i = 0; + reservation_t *rsv = NULL; + char key[32]; + + if (plan_input_check (ctx, plan) == -1) + goto done; + + rsv = xzmalloc (sizeof (*rsv)); + rsv->start = plan->start; 
+ rsv->last = plan->start + plan->req->duration; + rsv->resv_id = plan->id; + memset (rsv->reserved_resrcs, '\0', sizeof (rsv->reserved_resrcs)); + rsv->resrc_dim = plan->req->vector_dim; + for (i = 0; i < plan->req->vector_dim; ++i) + rsv->reserved_resrcs[i] = (int64_t)plan->req->resrc_vector[i]; + rsv->added = 0; + rsv->start_p = NULL; + rsv->last_p = NULL; + sprintf (key, "%jd", (intmax_t)rsv->resv_id); + zhashx_insert (ctx->r_lookup, key, rsv); + +done: + return rsv; +} + +void planner_reservation_destroy (planner_t *ctx, reservation_t **rsv_p) +{ + char key[32]; + if (!rsv_p || !(*rsv_p)) { + errno = EINVAL; + return; + } + sprintf (key, "%jd", (intmax_t)(*rsv_p)->resv_id); + zhashx_delete (ctx->r_lookup, key); + if ((*rsv_p)->added) + planner_rem_reservation (ctx, (*rsv_p)); + + free ((*rsv_p)); + *rsv_p = NULL; +} + +int planner_add_reservation (planner_t *ctx, reservation_t *rsv, int validate) +{ + int rc = -1; + if (!rsv || !ctx) { + errno = EINVAL; + goto done2; + } else if (rsv->added) { + goto done2; + } else if (validate == 1) { + if (avail_resources_at_internal (ctx, rsv->start, + rsv->last, rsv->reserved_resrcs, rsv->resrc_dim, 0) == -1) + goto done2; + } + + rb_root_t *srr = &(ctx->scheduled_resrcs_root); + rb_root_t *rr = &(ctx->reservations_root); + restore_track_points (ctx, srr); + + /* tr is used to keep track of the scheduled points that + * need to be updated in the min-time resource tree + */ + zhash_t *tr = zhash_new (); + + /* update the specific start and last scheduled points + * if a point already exist, simply update; otherwise + * a new point object is inserted into scheduled point tree + */ + if ((rc = add_R (ctx, rsv, tr)) < 0) + goto done; + + /* + * Go through all of the reservations that each of the two scheduled + * points of the new reservation intersects and update relevant points + */ + reservation_t *i = NULL; + for (i = reservation_iter_first (rr, START(rsv), LAST(rsv)); i; + i = reservation_iter_next (i, START(rsv), 
LAST(rsv))) { + + /* The point(s) of the intercepting reservation intersects the new one. + * The point(s) of the new reservation intercept the old one. + */ + if (!add_Is (ctx, i, rsv, tr, 0) || !add_Is (ctx, rsv, i, tr, 1)) + goto done; + } + rsv->start_p->new_point = 0; + rsv->last_p->new_point = 0; + + /* Update the min-time resource tree w.r.t. tracked scheduled points */ + if ((rc = update_scheduled_resrcs (tr, srr)) < 0) + goto done; + + reservation_insert (rsv, rr); + rsv->added = 1; + rc = 0; + +done: + if (tr) + zhash_destroy (&tr); +done2: + return rc; +} + +int planner_rem_reservation (planner_t *ctx, reservation_t *rsv) +{ + int rc = -1; + if (!rsv || !ctx) { + errno = EINVAL; + goto done2; + } else if (rsv->added != 1) + goto done2; + + rb_root_t *srr = &(ctx->scheduled_resrcs_root); + rb_root_t *rr = &(ctx->reservations_root); + reservation_t *i = NULL; + restore_track_points (ctx, srr); + + /* tr is used to keep track of the scheduled points that + * need to be updated in the min-time resource tree + */ + zhash_t *tr = zhash_new (); + + /* update the specific start and last scheduled points + * if a point already exist, simply update; otherwise + * a new point object is inserted into scheduled point tree + */ + if ((rc = sub_R (ctx, rsv, tr)) < 0) + goto done; + + /* + * Go through all of the reservations that each of the two scheduled + * points of the new reservation intersects and update relevant points + */ + for (i = reservation_iter_first (rr, START(rsv), LAST(rsv)); i; + i = reservation_iter_next (i, START(rsv), LAST(rsv))) { + if (!sub_Is (ctx, i, rsv, tr)) + goto done; + } + + if ((rc = update_scheduled_resrcs (tr, srr)) < 0) + goto done; + + reservation_remove (rsv, rr); + FREE_NOREF_POINT(rsv); + rsv->added = 0; + rc = 0; + +done: + if (tr) + zhash_destroy (&tr); +done2: + return rc; +} + +reservation_t *planner_reservation_first (planner_t *ctx) +{ + int64_t s = ctx->plan_start; + int64_t e = ctx->plan_end; + return 
reservation_iter_first (&(ctx->reservations_root), s, e); +} + +reservation_t *planner_reservation_next (planner_t *ctx, reservation_t *rsv) +{ + return reservation_iter_next (rsv, ctx->plan_start, ctx->plan_end); +} + +reservation_t *planner_reservation_by_id (planner_t *ctx, int64_t id) +{ + char key[32]; + sprintf (key, "%jd", (intmax_t)id); + return zhashx_lookup (ctx->r_lookup, key); +} + +reservation_t *planner_reservation_by_id_str (planner_t *ctx, const char *str) +{ + return (str)? zhashx_lookup (ctx->r_lookup, str) : NULL; +} + +int planner_reservation_added (planner_t *ctx, reservation_t *rsv) +{ + if (!ctx || !rsv) { + errno = EINVAL; + return -1; + } + return rsv->added? 0 : -1; +} + +int64_t planner_reservation_starttime (planner_t *ctx, reservation_t *rsv) +{ + if (!ctx || !rsv) { + errno = EINVAL; + return -1; + } + return rsv->start; +} + +int64_t planner_reservation_endtime (planner_t *ctx, reservation_t *rsv) +{ + if (!ctx || !rsv) { + errno = EINVAL; + return -1; + } + return rsv->last; +} + +const uint64_t *planner_reservation_reserved (planner_t *ctx, reservation_t *rsv, + size_t *len) +{ + if (!ctx || !rsv) { + errno = EINVAL; + return NULL; + } + *len = rsv->resrc_dim; + return (const uint64_t *) rsv->reserved_resrcs; +} + +char *planner_reservation_to_string (planner_t *ctx, reservation_t *rsv) +{ + int i = 0; + size_t size = 0; + char *ptr = NULL; + FILE *fptr = NULL; + + if (!rsv) { + errno = EINVAL; + goto done; + } else if (!(fptr = open_memstream (&ptr, &size))) { + errno = ENOMEM; + goto done; + } + + if (fprintf (fptr, "Reservation Info:\n") < 0) + goto done; + else if (fprintf (fptr, "\t id: %jd\n", (intmax_t)rsv->resv_id) < 0) + goto done; + else if (fprintf (fptr, "\t start: %jd\n", (intmax_t)rsv->start) < 0) + goto done; + else if (fprintf (fptr, "\t last: %jd\n", (intmax_t)rsv->last) < 0) + goto done; + + for (i = 0; i < rsv->resrc_dim; ++i) { + if (fprintf (fptr, " - reserved_resrcs type %d: %ju\n", i, + 
(intmax_t)rsv->reserved_resrcs[i]) < 0) + goto done; + } + + if (fprintf (fptr, "%s", scheduled_point_to_string (rsv->start_p)) < 0) + goto done; + else if (fprintf (fptr, "%s", scheduled_point_to_string (rsv->last_p)) < 0) + goto done; + +done: + if (fptr) + fclose (fptr); + return ptr; +} + +int planner_print_gnuplot (planner_t *ctx, const char *fname, size_t d) +{ + int rc = -1; + char *path1 = NULL; + char *path2 = NULL; + FILE *fptr1 = NULL; + FILE *fptr2 = NULL; + + if (!fname || d > MAX_RESRC_DIM || !ctx) { + errno = EINVAL; + goto done; + } + + if (!(path1 = xasprintf ("%s.csv", fname))) + goto done; + else if (!(path2 = xasprintf ("%s.gp", fname))) + goto done; + else if (!(fptr1 = fopen (path1, "w"))) + goto done; + else if (!(fptr2 = fopen (path2, "w"))) + goto done; + else if (print_csv (ctx, fptr1, d) < 0) + goto done; + else if (print_gp (ctx, fptr2, path1, d) < 0) + goto done; + + rc = 0; + +done: + if (fptr1) + fclose (fptr1); + if (fptr2) + fclose (fptr2); + if (path1) + free (path1); + if (path2) + free (path2); + return rc; +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/resrc/planner.h b/resrc/planner.h new file mode 100644 index 000000000..a7e316949 --- /dev/null +++ b/resrc/planner.h @@ -0,0 +1,330 @@ +/*****************************************************************************\ + * Copyright (c) 2014 Lawrence Livermore National Security, LLC. Produced at + * the Lawrence Livermore National Laboratory (cf, AUTHORS, DISCLAIMER.LLNS). + * LLNL-CODE-658032 All rights reserved. + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the license, or (at your option) + * any later version. 
+ * + * Flux is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * See also: http://www.gnu.org/licenses/ +\*****************************************************************************/ + +/* Planner provides a simple API and efficient mechanisms to allow + * a Flux scheduler to keep track of the state of resource aggregates + * of a composite resource. + * + * In a resource hierarchy used by flux-sched (e.g., hardware + * hierarchy), a composite resource is represented as a tree graph + * in which a higher-level vertex has essentially pointers to its + * immediate child resources, each of which also has pointers to + * its immediate children etc. With such an organization, the + * scheduler must essentially walk "all" of the vertices below any + * composite resource in order to determine if the "sub-resources" + * requirement can be met. + * + * When the scheduler performs such walks excessively in particular, + * on large graph, however, this can quickly become a performance and + * scalability bottleneck. Planner addresses this problem by allowing + * the scheduler to track the "sub-resources" summary information + * (i.e., aggregates) efficiently at each upper-level composite + * resource vertex and to use this aggregate information to prune + * unneccessary descent down into the subtree. + * + * Planner offers update and query APIs to support these schemes. 
+ * Through a planner API, the scheduler can ask a high-level composite + * a question: "given a request of x, y, z "sub-resources" in aggregate + * for d time unit, when is the earliest time t at which this request + * can be satisfied?" + * Another example would be to answer, "from time t to t+d, does + * this composite resource vertex has y, z sub-resources available + * in aggregate. By composing these queries at different levels in a + * resource hierarchy, the scheduler can significantly reduce the + * numbers of tree walks. Ultimately, planner will be integrated + * into our preorder tree-walk pruning filter in our future + * visitor-pattern-based resource matching scheme. + */ + +#ifndef PLANNER_H +#define PLANNER_H + +#include +#include +#include + +#define MAX_RESRC_DIM 5 + +typedef struct request { + uint64_t duration; + uint64_t *resrc_vector; + size_t vector_dim; + int exclusive; +} req_t; + +typedef struct plan { + int64_t id; + int64_t start; + struct request *req; +} plan_t; + +typedef struct reservation reservation_t; +typedef struct planner planner_t; + +/* Planner constructor: + * + * - plan_starttime: the earliest schedulable point (in time) + * planned by this planner. + * - plan_duration: the span of this planner--i.e., all reservations + * must end before plan_starttime + plan_duration. + * - total_resrcs: an array of size of len containing + * total numbers of available resources (of up to + * five different types) used in this planner. Each + * element of this array would often represent the + * total number of each sub-resource under the target + * composite resource. Note that nothing prevents + * one from using this to represent the numbers + * or amounts of available resources directly at + * the resource vertex itself, though. 
+ * - len: must be less than or equal to MAX_RESRC_DIM + */ +planner_t *planner_new (int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len); + +/* Reset the planner with new time bound and optionally resource quantities. + * Destroy all of the existing reservations. + * + * - plan_starttime: the earliest schedulable point (in time) + * planned by this planner. + * - plan_duration: the span of this planner--i.e., all reservations + * must end before plan_starttime + plan_duration. + * - total_resrcs: an array of size of len containing + * total numbers of available resources (of up to + * five different types) used in this planner. Each + * element of this array would often represent the + * total number of each sub-resource under the target + * composite resource. Note that nothing prevents + * one from using this to represent the numbers + * or amounts of available resources directly at + * the resource vertex itself, though. + * If NULL, the existing resource quantities will be used. + * - len: must be less than or equal to MAX_RESRC_DIM. + * pass 0, if the existing resource quantities must + * be used. + */ +int planner_reset (planner_t *ctx, int64_t plan_starttime, int64_t plan_duration, + uint64_t *total_resrcs, size_t len); + +/* Planner destructor: + * + * - ctx_p: a pointer to the opaque planner context returned + * from planner_new. + */ +void planner_destroy (planner_t **ctx_p); + +/* Getters: + */ +int64_t planner_plan_starttime (planner_t *ctx); +int64_t planner_plan_duration (planner_t *ctx); +const uint64_t *planner_total_resrcs (planner_t *ctx); +size_t planner_total_resrcs_len (planner_t *ctx); + +/* Set resource type strings corresponding to resources planned by this + * planner. rts is an array of resource type strings: the first element + * is the resource type name of the first-order resource of this planner, + * the second is the second-order, and so on and so forth. 
len is the + * size of this array, and this must not exceed the resource dimension + * set for this planner. + * + * - ctx: the opaque planner context returned from planner_new + * - rts: an array of resource type strings + * - len: the length of rts + */ +int planner_set_resrc_types (planner_t *ctx, const char **rts, size_t len); + +/* Return the name of the resource type corresponding to the i_th order + * resource. + * + * - ctx: the opaque planner context returned from planner_new + * - i: order index of the target resource + */ +const char *planner_resrc_index2type (planner_t *ctx, int i); + +/* Return the index of the resource type name, t + * + * - ctx: the opaque planner context returned from planner_new + * - t: the name string of the resource type + */ +int planner_resrc_type2index (planner_t *ctx, const char *t); + +/* Find the earliest point in time when the request can be reserved + * and return that time. Note that this only returns a point at which + * resource state changes. In other words, if the number of available + * resources change at t1 and t2, the possible returns are only t1 and + * t2, not t1+1 or t1+2 even if the latter points also satisfy the + * request. Return -1 on error and set errno. + * + * - ctx: the opaque planner context returned from planner_new + * - req: request specifying the resource amounts and duration + * duration must be greater than or equal to 1 (time units) + */ +int64_t planner_avail_time_first (planner_t *ctx, req_t *req); + +/* Find the next earliest point in time for the same request queried + * before through either planner_avail_time_first or planner_avail_time_next + * and and return that time. Note that this only returns a point at which + * resource state changes. In other words, if the number of available + * resources change at t1 and t2, the possible returns are only t1 and + * t2, not t1+1 or t1+2 even if the latter points also satisfy the + * request. Return -1 on error and set errno. 
+ * + * - ctx: the opaque planner context returned from planner_new + */ +int64_t planner_avail_time_next (planner_t *ctx); + +/* Return 0 if the given request consisting of numbers of resources and + * duration can be satisfied at starttime. Unlike planner_avail_time* + * functions, this works with an arbirary time within the valid + * planner span. Return -1 if the request cannot be satisfied or an error + * is encountered in which case errno is set. + * + * - ctx: the opaque planner context returned from planner_new + * - starttime: start time at which the resource request must + * be available + * - req: request specifying the resource amounts and duration. duration + * must be greater than or equal to 1 (time unit) + */ +int planner_avail_resources_at (planner_t *ctx, int64_t starttime, req_t *req); + +/* Allocate and return an object of reservation_t (opaque) type, being built + * of the passed-in plan. The object must be freed using + * planner_reservation_destroy when it is not needed. + * + * - ctx: the opaque planner context returned from planner_new + * - plan: describe the resource and duration requests. The start + * time of this request should have been previously determined + * to be satisfiable by the planner_avail_time_* functions above. + * Duration request in the plan must be greater than or equal + * to 2 (time units) as a reservation is represented as two + * unique time points. + */ +reservation_t *planner_reservation_new (planner_t *ctx, plan_t *plan); + +/* Add a new reservation to the planner and update the planner's + * resource/schduled-point state. It resets the planner's iterator + * so that planner_avail_time_next will be made to return the + * earliest schedulable point. + * + * Return -1 on error and set errno. User should check and print + * errno if -1. Otherwise return 0. 
+ * + * EINVAL: invalid argument + * EKEYREJECTED: can't update planner's internal data structures + * ERANGE: resource state became out of range e.g., reserving more than + * what is available: rsv wasn't created with available time returnedi + * and thus validated using a planner_avail famility function)? + * + * - ctx: the opaque planner context returned from planner_new + * - rsv: new reservation. + * - validate: if 1 is passed, extra check is performed if rsv is + * a valid reservation. + */ +int planner_add_reservation (planner_t *ctx, reservation_t *rsv, int validate); + +/* Remove the existing reservation from the planner and update its + * state. It resets the planner's iterator such that planner_avail_time_next + * will be made to return the earliest schedulable point. + * + * Return -1 on error and set errno; otherwise return 0. + * + * EINVAL: invalid argument + * EKEYREJECTED: can't update one of planner's internal data structures + * ERANGE: resource state became invalid. e.g., reserving more than + * what is available: rsv wasn't created with available time returnedi + * and thus validated using a planner_avail famility function)? + * + * - ctx: the opaque planner context returned from planner_new + * - rsv: an existing reservation + */ +int planner_rem_reservation (planner_t *ctx, reservation_t *rsv); + +/* Destroy the reservation object. If rsv has not been removed (using + * planner_rem_reservation), this call first removes the rsv before + * deallocating its memory. + * + * - ctx: the opaque planner context returned from planner_new + * - rsv_p: a pointer to the reservation object returned + * from planner_reservation_new + */ +void planner_reservation_destroy (planner_t *ctx, reservation_t **rsv_p); + +/* Return the reservation with the earliest start time. One should + * use this function to get the first reservation from which to iterate + * through subsequent reservations. 
This scheme allows you to + * iterate through the reservations sorted in starting time order. + * + * - ctx: the opaque planner context returned from planner_new + */ +reservation_t *planner_reservation_first (planner_t *ctx); + +/* Return the next reservation planned in the planner. Please see the + * comments above for planner_reservation_first. planner_reservation_next + * returns the reservation that appears right after rsv in start-time + * sorted order. + * + * - ctx: the opaque planner context returned from planner_new + * - rsv: a reservation object returned previously + */ +reservation_t *planner_reservation_next (planner_t *ctx, reservation_t *rsv); + +/* Return the reservation keyed by the id. id is the id field + * of the plan_t field given to planner_reservation_new. + * Return NULL when no reservation by id exists. + */ +reservation_t *planner_reservation_by_id (planner_t *ctx, int64_t id); +reservation_t *planner_reservation_by_id_str (planner_t *ctx, const char *str); + +/* Return 0 if rsv has been added to the planner; otherwise -1 + */ +int planner_reservation_added (planner_t *ctx, reservation_t *rsv); + +/* Return a string containing the information on a reservation. The + * returned string must be deallocated by the caller using free. 
+ * + * - ctx: the opaque planner context returned from planner_new + * - rsv: a reservation object + */ +char *planner_reservation_to_string (planner_t *ctx, reservation_t *rsv); + +/* Getters for reservation_t: + */ +int64_t planner_reservation_starttime (planner_t *ctx, reservation_t *rsv); +int64_t planner_reservation_endtime (planner_t *ctx, reservation_t *rsv); +const uint64_t *planner_reservation_reserved (planner_t *ctx, + reservation_t *rsv, size_t *len); + +/* Print the planner information in the files that can be visualized using gnuplot + * + * - ctx: the opaque planner context returned from planner_new + * - base_fname: base filename (.csv and .gp) + * to render: % gnuplot .gp > planner_out.png + * - d: which resource dimension to print + */ +int planner_print_gnuplot (planner_t *ctx, const char *base_fname, size_t d); + +#endif /* PLANNER_H */ + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/resrc/resrc_version.map b/resrc/resrc_version.map index d9b194945..a78c54e72 100644 --- a/resrc/resrc_version.map +++ b/resrc/resrc_version.map @@ -1,5 +1,6 @@ { global: resrc_*; + planner_*; local: *; }; From c16464db9a29177a35b7bc4822b3e0fac2a29b38 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Tue, 27 Dec 2016 17:57:37 -0800 Subject: [PATCH 3/6] planner: Add unit tests for planner APIs Added 100 some tests. 
--- resrc/test/Makefile.am | 11 +- resrc/test/tplanner.c | 848 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 858 insertions(+), 1 deletion(-) create mode 100644 resrc/test/tplanner.c diff --git a/resrc/test/Makefile.am b/resrc/test/Makefile.am index db7e17044..8e2db3c88 100644 --- a/resrc/test/Makefile.am +++ b/resrc/test/Makefile.am @@ -8,7 +8,7 @@ TESTS_ENVIRONMENT = \ LUA_PATH="$(abs_top_srcdir)/rdl/?.lua;$(FLUX_PREFIX)/share/lua/5.1/?.lua;$(LUA_PATH);;" \ LUA_CPATH="$(abs_top_builddir)/rdl/?.so;$(FLUX_PREFIX)/lib64/lua/5.1/?.so;$(LUA_CPATH);;" -TESTS = tresrc +TESTS = tresrc tplanner check_PROGRAMS = $(TESTS) tresrc_SOURCES = tresrc.c @@ -18,3 +18,12 @@ tresrc_LDADD = $(top_builddir)/resrc/libflux-resrc.la \ $(top_builddir)/src/common/libutil/libutil.la \ $(top_builddir)/src/common/libtap/libtap.la \ $(LUA_LIB) $(JANSSON_LIBS) $(CZMQ_LIBS) + +tplanner_SOURCES = tplanner.c +tplanner_CFLAGS = $(AM_CFLAGS) -I$(top_srcdir)/resrc +tplanner_LDADD = $(top_builddir)/resrc/libflux-resrc.la \ + $(top_builddir)/src/common/liblsd/liblsd.la \ + $(top_builddir)/src/common/libutil/libutil.la \ + $(top_builddir)/src/common/libtap/libtap.la \ + $(LUA_LIB) $(JANSSON_LIBS) $(CZMQ_LIBS) + diff --git a/resrc/test/tplanner.c b/resrc/test/tplanner.c new file mode 100644 index 000000000..ce3fdb3bf --- /dev/null +++ b/resrc/test/tplanner.c @@ -0,0 +1,848 @@ +/*****************************************************************************\ + * Copyright (c) 2014 Lawrence Livermore National Security, LLC. Produced at + * the Lawrence Livermore National Laboratory (cf, AUTHORS, DISCLAIMER.LLNS). + * LLNL-CODE-658032 All rights reserved. + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the license, or (at your option) + * any later version. + * + * Flux is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * See also: http://www.gnu.org/licenses/ +\*****************************************************************************/ + +#include +#include +#include +#include "planner.h" +#include "src/common/libtap/tap.h" +#include "src/common/libutil/xzmalloc.h" + +#define EXTRA_VALIDATION 0 + +static int64_t gl_plid = 0; + +static inline plan_t *pt_plan_new (planner_t *ctx, req_t *req, int64_t start) +{ + plan_t *plan = NULL; + + if (!req || start < 0) + goto done; + + plan = xzmalloc (sizeof (*plan)); + plan->req = req; + plan->id = gl_plid; + plan->start = start; + gl_plid++; + +done: + return plan; +} + +static req_t *pt_vreq_new (uint64_t duration, size_t len, va_list ap) +{ + int i = 0; + req_t *req = NULL; + if (duration < 1 || len > MAX_RESRC_DIM) + goto done; + + req = xzmalloc (sizeof (*req)); + req->resrc_vector = xzmalloc (len * sizeof (*(req->resrc_vector))); + req->vector_dim = len; + req->duration = duration; + for (i=0; i < len; ++i) + req->resrc_vector[i] = (uint64_t)va_arg(ap, int); + +done: + return req; +} + +/* make sure to pass only integers for optional arguments */ +static req_t *pt_req_new (uint64_t duration, size_t len, ...) 
+{ + req_t *req = NULL; + + va_list ap; + va_start(ap, len); + req = pt_vreq_new (duration, len, ap); + va_end(ap); + return req; +} + +static void pt_req_free (req_t *req) +{ + if (req) { + free (req->resrc_vector); + free (req); + } +} + +int pt_make_n_valid_rsvs (planner_t *ctx, reservation_t ***ra_p, + int n, uint64_t duration, size_t len, ...) +{ + int i = 0; + int rc = -1; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t *rsv = NULL; + + va_list ap; + va_start(ap, len); + req = pt_vreq_new (duration, len, ap); + va_end(ap); + + if (!req) + goto done; + + (*ra_p) = xzmalloc (n * sizeof (**ra_p)); + for (i = 0; i < n; ++i) { + if (!(plan = pt_plan_new (ctx, req, planner_avail_time_first (ctx, req)))) + goto done; + else if (!(rsv = planner_reservation_new (ctx, plan))) + goto done; + else if (planner_add_reservation (ctx, rsv, EXTRA_VALIDATION) < 0) + goto done; + + free (plan); + plan = NULL; + (*ra_p)[i] = rsv; + } + pt_req_free (req); + req = NULL; + rc = 0; + +done: + if (plan) + free (plan); + if (req) + pt_req_free (req); + return rc; +} + + +int pt_make_n_decr_rsvs (planner_t *ctx, reservation_t ***ra_p, + int n, uint64_t start_duration, size_t len, ...) 
+{ + int i = 0; + int rc = -1; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t *rsv = NULL; + + va_list ap; + va_start(ap, len); + req = pt_vreq_new (start_duration, len, ap); + va_end(ap); + + if (!req) + goto done; + + (*ra_p) = xzmalloc (n * sizeof (**ra_p)); + for (i = 0; i < n; ++i) { + if (!(plan = pt_plan_new (ctx, req, planner_avail_time_first (ctx, req)))) + goto done; + else if (!(rsv = planner_reservation_new (ctx, plan))) + goto done; + else if (planner_add_reservation (ctx, rsv, EXTRA_VALIDATION) < 0) + goto done; + + free (plan); + plan = NULL; + (*ra_p)[i] = rsv; + req->duration--; + } + pt_req_free (req); + req = NULL; + rc = 0; + +done: + if (plan) + free (plan); + if (req) + pt_req_free (req); + return rc; +} + +static void test_1r0_10p_basic () +{ + int i = 0; + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs = 1; + req_t *req = NULL; + req_t *req2 = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + + planner_t *ctx = planner_new (0, 10, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1>, 0-9}: 1-d 1 resrc for span of 10"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 5, 2, 1, 1); + ok (rc == 0, "add the max num of reservations, each requesting {<1>, 2}"); + + rc = planner_rem_reservation (ctx, ra[1]); + ok (rc == 0, "remove a reservation at 2 for {<1>, 2}"); + + rc = planner_rem_reservation (ctx, ra[2]); + ok (rc == 0, "remove a reservation at 4 for {<1>, 2}"); + + req = pt_req_new (2, 1, 1); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 2, "find the first available time for {<1>, 2}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "should not find the next available time"); + + rc = planner_avail_resources_at (ctx, 2, req); + ok (rc == 0, "find availability at 2, for {<1>, 2}"); + + rc = planner_avail_resources_at (ctx, 3, req); + ok (rc == 0, "find availability at 3, for {<1>, 2}"); + + rc = planner_avail_resources_at (ctx, 4, req); + ok (rc == 0, "find 
availability at 4, for {<1>, 2}"); + + rc = planner_avail_resources_at (ctx, 5, req); + ok ((rc == -1) && !errno, "find no availability at 5, for {<1>, 2}"); + + req2 = pt_req_new (2, 1, 1); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find the first available time for {<1>, 3}"); + + req2 = pt_req_new (4, 1, 1); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find the first available time for {<1>, 4}"); + + req2 = pt_req_new (5, 1, 1); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == -1, "no availability for {<1>, 5}"); + + plan = pt_plan_new (ctx, req, 3); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 3, for {<1>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == -1, "no availability for {<1>, 2}"); + + planner_reservation_destroy (ctx, &new_rsv); + for (i=0; i < 5; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + + free (ra); + ra = NULL; + planner_destroy (&ctx); +} + +static void test_1kr0_10kp_larger () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 1000; + planner_t *ctx = planner_new (0, 10000, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1000>, 0-9999}: <1k> resrc for 10k"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 9, 100, 1, 100); + ok (rc == 0, "add 9 reservations, each requesting {<100>, 100}"); + + req = pt_req_new (100, 1, 99); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the first available time for {<99>, 100}"); + + req = pt_req_new (100, 1, 100); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the 
first available time for {<100>, 100}"); + + req = pt_req_new (101, 1, 100); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the first available time for {<100>, 101}"); + + req = pt_req_new (100, 1, 101); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 100, "find the first available time for {<101>, 100}"); + + req = pt_req_new (100, 1, 100); + plan = pt_plan_new (ctx, req, 0); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for {<100>, 100}"); + + req = pt_req_new (1000, 1, 1); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 100, "find the first available time for {<1>, 1000}"); + plan = pt_plan_new (ctx, req, starttime); + reservation_t *new_rsv2 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, new_rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 100, for {<100>, 100}"); + + planner_reservation_destroy (ctx, &new_rsv); + planner_reservation_destroy (ctx, &new_rsv2); + for (i=0; i < 9; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + free (ra); + ra = NULL; + planner_destroy (&ctx); +} + +void test_5r0_90p_noncontiguous () +{ + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + + int64_t starttime = -1; + uint64_t total_resrcs = 5; + planner_t *ctx = planner_new (0, 90, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<5>, 0-89}: <5> resrc for 90 span"); + + req = pt_req_new (10, 1, 5); + rc = planner_avail_resources_at (ctx, 0, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + + plan = pt_plan_new (ctx, req, 0); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for 
{<5>, 10}"); + + rc = planner_avail_resources_at (ctx, 15, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + plan = pt_plan_new (ctx, req, 15); + reservation_t *rsv2 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 15, for {<5>, 10}"); + + rc = planner_avail_resources_at (ctx, 35, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + plan = pt_plan_new (ctx, req, 35); + reservation_t *rsv3 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv3, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 35, for {<5>, 10}"); + + rc = planner_avail_resources_at (ctx, 60, req); + ok (rc == 0, "find availability at 0, for {<5>, 10}"); + plan = pt_plan_new (ctx, req, 60); + reservation_t *rsv4 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv4, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 60, for {<5>, 10}"); + + req = pt_req_new (5, 1, 5); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 10, "find the first available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 25, "find the next available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 45, "find the next available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 70, "find the next available time for {<5>, 5}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 5}"); + + req = pt_req_new (10, 1, 5); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 25, "find the first available time for {<5>, 10}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 45, "find the next available time for {<5>, 10}"); + starttime = 
planner_avail_time_next (ctx); + ok (starttime == 70, "find the next available time for {<5>, 10}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 10}"); + + req = pt_req_new (15, 1, 5); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 45, "find the next available time for {<5>, 15}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == 70, "find the next available time for {<5>, 15}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 15}"); + + req = pt_req_new (20, 1, 5); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 70, "find the next available time for {<5>, 20}"); + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find no available time for {<5>, 20}"); + plan = pt_plan_new (ctx, req, 70); + reservation_t *rsv5 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv5, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 70, for {<5>, 20}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == -1, "find no available time for {<5>, 20}"); + rc = planner_rem_reservation (ctx, rsv4); + ok (rc == 0, "remove reservation at 60"); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 45, "find the first available time for {<5>, 20}"); + + planner_reservation_destroy (ctx, &rsv1); + planner_reservation_destroy (ctx, &rsv2); + planner_reservation_destroy (ctx, &rsv3); + planner_reservation_destroy (ctx, &rsv4); + planner_reservation_destroy (ctx, &rsv5); + + planner_destroy (&ctx); +} + +void test_1r0_12p_midstart () +{ + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 1; + + planner_t *ctx = planner_new (0, 12, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1>, 0-9}: <1> resrc for 10 span"); + + req = 
pt_req_new (4, 1, 1); + rc = planner_avail_resources_at (ctx, 4, req); + ok (rc == 0, "find availability at 4, for {<1>, 4}"); + + plan = pt_plan_new (ctx, req, 4); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 4, for {<1>, 4}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find the first available time at 0 for {<1>, 4}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == 8, "find the next available time at 8 for {<1>, 4}"); + + rc = planner_rem_reservation (ctx, rsv1); + ok (rc == 0, "remove reservation at 4: {<1>, 4}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find the first available time at 0 for {<1>, 4}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "no other scheduled point exists for {<1>, 4}"); + + planner_reservation_destroy (ctx, &rsv1); + plan = pt_plan_new (ctx, req, 3); + rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 3, for {<1>, 4}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 7, "find the first available time at 7 for {<1>, 4}"); + pt_req_free (req); + + planner_reservation_destroy (ctx, &rsv1); + planner_destroy (&ctx); +} + +void test_100r0_5000000_long () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 100; + + planner_t *ctx = planner_new (0, 6000000, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<100>, 0-4999999}: <100> for 5000000"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 10000, 10000, 1, 5); + ok (rc == 0, "add 10000 reservations, each requesting {<100>, 10000}"); + + req = pt_req_new (10000, 1, 100); + starttime = planner_avail_time_first (ctx, req); + ok 
(starttime == 5000000, "find the first available time for {<100>, 10000}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add a reservation at 5000000 requesting {<100>, 10000}"); + + for (i=0; i < 10000; ++i) { + planner_rem_reservation (ctx, ra[i]); + planner_reservation_destroy (ctx, &(ra[i])); + } + + free (ra); + ra = NULL; + + req = pt_req_new (10000, 1, 55); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 0, "find the first available time for {<55>, 10000}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == 5010000, "find the first available time for {<55>, 10000}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "find the first available time for {<55>, 10000}"); + + planner_reservation_destroy (ctx, &new_rsv); + + planner_destroy (&ctx); +} + +void test_5r0_2200_short () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 5; + + planner_t *ctx = planner_new (0, 2200, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<5>, 0-2199}: <5> for 2200"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 1000, 1, 1, 3); + ok (rc == 0, "add 1000 reservations, each requesting {<3>, 1}"); + + req = pt_req_new (10, 1, 2); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find the first available time for {<2>, 10}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add a reservation at 0 requesting {<2>, 10}"); + + req = pt_req_new (10, 1, 4); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 1000, 
"find the first available time for {<4>, 10}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv2 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add a reservation at 1000 requesting {<4>, 10}"); + + req = pt_req_new (990, 1, 2); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 10, "find the first available time for {<2>, 910}"); + + req = pt_req_new (991, 1, 2); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == 1010, "find the first available time for {<2>, 911}"); + + //rc = planner_print_gnuplot (ctx, "plan.out", 0); + //ok (rc == 0, "print gnuplot works"); + + for (i=0; i < 1000; ++i) { + planner_rem_reservation (ctx, ra[i]); + planner_reservation_destroy (ctx, &(ra[i])); + } + + free (ra); + ra = NULL; + + planner_reservation_destroy (ctx, &rsv1); + planner_reservation_destroy (ctx, &rsv2); + planner_destroy (&ctx); +} + +static void test_5xr0_4_10p_basic () +{ + int i = 0; + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs_a[5] = {5, 50, 500, 5000, 50000}; + req_t *req = NULL; + req_t *req2 = NULL; + plan_t *plan = NULL; + reservation_t **ra = NULL; + + planner_t *ctx = planner_new (0, 10, total_resrcs_a, 5); + ok (ctx != NULL, "a planner for {<5,50,500,5000,50000>, 0-9}: 5-d for 10"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 5, 2, 5, 5, 50, 500, 5000, 50000); + ok (rc == 0, "add reservations, each requesting {<5,50,500,5000,50000>, 2}"); + + rc = planner_rem_reservation (ctx, ra[1]); + ok (rc == 0, "remove a reservation at 2 for {<5,50,500,5000,50000>, 2}"); + + rc = planner_rem_reservation (ctx, ra[2]); + ok (rc == 0, "remove a reservation at 4 for {<5,50,500,5000,50000>, 2}"); + + req = pt_req_new (2, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 2, "find first availability for 
{<5,50,500,5000,50000>, 2}"); + + starttime = planner_avail_time_next (ctx); + ok (starttime == -1, "should not find the next available time"); + + rc = planner_avail_resources_at (ctx, 2, req); + ok (rc == 0, "find availability at 2, for {<5,50,500,5000,50000>, 2}"); + + rc = planner_avail_resources_at (ctx, 3, req); + ok (rc == 0, "find availability at 3, for {<5,50,500,5000,50000>, 2}"); + + rc = planner_avail_resources_at (ctx, 4, req); + ok (rc == 0, "find availability at 4, for {<5,50,500,5000,50000>, 2}"); + + rc = planner_avail_resources_at (ctx, 5, req); + ok ((rc == -1) && !errno, "find no availability at 5"); + + req2 = pt_req_new (3, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find first availability for {<5,50,500,5000,50000>, 3}"); + + req2 = pt_req_new (4, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == 2, "find first availability for {<5,50,500,5000,50000>, 4}"); + + req2 = pt_req_new (5, 5, 5, 50, 500, 5000, 50000); + starttime = planner_avail_time_first (ctx, req2); + pt_req_free (req2); + ok (starttime == -1, "no availability for {<5,50,500,5000,50000>, 5}"); + + plan = pt_plan_new (ctx, req, 3); + reservation_t *new_rsv = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, new_rsv, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 3, for {<5,50,500,5000,50000>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == -1, "no availability for {<5,50,500,5000,50000>, 2}"); + + planner_reservation_destroy (ctx, &new_rsv); + for (i=0; i < 5; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + + free (ra); + ra = NULL; + planner_destroy (&ctx); +} + +void test_2xr0_4_10p_2D_unmet () +{ + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs_a[5] = {2, 20, 200, 2000, 20000}; + req_t *req = NULL; + req_t 
*req2 = NULL; + plan_t *plan = NULL; + + planner_t *ctx = planner_new (0, 10, total_resrcs_a, 5); + ok (ctx != NULL, "a planner for {<2,20,200,2000,20000>, 0-9}: 5-d for 10"); + + req = pt_req_new (2, 5, 1, 10, 100, 1000, 10000); + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find first availability for {<1,10,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv1, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for {<1,10,100,1000,10000>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 0, "find first availability for {<1,10,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv2 = planner_reservation_new (ctx, plan); + free (plan); + rc = planner_add_reservation (ctx, rsv2, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 0, for {<1,10,100,1000,10000>, 2}"); + + req2 = pt_req_new (2, 5, 0, 20, 100, 1000, 10000); + starttime = planner_avail_time_first (ctx, req2); + ok (starttime == 2, "find first availability for {<0,20,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req2, starttime); + reservation_t *rsv3 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req2); + rc = planner_add_reservation (ctx, rsv3, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 2, for {<1,20,100,1000,10000>, 2}"); + + starttime = planner_avail_time_first (ctx, req); + ok (starttime == 4, "find first availability for {<1,10,100,1000,10000>, 2}"); + + plan = pt_plan_new (ctx, req, starttime); + reservation_t *rsv4 = planner_reservation_new (ctx, plan); + free (plan); + pt_req_free (req); + rc = planner_add_reservation (ctx, rsv4, EXTRA_VALIDATION); + ok (rc == 0, "add reservation at 4, for {<1,10,100,1000,10000>, 2}"); + + req = pt_req_new (2, 5, 1, 0, 100, 1000, 10000); + starttime = planner_avail_time_first (ctx, req); + 
pt_req_free (req); + ok (starttime == 2, "find first availability for {<1,0,100,1000,10000>, 2}"); + + planner_reservation_destroy (ctx, &rsv1); + planner_reservation_destroy (ctx, &rsv2); + planner_reservation_destroy (ctx, &rsv3); + planner_reservation_destroy (ctx, &rsv4); + + planner_destroy (&ctx); +} + +void test_many_complete_times () +{ + int i = 0; + int rc = 0; + req_t *req = NULL; + reservation_t **ra = NULL; + int64_t starttime = -1; + uint64_t total_resrcs = 2000; + + planner_t *ctx = planner_new (0, 2500, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<2000>, 0-2499}: <2000> for 2500"); + + rc = pt_make_n_decr_rsvs (ctx, &ra, 2000, 2000, 1, 1); + ok (rc == 0, "add 2000 reservations, each requesting {<1>, 2000--}"); + + for (i=1; i < 1999; ++i) { + req = pt_req_new (10, 1, i); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + } + req = pt_req_new (10, 1, i); + starttime = planner_avail_time_first (ctx, req); + pt_req_free (req); + ok (starttime == i, "find the first available time for {<1000++>, 10}"); + + for (i=0; i < 2000; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + + free (ra); + ra = NULL; + + planner_destroy (&ctx); +} + +void test_misc () +{ + int i = 0; + int rc = 0; + int64_t starttime = 0; + uint64_t total_resrcs = 1; + reservation_t **ra = NULL; + reservation_t *rsv = NULL; + + planner_t *ctx = planner_new (0, 20, &total_resrcs, 1); + ok (ctx != NULL, "a planner for {<1>, 0-9}: 1-d 1 resrc for span of 10"); + + rc = pt_make_n_valid_rsvs (ctx, &ra, 5, 2, 1, 1); + ok (rc == 0, "add the max num of reservations, each requesting {<1>, 2}"); + + req_t *req = pt_req_new (2, 1, 1); + plan_t *plan = pt_plan_new (ctx, req, 12); + reservation_t *rsv1 = planner_reservation_new (ctx, plan); + rc = planner_add_reservation (ctx, rsv1, 1); + plan->start = 10; + reservation_t *rptr = planner_reservation_by_id (ctx, plan->id); + ok (rsv1 == rptr, "planner_reservation_by_id works"); + char key[32]; + sprintf (key, 
"%jd", (intmax_t)plan->id); + rptr = planner_reservation_by_id_str (ctx, key); + ok (rsv1 == rptr, "planner_reservation_by_id works"); + rptr = planner_reservation_new (ctx, plan); + ok ((rptr == NULL) && (errno == EINVAL), "existing id correctly rejected"); + free (plan); + pt_req_free (req); + char *str = planner_reservation_to_string (ctx, rsv1); + ok (str != NULL, "planner_reservation_to_string works"); + free (str); + + for (rsv = planner_reservation_first (ctx); rsv; + rsv = planner_reservation_next (ctx, rsv)) { + int64_t st = planner_reservation_starttime (ctx, rsv); + if (st != -1 && st < starttime) + break; + starttime = st; + i++; + } + ok ((i == 6), "planner_revervation iterator works"); + for (i=0; i < 5; ++i) + planner_reservation_destroy (ctx, &(ra[i])); + free (ra); + ra = NULL; + planner_reservation_destroy (ctx, &rsv1); + planner_destroy (&ctx); +} + +int main (int argc, char *argv[]) +{ + + plan (NO_PLAN); + + test_1r0_10p_basic (); + + test_1kr0_10kp_larger (); + + test_5r0_90p_noncontiguous (); + + test_1r0_12p_midstart (); + + test_100r0_5000000_long (); + + test_5r0_2200_short (); + + test_5xr0_4_10p_basic (); + + test_2xr0_4_10p_2D_unmet (); + + test_many_complete_times (); + + test_misc (); + + done_testing (); + + return 0; +} + + +/* + * vi: ts=4 sw=4 expandtab + */ From 0317335814f56a80cc8b139532410368af2bd60e Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Sat, 31 Dec 2016 13:11:19 -0800 Subject: [PATCH 4/6] planner: replace twindow with planner-based Replace resrc's twindow using planner-based API. two things to note: 1. Because of an API mismatch between resrc's "avail" functions and planner's, this commit changes the signiture of the resrc's "avail" functions a bit and adjust the resrc test cases. 2. Because there is yet no use case for resrc's copy constructor, this commit doesn't make a deep copy of the planner-based twindow member. The exact semantics of the copy constructor needs to be discussed. 
--- resrc/resrc.c | 419 +++++++++++++------------------------------- resrc/resrc.h | 19 +- resrc/resrc_flow.c | 6 +- resrc/test/tresrc.c | 107 +++++------ 4 files changed, 175 insertions(+), 376 deletions(-) diff --git a/resrc/resrc.c b/resrc/resrc.c index 58fc12720..ba92577d2 100644 --- a/resrc/resrc.c +++ b/resrc/resrc.c @@ -42,33 +42,6 @@ #include "src/common/libutil/xzmalloc.h" - -typedef struct window { - int64_t starttime; - int64_t endtime; - const char *job_id; -} window_t; - -/* static window_t * window_new (int64_t starttime, int64_t endtime) { */ -/* window_t *ret = malloc (sizeof *ret); */ -/* ret->starttime = starttime; */ -/* ret->endtime = endtime; */ -/* return ret; */ -/* } */ - -static void window_destructor (void **window_v) { - if (window_v) { - free(*window_v); - *window_v = NULL; - } -} - -static void *window_dup (const void *window) { - window_t * ret = malloc(sizeof *ret); - memcpy(ret, window, sizeof *ret); - return ret; -} - struct resrc { char *type; char *path; @@ -87,11 +60,47 @@ struct resrc { zhash_t *tags; zhash_t *allocs; zhash_t *reservtns; - zhashx_t *twindow; + planner_t *twindow; }; static zhash_t *resrc_hash = NULL; +static inline plan_t *plan_new (int64_t job_id, int64_t start, uint64_t duration, + int exclusive, size_t len, ...) 
+{ + plan_t *plan = NULL; + int i = 0; + + va_list ap; + va_start(ap, len); + plan = xzmalloc (sizeof (*plan)); + plan->id = job_id; + plan->start = start; + plan->req = xzmalloc (sizeof (*(plan->req))); + plan->req->resrc_vector = xzmalloc (len * sizeof (*(plan->req->resrc_vector))); + plan->req->vector_dim = len; + for (i=0; i < len; ++i) + plan->req->resrc_vector[i] = (uint64_t)va_arg(ap, int); + plan->req->duration = duration; + plan->req->exclusive = exclusive; + va_end(ap); + return plan; +} + +static inline void plan_destroy (plan_t **plan_p) +{ + if (plan_p && *plan_p) { + if ((*plan_p)->req) { + if ((*plan_p)->req->resrc_vector) + free ((*plan_p)->req->resrc_vector); + free ((*plan_p)->req); + } + free (*plan_p); + *plan_p = NULL; + } +} + + /*************************************************************************** * API ***************************************************************************/ @@ -174,228 +183,30 @@ size_t resrc_available (resrc_t *resrc) return 0; } -size_t resrc_available_at_time (resrc_t *resrc, int64_t time) -{ - const char *id_ptr = NULL; - window_t *window = NULL; - size_t *size_ptr = NULL; - - size_t available = resrc->size; - - if (time < 0) { - time = epochtime(); - } - - // Check that the time is during the resource lifetime - window = zhashx_lookup (resrc->twindow, "0"); - if (window && (time < window->starttime || time > window->endtime)) { - return 0; - } - - // Iterate over all allocation windows in resrc. We iterate using - // the hash to avoid copying the entire hash every time, using - // zhashx_cursor to retrieve the key to lookup the size in resrc->allocs. - window = zhashx_first (resrc->twindow); - while (window) { - id_ptr = zhashx_cursor(resrc->twindow); - if (!strcmp (id_ptr, "0")) { - /* This is the resource lifetime entry and should not be - * evaluated as an allocation or reservation entry */ - window = zhashx_next (resrc->twindow); - continue; - } - - // Does time intersect with window? 
- if (time >= window->starttime && time <= window->endtime) { - // Decrement available by allocation and/or reservation size - size_ptr = (size_t*)zhash_lookup (resrc->allocs, id_ptr); - if (size_ptr) { - available -= *size_ptr; - } - size_ptr = (size_t*)zhash_lookup (resrc->reservtns, id_ptr); - if (size_ptr) { - available -= *size_ptr; - } - } - - window = zhashx_next (resrc->twindow); - } - - return available; -} - -static int compare_windows_starttime (const void *item1, const void *item2) +/* Note: I think quantities should be changed to either unsigned int or int64_t */ +int resrc_available_at_time (resrc_t *resrc, int64_t time, size_t reqrd_size) { - const window_t * lhs = item1, *rhs = item2; - if (lhs->starttime < rhs->starttime) - return -1; - if (lhs->starttime == rhs->starttime) - return 0; - return 1; + req_t req; + planner_t *pl = resrc->twindow; + int64_t start = time; + req.duration = 1; + req.resrc_vector = (uint64_t *)&reqrd_size; + req.vector_dim = 1; + req.exclusive = 0; + return planner_avail_resources_at (pl, start, &req); } -static int compare_windows_endtime (const void *item1, const void *item2) +int resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, + int64_t range_endtime, size_t reqrd_size, bool exclusive) { - const window_t * lhs = item1, *rhs = item2; - if (lhs->endtime < rhs->endtime) - return -1; - if (lhs->endtime == rhs->endtime) - return 0; - return 1; -} - -size_t resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, - int64_t range_endtime, bool exclusive) -{ - window_t *window = NULL; - const char *id_ptr = NULL; - int64_t curr_endtime = 0; - int64_t curr_starttime = 0; - size_t curr_available = 0; - size_t min_available = 0; - size_t *alloc_ptr = NULL; - size_t *reservtn_ptr = NULL; - size_t *size_ptr = NULL; - zlistx_t *matching_windows = NULL; - - if (range_starttime == range_endtime) { - return resrc_available_at_time (resrc, range_starttime); - } - - matching_windows = zlistx_new (); - 
/* zlistx_set_duplicator(matching_windows, window_dup); */ - zlistx_set_destructor(matching_windows, window_destructor); - - // Check that the time is during the resource lifetime - window = zhashx_lookup (resrc->twindow, "0"); - if (window) { - curr_starttime = window->starttime; - curr_endtime = window->endtime; - if ( (range_starttime < curr_starttime) || - (range_endtime > curr_endtime) ) { - return 0; - } - } - - // Map allocation window strings to JSON objects. Filter out - // windows that don't overlap with the input range. Then add the - // job id to the JSON obj and insert the JSON obj into the - // "matching windows" list. - window = zhashx_first (resrc->twindow); - while (window) { - id_ptr = zhashx_cursor(resrc->twindow); - if (!strcmp (id_ptr, "0")) { - /* This is the resource lifetime entry and should not be - * evaluated as an allocation or reservation entry */ - window = zhashx_next (resrc->twindow); - continue; - } - curr_starttime = window->starttime; - curr_endtime = window->endtime; - - // Does input range intersect with window? - if ( !((curr_starttime < range_starttime && - curr_endtime < range_starttime) || - (curr_starttime > range_endtime && - curr_endtime > range_endtime)) ) { - - /* If the sample requires exclusive access and we are - * here, then we now know that exclusivity cannot be - * granted over the requested range. Leave now. */ - if (exclusive) - goto ret; - - alloc_ptr = (size_t*)zhash_lookup (resrc->allocs, id_ptr); - reservtn_ptr = (size_t*)zhash_lookup (resrc->reservtns, id_ptr); - if (alloc_ptr || reservtn_ptr) { - // Add the window key and insert JSON obj into the - // "matching windows" list - window_t * new_window = window_dup (window); - new_window->job_id = id_ptr; - zlistx_add_end (matching_windows, new_window); - } - } - - window = zhashx_next (resrc->twindow); - } - - // Duplicate the "matching windows" list and then sort the 2 lists - // based on start and end times. 
We will walk through these lists - // in order to find the minimum available during the input range - zlistx_t *start_windows = matching_windows; - zlistx_set_comparator(start_windows, compare_windows_starttime); - zlistx_t *end_windows = zlistx_dup (start_windows); - // Do not free items in this list, they are owned by the start_windows - // list - zlistx_set_destructor(end_windows, NULL); - zlistx_set_comparator(end_windows, compare_windows_endtime); - zlistx_sort (start_windows); - zlistx_sort (end_windows); - - window_t *curr_start_window = zlistx_first (start_windows); - window_t *curr_end_window = zlistx_first (end_windows); - - min_available = resrc->size; - curr_available = resrc->size; - - // Start iterating over the windows and calculating the min - // available - // - // OPTIMIZE: stop iterating when curr_start_window == NULL Once we - // run out of start windows, curr available cannot get any - // smaller; we have hit our min. Just need to test to verify that - // this optimziation is correct/safe. 
- while (curr_start_window) { - curr_starttime = curr_start_window->starttime; - curr_endtime = curr_end_window->endtime; - - if ((curr_start_window) && - (curr_starttime < curr_endtime)) { - // New range is starting, get its size and subtract it - // from current available - size_ptr = (size_t*)zhash_lookup (resrc->allocs, curr_start_window->job_id); - if (size_ptr) - curr_available -= *size_ptr; - size_ptr = (size_t*)zhash_lookup (resrc->reservtns, curr_start_window->job_id); - if (size_ptr) - curr_available -= *size_ptr; - curr_start_window = zlistx_next (start_windows); - if (curr_start_window) { - curr_starttime = curr_start_window->starttime; - } else { - curr_starttime = TIME_MAX; - } - } else if ((curr_end_window) && - (curr_endtime < curr_starttime)) { - // A range just ended, get its size and add it back into - // current available - id_ptr = curr_end_window->job_id; - size_ptr = (size_t*)zhash_lookup (resrc->allocs, id_ptr); - if (size_ptr) - curr_available += *size_ptr; - size_ptr = (size_t*)zhash_lookup (resrc->reservtns, id_ptr); - if (size_ptr) - curr_available += *size_ptr; - curr_end_window = zlistx_next (end_windows); - if (curr_end_window) { - curr_endtime = curr_end_window->endtime; - } else { - curr_endtime = TIME_MAX; - } - } else { - fprintf (stderr, - "%s - ERR: Both start/end windows are empty\n", - __FUNCTION__); - } - min_available = (curr_available < min_available) ? curr_available : - min_available; - } - - zlistx_destroy (&end_windows); -ret: - zlistx_destroy (&matching_windows); - - return min_available; + req_t req; + planner_t *pl = resrc->twindow; + int64_t start = range_starttime; + req.duration = (uint64_t)(range_endtime - range_starttime + 1); + req.resrc_vector = (uint64_t *)&reqrd_size; + req.vector_dim = 1; + req.exclusive = exclusive? 
1 : 0; + return planner_avail_resources_at (pl, start, &req); } char* resrc_state (resrc_t *resrc) @@ -424,6 +235,13 @@ char* resrc_state (resrc_t *resrc) return str; } +planner_t *resrc_twindow (resrc_t *resrc) +{ + if (resrc) + return resrc->twindow; + return NULL; +} + resrc_tree_t *resrc_phys_tree (resrc_t *resrc) { if (resrc) @@ -445,13 +263,6 @@ size_t resrc_size_reservtns (resrc_t *resrc) return 0; } -int resrc_twindow_insert (resrc_t *resrc, const char *key, int64_t starttime, int64_t endtime) -{ - const window_t w = {.starttime = starttime, .endtime = endtime}; - int rc = zhashx_insert (resrc->twindow, key, (void *)&w); - return rc; -} - int resrc_graph_insert (resrc_t *resrc, const char *name, resrc_flow_t *flow) { int rc = zhash_insert (resrc->graphs, name, flow); @@ -511,9 +322,7 @@ resrc_t *resrc_new_resource (const char *type, const char *path, resrc->reservtns = zhash_new (); resrc->properties = zhash_new (); resrc->tags = zhash_new (); - resrc->twindow = zhashx_new (); - zhashx_set_destructor(resrc->twindow, window_destructor); - zhashx_set_duplicator(resrc->twindow, window_dup); + resrc->twindow = planner_new (0, TIME_MAX, (uint64_t *)&size, 1); } return resrc; @@ -539,10 +348,11 @@ resrc_t *resrc_copy_resource (resrc_t *resrc) new_resrc->reservtns = zhash_dup (resrc->reservtns); new_resrc->properties = zhash_dup (resrc->properties); new_resrc->tags = zhash_dup (resrc->tags); - if (resrc->twindow) - new_resrc->twindow = zhashx_dup (resrc->twindow); - else - new_resrc->twindow = NULL; + /* Note: we don't make a deep copy of twindow in this copy constructor yet + * @lipari and @dongahn want to see user cases of this constructor + * before deciding the semantics of member copies. 
+ */ + new_resrc->twindow = NULL; } return new_resrc; @@ -574,7 +384,7 @@ void resrc_resource_destroy (void *object) zhash_destroy (&resrc->properties); zhash_destroy (&resrc->tags); if (resrc->twindow) - zhashx_destroy (&resrc->twindow); + planner_destroy (&(resrc->twindow)); free (resrc); } } @@ -670,8 +480,8 @@ resrc_t *resrc_new_from_json (json_t *o, resrc_t *parent, bool physical) else endtime = TIME_MAX; } - - resrc_twindow_insert (resrc, "0", starttime, endtime); + int64_t d = endtime - starttime + 1; + planner_reset (resrc->twindow, starttime, d, NULL, 0); } } @@ -843,7 +653,8 @@ static resrc_t *resrc_new_from_hwloc_obj (hwloc_obj_t obj, resrc_t *parent, /* add twindow */ if ((!strncmp (type, "node", 5)) || (!strncmp (type, "core", 5))) { - resrc_twindow_insert (resrc, "0", epochtime (), TIME_MAX); + int64_t e = epochtime (); + planner_reset (resrc->twindow, e, TIME_MAX - e, NULL, 0); } } ret: @@ -1061,29 +872,15 @@ resrc_t *resrc_create_cluster (char *cluster) bool resrc_walltime_match (resrc_t *resrc, resrc_reqst_t *request, size_t reqrd_size) { - bool rc = false; - window_t *window = NULL; - int64_t endtime = resrc_reqst_endtime (request); - int64_t starttime = resrc_reqst_starttime (request); - size_t available = 0; - - /* If request endtime is greater than the lifetime of the - resource, then return false */ - window = zhashx_lookup (resrc->twindow, "0"); - if (window) { - if (endtime > (window->endtime - 10)) { - return false; - } - } - - /* find the minimum available resources during the requested time - * range */ - available = resrc_available_during_range (resrc, starttime, endtime, - resrc_reqst_exclusive (request)); - - rc = (available >= reqrd_size); - - return rc; + req_t req; + planner_t *pl = resrc->twindow; + int64_t start = resrc_reqst_starttime (request); + req.duration = (uint64_t)(resrc_reqst_endtime (request) - start); + uint64_t sz = (uint64_t)reqrd_size; + req.resrc_vector = &sz; + req.vector_dim = 1; + req.exclusive = 
resrc_reqst_exclusive (request)? 1: 0; + return (planner_avail_resources_at (pl, start, &req) == 0); } bool resrc_match_resource (resrc_t *resrc, resrc_reqst_t *request, @@ -1259,14 +1056,16 @@ static int resrc_allocate_resource_in_time (resrc_t *resrc, int64_t job_id, char *id_ptr = NULL; int rc = -1; size_t *size_ptr; - size_t available; + plan_t *pin = NULL; + uint64_t d = (uint64_t)(endtime - starttime + 1); + + pin = plan_new (job_id, starttime, d, 0, 1, resrc->staged); /* Don't bother going through the exclusivity checks. We will * save cycles and assume the selected resources are * exclusively available if that was the criteria of the * search. */ - available = resrc_available_during_range (resrc, starttime, endtime, false); - if (resrc->staged > available) + if (planner_avail_resources_at (resrc->twindow, starttime, pin->req) != 0) goto ret; id_ptr = xasprintf ("%"PRId64"", job_id); @@ -1277,11 +1076,16 @@ static int resrc_allocate_resource_in_time (resrc_t *resrc, int64_t job_id, resrc->staged = 0; /* add walltime */ - resrc_twindow_insert (resrc, id_ptr, starttime, endtime); - - rc = 0; + reservation_t *rsv = NULL; + if (!(rsv = planner_reservation_new (resrc->twindow, pin))) + goto ret; + else if ((rc = planner_add_reservation (resrc->twindow, rsv, 0)) == -1) + goto ret; free (id_ptr); + ret: + if (pin) + plan_destroy (&pin); return rc; } @@ -1364,14 +1168,16 @@ static int resrc_reserve_resource_in_time (resrc_t *resrc, int64_t job_id, char *id_ptr = NULL; int rc = -1; size_t *size_ptr; - size_t available; + plan_t *pin = NULL; + reservation_t *rsv = NULL; + uint64_t d = (uint64_t)(endtime - starttime + 1); /* Don't bother going through the exclusivity checks. We will * save cycles and assume the selected resources are * exclusively available if that was the criteria of the * search. 
*/ - available = resrc_available_during_range (resrc, starttime, endtime, false); - if (resrc->staged > available) + pin = plan_new (job_id, starttime, d, 0, 1, resrc->staged); + if (planner_avail_resources_at (resrc->twindow, starttime, pin->req) != 0) goto ret; id_ptr = xasprintf ("%"PRId64"", job_id); @@ -1382,11 +1188,15 @@ static int resrc_reserve_resource_in_time (resrc_t *resrc, int64_t job_id, resrc->staged = 0; /* add walltime */ - resrc_twindow_insert (resrc, id_ptr, starttime, endtime); - - rc = 0; + if (!(rsv = planner_reservation_new (resrc->twindow, pin))) + goto ret; + else if ((rc = planner_add_reservation (resrc->twindow, rsv, 0)) == -1) + goto ret; free (id_ptr); + ret: + if (pin) + plan_destroy (&pin); return rc; } @@ -1455,8 +1265,11 @@ int resrc_release_allocation (resrc_t *resrc, int64_t rel_job) if (size_ptr) { if (resrc->state == RESOURCE_ALLOCATED) resrc->available += *size_ptr; - else - zhashx_delete (resrc->twindow, id_ptr); + else { + reservation_t *rsv = planner_reservation_by_id_str (resrc->twindow, + (const char*)id_ptr); + planner_reservation_destroy (resrc->twindow, &rsv); + } zhash_delete (resrc->allocs, id_ptr); if ((resrc->state != RESOURCE_INVALID) && !zhash_size (resrc->allocs)) { @@ -1498,7 +1311,9 @@ int resrc_release_all_reservations (resrc_t *resrc) resrc->available += *size_ptr; else { id_ptr = (char *)zhash_cursor (resrc->reservtns); - zhashx_delete (resrc->twindow, id_ptr); + reservation_t *rsv = planner_reservation_by_id_str (resrc->twindow, + (const char*)id_ptr); + planner_reservation_destroy (resrc->twindow, &rsv); } size_ptr = zhash_next (resrc->reservtns); } diff --git a/resrc/resrc.h b/resrc/resrc.h index c9c6665a6..ab73beb8c 100644 --- a/resrc/resrc.h +++ b/resrc/resrc.h @@ -6,8 +6,9 @@ */ #include +#include "planner.h" -#define TIME_MAX INT64_MAX +#define TIME_MAX INT64_MAX - 10 typedef struct hwloc_topology * TOPOLOGY; typedef struct resrc resrc_t; @@ -88,22 +89,28 @@ size_t resrc_size (resrc_t *resrc); size_t 
resrc_available (resrc_t *resrc); /* - * Return the amount of the resource available at the given time + * Return 0 if the required amount of the resource is available at the given time; + * otherwise -1. */ -size_t resrc_available_at_time (resrc_t *resrc, int64_t time); +int resrc_available_at_time (resrc_t *resrc, int64_t time, size_t reqrd_size); /* - * Return the least amount of the resource available during the time + * Return 0 if the required amount of the resource is available during the time * range */ -size_t resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, - int64_t range_endtime, bool exclusive); +int resrc_available_during_range (resrc_t *resrc, int64_t range_starttime, + int64_t range_endtime, size_t reqrd_size, bool exclusive); /* * Return the resource state as a string */ char* resrc_state (resrc_t *resrc); +/* + * Return twindow of planner_t type + */ +planner_t *resrc_twindow (resrc_t *resrc); + /* * Return the physical tree for the resouce */ diff --git a/resrc/resrc_flow.c b/resrc/resrc_flow.c index 2d3da3f2b..be73e3623 100644 --- a/resrc/resrc_flow.c +++ b/resrc/resrc_flow.c @@ -60,7 +60,7 @@ struct resrc_flow_list { * size_t staged; * zhash_t *allocs; * zhash_t *reservtns; - * zhash_t *twindow; + * planner_t *twindow; * * The resrc_flow structure therefore includes a flow_resrc resource, * independent from the associated resource, to hold all these values @@ -233,8 +233,8 @@ resrc_flow_t *resrc_flow_new_from_json (json_t *o, resrc_flow_t *parent) endtime = TIME_MAX; } - resrc_twindow_insert (resrc_flow->flow_resrc, "0", - starttime, endtime); + planner_reset (resrc_twindow (resrc_flow->flow_resrc), starttime, + endtime - starttime, NULL, 0); } } if (resrc) diff --git a/resrc/test/tresrc.c b/resrc/test/tresrc.c index 089f396b5..9a0726886 100644 --- a/resrc/test/tresrc.c +++ b/resrc/test/tresrc.c @@ -95,14 +95,12 @@ static int num_temporal_allocation_tests = 10; static void test_temporal_allocation () { int rc = 0; - size_t 
available; + int tmp = 0; resrc_t *resource = resrc_new_resource ("custom", "/test", "test", "test1", NULL, 1, NULL, 10); - available = resrc_available_at_time (resource, 0); - rc = (rc || !(available == 10)); - available = resrc_available_during_range (resource, 0, 1000, false); - rc = (rc || !(available == 10)); + rc = resrc_available_at_time (resource, 0, 10); + rc += resrc_available_during_range (resource, 0, 1000, 10, false); ok (!rc, "resrc_available...(time/range) on unallocated resource work"); // Setup the resource allocations for the rest of the tests @@ -128,30 +126,20 @@ static void test_temporal_allocation () // Test "available at time" // Job 1 - available = resrc_available_at_time (resource, 1); - rc = (rc || !(available == 5)); + rc = resrc_available_at_time (resource, 1, 5); // Jobs 1 & 3 - available = resrc_available_at_time (resource, 10); - rc = (rc || !(available == 4)); - available = resrc_available_at_time (resource, 500); - rc = (rc || !(available == 4)); - available = resrc_available_at_time (resource, 1000); - rc = (rc || !(available == 4)); + rc += resrc_available_at_time (resource, 10, 4); + rc += resrc_available_at_time (resource, 500, 4); + rc += resrc_available_at_time (resource, 1000, 4); // Job 3 - available = resrc_available_at_time (resource, 1500); - rc = (rc || !(available == 9)); - available = resrc_available_at_time (resource, 1999); - rc = (rc || !(available == 9)); + rc += resrc_available_at_time (resource, 1500, 9); + rc += resrc_available_at_time (resource, 1999, 9); // Job 2 - available = resrc_available_at_time (resource, 2000); - rc = (rc || !(available == 0)); - available = resrc_available_at_time (resource, 2500); - rc = (rc || !(available == 0)); - available = resrc_available_at_time (resource, 3000); - rc = (rc || !(available == 0)); + rc += (resrc_available_at_time (resource, 2000, 1) == -1)? 0: -1; + rc += (resrc_available_at_time (resource, 2500, 1) == -1)? 
0: -1; + rc += (resrc_available_at_time (resource, 3000, 1) == -1)? 0: -1; // No Jobs - available = resrc_available_at_time (resource, 3001); - rc = (rc || !(available == 10)); + rc += resrc_available_at_time (resource, 3001, 10); ok (!rc, "resrc_available_at_time works"); if (rc) { return; @@ -160,71 +148,59 @@ static void test_temporal_allocation () // Test "available during range" // Range == job window (both edges are the same) - available = resrc_available_during_range (resource, 2000, 3000, false); - rc = (rc || !(available == 0)); - available = resrc_available_during_range (resource, 0, 1000, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 10, 1999, false); - rc = (rc || !(available == 4)); + tmp = resrc_available_during_range (resource, 2000, 3000, 1, false); + rc = (tmp == -1)? 0 : -1; + rc += resrc_available_during_range (resource, 0, 1000, 4, false); + rc += resrc_available_during_range (resource, 10, 1999, 4, false); ok (!rc, "resrc_available_during_range: range == job window works"); rc = 0; // Range is a subset of job window (no edges are the same) - available = resrc_available_during_range (resource, 4, 6, false); - rc = (rc || !(available == 5)); - available = resrc_available_during_range (resource, 20, 999, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 1001, 1998, false); - rc = (rc || !(available == 9)); - available = resrc_available_during_range (resource, 2500, 2600, false); - rc = (rc || !(available == 0)); + rc = resrc_available_during_range (resource, 4, 6, 5, false); + rc += resrc_available_during_range (resource, 20, 999, 4, false); + rc += resrc_available_during_range (resource, 1001, 1998, 9, false); + tmp = resrc_available_during_range (resource, 2500, 2600, 1, false); + rc += (tmp == -1)? 
0: -1; ok (!rc, "resrc_available_during_range: range is a subset (no edges) works"); rc = 0; // Range is a subset of a job window (one edge is the same) - available = resrc_available_during_range (resource, 0, 999, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 10, 999, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 20, 1000, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 1001, 1999, false); - rc = (rc || !(available == 9)); - available = resrc_available_during_range (resource, 1001, 1999, false); - rc = (rc || !(available == 9)); + rc = resrc_available_during_range (resource, 0, 999, 4, false); + rc += resrc_available_during_range (resource, 10, 999, 4, false); + rc += resrc_available_during_range (resource, 20, 1000, 4, false); + rc += resrc_available_during_range (resource, 1001, 1999, 9, false); + rc += resrc_available_during_range (resource, 1001, 1999, 9, false); ok (!rc, "resrc_available_during_range: range is a subset (1 edge) works"); rc = 0; // Range overlaps 1 job window // (no edges are exactly equal) - available = resrc_available_during_range (resource, 2500, 4000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 2500, 4000, 1, false); + rc = (tmp == -1)? 0: -1; // (1 edge is exactly equal) - available = resrc_available_during_range (resource, 3000, 5000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 3000, 5000, 1, false); + rc += (tmp == -1)? 
0: -1; ok (!rc, "resrc_available_during_range: range overlaps 1 job works"); rc = 0; // Range overlaps multiple job windows // (no edges are exactly equal) - available = resrc_available_during_range (resource, 100, 1500, false); - rc = (rc || !(available == 4)); - available = resrc_available_during_range (resource, 1500, 2500, false); - rc = (rc || !(available == 0)); + rc = resrc_available_during_range (resource, 100, 1500, 4, false); + tmp = resrc_available_during_range (resource, 1500, 2500, 1, false); + rc += (tmp == -1)? 0: -1; // (some edges are exactly equal) - available = resrc_available_during_range (resource, 1000, 2000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 1000, 2000, 1, false); + rc += (tmp == -1)? 0: -1; ok (!rc, "resrc_available_during_range: range overlaps multiple job works"); rc = 0; // Range overlaps all job windows (edges exactly equal) - available = resrc_available_during_range (resource, 0, 3000, false); - rc = (rc || !(available == 0)); - available = resrc_available_during_range (resource, 0, 2000, false); - rc = (rc || !(available == 0)); + tmp = resrc_available_during_range (resource, 0, 3000, 1, false); + rc = (tmp == -1)? 0: -1; + tmp = resrc_available_during_range (resource, 0, 2000, 1, false); + rc += (tmp == -1)? 0: -1; // Range overlaps no job windows - available = resrc_available_during_range (resource, 3001, 5000, false); - rc = (rc || !(available == 10)); + rc += resrc_available_during_range (resource, 3001, 5000, 10, false); ok (!rc, "resrc_available_during_range: range overlaps all job works"); resrc_resource_destroy (resource); @@ -442,6 +418,7 @@ int main (int argc, char *argv[]) resrc_flow_t *power_flow = NULL; resrc_flow_t *bw_flow = NULL; + plan (26); plan (26 + num_temporal_allocation_tests); test_temporal_allocation (); From 9f6906e8c8243d79cfcdd322c6c6047ab0dc0187 Mon Sep 17 00:00:00 2001 From: "Dong H. 
Ahn" Date: Fri, 17 Feb 2017 12:00:28 -0800 Subject: [PATCH 5/6] resrc_reqst: carve out job-to-resrc_reqst logic into its own function Add get_resrc_reqst and move the logic to fill resrc_reqst to this function. Enhance readability of schedule_job which has grown somewhat large. More importantly, being ready to encode resource aggregate info without having to modify the main schedule_job function. --- sched/sched.c | 57 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/sched/sched.c b/sched/sched.c index 05699cb9b..32f4d0115 100644 --- a/sched/sched.c +++ b/sched/sched.c @@ -1389,28 +1389,11 @@ static int req_tpexec_run (flux_t *h, flux_lwj_t *job) * * *******************************************************************************/ -/* - * schedule_job() searches through all of the idle resources to - * satisfy a job's requirements. If enough resources are found, it - * proceeds to allocate those resources and update the kvs's lwj entry - * in preparation for job execution. If less resources - * are found than the job requires, and if the job asks to reserve - * resources, then those resources will be reserved. 
- */ -int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) +static resrc_reqst_t *get_resrc_reqst (flux_lwj_t *job, int64_t starttime, + int64_t *nreqrd) { json_t *req_res = NULL; - flux_t *h = ctx->h; - int rc = -1; - int64_t nfound = 0; - int64_t nreqrd = 0; resrc_reqst_t *resrc_reqst = NULL; - resrc_tree_t *found_tree = NULL; - resrc_tree_t *selected_tree = NULL; - struct sched_plugin *plugin = sched_plugin_get (ctx->loader); - - if (!plugin) - return rc; /* * Require at least one task per node, and @@ -1439,7 +1422,7 @@ int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) Jadd_str (req_res, "type", "node"); Jadd_int64 (req_res, "req_qty", job->req->nnodes); - nreqrd = job->req->nnodes; + *nreqrd = job->req->nnodes; /* Since nodes are requested, make sure we look for at * least one core on each node */ @@ -1467,7 +1450,7 @@ int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) } else if (job->req->ncores > 0) { Jadd_str (req_res, "type", "core"); Jadd_int (req_res, "req_qty", job->req->ncores); - nreqrd = job->req->ncores; + *nreqrd = job->req->ncores; Jadd_int64 (req_res, "req_size", 1); /* setting exclusive to true prevents multiple jobs per core */ @@ -1478,8 +1461,36 @@ int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) Jadd_int64 (req_res, "starttime", starttime); Jadd_int64 (req_res, "endtime", starttime + job->req->walltime); resrc_reqst = resrc_reqst_from_json (req_res, NULL); - Jput (req_res); - if (!resrc_reqst) + +done: + if (req_res) + Jput (req_res); + return resrc_reqst; +} + +/* + * schedule_job() searches through all of the idle resources to + * satisfy a job's requirements. If enough resources are found, it + * proceeds to allocate those resources and update the kvs's lwj entry + * in preparation for job execution. If less resources + * are found than the job requires, and if the job asks to reserve + * resources, then those resources will be reserved. 
+ */ +int schedule_job (ssrvctx_t *ctx, flux_lwj_t *job, int64_t starttime) +{ + flux_t *h = ctx->h; + int rc = -1; + int64_t nfound = 0; + int64_t nreqrd = 0; + resrc_reqst_t *resrc_reqst = NULL; + resrc_tree_t *found_tree = NULL; + resrc_tree_t *selected_tree = NULL; + struct sched_plugin *plugin = sched_plugin_get (ctx->loader); + + if (!plugin) + return rc; + + if (!(resrc_reqst = get_resrc_reqst (job, starttime, &nreqrd))) goto done; if ((nfound = plugin->find_resources (h, ctx->rctx.root_resrc, From 419038d98e2c9a2871b92d779d87efedb5147d45 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Tue, 21 Feb 2017 18:11:18 -0800 Subject: [PATCH 6/6] resrc_reqst: add resource aggregates request directly in resrc_reqst --- resrc/resrc_reqst.c | 40 ++++++++++++++++++++++++++++++++++++++++ sched/sched.c | 24 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/resrc/resrc_reqst.c b/resrc/resrc_reqst.c index 0e648e7da..02016d7ce 100644 --- a/resrc/resrc_reqst.c +++ b/resrc/resrc_reqst.c @@ -41,6 +41,11 @@ struct resrc_reqst_list { zlist_t *list; }; +struct subresrc_aggregate { + const char *type; + int64_t qty; +}; + struct resrc_reqst { resrc_reqst_t *parent; resrc_t *resrc; @@ -50,6 +55,7 @@ struct resrc_reqst { int64_t reqrd_qty; int64_t reqrd_size; int64_t nfound; + zlist_t *subresrcs; resrc_reqst_list_t *children; resrc_graph_req_t *g_reqs; }; @@ -246,6 +252,7 @@ resrc_reqst_t *resrc_reqst_new (resrc_t *resrc, int64_t qty, int64_t size, resrc_reqst->reqrd_qty = qty; resrc_reqst->reqrd_size = size; resrc_reqst->nfound = 0; + resrc_reqst->subresrcs = NULL; resrc_reqst->g_reqs = NULL; resrc_reqst->children = resrc_reqst_list_new (); } @@ -284,6 +291,35 @@ static resrc_graph_req_t *resrc_graph_req_from_json (json_t *ga) return NULL; } +static zlist_t *subresrc_aggregates_from_json (json_t *o) +{ + int64_t agg = -1; + zlist_t *zl = NULL; + struct subresrc_aggregate *subresrc = NULL; + + /* when other resource types need to be supported + * the 
following needs to be extended + */ + if (Jget_int64 (o, "aggr_qty_node", &agg)) { + subresrc = xzmalloc (sizeof (*subresrc)); + subresrc->type = "node"; + subresrc->qty = agg; + zl = zlist_new (); + zlist_append (zl, subresrc); + zlist_freefn (zl, subresrc, free, false); + } + if (Jget_int64 (o, "aggr_qty_core", &agg)) { + subresrc = xzmalloc (sizeof (*subresrc)); + subresrc->type = "core"; + subresrc->qty = agg; + if (!zl) + zl = zlist_new (); + zlist_append (zl, subresrc); + zlist_freefn (zl, subresrc, free, false); + } + return zl; +} + resrc_reqst_t *resrc_reqst_from_json (json_t *o, resrc_reqst_t *parent) { bool exclusive = false; @@ -336,6 +372,8 @@ resrc_reqst_t *resrc_reqst_from_json (json_t *o, resrc_reqst_t *parent) resrc_reqst = resrc_reqst_new (resrc, qty, size, starttime, endtime, exclusive); + resrc_reqst->subresrcs = subresrc_aggregates_from_json (o); + if ((ga = Jobj_get (o, "graphs"))) resrc_reqst->g_reqs = resrc_graph_req_from_json (ga); @@ -365,6 +403,8 @@ void resrc_reqst_destroy (resrc_reqst_t *resrc_reqst) if (resrc_reqst) { if (resrc_reqst->parent) resrc_reqst_list_remove (resrc_reqst->parent->children, resrc_reqst); + if (resrc_reqst->subresrcs) + zlist_destroy (&(resrc_reqst->subresrcs)); resrc_reqst_list_destroy (resrc_reqst->children); resrc_resource_destroy (resrc_reqst->resrc); resrc_graph_req_destroy (resrc_reqst->g_reqs); diff --git a/sched/sched.c b/sched/sched.c index 32f4d0115..b2dfb48a0 100644 --- a/sched/sched.c +++ b/sched/sched.c @@ -1392,6 +1392,9 @@ static int req_tpexec_run (flux_t *h, flux_lwj_t *job) static resrc_reqst_t *get_resrc_reqst (flux_lwj_t *job, int64_t starttime, int64_t *nreqrd) { + int64_t cll_aggr_nnodes = 0; /* cluster-level nnodes request in aggregate */ + int64_t cll_aggr_ncores = 0; /* cluster-level ncores request in aggregate */ + json_t *req_cluster = NULL; json_t *req_res = NULL; resrc_reqst_t *resrc_reqst = NULL; @@ -1423,6 +1426,8 @@ static resrc_reqst_t *get_resrc_reqst (flux_lwj_t *job, int64_t 
starttime, Jadd_str (req_res, "type", "node"); Jadd_int64 (req_res, "req_qty", job->req->nnodes); *nreqrd = job->req->nnodes; + /* num of nodes required in aggregate at the cluster level */ + cll_aggr_nnodes = *nreqrd; /* Since nodes are requested, make sure we look for at * least one core on each node */ @@ -1430,6 +1435,12 @@ static resrc_reqst_t *get_resrc_reqst (flux_lwj_t *job, int64_t starttime, job->req->ncores = job->req->nnodes; job->req->corespernode = (job->req->ncores + job->req->nnodes - 1) / job->req->nnodes; + + /* num of cores required in aggregate at the cluster level */ + cll_aggr_ncores = job->req->corespernode * cll_aggr_nnodes; + + /* num of cores required in aggregate at the node level */ + Jadd_int64 (req_res, "aggr_qty_core", job->req->corespernode); if (job->req->node_exclusive) { Jadd_int64 (req_res, "req_size", 1); Jadd_bool (req_res, "exclusive", true); @@ -1451,6 +1462,8 @@ static resrc_reqst_t *get_resrc_reqst (flux_lwj_t *job, int64_t starttime, Jadd_str (req_res, "type", "core"); Jadd_int (req_res, "req_qty", job->req->ncores); *nreqrd = job->req->ncores; + /* num of cores required in aggregate at the cluster level */ + cll_aggr_ncores = *nreqrd; Jadd_int64 (req_res, "req_size", 1); /* setting exclusive to true prevents multiple jobs per core */ @@ -1460,6 +1473,17 @@ static resrc_reqst_t *get_resrc_reqst (flux_lwj_t *job, int64_t starttime, Jadd_int64 (req_res, "starttime", starttime); Jadd_int64 (req_res, "endtime", starttime + job->req->walltime); + + /* Add cluster and encode nnodes and ncores requests in aggregate */ + req_cluster = Jnew (); + Jadd_str (req_cluster, "type", "cluster"); + Jadd_int64 (req_cluster, "req_qty", 1); + Jadd_int64 (req_cluster, "aggr_qty_node", cll_aggr_nnodes); + Jadd_int64 (req_cluster, "aggr_qty_core", cll_aggr_ncores); + Jadd_int64 (req_cluster, "starttime", starttime); + Jadd_int64 (req_cluster, "endtime", starttime + job->req->walltime); + json_object_set_new (req_cluster, "req_child", 
req_res); + resrc_reqst = resrc_reqst_from_json (req_res, NULL); done: