diff --git a/tempesta.sh b/tempesta.sh index fe9ee67f71..14253d0c3a 100755 --- a/tempesta.sh +++ b/tempesta.sh @@ -7,6 +7,7 @@ SSOCKET=sync_socket TDB=tempesta_db TFW=tempesta_fw +SCHED=tfw_sched_dummy TFW_ROOT=`pwd`/$TFW TFW_CACHE_SIZE=`expr 256 \* 1024` TFW_CACHE_PATH=$TFW_ROOT/cache @@ -44,6 +45,9 @@ start() cache_path="$TFW_CACHE_PATH" [ $? -ne 0 ] && error "cannot load tempesta module" + insmod $TFW_ROOT/sched/$SCHED.ko + [ $? -ne 0 ] && error "cannot load scheduler module" + sysctl --load=tempesta.sysctl.conf [ $? -ne 0 ] && error "cannot apply configuration via sysctl" diff --git a/tempesta_fw/Makefile b/tempesta_fw/Makefile index c28513bfdd..17064bc290 100644 --- a/tempesta_fw/Makefile +++ b/tempesta_fw/Makefile @@ -22,6 +22,6 @@ obj-m = tempesta_fw.o tempesta_fw-objs = cache.o classifier.o client.o connection.o filter.o gfsm.o \ http.o http_msg.o http_parser.o if.o lib.o main.o pool.o \ sched.o server.o session.o sock_backend.o sock_frontend.o \ - stress.o + stress.o debugfs.o obj-m += log/ classifier/ stress/ sched/ filter/ diff --git a/tempesta_fw/debugfs.c b/tempesta_fw/debugfs.c new file mode 100644 index 0000000000..9e44363549 --- /dev/null +++ b/tempesta_fw/debugfs.c @@ -0,0 +1,369 @@ +/** + * Tempesta FW + * + * Copyright (C) 2012-2014 NatSys Lab. (info@natsys-lab.com). + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include + +#include "debugfs.h" +#include "log.h" +#include "tempesta.h" + + +#ifdef DEBUG + +/* The root directory for the Tempesta debugfs module. + * All paths are referenced relative to this root. */ +struct dentry *tfw_debugfs_root; + +/* Name of the directory which is created in the debugfs upon initialization. */ +#define TFW_DEBUGFS_ROOT_NAME "tempesta" + +/** + * Initialize the debugfs wrapper by creating a directory in the debugfs + * where all files owned by the Tempesta FW are hosted. + */ +int +tfw_debugfs_init(void) +{ + int ret = 0; + + tfw_debugfs_root = debugfs_create_dir(TFW_DEBUGFS_ROOT_NAME, NULL); + + if (IS_ERR_OR_NULL(tfw_debugfs_root)) { + ret = (int)PTR_ERR(tfw_debugfs_root); + TFW_WARN("Can't create debugfs directory (%d)\n", ret); + } + + return ret; +} + +/** + * Delete the Tempesta's debugfs directory with all files recursively. + */ +void +tfw_debugfs_exit(void) +{ + if (tfw_debugfs_root) { + debugfs_remove_recursive(tfw_debugfs_root); + tfw_debugfs_root = NULL; + } +} + +/** + * Look up for a file (or directory) in VFS. + */ +static struct dentry * +lookup_file(const char *path, struct dentry *parent) +{ + int ret; + struct path p; + + BUG_ON(!parent); + ret = vfs_path_lookup(parent, NULL, path, 0, &p); + + return (ret ? NULL : p.dentry); +} + +/** + * Create a file in the debugfs, also create parent directories if needed and + * remove the old file if it exists. + * + * @param path Path to a file to be created. + * The path is always treated relative to the Tempesta root + * directory in the debugfs (see tfw_debugfs_root). + * @param data A pointer to some data which is saved in 'file' and 'inode' + * structures. It may be retrieved by any function in @fops as + * file->private_data or file->f_inode->i_private (it is copied + * into both places). + * @param fops A set of functions that handle system calls on the created file. + * + * + * The function creates a file in the debugfs, but does it in a robust way: + * - the file is replaced if it already exists + * - all parent directories are created if they don't exist + * + * Returns: An ERR_PTR if the file is not created. + */ +static struct dentry * +create_with_parents(const char *path, void *data, + const struct file_operations *fops) +{ + size_t name_size; + char *buf, *pos, *component; + struct dentry *parent, *child; + + /* Copy the path to a temporary buffer where it can be modified. */ + name_size = strlen(path) + 1; + buf = kmalloc(name_size, GFP_KERNEL); + BUG_ON(ZERO_OR_NULL_PTR(buf)); + strlcpy(buf, path, name_size); + + /* Eat the leading slash to allow specify either /foo/bar or foo/bar */ + pos = buf; + if (*pos == '/') + ++pos; + + /* Walk over the path and create non-existing directories. */ + parent = tfw_debugfs_root; + component = pos; + do { + if (*pos != '/') + continue; + + *pos = '\0'; + child = lookup_file(component, parent); + if (!child) { + child = debugfs_create_dir(component, parent); + BUG_ON(!parent); + } + parent = child; + component = pos + 1; + } while (*(++pos)); + + /* Remove the file if it already exists. */ + child = lookup_file(component, parent); + if (child) { + TFW_DBG("Removing already existing debugfs file: %s\n", path); + debugfs_remove(child); + } + + /* Create the actual file. */ + child = debugfs_create_file(component, S_IRWXU, parent, data, fops); + if (IS_ERR_OR_NULL(child)) { + int err = PTR_ERR(child); + TFW_WARN("Can't create debugfs file: %s (%d)\n", path, err); + } else { + TFW_DBG("Created debugfs file: %s\n", path); + } + + kfree(buf); + + return child; +} + +/** + * A state maintained between open() and release() calls. + * + * Usage of this structure depens on the data direction: + * + * 1. If a file is open()'ed for reading (@is_input is false): + * - open() allocates the @buf and invokes a callback that writes data to it. + * - read() just copies data from the @buf to the user-space. + * - write() returns an error. + * - close() releases all the allocated memory. + * + * 2. If a file is open()'ed for writing (@is_input is true): + * - open() allocates the @buf + * - read() returns an error. + * - write() copies data from user-space to the @buf. + * - close() invokes a callback passing the @buf to it and releases the memory. + * + * The file may be open()'ed only for either reading or writing but not both. + */ +typedef struct { + bool is_input; + int len; + int buf_size; + char *buf; +} TfwDebugfsIoState; + +static int +fop_open(struct inode *inode, struct file *file) +{ + fmode_t mode = file->f_mode; + tfw_debugfs_handler_t fn = file->f_inode->i_private; + TfwDebugfsIoState *state; + + if ((mode & FMODE_READ) && (mode & FMODE_WRITE)) { + TFW_ERR("This debugfs file can't be opened in read-write mode"); + return -EPERM; + } + + state = kzalloc(sizeof(*state), GFP_KERNEL); + BUG_ON(!state); + + /* Simply allocate a fixed-size buffer. The buffer doesn't expand, the + * data is cropped if it doesn't fit to it. Later on we may change it */ + state->buf_size = PAGE_SIZE; + state->buf = kmalloc(state->buf_size, GFP_KERNEL); + BUG_ON(!state->buf); + + if (mode & FMODE_WRITE) + state->is_input = true; + else + state->len = fn(state->is_input, state->buf, state->buf_size); + + file->private_data = state; + + return 0; +} + +static ssize_t +fop_read(struct file *file, char __user *user_buf, size_t count, + loff_t *ppos) +{ + TfwDebugfsIoState *state = file->private_data; + + if (state->is_input) { + TFW_ERR("Can't read this debugfs file: " + "it was open()'ed only for writing\n"); + return -EPERM; + } + + if (state->len < 0) + return state->len; + + return simple_read_from_buffer(user_buf, count, ppos, + state->buf, + state->len); +} + +static ssize_t +fop_write(struct file *file, const char __user *user_buf, + size_t count, + loff_t *ppos) +{ + int len; + TfwDebugfsIoState *state = file->private_data; + + if (!state->is_input) { + TFW_ERR("Can't write to this debugfs file: " + "it was open()'ed only for reading\n"); + return -EPERM; + } + + /* copy data from user-space */ + len = simple_write_to_buffer(state->buf, (state->buf_size - 1), + ppos, + user_buf, count); + if (len > 0) { + state->len += len; + state->buf[state->len] = '\0'; + } + + return len; +} + +static int +fop_release(struct inode *inode, struct file *file) +{ + int ret = 0; + tfw_debugfs_handler_t fn = file->f_inode->i_private; + TfwDebugfsIoState *state = file->private_data; + + if (state->is_input) + ret = fn(state->is_input, state->buf, state->len); + + kfree(state->buf); + kfree(state); + + return ret; +} + +/** + * Create a file in debugfs and bind a function with it. + * + * @path Path to of a file to be created. + * The path is always treated as relative to tempesta root directory + * in the debugfs. The file may exist, in this case it will be replaced. + * Parent directories may not exist (they will be created automatically). + * @fn A function that handles I/O (that is either receives or sends data to + * user-space). + * + * This function allows to bind a file (that may be read/written in user-space) + * with a function (that works in kernel-space). + * + * Consider the following example: + * + * int my_io_handler(bool is_input, char *buf, size_t size) + * { + * if (is_input) { + * printk("got input data from user-space: %.*s", size, buf); + * return 0; + * } else { + * return snprintf("hello from kernel-space!\n"); + * } + * } + * + * tfw_debugfs_bind("/foo/bar", my_io_handler); + * + * The code binds the file /foo/bar with the function my_io_handler() and allows + * to interact with the kernel function from a user-space shell like this: + * $ cat /sys/kernel/debug/tempesta/foo/bar + * hello from kernel-space! + * $ echo "hi from user-space" > /sys/kernel/debug/tempesta/foo/bar + * $ dmesg | head -n1 + * got input data from user-space: hi from user-space + * + * When the file is open()'ed in read mode, the my_io_handler() is called with + * is_input=false. In this case the function is requested to put some data to + * the buffer. The buffer then is saved and returned to user-space during + * subsequent read() calls until the file is close()'d. + * + * When the file is open()'ed in write mode, the data is accumulated in the + * buffer during write() calls. When the file is close()'d, the my_io_handler() + * is called to take the data received from user-space. + * + * Therefore, the handler function doesn't need to care about streaming, it + * receives or sends the data "atomically" with a single big chunk. + * This approach simplifies your code (no need to preserve state between calls), + * but it is not very effective, so don't use it for large amounts of data. + * + * @return A status code: 0 if the file is created, -1 otherwise. + */ +int +tfw_debugfs_bind(const char *path, tfw_debugfs_handler_t fn) +{ + static const struct file_operations fops = { + .llseek = default_llseek, + .open = fop_open, + .read = fop_read, + .write = fop_write, + .release = fop_release, + }; + struct dentry *d; + + BUG_ON(!path || !fn); + d = create_with_parents(path, fn, &fops); + + return IS_ERR_OR_NULL(d) ? -1 : 0; +} +EXPORT_SYMBOL(tfw_debugfs_bind); + +#else /* ifdef DEBUG */ + +int tfw_debugfs_bind(const char *path, tfw_debugfs_handler_t handler_fn) +{ + return 0; +} +EXPORT_SYMBOL(tfw_debugfs_bind); + +int tfw_debugfs_init(void) +{ + return 0; +} + +void tfw_debugfs_exit(void) +{ +} + +#endif /* ifndef DEBUG */ diff --git a/tempesta_fw/debugfs.h b/tempesta_fw/debugfs.h new file mode 100644 index 0000000000..4ab66eb928 --- /dev/null +++ b/tempesta_fw/debugfs.h @@ -0,0 +1,32 @@ +/** + * Tempesta FW + * + * Copyright (C) 2012-2014 NatSys Lab. (info@natsys-lab.com). + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#ifndef __TFW_DEBUGFS_H__ +#define __TFW_DEBUGFS_H__ + +typedef int (*tfw_debugfs_handler_t)(bool is_input, char *buf, size_t size); + +int tfw_debugfs_bind(const char *path, tfw_debugfs_handler_t handler_fn); + +int tfw_debugfs_init(void); +void tfw_debugfs_exit(void); + + +#endif /* __TFW_DEBUGFS_H__ */ + diff --git a/tempesta_fw/main.c b/tempesta_fw/main.c index 537a102486..f0c4866f45 100644 --- a/tempesta_fw/main.c +++ b/tempesta_fw/main.c @@ -28,6 +28,7 @@ #include "http.h" #include "log.h" #include "server.h" +#include "debugfs.h" MODULE_AUTHOR(TFW_AUTHOR); MODULE_DESCRIPTION("Tempesta FW"); @@ -47,6 +48,9 @@ MODULE_PARM_DESC(cache_path, "Path to cache directory"); int tfw_connection_init(void); void tfw_connection_exit(void); +int tfw_sched_dummy_init(void); +void tfw_sched_dummy_exit(void); + static int __init tfw_init(void) { @@ -63,6 +67,10 @@ tfw_init(void) if (r) return r; + r = tfw_debugfs_init(); + if (r) + goto err_debugfs; + r = tfw_cache_init(); if (r) goto err_cache; @@ -104,6 +112,8 @@ tfw_init(void) err_http: tfw_cache_exit(); err_cache: + tfw_debugfs_exit(); +err_debugfs: tfw_if_exit(); return r; diff --git a/tempesta_fw/sched.c b/tempesta_fw/sched.c index 238908cfef..cf9fe0e5bb 100644 --- a/tempesta_fw/sched.c +++ b/tempesta_fw/sched.c @@ -24,59 +24,74 @@ #include "log.h" #include "sched.h" -static TfwScheduler *sched = NULL; -static rwlock_t tfw_sched_lock = __RW_LOCK_UNLOCKED(tfw_sched_lock); -/* TODO the TfwServer structures must be handled by scheduler modules. */ -static TfwServer *dummy_srv = NULL; +static TfwScheduler *tfw_sched = NULL; -int -tfw_sched_add_srv(TfwServer *srv) + +TfwServer * +tfw_sched_get_srv(TfwMsg *msg) { - dummy_srv = srv; - TFW_DBG("Added new server %p\n", dummy_srv); - return 0; + BUG_ON(!msg); + BUG_ON(!tfw_sched); + + return tfw_sched->get_srv(msg); } int -tfw_sched_del_srv(TfwServer *srv) +tfw_sched_add_srv(TfwServer *srv) { - BUG_ON(srv != dummy_srv); - dummy_srv = NULL; + int ret; + + BUG_ON(!srv); + BUG_ON(!tfw_sched); + + ret = tfw_sched->add_srv(srv); + if (ret) + TFW_ERR("Can't add a server to the scheduler (%d)\n", ret); - return 0; + return ret; } -TfwServer * -tfw_sched_get_srv(TfwMsg *msg) +int +tfw_sched_del_srv(TfwServer *srv) { - /* TODO call scheduling module if we have any registered. */ + int ret; + + BUG_ON(!srv); + BUG_ON(!tfw_sched); - return dummy_srv; + ret = tfw_sched->del_srv(srv); + if (ret) + TFW_ERR("Can't remove a server from the scheduler (%d)\n", ret); + + return ret; } int tfw_sched_register(TfwScheduler *mod) { - write_lock(&tfw_sched_lock); - if (sched) { - write_unlock(&tfw_sched_lock); - TFW_ERR("Can't register a scheduler - there is already one" - " registered\n"); - return -1; + BUG_ON(!mod); + BUG_ON(!mod->name || !mod->get_srv || !mod->add_srv || !mod->del_srv); + + TFW_LOG("Registering new scheduler: %s\n", mod->name); + + if (!tfw_sched) { + tfw_sched = mod; + return 0; } - sched = mod; - write_unlock(&tfw_sched_lock); - return 0; + TFW_ERR("Can't register a scheduler - the '%s' is already registered\n", + tfw_sched->name); + return -EEXIST; } EXPORT_SYMBOL(tfw_sched_register); void tfw_sched_unregister(void) { - write_lock(&tfw_sched_lock); - sched = NULL; - write_unlock(&tfw_sched_lock); + BUG_ON(!tfw_sched); + + TFW_LOG("Un-registering scheduler: %s\n", tfw_sched->name); + tfw_sched = NULL; } EXPORT_SYMBOL(tfw_sched_unregister); diff --git a/tempesta_fw/sched.h b/tempesta_fw/sched.h index 365a55ad56..9a376f707e 100644 --- a/tempesta_fw/sched.h +++ b/tempesta_fw/sched.h @@ -23,6 +23,7 @@ #ifndef __TFW_SCHED_H__ #define __TFW_SCHED_H__ +#include "tempesta.h" #include "msg.h" #include "server.h" @@ -35,14 +36,20 @@ */ typedef struct { + const char *name; + + TfwServer * (*get_srv)(TfwMsg *msg); + int (*add_srv)(TfwServer *srv); + int (*del_srv)(TfwServer *srv); } TfwScheduler; +TfwServer *tfw_sched_get_srv(TfwMsg *msg); int tfw_sched_add_srv(TfwServer *srv); int tfw_sched_del_srv(TfwServer *srv); -TfwServer *tfw_sched_get_srv(TfwMsg *msg); -extern int tfw_sched_register(TfwScheduler *mod); -extern void tfw_sched_unregister(void); +int tfw_sched_register(TfwScheduler *mod); +void tfw_sched_unregister(void); + #endif /* __TFW_SCHED_H__ */ diff --git a/tempesta_fw/sched/Makefile b/tempesta_fw/sched/Makefile index ed54cd3925..84a90133d0 100644 --- a/tempesta_fw/sched/Makefile +++ b/tempesta_fw/sched/Makefile @@ -16,14 +16,7 @@ # this program; if not, write to the Free Software Foundation, Inc., 59 # Temple Place - Suite 330, Boston, MA 02111-1307, USA. -EXTRA_CFLAGS = # -DDEBUG +EXTRA_CFLAGS = -I$(src)/../../tempesta_db -I$(src)/../../sync_socket -DDEBUG -#obj-m = -#tempesta_http-objs = - -#all: -# make -C ../../linux-$(KVERSION) M=$(PWD) modules - -#clean: -# make -C ../../linux-$(KVERSION) M=$(PWD) clean +obj-m = tfw_sched_rr.o tfw_sched_dummy.o diff --git a/tempesta_fw/sched/tfw_sched_dummy.c b/tempesta_fw/sched/tfw_sched_dummy.c new file mode 100644 index 0000000000..6e2a5f98cf --- /dev/null +++ b/tempesta_fw/sched/tfw_sched_dummy.c @@ -0,0 +1,84 @@ +/** + * Tempesta FW + * + * Copyright (C) 2012-2014 NatSys Lab. (info@natsys-lab.com). + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include + +#include "../log.h" +#include "../sched.h" + +MODULE_AUTHOR(TFW_AUTHOR); +MODULE_DESCRIPTION("Tempesta dummy scheduler"); +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL"); + +/* The only single server is supported by the dummy scheduler. */ +static TfwServer *dummy_srv = NULL; + +TfwServer * +tfw_sched_dummy_get_srv(TfwMsg *msg) +{ + return dummy_srv; +} + +int +tfw_sched_dummy_add_srv(TfwServer *srv) +{ + if (srv && dummy_srv) + TFW_WARN("Can't add multiple servers to the dummy scheduler," + "so only the most recently added server is used\n"); + + dummy_srv = srv; + + return 0; +} + +int +tfw_sched_dummy_del_srv(TfwServer *srv) +{ + if (srv != dummy_srv) { + TFW_WARN("Can't remove the server from the dummy scheduler\n"); + return -ENOENT; + } else { + dummy_srv = NULL; + return 0; + } +} + +int +tfw_sched_dummy_init(void) +{ + static TfwScheduler tfw_sched_dummy_mod = { + .name = "dummy", + .get_srv = tfw_sched_dummy_get_srv, + .add_srv = tfw_sched_dummy_add_srv, + .del_srv = tfw_sched_dummy_del_srv + }; + + return tfw_sched_register(&tfw_sched_dummy_mod); +} +module_init(tfw_sched_dummy_init); + +void +tfw_sched_dummy_exit(void) +{ + dummy_srv = NULL; + + tfw_sched_unregister(); +} +module_exit(tfw_sched_dummy_exit); diff --git a/tempesta_fw/sched/tfw_sched_rr.c b/tempesta_fw/sched/tfw_sched_rr.c new file mode 100644 index 0000000000..d8f79b3512 --- /dev/null +++ b/tempesta_fw/sched/tfw_sched_rr.c @@ -0,0 +1,273 @@ +/** + * Tempesta FW + * + * Copyright (C) 2012-2014 NatSys Lab. (info@natsys-lab.com). + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include +#include + +#include "../debugfs.h" +#include "../lib.h" +#include "../sched.h" + +MODULE_AUTHOR(TFW_AUTHOR); +MODULE_DESCRIPTION("Tempesta round-robin scheduler"); +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL"); + +#define RR_BANNER "tfw_sched_rr: " +#define RR_ERR(...) TFW_ERR(RR_BANNER __VA_ARGS__) +#define RR_LOG(...) TFW_LOG(RR_BANNER __VA_ARGS__) + +/** + * The memory for servers list is allocated statically, so this is a maximum + * number of servers that may be added for scheduling in this module. + */ +#define RR_MAX_SERVERS_N 64 + +/** + * The structure represents a list of servers read in a round-robin fashion. + * + * The @counter is incremented on each get() call and the "current" server is + * obtained as servers[counter % servers_n]. + * This approach is chosen (instead of storing an index of the current server) + * for optimization purposes: it allows sequential reading from the array + * without synchronization between readers. + */ +typedef struct { + unsigned int servers_n; + unsigned int counter; + TfwServer *servers[RR_MAX_SERVERS_N]; +} RrSrvList; + +/** + * There is a total of NR_CPUS identical copies of the RrSrvList. + * Each CPU has its own copy stored in its local memory. + * The effects are following: + * 1. get() operates faster on because: + * - Counter is not shared among CPUs, so the cache line bouncing is reduced. + * - Access to the local memory is faster on NUMA systems. + * 2. add()/del() work slower because they need to update all copies. + * 3. Each CPU has its own "current" server independent from other CPUs. + */ +static DEFINE_PER_CPU(RrSrvList, rr_srv_list) = { + .servers_n = 0, + .counter = 0, + .servers = { NULL } +}; + +/** + * The lock is needed only to synchronize writers (add() and del() methods). + * The get() doesn't require it. + */ +static DEFINE_SPINLOCK(rr_write_lock); + + +/** + * On each subsequent call the function returns the next element from the + * list of servers added to the scheduler. + * + * This function is called for each incoming HTTP request, so it is relatively + * performance critical. Therefore, a bunch of optimizations is used to reduce + * the overhead and that imposes certain effects: + * + * - Each CPU has its own "current" server, so the sequence of servers is + * broken when you switch to another CPU. On average, messages are still + * distributed equally across the servers, but you can't rely on the order. + * + * - The function intended for use from a softirq context, it uses per-CPU + * variables and busy-waiting without disabling the preemption or perform + * any locking (although it is safe to call it with preemption enabled - you + * get a wasted CPU time slice in a worst case). + * + * - The function assumes that add() doesn't add NULL elements to the list + * and del() sets deleted elements to NULL. If this invariant is violated, + * it may lock up the system or return a pointer to deleted server. + */ +static TfwServer * +tfw_sched_rr_get_srv(TfwMsg *msg) +{ + unsigned int n; + TfwServer *srv; + RrSrvList *lst = &__get_cpu_var(rr_srv_list); + + do { + n = lst->servers_n; + n |= !n; /* Return the first element if n=0 (has to be NULL). */ + srv = lst->servers[lst->counter++ % n]; + } while (unlikely(n > 1 && !srv)); + + return srv; +} + +static int +get_servers_n(void) +{ + return __get_cpu_var(rr_srv_list).servers_n; +} + +static int +find_server_idx(TfwServer *srv) +{ + int i; + RrSrvList *lst = &__get_cpu_var(rr_srv_list); + for (i = 0; i < lst->servers_n; ++i) { + if (lst->servers[i] == srv) + return i; + } + + return -1; +} + +/** + * Add a server to the end of the round-robin list. + * + * Returns: + * Zero if the server is added. + * ENOMEM if there is no room for the server in the statically allocated array. + * EEXIST if the given pointer is already present in the list. + */ +static int +tfw_sched_rr_add_srv(TfwServer *srv) +{ + int ret = 0; + int cpu; + RrSrvList *lst; + + BUG_ON(!srv); + + spin_lock_bh(&rr_write_lock); + if (get_servers_n() >= RR_MAX_SERVERS_N) { + RR_ERR("Can't add a server to the scheduler - " + "the maximum number of servers (%d) is reached\n", + RR_MAX_SERVERS_N); + ret = -ENOMEM; + } else if (find_server_idx(srv) >= 0) { + RR_ERR("Can't add the server to the scheduler - " + "it is already present in the servers list\n"); + ret = -EEXIST; + } else { + for_each_possible_cpu(cpu) { + lst = &per_cpu(rr_srv_list, cpu); + lst->servers[lst->servers_n] = srv; + ++lst->servers_n; + } + } + spin_unlock_bh(&rr_write_lock); + + return ret; +} + +/** + * Delete a given server from the round-robin list. + * + * The function deletes an element by replacing it with the last element in the + * array and then deleting this last element. This is fast, but it changes the + * order of servers. + * + * Returns zero on success or ENOENT if the serve is not found in the list. + */ +static int +tfw_sched_rr_del_srv(TfwServer *srv) +{ + int ret = 0; + int i, cpu; + RrSrvList *lst; + + spin_lock_bh(&rr_write_lock); + i = find_server_idx(srv); + if (i < 0) { + RR_ERR("Can't delete the server from the scheduler - " + " it is not found in the servers list\n"); + ret = -ENOENT; + } else { + for_each_possible_cpu(cpu) { + lst = &per_cpu(rr_srv_list, cpu); + lst->servers[i] = lst->servers[lst->servers_n - 1]; + --lst->servers_n; + lst->servers[lst->servers_n] = NULL; + } + } + spin_unlock_bh(&rr_write_lock); + + return ret; +} + +static int +tfw_sched_rr_debugfs_hook(bool input, char *buf, size_t size) +{ + int pos = 0; + int cpu, i; + RrSrvList *my, *this; + + /* Turn the current server on write(). */ + if (input) { + tfw_sched_rr_get_srv(NULL); + return 0; + } + + spin_lock_bh(&rr_write_lock); + + /* Dump the servers list on read(). */ + my = &__get_cpu_var(rr_srv_list); + pos += snprintf(buf + pos, size - pos, "servers: %d, counter: %d\n", + my->servers_n, my->counter); + + for (i = 0; i < my->servers_n; ++i) { + char mark = (i == (my->counter % my->servers_n)) ? '>' : ' '; + char srv_str[TFW_MAX_SERVER_STR_SIZE]; + + tfw_server_snprint(my->servers[i], srv_str, sizeof(srv_str)); + pos += snprintf(buf + pos, size - pos, "%c%s\n", mark, srv_str); + } + + for_each_possible_cpu(cpu) { + this = &per_cpu(rr_srv_list, cpu); + BUG_ON(my->servers_n != this->servers_n); + BUG_ON(memcmp(my->servers, this->servers, sizeof(my->servers))); + } + + spin_unlock_bh(&rr_write_lock); + + return pos; +} + +int +tfw_sched_rr_init(void) +{ + static TfwScheduler tfw_sched_rr_mod = { + .name = "round-robin", + .get_srv = tfw_sched_rr_get_srv, + .add_srv = tfw_sched_rr_add_srv, + .del_srv = tfw_sched_rr_del_srv + }; + + RR_LOG("init\n"); + + tfw_debugfs_bind("/sched/rr/state", tfw_sched_rr_debugfs_hook); + + return tfw_sched_register(&tfw_sched_rr_mod); +} +module_init(tfw_sched_rr_init); + +void +tfw_sched_rr_exit(void) +{ + tfw_sched_unregister(); +} +module_exit(tfw_sched_rr_exit); + diff --git a/tempesta_fw/server.c b/tempesta_fw/server.c index c854261e25..646ab0f0f1 100644 --- a/tempesta_fw/server.c +++ b/tempesta_fw/server.c @@ -22,11 +22,11 @@ #include #include "connection.h" +#include "lib.h" #include "log.h" #include "sched.h" #include "server.h" -static TfwServer *be_srv; static struct kmem_cache *srv_cache; void @@ -50,8 +50,6 @@ tfw_destroy_server(struct sock *s) srv->sock = NULL; conn->hndl = NULL; - - be_srv = NULL; /* FIXME clear the server references from all current sessions. */ #if 0 @@ -74,13 +72,27 @@ tfw_create_server(struct sock *s) return NULL; } - /* TODO Only one back-end server is supported yet. */ - BUG_ON(be_srv); - be_srv = srv; - return srv; } +int tfw_server_snprint(const TfwServer *srv, char *buf, size_t buf_size) +{ + TfwAddr addr; + int len = sizeof(addr); + char addr_str_buf[MAX_ADDR_LEN]; + + BUG_ON(!srv || !buf || !buf_size); + + memset(&addr, 0, sizeof(addr)); + kernel_getpeername(srv->sock->sk_socket, &addr.addr, &len); + tfw_inet_ntop(&addr, addr_str_buf); + + len = snprintf(buf, buf_size, "srv %p: %s", srv, addr_str_buf); + + return len; +} +EXPORT_SYMBOL(tfw_server_snprint); + int __init tfw_server_init(void) { diff --git a/tempesta_fw/server.h b/tempesta_fw/server.h index c7da42990b..6f4d0ecdae 100644 --- a/tempesta_fw/server.h +++ b/tempesta_fw/server.h @@ -22,6 +22,8 @@ #include +#define TFW_MAX_SERVER_STR_SIZE 100 + typedef struct { /* The server current stress (overloading) value. */ int stress; @@ -32,6 +34,8 @@ typedef struct { TfwServer *tfw_create_server(struct sock *s); void tfw_destroy_server(struct sock *s); +int tfw_server_snprint(const TfwServer *srv, char *buf, size_t buf_size); + int tfw_server_init(void); void tfw_server_exit(void); diff --git a/tempesta_fw/tempesta.h b/tempesta_fw/tempesta.h index 0a1ab48b0a..4dc3a77abb 100644 --- a/tempesta_fw/tempesta.h +++ b/tempesta_fw/tempesta.h @@ -42,6 +42,7 @@ typedef union { struct sockaddr_in v4; struct sockaddr_in6 v6; + struct sockaddr addr; } TfwAddr; typedef struct {