diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..9e921aa1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +# Object files +*.o +*.ko +*.obj +*.elf +*.lo + +# Temporary files +*~ +*.tmp + +# Libraries +*.lib +*.a + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# ignore byproducts of autotools (bootstrap,configure) +*.log +*.patch +*.in +*.m4 +*.guess +*.status +*.sub diff --git a/INSTALL b/INSTALL new file mode 100644 index 00000000..007e9396 --- /dev/null +++ b/INSTALL @@ -0,0 +1,370 @@ +Installation Instructions +************************* + +Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation, +Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + +Basic Installation +================== + + Briefly, the shell commands `./configure; make; make install' should +configure, build, and install this package. The following +more-detailed instructions are generic; see the `README' file for +instructions specific to this package. Some packages provide this +`INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. 
Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. Caching is +disabled by default to prevent problems with accidental use of stale +cache files. + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You need `configure.ac' if +you want to change it or regenerate `configure' using a newer version +of `autoconf'. + + The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. + + Running `configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type `make install' to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the `make install' phase executed with root + privileges. + + 5. 
Optionally, type `make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior `make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type `make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. Some packages, particularly those that use Automake, provide `make + distcheck', which can be used by developers to test that all other + targets like `make install' and `make uninstall' work correctly. + This target is generally not run by end users. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. Run `./configure --help' +for details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. 
+ +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. This +is known as a "VPATH" build. + + With a non-GNU `make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use `make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple `-arch' options to the +compiler but only a single `-arch' option to the preprocessor. Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the `lipo' tool if you have problems. + +Installation Names +================== + + By default, `make install' installs the package's commands under +`/usr/local/bin', include files under `/usr/local/include', etc. You +can specify an installation prefix other than `/usr/local' by giving +`configure' the option `--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. 
If you +pass the option `--exec-prefix=PREFIX' to `configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the +default for these options is expressed in terms of `${prefix}', so that +specifying just `--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to `configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +`make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, `make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +`${prefix}'. Any directories that were specified during `configure', +but not in terms of `${prefix}', must each be overridden at install +time for the entire installation to be relocated. The approach of +makefile variable overrides for each directory variable is required by +the GNU Coding Standards, and ideally causes no recompilation. +However, some platforms have known limitations with the semantics of +shared libraries that end up requiring recompilation when using this +method, particularly noticeable in packages that use GNU Libtool. + + The second method involves providing the `DESTDIR' variable. For +example, `make install DESTDIR=/alternate/directory' will prepend +`/alternate/directory' before all installation names. 
The approach of +`DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of `${prefix}' +at `configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of `make' will be. For these packages, running `./configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with `make V=1'; while running `./configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with `make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU +CC is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. 
+ + HP-UX `make' updates targets which have the same time stamps as +their prerequisites, which makes it generally unusable when shipped +generated files such as `configure' are involved. Use GNU `make' +instead. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its `<wchar.h>' header file. The option `-nodtk' can be used as +a workaround. If GNU CC is not installed, it is therefore recommended +to try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put `/usr/ucb' early in your `PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in `/usr/bin'. So, if you need `/usr/ucb' +in your `PATH', put it _after_ `/usr/bin'. + + On Haiku, software installed for all users goes in `/boot/common', +not `/usr/local'. It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features `configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, `configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option `--target=TYPE' to select the type of system they will +produce code for. 
+ + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for `CONFIG_SHELL' due to +an Autoconf limitation. Until the limitation is lifted, you can use +this workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +`configure' Invocation +====================== + + `configure' recognizes the following options to control how it +operates. + +`--help' +`-h' + Print a summary of all of the options to `configure', and exit. + +`--help=short' +`--help=recursive' + Print a summary of the options unique to this package's + `configure', and exit. The `short' variant lists options used + only in the top level, while the `recursive' variant lists options + also present in any nested packages. 
+ +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: + for more details, including other options available for fine-tuning + the installation locations. + +`--no-create' +`-n' + Run the configure checks, but stop before creating any output + files. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..9020cc37 --- /dev/null +++ b/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2014, OpenFP +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the {organization} nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 00000000..cb421896 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,94 @@ +ACLOCAL_AMFLAGS=-I m4 +AUTOMAKE_OPTIONS = foreign +SUBDIRS = src example test + +include_HEADERS = \ + $(top_srcdir)/include/api/ofp.h \ + $(top_srcdir)/include/api/ofp_init.h \ + $(top_srcdir)/include/api/ofp_pkt_processing.h \ + $(top_srcdir)/include/api/ofp_cli.h \ + $(top_srcdir)/include/api/ofp_log.h \ + $(top_srcdir)/include/api/ofp_timer.h \ + $(top_srcdir)/include/api/ofp_types.h \ + $(top_srcdir)/include/api/ofp_socket_types.h \ + $(top_srcdir)/include/api/ofp_socket.h \ + $(top_srcdir)/include/api/ofp_in.h \ + $(top_srcdir)/include/api/ofp_in6.h \ + $(top_srcdir)/include/api/ofp_errno.h \ + $(top_srcdir)/include/api/ofp_hook.h \ + $(top_srcdir)/include/api/ofp_route_arp.h \ + $(top_srcdir)/include/api/ofp_portconf.h \ + $(top_srcdir)/include/api/ofp_debug.h \ + $(top_srcdir)/include/api/ofp_stat.h \ + $(top_srcdir)/include/api/ofp_ioctl.h \ + $(top_srcdir)/include/api/ofp_queue.h \ + $(top_srcdir)/include/api/ofp_sysctl.h \ + $(top_srcdir)/include/api/ofp_utils.h \ + $(top_srcdir)/include/api/ofp_ethernet.h \ + 
$(top_srcdir)/include/api/ofp_ip.h \ + $(top_srcdir)/include/api/ofp_ip6.h \ + $(top_srcdir)/include/api/ofp_icmp.h \ + $(top_srcdir)/include/api/ofp_icmp6.h \ + $(top_srcdir)/include/api/ofp_if_vlan.h \ + $(top_srcdir)/include/api/ofp_udp.h \ + $(top_srcdir)/include/api/ofp_ip_var.h \ + $(top_srcdir)/include/api/ofp_tcp.h + +noinst_HEADERS = \ + $(top_srcdir)/include/ofpi_netlink.h \ + $(top_srcdir)/include/ofpi_pkt_processing.h \ + $(top_srcdir)/include/ofpi_arp.h \ + $(top_srcdir)/include/ofpi_avl.h \ + $(top_srcdir)/include/ofpi_callout.h \ + $(top_srcdir)/include/ofpi_cli.h \ + $(top_srcdir)/include/ofpi_debug.h \ + $(top_srcdir)/include/ofpi_domain.h \ + $(top_srcdir)/include/ofpi_errno.h \ + $(top_srcdir)/include/ofpi_ethernet.h \ + $(top_srcdir)/include/ofpi.h \ + $(top_srcdir)/include/ofpi_hash.h \ + $(top_srcdir)/include/ofpi_vnet.h \ + $(top_srcdir)/include/ofpi_icmp6.h \ + $(top_srcdir)/include/ofpi_icmp.h \ + $(top_srcdir)/include/ofpi_if_arp.h \ + $(top_srcdir)/include/ofpi_if_gre.h \ + $(top_srcdir)/include/ofpi_if_vlan.h \ + $(top_srcdir)/include/ofpi_inet.h \ + $(top_srcdir)/include/ofpi_in6.h \ + $(top_srcdir)/include/ofpi_in6_pcb.h \ + $(top_srcdir)/include/ofpi_in.h \ + $(top_srcdir)/include/ofpi_in_pcb.h \ + $(top_srcdir)/include/ofpi_ip6.h \ + $(top_srcdir)/include/ofpi_ip6_var.h \ + $(top_srcdir)/include/ofpi_ip6protosw.h \ + $(top_srcdir)/include/ofpi_ip.h \ + $(top_srcdir)/include/ofpi_ip_var.h \ + $(top_srcdir)/include/ofpi_log.h \ + $(top_srcdir)/include/ofpi_md5.h \ + $(top_srcdir)/include/ofpi_portconf.h \ + $(top_srcdir)/include/ofpi_protosw.h \ + $(top_srcdir)/include/ofpi_queue.h \ + $(top_srcdir)/include/ofpi_route.h \ + $(top_srcdir)/include/ofpi_rt_lookup.h \ + $(top_srcdir)/include/ofpi_sockbuf.h \ + $(top_srcdir)/include/ofpi_socket.h \ + $(top_srcdir)/include/ofpi_socketvar.h \ + $(top_srcdir)/include/ofpi_sockopt.h \ + $(top_srcdir)/include/ofpi_sockstate.h \ + $(top_srcdir)/include/ofpi_stat.h \ + 
$(top_srcdir)/include/ofpi_systm.h \ + $(top_srcdir)/include/ofpi_tcp_fsm.h \ + $(top_srcdir)/include/ofpi_tcp.h \ + $(top_srcdir)/include/ofpi_tcp_offload.h \ + $(top_srcdir)/include/ofpi_tcp_seq.h \ + $(top_srcdir)/include/ofpi_tcp_syncache.h \ + $(top_srcdir)/include/ofpi_tcp_timer.h \ + $(top_srcdir)/include/ofpi_tcp_var.h \ + $(top_srcdir)/include/ofpi_gre.h \ + $(top_srcdir)/include/ofpi_timer.h \ + $(top_srcdir)/include/ofpi_udp.h \ + $(top_srcdir)/include/ofpi_udp_var.h \ + $(top_srcdir)/include/ofpi_udp6_var.h \ + $(top_srcdir)/include/ofpi_hook.h \ + $(top_srcdir)/include/ofpi_util.h + diff --git a/README b/README new file mode 100644 index 00000000..edcaf5ee --- /dev/null +++ b/README @@ -0,0 +1,98 @@ +Open IP Fast Path +-- + +Intent and purpose: +-- +The intent of this project is to enable accelerated routing/forwarding +for IPv4 and IPv6, tunneling and termination for a variety of protocols. +Unsupported functionality is provided by the host OS networking stack; +aka slowpath. + +The implemented IP fastpath stack functionality is provided as a library +to Fast Path applications that use ODP execution model and framework. +See an example application: example/fpm/app_main.c + +Termination of protocols with POSIX interface for legacy applications +is also supported. + +Architecture: +-- +State is kept by the slowpath stack, i.e. routing tables, arp tables are +only written to by the slowpath side. All control and CLI functionality +works against slowpath. Fastpath receives state notifications and information +through NETLINK API. + +An end goal could be to have a full ODP IP fastpath stack that can work +independently of a slowpath in a Bare Metal environment. + +Classification API should be used to further improve the fastpath performance. + +Any crypto, checksum or other operation will be offloaded by ODP API +with HW support. 
+ +File structure definition +-- +./docs/ - This is where you can find more detailed documentation +./example/fpm/ - Template application example that uses the project library +./include/api/ - Public interface headers used by an application. +./include/ - Internal interface headers that are used in fastpath library. +./scripts/ - Scripts that start/stop the application and configure system. +./src/ - .c files with fastpath library implementation. +./src/cli/ - Command Line Interface implementation used mainly for debug. +./test/cunit/ - CUnit testcases implementation + +Coding Style: +-- +Project code uses Linux kernel style that is verified through checkpatch.pl + +Licensing: +-- +Project uses BSD 3-Clause License as default license. One should not use code +that is licensed under any GPL type. + +Mailing list +-- +We have a mailing list, reached via the address: +odp-fp@lists.enea.com + + +Building: +-- +This project is currently building on a generic 32/64bit Linux machine +with X86 arch. +Autotools are required for building. + +Build and install ODP from: +git clone git://git.linaro.org/lng/odp.git +./bootstrap +./configure +make +make install + +Get this repository + +Build library and examples: +./bootstrap +./configure +make + +Build library, CUnit testcases and examples. +make check will build everything and run testcases. +./bootstrap +./configure --enable-cunit +make check + +Running: +-- +A start_device.sh/stop_device.sh script is available in the scripts directory +to start/stop the fpm example application. +By default the eth0 interface is used for the fastpath processing but any +other interface/interfaces names can be supplied to the script as parameter. + +When using ethX interface for fastpath processing this will be disconnected +from Linux. A fpY TUN/TAP interface is created by fastpath application and +this can be used by Linux to send and receive packets. +The sent packets from Linux on fpY interface are forwarded to ethX (wire). 
+The packets received by the ethX interface are captured by ODP and then these +are received by FPM fastpath application. +When no fastpath operation is applicable the packet is forwarded to slowpath. diff --git a/TODO b/TODO new file mode 100644 index 00000000..67d95402 --- /dev/null +++ b/TODO @@ -0,0 +1,11 @@ +* Reuse libnl for NETLINK if (LGPL & dynlink) is OK for control plane. +** ARP monitoring +Fixup code to get initial snapshot when connecting. +** Route monitoring +Fixup code to get initial snapshot when connecting. + +* ARP cache, SPMC table, fast lookup + +* SPMC IPv4 routing cache, fast lookup. +* SPMC IPv6 routing cache, fast lookup. + diff --git a/bootstrap b/bootstrap new file mode 100755 index 00000000..5c6fc2d9 --- /dev/null +++ b/bootstrap @@ -0,0 +1,7 @@ +#! /bin/sh + +aclocal \ +&& libtoolize --copy \ +&& autoheader \ +&& automake --gnu --add-missing --copy \ +&& autoreconf diff --git a/configure.ac b/configure.ac new file mode 100644 index 00000000..233f937d --- /dev/null +++ b/configure.ac @@ -0,0 +1,310 @@ +AC_PREREQ([2.69]) +AC_INIT([OpenFastPath], + [1.0], + [sorin.vultureanu at enea.com], + [OpenFastPath], + [http://www.enea.com]) +AM_INIT_AUTOMAKE([subdir-objects]) +AC_CONFIG_SRCDIR([include/config.h.in]) +AC_CONFIG_HEADERS([include/config.h]) + + +AC_USE_SYSTEM_EXTENSIONS +AC_SYS_LARGEFILE +AC_CONFIG_MACRO_DIR([m4]) +AM_SILENT_RULES([yes]) + +# Checks for programs. +AC_PROG_CC +AM_PROG_CC_C_O + +# Use libtool +LT_INIT([]) +AC_SUBST([LIBTOOL_DEPS]) +AM_PROG_LIBTOOL + +# Checks for header files. +AC_CHECK_HEADERS([arpa/inet.h memory.h stdint.h stdlib.h string.h sys/socket.h unistd.h]) +AC_CHECK_HEADERS([sys/ioctl.h]) +AC_CHECK_HEADERS([sys/time.h]) +AC_CHECK_HEADERS([fcntl.h]) + +# Checks for typedefs, structures, and compiler characteristics. 
+AC_C_INLINE +AC_TYPE_SIZE_T +AC_TYPE_UINT16_T + +AC_CHECK_SIZEOF([int *]) + +########################################################################## +# Check for pthreads availability +########################################################################## + +AX_PTHREAD([], [ + AC_MSG_ERROR([We require pthreads to be available], + [1]) + ]) +LIBS="$PTHREAD_LIBS $LIBS" +AM_CFLAGS="$AM_CFLAGS $PTHREAD_CFLAGS" +AM_LDFLAGS="$AM_LDFLAGS $PTHREAD_LDFLAGS" +CC="$PTHREAD_CC" + +AC_SEARCH_LIBS([timer_create],[rt posix4]) + +########################################################################## +# Default warning setup +########################################################################## +OFP_CFLAGS="$OFP_CFLAGS -W -Wall -Wstrict-prototypes -Wmissing-prototypes" +OFP_CFLAGS="$OFP_CFLAGS -Wmissing-declarations -Wold-style-definition -Wpointer-arith" +OFP_CFLAGS="$OFP_CFLAGS -Wcast-align -Wnested-externs -Wcast-qual -Wformat-nonliteral" +OFP_CFLAGS="$OFP_CFLAGS -Wformat-security -Wundef -Wwrite-strings" +## OFP_CFLAGS="$OFP_CFLAGS -Werror" + +########################################################################## +# Default include setup +########################################################################## +AM_CFLAGS="$AM_CFLAGS $OFP_CFLAGS" + +########################################################################## +# Checks for library functions. 
+########################################################################## +AC_FUNC_MALLOC +AC_CHECK_FUNCS([inet_ntoa malloc memset socket strerror strrchr]) + +AC_CONFIG_FILES([ + Makefile + src/Makefile + example/Makefile + example/fpm/Makefile + example/fpm_burstmode/Makefile + example/socket/Makefile + example/classifier/Makefile + example/webserver/Makefile + example/webserver2/Makefile + example/udpecho/Makefile + example/ioctl_test/Makefile + example/sysctl/Makefile + test/Makefile + test/cunit/Makefile + ]) + +AC_ARG_ENABLE([lto], +[AS_HELP_STRING([--enable-lto],[enable link-time optimization @<:@no@:>@])], +[want_lto="${enableval}"], [want_lto=no]) +## FIXME: add a test for gcc >= 4.5.0 +if test "x${want_lto}" = xyes; then + AM_CFLAGS="$AM_CFLAGS -flto" +fi + +########################################################################## +# adding the ODP library (e.g. with static name 'libodp.a') +########################################################################## + +# prepending lib to the files to link +LIBS="-lodp $LIBS" + +# introduce the optional configure parameter for a non-standard install prefix of XXX +AC_ARG_WITH([odp], + [AS_HELP_STRING([--with-odp=prefix], + [non-standard install prefix of odp])], + [ODPPATHSET=1], + [ODPPATHSET=0]) + +# if optional parameter used, extend path flags for compiler and linker +if test $ODPPATHSET = 1 ; then + # extend the compiler and linker flags according to the path set + AM_CFLAGS="$AM_CFLAGS -I$with_odp/include -DHAVE_LIBODP" + AM_LDFLAGS="$AM_LDFLAGS -L$with_odp/lib" + AC_DEFINE(HAVE_LIBODP, [1], [Have libodp ?]) +else + AC_CHECK_LIB(odp, odp_packet_alloc, [ + AC_DEFINE(HAVE_LIBODP, [1], [Have libodp ?])], [ + AC_MSG_ERROR([This package needs OpenDataPlane (libodp.a) installed], + [1]) + ]) +fi + +# Enable/disable OFP_DEBUG_PRINT +ODP_DEBUG=1 +AC_ARG_ENABLE([debug], + [ --enable-debug Enable/disable ODP debug], + [if ! 
test "x$enableval" = "xyes"; then + ODP_DEBUG=0 + fi]) +AM_CFLAGS="$AM_CFLAGS -DOFP_DEBUG_PRINT=$ODP_DEBUG" + +# Enable/disable INET6 domain +AC_ARG_ENABLE([ipv6], + [ --enable-ipv6 Turn on IPv6 processing], + [case "${enableval}" in + yes) ipv6_support=true ;; + no) ipv6_support=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-ipv6]) ;; + esac],[ipv6_support=true]) +AM_CONDITIONAL([OFP_IPv6], [test x$ipv6_support = xtrue]) + +# Enable/disable Slow Path processing +AC_ARG_ENABLE([sp], + [ --enable-sp Turn on Slow Path processing], + [case "${enableval}" in + yes) sp_support=true ;; + no) sp_support=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-sp]) ;; + esac],[sp_support=true]) +AM_CONDITIONAL([OFP_SP], [test x$sp_support = xtrue]) + +# Enable/disable libCK use +AC_ARG_ENABLE([libck], + [ --enable-libck Enable/disable use of libCK], + [case "${enableval}" in + yes) use_ck=true ;; + no) use_ck=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-libck]) ;; + esac],[use_ck=false]) +AM_CONDITIONAL([OFP_USE_LIBCK], [test x$use_ck = xtrue]) + +# Enable/disable MTRIE IPv4 use +AC_ARG_ENABLE([mtrie], + [ --enable-mtrie Enable/disable use of mtrie route tables], + [case "${enableval}" in + yes) use_mtrie=true ;; + no) use_mtrie=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-mtrie]) ;; + esac],[use_mtrie=true]) +AM_CONDITIONAL([OFP_MTRIE], [test x$use_mtrie = xtrue]) + + +########################################################################## +# Enable/disable Unit tests +########################################################################## +AC_ARG_ENABLE( + [cunit], + [ --enable-cunit Enable/disable cunit], + [if test x$enableval = xyes; then + cunit_support=yes + fi]) + +AC_ARG_WITH( + [cunit-path], + AS_HELP_STRING( + [--with-cunit-path=DIR], + [Path to Cunit libs and headers (or in the default path if not specified).]), + [CUNIT_PATH=$withval cunit_support=yes + AM_CFLAGS="$AM_CFLAGS -I$CUNIT_PATH" + 
AM_LDFLAGS="$AM_LDFLAGS -L$CUNIT_PATH/lib"], + [AS_IF( + [test x$cunit_support = xyes ], + [AC_CHECK_HEADERS( + [CUnit/Basic.h], [], + [AC_MSG_FAILURE(["can't find cunit headers"])]) + ]) + ]) + +AC_SUBST(CUNIT_PATH) +AM_CONDITIONAL([OFP_CUNIT_ENABLED], [test x$cunit_support = xyes ]) + +########################################################################## +# adding the quagga dir +########################################################################## + +# introduce the optional configure parameter for a non-standard install prefix of XXX +AC_ARG_WITH([quagga], + [AS_HELP_STRING([--with-quagga=prefix], + [non-standard path prefix of the quagga source directory])], + [QUAGGAPATHSET=1], + [QUAGGAPATHSET=0]) + +# if optional parameter used, extend path flags for compliler and linker +if test $QUAGGAPATHSET = 1 ; then + # extend the compiler and linker flags according to the path set + AM_CFLAGS="$AM_CFLAGS -I$with_quagga" + AC_DEFINE(HAVE_QUAGGA, [1], [Have quagga source dir ?]) +else +AC_CHECK_HEADERS([fpm/fpm.h], [ + AC_DEFINE(HAVE_QUAGGA, [1], [Have quagga source dir ?])], []) +fi + + +########################################################################## +# Libconcurrenykit +########################################################################## + +# introduce the optional configure parameter for a non-standard install prefix of XXX +AC_ARG_WITH([libck], + [AS_HELP_STRING([--with-libck=prefix], + [non-standard path prefix of the concurrenykit install directory])], + [CKPATHSET=1], + [CKPATHSET=0]) + +if test x$use_ck = xtrue ; then + # if optional parameter used, extend path flags for compliler and linker + if test $CKPATHSET = 1 ; then + # extend the compiler and linker flags according to the path set + AM_CFLAGS="$AM_CFLAGS -I$with_libck/include" + AM_LDFLAGS="$AM_LDFLAGS -L$with_libck/lib -L$with_libck/src" + LIBS="-lck $LIBS" + AC_DEFINE(HAVE_LIBCK, [1], [Have libconcurrencykit]) + else + AC_CHECK_HEADERS([ck_epoch.h], [ + AC_DEFINE(HAVE_LIBCK, 
[1], [Have concurrentykit])], []) + + AC_SEARCH_LIBS([ck_epoch_init],[ck],[], [ + AC_MSG_ERROR([Unable to find libck library])]) + fi +fi + +########################################################################## +# adding the RumpKernel dir +########################################################################## + +# introduce the optional configure parameter for a non-standard install prefix of XXX +AC_ARG_WITH([buildrump], + [AS_HELP_STRING([--with-buildrump=prefix], + [non-standard path prefix of the rump source directory])], + [RUMPPATHSET=1], + [RUMPPATHSET=0]) + +# if optional parameter used, extend path flags for compliler and linker +if test $RUMPPATHSET = 1 ; then + # extend the compiler and linker flags according to the path set + AM_CFLAGS="$AM_CFLAGS -I$with_buildrump/rump/include" + AM_LDFLAGS="$AM_LDFLAGS -L$with_buildrump/rump/lib/" + AC_DEFINE(HAVE_BUILDRUMP, [1], [Have buildrump libs ?]) + LIBS="-lrumpclient -lrumpnet_shmif -lrumpnet_config -lrumpnet_netinet6 -lrumpnet_net -lrumpnet -lrump -lrumpuser $LIBS" +else + AC_CHECK_HEADERS([rump/rump.h], [ + AC_DEFINE(HAVE_BUILDRUMP, [1], [Have buildrump libs ?])], + []) + AC_SEARCH_LIBS([rumpclient_init],[rumpclient]) + AC_SEARCH_LIBS([rump_pub_shmif_create],[rumpnet_shmif]) + AC_SEARCH_LIBS([rumpns_if_init],[rumpnet_config]) + AC_SEARCH_LIBS([_init],[rumpnet_netinet6]) + AC_SEARCH_LIBS([_init],[rumpnet_netinet]) + AC_SEARCH_LIBS([rumpns_rt_init],[rumpnet_net]) + AC_SEARCH_LIBS([rumpns_rt_inithead],[rumpnet]) + AC_SEARCH_LIBS([rump_init],[rump]) + AC_SEARCH_LIBS([rumpuser_init],[rumpuser]) +fi + +########################################################################## +# distribute the changed variables among the Makefiles +AC_SUBST([LIBS]) +AC_SUBST([AM_CFLAGS]) +AC_SUBST([AM_LDFLAGS]) + +AC_OUTPUT +AC_MSG_RESULT([ + $PACKAGE $VERSION + ======== + + prefix: ${prefix} + sysconfdir: ${sysconfdir} + libdir: ${libdir} + includedir: ${includedir} + + compiler: ${CC} + cflags: ${CFLAGS} + am_cflags: 
${AM_CFLAGS} + ldflags: ${LDFLAGS} + am_ldflags: ${AM_LDFLAGS} +]) diff --git a/docs/Open_IP_Fast_Path_Architecture.odt b/docs/Open_IP_Fast_Path_Architecture.odt new file mode 100644 index 00000000..53f09c2a Binary files /dev/null and b/docs/Open_IP_Fast_Path_Architecture.odt differ diff --git a/docs/RFC_Compliance_Results.odt b/docs/RFC_Compliance_Results.odt new file mode 100755 index 00000000..ab26f3b0 Binary files /dev/null and b/docs/RFC_Compliance_Results.odt differ diff --git a/docs/System_view.pptx b/docs/System_view.pptx new file mode 100755 index 00000000..b2905939 Binary files /dev/null and b/docs/System_view.pptx differ diff --git a/docs/images/odpfp-port-data-structures.png b/docs/images/odpfp-port-data-structures.png new file mode 100644 index 00000000..eca4aa69 Binary files /dev/null and b/docs/images/odpfp-port-data-structures.png differ diff --git a/docs/images/odpfp-route-tree-structures.png b/docs/images/odpfp-route-tree-structures.png new file mode 100644 index 00000000..17531841 Binary files /dev/null and b/docs/images/odpfp-route-tree-structures.png differ diff --git a/docs/images/odpfp_components.jpg b/docs/images/odpfp_components.jpg new file mode 100644 index 00000000..c7bf841f Binary files /dev/null and b/docs/images/odpfp_components.jpg differ diff --git a/docs/images/odpfp_components.pptx b/docs/images/odpfp_components.pptx new file mode 100644 index 00000000..64d7063a Binary files /dev/null and b/docs/images/odpfp_components.pptx differ diff --git a/docs/routing.dia b/docs/routing.dia new file mode 100644 index 00000000..b5e3b40f Binary files /dev/null and b/docs/routing.dia differ diff --git a/example/Makefile.am b/example/Makefile.am new file mode 100644 index 00000000..f006a6ea --- /dev/null +++ b/example/Makefile.am @@ -0,0 +1,2 @@ +SUBDIRS = fpm fpm_burstmode socket classifier webserver webserver2 udpecho \ + ioctl_test sysctl diff --git a/example/Makefile.inc b/example/Makefile.inc new file mode 100644 index 00000000..113ded29 --- 
/dev/null +++ b/example/Makefile.inc @@ -0,0 +1,10 @@ +LIB = $(top_builddir)/lib +LDADD = $(LIB)/libofp.la + +DEFAULT_INCLUDES=-I. + +AM_CFLAGS += \ + -I$(srcdir) \ + -I$(top_srcdir)/include/api + +AM_LDFLAGS += -L$(LIB) diff --git a/example/README b/example/README new file mode 100644 index 00000000..016a79ca --- /dev/null +++ b/example/README @@ -0,0 +1,48 @@ +Examples in this directory: + +------------------------------------------------------------------------------- +fpm + +fpm is a good starting point for application development. +It includes basically everything needed to run an application. + +------------------------------------------------------------------------------- +socket + +------------------------------------------------------------------------------- +udpecho + +udpecho demonstrates usage of event based UDP sockets. Application receives +data as odp packets, not via sockets. Application sends data back using two +optional methods: +1. standard socket interface, or +2. reusing the same packet and sending it using a packet send function. +------------------------------------------------------------------------------- +webserver + +This is a simple web server that uses sockets as expected. + +------------------------------------------------------------------------------- +webserver2 + +This web server receives requests as odp packets and sends responses +using normal sockets. Benefit is that it is not necessary to have +bookkeeping for the sockets. For example accept is executed to create +necessary data structures but the result (new socket) is ignored. +At the moment there is one restriction: reply length is limited to 63 +socket writings. + +------------------------------------------------------------------------------- +ioctl_test + +Examples how to use ioctl to control sockets, interfaces, and routes. 
+ +------------------------------------------------------------------------------- +sysctl + +Management Information Base (MIB) is a hierarchical database that +describes an application. Object Identifiers (OID) can be used to +read and write variables. Sysctl example demonstrates how to create +new nodes and variables and how to read and write them. + +------------------------------------------------------------------------------- diff --git a/example/classifier/Makefile.am b/example/classifier/Makefile.am new file mode 100644 index 00000000..7660ae34 --- /dev/null +++ b/example/classifier/Makefile.am @@ -0,0 +1,7 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = classifier +classifier_LDFLAGS = $(AM_LDFLAGS) -static +classifier_CFLAGS = $(AM_CFLAGS) + +dist_classifier_SOURCES = classifier_main.c diff --git a/example/classifier/classifier_main.c b/example/classifier/classifier_main.c new file mode 100644 index 00000000..8fd8441d --- /dev/null +++ b/example/classifier/classifier_main.c @@ -0,0 +1,456 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include + +#include "ofp.h" + +#define MAX_WORKERS 32 +#define TEST_PORT 54321 +/* Pack a.b.c.d with 'a' in the low byte; operands are widened to uint32_t so an octet >= 128 cannot overflow a signed int */ +#define IP4(a, b, c, d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) + +/** + * Parsed command line application arguments + */ +typedef struct { + int core_count; + int if_count; /**< Number of interfaces to be used */ + char **if_names; /**< Array of pointers to interface names */ + char *conf_file; +} appl_args_t; + +/* helper funcs */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args); +static void print_info(char *progname, appl_args_t *appl_args); +static void usage(char *progname); +static int build_classifier(int if_count, char **if_names); +static odp_cos_t build_cos_w_queue(const char *name); +static odp_cos_t build_cos_set_queue(const char *name, odp_queue_t queue_cos); +static odp_pmr_t build_udp_prm(void); +static void app_processing(void); + +ofp_init_global_t app_init_params; /**< global OFP init parms */ + +/** Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(file_name) (strrchr((file_name), '/') ? 
\ + strrchr((file_name), '/') + 1 : (file_name)) + + +/** main() Application entry point + * + * @param argc int + * @param argv[] char* + * @return int + * + */ +int main(int argc, char *argv[]) +{ + odph_linux_pthread_t thread_tbl[MAX_WORKERS]; + appl_args_t params; + int core_count, num_workers; + odp_cpumask_t cpumask; + char cpumaskstr[64]; + + /* Parse and store the application arguments */ + parse_args(argc, argv, ¶ms); + + /* Print both system and application information */ + print_info(NO_PATH(argv[0]), ¶ms); + + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + exit(EXIT_FAILURE); + } + odp_init_local(); + + core_count = odp_cpu_count(); + num_workers = core_count; + + if (params.core_count) + num_workers = params.core_count; + if (num_workers > MAX_WORKERS) + num_workers = MAX_WORKERS; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + memset(&app_init_params, 0, sizeof(app_init_params)); + app_init_params.linux_core_id = 0; + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + + ofp_init_global(&app_init_params); + ofp_init_local(); + + build_classifier(app_init_params.if_count, app_init_params.if_names); + + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + sleep(1); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + odph_linux_pthread_create(thread_tbl, + &cpumask, + default_event_dispatcher, + ofp_udp4_processing); + + app_processing(); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument 
count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 'c': + appl_args->core_count = atoi(optarg); + break; + /* parse packet-io interface names */ + case 'i': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + names = malloc(len); + if (names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* count the number of tokens separated by ',' */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + } + appl_args->if_count = i; + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* allocate storage for the if names */ + appl_args->if_names = + calloc(appl_args->if_count, sizeof(char *)); + + /* store the if names (reset names string) */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + appl_args->if_names[i] = token; + } + break; + + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'f': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + appl_args->conf_file = malloc(len); + if 
(appl_args->conf_file == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + strcpy(appl_args->conf_file, optarg); + break; + + default: + break; + } + } + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + optind = 1; /* reset 'extern optind' from the getopt lib */ +} + +/** + * Print system and application info + */ +static void print_info(char *progname, appl_args_t *appl_args) +{ + int i; + + printf("\n" + "ODP system info\n" + "---------------\n" + "ODP API version: %s\n" + "CPU model: %s\n" + "CPU freq (hz): %"PRIu64"\n" + "Cache line size: %i\n" + "Core count: %i\n" + "\n", + odp_version_api_str(), odp_sys_cpu_model_str(), + odp_sys_cpu_hz(), odp_sys_cache_line_size(), + odp_cpu_count()); + + printf("Running ODP appl: \"%s\"\n" + "-----------------\n" + "IF-count: %i\n" + "Using IFs: ", + progname, appl_args->if_count); + for (i = 0; i < appl_args->if_count; ++i) + printf(" %s", appl_args->if_names[i]); + printf("\n\n"); + fflush(NULL); +} + +/** + * Prinf usage information + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. 
%s -i eth1,eth2,eth3\n" + "\n" + "ODPFastpath application.\n" + "\n" + "Mandatory OPTIONS:\n" + " -i, --interface Eth interfaces (comma-separated, no spaces)\n" + "\n" + "Optional OPTIONS\n" + " -c, --count Core count.\n" + " -h, --help Display help and exit.\n" + "\n", NO_PATH(progname), NO_PATH(progname) + ); +} + +int build_classifier(int if_count, char **if_names) +{ + odp_pktio_t pktio; + odp_cos_t cos_def; + odp_cos_t cos_udp; + odp_pmr_t pmr_udp; + char name[80]; + int i; + + cos_udp = build_cos_w_queue("cos_udp"); + if (cos_udp == ODP_COS_INVALID) { + OFP_ERR("Failed to create UDP COS"); + return -1; + } + + pmr_udp = build_udp_prm(); + if (pmr_udp == ODP_PMR_INVAL) { + OFP_ERR("Failed to create UDP PRM"); + return -1; + } + + for (i = 0; i < if_count; i++) { + pktio = odp_pktio_lookup(if_names[i]); + if (pktio == ODP_PKTIO_INVALID) { + OFP_ERR("Failed to get pktio for interface %s\n", + if_names[i]); + return -1; + } + + sprintf(name, "cos_default_%s", if_names[i]); + cos_def = build_cos_set_queue(name, ofp_pktio_spq_get(pktio)); + if (cos_def == ODP_COS_INVALID) { + OFP_ERR("Failed to create default COS " + "for interface %s\n", if_names[i]); + return -1; + } + + if (odp_pktio_default_cos_set(pktio, cos_def) < 0) { + OFP_ERR("Failed to set default COS on interface %s\n", + if_names[i]); + return -1; + } + + if (odp_pktio_error_cos_set(pktio, cos_def) < 0) { + OFP_ERR("Failed to set error COS on interface %s\n", + if_names[i]); + return -1; + } + + if (odp_pktio_pmr_cos(pmr_udp, pktio, cos_udp) < 0) { + OFP_ERR("Failed to set UDP PRM on interface %s\n", + if_names[i]); + return 1; + } + } + + return 0; +} + +static odp_cos_t build_cos_w_queue(const char *name) +{ + odp_cos_t cos; + odp_queue_t queue_cos; + odp_queue_param_t qparam; + + cos = odp_cos_create(name); + if (cos == ODP_COS_INVALID) { + OFP_ERR("Failed to create COS"); + return ODP_COS_INVALID; + } + + memset(&qparam, 0, sizeof(odp_queue_param_t)); + qparam.sched.prio = ODP_SCHED_PRIO_DEFAULT; + 
qparam.sched.sync = ODP_SCHED_SYNC_ATOMIC; + qparam.sched.group = ODP_SCHED_GROUP_DEFAULT; + + queue_cos = odp_queue_create(name, + ODP_QUEUE_TYPE_SCHED, + &qparam); + if (queue_cos == ODP_QUEUE_INVALID) { + OFP_ERR("Failed to create queue\n"); + odp_cos_destroy(cos); + return ODP_COS_INVALID; + } + + if (odp_cos_set_queue(cos, queue_cos) < 0) { + OFP_ERR("Failed to set queue on COS"); + odp_cos_destroy(cos); + odp_queue_destroy(queue_cos); + return ODP_COS_INVALID; + } + + return cos; +} + +static odp_cos_t build_cos_set_queue(const char *name, odp_queue_t queue_cos) +{ + odp_cos_t cos; + + cos = odp_cos_create(name); + if (cos == ODP_COS_INVALID) { + OFP_ERR("Failed to create COS"); + return ODP_COS_INVALID; + } + + if (odp_cos_set_queue(cos, queue_cos) < 0) { + OFP_ERR("Failed to set queue on COS"); + odp_cos_destroy(cos); + return ODP_COS_INVALID; + } + + return cos; +} + +static odp_pmr_t build_udp_prm(void) +{ + uint32_t pmr_udp_val = TEST_PORT; + uint32_t pmr_udp_mask = 0xffffffff; + + return odp_pmr_create(ODP_PMR_UDP_DPORT, + &pmr_udp_val, + &pmr_udp_mask, + 1); +} + +static void app_processing(void) +{ + int fd_rcv = -1; + char buf[1500]; + int len = sizeof(buf); + + do { + struct ofp_sockaddr_in addr = {0}; + + fd_rcv = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, + OFP_IPPROTO_UDP); + if (fd_rcv == -1) { + OFP_ERR("Faild to create RCV socket (errno = %d)\n", + ofp_errno); + break; + } + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(fd_rcv, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + break; + } + + len = ofp_recv(fd_rcv, buf, len, 0); + if (len == -1) + OFP_ERR("Faild to receive data (errno = %d)\n", + ofp_errno); + else + OFP_INFO("Data received: length = %d.\n", len); + + } while (0); + + if (fd_rcv != -1) { + 
ofp_close(fd_rcv); + fd_rcv = -1; + } + OFP_INFO("Test ended.\n"); +} + diff --git a/example/classifier/ofp.conf b/example/classifier/ofp.conf new file mode 100644 index 00000000..63189379 --- /dev/null +++ b/example/classifier/ofp.conf @@ -0,0 +1,3 @@ +debug 0 +loglevel set info +ifconfig fp0 192.168.100.1/24 diff --git a/example/classifier/udp_test.c b/example/classifier/udp_test.c new file mode 100644 index 00000000..1ec8d504 --- /dev/null +++ b/example/classifier/udp_test.c @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include +#include + +int main(void) +{ + char *buffer = "test_udp"; + struct sockaddr_in dest_addr = {0}; + int sd = -1; + + sd = socket(AF_INET, SOCK_DGRAM, 0); + if(sd == -1) { + printf("Error: failed to create socket\n"); + exit(0); + } + + dest_addr.sin_family = AF_INET; + dest_addr.sin_port = htons(54321); + inet_aton("192.168.100.1", &dest_addr.sin_addr.s_addr); + + sendto(sd, buffer, strlen(buffer) + 1, 0, + (const struct sockaddr *)&dest_addr, sizeof(dest_addr)); + + close (sd); + return 0; +} diff --git a/example/fpm/Makefile.am b/example/fpm/Makefile.am new file mode 100644 index 00000000..fab971f8 --- /dev/null +++ b/example/fpm/Makefile.am @@ -0,0 +1,7 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = fpm +fpm_LDFLAGS = $(AM_LDFLAGS) -static +fpm_CFLAGS = $(AM_CFLAGS) + +dist_fpm_SOURCES = app_main.c diff --git a/example/fpm/app_main.c b/example/fpm/app_main.c new file mode 100644 index 00000000..ef03ee4e --- /dev/null +++ b/example/fpm/app_main.c @@ -0,0 +1,292 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofp.h" + +#define MAX_WORKERS 32 + +/** + * Parsed command line application arguments + */ +typedef struct { + int core_count; + int if_count; /**< Number of interfaces to be used */ + char **if_names; /**< Array of pointers to interface names */ + char *conf_file; +} appl_args_t; + +/* helper funcs */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args); +static void print_info(char *progname, appl_args_t *appl_args); +static void usage(char *progname); + +ofp_init_global_t app_init_params; /**< global OFP init parms */ + +/** Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(file_name) (strrchr((file_name), '/') ? \ + strrchr((file_name), '/') + 1 : (file_name)) + + +/** local hook + * + * @param pkt odp_packet_t + * @param protocol int + * @return int + * + */ +static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, void *arg) +{ + int protocol = *(int *)arg; + (void) pkt; + (void) protocol; + return OFP_PKT_CONTINUE; +} + +/** main() Application entry point + * + * @param argc int + * @param argv[] char* + * @return int + * + */ +int main(int argc, char *argv[]) +{ + odph_linux_pthread_t thread_tbl[MAX_WORKERS]; + appl_args_t params; + int core_count, num_workers; + odp_cpumask_t cpumask; + char cpumaskstr[64]; + + /* Parse and store the application arguments */ + parse_args(argc, argv, ¶ms); + + /* Print both system and application information */ + print_info(NO_PATH(argv[0]), ¶ms); + + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + exit(EXIT_FAILURE); + } + odp_init_local(); + + core_count = odp_cpu_count(); + num_workers = core_count; + + if (params.core_count) + num_workers = params.core_count; + if (num_workers > MAX_WORKERS) + num_workers = MAX_WORKERS; + + /* + * By default core #0 runs Linux kernel background tasks. 
+ * Start mapping thread from core #1 + */ + memset(&app_init_params, 0, sizeof(app_init_params)); + + app_init_params.linux_core_id = 0; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + app_init_params.pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + ofp_init_global(&app_init_params); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + odph_linux_pthread_create(thread_tbl, + &cpumask, + default_event_dispatcher, + ofp_eth_vlan_processing); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 'c': + appl_args->core_count = atoi(optarg); + break; + /* parse 
packet-io interface names */ + case 'i': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + names = malloc(len); + if (names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* count the number of tokens separated by ',' */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + } + appl_args->if_count = i; + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* allocate storage for the if names */ + appl_args->if_names = + calloc(appl_args->if_count, sizeof(char *)); + + /* store the if names (reset names string) */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + appl_args->if_names[i] = token; + } + break; + + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'f': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + appl_args->conf_file = malloc(len); + if (appl_args->conf_file == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + strcpy(appl_args->conf_file, optarg); + break; + + default: + break; + } + } + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + optind = 1; /* reset 'extern optind' from the getopt lib */ +} + +/** + * Print system and application info + */ +static void print_info(char *progname, appl_args_t *appl_args) +{ + int i; + + printf("\n" + "ODP system info\n" + "---------------\n" + "ODP API version: %s\n" + "CPU model: %s\n" + "CPU freq (hz): %"PRIu64"\n" + "Cache line size: %i\n" + "Core count: %i\n" + "\n", + odp_version_api_str(), odp_sys_cpu_model_str(), + odp_sys_cpu_hz(), odp_sys_cache_line_size(), + odp_cpu_count()); + + printf("Running ODP appl: \"%s\"\n" + "-----------------\n" + "IF-count: %i\n" + "Using IFs: ", + 
progname, appl_args->if_count); + for (i = 0; i < appl_args->if_count; ++i) + printf(" %s", appl_args->if_names[i]); + printf("\n\n"); + fflush(NULL); +} + +/** + * Prinf usage information + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. %s -i eth1,eth2,eth3\n" + "\n" + "ODPFastpath application.\n" + "\n" + "Mandatory OPTIONS:\n" + " -i, --interface Eth interfaces (comma-separated, no spaces)\n" + "\n" + "Optional OPTIONS\n" + " -c, --count Core count.\n" + " -h, --help Display help and exit.\n" + "\n", NO_PATH(progname), NO_PATH(progname) + ); +} diff --git a/example/fpm_burstmode/Makefile.am b/example/fpm_burstmode/Makefile.am new file mode 100644 index 00000000..55bd724d --- /dev/null +++ b/example/fpm_burstmode/Makefile.am @@ -0,0 +1,7 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = fpm_burstmode +fpm_burstmode_LDFLAGS = $(AM_LDFLAGS) -static +fpm_burstmode_CFLAGS = $(AM_CFLAGS) + +dist_fpm_burstmode_SOURCES = app_main.c diff --git a/example/fpm_burstmode/app_main.c b/example/fpm_burstmode/app_main.c new file mode 100644 index 00000000..c14a0c2a --- /dev/null +++ b/example/fpm_burstmode/app_main.c @@ -0,0 +1,400 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofp.h" + +#define MAX_WORKERS 32 + +/** + * Parsed command line application arguments + */ +typedef struct { + int core_count; + int if_count; /**< Number of interfaces to be used */ + char **if_names; /**< Array of pointers to interface names */ + char *conf_file; +} appl_args_t; + +/* helper funcs */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args); +static void print_info(char *progname, appl_args_t *appl_args); +static void usage(char *progname); + +ofp_init_global_t app_init_params; /**< global OFP init parms */ + +/** Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(file_name) (strrchr((file_name), '/') ? \ + strrchr((file_name), '/') + 1 : (file_name)) + + +/** local hook + * + * @param pkt odp_packet_t + * @param protocol int + * @return int + * + */ +static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, void *arg) +{ + int protocol = *(int *)arg; + (void) pkt; + (void) protocol; + return OFP_PKT_CONTINUE; +} + +#define OFP_PKT_BURST_SIZE 16 + +struct pktio_thr_arg { + int port; + ofp_pkt_processing_func pkt_func; +}; + +static void *pkt_io_recv(void *arg) +{ + odp_pktio_t pktio; + odp_packet_t pkt, pkt_tbl[OFP_PKT_BURST_SIZE]; + int pkt_idx, pkt_cnt; + struct pktio_thr_arg *thr_args; + ofp_pkt_processing_func pkt_func; + + thr_args = arg; + pkt_func = thr_args->pkt_func; + + ofp_init_local(); + + pktio = ofp_port_pktio_get(thr_args->port); + + OFP_DBG("PKT-IO receive starting on port: %d, pktio-id: %"PRIX64"\n", + thr_args->port, odp_pktio_to_u64(pktio)); + + while (1) { + pkt_cnt = odp_pktio_recv(pktio, pkt_tbl, OFP_PKT_BURST_SIZE); + + for (pkt_idx = 0; pkt_idx < pkt_cnt; pkt_idx++) { + pkt = pkt_tbl[pkt_idx]; + + if (odp_unlikely(odp_packet_has_error(pkt))) { + OFP_DBG("Packet with error dropped.\n"); + odp_packet_free(pkt); + } + + ofp_packet_input(pkt, ODP_QUEUE_INVALID, pkt_func); + } + 
} + + /* Never reached */ + return NULL; +} + +/* + * Should receive timeouts only + */ +static void *event_dispatcher(void *arg) +{ + odp_event_t ev; + + (void)arg; + + ofp_init_local(); + + while (1) { + ev = odp_schedule(NULL, ODP_SCHED_WAIT); + + if (ev == ODP_EVENT_INVALID) + continue; + + if (odp_event_type(ev) == ODP_EVENT_TIMEOUT) { + ofp_timer_handle(ev); + continue; + } + + OFP_ERR("Event_dispatcher: " + "Error, unexpected event type: %u\n", + odp_event_type(ev)); + + odp_buffer_free(odp_buffer_from_event(ev)); + } + + /* Never reached */ + return NULL; +} + +/** main() Application entry point + * + * @param argc int + * @param argv[] char* + * @return int + * + */ +int main(int argc, char *argv[]) +{ + odph_linux_pthread_t thread_tbl[MAX_WORKERS], dispatcher_thread; + appl_args_t params; + int core_count, num_workers; + odp_cpumask_t cpu_mask; + char cpumaskstr[64]; + int cpu, first_cpu, i; + struct pktio_thr_arg pktio_thr_args[MAX_WORKERS]; + + /* Parse and store the application arguments */ + parse_args(argc, argv, ¶ms); + + /* Print both system and application information */ + print_info(NO_PATH(argv[0]), ¶ms); + + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + exit(EXIT_FAILURE); + } + odp_init_local(); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + memset(pktio_thr_args, 0, sizeof(pktio_thr_args)); + + core_count = odp_cpu_count(); + num_workers = core_count; + first_cpu = 1; + + if (params.core_count) + num_workers = params.core_count; + if (num_workers > MAX_WORKERS) + num_workers = MAX_WORKERS; + + /* + * By default core #0 runs Linux kernel background tasks. 
+ * Start mapping thread from core #1 + */ + memset(&app_init_params, 0, sizeof(app_init_params)); + + app_init_params.linux_core_id = 0; + + if (core_count <= 1) { + OFP_ERR("Burst mode requires multiple cores.\n"); + exit(EXIT_FAILURE); + } + num_workers--; + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", first_cpu); + + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + app_init_params.pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + app_init_params.burst_recv_mode = 1; + ofp_init_global(&app_init_params); + + if (num_workers < params.if_count) { + OFP_ERR("At least %u fastpath cores required.\n", + params.if_count); + exit(EXIT_FAILURE); + } + + for (i = 0; i < num_workers; ++i) { + pktio_thr_args[i].pkt_func = ofp_eth_vlan_processing; + pktio_thr_args[i].port = i % params.if_count; + + odp_cpumask_zero(&cpu_mask); + cpu = first_cpu + i; + odp_cpumask_set(&cpu_mask, cpu); + odp_cpumask_to_str(&cpu_mask, cpumaskstr, sizeof(cpumaskstr)); + + OFP_DBG("Starting pktio receive on core: %d port: %d\n", + cpu, pktio_thr_args[i].port); + OFP_DBG("cpu mask: %s\n", cpumaskstr); + + odph_linux_pthread_create(&thread_tbl[i], + &cpu_mask, + pkt_io_recv, + &pktio_thr_args[i]); + } + + odp_cpumask_zero(&cpu_mask); + odp_cpumask_set(&cpu_mask, app_init_params.linux_core_id); + odph_linux_pthread_create(&dispatcher_thread, + &cpu_mask, + event_dispatcher, + NULL); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t 
len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 'c': + appl_args->core_count = atoi(optarg); + break; + /* parse packet-io interface names */ + case 'i': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + names = malloc(len); + if (names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* count the number of tokens separated by ',' */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + } + appl_args->if_count = i; + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* allocate storage for the if names */ + appl_args->if_names = + calloc(appl_args->if_count, sizeof(char *)); + + /* store the if names (reset names string) */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + appl_args->if_names[i] = token; + } + break; + + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'f': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + appl_args->conf_file = malloc(len); + if (appl_args->conf_file == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + strcpy(appl_args->conf_file, optarg); + break; + + default: + break; + } + } + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + optind = 1; /* 
reset 'extern optind' from the getopt lib */ +} + +/** + * Print system and application info + */ +static void print_info(char *progname, appl_args_t *appl_args) +{ + int i; + + printf("\n" + "ODP system info\n" + "---------------\n" + "ODP API version: %s\n" + "CPU model: %s\n" + "CPU freq (hz): %"PRIu64"\n" + "Cache line size: %i\n" + "Core count: %i\n" + "\n", + odp_version_api_str(), odp_sys_cpu_model_str(), + odp_sys_cpu_hz(), odp_sys_cache_line_size(), + odp_cpu_count()); + + printf("Running ODP appl: \"%s\"\n" + "-----------------\n" + "IF-count: %i\n" + "Using IFs: ", + progname, appl_args->if_count); + for (i = 0; i < appl_args->if_count; ++i) + printf(" %s", appl_args->if_names[i]); + printf("\n\n"); + fflush(NULL); +} + +/** + * Prinf usage information + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. %s -i eth1,eth2,eth3\n" + "\n" + "ODPFastpath application.\n" + "\n" + "Mandatory OPTIONS:\n" + " -i, --interface Eth interfaces (comma-separated, no spaces)\n" + "\n" + "Optional OPTIONS\n" + " -c, --count Core count.\n" + " -h, --help Display help and exit.\n" + "\n", NO_PATH(progname), NO_PATH(progname) + ); +} diff --git a/example/ioctl_test/Makefile.am b/example/ioctl_test/Makefile.am new file mode 100644 index 00000000..c450f9b6 --- /dev/null +++ b/example/ioctl_test/Makefile.am @@ -0,0 +1,11 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = ioctl_test + +AM_CFLAGS += -I$(top_srcdir)/include +AM_CFLAGS += -I$(top_srcdir)/include/api + +ioctl_test_LDFLAGS = $(AM_LDFLAGS) -static +ioctl_test_CFLAGS = $(AM_CFLAGS) + +dist_ioctl_test_SOURCES = app_main.c ioctl_test.c diff --git a/example/ioctl_test/app_main.c b/example/ioctl_test/app_main.c new file mode 100644 index 00000000..009c27fb --- /dev/null +++ b/example/ioctl_test/app_main.c @@ -0,0 +1,365 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofp.h" + +#include "ioctl_test.h" + +#define MAX_WORKERS 32 + +/** + * Parsed command line application arguments + */ +typedef struct { + int core_count; + int if_count; /**< Number of interfaces to be used */ + char **if_names; /**< Array of pointers to interface names */ + char *conf_file; +} appl_args_t; + +/* helper funcs */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args); +static void print_info(char *progname, appl_args_t *appl_args); +static void usage(char *progname); +static void *app_dispatcher_thread(void *arg); + +ofp_init_global_t app_init_params; /**< global OFP init parms */ + +/** Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(file_name) (strrchr((file_name), '/') ? \ + strrchr((file_name), '/') + 1 : (file_name)) + + +/** local hook + * + * @param pkt odp_packet_t + * @param protocol int + * @return int + * + */ +static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, void *arg) +{ + int protocol = *(int *)arg; + (void) pkt; + (void) protocol; + return OFP_PKT_CONTINUE; +} + + +/** Application Dispatcher worker threads + * + * @param arg void* + * @return void* + * + */ +static void *app_dispatcher_thread(void *arg) +{ + odp_event_t ev; + odp_packet_t pkt; + odp_queue_t in_queue; + + arg = arg; + + ofp_init_local(); + + /* PER CORE DISPATCHER */ + while (1) { + ev = odp_schedule(&in_queue, ODP_SCHED_WAIT); + + if (ev == ODP_EVENT_INVALID) + continue; + + if (odp_event_type(ev) == ODP_EVENT_TIMEOUT) { + ofp_timer_handle(ev); + continue; + } + + if (odp_event_type(ev) == ODP_EVENT_PACKET) { + pkt = odp_packet_from_event(ev); + + ofp_packet_input(pkt, in_queue, + ofp_eth_vlan_processing); + continue; + } + + printf("App_dispatcher: Error, unexpected event type: %u\n", + odp_event_type(ev)); + + /* Free events by type */ + if (odp_event_type(ev) == ODP_EVENT_BUFFER) { + 
odp_buffer_free(odp_buffer_from_event(ev)); + continue; + } + + if (odp_event_type(ev) == ODP_EVENT_CRYPTO_COMPL) { + odp_crypto_compl_free(odp_crypto_compl_from_event(ev)); + continue; + } + + } + + /* Never reached */ + return NULL; +} + +/** main() Application entry point + * + * @param argc int + * @param argv[] char* + * @return int + * + */ +#include +#include + +int main(int argc, char *argv[]) +{ + odph_linux_pthread_t thread_tbl[MAX_WORKERS]; + appl_args_t params; + int core_count, num_workers; + odp_cpumask_t cpumask; + char cpumaskstr[64]; + + struct rlimit rlp; + getrlimit(RLIMIT_CORE, &rlp); + printf("RLIMIT_CORE: %ld/%ld\n", rlp.rlim_cur, rlp.rlim_max); + rlp.rlim_cur = 200000000; + printf("Setting to max: %d\n", setrlimit(RLIMIT_CORE, &rlp)); + + /* Parse and store the application arguments */ + parse_args(argc, argv, ¶ms); + + /* Print both system and application information */ + print_info(NO_PATH(argv[0]), ¶ms); + + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + exit(EXIT_FAILURE); + } + odp_init_local(); + + core_count = odp_cpu_count(); + num_workers = core_count; + + if (params.core_count) + num_workers = params.core_count; + if (num_workers > MAX_WORKERS) + num_workers = MAX_WORKERS; + + /* + * By default core #0 runs Linux kernel background tasks. 
+ * Start mapping thread from core #1 + */ + memset(&app_init_params, 0, sizeof(app_init_params)); + + app_init_params.linux_core_id = 0; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + app_init_params.pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + ofp_init_global(&app_init_params); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + + odph_linux_pthread_create(thread_tbl, + &cpumask, + app_dispatcher_thread, + NULL); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + /* ioctl test thread */ + ofp_start_ioctl_thread(app_init_params.linux_core_id); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 
'c': + appl_args->core_count = atoi(optarg); + break; + /* parse packet-io interface names */ + case 'i': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + names = malloc(len); + if (names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* count the number of tokens separated by ',' */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + } + appl_args->if_count = i; + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* allocate storage for the if names */ + appl_args->if_names = + calloc(appl_args->if_count, sizeof(char *)); + + /* store the if names (reset names string) */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + appl_args->if_names[i] = token; + } + break; + + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'f': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + appl_args->conf_file = malloc(len); + if (appl_args->conf_file == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + strcpy(appl_args->conf_file, optarg); + break; + + default: + break; + } + } + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + optind = 1; /* reset 'extern optind' from the getopt lib */ +} + +/** + * Print system and application info + */ +static void print_info(char *progname, appl_args_t *appl_args) +{ + int i; + + printf("\n" + "ODP system info\n" + "---------------\n" + "ODP API version: %s\n" + "CPU model: %s\n" + "CPU freq (hz): %"PRIu64"\n" + "Cache line size: %i\n" + "Core count: %i\n" + "\n", + odp_version_api_str(), odp_sys_cpu_model_str(), + odp_sys_cpu_hz(), odp_sys_cache_line_size(), + odp_cpu_count()); + + printf("Running ODP appl: \"%s\"\n" 
+ "-----------------\n" + "IF-count: %i\n" + "Using IFs: ", + progname, appl_args->if_count); + for (i = 0; i < appl_args->if_count; ++i) + printf(" %s", appl_args->if_names[i]); + printf("\n\n"); + fflush(NULL); +} + +/** + * Prinf usage information + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. %s -i eth1,eth2,eth3\n" + "\n" + "ODPFastpath application.\n" + "\n" + "Mandatory OPTIONS:\n" + " -i, --interface Eth interfaces (comma-separated, no spaces)\n" + "\n" + "Optional OPTIONS\n" + " -c, --count Core count.\n" + " -h, --help Display help and exit.\n" + "\n", NO_PATH(progname), NO_PATH(progname) + ); +} diff --git a/example/ioctl_test/ioctl_test.c b/example/ioctl_test/ioctl_test.c new file mode 100644 index 00000000..ba29cd98 --- /dev/null +++ b/example/ioctl_test/ioctl_test.c @@ -0,0 +1,351 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include +#include +#include +#include +#include +#include + +#include "ofp.h" + +#include "ioctl_test.h" + +#define logfilename "/tmp/iocrl-test.log" +static FILE *logfile; +#define logprint(a...) 
fprintf(logfile, a) +//#define logprint OFP_LOG +#define IFNAME "fp0" +#define GRENAME "gre1" + +#define IP4(a,b,c,d) (a|(b<<8)|(c<<16)|(d<<24)) + +static uint32_t +get_ip_address(int fd, const char *name) +{ + struct ofp_ifreq ifr; + + strcpy(ifr.ifr_name, name); + if (ofp_ioctl(fd, OFP_SIOCGIFADDR, &ifr) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + return 0; + } else { + struct ofp_sockaddr_in *ipaddr; + ipaddr = (struct ofp_sockaddr_in *)&ifr.ifr_addr; + return ipaddr->sin_addr.s_addr; + } +} + +static uint32_t +get_netmask(int fd, const char *name) +{ + struct ofp_ifreq ifr; + + strcpy(ifr.ifr_name, name); + if (ofp_ioctl(fd, OFP_SIOCGIFNETMASK, &ifr) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + return 0; + } else { + struct ofp_sockaddr_in *ipaddr; + ipaddr = (struct ofp_sockaddr_in *)&ifr.ifr_addr; + return ipaddr->sin_addr.s_addr; + } +} + +static uint32_t +get_broadcast_address(int fd, const char *name) +{ + struct ofp_ifreq ifr; + + strcpy(ifr.ifr_name, name); + if (ofp_ioctl(fd, OFP_SIOCGIFBRDADDR, &ifr) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + return 0; + } else { + struct ofp_sockaddr_in *ipaddr; + ipaddr = (struct ofp_sockaddr_in *)&ifr.ifr_addr; + return ipaddr->sin_addr.s_addr; + } +} + +static void +set_ip_address_and_mask(int fd, const char *name, uint32_t addr, uint32_t mask) +{ + struct ofp_in_aliasreq ifra; + + strcpy(ifra.ifra_name, name); + ifra.ifra_addr.sin_family = OFP_AF_INET; + ifra.ifra_addr.sin_addr.s_addr = addr; + ifra.ifra_mask.sin_addr.s_addr = mask; + if (ofp_ioctl(fd, OFP_SIOCSIFADDR, &ifra) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + } +} + +static void +delete_if_address(int fd, const char *name) +{ + struct ofp_in_aliasreq ifra; + + strcpy(ifra.ifra_name, name); + ifra.ifra_addr.sin_family = OFP_AF_INET; + if (ofp_ioctl(fd, OFP_SIOCDIFADDR, &ifra) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + } 
+} + +static void +receive_non_blocking(void) +{ + int s, ret, nb = 1; + struct ofp_sockaddr_in addr; + struct ofp_sockaddr_in remote; + ofp_socklen_t remote_len; + char buf[64]; + int len = sizeof(buf); + + s = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, OFP_IPPROTO_UDP); + + addr.sin_family = OFP_AF_INET; + addr.sin_addr.s_addr = 0; + addr.sin_port = odp_cpu_to_be_16(2048); + addr.sin_len = sizeof(addr); + + if ((ret = ofp_bind(s, (struct ofp_sockaddr *)&addr, sizeof(addr)))) { + OFP_LOG("bind ret=%d %s\n", ret, ofp_strerror(ofp_errno)); + } + + /* + * Set non-blocking mode. + */ + if (ofp_ioctl(s, OFP_FIONBIO, &nb) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + } + + /* + * No data expected. Immediate return. + */ + ret = ofp_recvfrom(s, buf, len, 0, + (struct ofp_sockaddr *)&remote, &remote_len); + ofp_close(s); +} + +static int +get_sockbuf_data(int fd, uint32_t cmd) +{ + int val; + if (ofp_ioctl(fd, cmd, &val) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + } + return val; +} + +static void +get_if_conf(int fd, struct ofp_ifconf *conf) +{ + /* + * Get all interfaces. 
+ */ + if (ofp_ioctl(fd, OFP_SIOCGIFCONF, conf) < 0) { + logprint("Ioctlx error (%s)!\n", + ofp_strerror(ofp_errno)); + } +} + +static void +set_gre_tunnel(int fd, const char *name, uint32_t addr, uint32_t p2p, + uint32_t local, uint32_t remote, int vrf) +{ + struct ofp_in_tunreq treq; + + strcpy(treq.iftun_name, name); + treq.iftun_addr.sin_addr.s_addr = addr; + treq.iftun_p2p_addr.sin_addr.s_addr = p2p; + treq.iftun_local_addr.sin_addr.s_addr = local; + treq.iftun_remote_addr.sin_addr.s_addr = remote; + treq.iftun_vrf = vrf; + + if (ofp_ioctl(fd, OFP_SIOCSIFTUN, &treq) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + } +} + +static void +set_vrf(int fd, const char *name, int vrf) +{ + struct ofp_ifreq ifr; + + strcpy(ifr.ifr_name, name); + ifr.ifr_fib = vrf; + + if (ofp_ioctl(fd, OFP_SIOCSIFFIB, &ifr) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + } +} + +static void +set_route(int fd, const char *dev, int vrf, + uint32_t dst, uint32_t mask, uint32_t gw) +{ + struct ofp_rtentry rt; + + rt.rt_vrf = vrf; + rt.rt_dev = (char *)(uintptr_t)dev; + ((struct ofp_sockaddr_in *)&rt.rt_dst)->sin_addr.s_addr = dst; + ((struct ofp_sockaddr_in *)&rt.rt_genmask)->sin_addr.s_addr = mask; + ((struct ofp_sockaddr_in *)&rt.rt_gateway)->sin_addr.s_addr = gw; + + if (ofp_ioctl(fd, OFP_SIOCADDRT, &rt) < 0) { + logprint("Ioctl error (%s)!\n", + ofp_strerror(ofp_errno)); + } +} + +static void * +ioctl_test(void *arg) +{ + int fd; + uint32_t addr, origaddr, origmask; + (void)arg; + + logfile = fopen(logfilename, "w"); + logprint("Ioctl test thread started\n"); + + odp_init_local(); + ofp_init_local(); + sleep(2); + + if ((fd = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, OFP_IPPROTO_UDP)) < 0) { + logprint("Cannot open UDP socket (%s)!\n", + ofp_strerror(ofp_errno)); + return NULL; + } + + logprint("\n=====================================\n"); + logprint("Get IP address of %s\n", IFNAME); + origaddr = get_ip_address(fd, IFNAME); + logprint(" 
%s\n", ofp_print_ip_addr(origaddr)); + + logprint("\n=====================================\n"); + logprint("Get netmask of %s\n", IFNAME); + origmask = get_netmask(fd, IFNAME); + logprint(" %s\n", ofp_print_ip_addr(origmask)); + + logprint("\n=====================================\n"); + logprint("Get broadcast address of %s\n", IFNAME); + logprint(" %s\n", + ofp_print_ip_addr(get_broadcast_address(fd, IFNAME))); + + logprint("\n=====================================\n"); + logprint("Delete IP address of %s\n", IFNAME); + delete_if_address(fd, IFNAME); + + logprint("\n=====================================\n"); + addr = IP4(192,168,156,111); + logprint("Set IP address of %s to %s/%d\n", + IFNAME, ofp_print_ip_addr(addr), 25); + set_ip_address_and_mask(fd, IFNAME, addr, odp_cpu_to_be_32(0xffffff80)); + + logprint("Set back original address and mask\n"); + set_ip_address_and_mask(fd, IFNAME, origaddr, origmask); + + logprint("\n=====================================\n"); + logprint("Receiving from socket\n"); + receive_non_blocking(); + logprint("Immediate return\n"); + + logprint("\n=====================================\n"); + logprint("Get sockbuf bytes to read\n"); + logprint(" %d\n", get_sockbuf_data(fd, OFP_FIONREAD)); + + logprint("\n=====================================\n"); + logprint("Get sockbuf bytes yet to write\n"); + + logprint(" %d\n", get_sockbuf_data(fd, OFP_FIONWRITE)); + + logprint("\n=====================================\n"); + logprint("Get sockbuf send space\n"); + + logprint(" %d\n", get_sockbuf_data(fd, OFP_FIONSPACE)); + + logprint("\n=====================================\n"); + logprint("Set GRE tunnel\n"); + set_gre_tunnel(fd, GRENAME, IP4(10,3,4,1), IP4(10,3,4,2), + origaddr, IP4(192,168,56,104), 0); + + logprint("\n=====================================\n"); + logprint("Change GRE tunnel's VRF\n"); + set_vrf(fd, GRENAME, 7); + + logprint("\n=====================================\n"); + logprint("Get all interfaces\n"); + + struct ofp_ifconf 
conf; + char data[1024]; + struct ofp_ifreq *ifr; + int i = 1; + + conf.ifc_len = sizeof(data); + conf.ifc_buf = (char *)data; + + get_if_conf(fd, &conf); + + ifr = (struct ofp_ifreq *)data; + while ((char *)ifr < data + conf.ifc_len) { + switch (ifr->ifr_addr.sa_family) { + case OFP_AF_INET: + logprint(" %d. %s : %s\n", i, ifr->ifr_name, + ofp_print_ip_addr(((struct ofp_sockaddr_in *) + &ifr->ifr_addr)->sin_addr.s_addr)); + break; + } + ifr++; + i++; + } + + logprint("\n=====================================\n"); + logprint("Set routes\n"); + + set_route(fd, GRENAME, 0, IP4(10,1,1,0), IP4(255,255,255,0), IP4(10,3,4,2)); + /* + * If output device is not set it will be found using the route to gateway. + */ + set_route(fd, NULL, 0, IP4(10,7,0,0), IP4(255,255,0,0), IP4(192,168,56,254)); + + logprint("\n=====================================\n"); + ofp_close(fd); + logprint("Ioctl test exit\n"); + logprint("\n=====================================\n"); + + fclose(logfile); + if (system("cat " logfilename) < 0) + OFP_ERR("Cannot run system()\n"); + return NULL; +} + +void ofp_start_ioctl_thread(int core_id) +{ + odph_linux_pthread_t test_linux_pthread; + odp_cpumask_t cpumask; + + odp_cpumask_zero(&cpumask); + odp_cpumask_set(&cpumask, core_id); + + odph_linux_pthread_create(&test_linux_pthread, + &cpumask, + ioctl_test, + NULL); +} diff --git a/example/ioctl_test/ioctl_test.h b/example/ioctl_test/ioctl_test.h new file mode 100644 index 00000000..989a2322 --- /dev/null +++ b/example/ioctl_test/ioctl_test.h @@ -0,0 +1,12 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#ifndef _IOCTL_TEST_H_ +#define _IOCTL_TEST_H_ + +void ofp_start_ioctl_thread(int core_id); + +#endif diff --git a/example/socket/Makefile.am b/example/socket/Makefile.am new file mode 100644 index 00000000..757fa234 --- /dev/null +++ b/example/socket/Makefile.am @@ -0,0 +1,24 @@ +include $(top_srcdir)/example/Makefile.inc + 
+bin_PROGRAMS = socket +socket_LDFLAGS = $(AM_LDFLAGS) -static +socket_CFLAGS = $(AM_CFLAGS) + +dist_socket_SOURCES = \ +suite_framework.c \ +socket_create_close.c \ +socket_bind.c \ +socket_shutdown.c \ +socket_connect_udp.c \ +socket_send_sendto_udp.c \ +socket_send_recv_udp.c \ +socket_listen_tcp.c \ +socket_connect_accept_tcp.c \ +socket_send_recv_tcp.c \ +socket_select.c \ +socket_sigevent.c \ +socket_main.c + +if OFP_IPv6 +socket_CFLAGS += -DINET6 +endif diff --git a/example/socket/ofp.conf b/example/socket/ofp.conf new file mode 100644 index 00000000..2324233b --- /dev/null +++ b/example/socket/ofp.conf @@ -0,0 +1,4 @@ +debug 0 +loglevel set abort +ifconfig fp0 192.168.100.1/24 +ifconfig -A inet6 fp0 fd00:1baf::1/64 diff --git a/example/socket/socket_bind.c b/example/socket/socket_bind.c new file mode 100644 index 00000000..094804d7 --- /dev/null +++ b/example/socket/socket_bind.c @@ -0,0 +1,170 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofp.h" +#include "socket_bind.h" +#include "socket_util.h" + +int init_udp_create_socket(int *pfd_thread1, int *pfd_thread2) +{ + *pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, 0); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create socket 1 (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, 0); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create socket 2 (errno = %d)\n", + ofp_errno); + return -1; + } + + return 0; +} + +int init_tcp_create_socket(int *pfd_thread1, int *pfd_thread2) +{ + *pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, 0); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create socket 1 (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, 0); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create socket 2 (errno = %d)\n", + ofp_errno); + return -1; + } + + return 0; +} + +#ifdef INET6 +int init_udp6_create_socket(int *pfd_thread1, int *pfd_thread2) +{ + *pfd_thread1 = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, 0); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create socket 1 (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, 0); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create socket 2 (errno = %d)\n", + ofp_errno); + return -1; + } + + return 0; +} + +int init_tcp6_create_socket(int *pfd_thread1, int *pfd_thread2) +{ + *pfd_thread1 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create socket 1 (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create socket 2 (errno = %d)\n", + ofp_errno); + return -1; + } + + return 0; +} +#endif /* INET6 */ + + +int bind_ip4_local_ip(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = 
sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} +int bind_ip4_any(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = OFP_INADDR_ANY; + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +#ifdef INET6 +int bind_ip6_local_ip(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} +int bind_ip6_any(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} +#endif /* INET6 */ + diff --git a/example/socket/socket_bind.h b/example/socket/socket_bind.h new file mode 100644 index 00000000..e6db8a9d --- /dev/null +++ b/example/socket/socket_bind.h 
@@ -0,0 +1,27 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __SOCKET_BIND_H__ +#define __SOCKET_BIND_H__ + +int init_udp_create_socket(int *pfd_thread1, int *pfd_thread2); +int init_tcp_create_socket(int *pfd_thread1, int *pfd_thread2); + +#ifdef INET6 +int init_udp6_create_socket(int *pfd_thread1, int *pfd_thread2); +int init_tcp6_create_socket(int *pfd_thread1, int *pfd_thread2); +#endif /* INET6 */ + +int bind_ip4_local_ip(int fd); +int bind_ip4_any(int fd); + +#ifdef INET6 +int bind_ip6_local_ip(int fd); +int bind_ip6_any(int fd); +#endif /* INET6 */ + +#endif /* __SOCKET_BIND_H__ */ diff --git a/example/socket/socket_connect_accept_tcp.c b/example/socket/socket_connect_accept_tcp.c new file mode 100644 index 00000000..f368b84a --- /dev/null +++ b/example/socket/socket_connect_accept_tcp.c @@ -0,0 +1,463 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofp.h" +#include "socket_connect_accept_tcp.h" +#include "socket_util.h" + +int init_tcp_bind_listen_local_ip(int *pfd_thread1, int *pfd_thread2) +{ + struct ofp_sockaddr_in addr = {0}; + int optval = 1; + + *pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, + OFP_IPPROTO_TCP); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create SEND socket (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, + OFP_IPPROTO_TCP); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create RCV socket (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_listen(*pfd_thread2, 10) == -1) { + OFP_ERR("Faild to listen (errno = %d)\n", ofp_errno); + return -1; + } + + return 0; +} + +int init_tcp_bind_listen_any(int *pfd_thread1, int 
*pfd_thread2) +{ + struct ofp_sockaddr_in addr = {0}; + int optval = 1; + + *pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, + OFP_IPPROTO_TCP); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create SEND socket (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT); + addr.sin_addr.s_addr = OFP_INADDR_ANY; + + if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, + OFP_IPPROTO_TCP); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create RCV socket (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = OFP_INADDR_ANY; + + if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_listen(*pfd_thread2, 10) == -1) { + OFP_ERR("Faild to listen (errno = %d)\n", ofp_errno); + return -1; + } + + return 0; +} + +#ifdef INET6 +int init_tcp6_bind_listen_local_ip(int *pfd_thread1, int *pfd_thread2) +{ + struct ofp_sockaddr_in6 addr = {0}; + int optval = 1; + + *pfd_thread1 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create 
socket 1 (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create socket 2 (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_listen(*pfd_thread2, 10) == -1) { + OFP_ERR("Faild to listen (errno = %d)\n", ofp_errno); + return -1; + } + + return 0; +} + +int init_tcp6_bind_listen_any(int *pfd_thread1, int *pfd_thread2) +{ + struct ofp_sockaddr_in6 addr = {0}; + int optval = 1; + + *pfd_thread1 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create socket 1 (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, 
OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create socket 2 (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_listen(*pfd_thread2, 10) == -1) { + OFP_ERR("Faild to listen (errno = %d)\n", ofp_errno); + return -1; + } + + return 0; +} +#endif /* INET6 */ + +int connect_tcp4_local_ip(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_connect(fd, (struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_tcp4_any(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = OFP_INADDR_ANY; + + if 
(ofp_connect(fd, (struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int accept_tcp4(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + ofp_socklen_t addr_len = sizeof(addr); + int fd_accepted = -1; + + fd_accepted = ofp_accept(fd, (struct ofp_sockaddr *)&addr, + &addr_len); + + if (fd_accepted == -1) { + OFP_ERR("Faild to accept connection (errno = %d)\n", + ofp_errno); + return -1; + } + + if (addr_len != sizeof(struct ofp_sockaddr_in)) { + OFP_ERR("Faild to accept: invalid address size %d\n", + addr_len); + return -1; + } + + OFP_INFO("Address: 0x%x, port: %d.\n", + odp_be_to_cpu_32(addr.sin_addr.s_addr), + odp_be_to_cpu_16(addr.sin_port)); + + if (ofp_close(fd_accepted) == -1) { + OFP_ERR("Faild to close accepted socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int accept_tcp4_null_addr(int fd) +{ + int fd_accepted; + + fd_accepted = ofp_accept(fd, NULL, NULL); + + if (fd_accepted == -1) { + OFP_ERR("Faild to accept connection (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_close(fd_accepted) == -1) { + OFP_ERR("Faild to close accepted socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + + +#ifdef INET6 +int connect_tcp6_local_ip(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_connect(fd, (struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_tcp6_any(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + 
addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_connect(fd, (struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int accept_tcp6(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + ofp_socklen_t addr_len = sizeof(addr); + int fd_accepted = -1; + + fd_accepted = ofp_accept(fd, (struct ofp_sockaddr *)&addr, + &addr_len); + if (fd_accepted == -1) { + OFP_ERR("Faild to accept connection (errno = %d)\n", + ofp_errno); + return -1; + } + + if (addr_len != sizeof(struct ofp_sockaddr_in6)) { + OFP_ERR("Faild to accept: invalid address size %d\n", + addr_len); + return -1; + } + + OFP_INFO("Address: %x:%x:%x:%x, port: %d.\n", + odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[0]), + odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[1]), + odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[2]), + odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[3]), + odp_be_to_cpu_16(addr.sin6_port)); + + if (ofp_close(fd_accepted) == -1) { + OFP_ERR("Faild to close accepted socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int accept_tcp6_null_addr(int fd) +{ + int fd_accepted = -1; + + fd_accepted = ofp_accept(fd, NULL, NULL); + if (fd_accepted == -1) { + OFP_ERR("Faild to accept connection (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_close(fd_accepted) == -1) { + OFP_ERR("Faild to close accepted socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} +#endif /* INET6 */ + diff --git a/example/socket/socket_connect_accept_tcp.h b/example/socket/socket_connect_accept_tcp.h new file mode 100644 index 00000000..6166d75c --- /dev/null +++ b/example/socket/socket_connect_accept_tcp.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __SOCKET_CONNECT_ACCEPT_H__ +#define __SOCKET_CONNECT_ACCEPT_H__ + +int init_tcp_bind_listen_local_ip(int *pfd_thread1, int *pfd_thread2); +int init_tcp_bind_listen_any(int *pfd_thread1, int *pfd_thread2); + +#ifdef INET6 +int init_tcp6_bind_listen_local_ip(int *pfd_thread1, int *pfd_thread2); +int init_tcp6_bind_listen_any(int *pfd_thread1, int *pfd_thread2); +#endif /* INET6 */ + +int connect_tcp4_local_ip(int fd); +int connect_tcp4_any(int fd); + +int accept_tcp4(int fd); +int accept_tcp4_null_addr(int fd); + +#ifdef INET6 +int connect_tcp6_local_ip(int fd); +int connect_tcp6_any(int fd); + +int accept_tcp6(int fd); +int accept_tcp6_null_addr(int fd); +#endif /* INET6 */ + +#endif /* __SOCKET_CONNECT_ACCEPT_H__ */ + diff --git a/example/socket/socket_connect_udp.c b/example/socket/socket_connect_udp.c new file mode 100644 index 00000000..9d2faa8b --- /dev/null +++ b/example/socket/socket_connect_udp.c @@ -0,0 +1,308 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofp.h" +#include "socket_connect_udp.h" +#include "socket_util.h" + +int connect_udp4(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 2); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} +int connect_bind_udp4(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 3); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_shutdown_udp4(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 2); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_shutdown(fd, OFP_SHUT_RDWR) == -1) { + OFP_ERR("Faild to shutdown socket (errno = 
%d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_shutdown_bind_udp4(int fd) +{ + struct ofp_sockaddr_in addr = {0}; + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 3); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_shutdown(fd, OFP_SHUT_RDWR) == -1) { + OFP_ERR("Faild to shutdown socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + + +#ifdef INET6 +int connect_udp6(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 2); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_bind_udp6(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", 
+ ofp_errno); + return -1; + } + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 3); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_shutdown_udp6(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 2); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_shutdown(fd, OFP_SHUT_RDWR) == -1) { + OFP_ERR("Faild to shutdown socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_shutdown_bind_udp6(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 3); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_shutdown(fd, OFP_SHUT_RDWR) == -1) { 
+ OFP_ERR("Faild to shutdown socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_shutdown_udp6_any(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 2); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_shutdown(fd, OFP_SHUT_RDWR) == -1) { + OFP_ERR("Faild to shutdown socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int connect_shutdown_bind_udp6_any(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_bind(fd, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 3); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_shutdown(fd, OFP_SHUT_RDWR) == -1) { + OFP_ERR("Faild to shutdown socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} +#endif /* INET6 */ diff --git a/example/socket/socket_connect_udp.h b/example/socket/socket_connect_udp.h new file mode 100644 index 00000000..016e4929 --- /dev/null +++ b/example/socket/socket_connect_udp.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 
2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __SOCKET_CONNECT_UDP_H__ +#define __SOCKET_CONNECT_UDP_H__ + +int connect_udp4(int fd); +int connect_bind_udp4(int fd); +int connect_shutdown_udp4(int fd); +int connect_shutdown_bind_udp4(int fd); + +#ifdef INET6 +int connect_udp6(int fd); +int connect_bind_udp6(int fd); + +int connect_shutdown_udp6(int fd); +int connect_shutdown_bind_udp6(int fd); + +int connect_shutdown_udp6_any(int fd); +int connect_shutdown_bind_udp6_any(int fd); +#endif /* INET6 */ + + +#endif /* __SOCKET_CONNECT_UDP_H__ */ + diff --git a/example/socket/socket_create_close.c b/example/socket/socket_create_close.c new file mode 100644 index 00000000..06109a37 --- /dev/null +++ b/example/socket/socket_create_close.c @@ -0,0 +1,148 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofp.h" +#include "socket_create_close.h" + +int create_close_udp(int fd) +{ + fd = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, OFP_IPPROTO_UDP); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int create_close_udp_noproto(int fd) +{ + fd = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, 0); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int create_close_tcp(int fd) +{ + fd = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, OFP_IPPROTO_TCP); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", 
ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int create_close_tcp_noproto(int fd) +{ + fd = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, 0); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +#ifdef INET6 +int create_close_udp6(int fd) +{ + fd = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, OFP_IPPROTO_UDP); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int create_close_udp6_noproto(int fd) +{ + fd = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, 0); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int create_close_tcp6(int fd) +{ + fd = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, OFP_IPPROTO_TCP); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int create_close_tcp6_noproto(int fd) +{ + fd = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (fd == -1) { + OFP_ERR("Faild to create socket (errno = %d)\n", ofp_errno); + return -1; + } + + if (ofp_close(fd) == -1) { + OFP_ERR("Faild to close socket (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} +#endif /* INET6 */ + diff --git a/example/socket/socket_create_close.h b/example/socket/socket_create_close.h new file mode 100644 index 00000000..74324041 --- 
/dev/null +++ b/example/socket/socket_create_close.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __SOCKET_CREATE_CLOSE_H__ +#define __SOCKET_CREATE_CLOSE_H__ + +int create_close_udp(int fd); +int create_close_udp_noproto(int fd); + +int create_close_tcp(int fd); +int create_close_tcp_noproto(int fd); + +#ifdef INET6 +int create_close_udp6(int fd); +int create_close_udp6_noproto(int fd); + +int create_close_tcp6(int fd); +int create_close_tcp6_noproto(int fd); +#endif /* INET6 */ + +#endif /* __SOCKET_CREATE_CLOSE_H__ */ + diff --git a/example/socket/socket_listen_tcp.c b/example/socket/socket_listen_tcp.c new file mode 100644 index 00000000..d00d158d --- /dev/null +++ b/example/socket/socket_listen_tcp.c @@ -0,0 +1,128 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofp.h" +#include "socket_listen_tcp.h" +#include "socket_util.h" + +int init_tcp_bind_local_ip(int *pfd_thread1, int *pfd_thread2) +{ + struct ofp_sockaddr_in addr = {0}; + int optval = 1; + + *pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, + OFP_IPPROTO_TCP); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create SEND socket (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread1, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = 
ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, + OFP_IPPROTO_TCP); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create RCV socket (errno = %d)\n", + ofp_errno); + return -1; + } + + optval = 1; + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEADDR, + &optval, sizeof(optval)); + ofp_setsockopt(*pfd_thread2, OFP_SOL_SOCKET, OFP_SO_REUSEPORT, + &optval, sizeof(optval)); + + addr.sin_len = sizeof(struct ofp_sockaddr_in); + addr.sin_family = OFP_AF_INET; + addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin_addr.s_addr = IP4(192, 168, 100, 1); + + if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + return 0; +} + +#ifdef INET6 +int init_tcp6_bind_local_ip(int *pfd_thread1, int *pfd_thread2) +{ + struct ofp_sockaddr_in6 addr = {0}; + + *pfd_thread1 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread1 == -1) { + OFP_ERR("Faild to create socket 1 (errno = %d)\n", + ofp_errno); + return -1; + } + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + *pfd_thread2 = ofp_socket(OFP_AF_INET6, OFP_SOCK_STREAM, 0); + if (*pfd_thread2 == -1) { + OFP_ERR("Faild to create socket 2 (errno = %d)\n", + ofp_errno); + return -1; + } + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr, + sizeof(addr)) == -1) { + OFP_ERR("Faild to bind socket (errno = %d)\n", + ofp_errno); + return -1; + } + + 
return 0; +} +#endif /* INET6 */ + +int listen_tcp(int fd) +{ + if (ofp_listen(fd, 10) == -1) { + OFP_ERR("Faild to listen (errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} diff --git a/example/socket/socket_listen_tcp.h b/example/socket/socket_listen_tcp.h new file mode 100644 index 00000000..ed7c916b --- /dev/null +++ b/example/socket/socket_listen_tcp.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __SOCKET_LISTEN_TCP_H__ +#define __SOCKET_LISTEN_TCP_H__ + +int init_tcp_bind_local_ip(int *pfd_thread1, int *pfd_thread2); + +#ifdef INET6 +int init_tcp6_bind_local_ip(int *pfd_thread1, int *pfd_thread2); +#endif /* INET6 */ + +int listen_tcp(int fd); + +#endif /* __SOCKET_LISTEN_TCP_H__ */ + diff --git a/example/socket/socket_main.c b/example/socket/socket_main.c new file mode 100644 index 00000000..b0da5099 --- /dev/null +++ b/example/socket/socket_main.c @@ -0,0 +1,650 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include + +#include "ofp.h" +#include "socket_util.h" +#include "suite_framework.h" +#include "socket_create_close.h" +#include "socket_bind.h" +#include "socket_shutdown.h" +#include "socket_connect_udp.h" +#include "socket_send_sendto_udp.h" +#include "socket_send_recv_udp.h" +#include "socket_listen_tcp.h" +#include "socket_connect_accept_tcp.h" +#include "socket_send_recv_tcp.h" +#include "socket_select.h" +#include "socket_sigevent.h" + +#define MAX_WORKERS 32 + +/** + * Parsed command line application arguments + */ +typedef struct { + int core_count; + int if_count; /**< Number of interfaces to be used */ + char **if_names; /**< Array of pointers to interface names */ + char *conf_file; +} appl_args_t; + + +/* helper funcs */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args); +static void print_info(char *progname, appl_args_t *appl_args); +static void usage(char *progname); + +/*static int null_function(int fd) +{ + (void)fd; + + return 0; +}*/ + +ofp_init_global_t app_init_params; /**< global OFP init parms */ + +/** Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(file_name) (strrchr((file_name), '/') ? 
\ + strrchr((file_name), '/') + 1 : (file_name)) + +/** main() Application entry point + * + * @param argc int + * @param argv[] char* + * @return int + * + */ +int main(int argc, char *argv[]) +{ + odph_linux_pthread_t thread_tbl[MAX_WORKERS]; + appl_args_t params; + int core_count, num_workers; + odp_cpumask_t cpumask; + char cpumaskstr[64]; + + /* Parse and store the application arguments */ + parse_args(argc, argv, &params); + + /* Print both system and application information */ + print_info(NO_PATH(argv[0]), &params); + + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + exit(EXIT_FAILURE); + } + odp_init_local(); + + core_count = odp_cpu_count(); + num_workers = core_count; + + if (params.core_count) + num_workers = params.core_count; + if (num_workers > MAX_WORKERS) + num_workers = MAX_WORKERS; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + memset(&app_init_params, 0, sizeof(app_init_params)); + app_init_params.linux_core_id = 0; + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + + ofp_init_global(&app_init_params); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + odph_linux_pthread_create(thread_tbl, + &cpumask, + default_event_dispatcher, + ofp_eth_vlan_processing); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + sleep(5); + + ofp_loglevel = OFP_LOG_INFO; + + config_suite_framework(app_init_params.linux_core_id); + + OFP_INFO("\n\nSuite: IPv4 UDP socket: create and close.\n\n"); + if (!init_suite(NULL)) + run_suite(create_close_udp, create_close_udp_noproto); + end_suite(); + 
OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 TCP socket: create and close.\n\n"); + if (!init_suite(NULL)) + run_suite(create_close_tcp, create_close_tcp_noproto); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP socket: create and close.\n\n"); + if (!init_suite(NULL)) + run_suite(create_close_udp6, create_close_udp6_noproto); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 TCP socket: create and close.\n\n"); + if (!init_suite(NULL)) + run_suite(create_close_tcp6, create_close_tcp6_noproto); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /* INET6 */ + + OFP_INFO("\n\nSuite: IPv4 UDP socket: bind.\n\n"); + if (!init_suite(init_udp_create_socket)) + run_suite(bind_ip4_local_ip, bind_ip4_any); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 TCP socket: bind.\n\n"); + if (!init_suite(init_tcp_create_socket)) + run_suite(bind_ip4_local_ip, bind_ip4_any); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP socket: bind.\n\n"); + if (!init_suite(init_udp6_create_socket)) + run_suite(bind_ip6_local_ip, bind_ip6_any); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 TCP socket: bind.\n\n"); + if (!init_suite(init_tcp6_create_socket)) + run_suite(bind_ip6_local_ip, bind_ip6_any); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /* INET6 */ + + OFP_INFO("\n\nSuite: IPv4 UDP socket: shutdown.\n\n"); + if (!init_suite(init_udp_create_socket)) + run_suite(shutdown_socket, shutdown_socket); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 TCP socket: shutdown (no connection).\n\n"); + if (!init_suite(init_tcp_create_socket)) + run_suite(shutdown_socket, shutdown_socket); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP socket: shutdown.\n\n"); + if (!init_suite(init_udp6_create_socket)) + run_suite(shutdown_socket, shutdown_socket); + 
end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 TCP socket: shutdown (no connection).\n\n"); + if (!init_suite(init_tcp6_create_socket)) + run_suite(shutdown_socket, shutdown_socket); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /* INET6 */ + + OFP_INFO("\n\nSuite: IPv4 UDP socket: connect.\n\n"); + if (!init_suite(init_udp_create_socket)) + run_suite(connect_udp4, connect_bind_udp4); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 UDP socket: connect + shutdown.\n\n"); + if (!init_suite(init_udp_create_socket)) + run_suite(connect_shutdown_udp4, connect_shutdown_bind_udp4); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP socket: connect.\n\n"); + if (!init_suite(init_udp6_create_socket)) + run_suite(connect_udp6, connect_bind_udp6); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 UDP socket: connect + shutdown.\n\n"); + if (!init_suite(init_udp6_create_socket)) + run_suite(connect_shutdown_udp6, connect_shutdown_bind_udp6); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 UDP socket: connect + shutdown + any.\n\n"); + if (!init_suite(init_udp6_create_socket)) + run_suite(connect_shutdown_udp6_any, + connect_shutdown_bind_udp6_any); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /* INET6 */ + + OFP_INFO("\n\nSuite: IPv4 UDP socket BIND local address: send + sendto\n\n"); + if (!init_suite(init_udp_bind_local_ip)) + run_suite(send_ip4_udp_local_ip, sendto_ip4_udp_local_ip); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 UDP socket bind any address: send + sendto\n\n"); + if (!init_suite(init_udp_bind_any)) + run_suite(send_ip4_udp_any, sendto_ip4_udp_any); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP socket BIND local address: send + sendto\n\n"); + if (!init_suite(init_udp6_bind_local_ip)) + run_suite(send_ip6_udp_local_ip, 
sendto_ip6_udp_local_ip); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 UDP socket bind any address: send + sendto\n\n"); + if (!init_suite(init_udp6_bind_any)) + run_suite(send_ip6_udp_any, sendto_ip6_udp_any); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /* INET6 */ + + OFP_INFO("\n\nSuite: IPv4 UDP bind local IP: sendto + recv.\n\n"); + if (!init_suite(init_udp_local_ip)) + run_suite(send_udp_local_ip, recv_udp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 UDP bind local IP: sendto + recvfrom.\n\n"); + if (!init_suite(init_udp_bind_local_ip)) + run_suite(send_udp_local_ip, recvfrom_udp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 UDP bind any address: sendto + recv.\n\n"); + if (!init_suite(init_udp_any)) + run_suite(send_udp_any, recv_udp); + end_suite(); + + OFP_INFO("\n\nSuite: IPv4 UDP bind any address: sendto + recvfrom.\n\n"); + if (!init_suite(init_udp_bind_any)) + run_suite(send_udp_any, recvfrom_udp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 UDP bind any address: sendto + recvfrom(NULL addr).\n\n"); + if (!init_suite(init_udp_bind_any)) + run_suite(send_udp_any, recvfrom_udp_null_addr); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP bind local IP: sendto + recv.\n\n"); + if (!init_suite(init_udp6_bind_local_ip)) + run_suite(send_udp6_local_ip, recv_udp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 UDP bind local IP: sendto + recvfrom.\n\n"); + if (!init_suite(init_udp6_bind_local_ip)) + run_suite(send_udp6_local_ip, recvfrom_udp6); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 UDP bind any IP: sendto + recv.\n\n"); + if (!init_suite(init_udp6_bind_any)) + run_suite(send_udp6_any, recv_udp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 UDP bind any IP: sendto + recvfrom.\n\n"); + if 
(!init_suite(init_udp6_bind_any)) + run_suite(send_udp6_any, recvfrom_udp6); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 UDP bind any IP: sendto + recvfrom(NULL addr).\n\n"); + if (!init_suite(init_udp6_bind_any)) + run_suite(send_udp6_any, recvfrom_udp_null_addr); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + OFP_INFO("\n\nSuite: IPv4 TCP socket local IP: listen.\n\n"); + if (!init_suite(init_tcp_bind_local_ip)) + run_suite(listen_tcp, listen_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 TCP socket local IP: listen.\n\n"); + if (!init_suite(init_tcp6_bind_local_ip)) + run_suite(listen_tcp, listen_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + OFP_INFO("\n\nSuite: IPv4 TCP socket local IP: connect + accept.\n\n"); + if (!init_suite(init_tcp_bind_listen_local_ip)) + run_suite(connect_tcp4_local_ip, accept_tcp4); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 TCP socket any IP: connect + accept.\n\n"); + if (!init_suite(init_tcp_bind_listen_any)) + run_suite(connect_tcp4_any, accept_tcp4); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 TCP socket local IP: connect + accept null address.\n\n"); + if (!init_suite(init_tcp_bind_listen_local_ip)) + run_suite(connect_tcp4_local_ip, accept_tcp4_null_addr); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 TCP socket local IP: connect + accept.\n\n"); + if (!init_suite(init_tcp6_bind_listen_local_ip)) + run_suite(connect_tcp6_local_ip, accept_tcp6); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 TCP socket any IP: connect + accept.\n\n"); + if (!init_suite(init_tcp6_bind_listen_any)) + run_suite(connect_tcp6_any, accept_tcp6); + end_suite(); + OFP_INFO("Test ended.\n"); + + + OFP_INFO("\n\nSuite: IPv6 TCP socket local IP: connect + accept null address.\n\n"); + if 
(!init_suite(init_tcp6_bind_listen_local_ip)) + run_suite(connect_tcp6_local_ip, accept_tcp6_null_addr); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + OFP_INFO("\n\nSuite: IPv4 TCP socket local IP: send + recv.\n\n"); + if (!init_suite(init_tcp_bind_listen_local_ip)) + run_suite(send_tcp4_local_ip, receive_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 TCP socket any IP: send + recv.\n\n"); + if (!init_suite(init_tcp_bind_listen_any)) + run_suite(send_tcp4_any, receive_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 TCP socket local IP: send + recv.\n\n"); + if (!init_suite(init_tcp6_bind_listen_local_ip)) + run_suite(send_tcp6_local_ip, receive_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 TCP socket any IP: send + recv.\n\n"); + if (!init_suite(init_tcp6_bind_listen_any)) + run_suite(send_tcp6_any, receive_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + OFP_INFO("\n\nSuite: IPv4 UDP bind local IP: select + recv.\n\n"); + if (!init_suite(init_udp_bind_local_ip)) + run_suite(send_udp_local_ip, select_recv_udp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 TCP bind local IP: select + accept + recv.\n\n"); + if (!init_suite(init_tcp_bind_listen_local_ip)) + run_suite(send_tcp4_local_ip, select_recv_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP bind local IP: select + recv.\n\n"); + if (!init_suite(init_udp6_bind_local_ip)) + run_suite(send_udp6_local_ip, select_recv_udp); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv6 TCP bind local IP: select + accept + recv.\n\n"); + if (!init_suite(init_tcp6_bind_listen_local_ip)) + run_suite(send_tcp6_local_ip, select_recv_tcp); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + OFP_INFO("\n\nSuite: IPv4 UDP bindlocal IP: select + recv x2.\n\n"); + if 
(!init_suite(init_udp_bind_local_ip)) + run_suite(send_udp_local_ip, select_recv_udp_2); + end_suite(); + OFP_INFO("Test ended.\n"); + + OFP_INFO("\n\nSuite: IPv4 UDP bind local IP: socket_sigevent rcv.\n\n"); + if (!init_suite(init_udp_bind_local_ip)) + run_suite(recv_send_udp_local_ip, socket_sigevent_udp4); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 UDP bind local IP: socket_sigevent rcv.\n\n"); + if (!init_suite(init_udp6_bind_local_ip)) + run_suite(recv_send_udp6_local_ip, socket_sigevent_udp6); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + OFP_INFO("\n\nSuite: IPv4 TCP bind local IP: socket_sigevent rcv.\n\n"); + if (!init_suite(init_tcp_bind_listen_local_ip)) + run_suite(connect_recv_send_tcp_local_ip, socket_sigevent_tcp_rcv); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 TCP bind local IP: socket_sigevent rcv.\n\n"); + if (!init_suite(init_tcp6_bind_listen_local_ip)) + run_suite(connect_recv_send_tcp6_local_ip, socket_sigevent_tcp_rcv); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + OFP_INFO("\n\nSuite: IPv4 TCP bind local IP: socket_sigevent accept.\n\n"); + if (!init_suite(init_tcp_bind_listen_local_ip)) + run_suite(connect_tcp_delayed_local_ip, socket_sigevent_tcp_accept); + end_suite(); + OFP_INFO("Test ended.\n"); + +#ifdef INET6 + OFP_INFO("\n\nSuite: IPv6 TCP bind local IP: socket_sigevent accept.\n\n"); + if (!init_suite(init_tcp6_bind_listen_local_ip)) + run_suite(connect_tcp6_delayed_local_ip, + socket_sigevent_tcp_accept); + end_suite(); + OFP_INFO("Test ended.\n"); +#endif /*INET6*/ + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t 
*appl_args)
+{
+	int opt;
+	int long_index;
+	char *names, *str, *token, *save;
+	size_t len;
+	int i;
+	static struct option longopts[] = {
+		{"count", required_argument, NULL, 'c'},
+		{"interface", required_argument, NULL, 'i'},	/* return 'i' */
+		{"help", no_argument, NULL, 'h'},		/* return 'h' */
+		{"configuration file", required_argument,
+			NULL, 'f'},/* return 'f' */
+		{NULL, 0, NULL, 0}
+	};
+
+	memset(appl_args, 0, sizeof(*appl_args));
+
+	while (1) {
+		opt = getopt_long(argc, argv, "+c:i:hf:",
+				  longopts, &long_index);
+
+		if (opt == -1)
+			break;	/* No more options */
+
+		switch (opt) {
+		case 'c':
+			appl_args->core_count = atoi(optarg);
+			break;
+			/* parse packet-io interface names */
+		case 'i':
+			len = strlen(optarg);
+			if (len == 0) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+			len += 1;	/* add room for '\0' */
+
+			/*
+			 * 'names' is intentionally never freed on success:
+			 * the tokens stored in if_names[] point into it.
+			 */
+			names = malloc(len);
+			if (names == NULL) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			/* count the number of tokens separated by ',' */
+			strcpy(names, optarg);
+			for (str = names, i = 0;; str = NULL, i++) {
+				token = strtok_r(str, ",", &save);
+				if (token == NULL)
+					break;
+			}
+			appl_args->if_count = i;
+
+			if (appl_args->if_count == 0) {
+				free(names);
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			/* allocate storage for the if names */
+			appl_args->if_names =
+				calloc(appl_args->if_count, sizeof(char *));
+			if (appl_args->if_names == NULL) {
+				/* same policy as the malloc() failure above */
+				free(names);
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			/* store the if names (reset names string) */
+			strcpy(names, optarg);
+			for (str = names, i = 0;; str = NULL, i++) {
+				token = strtok_r(str, ",", &save);
+				if (token == NULL)
+					break;
+				appl_args->if_names[i] = token;
+			}
+			break;
+
+		case 'h':
+			usage(argv[0]);
+			exit(EXIT_SUCCESS);
+			break;
+
+		case 'f':
+			len = strlen(optarg);
+			if (len == 0) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+			len += 1;	/* add room for '\0' */
+
+			appl_args->conf_file = malloc(len);
+			if (appl_args->conf_file == NULL) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			strcpy(appl_args->conf_file, optarg);
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/* at least one interface (-i) is mandatory */
+	if (appl_args->if_count == 0) {
+		usage(argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	optind = 1;		/* reset 'extern optind' from the getopt lib */
+}
+
+/**
+ * Print system and application info
+ *
+ * @param progname   Program name shown in the banner
+ * @param appl_args  Parsed application arguments (interface list is printed)
+ */
+static void print_info(char *progname, appl_args_t *appl_args)
+{
+	int i;
+
+	printf("\n"
+	       "ODP system info\n"
+	       "---------------\n"
+	       "ODP API version: %s\n"
+	       "CPU model: %s\n"
+	       "CPU freq (hz): %"PRIu64"\n"
+	       "Cache line size: %i\n"
+	       "Core count: %i\n"
+	       "\n",
+	       odp_version_api_str(), odp_sys_cpu_model_str(),
+	       odp_sys_cpu_hz(), odp_sys_cache_line_size(),
+	       odp_cpu_count());
+
+	printf("Running ODP appl: \"%s\"\n"
+	       "-----------------\n"
+	       "IF-count: %i\n"
+	       "Using IFs: ",
+	       progname, appl_args->if_count);
+	for (i = 0; i < appl_args->if_count; ++i)
+		printf(" %s", appl_args->if_names[i]);
+	printf("\n\n");
+	fflush(NULL);
+}
+
+/**
+ * Print usage information
+ *
+ * @param progname  Program name (argv[0]); printed through NO_PATH()
+ */
+static void usage(char *progname)
+{
+	printf("\n"
+	       "Usage: %s OPTIONS\n"
+	       " E.g. %s -i eth1,eth2,eth3\n"
+	       "\n"
+	       "ODPFastpath application.\n"
+	       "\n"
+	       "Mandatory OPTIONS:\n"
+	       " -i, --interface Eth interfaces (comma-separated, no spaces)\n"
+	       "\n"
+	       "Optional OPTIONS\n"
+	       " -c, --count Core count.\n"
+	       " -f <file> Configuration file.\n"
+	       " -h, --help Display help and exit.\n"
+	       "\n", NO_PATH(progname), NO_PATH(progname)
+	    );
+}
diff --git a/example/socket/socket_select.c b/example/socket/socket_select.c
new file mode 100644
index 00000000..d564016f
--- /dev/null
+++ b/example/socket/socket_select.c
@@ -0,0 +1,211 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "ofp.h"
+#include "socket_select.h"
+#include "socket_util.h"
+
+/**
+ * Wait up to 1 s with ofp_select() for data on a socket, then read it
+ * with ofp_recv() and log it.
+ *
+ * @param fd  Socket to wait on and read from
+ * @return 0 on success, -1 on any failure (select, not readable, recv)
+ */
+int select_recv_udp(int fd)
+{
+	char buf[20];
+	int len;
+	struct ofp_timeval timeout;
+	int ret_select = 0;
+	ofp_fd_set read_fd;
+
+	OFP_FD_ZERO(&read_fd);
+	OFP_FD_SET(fd, &read_fd);
+
+	timeout.tv_sec = 1;
+	timeout.tv_usec = 0;
+
+	ret_select = ofp_select(fd + 1, &read_fd, NULL, NULL, &timeout);
+	if (ret_select == -1) {
+		OFP_ERR("Faild to select (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+	if (ret_select != 1) {
+		OFP_ERR("Faild to select: invalid value returned %d\n",
+			ret_select);
+		return -1;
+	}
+
+	if (!OFP_FD_ISSET(fd, &read_fd)) {
+		OFP_ERR("Faild: socket is not selected\n");
+		return -1;
+	}
+
+	OFP_INFO("ofp_select() returned %d; socket is selected.\n",
+		ret_select);
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recv(fd, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Wait up to 200 ms with ofp_select() on a socket, accept a connection
+ * from it, read once from the accepted socket and close it.
+ *
+ * @param fd  Socket passed to ofp_select() and ofp_accept()
+ * @return 0 on success, -1 on any failure
+ */
+int select_recv_tcp(int fd)
+{
+	char buf[20];
+	int len;
+	int fd_accepted = -1;
+	struct ofp_timeval timeout;
+	int ret_select = 0;
+	ofp_fd_set read_fd;
+
+	OFP_FD_ZERO(&read_fd);
+	OFP_FD_SET(fd, &read_fd);
+
+	timeout.tv_sec = 0;
+	timeout.tv_usec = 200000;
+
+	ret_select = ofp_select(fd + 1, &read_fd, NULL, NULL, &timeout);
+	if (ret_select == -1) {
+		OFP_ERR("Faild to select (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+	if (ret_select != 1) {
+		OFP_ERR("Faild to select: invalid value returned %d\n",
+			ret_select);
+		return -1;
+	}
+
+	if (!OFP_FD_ISSET(fd, &read_fd)) {
+		OFP_ERR("Faild: socket is not selected\n");
+		return -1;
+	}
+	OFP_INFO("ofp_select() returned %d; socket is selected.\n",
+		ret_select);
+
+	fd_accepted = ofp_accept(fd, NULL, NULL);
+
+	if (fd_accepted == -1) {
+		OFP_ERR("Faild to accept connection (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recv(fd_accepted, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to recv (errno = %d)\n",
+			ofp_errno);
+		ofp_close(fd_accepted);
+		return -1;
+	}
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	if (ofp_close(fd_accepted) == -1) {
+		OFP_ERR("Faild to close accepted socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Select on two UDP sockets at once: the caller's socket and a local one
+ * (fd3) bound to TEST_PORT + 2, to which this function first sends a
+ * datagram from a second local socket (fd2). Both sockets must be
+ * reported readable; one datagram is then read from each.
+ *
+ * The two local sockets are closed on every return path.
+ *
+ * @param fd  Caller's socket, expected to become readable as well
+ * @return 0 on success, -1 on any failure
+ */
+int select_recv_udp_2(int fd)
+{
+	char buf[20];
+	int len;
+	int nfds;
+	int ret = -1;
+	struct ofp_timeval timeout;
+	int ret_select = 0;
+	ofp_fd_set read_fd;
+	struct ofp_sockaddr_in addr = {0};
+	int fd2;
+	int fd3;
+
+	fd2 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, OFP_IPPROTO_UDP);
+	if (fd2 == -1) {
+		OFP_ERR("Failed to create SEND socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	fd3 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, OFP_IPPROTO_UDP);
+	if (fd3 == -1) {
+		OFP_ERR("Failed to create RCV socket (errno = %d)\n",
+			ofp_errno);
+		ofp_close(fd2);
+		return -1;
+	}
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 2);
+	addr.sin_addr.s_addr = OFP_INADDR_ANY;
+
+	if (ofp_bind(fd3, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		goto out;
+	}
+
+	strcpy(buf, "socket_2");
+	if (ofp_sendto(fd2, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		goto out;
+	}
+
+	OFP_FD_ZERO(&read_fd);
+	OFP_FD_SET(fd, &read_fd);
+	OFP_FD_SET(fd3, &read_fd);
+
+	timeout.tv_sec = 1;
+	timeout.tv_usec = 0;
+
+	/* nfds must exceed the highest descriptor in the set, not just fd. */
+	nfds = (fd > fd3 ? fd : fd3) + 1;
+
+	ret_select = ofp_select(nfds, &read_fd, NULL, NULL, &timeout);
+	if (ret_select == -1) {
+		OFP_ERR("Faild to select (errno = %d)\n", ofp_errno);
+		goto out;
+	}
+
+	if (!OFP_FD_ISSET(fd, &read_fd)) {
+		OFP_ERR("Faild: socket is not selected\n");
+		goto out;
+	}
+	if (!OFP_FD_ISSET(fd3, &read_fd)) {
+		OFP_ERR("Faild: socket is not selected\n");
+		goto out;
+	}
+	OFP_INFO("ofp_select() returned %d; sockets are selected.\n",
+		ret_select);
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recv(fd, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		goto out;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data1 (%s, len = %d) was received.\n", buf, len);
+
+	len = ofp_recv(fd3, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		goto out;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data2 (%s, len = %d) was received.\n", buf, len);
+
+	ret = 0;
+
+out:
+	/* single exit: the helper sockets never outlive this function */
+	ofp_close(fd2);
+	ofp_close(fd3);
+
+	if (ret == 0)
+		OFP_INFO("SUCCESS.\n");
+	return ret;
+}
+
diff --git a/example/socket/socket_select.h b/example/socket/socket_select.h
new file mode 100644
index 00000000..045d45a6
--- /dev/null
+++ b/example/socket/socket_select.h
@@ -0,0 +1,17 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SOCKET_SELECT_H__
+#define __SOCKET_SELECT_H__
+
+int select_recv_udp(int fd);
+int select_recv_tcp(int fd);
+
+int select_recv_udp_2(int fd);
+
+#endif /* __SOCKET_SELECT_H__ */
+
diff --git a/example/socket/socket_send_recv_tcp.c b/example/socket/socket_send_recv_tcp.c
new file mode 100644
index 00000000..d285831e
--- /dev/null
+++ b/example/socket/socket_send_recv_tcp.c
@@ -0,0 +1,155 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "ofp.h"
+#include "socket_send_recv_tcp.h"
+#include "socket_util.h"
+
+/**
+ * Connect to 192.168.100.1:(TEST_PORT + 1) and send the test string,
+ * including its terminating '\0'.
+ *
+ * @param fd  TCP socket to connect and send on
+ * @return 0 on success, -1 if connect or send fails
+ */
+int send_tcp4_local_ip(int fd)
+{
+	struct ofp_sockaddr_in addr = {0};
+	const char *buf = "socket_test";
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	sleep(1);	/* ToFix: connect is not blocking*/
+
+	if (ofp_send(fd, buf, strlen(buf) + 1, 0) == -1) {
+		OFP_ERR("Faild to send (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Same as send_tcp4_local_ip(), but the destination address is
+ * OFP_INADDR_ANY.
+ *
+ * @param fd  TCP socket to connect and send on
+ * @return 0 on success, -1 if connect or send fails
+ */
+int send_tcp4_any(int fd)
+{
+	struct ofp_sockaddr_in addr = {0};
+	const char *buf = "socket_test";
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = OFP_INADDR_ANY;
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	sleep(1);	/* ToFix: connect is not blocking*/
+
+	if (ofp_send(fd, buf, strlen(buf) + 1, 0) == -1) {
+		OFP_ERR("Faild to send (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+#ifdef INET6
+/**
+ * Connect to [fd00:1baf::1]:(TEST_PORT + 1) and send the test string,
+ * including its terminating '\0'.
+ *
+ * @param fd  TCP socket to connect and send on
+ * @return 0 on success, -1 if connect or send fails
+ */
+int send_tcp6_local_ip(int fd)
+{
+	struct ofp_sockaddr_in6 addr = {0};
+	const char *buf = "socket_test";
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr);
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	sleep(1);	/* ToFix: connect is not blocking*/
+
+	if (ofp_send(fd, buf, strlen(buf) + 1, 0) == -1) {
+		OFP_ERR("Faild to send (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Same as send_tcp6_local_ip(), but the destination address is
+ * ofp_in6addr_any.
+ *
+ * @param fd  TCP socket to connect and send on
+ * @return 0 on success, -1 if connect or send fails
+ */
+int send_tcp6_any(int fd)
+{
+	struct ofp_sockaddr_in6 addr = {0};
+	const char *buf = "socket_test";
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin6_addr = ofp_in6addr_any;
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	sleep(1);	/* ToFix: connect is not blocking*/
+
+	if (ofp_send(fd, buf, strlen(buf) + 1, 0) == -1) {
+		OFP_ERR("Faild to send (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+#endif /*INET6*/
+
+/**
+ * Accept one connection on a socket, read once from the accepted
+ * socket, log the data and close the accepted socket.
+ *
+ * @param fd  Socket passed to ofp_accept()
+ * @return 0 on success, -1 if accept, recv or close fails
+ */
+int receive_tcp(int fd)
+{
+	char buf[20];
+	int len;
+	int fd_accepted = -1;
+
+	fd_accepted = ofp_accept(fd, NULL, NULL);
+
+	if (fd_accepted == -1) {
+		OFP_ERR("Faild to accept connection (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recv(fd_accepted, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to recv (errno = %d)\n",
+			ofp_errno);
+		ofp_close(fd_accepted);
+		return -1;
+	}
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	if (ofp_close(fd_accepted) == -1) {
+		OFP_ERR("Faild to close accepted socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
diff --git a/example/socket/socket_send_recv_tcp.h b/example/socket/socket_send_recv_tcp.h
new file mode 100644
index 00000000..020c7e3f
--- /dev/null
+++ b/example/socket/socket_send_recv_tcp.h
@@ -0,0 +1,22 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SOCKET_SEND_RECV_TCP_H__
+#define __SOCKET_SEND_RECV_TCP_H__
+
+int send_tcp4_local_ip(int fd);
+int send_tcp4_any(int fd);
+
+#ifdef INET6
+int send_tcp6_local_ip(int fd);
+int send_tcp6_any(int fd);
+#endif /* INET6 */
+
+int receive_tcp(int fd);
+
+#endif /* __SOCKET_SEND_RECV_TCP_H__ */
+
diff --git a/example/socket/socket_send_recv_udp.c b/example/socket/socket_send_recv_udp.c
new file mode 100644
index 00000000..222b0821
--- /dev/null
+++ b/example/socket/socket_send_recv_udp.c
@@ -0,0 +1,273 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "ofp.h"
+#include "socket_send_recv_udp.h"
+#include "socket_util.h"
+
+/**
+ * Create two IPv4 UDP sockets; the second one is bound to
+ * 192.168.100.1:(TEST_PORT + 1), the first one is left unbound.
+ *
+ * @param pfd_thread1  Receives the first (SEND) socket
+ * @param pfd_thread2  Receives the second (RCV) socket
+ * @return 0 on success, -1 on failure
+ */
+int init_udp_local_ip(int *pfd_thread1, int *pfd_thread2)
+{
+	struct ofp_sockaddr_in addr = {0};
+
+
+	*pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread1 == -1) {
+		OFP_ERR("Faild to create SEND socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	*pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread2 == -1) {
+		OFP_ERR("Faild to create RCV socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Same as init_udp_local_ip(), but the second socket is bound to
+ * OFP_INADDR_ANY:(TEST_PORT + 1).
+ *
+ * @param pfd_thread1  Receives the first (SEND) socket
+ * @param pfd_thread2  Receives the second (RCV) socket
+ * @return 0 on success, -1 on failure
+ */
+int init_udp_any(int *pfd_thread1, int *pfd_thread2)
+{
+	struct ofp_sockaddr_in addr = {0};
+
+
+	*pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread1 == -1) {
+		OFP_ERR("Faild to create SEND socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	*pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread2 == -1) {
+		OFP_ERR("Faild to create RCV socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = OFP_INADDR_ANY;
+
+	if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Send the test string (without its '\0') with ofp_sendto() to
+ * 192.168.100.1:(TEST_PORT + 1).
+ *
+ * @param fd  UDP socket to send on
+ * @return 0 on success, -1 if the send fails
+ */
+int send_udp_local_ip(int fd)
+{
+	const char *buf = "socket_test";
+	struct ofp_sockaddr_in dest_addr = {0};
+
+	dest_addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	dest_addr.sin_family = OFP_AF_INET;
+	dest_addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	dest_addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Read one message with ofp_recv() and log it as a string.
+ *
+ * @param fd  Socket to read from
+ * @return 0 on success, -1 if the receive fails
+ */
+int recv_udp(int fd)
+{
+	char buf[20];
+	int len;
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recv(fd, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Same as send_udp_local_ip(), but the destination address is
+ * OFP_INADDR_ANY.
+ *
+ * @param fd  UDP socket to send on
+ * @return 0 on success, -1 if the send fails
+ */
+int send_udp_any(int fd)
+{
+	const char *buf = "socket_test";
+	struct ofp_sockaddr_in dest_addr = {0};
+
+	dest_addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	dest_addr.sin_family = OFP_AF_INET;
+	dest_addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	dest_addr.sin_addr.s_addr = OFP_INADDR_ANY;
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Read one message with ofp_recvfrom(), log it, and verify that a
+ * source address of IPv4 size was returned.
+ *
+ * @param fd  Socket to read from
+ * @return 0 on success, -1 if the receive fails or the returned
+ *         address length is not sizeof(struct ofp_sockaddr_in)
+ */
+int recvfrom_udp(int fd)
+{
+	char buf[20];
+	int len;
+	struct ofp_sockaddr_in addr = {0};
+	/* value-result argument: pass in the space available in 'addr' */
+	ofp_socklen_t addr_len = sizeof(addr);
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recvfrom(fd, buf, sizeof(buf) - 1, 0,
+		(struct ofp_sockaddr *)&addr, &addr_len);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	if (addr_len != sizeof(addr)) {
+		OFP_ERR("Faild to rcv source address: %d (errno = %d)\n",
+			addr_len, ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data was received on address 0x%x, port = %d.\n",
+		odp_be_to_cpu_32(addr.sin_addr.s_addr),
+		odp_be_to_cpu_16(addr.sin_port));
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Read one message with ofp_recvfrom() while passing NULL for both the
+ * source address and its length, and log the data.
+ *
+ * @param fd  Socket to read from
+ * @return 0 on success, -1 if the receive fails
+ */
+int recvfrom_udp_null_addr(int fd)
+{
+	char buf[20];
+	int len;
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recvfrom(fd, buf, sizeof(buf) - 1, 0, NULL, NULL);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+#ifdef INET6
+/**
+ * Send a test string (without its '\0') with ofp_sendto() to
+ * [fd00:1baf::1]:(TEST_PORT + 1).
+ *
+ * @param fd  UDP socket to send on
+ * @return 0 on success, -1 if the send fails
+ */
+int send_udp6_local_ip(int fd)
+{
+	struct ofp_sockaddr_in6 dest_addr = {0};
+	const char *buf = "socket_snd2";
+
+	dest_addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	dest_addr.sin6_family = OFP_AF_INET6;
+	dest_addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&dest_addr.sin6_addr);
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * Same as send_udp6_local_ip(), but sends "socket_test" to
+ * ofp_in6addr_any.
+ *
+ * @param fd  UDP socket to send on
+ * @return 0 on success, -1 if the send fails
+ */
+int send_udp6_any(int fd)
+{
+	struct ofp_sockaddr_in6 dest_addr = {0};
+	const char *buf = "socket_test";
+
+	dest_addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	dest_addr.sin6_family = OFP_AF_INET6;
+	dest_addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	dest_addr.sin6_addr = ofp_in6addr_any;
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/**
+ * IPv6 counterpart of recvfrom_udp(): read one message with
+ * ofp_recvfrom(), log it, and verify that a source address of IPv6 size
+ * was returned.
+ *
+ * @param fd  Socket to read from
+ * @return 0 on success, -1 if the receive fails or the returned
+ *         address length is not sizeof(struct ofp_sockaddr_in6)
+ */
+int recvfrom_udp6(int fd)
+{
+	char buf[20];
+	int len;
+	struct ofp_sockaddr_in6 addr = {0};
+	/* value-result argument: pass in the space available in 'addr' */
+	ofp_socklen_t addr_len = sizeof(addr);
+
+	/* Read at most sizeof(buf) - 1 bytes so the '\0' below fits. */
+	len = ofp_recvfrom(fd, buf, sizeof(buf) - 1, 0,
+		(struct ofp_sockaddr *)&addr, &addr_len);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	if (addr_len != sizeof(addr)) {
+		OFP_ERR("Faild to rcv source address: %d (errno = %d)\n",
+			addr_len, ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data was received on address %x:%x:%x:%x, port = %d.\n",
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[0]),
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[1]),
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[2]),
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[3]),
+		odp_be_to_cpu_16(addr.sin6_port));
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+#endif /*INET6*/
diff --git a/example/socket/socket_send_recv_udp.h b/example/socket/socket_send_recv_udp.h
new file mode 100644
index 00000000..1711fc62
--- /dev/null
+++ b/example/socket/socket_send_recv_udp.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SOCKET_SEND_RECV_UDP_H__
+#define __SOCKET_SEND_RECV_UDP_H__
+
+int init_udp_local_ip(int *pfd_thread1, int *pfd_thread2);
+int init_udp_any(int *pfd_thread1, int *pfd_thread2);
+
+int send_udp_local_ip(int fd);
+int send_udp_any(int fd);
+int recv_udp(int fd);
+int recvfrom_udp(int fd);
+int recvfrom_udp_null_addr(int fd);
+
+#ifdef INET6
+int send_udp6_local_ip(int fd);
+int send_udp6_any(int fd);
+int recvfrom_udp6(int fd);
+#endif /* INET6 */
+
+#endif /* __SOCKET_SEND_RECV_UDP_H__ */
+
diff --git a/example/socket/socket_send_sendto_udp.c b/example/socket/socket_send_sendto_udp.c
new file mode 100644
index 00000000..d4e14e53
--- /dev/null
+++ b/example/socket/socket_send_sendto_udp.c
@@ -0,0 +1,396 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "ofp.h"
+#include "socket_send_sendto_udp.h"
+#include "socket_util.h"
+
+/*
+ * Create two IPv4 UDP sockets and bind both to 192.168.100.1:
+ * *pfd_thread1 on TEST_PORT, *pfd_thread2 on TEST_PORT + 1.
+ * Returns 0 on success, -1 on the first failing socket/bind call.
+ * NOTE(review): sockets already created are not closed here on failure;
+ * presumably the caller cleans up - confirm.
+ */
+int init_udp_bind_local_ip(int *pfd_thread1, int *pfd_thread2)
+{
+	struct ofp_sockaddr_in addr = {0};
+
+	*pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread1 == -1) {
+		OFP_ERR("Faild to create SEND socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT);
+	addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	*pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread2 == -1) {
+		OFP_ERR("Faild to create RCV socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	/* same address as above; only the port differs */
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	return 0;
+}
+/*
+ * Same as init_udp_bind_local_ip(), but both sockets are bound to
+ * OFP_INADDR_ANY (ports TEST_PORT and TEST_PORT + 1).
+ * Returns 0 on success, -1 on the first failing socket/bind call.
+ */
+int init_udp_bind_any(int *pfd_thread1, int *pfd_thread2)
+{
+	struct ofp_sockaddr_in addr = {0};
+
+
+	*pfd_thread1 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread1 == -1) {
+		OFP_ERR("Faild to create SEND socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT);
+	addr.sin_addr.s_addr = OFP_INADDR_ANY;
+
+	if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	*pfd_thread2 = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM,
+		OFP_IPPROTO_UDP);
+	if (*pfd_thread2 == -1) {
+		OFP_ERR("Faild to create RCV socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = OFP_INADDR_ANY;
+
+	if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+#ifdef INET6
+/*
+ * Create two IPv6 UDP sockets and bind both to fd00:1baf::1:
+ * *pfd_thread1 on TEST_PORT, *pfd_thread2 on TEST_PORT + 1.
+ * Returns 0 on success, -1 on the first failing socket/bind call.
+ * NOTE(review): the inet_pton() result is not checked.
+ */
+int init_udp6_bind_local_ip(int *pfd_thread1, int *pfd_thread2)
+{
+	struct ofp_sockaddr_in6 addr = {0};
+
+	*pfd_thread1 = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, 0);
+	if (*pfd_thread1 == -1) {
+		OFP_ERR("Faild to create socket 1 (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr);
+
+	if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	*pfd_thread2 = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, 0);
+	if (*pfd_thread2 == -1) {
+		OFP_ERR("Faild to create socket 2 (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr);
+
+	if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Same as init_udp6_bind_local_ip(), but both sockets are bound to
+ * ofp_in6addr_any (ports TEST_PORT and TEST_PORT + 1).
+ * Returns 0 on success, -1 on the first failing socket/bind call.
+ */
+int init_udp6_bind_any(int *pfd_thread1, int *pfd_thread2)
+{
+	struct ofp_sockaddr_in6 addr = {0};
+
+	*pfd_thread1 = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, 0);
+	if (*pfd_thread1 == -1) {
+		OFP_ERR("Faild to create socket 1 (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT);
+	addr.sin6_addr = ofp_in6addr_any;
+
+	if (ofp_bind(*pfd_thread1, (const struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	*pfd_thread2 = ofp_socket(OFP_AF_INET6, OFP_SOCK_DGRAM, 0);
+	if (*pfd_thread2 == -1) {
+		OFP_ERR("Faild to create socket 2 (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin6_addr = ofp_in6addr_any;
+
+	if (ofp_bind(*pfd_thread2, (const struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to bind socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	return 0;
+}
+#endif /* INET6 */
+
+/*
+ * Connect the UDP socket to 192.168.100.1:(TEST_PORT + 1) and send the
+ * test string (without its '\0') with ofp_send().
+ * Returns 0 on success, -1 if connect or send fails.
+ */
+int send_ip4_udp_local_ip(int fd)
+{
+	struct ofp_sockaddr_in addr = {0};
+	const char *buf = "socket_test";
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to connect socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	if (ofp_send(fd, buf, strlen(buf), 0) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+/*
+ * Send the test string (without its '\0') with ofp_sendto() to
+ * 192.168.100.1:TEST_PORT, without connecting the socket first.
+ * Returns 0 on success, -1 if the send fails.
+ */
+int sendto_ip4_udp_local_ip(int fd)
+{
+	struct ofp_sockaddr_in dest_addr = {0};
+	const char *buf = "socket_test";
+
+	dest_addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	dest_addr.sin_family = OFP_AF_INET;
+	dest_addr.sin_port = odp_cpu_to_be_16(TEST_PORT);
+	dest_addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+/*
+ * Same as send_ip4_udp_local_ip(), but the socket is connected to
+ * OFP_INADDR_ANY:(TEST_PORT + 1).
+ * Returns 0 on success, -1 if connect or send fails.
+ */
+int send_ip4_udp_any(int fd)
+{
+	struct ofp_sockaddr_in addr = {0};
+	const char *buf = "socket_test";
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = OFP_INADDR_ANY;
+
+	if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr,
+		sizeof(struct ofp_sockaddr_in)) == -1) {
+		OFP_ERR("Faild to connect socket (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	if (ofp_send(fd, buf, strlen(buf), 0) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+int sendto_ip4_udp_any(int fd)
+{
+	struct ofp_sockaddr_in dest_addr = {0};
+	const char
*buf = "socket_test"; + + dest_addr.sin_len = sizeof(struct ofp_sockaddr_in); + dest_addr.sin_family = OFP_AF_INET; + dest_addr.sin_port = odp_cpu_to_be_16(TEST_PORT); + dest_addr.sin_addr.s_addr = OFP_INADDR_ANY; + + if (ofp_sendto(fd, buf, strlen(buf), 0, + (struct ofp_sockaddr *)&dest_addr, + sizeof(dest_addr)) == -1) { + OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("Data (%s) sent successfully.\n", buf); + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +#ifdef INET6 +int send_ip6_udp_local_ip(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + const char *buf = "socket_test"; + + addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr); + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + if (ofp_send(fd, buf, strlen(buf), 0) == -1) { + OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("Data (%s) sent successfully.\n", buf); + + OFP_INFO("SUCCESS.\n"); + return 0; +} +int sendto_ip6_udp_local_ip(int fd) +{ + struct ofp_sockaddr_in6 dest_addr = {0}; + const char *buf = "socket_snd2"; + + dest_addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + dest_addr.sin6_family = OFP_AF_INET6; + dest_addr.sin6_port = odp_cpu_to_be_16(TEST_PORT); + inet_pton(AF_INET6, "fd00:1baf::1", (void *)&dest_addr.sin6_addr); + + if (ofp_sendto(fd, buf, strlen(buf), 0, + (struct ofp_sockaddr *)&dest_addr, + sizeof(dest_addr)) == -1) { + OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("Data (%s) sent successfully.\n", buf); + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int send_ip6_udp_any(int fd) +{ + struct ofp_sockaddr_in6 addr = {0}; + const char *buf = "socket_test"; + + addr.sin6_len = sizeof(struct 
ofp_sockaddr_in6); + addr.sin6_family = OFP_AF_INET6; + addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1); + addr.sin6_addr = ofp_in6addr_any; + + if (ofp_connect(fd, (const struct ofp_sockaddr *)&addr, + sizeof(struct ofp_sockaddr_in6)) == -1) { + OFP_ERR("Faild to connect socket (errno = %d)\n", + ofp_errno); + return -1; + } + + if (ofp_send(fd, buf, strlen(buf), 0) == -1) { + OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("Data (%s) sent successfully.\n", buf); + + OFP_INFO("SUCCESS.\n"); + return 0; +} + +int sendto_ip6_udp_any(int fd) +{ + struct ofp_sockaddr_in6 dest_addr = {0}; + const char *buf = "socket_test"; + + dest_addr.sin6_len = sizeof(struct ofp_sockaddr_in6); + dest_addr.sin6_family = OFP_AF_INET6; + dest_addr.sin6_port = odp_cpu_to_be_16(TEST_PORT); + dest_addr.sin6_addr = ofp_in6addr_any; + + if (ofp_sendto(fd, buf, strlen(buf), 0, + (struct ofp_sockaddr *)&dest_addr, + sizeof(dest_addr)) == -1) { + OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno); + return -1; + } + + OFP_INFO("Data (%s) sent successfully.\n", buf); + + OFP_INFO("SUCCESS.\n"); + return 0; +} +#endif /* INET6 */ diff --git a/example/socket/socket_send_sendto_udp.h b/example/socket/socket_send_sendto_udp.h new file mode 100644 index 00000000..72c2a90c --- /dev/null +++ b/example/socket/socket_send_sendto_udp.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __SOCKET_SEND_SENDTO_UDP_H__ +#define __SOCKET_SEND_SENDTO_UDP_H__ + +int init_udp_bind_local_ip(int *pfd_thread1, int *pfd_thread2); +int init_udp_bind_any(int *pfd_thread1, int *pfd_thread2); + +#ifdef INET6 +int init_udp6_bind_local_ip(int *pfd_thread1, int *pfd_thread2); +int init_udp6_bind_any(int *pfd_thread1, int *pfd_thread2); +#endif /* INET6 */ + +int send_ip4_udp_local_ip(int fd); +int sendto_ip4_udp_local_ip(int fd); +int send_ip4_udp_any(int fd); +int sendto_ip4_udp_any(int fd); + +#ifdef INET6 +int send_ip6_udp_local_ip(int fd); +int sendto_ip6_udp_local_ip(int fd); +int send_ip6_udp_any(int fd); +int sendto_ip6_udp_any(int fd); +#endif /* INET6 */ + +#endif /* __SOCKET_SEND_SENDTO_UDP_H__ */ + diff --git a/example/socket/socket_shutdown.c b/example/socket/socket_shutdown.c new file mode 100644 index 00000000..02bb95c9 --- /dev/null +++ b/example/socket/socket_shutdown.c @@ -0,0 +1,23 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofp.h" +#include "socket_shutdown.h" +#include "socket_util.h" + + +int shutdown_socket(int fd) +{ + if (ofp_shutdown(fd, OFP_SHUT_RDWR) == -1) { + OFP_ERR("Faild to shutdown socket (errno = %d)\n", + ofp_errno); + return -1; + } + + OFP_INFO("SUCCESS.\n"); + return 0; +} diff --git a/example/socket/socket_shutdown.h b/example/socket/socket_shutdown.h new file mode 100644 index 00000000..d57e26c4 --- /dev/null +++ b/example/socket/socket_shutdown.h @@ -0,0 +1,14 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SOCKET_SHUTDOWN_H__
+#define __SOCKET_SHUTDOWN_H__
+
+int shutdown_socket(int fd);
+
+#endif /* __SOCKET_SHUTDOWN_H__ */
+
diff --git a/example/socket/socket_sigevent.c b/example/socket/socket_sigevent.c
new file mode 100644
index 00000000..8d91e3a6
--- /dev/null
+++ b/example/socket/socket_sigevent.c
@@ -0,0 +1,445 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "ofp.h"
+#include "socket_sigevent.h"
+#include "socket_util.h"
+
+/*
+ * Receive one datagram on fd, log it, and send the same text to
+ * 192.168.100.1 on port TEST_PORT + 1.
+ *
+ * Returns 0 on success, -1 if the receive or the send fails.
+ */
+int recv_send_udp_local_ip(int fd)
+{
+	char buf[20];
+	int len;
+	struct ofp_sockaddr_in dest_addr = {0};
+
+	/* Leave one byte spare so the terminator below stays inside buf. */
+	len = ofp_recv(fd, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	dest_addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	dest_addr.sin_family = OFP_AF_INET;
+	dest_addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	dest_addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+static void notify_udp_ipv4(union ofp_sigval sv);
+
+/*
+ * Register notify_udp_ipv4() as the event hook for fd, then send
+ * "sigevent_test" to 192.168.100.1 on port TEST_PORT.
+ *
+ * Returns 0 on success, -1 if the hook cannot be registered or the
+ * send fails.
+ */
+int socket_sigevent_udp4(int fd)
+{
+	struct ofp_sigevent ev;
+	struct ofp_sock_sigval ss;
+	struct ofp_sockaddr_in dest_addr = {0};
+	const char *buf = "sigevent_test";
+
+	ss.sockfd = fd;
+	ss.event = OFP_EVENT_INVALID;
+	ss.pkt = ODP_PACKET_INVALID;
+
+	ev.ofp_sigev_notify = OFP_SIGEV_HOOK;
+	ev.ofp_sigev_notify_function = notify_udp_ipv4;
+	ev.ofp_sigev_value.sival_ptr = &ss;
+	/* Same check as the TCP variants: do not send if no hook is set. */
+	if (ofp_socket_sigevent(&ev) == -1) {
+		OFP_ERR("Faild to set sigevent(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	dest_addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	dest_addr.sin_family = OFP_AF_INET;
+	dest_addr.sin_port = odp_cpu_to_be_16(TEST_PORT);
+	dest_addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+	/*
+	 * NOTE(review): ss lives on this stack frame and its address was
+	 * handed to the hook above; the sleep presumably keeps it alive
+	 * until the hook has run - confirm against the OFP event model.
+	 */
+	sleep(2);
+	return 0;
+}
+
+/*
+ * Event hook installed by socket_sigevent_udp4(). sv.sival_ptr is the
+ * struct ofp_sock_sigval registered there; the packet it references is
+ * parsed, and its payload and source address are logged.
+ */
+static void notify_udp_ipv4(union ofp_sigval sv)
+{
+	struct ofp_sockaddr_in addr = {0};
+	ofp_socklen_t addr_len = sizeof(addr);
+	int data_len = 0;
+	uint8_t *data = NULL;
+	struct ofp_sock_sigval *ss;
+	int i;
+
+	ss = (struct ofp_sock_sigval *)sv.sival_ptr;
+
+	data = ofp_udp_packet_parse(ss->pkt, &data_len,
+			(struct ofp_sockaddr *)&addr,
+			&addr_len);
+
+	OFP_INFO("UDP data received: size %d, data: ", data_len);
+
+	for (i = 0; i < data_len; i++)
+		OFP_LOG_NO_CTX(OFP_LOG_INFO, "%c", data[i]);
+
+	OFP_LOG_NO_CTX(OFP_LOG_INFO, "\n");
+
+	if (addr_len != sizeof(addr)) {
+		OFP_ERR("Faild to rcv source address: %d (errno = %d)\n",
+			addr_len, ofp_errno);
+		return;
+	}
+
+	OFP_INFO("Data was received from address 0x%x, port = %d.\n",
+		odp_be_to_cpu_32(addr.sin_addr.s_addr),
+		odp_be_to_cpu_16(addr.sin_port));
+	/*
+	 * Mark ss->pkt invalid to indicate it was released or reused by us.
+	 */
+	odp_packet_free(ss->pkt);
+	ss->pkt = ODP_PACKET_INVALID;
+	OFP_INFO("SUCCESS.\n");
+}
+
+#ifdef INET6
+/*
+ * Receive one datagram on fd, log it, and send the same text to
+ * fd00:1baf::1 on port TEST_PORT + 1.
+ *
+ * Returns 0 on success, -1 if the receive or the send fails.
+ */
+int recv_send_udp6_local_ip(int fd)
+{
+	char buf[20];
+	int len;
+	struct ofp_sockaddr_in6 dest_addr = {0};
+
+	/* Leave one byte spare so the terminator below stays inside buf. */
+	len = ofp_recv(fd, buf, sizeof(buf) - 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	buf[len] = 0;
+	OFP_INFO("Data (%s, len = %d) was received.\n", buf, len);
+
+	dest_addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	dest_addr.sin6_family = OFP_AF_INET6;
+	dest_addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&dest_addr.sin6_addr);
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("Data (%s) sent successfully.\n", buf);
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+static void notify_udp_ipv6(union ofp_sigval sv);
+
+/*
+ * Register notify_udp_ipv6() as the event hook for fd, then send
+ * "sigevent_test" to fd00:1baf::1 on port TEST_PORT.
+ *
+ * Returns 0 on success, -1 if the hook cannot be registered or the
+ * send fails.
+ */
+int socket_sigevent_udp6(int fd)
+{
+	struct ofp_sigevent ev;
+	struct ofp_sock_sigval ss;
+	struct ofp_sockaddr_in6 dest_addr = {0};
+	const char *buf = "sigevent_test";
+
+	ss.sockfd = fd;
+	ss.event = OFP_EVENT_INVALID;
+	ss.pkt = ODP_PACKET_INVALID;
+
+	ev.ofp_sigev_notify = OFP_SIGEV_HOOK;
+	ev.ofp_sigev_notify_function = notify_udp_ipv6;
+	ev.ofp_sigev_value.sival_ptr = &ss;
+	/* Same check as the TCP variants: do not send if no hook is set. */
+	if (ofp_socket_sigevent(&ev) == -1) {
+		OFP_ERR("Faild to set sigevent(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	dest_addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	dest_addr.sin6_family = OFP_AF_INET6;
+	dest_addr.sin6_port = odp_cpu_to_be_16(TEST_PORT);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&dest_addr.sin6_addr);
+
+	if (ofp_sendto(fd, buf, strlen(buf), 0,
+		(struct ofp_sockaddr *)&dest_addr,
+		sizeof(dest_addr)) == -1) {
+		OFP_ERR("Faild to send data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	/*
+	 * NOTE(review): ss lives on this stack frame and its address was
+	 * handed to the hook above; the sleep presumably keeps it alive
+	 * until the hook has run - confirm against the OFP event model.
+	 */
+	sleep(2);
+	return 0;
+}
+
+/*
+ * Event hook installed by socket_sigevent_udp6(). sv.sival_ptr is the
+ * struct ofp_sock_sigval registered there; the packet it references is
+ * parsed, its payload and source address are logged, and the packet is
+ * then released.
+ */
+static void notify_udp_ipv6(union ofp_sigval sv)
+{
+	struct ofp_sockaddr_in6 addr = {0};
+	ofp_socklen_t addr_len = sizeof(addr);
+	int data_len = 0;
+	uint8_t *data = NULL;
+	struct ofp_sock_sigval *ss;
+	int i;
+
+	ss = (struct ofp_sock_sigval *)sv.sival_ptr;
+
+	data = ofp_udp_packet_parse(ss->pkt, &data_len,
+			(struct ofp_sockaddr *)&addr,
+			&addr_len);
+
+	OFP_INFO("UDP data received: size %d, data: ", data_len);
+
+	for (i = 0; i < data_len; i++)
+		OFP_LOG_NO_CTX(OFP_LOG_INFO, "%c", data[i]);
+
+	OFP_LOG_NO_CTX(OFP_LOG_INFO, "\n");
+
+	if (addr_len != sizeof(addr)) {
+		OFP_ERR("Faild to rcv source address: %d (errno = %d)\n",
+			addr_len, ofp_errno);
+		return;
+	}
+
+	OFP_INFO("Address: %x:%x:%x:%x, port: %d.\n",
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[0]),
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[1]),
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[2]),
+		odp_be_to_cpu_32(addr.sin6_addr.ofp_s6_addr32[3]),
+		odp_be_to_cpu_16(addr.sin6_port));
+	/*
+	 * Mark ss->pkt invalid to indicate it was released or reused by us.
+	 * The packet must actually be freed first, as the IPv4 hook does;
+	 * only clearing the handle would leak it.
+	 */
+	odp_packet_free(ss->pkt);
+	ss->pkt = ODP_PACKET_INVALID;
+	OFP_INFO("SUCCESS.\n");
+}
+#endif /* INET6 */
+
+/*
+ * Connect fd to 192.168.100.1 on port TEST_PORT + 1, receive one chunk
+ * of data and send the same bytes back.
+ *
+ * Returns 0 on success, -1 if connect, receive or send fails.
+ */
+int connect_recv_send_tcp_local_ip(int fd)
+{
+	struct ofp_sockaddr_in addr = {0};
+	char buf[20];
+	int len = sizeof(buf);
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	len = ofp_recv(fd, buf, len, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	len = ofp_send(fd, buf, len, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to send data.
(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+#ifdef INET6
+/*
+ * Connect fd to fd00:1baf::1 on port TEST_PORT + 1, receive one chunk
+ * of data and send the same bytes back.
+ *
+ * Returns 0 on success, -1 if connect, receive or send fails.
+ */
+int connect_recv_send_tcp6_local_ip(int fd)
+{
+	struct ofp_sockaddr_in6 addr = {0};
+	char buf[20];
+	int len = sizeof(buf);
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr);
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	len = ofp_recv(fd, buf, len, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to rcv data(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	len = ofp_send(fd, buf, len, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to send data. (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+#endif /*INET6*/
+
+static void notify_tcp_rcv(union ofp_sigval sv);
+
+/*
+ * Accept one connection on the listening socket fd, install
+ * notify_tcp_rcv() as the event hook for the accepted socket, send
+ * "socket_test" (including its terminator) to the peer, wait, and close
+ * the accepted socket.
+ *
+ * Returns 0 on success, -1 if accept, hook registration or send fails.
+ */
+int socket_sigevent_tcp_rcv(int fd)
+{
+	struct ofp_sigevent ev;
+	struct ofp_sock_sigval ss;
+	const char *buf = "socket_test";
+	int len = 0;
+	int fd_accept = -1;
+
+	fd_accept = ofp_accept(fd, NULL, NULL);
+	/* Test the accepted descriptor; the listener fd says nothing here. */
+	if (fd_accept == -1) {
+		OFP_ERR("Faild to accept connection (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	ss.sockfd = fd_accept;
+	ss.event = OFP_EVENT_INVALID;
+	ss.pkt = ODP_PACKET_INVALID;
+
+	ev.ofp_sigev_notify = OFP_SIGEV_HOOK;
+	ev.ofp_sigev_notify_function = notify_tcp_rcv;
+	ev.ofp_sigev_value.sival_ptr = &ss;
+	if (ofp_socket_sigevent(&ev) == -1) {
+		OFP_ERR("Faild to set sigevent(errno = %d)\n", ofp_errno);
+		/* The accepted socket is owned here; do not leak it. */
+		ofp_close(fd_accept);
+		return -1;
+	}
+
+	len = ofp_send(fd_accept, buf, strlen(buf) + 1, 0);
+	if (len == -1) {
+		OFP_ERR("Faild to send data. (errno = %d)\n", ofp_errno);
+		ofp_close(fd_accept);
+		return -1;
+	}
+	/*
+	 * NOTE(review): ss is on this stack frame; the sleep presumably
+	 * keeps it alive while the hook may still be invoked - confirm.
+	 */
+	sleep(3);
+	ofp_close(fd_accept);
+	OFP_INFO("Socket sigevent set.\n");
+	return 0;
+}
+
+/*
+ * Event hook installed by socket_sigevent_tcp_rcv(): logs the length and
+ * the bytes of the packet referenced by the registered ofp_sock_sigval.
+ */
+static void notify_tcp_rcv(union ofp_sigval sv)
+{
+	struct ofp_sock_sigval *ss;
+	uint8_t *data = NULL;
+	int data_len = 0;
+	int i;
+
+	ss = (struct ofp_sock_sigval *)sv.sival_ptr;
+	data = odp_packet_data(ss->pkt);
+	data_len = odp_packet_len(ss->pkt);
+
+	OFP_INFO("TCP data received: size %d, data: ", data_len);
+
+	for (i = 0; i < data_len; i++)
+		OFP_LOG_NO_CTX(OFP_LOG_INFO, "%c", data[i]);
+
+	OFP_LOG_NO_CTX(OFP_LOG_INFO, "\n");
+
+	OFP_INFO("SUCCESS.\n");
+}
+
+/*
+ * Wait one second, then connect fd to 192.168.100.1 on port
+ * TEST_PORT + 1 and wait another second.
+ *
+ * Returns 0 on success, -1 if the connect fails.
+ */
+int connect_tcp_delayed_local_ip(int fd)
+{
+	struct ofp_sockaddr_in addr = {0};
+
+	sleep(1); /*Let the other side to init.*/
+
+	addr.sin_len = sizeof(struct ofp_sockaddr_in);
+	addr.sin_family = OFP_AF_INET;
+	addr.sin_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	addr.sin_addr.s_addr = IP4(192, 168, 100, 1);
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	sleep(1); /* ToFix: connect is not blocking*/
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+
+#ifdef INET6
+/*
+ * Wait one second, then connect fd to fd00:1baf::1 on port
+ * TEST_PORT + 1 and wait another second.
+ *
+ * Returns 0 on success, -1 if the connect fails.
+ */
+int connect_tcp6_delayed_local_ip(int fd)
+{
+	struct ofp_sockaddr_in6 addr = {0};
+
+	sleep(1); /*Let the other side to init.*/
+
+	addr.sin6_len = sizeof(struct ofp_sockaddr_in6);
+	addr.sin6_family = OFP_AF_INET6;
+	addr.sin6_port = odp_cpu_to_be_16(TEST_PORT + 1);
+	inet_pton(AF_INET6, "fd00:1baf::1", (void *)&addr.sin6_addr);
+
+	if (ofp_connect(fd, (struct ofp_sockaddr *)&addr,
+		sizeof(addr)) == -1) {
+		OFP_ERR("Faild to connect (errno = %d)\n", ofp_errno);
+		return -1;
+	}
+
+	sleep(1); /* ToFix: connect is not blocking*/
+
+	OFP_INFO("SUCCESS.\n");
+	return 0;
+}
+#endif /* INET6 */
+
+static void notify_tcp_accept(union ofp_sigval sv);
+
+/*
+ * Install notify_tcp_accept() as the event hook for the listening
+ * socket fd, accept one connection and close it again.
+ *
+ * Returns 0 on success, -1 if hook registration, accept or close fails.
+ */
+int socket_sigevent_tcp_accept(int fd)
+{
+	struct ofp_sigevent ev;
+	struct ofp_sock_sigval ss;
+	int fd_accept = -1;
+
+	ss.sockfd = fd;
+	ss.event = OFP_EVENT_INVALID;
+	ss.pkt = ODP_PACKET_INVALID;
+
+	ev.ofp_sigev_notify = OFP_SIGEV_HOOK;
+	ev.ofp_sigev_notify_function = notify_tcp_accept;
+	ev.ofp_sigev_value.sival_ptr = &ss;
+	if (ofp_socket_sigevent(&ev) == -1) {
+		OFP_ERR("Faild to set sigevent(errno = %d)\n", ofp_errno);
+		return -1;
+	}
+	OFP_INFO("Socket sigevent set.\n");
+
+	fd_accept = ofp_accept(fd, NULL, NULL);
+	if (fd_accept == -1) {
+		OFP_ERR("Faild to accept connection (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+	if (ofp_close(fd_accept) == -1) {
+		OFP_ERR("Faild to close connection (errno = %d)\n",
+			ofp_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Event hook installed by socket_sigevent_tcp_accept(): logs the
+ * sockfd and sockfd2 fields of the registered ofp_sock_sigval.
+ */
+static void notify_tcp_accept(union ofp_sigval sv)
+{
+	struct ofp_sock_sigval *ss;
+
+	ss = (struct ofp_sock_sigval *)sv.sival_ptr;
+	OFP_INFO("TCP Connection received on socket %d: %d created.\n",
+		ss->sockfd,
+		ss->sockfd2);
+
+	OFP_INFO("SUCCESS.\n");
+}
+
diff --git a/example/socket/socket_sigevent.h b/example/socket/socket_sigevent.h
new file mode 100644
index 00000000..fa7af540
--- /dev/null
+++ b/example/socket/socket_sigevent.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SOCKET_SIGEVENT_H__
+#define __SOCKET_SIGEVENT_H__
+
+int recv_send_udp_local_ip(int fd);
+int socket_sigevent_udp4(int fd);
+
+#ifdef INET6
+int recv_send_udp6_local_ip(int fd);
+int socket_sigevent_udp6(int fd);
+#endif /* INET6 */
+
+int connect_recv_send_tcp_local_ip(int fd);
+#ifdef INET6
+int connect_recv_send_tcp6_local_ip(int fd);
+#endif /* INET6 */
+int socket_sigevent_tcp_rcv(int fd);
+
+int connect_tcp_delayed_local_ip(int fd);
+#ifdef INET6
+int connect_tcp6_delayed_local_ip(int fd);
+#endif /* INET6 */
+int socket_sigevent_tcp_accept(int fd);
+
+#endif /* __SOCKET_SIGEVENT_H__ */
+
diff --git a/example/socket/socket_util.h b/example/socket/socket_util.h
new file mode 100644
index 00000000..d9935f1b
--- /dev/null
+++ b/example/socket/socket_util.h
@@ -0,0 +1,22 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SOCKET_UTIL_H__
+#define __SOCKET_UTIL_H__
+
+#include
+#include
+#include
+#include
+#include
+
+/* Base UDP/TCP port used by the socket examples. */
+#define TEST_PORT 54321
+
+/*
+ * Pack the dotted quad a.b.c.d into one word with a in the lowest byte.
+ * Every argument is parenthesized so expressions can be passed, and d is
+ * widened to unsigned before the shift so values >= 128 do not shift
+ * into the sign bit of int.
+ */
+#define IP4(a, b, c, d) \
+	((a) | ((b) << 8) | ((c) << 16) | ((unsigned)(d) << 24))
+
+#endif /*__SOCKET_UTIL_H__*/
+
diff --git a/example/socket/suite_framework.c b/example/socket/suite_framework.c
new file mode 100644
index 00000000..ee15dfdc
--- /dev/null
+++ b/example/socket/suite_framework.c
@@ -0,0 +1,98 @@
+/* Copyright (c) 2014, ENEA Software AB
+ * Copyright (c) 2014, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "ofp.h"
+#include "suite_framework.h"
+
+static void *suite_thread1(void *arg);
+static void *suite_thread2(void *arg);
+
+/* Sockets handed to the two suite threads; -1 means "not open". */
+int fd_thread1 = -1;
+int fd_thread2 = -1;
+/* Core on which run_suite() pins both threads. */
+int core_id = -1;
+
+/*
+ * Remember the core that run_suite() will use for its threads.
+ * Always returns 0.
+ */
+int config_suite_framework(uint16_t linux_core_id)
+{
+	core_id = linux_core_id;
+
+	return 0;
+}
+
+/*
+ * Reset both suite descriptors and, if init_func is given, let it
+ * create them. Returns init_func's result, or 0 when it is NULL.
+ */
+int init_suite(init_function init_func)
+{
+	fd_thread1 = -1;
+	fd_thread2 = -1;
+
+	if (init_func)
+		return init_func(&fd_thread1, &fd_thread2);
+	else
+		return 0;
+}
+
+/*
+ * Run run_func1 on fd_thread1 and run_func2 on fd_thread2, each in its
+ * own thread pinned to core_id, and wait for both to finish.
+ */
+void run_suite(run_function run_func1, run_function run_func2)
+{
+	odph_linux_pthread_t sock_pthread1;
+	odph_linux_pthread_t sock_pthread2;
+	odp_cpumask_t sock_cpumask;
+
+	odp_cpumask_zero(&sock_cpumask);
+	odp_cpumask_set(&sock_cpumask, core_id);
+
+	odph_linux_pthread_create(&sock_pthread1,
+			&sock_cpumask,
+			suite_thread1,
+			run_func1);
+
+	odph_linux_pthread_create(&sock_pthread2,
+			&sock_cpumask,
+			suite_thread2,
+			run_func2);
+
+	odph_linux_pthread_join(&sock_pthread1, 1);
+	odph_linux_pthread_join(&sock_pthread2, 1);
+}
+
+/*
+ * Close whichever suite sockets are still open and mark them closed.
+ * Close failures are logged, not returned.
+ */
+void end_suite(void)
+{
+	if (fd_thread1 != -1) {
+		if (ofp_close(fd_thread1) == -1)
+			OFP_ERR("Faild to close socket 1 (errno = %d)\n",
+				ofp_errno);
+		fd_thread1 = -1;
+	}
+
+	if (fd_thread2 != -1) {
+		/* Report the socket that actually failed (was "socket 1"). */
+		if (ofp_close(fd_thread2) == -1)
+			OFP_ERR("Faild to close socket 2 (errno = %d)\n",
+				ofp_errno);
+		fd_thread2 = -1;
+	}
+}
+
+/* Thread body: per-thread ODP/OFP init, then run arg on fd_thread1. */
+static void *suite_thread1(void *arg)
+{
+	run_function run_func = (run_function)arg;
+
+	odp_init_local();
+	ofp_init_local();
+
+	(void)run_func(fd_thread1);
+
+	return NULL;
+}
+
+/* Thread body: per-thread ODP/OFP init, then run arg on fd_thread2. */
+static void *suite_thread2(void *arg)
+{
+	run_function run_func = (run_function)arg;
+
+	odp_init_local();
+	ofp_init_local();
+
+	(void)run_func(fd_thread2);
+
+	return NULL;
+}
diff --git a/example/socket/suite_framework.h b/example/socket/suite_framework.h
new file mode 100644
index 00000000..8277dae0
--- /dev/null
+++ b/example/socket/suite_framework.h
@@ -0,0 +1,20 @@
+/* Copyright (c) 2014, ENEA Software AB
+ *
Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#ifndef __SUITE_FRAMEWORK_H__ +#define __SUITE_FRAMEWORK_H__ + +typedef int (*init_function)(int *pfd_thread1, int *pfd_thread2); +typedef int (*run_function)(int fd); + +int config_suite_framework(uint16_t linux_core_id); + +int init_suite(init_function init_func); +void run_suite(run_function run_func1, run_function run_func2); +void end_suite(void); + +#endif /* __SUITE_FRAMEWORK_H__ */ + diff --git a/example/sysctl/Makefile.am b/example/sysctl/Makefile.am new file mode 100644 index 00000000..069de633 --- /dev/null +++ b/example/sysctl/Makefile.am @@ -0,0 +1,11 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = sysctl + +AM_CFLAGS += -I$(top_srcdir)/include +AM_CFLAGS += -I$(top_srcdir)/include/api + +sysctl_LDFLAGS = $(AM_LDFLAGS) -static +sysctl_CFLAGS = $(AM_CFLAGS) + +dist_sysctl_SOURCES = app_main.c sysctl.c diff --git a/example/sysctl/app_main.c b/example/sysctl/app_main.c new file mode 100644 index 00000000..b5632df5 --- /dev/null +++ b/example/sysctl/app_main.c @@ -0,0 +1,299 @@ + /* + * Copyright (c) 2015, Nokia Solutions and Networks + * Copyright (c) 2015, ENEA Software AB + * All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include
+#include
+#include
+
+#include "ofp.h"
+
+#include "sysctl.h"
+
+#define MAX_WORKERS 32
+
+/**
+ * Parsed command line application arguments
+ */
+typedef struct {
+	int core_count;
+	int if_count;		/**< Number of interfaces to be used */
+	char **if_names;	/**< Array of pointers to interface names */
+	char *conf_file;
+} appl_args_t;
+
+/* helper funcs */
+static void parse_args(int argc, char *argv[], appl_args_t *appl_args);
+static void print_info(char *progname, appl_args_t *appl_args);
+static void usage(char *progname);
+
+ofp_init_global_t app_init_params; /**< global OFP init parms */
+
+/** Get rid of path in filename - only for unix-type paths using '/' */
+#define NO_PATH(file_name) (strrchr((file_name), '/') ? \
+				strrchr((file_name), '/') + 1 : (file_name))
+
+
+/** local hook
+ *
+ * Reads the protocol number from arg and lets the packet continue
+ * through the stack untouched.
+ *
+ * @param pkt odp_packet_t (unused)
+ * @param arg void* pointing to the int protocol number
+ * @return OFP_PKT_CONTINUE
+ *
+ */
+static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, void *arg)
+{
+	int protocol = *(int *)arg;
+	(void) pkt;
+	(void) protocol;
+	return OFP_PKT_CONTINUE;
+}
+
+/** main() Application entry point
+ *
+ * @param argc int
+ * @param argv[] char*
+ * @return int
+ *
+ */
+int main(int argc, char *argv[])
+{
+	odph_linux_pthread_t thread_tbl[MAX_WORKERS];
+	appl_args_t params;
+	int core_count, num_workers;
+	odp_cpumask_t cpumask;
+	char cpumaskstr[64];
+
+	/* Parse and store the application arguments */
+	parse_args(argc, argv, &params);
+
+	/* Print both system and application information */
+	print_info(NO_PATH(argv[0]), &params);
+
+	if (odp_init_global(NULL, NULL)) {
+		OFP_ERR("Error: ODP global init failed.\n");
+		exit(EXIT_FAILURE);
+	}
+	odp_init_local();
+
+	core_count = odp_cpu_count();
+	num_workers = core_count;
+
+	/* A -c value overrides the detected count; cap at the table size. */
+	if (params.core_count)
+		num_workers = params.core_count;
+	if (num_workers > MAX_WORKERS)
+		num_workers = MAX_WORKERS;
+
+	/*
+	 * By default core #0 runs Linux kernel background tasks.
+ * Start mapping thread from core #1 + */ + memset(&app_init_params, 0, sizeof(app_init_params)); + + app_init_params.linux_core_id = 0; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + app_init_params.pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + ofp_init_global(&app_init_params); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + + odph_linux_pthread_create(thread_tbl, + &cpumask, + default_event_dispatcher, + ofp_eth_vlan_processing); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + /* sysctl test thread */ + ofp_start_sysctl_thread(app_init_params.linux_core_id); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + 
+		switch (opt) {
+		case 'c':
+			appl_args->core_count = atoi(optarg);
+			break;
+			/* parse packet-io interface names */
+		case 'i':
+			len = strlen(optarg);
+			if (len == 0) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+			len += 1;	/* add room for '\0' */
+
+			/*
+			 * names is never freed: the if_names entries stored
+			 * below point into this buffer.
+			 */
+			names = malloc(len);
+			if (names == NULL) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			/* count the number of tokens separated by ',' */
+			strcpy(names, optarg);
+			for (str = names, i = 0;; str = NULL, i++) {
+				token = strtok_r(str, ",", &save);
+				if (token == NULL)
+					break;
+			}
+			appl_args->if_count = i;
+
+			if (appl_args->if_count == 0) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			/* allocate storage for the if names */
+			appl_args->if_names =
+				calloc(appl_args->if_count, sizeof(char *));
+			/* Fail like the malloc() above instead of writing
+			 * through a NULL table in the loop below. */
+			if (appl_args->if_names == NULL) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			/* store the if names (reset names string) */
+			strcpy(names, optarg);
+			for (str = names, i = 0;; str = NULL, i++) {
+				token = strtok_r(str, ",", &save);
+				if (token == NULL)
+					break;
+				appl_args->if_names[i] = token;
+			}
+			break;
+
+		case 'h':
+			usage(argv[0]);
+			exit(EXIT_SUCCESS);
+			break;
+
+		case 'f':
+			len = strlen(optarg);
+			if (len == 0) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+			len += 1;	/* add room for '\0' */
+
+			appl_args->conf_file = malloc(len);
+			if (appl_args->conf_file == NULL) {
+				usage(argv[0]);
+				exit(EXIT_FAILURE);
+			}
+
+			strcpy(appl_args->conf_file, optarg);
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/* At least one interface (-i) is mandatory. */
+	if (appl_args->if_count == 0) {
+		usage(argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	optind = 1;		/* reset 'extern optind' from the getopt lib */
+}
+
+/**
+ * Print system and application info
+ */
+static void print_info(char *progname, appl_args_t *appl_args)
+{
+	int i;
+
+	printf("\n"
+		"ODP system info\n"
+		"---------------\n"
+		"ODP API version: %s\n"
+		"CPU model: %s\n"
+		"CPU freq (hz): %"PRIu64"\n"
+		"Cache line size: %i\n"
+		"Core count: %i\n"
+		"\n",
+		odp_version_api_str(), odp_sys_cpu_model_str(),
+		odp_sys_cpu_hz(), odp_sys_cache_line_size(),
+		odp_cpu_count());
+
+
printf("Running ODP appl: \"%s\"\n"
+		"-----------------\n"
+		"IF-count: %i\n"
+		"Using IFs: ",
+		progname, appl_args->if_count);
+	for (i = 0; i < appl_args->if_count; ++i)
+		printf(" %s", appl_args->if_names[i]);
+	printf("\n\n");
+	fflush(NULL);
+}
+
+/**
+ * Print usage information
+ *
+ * @param progname Program name as invoked; any leading path is removed
+ *                 with NO_PATH() before printing
+ */
+static void usage(char *progname)
+{
+	printf("\n"
+		"Usage: %s OPTIONS\n"
+		" E.g. %s -i eth1,eth2,eth3\n"
+		"\n"
+		"ODPFastpath application.\n"
+		"\n"
+		"Mandatory OPTIONS:\n"
+		" -i, --interface Eth interfaces (comma-separated, no spaces)\n"
+		"\n"
+		"Optional OPTIONS\n"
+		" -c, --count Core count.\n"
+		" -f Configuration file for the CLI thread.\n"
+		" -h, --help Display help and exit.\n"
+		"\n", NO_PATH(progname), NO_PATH(progname)
+		);
+}
diff --git a/example/sysctl/sysctl.c b/example/sysctl/sysctl.c
new file mode 100644
index 00000000..667a6252
--- /dev/null
+++ b/example/sysctl/sysctl.c
@@ -0,0 +1,282 @@
+ /*
+ * Copyright (c) 2015, Nokia Solutions and Networks
+ * Copyright (c) 2015, ENEA Software AB
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ofp.h"
+
+#include "sysctl.h"
+
+/*
+ * Management Information Base (MIB) is a hierarchical database that
+ * describes an application. Object Identifiers (OID) are used to
+ * read and write the variables. In IETF notation OIDs are composed of digits
+ * and dots.
+ *
+ * For example in this library 'net' branch has OID number 4,
+ * 'inet' has number 4.2, 'udp' 4.2.17, and udp's checksum variable 4.2.17.1.
+ * The checksum variable can be expressed as a sequence of integers:
+ * 4.2.17.1
+ * or as a string:
+ * net.inet.udp.checksum
+ *
+ * When you create a new OID it is possible to allocate new static numbers, but
+ * you have to be careful not to use reserved numbers. Recommended way is to use
+ * automatic allocation and access the variables using the string notation.
+ * + * Example of a hierarcy tree: + * 4 net RW Node + * 2 inet RW Node + * 0 ip RW Node + * 1 icmp RW Node + * 2 igmp RW Node + * 6 tcp RW Node + * 17 udp RW Node + * 1 checksum RW Int + * 256 blackhole RW Int + * 257 log_in_vain RW Int + * + * OIDs are created at compile time. Create a new branch at root level using + * static OID number 73: + */ + +OFP_SYSCTL_NODE(, 73, mybranch, OFP_CTLFLAG_RW, 0, "My test branch"); + +/* + * Create two child branches 'telnet' and 'ssh'. Use automatic OID allocation. + * Note the underscore before mybranch: + */ + +OFP_SYSCTL_NODE(_mybranch, OFP_OID_AUTO, telnet, OFP_CTLFLAG_RW, 0, "Telnet control"); +OFP_SYSCTL_NODE(_mybranch, OFP_OID_AUTO, ssh, OFP_CTLFLAG_RW, 0, "Ssh control"); + +/* + * If you want to use the branches in other files use the following + * declarations in relevant include file: + * + * SYSCTL_DECL(_mybranch_telnet); + * SYSCTL_DECL(_mybranch_ssh); + */ + +/* + * For an integer rw variable useful syntax is: + * OFP_SYSCTL_INT(_parent, OFP_OID_AUTO, name, OFP_CTLFLAG_RW, + * &variable, value, "Description"); + * There are many other types than integer available, too. + * + * Create four variables, two for enabling the protocols and two for statistics. + * Statistical variables are 64 bit read only. + */ + +static int enable_telnet = 1; +static int enable_ssh = 0; +static uint64_t telnet_bytes; +static uint64_t ssh_bytes; + +OFP_SYSCTL_INT(_mybranch_telnet, OFP_OID_AUTO, enabled, OFP_CTLFLAG_RW, + &enable_telnet, 0, "Enable telnet protocol"); +OFP_SYSCTL_INT(_mybranch_ssh, OFP_OID_AUTO, enabled, OFP_CTLFLAG_RW, + &enable_ssh, 0, "Enable ssh protocol"); +OFP_SYSCTL_QUAD(_mybranch_telnet, OFP_OID_AUTO, counter, OFP_CTLFLAG_RD, + &telnet_bytes, 0, "Telnet counter"); +OFP_SYSCTL_QUAD(_mybranch_ssh, OFP_OID_AUTO, counter, OFP_CTLFLAG_RD, + &ssh_bytes, 0, "Ssh counter"); + +/* + * Hello message for clients. If 6th value (length) is zero + * the string cannot be changed. 
+ */ +static char hello_msg[32]; + +OFP_SYSCTL_STRING(_mybranch, OFP_OID_AUTO, hello, OFP_CTLFLAG_RW, + hello_msg, sizeof(hello_msg), "Hello message"); + +/* + * End of compile time definitions. Our branch looks like this: + * + * 73 mybranch RW Node + * 256 hello RW String + * 261 ssh RW Node + * 257 counter R int64_t + * 259 enabled RW int + * 262 telnet RW Node + * 258 counter R int64_t + * 260 enabled RW int + * + * OID values > 255 are dynamically allocated. + */ + +static void * +sysctl(void *arg) +{ + (void)arg; + + odp_init_local(); + ofp_init_local(); + sleep(2); + + /* + * Variables may be visible per thread. Addresses of the shared + * variables are not known at compile time. Also sometimes it may be + * necessary to create OIDs dynamically. + * + * Add an OID dynamically to the existing compile time + * created branch: + */ + static int created; + + OFP_SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_mybranch), OFP_OID_AUTO, + "created", OFP_CTLFLAG_RW, &created, 0, + "Dynamically created"); + + /* + * Create a branch dynamically: + */ + struct ofp_sysctl_oid *dyn_root; + + dyn_root = OFP_SYSCTL_ADD_NODE + (NULL, + SYSCTL_STATIC_CHILDREN(_mybranch), OFP_OID_AUTO, "subbranch", + OFP_CTLFLAG_RW, 0, "Dynamically created branch"); + + /* + * Add a variable to that, for example one from the shared memory. + * Here we use a static integer. 
+ */ + static int shared; + + OFP_SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(dyn_root), OFP_OID_AUTO, + "shared", OFP_CTLFLAG_RW, &shared, 0, + "Shared memory variable"); + /* + * Our branch is complete: + * + * 73 mybranch RW Node (My test branch) + * 256 hello RW string (Hello message) + * 261 ssh RW Node (Ssh control) + * 257 counter R int64_t (Ssh counter) + * 259 enabled RW int (Enable ssh protocol) + * 262 telnet RW Node (Telnet control) + * 258 counter R int64_t (Telnet counter) + * 260 enabled RW int (Enable telnet protocol) + * 328 created RW int (Dynamically created) + * 329 subbranch RW Node (Dynamically created branch) + * 330 shared RW int (Shared memory variable) + */ + + /* + * Use created variables. First set some meaningful values: + */ + telnet_bytes = 123456; + ssh_bytes = 567890; + strcpy(hello_msg, "Hello, world!"); + + /* + * There are several functions to access MIB data. Simplest one + * is the following: + * + * ofp_sysctl(const char *name, void *old, size_t *oldlenp, + * const void *new, size_t newlen, size_t *retval) + * + * name: OID using string notation (like "net.inet.udp.checksum"). + * old: Pointer to memory where old value will be saved. + * Can be NULL. + * oldlenp: Pointer to variable whose value is the result space + * in bytes. Will be updated to the real space. + * new: Pointer to the new value. Can be NULL. + * newlen: Size of the new value in bytes or zero. + * retval: Pointer to a variable that will be set to + * response's length. 
+	 */
+
+	/*
+	 * Read the telnet bytes. The length argument is in/out (buffer size
+	 * in, stored length out), so it is set again before every read.
+	 * The result variables are pre-initialised so that a failed call
+	 * never makes the log print indeterminate values.
+	 */
+	uint64_t counter = 0;
+	size_t counterlen = sizeof(counter);
+	size_t retval = 0;
+	ofp_sysctl("mybranch.telnet.counter", &counter, &counterlen,
+		   NULL, 0, &retval);
+	/* uint64_t is printed through unsigned long long, size_t with %zu. */
+	OFP_LOG("mybranch.telnet.counter=%llu len=%zu retval=%zu\n",
+		(unsigned long long)counter, counterlen, retval);
+	/*
+	 * Read the ssh bytes:
+	 */
+	counterlen = sizeof(counter);
+	ofp_sysctl("mybranch.ssh.counter", &counter, &counterlen,
+		   NULL, 0, &retval);
+	OFP_LOG("mybranch.ssh.counter=%llu len=%zu retval=%zu\n",
+		(unsigned long long)counter, counterlen, retval);
+
+	/*
+	 * Check if telnet is enabled:
+	 */
+	int enabled = 0;
+	size_t enalen = sizeof(enabled);
+	ofp_sysctl("mybranch.telnet.enabled", &enabled, &enalen,
+		   NULL, 0, &retval);
+	OFP_LOG("mybranch.telnet.enabled=%d\n", enabled);
+	/*
+	 * Disable telnet. The old value is not wanted, so both "old"
+	 * arguments are null pointers.
+	 */
+	enabled = 0;
+	ofp_sysctl("mybranch.telnet.enabled", NULL, NULL,
+		   &enabled, sizeof(enabled), &retval);
+	/*
+	 * Check if that worked. Init variable with something to ensure it is
+	 * really changed:
+	 */
+	enabled = 123;
+	enalen = sizeof(enabled);
+	ofp_sysctl("mybranch.telnet.enabled", &enabled, &enalen,
+		   NULL, 0, &retval);
+	OFP_LOG("After disabling: mybranch.telnet.enabled=%d, real value=%d\n",
+		enabled, enable_telnet);
+
+	/*
+	 * Read and change the hello message. The new value is passed with
+	 * its terminating NUL included (sizeof, not strlen).
+	 */
+	static const char new_hello[] = "Server is down.";
+	char msg[32] = "";
+	size_t msglen = sizeof(msg);
+	ofp_sysctl("mybranch.hello", msg, &msglen,
+		   new_hello, sizeof(new_hello), &retval);
+	OFP_LOG("mybranch.hello: old value=%s, new value=%s\n",
+		msg, hello_msg);
+
+	/*
+	 * Make telnet connection to local address port 2345.
+ * Try commands: + * sysctl dump + * sysctl r mybranch.ssh.counter + * sysctl w mybranch.ssh.enabled 1 + * sysctl w mybranch.ssh.counter 777 + */ + + while (1) + sleep(1); + + return NULL; +} + +void ofp_start_sysctl_thread(int core_id) +{ + odph_linux_pthread_t test_linux_pthread; + odp_cpumask_t cpumask; + + odp_cpumask_zero(&cpumask); + odp_cpumask_set(&cpumask, core_id); + + odph_linux_pthread_create(&test_linux_pthread, + &cpumask, + sysctl, + NULL); +} diff --git a/example/sysctl/sysctl.h b/example/sysctl/sysctl.h new file mode 100644 index 00000000..b5612f71 --- /dev/null +++ b/example/sysctl/sysctl.h @@ -0,0 +1,14 @@ + /* + * Copyright (c) 2015, Nokia Solutions and Networks + * Copyright (c) 2015, ENEA Software AB + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _SYSCTL_H_ +#define _SYSCTL_H_ + +void ofp_start_sysctl_thread(int core_id); + +#endif diff --git a/example/udpecho/Makefile.am b/example/udpecho/Makefile.am new file mode 100644 index 00000000..87893a65 --- /dev/null +++ b/example/udpecho/Makefile.am @@ -0,0 +1,11 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = udpecho + +AM_CFLAGS += -I$(top_srcdir)/include +AM_CFLAGS += -I$(top_srcdir)/include/api + +udpecho_LDFLAGS = $(AM_LDFLAGS) -static +udpecho_CFLAGS = $(AM_CFLAGS) + +dist_udpecho_SOURCES = app_main.c udp_server.c diff --git a/example/udpecho/app_main.c b/example/udpecho/app_main.c new file mode 100644 index 00000000..67428206 --- /dev/null +++ b/example/udpecho/app_main.c @@ -0,0 +1,307 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include
+#include
+#include
+
+#include "ofp.h"
+
+#include "udp_server.h"
+
+#define MAX_WORKERS 32
+
+/**
+ * Parsed command line application arguments
+ */
+typedef struct {
+	int core_count;
+	int if_count;		/**< Number of interfaces to be used */
+	char **if_names;	/**< Array of pointers to interface names */
+	char *conf_file;
+} appl_args_t;
+
+/* helper funcs */
+static void parse_args(int argc, char *argv[], appl_args_t *appl_args);
+static void print_info(char *progname, appl_args_t *appl_args);
+static void usage(char *progname);
+
+ofp_init_global_t app_init_params; /**< global OFP init parms */
+
+/** Get rid of path in filename - only for unix-type paths using '/' */
+#define NO_PATH(file_name) (strrchr((file_name), '/') ? \
+			    strrchr((file_name), '/') + 1 : (file_name))
+
+
+/** local hook
+ *
+ * @param pkt odp_packet_t (unused)
+ * @param arg void* pointing at an int protocol value; always dereferenced
+ * @return OFP_PKT_CONTINUE in all cases
+ *
+ */
+static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, void *arg)
+{
+	int protocol = *(int *)arg;
+	(void) pkt;
+	(void) protocol;
+	return OFP_PKT_CONTINUE;
+}
+
+/** main() Application entry point
+ *
+ * Raises the core-dump size limit, parses the command line, prints
+ * system information and initialises ODP before the workers are set up.
+ *
+ * @param argc int
+ * @param argv[] char*
+ * @return int
+ *
+ */
+#include
+#include
+
+int main(int argc, char *argv[])
+{
+	odph_linux_pthread_t thread_tbl[MAX_WORKERS];
+	appl_args_t params;
+	int core_count, num_workers;
+	odp_cpumask_t cpumask;
+	char cpumaskstr[64];
+
+	struct rlimit rlp;
+	/*
+	 * Only touch the limit when the current one could be read; otherwise
+	 * rlp would be passed to setrlimit() uninitialised. rlim_t is an
+	 * unsigned type, so it is printed through unsigned long long.
+	 */
+	if (getrlimit(RLIMIT_CORE, &rlp) == 0) {
+		printf("RLIMIT_CORE: %llu/%llu\n",
+		       (unsigned long long)rlp.rlim_cur,
+		       (unsigned long long)rlp.rlim_max);
+		rlp.rlim_cur = 200000000;
+		printf("Setting to max: %d\n", setrlimit(RLIMIT_CORE, &rlp));
+	}
+
+	/* Parse and store the application arguments */
+	parse_args(argc, argv, &params);
+
+	/* Print both system and application information */
+	print_info(NO_PATH(argv[0]), &params);
+
+	if (odp_init_global(NULL, NULL)) {
+		OFP_ERR("Error: ODP global init failed.\n");
+		exit(EXIT_FAILURE);
+	}
+	odp_init_local();
+
+	core_count = odp_cpu_count();
+
num_workers = core_count; + + if (params.core_count) + num_workers = params.core_count; + if (num_workers > MAX_WORKERS) + num_workers = MAX_WORKERS; + + /* + * By default core #0 runs Linux kernel background tasks. + * Start mapping thread from core #1 + */ + memset(&app_init_params, 0, sizeof(app_init_params)); + + app_init_params.linux_core_id = 0; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + app_init_params.pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + ofp_init_global(&app_init_params); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + + odph_linux_pthread_create(thread_tbl, + &cpumask, + default_event_dispatcher, + ofp_eth_vlan_processing); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + /* udp echo server */ + ofp_start_udpserver_thread(app_init_params.linux_core_id); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 
'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 'c': + appl_args->core_count = atoi(optarg); + break; + /* parse packet-io interface names */ + case 'i': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + names = malloc(len); + if (names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* count the number of tokens separated by ',' */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + } + appl_args->if_count = i; + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* allocate storage for the if names */ + appl_args->if_names = + calloc(appl_args->if_count, sizeof(char *)); + + /* store the if names (reset names string) */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + appl_args->if_names[i] = token; + } + break; + + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'f': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + appl_args->conf_file = malloc(len); + if (appl_args->conf_file == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + strcpy(appl_args->conf_file, optarg); + break; + + default: + break; + } + } + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + optind = 1; /* reset 'extern optind' from the getopt lib */ +} + +/** + * Print system and application info + */ +static void print_info(char *progname, appl_args_t *appl_args) +{ + int i; + + printf("\n" + "ODP system info\n" + "---------------\n" + "ODP API version: %s\n" + "CPU model: %s\n" + 
"CPU freq (hz): %"PRIu64"\n" + "Cache line size: %i\n" + "Core count: %i\n" + "\n", + odp_version_api_str(), odp_sys_cpu_model_str(), + odp_sys_cpu_hz(), odp_sys_cache_line_size(), + odp_cpu_count()); + + printf("Running ODP appl: \"%s\"\n" + "-----------------\n" + "IF-count: %i\n" + "Using IFs: ", + progname, appl_args->if_count); + for (i = 0; i < appl_args->if_count; ++i) + printf(" %s", appl_args->if_names[i]); + printf("\n\n"); + fflush(NULL); +} + +/** + * Prinf usage information + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. %s -i eth1,eth2,eth3\n" + "\n" + "ODPFastpath application.\n" + "\n" + "Mandatory OPTIONS:\n" + " -i, --interface Eth interfaces (comma-separated, no spaces)\n" + "\n" + "Optional OPTIONS\n" + " -c, --count Core count.\n" + " -h, --help Display help and exit.\n" + "\n", NO_PATH(progname), NO_PATH(progname) + ); +} diff --git a/example/udpecho/ofp.conf b/example/udpecho/ofp.conf new file mode 100644 index 00000000..6f1c9100 --- /dev/null +++ b/example/udpecho/ofp.conf @@ -0,0 +1,3 @@ +debug 0 +loglevel set debug +ifconfig fp0 192.168.56.33/24 diff --git a/example/udpecho/udp_server.c b/example/udpecho/udp_server.c new file mode 100644 index 00000000..7cb49c6c --- /dev/null +++ b/example/udpecho/udp_server.c @@ -0,0 +1,151 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include +#include + +#include "ofp.h" + +#include "udp_server.h" + +#define INVALID_SOCKET -1 +#define SOCKET_ERROR -1 + +//#define logprint(a...) do {} while (0) +#define logprint OFP_LOG + +static void notify(union ofp_sigval sv) +{ + struct ofp_sock_sigval *ss = sv.sival_ptr; + int s = ss->sockfd; + int event = ss->event; + odp_packet_t pkt = ss->pkt; + int n; + struct ofp_sockaddr_in addr; + ofp_socklen_t addr_len = sizeof(addr); + + /* + * Only receive events are accepted. 
+	 */
+	if (event != OFP_EVENT_RECV)
+		return;
+
+	/*
+	 * L2, L3, and L4 pointers are as they were when the packet was
+	 * received. L2 and L3 areas may have ancillary data written
+	 * over original headers. Only L4 pointer and data after that is valid.
+	 * Note that short packets may have padding. Thus odp_packet_length()
+	 * may give wrong results. Sender information is over L2 area.
+	 * It is best to use function ofp_udp_packet_parse() to
+	 * retrieve the information. It also sets the packet's data pointer
+	 * to payload and removes padding from the end.
+	 */
+	uint8_t *p = ofp_udp_packet_parse(pkt, &n,
+					    (struct ofp_sockaddr *)&addr,
+					    &addr_len);
+	/* Pointer and length are not used here. */
+	(void)p;
+	(void)n;
+
+	/*
+	 * There are two alternatives to send a response.
+	 */
+#if 1
+	/*
+	 * Reuse received packet.
+	 * Here we want to send the same payload back prepended with "ECHO:".
+	 */
+	static const char prefix[] = "ECHO:";
+
+	if (odp_packet_push_head(pkt, sizeof(prefix) - 1) != NULL) {
+		memcpy(odp_packet_data(pkt), prefix, sizeof(prefix) - 1);
+		ofp_udp_pkt_sendto(s, pkt, (struct ofp_sockaddr *)&addr,
+				   sizeof(addr));
+	} else {
+		/*
+		 * The head could not be extended: release the packet rather
+		 * than overwrite the first payload bytes with the prefix.
+		 */
+		odp_packet_free(pkt);
+	}
+#else
+	/*
+	 * Send using usual sendto(). Remember to free the packet.
+	 * p and n are the pointer and length filled in by
+	 * ofp_udp_packet_parse() above.
+	 */
+	ofp_sendto(s, p, n, 0,
+		   (struct ofp_sockaddr *)&addr, sizeof(addr));
+	odp_packet_free(pkt);
+#endif
+	/*
+	 * Mark ss->pkt invalid to indicate it was released or reused by us.
+ */ + ss->pkt = ODP_PACKET_INVALID; +} + +static void *udpecho(void *arg) +{ + int serv_fd; + struct ofp_sockaddr_in my_addr; + uint32_t my_ip_addr; + ofp_fd_set read_fd; + (void)arg; + + logprint("UDP server thread started\n"); + + odp_init_local(); + ofp_init_local(); + sleep(1); + + my_ip_addr = ofp_port_get_ipv4_addr(0, 0, OFP_PORTCONF_IP_TYPE_IP_ADDR); + + if ((serv_fd = ofp_socket(OFP_AF_INET, OFP_SOCK_DGRAM, OFP_IPPROTO_UDP)) < 0) { + logprint("Cannot open UDP socket (%s)!\n", + ofp_strerror(ofp_errno)); + return NULL; + } + + memset(&my_addr, 0, sizeof(my_addr)); + my_addr.sin_family = OFP_AF_INET; + my_addr.sin_port = odp_cpu_to_be_16(2048); + my_addr.sin_addr.s_addr = my_ip_addr; + my_addr.sin_len = sizeof(my_addr); + + if (ofp_bind(serv_fd, (struct ofp_sockaddr *)&my_addr, + sizeof(struct ofp_sockaddr)) < 0) { + logprint("Cannot bind http socket (%s)!\n", + ofp_strerror(ofp_errno)); + return NULL; + } + + struct ofp_sigevent ev; + struct ofp_sock_sigval ss; + ss.sockfd = serv_fd; + ss.event = 0; + ss.pkt = ODP_PACKET_INVALID; + ev.ofp_sigev_notify = 1; + ev.ofp_sigev_notify_function = notify; + ev.ofp_sigev_value.sival_ptr = &ss; + ofp_socket_sigevent(&ev); + + OFP_FD_ZERO(&read_fd); + OFP_FD_SET(serv_fd, &read_fd); + + while (1) { + sleep(1); + } + + logprint("UDP server exit\n"); + return NULL; +} + +void ofp_start_udpserver_thread(int core_id) +{ + odph_linux_pthread_t test_linux_pthread; + odp_cpumask_t cpumask; + + odp_cpumask_zero(&cpumask); + odp_cpumask_set(&cpumask, core_id); + + odph_linux_pthread_create(&test_linux_pthread, + &cpumask, + udpecho, + NULL); +} diff --git a/example/udpecho/udp_server.h b/example/udpecho/udp_server.h new file mode 100644 index 00000000..15ac80b1 --- /dev/null +++ b/example/udpecho/udp_server.h @@ -0,0 +1,13 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _UDP_SERVER_H_ +#define _UDP_SERVER_H_ + +void 
ofp_start_udpserver_thread(int core_id); + +#endif diff --git a/example/webserver/Makefile.am b/example/webserver/Makefile.am new file mode 100644 index 00000000..cab2a9de --- /dev/null +++ b/example/webserver/Makefile.am @@ -0,0 +1,11 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = webserver + +AM_CFLAGS += -I$(top_srcdir)/include +AM_CFLAGS += -I$(top_srcdir)/include/api + +webserver_LDFLAGS = $(AM_LDFLAGS) -static +webserver_CFLAGS = $(AM_CFLAGS) + +dist_webserver_SOURCES = app_main.c httpd.c diff --git a/example/webserver/app_main.c b/example/webserver/app_main.c new file mode 100644 index 00000000..85f21206 --- /dev/null +++ b/example/webserver/app_main.c @@ -0,0 +1,306 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofp.h" +#include "httpd.h" + +#define MAX_WORKERS 32 + +/** + * Parsed command line application arguments + */ +typedef struct { + int core_count; + int if_count; /**< Number of interfaces to be used */ + char **if_names; /**< Array of pointers to interface names */ + char *conf_file; +} appl_args_t; + +/* helper funcs */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args); +static void print_info(char *progname, appl_args_t *appl_args); +static void usage(char *progname); + +ofp_init_global_t app_init_params; /**< global OFP init parms */ + +/** Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(file_name) (strrchr((file_name), '/') ? 
\
+			    strrchr((file_name), '/') + 1 : (file_name))
+
+
+/** local hook
+ *
+ * @param pkt odp_packet_t (unused)
+ * @param arg void* pointing at an int protocol value; always dereferenced
+ * @return OFP_PKT_CONTINUE in all cases
+ *
+ */
+static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, void *arg)
+{
+	int protocol = *(int *)arg;
+	(void) pkt;
+	(void) protocol;
+	return OFP_PKT_CONTINUE;
+}
+
+/** main() Application entry point
+ *
+ * Raises the core-dump size limit, parses the command line, prints
+ * system information, initialises ODP and sizes the worker set.
+ *
+ * @param argc int
+ * @param argv[] char*
+ * @return int
+ *
+ */
+#include
+#include
+
+int main(int argc, char *argv[])
+{
+	odph_linux_pthread_t thread_tbl[MAX_WORKERS];
+	appl_args_t params;
+	int core_count, num_workers;
+	odp_cpumask_t cpumask;
+	char cpumaskstr[64];
+
+	struct rlimit rlp;
+	/*
+	 * Only touch the limit when the current one could be read; otherwise
+	 * rlp would be passed to setrlimit() uninitialised. rlim_t is an
+	 * unsigned type, so it is printed through unsigned long long.
+	 */
+	if (getrlimit(RLIMIT_CORE, &rlp) == 0) {
+		printf("RLIMIT_CORE: %llu/%llu\n",
+		       (unsigned long long)rlp.rlim_cur,
+		       (unsigned long long)rlp.rlim_max);
+		rlp.rlim_cur = 200000000;
+		printf("Setting to max: %d\n", setrlimit(RLIMIT_CORE, &rlp));
+	}
+
+	/* Parse and store the application arguments */
+	parse_args(argc, argv, &params);
+
+	/* Print both system and application information */
+	print_info(NO_PATH(argv[0]), &params);
+
+	if (odp_init_global(NULL, NULL)) {
+		OFP_ERR("Error: ODP global init failed.\n");
+		exit(EXIT_FAILURE);
+	}
+	odp_init_local();
+
+	core_count = odp_cpu_count();
+	num_workers = core_count;
+
+	/* An explicit -c value overrides the detected count, capped at MAX_WORKERS. */
+	if (params.core_count)
+		num_workers = params.core_count;
+	if (num_workers > MAX_WORKERS)
+		num_workers = MAX_WORKERS;
+
+	/*
+	 * By default core #0 runs Linux kernel background tasks.
+ * Start mapping thread from core #1 + */ + memset(&app_init_params, 0, sizeof(app_init_params)); + + app_init_params.linux_core_id = 0; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + app_init_params.pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + ofp_init_global(&app_init_params); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + + odph_linux_pthread_create(thread_tbl, + &cpumask, + default_event_dispatcher, + ofp_eth_vlan_processing); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + /* webserver */ + ofp_start_webserver_thread(app_init_params.linux_core_id); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + 
switch (opt) { + case 'c': + appl_args->core_count = atoi(optarg); + break; + /* parse packet-io interface names */ + case 'i': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + names = malloc(len); + if (names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* count the number of tokens separated by ',' */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + } + appl_args->if_count = i; + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* allocate storage for the if names */ + appl_args->if_names = + calloc(appl_args->if_count, sizeof(char *)); + + /* store the if names (reset names string) */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + appl_args->if_names[i] = token; + } + break; + + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'f': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + appl_args->conf_file = malloc(len); + if (appl_args->conf_file == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + strcpy(appl_args->conf_file, optarg); + break; + + default: + break; + } + } + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + optind = 1; /* reset 'extern optind' from the getopt lib */ +} + +/** + * Print system and application info + */ +static void print_info(char *progname, appl_args_t *appl_args) +{ + int i; + + printf("\n" + "ODP system info\n" + "---------------\n" + "ODP API version: %s\n" + "CPU model: %s\n" + "CPU freq (hz): %"PRIu64"\n" + "Cache line size: %i\n" + "Core count: %i\n" + "\n", + odp_version_api_str(), odp_sys_cpu_model_str(), + odp_sys_cpu_hz(), odp_sys_cache_line_size(), + odp_cpu_count()); + + 
printf("Running ODP appl: \"%s\"\n" + "-----------------\n" + "IF-count: %i\n" + "Using IFs: ", + progname, appl_args->if_count); + for (i = 0; i < appl_args->if_count; ++i) + printf(" %s", appl_args->if_names[i]); + printf("\n\n"); + fflush(NULL); +} + +/** + * Prinf usage information + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. %s -i eth1,eth2,eth3\n" + "\n" + "ODPFastpath application.\n" + "\n" + "Mandatory OPTIONS:\n" + " -i, --interface Eth interfaces (comma-separated, no spaces)\n" + "\n" + "Optional OPTIONS\n" + " -c, --count Core count.\n" + " -h, --help Display help and exit.\n" + "\n", NO_PATH(progname), NO_PATH(progname) + ); +} diff --git a/example/webserver/httpd.c b/example/webserver/httpd.c new file mode 100644 index 00000000..fddb64e9 --- /dev/null +++ b/example/webserver/httpd.c @@ -0,0 +1,289 @@ +#include +#include +#include +#include +#include +#include + +#include "ofp.h" + +#include "httpd.h" + +void httpd_main(uint32_t addr); + +#define logprint(a...) do {} while (0) +//#define logprint OFP_LOG + +int sigreceived = 0; +static uint32_t myaddr; + +/* Set www_dir to point to your web directory. */ +static const char *www_dir = "/home/hjokinen/Dropbox/kolumbus-web"; + +/* Table of concurrent connections */ +#define NUM_CONNECTIONS 16 +static struct { + int fd; + uint32_t addr; + int closed; + FILE *post; +} connections[NUM_CONNECTIONS]; + +/* Sending function with some debugging. */ +static int mysend(int s, char *p, int len) +{ + int n; + + while (len > 0) { + n = ofp_send(s, p, len, 0); + if (n < 0) { + OFP_LOG("mysend: cannot send (%d): %s\n", + n, ofp_strerror(ofp_errno)); + return n; + } + len -= n; + p += n; + if (len) { + logprint("mysend: only %d bytes sent\n", n); + } + } + return len; +} + +static int sendf(int fd, const char *fmt, ...) 
+{
+	char buf[1024];
+	va_list ap;
+	int n;
+
+	va_start(ap, fmt);
+	n = vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+
+	if (n < 0)
+		return n;
+	/*
+	 * vsnprintf() reports the length the full output would have had;
+	 * never hand mysend() more bytes than buf actually holds.
+	 */
+	if ((size_t)n >= sizeof(buf))
+		n = (int)sizeof(buf) - 1;
+
+	return mysend(fd, buf, n);
+}
+
+/*
+ * Send one file.
+ *
+ * s:   socket the response is written to.
+ * url: request path as received from the client (untrusted). Its first
+ *      character is skipped; an empty remainder selects "index.html".
+ *
+ * Replies 403 when the path contains "..", 404 when the file cannot be
+ * opened or its name does not fit the path buffer, otherwise 200 with an
+ * optional Content-Type header followed by the file contents.
+ */
+static void get_file(int s, char *url)
+{
+	/* File name extension -> Content-Type. */
+	static const struct {
+		const char *ext;
+		const char *mime;
+	} types[] = {
+		{ "html",  "text/html" },
+		{ "htm",   "text/html" },
+		{ "css",   "text/css" },
+		{ "txt",   "text/plain" },
+		{ "png",   "image/png" },
+		{ "jpg",   "image/jpeg" },
+		{ "class", "application/x-java-applet" },
+		{ "jar",   "application/java-archive" },
+		{ "pdf",   "application/pdf" },
+		{ "swf",   "application/x-shockwave-flash" },
+		{ "ico",   "image/vnd.microsoft.icon" },
+		{ "js",    "text/javascript" },
+	};
+	char bufo[512];
+	const char *mime = NULL;
+	const char *p = url + 1;
+	const char *ext;
+	FILE *f = NULL;
+	size_t i, n;
+	int len;
+
+	if (*p == 0)
+		p = "index.html";
+
+	/* The path comes from the network: never let it climb out of www_dir. */
+	if (strstr(p, "..")) {
+		sendf(s, "HTTP/1.0 403 Forbidden\r\n\r\n");
+		return;
+	}
+
+	ext = strrchr(p, '.');
+	if (ext) {
+		ext++;
+		for (i = 0; i < sizeof(types) / sizeof(types[0]); i++) {
+			if (!strcmp(ext, types[i].ext)) {
+				mime = types[i].mime;
+				break;
+			}
+		}
+	}
+
+	/* A truncated path would name a different file: treat it as not found. */
+	len = snprintf(bufo, sizeof(bufo), "%s/%s", www_dir, p);
+	if (len >= 0 && (size_t)len < sizeof(bufo))
+		f = fopen(bufo, "rb");
+
+	if (!f) {
+		sendf(s, "HTTP/1.0 404 NOK\r\n\r\n");
+		return;
+	}
+
+	sendf(s, "HTTP/1.0 200 OK\r\n");
+	if (mime)
+		sendf(s, "Content-Type: %s\r\n\r\n", mime);
+	else
+		sendf(s, "\r\n");
+
+	/* Stream the file in buffer-sized pieces; stop at the first send error. */
+	while ((n = fread(bufo, 1, sizeof(bufo), f)) > 0)
+		if (mysend(s, bufo, (int)n) < 0)
+			break;
+	fclose(f);
+}
+
+static int analyze_http(char *http, int s) {
+	char *url;
+
+	if (!strncmp(http, "GET ", 4)) {
+		url = http + 4;
+		while (*url == ' ')
+			url++;
+		char *p = strchr(url, ' ');
+		if (p)
+			*p = 0;
+		else
+			return -1;
+		logprint("GET %s (fd=%d)\n", url, s);
+		get_file(s, url);
+	} else if (!strncmp(http, "POST ", 5)) {
+		/* Post is not supported.
*/ + logprint("%s\n", http); + } + + return 0; +} + +static void *webserver(void *arg) +{ + int serv_fd, tmp_fd; + unsigned int alen; + struct ofp_sockaddr_in my_addr, caller; + ofp_fd_set read_fd; + (void)arg; + + logprint("HTTP thread started\n"); + + odp_init_local(); + ofp_init_local(); + sleep(1); + + myaddr = ofp_port_get_ipv4_addr(0, 0, OFP_PORTCONF_IP_TYPE_IP_ADDR); + + if ((serv_fd = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, OFP_IPPROTO_TCP)) < 0) { + perror("serv socket"); + logprint("Cannot open http socket!\n"); + return NULL; + } + + memset(&my_addr, 0, sizeof(my_addr)); + my_addr.sin_family = OFP_AF_INET; + my_addr.sin_port = odp_cpu_to_be_16(2048); + my_addr.sin_addr.s_addr = myaddr; + my_addr.sin_len = sizeof(my_addr); + + if (ofp_bind(serv_fd, (struct ofp_sockaddr *)&my_addr, + sizeof(struct ofp_sockaddr)) < 0) { + logprint("Cannot bind http socket (%s)!\n", ofp_strerror(ofp_errno)); + return 0; + } + + ofp_listen(serv_fd, 10); + OFP_FD_ZERO(&read_fd); + OFP_FD_SET(serv_fd, &read_fd); + + for ( ; ; ) + { + int r, i; + static char buf[1024]; + struct ofp_timeval timeout; + + timeout.tv_sec = 0; + timeout.tv_usec = 200000; + + r = ofp_select(32, &read_fd, NULL, NULL, &timeout); + if (r <= 0) + continue; + + if (OFP_FD_ISSET(serv_fd, &read_fd)) { + alen = sizeof(caller); + if ((tmp_fd = ofp_accept(serv_fd, + (struct ofp_sockaddr *)&caller, + &alen)) > 0) { + logprint("accept %d\n", tmp_fd); + + for (i = 0; i < NUM_CONNECTIONS; i++) + if (connections[i].fd == 0) + break; + + if (i >= NUM_CONNECTIONS) { + logprint("Node cannot accept new connections!\n"); + ofp_close(tmp_fd); + continue; + } + +#if 0 + struct ofp_linger so_linger; + so_linger.l_onoff = 1; + so_linger.l_linger = 0; + int r1 = ofp_setsockopt(tmp_fd, + OFP_SOL_SOCKET, + OFP_SO_LINGER, + &so_linger, + sizeof so_linger); + if (r1) OFP_LOG("SO_LINGER failed!\n"); +#endif + struct ofp_timeval tv; + tv.tv_sec = 3; + tv.tv_usec = 0; + int r2 = ofp_setsockopt(tmp_fd, + OFP_SOL_SOCKET, + 
OFP_SO_SNDTIMEO, + &tv, + sizeof tv); + if (r2) OFP_LOG("SO_SNDTIMEO failed!\n"); + + connections[i].fd = tmp_fd; + connections[i].addr = caller.sin_addr.s_addr; + connections[i].closed = FALSE; + + OFP_FD_SET(tmp_fd, &read_fd); + } + } + + for (i = 0; i < NUM_CONNECTIONS; i++) { + if (connections[i].fd == 0) + continue; + + if (!(OFP_FD_ISSET(connections[i].fd, &read_fd))) + continue; + + r = ofp_recv(connections[i].fd, buf, sizeof(buf)-1, 0); + if (r > 0) { + buf[r] = 0; + logprint("DATA='%s'\n", buf); + + if (!strncmp(buf, "GET", 3)) + analyze_http(buf, connections[i].fd); + else + logprint("http req error\n"); + + logprint("closing %d\n", connections[i].fd); + OFP_FD_CLR(connections[i].fd, &read_fd); + while (ofp_close(connections[i].fd) < 0) { + OFP_LOG("Socket %d close err: %s\n", + connections[i].fd, + ofp_strerror(ofp_errno)); + sleep(1); + } + logprint("closed %d\n", connections[i].fd); + connections[i].fd = 0; + } else if (r == 0) { + if (connections[i].post) { + printf("file download finished\n"); + fclose(connections[i].post); + connections[i].post = NULL; + } + ofp_close(connections[i].fd); + OFP_FD_CLR(connections[i].fd, &read_fd); + connections[i].fd = 0; + } + } + } + + logprint("httpd exit\n"); + return NULL; +} + +void ofp_start_webserver_thread(int core_id) +{ + odph_linux_pthread_t test_linux_pthread; + odp_cpumask_t cpumask; + + odp_cpumask_zero(&cpumask); + odp_cpumask_set(&cpumask, core_id); + + odph_linux_pthread_create(&test_linux_pthread, + &cpumask, + webserver, + NULL); +} diff --git a/example/webserver/httpd.h b/example/webserver/httpd.h new file mode 100644 index 00000000..c2f312e8 --- /dev/null +++ b/example/webserver/httpd.h @@ -0,0 +1,6 @@ +#ifndef _HTTPD_H_ +#define _HTTPD_H_ + +void ofp_start_webserver_thread(int core_id); + +#endif diff --git a/example/webserver/ofp.conf b/example/webserver/ofp.conf new file mode 100644 index 00000000..6f1c9100 --- /dev/null +++ b/example/webserver/ofp.conf @@ -0,0 +1,3 @@ +debug 0 +loglevel set 
debug +ifconfig fp0 192.168.56.33/24 diff --git a/example/webserver2/Makefile.am b/example/webserver2/Makefile.am new file mode 100644 index 00000000..1ee01d39 --- /dev/null +++ b/example/webserver2/Makefile.am @@ -0,0 +1,11 @@ +include $(top_srcdir)/example/Makefile.inc + +bin_PROGRAMS = webserver2 + +AM_CFLAGS += -I$(top_srcdir)/include +AM_CFLAGS += -I$(top_srcdir)/include/api + +webserver2_LDFLAGS = $(AM_LDFLAGS) -static +webserver2_CFLAGS = $(AM_CFLAGS) + +dist_webserver2_SOURCES = app_main.c httpd2.c diff --git a/example/webserver2/app_main.c b/example/webserver2/app_main.c new file mode 100644 index 00000000..6a64b078 --- /dev/null +++ b/example/webserver2/app_main.c @@ -0,0 +1,307 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofp.h" +#include "httpd.h" + +#define MAX_WORKERS 32 + +/** + * Parsed command line application arguments + */ +typedef struct { + int core_count; + int if_count; /**< Number of interfaces to be used */ + char **if_names; /**< Array of pointers to interface names */ + char *conf_file; +} appl_args_t; + +/* helper funcs */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args); +static void print_info(char *progname, appl_args_t *appl_args); +static void usage(char *progname); + +ofp_init_global_t app_init_params; /**< global OFP init parms */ + +/** Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(file_name) (strrchr((file_name), '/') ? 
\ + strrchr((file_name), '/') + 1 : (file_name)) + + +/** local hook + * + * @param pkt packet handed to the hook (unused) + * @param arg hook argument, read here as a pointer to an int protocol + * value (the value itself is unused) + * @return OFP_PKT_CONTINUE always + * + */ +static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, void *arg) +{ + int protocol = *(int *)arg; + (void) pkt; + (void) protocol; + return OFP_PKT_CONTINUE; +} + + +/** main() Application entry point + * + * @param argc argument count + * @param argv[] argument vector + * @return 0 on normal exit; the process exits with EXIT_FAILURE when + * argument parsing or ODP initialization fails + * + */ +#include <sys/time.h> +#include <sys/resource.h> + +int main(int argc, char *argv[]) +{ + odph_linux_pthread_t thread_tbl[MAX_WORKERS]; + appl_args_t params; + int core_count, num_workers; + odp_cpumask_t cpumask; + char cpumaskstr[64]; + + struct rlimit rlp; + /* Try to raise the core-dump size limit; rlp is only valid on success. */ + if (getrlimit(RLIMIT_CORE, &rlp) == 0) { + /* rlim_t is unsigned and of platform-dependent width: cast to print. */ + printf("RLIMIT_CORE: %llu/%llu\n", + (unsigned long long)rlp.rlim_cur, + (unsigned long long)rlp.rlim_max); + rlp.rlim_cur = 200000000; + printf("Setting to max: %d\n", setrlimit(RLIMIT_CORE, &rlp)); + } + + /* Parse and store the application arguments */ + parse_args(argc, argv, &params); + + /* Print both system and application information */ + print_info(NO_PATH(argv[0]), &params); + + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + exit(EXIT_FAILURE); + } + if (odp_init_local()) { + OFP_ERR("Error: ODP local init failed.\n"); + exit(EXIT_FAILURE); + } + + core_count = odp_cpu_count(); + num_workers = core_count; + + if (params.core_count) + num_workers = params.core_count; + if (num_workers > MAX_WORKERS) + num_workers = MAX_WORKERS; + + /* + * By default core #0 runs Linux kernel background tasks. 
+ * Start mapping thread from core #1 + */ + memset(&app_init_params, 0, sizeof(app_init_params)); + + app_init_params.linux_core_id = 0; + + if (core_count > 1) + num_workers--; + + num_workers = odph_linux_cpumask_default(&cpumask, num_workers); + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("Num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + app_init_params.if_count = params.if_count; + app_init_params.if_names = params.if_names; + app_init_params.pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + ofp_init_global(&app_init_params); + + memset(thread_tbl, 0, sizeof(thread_tbl)); + /* Start dataplane dispatcher worker threads */ + + odph_linux_pthread_create(thread_tbl, + &cpumask, + default_event_dispatcher, + ofp_eth_vlan_processing); + + /* other app code here.*/ + /* Start CLI */ + ofp_start_cli_thread(app_init_params.linux_core_id, params.conf_file); + + /* webserver */ + ofp_start_webserver_thread(app_init_params.linux_core_id); + + odph_linux_pthread_join(thread_tbl, num_workers); + printf("End Main()\n"); + + return 0; +} + +/** + * Parse and store the command line arguments + * + * @param argc argument count + * @param argv[] argument vector + * @param appl_args Store application arguments here + */ +static void parse_args(int argc, char *argv[], appl_args_t *appl_args) +{ + int opt; + int long_index; + char *names, *str, *token, *save; + size_t len; + int i; + static struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"interface", required_argument, NULL, 'i'}, /* return 'i' */ + {"help", no_argument, NULL, 'h'}, /* return 'h' */ + {"configuration file", required_argument, + NULL, 'f'},/* return 'f' */ + {NULL, 0, NULL, 0} + }; + + memset(appl_args, 0, sizeof(*appl_args)); + + while (1) { + opt = getopt_long(argc, argv, "+c:i:hf:", + longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + 
switch (opt) { + case 'c': + appl_args->core_count = atoi(optarg); + break; + /* parse packet-io interface names */ + case 'i': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + names = malloc(len); + if (names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* count the number of tokens separated by ',' */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + } + appl_args->if_count = i; + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* allocate storage for the if names */ + appl_args->if_names = + calloc(appl_args->if_count, sizeof(char *)); + if (appl_args->if_names == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + /* + * store the if names (reset names string); the stored pointers + * point into names, so names must stay allocated + */ + strcpy(names, optarg); + for (str = names, i = 0;; str = NULL, i++) { + token = strtok_r(str, ",", &save); + if (token == NULL) + break; + appl_args->if_names[i] = token; + } + break; + + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + + case 'f': + len = strlen(optarg); + if (len == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + len += 1; /* add room for '\0' */ + + appl_args->conf_file = malloc(len); + if (appl_args->conf_file == NULL) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + strcpy(appl_args->conf_file, optarg); + break; + + default: + break; + } + } + + if (appl_args->if_count == 0) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + optind = 1; /* reset 'extern optind' from the getopt lib */ +} + +/** + * Print system and application info + * + * @param progname program name shown in the application banner + * @param appl_args parsed arguments; if_count and if_names are printed + */ +static void print_info(char *progname, appl_args_t *appl_args) +{ + int i; + + printf("\n" + "ODP system info\n" + "---------------\n" + "ODP API version: %s\n" + "CPU model: %s\n" + "CPU freq (hz): %"PRIu64"\n" + "Cache line size: %i\n" + "Core count: %i\n" + "\n", + odp_version_api_str(), odp_sys_cpu_model_str(), + odp_sys_cpu_hz(), odp_sys_cache_line_size(), + odp_cpu_count()); + + 
printf("Running ODP appl: \"%s\"\n" + "-----------------\n" + "IF-count: %i\n" + "Using IFs: ", + progname, appl_args->if_count); + for (i = 0; i < appl_args->if_count; ++i) + printf(" %s", appl_args->if_names[i]); + printf("\n\n"); + fflush(NULL); +} + +/** + * Print usage information + * + * @param progname program name or path; any leading directory part is + * removed with NO_PATH() before printing + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. %s -i eth1,eth2,eth3\n" + "\n" + "ODPFastpath application.\n" + "\n" + "Mandatory OPTIONS:\n" + " -i, --interface Eth interfaces (comma-separated, no spaces)\n" + "\n" + "Optional OPTIONS\n" + " -c, --count Core count.\n" + " -f <file> Configuration file passed to the CLI thread.\n" + " -h, --help Display help and exit.\n" + "\n", NO_PATH(progname), NO_PATH(progname) + ); +} diff --git a/example/webserver2/httpd.h b/example/webserver2/httpd.h new file mode 100644 index 00000000..c2f312e8 --- /dev/null +++ b/example/webserver2/httpd.h @@ -0,0 +1,6 @@ +#ifndef _HTTPD_H_ +#define _HTTPD_H_ + +void ofp_start_webserver_thread(int core_id); + +#endif diff --git a/example/webserver2/httpd2.c b/example/webserver2/httpd2.c new file mode 100644 index 00000000..3c2e5770 --- /dev/null +++ b/example/webserver2/httpd2.c @@ -0,0 +1,249 @@ +#include +#include +#include +#include +#include +#include + +#include "ofp.h" + +#include "httpd.h" + +void httpd_main(uint32_t addr); + +#define logprint(a...) do {} while (0) +//#define logprint OFP_LOG + +int sigreceived = 0; +static uint32_t myaddr; + +/* Set www_dir to point to your web directory. */ +static const char *www_dir = "/home/hjokinen/Dropbox/kolumbus-web"; + +/* + * Sending function with some debugging: loops over ofp_send() until all + * len bytes of p are sent. Returns 0 on success or the negative + * ofp_send() result on failure. + */ +static int mysend(int s, char *p, int len) +{ + int n; + + while (len > 0) { + n = ofp_send(s, p, len, 0); + if (n < 0) { + OFP_LOG("mysend: cannot send (%d): %s\n", + n, ofp_strerror(ofp_errno)); + return n; + } + len -= n; + p += n; + if (len) { + logprint("mysend: only %d bytes sent\n", n); + } + } + return len; +} + +/* printf-style wrapper around mysend(). */ +static int sendf(int fd, const char *fmt, ...) 
+{ + char buf[1024]; + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + if (n < 0) + return n; + /* vsnprintf returns the untruncated length; never send past buf. */ + if ((size_t)n >= sizeof(buf)) + n = sizeof(buf) - 1; + return mysend(fd, buf, n); +} + +/* + * Send one file. url is the request path; its first character is skipped + * and an empty remainder maps to index.html. Replies 403 when the path + * contains "..", 404 when the file cannot be opened, otherwise 200 with a + * Content-Type chosen from the file extension (omitted when unknown) + * followed by the file contents. + */ +static void get_file(int s, char *url) +{ + char bufo[512]; + int n, w; + + const char *mime = NULL; + const char *p = url + 1; + + if (*p == 0) + p = "index.html"; + + /* url comes straight from the client: do not let it leave www_dir. */ + if (strstr(p, "..")) { + sendf(s, "HTTP/1.0 403 Forbidden\r\n\r\n"); + return; + } + + char *p2 = strrchr(p, '.'); + if (p2) { + p2++; + if (!strcmp(p2, "html")) mime = "text/html"; + else if (!strcmp(p2, "htm")) mime = "text/html"; + else if (!strcmp(p2, "css")) mime = "text/css"; + else if (!strcmp(p2, "txt")) mime = "text/plain"; + else if (!strcmp(p2, "png")) mime = "image/png"; + else if (!strcmp(p2, "jpg")) mime = "image/jpeg"; + else if (!strcmp(p2, "class")) mime = "application/x-java-applet"; + else if (!strcmp(p2, "jar")) mime = "application/java-archive"; + else if (!strcmp(p2, "pdf")) mime = "application/pdf"; + else if (!strcmp(p2, "swf")) mime = "application/x-shockwave-flash"; + else if (!strcmp(p2, "ico")) mime = "image/vnd.microsoft.icon"; + else if (!strcmp(p2, "js")) mime = "text/javascript"; + } + + n = snprintf(bufo, sizeof(bufo), "%s/%s", www_dir, p); + FILE *f = NULL; + /* A truncated path would name a different file: treat it as missing. */ + if (n > 0 && (size_t)n < sizeof(bufo)) + f = fopen(bufo, "rb"); + + if (!f) { + sendf(s, "HTTP/1.0 404 NOK\r\n\r\n"); + return; + } + + sendf(s, "HTTP/1.0 200 OK\r\n"); + if (mime) + sendf(s, "Content-Type: %s\r\n\r\n", mime); + else + sendf(s, "\r\n"); + + while ((n = fread(bufo, 1, sizeof(bufo), f)) > 0) + if ((w = mysend(s, bufo, n)) < 0) + break; + fclose(f); +} + +static int analyze_http(char *http, int s) { + char *url; + + if (!strncmp(http, "GET ", 4)) { + url = http + 4; + while (*url == ' ') + url++; + char *p = strchr(url, ' '); + if (p) + *p = 0; + else + return -1; + logprint("GET %s (fd=%d)\n", url, s); + get_file(s, url); + } else if (!strncmp(http, "POST ", 5)) { + /* Post is not supported. 
*/ + logprint("%s\n", http); + } + + return 0; +} + +/* + * Socket event callback (installed by webserver() through + * ofp_socket_sigevent()). OFP_EVENT_ACCEPT: accept the connection on the + * listening socket and return. OFP_EVENT_RECV: NUL-terminate the packet + * data, pass it to analyze_http(), close the socket and free the packet. + * Any other event is ignored. + */ +static void notify(union ofp_sigval sv) +{ + struct ofp_sock_sigval *ss = sv.sival_ptr; + int s = ss->sockfd; + int event = ss->event; + odp_packet_t pkt = ss->pkt; + int r; + char *buf, *tail; + + if (event == OFP_EVENT_ACCEPT) { + struct ofp_sockaddr_in caller; + ofp_socklen_t alen = sizeof(caller); + /* + * ss->sockfd is the original listened socket. + * ss->sockfd2 is the new socket that is returned by accept. + * We don't need the returned socket, but accept + * must be called to set the data structures. + */ + int new = ofp_accept(ss->sockfd, + (struct ofp_sockaddr *)&caller, + &alen); + (void)new; + /* new == ss->sockfd2 */ + return; + } + + if (event != OFP_EVENT_RECV) + return; + + r = odp_packet_len(pkt); + + if (r > 0) { + buf = odp_packet_data(pkt); + /* Add 0 to the end; push_tail may return NULL (no tailroom) */ + tail = odp_packet_push_tail(pkt, 1); + if (tail != NULL) { + *tail = 0; + analyze_http(buf, s); + } else { + OFP_ERR("Socket %d: cannot terminate request\n", s); + } + + if (ofp_close(s) < 0) + OFP_ERR("Socket %d close err: %s\n", + s, + ofp_strerror(ofp_errno)); + } else if (r == 0) { + ofp_close(s); + } + + odp_packet_free(pkt); + /* + * Mark ss->pkt invalid to indicate it was released by us. 
+ */ + ss->pkt = ODP_PACKET_INVALID; +} + +/* + * Webserver thread body: opens a TCP socket, binds it to the address + * returned by ofp_port_get_ipv4_addr(0, 0, ...) on TCP port 2048, listens + * on it and registers notify() as the socket event callback, then sleeps + * forever. Returns NULL only when the socket setup fails. + */ +static void *webserver(void *arg) +{ + int serv_fd; + struct ofp_sockaddr_in my_addr; + ofp_fd_set read_fd; + (void)arg; + + logprint("HTTP thread started\n"); + + odp_init_local(); + ofp_init_local(); + sleep(1); + + myaddr = ofp_port_get_ipv4_addr(0, 0, OFP_PORTCONF_IP_TYPE_IP_ADDR); + + if ((serv_fd = ofp_socket(OFP_AF_INET, OFP_SOCK_STREAM, OFP_IPPROTO_TCP)) < 0) { + /* Failed ofp_* calls are reported through ofp_errno, not errno. */ + OFP_ERR("Cannot open http socket (%s)!\n", + ofp_strerror(ofp_errno)); + return NULL; + } + + memset(&my_addr, 0, sizeof(my_addr)); + my_addr.sin_family = OFP_AF_INET; + my_addr.sin_port = odp_cpu_to_be_16(2048); + my_addr.sin_addr.s_addr = myaddr; + my_addr.sin_len = sizeof(my_addr); + + if (ofp_bind(serv_fd, (struct ofp_sockaddr *)&my_addr, + sizeof(struct ofp_sockaddr)) < 0) { + OFP_ERR("Cannot bind http socket (%s)!\n", ofp_strerror(ofp_errno)); + ofp_close(serv_fd); + return NULL; + } + + if (ofp_listen(serv_fd, 10) < 0) { + OFP_ERR("Cannot listen on http socket (%s)!\n", + ofp_strerror(ofp_errno)); + ofp_close(serv_fd); + return NULL; + } + + /* + * ss is handed to the socket layer by address, so it must stay alive; + * it does because this function never leaves the loop below. + */ + struct ofp_sigevent ev; + struct ofp_sock_sigval ss; + ss.sockfd = serv_fd; + ss.event = 0; + ss.pkt = ODP_PACKET_INVALID; + ev.ofp_sigev_notify = 1; + ev.ofp_sigev_notify_function = notify; + ev.ofp_sigev_value.sival_ptr = &ss; + ofp_socket_sigevent(&ev); + + OFP_FD_ZERO(&read_fd); + OFP_FD_SET(serv_fd, &read_fd); + + while (1) { + sleep(1); + } + + logprint("httpd exit\n"); + return NULL; +} + +/* + * Create one thread running webserver() with a NULL argument, using a + * cpumask in which only core_id is set. + */ +void ofp_start_webserver_thread(int core_id) +{ + odph_linux_pthread_t test_linux_pthread; + odp_cpumask_t cpumask; + + odp_cpumask_zero(&cpumask); + odp_cpumask_set(&cpumask, core_id); + + odph_linux_pthread_create(&test_linux_pthread, + &cpumask, + webserver, + NULL); +} diff --git a/example/webserver2/ofp.conf b/example/webserver2/ofp.conf new file mode 100644 index 00000000..6f1c9100 --- /dev/null +++ b/example/webserver2/ofp.conf @@ -0,0 +1,3 @@ +debug 0 +loglevel set debug +ifconfig fp0 192.168.56.33/24 diff --git a/include/api/ofp.h b/include/api/ofp.h new file mode 100644 index 00000000..45fe8940 --- /dev/null +++ b/include/api/ofp.h @@ -0,0 +1,43 @@ +/* Copyright (c) 
2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_H__ +#define __OFP_H__ + +#include +#include + +#include "ofp_types.h" +#include "ofp_init.h" +#include "ofp_pkt_processing.h" +#include "ofp_cli.h" +#include "ofp_log.h" +#include "ofp_timer.h" +#include "ofp_hook.h" +#include "ofp_route_arp.h" +#include "ofp_portconf.h" +#include "ofp_debug.h" +#include "ofp_stat.h" +#include "ofp_socket_types.h" +#include "ofp_socket.h" +#include "ofp_in.h" +#include "ofp_in6.h" +#include "ofp_errno.h" +#include "ofp_ioctl.h" +#include "ofp_utils.h" +#include "ofp_sysctl.h" +#include "ofp_ethernet.h" +#include "ofp_ip.h" +#include "ofp_ip6.h" +#include "ofp_icmp.h" +#include "ofp_icmp6.h" +#include "ofp_if_vlan.h" +#include "ofp_udp.h" +#include "ofp_ip_var.h" +#include "ofp_tcp.h" + +#endif /* __OFP_H__ */ + diff --git a/include/api/ofp_cli.h b/include/api/ofp_cli.h new file mode 100644 index 00000000..38e424ad --- /dev/null +++ b/include/api/ofp_cli.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_CLI_H__ +#define __OFP_CLI_H__ + +/** CLI Start thread + */ +void ofp_start_cli_thread(int core_id, char *conf_file); + +#endif /* __OFP_CLI_H__ */ diff --git a/include/api/ofp_debug.h b/include/api/ofp_debug.h new file mode 100644 index 00000000..94e1d3ca --- /dev/null +++ b/include/api/ofp_debug.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_DEBUG_H__ +#define __OFP_DEBUG_H__ + + +/* + * Debug configure interface + */ +#define OFP_DEBUG_PRINT_RECV_NIC 1 +#define OFP_DEBUG_PRINT_SEND_NIC 2 +#define OFP_DEBUG_PRINT_RECV_KNI 4 +#define OFP_DEBUG_PRINT_SEND_KNI 8 +#define OFP_DEBUG_PRINT_CONSOLE 16 +#define OFP_DEBUG_CAPTURE 64 + +void ofp_set_debug_flags(int flags); +int ofp_get_debug_flags(void); + +#define OFP_DEBUG_PCAP_PORT_MASK 0x3f +#define OFP_DEBUG_PCAP_CONF_ADD_INFO 0x80000000 + +void ofp_set_debug_capture_ports(int ports); +int ofp_get_debug_capture_ports(void); + +/* + * Debug PCAP interface + */ +void ofp_set_capture_file(const char *filename); +void ofp_get_capture_file(char *filename, int max_size); + +/* + * Debug PRINT interface + */ +void ofp_print_packet(const char *comment, odp_packet_t pkt); + +#endif /*__OFP_DEBUG_H__*/ diff --git a/include/api/ofp_errno.h b/include/api/ofp_errno.h new file mode 100644 index 00000000..966af5a0 --- /dev/null +++ b/include/api/ofp_errno.h @@ -0,0 +1,177 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef __OFP_ERRNO_H__ +#define __OFP_ERRNO_H__ + +#define OFP_EPERM 1 /* Operation not permitted */ +#define OFP_ENOENT 2 /* No such file or directory */ +#define OFP_ESRCH 3 /* No such process */ +#define OFP_EINTR 4 /* Interrupted system call */ +#define OFP_EIO 5 /* Input/output error */ +#define OFP_ENXIO 6 /* Device not configured */ +#define OFP_E2BIG 7 /* Argument list too long */ +#define OFP_ENOEXEC 8 /* Exec format error */ +#define OFP_EBADF 9 /* Bad file descriptor */ +#define OFP_ECHILD 10 /* No child processes */ +#define OFP_EDEADLK 11 /* Resource deadlock avoided */ + /* 11 was OFP_EAGAIN */ +#define OFP_ENOMEM 12 /* Cannot allocate memory */ +#define OFP_EACCES 13 /* Permission denied */ +#define OFP_EFAULT 14 /* Bad address */ + +#define OFP_ENOTBLK 15 /* Block device required */ + +#define OFP_EBUSY 16 /* Device busy */ +#define OFP_EEXIST 17 /* File exists */ +#define OFP_EXDEV 18 /* Cross-device link */ +#define OFP_ENODEV 19 /* Operation not supported by device */ 
+#define OFP_ENOTDIR 20 /* Not a directory */ +#define OFP_EISDIR 21 /* Is a directory */ +#define OFP_EINVAL 22 /* Invalid argument */ +#define OFP_ENFILE 23 /* Too many open files in system */ +#define OFP_EMFILE 24 /* Too many open files */ +#define OFP_ENOTTY 25 /* Inappropriate ioctl for device */ + +#define OFP_ETXTBSY 26 /* Text file busy */ + +#define OFP_EFBIG 27 /* File too large */ +#define OFP_ENOSPC 28 /* No space left on device */ +#define OFP_ESPIPE 29 /* Illegal seek */ +#define OFP_EROFS 30 /* Read-only filesystem */ +#define OFP_EMLINK 31 /* Too many links */ +#define OFP_EPIPE 32 /* Broken pipe */ + +/* math software */ +#define OFP_EDOM 33 /* Numerical argument out of domain */ +#define OFP_ERANGE 34 /* Result too large */ + +/* non-blocking and interrupt i/o */ +#define OFP_EAGAIN 35 /* Resource temporarily unavailable */ + +#define OFP_EWOULDBLOCK OFP_EAGAIN /* Operation would block */ +#define OFP_EINPROGRESS 36 /* Operation now in progress */ +#define OFP_EALREADY 37 /* Operation already in progress */ + +/* ipc/network software -- argument errors */ +#define OFP_ENOTSOCK 38 /* Socket operation on non-socket */ +#define OFP_EDESTADDRREQ 39 /* Destination address required */ +#define OFP_EMSGSIZE 40 /* Message too long */ +#define OFP_EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define OFP_ENOPROTOOPT 42 /* Protocol not available */ +#define OFP_EPROTONOSUPPORT 43 /* Protocol not supported */ +#define OFP_ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define OFP_EOPNOTSUPP 45 /* Operation not supported */ +#define OFP_ENOTSUP OFP_EOPNOTSUPP /* Operation not supported */ +#define OFP_EPFNOSUPPORT 46 /* Protocol family not supported */ +#define OFP_EAFNOSUPPORT 47 /* Address family not supported by protocol family */ +#define OFP_EADDRINUSE 48 /* Address already in use */ +#define OFP_EADDRNOTAVAIL 49 /* Can't assign requested address */ + +/* ipc/network software -- operational errors */ +#define OFP_ENETDOWN 50 /* Network is 
down */ +#define OFP_ENETUNREACH 51 /* Network is unreachable */ +#define OFP_ENETRESET 52 /* Network dropped connection on reset */ +#define OFP_ECONNABORTED 53 /* Software caused connection abort */ +#define OFP_ECONNRESET 54 /* Connection reset by peer */ +#define OFP_ENOBUFS 55 /* No buffer space available */ +#define OFP_EISCONN 56 /* Socket is already connected */ +#define OFP_ENOTCONN 57 /* Socket is not connected */ +#define OFP_ESHUTDOWN 58 /* Can't send after socket shutdown */ +#define OFP_ETOOMANYREFS 59 /* Too many references: can't splice */ +#define OFP_ETIMEDOUT 60 /* Operation timed out */ +#define OFP_ECONNREFUSED 61 /* Connection refused */ + +#define OFP_ELOOP 62 /* Too many levels of symbolic links */ + +#define OFP_ENAMETOOLONG 63 /* File name too long */ + +/* should be rearranged */ + +#define OFP_EHOSTDOWN 64 /* Host is down */ +#define OFP_EHOSTUNREACH 65 /* No route to host */ + +#define OFP_ENOTEMPTY 66 /* Directory not empty */ + +/* quotas & mush */ + +#define OFP_EPROCLIM 67 /* Too many processes */ +#define OFP_EUSERS 68 /* Too many users */ +#define OFP_EDQUOT 69 /* Disc quota exceeded */ + +/* Network File System */ +#define OFP_ESTALE 70 /* Stale NFS file handle */ +#define OFP_EREMOTE 71 /* Too many levels of remote in path */ +#define OFP_EBADRPC 72 /* RPC struct is bad */ +#define OFP_ERPCMISMATCH 73 /* RPC version wrong */ +#define OFP_EPROGUNAVAIL 74 /* RPC prog. 
not avail */ +#define OFP_EPROGMISMATCH 75 /* Program version wrong */ +#define OFP_EPROCUNAVAIL 76 /* Bad procedure for program */ + +#define OFP_ENOLCK 77 /* No locks available */ +#define OFP_ENOSYS 78 /* Function not implemented */ + +#define OFP_EFTYPE 79 /* Inappropriate file type or format */ +#define OFP_EAUTH 80 /* Authentication error */ +#define OFP_ENEEDAUTH 81 /* Need authenticator */ +#define OFP_EIDRM 82 /* Identifier removed */ +#define OFP_ENOMSG 83 /* No message of desired type */ +#define OFP_EOVERFLOW 84 /* Value too large to be stored in data type */ +#define OFP_ECANCELED 85 /* Operation canceled */ +#define OFP_EILSEQ 86 /* Illegal byte sequence */ +#define OFP_ENOATTR 87 /* Attribute not found */ + +#define OFP_EDOOFUS 88 /* Programming error */ + +#define OFP_EBADMSG 89 /* Bad message */ +#define OFP_EMULTIHOP 90 /* Multihop attempted */ +#define OFP_ENOLINK 91 /* Link has been severed */ +#define OFP_EPROTO 92 /* Protocol error */ + +#define OFP_ENOTCAPABLE 93 /* Capabilities insufficient */ +#define OFP_ECAPMODE 94 /* Not permitted in capability mode */ + +#define OFP_ELAST 94 /* Must be equal largest errno */ + +/* pseudo-errors returned inside kernel to modify return to process */ +#define OFP_ERESTART (-1) /* restart syscall */ +#define OFP_EJUSTRETURN (-2) /* don't modify regs, just return */ +#define OFP_ENOIOCTL (-3) /* ioctl not handled by this layer */ +#define OFP_EDIRIOCTL (-4) /* do direct ioctl in GEOM */ + +extern int ofp_errno; +const char *ofp_strerror(int errnum); + +#endif /* __OFP_ERRNO_H__ */ diff --git a/include/api/ofp_ethernet.h b/include/api/ofp_ethernet.h new file mode 100644 index 00000000..7c7def6c --- /dev/null +++ b/include/api/ofp_ethernet.h @@ -0,0 +1,339 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* + * Fundamental constants relating to ethernet. 
+ * + * $FreeBSD: release/9.1.0/sys/net/ethernet.h 191148 2009-04-16 20:30:28Z kmacy $ + * + */ + +#ifndef _OFP_ETHERNET_H_ +#define _OFP_ETHERNET_H_ + +/* + * Some basic Ethernet constants. + */ +#define OFP_ETHER_ADDR_LEN 6 /* length of an Ethernet address */ +#define OFP_ETHER_TYPE_LEN 2 /* length of the Ethernet type field */ +#define OFP_ETHER_CRC_LEN 4 /* length of the Ethernet CRC */ +#define OFP_ETHER_HDR_LEN (OFP_ETHER_ADDR_LEN*2+OFP_ETHER_TYPE_LEN) +#define OFP_ETHER_MIN_LEN 64 /* minimum frame len, including CRC */ +#define OFP_ETHER_MAX_LEN 1518 /* maximum frame len, including CRC */ +#define OFP_ETHER_MAX_LEN_JUMBO 9018 /* max jumbo frame len, including CRC */ + +#define OFP_ETHER_VLAN_ENCAP_LEN 4 /* len of 802.1Q VLAN encapsulation */ + +/* + * A macro to validate a length with + */ +#define OFP_ETHER_IS_VALID_LEN(foo) \ + ((foo) >= OFP_ETHER_MIN_LEN && (foo) <= OFP_ETHER_MAX_LEN) + +/* + * Structure of a 10Mb/s Ethernet header. + */ +struct ofp_ether_header { + uint8_t ether_dhost[OFP_ETHER_ADDR_LEN]; + uint8_t ether_shost[OFP_ETHER_ADDR_LEN]; + uint16_t ether_type; +} __attribute__((packed)); + +/* + * Structure of a 48-bit Ethernet address. + */ +struct ofp_ether_addr { + uint8_t octet[OFP_ETHER_ADDR_LEN]; +} __attribute__((packed)); + +#define OFP_ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */ + +#define OFP_ETHERTYPE_IS_VLAN(et) \ + (((et) == OFP_ETHERTYPE_VLAN) || \ + ((et) == OFP_ETHERTYPE_QINQ_STD) || \ + ((et) == OFP_ETHERTYPE_QINQ_VENDOR1) || \ + ((et) == OFP_ETHERTYPE_QINQ_VENDOR2) || \ + ((et) == OFP_ETHERTYPE_QINQ_VENDOR3)) + +/* + * NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields. + * However, there are some conflicts. + */ + +#define OFP_ETHERTYPE_8023 0x0004 /* IEEE 802.3 packet */ + /* 0x0101 .. 
0x1FF Experimental */ +#define OFP_ETHERTYPE_PUP 0x0200 /* Xerox PUP protocol - see 0A00 */ +#define OFP_ETHERTYPE_PUPAT 0x0200 /* PUP Address Translation - see 0A01 */ +#define OFP_ETHERTYPE_SPRITE 0x0500 /* ??? */ + /* 0x0400 Nixdorf */ +#define OFP_ETHERTYPE_NS 0x0600 /* XNS */ +#define OFP_ETHERTYPE_NSAT 0x0601 /* XNS Address Translation (3Mb only) */ +#define OFP_ETHERTYPE_DLOG1 0x0660 /* DLOG (?) */ +#define OFP_ETHERTYPE_DLOG2 0x0661 /* DLOG (?) */ +#define OFP_ETHERTYPE_IP 0x0800 /* IP protocol */ +#define OFP_ETHERTYPE_X75 0x0801 /* X.75 Internet */ +#define OFP_ETHERTYPE_NBS 0x0802 /* NBS Internet */ +#define OFP_ETHERTYPE_ECMA 0x0803 /* ECMA Internet */ +#define OFP_ETHERTYPE_CHAOS 0x0804 /* CHAOSnet */ +#define OFP_ETHERTYPE_X25 0x0805 /* X.25 Level 3 */ +#define OFP_ETHERTYPE_ARP 0x0806 /* Address resolution protocol */ +#define OFP_ETHERTYPE_NSCOMPAT 0x0807 /* XNS Compatibility */ +#define OFP_ETHERTYPE_FRARP 0x0808 /* Frame Relay ARP (RFC1701) */ + /* 0x081C Symbolics Private */ + /* 0x0888 - 0x088A Xyplex */ +#define OFP_ETHERTYPE_UBDEBUG 0x0900 /* Ungermann-Bass network debugger */ +#define OFP_ETHERTYPE_IEEEPUP 0x0A00 /* Xerox IEEE802.3 PUP */ +#define OFP_ETHERTYPE_IEEEPUPAT 0x0A01 /* Xerox IEEE802.3 PUP Address Translation */ +#define OFP_ETHERTYPE_VINES 0x0BAD /* Banyan VINES */ +#define OFP_ETHERTYPE_VINESLOOP 0x0BAE /* Banyan VINES Loopback */ +#define OFP_ETHERTYPE_VINESECHO 0x0BAF /* Banyan VINES Echo */ + +/* 0x1000 - 0x100F Berkeley Trailer */ +/* + * The OFP_ETHERTYPE_NTRAILER packet types starting at OFP_ETHERTYPE_TRAIL have + * (type-OFP_ETHERTYPE_TRAIL)*512 bytes of data followed + * by an Ethernet type (as given above) and then the (variable-length) header. 
+ */ +#define OFP_ETHERTYPE_TRAIL 0x1000 /* Trailer packet */ +#define OFP_ETHERTYPE_NTRAILER 16 + +#define OFP_ETHERTYPE_DCA 0x1234 /* DCA - Multicast */ +#define OFP_ETHERTYPE_VALID 0x1600 /* VALID system protocol */ +#define OFP_ETHERTYPE_DOGFIGHT 0x1989 /* Artificial Horizons ("Aviator" dogfight simulator [on Sun]) */ +#define OFP_ETHERTYPE_RCL 0x1995 /* Datapoint Corporation (RCL lan protocol) */ + + /* The following 3C0x types + are unregistered: */ +#define OFP_ETHERTYPE_NBPVCD 0x3C00 /* 3Com NBP virtual circuit datagram (like XNS SPP) not registered */ +#define OFP_ETHERTYPE_NBPSCD 0x3C01 /* 3Com NBP System control datagram not registered */ +#define OFP_ETHERTYPE_NBPCREQ 0x3C02 /* 3Com NBP Connect request (virtual cct) not registered */ +#define OFP_ETHERTYPE_NBPCRSP 0x3C03 /* 3Com NBP Connect response not registered */ +#define OFP_ETHERTYPE_NBPCC 0x3C04 /* 3Com NBP Connect complete not registered */ +#define OFP_ETHERTYPE_NBPCLREQ 0x3C05 /* 3Com NBP Close request (virtual cct) not registered */ +#define OFP_ETHERTYPE_NBPCLRSP 0x3C06 /* 3Com NBP Close response not registered */ +#define OFP_ETHERTYPE_NBPDG 0x3C07 /* 3Com NBP Datagram (like XNS IDP) not registered */ +#define OFP_ETHERTYPE_NBPDGB 0x3C08 /* 3Com NBP Datagram broadcast not registered */ +#define OFP_ETHERTYPE_NBPCLAIM 0x3C09 /* 3Com NBP Claim NetBIOS name not registered */ +#define OFP_ETHERTYPE_NBPDLTE 0x3C0A /* 3Com NBP Delete NetBIOS name not registered */ +#define OFP_ETHERTYPE_NBPRAS 0x3C0B /* 3Com NBP Remote adaptor status request not registered */ +#define OFP_ETHERTYPE_NBPRAR 0x3C0C /* 3Com NBP Remote adaptor response not registered */ +#define OFP_ETHERTYPE_NBPRST 0x3C0D /* 3Com NBP Reset not registered */ + +#define OFP_ETHERTYPE_PCS 0x4242 /* PCS Basic Block Protocol */ +#define OFP_ETHERTYPE_IMLBLDIAG 0x424C /* Information Modes Little Big LAN diagnostic */ +#define OFP_ETHERTYPE_DIDDLE 0x4321 /* THD - Diddle */ +#define OFP_ETHERTYPE_IMLBL 0x4C42 /* Information Modes Little Big 
LAN */ +#define OFP_ETHERTYPE_SIMNET 0x5208 /* BBN Simnet Private */ +#define OFP_ETHERTYPE_DECEXPER 0x6000 /* DEC Unassigned, experimental */ +#define OFP_ETHERTYPE_MOPDL 0x6001 /* DEC MOP dump/load */ +#define OFP_ETHERTYPE_MOPRC 0x6002 /* DEC MOP remote console */ +#define OFP_ETHERTYPE_DECnet 0x6003 /* DEC DECNET Phase IV route */ +#define OFP_ETHERTYPE_DN OFP_ETHERTYPE_DECnet /* libpcap, tcpdump */ +#define OFP_ETHERTYPE_LAT 0x6004 /* DEC LAT */ +#define OFP_ETHERTYPE_DECDIAG 0x6005 /* DEC diagnostic protocol (at interface initialization?) */ +#define OFP_ETHERTYPE_DECCUST 0x6006 /* DEC customer protocol */ +#define OFP_ETHERTYPE_SCA 0x6007 /* DEC LAVC, SCA */ +#define OFP_ETHERTYPE_AMBER 0x6008 /* DEC AMBER */ +#define OFP_ETHERTYPE_DECMUMPS 0x6009 /* DEC MUMPS */ + /* 0x6010 - 0x6014 3Com Corporation */ +#define OFP_ETHERTYPE_TRANSETHER 0x6558 /* Trans Ether Bridging (RFC1701)*/ +#define OFP_ETHERTYPE_RAWFR 0x6559 /* Raw Frame Relay (RFC1701) */ +#define OFP_ETHERTYPE_UBDL 0x7000 /* Ungermann-Bass download */ +#define OFP_ETHERTYPE_UBNIU 0x7001 /* Ungermann-Bass NIUs */ +#define OFP_ETHERTYPE_UBDIAGLOOP 0x7002 /* Ungermann-Bass diagnostic/loopback */ +#define OFP_ETHERTYPE_UBNMC 0x7003 /* Ungermann-Bass ??? (NMC to/from UB Bridge) */ +#define OFP_ETHERTYPE_UBBST 0x7005 /* Ungermann-Bass Bridge Spanning Tree */ +#define OFP_ETHERTYPE_OS9 0x7007 /* OS/9 Microware */ +#define OFP_ETHERTYPE_OS9NET 0x7009 /* OS/9 Net? 
*/ + /* 0x7020 - 0x7029 LRT (England) (now Sintrom) */ +#define OFP_ETHERTYPE_RACAL 0x7030 /* Racal-Interlan */ +#define OFP_ETHERTYPE_PRIMENTS 0x7031 /* Prime NTS (Network Terminal Service) */ +#define OFP_ETHERTYPE_CABLETRON 0x7034 /* Cabletron */ +#define OFP_ETHERTYPE_CRONUSVLN 0x8003 /* Cronus VLN */ +#define OFP_ETHERTYPE_CRONUS 0x8004 /* Cronus Direct */ +#define OFP_ETHERTYPE_HP 0x8005 /* HP Probe */ +#define OFP_ETHERTYPE_NESTAR 0x8006 /* Nestar */ +#define OFP_ETHERTYPE_ATTSTANFORD 0x8008 /* AT&T/Stanford (local use) */ +#define OFP_ETHERTYPE_EXCELAN 0x8010 /* Excelan */ +#define OFP_ETHERTYPE_SG_DIAG 0x8013 /* SGI diagnostic type */ +#define OFP_ETHERTYPE_SG_NETGAMES 0x8014 /* SGI network games */ +#define OFP_ETHERTYPE_SG_RESV 0x8015 /* SGI reserved type */ +#define OFP_ETHERTYPE_SG_BOUNCE 0x8016 /* SGI bounce server */ +#define OFP_ETHERTYPE_APOLLODOMAIN 0x8019 /* Apollo DOMAIN */ +#define OFP_ETHERTYPE_TYMSHARE 0x802E /* Tymeshare */ +#define OFP_ETHERTYPE_TIGAN 0x802F /* Tigan, Inc. */ +#define OFP_ETHERTYPE_REVARP 0x8035 /* Reverse addr resolution protocol */ +#define OFP_ETHERTYPE_AEONIC 0x8036 /* Aeonic Systems */ +#define OFP_ETHERTYPE_IPXNEW 0x8037 /* IPX (Novell Netware?) */ +#define OFP_ETHERTYPE_LANBRIDGE 0x8038 /* DEC LANBridge */ +#define OFP_ETHERTYPE_DSMD 0x8039 /* DEC DSM/DDP */ +#define OFP_ETHERTYPE_ARGONAUT 0x803A /* DEC Argonaut Console */ +#define OFP_ETHERTYPE_VAXELN 0x803B /* DEC VAXELN */ +#define OFP_ETHERTYPE_DECDNS 0x803C /* DEC DNS Naming Service */ +#define OFP_ETHERTYPE_ENCRYPT 0x803D /* DEC Ethernet Encryption */ +#define OFP_ETHERTYPE_DECDTS 0x803E /* DEC Distributed Time Service */ +#define OFP_ETHERTYPE_DECLTM 0x803F /* DEC LAN Traffic Monitor */ +#define OFP_ETHERTYPE_DECNETBIOS 0x8040 /* DEC PATHWORKS DECnet NETBIOS Emulation */ +#define OFP_ETHERTYPE_DECLAST 0x8041 /* DEC Local Area System Transport */ + /* 0x8042 DEC Unassigned */ +#define OFP_ETHERTYPE_PLANNING 0x8044 /* Planning Research Corp. 
*/ + /* 0x8046 - 0x8047 AT&T */ +#define OFP_ETHERTYPE_DECAM 0x8048 /* DEC Availability Manager for Distributed Systems DECamds (but someone at DEC says not) */ +#define OFP_ETHERTYPE_EXPERDATA 0x8049 /* ExperData */ +#define OFP_ETHERTYPE_VEXP 0x805B /* Stanford V Kernel exp. */ +#define OFP_ETHERTYPE_VPROD 0x805C /* Stanford V Kernel prod. */ +#define OFP_ETHERTYPE_ES 0x805D /* Evans & Sutherland */ +#define OFP_ETHERTYPE_LITTLE 0x8060 /* Little Machines */ +#define OFP_ETHERTYPE_COUNTERPOINT 0x8062 /* Counterpoint Computers */ + /* 0x8065 - 0x8066 Univ. of Mass @ Amherst */ +#define OFP_ETHERTYPE_VEECO 0x8067 /* Veeco Integrated Auto. */ +#define OFP_ETHERTYPE_GENDYN 0x8068 /* General Dynamics */ +#define OFP_ETHERTYPE_ATT 0x8069 /* AT&T */ +#define OFP_ETHERTYPE_AUTOPHON 0x806A /* Autophon */ +#define OFP_ETHERTYPE_COMDESIGN 0x806C /* ComDesign */ +#define OFP_ETHERTYPE_COMPUGRAPHIC 0x806D /* Compugraphic Corporation */ + /* 0x806E - 0x8077 Landmark Graphics Corp. */ +#define OFP_ETHERTYPE_MATRA 0x807A /* Matra */ +#define OFP_ETHERTYPE_DDE 0x807B /* Dansk Data Elektronik */ +#define OFP_ETHERTYPE_MERIT 0x807C /* Merit Internodal (or Univ of Michigan?) */ + /* 0x807D - 0x807F Vitalink Communications */ +#define OFP_ETHERTYPE_VLTLMAN 0x8080 /* Vitalink TransLAN III Management */ + /* 0x8081 - 0x8083 Counterpoint Computers */ + /* 0x8088 - 0x808A Xyplex */ +#define OFP_ETHERTYPE_ATALK 0x809B /* AppleTalk */ +#define OFP_ETHERTYPE_AT OFP_ETHERTYPE_ATALK /* old NetBSD */ +#define OFP_ETHERTYPE_APPLETALK OFP_ETHERTYPE_ATALK /* HP-UX */ + /* 0x809C - 0x809E Datability */ +#define OFP_ETHERTYPE_SPIDER 0x809F /* Spider Systems Ltd. */ + /* 0x80A3 Nixdorf */ + /* 0x80A4 - 0x80B3 Siemens Gammasonics Inc. */ + /* 0x80C0 - 0x80C3 DCA (Digital Comm. Assoc.) 
Data Exchange Cluster */ + /* 0x80C4 - 0x80C5 Banyan Systems */ +#define OFP_ETHERTYPE_PACER 0x80C6 /* Pacer Software */ +#define OFP_ETHERTYPE_APPLITEK 0x80C7 /* Applitek Corporation */ + /* 0x80C8 - 0x80CC Intergraph Corporation */ + /* 0x80CD - 0x80CE Harris Corporation */ + /* 0x80CF - 0x80D2 Taylor Instrument */ + /* 0x80D3 - 0x80D4 Rosemount Corporation */ +#define OFP_ETHERTYPE_SNA 0x80D5 /* IBM SNA Services over Ethernet */ +#define OFP_ETHERTYPE_VARIAN 0x80DD /* Varian Associates */ + /* 0x80DE - 0x80DF TRFS (Integrated Solutions Transparent Remote File System) */ + /* 0x80E0 - 0x80E3 Allen-Bradley */ + /* 0x80E4 - 0x80F0 Datability */ +#define OFP_ETHERTYPE_RETIX 0x80F2 /* Retix */ +#define OFP_ETHERTYPE_AARP 0x80F3 /* AppleTalk AARP */ + /* 0x80F4 - 0x80F5 Kinetics */ +#define OFP_ETHERTYPE_APOLLO 0x80F7 /* Apollo Computer */ +#define OFP_ETHERTYPE_VLAN 0x8100 /* IEEE 802.1Q VLAN tagging (XXX conflicts) */ + /* 0x80FF - 0x8101 Wellfleet Communications (XXX conflicts) */ +#define OFP_ETHERTYPE_BOFL 0x8102 /* Wellfleet; BOFL (Breath OF Life) pkts [every 5-10 secs.] */ +#define OFP_ETHERTYPE_WELLFLEET 0x8103 /* Wellfleet Communications */ + /* 0x8107 - 0x8109 Symbolics Private */ +#define OFP_ETHERTYPE_TALARIS 0x812B /* Talaris */ +#define OFP_ETHERTYPE_WATERLOO 0x8130 /* Waterloo Microsystems Inc. (XXX which?) */ +#define OFP_ETHERTYPE_HAYES 0x8130 /* Hayes Microcomputers (XXX which?) */ +#define OFP_ETHERTYPE_VGLAB 0x8131 /* VG Laboratory Systems */ + /* 0x8132 - 0x8137 Bridge Communications */ +#define OFP_ETHERTYPE_IPX 0x8137 /* Novell (old) NetWare IPX (ECONFIG E option) */ +#define OFP_ETHERTYPE_NOVELL 0x8138 /* Novell, Inc. 
*/ + /* 0x8139 - 0x813D KTI */ +#define OFP_ETHERTYPE_MUMPS 0x813F /* M/MUMPS data sharing */ +#define OFP_ETHERTYPE_AMOEBA 0x8145 /* Vrije Universiteit (NL) Amoeba 4 RPC (obsolete) */ +#define OFP_ETHERTYPE_FLIP 0x8146 /* Vrije Universiteit (NL) FLIP (Fast Local Internet Protocol) */ +#define OFP_ETHERTYPE_VURESERVED 0x8147 /* Vrije Universiteit (NL) [reserved] */ +#define OFP_ETHERTYPE_LOGICRAFT 0x8148 /* Logicraft */ +#define OFP_ETHERTYPE_NCD 0x8149 /* Network Computing Devices */ +#define OFP_ETHERTYPE_ALPHA 0x814A /* Alpha Micro */ +#define OFP_ETHERTYPE_SNMP 0x814C /* SNMP over Ethernet (see RFC1089) */ + /* 0x814D - 0x814E BIIN */ +#define OFP_ETHERTYPE_TEC 0x814F /* Technically Elite Concepts */ +#define OFP_ETHERTYPE_RATIONAL 0x8150 /* Rational Corp */ + /* 0x8151 - 0x8153 Qualcomm */ + /* 0x815C - 0x815E Computer Protocol Pty Ltd */ + /* 0x8164 - 0x8166 Charles River Data Systems */ +#define OFP_ETHERTYPE_XTP 0x817D /* Protocol Engines XTP */ +#define OFP_ETHERTYPE_SGITW 0x817E /* SGI/Time Warner prop. */ +#define OFP_ETHERTYPE_HIPPI_FP 0x8180 /* HIPPI-FP encapsulation */ +#define OFP_ETHERTYPE_STP 0x8181 /* Scheduled Transfer STP, HIPPI-ST */ + /* 0x8182 - 0x8183 Reserved for HIPPI-6400 */ + /* 0x8184 - 0x818C SGI prop. */ +#define OFP_ETHERTYPE_MOTOROLA 0x818D /* Motorola */ +#define OFP_ETHERTYPE_NETBEUI 0x8191 /* PowerLAN NetBIOS/NetBEUI (PC) */ + /* 0x819A - 0x81A3 RAD Network Devices */ + /* 0x81B7 - 0x81B9 Xyplex */ + /* 0x81CC - 0x81D5 Apricot Computers */ + /* 0x81D6 - 0x81DD Artisoft Lantastic */ + /* 0x81E6 - 0x81EF Polygon */ + /* 0x81F0 - 0x81F2 Comsat Labs */ + /* 0x81F3 - 0x81F5 SAIC */ + /* 0x81F6 - 0x81F8 VG Analytical */ + /* 0x8203 - 0x8205 QNX Software Systems Ltd. 
*/ + /* 0x8221 - 0x8222 Ascom Banking Systems */ + /* 0x823E - 0x8240 Advanced Encryption Systems */ + /* 0x8263 - 0x826A Charles River Data Systems */ + /* 0x827F - 0x8282 Athena Programming */ + /* 0x829A - 0x829B Inst Ind Info Tech */ + /* 0x829C - 0x82AB Taurus Controls */ + /* 0x82AC - 0x8693 Walker Richer & Quinn */ +#define OFP_ETHERTYPE_ACCTON 0x8390 /* Accton Technologies (unregistered) */ +#define OFP_ETHERTYPE_TALARISMC 0x852B /* Talaris multicast */ +#define OFP_ETHERTYPE_KALPANA 0x8582 /* Kalpana */ + /* 0x8694 - 0x869D Idea Courier */ + /* 0x869E - 0x86A1 Computer Network Tech */ + /* 0x86A3 - 0x86AC Gateway Communications */ +#define OFP_ETHERTYPE_SECTRA 0x86DB /* SECTRA */ +#define OFP_ETHERTYPE_IPV6 0x86DD /* IP protocol version 6 */ +#define OFP_ETHERTYPE_DELTACON 0x86DE /* Delta Controls */ +#define OFP_ETHERTYPE_ATOMIC 0x86DF /* ATOMIC */ + /* 0x86E0 - 0x86EF Landis & Gyr Powers */ + /* 0x8700 - 0x8710 Motorola */ +#define OFP_ETHERTYPE_RDP 0x8739 /* Control Technology Inc. RDP Without IP */ +#define OFP_ETHERTYPE_MICP 0x873A /* Control Technology Inc. Mcast Industrial Ctrl Proto. */ + /* 0x873B - 0x873C Control Technology Inc. 
Proprietary */ +#define OFP_ETHERTYPE_TCPCOMP 0x876B /* TCP/IP Compression (RFC1701) */ +#define OFP_ETHERTYPE_IPAS 0x876C /* IP Autonomous Systems (RFC1701) */ +#define OFP_ETHERTYPE_SECUREDATA 0x876D /* Secure Data (RFC1701) */ +#define OFP_ETHERTYPE_FLOWCONTROL 0x8808 /* 802.3x flow control packet */ +#define OFP_ETHERTYPE_SLOW 0x8809 /* 802.3ad link aggregation (LACP) */ +#define OFP_ETHERTYPE_PPP 0x880B /* PPP (obsolete by PPPoE) */ +#define OFP_ETHERTYPE_HITACHI 0x8820 /* Hitachi Cable (Optoelectronic Systems Laboratory) */ +#define OFP_ETHERTYPE_MPLS 0x8847 /* MPLS Unicast */ +#define OFP_ETHERTYPE_MPLS_MCAST 0x8848 /* MPLS Multicast */ +#define OFP_ETHERTYPE_AXIS 0x8856 /* Axis Communications AB proprietary bootstrap/config */ +#define OFP_ETHERTYPE_PPPOEDISC 0x8863 /* PPP Over Ethernet Discovery Stage */ +#define OFP_ETHERTYPE_PPPOE 0x8864 /* PPP Over Ethernet Session Stage */ +#define OFP_ETHERTYPE_LANPROBE 0x8888 /* HP LanProbe test? */ +#define OFP_ETHERTYPE_PAE 0x888e /* EAPOL PAE/802.1x */ +#define OFP_ETHERTYPE_QINQ_STD 0x88A8 /* 802.1ad QinQ */ +#define OFP_ETHERTYPE_LOOPBACK 0x9000 /* Loopback: used to test interfaces */ +#define OFP_ETHERTYPE_LBACK OFP_ETHERTYPE_LOOPBACK /* DEC MOP loopback */ +#define OFP_ETHERTYPE_XNSSM 0x9001 /* 3Com (Formerly Bridge Communications), XNS Systems Management */ +#define OFP_ETHERTYPE_TCPSM 0x9002 /* 3Com (Formerly Bridge Communications), TCP/IP Systems Management */ +#define OFP_ETHERTYPE_BCLOOP 0x9003 /* 3Com (Formerly Bridge Communications), loopback detection */ +#define OFP_ETHERTYPE_QINQ_VENDOR1 0x9100 /* Vendor-specific QinQ */ +#define OFP_ETHERTYPE_QINQ_VENDOR2 0x9200 /* Vendor-specific QinQ */ +#define OFP_ETHERTYPE_QINQ_VENDOR3 0x9300 /* Vendor-specific QinQ */ +#define OFP_ETHERTYPE_DEBNI 0xAAAA /* DECNET? 
Used by VAX 6220 DEBNI */ +#define OFP_ETHERTYPE_SONIX 0xFAF5 /* Sonix Arpeggio */ +#define OFP_ETHERTYPE_VITAL 0xFF00 /* BBN VITAL-LanBridge cache wakeups */ + /* 0xFF00 - 0xFFOF ISC Bunker Ramo */ + +#define OFP_ETHERTYPE_MAX 0xFFFF /* Maximum valid ethernet type, reserved */ + +/* + * The OFP_OFP_ETHERTYPE_NTRAILER packet types starting at OFP_OFP_ETHERTYPE_TRAIL have + * (type-OFP_ETHERTYPE_TRAIL)*512 bytes of data followed + * by an OFP_ETHER type (as given above) and then the (variable-length) header. + */ +#define OFP_ETHERTYPE_TRAIL 0x1000 /* Trailer packet */ +#define OFP_ETHERTYPE_NTRAILER 16 + +#define OFP_ETHERMTU (OFP_ETHER_MAX_LEN-OFP_ETHER_HDR_LEN-OFP_ETHER_CRC_LEN) +#define OFP_ETHERMIN (OFP_ETHER_MIN_LEN-OFP_ETHER_HDR_LEN-OFP_ETHER_CRC_LEN) +#define OFP_ETHERMTU_JUMBO (OFP_ETHER_MAX_LEN_JUMBO - OFP_ETHER_HDR_LEN - OFP_ETHER_CRC_LEN) + +#endif diff --git a/include/api/ofp_hook.h b/include/api/ofp_hook.h new file mode 100644 index 00000000..e5846751 --- /dev/null +++ b/include/api/ofp_hook.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_HOOK_H__ +#define __OFP_HOOK_H__ + +#include + +typedef enum ofp_return_code (*ofp_pkt_hook)(odp_packet_t pkt, void *arg); + +enum ofp_hook_id { + OFP_HOOK_LOCAL = 0, + OFP_HOOK_FWD_IPv4, + OFP_HOOK_FWD_IPv6, + OFP_HOOK_GRE, + OFP_HOOK_MAX +}; + +enum ofp_hook_local_par { + IS_IPV4 = 0, + IS_IPV6, + IS_IPV4_UDP, + IS_IPV6_UDP +}; + +#endif /* __OFP_HOOK_H__ */ diff --git a/include/api/ofp_icmp.h b/include/api/ofp_icmp.h new file mode 100644 index 00000000..522624e8 --- /dev/null +++ b/include/api/ofp_icmp.h @@ -0,0 +1,227 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet/ip_icmp.h 207369 2010-04-29 11:52:42Z bz $ + */ + +#ifndef _OFP_ICMP_H_ +#define _OFP_ICMP_H_ + +#include "ofp_ip.h" + +/* + * Interface Control Message Protocol Definitions. + * Per RFC 792, September 1981. + */ + +/* + * Internal of an ICMP Router Advertisement + */ +struct ofp_icmp_ra_addr { + uint32_t ira_addr; + uint32_t ira_preference; +}; + +/* + * Structure of an icmp header. 
+ */ +struct ofp_icmphdr { + uint8_t icmp_type; /* type of message, see below */ + uint8_t icmp_code; /* type sub code */ + uint16_t icmp_cksum; /* ones complement cksum of struct */ +}; + +/* + * Structure of an icmp packet. + * + * XXX: should start with a struct icmphdr. + */ +struct ofp_icmp { + uint8_t icmp_type; /* type of message, see below */ + uint8_t icmp_code; /* type sub code */ + uint16_t icmp_cksum; /* ones complement cksum of struct */ + union { + uint8_t ih_pptr; /* ICMP_PARAMPROB */ + struct ofp_in_addr ih_gwaddr; /* ICMP_REDIRECT */ + struct ofp_ih_idseq { + uint16_t icd_id; /* network format */ + uint16_t icd_seq; /* network format */ + } ih_idseq; + int ih_void; + + /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ + struct ih_pmtu { + uint16_t ipm_void; /* network format */ + uint16_t ipm_nextmtu; /* network format */ + } ih_pmtu; + + struct ofp_ih_rtradv { + uint8_t irt_num_addrs; + uint8_t irt_wpa; + uint16_t irt_lifetime; + } ih_rtradv; + } icmp_hun; +#define ofp_icmp_pptr icmp_hun.ih_pptr +#define ofp_icmp_gwaddr icmp_hun.ih_gwaddr +#define ofp_icmp_id icmp_hun.ih_idseq.icd_id +#define ofp_icmp_seq icmp_hun.ih_idseq.icd_seq +#define ofp_icmp_void icmp_hun.ih_void +#define ofp_icmp_pmvoid icmp_hun.ih_pmtu.ipm_void +#define ofp_icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu +#define ofp_icmp_num_addrs icmp_hun.ih_rtradv.irt_num_addrs +#define ofp_icmp_wpa icmp_hun.ih_rtradv.irt_wpa +#define ofp_icmp_lifetime icmp_hun.ih_rtradv.irt_lifetime + union { + struct id_ts { /* ICMP Timestamp */ + /* + * The next 3 fields are in network format, + * milliseconds since 00:00 GMT + */ + uint32_t its_otime; /* Originate */ + uint32_t its_rtime; /* Receive */ + uint32_t its_ttime; /* Transmit */ + } id_ts; + struct ofp_id_ip { + struct ofp_ip idi_ip; + /* options and then 64 bits of data */ + } id_ip; + struct ofp_icmp_ra_addr id_radv; + uint32_t id_mask; + char id_data[1]; + } icmp_dun; +#define ofp_icmp_otime icmp_dun.id_ts.its_otime +#define 
ofp_icmp_rtime icmp_dun.id_ts.its_rtime +#define ofp_icmp_ttime icmp_dun.id_ts.its_ttime +#define ofp_icmp_ip icmp_dun.id_ip.idi_ip +#define ofp_icmp_radv icmp_dun.id_radv +#define ofp_icmp_mask icmp_dun.id_mask +#define ofp_icmp_data icmp_dun.id_data +}; + +/* + * Lower bounds on packet lengths for various types. + * For the error advice packets must first insure that the + * packet is large enough to contain the returned ip header. + * Only then can we do the check to see if 64 bits of packet + * data have been returned, since we need to check the returned + * ip header length. + */ +#define OFP_ICMP_MINLEN 8 /* abs minimum */ +#define OFP_ICMP_TSLEN (8 + 3 * sizeof (uint32_t)) /* timestamp */ +#define OFP_ICMP_MASKLEN 12 /* address mask */ +#define OFP_ICMP_ADVLENMIN (8 + sizeof (struct ofp_ip) + 8) /* min */ +#define OFP_ICMP_ADVLEN(p) (8 + ((p)->ofp_icmp_ip.ip_hl << 2) + 8) + /* N.B.: must separately check that ip_hl >= 5 */ + +/* + * Definition of type and code field values. + */ +#define OFP_ICMP_ECHOREPLY 0 /* echo reply */ +#define OFP_ICMP_UNREACH 3 /* dest unreachable, codes: */ +#define OFP_ICMP_UNREACH_NET 0 /* bad net */ +#define OFP_ICMP_UNREACH_HOST 1 /* bad host */ +#define OFP_ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ +#define OFP_ICMP_UNREACH_PORT 3 /* bad port */ +#define OFP_ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ +#define OFP_ICMP_UNREACH_SRCFAIL 5 /* src route failed */ +#define OFP_ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ +#define OFP_ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ +#define OFP_ICMP_UNREACH_ISOLATED 8 /* src host isolated */ +#define OFP_ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ +#define OFP_ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ +#define OFP_ICMP_UNREACH_TOSNET 11 /* bad tos for net */ +#define OFP_ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ +#define OFP_ICMP_UNREACH_FILTER_PROHIB 13 /* admin prohib */ +#define OFP_ICMP_UNREACH_HOST_PRECEDENCE 14 /* host prec vio. 
*/ +#define OFP_ICMP_UNREACH_PRECEDENCE_CUTOFF 15 /* prec cutoff */ +#define OFP_ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ +#define OFP_ICMP_REDIRECT 5 /* shorter route, codes: */ +#define OFP_ICMP_REDIRECT_NET 0 /* for network */ +#define OFP_ICMP_REDIRECT_HOST 1 /* for host */ +#define OFP_ICMP_REDIRECT_TOSNET 2 /* for tos and net */ +#define OFP_ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ +#define OFP_ICMP_ALTHOSTADDR 6 /* alternate host address */ +#define OFP_ICMP_ECHO 8 /* echo service */ +#define OFP_ICMP_ROUTERADVERT 9 /* router advertisement */ +#define OFP_ICMP_ROUTERADVERT_NORMAL 0 /* normal advertisement */ +#define OFP_ICMP_ROUTERADVERT_NOROUTE_COMMON 16 /* selective routing */ +#define OFP_ICMP_ROUTERSOLICIT 10 /* router solicitation */ +#define OFP_ICMP_TIMXCEED 11 /* time exceeded, code: */ +#define OFP_ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ +#define OFP_ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ +#define OFP_ICMP_PARAMPROB 12 /* ip header bad */ +#define OFP_ICMP_PARAMPROB_ERRATPTR 0 /* error at param ptr */ +#define OFP_ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ +#define OFP_ICMP_PARAMPROB_LENGTH 2 /* bad length */ +#define OFP_ICMP_TSTAMP 13 /* timestamp request */ +#define OFP_ICMP_TSTAMPREPLY 14 /* timestamp reply */ +#define OFP_ICMP_IREQ 15 /* information request */ +#define OFP_ICMP_IREQREPLY 16 /* information reply */ +#define OFP_ICMP_MASKREQ 17 /* address mask request */ +#define OFP_ICMP_MASKREPLY 18 /* address mask reply */ +#define OFP_ICMP_TRACEROUTE 30 /* traceroute */ +#define OFP_ICMP_DATACONVERR 31 /* data conversion error */ +#define OFP_ICMP_MOBILE_REDIRECT 32 /* mobile host redirect */ +#define OFP_ICMP_IPV6_WHEREAREYOU 33 /* IPv6 where-are-you */ +#define OFP_ICMP_IPV6_IAMHERE 34 /* IPv6 i-am-here */ +#define OFP_ICMP_MOBILE_REGREQUEST 35 /* mobile registration req */ +#define OFP_ICMP_MOBILE_REGREPLY 36 /* mobile registration reply */ +#define OFP_ICMP_SKIP 39 /* SKIP */ +#define OFP_ICMP_PHOTURIS 40 /* Photuris */ +#define OFP_ICMP_PHOTURIS_UNKNOWN_INDEX 1 /* unknown sec index */ +#define OFP_ICMP_PHOTURIS_AUTH_FAILED 2 /* auth failed */ +#define OFP_ICMP_PHOTURIS_DECRYPT_FAILED 3 /* decrypt failed */ + +#define OFP_ICMP_MAXTYPE 40 + +#define OFP_ICMP_INFOTYPE(type) \ + ((type) == OFP_ICMP_ECHOREPLY || (type) == OFP_ICMP_ECHO || \ + (type) == OFP_ICMP_ROUTERADVERT || (type) == OFP_ICMP_ROUTERSOLICIT || \ + (type) == OFP_ICMP_TSTAMP || (type) == OFP_ICMP_TSTAMPREPLY || \ + (type) == OFP_ICMP_IREQ || (type) == OFP_ICMP_IREQREPLY || \ + (type) == OFP_ICMP_MASKREQ || (type) == OFP_ICMP_MASKREPLY) + + +#define BANDLIM_UNLIMITED -1 +#define BANDLIM_ICMP_UNREACH 0 +#define BANDLIM_ICMP_ECHO 1 +#define BANDLIM_ICMP_TSTAMP 2 +#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ +#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ +#define BANDLIM_ICMP6_UNREACH 5 +#define BANDLIM_SCTP_OOTB 6 +#define BANDLIM_MAX 6 + +#endif diff --git a/include/api/ofp_icmp6.h b/include/api/ofp_icmp6.h new file mode 100644 index 00000000..674bcaec --- /dev/null +++ b/include/api/ofp_icmp6.h 
@@ -0,0 +1,523 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _OFP_ICMP6_H_
+#define _OFP_ICMP6_H_
+
+#include <odp.h> /* NOTE(review): header name is missing in this patch text; <odp.h> assumed from the ODP_BYTE_ORDER tests in this header - confirm */
+
+#define OFP_ICMPV6_PLD_MAXLEN 1232 /* IPV6_MMTU - sizeof(struct ip6_hdr)
+ - sizeof(struct icmp6_hdr) */
+
+struct ofp_icmp6_hdr {
+	uint8_t icmp6_type; /* type field */
+	uint8_t icmp6_code; /* code field */
+	uint16_t icmp6_cksum; /* checksum field */
+	union {
+		uint32_t icmp6_un_data32[1]; /* type-specific field */
+		uint16_t icmp6_un_data16[2]; /* type-specific field */
+		uint8_t icmp6_un_data8[4]; /* type-specific field */
+	} icmp6_dataun;
+} __attribute__((packed));
+
+/* Selectors for the three views of the type-specific union. */
+#define ofp_icmp6_data32 icmp6_dataun.icmp6_un_data32
+#define ofp_icmp6_data16 icmp6_dataun.icmp6_un_data16
+#define ofp_icmp6_data8 icmp6_dataun.icmp6_un_data8
+/*
+ * Per-message shorthands. They expand through the ofp_-prefixed selectors
+ * above; the un-prefixed icmp6_dataNN names are not defined by this header.
+ */
+#define ofp_icmp6_pptr ofp_icmp6_data32[0] /* parameter prob */
+#define ofp_icmp6_mtu ofp_icmp6_data32[0] /* packet too big */
+#define ofp_icmp6_id ofp_icmp6_data16[0] /* echo request/reply */
+#define ofp_icmp6_seq ofp_icmp6_data16[1] /* echo request/reply */
+#define ofp_icmp6_maxdelay ofp_icmp6_data16[0] /* mcast group membership */
+
+#define OFP_ICMP6_DST_UNREACH 1 /* dest unreachable, codes: */
+#define OFP_ICMP6_PACKET_TOO_BIG 2 /* packet too big */
+#define OFP_ICMP6_TIME_EXCEEDED 3 /* time exceeded, code: */
+#define OFP_ICMP6_PARAM_PROB 4 /* ip6 header bad */
+
+#define OFP_ICMP6_ECHO_REQUEST 128 /* echo service */
+#define OFP_ICMP6_ECHO_REPLY 129 /* echo reply */
+#define OFP_MLD_LISTENER_QUERY 130 /* multicast listener query */
+#define OFP_MLD_LISTENER_REPORT 131 /* multicast listener report */
+#define OFP_MLD_LISTENER_DONE 132 /* multicast listener done */
+#define OFP_MLD_LISTENER_REDUCTION OFP_MLD_LISTENER_DONE /* RFC3542 definition */
+
+/* RFC2292 decls */
+#define OFP_ICMP6_MEMBERSHIP_QUERY 130 /* group membership query */
+#define OFP_ICMP6_MEMBERSHIP_REPORT 131 /* group membership report */
+#define OFP_ICMP6_MEMBERSHIP_REDUCTION 132 /* group membership termination */
+
+/* the following are for backward compatibility with old KAME apps. */
+#define OFP_MLD6_LISTENER_QUERY OFP_MLD_LISTENER_QUERY
+#define OFP_MLD6_LISTENER_REPORT OFP_MLD_LISTENER_REPORT
+#define OFP_MLD6_LISTENER_DONE OFP_MLD_LISTENER_DONE
+
+#define OFP_ND_ROUTER_SOLICIT 133 /* router solicitation */
+#define OFP_ND_ROUTER_ADVERT 134 /* router advertisement */
+#define OFP_ND_NEIGHBOR_SOLICIT 135 /* neighbor solicitation */
+#define OFP_ND_NEIGHBOR_ADVERT 136 /* neighbor advertisement */
+#define OFP_ND_REDIRECT 137 /* redirect */
+
+#define OFP_ICMP6_ROUTER_RENUMBERING 138 /* router renumbering */
+
+#define OFP_ICMP6_WRUREQUEST 139 /* who are you request */
+#define OFP_ICMP6_WRUREPLY 140 /* who are you reply */
+#define OFP_ICMP6_FQDN_QUERY 139 /* FQDN query */
+#define OFP_ICMP6_FQDN_REPLY 140 /* FQDN reply */
+#define OFP_ICMP6_NI_QUERY 139 /* node information request */
+#define OFP_ICMP6_NI_REPLY 140 /* node information reply */
+#define OFP_MLDV2_LISTENER_REPORT 143 /* RFC3810 listener report */
+
+/* The definitions below are experimental. TBA */
+#define OFP_MLD_MTRACE_RESP 200 /* mtrace resp (to sender) */
+#define OFP_MLD_MTRACE 201 /* mtrace messages */
+
+#define OFP_MLD6_MTRACE_RESP OFP_MLD_MTRACE_RESP
+#define OFP_MLD6_MTRACE OFP_MLD_MTRACE
+
+#define OFP_ICMP6_MAXTYPE 201
+
+#define OFP_ICMP6_DST_UNREACH_NOROUTE 0 /* no route to destination */
+#define OFP_ICMP6_DST_UNREACH_ADMIN 1 /* administratively prohibited */
+#define OFP_ICMP6_DST_UNREACH_NOTNEIGHBOR 2 /* not a neighbor(obsolete) */
+#define OFP_ICMP6_DST_UNREACH_BEYONDSCOPE 2 /* beyond scope of source address */
+#define OFP_ICMP6_DST_UNREACH_ADDR 3 /* address unreachable */
+#define OFP_ICMP6_DST_UNREACH_NOPORT 4 /* port unreachable */
+
+#define OFP_ICMP6_TIME_EXCEED_TRANSIT 0 /* ttl==0 in transit */
+#define OFP_ICMP6_TIME_EXCEED_REASSEMBLY 1 /* ttl==0 in reass */
+
+#define OFP_ICMP6_PARAMPROB_HEADER 0 /* erroneous header field */
+#define OFP_ICMP6_PARAMPROB_NEXTHEADER 1 /* unrecognized next header */
+#define OFP_ICMP6_PARAMPROB_OPTION 2 /* unrecognized option */
+
+#define OFP_ICMP6_INFOMSG_MASK 0x80 /* all informational messages */
+
+#define OFP_ICMP6_NI_SUBJ_IPV6 0 /* Query Subject is an IPv6 address */
+#define OFP_ICMP6_NI_SUBJ_FQDN 1 /* Query Subject is a Domain name */
+#define OFP_ICMP6_NI_SUBJ_IPV4 2 /* Query Subject is an IPv4 address */
+
+#define OFP_ICMP6_NI_SUCCESS 0 /* node information successful reply */
+#define OFP_ICMP6_NI_REFUSED 1 /* node information request is refused */
+#define OFP_ICMP6_NI_UNKNOWN 2 /* unknown Qtype */
+
+#define OFP_ICMP6_ROUTER_RENUMBERING_COMMAND 0 /* rr command */
+#define OFP_ICMP6_ROUTER_RENUMBERING_RESULT 1 /* rr result */
+#define OFP_ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET 255 /* rr seq num reset */
+
+/* Used in kernel only */
+#define OFP_ND_REDIRECT_ONLINK 0 /* redirect to an on-link node */
+#define OFP_ND_REDIRECT_ROUTER 1 /* redirect to a better router */
+
+/*
+ * Multicast Listener Discovery
+ */
+struct ofp_mld_hdr {
+	struct ofp_icmp6_hdr mld_icmp6_hdr;
+	struct ofp_in6_addr mld_addr; /* multicast address */
+} __attribute__((packed));
+
+/*
+ * definitions to provide backward compatibility to old KAME applications;
+ * each alias maps onto the ofp_mld_* name defined in this header
+ * (mld_addr is a real member of struct ofp_mld_hdr, so it stays as is)
+ */
+#define ofp_mld6_hdr ofp_mld_hdr
+#define ofp_mld6_type ofp_mld_type
+#define ofp_mld6_code ofp_mld_code
+#define ofp_mld6_cksum ofp_mld_cksum
+#define ofp_mld6_maxdelay ofp_mld_maxdelay
+#define ofp_mld6_reserved ofp_mld_reserved
+#define ofp_mld6_addr mld_addr
+
+/* shortcut macro definitions */
+#define ofp_mld_type mld_icmp6_hdr.icmp6_type
+#define ofp_mld_code mld_icmp6_hdr.icmp6_code
+#define ofp_mld_cksum mld_icmp6_hdr.icmp6_cksum
+#define ofp_mld_maxdelay mld_icmp6_hdr.ofp_icmp6_data16[0]
+#define ofp_mld_reserved mld_icmp6_hdr.ofp_icmp6_data16[1]
+#define ofp_mld_v2_reserved mld_icmp6_hdr.ofp_icmp6_data16[0]
+#define ofp_mld_v2_numrecs mld_icmp6_hdr.ofp_icmp6_data16[1]
+
+/*
+ * Neighbor Discovery
+ */
+
+struct ofp_nd_router_solicit { /* router solicitation */
+	struct ofp_icmp6_hdr nd_rs_hdr;
+	/* could be followed by options */
+} __attribute__((packed));
+
+#define ofp_nd_rs_type nd_rs_hdr.icmp6_type
+#define ofp_nd_rs_code nd_rs_hdr.icmp6_code
+#define ofp_nd_rs_cksum nd_rs_hdr.icmp6_cksum
+#define ofp_nd_rs_reserved nd_rs_hdr.ofp_icmp6_data32[0]
+
+struct ofp_nd_router_advert { /* router advertisement */
+	struct ofp_icmp6_hdr nd_ra_hdr;
+	uint32_t nd_ra_reachable; /* reachable time */
+	uint32_t nd_ra_retransmit; /* retransmit timer */
+	/* could be followed by options */
+} __attribute__((packed));
+
+#define ofp_nd_ra_type nd_ra_hdr.icmp6_type
+#define ofp_nd_ra_code nd_ra_hdr.icmp6_code
+#define ofp_nd_ra_cksum nd_ra_hdr.icmp6_cksum
+#define ofp_nd_ra_curhoplimit nd_ra_hdr.ofp_icmp6_data8[0]
+#define ofp_nd_ra_flags_reserved nd_ra_hdr.ofp_icmp6_data8[1]
+#define OFP_ND_RA_FLAG_MANAGED 0x80
+#define OFP_ND_RA_FLAG_OTHER 0x40
+#define OFP_ND_RA_FLAG_HA 0x20
+
+/*
+ * Router preference values based on draft-draves-ipngwg-router-selection-01.
+ * These are non-standard definitions.
+ */
+#define OFP_ND_RA_FLAG_RTPREF_MASK 0x18 /* 00011000 */
+
+#define OFP_ND_RA_FLAG_RTPREF_HIGH 0x08 /* 00001000 */
+#define OFP_ND_RA_FLAG_RTPREF_MEDIUM 0x00 /* 00000000 */
+#define OFP_ND_RA_FLAG_RTPREF_LOW 0x18 /* 00011000 */
+#define OFP_ND_RA_FLAG_RTPREF_RSV 0x10 /* 00010000 */
+
+/*
+ * The data16/data32 shorthands below go through the ofp_icmp6_dataNN
+ * selectors of struct ofp_icmp6_hdr; the un-prefixed icmp6_dataNN names
+ * are not defined by this header.
+ */
+#define ofp_nd_ra_router_lifetime nd_ra_hdr.ofp_icmp6_data16[1]
+
+struct ofp_nd_neighbor_solicit { /* neighbor solicitation */
+	struct ofp_icmp6_hdr nd_ns_hdr;
+	struct ofp_in6_addr nd_ns_target; /* target address */
+	/* could be followed by options */
+} __attribute__((packed));
+
+#define ofp_nd_ns_type nd_ns_hdr.icmp6_type
+#define ofp_nd_ns_code nd_ns_hdr.icmp6_code
+#define ofp_nd_ns_cksum nd_ns_hdr.icmp6_cksum
+#define ofp_nd_ns_reserved nd_ns_hdr.ofp_icmp6_data32[0]
+
+struct ofp_nd_neighbor_advert { /* neighbor advertisement */
+	struct ofp_icmp6_hdr nd_na_hdr;
+	struct ofp_in6_addr nd_na_target; /* target address */
+	/* could be followed by options */
+} __attribute__((packed));
+
+#define ofp_nd_na_type nd_na_hdr.icmp6_type
+#define ofp_nd_na_code nd_na_hdr.icmp6_code
+#define ofp_nd_na_cksum nd_na_hdr.icmp6_cksum
+#define ofp_nd_na_flags_reserved nd_na_hdr.ofp_icmp6_data32[0]
+#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN
+#define OFP_ND_NA_FLAG_ROUTER 0x80000000
+#define OFP_ND_NA_FLAG_SOLICITED 0x40000000
+#define OFP_ND_NA_FLAG_OVERRIDE 0x20000000
+#else
+#if ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN
+#define OFP_ND_NA_FLAG_ROUTER 0x80
+#define OFP_ND_NA_FLAG_SOLICITED 0x40
+#define OFP_ND_NA_FLAG_OVERRIDE 0x20
+#endif
+#endif
+
+struct ofp_nd_redirect { /* redirect */
+	struct ofp_icmp6_hdr nd_rd_hdr;
+	struct ofp_in6_addr nd_rd_target; /* target address */
+	struct ofp_in6_addr nd_rd_dst; /* destination address */
+	/* could be followed by options */
+} __attribute__((packed));
+
+#define ofp_nd_rd_type nd_rd_hdr.icmp6_type
+#define ofp_nd_rd_code nd_rd_hdr.icmp6_code
+#define ofp_nd_rd_cksum nd_rd_hdr.icmp6_cksum
+#define ofp_nd_rd_reserved nd_rd_hdr.ofp_icmp6_data32[0]
+
+struct ofp_nd_opt_hdr { /* Neighbor discovery option header */
+	uint8_t nd_opt_type;
+	uint8_t nd_opt_len;
+	/* followed by option specific data*/
+} __attribute__((packed));
+
+#define OFP_ND_OPT_SOURCE_LINKADDR 1
+#define OFP_ND_OPT_TARGET_LINKADDR 2
+#define OFP_ND_OPT_PREFIX_INFORMATION 3
+#define OFP_ND_OPT_REDIRECTED_HEADER 4
+#define OFP_ND_OPT_MTU 5
+#define OFP_ND_OPT_ROUTE_INFO 24 /* RFC 4191 */
+#define OFP_ND_OPT_RDNSS 25 /* RFC 6106 */
+#define OFP_ND_OPT_DNSSL 31 /* RFC 6106 */
+
+struct ofp_nd_opt_prefix_info { /* prefix information */
+	uint8_t nd_opt_pi_type;
+	uint8_t nd_opt_pi_len;
+	uint8_t nd_opt_pi_prefix_len;
+	uint8_t nd_opt_pi_flags_reserved;
+	uint32_t nd_opt_pi_valid_time;
+	uint32_t nd_opt_pi_preferred_time;
+	uint32_t nd_opt_pi_reserved2;
+	struct ofp_in6_addr nd_opt_pi_prefix;
+} __attribute__((packed));
+
+#define OFP_ND_OPT_PI_FLAG_ONLINK 0x80
+#define OFP_ND_OPT_PI_FLAG_AUTO 0x40
+
+struct ofp_nd_opt_rd_hdr { /* redirected header */
+	uint8_t nd_opt_rh_type;
+	uint8_t nd_opt_rh_len;
+	uint16_t nd_opt_rh_reserved1;
+	uint32_t nd_opt_rh_reserved2;
+	/* followed by IP header and data */
+} __attribute__((packed));
+
+struct ofp_nd_opt_mtu { /* MTU option */
+	uint8_t nd_opt_mtu_type;
+	uint8_t nd_opt_mtu_len;
+	uint16_t nd_opt_mtu_reserved;
+	uint32_t nd_opt_mtu_mtu;
+} __attribute__((packed));
+
+struct ofp_nd_opt_route_info { /* route info */
+	uint8_t nd_opt_rti_type;
+	uint8_t nd_opt_rti_len;
+	uint8_t nd_opt_rti_prefixlen;
+	uint8_t nd_opt_rti_flags;
+	uint32_t nd_opt_rti_lifetime;
+	/* prefix follows */
+} __attribute__((packed));
+
+struct ofp_nd_opt_rdnss { /* RDNSS option (RFC 6106) */
+	uint8_t nd_opt_rdnss_type;
+	uint8_t nd_opt_rdnss_len;
+	uint16_t nd_opt_rdnss_reserved;
+	uint32_t nd_opt_rdnss_lifetime;
+	/* followed by list of recursive DNS servers */
+} __attribute__((packed));
+
+struct ofp_nd_opt_dnssl { /* DNSSL option (RFC 6106) */
+	uint8_t nd_opt_dnssl_type;
+	uint8_t nd_opt_dnssl_len;
+	uint16_t nd_opt_dnssl_reserved;
+	uint32_t nd_opt_dnssl_lifetime;
+	/* followed by list of DNS search domains */
+} __attribute__((packed));
+
+/*
+ * icmp6 namelookup
+ */
+
+struct ofp_icmp6_namelookup {
+	struct ofp_icmp6_hdr icmp6_nl_hdr;
+	uint8_t icmp6_nl_nonce[8];
+	int32_t icmp6_nl_ttl;
+#if 0
+	uint8_t icmp6_nl_len;
+	uint8_t icmp6_nl_name[3];
+#endif
+	/* could be followed by options */
+} __attribute__((packed));
+
+/*
+ * icmp6 node information
+ */
+struct ofp_icmp6_nodeinfo {
+	struct ofp_icmp6_hdr icmp6_ni_hdr;
+	uint8_t icmp6_ni_nonce[8];
+	/* could be followed by reply data */
+} __attribute__((packed));
+
+#define ofp_ni_type icmp6_ni_hdr.icmp6_type
+#define ofp_ni_code icmp6_ni_hdr.icmp6_code
+#define ofp_ni_cksum icmp6_ni_hdr.icmp6_cksum
+#define ofp_ni_qtype icmp6_ni_hdr.ofp_icmp6_data16[0]
+#define ofp_ni_flags icmp6_ni_hdr.ofp_icmp6_data16[1]
+
+#define OFP_NI_QTYPE_NOOP 0 /* NOOP */
+#define OFP_NI_QTYPE_SUPTYPES 1 /* Supported Qtypes */
+#define OFP_NI_QTYPE_FQDN 2 /* FQDN (draft 04) */
+#define OFP_NI_QTYPE_DNSNAME 2 /* DNS Name */
+#define OFP_NI_QTYPE_NODEADDR 3 /* Node Addresses */
+#define OFP_NI_QTYPE_IPV4ADDR 4 /* IPv4 Addresses */
+
+#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN
+#define OFP_NI_SUPTYPE_FLAG_COMPRESS 0x1
+#define OFP_NI_FQDN_FLAG_VALIDTTL 0x1
+#elif ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN
+#define OFP_NI_SUPTYPE_FLAG_COMPRESS 0x0100
+#define OFP_NI_FQDN_FLAG_VALIDTTL 0x0100
+#endif
+
+#ifdef NAME_LOOKUPS_04
+#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN
+#define OFP_NI_NODEADDR_FLAG_LINKLOCAL 0x1
+#define OFP_NI_NODEADDR_FLAG_SITELOCAL 0x2
+#define OFP_NI_NODEADDR_FLAG_GLOBAL 0x4
+#define OFP_NI_NODEADDR_FLAG_ALL 0x8
+#define OFP_NI_NODEADDR_FLAG_TRUNCATE 0x10
+#define OFP_NI_NODEADDR_FLAG_ANYCAST 0x20 /* just experimental. not in spec */
+#elif ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN
+#define OFP_NI_NODEADDR_FLAG_LINKLOCAL 0x0100
+#define OFP_NI_NODEADDR_FLAG_SITELOCAL 0x0200
+#define OFP_NI_NODEADDR_FLAG_GLOBAL 0x0400
+#define OFP_NI_NODEADDR_FLAG_ALL 0x0800
+#define OFP_NI_NODEADDR_FLAG_TRUNCATE 0x1000
+#define OFP_NI_NODEADDR_FLAG_ANYCAST 0x2000 /* just experimental. not in spec */
+#endif
+#else /* draft-ietf-ipngwg-icmp-name-lookups-05 (and later?) */
+#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN
+#define OFP_NI_NODEADDR_FLAG_TRUNCATE 0x1
+#define OFP_NI_NODEADDR_FLAG_ALL 0x2
+#define OFP_NI_NODEADDR_FLAG_COMPAT 0x4
+#define OFP_NI_NODEADDR_FLAG_LINKLOCAL 0x8
+#define OFP_NI_NODEADDR_FLAG_SITELOCAL 0x10
+#define OFP_NI_NODEADDR_FLAG_GLOBAL 0x20
+#define OFP_NI_NODEADDR_FLAG_ANYCAST 0x40 /* just experimental. not in spec */
+#elif ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN
+#define OFP_NI_NODEADDR_FLAG_TRUNCATE 0x0100
+#define OFP_NI_NODEADDR_FLAG_ALL 0x0200
+#define OFP_NI_NODEADDR_FLAG_COMPAT 0x0400
+#define OFP_NI_NODEADDR_FLAG_LINKLOCAL 0x0800
+#define OFP_NI_NODEADDR_FLAG_SITELOCAL 0x1000
+#define OFP_NI_NODEADDR_FLAG_GLOBAL 0x2000
+#define OFP_NI_NODEADDR_FLAG_ANYCAST 0x4000 /* just experimental. not in spec */
+#endif
+#endif
+
+struct ofp_ni_reply_fqdn {
+	uint32_t ni_fqdn_ttl; /* TTL */
+	uint8_t ni_fqdn_namelen; /* length in octets of the FQDN */
+	uint8_t ni_fqdn_name[3]; /* XXX: alignment */
+} __attribute__((packed));
+
+/*
+ * Router Renumbering.
as router-renum-08.txt + */ +struct ofp_icmp6_router_renum { /* router renumbering header */ + struct ofp_icmp6_hdr rr_hdr; + uint8_t rr_segnum; + uint8_t rr_flags; + uint16_t rr_maxdelay; + uint32_t rr_reserved; +} __attribute__((packed)); + +#define OFP_ICMP6_RR_FLAGS_TEST 0x80 +#define OFP_ICMP6_RR_FLAGS_REQRESULT 0x40 +#define OFP_ICMP6_RR_FLAGS_FORCEAPPLY 0x20 +#define OFP_ICMP6_RR_FLAGS_SPECSITE 0x10 +#define OFP_ICMP6_RR_FLAGS_PREVDONE 0x08 + +#define OFP_rr_type rr_hdr.icmp6_type +#define OFP_rr_code rr_hdr.icmp6_code +#define OFP_rr_cksum rr_hdr.icmp6_cksum +#define OFP_rr_seqnum rr_hdr.icmp6_data32[0] + +struct ofp_rr_pco_match { /* match prefix part */ + uint8_t rpm_code; + uint8_t rpm_len; + uint8_t rpm_ordinal; + uint8_t rpm_matchlen; + uint8_t rpm_minlen; + uint8_t rpm_maxlen; + uint16_t rpm_reserved; + struct ofp_in6_addr rpm_prefix; +} __attribute__((packed)); + +#define OFP_RPM_PCO_ADD 1 +#define OFP_RPM_PCO_CHANGE 2 +#define OFP_RPM_PCO_SETGLOBAL 3 +#define OFP_RPM_PCO_MAX 4 + +struct ofp_rr_pco_use { /* use prefix part */ + uint8_t rpu_uselen; + uint8_t rpu_keeplen; + uint8_t rpu_ramask; + uint8_t rpu_raflags; + uint32_t rpu_vltime; + uint32_t rpu_pltime; + uint32_t rpu_flags; + struct ofp_in6_addr rpu_prefix; +} __attribute__((packed)); +#define OFP_ICMP6_RR_PCOUSE_RAFLAGS_ONLINK 0x80 +#define OFP_ICMP6_RR_PCOUSE_RAFLAGS_AUTO 0x40 + +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN +#define OFP_ICMP6_RR_PCOUSE_FLAGS_DECRVLTIME 0x80000000 +#define OFP_ICMP6_RR_PCOUSE_FLAGS_DECRPLTIME 0x40000000 +#elif ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN +#define OFP_ICMP6_RR_PCOUSE_FLAGS_DECRVLTIME 0x80 +#define OFP_ICMP6_RR_PCOUSE_FLAGS_DECRPLTIME 0x40 +#endif + +struct ofp_rr_result { /* router renumbering result message */ + uint16_t rrr_flags; + uint8_t rrr_ordinal; + uint8_t rrr_matchedlen; + uint32_t rrr_ifid; + struct ofp_in6_addr rrr_prefix; +} __attribute__((packed)); +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN +#define OFP_ICMP6_RR_RESULT_FLAGS_OOB 0x0002 +#define 
OFP_ICMP6_RR_RESULT_FLAGS_FORBIDDEN 0x0001 +#elif ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN +#define OFP_ICMP6_RR_RESULT_FLAGS_OOB 0x0200 +#define OFP_ICMP6_RR_RESULT_FLAGS_FORBIDDEN 0x0100 +#endif + +#define OFP_ICMP6_NODEINFO_FQDNOK 0x1 +#define OFP_ICMP6_NODEINFO_NODEADDROK 0x2 +#define OFP_ICMP6_NODEINFO_TMPADDROK 0x4 +#define OFP_ICMP6_NODEINFO_GLOBALOK 0x8 + +#endif /* not _OFP_ICMP6_H_ */ diff --git a/include/api/ofp_if_vlan.h b/include/api/ofp_if_vlan.h new file mode 100644 index 00000000..c4649f1a --- /dev/null +++ b/include/api/ofp_if_vlan.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright 1998 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: release/9.1.0/sys/net/if_vlan_var.h 219819 2011-03-21 09:40:01Z jeff $ + */ + +#ifndef _OFP_IF_VLAN_H_ +#define _OFP_IF_VLAN_H_ 1 + +struct ofp_ether_vlan_header { + uint8_t evl_dhost[OFP_ETHER_ADDR_LEN]; + uint8_t evl_shost[OFP_ETHER_ADDR_LEN]; + uint16_t evl_encap_proto; + uint16_t evl_tag; + uint16_t evl_proto; +} __attribute__((packed)); + +#define OFP_EVL_VLID_MASK 0x0FFF +#define OFP_EVL_PRI_MASK 0xE000 +#define OFP_EVL_VLANOFTAG(tag) ((tag) & OFP_EVL_VLID_MASK) +#define OFP_EVL_PRIOFTAG(tag) (((tag) >> 13) & 7) +#define OFP_EVL_CFIOFTAG(tag) (((tag) >> 12) & 1) +#define OFP_EVL_MAKETAG(vlid, pri, cfi) \ + ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & \ + OFP_EVL_VLID_MASK)) \ + +#endif diff --git a/include/api/ofp_in.h b/include/api/ofp_in.h new file mode 100644 index 00000000..2f63d999 --- /dev/null +++ b/include/api/ofp_in.h @@ -0,0 +1,391 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: release/9.1.0/sys/netinet/in.h 237910 2012-07-01 08:47:15Z tuexen $ + */ + +#ifndef __OFP_IN_H__ +#define __OFP_IN_H__ + +#include "ofp_socket_types.h" + +/* Protocols common to RFC 1700, POSIX, and X/Open. 
*/ +#define OFP_IPPROTO_IP 0 /* dummy for IP */ +#define OFP_IPPROTO_ICMP 1 /* control message protocol */ +#define OFP_IPPROTO_TCP 6 /* tcp */ +#define OFP_IPPROTO_UDP 17 /* user datagram protocol */ + +#define OFP_INADDR_ANY (uint32_t)0x00000000 +#define OFP_INADDR_BROADCAST (uint32_t)0xffffffff /* must be masked */ + +#ifndef OFP__IN_ADDR_T_DECLARED +typedef uint32_t ofp_in_addr_t; +#define OFP__IN_ADDR_T_DECLARED +#endif + +#ifndef _IN_PORT_T_DECLARED +typedef uint16_t ofp_in_port_t; +#define _IN_PORT_T_DECLARED +#endif + +#ifndef OFP__SA_FAMILY_T_DECLARED +typedef uint8_t ofp_sa_family_t; +#define OFP__SA_FAMILY_T_DECLARED +#endif + +/* Internet address (a structure for historical reasons). */ +#ifndef OFP__STRUCT_IN_ADDR_DECLARED +struct ofp_in_addr { + ofp_in_addr_t s_addr; +}; +#define OFP__STRUCT_IN_ADDR_DECLARED +#endif + +#ifndef OFP__SOCKLEN_T_DECLARED +typedef __ofp_socklen_t ofp_socklen_t; +#define OFP__SOCKLEN_T_DECLARED +#endif /* OFP__SOCKLEN_T_DECLARED */ + +/* Socket address, internet style. */ +struct ofp_sockaddr_in { + uint8_t sin_len; + ofp_sa_family_t sin_family; + ofp_in_port_t sin_port; + struct ofp_in_addr sin_addr; + int8_t sin_zero[8]; +}; + +#define OFP_IPPROTO_RAW 255 /* raw IP packet */ +#define OFP_INET_ADDRSTRLEN 16 + +/* + * Constants and structures defined by the internet system, + * Per RFC 790, September 1981, and numerous additions. 
+ */ + +/* + * Protocols (RFC 1700) + */ +#define OFP_IPPROTO_HOPOPTS 0 /* IP6 hop-by-hop options */ +#define OFP_IPPROTO_IGMP 2 /* group mgmt protocol */ +#define OFP_IPPROTO_GGP 3 /* gateway^2 (deprecated) */ +#define OFP_IPPROTO_IPV4 4 /* IPv4 encapsulation */ +#define OFP_IPPROTO_IPIP OFP_IPPROTO_IPV4 /* for compatibility */ +#define OFP_IPPROTO_ST 7 /* Stream protocol II */ +#define OFP_IPPROTO_EGP 8 /* exterior gateway protocol */ +#define OFP_IPPROTO_PIGP 9 /* private interior gateway */ +#define OFP_IPPROTO_RCCMON 10 /* BBN RCC Monitoring */ +#define OFP_IPPROTO_NVPII 11 /* network voice protocol*/ +#define OFP_IPPROTO_PUP 12 /* pup */ +#define OFP_IPPROTO_ARGUS 13 /* Argus */ +#define OFP_IPPROTO_EMCON 14 /* EMCON */ +#define OFP_IPPROTO_XNET 15 /* Cross Net Debugger */ +#define OFP_IPPROTO_CHAOS 16 /* Chaos*/ +#define OFP_IPPROTO_MUX 18 /* Multiplexing */ +#define OFP_IPPROTO_MEAS 19 /* DCN Measurement Subsystems */ +#define OFP_IPPROTO_HMP 20 /* Host Monitoring */ +#define OFP_IPPROTO_PRM 21 /* Packet Radio Measurement */ +#define OFP_IPPROTO_IDP 22 /* xns idp */ +#define OFP_IPPROTO_TRUNK1 23 /* Trunk-1 */ +#define OFP_IPPROTO_TRUNK2 24 /* Trunk-2 */ +#define OFP_IPPROTO_LEAF1 25 /* Leaf-1 */ +#define OFP_IPPROTO_LEAF2 26 /* Leaf-2 */ +#define OFP_IPPROTO_RDP 27 /* Reliable Data */ +#define OFP_IPPROTO_IRTP 28 /* Reliable Transaction */ +#define OFP_IPPROTO_TP 29 /* tp-4 w/ class negotiation */ +#define OFP_IPPROTO_BLT 30 /* Bulk Data Transfer */ +#define OFP_IPPROTO_NSP 31 /* Network Services */ +#define OFP_IPPROTO_INP 32 /* Merit Internodal */ +#define OFP_IPPROTO_SEP 33 /* Sequential Exchange */ +#define OFP_IPPROTO_3PC 34 /* Third Party Connect */ +#define OFP_IPPROTO_IDPR 35 /* InterDomain Policy Routing */ +#define OFP_IPPROTO_XTP 36 /* XTP */ +#define OFP_IPPROTO_DDP 37 /* Datagram Delivery */ +#define OFP_IPPROTO_CMTP 38 /* Control Message Transport */ +#define OFP_IPPROTO_TPXX 39 /* TP++ Transport */ +#define OFP_IPPROTO_IL 40 /* IL transport 
protocol */ +#define OFP_IPPROTO_IPV6 41 /* IP6 header */ +#define OFP_IPPROTO_SDRP 42 /* Source Demand Routing */ +#define OFP_IPPROTO_ROUTING 43 /* IP6 routing header */ +#define OFP_IPPROTO_FRAGMENT 44 /* IP6 fragmentation header */ +#define OFP_IPPROTO_IDRP 45 /* InterDomain Routing*/ +#define OFP_IPPROTO_RSVP 46 /* resource reservation */ +#define OFP_IPPROTO_GRE 47 /* General Routing Encap. */ +#define OFP_IPPROTO_MHRP 48 /* Mobile Host Routing */ +#define OFP_IPPROTO_BHA 49 /* BHA */ +#define OFP_IPPROTO_ESP 50 /* IP6 Encap Sec. Payload */ +#define OFP_IPPROTO_AH 51 /* IP6 Auth Header */ +#define OFP_IPPROTO_INLSP 52 /* Integ. Net Layer Security */ +#define OFP_IPPROTO_SWIPE 53 /* IP with encryption */ +#define OFP_IPPROTO_NHRP 54 /* Next Hop Resolution */ +#define OFP_IPPROTO_MOBILE 55 /* IP Mobility */ +#define OFP_IPPROTO_TLSP 56 /* Transport Layer Security */ +#define OFP_IPPROTO_SKIP 57 /* SKIP */ +#define OFP_IPPROTO_ICMPV6 58 /* ICMP6 */ +#define OFP_IPPROTO_NONE 59 /* IP6 no next header */ +#define OFP_IPPROTO_DSTOPTS 60 /* IP6 destination option */ +#define OFP_IPPROTO_AHIP 61 /* any host internal protocol */ +#define OFP_IPPROTO_CFTP 62 /* CFTP */ +#define OFP_IPPROTO_HELLO 63 /* "hello" routing protocol */ +#define OFP_IPPROTO_SATEXPAK 64 /* SATNET/Backroom EXPAK */ +#define OFP_IPPROTO_KRYPTOLAN 65 /* Kryptolan */ +#define OFP_IPPROTO_RVD 66 /* Remote Virtual Disk */ +#define OFP_IPPROTO_IPPC 67 /* Pluribus Packet Core */ +#define OFP_IPPROTO_ADFS 68 /* Any distributed FS */ +#define OFP_IPPROTO_SATMON 69 /* Satnet Monitoring */ +#define OFP_IPPROTO_VISA 70 /* VISA Protocol */ +#define OFP_IPPROTO_IPCV 71 /* Packet Core Utility */ +#define OFP_IPPROTO_CPNX 72 /* Comp. Prot. Net. Executive */ +#define OFP_IPPROTO_CPHB 73 /* Comp. Prot. 
HeartBeat */ +#define OFP_IPPROTO_WSN 74 /* Wang Span Network */ +#define OFP_IPPROTO_PVP 75 /* Packet Video Protocol */ +#define OFP_IPPROTO_BRSATMON 76 /* BackRoom SATNET Monitoring */ +#define OFP_IPPROTO_ND 77 /* Sun net disk proto (temp.) */ +#define OFP_IPPROTO_WBMON 78 /* WIDEBAND Monitoring */ +#define OFP_IPPROTO_WBEXPAK 79 /* WIDEBAND EXPAK */ +#define OFP_IPPROTO_EON 80 /* ISO cnlp */ +#define OFP_IPPROTO_VMTP 81 /* VMTP */ +#define OFP_IPPROTO_SVMTP 82 /* Secure VMTP */ +#define OFP_IPPROTO_VINES 83 /* Banyon VINES */ +#define OFP_IPPROTO_TTP 84 /* TTP */ +#define OFP_IPPROTO_IGP 85 /* NSFNET-IGP */ +#define OFP_IPPROTO_DGP 86 /* dissimilar gateway prot. */ +#define OFP_IPPROTO_TCF 87 /* TCF */ +#define OFP_IPPROTO_IGRP 88 /* Cisco/GXS IGRP */ +#define OFP_IPPROTO_OSPFIGP 89 /* OSPFIGP */ +#define OFP_IPPROTO_SRPC 90 /* Strite RPC protocol */ +#define OFP_IPPROTO_LARP 91 /* Locus Address Resoloution */ +#define OFP_IPPROTO_MTP 92 /* Multicast Transport */ +#define OFP_IPPROTO_AX25 93 /* AX.25 Frames */ +#define OFP_IPPROTO_IPEIP 94 /* IP encapsulated in IP */ +#define OFP_IPPROTO_MICP 95 /* Mobile Int.ing control */ +#define OFP_IPPROTO_SCCSP 96 /* Semaphore Comm. security */ +#define OFP_IPPROTO_ETHERIP 97 /* Ethernet IP encapsulation */ +#define OFP_IPPROTO_ENCAP 98 /* encapsulation header */ +#define OFP_IPPROTO_APES 99 /* any private encr. 
scheme */ +#define OFP_IPPROTO_GMTP 100 /* GMTP*/ +#define OFP_IPPROTO_IPCOMP 108 /* payload compression (IPComp) */ +#define OFP_IPPROTO_SCTP 132 /* SCTP */ +#define OFP_IPPROTO_MH 135 /* IPv6 Mobility Header */ +/* 101-254: Partly Unassigned */ +#define OFP_IPPROTO_PIM 103 /* Protocol Independent Mcast */ +#define OFP_IPPROTO_CARP 112 /* CARP */ +#define OFP_IPPROTO_PGM 113 /* PGM */ +#define OFP_IPPROTO_PFSYNC 240 /* PFSYNC */ +/* 255: Reserved */ +/* BSD Private, local use, namespace incursion, no longer used */ +#define OFP_IPPROTO_OLD_DIVERT 254 /* OLD divert pseudo-proto */ +#define OFP_IPPROTO_MAX 256 + +/* last return value of *_input(), meaning "all job for this pkt is done". */ +#define OFP_IPPROTO_DONE 257 + +/* Only used internally, so can be outside the range of valid IP protocols. */ +#define OFP_IPPROTO_DIVERT 258 /* divert pseudo-protocol */ +#define OFP_IPPROTO_SEND 259 /* SeND pseudo-protocol */ + +/* Only used internally, so can be outside the range of valid IP protocols. */ +#define OFP_IPPROTO_SP 260 /* continue processing + on Slow Path*/ +/* + * Local port number conventions: + * + * When a user does a bind(2) or connect(2) with a port number of zero, + * a non-conflicting local port address is chosen. + * The default range is IPPORT_HIFIRSTAUTO through + * IPPORT_HILASTAUTO, although that is settable by sysctl. + * + * A user may set the IPPROTO_IP option IP_PORTRANGE to change this + * default assignment range. + * + * The value IP_PORTRANGE_DEFAULT causes the default behavior. + * + * The value IP_PORTRANGE_HIGH changes the range of candidate port numbers + * into the "high" range. These are reserved for client outbound connections + * which do not want to be filtered by any firewalls. + * + * The value IP_PORTRANGE_LOW changes the range to the "low" are + * that is (by convention) restricted to privileged processes. This + * convention is based on "vouchsafe" principles only. 
It is only secure + * if you trust the remote host to restrict these ports. + * + * The default range of ports and the high range can be changed by + * sysctl(3). (net.inet.ip.port{hi,low}{first,last}_auto) + * + * Changing those values has bad security implications if you are + * using a stateless firewall that is allowing packets outside of that + * range in order to allow transparent outgoing connections. + * + * Such a firewall configuration will generally depend on the use of these + * default values. If you change them, you may find your Security + * Administrator looking for you with a heavy object. + * + * For a slightly more orthodox text view on this: + * + * ftp://ftp.isi.edu/in-notes/iana/assignments/port-numbers + * + * port numbers are divided into three ranges: + * + * 0 - 1023 Well Known Ports + * 1024 - 49151 Registered Ports + * 49152 - 65535 Dynamic and/or Private Ports + * + */ + +/* + * Ports < IPPORT_RESERVED are reserved for + * privileged processes (e.g. root). (IP_PORTRANGE_LOW) + */ +#define OFP_IPPORT_RESERVED 1024 + +/* + * Default local port range, used by IP_PORTRANGE_DEFAULT + */ +#define OFP_IPPORT_EPHEMERALFIRST 10000 +#define OFP_IPPORT_EPHEMERALLAST 65535 + +/* + * Dynamic port range, used by IP_PORTRANGE_HIGH. + */ +#define OFP_IPPORT_HIFIRSTAUTO 49152 +#define OFP_IPPORT_HILASTAUTO 65535 + +/* + * Scanning for a free reserved port return a value below IPPORT_RESERVED, + * but higher than IPPORT_RESERVEDSTART. Traditionally the start value was + * 512, but that conflicts with some well-known-services that firewalls may + * have a fit if we use. + */ +#define OFP_IPPORT_RESERVEDSTART 600 + +#define OFP_IPPORT_MAX 65535 + +/* + * Definitions of bits in internet address integers. + * On subnets, the decomposition of addresses to host and net parts + * is done according to subnet mask, not the masks here. 
+ */ +#define OFP_IN_CLASSA(i) (((uint32_t)(i) & 0x80000000) == 0) +#define OFP_IN_CLASSA_NET 0xff000000 +#define OFP_IN_CLASSA_NSHIFT 24 +#define OFP_IN_CLASSA_HOST 0x00ffffff +#define OFP_IN_CLASSA_MAX 128 + +#define OFP_IN_CLASSB(i) (((uint32_t)(i) & 0xc0000000) == 0x80000000) +#define OFP_IN_CLASSB_NET 0xffff0000 +#define OFP_IN_CLASSB_NSHIFT 16 +#define OFP_IN_CLASSB_HOST 0x0000ffff +#define OFP_IN_CLASSB_MAX 65536 + +#define OFP_IN_CLASSC(i) (((uint32_t)(i) & 0xe0000000) == 0xc0000000) +#define OFP_IN_CLASSC_NET 0xffffff00 +#define OFP_IN_CLASSC_NSHIFT 8 +#define OFP_IN_CLASSC_HOST 0x000000ff + +#define OFP_IN_CLASSD(i) (((uint32_t)(i) & 0xf0000000) == 0xe0000000) +#define OFP_IN_CLASSD_NET 0xf0000000 /* These ones aren't really */ +#define OFP_IN_CLASSD_NSHIFT 28 /* net and host fields, but */ +#define OFP_IN_CLASSD_HOST 0x0fffffff /* routing needn't know. */ +#define OFP_IN_MULTICAST(i) OFP_IN_CLASSD(i) + +#define OFP_IN_EXPERIMENTAL(i) (((uint32_t)(i) & 0xf0000000) == 0xf0000000) +#define OFP_IN_BADCLASS(i) (((uint32_t)(i) & 0xf0000000) == 0xf0000000) + +#define OFP_IN_LINKLOCAL(i) (((uint32_t)(i) & 0xffff0000) == 0xa9fe0000) +#define OFP_IN_LOOPBACK(i) (((uint32_t)(i) & 0xff000000) == 0x7f000000) +#define OFP_IN_ZERONET(i) (((uint32_t)(i) & 0xff000000) == 0) + +#define OFP_IN_PRIVATE(i) ((((uint32_t)(i) & 0xff000000) == 0x0a000000) || \ + (((uint32_t)(i) & 0xfff00000) == 0xac100000) || \ + (((uint32_t)(i) & 0xffff0000) == 0xc0a80000)) + +#define OFP_IN_LOCAL_GROUP(i) (((uint32_t)(i) & 0xffffff00) == 0xe0000000) + +#define OFP_IN_ANY_LOCAL(i) (IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i)) + +#define OFP_INADDR_LOOPBACK (uint32_t)0x7f000001 + +#define OFP_INADDR_NONE 0xffffffff /* -1 return */ + + +#define OFP_INADDR_UNSPEC_GROUP (uint32_t)0xe0000000 /* 224.0.0.0 */ +#define OFP_INADDR_ALLHOSTS_GROUP (uint32_t)0xe0000001 /* 224.0.0.1 */ +#define OFP_INADDR_ALLRTRS_GROUP (uint32_t)0xe0000002 /* 224.0.0.2 */ +#define OFP_INADDR_ALLRPTS_GROUP (uint32_t)0xe0000016 /* 
224.0.0.22, IGMPv3 */ +#define OFP_INADDR_CARP_GROUP (uint32_t)0xe0000012 /* 224.0.0.18 */ +#define OFP_INADDR_PFSYNC_GROUP (uint32_t)0xe00000f0 /* 224.0.0.240 */ +#define OFP_INADDR_ALLMDNS_GROUP (uint32_t)0xe00000fb /* 224.0.0.251 */ +#define OFP_INADDR_MAX_LOCAL_GROUP (uint32_t)0xe00000ff /* 224.0.0.255 */ + +#define OFP_IN_LOOPBACKNET 127 /* official! */ + +#define OFP_IN_RFC3021_MASK (uint32_t)0xfffffffe + +/* + * Options for use with [gs]etsockopt at the IP level. + * First word of comment is data type; bool is stored in int. + */ +#define OFP_IP_OPTIONS 1 /* buf/ip_opts; set/get IP options */ +#define OFP_IP_HDRINCL 2 /* int; header is included with data */ +#define OFP_IP_TOS 3 /* int; IP type of service and preced. */ +#define OFP_IP_TTL 4 /* int; IP time to live */ +#define OFP_IP_RECVOPTS 5 /* bool; receive all IP opts w/dgram */ +#define OFP_IP_RECVRETOPTS 6 /* bool; receive IP opts for response */ +#define OFP_IP_RECVDSTADDR 7 /* bool; receive IP dst addr w/dgram */ +#define OFP_IP_SENDSRCADDR OFP_IP_RECVDSTADDR /* cmsg_type to set src addr */ +#define OFP_IP_RETOPTS 8 /* ip_opts; set/get IP options */ +#define OFP_IP_MULTICAST_IF 9 /* struct ofp_in_addr *or* struct ip_mreqn; + * set/get IP multicast i/f */ +#define OFP_IP_MULTICAST_TTL 10 /* uint8_t; set/get IP multicast ttl */ +#define OFP_IP_MULTICAST_LOOP 11 /* uint8_t; set/get IP multicast loopback */ +#define OFP_IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */ +#define OFP_IP_DROP_MEMBERSHIP 13 /* ip_mreq; drop an IP group membership */ +#define OFP_IP_MULTICAST_VIF 14 /* set/get IP mcast virt. 
iface */ +#define OFP_IP_RSVP_ON 15 /* enable RSVP in kernel */ +#define OFP_IP_RSVP_OFF 16 /* disable RSVP in kernel */ +#define OFP_IP_RSVP_VIF_ON 17 /* set RSVP per-vif socket */ +#define OFP_IP_RSVP_VIF_OFF 18 /* unset RSVP per-vif socket */ +#define OFP_IP_PORTRANGE 19 /* int; range to choose for unspec port */ +#define OFP_IP_RECVIF 20 /* bool; receive reception if w/dgram */ +/* for IPSEC */ +#define OFP_IP_IPSEC_POLICY 21 /* int; set/get security policy */ +#define OFP_IP_FAITH 22 /* bool; accept FAITH'ed connections */ + +#define OFP_IP_ONESBCAST 23 /* bool: send all-ones broadcast */ +#define OFP_IP_BINDANY 24 /* bool: allow bind to any address */ + +char *ofp_inet_ntoa(struct ofp_in_addr); /* implement */ + +#endif /* __OFP_IN_H__*/ diff --git a/include/api/ofp_in6.h b/include/api/ofp_in6.h new file mode 100644 index 00000000..4270aa9a --- /dev/null +++ b/include/api/ofp_in6.h @@ -0,0 +1,300 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: release/9.1.0/sys/netinet6/in6.h 238227 2012-07-08 10:29:01Z bz $ + */ + +#ifndef __OFP_IN6_H__ +#define __OFP_IN6_H__ + +#include "ofp_socket_types.h" + +#ifndef OFP__SOCKLEN_T_DECLARED +typedef __ofp_socklen_t ofp_socklen_t; +#define OFP__SOCKLEN_T_DECLARED +#endif /* OFP__SOCKLEN_T_DECLARED */ + +/* + * IPv6 address + */ +struct ofp_in6_addr { + union { + uint8_t __u6_addr8[16]; + uint16_t __u6_addr16[8]; + uint32_t __u6_addr32[4]; + } __u6_addr; /* 128-bit IP6 address */ +}; + +#define ofp_s6_addr __u6_addr.__u6_addr8 +#define ofp_s6_addr16 __u6_addr.__u6_addr16 +#define ofp_s6_addr32 __u6_addr.__u6_addr32 +#define OFP_INET6_ADDRSTRLEN 46 + +/* + * Socket address for IPv6 + */ +struct ofp_sockaddr_in6 { + uint8_t sin6_len; /* length of this struct */ + ofp_sa_family_t sin6_family; /* AF_INET6 */ + ofp_in_port_t sin6_port; /* Transport layer port # */ + uint32_t sin6_flowinfo; /* IP6 flow information */ + struct ofp_in6_addr sin6_addr; /* IP6 address */ + uint32_t sin6_scope_id; /* scope zone index */ +}; + +/* + * Local definition for masks + */ +#define OFP_IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } +#define OFP_IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } } +#define OFP_IN6MASK64 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } } 
+#define OFP_IN6MASK96 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 } } } +#define OFP_IN6MASK128 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } } } + +extern const struct ofp_sockaddr_in6 ofp_sa6_any; + +extern const struct ofp_in6_addr ofp_in6mask0; +extern const struct ofp_in6_addr ofp_in6mask32; +extern const struct ofp_in6_addr ofp_in6mask64; +extern const struct ofp_in6_addr ofp_in6mask96; +extern const struct ofp_in6_addr ofp_in6mask128; + +/* + * Macros started with IPV6_ADDR is KAME local + */ +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN +#define OFP_IPV6_ADDR_INT32_ONE 1 +#define OFP_IPV6_ADDR_INT32_TWO 2 +#define OFP_IPV6_ADDR_INT32_MNL 0xff010000 +#define OFP_IPV6_ADDR_INT32_MLL 0xff020000 +#define OFP_IPV6_ADDR_INT32_SMP 0x0000ffff +#define OFP_IPV6_ADDR_INT16_ULL 0xfe80 +#define OFP_IPV6_ADDR_INT16_USL 0xfec0 +#define OFP_IPV6_ADDR_INT16_MLL 0xff02 +#elif ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN +#define OFP_IPV6_ADDR_INT32_ONE 0x01000000 +#define OFP_IPV6_ADDR_INT32_TWO 0x02000000 +#define OFP_IPV6_ADDR_INT32_MNL 0x000001ff +#define OFP_IPV6_ADDR_INT32_MLL 0x000002ff +#define OFP_IPV6_ADDR_INT32_SMP 0xffff0000 +#define OFP_IPV6_ADDR_INT16_ULL 0x80fe +#define OFP_IPV6_ADDR_INT16_USL 0xc0fe +#define OFP_IPV6_ADDR_INT16_MLL 0x02ff +#endif + +/* + * Definition of some useful macros to handle IP6 addresses + */ +#define OFP_IN6ADDR_ANY_INIT \ + {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } } +#define OFP_IN6ADDR_LOOPBACK_INIT \ + {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } } +#define OFP_IN6ADDR_NODELOCAL_ALLNODES_INIT \ + {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } } +#define OFP_IN6ADDR_INTFACELOCAL_ALLNODES_INIT \ + {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
\ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } } +#define OFP_IN6ADDR_LINKLOCAL_ALLNODES_INIT \ + {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } } +#define OFP_IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ + {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } } } +#define OFP_IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \ + {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 } } } + +extern const struct ofp_in6_addr ofp_in6addr_any; +extern const struct ofp_in6_addr ofp_in6addr_loopback; +extern const struct ofp_in6_addr ofp_in6addr_nodelocal_allnodes; +extern const struct ofp_in6_addr ofp_in6addr_linklocal_allnodes; +extern const struct ofp_in6_addr ofp_in6addr_linklocal_allrouters; +extern const struct ofp_in6_addr ofp_in6addr_linklocal_allv2routers; + +/* + * Equality + * NOTE: Some of kernel programming environment (for example, openbsd/sparc) + * does not supply memcmp(). For userland memcmp() is preferred as it is + * in ANSI standard. 
+ */ +#define OFP_IN6_ARE_ADDR_EQUAL(a, b) \ + (memcmp(&(a)->ofp_s6_addr[0], &(b)->ofp_s6_addr[0],\ + sizeof(struct ofp_in6_addr)) == 0) + +/* + * Unspecified + */ +#define OFP_IN6_IS_ADDR_UNSPECIFIED(a) \ + (((a)->ofp_s6_addr32[0] == 0) && \ + ((a)->ofp_s6_addr32[1] == 0) && \ + ((a)->ofp_s6_addr32[2] == 0) && \ + ((a)->ofp_s6_addr32[3] == 0)) + +/* + * Loopback + */ +#define OFP_IN6_IS_ADDR_LOOPBACK(a) \ + (((a)->ofp_s6_addr32[0] == 0) &&\ + ((a)->ofp_s6_addr32[1] == 0) &&\ + ((a)->ofp_s6_addr32[2] == 0) &&\ + ((a)->ofp_s6_addr32[3] == odp_be_to_cpu_32(1))) + +/* + * IPv4 compatible + */ +#define OFP_IN6_IS_ADDR_V4COMPAT(a) \ + ((*(const uint32_t *)(const void *)(&(a)->ofp_s6_addr[0]) == 0) &&\ + (*(const uint32_t *)(const void *)(&(a)->ofp_s6_addr[4]) == 0) &&\ + (*(const uint32_t *)(const void *)(&(a)->ofp_s6_addr[8]) == 0) &&\ + (*(const uint32_t *)(const void *)(&(a)->ofp_s6_addr[12]) != 0) &&\ + (*(const uint32_t *)(const void *)(&(a)->ofp_s6_addr[12]) !=\ + odp_be_to_cpu_32(1))) + +/* + * IPv4 mapped (::ffff:a.b.c.d); must compare 32-bit words, not single bytes + */ +#define OFP_IN6_IS_ADDR_V4MAPPED(a) \ + ((a)->ofp_s6_addr32[0] == 0 && \ + (a)->ofp_s6_addr32[1] == 0 && \ + (a)->ofp_s6_addr32[2] == odp_be_to_cpu_32(0x0000ffff)) + +/* + * KAME Scope Values + */ +#define OFP_IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define OFP_IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 +#define OFP_IPV6_ADDR_SCOPE_LINKLOCAL 0x02 +#define OFP_IPV6_ADDR_SCOPE_SITELOCAL 0x05 +#define OFP_IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ +#define OFP_IPV6_ADDR_SCOPE_GLOBAL 0x0e + +/* + * Unicast Scope + * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373).
+ */ +#define OFP_IN6_IS_ADDR_LINKLOCAL(a) \ + (((a)->ofp_s6_addr[0] == 0xfe) && \ + (((a)->ofp_s6_addr[1] & 0xc0) == 0x80)) +#define OFP_IN6_IS_ADDR_SITELOCAL(a) \ + (((a)->ofp_s6_addr[0] == 0xfe) && \ + (((a)->ofp_s6_addr[1] & 0xc0) == 0xc0)) + +/* + * Multicast + */ +#define OFP_IN6_IS_ADDR_MULTICAST(a) ((a)->ofp_s6_addr[0] == 0xff) +#define OFP_IPV6_ADDR_MC_SCOPE(a) ((a)->ofp_s6_addr[1] & 0x0f) + +/* + * Multicast Scope + */ +#define OFP_IN6_IS_ADDR_MC_LINKLOCAL(a) \ + (OFP_IN6_IS_ADDR_MULTICAST(a) && \ + (OFP_IPV6_ADDR_MC_SCOPE(a) == OFP_IPV6_ADDR_SCOPE_LINKLOCAL)) + +#define OFP_IN6_IS_ADDR_MC_INTFACELOCAL(a) \ + (OFP_IN6_IS_ADDR_MULTICAST(a) && \ + (OFP_IPV6_ADDR_MC_SCOPE(a) == OFP_IPV6_ADDR_SCOPE_INTFACELOCAL)) +/* + * KAME Scope + */ +#define OFP_IN6_IS_SCOPE_LINKLOCAL(a) \ + ((OFP_IN6_IS_ADDR_LINKLOCAL(a)) || \ + (OFP_IN6_IS_ADDR_MC_LINKLOCAL(a))) + +/* + * Argument structure for IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP. + */ +struct ofp_ipv6_mreq { + struct ofp_in6_addr ipv6mr_multiaddr; + unsigned int ipv6mr_interface; +}; + +/* + * IPV6_PKTINFO: Packet information(RFC2292 sec 5) + */ +struct ofp_in6_pktinfo { + struct ofp_in6_addr ipi6_addr; /* src/dst IPv6 address */ + unsigned int ipi6_ifindex; /* send/recv interface index */ +}; + +/* + * Control structure for IPV6_RECVPATHMTU socket option. + */ +struct ofp_ip6_mtuinfo { + struct ofp_sockaddr_in6 ip6m_addr; /* or sockaddr_storage? */ + uint32_t ip6m_mtu; +}; + + +#endif /* __OFP_IN6_H__ */ diff --git a/include/api/ofp_init.h b/include/api/ofp_init.h new file mode 100644 index 00000000..20027ec3 --- /dev/null +++ b/include/api/ofp_init.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_INIT_H__ +#define __OFP_INIT_H__ + +#include "ofp_hook.h" + +typedef struct ofp_init_global_t { + uint16_t if_count; + uint16_t linux_core_id; + char **if_names; + ofp_pkt_hook pkt_hook[OFP_HOOK_MAX]; + uint8_t burst_recv_mode; +} ofp_init_global_t; + +int ofp_init_global(ofp_init_global_t *params); +int ofp_init_local(void); + +#endif /* __OFP_INIT_H__ */ diff --git a/include/api/ofp_ioctl.h b/include/api/ofp_ioctl.h new file mode 100644 index 00000000..523be126 --- /dev/null +++ b/include/api/ofp_ioctl.h @@ -0,0 +1,247 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)sockio.h 8.1 (Berkeley) 3/28/94 + * $FreeBSD: release/9.1.0/sys/sys/sockio.h 223735 2011-07-03 12:22:02Z bz $ + */ + +#ifndef _SYS_IOCTL_H_ +#define _SYS_IOCTL_H_ + +/* + * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests + */ +struct ofp_ifreq_buffer { + size_t length; + void *buffer; +}; + +/* + * Interface request structure used for socket + * ofp_ioctl's. All interface ioctl's must have parameter + * definitions which begin with ifr_name. The + * remainder may be interface specific. + */ +struct ofp_ifreq { + char ifr_name[OFP_IFNAMSIZ]; /* if name, e.g. 
"en0" */ + union { + struct ofp_sockaddr ifru_addr; + struct ofp_sockaddr ifru_dstaddr; + struct ofp_sockaddr ifru_broadaddr; + struct ofp_ifreq_buffer ifru_buffer; + short ifru_flags[2]; + short ifru_index; + int ifru_jid; + int ifru_metric; + int ifru_mtu; + int ifru_phys; + int ifru_media; + char * ifru_data; + int ifru_cap[2]; + uint32_t ifru_fib; + } ifr_ifru; +#define ifr_addr ifr_ifru.ifru_addr /* address */ +#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ +#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ +#define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ +#define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ +#define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ +#define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ +#define ifr_metric ifr_ifru.ifru_metric /* metric */ +#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ +#define ifr_phys ifr_ifru.ifru_phys /* physical wire */ +#define ifr_media ifr_ifru.ifru_media /* physical media */ +#define ifr_data ifr_ifru.ifru_data /* for use by interface */ +#define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ +#define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ +#define ifr_index ifr_ifru.ifru_index /* interface index */ +#define ifr_fib ifr_ifru.ifru_fib /* interface fib */ +}; + +struct ofp_ifconf { + int ifc_len; /* size of associated buffer */ + int ifc_current_len; + union { + char * ifcu_buf; + struct ofp_ifreq *ifcu_req; + } ifc_ifcu; +#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ +#define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ +}; + +/* + * Structure used to query names of interface cloners. 
+ */ + +struct ofp_if_clonereq { + int ifcr_total; /* total cloners (out) */ + int ifcr_count; /* room for this many in user buffer */ + char *ifcr_buffer; /* buffer for cloner names */ +}; + +/* + * Used to lookup groups for an interface + */ +struct ofp_ifgroupreq { + char ifgr_name[OFP_IFNAMSIZ]; + uint32_t ifgr_len; + union { + char ifgru_group[OFP_IFNAMSIZ]; + struct ifg_req *ifgru_groups; + } ifgr_ifgru; +#define ifgr_group ifgr_ifgru.ifgru_group +#define ifgr_groups ifgr_ifgru.ifgru_groups +}; + +struct ofp_ifaliasreq { + char ifra_name[OFP_IFNAMSIZ]; /* if name, e.g. "en0" */ + struct ofp_sockaddr ifra_addr; + struct ofp_sockaddr ifra_broadaddr; + struct ofp_sockaddr ifra_mask; +}; + +struct ofp_in_aliasreq { + char ifra_name[OFP_IFNAMSIZ]; /* if name, e.g. "en0" */ + struct ofp_sockaddr_in ifra_addr; + struct ofp_sockaddr_in ifra_broadaddr; +#define ifra_dstaddr ifra_broadaddr + struct ofp_sockaddr_in ifra_mask; +}; + +struct ofp_in_tunreq { + char iftun_name[OFP_IFNAMSIZ]; /* if name, e.g. 
"gre1" */ + struct ofp_sockaddr_in iftun_addr; + struct ofp_sockaddr_in iftun_p2p_addr; + struct ofp_sockaddr_in iftun_local_addr; + struct ofp_sockaddr_in iftun_remote_addr; + int iftun_vrf; +}; + +/* + * Structure for SIOC[AGD]LIFADDR + */ +struct ofp_sockaddr_storage { + unsigned char ss_len; /* address length */ + ofp_sa_family_t ss_family; /* address family */ +}; + +struct ofp_if_laddrreq { + char iflr_name[OFP_IFNAMSIZ]; + uint32_t flags; +#define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */ + uint32_t prefixlen; /* in/out */ + struct ofp_sockaddr_storage addr; /* in/out */ + struct ofp_sockaddr_storage dstaddr; /* out */ +}; + +/* + * Structure for SIOCADDRT and SIOCDELRT + */ +struct ofp_rtentry { + struct ofp_sockaddr rt_dst; /* target address */ + struct ofp_sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ + struct ofp_sockaddr rt_genmask; /* target network mask (IP) */ + int rt_vrf; + uint16_t rt_flags; + int16_t rt_metric; /* +1 for binary compatibility! */ + char *rt_dev; /* forcing the device at add */ + unsigned long rt_mtu; /* per route MTU/Window */ +#define rt_mss rt_mtu /* Compatibility :-( */ + unsigned long rt_window; /* Window clamping */ + unsigned short rt_irtt; /* Initial RTT */ +}; + +/* + * Ioctl's have the command encoded in the lower word, and the size of + * any in or out parameters in the upper word. The high 3 bits of the + * upper word are used to encode the in/out status of the parameter. 
+ */ +#define OFP_IOCPARM_SHIFT 13 /* number of bits for ofp_ioctl size */ +#define OFP_IOCPARM_MASK ((1 << OFP_IOCPARM_SHIFT) - 1) /* parameter length mask */ +#define OFP_IOCPARM_LEN(x) (((x) >> 16) & OFP_IOCPARM_MASK) +#define OFP_IOCBASECMD(x) ((x) & ~(OFP_IOCPARM_MASK << 16)) +#define OFP_IOCGROUP(x) (((x) >> 8) & 0xff) + +#define OFP_IOCPARM_MAX (1 << OFP_IOCPARM_SHIFT) /* max size of ofp_ioctl */ +#define OFP_IOC_VOID 0x20000000 /* no parameters */ +#define OFP_IOC_OUT 0x40000000 /* copy out parameters */ +#define OFP_IOC_IN 0x80000000 /* copy in parameters */ +#define OFP_IOC_INOUT (OFP_IOC_IN|OFP_IOC_OUT) +#define OFP_IOC_DIRMASK (OFP_IOC_VOID|OFP_IOC_OUT|OFP_IOC_IN) + +#define _OFP_IOC(inout,group,num,len) \ + ((unsigned long)((inout) | (((len) & OFP_IOCPARM_MASK) << 16) | ((group) << 8) | (num))) +#define _OFP_IO(g,n) _OFP_IOC(OFP_IOC_VOID, (g), (n), 0) +#define _OFP_IOWINT(g,n) _OFP_IOC(OFP_IOC_VOID, (g), (n), sizeof(int)) +#define _OFP_IOR(g,n,t) _OFP_IOC(OFP_IOC_OUT, (g), (n), sizeof(t)) +#define _OFP_IOW(g,n,t) _OFP_IOC(OFP_IOC_IN, (g), (n), sizeof(t)) +/* this should be _IORW, but stdio got there first */ +#define _OFP_IOWR(g,n,t) _OFP_IOC(OFP_IOC_INOUT, (g), (n), sizeof(t)) + +#define OFP_FIONREAD _OFP_IOR('f', 127, int) /* get # bytes to read */ +#define OFP_FIONBIO _OFP_IOW('f', 126, int) /* set/clear non-blocking i/o */ +#define OFP_FIOASYNC _OFP_IOW('f', 125, int) /* set/clear async i/o */ +#define OFP_FIONWRITE _OFP_IOR('f', 119, int) /* get # bytes (yet) to write */ +#define OFP_FIONSPACE _OFP_IOR('f', 118, int) /* get space in send queue */ + +#define OFP_SIOCATMARK _OFP_IOR('s', 7, int) /* at oob mark? 
*/ + +#define OFP_SIOCADDRT _OFP_IOW('r', 10, struct ofp_rtentry) /* add route */ +#define OFP_SIOCDELRT _OFP_IOW('r', 11, struct ofp_rtentry) /* delete route */ + +#define OFP_SIOCSIFADDR _OFP_IOW('i', 12, struct ofp_ifreq) /* set ifnet address */ +#define OFP_SIOCGIFADDR _OFP_IOWR('i', 33, struct ofp_ifreq) /* get ifnet address */ +#define OFP_SIOCSIFDSTADDR _OFP_IOW('i', 14, struct ofp_ifreq) /* set p-p address */ +#define OFP_SIOCGIFDSTADDR _OFP_IOWR('i', 34, struct ofp_ifreq) /* get p-p address */ +#define OFP_OSIOCGIFBRDADDR _OFP_IOWR('i', 18, struct ofp_ifreq) /* get broadcast addr */ +#define OFP_SIOCGIFBRDADDR _OFP_IOWR('i', 35, struct ofp_ifreq) /* get broadcast addr */ +#define OFP_SIOCSIFBRDADDR _OFP_IOW('i', 19, struct ofp_ifreq) /* set broadcast addr */ +#define OFP_OSIOCGIFCONF _OFP_IOWR('i', 20, struct ofp_ifconf) /* get ifnet list */ +#define OFP_SIOCGIFCONF _OFP_IOWR('i', 36, struct ofp_ifconf) /* get ifnet list */ +#define OFP_SIOCGIFNETMASK _OFP_IOWR('i', 37, struct ofp_ifreq) /* get net addr mask */ +#define OFP_SIOCSIFNETMASK _OFP_IOW('i', 22, struct ofp_ifreq) /* set net addr mask */ +#define OFP_SIOCDIFADDR _OFP_IOW('i', 25, struct ofp_ifreq) /* delete IF addr */ +#define OFP_SIOCAIFADDR _OFP_IOW('i', 26, struct ofp_ifaliasreq) /* add/chg IF alias */ +#define OFP_SIOCALIFADDR _OFP_IOW('i', 27, struct ofp_if_laddrreq) /* add IF addr */ +#define OFP_SIOCGLIFADDR _OFP_IOWR('i', 28, struct ofp_if_laddrreq) /* get IF addr */ +#define OFP_SIOCDLIFADDR _OFP_IOW('i', 29, struct ofp_if_laddrreq) /* delete IF addr */ +#define OFP_SIOCGIFFIB _OFP_IOWR('i', 92, struct ofp_ifreq) /* get IF fib */ +#define OFP_SIOCSIFFIB _OFP_IOW('i', 93, struct ofp_ifreq) /* set IF fib */ +#define OFP_SIOCGIFVRF OFP_SIOCGIFFIB +#define OFP_SIOCSIFVRF OFP_SIOCSIFFIB +#define OFP_SIOCIFCREATE _OFP_IOWR('i', 122, struct ofp_ifreq) /* create clone if */ +#define OFP_SIOCIFCREATE2 _OFP_IOWR('i', 124, struct ofp_ifreq) /* create clone if */ +#define OFP_SIOCIFDESTROY 
_OFP_IOW('i', 121, struct ofp_ifreq) /* destroy clone if */ +#define OFP_SIOCIFGCLONERS _OFP_IOWR('i', 120, struct ofp_if_clonereq) /* get cloners */ +#define OFP_SIOCGIFGMEMB _OFP_IOWR('i', 138, struct ofp_ifgroupreq) /* get members */ +#define OFP_SIOCSIFTUN _OFP_IOW('i', 139, struct ofp_in_tunreq) /* set tunnel */ +#define OFP_SIOCGIFTUN _OFP_IOWR('i', 140, struct ofp_in_tunreq) /* get tunnel */ + +#endif /* !_SYS_IOCTL_H_ */ diff --git a/include/api/ofp_ip.h b/include/api/ofp_ip.h new file mode 100644 index 00000000..5abdea84 --- /dev/null +++ b/include/api/ofp_ip.h @@ -0,0 +1,227 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip.h 8.2 (Berkeley) 6/1/94 + * $FreeBSD: release/9.1.0/sys/netinet/ip.h 235805 2012-05-22 19:53:25Z delphij $ + */ + +#ifndef _OFP_IP_H_ +#define _OFP_IP_H_ + +#include <odp.h> /* ODP_BYTE_ORDER; NOTE(review): header name restored after extraction loss - confirm */ + +/* + * Definitions for internet protocol version 4. + * + * Per RFC 791, September 1981. + */ +#define OFP_IPVERSION 4 + +/* + * Structure of an internet header, naked of options. + */ +struct ofp_ip { +#if ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN + uint8_t ip_hl:4, /* header length */ + ip_v:4; /* version */ +#endif +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN + uint8_t ip_v:4, /* version */ + ip_hl:4; /* header length */ +#endif + uint8_t ip_tos; /* type of service */ + uint16_t ip_len; /* total length */ + uint16_t ip_id; /* identification */ + uint16_t ip_off; /* fragment offset field */ +#define OFP_IP_RF 0x8000 /* reserved fragment flag */ +#define OFP_IP_DF 0x4000 /* don't fragment flag */ +#define OFP_IP_MF 0x2000 /* more fragments flag */ +#define OFP_IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + uint8_t ip_ttl; /* time to live */ + uint8_t ip_p; /* protocol */ + uint16_t ip_sum; /* checksum */ + struct ofp_in_addr ip_src,ip_dst; /* source and dest address */ +} __attribute__((packed)); + +#define OFP_IP_MAXPACKET 65535 /* maximum packet size */ + +/* + * Definitions for IP type of service (ip_tos).
+ */ +#define OFP_IPTOS_LOWDELAY 0x10 +#define OFP_IPTOS_THROUGHPUT 0x08 +#define OFP_IPTOS_RELIABILITY 0x04 +#define OFP_IPTOS_MINCOST 0x02 + +/* + * Definitions for IP precedence (also in ip_tos) (hopefully unused). + */ +#define OFP_IPTOS_PREC_NETCONTROL 0xe0 +#define OFP_IPTOS_PREC_INTERNETCONTROL 0xc0 +#define OFP_IPTOS_PREC_CRITIC_ECP 0xa0 +#define OFP_IPTOS_PREC_FLASHOVERRIDE 0x80 +#define OFP_IPTOS_PREC_FLASH 0x60 +#define OFP_IPTOS_PREC_IMMEDIATE 0x40 +#define OFP_IPTOS_PREC_PRIORITY 0x20 +#define OFP_IPTOS_PREC_ROUTINE 0x00 + +/* + * Definitions for DiffServ Codepoints as per RFC2474 + */ +#define OFP_IPTOS_DSCP_CS0 0x00 +#define OFP_IPTOS_DSCP_CS1 0x20 +#define OFP_IPTOS_DSCP_AF11 0x28 +#define OFP_IPTOS_DSCP_AF12 0x30 +#define OFP_IPTOS_DSCP_AF13 0x38 +#define OFP_IPTOS_DSCP_CS2 0x40 +#define OFP_IPTOS_DSCP_AF21 0x48 +#define OFP_IPTOS_DSCP_AF22 0x50 +#define OFP_IPTOS_DSCP_AF23 0x58 +#define OFP_IPTOS_DSCP_CS3 0x60 +#define OFP_IPTOS_DSCP_AF31 0x68 +#define OFP_IPTOS_DSCP_AF32 0x70 +#define OFP_IPTOS_DSCP_AF33 0x78 +#define OFP_IPTOS_DSCP_CS4 0x80 +#define OFP_IPTOS_DSCP_AF41 0x88 +#define OFP_IPTOS_DSCP_AF42 0x90 +#define OFP_IPTOS_DSCP_AF43 0x98 +#define OFP_IPTOS_DSCP_CS5 0xa0 +#define OFP_IPTOS_DSCP_EF 0xb8 +#define OFP_IPTOS_DSCP_CS6 0xc0 +#define OFP_IPTOS_DSCP_CS7 0xe0 + +/* + * ECN (Explicit Congestion Notification) codepoints in RFC3168 mapped to the + * lower 2 bits of the TOS field. + */ +#define OFP_IPTOS_ECN_NOTECT 0x00 /* not-ECT */ +#define OFP_IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */ +#define OFP_IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */ +#define OFP_IPTOS_ECN_CE 0x03 /* congestion experienced */ +#define OFP_IPTOS_ECN_MASK 0x03 /* ECN field mask */ + +/* + * Definitions for options. 
+ */ +#define OFP_IPOPT_COPIED(o) ((o)&0x80) +#define OFP_IPOPT_CLASS(o) ((o)&0x60) +#define OFP_IPOPT_NUMBER(o) ((o)&0x1f) + +#define OFP_IPOPT_CONTROL 0x00 +#define OFP_IPOPT_RESERVED1 0x20 +#define OFP_IPOPT_DEBMEAS 0x40 +#define OFP_IPOPT_RESERVED2 0x60 + +#define OFP_IPOPT_EOL 0 /* end of option list */ +#define OFP_IPOPT_NOP 1 /* no operation */ + +#define OFP_IPOPT_RR 7 /* record packet route */ +#define OFP_IPOPT_TS 68 /* timestamp */ +#define OFP_IPOPT_SECURITY 130 /* provide s,c,h,tcc */ +#define OFP_IPOPT_LSRR 131 /* loose source route */ +#define OFP_IPOPT_ESO 133 /* extended security */ +#define OFP_IPOPT_CIPSO 134 /* commercial security */ +#define OFP_IPOPT_SATID 136 /* satnet id */ +#define OFP_IPOPT_SSRR 137 /* strict source route */ +#define OFP_IPOPT_RA 148 /* router alert */ + +/* + * Offsets to fields in options other than EOL and NOP. + */ +#define OFP_IPOPT_OPTVAL 0 /* option ID */ +#define OFP_IPOPT_OLEN 1 /* option length */ +#define OFP_IPOPT_OFFSET 2 /* offset within option */ +#define OFP_IPOPT_MINOFF 4 /* min value of above */ + +/* + * Time stamp option structure. + */ +struct ofp_ip_timestamp { + uint8_t ipt_code; /* OFP_IPOPT_TS */ + uint8_t ipt_len; /* size of structure (variable) */ + uint8_t ipt_ptr; /* index of current entry */ +#if ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN + uint8_t ipt_flg:4, /* flags, see below */ + ipt_oflw:4; /* overflow counter */ +#endif +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN + uint8_t ipt_oflw:4, /* overflow counter */ + ipt_flg:4; /* flags, see below */ +#endif + union ipt_timestamp { + uint32_t ipt_time[1]; /* network format */ + struct ofp_ipt_ta { + struct ofp_in_addr ipt_addr; + uint32_t ipt_time; /* network format */ + } ipt_ta[1]; + } ipt_timestamp; +}; + +/* Flag bits for ipt_flg. */ +#define OFP_IPOPT_TS_TSONLY 0 /* timestamps only */ +#define OFP_IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ +#define OFP_IPOPT_TS_PRESPEC 3 /* specified modules only */ + +/* Bits for security (not byte swapped).
*/ +#define OFP_IPOPT_SECUR_UNCLASS 0x0000 +#define OFP_IPOPT_SECUR_CONFID 0xf135 +#define OFP_IPOPT_SECUR_EFTO 0x789a +#define OFP_IPOPT_SECUR_MMMM 0xbc4d +#define OFP_IPOPT_SECUR_RESTR 0xaf13 +#define OFP_IPOPT_SECUR_SECRET 0xd788 +#define OFP_IPOPT_SECUR_TOPSECRET 0x6bc5 + +/* + * Internet implementation parameters. + */ +#define OFP_MAXTTL 255 /* maximum time to live (seconds) */ +#define OFP_IPDEFTTL 64 /* default ttl, from RFC 1340 */ +#define OFP_IPFRAGTTL 60 /* time to live for frags, slowhz */ +#define OFP_IPTTLDEC 1 /* subtracted when forwarding */ +#define OFP_IP_MSS 576 /* default maximum segment size */ + +/* + * This is the real IPv4 pseudo header, used for computing the TCP and UDP + * checksums. For the Internet checksum, struct ipovly can be used instead. + * For stronger checksums, the real thing must be used. + */ +struct ofp_ippseudo { + struct ofp_in_addr ippseudo_src; /* source internet address */ + struct ofp_in_addr ippseudo_dst; /* destination internet address */ + uint8_t ippseudo_pad; /* pad, must be zero */ + uint8_t ippseudo_p; /* protocol */ + uint16_t ippseudo_len; /* protocol length */ +}; +#endif diff --git a/include/api/ofp_ip6.h b/include/api/ofp_ip6.h new file mode 100644 index 00000000..9f4506c1 --- /dev/null +++ b/include/api/ofp_ip6.h @@ -0,0 +1,290 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip.h 8.1 (Berkeley) 6/10/93 + */ + +#ifndef _OFP_IP6_H_ +#define _OFP_IP6_H_ + +/* + * Definition for internet protocol version 6. + * RFC 2460 + */ + +struct ofp_ip6_hdr { + union { + struct ofp_ip6_hdrctl { + uint32_t ip6_un1_flow; /* 20 bits of flow-ID */ + uint16_t ip6_un1_plen; /* payload length */ + uint8_t ip6_un1_nxt; /* next header */ + uint8_t ip6_un1_hlim; /* hop limit */ + } ip6_un1; + uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ + struct { +#if ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN + uint8_t ip6_un2_tclass1:4; + uint8_t ip6_un2_v:4; +#elif ODP_BYTE_ORDER == ODP_BIG_ENDIAN + uint8_t ip6_un2_v:4; + uint8_t ip6_un2_tclass1:4; +#else /* ODP_BYTE_ORDER */ +#error Unknown byte ordering. 
+#endif /* ODP_BYTE_ORDER */ + } __attribute__((packed)) ip6_s; + } ip6_ctlun; + struct ofp_in6_addr ip6_src; /* source address */ + struct ofp_in6_addr ip6_dst; /* destination address */ +} __attribute__((packed)); + +#define ofp_ip6_vfc ip6_ctlun.ip6_un2_vfc +#define ofp_ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow +#define ofp_ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen +#define ofp_ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt +#define ofp_ip6_hlim ip6_ctlun.ip6_un1.ip6_un1_hlim +#define ofp_ip6_hops ip6_ctlun.ip6_un1.ip6_un1_hlim + +#define OFP_IPV6_VERSION 0x60 +#define OFP_IPV6_VERSION_MASK 0xf0 + +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN +#define OFP_IPV6_FLOWINFO_MASK 0x0fffffff /* flow info (28 bits) */ +#define OFP_IPV6_FLOWLABEL_MASK 0x000fffff /* flow label (20 bits) */ +#else +#if ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN +#define OFP_IPV6_FLOWINFO_MASK 0xffffff0f /* flow info (28 bits) */ +#define OFP_IPV6_FLOWLABEL_MASK 0xffff0f00 /* flow label (20 bits) */ +#endif /* LITTLE_ENDIAN */ +#endif + +/* ECN bits proposed by Sally Floyd */ +#define OFP_IP6TOS_CE 0x01 /* congestion experienced */ +#define OFP_IP6TOS_ECT 0x02 /* ECN-capable transport */ + + +/* + * Extension Headers + */ + +struct ofp_ip6_ext { + uint8_t ip6e_nxt; + uint8_t ip6e_len; +} __attribute__((packed)); + +/* Hop-by-Hop options header */ +/* XXX should we pad it to force alignment on an 8-byte boundary? */ +struct ofp_ip6_hbh { + uint8_t ip6h_nxt; /* next header */ + uint8_t ip6h_len; /* length in units of 8 octets */ + /* followed by options */ +} __attribute__((packed)); + +/* Destination options header */ +/* XXX should we pad it to force alignment on an 8-byte boundary? 
*/ +struct ofp_ip6_dest { + uint8_t ip6d_nxt; /* next header */ + uint8_t ip6d_len; /* length in units of 8 octets */ + /* followed by options */ +} __attribute__((packed)); + +/* Option types and related macros */ +#define OFP_IP6OPT_PAD1 0x00 /* 00 0 00000 */ +#define OFP_IP6OPT_PADN 0x01 /* 00 0 00001 */ +#define OFP_IP6OPT_JUMBO 0xC2 /* 11 0 00010 = 194 */ +#define OFP_IP6OPT_NSAP_ADDR 0xC3 /* 11 0 00011 */ +#define OFP_IP6OPT_TUNNEL_LIMIT 0x04 /* 00 0 00100 */ +#define OFP_IP6OPT_RTALERT 0x05 /* 00 0 00101 (KAME definition) */ +#define OFP_IP6OPT_ROUTER_ALERT 0x05 /* 00 0 00101 (RFC3542, recommended) */ + +#define OFP_IP6OPT_RTALERT_LEN 4 +#define OFP_IP6OPT_RTALERT_MLD 0 /* Datagram contains an MLD message */ +#define OFP_IP6OPT_RTALERT_RSVP 1 /* Datagram contains an RSVP message */ +#define OFP_IP6OPT_RTALERT_ACTNET 2 /* contains an Active Networks msg */ +#define OFP_IP6OPT_MINLEN 2 + +#define OFP_IP6OPT_EID 0x8a /* 10 0 01010 */ + +#define OFP_IP6OPT_TYPE(o) ((o) & 0xC0) +#define OFP_IP6OPT_TYPE_SKIP 0x00 +#define OFP_IP6OPT_TYPE_DISCARD 0x40 +#define OFP_IP6OPT_TYPE_FORCEICMP 0x80 +#define OFP_IP6OPT_TYPE_ICMP 0xC0 + +#define OFP_IP6OPT_MUTABLE 0x20 + +/* IPv6 options: common part */ +struct ofp_ip6_opt { + uint8_t ip6o_type; + uint8_t ip6o_len; +} __attribute__((packed)); + +/* Jumbo Payload Option */ +struct ofp_ip6_opt_jumbo { + uint8_t ip6oj_type; + uint8_t ip6oj_len; + uint8_t ip6oj_jumbo_len[4]; +} __attribute__((packed)); +#define OFP_IP6OPT_JUMBO_LEN 6 + +/* NSAP Address Option */ +struct ofp_ip6_opt_nsap { + uint8_t ip6on_type; + uint8_t ip6on_len; + uint8_t ip6on_src_nsap_len; + uint8_t ip6on_dst_nsap_len; + /* followed by source NSAP */ + /* followed by destination NSAP */ +} __attribute__((packed)); + +/* Tunnel Limit Option */ +struct ofp_ip6_opt_tunnel { + uint8_t ip6ot_type; + uint8_t ip6ot_len; + uint8_t ip6ot_encap_limit; +} __attribute__((packed)); + +/* Router Alert Option */ +struct ofp_ip6_opt_router { + uint8_t ip6or_type; + uint8_t 
ip6or_len; + uint8_t ip6or_value[2]; +} __attribute__((packed)); +/* Router alert values (in network byte order) */ +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN +#define OFP_IP6_ALERT_MLD 0x0000 +#define OFP_IP6_ALERT_RSVP 0x0001 +#define OFP_IP6_ALERT_AN 0x0002 +#else +#if ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN +#define OFP_IP6_ALERT_MLD 0x0000 +#define OFP_IP6_ALERT_RSVP 0x0100 +#define OFP_IP6_ALERT_AN 0x0200 +#endif /* LITTLE_ENDIAN */ +#endif + +/* Routing header */ +struct ofp_ip6_rthdr { + uint8_t ip6r_nxt; /* next header */ + uint8_t ip6r_len; /* length in units of 8 octets */ + uint8_t ip6r_type; /* routing type */ + uint8_t ip6r_segleft; /* segments left */ + /* followed by routing type specific data */ +} __attribute__((packed)); + +/* Type 0 Routing header, deprecated by RFC 5095. */ +struct ofp_ip6_rthdr0 { + uint8_t ip6r0_nxt; /* next header */ + uint8_t ip6r0_len; /* length in units of 8 octets */ + uint8_t ip6r0_type; /* always zero */ + uint8_t ip6r0_segleft; /* segments left */ + uint32_t ip6r0_reserved; /* reserved field */ + /* followed by up to 127 struct ofp_in6_addr */ +} __attribute__((packed)); + +/* Fragment header */ +struct ofp_ip6_frag { + uint8_t ip6f_nxt; /* next header */ + uint8_t ip6f_reserved; /* reserved field */ + uint16_t ip6f_offlg; /* offset, reserved, and flag */ + uint32_t ip6f_ident; /* identification */ +} __attribute__((packed)); + +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN +#define OFP_IP6F_OFF_MASK 0xfff8 /* mask out offset from _offlg */ +#define OFP_IP6F_RESERVED_MASK 0x0006 /* reserved bits in ip6f_offlg */ +#define OFP_IP6F_MORE_FRAG 0x0001 /* more-fragments flag */ +#else /* BYTE_ORDER == LITTLE_ENDIAN */ +#define OFP_IP6F_OFF_MASK 0xf8ff /* mask out offset from _offlg */ +#define OFP_IP6F_RESERVED_MASK 0x0600 /* reserved bits in ip6f_offlg */ +#define OFP_IP6F_MORE_FRAG 0x0100 /* more-fragments flag */ +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + +/* + * Internet implementation parameters. 
+ */ +#define OFP_IPV6_MAXHLIM 255 /* maximum hoplimit */ +#define OFP_IPV6_DEFHLIM 64 /* default hlim */ +#define OFP_IPV6_FRAGTTL 120 /* ttl for fragment packets, in slowtimo tick */ +#define OFP_IPV6_HLIMDEC 1 /* subtracted when forwarding */ + +#define OFP_IPV6_MMTU 1280 /* minimal MTU and reassembly. 1024 + 256 */ +#define OFP_IPV6_MAXPACKET 65535 /* ip6 max packet size without Jumbo payload */ +#define OFP_IPV6_MAXOPTHDR 2048 /* max option header size, 256 64-bit words */ + +/* + * OFP_IP6_EXTHDR_CHECK verifies that the region between the IP6 header and + * the target header (including IPv6 itself, extension headers and + * TCP/UDP/ICMP6 headers) is covered by odp_packet_seg_len(pkt): when that + * length is smaller than the sum of off and hlen, the macro makes the + * ENCLOSING function execute "return ret". The mbuf cases described by the + * KAME original are not present here; mind the hidden early return. + */ + +#define OFP_IP6_EXTHDR_CHECK(pkt, off, hlen, ret) \ +do { \ + if (odp_packet_seg_len((pkt)) < (uint32_t)((off) + (hlen))) { \ + return ret; \ + } \ +} while (/*CONSTCOND*/ 0) + +#endif /* not _OFP_IP6_H_ */ diff --git a/include/api/ofp_ip_var.h b/include/api/ofp_ip_var.h new file mode 100644 index 00000000..c618a8a6 --- /dev/null +++ b/include/api/ofp_ip_var.h @@ -0,0 +1,110 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_var.h 8.2 (Berkeley) 1/9/95 + * $FreeBSD: release/9.1.0/sys/netinet/ip_var.h 223666 2011-06-29 10:06:58Z ae $ + */ + +#ifndef _OFP_IP_VAR_H_ +#define _OFP_IP_VAR_H_ + +/* + * Overlay for ip header used by other protocols (tcp, udp). + */ +struct ipovly { + uint8_t ih_x1[9]; /* (unused) */ + uint8_t ih_pr; /* protocol */ + uint16_t ih_len; /* protocol length */ + struct ofp_in_addr ih_src; /* source internet address */ + struct ofp_in_addr ih_dst; /* destination internet address */ +}; + + +/* + * Structure stored in mbuf in inpcb.ip_options + * and passed to ip_output when ip options are in use. + * The actual length of the options (including ipopt_dst) + * is in m_len. + */ +#define MAX_IPOPTLEN 40 + +struct ipoption { + struct ofp_in_addr ipopt_dst; /* first-hop dst if source routed */ + char ipopt_list[MAX_IPOPTLEN]; /* options proper */ +}; + +/* + * Structure attached to inpcb.ip_moptions and + * passed to ip_output when IP multicast options are in use. 
+ * This structure is lazy-allocated. + */ +struct ip_moptions { + struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */ + struct ofp_in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */ + uint64_t imo_multicast_vif; /* vif num outgoing multicasts */ + uint8_t imo_multicast_ttl; /* TTL for outgoing multicasts */ + uint8_t imo_multicast_loop; /* 1 => hear sends if a member */ + uint16_t imo_num_memberships; /* no. memberships this socket */ + uint16_t imo_max_memberships; /* max memberships this socket */ + struct in_multi **imo_membership; /* group memberships */ + struct in_mfilter *imo_mfilters; /* source filters */ +}; + +struct ofp_ipstat { + uint64_t ips_total; /* total packets received */ + uint64_t ips_badsum; /* checksum bad */ + uint64_t ips_tooshort; /* packet too short */ + uint64_t ips_toosmall; /* not enough data */ + uint64_t ips_badhlen; /* ip header length < data size */ + uint64_t ips_badlen; /* ip length < ip header length */ + uint64_t ips_fragments; /* fragments received */ + uint64_t ips_fragdropped; /* frags dropped (dups, out of space) */ + uint64_t ips_fragtimeout; /* fragments timed out */ + uint64_t ips_forward; /* packets forwarded */ + uint64_t ips_fastforward; /* packets fast forwarded */ + uint64_t ips_cantforward; /* packets rcvd for unreachable dest */ + uint64_t ips_redirectsent; /* packets forwarded on same net */ + uint64_t ips_noproto; /* unknown or unsupported protocol */ + uint64_t ips_delivered; /* datagrams delivered to upper level*/ + uint64_t ips_localout; /* total ip packets generated here */ + uint64_t ips_odropped; /* lost packets due to nobufs, etc. */ + uint64_t ips_reassembled; /* total packets reassembled ok */ + uint64_t ips_fragmented; /* datagrams successfully fragmented */ + uint64_t ips_ofragments; /* output fragments created */ + uint64_t ips_cantfrag; /* don't fragment flag was set, etc. 
*/ + uint64_t ips_badoptions; /* error in option processing */ + uint64_t ips_noroute; /* packets discarded due to no route */ + uint64_t ips_badvers; /* ip version != 4 */ + uint64_t ips_rawout; /* total raw ip packets generated */ + uint64_t ips_toolong; /* ip length > max ip packet size */ + uint64_t ips_notmember; /* multicasts for unregistered grps */ + uint64_t ips_nogif; /* no match gif found */ + uint64_t ips_badaddr; /* invalid address on header */ +}; + +#endif /* !_OFP_IP_VAR_H_ */ diff --git a/include/api/ofp_log.h b/include/api/ofp_log.h new file mode 100644 index 00000000..913b9b1d --- /dev/null +++ b/include/api/ofp_log.h @@ -0,0 +1,139 @@ +/* Copyright (c) 2014, Linaro Limited + * All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * SPDX-License-Identifier: BSD-3-Clause + */ +/** + * @file + * + * ofp log + */ + +#ifndef __OFP_LOG_H__ +#define __OFP_LOG_H__ + +#include +#include +#include +#include "ofp_timer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef OFP_DEBUG_PRINT +#define OFP_DEBUG_PRINT 1 +#endif + +/** + * log level. + */ +enum ofp_log_level_s { + OFP_LOG_ABORT = 1, + OFP_LOG_ERR, + OFP_LOG_INFO, + OFP_LOG_DBG +}; + +extern enum ofp_log_level_s ofp_loglevel; + +/** + * default LOG macro. + */ +#define _ODP_FP_LOG(level, fmt, ...) 
\ +do { \ + int _t = ofp_timer_ticks(0); \ + if (level > ofp_loglevel) \ + break; \ + switch (level) { \ + case OFP_LOG_ERR: \ + fprintf(stderr, "[%d] %d.%02d %s:%d:%s():" fmt, \ + odp_cpu_id(), _t/100, _t%100, \ + __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ + break; \ + case OFP_LOG_DBG: \ + fprintf(stderr, "[%d] %d.%02d %s:%d:%s():" fmt, \ + odp_cpu_id(), _t/100, _t%100, \ + __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ + break; \ + case OFP_LOG_ABORT: \ + fprintf(stderr, "[%d] %d.%02d %s:%d:%s(): " fmt, \ + odp_cpu_id(), _t/100, _t%100, \ + __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ + abort(); \ + break; \ + case OFP_LOG_INFO: \ + fprintf(stderr, "[%d] %d.%02d %s:%d:%s(): " fmt, \ + odp_cpu_id(), _t/100, _t%100, \ + __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ + break; \ + default: \ + fprintf(stderr, "Unknown LOG level"); \ + break;\ + } \ +} while (0) + +/** + * Debug printing macro, which prints output when DEBUG flag is set. + */ +#if (OFP_DEBUG_PRINT == 1) +# define OFP_DBG(fmt, ...) \ + _ODP_FP_LOG(OFP_LOG_DBG, fmt, ##__VA_ARGS__) +# define OFP_IS_LOGLEVEL_DEBUG() \ + (ofp_loglevel == OFP_LOG_DBG ? 1 : 0) +#else +# define OFP_DBG(fmt, ...) +# define OFP_IS_LOGLEVEL_DEBUG() 0 +#endif + +/** + * Print output to stderr (file, line and function). + */ +#define OFP_ERR(fmt, ...) \ + _ODP_FP_LOG(OFP_LOG_ERR, fmt, ##__VA_ARGS__) + +/** + * Print output to stderr (file, line and function), + * then abort. + */ +#define OFP_ABORT(fmt, ...) \ + _ODP_FP_LOG(OFP_LOG_ABORT, fmt, ##__VA_ARGS__) + +/** + * Print output to stderr (file, line and function) + */ +#define OFP_LOG(fmt, ...) \ + _ODP_FP_LOG(OFP_LOG_INFO, fmt, ##__VA_ARGS__) + +/** + * Print output to stderr (file, line and function) + */ +#define OFP_INFO(fmt, ...) \ + _ODP_FP_LOG(OFP_LOG_INFO, fmt, ##__VA_ARGS__) + +/** + * Print output to stderr + */ +#define OFP_LOG_NO_CTX(level, fmt, ...) 
\ +do { \ + if ((level) > ofp_loglevel) \ + break; \ + fprintf(stderr, fmt, ##__VA_ARGS__); \ +} while (0) + +/** + * Marks intentionally unused variables and function parameters + */ +#define OFP_UNUSED __attribute__((__unused__)) + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif /*__OFP_LOG_H__*/ diff --git a/include/api/ofp_pkt_processing.h b/include/api/ofp_pkt_processing.h new file mode 100644 index 00000000..7c0cb6f9 --- /dev/null +++ b/include/api/ofp_pkt_processing.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_APP_H__ +#define __OFP_APP_H__ + +#include <odp.h> /* odp_packet_t, odp_queue_t; NOTE(review): header name inferred from usage - confirm */ +#include "ofp_types.h" +#include "ofp_init.h" +/* Packet handler callback: receives one packet and returns an ofp_return_code. */ +typedef enum ofp_return_code (*ofp_pkt_processing_func)(odp_packet_t pkt); + +struct ofp_ifnet; + +void *default_event_dispatcher(void *arg); + +enum ofp_return_code ofp_packet_input(odp_packet_t pkt, + odp_queue_t in_queue, ofp_pkt_processing_func pkt_func); + +enum ofp_return_code ofp_eth_vlan_processing(odp_packet_t pkt); +enum ofp_return_code ofp_ipv4_processing(odp_packet_t pkt); +enum ofp_return_code ofp_ipv6_processing(odp_packet_t pkt); +enum ofp_return_code ofp_gre_processing(odp_packet_t pkt); +enum ofp_return_code ofp_arp_processing(odp_packet_t pkt); +enum ofp_return_code ofp_udp4_processing(odp_packet_t pkt); +enum ofp_return_code ofp_tcp4_processing(odp_packet_t pkt); + +enum ofp_return_code ofp_ip_output(odp_packet_t pkt, + struct ofp_nh_entry *nh_param); +enum ofp_return_code ofp_ip6_output(odp_packet_t pkt, + struct ofp_nh6_entry *nh_param); + +enum ofp_return_code ofp_sp_input(odp_packet_t pkt, + struct ofp_ifnet *ifnet); + +#endif /*__OFP_APP_H__*/ diff --git a/include/api/ofp_portconf.h b/include/api/ofp_portconf.h new file mode 100644 index 00000000..09a128ef --- /dev/null +++ b/include/api/ofp_portconf.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights 
reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_PORTCONF_H__ +#define __OFP_PORTCONF_H__ + +#include "odp.h" +#include "odp/helper/linux.h" + +#define OFP_IFNAMSIZ 16 + +struct ofp_ifnet; + +/* Interfaces: UP/DOWN */ + +const char *ofp_config_interface_up_v4(int port, uint16_t vlan, uint16_t vrf, + uint32_t addr, int masklen); +const char *ofp_config_interface_up_v6(int port, uint16_t vlan, + uint8_t *addr, int masklen); +const char *ofp_config_interface_up_tun(int port, uint16_t greid, + uint16_t vrf, uint32_t tun_loc, + uint32_t tun_rem, uint32_t p2p, + uint32_t addr, int mlen); +const char *ofp_config_interface_down(int port, uint16_t vlan); + +/* Interfaces: SHOW */ +void ofp_show_interfaces(int fd); + +/* Interfaces: operations*/ +int ofp_get_num_ports(void); + +struct ofp_ifnet *ofp_get_ifnet(int port, uint16_t vlan); +struct ofp_ifnet *ofp_get_create_ifnet(int port, uint16_t vlan); +int ofp_delete_ifnet(int port, uint16_t vlan); + +odp_pktio_t ofp_port_pktio_get(int port); + +#ifdef SP +/* LINUX interface lookup table*/ +struct ofp_ifnet *ofp_get_ifnet_by_linux_ifindex(int ix); +#endif /* SP */ +/* Finds the node interface by the local ip assigned */ +struct ofp_ifnet *ofp_get_ifnet_match(uint32_t ip, + uint16_t vrf, uint16_t vlan); + +/* Interface ODP queues */ +struct ofp_ifnet *ofp_get_ifnet_pktio(odp_pktio_t pktio); +odp_queue_t ofp_pktio_spq_get(odp_pktio_t pktio); +odp_queue_t ofp_pktio_loopq_get(odp_pktio_t pktio); + +enum ofp_portconf_ip_type { + OFP_PORTCONF_IP_TYPE_IP_ADDR = 0, + OFP_PORTCONF_IP_TYPE_P2P, + OFP_PORTCONF_IP_TYPE_TUN_LOCAL, + OFP_PORTCONF_IP_TYPE_TUN_REM +}; + +uint32_t ofp_port_get_ipv4_addr(int port, uint16_t vlan, + enum ofp_portconf_ip_type type); + +#endif /* __OFP_PORTCONF_H__ */ diff --git a/include/api/ofp_queue.h b/include/api/ofp_queue.h new file mode 100644 index 00000000..6373ebde --- /dev/null +++ b/include/api/ofp_queue.h @@ -0,0 +1,635 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents 
of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD: release/9.1.0/sys/sys/queue.h 221843 2011-05-13 15:49:23Z mdf $ + */ + +#ifndef __OFP_QUEUE_H__ +#define __OFP_QUEUE_H__ + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. 
The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. 
New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. + * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - - - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_AFTER + - + - + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * _SWAP + + + + + * + */ +#ifdef OFP_QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define OFP_TRACEBUF struct qm_trace trace; +#define OFP_TRASHIT(x) do {(x) = (void *)-1;} while (0) +#define OFP_QMD_SAVELINK(name, link) void **name = (void *)&(link) + +#define OFP_QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define OFP_QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define OFP_QMD_TRACE_ELEM(elem) +#define OFP_QMD_TRACE_HEAD(head) +#define OFP_QMD_SAVELINK(name, link) +#define OFP_TRACEBUF +#define OFP_TRASHIT(x) +#endif /* OFP_QUEUE_MACRO_DEBUG */ + +/* + * Singly-linked List declarations. 
+ */ +#define OFP_SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define OFP_SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define OFP_SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. + */ +#define OFP_SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define OFP_SLIST_FIRST(head) ((head)->slh_first) + +#define OFP_SLIST_FOREACH(var, head, field) \ + for ((var) = OFP_SLIST_FIRST((head)); \ + (var); \ + (var) = OFP_SLIST_NEXT((var), field)) + +#define OFP_SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = OFP_SLIST_FIRST((head)); \ + (var) && ((tvar) = OFP_SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define OFP_SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &OFP_SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &OFP_SLIST_NEXT((var), field)) + +#define OFP_SLIST_INIT(head) do { \ + OFP_SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define OFP_SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + OFP_SLIST_NEXT((elm), field) = OFP_SLIST_NEXT((slistelm), field); \ + OFP_SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define OFP_SLIST_INSERT_HEAD(head, elm, field) do { \ + OFP_SLIST_NEXT((elm), field) = OFP_SLIST_FIRST((head)); \ + OFP_SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define OFP_SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define OFP_SLIST_REMOVE(head, elm, type, field) do { \ + OFP_QMD_SAVELINK(oldnext, (elm)->field.sle_next); \ + if (OFP_SLIST_FIRST((head)) == (elm)) { \ + OFP_SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = OFP_SLIST_FIRST((head)); \ + while (OFP_SLIST_NEXT(curelm, field) != (elm)) \ + curelm = OFP_SLIST_NEXT(curelm, field); \ + OFP_SLIST_REMOVE_AFTER(curelm, field); \ + } \ + OFP_TRASHIT(*oldnext); \ +} while (0) + +#define OFP_SLIST_REMOVE_AFTER(elm, field) do { \ + OFP_SLIST_NEXT(elm, field) = \ + 
OFP_SLIST_NEXT(OFP_SLIST_NEXT(elm, field), field); \ +} while (0) + +#define OFP_SLIST_REMOVE_HEAD(head, field) do { \ + OFP_SLIST_FIRST((head)) = OFP_SLIST_NEXT(OFP_SLIST_FIRST((head)), field); \ +} while (0) + +#define OFP_SLIST_SWAP(head1, head2, type) do { \ + struct type *swap_first = OFP_SLIST_FIRST(head1); \ + OFP_SLIST_FIRST(head1) = OFP_SLIST_FIRST(head2); \ + OFP_SLIST_FIRST(head2) = swap_first; \ +} while (0) + +/* + * Singly-linked Tail queue declarations. + */ +#define OFP_STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define OFP_STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define OFP_STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. + */ +#define OFP_STAILQ_CONCAT(head1, head2) do { \ + if (!OFP_STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + OFP_STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define OFP_STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define OFP_STAILQ_FIRST(head) ((head)->stqh_first) + +#define OFP_STAILQ_FOREACH(var, head, field) \ + for((var) = OFP_STAILQ_FIRST((head)); \ + (var); \ + (var) = OFP_STAILQ_NEXT((var), field)) + + +#define OFP_STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = OFP_STAILQ_FIRST((head)); \ + (var) && ((tvar) = OFP_STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define OFP_STAILQ_INIT(head) do { \ + OFP_STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &OFP_STAILQ_FIRST((head)); \ +} while (0) + +#define OFP_STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((OFP_STAILQ_NEXT((elm), field) = OFP_STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &OFP_STAILQ_NEXT((elm), field); \ + OFP_STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define OFP_STAILQ_INSERT_HEAD(head, elm, field) 
do { \ + if ((OFP_STAILQ_NEXT((elm), field) = OFP_STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &OFP_STAILQ_NEXT((elm), field); \ + OFP_STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define OFP_STAILQ_INSERT_TAIL(head, elm, field) do { \ + OFP_STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &OFP_STAILQ_NEXT((elm), field); \ +} while (0) + +#define OFP_STAILQ_LAST(head, type, field) \ + (OFP_STAILQ_EMPTY((head)) ? \ + NULL : \ + ((struct type *)(void *) \ + ((char *)((head)->stqh_last) - offsetof(struct type, field)))) + +#define OFP_STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define OFP_STAILQ_REMOVE(head, elm, type, field) do { \ + OFP_QMD_SAVELINK(oldnext, (elm)->field.stqe_next); \ + if (OFP_STAILQ_FIRST((head)) == (elm)) { \ + OFP_STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = OFP_STAILQ_FIRST((head)); \ + while (OFP_STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = OFP_STAILQ_NEXT(curelm, field); \ + OFP_STAILQ_REMOVE_AFTER(head, curelm, field); \ + } \ + OFP_TRASHIT(*oldnext); \ +} while (0) + +#define OFP_STAILQ_REMOVE_AFTER(head, elm, field) do { \ + if ((OFP_STAILQ_NEXT(elm, field) = \ + OFP_STAILQ_NEXT(OFP_STAILQ_NEXT(elm, field), field)) == NULL) \ + (head)->stqh_last = &OFP_STAILQ_NEXT((elm), field); \ +} while (0) + +#define OFP_STAILQ_REMOVE_HEAD(head, field) do { \ + if ((OFP_STAILQ_FIRST((head)) = \ + OFP_STAILQ_NEXT(OFP_STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &OFP_STAILQ_FIRST((head)); \ +} while (0) + +#define OFP_STAILQ_SWAP(head1, head2, type) do { \ + struct type *swap_first = OFP_STAILQ_FIRST(head1); \ + struct type **swap_last = (head1)->stqh_last; \ + OFP_STAILQ_FIRST(head1) = OFP_STAILQ_FIRST(head2); \ + (head1)->stqh_last = (head2)->stqh_last; \ + OFP_STAILQ_FIRST(head2) = swap_first; \ + (head2)->stqh_last = swap_last; \ + if (OFP_STAILQ_EMPTY(head1)) \ + (head1)->stqh_last = &OFP_STAILQ_FIRST(head1); \ + if 
(OFP_STAILQ_EMPTY(head2)) \ + (head2)->stqh_last = &OFP_STAILQ_FIRST(head2); \ +} while (0) + + +/* + * List declarations. + */ +#define OFP_LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define OFP_LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define OFP_LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. + */ + +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define OFP_QMD_LIST_CHECK_HEAD(head, field) do { \ + if (OFP_LIST_FIRST((head)) != NULL && \ + OFP_LIST_FIRST((head))->field.le_prev != \ + &OFP_LIST_FIRST((head))) \ + panic("Bad list head %p first->prev != head", (head)); \ +} while (0) + +#define OFP_QMD_LIST_CHECK_NEXT(elm, field) do { \ + if (OFP_LIST_NEXT((elm), field) != NULL && \ + OFP_LIST_NEXT((elm), field)->field.le_prev != \ + &((elm)->field.le_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define OFP_QMD_LIST_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.le_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define OFP_QMD_LIST_CHECK_HEAD(head, field) +#define OFP_QMD_LIST_CHECK_NEXT(elm, field) +#define OFP_QMD_LIST_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define OFP_LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define OFP_LIST_FIRST(head) ((head)->lh_first) + +#define OFP_LIST_FOREACH(var, head, field) \ + for ((var) = OFP_LIST_FIRST((head)); \ + (var); \ + (var) = OFP_LIST_NEXT((var), field)) + +#define OFP_LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = OFP_LIST_FIRST((head)); \ + (var) && ((tvar) = OFP_LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define OFP_LIST_INIT(head) do { \ + OFP_LIST_FIRST((head)) = NULL; \ +} while (0) + +#define OFP_LIST_INSERT_AFTER(listelm, elm, field) do { \ + OFP_QMD_LIST_CHECK_NEXT(listelm, field); \ + if 
((OFP_LIST_NEXT((elm), field) = OFP_LIST_NEXT((listelm), field)) != NULL)\ + OFP_LIST_NEXT((listelm), field)->field.le_prev = \ + &OFP_LIST_NEXT((elm), field); \ + OFP_LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &OFP_LIST_NEXT((listelm), field); \ +} while (0) + +#define OFP_LIST_INSERT_BEFORE(listelm, elm, field) do { \ + OFP_QMD_LIST_CHECK_PREV(listelm, field); \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + OFP_LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &OFP_LIST_NEXT((elm), field); \ +} while (0) + +#define OFP_LIST_INSERT_HEAD(head, elm, field) do { \ + OFP_QMD_LIST_CHECK_HEAD((head), field); \ + if ((OFP_LIST_NEXT((elm), field) = OFP_LIST_FIRST((head))) != NULL) \ + OFP_LIST_FIRST((head))->field.le_prev = &OFP_LIST_NEXT((elm), field);\ + OFP_LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &OFP_LIST_FIRST((head)); \ +} while (0) + +#define OFP_LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define OFP_LIST_REMOVE(elm, field) do { \ + OFP_QMD_SAVELINK(oldnext, (elm)->field.le_next); \ + OFP_QMD_SAVELINK(oldprev, (elm)->field.le_prev); \ + OFP_QMD_LIST_CHECK_NEXT(elm, field); \ + OFP_QMD_LIST_CHECK_PREV(elm, field); \ + if (OFP_LIST_NEXT((elm), field) != NULL) \ + OFP_LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = OFP_LIST_NEXT((elm), field); \ + OFP_TRASHIT(*oldnext); \ + OFP_TRASHIT(*oldprev); \ +} while (0) + +#define OFP_LIST_SWAP(head1, head2, type, field) do { \ + struct type *swap_tmp = OFP_LIST_FIRST((head1)); \ + OFP_LIST_FIRST((head1)) = OFP_LIST_FIRST((head2)); \ + OFP_LIST_FIRST((head2)) = swap_tmp; \ + if ((swap_tmp = OFP_LIST_FIRST((head1))) != NULL) \ + swap_tmp->field.le_prev = &OFP_LIST_FIRST((head1)); \ + if ((swap_tmp = OFP_LIST_FIRST((head2))) != NULL) \ + swap_tmp->field.le_prev = &OFP_LIST_FIRST((head2)); \ +} while (0) + +/* + * Tail queue declarations. 
+ */ +#define OFP_TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + OFP_TRACEBUF \ +} + +#define OFP_TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define OFP_TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + OFP_TRACEBUF \ +} + +/* + * Tail queue functions. + */ +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define OFP_QMD_TAILQ_CHECK_HEAD(head, field) do { \ + if (!OFP_TAILQ_EMPTY(head) && \ + OFP_TAILQ_FIRST((head))->field.tqe_prev != \ + &OFP_TAILQ_FIRST((head))) \ + panic("Bad tailq head %p first->prev != head", (head)); \ +} while (0) + +#define OFP_QMD_TAILQ_CHECK_TAIL(head, field) do { \ + if (*(head)->tqh_last != NULL) \ + panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ +} while (0) + +#define OFP_QMD_TAILQ_CHECK_NEXT(elm, field) do { \ + if (OFP_TAILQ_NEXT((elm), field) != NULL && \ + OFP_TAILQ_NEXT((elm), field)->field.tqe_prev != \ + &((elm)->field.tqe_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define OFP_QMD_TAILQ_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.tqe_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define OFP_QMD_TAILQ_CHECK_HEAD(head, field) +#define OFP_QMD_TAILQ_CHECK_TAIL(head, headname) +#define OFP_QMD_TAILQ_CHECK_NEXT(elm, field) +#define OFP_QMD_TAILQ_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define OFP_TAILQ_CONCAT(head1, head2, field) do { \ + if (!OFP_TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + OFP_TAILQ_INIT((head2)); \ + OFP_QMD_TRACE_HEAD(head1); \ + OFP_QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define OFP_TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + 
+#define OFP_TAILQ_FIRST(head) ((head)->tqh_first) + +#define OFP_TAILQ_FOREACH(var, head, field) \ + for ((var) = OFP_TAILQ_FIRST((head)); \ + (var); \ + (var) = OFP_TAILQ_NEXT((var), field)) + +#define OFP_TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = OFP_TAILQ_FIRST((head)); \ + (var) && ((tvar) = OFP_TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define OFP_TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = OFP_TAILQ_LAST((head), headname); \ + (var); \ + (var) = OFP_TAILQ_PREV((var), headname, field)) + +#define OFP_TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = OFP_TAILQ_LAST((head), headname); \ + (var) && ((tvar) = OFP_TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define OFP_TAILQ_INIT(head) do { \ + OFP_TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &OFP_TAILQ_FIRST((head)); \ + OFP_QMD_TRACE_HEAD(head); \ +} while (0) + +#define OFP_TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + OFP_QMD_TAILQ_CHECK_NEXT(listelm, field); \ + if ((OFP_TAILQ_NEXT((elm), field) = OFP_TAILQ_NEXT((listelm), field)) != NULL)\ + OFP_TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &OFP_TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &OFP_TAILQ_NEXT((elm), field); \ + OFP_QMD_TRACE_HEAD(head); \ + } \ + OFP_TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &OFP_TAILQ_NEXT((listelm), field); \ + OFP_QMD_TRACE_ELEM(&(elm)->field); \ + OFP_QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define OFP_TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + OFP_QMD_TAILQ_CHECK_PREV(listelm, field); \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + OFP_TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &OFP_TAILQ_NEXT((elm), field); \ + OFP_QMD_TRACE_ELEM(&(elm)->field); \ + OFP_QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define OFP_TAILQ_INSERT_HEAD(head, elm, field) do { \ + 
OFP_QMD_TAILQ_CHECK_HEAD(head, field); \ + if ((OFP_TAILQ_NEXT((elm), field) = OFP_TAILQ_FIRST((head))) != NULL) \ + OFP_TAILQ_FIRST((head))->field.tqe_prev = \ + &OFP_TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &OFP_TAILQ_NEXT((elm), field); \ + OFP_TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &OFP_TAILQ_FIRST((head)); \ + OFP_QMD_TRACE_HEAD(head); \ + OFP_QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define OFP_TAILQ_INSERT_TAIL(head, elm, field) do { \ + OFP_QMD_TAILQ_CHECK_TAIL(head, field); \ + OFP_TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &OFP_TAILQ_NEXT((elm), field); \ + OFP_QMD_TRACE_HEAD(head); \ + OFP_QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define OFP_TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define OFP_TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define OFP_TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define OFP_TAILQ_REMOVE(head, elm, field) do { \ + OFP_QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \ + OFP_QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \ + OFP_QMD_TAILQ_CHECK_NEXT(elm, field); \ + OFP_QMD_TAILQ_CHECK_PREV(elm, field); \ + if ((OFP_TAILQ_NEXT((elm), field)) != NULL) \ + OFP_TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + OFP_QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = OFP_TAILQ_NEXT((elm), field); \ + OFP_TRASHIT(*oldnext); \ + OFP_TRASHIT(*oldprev); \ + OFP_QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define OFP_TAILQ_SWAP(head1, head2, type, field) do { \ + struct type *swap_first = (head1)->tqh_first; \ + struct type **swap_last = (head1)->tqh_last; \ + (head1)->tqh_first = (head2)->tqh_first; \ + (head1)->tqh_last = (head2)->tqh_last; \ + (head2)->tqh_first = swap_first; \ + (head2)->tqh_last = 
swap_last; \ + if ((swap_first = (head1)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head1)->tqh_first; \ + else \ + (head1)->tqh_last = &(head1)->tqh_first; \ + if ((swap_first = (head2)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head2)->tqh_first; \ + else \ + (head2)->tqh_last = &(head2)->tqh_first; \ +} while (0) + +#endif /* __OFP_QUEUE_H__ */ diff --git a/include/api/ofp_route_arp.h b/include/api/ofp_route_arp.h new file mode 100644 index 00000000..6ca08705 --- /dev/null +++ b/include/api/ofp_route_arp.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_ROUTE_ARP_H__ +#define __OFP_ROUTE_ARP_H__ + +/* ROUTE: ADD/DEL*/ + +struct ofp_route_msg { + uint32_t type; +#define OFP_ROUTE_ADD 1 +#define OFP_ROUTE_DEL 2 +#define OFP_MOBILE_ROUTE_ADD 3 +#define OFP_MOBILE_ROUTE_DEL 4 +#define OFP_LOCAL_INTERFACE_ADD 5 +#define OFP_LOCAL_INTERFACE_DEL 6 +#define OFP_ROUTE6_ADD 7 +#define OFP_ROUTE6_DEL 8 + uint16_t vrf; + uint32_t dst; + uint32_t masklen; + uint32_t gw; + uint32_t port; + uint16_t vlan; + uint8_t dst6[16]; + uint8_t gw6[16]; +}; + +/* + * Fill a route message from the uint32_t dst/gw arguments and hand it to + * ofp_set_route(). The message is zero-initialized first so the members + * not assigned here (dst6, gw6) are never passed on uninitialized. + */ +#define SET_ROUTE(_type, _vrf, _dst, _mlen, _gw, _port, _vlan) do { \ + struct ofp_route_msg msg = { 0 }; \ + msg.type = _type; \ + msg.vrf = _vrf; \ + msg.dst = _dst; \ + msg.masklen = _mlen; \ + msg.gw = _gw; \ + msg.port = _port; \ + msg.vlan = _vlan; \ + ofp_set_route(&msg); \ + } while (0) + +/* + * Same as SET_ROUTE but copies 16-byte dst6/gw6 addresses; vrf is always 0. + * Uses memset/memcpy, so the including file must provide their declarations. 
+ */ +#define SET_ROUTE6(_type, _dst6, _prefix, _gw6, _port, _vlan) do { \ + struct ofp_route_msg msg; \ + memset(&msg, 0, sizeof(msg)); \ + msg.type = _type; \ + msg.vrf = 0; \ + memcpy(msg.dst6, _dst6, 16); \ + msg.masklen = _prefix; \ + memcpy(msg.gw6, _gw6, 16); \ + msg.port = _port; \ + msg.vlan = _vlan; \ + ofp_set_route(&msg); \ + } while (0) + +int32_t ofp_set_route(struct ofp_route_msg *msg); + +/* ROUTE: SHOW */ + +#define OFP_SHOW_ARP 0 +#define OFP_SHOW_ROUTES 1 +void
ofp_show_routes(int fd, int what); + +/* ROUTE operations */ +struct ofp_nh_entry *ofp_get_next_hop(uint16_t vrf, + uint32_t addr, uint32_t *flags); +struct ofp_nh6_entry *ofp_get_next_hop6(uint16_t vrf, + uint8_t *addr, uint32_t *flags); + +uint16_t ofp_get_probable_vlan(int port, uint32_t addr); + +/* ARP */ +struct ofp_ifnet; +int ofp_add_mac(struct ofp_ifnet *dev, uint32_t addr, uint8_t *mac); +int ofp_get_mac(struct ofp_ifnet *dev, uint32_t addr, uint8_t *mac_out); +int ofp_del_mac(struct ofp_ifnet *dev, uint32_t addr, uint8_t *mac); +void ofp_add_mac6(struct ofp_ifnet *dev, uint8_t *addr, uint8_t *mac); + +#endif /* __OFP_ROUTE_ARP_H__ */ diff --git a/include/api/ofp_socket.h b/include/api/ofp_socket.h new file mode 100644 index 00000000..f53fed2c --- /dev/null +++ b/include/api/ofp_socket.h @@ -0,0 +1,492 @@ +/*- + * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)socket.h 8.4 (Berkeley) 2/21/94 + * $FreeBSD: release/9.1.0/sys/sys/socket.h 232805 2012-03-11 00:48:54Z kib $ + */ + +#ifndef __OFP_SOCKET_H__ +#define __OFP_SOCKET_H__ + +#include "odp/std_types.h" +#include "ofp_socket_types.h" + +/* + * Definitions related to sockets: types, address families, options. + */ + +/* + * Data types. + */ +#ifndef OFP__GID_T_DECLARED +typedef __ofp_gid_t ofp_gid_t; +#define OFP__GID_T_DECLARED +#endif /* OFP__GID_T_DECLARED */ + +#ifndef OFP__OFF_T_DECLARED +typedef __ofp_off_t ofp_off_t; +#define OFP__OFF_T_DECLARED +#endif /* OFP__OFF_T_DECLARED */ + +#ifndef OFP__PID_T_DECLARED +typedef __ofp_pid_t ofp_pid_t; +#define OFP__PID_T_DECLARED +#endif /* OFP__PID_T_DECLARED */ + +#ifndef OFP__SA_FAMILY_T_DECLARED +typedef __ofp_sa_family_t ofp_sa_family_t; +#define OFP__SA_FAMILY_T_DECLARED +#endif /* OFP__SA_FAMILY_T_DECLARED */ + +#ifndef OFP__SOCKLEN_T_DECLARED +typedef __ofp_socklen_t ofp_socklen_t; +#define OFP__SOCKLEN_T_DECLARED +#endif /* OFP__SOCKLEN_T_DECLARED */ + +#ifndef OFP__SSIZE_T_DECLARED +typedef __ofp_ssize_t ofp_ssize_t; +#define OFP__SSIZE_T_DECLARED +#endif /* OFP__SSIZE_T_DECLARED */ + +#ifndef OFP__UID_T_DECLARED +typedef __ofp_uid_t ofp_uid_t; +#define OFP__UID_T_DECLARED +#endif /*OFP__UID_T_DECLARED*/ + +/* + * Types + */ +#define OFP_SOCK_STREAM 1 /* stream socket */ +#define OFP_SOCK_DGRAM 2 /* datagram socket */ +#define OFP_SOCK_RAW 3 /* raw-protocol interface */ 
+#define OFP_SOCK_RDM 4 /* reliably-delivered message */ +#define OFP_SOCK_SEQPACKET 5 /* sequenced packet stream */ + +/* + * Option flags per-socket, kept in so_options. + */ +#define OFP_SO_DEBUG 0x00000001 /* turn on debugging info recording */ +#define OFP_SO_ACCEPTCONN 0x00000002 /* socket has had listen() */ +#define OFP_SO_REUSEADDR 0x00000004 /* allow local address reuse */ +#define OFP_SO_KEEPALIVE 0x00000008 /* keep connections alive */ +#define OFP_SO_DONTROUTE 0x00000010 /* just use interface addresses */ +#define OFP_SO_BROADCAST 0x00000020 /* permit sending of broadcast msgs */ +#define OFP_SO_USELOOPBACK 0x00000040 /* bypass hardware when possible */ +#define OFP_SO_LINGER 0x00000080 /* linger on close if data present */ +#define OFP_SO_OOBINLINE 0x00000100 /* leave received OOB data in line */ +#define OFP_SO_REUSEPORT 0x00000200 /* allow local address & port reuse */ +#define OFP_SO_TIMESTAMP 0x00000400 /* timestamp received dgram traffic */ +#define OFP_SO_NOSIGPIPE 0x00000800 /* no SIGPIPE from OFP_EPIPE */ +#define OFP_SO_ACCEPTFILTER 0x00001000 /* there is an accept filter */ +#define OFP_SO_BINTIME 0x00002000 /* timestamp received dgram traffic */ +#define OFP_SO_NO_OFFLOAD 0x00004000 /* socket cannot be offloaded */ +#define OFP_SO_NO_DDP 0x00008000 /* disable direct data placement */ +#define OFP_SO_PROMISC 0x00010000 /* socket will be used for promiscuous listen */ +#define OFP_SO_PASSIVE 0x00020000 /* socket will be used for passive reassembly */ +#define OFP_SO_PASSIVECLNT 0x00040000 /* client socket in the passive pair */ +#define OFP_SO_ALTFIB 0x00080000 /* alternate FIB is set */ + +/* + * Additional options, not kept in so_options. 
+ */ +#define OFP_SO_SNDBUF 0x1001 /* send buffer size */ +#define OFP_SO_RCVBUF 0x1002 /* receive buffer size */ +#define OFP_SO_SNDLOWAT 0x1003 /* send low-water mark */ +#define OFP_SO_RCVLOWAT 0x1004 /* receive low-water mark */ +#define OFP_SO_SNDTIMEO 0x1005 /* send timeout */ +#define OFP_SO_RCVTIMEO 0x1006 /* receive timeout */ +#define OFP_SO_ERROR 0x1007 /* get error status and clear */ +#define OFP_SO_TYPE 0x1008 /* get socket type */ +#define OFP_SO_LABEL 0x1009 /* socket's MAC label */ +#define OFP_SO_PEERLABEL 0x1010 /* socket's peer's MAC label */ +#define OFP_SO_LISTENQLIMIT 0x1011 /* socket's backlog limit */ +#define OFP_SO_LISTENQLEN 0x1012 /* socket's complete queue length */ +#define OFP_SO_LISTENINCQLEN 0x1013 /* socket's incomplete queue length */ +#define OFP_SO_SETFIB 0x1014 /* use this FIB to route */ +#define OFP_SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */ +#define OFP_SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */ +#define OFP_SO_PROTOTYPE OFP_SO_PROTOCOL /* alias for OFP_SO_PROTOCOL (SunOS name) */ +#define OFP_SO_L2INFO 0x1017 /* PROMISCUOUS_INET MAC addrs and tags */ + +/* + * Structure used for manipulating linger option. + */ +struct ofp_linger { + int l_onoff; /* option on/off */ + int l_linger; /* linger time */ +}; + +struct accept_filter_arg { + char af_name[16]; + char af_arg[256-16]; +}; + +/* + * Level number for (get/set)sockopt() to apply to socket itself. + */ +#define OFP_SOL_SOCKET 0xffff /* options for socket level */ + +/* + * Address families. + */ +#define OFP_AF_UNSPEC 0 /* unspecified */ +#define OFP_AF_UNIX 1 /* standardized name for OFP_AF_LOCAL */ +#define OFP_AF_INET 2 /* internetwork: UDP, TCP, etc. */ +#define OFP_AF_INET6 3 /* IPv6 */ +#define OFP_AF_MAX 4 + +/* + * Structure used by kernel to store most + * addresses. 
+ */ +struct ofp_sockaddr { + unsigned char sa_len; /* total length */ + ofp_sa_family_t sa_family; /* address family */ + char sa_data[14]; /* actually longer; address value */ +}; + +#define OFP_SOCK_MAXADDRLEN 255 /* longest possible addresses */ + +/* + * Structure used by kernel to pass protocol + * information in raw sockets. + */ +struct sockproto { + unsigned short sp_family; /* address family */ + unsigned short sp_protocol; /* protocol */ +}; + +/* + * Protocol families, same as address families for now. + * Each OFP_PF_* name aliases the OFP_AF_* constant defined above; + * OFP_PF_UNIX maps to OFP_AF_UNIX because no OFP_PF_LOCAL/OFP_AF_LOCAL + * is defined in this header. + */ +#define OFP_PF_UNSPEC OFP_AF_UNSPEC +#define OFP_PF_UNIX OFP_AF_UNIX /* backward compatibility */ +#define OFP_PF_INET OFP_AF_INET +#define OFP_PF_INET6 OFP_AF_INET6 +#define OFP_PF_MAX OFP_AF_MAX + +/* + * OFP_PF_ROUTE - Routing table + * + * Three additional levels are defined: + * Fourth: address family, 0 is wildcard + * Fifth: type of info, defined below + * Sixth: flag(s) to mask with for NET_RT_FLAGS + */ +#define NET_RT_DUMP 1 /* dump; may limit to a.f. */ +#define NET_RT_FLAGS 2 /* by flags, e.g. RESOLVING */ +#define NET_RT_IFLIST 3 /* survey interface list */ +#define NET_RT_IFMALIST 4 /* return multicast address list */ +#define NET_RT_IFLISTL 5 /* Survey interface list, using 'l'en + * versions of msghdr structs. */ +#define NET_RT_MAXID 6 + + +/* + * Maximum queue length specifiable by listen. + */ +#define SOMAXCONN 128 + +/* + * Message header for recvmsg and sendmsg calls. + * Used value-result for recvmsg, value only for sendmsg.
+ */ +struct ofp_msghdr { + void *msg_name; /* optional address */ + ofp_socklen_t msg_namelen; /* size of address */ + struct ofp_iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + void *msg_control; /* ancillary data, see below */ + ofp_socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; + +#define OFP_MSG_OOB 0x1 /* process out-of-band data */ +#define OFP_MSG_PEEK 0x2 /* peek at incoming message */ +#define OFP_MSG_DONTROUTE 0x4 /* send without using routing tables */ +#define OFP_MSG_EOR 0x8 /* data completes record */ +#define OFP_MSG_TRUNC 0x10 /* data discarded before delivery */ +#define OFP_MSG_CTRUNC 0x20 /* control data lost before delivery */ +#define OFP_MSG_WAITALL 0x40 /* wait for full request or error */ +#define OFP_MSG_NOTIFICATION 0x2000 /* SCTP notification */ +#define OFP_MSG_DONTWAIT 0x80 /* this message should be nonblocking */ +#define OFP_MSG_EOF 0x100 /* data completes connection */ +#define OFP_MSG_NBIO 0x4000 /* FIONBIO mode, used by fifofs */ +#define OFP_MSG_COMPAT 0x8000 /* used in sendit() */ +#define OFP_MSG_SOCALLBCK 0x10000 /* for use by socket callbacks - ofp_soreceive (TCP) */ +#define OFP_MSG_NOSIGNAL 0x20000 /* do not generate SIGPIPE on EOF */ +#define OFP_MSG_HOLE_BREAK 0x40000 /* stop at and indicate hole boundary */ + +/* + * Header for ancillary data objects in msg_control buffer. + * Used for additional information with/about a datagram + * not expressible by flags. The format is a sequence + * of message elements headed by cmsghdr structures. 
+ */ +struct ofp_cmsghdr { + ofp_socklen_t cmsg_len; /* data byte count, including hdr */ + int cmsg_level; /* originating protocol */ + int cmsg_type; /* protocol-specific type */ +/* followed by uint8_t cmsg_data[]; */ +}; + +/* + * While we may have more groups than this, the cmsgcred struct must + * be able to fit in an mbuf and we have historically supported a + * maximum of 16 groups. +*/ +#define CMGROUP_MAX 16 + +/* + * Credentials structure, used to verify the identity of a peer + * process that has sent us a message. This is allocated by the + * peer process but filled in by the kernel. This prevents the + * peer from lying about its identity. (Note that cmcred_groups[0] + * is the effective GID.) + */ +struct ofp_cmsgcred { + ofp_pid_t cmcred_pid; /* PID of sending process */ + ofp_uid_t cmcred_uid; /* real UID of sending process */ + ofp_uid_t cmcred_euid; /* effective UID of sending process */ + ofp_gid_t cmcred_gid; /* real GID of sending process */ + short cmcred_ngroups; /* number of groups */ + ofp_gid_t cmcred_groups[CMGROUP_MAX]; /* groups */ +}; + +/* + * Socket credentials. + */ +struct ofp_sockcred { + ofp_uid_t sc_uid; /* real user id */ + ofp_uid_t sc_euid; /* effective user id */ + ofp_gid_t sc_gid; /* real group id */ + ofp_gid_t sc_egid; /* effective group id */ + int sc_ngroups; /* number of supplemental groups */ + ofp_gid_t sc_groups[1]; /* variable length */ +}; + +/* + * Compute size of a sockcred structure with groups. + * sizeof(struct ofp_sockcred) already includes one sc_groups entry, + * hence only (ngrps - 1) further entries are added. 
+ */ +#define OFP_SOCKCREDSIZE(ngrps) \ + (sizeof(struct ofp_sockcred) + (sizeof(ofp_gid_t) * ((ngrps) - 1))) + +/* given pointer to struct ofp_cmsghdr, return pointer to data */ +#define OFP_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ + _ALIGN(sizeof(struct ofp_cmsghdr))) + +/* given pointer to struct ofp_cmsghdr, return pointer to next cmsghdr */ +#define OFP_CMSG_NXTHDR(mhdr, cmsg) \ + ((char *)(cmsg) == NULL ?
OFP_CMSG_FIRSTHDR(mhdr) : \ + ((char *)(cmsg) + _ALIGN(((struct ofp_cmsghdr *)(cmsg))->cmsg_len) + \ + _ALIGN(sizeof(struct ofp_cmsghdr)) > \ + (char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \ + (struct ofp_cmsghdr *)0 : \ + (struct ofp_cmsghdr *)(void *)((char *)(cmsg) + \ + _ALIGN(((struct ofp_cmsghdr *)(cmsg))->cmsg_len))) + +/* + * RFC 2292 requires to check msg_controllen, in case that the kernel returns + * an empty list for some reasons. + */ +#define OFP_CMSG_FIRSTHDR(mhdr) \ + ((mhdr)->msg_controllen >= sizeof(struct ofp_cmsghdr) ? \ + (struct ofp_cmsghdr *)(mhdr)->msg_control : \ + (struct ofp_cmsghdr *)NULL) + +/* HJo: NOTE! Architecture specific! */ +#define _ALIGNBYTES (sizeof(register_t) - 1) +#define _ALIGN(p) (((uintptr_t)(p) + _ALIGNBYTES) & ~_ALIGNBYTES) + +/* RFC 2292 additions */ +#define OFP_CMSG_SPACE(l) (_ALIGN(sizeof(struct ofp_cmsghdr)) + _ALIGN(l)) +#define OFP_CMSG_LEN(l) (_ALIGN(sizeof(struct ofp_cmsghdr)) + (l)) + +#define OFP_CMSG_ALIGN(n) _ALIGN(n) + +/* "Socket"-level control message types: */ +#define OFP_SCM_RIGHTS 0x01 /* access rights (array of int) */ +#define OFP_SCM_TIMESTAMP 0x02 /* timestamp (struct timeval) */ +#define OFP_SCM_CREDS 0x03 /* process creds (struct cmsgcred) */ +#define OFP_SCM_BINTIME 0x04 /* timestamp (struct bintime) */ + +/* + * 4.3 compat sockaddr, move to compat file later + */ +struct ofp_osockaddr { + unsigned short sa_family; /* address family */ + char sa_data[14]; /* up to 14 bytes of direct address */ +}; + +/* + * 4.3-compat message header (move to compat file later). + */ +struct ofp_omsghdr { + char *msg_name; /* optional address */ + int msg_namelen; /* size of address */ + struct ofp_iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + char *msg_accrights; /* access rights sent/received */ + int msg_accrightslen; +}; + +/* + * howto arguments for shutdown(2), specified by Posix.1g. 
+ */ +#define OFP_SHUT_RD 0 /* shut down the reading side */ +#define OFP_SHUT_WR 1 /* shut down the writing side */ +#define OFP_SHUT_RDWR 2 /* shut down both sides */ + +/* we cheat and use the OFP_SHUT_XX defines for these */ +#define OFP_PRU_FLUSH_RD OFP_SHUT_RD +#define OFP_PRU_FLUSH_WR OFP_SHUT_WR +#define OFP_PRU_FLUSH_RDWR OFP_SHUT_RDWR + + +/* + * sendfile(2) header/trailer struct + */ +struct ofp_sf_hdtr { + struct ofp_iovec *headers; /* pointer to an array of header struct iovec's */ + int hdr_cnt; /* number of header ofp_iovec's */ + struct ofp_iovec *trailers; /* pointer to an array of trailer struct iovec's */ + int trl_cnt; /* number of trailer ofp_iovec's */ +}; + +/* + * Sendfile-specific flag(s) + */ +#define OFP_SF_NODISKIO 0x00000001 +#define OFP_SF_MNOWAIT 0x00000002 +#define OFP_SF_SYNC 0x00000004 + +/* Events */ +#define OFP_EVENT_INVALID 0 +#define OFP_EVENT_ACCEPT 1 +#define OFP_EVENT_RECV 2 + +struct ofp_sock_sigval { + int sockfd; + int sockfd2; + int event; + odp_packet_t pkt; +}; + +union ofp_sigval { /* Data passed with notification */ + int sival_int; /* Integer value */ + void *sival_ptr; /* Pointer value */ +}; + +#define OFP_SIGEV_NONE 0 +#define OFP_SIGEV_HOOK 1 +#define OFP_SIGEV_SIGNAL 2 +#define OFP_SIGEV_THREAD 3 + +struct ofp_sigevent { + int ofp_sigev_notify; /* Notification method */ + int ofp_sigev_signo; /* Notification signal */ + union ofp_sigval ofp_sigev_value; /* Data passed with + notification */ + void (*ofp_sigev_notify_function) (union ofp_sigval); + /* Function used for thread + notification (SIGEV_THREAD) */ + void *ofp_sigev_notify_attr; + /* Attributes for notification thread + (SIGEV_THREAD) */ + ofp_pid_t ofp_sigev_notify_thread_id; + /* ID of thread to signal (SIGEV_THREAD_ID) */ +}; + +struct ofp_timeval { + uint32_t tv_sec; /* seconds */ + uint32_t tv_usec; /* microseconds */ +}; + +struct selinfo; +struct ofp_fdset; +#define OFP_FD_SET_MEM_SIZE sizeof(void*) + +typedef struct { + uint8_t 
fd_set_buf[OFP_FD_SET_MEM_SIZE]; +} ofp_fd_set; + +void OFP_FD_CLR(int fd, ofp_fd_set *set); +int OFP_FD_ISSET(int fd, ofp_fd_set *set); +void OFP_FD_SET(int fd, ofp_fd_set *set); +void OFP_FD_ZERO(ofp_fd_set *set); + +int ofp_select(int nfds, ofp_fd_set *readfds, ofp_fd_set *writefds, + ofp_fd_set *exceptfds, struct ofp_timeval *timeout); + +int ofp_socket(int, int, int); +int ofp_accept(int, struct ofp_sockaddr *, ofp_socklen_t *); +int ofp_bind(int, const struct ofp_sockaddr *, ofp_socklen_t); +int ofp_connect(int, const struct ofp_sockaddr *, ofp_socklen_t); +int ofp_listen(int, int); +int ofp_shutdown(int, int); +int ofp_close(int); + +ofp_ssize_t ofp_recv(int, void *, size_t, int); +ofp_ssize_t ofp_recvfrom(int, void *, size_t, int, + struct ofp_sockaddr * __restrict, ofp_socklen_t * __restrict); + +ofp_ssize_t ofp_send(int, const void *, size_t, int); +ofp_ssize_t ofp_sendto(int, const void *, + size_t, int, const struct ofp_sockaddr *, ofp_socklen_t); + +int ofp_setsockopt(int, int, int, const void *, ofp_socklen_t); +int ofp_getsockopt(int, int, int, void *, ofp_socklen_t *); + +int ofp_ioctl(int, int, ...); + +int ofp_socket_sigevent(struct ofp_sigevent *); +void *ofp_udp_packet_parse(odp_packet_t, int *, + struct ofp_sockaddr *, + ofp_socklen_t *); +ofp_ssize_t ofp_udp_pkt_sendto(int, odp_packet_t, + const struct ofp_sockaddr *, ofp_socklen_t); + +#if 0 /* Not implemented */ +int ofp_getpeername(int, struct ofp_sockaddr * __restrict, ofp_socklen_t * __restrict); +int ofp_getsockname(int, struct ofp_sockaddr * __restrict, ofp_socklen_t * __restrict); + +ofp_ssize_t ofp_recvmsg(int, struct ofp_msghdr *, int); +ofp_ssize_t ofp_sendmsg(int, const struct ofp_msghdr *, int); +int ofp_sendfile(int, int, ofp_off_t, size_t, struct ofp_sf_hdtr *, + ofp_off_t *, int); + +int ofp_setfib(int); +int ofp_sockatmark(int); +int ofp_socketpair(int, int, int, int *); +#endif + +struct ofp_socket; + +#endif /* __OFP_SOCKET_H__ */ diff --git a/include/api/ofp_socket_types.h 
b/include/api/ofp_socket_types.h new file mode 100644 index 00000000..bb0c330f --- /dev/null +++ b/include/api/ofp_socket_types.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_SOCKET_TYPES_H__ +#define __OFP_SOCKET_TYPES_H__ + +#include "odp/std_types.h" + +typedef uint8_t __ofp_sa_family_t; +typedef uint32_t __ofp_socklen_t; +typedef long __ofp_suseconds_t; /* microseconds (signed) */ +typedef unsigned int __ofp_useconds_t; /* microseconds (unsigned) */ +typedef int __ofp_cpuwhich_t; /* which parameter for cpuset.*/ +typedef int __ofp_cpulevel_t; /* level parameter for cpuset.*/ +typedef int __ofp_cpusetid_t; /* cpuset identifier. */ +typedef uint32_t __ofp_gid_t; +typedef uint32_t __ofp_pid_t; +typedef uint32_t __ofp_uid_t; +typedef int64_t __ofp_ssize_t; +typedef int64_t __ofp_off_t; + +#endif /* __OFP_SOCKET_TYPES_H__ */ + diff --git a/include/api/ofp_stat.h b/include/api/ofp_stat.h new file mode 100644 index 00000000..98c6f644 --- /dev/null +++ b/include/api/ofp_stat.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_STAT_H__ +#define __OFP_STAT_H__ + +#include <odp.h> /* NOTE(review): header name was missing from the directive; <odp.h> presumed as the provider of ODP_CONFIG_MAX_THREADS - confirm */ + +struct ofp_packet_stat { + struct { + int rx_fp; + int tx_fp; + int rx_sp; + int tx_sp; + int tx_eth_frag; + int rx_ip_frag; + int rx_ip_reass; + uint64_t input_latency[64]; + uint64_t last_input_cycles; + } per_core[ODP_CONFIG_MAX_THREADS]; +}; + +/* Stats: Get stats */ +struct ofp_packet_stat *ofp_get_packet_statistics(void); + +/* Stats: configure*/ +#define OFP_STAT_COMPUTE_LATENCY 1 + +void ofp_set_stat_flags(unsigned long int flags); +unsigned long int ofp_get_stat_flags(void); + +#endif /* __OFP_STAT_H__ */ diff --git a/include/api/ofp_sysctl.h b/include/api/ofp_sysctl.h new file mode 100644 index 00000000..a2006376 --- /dev/null +++ b/include/api/ofp_sysctl.h @@ -0,0 +1,456 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2015, Nokia Solutions and Networks + * Copyright (c) 2015, ENEA Software AB + * + * This code is derived from software contributed to Berkeley by + * Mike Karels at Berkeley Software Design, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _OFP_SYSCTL_H_ +#define _OFP_SYSCTL_H_ + +#include "ofp_queue.h" + +/* + * Top-level identifiers + */ +#define OFP_CTL_UNSPEC 0 /* unused */ +#define OFP_CTL_NET 4 /* network, see socket.h */ +#define OFP_CTL_DEBUG 5 /* debugging parameters */ +#define OFP_CTL_MAXID 6 /* number of valid top-level ids */ + +/* + * Helper definitions + * + * Linker-set helpers: a set named "set" is bounded by the symbols + * __start_set_<set> and __stop_set_<set>; SET_BEGIN/SET_LIMIT take their + * addresses and SET_FOREACH walks the pointers in between. 
+ */ +#define __CONCAT(x,y) x ## y +#define SET_DECLARE(set, ptype) \ + extern ptype *__CONCAT(__start_set_,set); \ + extern ptype *__CONCAT(__stop_set_,set) +#define SET_BEGIN(set) \ + (&__CONCAT(__start_set_,set)) +#define SET_LIMIT(set) \ + (&__CONCAT(__stop_set_,set)) +#define SET_FOREACH(pvar, set) \ + for (pvar = SET_BEGIN(set); pvar < SET_LIMIT(set); pvar++) +#define SET_ITEM(set, i) \ + ((SET_BEGIN(set))[i]) +#define SET_COUNT(set) \ + (SET_LIMIT(set) - SET_BEGIN(set)) + +#define __GLOBL1(sym) __asm__(".globl " #sym) +#define __GLOBL(sym) __GLOBL1(sym) +#define __MAKE_SET(set, sym) \ + __GLOBL(__CONCAT(__start_set_,set)); \ + __GLOBL(__CONCAT(__stop_set_,set)); \ + static void const * const __set_##set##_sym_##sym \ + __attribute__ ((section ("set_" #set))) \ + __attribute__ ((used)) = &sym + + +#define TEXT_SET(set, sym)
__MAKE_SET(set, sym) +#define DATA_SET(set, sym) __MAKE_SET(set, sym) +#define BSS_SET(set, sym) __MAKE_SET(set, sym) +#define ABS_SET(set, sym) __MAKE_SET(set, sym) +#define SET_ENTRY(set, sym) __MAKE_SET(set, sym) + +#define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ + static struct sysinit uniquifier ## _sys_init = { \ + subsystem, \ + order, \ + func, \ + (ident) \ + }; \ + DATA_SET(sysinit_set,uniquifier ## _sys_init) +#define SYSINIT(uniquifier, subsystem, order, func, ident) \ + C_SYSINIT(uniquifier, subsystem, order, \ + (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) +/******/ + +struct thread; +/* + * Definitions for sysctl call. The sysctl call uses a hierarchical name + * for objects that can be examined or modified. The name is expressed as + * a sequence of integers. Like a file path name, the meaning of each + * component depends on its place in the hierarchy. The top-level and kern + * identifiers are defined here, and other identifiers are defined in the + * respective subsystem header files. + */ + +#define OFP_CTL_MAXNAME 24 /* largest number of components supported */ + +/* + * Each subsystem defined by sysctl defines a list of variables + * for that subsystem. Each name is either a node with further + * levels defined below it, or it is a leaf of some particular + * type given below. Each sysctl level defines a set of name/type + * pairs to be used by sysctl(8) in manipulating the subsystem. 
+ */ +#define OFP_CTLTYPE 0xf /* Mask for the type */ +#define OFP_CTLTYPE_NODE 1 /* name is a node */ +#define OFP_CTLTYPE_INT 2 /* name describes an integer */ +#define OFP_CTLTYPE_STRING 3 /* name describes a string */ +#define OFP_CTLTYPE_S64 4 /* name describes a signed 64-bit number */ +#define OFP_CTLTYPE_OPAQUE 5 /* name describes a structure */ +#define OFP_CTLTYPE_STRUCT OFP_CTLTYPE_OPAQUE /* name describes a structure */ +#define OFP_CTLTYPE_UINT 6 /* name describes an unsigned integer */ +#define OFP_CTLTYPE_LONG 7 /* name describes a long */ +#define OFP_CTLTYPE_ULONG 8 /* name describes an unsigned long */ +#define OFP_CTLTYPE_U64 9 /* name describes an unsigned 64-bit number */ + +#define OFP_CTLFLAG_RD 0x80000000 /* Allow reads of variable */ +#define OFP_CTLFLAG_WR 0x40000000 /* Allow writes to the variable */ +#define OFP_CTLFLAG_RW (OFP_CTLFLAG_RD|OFP_CTLFLAG_WR) +#define OFP_CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */ +#define OFP_CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */ +#define OFP_CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */ +#define OFP_CTLFLAG_DYN 0x02000000 /* Dynamic oid - can be freed */ +#define OFP_CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */ +#define OFP_CTLMASK_SECURE 0x00F00000 /* Secure level */ +#define OFP_CTLFLAG_TUN 0x00080000 /* Tunable variable */ +#define OFP_CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */ +#define OFP_CTLFLAG_VNET 0x00020000 /* Prisons with vnet can fiddle */ +#define OFP_CTLFLAG_RDTUN (OFP_CTLFLAG_RD|OFP_CTLFLAG_TUN) +#define OFP_CTLFLAG_DYING 0x00010000 /* oid is being removed */ +#define OFP_CTLFLAG_CAPRD 0x00008000 /* Can be read in capability mode */ +#define OFP_CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */ +#define OFP_CTLFLAG_CAPRW (OFP_CTLFLAG_CAPRD|OFP_CTLFLAG_CAPWR) + +/* + * USE THIS instead of a hardwired number from the categories below + * to get dynamically assigned sysctl entries using the linker-set 
+ * technology. This is the way nearly all new sysctl variables should + * be implemented. + * e.g. OFP_SYSCTL_INT(_parent, OFP_OID_AUTO, name, OFP_CTLFLAG_RW, &variable, 0, ""); + */ +#define OFP_OID_AUTO (-1) + +/* + * The starting number for dynamically-assigned entries. WARNING! + * ALL static sysctl entries should have numbers LESS than this! + */ +#define OFP_CTL_AUTO_START 0x100 + +#define OFP_SYSCTL_HANDLER_ARGS struct ofp_sysctl_oid *oidp, void *arg1, \ + intptr_t arg2, struct ofp_sysctl_req *req + +/* + * This describes the access space for a sysctl request. This is needed + * so that we can use the interface from the kernel or from user-space. + */ +struct ofp_sysctl_req { + struct thread *td; /* used for access checking */ + int lock; /* wiring state */ + void *oldptr; + size_t oldlen; + size_t oldidx; + int (*oldfunc)(struct ofp_sysctl_req *, const void *, size_t); + const void *newptr; + size_t newlen; + size_t newidx; + int (*newfunc)(struct ofp_sysctl_req *, void *, size_t); + size_t validlen; + int flags; +}; + +OFP_SLIST_HEAD(ofp_sysctl_oid_list, ofp_sysctl_oid); + +/* + * This describes one "oid" in the MIB tree. Potentially more nodes can + * be hidden behind it, expanded by the handler. 
+ */ +struct ofp_sysctl_oid { + struct ofp_sysctl_oid_list *oid_parent; + OFP_SLIST_ENTRY(ofp_sysctl_oid) oid_link; + int oid_number; + unsigned int oid_kind; + void *oid_arg1; + intptr_t oid_arg2; + const char *oid_name; + int (*oid_handler)(OFP_SYSCTL_HANDLER_ARGS); + const char *oid_fmt; + int oid_refcnt; + unsigned int oid_running; + const char *oid_descr; +}; + +#define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l) +#define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l) + +int sysctl_handle_int(OFP_SYSCTL_HANDLER_ARGS); +int sysctl_msec_to_ticks(OFP_SYSCTL_HANDLER_ARGS); +int sysctl_handle_long(OFP_SYSCTL_HANDLER_ARGS); +int sysctl_handle_64(OFP_SYSCTL_HANDLER_ARGS); +int sysctl_handle_string(OFP_SYSCTL_HANDLER_ARGS); +int sysctl_handle_opaque(OFP_SYSCTL_HANDLER_ARGS); + +int sysctl_dpcpu_int(OFP_SYSCTL_HANDLER_ARGS); +int sysctl_dpcpu_long(OFP_SYSCTL_HANDLER_ARGS); +int sysctl_dpcpu_quad(OFP_SYSCTL_HANDLER_ARGS); + +/* + * These functions are used to add/remove an oid from the mib. + */ +//void sysctl_register_oid(struct ofp_sysctl_oid *oidp); +//void sysctl_unregister_oid(struct ofp_sysctl_oid *oidp); + +/* Declare a static oid to allow child oids to be added to it. */ +#define SYSCTL_DECL(name) \ + extern struct ofp_sysctl_oid_list sysctl_##name##_children + +/* Hide these in macros */ +#define SYSCTL_CHILDREN(oid_ptr) (struct ofp_sysctl_oid_list *) \ + (oid_ptr)->oid_arg1 +#define SYSCTL_CHILDREN_SET(oid_ptr, val) \ + (oid_ptr)->oid_arg1 = (val); +#define SYSCTL_STATIC_CHILDREN(oid_name) \ + (&sysctl_##oid_name##_children) + +/* === Structs and macros related to context handling === */ + +/* All dynamically created sysctls can be tracked in a context list. */ +struct sysctl_ctx_entry { + struct ofp_sysctl_oid *entry; + OFP_TAILQ_ENTRY(sysctl_ctx_entry) link; +}; + +OFP_TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); + +#define SYSCTL_NODE_CHILDREN(parent, name) \ + sysctl_##parent##_##name##_children + +/* + * These macros provide type safety for sysctls. 
SYSCTL_ALLOWED_TYPES() + * defines a transparent union of the allowed types. SYSCTL_ASSERT_TYPE() + * and SYSCTL_ADD_ASSERT_TYPE() use the transparent union to assert that + * the pointer matches the allowed types. + * + * The allow_0 member allows a literal 0 to be passed for ptr. + */ +#define SYSCTL_ALLOWED_TYPES(type, decls) \ + union sysctl_##type { \ + long allow_0; \ + decls \ + } __attribute__((__transparent_union__)); \ + \ + static inline void * \ + __sysctl_assert_##type(union sysctl_##type ptr) \ + { \ + return (ptr.a); \ + } \ + struct __hack + +SYSCTL_ALLOWED_TYPES(INT, int *a; ); +SYSCTL_ALLOWED_TYPES(UINT, unsigned int *a; ); +SYSCTL_ALLOWED_TYPES(LONG, long *a; ); +SYSCTL_ALLOWED_TYPES(ULONG, unsigned long *a; ); +SYSCTL_ALLOWED_TYPES(INT64, int64_t *a; long long *b; ); +SYSCTL_ALLOWED_TYPES(UINT64, uint64_t *a; unsigned long long *b; ); + +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] + +#ifdef notyet +#define SYSCTL_ADD_ASSERT_TYPE(type, ptr) \ + __sysctl_assert_ ## type (ptr) +#define SYSCTL_ASSERT_TYPE(type, ptr, parent, name) \ + _SYSCTL_ASSERT_TYPE(type, ptr, __LINE__, parent##_##name) +#else +#define SYSCTL_ADD_ASSERT_TYPE(type, ptr) ptr +#define SYSCTL_ASSERT_TYPE(type, ptr, parent, name) +#endif +#define _SYSCTL_ASSERT_TYPE(t, p, l, id) \ + __SYSCTL_ASSERT_TYPE(t, p, l, id) +#define __SYSCTL_ASSERT_TYPE(type, ptr, line, id) \ + static inline void \ + sysctl_assert_##line##_##id(void) \ + { \ + (void)__sysctl_assert_##type(ptr); \ + } \ + struct __hack + +#ifndef NO_SYSCTL_DESCR +#define __DESCR(d) d +#else +#define __DESCR(d) "" +#endif + +/* This constructs a "raw" MIB oid. 
*/ +#define OFP_SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ + static struct ofp_sysctl_oid sysctl__##parent##_##name = { \ + &sysctl_##parent##_children, { NULL }, nbr, kind, \ + a1, a2, #name, handler, fmt, 0, 0, __DESCR(descr) }; \ + DATA_SET(sysctl_set, sysctl__##parent##_##name) + +#define OFP_SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, __DESCR(descr)) + +/* This constructs a node from which other oids can hang. */ +#define OFP_SYSCTL_NODE(parent, nbr, name, access, handler, descr) \ + struct ofp_sysctl_oid_list SYSCTL_NODE_CHILDREN(parent, name); \ + OFP_SYSCTL_OID(parent, nbr, name, OFP_CTLTYPE_NODE|(access), \ + (void*)&SYSCTL_NODE_CHILDREN(parent, name), 0, handler, "N", descr) + +#define OFP_SYSCTL_ADD_NODE(ctx, parent, nbr, name, access, handler, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, OFP_CTLTYPE_NODE|(access), \ + NULL, 0, handler, "N", __DESCR(descr)) + +/* Oid for a string. len can be 0 to indicate '\0' termination. */ +#define OFP_SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \ + OFP_SYSCTL_OID(parent, nbr, name, OFP_CTLTYPE_STRING|(access), \ + arg, len, sysctl_handle_string, "A", descr) + +#define OFP_SYSCTL_ADD_STRING(ctx, parent, nbr, name, access, arg, len, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, OFP_CTLTYPE_STRING|(access), \ + arg, len, sysctl_handle_string, "A", __DESCR(descr)) + +/* Oid for an int. If ptr is NULL, val is returned. 
*/ +#define OFP_SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_ASSERT_TYPE(INT, ptr, parent, name); \ + OFP_SYSCTL_OID(parent, nbr, name, \ + OFP_CTLTYPE_INT | OFP_CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_int, "I", descr) + +#define OFP_SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, \ + OFP_CTLTYPE_INT | OFP_CTLFLAG_MPSAFE | (access), \ + SYSCTL_ADD_ASSERT_TYPE(INT, ptr), val, \ + sysctl_handle_int, "I", __DESCR(descr)) + +/* Oid for an unsigned int. If ptr is NULL, val is returned. */ +#define OFP_SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_ASSERT_TYPE(UINT, ptr, parent, name); \ + OFP_SYSCTL_OID(parent, nbr, name, \ + OFP_CTLTYPE_UINT | OFP_CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_int, "IU", descr) + +#define OFP_SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, \ + OFP_CTLTYPE_UINT | OFP_CTLFLAG_MPSAFE | (access), \ + SYSCTL_ADD_ASSERT_TYPE(UINT, ptr), val, \ + sysctl_handle_int, "IU", __DESCR(descr)) + +/* Oid for a long. The pointer must be non NULL. */ +#define OFP_SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_ASSERT_TYPE(LONG, ptr, parent, name); \ + OFP_SYSCTL_OID(parent, nbr, name, \ + OFP_CTLTYPE_LONG | OFP_CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_long, "L", descr) + +#define OFP_SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, \ + OFP_CTLTYPE_LONG | OFP_CTLFLAG_MPSAFE | (access), \ + SYSCTL_ADD_ASSERT_TYPE(LONG, ptr), 0, \ + sysctl_handle_long, "L", __DESCR(descr)) + +/* Oid for an unsigned long. The pointer must be non NULL. 
*/ +#define OFP_SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_ASSERT_TYPE(ULONG, ptr, parent, name); \ + OFP_SYSCTL_OID(parent, nbr, name, \ + OFP_CTLTYPE_ULONG | OFP_CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_long, "LU", descr) + +#define OFP_SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, \ + OFP_CTLTYPE_ULONG | OFP_CTLFLAG_MPSAFE | (access), \ + SYSCTL_ADD_ASSERT_TYPE(ULONG, ptr), 0, \ + sysctl_handle_long, "LU", __DESCR(descr)) + +/* Oid for a quad. The pointer must be non NULL. */ +#define OFP_SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_ASSERT_TYPE(INT64, ptr, parent, name); \ + OFP_SYSCTL_OID(parent, nbr, name, \ + OFP_CTLTYPE_S64 | OFP_CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_64, "Q", descr) + +#define OFP_SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, \ + OFP_CTLTYPE_S64 | OFP_CTLFLAG_MPSAFE | (access), \ + SYSCTL_ADD_ASSERT_TYPE(INT64, ptr), 0, \ + sysctl_handle_64, "Q", __DESCR(descr)) + +#define OFP_SYSCTL_UQUAD(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_ASSERT_TYPE(UINT64, ptr, parent, name); \ + OFP_SYSCTL_OID(parent, nbr, name, \ + OFP_CTLTYPE_U64 | OFP_CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_64, "QU", descr) + +#define OFP_SYSCTL_ADD_UQUAD(ctx, parent, nbr, name, access, ptr, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, \ + OFP_CTLTYPE_U64 | OFP_CTLFLAG_MPSAFE | (access), \ + SYSCTL_ADD_ASSERT_TYPE(UINT64, ptr), 0, \ + sysctl_handle_64, "QU", __DESCR(descr)) + +/* Oid for an opaque object. Specified by a pointer and a length. 
*/ +#define OFP_SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ + OFP_SYSCTL_OID(parent, nbr, name, OFP_CTLTYPE_OPAQUE|(access), \ + ptr, len, sysctl_handle_opaque, fmt, descr) + +#define OFP_SYSCTL_ADD_OPAQUE(ctx, parent, nbr, name, access, ptr, len, fmt, descr)\ + ofp_sysctl_add_oid(ctx, parent, nbr, name, OFP_CTLTYPE_OPAQUE|(access), \ + ptr, len, sysctl_handle_opaque, fmt, __DESCR(descr)) + +/* Oid for a struct. Specified by a pointer and a type. */ +#define OFP_SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \ + OFP_SYSCTL_OID(parent, nbr, name, OFP_CTLTYPE_OPAQUE|(access), \ + ptr, sizeof(struct type), sysctl_handle_opaque, \ + "S," #type, descr) + +#define OFP_SYSCTL_ADD_STRUCT(ctx, parent, nbr, name, access, ptr, type, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, OFP_CTLTYPE_OPAQUE|(access), \ + ptr, sizeof(struct type), sysctl_handle_opaque, "S," #type, __DESCR(descr)) + +/* Oid for a procedure. Specified by a pointer and an arg. */ +#define OFP_SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ + CTASSERT(((access) & OFP_CTLTYPE) != 0); \ + OFP_SYSCTL_OID(parent, nbr, name, (access), \ + ptr, arg, handler, fmt, descr) + +#define OFP_SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ + ofp_sysctl_add_oid(ctx, parent, nbr, name, (access), \ + ptr, arg, handler, fmt, __DESCR(descr)) + +/* + * Declare some common oids. 
+ */ +extern struct ofp_sysctl_oid_list sysctl__children; +SYSCTL_DECL(_net); +SYSCTL_DECL(_debug); + +/* Dynamic oid handling */ +struct ofp_sysctl_oid *ofp_sysctl_add_oid(struct sysctl_ctx_list *clist, + struct ofp_sysctl_oid_list *parent, int nbr, const char *name, + int kind, void *arg1, intptr_t arg2, + int (*handler) (OFP_SYSCTL_HANDLER_ARGS), + const char *fmt, const char *descr); + +int ofp_sysctl(const char *name, void *old, size_t *oldlenp, + const void *new, size_t newlen, size_t *retval); + +#endif diff --git a/include/api/ofp_tcp.h b/include/api/ofp_tcp.h new file mode 100644 index 00000000..d6e50eb9 --- /dev/null +++ b/include/api/ofp_tcp.h @@ -0,0 +1,267 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet/tcp.h 232945 2012-03-13 20:37:57Z glebius $ + */ + +#ifndef _OFP_TCP_H_ +#define _OFP_TCP_H_ + +#include +#include "ofp_ip_var.h" + +typedef uint32_t tcp_seq; + +#define ofp_tcp6_seq tcp_seq /* for KAME src sync over BSD*'s */ +#define ofp_tcp6hdr ofp_tcphdr /* for KAME src sync over BSD*'s */ + +/* + * TCP header. + * Per RFC 793, September, 1981. + */ +struct ofp_tcphdr { + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + tcp_seq th_seq; /* sequence number */ + tcp_seq th_ack; /* acknowledgement number */ +#if ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN + uint8_t th_x2:4, /* (unused) */ + th_off:4; /* data offset */ +#endif +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN + uint8_t th_off:4, /* data offset */ + th_x2:4; /* (unused) */ +#endif + uint8_t th_flags; /* OFP_TH_* bits below */ +#define OFP_TH_FIN 0x01 +#define OFP_TH_SYN 0x02 +#define OFP_TH_RST 0x04 +#define OFP_TH_PUSH 0x08 +#define OFP_TH_ACK 0x10 +#define OFP_TH_URG 0x20 +#define OFP_TH_ECE 0x40 +#define OFP_TH_CWR 0x80 +#define OFP_TH_FLAGS (OFP_TH_FIN|OFP_TH_SYN|OFP_TH_RST|OFP_TH_PUSH|OFP_TH_ACK|OFP_TH_URG|OFP_TH_ECE|OFP_TH_CWR) /* all eight flag bits */ +#define OFP_PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR" + + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +}; + +#define OFP_TCPOPT_EOL 0 +#define OFP_TCPOLEN_EOL 1 +#define OFP_TCPOPT_PAD 0 /* padding after EOL */ +#define
OFP_TCPOLEN_PAD 1 +#define OFP_TCPOPT_NOP 1 +#define OFP_TCPOLEN_NOP 1 +#define OFP_TCPOPT_MAXSEG 2 +#define OFP_TCPOLEN_MAXSEG 4 +#define OFP_TCPOPT_WINDOW 3 +#define OFP_TCPOLEN_WINDOW 3 +#define OFP_TCPOPT_SACK_PERMITTED 4 +#define OFP_TCPOLEN_SACK_PERMITTED 2 +#define OFP_TCPOPT_SACK 5 +#define OFP_TCPOLEN_SACKHDR 2 +#define OFP_TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */ +#define OFP_TCPOPT_TIMESTAMP 8 +#define OFP_TCPOLEN_TIMESTAMP 10 +#define OFP_TCPOLEN_TSTAMP_APPA (OFP_TCPOLEN_TIMESTAMP+2) /* appendix A */ +#define OFP_TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */ +#define OFP_TCPOLEN_SIGNATURE 18 + +/* Miscellaneous constants */ +#define OFP_MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */ +#define OFP_TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */ + + +/* + * The default maximum segment size (MSS) to be used for new TCP connections + * when path MTU discovery is not enabled. + * + * RFC879 derives the default MSS from the largest datagram size hosts are + * minimally required to handle directly or through IP reassembly minus the + * size of the IP and TCP header. With IPv6 the minimum MTU is specified + * in RFC2460. + * + * For IPv4 the MSS is 576 - sizeof(struct tcpiphdr) + * For IPv6 the MSS is IPV6_MMTU - sizeof(struct ip6_hdr) - sizeof(struct ofp_tcphdr) + * + * We use explicit numerical definition here to avoid header pollution. + */ +#define OFP_TCP_MSS 536 +#define OFP_TCP6_MSS 1220 + +/* + * Limit the lowest MSS we accept for path MTU discovery and the TCP SYN MSS + * option. Allowing low values of MSS can consume significant resources and + * be used to mount a resource exhaustion attack. + * Connections requesting lower MSS values will be rounded up to this value + * and the OFP_IP_DF flag will be cleared to allow fragmentation along the path. + * + * See tcp_subr.c ofp_tcp_minmss SYSCTL declaration for more comments. Setting + * it to "0" disables the minmss check. 
+ * + * The default value is fine for TCP across the Internet's smallest official + * link MTU (256 bytes for AX.25 packet radio). However, a connection is very + * unlikely to come across such low MTU interfaces these days (anno domini 2003). + */ +#define OFP_TCP_MINMSS 216 + +#define OFP_TCP_MAXWIN 65535 /* largest value for (unscaled) window */ +#define OFP_TTCP_CLIENT_SND_WND 4096 /* dflt send window for T/TCP client */ + +#define OFP_TCP_MAX_WINSHIFT 14 /* maximum window shift */ + +#define OFP_TCP_MAXBURST 4 /* maximum segments in a burst */ + +#define OFP_TCP_MAXHLEN (0xf<<2)/* max length of header in bytes */ +#define OFP_TCP_MAXOLEN (OFP_TCP_MAXHLEN - sizeof(struct ofp_tcphdr)) + /* max space left for options */ + +/* + * User-settable options (used with setsockopt). + */ +#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#define TCP_MAXSEG 0x02 /* set maximum segment size */ +#define TCP_NOPUSH 0x04 /* don't push last block of write */ +#define TCP_NOOPT 0x08 /* don't use TCP options */ +#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ +#define TCP_INFO 0x20 /* retrieve tcp_info structure */ +#define TCP_CONGESTION 0x40 /* get/set congestion control algorithm */ +#define TCP_KEEPINIT 0x80 /* N, time to establish connection */ +#define TCP_KEEPIDLE 0x100 /* L,N,X start keepalives after this period */ +#define TCP_KEEPINTVL 0x200 /* L,N interval between keepalives */ +#define TCP_KEEPCNT 0x400 /* L,N number of keepalives before close */ +#define TCP_REASSDL 0x800 /* wait this long for missing segments */ + +#define TCP_CA_NAME_MAX 16 /* max congestion control name length */ + +#define TCPI_OPT_TIMESTAMPS 0x01 +#define TCPI_OPT_SACK 0x02 +#define TCPI_OPT_WSCALE 0x04 +#define TCPI_OPT_ECN 0x08 +#define TCPI_OPT_TOE 0x10 + +/* + * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits + * the caller to query certain information about the state of a TCP + * connection. 
We provide an overlapping set of fields with the Linux + * implementation, but since this is a fixed size structure, room has been + * left for growth. In order to maximize potential future compatibility with + * the Linux API, the same variable names and order have been adopted, and + * padding left to make room for omitted fields in case they are added later. + * + * XXX: This is currently an unstable ABI/API, in that it is expected to + * change. + */ +struct tcp_info { + uint8_t tcpi_state; /* TCP FSM state. */ + uint8_t __tcpi_ca_state; + uint8_t __tcpi_retransmits; + uint8_t __tcpi_probes; + uint8_t __tcpi_backoff; + uint8_t tcpi_options; /* Options enabled on conn. */ + uint8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */ + tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */ + + uint32_t tcpi_rto; /* Retransmission timeout (usec). */ + uint32_t __tcpi_ato; + uint32_t tcpi_snd_mss; /* Max segment size for send. */ + uint32_t tcpi_rcv_mss; /* Max segment size for receive. */ + + uint32_t __tcpi_unacked; + uint32_t __tcpi_sacked; + uint32_t __tcpi_lost; + uint32_t __tcpi_retrans; + uint32_t __tcpi_fackets; + + /* Times; measurements in usecs. */ + uint32_t __tcpi_last_data_sent; + uint32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */ + uint32_t tcpi_last_data_recv; /* Time since last recv data. */ + uint32_t __tcpi_last_ack_recv; + + /* Metrics; variable units. */ + uint32_t __tcpi_pmtu; + uint32_t __tcpi_rcv_ssthresh; + uint32_t tcpi_rtt; /* Smoothed RTT in usecs. */ + uint32_t tcpi_rttvar; /* RTT variance in usecs. */ + uint32_t tcpi_snd_ssthresh; /* Slow start threshold. */ + uint32_t tcpi_snd_cwnd; /* Send congestion window. */ + uint32_t __tcpi_advmss; + uint32_t __tcpi_reordering; + + uint32_t __tcpi_rcv_rtt; + uint32_t tcpi_rcv_space; /* Advertised recv window. */ + + /* FreeBSD extensions to tcp_info. */ + uint32_t tcpi_snd_wnd; /* Advertised send window. */ + uint32_t tcpi_snd_bwnd; /* No longer used. 
*/ + uint32_t tcpi_snd_nxt; /* Next egress seqno */ + uint32_t tcpi_rcv_nxt; /* Next ingress seqno */ + uint32_t tcpi_toe_tid; /* HWTID for TOE endpoints */ + uint32_t tcpi_snd_rexmitpack; /* Retransmitted packets */ + uint32_t tcpi_rcv_ooopack; /* Out-of-order packets */ + uint32_t tcpi_snd_zerowin; /* Zero-sized windows sent */ + + /* Padding to grow without breaking ABI. */ + uint32_t __tcpi_pad[26]; /* Padding. */ +}; + +/* + * Tcp+ip header, after ip options removed. + */ +struct tcpiphdr { + struct ipovly ti_i; /* overlaid ip structure */ + struct ofp_tcphdr ti_t; /* tcp header */ +}; +#define ti_x1 ti_i.ih_x1 +#define ti_pr ti_i.ih_pr +#define ti_len ti_i.ih_len +#define ti_src ti_i.ih_src +#define ti_dst ti_i.ih_dst +#define ti_sport ti_t.th_sport +#define ti_dport ti_t.th_dport +#define ti_seq ti_t.th_seq +#define ti_ack ti_t.th_ack +#define ti_x2 ti_t.th_x2 +#define ti_off ti_t.th_off +#define ti_flags ti_t.th_flags +#define ti_win ti_t.th_win +#define ti_sum ti_t.th_sum +#define ti_urp ti_t.th_urp + +#endif /* !_OFP_TCP_H_ */ diff --git a/include/api/ofp_timer.h b/include/api/ofp_timer.h new file mode 100644 index 00000000..818a965f --- /dev/null +++ b/include/api/ofp_timer.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_TIMER_H__ +#define __OFP_TIMER_H__ + +typedef void (*ofp_timer_callback)(void *arg); + +odp_timer_t ofp_timer_start(uint64_t tmo_us, ofp_timer_callback callback, + void *arg, int arglen); +int ofp_timer_cancel(odp_timer_t tim); +void ofp_timer_handle(odp_event_t buf); +int ofp_timer_ticks(int timer_num); +odp_timer_pool_t ofp_timer(int timer_num); + +#endif /* __OFP_TIMER_H__ */ diff --git a/include/api/ofp_types.h b/include/api/ofp_types.h new file mode 100644 index 00000000..533f293b --- /dev/null +++ b/include/api/ofp_types.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_TYPES_H__ +#define __OFP_TYPES_H__ + +#include "ofp_queue.h" + +enum ofp_return_code { + OFP_PKT_CONTINUE = 0, + OFP_PKT_PROCESSED, + OFP_PKT_ON_HOLD, + OFP_PKT_DROP +}; + +struct ofp_nh_entry { + uint32_t gw; + uint16_t port; + uint16_t vlan; +}; + +struct pkt6_entry; +OFP_SLIST_HEAD(pkt6_list, pkt6_entry); + +struct ofp_nh6_entry { + uint8_t gw[16]; + uint16_t port; + uint16_t vlan; + uint8_t mac[6]; + struct pkt6_list pkt6_hold; +}; + +#endif /* __OFP_TYPES_H__ */ diff --git a/include/api/ofp_udp.h b/include/api/ofp_udp.h new file mode 100644 index 00000000..9025a005 --- /dev/null +++ b/include/api/ofp_udp.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet/udp.h 217126 2011-01-07 21:40:34Z jhb $ + */ + +#ifndef _OFP_UDP_H_ +#define _OFP_UDP_H_ + +/* + * UDP protocol header. + * Per RFC 768, September, 1981. + */ +struct ofp_udphdr { + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + uint16_t uh_ulen; /* udp length */ + uint16_t uh_sum; /* udp checksum */ +}; + +/* + * User-settable options (used with setsockopt). + */ +#define OFP_UDP_ENCAP 0x01 + + +/* + * UDP Encapsulation of IPsec Packets options. + */ +/* Encapsulation types. */ +#define OFP_UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ +#define OFP_UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */ + +/* Default ESP in UDP encapsulation port. 
*/ +#define OFP_UDP_ENCAP_ESPINUDP_PORT 500 + +/* Maximum UDP fragment size for ESP over UDP. */ +#define OFP_UDP_ENCAP_ESPINUDP_MAXFRAGLEN 552 + +#endif diff --git a/include/api/ofp_utils.h b/include/api/ofp_utils.h new file mode 100644 index 00000000..a1444bba --- /dev/null +++ b/include/api/ofp_utils.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFP_UTILS_H__ +#define __OFP_UTILS_H__ + +#ifndef _TRUE_FALSE_ +#define FALSE 0 +#ifndef TRUE +#define TRUE (!FALSE) +#endif +#define _TRUE_FALSE_ 1 +#endif + +char *ofp_print_mac(uint8_t *mac); +char *ofp_print_ip_addr(uint32_t addr); +char *ofp_print_ip6_addr(uint8_t *addr); + +#endif /*__OFP_UTILS_H__*/ diff --git a/include/ofpi.h b/include/ofpi.h new file mode 100644 index 00000000..7ecaed9a --- /dev/null +++ b/include/ofpi.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_H_ +#define _OFPI_H_ + +#include +#include "odp/helper/linux.h" + +#include "ofpi_ethernet.h" +#include "ofpi_if_vlan.h" +#include "ofpi_if_arp.h" +#include "ofpi_in.h" +#include "ofpi_in6.h" +#include "ofpi_ip.h" +#include "ofpi_ip6.h" +#include "ofpi_if_gre.h" +#include "ofpi_icmp6.h" +#include "ofpi_icmp.h" +#include "ofpi_udp.h" +#include "ofpi_tcp.h" +#include "ofpi_timer.h" +#include "ofpi_hook.h" + +#endif + diff --git a/include/ofpi_arp.h b/include/ofpi_arp.h new file mode 100644 index 00000000..dd65bc8c --- /dev/null +++ b/include/ofpi_arp.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ +#ifndef __OFPI_ARP_H__ +#define __OFPI_ARP_H__ + +#include + +#include "ofpi_pkt_processing.h" /* return codes, i.e.: OFP_DROP */ + +struct arp_key { + uint32_t vrf; + uint32_t ipv4_addr; +}; + +#ifdef OFP_USE_LIBCK +#include + +struct arp_entry { + struct arp_key key; + /* Keep ifx/timestamp/state together in the same word! */ + uint16_t ifx; + uint8_t state; + uint8_t slowpath_keepalive_timer_armed; + uint64_t macaddr; + uint16_t timer_armed; /* Slowpath neigh update timer */ + CK_SLIST_ENTRY(arp_entry) next; +}; + +#else /* ! OFP_USE_LIBCK */ +#include "ofpi_queue.h" + +struct pkt_entry { + odp_packet_t pkt; + struct ofp_nh_entry *nh; + OFP_SLIST_ENTRY(pkt_entry) next; +}; + +struct pkt_list { + struct pkt_entry *slh_first; +}; /* OFP_SLIST_HEAD */ + +struct arp_entry { + struct arp_key key; + + odp_atomic_u64_t usetime; + odp_timer_t usetime_upd_tmo; + odp_rwlock_t usetime_rwlock; + + uint64_t macaddr; + struct pkt_list pkt_list_head; + odp_timer_t pkt_tmo; + OFP_SLIST_ENTRY(arp_entry) next; +}; +#endif /* OFP_USE_LIBCK */ + +struct arp_cache { + struct arp_key key; + uint16_t entry_idx; +}; + +#define ARP_IN_CACHE(_cache, _key) \ + (((_key)->vrf == (_cache)->key.vrf) && \ + ((_key)->ipv4_addr == (_cache)->key.ipv4_addr)) + +#define ARP_GET_CACHE(_cache) (&(shm->arp.entries[(_cache)->entry_idx])) + +#define ARP_SET_CACHE(_cache, _key, _entry) {\ + (_cache)->key.vrf = (_key)->vrf;\ + (_cache)->key.ipv4_addr = (_key)->ipv4_addr;\ + (_cache)->entry_idx = (_entry) - &(shm->arp.entries[0]);\ +} + +#define ARP_DEL_CACHE(_cache) {\ + (_cache)->key.vrf = 0;\ + (_cache)->key.ipv4_addr = 0;\ + (_cache)->entry_idx = 0;\ +} + +void ofp_arp_alloc_shared_memory(void); +void ofp_arp_lookup_shared_memory(void); + +void ofp_arp_global_init(void); +void ofp_arp_local_init(void); + +int ofp_arp_ipv4_insert(uint32_t ipv4_addr, unsigned char *ll_addr, + struct ofp_ifnet *dev); +int ofp_arp_ipv4_remove(uint32_t 
ipv4_addr, struct ofp_ifnet *dev); +int ofp_ipv4_lookup_mac(uint32_t ipv4_addr, unsigned char *ll_addr, + struct ofp_ifnet *dev); +int ofp_arp_save_ipv4_pkt(odp_packet_t pkt, struct ofp_nh_entry *nh_param, + uint32_t ipv4_addr, struct ofp_ifnet *dev); + +void ofp_arp_show_table(int fd); +void ofp_arp_show_saved_packets(int fd); +void ofp_arp_cleanup(void *arg); +void ofp_arp_init_tables(void); + +#endif /* __OFPI_ARP_H__ */ diff --git a/include/ofpi_avl.h b/include/ofpi_avl.h new file mode 100644 index 00000000..ba7a8943 --- /dev/null +++ b/include/ofpi_avl.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 1995 by Sam Rushing + */ + +/* $Id: avl.h,v 1.7 2003/07/07 01:10:14 brendan Exp $ */ + +#ifndef __AVL_H +#define __AVL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define NO_THREAD + +#ifndef NO_THREAD +#include "thread/thread.h" +#else +#define thread_rwlock_create(x) do{}while(0) +#define thread_rwlock_destroy(x) do{}while(0) +#define thread_rwlock_rlock(x) do{}while(0) +#define thread_rwlock_wlock(x) do{}while(0) +#define thread_rwlock_unlock(x) do{}while(0) +#endif + + +typedef struct avl_node_tag { + void * key; + struct avl_node_tag * left; + struct avl_node_tag * right; + struct avl_node_tag * parent; + /* + * The lower 2 bits of rank_and_balance specify the balance + * factor: 00==-1, 01==0, 10==+1.
+ * The rest of the bits are used for the rank (see AVL_GET_RANK) + */ + unsigned int rank_and_balance; + +#if !defined(NO_THREAD) && defined(HAVE_AVL_NODE_LOCK) + rwlock_t rwlock; +#endif +} avl_node; + +#define AVL_GET_BALANCE(n) ((int)(((n)->rank_and_balance & 3) - 1)) + +#define AVL_GET_RANK(n) (((n)->rank_and_balance >> 2)) + +#define AVL_SET_BALANCE(n,b) \ + ((n)->rank_and_balance) = \ + (((n)->rank_and_balance & (~3)) | ((int)((b) + 1))) + +#define AVL_SET_RANK(n,r) \ + ((n)->rank_and_balance) = \ + (((n)->rank_and_balance & 3) | ((r) << 2)) + +struct _avl_tree; + +typedef int (*avl_key_compare_fun_type) (void * compare_arg, void * a, void * b); +typedef int (*avl_iter_fun_type) (void * key, void * iter_arg); +typedef int (*avl_iter_index_fun_type) (unsigned long index, void * key, void * iter_arg); +typedef int (*avl_free_key_fun_type) (void * key); +typedef int (*avl_key_printer_fun_type) (char *, void *); + +/* + * compare_fun and compare_arg let us associate a particular compare + * function with each tree, separately. + */ + +typedef struct _avl_tree { + avl_node * root; + unsigned int height; + unsigned int length; + avl_key_compare_fun_type compare_fun; + void * compare_arg; + odp_rwlock_t lock_rw; + +#ifndef NO_THREAD + rwlock_t rwlock; +#endif +} avl_tree; + +avl_tree * avl_tree_new (avl_key_compare_fun_type compare_fun, void * compare_arg); +avl_node * avl_node_new (void * key, avl_node * parent); + +void avl_tree_free ( + avl_tree * tree, + avl_free_key_fun_type free_key_fun + ); + +int avl_insert ( + avl_tree * ob, + void * key + ); + +int avl_delete ( + avl_tree * tree, + void * key, + avl_free_key_fun_type free_key_fun + ); + +int avl_get_by_index ( + avl_tree * tree, + unsigned long index, + void ** value_address + ); + +int avl_get_by_key ( + avl_tree * tree, + void * key, + void ** value_address + ); + +int avl_iterate_inorder ( + avl_tree * tree, + avl_iter_fun_type iter_fun, + void * iter_arg + ); + +int avl_iterate_index_range ( + avl_tree * tree, + avl_iter_index_fun_type iter_fun, +
unsigned long low, + unsigned long high, + void * iter_arg + ); + +int avl_get_span_by_key ( + avl_tree * tree, + void * key, + unsigned long * low, + unsigned long * high + ); + +int avl_get_span_by_two_keys ( + avl_tree * tree, + void * key_a, + void * key_b, + unsigned long * low, + unsigned long * high + ); + +int avl_verify (avl_tree * tree); + +void avl_print_tree ( + avl_tree * tree, + avl_key_printer_fun_type key_printer + ); + +avl_node *avl_get_first(avl_tree *tree); + +avl_node *avl_get_prev(avl_node * node); + +avl_node *avl_get_next(avl_node * node); + +/* These two are from David Ascher */ + +int avl_get_item_by_key_most ( + avl_tree * tree, + void * key, + void ** value_address + ); + +int avl_get_item_by_key_least ( + avl_tree * tree, + void * key, + void ** value_address + ); + +/* optional locking stuff */ +void avl_tree_rlock(avl_tree *tree); +void avl_tree_wlock(avl_tree *tree); +void avl_tree_unlock(avl_tree *tree); +void avl_node_rlock(avl_node *node); +void avl_node_wlock(avl_node *node); +void avl_node_unlock(avl_node *node); +void ofp_avl_lookup_shared_memory(void); +void ofp_avl_alloc_shared_memory(void); + +void ofp_print_avl_stat(int fd); + +#ifdef __cplusplus +} +#endif + +#endif /* __AVL_H */ diff --git a/include/ofpi_callout.h b/include/ofpi_callout.h new file mode 100644 index 00000000..c7f43697 --- /dev/null +++ b/include/ofpi_callout.h @@ -0,0 +1,131 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. + * (c) UNIX System Laboratories, Inc. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)callout.h 8.2 (Berkeley) 1/21/94 + * $FreeBSD: release/9.1.0/sys/sys/callout.h 235220 2012-05-10 10:02:56Z kib $ + */ + +#ifndef _SYS_CALLOUT_H_ +#define _SYS_CALLOUT_H_ + +#include "ofpi_timer.h" +#include "ofpi_queue.h" + +struct lock_object; + +OFP_SLIST_HEAD(callout_list, callout); +OFP_TAILQ_HEAD(callout_tailq, callout); + +struct callout { +#if 0 + union { + OFP_SLIST_ENTRY(callout) sle; + OFP_TAILQ_ENTRY(callout) tqe; + } c_links; + int c_time; /* ticks to the event */ + void *c_arg; /* function argument */ + void (*c_func)(void *); /* function to call */ + struct lock_object *c_lock; /* lock to handle */ +#endif + int c_flags; /* state of this entry */ + volatile int c_cpu; /* CPU we're scheduled on */ + odp_timer_t odptmo; +}; + +#define CALLOUT_LOCAL_ALLOC 0x0001 /* was allocated from callfree */ +#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */ +#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ +#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ +#define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ +#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ +#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ + +struct callout_handle { + struct callout *callout; +}; + +extern int ncallout; + +#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) +#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) +#define callout_pending(c) (1 /* always true (c)->c_flags & CALLOUT_PENDING*/) + +#define callout_drain(c) _callout_stop_safe(c, 1) + +void _callout_init_lock(struct callout *, struct lock_object *, int); +#define callout_init_mtx(c, mtx, flags) \ + _callout_init_lock((c), ((mtx) != NULL) ? &(mtx)->lock_object : \ + NULL, (flags)) +#define callout_init_rw(c, rw, flags) \ + _callout_init_lock((c), ((rw) != NULL) ? 
&(rw)->lock_object : \ + NULL, (flags)) +#define callout_reset(c, on_tick, fn, arg) \ + callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu) +#define callout_reset_curcpu(c, on_tick, fn, arg) \ + callout_reset_on((c), (on_tick), (fn), (arg), PCPU_GET(cpuid)) +int callout_schedule(struct callout *, int); +int callout_schedule_on(struct callout *, int, int); +#define callout_schedule_curcpu(c, on_tick) \ + callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) + +#define callout_reset_on(_c, _ticks, _func, _arg, _cpu) \ + do { \ + void *param = _arg; \ + uint64_t us = ((uint64_t)_ticks)*OFP_TIMER_RESOLUTION_US; \ + odp_timer_t tmp = (_c)->odptmo; \ + (_c)->odptmo = ODP_TIMER_INVALID; \ + ofp_timer_cancel(tmp); \ + (_c)->odptmo = ofp_timer_start(us, _func, &param, sizeof(void *)); \ + (_c)->c_flags |= CALLOUT_ACTIVE; \ + } while (0) + +#define callout_init(_t, _f) do { (_t)->odptmo = ODP_TIMER_INVALID; } while (0) + +#define callout_stop(_t) \ + do { \ + odp_timer_t tmp = (_t)->odptmo; \ + (_t)->odptmo = ODP_TIMER_INVALID; \ + ofp_timer_cancel(tmp); \ + (_t)->c_flags &= ~CALLOUT_ACTIVE; \ + } while (0) + +void callout_tick(void); +int callout_tickstofirst(int limit); +extern void (*callout_new_inserted)(int cpu, int ticks); + +#define ticks ofp_timer_ticks(OFP_TIMER_SOCKET) + +#endif /* _SYS_CALLOUT_H_ */ diff --git a/include/ofpi_cli.h b/include/ofpi_cli.h new file mode 100644 index 00000000..3b57de18 --- /dev/null +++ b/include/ofpi_cli.h @@ -0,0 +1,96 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _CLI_H_ +#define _CLI_H_ + +#include +#include "api/ofp_cli.h" + +#define PASSWORD_LEN 32 + +#define NUM_OLD_BUFS 8 + +/** cli_conn: CLI connection context + */ +struct cli_conn { + int status; + int fd; + char inbuf[200]; + char oldbuf[NUM_OLD_BUFS][200]; + int old_put_cnt; + int old_get_cnt; + unsigned int pos; + unsigned char ch1; + char passwd[PASSWORD_LEN + 1]; +}; + +/** utils 
+ */ +void sendcrlf(struct cli_conn *conn); +int ip4addr_get(const char *tk, uint32_t *addr); +int ip4net_get(const char *tk, uint32_t *addr, int *mask); +int ip6addr_get(const char *tk, int tk_len, uint8_t *addr); + +/** commands + */ +void f_route_show(struct cli_conn *conn, const char *s); +void f_route_add(struct cli_conn *conn, const char *s); +void f_route_add_v6(struct cli_conn *conn, const char *s); +void f_route_add_vrf(struct cli_conn *conn, const char *s); +void f_route_del(struct cli_conn *conn, const char *s); +void f_route_del_vrf(struct cli_conn *conn, const char *s); +void f_route_del_v6(struct cli_conn *conn, const char *s); +void f_route_add_dev_to_dev(struct cli_conn *conn, const char *s); +void f_help_route(struct cli_conn *conn, const char *s); + +void f_debug(struct cli_conn *conn, const char *s); +void f_debug_show(struct cli_conn *conn, const char *s); +void f_debug_capture(struct cli_conn *conn, const char *s); +void f_debug_info(struct cli_conn *conn, const char *s); +void f_debug_capture_file(struct cli_conn *conn, const char *s); +void f_help_debug(struct cli_conn *conn, const char *s); + +void f_loglevel(struct cli_conn *conn, const char *s); +void f_help_loglevel(struct cli_conn *conn, const char *s); +void f_loglevel_show(struct cli_conn *conn, const char *s); + +void f_arp(struct cli_conn *conn, const char *s); +void f_arp_flush(struct cli_conn *conn, const char *s); +void f_arp_cleanup(struct cli_conn *conn, const char *s); +void f_help_arp(struct cli_conn *conn, const char *s); + +#define ALIAS_TABLE_LEN 16 + +struct alias_table_s { + char *name; + char *cmd; +}; + +extern struct alias_table_s alias_table[]; +void f_alias_set(struct cli_conn *conn, const char *s); +void f_alias_show(struct cli_conn *conn, const char *s); +void f_help_alias(struct cli_conn *conn, const char *s); +void f_add_alias_command(const char *name); + +void f_stat_show(struct cli_conn *conn, const char *s); +void f_stat_set(struct cli_conn *conn, const char 
*s); +void f_stat_clear(struct cli_conn *conn, const char *s); +void f_help_stat(struct cli_conn *conn, const char *s); + +void f_ifconfig_show(struct cli_conn *conn, const char *s); +void f_help_ifconfig(struct cli_conn *conn, const char *s); +void f_ifconfig(struct cli_conn *conn, const char *s); +void f_ifconfig_v6(struct cli_conn *conn, const char *s); +void f_ifconfig_tun(struct cli_conn *conn, const char *s); +void f_ifconfig_down(struct cli_conn *conn, const char *s); + +void f_sysctl_dump(struct cli_conn *conn, const char *s); +void f_sysctl_read(struct cli_conn *conn, const char *s); +void f_sysctl_write(struct cli_conn *conn, const char *s); + +#endif diff --git a/include/ofpi_debug.h b/include/ofpi_debug.h new file mode 100644 index 00000000..a4ac44bb --- /dev/null +++ b/include/ofpi_debug.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2014, Linaro Limited + * All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * SPDX-License-Identifier: BSD-3-Clause + */ +/** + * @file + * + * ofp debug + */ + +#ifndef _OFPI_DEBUG_H_ +#define _OFPI_DEBUG_H_ + +#include +#include +#include +#include "api/ofp_debug.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +extern int ofp_debug_flags; + +#define OFP_DEBUG_PCAP_KNI 0x40 +#define OFP_DEBUG_PCAP_TX 0x80 +extern int ofp_debug_capture_ports; + +#define DEFAULT_DEBUG_TXT_FILE_NAME "packets.txt" +#define DEFAULT_DEBUG_PCAP_FILE_NAME "/root/packets.pcap" + +void ofp_pcap_alloc_shared_memory(void); +void ofp_pcap_lookup_shared_memory(void); + +void ofp_save_packet_to_pcap_file(uint32_t flag, odp_packet_t pkt, int port); +void ofp_print_packet_buffer(const char *comment, uint8_t *p); + +/* + * Debug LOG interface + */ +struct ofp_flag_descript_s { + uint32_t flag; + const char *flag_descript; +}; + +enum ofp_log_packet { + OFP_DEBUG_PKT_RECV_NIC = 0, + OFP_DEBUG_PKT_SEND_NIC, + OFP_DEBUG_PKT_RECV_KNI, + OFP_DEBUG_PKT_SEND_KNI +}; + +extern struct ofp_flag_descript_s ofp_flag_descript[]; 
+ +#define OFP_DEBUG_PACKET(_type_, pkt, port) {\ + if (ofp_debug_flags & ofp_flag_descript[_type_].flag) { \ + ofp_print_packet( \ + ofp_flag_descript[_type_].flag_descript, \ + pkt); \ + if (ofp_debug_flags & OFP_DEBUG_CAPTURE) { \ + ofp_save_packet_to_pcap_file( \ + ofp_flag_descript[_type_].flag, \ + pkt, port); \ + } \ + } \ + } + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/ofpi_domain.h b/include/ofpi_domain.h new file mode 100644 index 00000000..883f7f77 --- /dev/null +++ b/include/ofpi_domain.h @@ -0,0 +1,80 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)domain.h 8.1 (Berkeley) 6/2/93 + * $FreeBSD: release/9.1.0/sys/sys/domain.h 195837 2009-07-23 20:46:49Z rwatson$ + */ + +#ifndef _SYS_DOMAIN_H_ +#define _SYS_DOMAIN_H_ + +/* + * Structure per communications domain. + */ + +/* + * Forward structure declarations for function prototypes [sic]. + */ +struct mbuf; +struct ifnet; +struct protosw; + +struct domain { + int dom_family; /* OFP_AF_xxx */ + const char *dom_name; + + void (*dom_init) /* initialize domain data structures */ + (void); + void (*dom_destroy) /* cleanup structures / state */ + (void); + int (*dom_externalize) /* externalize access rights */ + (odp_packet_t , odp_packet_t *); + void (*dom_dispose) /* dispose of internalized rights */ + (odp_packet_t); + struct protosw *dom_protosw, *dom_protoswNPROTOSW; + struct domain *dom_next; + + int (*dom_rtattach) /* initialize routing table */ + (void **, int); + int (*dom_rtdetach) /* clean up routing table */ + (void **, int); + int dom_rtoffset; /* an arg to rtattach, in bits */ + /* XXX MRT. + * rtoffset May be 0 if the domain supplies its own rtattach(), + * in which case, a 0 indicates it's being called from + * vfs_export.c (HACK) Only for OFP_AF_INET{,6} at this time. + * Temporary ABI compat hack.. 
fix post RELENG_7 + */ + int dom_maxrtkey; /* for routing layer */ + void *(*dom_ifattach)(struct ifnet *); + void (*dom_ifdetach)(struct ifnet *, void *); + /* af-dependent data on ifnet */ +}; + +void domain_init(void *arg); +#endif /* !_SYS_DOMAIN_H_ */ diff --git a/include/ofpi_errno.h b/include/ofpi_errno.h new file mode 100644 index 00000000..9c1e4921 --- /dev/null +++ b/include/ofpi_errno.h @@ -0,0 +1,8 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "api/ofp_errno.h" diff --git a/include/ofpi_ethernet.h b/include/ofpi_ethernet.h new file mode 100644 index 00000000..d7e6673f --- /dev/null +++ b/include/ofpi_ethernet.h @@ -0,0 +1,13 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_ETHERNET_H_ +#define _OFPI_ETHERNET_H_ + +#include "api/ofp_ethernet.h" + +#endif diff --git a/include/ofpi_gre.h b/include/ofpi_gre.h new file mode 100644 index 00000000..fc04c89f --- /dev/null +++ b/include/ofpi_gre.h @@ -0,0 +1,13 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + */ +#ifndef __OFPI_GRE_H__ +#define __OFPI_GRE_H__ + +int ofp_gre_input(odp_packet_t, int); + +#endif /*__OFPI_GRE_H__*/ diff --git a/include/ofpi_hash.h b/include/ofpi_hash.h new file mode 100644 index 00000000..d7cef3bd --- /dev/null +++ b/include/ofpi_hash.h @@ -0,0 +1,12 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +uint32_t ofp_hashword(const uint32_t *k, size_t length, uint32_t initval); +void ofp_hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb); +uint32_t ofp_hashlittle(const void *key, size_t length, uint32_t initval); +void ofp_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb); +uint32_t ofp_hashbig(const void *key, size_t length, uint32_t initval); diff --git a/include/ofpi_hook.h b/include/ofpi_hook.h new file mode 100644 index 00000000..f1b9a399 --- /dev/null +++ b/include/ofpi_hook.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFPI_HOOK_H__ +#define __OFPI_HOOK_H__ + +#include "api/ofp_types.h" +#include "api/ofp_hook.h" + +#define OFP_HOOK(_hook_id_, _pkt_, _arg_, _pres_) { \ + ofp_pkt_hook *_pkt_hook_ = ofp_get_packet_hooks(); \ + if (_pkt_hook_ && _pkt_hook_[_hook_id_]) \ + *_pres_ = _pkt_hook_[_hook_id_](_pkt_, _arg_); \ + else \ + *_pres_ = OFP_PKT_CONTINUE; \ +} + +inline ofp_pkt_hook *ofp_get_packet_hooks(void); +void ofp_hook_alloc_shared_memory(ofp_pkt_hook *pkt_hook_init); +void ofp_hook_lookup_shared_memory(void); + +#endif /* __OFPI_HOOK_H__ */ diff --git a/include/ofpi_icmp.h b/include/ofpi_icmp.h new file mode 100644 index 00000000..6c2496f4 --- /dev/null +++ b/include/ofpi_icmp.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_ICMP_H_ +#define _OFPI_ICMP_H_ + +#include "api/ofp_icmp.h" + +int ofp_icmp_input(odp_packet_t pkt_icmp, int off); +int ofp_icmp_error(odp_packet_t pkt_in, int type, int code, uint32_t dest, int mtu); + +#endif diff --git a/include/ofpi_icmp6.h b/include/ofpi_icmp6.h new file mode 100644 index 00000000..c59b8638 --- /dev/null +++ b/include/ofpi_icmp6.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + + +#ifndef _OFPI_ICMP6_H_ +#define _OFPI_ICMP6_H_ + +#include "api/ofp_icmp6.h" +#include "ofpi_vnet.h" +/* +void icmp6_paramerror(struct mbuf *, int); +void icmp6_error(struct mbuf *, int, int, int); +void icmp6_error2(struct mbuf *, int, int, int, struct ifnet *); +*/ +int ofp_icmp6_input(odp_packet_t, int *, int *); +/* +void icmp6_fasttimo(void); +void icmp6_slowtimo(void); +*/ +void ofp_icmp6_reflect(odp_packet_t, size_t); +/* +void icmp6_prepare(struct mbuf *); +void icmp6_redirect_input(struct mbuf *, int); +void icmp6_redirect_output(struct mbuf *, struct rtentry *); + +struct ip6ctlparam; +void icmp6_mtudisc_update(struct ip6ctlparam *, int); +*/ + + +#define ofp_icmp6_ifstat_inc(ifp, tag) +#define ofp_icmp6_ifoutstat_inc(ifp, type, code) + +VNET_DECLARE(int, icmp6_rediraccept); /* accept/process redirects */ +VNET_DECLARE(int, icmp6_redirtimeout); /* cache time for redirect routes */ + +#define V_icmp6_rediraccept VNET(icmp6_rediraccept) +#define V_icmp6_redirtimeout VNET(icmp6_redirtimeout) + +void ofp_nd6_ns_input(odp_packet_t, int, int); +enum ofp_return_code ofp_nd6_ns_output(struct ofp_ifnet *, + uint8_t *, uint8_t *); +void ofp_nd6_na_input(odp_packet_t, int, int); + +#endif /* not _OFPI_ICMP6_H_ */ diff --git a/include/ofpi_if_arp.h b/include/ofpi_if_arp.h new file mode 100644 index 00000000..5ae3da90 --- /dev/null +++ b/include/ofpi_if_arp.h @@ -0,0 +1,78 @@ +/* Copyright (c) 2014, ENEA Software AB + * 
Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)if_arp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/net/if_arp.h 219819 2011-03-21 09:40:01Z jeff $ + */ + +#ifndef _OFPI_IF_ARP_H_ +#define _OFPI_IF_ARP_H_ + +/* + * Address Resolution Protocol. + * + * See RFC 826 for protocol description. 
ARP packets are variable + * in size; the arphdr structure defines the fixed-length portion. + * Protocol type values are the same as those for 10 Mb/s Ethernet. + * It is followed by the variable-sized fields ar_sha, arp_spa, + * arp_tha and arp_tpa in that order, according to the lengths + * specified. Field names used correspond to RFC 826. + */ +struct ofp_arphdr { + uint16_t hrd; /* format of hardware address */ +#define OFP_ARPHDR_ETHER 1 /* ethernet hardware format */ +#define OFP_ARPHDR_IEEE802 6 /* token-ring hardware format */ +#define OFP_ARPHDR_ARCNET 7 /* arcnet hardware format */ +#define OFP_ARPHDR_FRELAY 15 /* frame relay hardware format */ +#define OFP_ARPHDR_IEEE1394 24 /* firewire hardware format */ +#define OFP_ARPHDR_INFINIBAND 32 /* infiniband hardware format */ + uint16_t pro; /* format of protocol address */ + uint8_t hln; /* length of hardware address */ + uint8_t pln; /* length of protocol address */ + uint16_t op; /* one of: */ +#define OFP_ARPOP_REQUEST 1 /* request to resolve address */ +#define OFP_ARPOP_REPLY 2 /* response to previous request */ +#define OFP_ARPOP_REVREQUEST 3 /* request protocol address given hardware */ +#define OFP_ARPOP_REVREPLY 4 /* response giving protocol address */ +#define OFP_ARPOP_INVREQUEST 8 /* request to identify peer */ +#define OFP_ARPOP_INVREPLY 9 /* response identifying peer */ + uint8_t eth_src[OFP_ETHER_ADDR_LEN]; + uint32_t ip_src; + + uint8_t eth_dst[OFP_ETHER_ADDR_LEN]; + uint32_t ip_dst; +} __attribute__((packed)); + +#endif diff --git a/include/ofpi_if_gre.h b/include/ofpi_if_gre.h new file mode 100644 index 00000000..e3423bb1 --- /dev/null +++ b/include/ofpi_if_gre.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */ +/* $FreeBSD: release/9.1.0/sys/net/if_gre.h 223223 2011-06-18 09:34:03Z bz $ */ + +/*- + * Copyright (c) 1998 The NetBSD Foundation, Inc. + * All rights reserved + * + * This code is derived from software contributed to The NetBSD Foundation + * by Heiko W.Rupp + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _OFPI_IF_GRE_H +#define _OFPI_IF_GRE_H + +struct ofp_gre_h { + uint16_t flags; /* GRE flags */ + uint16_t ptype; /* protocol type of payload typically + Ether protocol type*/ + uint32_t options[0]; /* optional options */ +/* + * from here on: fields are optional, presence indicated by flags + * + uint_16 checksum checksum (one's complement of GRE header + and payload) + Present if (ck_pres | rt_pres == 1). + Valid if (ck_pres == 1). + uint_16 offset offset from start of routing field to + first octet of active SRE (see below). + Present if (ck_pres | rt_pres == 1). + Valid if (rt_pres == 1). + uint_32 key inserted by encapsulator e.g. for + authentication + Present if (key_pres ==1 ). + uint_32 seq_num Sequence number to allow for packet order + Present if (seq_pres ==1 ). + struct gre_sre[] routing Routing fields (see below) + Present if (rt_pres == 1) + */ +} __attribute__((packed)); + +struct ofp_greip { + struct ofp_ip gi_i; + struct ofp_gre_h gi_g; +} __attribute__((packed)); + +#define gi_pr gi_i.ip_p +#define gi_len gi_i.ip_len +#define gi_src gi_i.ip_src +#define gi_dst gi_i.ip_dst +#define gi_ptype gi_g.ptype +#define gi_flags gi_g.flags +#define gi_options gi_g.options + +#define OFP_GRE_CP 0x8000 /* Checksum Present */ +#define OFP_GRE_RP 0x4000 /* Routing Present */ +#define OFP_GRE_KP 0x2000 /* Key Present */ +#define OFP_GRE_SP 0x1000 /* Sequence Present */ +#define OFP_GRE_SS 0x0800 /* Strict Source Route */ + +#define OFP_GREPROTO_IP 0x0800 + +/* + * gre_sre defines a Source route Entry. These are needed if packets + * should be routed over more than one tunnel hop by hop + */ +struct ofp_gre_sre { + uint16_t sre_family; /* address family */ + uint8_t sre_offset; /* offset to first octet of active entry */ + uint8_t sre_length; /* number of octets in the SRE. + sre_length==0 -> last entry. 
*/ + uint8_t *sre_rtinfo; /* the routing information */ +}; +#endif diff --git a/include/ofpi_if_vlan.h b/include/ofpi_if_vlan.h new file mode 100644 index 00000000..428b31e0 --- /dev/null +++ b/include/ofpi_if_vlan.h @@ -0,0 +1,13 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_IF_VLAN_H_ +#define _OFPI_IF_VLAN_H_ 1 + +#include "api/ofp_if_vlan.h" + +#endif diff --git a/include/ofpi_in.h b/include/ofpi_in.h new file mode 100644 index 00000000..ecb97c18 --- /dev/null +++ b/include/ofpi_in.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: release/9.1.0/sys/netinet/in.h 237910 2012-07-01 08:47:15Z tuexen $ + */ +#ifndef __OFPI_IN_H__ +#define __OFPI_IN_H__ + +#include "ofpi_portconf.h" +#include "ofpi_queue.h" +#include "api/ofp_in.h" + +#include "ofpi_in6.h" + +union ofp_sockaddr_store { + struct ofp_sockaddr_in addr; +#ifdef INET6 + struct ofp_sockaddr_in6 addr6; +#endif /* INET6 */ +}; + +#endif /* __OFPI_IN_H__ */ diff --git a/include/ofpi_in6.h b/include/ofpi_in6.h new file mode 100644 index 00000000..b585d47f --- /dev/null +++ b/include/ofpi_in6.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + + +#ifndef __OFPI_IN6_H__ +#define __OFPI_IN6_H__ + +#include "odp.h" +#include "api/ofp_in6.h" + +extern uint8_t ofp_ip6_protox[]; + +#define OFP_IN6_IS_SOLICITED_NODE_MC(maddr, taddr) \ + ((maddr.ofp_s6_addr16[0] == OFP_IPV6_ADDR_INT16_MLL) && \ + (maddr.ofp_s6_addr16[1] == 0) && \ + (maddr.ofp_s6_addr32[1] == 0) && \ + (maddr.ofp_s6_addr32[2] == OFP_IPV6_ADDR_INT32_ONE) && \ + (maddr.ofp_s6_addr[12] == 0xff) && \ + (maddr.ofp_s6_addr[13] == taddr[13]) && \ + (maddr.ofp_s6_addr[14] == taddr[14]) && \ + (maddr.ofp_s6_addr[15] == taddr[15])) + +struct ofp_sockaddr; +struct ofp_sockaddr_in; +void ofp_in6_sin6_2_sin(struct ofp_sockaddr_in *sin, + struct ofp_sockaddr_in6 *sin6); + +void ofp_in6_sin_2_v4mapsin6 __P((struct ofp_sockaddr_in *sin, + struct ofp_sockaddr_in6 *sin6)); +void ofp_in6_sin6_2_sin_in_sock(struct ofp_sockaddr *nam); + +uint16_t ofp_in6_getscope(struct ofp_in6_addr *); + +struct ofp_ip6_hdr; +int ofp_in6_cksum_pseudo(struct ofp_ip6_hdr *, uint32_t, uint8_t, uint16_t); +int ofp_in6_cksum (odp_packet_t, u_int8_t, u_int32_t, u_int32_t); +int ofp_ip6_cksum(odp_packet_t, uint32_t, uint8_t, uint16_t); + +#endif /* __OFPI_IN6_H__ */ diff --git a/include/ofpi_in6_pcb.h b/include/ofpi_in6_pcb.h new file mode 100644 index 00000000..d9f342d3 --- /dev/null +++ b/include/ofpi_in6_pcb.h @@ -0,0 +1,142 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_pcb.h,v 1.13 2001/02/06 09:16:53 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet6/in6_pcb.h 222748 2011-06-06 + * 12:55:02Z rwatson $ + */ + +#ifndef _NETINET6_IN6_PCB_H_ +#define _NETINET6_IN6_PCB_H_ + +#if 0 +#define satosin6(sa) ((struct sockaddr_in6 *)(sa)) +#define sin6tosa(sin6) ((struct sockaddr *)(sin6)) +#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) + +struct inpcbgroup * + in6_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t); +struct inpcbgroup * + in6_pcbgroup_byinpcb __P((struct inpcb *)); +struct inpcbgroup * + in6_pcbgroup_bymbuf(struct inpcbinfo *, struct mbuf *); +struct inpcbgroup * + in6_pcbgroup_bytuple __P((struct inpcbinfo *, const struct in6_addr *, + u_short, const struct in6_addr *, u_short)); + +void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *)); +void in6_losing __P((struct inpcb *)); +#endif +int ofp_in6_pcbbind __P((struct inpcb *, struct ofp_sockaddr *, + struct ofp_ucred *)); +int ofp_in6_pcbconnect __P((struct inpcb *, struct ofp_sockaddr *, + struct ofp_ucred *)); +int ofp_in6_pcbconnect_mbuf __P((struct inpcb *, struct ofp_sockaddr *, + struct ofp_ucred *, odp_packet_t)); +void ofp_in6_pcbdisconnect __P((struct inpcb *)); +int 
ofp_in6_pcbladdr(struct inpcb *, struct ofp_sockaddr *, + struct ofp_in6_addr *); +struct inpcb * + ofp_in6_pcblookup_local __P((struct inpcbinfo *, + struct ofp_in6_addr *, u_short, int, + struct ofp_ucred *)); + +struct inpcb * + ofp_in6_pcblookup __P((struct inpcbinfo *, struct ofp_in6_addr *, + u_int, struct ofp_in6_addr *, u_int, int, + struct ofp_ifnet *)); + +struct inpcb * + ofp_in6_pcblookup_hash_locked __P((struct inpcbinfo *, + struct ofp_in6_addr *, + u_int, struct ofp_in6_addr *, u_int, int, + struct ofp_ifnet *)); +struct inpcb * + ofp_in6_pcblookup_mbuf __P((struct inpcbinfo *, + struct ofp_in6_addr *, + u_int, struct ofp_in6_addr *, u_int, int, + struct ofp_ifnet *ifp, odp_packet_t m)); +#if 0 +void in6_pcbnotify __P((struct inpcbinfo *, struct sockaddr *, + u_int, const struct sockaddr *, u_int, int, void *, + struct inpcb *(*)(struct inpcb *, int))); +struct inpcb * + in6_rtchange __P((struct inpcb *, int)); +#endif +struct ofp_sockaddr * + ofp_in6_sockaddr __P((ofp_in_port_t port, + struct ofp_in6_addr *addr_p)); + +struct ofp_sockaddr * + ofp_in6_v4mapsin6_sockaddr __P((ofp_in_port_t port, + struct ofp_in_addr *addr_p)); +#if 0 +int in6_getpeeraddr __P((struct socket *so, struct sockaddr **nam)); +int in6_getsockaddr __P((struct socket *so, struct sockaddr **nam)); +int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam)); +int in6_mapped_peeraddr __P((struct socket *so, struct sockaddr **nam)); +int in6_selecthlim __P((struct in6pcb *, struct ifnet *)); +#endif +int ofp_in6_pcbsetport __P((struct ofp_in6_addr *, struct inpcb *, + struct ofp_ucred *)); +void ofp_init_sin6 __P((struct ofp_sockaddr_in6 *sin6, + odp_packet_t pkt)); + + +#endif /* !_NETINET6_IN6_PCB_H_ */ diff --git a/include/ofpi_in_pcb.h b/include/ofpi_in_pcb.h new file mode 100644 index 00000000..5e954f0b --- /dev/null +++ b/include/ofpi_in_pcb.h @@ -0,0 +1,705 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. 
+ * Copyright (c) 2010-2011 Juniper Networks, Inc. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * All rights reserved. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _NETINET_IN_PCB_H_ +#define _NETINET_IN_PCB_H_ + +#define OFP_LOG_X(a...) 
do {} while (0) + +typedef int uma_zone_t; + +#include "ofpi_util.h" +#include "odp.h" +#include "ofpi_log.h" +#include "ofpi_in.h" +#include "ofpi_queue.h" +#include "ofpi_socket.h" +#include "ofpi_portconf.h" +#include "ofpi_udp_var.h" +#include "ofpi_systm.h" + +typedef int64_t * qaddr_t; + +#define in6pcb inpcb /* for KAME src sync over BSD*'s */ +#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ +struct inpcbpolicy; + +/* + * struct inpcb is the common protocol control block structure used in most + * IP transport protocols. + * + * Pointers to local and foreign host table entries, local and foreign socket + * numbers, and pointers up (to a socket structure) and down (to a + * protocol-specific control block) are stored here. + */ +OFP_LIST_HEAD(inpcbhead, inpcb); +OFP_LIST_HEAD(inpcbporthead, inpcbport); +typedef uint64_t inp_gen_t; + +/* + * PCB with OFP_AF_INET6 null bind'ed laddr can receive OFP_AF_INET input packet. + * So, OFP_AF_INET6 null laddr is also used as OFP_AF_INET null laddr, by utilizing + * the following structure. + */ +struct in_addr_4in6 { + uint32_t ia46_pad32[3]; + struct ofp_in_addr ia46_addr4; +}; + +/* + * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has + * some extra padding to accomplish this. 
+ */ +struct in_endpoints { + uint16_t ie_fport; /* foreign port */ + uint16_t ie_lport; /* local port */ + /* protocol dependent part, local and foreign addr */ + union { + /* foreign host table entry */ + struct in_addr_4in6 ie46_foreign; + struct ofp_in6_addr ie6_foreign; + } ie_dependfaddr; + union { + /* local host table entry */ + struct in_addr_4in6 ie46_local; + struct ofp_in6_addr ie6_local; + } ie_dependladdr; +}; +#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4 +#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4 +#define ie6_faddr ie_dependfaddr.ie6_foreign +#define ie6_laddr ie_dependladdr.ie6_local + +/* + * XXX The defines for inc_* are hacks and should be changed to direct + * references. + */ +struct in_conninfo { + uint8_t inc_flags; + uint8_t inc_len; + uint16_t inc_fibnum; /* XXX was pad, 16 bits is plenty */ + uint16_t inc_altfibnum; + /* protocol dependent part */ + struct in_endpoints inc_ie; +}; + +/* + * Flags for inc_flags. + */ +#define INC_ISIPV6 0x01 +#define INC_PASSIVE 0x02 /* connection is being passively reassembled */ +#define INC_PROMISC 0x04 /* connection is promiscuous */ +#define INC_SYNFILTERED 0x08 /* a SYN filter has been applied */ +#define INC_ALTFIB 0x10 /* alternate FIB is set */ +#define INC_CONVONTMO 0x20 /* convert from passive to active on syncache timeout */ + +#define inc_fport inc_ie.ie_fport +#define inc_lport inc_ie.ie_lport +#define inc_faddr inc_ie.ie_faddr +#define inc_laddr inc_ie.ie_laddr +#define inc6_faddr inc_ie.ie6_faddr +#define inc6_laddr inc_ie.ie6_laddr + +struct icmp6_filter; + +/*- + * Global data structure for each high-level protocol (UDP, TCP, ...) in both + * IPv4 and IPv6. Holds inpcb lists and information for managing them. + * + * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock, + * the former covering mutable global fields (such as the global pcb list), + * and the latter covering the hashed lookup tables. 
The lock order is: + * + * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks} + * + * Locking key: + * + * (c) Constant or nearly constant after initialisation + * (g) Locked by ipi_lock + * (h) Read using either ipi_hash_lock or inpcb lock; write requires both + * (p) Protected by one or more pcbgroup locks + * (x) Synchronisation properties poorly defined + */ +struct inpcbinfo { + /* + * Global lock protecting global inpcb list, inpcb count, etc. + */ + struct ofp_rec_rwlock ipi_lock; + //int ipi_lock_cnt; + //int ipi_lock_owner; + /* + * Global list of inpcbs on the protocol. + */ + struct inpcbhead *ipi_listhead; /* (g) */ + uint32_t ipi_count; /* (g) */ + + /* + * Generation count -- incremented each time a connection is allocated + * or freed. + */ + uint64_t ipi_gencnt; /* (g) */ + + /* + * Fields associated with port lookup and allocation. + */ + uint16_t ipi_lastport; /* (x) */ + uint16_t ipi_lastlow; /* (x) */ + uint16_t ipi_lasthi; /* (x) */ + + /* + * UMA zone from which inpcbs are allocated for this protocol. + */ + uma_zone_t ipi_zone; /* (c) */ + + /* + * Connection groups associated with this protocol. These fields are + * constant, but pcbgroup structures themselves are protected by + * per-pcbgroup locks. + */ + struct inpcbgroup *ipi_pcbgroups; /* (c) */ + uint32_t ipi_npcbgroups; /* (c) */ + uint32_t ipi_hashfields; /* (c) */ + + /* + * Global lock protecting non-pcbgroup hash lookup tables. + */ + odp_rwlock_t ipi_hash_lock; + + /* + * Global hash of inpcbs, hashed by local and foreign addresses and + * port numbers. + */ + struct inpcbhead *ipi_hashbase; /* (h) */ + uint64_t ipi_hashmask; /* (h) */ + + /* + * Global hash of inpcbs, hashed by only local port number. + */ + struct inpcbporthead *ipi_porthashbase; /* (h) */ + uint64_t ipi_porthashmask; /* (h) */ + + /* + * List of wildcard inpcbs for use with pcbgroups. In the past, was + * per-pcbgroup but is now global. 
All pcbgroup locks must be held + * to modify the list, so any is sufficient to read it. + */ + struct inpcbhead *ipi_wildbase; /* (p) */ + uint64_t ipi_wildmask; /* (p) */ + + /* + * Pointer to network stack instance + */ + struct vnet *ipi_vnet; /* (c) */ + + /* + * general use 2 + */ + void *ipi_pspare[2]; +}; + +/* + * Connection groups hold sets of connections that have similar CPU/thread + * affinity. Each connection belongs to exactly one connection group. + */ +struct inpcbgroup { + /* + * Per-connection group hash of inpcbs, hashed by local and foreign + * addresses and port numbers. + */ + struct inpcbhead *ipg_hashbase; /* (c) */ + uint64_t ipg_hashmask; /* (c) */ + + /* + * Notional affinity of this pcbgroup. + */ + uint32_t ipg_cpu; /* (p) */ + + /* + * Per-connection group lock, not to be confused with ipi_lock. + * Protects the hash table hung off the group, but also the global + * wildcard list in inpcbinfo. + */ + odp_rwlock_t ipg_lock; +} __attribute__((__aligned__(ODP_CACHE_LINE_SIZE))); + +/*- + * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 + * and IPv6 sockets. In the case of TCP, further per-connection state is + * hung off of inp_ppcb most of the time. Almost all fields of struct inpcb + * are static after creation or protected by a per-inpcb rwlock, inp_lock. A + * few fields also require the global pcbinfo lock for the inpcb to be held, + * when modified, such as the global connection lists and hashes, as well as + * binding information (which affects which hash a connection is on). This + * model means that connections can be looked up without holding the + * per-connection lock, which is important for performance when attempting to + * find the connection for a packet given its IP and port tuple. Writing to + * these fields requires write locks on both the inpcb and global locks.
+ * + * Key: + * (c) - Constant after initialization + * (g) - Protected by the pcbgroup lock + * (i) - Protected by the inpcb lock + * (p) - Protected by the pcbinfo lock for the inpcb + * (s) - Protected by another subsystem's locks + * (x) - Undefined locking + * + * A few other notes: + * + * When a read lock is held, stability of the field is guaranteed; to write + * to a field, a write lock must generally be held. + * + * netinet/netinet6-layer code should not assume that the inp_socket pointer + * is safe to dereference without inp_lock being held, even for protocols + * other than TCP (where the inpcb persists during TIMEWAIT even after the + * socket has been freed), or there may be close(2)-related races. + * + * The inp_vflag field is overloaded, and would otherwise ideally be (c). + */ +struct inpcb { + OFP_LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */ + OFP_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ + OFP_LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */ + void *inp_ppcb; /* (i) pointer to per-protocol pcb */ + union { /* HJo: static space allocation for inp_ppcb */ + struct udpcb udp_ppcb; + } ppcb_space; + struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ + struct inpcbinfo static_pcbinfo; + struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ + struct inpcbgroup static_pcbgroup; + OFP_LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */ + struct socket *inp_socket; /* (i) back pointer to socket */ + struct ofp_ucred *inp_cred; /* (c) cache of socket cred */ + uint32_t inp_flow; /* (i) IPv6 flow information */ + int inp_flags; /* (i) generic IP/datagram flags */ + int inp_flags2; /* (i) generic IP/datagram flags #2*/ + uint8_t inp_vflag; /* (i) IP version flag (v4/v6) */ + uint8_t inp_ip_ttl; /* (i) time to live proto */ + uint8_t inp_ip_p; /* (c) protocol proto */ + uint8_t inp_ip_minttl; /* (i) minimum TTL or drop */ + uint32_t inp_flowid; /* (x) flow id / queue id */ +
odp_atomic_u32_t inp_refcount; /* (i) refcount */ + void *inp_pspare[5]; /* (x) route caching / general use */ + uint32_t inp_ispare[6]; /* (x) route caching / user cookie / + * general use */ + + /* Local and foreign ports, local and foreign addr. */ + struct in_conninfo inp_inc; /* (i/p) list for PCB's local port */ + + /* MAC and IPSEC policy information. */ + struct label *inp_label; /* (i) MAC label */ + struct inpcbpolicy *inp_sp; /* (s) for IPSEC */ + + /* Protocol-dependent part; options. */ + struct { + uint8_t inp4_ip_tos; /* (i) type of service proto */ + odp_packet_t inp4_options; /* (i) IP options */ + struct ip_moptions *inp4_moptions; /* (i) IP mcast options */ + } inp_depend4; + struct { + /* (i) IP options */ + odp_packet_t inp6_options; + /* (i) IP6 options for outgoing packets */ + struct ip6_pktopts *inp6_outputopts; + /* (i) IP multicast options */ + struct ip6_moptions *inp6_moptions; + /* (i) ICMPv6 code type filter */ + struct icmp6_filter *inp6_icmp6filt; + /* (i) IPV6_CHECKSUM setsockopt */ + int inp6_cksum; + short inp6_hops; + } inp_depend6; + OFP_LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */ + struct inpcbport *inp_phd; /* (i/p) head of this list */ +#define inp_zero_size offsetof(struct inpcb, inp_gencnt) + inp_gen_t inp_gencnt; /* (c) generation count */ + struct llentry *inp_lle; /* cached L2 information */ + struct rtentry *inp_rt; /* cached L3 information */ + struct ofp_rec_rwlock inp_lock; + //int inp_lock_cnt; + //int inp_lock_owner; + //const char *lockedby_file; + //int lockedby_line; + uint64_t dummy; +}; +#define inp_fibnum inp_inc.inc_fibnum +#define inp_altfibnum inp_inc.inc_altfibnum +#define inp_fport inp_inc.inc_fport +#define inp_lport inp_inc.inc_lport +#define inp_faddr inp_inc.inc_faddr +#define inp_laddr inp_inc.inc_laddr +#define inp_ip_tos inp_depend4.inp4_ip_tos +#define inp_options inp_depend4.inp4_options +#define inp_moptions inp_depend4.inp4_moptions + +#define in6p_faddr inp_inc.inc6_faddr +#define in6p_laddr 
inp_inc.inc6_laddr +#define in6p_hops inp_depend6.inp6_hops /* default hop limit */ +#define in6p_flowinfo inp_flow +#define in6p_options inp_depend6.inp6_options +#define in6p_outputopts inp_depend6.inp6_outputopts +#define in6p_moptions inp_depend6.inp6_moptions +#define in6p_icmp6filt inp_depend6.inp6_icmp6filt +#define in6p_cksum inp_depend6.inp6_cksum + +#define inp_vnet inp_pcbinfo->ipi_vnet + + +struct inpcbport { + OFP_LIST_ENTRY(inpcbport) phd_hash; + struct inpcbhead phd_pcblist; + uint16_t phd_port; +}; + + +#define INP_LOCK_INIT(inp, d, t) ofp_rec_init(&(inp)->inp_lock, __FILE__, __LINE__) +#define INP_LOCK_DESTROY(inp) +#define INP_RLOCK(inp) ofp_rec_rlock(&(inp)->inp_lock, __FILE__, __LINE__) +#define INP_WLOCK(inp) ofp_rec_wlock(&(inp)->inp_lock, __FILE__, __LINE__) +#define INP_TRY_RLOCK(inp) //rw_try_rlock(&(inp)->inp_lock) +#define INP_TRY_WLOCK(inp) ofp_rec_try_wlock(&(inp)->inp_lock, __FILE__, __LINE__) +#define INP_RUNLOCK(inp) ofp_rec_runlock(&(inp)->inp_lock, __FILE__, __LINE__) +#define INP_WUNLOCK(inp) ofp_rec_wunlock(&(inp)->inp_lock, __FILE__, __LINE__) +#define INP_TRY_UPGRADE(inp) //rw_try_upgrade(&(inp)->inp_lock) +#define INP_DOWNGRADE(inp) //rw_downgrade(&(inp)->inp_lock) +#define INP_WLOCKED(inp) //rw_wowned(&(inp)->inp_lock) +#define INP_LOCK_ASSERT(inp) //rw_assert(&(inp)->inp_lock, RA_LOCKED) +#define INP_RLOCK_ASSERT(inp) //rw_assert(&(inp)->inp_lock, RA_RLOCKED) +#define INP_WLOCK_ASSERT(inp) //rw_assert(&(inp)->inp_lock, RA_WLOCKED) +#define INP_UNLOCK_ASSERT(inp) //rw_assert(&(inp)->inp_lock, RA_UNLOCKED) + +/* + * These locking functions are for inpcb consumers outside of sys/netinet, + * more specifically, they were added for the benefit of TOE drivers. The + * macros are reserved for use by the stack. 
+ */ +void inp_wlock(struct inpcb *); +void inp_wunlock(struct inpcb *); +void inp_rlock(struct inpcb *); +void inp_runlock(struct inpcb *); + +#ifdef INVARIANTS +void inp_lock_assert(struct inpcb *); +void inp_unlock_assert(struct inpcb *); +#else +static __inline void +inp_lock_assert(struct inpcb *inp) +{ + (void)inp; +} + +static __inline void +inp_unlock_assert(struct inpcb *inp) +{ + (void)inp; +} + +#endif + +extern const char *ofp_tcbinfo_locked_by_file; +extern int ofp_tcbinfo_locked_by_line; + +void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg); +int inp_ip_tos_get(const struct inpcb *inp); +void inp_ip_tos_set(struct inpcb *inp, int val); +struct socket * + inp_inpcbtosocket(struct inpcb *inp); +struct tcpcb * + inp_inpcbtotcpcb(struct inpcb *inp); +void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, + uint32_t *faddr, uint16_t *fp); + +#define INP_INFO_LOCK_INIT(ipi, d) ofp_rec_init(&(ipi)->ipi_lock, __FILE__, __LINE__) +#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock) +#define INP_INFO_RLOCK(ipi) ofp_rec_rlock(&(ipi)->ipi_lock, __FILE__, __LINE__) +#define INP_INFO_WLOCK(ipi) ofp_rec_wlock(&(ipi)->ipi_lock, __FILE__, __LINE__) +#define INP_INFO_TRY_RLOCK(ipi) ofp_rwlock_try_read_lock(&(ipi)->ipi_lock) +#define INP_INFO_TRY_WLOCK(ipi) ofp_rec_try_wlock(&(ipi)->ipi_lock, __FILE__, __LINE__) +#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock) +#define INP_INFO_RUNLOCK(ipi) ofp_rec_runlock(&(ipi)->ipi_lock, __FILE__, __LINE__) +#define INP_INFO_WUNLOCK(ipi) ofp_rec_wunlock(&(ipi)->ipi_lock, __FILE__, __LINE__) + +#define RA_LOCKED 0x01 +#define RA_RLOCKED 0x02 +#define RA_WLOCKED 0x04 +#define RA_UNLOCKED 0x00 +#define RA_RECURSED 0x08 +#define RA_NOTRECURSED 0x10 + +extern const char *ofp_tcbinfo_locked_by_file; +extern int ofp_tcbinfo_locked_by_line; + +static inline void rw_assert(struct ofp_rec_rwlock *lock, int mode, const char *file, int line) { + int ok = 0; + (void)file; + 
(void)line; + + switch (mode) { + case RA_LOCKED: ok = lock->lock.cnt.v != 0; break; + case RA_RLOCKED: ok = lock->lock.cnt.v > 0; break; + case RA_WLOCKED: ok = (int32_t)lock->lock.cnt.v < 0; break; + case RA_UNLOCKED: ok = lock->lock.cnt.v == 0; break; + } + + if (!ok) + OFP_LOG_X("file=%s, line=%d, RWLOCK %p WAS NOT %d BUT %d\nLocked by %s:%d\n", + file, line, lock, mode, lock->lock.cnt.v, + ofp_tcbinfo_locked_by_file, ofp_tcbinfo_locked_by_line); +} + +#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED, __FILE__, __LINE__) +#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED, __FILE__, __LINE__) +#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED, __FILE__, __LINE__) +#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED, __FILE__, __LINE__) + +#define INP_HASH_LOCK_INIT(ipi, d) do { printf("%s:%d: hash lock init %p\n",\ + __FILE__,__LINE__,ipi);\ + odp_rwlock_init(&(ipi)->ipi_hash_lock);} while(0) +#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock) + + +#define INP_HASH_RLOCK(ipi) odp_rwlock_read_lock(&(ipi)->ipi_hash_lock) +#define INP_HASH_WLOCK(ipi) odp_rwlock_write_lock(&(ipi)->ipi_hash_lock) +#define INP_HASH_RUNLOCK(ipi) odp_rwlock_read_unlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_WUNLOCK(ipi) odp_rwlock_write_unlock(&(ipi)->ipi_hash_lock) + +#define INP_HASH_LOCK_ASSERT(ipi) //rw_assert(&(ipi)->ipi_hash_lock, RA_LOCKED) +#define INP_HASH_WLOCK_ASSERT(ipi) //rw_assert(&(ipi)->ipi_hash_lock, RA_WLOCKED) + +#define IN_IFADDR_RLOCK() OFP_IFNET_LOCK_READ(ifaddr_list) +#define IN_IFADDR_RUNLOCK() OFP_IFNET_UNLOCK_READ(ifaddr_list) +#define IN_IFADDR_WLOCK() OFP_IFNET_LOCK_WRITE(ifaddr_list) +#define IN_IFADDR_WUNLOCK() OFP_IFNET_UNLOCK_WRITE(ifaddr_list) + +#define IN_IFADDR_LOCK_ASSERT() /*rw_assert(&ofp_ifnet_locks_shm->lock_ifaddr_list_rw, RA_LOCKED)*/ +#define IN_IFADDR_RLOCK_ASSERT() /*rw_assert(&ofp_ifnet_locks_shm->lock_ifaddr_list_rw, 
RA_RLOCKED)*/ +#define IN_IFADDR_WLOCK_ASSERT() /*rw_assert(&ofp_ifnet_locks_shm->lock_ifaddr_list_rw, RA_WLOCKED)*/ + +#define INP_GROUP_LOCK_INIT(ipg, d) //mtx_init(&(ipg)->ipg_lock, (d), NULL, MTX_DEF | MTX_DUPOK) +#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock) + +#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock) +#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED) +#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock) + +#define INP_PCBHASH(faddr, lport, fport, mask) \ + (((faddr) ^ ((faddr) >> 16) ^ odp_be_to_cpu_16((lport) ^ (fport))) & (mask)) +#define INP_PCBPORTHASH(lport, mask) \ + (odp_be_to_cpu_16((lport)) & (mask)) + +/* + * Flags for inp_vflag -- historically version flags only + */ +#define INP_IPV4 0x1 +#define INP_IPV6 0x2 +#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ + +/* + * Flags for inp_flags. + */ +#define INP_RECVOPTS 0x00000001 /* receive incoming IP options */ +#define INP_RECVRETOPTS 0x00000002 /* receive IP options for reply */ +#define INP_RECVDSTADDR 0x00000004 /* receive IP dst address */ +#define INP_HDRINCL 0x00000008 /* user supplies entire IP header */ +#define INP_HIGHPORT 0x00000010 /* user wants "high" port binding */ +#define INP_LOWPORT 0x00000020 /* user wants "low" port binding */ +#define INP_ANONPORT 0x00000040 /* port chosen for user */ +#define INP_RECVIF 0x00000080 /* receive incoming interface */ +#define INP_MTUDISC 0x00000100 /* user can do MTU discovery */ +#define INP_FAITH 0x00000200 /* accept FAITH'ed connections */ +#define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */ +#define INP_DONTFRAG 0x00000800 /* don't fragment packet */ +#define INP_BINDANY 0x00001000 /* allow bind to any address */ +#define INP_INHASHLIST 0x00002000 /* ofp_in_pcbinshash() has been called */ +#define INP_RECVTOS 0x00004000 /* receive incoming IP TOS */ +#define IN6P_IPV6_V6ONLY 0x00008000 /* restrict OFP_AF_INET6 socket for v6 */ +#define IN6P_PKTINFO 0x00010000
/* receive IP6 dst and I/F */ +#define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */ +#define IN6P_HOPOPTS 0x00040000 /* receive hop-by-hop options */ +#define IN6P_DSTOPTS 0x00080000 /* receive dst options after rthdr */ +#define IN6P_RTHDR 0x00100000 /* receive routing header */ +#define IN6P_RTHDRDSTOPTS 0x00200000 /* receive dstoptions before rthdr */ +#define IN6P_TCLASS 0x00400000 /* receive traffic class value */ +#define IN6P_AUTOFLOWLABEL 0x00800000 /* attach flowlabel automatically */ +#define INP_TIMEWAIT 0x01000000 /* in TIMEWAIT, ppcb is tcptw */ +#define INP_ONESBCAST 0x02000000 /* send all-ones broadcast */ +#define INP_DROPPED 0x04000000 /* protocol drop flag */ +#define INP_SOCKREF 0x08000000 /* strong socket reference */ +#define INP_SW_FLOWID 0x10000000 /* software generated flow id */ +#define INP_HW_FLOWID 0x20000000 /* hardware generated flow id */ +#define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */ +#define IN6P_MTU 0x80000000 /* receive path MTU */ + +#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ + INP_RECVIF|INP_RECVTTL|INP_RECVTOS|\ + IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ + IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\ + IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ + IN6P_MTU) + +/* + * Flags for inp_flags2. + */ +#define INP_LLE_VALID 0x00000001 /* cached lle is valid */ +#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */ +#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */ +#define INP_REUSEPORT 0x00000008 /* OFP_SO_REUSEPORT option is set */ +#define INP_PASSIVE 0x00000010 /* passive inet mode enabled */ +#define INP_PROMISC 0x00000020 /* promiscuous inet mode enabled */ +#define INP_SYNFILTER 0x00000040 /* a SYN filter has been attached */ + +/* + * Flags passed to ofp_in_pcblookup*() functions. + */ +#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */ +#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. 
*/ +#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */ + +#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \ + INPLOOKUP_WLOCKPCB) + +#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) +#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ + +#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family + +#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) + +/* + * Constants for pcbinfo.ipi_hashfields. + */ +#define IPI_HASHFIELDS_NONE 0 +#define IPI_HASHFIELDS_2TUPLE 1 +#define IPI_HASHFIELDS_4TUPLE 2 + +#define VNET_DEFINE(t, n) t n + +VNET_DECLARE(int, ofp_ipport_reservedhigh); +VNET_DECLARE(int, ofp_ipport_reservedlow); +VNET_DECLARE(int, ofp_ipport_lowfirstauto); +VNET_DECLARE(int, ofp_ipport_lowlastauto); +VNET_DECLARE(int, ofp_ipport_firstauto); +VNET_DECLARE(int, ofp_ipport_lastauto); +VNET_DECLARE(int, ofp_ipport_hifirstauto); +VNET_DECLARE(int, ofp_ipport_hilastauto); +VNET_DECLARE(int, ofp_ipport_randomized); +VNET_DECLARE(int, ofp_ipport_randomcps); +VNET_DECLARE(int, ofp_ipport_randomtime); +VNET_DECLARE(int, ofp_ipport_stoprandom); +VNET_DECLARE(int, ofp_ipport_tcpallocs); + +#define V_ipport_reservedhigh VNET(ofp_ipport_reservedhigh) +#define V_ipport_reservedlow VNET(ofp_ipport_reservedlow) +#define V_ipport_lowfirstauto VNET(ofp_ipport_lowfirstauto) +#define V_ipport_lowlastauto VNET(ofp_ipport_lowlastauto) +#define V_ipport_firstauto VNET(ofp_ipport_firstauto) +#define V_ipport_lastauto VNET(ofp_ipport_lastauto) +#define V_ipport_hifirstauto VNET(ofp_ipport_hifirstauto) +#define V_ipport_hilastauto VNET(ofp_ipport_hilastauto) +#define V_ipport_randomized VNET(ofp_ipport_randomized) +#define V_ipport_randomcps VNET(ofp_ipport_randomcps) +#define V_ipport_randomtime VNET(ofp_ipport_randomtime) +#define V_ipport_stoprandom VNET(ofp_ipport_stoprandom) +#define V_ipport_tcpallocs VNET(ofp_ipport_tcpallocs) + +void ofp_in_pcbinfo_destroy(struct inpcbinfo *); +void ofp_in_pcbinfo_init(struct 
inpcbinfo *, const char *, struct inpcbhead *, + int, int, const char *, uma_init, uma_fini, uint32_t, uint32_t); + +void ofp_in_pcbinfo_hashstats(struct inpcbinfo *pcbinfo, unsigned int *min, + unsigned int *avg, unsigned int *max); + +struct inpcbgroup * + in_pcbgroup_byhash(struct inpcbinfo *, uint32_t, uint32_t); +struct inpcbgroup * + in_pcbgroup_byinpcb(struct inpcb *); +struct inpcbgroup * + in_pcbgroup_bytuple(struct inpcbinfo *, struct ofp_in_addr, uint16_t, + struct ofp_in_addr, uint16_t); +void in_pcbgroup_destroy(struct inpcbinfo *); +int in_pcbgroup_enabled(struct inpcbinfo *); +void in_pcbgroup_init(struct inpcbinfo *, uint32_t, int); +void in_pcbgroup_remove(struct inpcb *); +void in_pcbgroup_update(struct inpcb *); +void in_pcbgroup_update_mbuf(struct inpcb *, odp_packet_t ); + +void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); +int ofp_in_pcballoc(struct socket *, struct inpcbinfo *); +int ofp_in_pcbbind(struct inpcb *, struct ofp_sockaddr *, struct ofp_ucred *); +int ofp_in_pcb_lport(struct inpcb *, struct ofp_in_addr *, uint16_t *, + struct ofp_ucred *, int); +int ofp_in_pcbbind_setup(struct inpcb *, struct ofp_sockaddr *, ofp_in_addr_t *, + uint16_t *, struct ofp_ucred *); +int ofp_in_pcbconnect(struct inpcb *, struct ofp_sockaddr *, struct ofp_ucred *); +int ofp_in_pcbconnect_mbuf(struct inpcb *, struct ofp_sockaddr *, struct ofp_ucred *, + odp_packet_t ); +int ofp_in_pcbconnect_setup(struct inpcb *, struct ofp_sockaddr *, ofp_in_addr_t *, + uint16_t *, ofp_in_addr_t *, uint16_t *, struct inpcb **, + struct ofp_ucred *); +void ofp_in_pcbdetach(struct inpcb *); +void ofp_in_pcbdisconnect(struct inpcb *); +void ofp_in_pcbdrop(struct inpcb *); +void ofp_in_pcbfree(struct inpcb *); +int ofp_in_pcbinshash(struct inpcb *); +int ofp_in_pcbinshash_nopcbgroup(struct inpcb *); +struct inpcb * + ofp_in_pcblookup_local(struct inpcbinfo *, + struct ofp_in_addr, uint16_t, int, struct ofp_ucred *); +struct inpcb * + ofp_in_pcblookup(struct inpcbinfo *, 
struct ofp_in_addr, uint32_t, + struct ofp_in_addr, uint32_t, int, struct ofp_ifnet *); +struct inpcb * +ofp_in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct ofp_in_addr faddr, + uint32_t fport, struct ofp_in_addr laddr, uint32_t lport, + int lookupflags, struct ofp_ifnet *ifp, odp_packet_t m); + +void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct ofp_in_addr, + int, struct inpcb *(*)(struct inpcb *, int)); +void ofp_in_pcbref(struct inpcb *); +void ofp_in_pcbrehash(struct inpcb *); +void ofp_in_pcbrehash_mbuf(struct inpcb *, odp_packet_t ); +int in_pcbrele(struct inpcb *); +int ofp_in_pcbrele_rlocked(struct inpcb *); +int ofp_in_pcbrele_wlocked(struct inpcb *); +void in_pcbsetsolabel(struct socket *so); +int ofp_in_getpeeraddr(struct socket *so, struct ofp_sockaddr **nam); +int ofp_in_getsockaddr(struct socket *so, struct ofp_sockaddr **nam); +struct ofp_sockaddr * + ofp_in_sockaddr(ofp_in_port_t port, struct ofp_in_addr *addr); +void ofp_in_pcbsosetlabel(struct socket *so); + +#endif /* !_NETINET_IN_PCB_H_ */ diff --git a/include/ofpi_inet.h b/include/ofpi_inet.h new file mode 100644 index 00000000..93b1ec59 --- /dev/null +++ b/include/ofpi_inet.h @@ -0,0 +1,13 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFPI_INET_H__ +#define __OFPI_INET_H__ + +int ofp_inet_init(void); + +#endif /* __OFPI_INET_H__ */ diff --git a/include/ofpi_ioctl.h b/include/ofpi_ioctl.h new file mode 100644 index 00000000..7e9aa6f4 --- /dev/null +++ b/include/ofpi_ioctl.h @@ -0,0 +1,22 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_IOCTL_H_ +#define _OFPI_IOCTL_H_ + +#include "api/ofp_ioctl.h" + +extern odp_rwlock_t ofp_in_ifaddr_lock; +struct thread; +struct ofp_ucred; + +int ofp_soo_ioctl(struct socket *so, uint32_t cmd, void *data, + struct ofp_ucred *active_cred, struct thread *td); +int 
ofp_in_control(struct socket *so, uint32_t cmd, char * data, struct ofp_ifnet *ifp, + struct thread *td); + +#endif diff --git a/include/ofpi_ip.h b/include/ofpi_ip.h new file mode 100644 index 00000000..6492a922 --- /dev/null +++ b/include/ofpi_ip.h @@ -0,0 +1,12 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_IP_H_ +#define _OFPI_IP_H_ + +#include "api/ofp_ip.h" + +#endif diff --git a/include/ofpi_ip6.h b/include/ofpi_ip6.h new file mode 100644 index 00000000..789168b9 --- /dev/null +++ b/include/ofpi_ip6.h @@ -0,0 +1,12 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_IP6_H_ +#define _OFPI_IP6_H_ + +#include "api/ofp_ip6.h" + +#endif diff --git a/include/ofpi_ip6_var.h b/include/ofpi_ip6_var.h new file mode 100644 index 00000000..92312b75 --- /dev/null +++ b/include/ofpi_ip6_var.h @@ -0,0 +1,478 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_var.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet6/ip6_var.h 238233 2012-07-08 11:42:21Z bz $ + */ + +#ifndef _NETINET6_IP6_VAR_H_ +#define _NETINET6_IP6_VAR_H_ + +#include +#include "ofpi_in_pcb.h" +#include "ofpi_in6.h" +#include "ofpi_vnet.h" +#include "ofpi_systm.h" + +VNET_DECLARE(int, ip6_v6only); +#define V_ip6_v6only VNET(ip6_v6only) + +VNET_DECLARE(int, ip6_auto_flowlabel); +#define V_ip6_auto_flowlabel VNET(ip6_auto_flowlabel) + +VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope + * zone when unspecified */ +#define V_ip6_use_defzone VNET(ip6_use_defzone) + +VNET_DECLARE(int, ip6_defhlim); /* default hop limit */ + +#define V_ip6_defhlim VNET(ip6_defhlim) + +#if 0 +/* + * IP6 reassembly queue structure. Each fragment + * being reassembled is attached to one of these structures. 
+ */ +struct ip6q { + struct ip6asfrag *ip6q_down; + struct ip6asfrag *ip6q_up; + u_int32_t ip6q_ident; + u_int8_t ip6q_nxt; + u_int8_t ip6q_ecn; + u_int8_t ip6q_ttl; + struct in6_addr ip6q_src, ip6q_dst; + struct ip6q *ip6q_next; + struct ip6q *ip6q_prev; + int ip6q_unfrglen; /* len of unfragmentable part */ +#ifdef notyet + u_char *ip6q_nxtp; +#endif + int ip6q_nfrag; /* # of fragments */ + struct label *ip6q_label; +}; + +struct ip6asfrag { + struct ip6asfrag *ip6af_down; + struct ip6asfrag *ip6af_up; + struct mbuf *ip6af_m; + int ip6af_offset; /* offset in ip6af_m to next header */ + int ip6af_frglen; /* fragmentable part length */ + int ip6af_off; /* fragment offset */ + u_int16_t ip6af_mff; /* more fragment bit in frag off */ +}; + +#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) + +/* + * Structure attached to inpcb.in6p_moptions and + * passed to ip6_output when IPv6 multicast options are in use. + * This structure is lazy-allocated. + */ +struct ip6_moptions { + struct ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */ + u_char im6o_multicast_hlim; /* hoplimit for outgoing multicasts */ + u_char im6o_multicast_loop; /* 1 => hear sends if a member */ + u_short im6o_num_memberships; /* no. 
memberships this socket */ + u_short im6o_max_memberships; /* max memberships this socket */ + struct in6_multi **im6o_membership; /* group memberships */ + struct in6_mfilter *im6o_mfilters; /* source filters */ +}; + +/* + * Control options for outgoing packets + */ + +/* Routing header related info */ +struct ip6po_rhinfo { + struct ip6_rthdr *ip6po_rhi_rthdr; /* Routing header */ + struct route_in6 ip6po_rhi_route; /* Route to the 1st hop */ +}; +#define ip6po_rthdr ip6po_rhinfo.ip6po_rhi_rthdr +#define ip6po_route ip6po_rhinfo.ip6po_rhi_route + +/* Nexthop related info */ +struct ip6po_nhinfo { + struct sockaddr *ip6po_nhi_nexthop; + struct route_in6 ip6po_nhi_route; /* Route to the nexthop */ +}; +#define ip6po_nexthop ip6po_nhinfo.ip6po_nhi_nexthop +#define ip6po_nextroute ip6po_nhinfo.ip6po_nhi_route + +struct ip6_pktopts { + struct mbuf *ip6po_m; /* Pointer to mbuf storing the data */ + int ip6po_hlim; /* Hoplimit for outgoing packets */ + + /* Outgoing IF/address information */ + struct in6_pktinfo *ip6po_pktinfo; + + /* Next-hop address information */ + struct ip6po_nhinfo ip6po_nhinfo; + + struct ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */ + + /* Destination options header (before a routing header) */ + struct ip6_dest *ip6po_dest1; + + /* Routing header related info. 
*/ + struct ip6po_rhinfo ip6po_rhinfo; + + /* Destination options header (after a routing header) */ + struct ip6_dest *ip6po_dest2; + + int ip6po_tclass; /* traffic class */ + + int ip6po_minmtu; /* fragment vs PMTU discovery policy */ +#define IP6PO_MINMTU_MCASTONLY -1 /* default; send at min MTU for multicast*/ +#define IP6PO_MINMTU_DISABLE 0 /* always perform pmtu disc */ +#define IP6PO_MINMTU_ALL 1 /* always send at min MTU */ + + int ip6po_prefer_tempaddr; /* whether temporary addresses are + preferred as source address */ +#define IP6PO_TEMPADDR_SYSTEM -1 /* follow the system default */ +#define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */ +#define IP6PO_TEMPADDR_PREFER 1 /* prefer temporary address */ + + int ip6po_flags; +#if 0 /* parameters in this block is obsolete. do not reuse the values. */ +#define IP6PO_REACHCONF 0x01 /* upper-layer reachability confirmation. */ +#define IP6PO_MINMTU 0x02 /* use minimum MTU (IPV6_USE_MIN_MTU) */ +#endif +#define IP6PO_DONTFRAG 0x04 /* disable fragmentation (IPV6_DONTFRAG) */ +#define IP6PO_USECOA 0x08 /* use care of address */ +}; + +/* + * Control options for incoming packets + */ + +struct ip6stat { + u_quad_t ip6s_total; /* total packets received */ + u_quad_t ip6s_tooshort; /* packet too short */ + u_quad_t ip6s_toosmall; /* not enough data */ + u_quad_t ip6s_fragments; /* fragments received */ + u_quad_t ip6s_fragdropped; /* frags dropped(dups, out of space) */ + u_quad_t ip6s_fragtimeout; /* fragments timed out */ + u_quad_t ip6s_fragoverflow; /* fragments that exceeded limit */ + u_quad_t ip6s_forward; /* packets forwarded */ + u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */ + u_quad_t ip6s_redirectsent; /* packets forwarded on same net */ + u_quad_t ip6s_delivered; /* datagrams delivered to upper level*/ + u_quad_t ip6s_localout; /* total ip packets generated here */ + u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. 
*/ + u_quad_t ip6s_reassembled; /* total packets reassembled ok */ + u_quad_t ip6s_fragmented; /* datagrams successfully fragmented */ + u_quad_t ip6s_ofragments; /* output fragments created */ + u_quad_t ip6s_cantfrag; /* don't fragment flag was set, etc. */ + u_quad_t ip6s_badoptions; /* error in option processing */ + u_quad_t ip6s_noroute; /* packets discarded due to no route */ + u_quad_t ip6s_badvers; /* ip6 version != 6 */ + u_quad_t ip6s_rawout; /* total raw ip packets generated */ + u_quad_t ip6s_badscope; /* scope error */ + u_quad_t ip6s_notmember; /* don't join this multicast group */ + u_quad_t ip6s_nxthist[256]; /* next header history */ + u_quad_t ip6s_m1; /* one mbuf */ + u_quad_t ip6s_m2m[32]; /* two or more mbuf */ + u_quad_t ip6s_mext1; /* one ext mbuf */ + u_quad_t ip6s_mext2m; /* two or more ext mbuf */ + u_quad_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */ + u_quad_t ip6s_nogif; /* no match gif found */ + u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */ + + /* + * statistics for improvement of the source address selection + * algorithm: + * XXX: hardcoded 16 = # of ip6 multicast scope types + 1 + */ + /* number of times that address selection fails */ + u_quad_t ip6s_sources_none; + /* number of times that an address on the outgoing I/F is chosen */ + u_quad_t ip6s_sources_sameif[16]; + /* number of times that an address on a non-outgoing I/F is chosen */ + u_quad_t ip6s_sources_otherif[16]; + /* + * number of times that an address that has the same scope + * from the destination is chosen. + */ + u_quad_t ip6s_sources_samescope[16]; + /* + * number of times that an address that has a different scope + * from the destination is chosen. + */ + u_quad_t ip6s_sources_otherscope[16]; + /* number of times that a deprecated address is chosen */ + u_quad_t ip6s_sources_deprecated[16]; + + /* number of times that each rule of source selection is applied. 
*/ + u_quad_t ip6s_sources_rule[16]; +}; + +#ifdef _KERNEL +#define IP6STAT_ADD(name, val) V_ip6stat.name += (val) +#define IP6STAT_SUB(name, val) V_ip6stat.name -= (val) +#define IP6STAT_INC(name) IP6STAT_ADD(name, 1) +#define IP6STAT_DEC(name) IP6STAT_SUB(name, 1) +#endif + +#ifdef _KERNEL +/* + * IPv6 onion peeling state. + * it will be initialized when we come into ip6_input(). + * XXX do not make it a kitchen sink! + */ +struct ip6aux { + u_int32_t ip6a_flags; +#define IP6A_SWAP 0x01 /* swapped home/care-of on packet */ +#define IP6A_HASEEN 0x02 /* HA was present */ +#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */ +#define IP6A_RTALERTSEEN 0x08 /* rtalert present */ + + /* ip6.ip6_src */ + struct in6_addr ip6a_careof; /* care-of address of the peer */ + struct in6_addr ip6a_home; /* home address of the peer */ + u_int16_t ip6a_bruid; /* BR unique identifier */ + + /* ip6.ip6_dst */ + struct in6_ifaddr *ip6a_dstia6; /* my ifaddr that matches ip6_dst */ + + /* rtalert */ + u_int16_t ip6a_rtalert; /* rtalert option value */ + + /* + * decapsulation history will be here. + * with IPsec it may not be accurate. + */ +}; +#endif + +#ifdef _KERNEL +/* flags passed to ip6_output as last parameter */ +#define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */ +#define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */ +#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */ + +#ifdef __NO_STRICT_ALIGNMENT +#define IP6_HDR_ALIGNED_P(ip) 1 +#else +#define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0) +#endif + +VNET_DECLARE(struct ip6stat, ip6stat); /* statistics */ +VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */ +VNET_DECLARE(int, ip6_forwarding); /* act as router? */ +VNET_DECLARE(int, ip6_use_deprecated); /* allow deprecated addr as source */ +VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix + * walk list every 5 sec. 
*/ +VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */ +#define V_ip6stat VNET(ip6stat) +#define V_ip6_defmcasthlim VNET(ip6_defmcasthlim) +#define V_ip6_forwarding VNET(ip6_forwarding) +#define V_ip6_use_deprecated VNET(ip6_use_deprecated) +#define V_ip6_rr_prune VNET(ip6_rr_prune) +#define V_ip6_mcast_pmtu VNET(ip6_mcast_pmtu) + +VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */ +VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */ +VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly + * queue */ +VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly + * queue */ +VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */ +VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */ +VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA + * receiving IF. */ +VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when + forwarding enabled */ +VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */ +VNET_DECLARE(int, ip6_log_interval); +VNET_DECLARE(time_t, ip6_log_time); +VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension + * headers */ +VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ +#define V_ip6_mrouter VNET(ip6_mrouter) +#define V_ip6_sendredirects VNET(ip6_sendredirects) +#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) +#define V_ip6_maxfrags VNET(ip6_maxfrags) +#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv) +#define V_ip6_no_radr VNET(ip6_no_radr) +#define V_ip6_norbit_raif VNET(ip6_norbit_raif) +#define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3) +#define V_ip6_keepfaith VNET(ip6_keepfaith) +#define V_ip6_log_interval VNET(ip6_log_interval) +#define V_ip6_log_time VNET(ip6_log_time) +#define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit) +#define V_ip6_dad_count VNET(ip6_dad_count) + +VNET_DECLARE(int, ip6_auto_linklocal); +#define V_ip6_auto_linklocal 
VNET(ip6_auto_linklocal) + +VNET_DECLARE(int, ip6_use_tempaddr); /* Whether to use temporary addresses */ +VNET_DECLARE(int, ip6_prefer_tempaddr); /* Whether to prefer temporary + * addresses in the source address + * selection */ +#define V_ip6_use_tempaddr VNET(ip6_use_tempaddr) +#define V_ip6_prefer_tempaddr VNET(ip6_prefer_tempaddr) + +VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ +#define V_inet6_pfil_hook VNET(inet6_pfil_hook) +#ifdef IPSTEALTH +VNET_DECLARE(int, ip6stealth); +#define V_ip6stealth VNET(ip6stealth) +#endif + +extern struct pr_usrreqs rip6_usrreqs; +struct sockopt; + +struct inpcb; + +int icmp6_ctloutput __P((struct socket *, struct sockopt *sopt)); + +struct in6_ifaddr; +#endif /*_KERNEL*/ +#endif /*0 */ + +void ofp_ip6_init (void); +#ifdef VIMAGE +void ofp_ip6_destroy (void); +#endif + +int ofp_ip6_input(odp_packet_t, int *, int *); +int ofp_ip6_none_input(odp_packet_t, int *, int *); + +#if 0 +#ifdef _KERNEL +int ip6proto_register(short); +int ip6proto_unregister(short); + +void ip6_input __P((struct mbuf *)); +struct in6_ifaddr *ip6_getdstifaddr __P((struct mbuf *)); +void ip6_freepcbopts __P((struct ip6_pktopts *)); + +int ip6_unknown_opt __P((u_int8_t *, struct mbuf *, int)); +char * ip6_get_prevhdr __P((struct mbuf *, int)); +int ip6_nexthdr __P((struct mbuf *, int, int, int *)); +int ip6_lasthdr __P((struct mbuf *, int, int, int *)); + +#ifdef __notyet__ +struct ip6aux *ip6_findaux __P((struct mbuf *)); +#endif + +extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, + struct mbuf *); + +int ip6_process_hopopts __P((struct mbuf *, u_int8_t *, int, u_int32_t *, + u_int32_t *)); +struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *, + struct mbuf **, int *); +void ip6_savecontrol __P((struct inpcb *, struct mbuf *, struct mbuf **)); +void ip6_notify_pmtu __P((struct inpcb *, struct sockaddr_in6 *, + u_int32_t *)); +int ip6_sysctl __P((int *, u_int, void *, size_t *, void *, size_t)); + +void 
ip6_forward __P((struct mbuf *, int)); + +void ip6_mloopback __P((struct ifnet *, struct mbuf *, struct sockaddr_in6 *)); +int ip6_output __P((struct mbuf *, struct ip6_pktopts *, + struct route_in6 *, + int, + struct ip6_moptions *, struct ifnet **, + struct inpcb *)); +int ip6_ctloutput __P((struct socket *, struct sockopt *)); +int ip6_raw_ctloutput __P((struct socket *, struct sockopt *)); +void ip6_initpktopts __P((struct ip6_pktopts *)); +int ip6_setpktopts __P((struct mbuf *, struct ip6_pktopts *, + struct ip6_pktopts *, struct ofp_ucred *, int)); +void ip6_clearpktopts __P((struct ip6_pktopts *, int)); +struct ip6_pktopts *ip6_copypktopts __P((struct ip6_pktopts *, int)); +int ip6_optlen __P((struct inpcb *)); + +int route6_input __P((struct mbuf **, int *, int)); + +void frag6_init __P((void)); +int frag6_input __P((struct mbuf **, int *, int)); +void frag6_slowtimo __P((void)); +void frag6_drain __P((void)); + +void rip6_init __P((void)); +int rip6_input __P((struct mbuf **, int *, int)); +void rip6_ctlinput __P((int, struct sockaddr *, void *)); +int rip6_ctloutput __P((struct socket *, struct sockopt *)); +int rip6_output __P((struct mbuf *, ...)); +int rip6_usrreq __P((struct socket *, + int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *)); + +int dest6_input __P((struct mbuf **, int *, int)); +int none_input __P((struct mbuf **, int *, int)); + +int in6_selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, + struct ip6_moptions *, struct route_in6 *, struct ifnet **, + struct rtentry **)); +int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *, + struct ip6_moptions *, struct route_in6 *, struct ifnet **, + struct rtentry **, u_int); +#endif /* _KERNEL */ +#endif + +int ofp_in6_selectsrc(struct ofp_sockaddr_in6 *, void *, + struct inpcb *inp, void *, struct ofp_ucred *cred, + struct ofp_ifnet **, struct ofp_in6_addr *); + +uint32_t ofp_ip6_randomid __P((void)); +uint32_t ofp_ip6_randomflowlabel __P((void)); + 
+#endif /* !_NETINET6_IP6_VAR_H_ */ diff --git a/include/ofpi_ip6protosw.h b/include/ofpi_ip6protosw.h new file mode 100644 index 00000000..ee0a7607 --- /dev/null +++ b/include/ofpi_ip6protosw.h @@ -0,0 +1,152 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6protosw.h,v 1.25 2001/09/26 06:13:03 keiichi Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)protosw.h 8.1 (Berkeley) 6/2/93 + * BSDI protosw.h,v 2.3 1996/10/11 16:02:40 pjd Exp + * $FreeBSD: release/9.1.0/sys/netinet6/ip6protosw.h 193731 2009-06-08 17:15:40Z zec $ + */ + +#ifndef _NETINET6_IP6PROTOSW_H_ +#define _NETINET6_IP6PROTOSW_H_ + +#include "odp.h" +#include "ofpi_domain.h" + +/* + * Protocol switch table for IPv6. 
+ * All other definitions should refer to sys/protosw.h + */ + +struct mbuf; +struct sockaddr; +struct socket; +struct domain; +struct thread; +struct ip6_hdr; +struct icmp6_hdr; +struct in6_addr; +struct pr_usrreqs; + +/* + * argument type for the last arg of pr_ctlinput(). + * should be consulted only with AF_INET6 family. + * + * IPv6 ICMP IPv6 [exthdrs] finalhdr payload + * ^ ^ ^ ^ + * | | ip6c_ip6 ip6c_off + * | ip6c_icmp6 + * ip6c_m + * + * ip6c_finaldst usually points to ip6c_ip6->ip6_dst. if the original + * (internal) packet carries a routing header, it may point to the final + * destination address in the routing header. + * + * ip6c_src: ip6c_ip6->ip6_src + scope info + flowlabel in ip6c_ip6 + * (beware of flowlabel, if you try to compare it against others) + * ip6c_dst: ip6c_finaldst + scope info + */ +#if 0 +struct ip6ctlparam { + struct mbuf *ip6c_m; /* start of mbuf chain */ + struct icmp6_hdr *ip6c_icmp6; /* icmp6 header of target packet */ + struct ip6_hdr *ip6c_ip6; /* ip6 header of target packet */ + int ip6c_off; /* offset of the target proto header */ + struct sockaddr_in6 *ip6c_src; /* srcaddr w/ additional info */ + struct sockaddr_in6 *ip6c_dst; /* (final) dstaddr w/ additional info */ + struct in6_addr *ip6c_finaldst; /* final destination address */ + void *ip6c_cmdarg; /* control command dependent data */ + u_int8_t ip6c_nxt; /* final next header field */ +}; +#endif + +struct ip6protosw { + short pr_type; /* socket type used for */ + struct domain *pr_domain; /* domain protocol a member of */ + short pr_protocol; /* protocol number */ + short pr_flags; /* see below */ + +/* protocol-protocol hooks */ + int (*pr_input) /* input to protocol (from below) */ + __P((odp_packet_t, int *, int *)); + int (*pr_output) /* output to protocol (from above) */ + __P((odp_packet_t, ...)); + void (*pr_ctlinput) /* control input (from below) */ + __P((int, struct ofp_sockaddr *, void *)); + int (*pr_ctloutput) /* control output (from above) */ + __P((struct 
socket *, struct sockopt *)); + +/* utility hooks */ + void (*pr_init) /* initialization hook */ + __P((void)); + void (*pr_destroy) /* cleanup hook */ + __P((void)); + + void (*pr_fasttimo) /* fast timeout (200ms) */ + __P((void)); + void (*pr_slowtimo) /* slow timeout (500ms) */ + __P((void)); + void (*pr_drain) /* flush any excess space possible */ + __P((void)); + struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ +}; + +extern struct ip6protosw ofp_inet6sw[]; +extern struct domain ofp_inet6domain; + +#endif /* !_NETINET6_IP6PROTOSW_H_ */ diff --git a/include/ofpi_ip_var.h b/include/ofpi_ip_var.h new file mode 100644 index 00000000..dac2af15 --- /dev/null +++ b/include/ofpi_ip_var.h @@ -0,0 +1,226 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_var.h 8.2 (Berkeley) 1/9/95 + * $FreeBSD: release/9.1.0/sys/netinet/ip_var.h 223666 2011-06-29 10:06:58Z ae $ + */ + +#ifndef _OFPI_IP_VAR_H_ +#define _OFPI_IP_VAR_H_ + +#include "ofpi_queue.h" +#include "ofpi_socket.h" +#include "ofpi_vnet.h" + +#include "api/ofp_ip_var.h" + +/* + * In-kernel consumers can use these accessor macros directly to update + * stats. + */ +#define IPSTAT_ADD(name, val) V_ipstat.name += (val) +#define IPSTAT_SUB(name, val) V_ipstat.name -= (val) +#define IPSTAT_INC(name) IPSTAT_ADD(name, 1) +#define IPSTAT_DEC(name) IPSTAT_SUB(name, 1) + +/* + * Kernel module consumers must use this accessor macro. 
+ */ +void kmod_ipstat_inc(int statnum); +#define KMOD_IPSTAT_INC(name) \ + kmod_ipstat_inc(offsetof(struct ofp_ipstat, name) / sizeof(uint64_t)) +void kmod_ipstat_dec(int statnum); +#define KMOD_IPSTAT_DEC(name) \ + kmod_ipstat_dec(offsetof(struct ofp_ipstat, name) / sizeof(uint64_t)) + +/* flags passed to ip_output as last parameter */ +#define IP_FORWARDING 0x1 /* most of ip header exists */ +#define IP_RAWOUTPUT 0x2 /* raw ip header exists */ +#define IP_SENDONES 0x4 /* send all-ones broadcast */ +#define IP_SENDTOIF 0x8 /* send on specific ifnet */ +#define IP_ROUTETOIF OFP_SO_DONTROUTE /* 0x10 bypass routing tables */ +#define IP_ALLOWBROADCAST OFP_SO_BROADCAST /* 0x20 can send broadcast packets */ + +/* + * mbuf flag used by ip_fastfwd + */ +#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */ + +#ifdef __NO_STRICT_ALIGNMENT +#define IP_HDR_ALIGNED_P(ip) 1 +#else +#define IP_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0) +#endif + +struct ofp_ip; +struct inpcb; +struct route; +struct sockopt; + +VNET_DECLARE(struct ofp_ipstat, ofp_ipstat); +VNET_DECLARE(uint16_t, ofp_ip_id); /* ip packet ctr, for ids */ +VNET_DECLARE(int, ofp_ip_defttl); /* default IP ttl */ +VNET_DECLARE(int, ofp_ipforwarding); /* ip forwarding */ +#ifdef IPSTEALTH +VNET_DECLARE(int, ipstealth); /* stealth forwarding */ +#endif +extern uint8_t ofp_ip_protox[]; +extern uint8_t ofp_ip_protox_udp; +extern uint8_t ofp_ip_protox_tcp; +extern uint8_t ofp_ip_protox_gre; +VNET_DECLARE(struct socket *, ofp_ip_rsvpd); /* reservation protocol daemon*/ +VNET_DECLARE(struct socket *, ofp_ip_mrouter); /* multicast routing daemon */ +extern int (*legal_vif_num)(int); +extern uint64_t (*ip_mcast_src)(int); +VNET_DECLARE(int, ofp_rsvp_on); +extern struct pr_usrreqs rip_usrreqs; + +#define V_ipstat VNET(ofp_ipstat) +#define V_ip_id VNET(ofp_ip_id) +#define V_ip_defttl VNET(ofp_ip_defttl) +#define V_ipforwarding VNET(ofp_ipforwarding) +#ifdef IPSTEALTH +#define V_ipstealth VNET(ipstealth) +#endif 
+#define V_ip_rsvpd VNET(ofp_ip_rsvpd) +#define V_ip_mrouter VNET(ofp_ip_mrouter) +#define V_rsvp_on VNET(ofp_rsvp_on) + +void inp_freemoptions(struct ip_moptions *); +int inp_getmoptions(struct inpcb *, struct sockopt *); +int inp_setmoptions(struct inpcb *, struct sockopt *); + +int ip_ctloutput(struct socket *, struct sockopt *sopt); +void ip_drain(void); +int ip_fragment(struct ofp_ip *ip, odp_packet_t *m_frag, int mtu, + uint64_t if_hwassist_flags, int sw_csum); +void ip_forward(odp_packet_t m, int srcrt); + +void ofp_ip_init(void); +#ifdef VIMAGE +void ofp_ip_destroy(void); +#endif +int ofp_ip_input(odp_packet_t , int); + +extern int + (*ip_mforward)(struct ofp_ip *, struct ifnet *, odp_packet_t , + struct ip_moptions *); +int ip_output(odp_packet_t , + odp_packet_t , struct route *, int, struct ip_moptions *, + struct inpcb *); +int ipproto_register(short); +int ipproto_unregister(short); +odp_packet_t + ip_reass(odp_packet_t ); +struct ofp_ifnet * + ip_rtaddr(struct ofp_in_addr, uint32_t fibnum); +void ip_savecontrol(struct inpcb *, odp_packet_t *, struct ofp_ip *, + odp_packet_t ); +void ip_slowtimo(void); +u_int16_t ip_randomid(void); +int rip_ctloutput(struct socket *, struct sockopt *); +void rip_ctlinput(int, struct ofp_sockaddr *, void *); +void rip_init(void); +void rip_input(odp_packet_t , int); +int rip_output(odp_packet_t , struct socket *, uint64_t); +void ipip_input(odp_packet_t , int); +void rsvp_input(odp_packet_t , int); +int ip_rsvp_init(struct socket *); +int ip_rsvp_done(void); +extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *); +extern void (*ip_rsvp_force_done)(struct socket *); +extern void (*rsvp_input_p)(odp_packet_t m, int off); + +#if 0 +VNET_DECLARE(struct pfil_head, inet_pfil_hook); /* packet filter hooks */ +#define V_inet_pfil_hook VNET(inet_pfil_hook) +#endif + +void in_delayed_cksum(odp_packet_t m); + +/* Hooks for ipfw, dummynet, divert etc. 
Most are declared in raw_ip.c */ +/* + * Reference to an ipfw or packet filter rule that can be carried + * outside critical sections. + * A rule is identified by rulenum:rule_id which is ordered. + * In version chain_id the rule can be found in slot 'slot', so + * we don't need a lookup if chain_id == chain->id. + * + * On exit from the firewall this structure refers to the rule after + * the matching one (slot points to the new rule; rulenum:rule_id-1 + * is the matching rule), and additional info (e.g. info often contains + * the insn argument or tablearg in the low 16 bits, in host format). + * On entry, the structure is valid if slot>0, and refers to the starting + * rules. 'info' contains the reason for reinject, e.g. divert port, + * divert direction, and so on. + */ +struct ipfw_rule_ref { + uint32_t slot; /* slot for matching rule */ + uint32_t rulenum; /* matching rule number */ + uint32_t rule_id; /* matching rule id */ + uint32_t chain_id; /* ruleset id */ + uint32_t info; /* see below */ +}; + +enum { + IPFW_INFO_MASK = 0x0000ffff, + IPFW_INFO_OUT = 0x00000000, /* outgoing, just for convenience */ + IPFW_INFO_IN = 0x80000000, /* incoming, overloads dir */ + IPFW_ONEPASS = 0x40000000, /* One-pass, do not reinject */ + IPFW_IS_MASK = 0x30000000, /* which source ? */ + IPFW_IS_DIVERT = 0x20000000, + IPFW_IS_DUMMYNET =0x10000000, + IPFW_IS_PIPE = 0x08000000, /* pip1=1, queue = 0 */ +}; +#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */ +#define MTAG_IPFW_RULE 1262273568 /* rule reference */ +#define MTAG_IPFW_CALL 1308397630 /* call stack */ + +struct ip_fw_args; +typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args); +typedef int (*ip_fw_ctl_ptr_t)(struct sockopt *); +VNET_DECLARE(ip_fw_chk_ptr_t, ofp_ip_fw_chk_ptr); +VNET_DECLARE(ip_fw_ctl_ptr_t, ofp_ip_fw_ctl_ptr); +#define V_ip_fw_chk_ptr VNET(ofp_ip_fw_chk_ptr) +#define V_ip_fw_ctl_ptr VNET(ofp_ip_fw_ctl_ptr) + +/* Divert hooks. 
*/ +extern void (*ip_divert_ptr)(odp_packet_t m, int incoming); +/* ng_ipfw hooks -- XXX make it the same as divert and dummynet */ +extern int (*ng_ipfw_input_p)(odp_packet_t *, int, + struct ip_fw_args *, int); + +extern int (*ip_dn_ctl_ptr)(struct sockopt *); +extern int (*ip_dn_io_ptr)(odp_packet_t *, int, struct ip_fw_args *); + +VNET_DECLARE(int, ofp_ip_do_randomid); +#define V_ip_do_randomid VNET(ofp_ip_do_randomid) +#define ip_newid() ((V_ip_do_randomid != 0) ? ip_randomid() : \ + odp_cpu_to_be_16(V_ip_id++)) + +#endif /* !_OFPI_IP_VAR_H_ */ diff --git a/include/ofpi_log.h b/include/ofpi_log.h new file mode 100644 index 00000000..1975446e --- /dev/null +++ b/include/ofpi_log.h @@ -0,0 +1,8 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "api/ofp_log.h" diff --git a/include/ofpi_md5.h b/include/ofpi_md5.h new file mode 100644 index 00000000..36b0bcc4 --- /dev/null +++ b/include/ofpi_md5.h @@ -0,0 +1,50 @@ +/* MD5.H - header file for MD5C.C + * $FreeBSD: release/9.1.0/sys/sys/md5.h 156752 2006-03-15 19:47:12Z andre $ + */ + +/*- + Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All +rights reserved. + +License to copy and use this software is granted provided that it +is identified as the "RSA Data Security, Inc. MD5 Message-Digest +Algorithm" in all material mentioning or referencing this software +or this function. + +License is also granted to make and use derivative works provided +that such works are identified as "derived from the RSA Data +Security, Inc. MD5 Message-Digest Algorithm" in all material +mentioning or referencing the derived work. + +RSA Data Security, Inc. makes no representations concerning either +the merchantability of this software or the suitability of this +software for any particular purpose. It is provided "as is" +without express or implied warranty of any kind. 
+ +These notices must be retained in any copies of any part of this +documentation and/or software. + */ + +#ifndef _SYS_MD5_H_ +#define _SYS_MD5_H_ + +#define MD5_BLOCK_LENGTH 64 +#define MD5_DIGEST_LENGTH 16 +#define MD5_DIGEST_STRING_LENGTH (MD5_DIGEST_LENGTH * 2 + 1) + +/* MD5 context. */ +typedef struct MD5Context { + uint32_t state[4]; /* state (ABCD) */ + uint32_t count[2]; /* number of bits, modulo 2^64 (lsb first) */ + unsigned char buffer[64]; /* input buffer */ +} MD5_CTX; + +void ofp_MD5Init (MD5_CTX *); +void ofp_MD5Update (MD5_CTX *, const void *, unsigned int); +void ofp_MD5Final (unsigned char [16], MD5_CTX *); +char * MD5End(MD5_CTX *, char *); +char * MD5File(const char *, char *); +/* char * MD5FileChunk(const char *, char *, int64_t, int64_t); */ +char * MD5Data(const void *, unsigned int, char *); + +#endif /* _SYS_MD5_H_ */ diff --git a/include/ofpi_netlink.h b/include/ofpi_netlink.h new file mode 100644 index 00000000..3b948186 --- /dev/null +++ b/include/ofpi_netlink.h @@ -0,0 +1,33 @@ +/*- + * Copyright (c) 2014 David Nyström, Enea Software AB All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include + +#ifdef HAVE_QUAGGA +#define START_NL_SERVER start_quagga_nl_server +void * start_quagga_nl_server(void *arg); +#else +#define START_NL_SERVER start_netlink_nl_server +void * start_netlink_nl_server(void *arg); +#endif diff --git a/include/ofpi_pkt_processing.h b/include/ofpi_pkt_processing.h new file mode 100644 index 00000000..4017b987 --- /dev/null +++ b/include/ofpi_pkt_processing.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_APP_H +#define _OFPI_APP_H + +#include +#include "api/ofp_types.h" +#include "api/ofp_pkt_processing.h" +#include "ofpi_in.h" + +enum ofp_return_code send_pkt_out(struct ofp_ifnet *dev, + odp_packet_t pkt); +enum ofp_return_code send_pkt_loop(struct ofp_ifnet *dev, + odp_packet_t pkt); + +enum ofp_return_code ipv4_transport_classifier(odp_packet_t pkt, + uint8_t ip_proto); +enum ofp_return_code ipv6_transport_classifier(odp_packet_t pkt, + uint8_t ip6_nxt); + +#endif /* _OFPI_APP_H */ diff --git a/include/ofpi_portconf.h b/include/ofpi_portconf.h new file mode 100644 index 00000000..3bac337b --- /dev/null +++ b/include/ofpi_portconf.h @@ -0,0 +1,134 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_PORTCONF_H_ +#define _OFPI_PORTCONF_H_ + +#include + +#include "odp.h" +#include "odp/helper/linux.h" +#include "api/ofp_portconf.h" +#include "ofpi_ethernet.h" +#include "ofpi_queue.h" + +#define NUM_PORTS 16 +/* GRE ports are the last port assigned in the port vector. + * Ports start from 0, and the last value is NUM_PORTS - 1. + */ +#define GRE_PORTS (NUM_PORTS - 1) +#define OFP_IFNAME_PREFIX "fp" +#define OFP_GRE_IFNAME_PREFIX "gre" + +#define OFP_IFNET_LOCK_READ(name) odp_rwlock_read_lock(\ + &ofp_ifnet_locks_shm->lock_##name##_rw) +#define OFP_IFNET_UNLOCK_READ(name) odp_rwlock_read_unlock(\ + &ofp_ifnet_locks_shm->lock_##name##_rw) +#define OFP_IFNET_LOCK_WRITE(name) odp_rwlock_write_lock(\ + &ofp_ifnet_locks_shm->lock_##name##_rw) +#define OFP_IFNET_UNLOCK_WRITE(name) odp_rwlock_write_unlock(\ + &ofp_ifnet_locks_shm->lock_##name##_rw) + +struct ofp_ifnet_locks_str { + odp_rwlock_t lock_ifaddr_list_rw; +#ifdef INET6 + odp_rwlock_t lock_ifaddr6_list_rw; +#endif /* INET6 */ +}; + +extern struct ofp_ifnet_locks_str *ofp_ifnet_locks_shm; + +OFP_TAILQ_HEAD(in_ifaddrhead, ofp_ifnet); + +struct ofp_ifnet { + uint16_t port; + uint16_t vlan; + uint16_t vrf; + uint16_t if_mtu; + uint32_t ip_addr; /* network byte order */ + uint32_t ip_p2p; /* network byte order */ + uint32_t ip_local; /* network byte order */ + uint32_t ip_remote; /* network byte order */ + uint32_t bcast_addr; /* network byte order */ + int masklen; +#ifdef INET6 + uint8_t link_local[16]; + uint8_t ip6_addr[16]; + uint8_t ip6_prefix; +#endif /* INET6 */ + uint8_t mac[OFP_ETHER_ADDR_LEN]; + void *vlan_structs; +#define OFP_IFT_ETHER 1 +#define OFP_IFT_LOCAL 2 +#define OFP_IFT_LOOP 3 +#define OFP_IFT_GRE 4 + uint8_t if_type; + uint8_t if_flags; + + char if_name[OFP_IFNAMSIZ]; + odp_pktio_t pktio; + odp_queue_t outq_def; + odp_queue_t inq_def; + odp_queue_t loopq_def; + odp_pool_t pkt_pool; +#ifdef SP + int linux_index; + int fd; + 
odp_queue_t spq_def; +#define OFP_SP_DOWN 0 +#define OFP_SP_UP 1 + int sp_status; + odph_linux_pthread_t rx_tbl[1]; + odph_linux_pthread_t tx_tbl[1]; +#endif /*SP */ + + OFP_LIST_ENTRY(ofp_ifnet) ia_hash; /* entry in bucket of inet addresses */ + OFP_TAILQ_ENTRY(ofp_ifnet) ia_link; /* list of internet addresses */ +#ifdef INET6 + OFP_TAILQ_ENTRY(ofp_ifnet) ia6_link; /* list of internet addresses */ +#endif /* INET6 */ +}; + +struct in_ifaddrhead *ofp_get_ifaddrhead(void); +void ofp_ifaddr_elem_add(struct ofp_ifnet *ifnet); +void ofp_ifaddr_elem_del(struct ofp_ifnet *ifnet); +struct ofp_ifnet *ofp_ifaddr_elem_get(uint8_t *addr); + +#ifdef INET6 +struct in_ifaddrhead *ofp_get_ifaddr6head(void); +void ofp_ifaddr6_elem_add(struct ofp_ifnet *ifnet); +void ofp_ifaddr6_elem_del(struct ofp_ifnet *ifnet); +struct ofp_ifnet *ofp_ifaddr6_elem_get(uint8_t *addr6); +#endif /* INET6 */ + +void *sp_tx_thread(void *ifnet_void); +void *sp_rx_thread(void *ifnet_void); +int sp_setup_device(struct ofp_ifnet *ifnet); + +void ofp_portconf_alloc_shared_memory(void); +void ofp_portconf_lookup_shared_memory(void); +void ofp_init_ifnet_data(void); + +#ifdef SP +void ofp_update_ifindex_lookup_tab(struct ofp_ifnet *ifnet); +#endif /* SP */ + +int ofp_vlan_get_by_key(void *root, void *key, void **value_address); +int vlan_ifnet_insert(void *root, void *elem); +int vlan_ifnet_delete(void *root, void *elem, int (*free_key_fun)(void *arg)); +int free_key(void *key); + +struct ofp_ifconf; +void ofp_get_interfaces(struct ofp_ifconf *ifc); + +/* Finds the node interface by the local ip assigned regardless of vlan */ +struct ofp_ifnet *ofp_get_ifnet_by_ip(uint32_t ip, uint16_t vrf); +/* Finds the tunnel interface by tunnel addresses */ +struct ofp_ifnet *ofp_get_ifnet_by_tunnel(uint32_t tun_loc, + uint32_t tun_rem, uint16_t vrf); + +#endif diff --git a/include/ofpi_protosw.h b/include/ofpi_protosw.h new file mode 100644 index 00000000..9e0e8253 --- /dev/null +++ b/include/ofpi_protosw.h @@ -0,0 
+1,280 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)protosw.h 8.1 (Berkeley) 6/2/93 + * $FreeBSD: release/9.1.0/sys/sys/protosw.h 193731 2009-06-08 17:15:40Z zec $ + */ + +#ifndef _SYS_PROTOSW_H_ +#define _SYS_PROTOSW_H_ + +#include "odp.h" +#include "ofpi_domain.h" + +/* Forward declare these structures referenced from prototypes below. 
*/ +struct mbuf; +struct thread; +struct ofp_sockaddr; +struct socket; +struct sockopt; + +/*#ifdef _KERNEL*/ +/* + * Protocol switch table. + * + * Each protocol has a handle initializing one of these structures, + * which is used for protocol-protocol and system-protocol communication. + * + * A protocol is called through the pr_init entry before any other. + * Thereafter it is called every 200ms through the pr_fasttimo entry and + * every 500ms through the pr_slowtimo for timer based actions. + * The system will call the pr_drain entry if it is low on space and + * this should throw away any non-critical data. + * + * Protocols pass data between themselves as chains of mbufs using + * the pr_input and pr_output hooks. Pr_input passes data up (towards + * the users) and pr_output passes it down (towards the interfaces); control + * information passes up and down on pr_ctlinput and pr_ctloutput. + * The protocol is responsible for the space occupied by any of the + * arguments to these entries and must dispose of it. + * + * In retrospect, it would be a lot nicer to use an interface + * similar to the vnode VOP interface. + */ +/* USE THESE FOR YOUR PROTOTYPES ! 
*/ +typedef int pr_input_t (odp_packet_t , int); +typedef int pr_input6_t (odp_packet_t *, int*, int); /* XXX FIX THIS */ +typedef int pr_output_t (odp_packet_t , struct socket *); +typedef void pr_ctlinput_t (int, struct ofp_sockaddr *, void *); +typedef int pr_ctloutput_t (struct socket *, struct sockopt *); +typedef void pr_init_t (void); +typedef void pr_destroy_t (void); +typedef void pr_fasttimo_t (void); +typedef void pr_slowtimo_t (void *); +typedef void pr_drain_t (void); + +struct protosw { + short pr_type; /* socket type used for */ + struct domain *pr_domain; /* domain protocol a member of */ + short pr_protocol; /* protocol number */ + short pr_flags; /* see below */ +/* protocol-protocol hooks */ + pr_input_t *pr_input; /* input to protocol (from below) */ + pr_output_t *pr_output; /* output to protocol (from above) */ + pr_ctlinput_t *pr_ctlinput; /* control input (from below) */ + pr_ctloutput_t *pr_ctloutput; /* control output (from above) */ +/* utility hooks */ + pr_init_t *pr_init; + pr_destroy_t *pr_destroy; + pr_fasttimo_t *pr_fasttimo; /* fast timeout (200ms) */ + pr_slowtimo_t *pr_slowtimo; /* slow timeout (500ms) */ + pr_drain_t *pr_drain; /* flush any excess space possible */ + + struct pr_usrreqs *pr_usrreqs; /* user-protocol hook */ +}; +/*#endif*/ + +extern struct protosw ofp_inetsw[]; +extern struct domain ofp_inetdomain; + +#define PR_SLOWHZ 2 /* 2 slow timeouts per second */ +#define PR_FASTHZ 5 /* 5 fast timeouts per second */ + +/* + * This number should be defined again within each protocol family to avoid + * confusion. + */ +#define PROTO_SPACER 32767 /* spacer for loadable protocols */ + +/* + * Values for pr_flags. + * PR_ADDR requires PR_ATOMIC; + * PR_ADDR and PR_CONNREQUIRED are mutually exclusive. + * PR_IMPLOPCL means that the protocol allows sendto without prior connect, + * and the protocol understands the OFP_MSG_EOF flag. 
The first property is + * only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed + * anyhow). + */ +#define PR_ATOMIC 0x01 /* exchange atomic messages only */ +#define PR_ADDR 0x02 /* addresses given with messages */ +#define PR_CONNREQUIRED 0x04 /* connection required by protocol */ +#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */ +#define PR_RIGHTS 0x10 /* passes capabilities */ +#define PR_IMPLOPCL 0x20 /* implied open/close */ +#define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ + + +struct ifnet; +struct stat; +struct ofp_ucred; +struct uio; + +/* + * If the ordering here looks odd, that's because it's alphabetical. These + * should eventually be merged back into struct protosw. + * + * Some fields are initialized to defaults if they are NULL. + * See uipc_domain.c:net_init_domain() + */ +struct ofp_ifnet; +struct pr_usrreqs { + void (*pru_abort)(struct socket *so); + int (*pru_accept)(struct socket *so, struct ofp_sockaddr **nam); + int (*pru_attach)(struct socket *so, int proto, struct thread *td); + int (*pru_bind)(struct socket *so, struct ofp_sockaddr *nam, + struct thread *td); + int (*pru_connect)(struct socket *so, struct ofp_sockaddr *nam, + struct thread *td); + int (*pru_connect2)(struct socket *so1, struct socket *so2); + int (*pru_control)(struct socket *so, uint32_t cmd, char * data, + struct ofp_ifnet *ifp, struct thread *td); + void (*pru_detach)(struct socket *so); + int (*pru_disconnect)(struct socket *so); + int (*pru_listen)(struct socket *so, int backlog, + struct thread *td); + int (*pru_peeraddr)(struct socket *so, struct ofp_sockaddr **nam); + int (*pru_rcvd)(struct socket *so, int flags); + int (*pru_rcvoob)(struct socket *so, odp_packet_t m, int flags); + int (*pru_send)(struct socket *so, int flags, odp_packet_t m, + struct ofp_sockaddr *addr, odp_packet_t control, + struct thread *td); +#define PRUS_OOB 0x1 +#define PRUS_EOF 0x2 +#define PRUS_MORETOCOME 0x4 + int (*pru_sense)(struct socket *so, struct 
stat *sb); + int (*pru_shutdown)(struct socket *so); + int (*pru_flush)(struct socket *so, int direction); + int (*pru_sockaddr)(struct socket *so, struct ofp_sockaddr **nam); + int (*pru_sosend)(struct socket *so, struct ofp_sockaddr *addr, + struct uio *uio, odp_packet_t top, odp_packet_t control, + int flags, struct thread *td); + int (*pru_soreceive)(struct socket *so, struct ofp_sockaddr **paddr, + struct uio *uio, odp_packet_t *mp0, odp_packet_t *controlp, + int *flagsp); + int (*pru_sopoll)(struct socket *so, int events, + struct ofp_ucred *cred, struct thread *td); + void (*pru_sosetlabel)(struct socket *so); + void (*pru_close)(struct socket *so); +}; + +/* + * All nonvoid pru_*() functions below return OFP_EOPNOTSUPP. + */ +int ofp_pru_accept_notsupp(struct socket *so, struct ofp_sockaddr **nam); +int ofp_pru_attach_notsupp(struct socket *so, int proto, struct thread *td); +int ofp_pru_bind_notsupp(struct socket *so, struct ofp_sockaddr *nam, + struct thread *td); +int ofp_pru_connect_notsupp(struct socket *so, struct ofp_sockaddr *nam, + struct thread *td); +int ofp_pru_connect2_notsupp(struct socket *so1, struct socket *so2); +int ofp_pru_control_notsupp(struct socket *so, uint32_t cmd, char *data, + struct ofp_ifnet *ifp, struct thread *td); +int ofp_pru_disconnect_notsupp(struct socket *so); +int ofp_pru_listen_notsupp(struct socket *so, int backlog, struct thread *td); +int ofp_pru_peeraddr_notsupp(struct socket *so, struct ofp_sockaddr **nam); +int ofp_pru_rcvd_notsupp(struct socket *so, int flags); +int ofp_pru_rcvoob_notsupp(struct socket *so, odp_packet_t m, int flags); +int ofp_pru_send_notsupp(struct socket *so, int flags, odp_packet_t m, + struct ofp_sockaddr *addr, odp_packet_t control, + struct thread *td); +int ofp_pru_sense_null(struct socket *so, struct stat *sb); +int ofp_pru_shutdown_notsupp(struct socket *so); +int ofp_pru_sockaddr_notsupp(struct socket *so, struct ofp_sockaddr **nam); +int ofp_pru_sosend_notsupp(struct socket *so, 
struct ofp_sockaddr *addr, + struct uio *uio, odp_packet_t top, odp_packet_t control, int flags, + struct thread *td); +int ofp_pru_soreceive_notsupp(struct socket *so, + struct ofp_sockaddr **paddr, + struct uio *uio, odp_packet_t *mp0, odp_packet_t *controlp, + int *flagsp); +int ofp_pru_sopoll_notsupp(struct socket *so, int events, struct ofp_ucred *cred, + struct thread *td); + +/* + * The arguments to the ctlinput routine are + * (*protosw[].pr_ctlinput)(cmd, sa, arg); + * where cmd is one of the commands below, sa is a pointer to a sockaddr, + * and arg is a `void *' argument used within a protocol family. + */ +#define OFP_PRC_IFDOWN 0 /* interface transition */ +#define OFP_PRC_ROUTEDEAD 1 /* select new route if possible ??? */ +#define OFP_PRC_IFUP 2 /* interface has come back up */ +#define OFP_PRC_QUENCH2 3 /* DEC congestion bit says slow down */ +#define OFP_PRC_QUENCH 4 /* some one said to slow down */ +#define OFP_PRC_MSGSIZE 5 /* message size forced drop */ +#define OFP_PRC_HOSTDEAD 6 /* host appears to be down */ +#define OFP_PRC_HOSTUNREACH 7 /* deprecated (use OFP_PRC_UNREACH_HOST) */ +#define OFP_PRC_UNREACH_NET 8 /* no route to network */ +#define OFP_PRC_UNREACH_HOST 9 /* no route to host */ +#define OFP_PRC_UNREACH_PROTOCOL 10 /* dst says bad protocol */ +#define OFP_PRC_UNREACH_PORT 11 /* bad port # */ +/* was OFP_PRC_UNREACH_NEEDFRAG 12 (use OFP_PRC_MSGSIZE) */ +#define OFP_PRC_UNREACH_SRCFAIL 13 /* source route failed */ +#define OFP_PRC_REDIRECT_NET 14 /* net routing redirect */ +#define OFP_PRC_REDIRECT_HOST 15 /* host routing redirect */ +#define OFP_PRC_REDIRECT_TOSNET 16 /* redirect for type of service & net */ +#define OFP_PRC_REDIRECT_TOSHOST 17 /* redirect for tos & host */ +#define OFP_PRC_TIMXCEED_INTRANS 18 /* packet lifetime expired in transit */ +#define OFP_PRC_TIMXCEED_REASS 19 /* lifetime expired on reass q */ +#define OFP_PRC_PARAMPROB 20 /* header incorrect */ +#define OFP_PRC_UNREACH_ADMIN_PROHIB 21 /* packet 
administratively prohibited */ + +#define OFP_PRC_NCMDS 22 + +#define OFP_PRC_IS_REDIRECT(cmd) \ + ((cmd) >= OFP_PRC_REDIRECT_NET && (cmd) <= OFP_PRC_REDIRECT_TOSHOST) + + +/* + * The arguments to ctloutput are: + * (*protosw[].pr_ctloutput)(req, so, level, optname, optval, p); + * req is one of the actions listed below, so is a (struct socket *), + * level is an indication of which protocol layer the option is intended. + * optname is a protocol dependent socket option request, + * optval is a pointer to a mbuf-chain pointer, for value-return results. + * The protocol is responsible for disposal of the mbuf chain *optval + * if supplied, + * the caller is responsible for any space held by *optval, when returned. + * A non-zero return from ctloutput gives a + * UNIX error number which should be passed to higher level software. + */ +#define PRCO_GETOPT 0 +#define PRCO_SETOPT 1 + +#define PRCO_NCMDS 2 + +void pfctlinput(int, struct ofp_sockaddr *); +void pfctlinput2(int, struct ofp_sockaddr *, void *); +struct protosw *ofp_pffindproto(int family, int protocol, int type); +/*struct protosw *pffindtype(int family, int type);*/ +int pf_proto_register(int family, struct protosw *npr); +int pf_proto_unregister(int family, int protocol, int type); + +#endif diff --git a/include/ofpi_queue.h b/include/ofpi_queue.h new file mode 100644 index 00000000..cc2b1077 --- /dev/null +++ b/include/ofpi_queue.h @@ -0,0 +1,13 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFPI_QUEUE_H_ +#define __OFPI_QUEUE_H_ + +#include "api/ofp_queue.h" + +#endif /* __OFPI_QUEUE_H_ */ diff --git a/include/ofpi_reass.h b/include/ofpi_reass.h new file mode 100644 index 00000000..c29d0e85 --- /dev/null +++ b/include/ofpi_reass.h @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 ENEA Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _OFPI_REASSEMBLY_H_ +#define _OFPI_REASSEMBLY_H_ + +void ofp_reassembly_alloc_shared_memory(void); +void ofp_reassembly_lookup_shared_memory(void); +odp_packet_t ofp_ip_reass(odp_packet_t pkt); + + +#endif diff --git a/include/ofpi_route.h b/include/ofpi_route.h new file mode 100644 index 00000000..e6a32e27 --- /dev/null +++ b/include/ofpi_route.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_ROUTE_H_ +#define _OFPI_ROUTE_H_ + +#include + +#include "odp/rwlock.h" +#include "api/ofp_route_arp.h" +#include "ofpi_portconf.h" + +#define OFP_LOCK_READ(name) odp_rwlock_read_lock(&ofp_locks_shm->lock_##name##_rw) +#define OFP_UNLOCK_READ(name) odp_rwlock_read_unlock(&ofp_locks_shm->lock_##name##_rw) +#define OFP_LOCK_WRITE(name) odp_rwlock_write_lock(&ofp_locks_shm->lock_##name##_rw) +#define OFP_UNLOCK_WRITE(name) odp_rwlock_write_unlock(&ofp_locks_shm->lock_##name##_rw) + +struct ofp_locks_str { + odp_rwlock_t lock_config_rw; + odp_rwlock_t lock_route_rw; +}; + +extern struct ofp_locks_str *ofp_locks_shm; + +void ofp_route_lookup_shared_memory(void); +void ofp_route_alloc_shared_memory(void); +void ofp_route_init(void); + +int32_t ofp_is_mobile(uint32_t addr); +int ofp_route_save_ipv6_pkt(odp_packet_t pkt, uint8_t *addr, + struct ofp_ifnet *dev); + +#endif diff --git a/include/ofpi_rt_lookup.h b/include/ofpi_rt_lookup.h new file mode 100644 index 00000000..316d7e61 --- /dev/null +++ b/include/ofpi_rt_lookup.h @@ -0,0 +1,180 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include "ofpi_pkt_processing.h" + +#ifndef _OFPI_RT_LOOKUP_H +#define _OFPI_RT_LOOKUP_H + +#define OFP_RTL_ENOMEM ((void *) ofp_rtl_init) + +#define OFP_RTL_MAXDEPTH 32 +#define OFP_RTL64_MAXDEPTH 64 + +#define OFP_RTL_FLAGS_VALID_DATA 1 +#define OFP_RTL_FLAGS_GATEWAY 2 +#define OFP_RTL_FLAGS_MAC_ADDR 4 +#define OFP_RTL_FLAGS_LOCAL_INTERFACE 8 + +#ifdef MTRIE +#define IPV4_LENGTH 32 +#define IPV4_FIRST_LEVEL 16 +#define IPV4_LEVEL 8 + +struct ofp_rt_rule { + uint8_t used; + uint8_t masklen; + uint16_t vrf; + uint32_t addr; + struct ofp_nh_entry data[4]; +}; + +#endif + + +struct ofp_rtl_node { + uint32_t flags; + struct ofp_nh_entry data[4]; +#ifdef MTRIE + uint8_t masklen; + uint8_t root; + uint16_t ref; + struct ofp_rtl_node *next; +#else + struct ofp_rtl_node *left; + struct ofp_rtl_node *right; +#endif +}; + +struct ofp_rtl_tree { + uint16_t vrf; + struct ofp_rtl_node *root; +}; + +struct ofp_rtl6_node { + uint32_t flags; + struct ofp_nh6_entry data; + + struct ofp_rtl6_node *left; + struct ofp_rtl6_node *right; +}; + +struct ofp_rtl6_tree { + struct ofp_rtl6_node *root; +}; + +extern int ofp_rtl_init(struct ofp_rtl_tree *tree); +extern int ofp_rtl_root_init(struct ofp_rtl_tree *tree, uint16_t vrf); +extern struct ofp_nh_entry *ofp_rtl_insert(struct ofp_rtl_tree *tree, uint32_t addr, + uint32_t masklen, struct ofp_nh_entry *data); +extern struct ofp_nh_entry *ofp_rtl_remove(struct ofp_rtl_tree *tree, uint32_t addr, + uint32_t masklen); +#ifdef MTRIE +extern void ofp_rt_rule_add(uint16_t vrf, uint32_t addr, uint32_t masklen, struct ofp_nh_entry *data); +extern void ofp_rt_rule_remove(uint16_t vrf, uint32_t addr, uint32_t masklen); +extern void ofp_rt_rule_print(int fd, uint16_t vrf, + void (*func)(int fd, uint32_t key, int level, struct ofp_nh_entry *data)); +#else +extern struct ofp_nh_entry *ofp_rtl_search_exact(struct ofp_rtl_tree *tree, + uint32_t addr, uint32_t masklen); +extern void 
ofp_rtl_destroy(struct ofp_rtl_tree *tree, + void (*func)(void *data)); +extern void ofp_rtl_traverse(int fd, struct ofp_rtl_tree *tree, + void (*func)(int fd, uint32_t key, int level, struct ofp_nh_entry *data)); +#endif +extern int ofp_rtl6_init(struct ofp_rtl6_tree *tree); +extern struct ofp_nh6_entry *ofp_rtl_insert6(struct ofp_rtl6_tree *tree, uint8_t *addr, + uint32_t masklen, struct ofp_nh6_entry *data); +extern struct ofp_nh6_entry *ofp_rtl_remove6(struct ofp_rtl6_tree *tree, uint8_t *addr, + uint32_t masklen); +extern void ofp_rtl_traverse6(int fd, struct ofp_rtl6_tree *tree, + void (*func)(int fd, uint8_t *key, int level, struct ofp_nh6_entry *data)); +extern void ofp_print_rt_stat(int fd); +#ifndef MTRIE +static __inline struct ofp_nh_entry *ofp_rtl_search(struct ofp_rtl_tree *tree, uint32_t addr_be) +{ + struct ofp_rtl_node *node; + uint32_t mask = 0x80000000; + uint32_t addr = odp_be_to_cpu_32(addr_be); + struct ofp_rtl_node *match_table[65]; + int matches; + + matches = 0; + node = tree->root; + while (node) { + if (node->flags & OFP_RTL_FLAGS_VALID_DATA) { + match_table[matches++] = node; + } + + if (addr & mask) { + node = node->right; + } else { + node = node->left; + } + mask >>= 1; + } + if (!matches) + return NULL; + + return &(match_table[--matches]->data[0]); +} +#else +inline struct ofp_nh_entry *ofp_rtl_search(struct ofp_rtl_tree *tree, uint32_t addr_be); +int32_t ofp_rt_rule_find_prefix_match(uint16_t vrf, uint32_t addr, uint8_t masklen, uint8_t low); +#endif + +static inline int ofp_rt_bit_set(uint8_t *p, int bit) +{ + uint8_t r = 7 - (bit & 7); + int i = bit >> 3; + return p[i] & (1 << r); +} + +static inline void ofp_rt_set_bit(uint8_t *p, int bit) +{ + uint8_t r = 7 - (bit & 7); + int i = bit >> 3; + p[i] |= (1 << r); +} + +static inline void ofp_rt_reset_bit(uint8_t *p, int bit) +{ + uint8_t r = 7 - (bit & 7); + int i = bit >> 3; + p[i] &= ~(1 << r); +} + +static __inline struct ofp_nh6_entry *ofp_rtl_search6(struct ofp_rtl6_tree 
*tree, uint8_t *addr) +{ + /* + * Longest-prefix lookup: walk from tree->root, testing address bit 0, 1, 2, ... + * with ofp_rt_bit_set() (set -> right child, clear -> left child) and keep the + * deepest node flagged OFP_RTL_FLAGS_VALID_DATA. Returns that node's data, + * or NULL when no node on the path is valid. Only the last valid node is + * returned, so one pointer replaces the former fixed-size match table. + */ + struct ofp_rtl6_node *node = tree->root; + struct ofp_rtl6_node *best = NULL; + uint32_t bit = 0; + + while (node) { + if (node->flags & OFP_RTL_FLAGS_VALID_DATA) + best = node; + + node = ofp_rt_bit_set(addr, bit) ? node->right : node->left; + bit++; + } + + return best ? &best->data : NULL; +} + +void ofp_rt_lookup_lookup_shared_memory(void); +void ofp_rt_lookup_alloc_shared_memory(void); + +#endif /* _OFPI_RT_LOOKUP_H */ diff --git a/include/ofpi_sockbuf.h b/include/ofpi_sockbuf.h new file mode 100644 index 00000000..122717dc --- /dev/null +++ b/include/ofpi_sockbuf.h @@ -0,0 +1,263 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 + * + * $FreeBSD: release/9.1.0/sys/sys/sockbuf.h 225169 2011-08-25 09:20:13Z bz $ + */ +#ifndef _SYS_SOCKBUF_H_ +#define _SYS_SOCKBUF_H_ + +#include "odp.h" +#include "odp/rwlock.h" +#include "ofpi_systm.h" +#include "ofpi_util.h" + +#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ + +/* + * Constants for sb_flags field of struct sockbuf. + * sb_notify() in ofpi_socketvar.h tests SB_WAIT, SB_SEL, SB_ASYNC, + * SB_UPCALL, SB_AIO and SB_KNOTE. + */ +#define SB_WAIT 0x04 /* someone is waiting for data/space */ +#define SB_SEL 0x08 /* someone is selecting */ +#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ +#define SB_UPCALL 0x20 /* someone wants an upcall */ +#define SB_NOINTR 0x40 /* operations not interruptible */ +#define SB_AIO 0x80 /* AIO operations queued */ +#define SB_KNOTE 0x100 /* kernel note attached */ +#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ +#define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */ +#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ + +struct ofp_sockaddr; +struct socket; +struct thread; + +struct xsockbuf { /* counter and limit fields named like those of struct sockbuf, without the locks or the packet table; struct xsocket embeds two of these (so_rcv, so_snd) */ + uint32_t sb_cc; + uint32_t sb_hiwat; + uint32_t sb_mbcnt; + uint32_t sb_mcnt; + uint32_t sb_ccnt; + uint32_t sb_mbmax; + int sb_lowat; + int sb_timeo; + short sb_flags; +}; + +struct selinfo { /* embedded in struct sockbuf as sb_sel; si_list is the list linkage, and struct ofp_fdset in ofpi_socket.h is declared as a list head of selinfo */ + OFP_LIST_ENTRY(selinfo) si_list; + void *si_wakeup_channel; + int si_socket; +#if 0 + struct selfdlist si_tdlist; /* List of sleeping threads. 
*/ + struct knlist si_note; /* kernel note list */ + struct mtx *si_mtx; /* Lock for tdlist. */ +#endif +}; + +struct ofp_iovec { /* one element of the scatter/gather list referenced by struct uio */ + void *iov_base; /* Base address. */ + size_t iov_len; /* Length. */ +}; + +struct uio { /* I/O request descriptor passed to the ofp_sosend* / ofp_soreceive* functions */ + struct ofp_iovec *uio_iov; /* scatter/gather list */ + int uio_iovcnt; /* length of scatter/gather list */ + off_t uio_offset; /* offset in target object */ + ofp_ssize_t uio_resid; /* remaining bytes to process */ +}; + +/* + * Variables for socket buffering. + * Packets are held in the fixed-size table sb_mb[SOCKBUF_LEN]; sb_put and + * sb_get are the indices sbspace() uses to compute the free slots. + */ +struct sockbuf { + struct selinfo sb_sel; /* process selecting read/write */ + + odp_rwlock_t sb_mtx; /* sockbuf lock */ + odp_spinlock_t sb_sx; /* prevent I/O interlacing */ + + short sb_state; /* (c/d) socket state on sockbuf */ +#define sb_startzero sb_mb +#define SOCKBUF_LEN 64 + odp_packet_t sb_mb[SOCKBUF_LEN]; /* (c/d) the pkt table */ + int sb_put, sb_get; /* indices into sb_mb; presumably producer/consumer positions of a ring -- TODO confirm in the .c file */ + int sb_mbtail; /* (c/d) the last pkt in the table */ + int sb_lastrecord; /* (c/d) first mbuf of last + * record in socket buffer */ + int sb_sndptr; /* (c/d) next pkt to send: index into sb_mb, set to -1 by sbfree() */ + uint32_t sb_sndptroff; /* (c/d) byte offset of ptr into chain */ + uint32_t sb_cc; /* (c/d) actual chars in buffer */ + uint32_t sb_hiwat; /* (c/d) max actual char count */ + uint32_t sb_mbcnt; /* (c/d) chars of mbufs used */ + uint32_t sb_mcnt; /* (c/d) number of mbufs in buffer */ + uint32_t sb_ccnt; /* (c/d) number of clusters in buffer */ + uint32_t sb_mbmax; /* (c/d) max chars of mbufs to use */ + uint32_t sb_ctl; /* (c/d) non-data chars in buffer */ + int sb_lowat; /* (c/d) low water mark */ + int sb_timeo; /* (c/d) timeout for read/write */ + short sb_flags; /* (c/d) flags, see the SB_* constants above */ + int (*sb_upcall)(struct socket *, void *, int); /* (c/d) */ + void *sb_upcallarg; /* (c/d) */ + struct socket *sb_socket; + //const char *lockedby_file; + //int lockedby_line; +}; + + +/* + * Per-socket buffer mutex used to protect most fields in the socket + * buffer. 
+ * The lock is an odp_rwlock_t: SOCKBUF_LOCK/SOCKBUF_UNLOCK take it in + * write mode, SOCKBUF_RLOCK/SOCKBUF_RUNLOCK in read mode. + * SOCKBUF_LOCK_INIT accepts _name but does not use it. + * SOCKBUF_LOCK_DESTROY, SOCKBUF_OWNED, SOCKBUF_LOCK_ASSERT and + * SOCKBUF_UNLOCK_ASSERT expand to nothing (their bodies are commented out). + */ +#define SOCKBUF_MTX(_sb) (&(_sb)->sb_mtx) + +#define SOCKBUF_LOCK_INIT(_sb, _name) odp_rwlock_init(SOCKBUF_MTX(_sb)) +#define SOCKBUF_LOCK(_sb) odp_rwlock_write_lock(SOCKBUF_MTX(_sb)) +#define SOCKBUF_UNLOCK(_sb) odp_rwlock_write_unlock(SOCKBUF_MTX(_sb)) +#define SOCKBUF_RLOCK(_sb) odp_rwlock_read_lock(SOCKBUF_MTX(_sb)) +#define SOCKBUF_RUNLOCK(_sb) odp_rwlock_read_unlock(SOCKBUF_MTX(_sb)) + + +#define SOCKBUF_LOCK_DESTROY(_sb) //mtx_destroy(SOCKBUF_MTX(_sb)) +#define SOCKBUF_OWNED(_sb) //mtx_owned(SOCKBUF_MTX(_sb)) +#define SOCKBUF_LOCK_ASSERT(_sb) //mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED) +#define SOCKBUF_UNLOCK_ASSERT(_sb) //mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED) + +//#define SOCKBUF_LOCK_Y(_sb) ofp_rec_wlock(SOCKBUF_MTX(_sb), __FILE__, __LINE__) +//#define SOCKBUF_UNLOCK_Y(_sb) ofp_rec_wunlock(SOCKBUF_MTX(_sb), __FILE__, __LINE__) + +int packet_accepted_as_event_rlocked(struct sockbuf *sb, odp_packet_t pkt); +void sbappend(struct sockbuf *sb, odp_packet_t m); +void sbappend_locked(struct sockbuf *sb, odp_packet_t m); +void ofp_sbappendstream(struct sockbuf *sb, odp_packet_t m); +void ofp_sbappendstream_locked(struct sockbuf *sb, odp_packet_t m); +int sbappendaddr(struct sockbuf *sb, const struct ofp_sockaddr *asa, + odp_packet_t m0, odp_packet_t control); +int ofp_sbappendaddr_locked(struct sockbuf *sb, odp_packet_t m0, + odp_packet_t control); +int sbappendcontrol(struct sockbuf *sb, odp_packet_t m0, + odp_packet_t control); +int sbappendcontrol_locked(struct sockbuf *sb, odp_packet_t m0, + odp_packet_t control); +void sbappendrecord(struct sockbuf *sb, odp_packet_t m0); +void sbappendrecord_locked(struct sockbuf *sb, odp_packet_t m0); +void sbcheck(struct sockbuf *sb); +void ofp_sbcompress(struct sockbuf *sb, odp_packet_t m, int n); +odp_packet_t + sbcreatecontrol(char * p, int size, int type, int level); +void ofp_sbdestroy(struct sockbuf *sb, struct socket *so); +void ofp_sbdrop(struct sockbuf *sb, int len); +void ofp_sbdrop_locked(struct sockbuf 
*sb, int len); +void sbdroprecord(struct sockbuf *sb); +void ofp_sbdroprecord_locked(struct sockbuf *sb); +void ofp_sbflush(struct sockbuf *sb); +void ofp_sbflush_locked(struct sockbuf *sb); +void ofp_sbrelease(struct sockbuf *sb, struct socket *so); +void ofp_sbrelease_internal(struct sockbuf *sb, struct socket *so); +void ofp_sbrelease_locked(struct sockbuf *sb, struct socket *so); +int ofp_sbreserve(struct sockbuf *sb, uint64_t cc, struct socket *so, + struct thread *td); +int ofp_sbreserve_locked(struct sockbuf *sb, uint64_t cc, struct socket *so, + struct thread *td); +odp_packet_t + sbsndptr(struct sockbuf *sb, uint32_t off, uint32_t len, uint32_t *moff); +void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); +int ofp_sbwait(struct sockbuf *sb); +int ofp_sblock(struct sockbuf *sb, int flags); /* flags: the SBL_* values defined in ofpi_socketvar.h */ +void ofp_sbunlock(struct sockbuf *sb); + +/* + * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? + * This is problematical if the fields are unsigned, as the space might + * still be negative (cc > hiwat or mbcnt > mbmax). Should detect + * overflow and return 0. Should use "lmin" but it doesn't exist now. + * + * The active (#if 1) variant does not look at the byte counters at all: + * it takes the number of free sb_mb slots computed from sb_put and sb_get + * (always one less than the unused slots) and multiplies it by + * SHM_PKT_POOL_BUF_SIZE. The #else variant is the byte-counter form. + */ +#if 1 +#define sbspace(sb) \ + ((long)(SHM_PKT_POOL_BUF_SIZE * \ + ((sb)->sb_put >= (sb)->sb_get ? 
\ + (SOCKBUF_LEN - ((sb)->sb_put - (sb)->sb_get) - 1) : \ + ((sb)->sb_get - (sb)->sb_put - 1)))) +#else +#define sbspace(sb) \ + ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \ + (int)((sb)->sb_mbmax - (sb)->sb_mbcnt))) +#endif + +/* adjust counters in sb reflecting allocation of m; statement-like, use as "sballoc(sb, m);" (do/while(0) keeps it safe under an unbraced if/else); m is evaluated twice */ +#define sballoc(sb, m) do { \ + (sb)->sb_cc += odp_packet_len(m); \ + (sb)->sb_mbcnt += odp_packet_buf_len(m); \ + (sb)->sb_mcnt += 1; \ +} while (0) + +/* adjust counters in sb reflecting freeing of m; if m is the packet sb_sndptr refers to, sb_sndptr is reset to -1 and sb_sndptroff to 0, otherwise a non-zero sb_sndptroff is reduced by the length of m; statement-like, use as "sbfree(sb, m);" */ +#define sbfree(sb, m) do { \ + (sb)->sb_cc -= odp_packet_len(m); \ + (sb)->sb_mbcnt -= odp_packet_buf_len(m); \ + (sb)->sb_mcnt -= 1; \ + if ((sb)->sb_sndptr >= 0 && (sb)->sb_mb[(sb)->sb_sndptr] == (m)) { \ + (sb)->sb_sndptr = -1; \ + (sb)->sb_sndptroff = 0; \ + } \ + if ((sb)->sb_sndptroff != 0) \ + (sb)->sb_sndptroff -= odp_packet_len(m); \ +} while (0) + +/* NOTE(review): sb_mb is an array member of struct sockbuf, so the NULL test below can never be true, and sb_mbtail/sb_lastrecord are ints that get NULL assigned. Looks like a leftover from the mbuf-chain version -- confirm before using this macro. */ +#define SB_EMPTY_FIXUP(sb) do { \ + if ((sb)->sb_mb == NULL) { \ + (sb)->sb_mbtail = NULL; \ + (sb)->sb_lastrecord = NULL; \ + } \ +} while (/*CONSTCOND*/0) + +#ifdef SOCKBUF_DEBUG +void sblastrecordchk(struct sockbuf *, const char *, int); +#define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__) + +void sblastmbufchk(struct sockbuf *, const char *, int); +#define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__) +#else +#define SBLASTRECORDCHK(sb) /* nothing */ +#define SBLASTMBUFCHK(sb) /* nothing */ +#endif /* SOCKBUF_DEBUG */ + +void ofp_socantrcvmore_locked(struct socket *so); +void ofp_socantrcvmore(struct socket *so); + +int ofp_sockbuf_put_last(struct sockbuf *sb, odp_packet_t pkt); +odp_packet_t ofp_sockbuf_get_first(struct sockbuf *); +odp_packet_t ofp_sockbuf_remove_first(struct sockbuf *); +odp_packet_t ofp_sockbuf_get_first_remove(struct sockbuf *); +void ofp_sockbuf_packet_free(odp_packet_t); +void ofp_sockbuf_copy_out(struct sockbuf *sb, int off, int len, char *dst); + +#endif /* _SYS_SOCKBUF_H_ */ diff --git a/include/ofpi_socket.h b/include/ofpi_socket.h new file mode 100644 index 00000000..960b0699 --- 
/dev/null +++ b/include/ofpi_socket.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2014, Linaro Limited + * All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __OFPI_SOCKET_H__ +#define __OFPI_SOCKET_H__ + +#include "api/ofp_socket.h" +#include "ofpi_queue.h" + +OFP_LIST_HEAD(ofp_fdset, selinfo); /* struct ofp_fdset: list head whose entries are struct selinfo */ +#define OFP_GET_FD_SET(_set) ((struct ofp_fdset *)(_set)->fd_set_buf) /* views the fd_set_buf storage of *_set as a struct ofp_fdset; fully parenthesized so the result can be followed by -> or used inside a larger expression */ + +void ofp_socket_alloc_shared_memory(odp_pool_t); + +#endif /* __OFPI_SOCKET_H__ */ diff --git a/include/ofpi_socketvar.h b/include/ofpi_socketvar.h new file mode 100644 index 00000000..9209c37b --- /dev/null +++ b/include/ofpi_socketvar.h @@ -0,0 +1,450 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 + * + * $FreeBSD: release/9.1.0/sys/sys/socketvar.h 215178 2010-11-12 13:02:26Z luigi $ + */ + +#ifndef _SYS_SOCKETVAR_H_ +#define _SYS_SOCKETVAR_H_ + +#include "ofpi_queue.h" /* for TAILQ macros */ +#include "ofpi_sockbuf.h" +#include "ofpi_in_pcb.h" + +struct vnet; +struct in_l2info; + +/* + * Kernel structure per socket. + * Contains send and receive buffer queues, + * handle on protocol and pointer to protocol + * private data and error information. + * (This describes struct socket, which is defined after the locking key.) + */ +typedef uint64_t so_gen_t; /* type of the so_gencnt generation counters (struct socket member and global) */ + +/*- + * Locking key to struct socket: + * (a) constant after allocation, no locking required. + * (b) locked by OFP_SOCK_LOCK(so), which expands to + * SOCKBUF_LOCK(&so->so_rcv), i.e. the same lock as (c). + * (c) locked by SOCKBUF_LOCK(&so->so_rcv). + * (d) locked by SOCKBUF_LOCK(&so->so_snd). + * (e) locked by ACCEPT_LOCK(). + * (f) not locked since integer reads/writes are atomic. + * (g) used only as a sleep/wakeup address, no value. + * (h) locked by global mutex so_global_mtx. 
+ */ +struct socket { /* per-socket state; the (a)-(h) tags on members refer to the locking key above */ + struct socket *next; /* next in free list */ + int so_number; /* file descriptor */ + int so_count; /* (b) reference count */ + short so_type; /* (a) generic type, see socket.h */ + int so_options; /* from socket call, see socket.h */ + short so_linger; /* time to linger while closing */ + short so_state; /* (b) internal state flags SS_* */ + int so_qstate; /* (e) internal state flags SQ_* */ + void *so_pcb; /* protocol control block */ + struct vnet *so_vnet; /* network stack instance */ + struct protosw *so_proto; /* (a) protocol handle */ +/* + * Variables for connection queuing. + * Socket where accepts occur is so_head in all subsidiary sockets. + * If so_head is 0, socket is not related to an accept. + * For head socket so_incomp queues partially completed connections, + * while so_comp is a queue of connections ready to be accepted. + * If a connection is aborted and it has so_head set, then + * it has to be pulled out of either so_incomp or so_comp. + * We allow connections to queue up based on current queue lengths + * and limit on number of queued connections for this socket. 
+ */ + struct socket *so_head; /* (e) back pointer to listen socket */ + OFP_TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */ + OFP_TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */ + OFP_TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */ + uint16_t so_qlen; /* (e) number of unaccepted connections */ + uint16_t so_incqlen; /* (e) number of unaccepted incomplete + connections */ + uint16_t so_qlimit; /* (e) max number queued connections */ + short so_timeo; /* (g) connection timeout */ + uint16_t so_error; /* (f) error affecting connection */ + struct sigio *so_sigio; /* [sg] information for async I/O or + out of band data (SIGURG) */ + uint64_t so_oobmark; /* (c) chars to oob mark */ +#if 0 + OFP_TAILQ_HEAD(, aiocblist) so_aiojobq; /* AIO ops waiting on socket */ +#endif + struct sockbuf so_rcv, so_snd; /* receive and send buffers; so_rcv.sb_mtx is also the per-socket lock (see OFP_SOCK_LOCK) */ + + struct ofp_ucred *so_cred; /* (a) user credentials */ + struct ofp_ucred so_cred_space; /* NOTE(review): presumably the in-struct storage so_cred points at -- confirm */ + struct label *so_label; /* (b) MAC label for socket */ + struct label *so_peerlabel; /* (b) cached MAC label for peer */ + /* NB: generation count must not be first. */ + so_gen_t so_gencnt; /* (h) generation count */ + void *so_emuldata; /* (b) private data for emulators */ + struct so_accf { + struct accept_filter *so_accept_filter; + void *so_accept_filter_arg; /* saved filter args */ + char *so_accept_filter_str; /* saved user args */ + } *so_accf; + /* + * so_fibnum, so_user_cookie and friends can be used to attach + * some user-specified metadata to a socket, which then can be + * used by the kernel for various actions. + * so_user_cookie is used by ipfw/dummynet. 
+ */ + int so_fibnum; /* routing domain for this socket */ + int so_altfibnum; + uint32_t so_user_cookie; + + struct so_upcallprep { + void (*soup_accept)(struct socket *so, void *arg); + void *soup_accept_arg; + void (*soup_receive)(struct socket *so, void *arg, int64_t, int64_t); + void *soup_receive_arg; + void (*soup_send)(struct socket *so, void *arg, int64_t); + void *soup_send_arg; + } so_upcallprep; /* (a) initialized once immediately after socket creation */ + + struct in_l2info *so_l2info; /* (b) PROMISCUOUS_INET L2 info */ + unsigned int so_user_ctx_count; /* (b) number of user contexts in use, lock needed to increment */ +#define SOMAXUSERCTX 1 + void *so_user_ctx[SOMAXUSERCTX]; /* (a) each pointer managed by user */ + struct socket *so_passive_peer; /* (a) peer socket when performing passive reassembly */ + union { + struct inpcb dummy; + } pcb_space; /* NOTE(review): presumably reserves in-struct storage for the protocol control block (so_pcb) -- confirm */ + struct ofp_sigevent so_sigevent; +}; + + +/* + * Global accept mutex to serialize access to accept queues and + * fields associated with multiple sockets. This allows us to + * avoid defining a lock order between listen and accept sockets + * until such time as it proves to be a good idea. + * + * ACCEPT_LOCK/ACCEPT_UNLOCK call ofp_accept_lock()/ofp_accept_unlock(); + * the two *_ASSERT macros expand to nothing (bodies commented out). + */ +#define ACCEPT_LOCK_ASSERT() //mtx_assert(&accept_mtx, MA_OWNED) +#define ACCEPT_UNLOCK_ASSERT() //mtx_assert(&accept_mtx, MA_NOTOWNED) +#define ACCEPT_LOCK() ofp_accept_lock() +#define ACCEPT_UNLOCK() ofp_accept_unlock() + +/* + * Per-socket mutex: we reuse the receive socket buffer mutex for space + * efficiency. This decision should probably be revisited as we optimize + * locking for the socket code. + * + * Each OFP_SOCK_* macro forwards to the matching SOCKBUF_* macro on + * &so->so_rcv, so OFP_SOCK_LOCK takes the receive-buffer rwlock in write mode. + */ +#define OFP_SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv) +#define OFP_SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv) +#define OFP_SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv) +#define OFP_SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv) +#define OFP_SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv) + + +/* + * Socket state bits stored in so_qstate. 
+ */ +#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */ +#define SQ_COMP 0x1000 /* unaccepted, complete connection */ + +/* + * Externalized form of struct socket used by the sysctl(3) interface. + * Filled in by sotoxsocket() (declared below); the socket buffers are + * represented by struct xsockbuf instead of struct sockbuf. + */ +struct xsocket { + size_t xso_len; /* length of this structure */ + struct socket *xso_so; /* makes a convenient handle sometimes */ + short so_type; + int so_options; + short so_linger; + short so_state; + char * so_pcb; /* another convenient handle */ + int xso_protocol; + int xso_family; + uint16_t so_qlen; + uint16_t so_incqlen; + uint16_t so_qlimit; + short so_timeo; + uint16_t so_error; + ofp_pid_t so_pgid; + uint64_t so_oobmark; + struct xsockbuf so_rcv, so_snd; + ofp_uid_t so_uid; /* XXX */ +}; + +/* + * Macros for sockets and socket buffering. + */ + +/* + * Flags to ofp_sblock(). + */ +#define SBL_WAIT 0x00000001 /* Wait if not immediately available. */ +#define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. */ +#define SBL_VALID (SBL_WAIT | SBL_NOINTR) /* mask of all SBL_* flags defined here */ + +/* + * Do we need to notify the other side when I/O is possible? + * True when any of the listed SB_* bits is set in sb_flags. + */ +#define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \ + SB_UPCALL | SB_AIO | SB_KNOTE)) != 0) + +/* do we have to send all at once on a socket? */ +#define sosendallatonce(so) \ + ((so)->so_proto->pr_flags & PR_ATOMIC) + +/* can we read something from so? soreadabledata: buffered bytes reach the low-water mark, or a completed connection is queued, or an error is pending; soreadable additionally accepts SBS_CANTRCVMORE */ +#define soreadabledata(so) \ + ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ + !OFP_TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error) +#define soreadable(so) \ + (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) + +/* can we write something to so? yes when sbspace() of the send buffer reaches its low-water mark and the socket is connected or its protocol does not require a connection, or when SBS_CANTSENDMORE is set, or an error is pending */ +#define sowriteable(so) \ + ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ + (((so)->so_state&SS_ISCONNECTED) || \ + ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ + ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ + (so)->so_error) + +/* + * soref()/sorele() ref-count the socket structure. 
Note that you must + * still explicitly close the socket, but the last ref count will free + * the structure. + * + * sorele() releases the socket lock and the accept lock itself while the + * count stays above zero; when the count reaches zero it calls + * ofp_sofree() instead and does not unlock here. It calls panic("sorele") + * if the count is already <= 0. + */ +#define soref(so) do { \ + OFP_SOCK_LOCK_ASSERT(so); \ + ++(so)->so_count; \ +} while (0) + +#define sorele(so) do { \ + ACCEPT_LOCK_ASSERT(); \ + OFP_SOCK_LOCK_ASSERT(so); \ + if ((so)->so_count <= 0) \ + panic("sorele"); \ + if (--(so)->so_count == 0) \ + ofp_sofree(so); \ + else { \ + OFP_SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ +} while (0) + + +/* + * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to + * avoid a non-atomic test-and-wakeup. However, ofp_sowakeup is + * responsible for releasing the lock if it is called. We unlock only + * if we don't call into ofp_sowakeup. If any code is introduced that + * directly invokes the underlying ofp_sowakeup() primitives, it must + * maintain the same semantics. + * + * The *_locked variants expect the caller to already hold the sockbuf + * write lock; either way the lock is no longer held when they finish. + */ +#define sorwakeup_locked(so) do { \ + SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \ + if (sb_notify(&(so)->so_rcv)) { \ + ofp_sowakeup((so), &(so)->so_rcv); \ + } else { \ + SOCKBUF_UNLOCK(&(so)->so_rcv); \ + } \ +} while (0) + +#define sorwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_rcv); \ + sorwakeup_locked(so); \ +} while (0) + +#define sowwakeup_locked(so) do { \ + SOCKBUF_LOCK_ASSERT(&(so)->so_snd); \ + if (sb_notify(&(so)->so_snd)) \ + ofp_sowakeup((so), &(so)->so_snd); \ + else \ + SOCKBUF_UNLOCK(&(so)->so_snd); \ +} while (0) + +#define sowwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_snd); \ + sowwakeup_locked(so); \ +} while (0) + +struct accept_filter { /* a named accept filter: callback/create/destroy hooks plus the list linkage accf_next */ + char accf_name[16]; + int (*accf_callback) + (struct socket *so, void *arg, int waitflag); + void * (*accf_create) + (struct socket *so, char *arg); + void (*accf_destroy) + (struct socket *so); + OFP_SLIST_ENTRY(accept_filter) accf_next; +}; + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_ACCF); +MALLOC_DECLARE(M_PCB); +MALLOC_DECLARE(M_SONAME); +#endif + +extern int maxsockets; /* NOTE(review): this header later does "#define maxsockets 16384", so after that point the name is a constant, not this variable -- confirm which one is intended */ +extern uint64_t ofp_sb_max; +extern struct uma_zone *socket_zone; +extern so_gen_t so_gencnt; 
+ +struct mbuf; +struct ofp_sockaddr; +struct ofp_ucred; +struct uio; + +/* 'which' values for socket upcalls (the 'which' argument of ofp_soupcall_set / ofp_soupcall_clear). */ +#define OFP_SO_RCV 1 +#define OFP_SO_SND 2 + +/* Return values for socket upcalls. */ +#define SU_OK 0 +#define SU_ISCONNECTED 1 + +/* + * From uipc_socket and friends + */ +struct socket *ofp_get_sock_by_fd(int fd); + +int sockargs(odp_packet_t *mp, char * buf, int buflen, int type); +int getsockaddr(struct ofp_sockaddr **namp, char * uaddr, size_t len); +void ofp_soabort(struct socket *so); +int ofp_soaccept(struct socket *so, struct ofp_sockaddr **nam); +int socheckuid(struct socket *so, ofp_uid_t uid); +int ofp_sobind(struct socket *so, struct ofp_sockaddr *nam, struct thread *td); +int ofp_soclose(struct socket *so); +int ofp_soconnect(struct socket *so, struct ofp_sockaddr *nam, struct thread *td); +int soconnect2(struct socket *so1, struct socket *so2); +int socow_setup(odp_packet_t m0, struct uio *uio); +int ofp_socreate(int dom, struct socket **aso, int type, int proto, struct thread *td); +int ofp_sodisconnect(struct socket *so); +struct ofp_sockaddr *sodupsockaddr(const struct ofp_sockaddr *sa, int mflags); +void ofp_sofree(struct socket *so); /* invoked by the sorele() macro when so_count drops to zero */ +void ofp_sohasoutofband(struct socket *so); +int ofp_solisten(struct socket *so, int backlog, struct thread *td); +void ofp_solisten_proto(struct socket *so, int backlog); +int ofp_solisten_proto_check(struct socket *so); +struct socket * + ofp_sonewconn(struct socket *head, int connstatus); +struct socket * + sonewconn_passive_client(struct socket *head, int connstatus); + +int sopoll(struct socket *so, int events, struct ofp_ucred *active_cred, + struct thread *td); +int sopoll_generic(struct socket *so, int events, + struct ofp_ucred *active_cred, struct thread *td); +int ofp_soreceive(struct socket *so, struct ofp_sockaddr **paddr, struct uio *uio, + odp_packet_t *mp0, odp_packet_t *controlp, int *flagsp); +int soreceive_stream(struct socket *so, struct ofp_sockaddr **paddr, + struct uio *uio, 
odp_packet_t *mp0, odp_packet_t *controlp, + int *flagsp); +int ofp_soreceive_dgram(struct socket *so, struct ofp_sockaddr **paddr, + struct uio *uio, odp_packet_t *mp0, odp_packet_t *controlp, + int *flagsp); +int ofp_soreceive_generic(struct socket *so, struct ofp_sockaddr **paddr, + struct uio *uio, odp_packet_t *mp0, odp_packet_t *controlp, + int *flagsp); +int ofp_soreserve(struct socket *so, uint64_t sndcc, uint64_t rcvcc); +void sorflush(struct socket *so); +#if 0 /* disabled copy of the ofp_sosend prototype that follows; same parameter list */ +int ofp_sosend(struct socket *so, struct ofp_sockaddr *addr, struct uio *uio, + odp_packet_t top, odp_packet_t control, int flags, + struct thread *td); +#endif +int ofp_sosend(struct socket *so, struct ofp_sockaddr *addr, struct uio *uio, + odp_packet_t top, odp_packet_t control, int flags, struct thread *td); + +int ofp_sosend_dgram(struct socket *so, struct ofp_sockaddr *addr, + struct uio *uio, odp_packet_t top, odp_packet_t control, + int flags, struct thread *td); + +int ofp_sosend_generic(struct socket *so, struct ofp_sockaddr *addr, + struct uio *uio, odp_packet_t top, odp_packet_t control, + int flags, struct thread *td); +int ofp_soshutdown(struct socket *so, int how); +void sotoxsocket(struct socket *so, struct xsocket *xso); +void ofp_soupcall_clear(struct socket *so, int which); /* which: OFP_SO_RCV or OFP_SO_SND */ +void ofp_soupcall_set(struct socket *so, int which, + int (*func)(struct socket *, void *, int), void *arg); /* func has the same type as the sb_upcall member of struct sockbuf */ +int souserctx_alloc(struct socket *so); +void ofp_sowakeup(struct socket *so, struct sockbuf *sb); /* per the sorwakeup/sowwakeup comment in this header, responsible for releasing the sockbuf lock when called */ +int selsocket(struct socket *so, int events, struct timeval *tv, + struct thread *td); + +/* + * Accept filter functions (duh). 
+ */ +int accept_filt_add(struct accept_filter *filt); +int accept_filt_del(char *name); +struct accept_filter *accept_filt_get(char *name); +#ifdef ACCEPT_FILTER_MOD +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet_accf); +#endif +int accept_filt_generic_mod_event(module_t mod, int event, void *data); +#endif + +void ofp_socket_alloc_shared_memory(odp_pool_t pool); +void ofp_socket_lookup_shared_memory(void); +odp_packet_t ofp_packet_alloc(uint32_t len); +odp_rwlock_t *ofp_accept_mtx(void); +void ofp_accept_lock(void); /* used by ACCEPT_LOCK() */ +void ofp_accept_unlock(void); /* used by ACCEPT_UNLOCK() */ + +#define maxsockets 16384 /* NOTE(review): "extern int maxsockets;" is declared earlier in this header; from here on the identifier expands to 16384 -- confirm the variable is not defined anywhere after including this file */ + +/* Pools: FreeBSD uma_* zone calls mapped onto ofp_socket_pool_*. uma_zcreate forwards only name and size, uma_zalloc only zone, uma_zfree only item; uma_zdestroy and uma_zone_set_max are no-ops. */ + +#define uma_zcreate(name, size, ctor, dtor, uminit, fini, align, flags) \ + ofp_socket_pool_create(name, size) + +#define uma_zdestroy(zone) do {} while (0) + +#define uma_zalloc(zone, flags) \ + ofp_socket_pool_alloc(zone) + +#define uma_zfree(zone, item) \ + ofp_socket_pool_free(item) + +#define uma_zone_set_max(zone, nitems) do {} while (0) + +int ofp_socket_pool_create(const char *name, int size); +void *ofp_socket_pool_alloc(int zone); +void ofp_socket_pool_free(void *item); + +/* Emulation for BSD wakeup mechanism */ +int ofp_msleep(void *channel, odp_rwlock_t *mtx, int priority, const char *wmesg, + uint32_t timeout); +int ofp_wakeup(void *channel); +int ofp_wakeup_one(void *channel); +int ofp_send_sock_event(struct socket *head, struct socket *so, int event); + +#endif /* !_SYS_SOCKETVAR_H_ */ diff --git a/include/ofpi_sockopt.h b/include/ofpi_sockopt.h new file mode 100644 index 00000000..5c089be9 --- /dev/null +++ b/include/ofpi_sockopt.h @@ -0,0 +1,68 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 + * + * $FreeBSD: release/9.1.0/sys/sys/sockopt.h 180948 2008-07-29 07:45:05Z kmacy $ + */ +#ifndef _SYS_SOCKOPT_H_ +#define _SYS_SOCKOPT_H_ + +struct socket; + +/* + * Argument structure for sosetopt et seq. This is in the KERNEL + * section because it will never be visible to user code. + * + * NOTE(review): this header uses odp_packet_t and size_t but includes + * nothing itself; presumably it must be included after the ODP headers + * -- confirm. + */ +enum sopt_dir { SOPT_GET, SOPT_SET }; /* direction of the request described by a struct sockopt */ + +struct sockopt { + enum sopt_dir sopt_dir; /* is this a get or a set? 
*/ + int sopt_level; /* second arg of [gs]etsockopt */ + int sopt_name; /* third arg of [gs]etsockopt */ + void *sopt_val; /* fourth arg of [gs]etsockopt */ + size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt: held by value here, whereas so_getsockopt() takes a size_t pointer */ +#if 0 + struct thread *sopt_td; /* calling thread or null if kernel */ +#endif +}; + +int ofp_sosetopt(struct socket *so, struct sockopt *sopt); +int ofp_sogetopt(struct socket *so, struct sockopt *sopt); +int sogetopt(struct socket *so, struct sockopt *sopt); +int soopt_getm(struct sockopt *sopt, odp_packet_t *mp); +int soopt_mcopyin(struct sockopt *sopt, odp_packet_t m); +int soopt_mcopyout(struct sockopt *sopt, odp_packet_t m); +int do_getopt_accept_filter(struct socket *so, struct sockopt *sopt); +int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt); +int so_setsockopt(struct socket *so, int level, int optname, + const void *optval, size_t optlen); +int so_getsockopt(struct socket *so, int level, int optname, + void *optval, size_t *optlen); + +#endif /* _SYS_SOCKOPT_H_ */ diff --git a/include/ofpi_sockstate.h b/include/ofpi_sockstate.h new file mode 100644 index 00000000..e225dcdf --- /dev/null +++ b/include/ofpi_sockstate.h @@ -0,0 +1,90 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 + * + * $FreeBSD: release/9.1.0/sys/sys/sockstate.h 180948 2008-07-29 07:45:05Z kmacy $ + */ +#ifndef _SYS_SOCKTATE_H_ +#define _SYS_SOCKTATE_H_ + +/* + * Socket state bits. + * + * Historically, these bits were all kept in the so_state field. For + * locking reasons, they are now in multiple fields, as they are + * locked differently. so_state maintains basic socket state protected + * by the socket lock. so_qstate holds information about the socket + * accept queues. Each socket buffer also has a state field holding + * information relevant to that socket buffer (can't send, rcv). Many + * fields will be read without locks to improve performance and avoid + * lock order issues. However, this approach must be used with caution.
+ */ +#define SS_NOFDREF 0x0001 /* no file table ref any more */ +#define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ +#define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ +#define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ +#define SS_NBIO 0x0100 /* non-blocking ops */ +#define SS_ASYNC 0x0200 /* async i/o notify */ +#define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ +#define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ + +/* + * Protocols can mark a socket as SS_PROTOREF to indicate that, following + * pru_detach, they still want the socket to persist, and will free it + * themselves when they are done. Protocols should only ever call ofp_sofree() + * following setting this flag in pru_detach(), and never otherwise, as + * ofp_sofree() bypasses socket reference counting. + */ +#define SS_PROTOREF 0x4000 /* strong protocol reference */ + +#define SS_EVENT 0x8000 /* socket is handled by event mechanism */ + +/* + * Socket state bits now stored in the socket buffer state field. 
+ */ +#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ +#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ +#define SBS_RCVATMARK 0x0040 /* at mark on input */ + + +struct socket; + +void ofp_soisconnected(struct socket *so); +void ofp_soisconnecting(struct socket *so); +void ofp_soisdisconnected(struct socket *so); +void ofp_soisdisconnecting(struct socket *so); +void ofp_socantrcvmore(struct socket *so); +void ofp_socantrcvmore_locked(struct socket *so); +void ofp_socantsendmore(struct socket *so); +void ofp_socantsendmore_locked(struct socket *so); +void ofp_soisdisconnecting(struct socket *so); + +#endif /* _SYS_SOCKTATE_H_ */ diff --git a/include/ofpi_stat.h b/include/ofpi_stat.h new file mode 100644 index 00000000..fb34282c --- /dev/null +++ b/include/ofpi_stat.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _STAT_H_ +#define _STAT_H_ + +#include "api/ofp_stat.h" + +void ofp_stat_alloc_shared_memory(void); +void ofp_stat_lookup_shared_memory(void); + +#define OFP_UPDATE_PACKET_STAT(_s, _n) do { \ + struct ofp_packet_stat *st = ofp_get_packet_statistics(); \ + if (st) \ + st->per_core[odp_cpu_id()]._s += _n; \ +} while (0) + +extern unsigned long int ofp_stat_flags; + +#define _UPDATE_LATENCY(_core, _current_cycle, _n) {\ + if (st->per_core[_core].last_input_cycles) \ + st->per_core[_core].input_latency[ilog2(odp_time_diff_cycles(\ + st->per_core[_core].last_input_cycles, \ + _current_cycle))] += _n; \ + st->per_core[_core].last_input_cycles = _current_cycle;\ +} + +#define OFP_UPDATE_PACKET_LATENCY_STAT(_n) {\ + if (ofp_stat_flags & OFP_STAT_COMPUTE_LATENCY) { \ + struct ofp_packet_stat *st = ofp_get_packet_statistics(); \ + if (st) { \ + uint64_t _in_cycles = odp_time_cycles(); \ + int _core = odp_cpu_id(); \ + _UPDATE_LATENCY(_core, _in_cycles, _n) \ + } \ + } \ +} + +#endif diff --git 
a/include/ofpi_sysctl.h b/include/ofpi_sysctl.h new file mode 100644 index 00000000..cc149cf2 --- /dev/null +++ b/include/ofpi_sysctl.h @@ -0,0 +1,83 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2015, Nokia Solutions and Networks + * Copyright (c) 2015, ENEA Software AB + * + * This code is derived from software contributed to Berkeley by + * Mike Karels at Berkeley Software Design, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _SYS_SYSCTL_H_ +#define _SYS_SYSCTL_H_ + +#include "ofpi_queue.h" +#include "api/ofp_sysctl.h" + +enum sysinit_sub_id { + SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/ + SI_SUB_KMEM = 0x1800000, /* kernel memory*/ +}; + +enum sysinit_elem_order { + SI_ORDER_FIRST = 0x0000000, /* first*/ + SI_ORDER_SECOND = 0x0000001, /* second*/ + SI_ORDER_THIRD = 0x0000002, /* third*/ + SI_ORDER_FOURTH = 0x0000003, /* fourth*/ + SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ + SI_ORDER_ANY = 0xfffffff /* last*/ +}; + +typedef void (*sysinit_nfunc_t)(void *); +typedef void (*sysinit_cfunc_t)(const void *); + +struct sysinit { + enum sysinit_sub_id subsystem; /* subsystem identifier*/ + enum sysinit_elem_order order; /* init order within subsystem*/ + sysinit_cfunc_t func; /* function */ + const void *udata; /* multiplexer/argument */ +}; + +/* definitions for ofp_sysctl_req 'lock' member */ +#define REQ_UNWIRED 1 +#define REQ_WIRED 2 + +/* definitions for ofp_sysctl_req 'flags' member */ +#if defined(__amd64__) || defined(__ia64__) || defined(__powerpc64__) +#define SCTL_MASK32 1 /* 32 bit emulation */ +#endif + +/* Dynamic oid handling */ +int ofp_kernel_sysctl(struct thread *td, const int *name, unsigned int namelen, void *old, + size_t *oldlenp, const void *new, size_t newlen, + size_t *retval, int flags); +int ofp_sysctl_find_oid(const int *name, unsigned int namelen, struct ofp_sysctl_oid **noid, + int *nindx, struct ofp_sysctl_req *req); +void ofp_register_sysctls(void); +void ofp_sysctl_write_tree(int fd); + +#endif /* !_SYS_SYSCTL_H_ */ diff --git a/include/ofpi_systm.h b/include/ofpi_systm.h new file mode 100644 index 00000000..1343dff1 --- /dev/null +++ b/include/ofpi_systm.h @@ -0,0 +1,244 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_SYSTM_H_ +#define _OFPI_SYSTM_H_ + +#include "odp/std_types.h" +#include "odp/packet.h" +#include 
"odp/spinlock.h" +#include "odp/rwlock.h" +#include "odp/thread.h" +#include "odp/cpu.h" +#include "ofpi_socket.h" + +#define SHM_PKT_POOL_BUF_SIZE 1856 +#define MCLBYTES SHM_PKT_POOL_BUF_SIZE +#define RLIM_INFINITY ((uint64_t)(((uint64_t)1 << 63) - 1)) + +/* + * Flags indicating hw checksum support and sw checksum requirements. This + * field can be directly tested against if_data.ifi_hwassist. + */ +#define CSUM_IP 0x0001 /* will csum IP */ +#define CSUM_TCP 0x0002 /* will csum TCP */ +#define CSUM_UDP 0x0004 /* will csum UDP */ +#define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ +#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ +#define CSUM_TSO 0x0020 /* will do TSO */ +#define CSUM_SCTP 0x0040 /* will csum SCTP */ +#define CSUM_SCTP_IPV6 0x0080 /* will csum IPv6/SCTP */ + +#define CSUM_IP_CHECKED 0x0100 /* did csum IP */ +#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */ +#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ +#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ +#define CSUM_SCTP_VALID 0x1000 /* SCTP checksum is valid */ +#define CSUM_UDP_IPV6 0x2000 /* will csum IPv6/UDP */ +#define CSUM_TCP_IPV6 0x4000 /* will csum IPv6/TCP */ + +#ifndef OFP__UID_T_DECLARED +typedef __ofp_uid_t ofp_uid_t; +#define OFP__UID_T_DECLARED +#endif /*OFP__UID_T_DECLARED*/ + +struct ofp_ucred { + ofp_uid_t cr_uid; /* effective user id */ +}; + +struct thread { + struct proc { + int p_fibnum; /* in this routing domain XXX MRT */ + } td_proc; + struct ofp_ucred *td_ucred; /* (k) Reference to credentials. 
*/ +}; + +static inline struct ofp_ifnet *ofp_packet_interface(odp_packet_t pkt) { + struct ofp_ifnet *dev = odp_packet_user_ptr(pkt); + return dev; +} + + +odp_packet_t odp_packet_ensure_contiguous(odp_packet_t pkt, int len); +int odp_packet_flags(odp_packet_t pkt); + +typedef int (*uma_init)(void *mem, int size, int flags); +typedef void (*uma_fini)(void *mem, int size); + +#define HASH_NOWAIT 0x00000001 +#define HASH_WAITOK 0x00000002 +void *ofp_hashinit(int count, void *type, uint64_t *hashmask); +void *ofp_hashinit_flags(int elements, void *type, uint64_t *hashmask, int flags); +void *ofp_phashinit(int count, void *type, uint64_t *nentries); +void ofp_hashdestroy(void *vhashtbl, void *type, uint64_t hashmask); + +static __inline int imax(int a, int b) { return (a > b ? a : b); } +static __inline int imin(int a, int b) { return (a < b ? a : b); } +static __inline long lmax(long a, long b) { return (a > b ? a : b); } +static __inline long lmin(long a, long b) { return (a < b ? a : b); } +static __inline unsigned int max(unsigned int a, unsigned int b) { return (a > b ? a : b); } +static __inline unsigned int min(unsigned int a, unsigned int b) { return (a < b ? a : b); } +#if 0 +static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); } +static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? a : b); } +#endif +static __inline unsigned long ulmax(unsigned long a, unsigned long b) { return (a > b ? a : b); } +static __inline unsigned long ulmin(unsigned long a, unsigned long b) { return (a < b ? a : b); } +#if 0 +static __inline off_t omax(off_t a, off_t b) { return (a > b ? a : b); } +static __inline off_t omin(off_t a, off_t b) { return (a < b ? a : b); } +#endif + +#if 0 +static __inline int abs(int a) { return (a < 0 ? -a : a); } +static __inline long labs(long a) { return (a < 0 ? -a : a); } +static __inline quad_t qabs(quad_t a) { return (a < 0 ? 
-a : a); } +#endif +/* Stub: both arguments are ignored; no checksum data is stored. */ +static inline void odp_packet_set_csum_data(odp_packet_t pkt, int val) +{ + (void)pkt; (void)val; +} +/* Stub: always returns 0. */ +static inline int odp_packet_csum_data(odp_packet_t pkt) +{ + (void)pkt; + return 0; +} +/* Stub: both arguments are ignored; no checksum flags are stored. */ +static inline void odp_packet_set_csum_flags(odp_packet_t pkt, int val) +{ + (void)pkt; (void)val; +} +/* Stub: always returns 0. */ +static inline int odp_packet_csum_flags(odp_packet_t pkt) +{ + (void)pkt; + return 0; +} +/* Stub: always returns 0, so no packet is reported as broadcast. */ +static inline int odp_packet_is_bcast(odp_packet_t pkt) +{ + (void)pkt; + return 0; +} +/* Stub: always returns 0, so no packet is reported as multicast. */ +static inline int odp_packet_is_mcast(odp_packet_t pkt) +{ + (void)pkt; + return 0; +} +/* No-op stub for tcp_hc_update(): both arguments are ignored. */ +struct in_conninfo; +struct hc_metrics_lite; +static inline void tcp_hc_update(struct in_conninfo *c, struct hc_metrics_lite *a) +{ + (void)c; (void)a; +} +/* Write lock that its owning CPU may re-take: cnt is the nesting depth and */ +/* owner the holder's odp_cpu_id(); file/line name the last init/lock site. */ +struct ofp_rec_rwlock { + odp_rwlock_t lock; + odp_spinlock_t splock; + int cnt; + int owner; + const char *file; + int line; +}; +/* Initialise both ODP locks and reset the bookkeeping (no holder, depth 0). */ +static inline void ofp_rec_init(struct ofp_rec_rwlock *lock, + const char *file, int line) +{ + lock->cnt = 0; lock->owner = -1; + lock->file = file; lock->line = line; + odp_spinlock_init(&lock->splock); + odp_rwlock_init(&lock->lock); +} +/* Logging hook used by the lock helpers; it expands to an empty statement. */ +#define OFP_LOG_Z(a...)
do {} while (0) +/* Take the write lock; the CPU recorded as owner re-enters without blocking. */ +static inline void ofp_rec_wlock(struct ofp_rec_rwlock *lock, + const char *file, int line) +{ + odp_spinlock_lock(&lock->splock); +/* NOTE(review): a negative rwlock count is read as "write-locked" - confirm vs ODP. */ + if ((int32_t)(lock->lock.cnt.v) < 0) { + OFP_LOG_Z("lock=%p is already locked by %s:%d cpu=%d cnt=%d\n", + lock, lock->file, lock->line, lock->owner, lock->cnt); + if (odp_cpu_id() != lock->owner) { + odp_spinlock_unlock(&lock->splock); + odp_rwlock_write_lock(&lock->lock); + } else { + odp_spinlock_unlock(&lock->splock); + } + } else { + odp_rwlock_write_lock(&lock->lock); + odp_spinlock_unlock(&lock->splock); + } +/* Record the holder and bump the nesting depth; splock is already released here. */ + lock->owner = odp_cpu_id(); + lock->cnt++; + lock->file = file; + lock->line = line; +} +/* Drop one nesting level; the rwlock is released once the depth reaches zero. */ +static inline void ofp_rec_wunlock(struct ofp_rec_rwlock *lock, + const char *file, int line) +{ + (void)file; + (void)line; + + if (--lock->cnt == 0) { + lock->owner = -1; + odp_rwlock_write_unlock(&lock->lock); + } else + OFP_LOG_Z("lock=%p still locked, cnt=%d\n", + lock, lock->cnt); +} +/* Plain read lock on the underlying rwlock; file and line are unused. */ +static inline void ofp_rec_rlock(struct ofp_rec_rwlock *lock, + const char *file, int line) +{ + (void)file; + (void)line; + odp_rwlock_read_lock(&lock->lock); +} +/* Plain read unlock on the underlying rwlock; file and line are unused. */ +static inline void ofp_rec_runlock(struct ofp_rec_rwlock *lock, + const char *file, int line) +{ + (void)file; + (void)line; + odp_rwlock_read_unlock(&lock->lock); +} +/* Returns 0 if another CPU holds the write lock; otherwise takes it and returns 1. */ +static inline int ofp_rec_try_wlock(struct ofp_rec_rwlock *lock, + const char *file, int line) +{ + odp_spinlock_lock(&lock->splock); +/* NOTE(review): the else path calls the blocking write_lock, so "try" may wait - confirm. */ + if ((int32_t)(lock->lock.cnt.v) < 0) { + OFP_LOG_Z("try lock=%p is already locked by %s:%d cpu=%d cnt=%d\n", + lock, lock->file, lock->line, lock->owner, lock->cnt); + if (odp_cpu_id() != lock->owner) { + odp_spinlock_unlock(&lock->splock); + return 0; + } + odp_spinlock_unlock(&lock->splock); + } else { + odp_rwlock_write_lock(&lock->lock); + odp_spinlock_unlock(&lock->splock); + } + lock->owner = odp_cpu_id(); + lock->cnt++; + lock->file = file; + lock->line = line; + return 1; +} + + +#endif diff --git a/include/ofpi_tcp.h b/include/ofpi_tcp.h new file
mode 100644 index 00000000..9675f3de --- /dev/null +++ b/include/ofpi_tcp.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet/tcp.h 232945 2012-03-13 20:37:57Z glebius $ + */ + +#ifndef _OFPI_TCP_H_ +#define _OFPI_TCP_H_ + +#include "ofpi_ip_var.h" + +#include "api/ofp_tcp.h" + +#endif /* !_OFPI_TCP_H_ */ diff --git a/include/ofpi_tcp6_var.h b/include/ofpi_tcp6_var.h new file mode 100644 index 00000000..f4f9354d --- /dev/null +++ b/include/ofpi_tcp6_var.h @@ -0,0 +1,86 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 + * $FreeBSD: release/9.1.0/sys/netinet6/tcp6_var.h 195727 2009-07-16 21:13:04Z rwatson $ + */ + +#ifndef _NETINET_TCP6_VAR_H_ +#define _NETINET_TCP6_VAR_H_ + +#include + + +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet6_tcp6); + +VNET_DECLARE(int, ofp_tcp_v6mssdflt); /* XXX */ +#define V_tcp_v6mssdflt VNET(ofp_tcp_v6mssdflt) +#endif + +struct ofp_ip6_hdr; +/* +void tcp6_ctlinput __P((int, struct sockaddr *, void *)); +void tcp6_init __P((void));*/ +int ofp_tcp6_input __P((odp_packet_t, int *, int *)); +/* +struct rtentry *tcp_rtlookup6(struct in_conninfo *); +*/ + +extern struct pr_usrreqs ofp_tcp6_usrreqs; + +#endif /* _NETINET_TCP6_VAR_H_ */ diff --git a/include/ofpi_tcp_fsm.h b/include/ofpi_tcp_fsm.h new file mode 100644 index 00000000..bc09f6b5 --- /dev/null +++ b/include/ofpi_tcp_fsm.h @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet/tcp_fsm.h 171656 2007-07-30 11:06:42Z des $ + */ + +#ifndef _NETINET_TCP_FSM_H_ +#define _NETINET_TCP_FSM_H_ + +/* + * TCP FSM state definitions. + * + * Per RFC793, September, 1981. 
+ */ + +#define TCP_NSTATES 11 + +#define TCPS_CLOSED 0 /* closed */ +#define TCPS_LISTEN 1 /* listening for connection */ +#define TCPS_SYN_SENT 2 /* active, have sent syn */ +#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */ +/* states < TCPS_ESTABLISHED are those where connections not established */ +#define TCPS_ESTABLISHED 4 /* established */ +#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +/* states > TCPS_CLOSE_WAIT are those where user has closed */ +#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ +/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ +#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ + +/* for KAME src sync over BSD*'s */ +#define TCP6_NSTATES TCP_NSTATES +#define TCP6S_CLOSED TCPS_CLOSED +#define TCP6S_LISTEN TCPS_LISTEN +#define TCP6S_SYN_SENT TCPS_SYN_SENT +#define TCP6S_SYN_RECEIVED TCPS_SYN_RECEIVED +#define TCP6S_ESTABLISHED TCPS_ESTABLISHED +#define TCP6S_CLOSE_WAIT TCPS_CLOSE_WAIT +#define TCP6S_FIN_WAIT_1 TCPS_FIN_WAIT_1 +#define TCP6S_CLOSING TCPS_CLOSING +#define TCP6S_LAST_ACK TCPS_LAST_ACK +#define TCP6S_FIN_WAIT_2 TCPS_FIN_WAIT_2 +#define TCP6S_TIME_WAIT TCPS_TIME_WAIT + +#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) +#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) +#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) + +#ifdef OFP_TCPOUTFLAGS +/* + * Flags used when sending segments in ofp_tcp_output. Basic flags (TH_RST, + * TH_ACK,TH_SYN,TH_FIN) are totally determined by state, with the proviso + * that TH_FIN is sent only if all data queued for output is included in the + * segment. 
+ */ +static uint8_t tcp_outflags[TCP_NSTATES] = { + OFP_TH_RST|OFP_TH_ACK, /* 0, CLOSED */ + 0, /* 1, LISTEN */ + OFP_TH_SYN, /* 2, SYN_SENT */ + OFP_TH_SYN|OFP_TH_ACK, /* 3, SYN_RECEIVED */ + OFP_TH_ACK, /* 4, ESTABLISHED */ + OFP_TH_ACK, /* 5, CLOSE_WAIT */ + OFP_TH_FIN|OFP_TH_ACK, /* 6, FIN_WAIT_1 */ + OFP_TH_FIN|OFP_TH_ACK, /* 7, CLOSING */ + OFP_TH_FIN|OFP_TH_ACK, /* 8, LAST_ACK */ + OFP_TH_ACK, /* 9, FIN_WAIT_2 */ + OFP_TH_ACK, /* 10, TIME_WAIT */ +}; +#endif + +#ifdef KPROF +int tcp_acounts[TCP_NSTATES][OFP_PRU_NREQ]; +#endif + +#ifdef TCPSTATES +static char const * const tcpstates[] = { + "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD", + "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING", + "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT", +}; +#endif + +#endif diff --git a/include/ofpi_tcp_offload.h b/include/ofpi_tcp_offload.h new file mode 100644 index 00000000..87f1f415 --- /dev/null +++ b/include/ofpi_tcp_offload.h @@ -0,0 +1,360 @@ +/*- + * Copyright (c) 2007, Chelsio Inc. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of the Chelsio Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: release/9.1.0/sys/netinet/tcp_offload.h 218909 2011-02-21 09:01:34Z brucec $ + */ + +#ifndef _NETINET_TCP_OFFLOAD_H_ +#define _NETINET_TCP_OFFLOAD_H_ + +/* + * A driver publishes that it provides offload services + * by setting IFCAP_TOE in the ifnet. The offload connect + * will bypass any further work if the interface that a + * connection would use does not support TCP offload. + * + * The TOE API assumes that the tcp offload engine can offload + * the entire connection from set up to teardown, with some provision + * being made to allow the software stack to handle time wait. If + * the device does not meet these criteria, it is the driver's responsibility + * to overload the functions that it needs to in ofp_tcp_usrreqs and make + * its own calls to ofp_tcp_output if it needs to do so. + * + * There is currently no provision for the device advertising the congestion + * control algorithms it supports as there is currently no API for querying + * an operating system for the protocols that it has loaded. This is a desirable + * future extension. + * + * + * + * It is assumed that individuals deploying TOE will want connections + * to be offloaded without software changes so all connections on an + * interface providing TOE are offloaded unless the OFP_SO_NO_OFFLOAD + * flag is set on the socket.
+ * + * + * The toe_usrreqs structure constitutes the TOE driver's + * interface to the TCP stack for functionality that doesn't + * interact directly with userspace. If one wants to provide + * (optional) functionality to do zero-copy to/from + * userspace one still needs to override ofp_soreceive/ofp_sosend + * with functions that fault in and pin the user buffers. + * + * + tu_send + * - tells the driver that new data may have been added to the + * socket's send buffer - the driver should not fail if the + * buffer is in fact unchanged + * - the driver is responsible for providing credits (bytes in the send window) + * back to the socket by calling ofp_sbdrop() as segments are acknowledged. + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. + * + * + tu_rcvd + * - returns credits to the driver and triggers window updates + * to the peer (a credit as used here is a byte in the peer's receive window) + * - the driver is expected to determine how many bytes have been + * consumed and credit that back to the card so that it can grow + * the window again by maintaining its own state between invocations. + * - In principle this could be used to shrink the window as well as + * grow the window, although it is not used for that now. + * - this function needs to correctly handle being called any number of + * times without any bytes being consumed from the receive buffer. + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. 
+ * + * + tu_disconnect + * - tells the driver to send FIN to peer + * - driver is expected to send the remaining data and then do a clean half close + * - disconnect implies at least half-close so only send, reset, and detach + * are legal + * - the driver is expected to handle transition through the shutdown + * state machine and allow the stack to support OFP_SO_LINGER. + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. + * + * + tu_reset + * - closes the connection and sends a RST to peer + * - driver is expected to trigger an RST and detach the toepcb + * - no further calls are legal after reset + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. + * + * The following fields in the tcpcb are expected to be referenced by the driver: + * + iss + * + rcv_nxt + * + rcv_wnd + * + snd_isn + * + snd_max + * + snd_nxt + * + snd_una + * + t_flags + * + t_inpcb + * + t_maxseg + * + t_toe + * + * The following fields in the inpcb are expected to be referenced by the driver: + * + inp_lport + * + inp_fport + * + inp_laddr + * + inp_faddr + * + inp_socket + * + inp_ip_tos + * + * The following fields in the socket are expected to be referenced by the + * driver: + * + so_comp + * + so_error + * + so_linger + * + so_options + * + so_rcv + * + so_snd + * + so_state + * + so_timeo + * + * These functions all return 0 on success and can return the following errors + * as appropriate: + * + OFP_EPERM: + * + OFP_ENOBUFS: memory allocation failed + * + OFP_EMSGSIZE: MTU changed during the call + * + OFP_EHOSTDOWN: + * + OFP_EHOSTUNREACH: + * + OFP_ENETDOWN: + * + OFP_ENETUNREACH: the peer is no longer reachable + * + * + tu_detach + * - tells driver that the socket is going away so disconnect + * the toepcb and free
appropriate resources + * - allows the driver to cleanly handle the case of connection state + * outliving the socket + * - no further calls are legal after detach + * - the driver is expected to provide its own synchronization between + * detach and receiving new data. + * + * + tu_syncache_event + * - even if it is not actually needed, the driver is expected to + * call ofp_syncache_add for the initial SYN and then ofp_syncache_expand + * for the SYN,ACK + * - tells driver that a connection either has not been added or has + * been dropped from the syncache + * - the driver is expected to maintain state that lives outside the + * software stack so the syncache needs to be able to notify the + * toe driver that the software stack is not going to create a connection + * for a received SYN + * - The driver is responsible for any synchronization required between + * the syncache dropping an entry and the driver processing the SYN,ACK. + * + */ + +#define TCP_OFFLOAD_DISABLE 1 + +struct toe_usrreqs { + int (*tu_send)(struct tcpcb *tp); + int (*tu_rcvd)(struct tcpcb *tp); + int (*tu_disconnect)(struct tcpcb *tp); + int (*tu_reset)(struct tcpcb *tp); + void (*tu_detach)(struct tcpcb *tp); + void (*tu_syncache_event)(int event, void *toep); +}; + +/* + * Proxy for struct tcpopt between TOE drivers and TCP functions. + */ +struct toeopt { + uint64_t to_flags; /* see tcpopt in tcp_var.h */ + uint16_t to_mss; /* maximum segment size */ + uint8_t to_wscale; /* window scaling */ + + uint8_t _pad1; /* explicit pad for 64bit alignment */ + uint32_t _pad2; /* explicit pad for 64bit alignment */ + uint64_t _pad3[4]; /* TBD */ +}; + +#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */ +#define TOE_SC_DROP 2 /* connection was timed out */ + +/* + * Because listen is a one-to-many relationship (a socket can be listening + * on all interfaces on a machine some of which may be using different TCP + * offload devices), listen uses a publish/subscribe mechanism. 
The TCP + * offload driver registers a listen notification function with the stack. + * When a listen socket is created all TCP offload devices are notified + * so that they can do the appropriate set up to offload connections on the + * port to which the socket is bound. When the listen socket is closed, + * the offload devices are notified so that they will stop listening on that + * port and free any associated resources as well as sending RSTs on any + * connections in the SYN_RCVD state. + * + */ + +typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *); +typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *); + +#if 0 /* HJo */ +EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn); +EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn); +#endif + +/* + * Check if the socket can be offloaded by the following steps: + * - determine the egress interface + * - check the interface for TOE capability and TOE is enabled + * - check if the device has resources to offload the connection + */ +int tcp_offload_connect(struct socket *so, struct ofp_sockaddr *nam); + +/* + * The tcp_output_* routines are wrappers around the toe_usrreqs calls + * which trigger packet transmission. In the non-offloaded case they + * translate to ofp_tcp_output. The tcp_offload_* routines notify TOE + * of specific events. In the non-offloaded case they are no-ops. + * + * Listen is a special case because it is a 1 to many relationship + * and there can be more than one offload driver in the system. 
+ */ + +/* + * Connection is offloaded + */ +#define tp_offload(tp) ((tp)->t_flags & TF_TOE) + +/* + * hackish way of allowing this file to also be included by TOE + * which needs to be kept ignorant of socket implementation details + */ +#ifdef _SYS_SOCKETVAR_H_ +/* + * The socket has not been marked as "do not offload" + */ +#define OFP_SO_OFFLOADABLE(so) ((so->so_options & OFP_SO_NO_OFFLOAD) == 0) + +static __inline int +tcp_output_connect(struct socket *so, struct ofp_sockaddr *nam) +{ + struct tcpcb *tp = sototcpcb(so); + int error; + (void)nam; + + /* + * If offload has been disabled for this socket or the + * connection cannot be offloaded just call ofp_tcp_output + * to start the TCP state machine. + */ +#ifndef TCP_OFFLOAD_DISABLE + if (!OFP_SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0) +#endif + error = ofp_tcp_output(tp); + return (error); +} + +static __inline int +tcp_output_send(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_send(tp)); +#endif + return (ofp_tcp_output(tp)); +} + +static __inline int +tcp_output_rcvd(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_rcvd(tp)); +#endif + return (ofp_tcp_output(tp)); +} + +static __inline int +tcp_output_disconnect(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_disconnect(tp)); +#endif + return (ofp_tcp_output(tp)); +} + +static __inline int +tcp_output_reset(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_reset(tp)); +#endif + return (ofp_tcp_output(tp)); +} + +static __inline void +tcp_offload_detach(struct tcpcb *tp) +{ + (void)tp; +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + tp->t_tu->tu_detach(tp); +#endif +} + +static __inline void +tcp_offload_listen_open(struct tcpcb *tp) +{ + (void)tp; + +#ifndef TCP_OFFLOAD_DISABLE + if (OFP_SO_OFFLOADABLE(tp->t_inpcb->inp_socket)) + 
EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp); +#endif +} + +static __inline void +tcp_offload_listen_close(struct tcpcb *tp) +{ + (void)tp; + +#ifndef TCP_OFFLOAD_DISABLE + EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp); +#endif +} +#undef OFP_SO_OFFLOADABLE +#endif /* _SYS_SOCKETVAR_H_ */ +#undef tp_offload + +void tcp_offload_twstart(struct tcpcb *tp); +struct tcpcb *tcp_offload_close(struct tcpcb *tp); +struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error); + +#endif /* _NETINET_TCP_OFFLOAD_H_ */ diff --git a/include/ofpi_tcp_seq.h b/include/ofpi_tcp_seq.h new file mode 100644 index 00000000..441f3d43 --- /dev/null +++ b/include/ofpi_tcp_seq.h @@ -0,0 +1,86 @@ +/*- + * Copyright (c) 1982, 1986, 1993, 1995 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _NETINET_TCP_SEQ_H_ +#define _NETINET_TCP_SEQ_H_ + +#include "ofpi_timer.h" + +/* + * TCP sequence numbers are 32 bit integers operated + * on with modular arithmetic. These macros can be + * used to compare such integers. + */ +#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) + +#define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b)) +#define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b)) + +/* for modulo comparisons of timestamps */ +#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) +#define TSTMP_GT(a,b) ((int)((a)-(b)) > 0) +#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Macros to initialize tcp sequence numbers for + * send and receive from initial send and receive + * sequence numbers. + */ +#define tcp_rcvseqinit(tp) \ + (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 + +#define tcp_sendseqinit(tp) \ + (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \ + (tp)->snd_recover = (tp)->iss + +/* + * Clock macros for RFC 1323 timestamps. + */ +#define TCP_TS_TO_TICKS(_t) ((_t)*1000 / OFP_TIMER_RESOLUTION_US) + +/* Timestamp wrap-around time, 24 days. */ +#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * 1000) + +/* + * tcp_ts_getticks() in ms, should be 1ms < x < 1000ms according to RFC 1323. + * We always use 1ms granularity independent of hz. 
+ */ +static __inline uint32_t +tcp_ts_getticks(void) +{ + return (ofp_timer_ticks(0) * (OFP_TIMER_RESOLUTION_US / 1000)); +} + +#endif /* _NETINET_TCP_SEQ_H_ */ diff --git a/include/ofpi_tcp_syncache.h b/include/ofpi_tcp_syncache.h new file mode 100644 index 00000000..f088d2d9 --- /dev/null +++ b/include/ofpi_tcp_syncache.h @@ -0,0 +1,135 @@ +/*- + * Copyright (c) 1982, 1986, 1993, 1994, 1995 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 + * $FreeBSD: release/9.1.0/sys/netinet/tcp_syncache.h 224151 2011-07-17 21:15:20Z bz $ + */ + +#ifndef _NETINET_TCP_SYNCACHE_H_ +#define _NETINET_TCP_SYNCACHE_H_ + +#ifndef TCP_OFFLOAD_DISABLE +#define TCP_OFFLOAD_DISABLE +#endif + +struct toeopt; + +void ofp_syncache_init(void); +int ofp_syncache_expand(struct in_conninfo *, struct tcpopt *, + struct ofp_tcphdr *, struct socket **, odp_packet_t ); +int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo, + struct ofp_tcphdr *th, struct socket **lsop, odp_packet_t m); +void ofp_syncache_add(struct in_conninfo *, struct tcpopt *, + struct ofp_tcphdr *, struct inpcb *, struct socket **, odp_packet_t , + int); +void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *, + struct ofp_tcphdr *, struct inpcb *, struct socket **, + struct toe_usrreqs *tu, void *toepcb); + +void ofp_syncache_chkrst(struct in_conninfo *, struct ofp_tcphdr *); +void ofp_syncache_badack(struct in_conninfo *); +int ofp_syncache_pcbcount(void); +void ofp_syncache_unreach(struct in_conninfo *inc, struct ofp_tcphdr *th); +//int syncache_pcblist(struct ofp_sysctl_req *req, int max_pcbs, int *pcbs_exported); + +struct syncache { + OFP_TAILQ_ENTRY(syncache) sc_hash; + struct in_conninfo sc_inc; /* addresses */ + int sc_rxttime; /* retransmit time */ + uint16_t sc_rxmits; /* retransmit counter */ + uint32_t sc_tsreflect; /* timestamp to reflect */ + 
uint32_t sc_ts; /* our timestamp to send */ + uint32_t sc_tsoff; /* ts offset w/ syncookies */ + uint32_t sc_flowlabel; /* IPv6 flowlabel */ + tcp_seq sc_irs; /* seq from peer */ + tcp_seq sc_iss; /* our ISS */ + odp_packet_t sc_ipopts; /* source route */ + uint16_t sc_peer_mss; /* peer's MSS */ + uint16_t sc_wnd; /* advertised window */ + uint8_t sc_ip_ttl; /* IPv4 TTL */ + uint8_t sc_ip_tos; /* IPv4 TOS */ + uint8_t sc_requested_s_scale:4, + sc_requested_r_scale:4; + uint16_t sc_flags; +#ifndef TCP_OFFLOAD_DISABLE + struct toe_usrreqs *sc_tu; /* TOE operations */ + void *sc_toepcb; /* TOE protocol block */ +#endif + struct label *sc_label; /* MAC label reference */ + struct ofp_ucred *sc_cred; /* cred cache for jail checks */ + uint32_t sc_spare[2]; /* UTO */ +}; + +/* + * Flags for the sc_flags field. + */ +#define SCF_NOOPT 0x01 /* no TCP options */ +#define SCF_WINSCALE 0x02 /* negotiated window scaling */ +#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */ + /* MSS is implicit */ +#define SCF_UNREACH 0x10 /* icmp unreachable received */ +#define SCF_SIGNATURE 0x20 /* send MD5 digests */ +#define SCF_SACK 0x80 /* send SACK option */ +#define SCF_ECN 0x100 /* send ECN setup packet */ +#define SCF_PASSIVE 0x200 /* connection is in passive mode */ +#define SCF_PASSIVE_SYNACK 0x400 /* SYN|ACK captured in passive mode */ +#define SCF_NO_TIMEOUT_RESET 0x800 /* don't reset timeout on dup SYN */ +#define SCF_CONVERT_ON_TIMEOUT 0x1000 /* convert from passive to active on timeout */ + +#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */ +#define SYNCOOKIE_LIFETIME 16 /* seconds */ + +struct syncache_head { + struct vnet *sch_vnet; + odp_spinlock_t sch_mtx; + OFP_TAILQ_HEAD(sch_head, syncache) sch_bucket; + struct callout sch_timer; + int sch_nextc; + uint32_t sch_length; + uint32_t sch_oddeven; + uint32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE]; + uint32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE]; + uint32_t sch_reseed; /* time_uptime, seconds */ +}; + +struct tcp_syncache { 
+ struct syncache_head *hashbase; + uma_zone_t zone; + uint32_t hashsize; + uint32_t hashmask; + uint32_t bucket_limit; + uint32_t cache_count; /* XXX: unprotected */ + uint32_t cache_limit; + uint32_t rexmt_limit; + uint32_t hash_secret; +}; + +#endif /* !_NETINET_TCP_SYNCACHE_H_ */ diff --git a/include/ofpi_tcp_timer.h b/include/ofpi_tcp_timer.h new file mode 100644 index 00000000..766adbeb --- /dev/null +++ b/include/ofpi_tcp_timer.h @@ -0,0 +1,194 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet/tcp_timer.h 232945 2012-03-13 20:37:57Z glebius $ + */ + +#ifndef _NETINET_TCP_TIMER_H_ +#define _NETINET_TCP_TIMER_H_ + +/* + * The TCPT_REXMT timer is used to force retransmissions. + * The TCP has the TCPT_REXMT timer set whenever segments + * have been sent for which ACKs are expected but not yet + * received. If an ACK is received which advances tp->snd_una, + * then the retransmit timer is cleared (if there are no more + * outstanding segments) or reset to the base value (if there + * are more ACKs expected). Whenever the retransmit timer goes off, + * we retransmit one unacknowledged segment, and do a backoff + * on the retransmit timer. + * + * The TCPT_PERSIST timer is used to keep window size information + * flowing even if the window goes shut. If all previous transmissions + * have been acknowledged (so that there are no retransmissions in progress), + * and the window is too small to bother sending anything, then we start + * the TCPT_PERSIST timer. When it expires, if the window is nonzero, + * we go to transmit state. Otherwise, at intervals send a single byte + * into the peer's window to force him to update our window information. + * We do this at most as often as TCPT_PERSMIN time intervals, + * but no more frequently than the current estimate of round-trip + * packet time. 
The TCPT_PERSIST timer is cleared whenever we receive + * a window update from the peer. + * + * The TCPT_KEEP timer is used to keep connections alive. If a + * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, + * but not yet established, then we drop the connection. Once the connection + * is established, if the connection is idle for TCPTV_KEEP_IDLE time + * (and keepalives have been enabled on the socket), we begin to probe + * the connection. We force the peer to send us a segment by sending: + * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> + * This segment is (deliberately) outside the window, and should elicit + * an ack segment in response from the peer. If, despite the TCPT_KEEP + * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE + * amount of time probing, then we drop the connection. + */ + +#include "ofpi_callout.h" + +/* + * Time constants. + */ + +#define TCPTV_MSL ( 30*hz) /* max seg lifetime (hah!) */ +#define TCPTV_SRTTBASE 0 /* base roundtrip time; + if 0, no idea yet */ +#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */ +#define TCPTV_SRTTDFLT ( 3*hz) /* assumed RTT if no info */ + +#define TCPTV_PERSMIN ( 5*hz) /* retransmit persistence */ +#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */ + +#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */ +#define TCPTV_KEEP_IDLE (120L*60L*hz) /* dflt time before probing */ +#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */ +#define TCPTV_KEEPCNT 8 /* max probes before drop */ + +#define TCPTV_REASSDL ( 5*hz) /* initial passive reassembly deadline */ + +#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */ + +/* + * Minimum retransmit timer is 3 ticks, for algorithmic stability. + * TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with + * the expected worst-case processing variances by the kernels + * representing the end points. 
Such variances do not always show + * up in the srtt because the timestamp is often calculated at + * the interface rather than at the TCP layer. This value is + * typically 50ms. However, it is also possible that delayed + * acks (typically 100ms) could create issues so we set the slop + * to 200ms to try to cover it. Note that, properly speaking, + * delayed-acks should not create a major issue for interactive + * environments which 'P'ush the last segment, at least as + * long as implementations do the required 'at least one ack + * for every two packets' for the non-interactive streaming case. + * (maybe the RTO calculation should use 2*RTT instead of RTT + * to handle the ack-every-other-packet case). + * + * The prior minimum of 1*hz (1 second) badly breaks throughput on any + * networks faster than a modem that has minor (e.g. 1%) packet loss. + */ +#define TCPTV_MIN ( hz/33 ) /* minimum allowable value */ +#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */ +#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */ + +#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ + +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ + +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ + +#define TCPTV_DELACK (hz / PR_FASTHZ / 2) /* 100ms timeout */ + +#ifdef TCPTIMERS +static const char *tcptimers[] = + { "REXMT", "PERSIST", "KEEP", "2MSL" }; +#endif + +/* + * Force a time value to be in a certain range. 
+ */ +#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \ + (tv) = (value) + ofp_tcp_rexmit_slop; \ + if ((uint64_t)(tv) < (uint64_t)(tvmin)) \ + (tv) = (tvmin); \ + if ((uint64_t)(tv) > (uint64_t)(tvmax)) \ + (tv) = (tvmax); \ +} while(0) + +struct xtcp_timer; + +struct tcp_timer { + struct callout tt_rexmt; /* retransmit timer */ + struct callout tt_persist; /* retransmit persistence */ + struct callout tt_keep; /* keepalive */ + struct callout tt_2msl; /* 2*msl TIME_WAIT timer */ + struct callout tt_delack; /* delayed ACK timer */ +}; +#define TT_DELACK 0x01 +#define TT_REXMT 0x02 +#define TT_PERSIST 0x04 +#define TT_KEEP 0x08 +#define TT_2MSL 0x10 + +#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (int)(tp)->t_keepinit : ofp_tcp_keepinit) +#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (int)(tp)->t_keepidle : ofp_tcp_keepidle) +#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (int)(tp)->t_keepintvl : ofp_tcp_keepintvl) +#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (int)(tp)->t_keepcnt : ofp_tcp_keepcnt) +#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp)) + +extern int ofp_tcp_keepinit; /* time to establish connection */ +extern int ofp_tcp_keepidle; /* time before keepalive probes begin */ +extern int ofp_tcp_keepintvl; /* time between keepalive probes */ +extern int ofp_tcp_keepcnt; /* number of keepalives */ +extern int ofp_tcp_delacktime; /* time before sending a delayed ACK */ +extern int ofp_tcp_maxpersistidle; +extern int ofp_tcp_rexmit_min; +extern int ofp_tcp_rexmit_slop; +extern int ofp_tcp_msl; +extern int tcp_ttl; /* time to live for TCP segs */ +extern int ofp_tcp_backoff[]; + +extern int ofp_tcp_finwait2_timeout; +extern int ofp_tcp_fast_finwait2_recycle; + +void tcp_timer_init(void); +void ofp_tcp_timer_2msl(void *xtp); +struct tcptw * + ofp_tcp_tw_2msl_scan(int _reuse); /* XXX temporary */ +void ofp_tcp_timer_keep(void *xtp); +void ofp_tcp_timer_persist(void *xtp); +void ofp_tcp_timer_rexmt(void *xtp); +void ofp_tcp_timer_delack(void *xtp); +void 
tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, + struct xtcp_timer *xtimer); + +#endif /* !_NETINET_TCP_TIMER_H_ */ diff --git a/include/ofpi_tcp_var.h b/include/ofpi_tcp_var.h new file mode 100644 index 00000000..3ef80b8c --- /dev/null +++ b/include/ofpi_tcp_var.h @@ -0,0 +1,717 @@ +/*- + * Copyright (c) 1982, 1986, 1993, 1994, 1995 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 + * $FreeBSD: release/9.1.0/sys/netinet/tcp_var.h 235051 2012-05-05 07:55:50Z glebius $ + */ + +#ifndef _NETINET_TCP_VAR_H_ +#define _NETINET_TCP_VAR_H_ + +#include "ofpi_tcp.h" +#include "ofpi_vnet.h" + +/* + * Kernel variables for tcp. + */ + +VNET_DECLARE(int, ofp_tcp_do_rfc1323); +#define V_tcp_do_rfc1323 VNET(ofp_tcp_do_rfc1323) + +/* TCP segment queue entry */ +struct tseg_qent { + OFP_LIST_ENTRY(tseg_qent) tqe_q; + int tqe_len; /* TCP segment data length */ + struct ofp_tcphdr *tqe_th; /* a pointer to tcp header */ + odp_packet_t tqe_m; /* mbuf contains packet */ +#ifdef PASSIVE_INET + OFP_TAILQ_ENTRY(tseg_qent) tqe_ageq; + int tqe_ticks; /* ticks when queued */ +#endif +}; +OFP_LIST_HEAD(tsegqe_head, tseg_qent); + +struct sackblk { + tcp_seq start; /* start seq no. of sack block */ + tcp_seq end; /* end seq no. */ +}; + +struct sackhole { + tcp_seq start; /* start seq no. of hole */ + tcp_seq end; /* end seq no. */ + tcp_seq rxmit; /* next seq. 
no in hole to be retransmitted */ + OFP_TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */ +}; + +struct sackhint { + struct sackhole *nexthole; + int sack_bytes_rexmit; + tcp_seq last_sack_ack; /* Most recent/largest sacked ack */ + + int ispare; /* explicit pad for 64bit alignment */ + uint64_t _pad[2]; /* 1 sacked_bytes, 1 TBD */ +}; + +struct tcptemp { + uint8_t tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ + struct ofp_tcphdr tt_t; +}; + +#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ + +/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ +#ifdef INET6 +# if 1 +#define ND6_HINT(tp) +# else +#define ND6_HINT(tp) \ +do { \ + if ((tp) && (tp)->t_inpcb && \ + ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \ + nd6_nud_hint(NULL, NULL, 0); \ +} while (0) +# endif /*1*/ +#else +#define ND6_HINT(tp) +#endif + +/* + * Tcp control block, one per tcp; fields: + * Organized for 16 byte cacheline efficiency. + */ +struct tcpcb { + struct tsegqe_head t_segq; /* segment reassembly queue */ + void *t_pspare[2]; /* new reassembly queue */ + int t_segqlen; /* segment reassembly queue length */ + int t_dupacks; /* consecutive dup acks recd */ + + struct tcp_timer *t_timers; /* All the TCP timers in one struct */ + + struct inpcb *t_inpcb; /* back pointer to internet pcb */ + int t_state; /* state of this connection */ + uint32_t t_flags; + + struct vnet *t_vnet; /* back pointer to parent vnet */ + + tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ + tcp_seq snd_nxt; /* send next */ + tcp_seq snd_up; /* send urgent pointer */ + + tcp_seq snd_wl1; /* window update seg seq number */ + tcp_seq snd_wl2; /* window update seg ack number */ + tcp_seq iss; /* initial send sequence number */ + tcp_seq irs; /* initial receive sequence number */ + + tcp_seq rcv_nxt; /* receive next */ + tcp_seq rcv_adv; /* advertised window */ + uint64_t rcv_wnd; /* 
receive window */ + tcp_seq rcv_up; /* receive urgent pointer */ + + uint64_t snd_wnd; /* send window */ + uint64_t snd_cwnd; /* congestion-controlled window */ + uint64_t snd_spare1; /* unused */ + uint64_t snd_ssthresh; /* snd_cwnd size threshold for + * for slow start exponential to + * linear switch + */ + uint64_t snd_spare2; /* unused */ + tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ + + uint32_t t_maxopd; /* mss plus options */ + + uint32_t t_rcvtime; /* inactivity time */ + uint32_t t_starttime; /* time connection was established */ + uint32_t t_rtttime; /* RTT measurement start time */ + tcp_seq t_rtseq; /* sequence number being timed */ + + uint32_t t_bw_spare1; /* unused */ + tcp_seq t_bw_spare2; /* unused */ + + int t_rxtcur; /* current retransmit value (ticks) */ + uint32_t t_maxseg; /* maximum segment size */ + int t_srtt; /* smoothed round-trip time */ + int t_rttvar; /* variance in round-trip time */ + + int t_rxtshift; /* log(2) of rexmt exp. backoff */ + uint32_t t_rttmin; /* minimum rtt allowed */ + uint32_t t_rttbest; /* best rtt we've seen */ + uint64_t t_rttupdated; /* number of times rtt sampled */ + uint64_t max_sndwnd; /* largest window peer has offered */ + + int t_softerror; /* possible error not yet reported */ +/* out-of-band data */ + char t_oobflags; /* have some */ + char t_iobc; /* input character */ +/* RFC 1323 variables */ + uint8_t snd_scale; /* window scaling for send window */ + uint8_t rcv_scale; /* window scaling for recv window */ + uint8_t request_r_scale; /* pending window scaling */ + uint32_t ts_recent; /* timestamp echo data */ + uint32_t ts_recent_age; /* when last updated */ + uint32_t ts_offset; /* our timestamp offset */ + + tcp_seq last_ack_sent; +/* experimental */ + uint64_t snd_cwnd_prev; /* cwnd prior to retransmit */ + uint64_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ + tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ + int t_sndzerowin; /* zero-window updates sent 
*/ + uint32_t t_badrxtwin; /* window for retransmit recovery */ + uint8_t snd_limited; /* segments limited transmitted */ +/* SACK related state */ + int snd_numholes; /* number of holes seen by sender */ + OFP_TAILQ_HEAD(sackhole_head, sackhole) snd_holes; + /* SACK scoreboard (sorted) */ + tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ + int rcv_numsacks; /* # distinct sack blks present */ + struct sackblk sackblks[OFP_MAX_SACK_BLKS]; /* seq nos. of sack blocks */ + tcp_seq sack_newdata; /* New data xmitted in this recovery + episode starts at this seq number */ + struct sackhint sackhint; /* SACK scoreboard hint */ + int t_rttlow; /* smallest observerved RTT */ + uint32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ + int rfbuf_cnt; /* recv buffer autoscaling byte count */ + struct toe_usrreqs *t_tu; /* offload operations vector */ + int t_sndrexmitpack; /* retransmit packets sent */ + int t_rcvoopack; /* out-of-order packets received */ + void *t_toe; /* TOE pcb pointer */ + int t_bytes_acked; /* # bytes acked during current RTT */ + struct cc_algo *cc_algo; /* congestion control algorithm */ + struct cc_var *ccv; /* congestion control specific vars */ + struct osd *osd; /* storage for Khelp module data */ + + uint32_t t_keepinit; /* time to establish connection */ + uint32_t t_keepidle; /* time before keepalive probes begin */ + uint32_t t_keepintvl; /* interval between keepalives */ + uint32_t t_keepcnt; /* number of keepalives before close */ + + uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ + void *t_pspare2[4]; /* 4 TBD */ + uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */ +}; + +/* + * Flags and utility macros for the t_flags field. 
+ */
+#define	TF_ACKNOW	0x000001	/* ack peer immediately */
+#define	TF_DELACK	0x000002	/* ack, but try to delay it */
+#define	TF_NODELAY	0x000004	/* don't delay packets to coalesce */
+#define	TF_NOOPT	0x000008	/* don't use tcp options */
+#define	TF_SENTFIN	0x000010	/* have sent FIN */
+#define	TF_REQ_SCALE	0x000020	/* have/will request window scaling */
+#define	TF_RCVD_SCALE	0x000040	/* other side has requested scaling */
+#define	TF_REQ_TSTMP	0x000080	/* have/will request timestamps */
+#define	TF_RCVD_TSTMP	0x000100	/* a timestamp was received in SYN */
+#define	TF_SACK_PERMIT	0x000200	/* other side said I could SACK */
+#define	TF_NEEDSYN	0x000400	/* send SYN (implicit state) */
+#define	TF_NEEDFIN	0x000800	/* send FIN (implicit state) */
+#define	TF_NOPUSH	0x001000	/* don't push */
+#define	TF_PREVVALID	0x002000	/* saved values for bad rxmit valid */
+#define	TF_MORETOCOME	0x010000	/* More data to be appended to sock */
+#define	TF_LQ_OVERFLOW	0x020000	/* listen queue overflow */
+#define	TF_LASTIDLE	0x040000	/* connection was previously idle */
+#define	TF_RXWIN0SENT	0x080000	/* sent a receiver win 0 in response */
+#define	TF_FASTRECOVERY	0x100000	/* in NewReno Fast Recovery */
+#define	TF_WASFRECOVERY	0x200000	/* was in NewReno Fast Recovery */
+#define	TF_SIGNATURE	0x400000	/* require MD5 digests (RFC2385) */
+#define	TF_FORCEDATA	0x800000	/* force out a byte */
+#define	TF_TSO		0x1000000	/* TSO enabled on this connection */
+#define	TF_TOE		0x2000000	/* this connection is offloaded */
+#define	TF_ECN_PERMIT	0x4000000	/* connection ECN-ready */
+#define	TF_ECN_SND_CWR	0x8000000	/* ECN CWR in queue */
+#define	TF_ECN_SND_ECE	0x10000000	/* ECN ECE in queue */
+#define	TF_CONGRECOVERY	0x20000000	/* congestion recovery mode */
+#define	TF_WASCRECOVERY	0x40000000	/* was in congestion recovery */
+
+/*
+ * Recovery-state helpers operating on a flags word built from the TF_*
+ * bits above.
+ *   IN_*    yield a non-zero value when the named bit(s) are set.
+ *   ENTER_* set the named bit(s) in place.
+ *   EXIT_*  clear the named bit(s) in place.
+ * ENTER_ and EXIT_ need a modifiable lvalue argument.  The parameter and the
+ * whole expansion are parenthesized so that an argument such as "a | b", or
+ * a use inside a larger expression, binds the way the caller wrote it.
+ */
+#define	IN_FASTRECOVERY(t_flags)	((t_flags) & TF_FASTRECOVERY)
+#define	ENTER_FASTRECOVERY(t_flags)	((t_flags) |= TF_FASTRECOVERY)
+#define	EXIT_FASTRECOVERY(t_flags)	((t_flags) &= ~TF_FASTRECOVERY)
+
+#define	IN_CONGRECOVERY(t_flags)	((t_flags) & TF_CONGRECOVERY)
+#define	ENTER_CONGRECOVERY(t_flags)	((t_flags) |= TF_CONGRECOVERY)
+#define	EXIT_CONGRECOVERY(t_flags)	((t_flags) &= ~TF_CONGRECOVERY)
+
+#define	IN_RECOVERY(t_flags)	((t_flags) & (TF_CONGRECOVERY | TF_FASTRECOVERY))
+#define	ENTER_RECOVERY(t_flags)	((t_flags) |= (TF_CONGRECOVERY | TF_FASTRECOVERY))
+#define	EXIT_RECOVERY(t_flags)	((t_flags) &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY))
+
+/*
+ * th_ack of the header minus snd_una of the control block.  Both arguments
+ * are pointer expressions; they are parenthesized so that something like
+ * "p + 1" can be passed.
+ */
+#define	BYTES_THIS_ACK(tp, th)	((th)->th_ack - (tp)->snd_una)
+
+/*
+ * Flags for the t_oobflags field.
+ */
+#define	OFP_TCPOOB_HAVEDATA	0x01
+#define	OFP_TCPOOB_HADDATA	0x02
+
+/*
+ * Structure to hold TCP options that are only used during segment
+ * processing (in ofp_tcp_input), but not held in the tcpcb.
+ * It's basically used to reduce the number of parameters
+ * to tcp_dooptions and ofp_tcp_addoptions.
+ * The binary order of the to_flags is relevant for packing of the
+ * options in ofp_tcp_addoptions.
+ */
+struct tcpopt {
+	uint64_t	to_flags;	/* which options are present */
+#define	TOF_MSS		0x0001		/* maximum segment size */
+#define	TOF_SCALE	0x0002		/* window scaling */
+#define	TOF_SACKPERM	0x0004		/* SACK permitted */
+#define	TOF_TS		0x0010		/* timestamp */
+#define	TOF_SIGNATURE	0x0040		/* TCP-MD5 signature option (RFC2385) */
+#define	TOF_SACK	0x0080		/* Peer sent SACK option */
+#define	TOF_MAXOPT	0x0100
+	uint32_t	to_tsval;	/* new timestamp */
+	uint32_t	to_tsecr;	/* reflected timestamp */
+	uint8_t		*to_sacks;	/* pointer to the first SACK blocks */
+	uint8_t		*to_signature;	/* pointer to the TCP-MD5 signature */
+	uint16_t	to_mss;		/* maximum segment size */
+	uint8_t		to_wscale;	/* window scaling */
+	uint8_t		to_nsacks;	/* number of SACK blocks */
+	uint32_t	to_spare;	/* UTO */
+};
+
+/*
+ * Flags for tcp_dooptions.
+ */ +#define TO_SYN 0x01 /* parse SYN-only options */ + +struct hc_metrics_lite { /* must stay in sync with hc_metrics */ + uint64_t rmx_mtu; /* MTU for this path */ + uint64_t rmx_ssthresh; /* outbound gateway buffer limit */ + uint64_t rmx_rtt; /* estimated round trip time */ + uint64_t rmx_rttvar; /* estimated rtt variance */ + uint64_t rmx_bandwidth; /* estimated bandwidth */ + uint64_t rmx_cwnd; /* congestion window */ + uint64_t rmx_sendpipe; /* outbound delay-bandwidth product */ + uint64_t rmx_recvpipe; /* inbound delay-bandwidth product */ +}; + +#ifndef _NETINET_IN_PCB_H_ +struct in_conninfo; +#endif /* _NETINET_IN_PCB_H_ */ + +struct tcptw { + struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */ + tcp_seq snd_nxt; + tcp_seq rcv_nxt; + tcp_seq iss; + tcp_seq irs; + uint16_t last_win; /* cached window value */ + uint16_t tw_so_options; /* copy of so_options */ + struct ofp_ucred *tw_cred; /* user credentials */ + uint32_t t_recent; + uint32_t ts_offset; /* our timestamp offset */ + uint32_t t_starttime; + int tw_time; + OFP_TAILQ_ENTRY(tcptw) tw_2msl; +}; + +#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) +#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb) +#define sototcpcb(so) (intotcpcb(sotoinpcb(so))) + +/* + * The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 3 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ +#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. 
*/ +#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */ +#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */ +#define TCP_DELTA_SHIFT 2 /* see ofp_tcp_input.c */ + +/* + * The initial retransmission should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + * This version of the macro adapted from a paper by Lawrence + * Brakmo and Larry Peterson which outlines a problem caused + * by insufficient precision in the original implementation, + * which results in inappropriately large RTO values for very + * fast networks. + */ +#define TCP_REXMTVAL(tp) \ + max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ + + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) + +/* + * TCP statistics. + * Many of these should be kept per connection, + * but that's inconvenient at the moment. + */ +struct ofp_tcpstat { + uint64_t tcps_connattempt; /* connections initiated */ + uint64_t tcps_accepts; /* connections accepted */ + uint64_t tcps_connects; /* connections established */ + uint64_t tcps_drops; /* connections dropped */ + uint64_t tcps_conndrops; /* embryonic connections dropped */ + uint64_t tcps_minmssdrops; /* average minmss too low drops */ + uint64_t tcps_closed; /* conn. closed (includes drops) */ + uint64_t tcps_segstimed; /* segs where we tried to get rtt */ + uint64_t tcps_rttupdated; /* times we succeeded */ + uint64_t tcps_delack; /* delayed acks sent */ + uint64_t tcps_timeoutdrop; /* conn. 
dropped in rxmt timeout */ + uint64_t tcps_rexmttimeo; /* retransmit timeouts */ + uint64_t tcps_persisttimeo; /* persist timeouts */ + uint64_t tcps_keeptimeo; /* keepalive timeouts */ + uint64_t tcps_keepprobe; /* keepalive probes sent */ + uint64_t tcps_keepdrops; /* connections dropped in keepalive */ + + uint64_t tcps_sndtotal; /* total packets sent */ + uint64_t tcps_sndpack; /* data packets sent */ + uint64_t tcps_sndbyte; /* data bytes sent */ + uint64_t tcps_sndrexmitpack; /* data packets retransmitted */ + uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */ + uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */ + uint64_t tcps_sndacks; /* ack-only packets sent */ + uint64_t tcps_sndprobe; /* window probes sent */ + uint64_t tcps_sndurg; /* packets sent with URG only */ + uint64_t tcps_sndwinup; /* window update-only packets sent */ + uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ + + uint64_t tcps_rcvtotal; /* total packets received */ + uint64_t tcps_rcvpack; /* packets received in sequence */ + uint64_t tcps_rcvbyte; /* bytes received in sequence */ + uint64_t tcps_rcvbadsum; /* packets received with ccksum errs */ + uint64_t tcps_rcvbadoff; /* packets received with bad offset */ + uint64_t tcps_rcvmemdrop; /* packets dropped for lack of memory */ + uint64_t tcps_rcvshort; /* packets received too short */ + uint64_t tcps_rcvduppack; /* duplicate-only packets received */ + uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */ + uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */ + uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. 
packets */ + uint64_t tcps_rcvoopack; /* out-of-order packets received */ + uint64_t tcps_rcvoobyte; /* out-of-order bytes received */ + uint64_t tcps_rcvpackafterwin; /* packets with data after window */ + uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */ + uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */ + uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */ + uint64_t tcps_rcvdupack; /* rcvd duplicate acks */ + uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */ + uint64_t tcps_rcvackpack; /* rcvd ack packets */ + uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */ + uint64_t tcps_rcvwinupd; /* rcvd window update packets */ + uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */ + uint64_t tcps_predack; /* times hdr predict ok for acks */ + uint64_t tcps_preddat; /* times hdr predict ok for data pkts */ + uint64_t tcps_pcbcachemiss; + uint64_t tcps_cachedrtt; /* times cached RTT in route updated */ + uint64_t tcps_cachedrttvar; /* times cached rttvar updated */ + uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */ + uint64_t tcps_usedrtt; /* times RTT initialized from route */ + uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */ + uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/ + uint64_t tcps_persistdrop; /* timeout in persist state */ + uint64_t tcps_badsyn; /* bogus SYN, e.g. 
premature ACK */ + uint64_t tcps_mturesent; /* resends due to MTU discovery */ + uint64_t tcps_listendrop; /* listen queue overflows */ + uint64_t tcps_badrst; /* ignored RSTs in the window */ + + uint64_t tcps_sc_added; /* entry added to syncache */ + uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */ + uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */ + uint64_t tcps_sc_dropped; /* could not reply to packet */ + uint64_t tcps_sc_completed; /* successful extraction of entry */ + uint64_t tcps_sc_bucketoverflow; /* syncache per-bucket limit hit */ + uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */ + uint64_t tcps_sc_reset; /* RST removed entry from syncache */ + uint64_t tcps_sc_stale; /* timed out or listen socket gone */ + uint64_t tcps_sc_aborted; /* syncache entry aborted */ + uint64_t tcps_sc_badack; /* removed due to bad ACK */ + uint64_t tcps_sc_unreach; /* ICMP unreachable received */ + uint64_t tcps_sc_zonefail; /* zalloc() failed */ + uint64_t tcps_sc_sendcookie; /* SYN cookie sent */ + uint64_t tcps_sc_recvcookie; /* SYN cookie received */ + + uint64_t tcps_hc_added; /* entry added to hostcache */ + uint64_t tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */ + + uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */ + + /* SACK related stats */ + uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */ + uint64_t tcps_sack_rexmits; /* SACK rexmit segments */ + uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */ + uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */ + uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */ + uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */ + + /* ECN related stats */ + uint64_t tcps_ecn_ce; /* ECN Congestion Experienced */ + uint64_t tcps_ecn_ect0; /* ECN Capable Transport */ + uint64_t tcps_ecn_ect1; /* ECN Capable Transport */ + uint64_t tcps_ecn_shs; /* ECN successful handshakes */ + uint64_t 
tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */ + + /* TCP_SIGNATURE related stats */ + uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */ + uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */ + uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */ + uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */ + uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */ + + uint64_t _pad[12]; /* 6 UTO, 6 TBD */ +}; + +/* + * In-kernel consumers can use these accessor macros directly to update + * stats. + */ +#define TCPSTAT_ADD(name, val) V_tcpstat.name += (val) +#define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1) + +/* + * Kernel module consumers must use this accessor macro. + */ +void ofp_kmod_tcpstat_inc(int statnum); +#define KMOD_TCPSTAT_INC(name) \ + ofp_kmod_tcpstat_inc(offsetof(struct ofp_tcpstat, name) / sizeof(uint64_t)) + +/* + * TCP specific helper hook point identifiers. + */ +#define HHOOK_TCP_EST_IN 0 +#define HHOOK_TCP_EST_OUT 1 +#define HHOOK_TCP_LAST HHOOK_TCP_EST_OUT + +struct tcp_hhook_data { + struct tcpcb *tp; + struct ofp_tcphdr *th; + struct tcpopt *to; + long len; + int tso; + tcp_seq curack; +}; + +/* + * TCB structure exported to user-land via sysctl(3). + * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been + * included. Not all of our clients do. 
+ */ +#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) +struct xtcp_timer { + int tt_rexmt; /* retransmit timer */ + int tt_persist; /* retransmit persistence */ + int tt_keep; /* keepalive */ + int tt_2msl; /* 2*msl TIME_WAIT timer */ + int tt_delack; /* delayed ACK timer */ + int t_rcvtime; /* Time since last packet received */ +}; +struct xtcpcb { + size_t xt_len; + struct inpcb xt_inp; + struct tcpcb xt_tp; + struct xsocket xt_socket; + struct xtcp_timer xt_timer; + uint64_t xt_alignment_hack; +}; +#endif + +/* + * Names for TCP sysctl objects + */ +#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */ +#define TCPCTL_MSSDFLT 3 /* MSS default */ +#define TCPCTL_STATS 4 /* statistics (read-only) */ +#define TCPCTL_RTTDFLT 5 /* default RTT estimate */ +#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */ +#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */ +#define TCPCTL_SENDSPACE 8 /* send buffer space */ +#define TCPCTL_RECVSPACE 9 /* receive buffer space */ +#define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */ +#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ +#define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */ +#define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ +#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */ +#define TCPCTL_DROP 15 /* drop tcp connection */ +#define TCPCTL_MAXID 16 +#define TCPCTL_FINWAIT2_TIMEOUT 17 + +#define TCPCTL_NAMES { \ + { 0, 0 }, \ + { "rfc1323", OFP_CTLTYPE_INT }, \ + { "mssdflt", OFP_CTLTYPE_INT }, \ + { "stats", OFP_CTLTYPE_STRUCT }, \ + { "rttdflt", OFP_CTLTYPE_INT }, \ + { "keepidle", OFP_CTLTYPE_INT }, \ + { "keepintvl", OFP_CTLTYPE_INT }, \ + { "sendspace", OFP_CTLTYPE_INT }, \ + { "recvspace", OFP_CTLTYPE_INT }, \ + { "keepinit", OFP_CTLTYPE_INT }, \ + { "pcblist", OFP_CTLTYPE_STRUCT }, \ + { "delacktime", OFP_CTLTYPE_INT }, \ + { "v6mssdflt", OFP_CTLTYPE_INT }, \ + { "maxid", OFP_CTLTYPE_INT }, \ +} + +SYSCTL_DECL(_net_inet_tcp); 
+SYSCTL_DECL(_net_inet_tcp_sack); + +VNET_DECLARE(struct inpcbhead, ofp_tcb); /* queue of active tcpcb's */ +VNET_DECLARE(struct inpcbinfo, ofp_tcbinfo); +VNET_DECLARE(struct ofp_tcpstat, ofp_tcpstat); /* tcp statistics */ +extern int ofp_tcp_log_in_vain; +VNET_DECLARE(int, ofp_tcp_mssdflt); /* XXX */ +VNET_DECLARE(int, ofp_tcp_minmss); +VNET_DECLARE(int, ofp_tcp_delack_enabled); +VNET_DECLARE(int, ofp_tcp_do_rfc3390); +VNET_DECLARE(int, ofp_path_mtu_discovery); +VNET_DECLARE(int, ofp_ss_fltsz); +VNET_DECLARE(int, ofp_ss_fltsz_local); +VNET_DECLARE(int, ofp_tcp_do_rfc3465); +VNET_DECLARE(int, ofp_tcp_abc_l_var); +#define V_tcb VNET(ofp_tcb) +#define V_tcbinfo VNET(ofp_tcbinfo) +#define V_tcpstat VNET(ofp_tcpstat) +#define V_tcp_mssdflt VNET(ofp_tcp_mssdflt) +#define V_tcp_minmss VNET(ofp_tcp_minmss) +#define V_tcp_delack_enabled VNET(ofp_tcp_delack_enabled) +#define V_tcp_do_rfc3390 VNET(ofp_tcp_do_rfc3390) +#define V_path_mtu_discovery VNET(ofp_path_mtu_discovery) +#define V_ss_fltsz VNET(ofp_ss_fltsz) +#define V_ss_fltsz_local VNET(ofp_ss_fltsz_local) +#define V_tcp_do_rfc3465 VNET(ofp_tcp_do_rfc3465) +#define V_tcp_abc_l_var VNET(ofp_tcp_abc_l_var) + +VNET_DECLARE(int, ofp_tcp_do_sack); /* SACK enabled/disabled */ +VNET_DECLARE(int, ofp_tcp_sc_rst_sock_fail); /* RST on sock alloc failure */ +#define V_tcp_do_sack VNET(ofp_tcp_do_sack) +#define V_tcp_sc_rst_sock_fail VNET(ofp_tcp_sc_rst_sock_fail) + +VNET_DECLARE(int, ofp_tcp_do_ecn); /* TCP ECN enabled/disabled */ +VNET_DECLARE(int, ofp_tcp_ecn_maxretries); +#define V_tcp_do_ecn VNET(ofp_tcp_do_ecn) +#define V_tcp_ecn_maxretries VNET(ofp_tcp_ecn_maxretries) + +VNET_DECLARE(struct hhook_head *, ofp_tcp_hhh[HHOOK_TCP_LAST + 1]); +#define V_tcp_hhh VNET(ofp_tcp_hhh) + +int ofp_tcp_addoptions(struct tcpopt *, uint8_t *); +int tcp_ccalgounload(struct cc_algo *unload_algo); +struct tcpcb * + ofp_tcp_close(struct tcpcb *); +void ofp_tcp_discardcb(struct tcpcb *); +void ofp_tcp_twstart(struct tcpcb *); +#if 0 +int 
tcp_twrecycleable(struct tcptw *tw); +#endif +void ofp_tcp_twclose(struct tcptw *_tw, int _reuse); +void ofp_tcp_ctlinput(int, struct ofp_sockaddr *, void *); +int ofp_tcp_ctloutput(struct socket *, struct sockopt *); +struct tcpcb * + ofp_tcp_drop(struct tcpcb *, int); +void ofp_tcp_drain(void); +void ofp_tcp_tcbinfo_hashstats(unsigned int *min, unsigned int *avg, unsigned int *max); +void ofp_tcp_init(void); +void ofp_tcp_fini(void *); +char *ofp_tcp_log_addrs(struct in_conninfo *, struct ofp_tcphdr *, void *, + const void *); +char *ofp_tcp_log_vain(struct in_conninfo *, struct ofp_tcphdr *, void *, + const void *); +int ofp_tcp_reass(struct tcpcb *, struct ofp_tcphdr *, int *, odp_packet_t ); +void ofp_tcp_reass_init(void); +void ofp_tcp_reass_flush(struct tcpcb *); + +int ofp_tcp_input(odp_packet_t , int); +#define TI_UNLOCKED 1 +#define TI_WLOCKED 2 +void ofp_tcp_do_segment(odp_packet_t m, struct ofp_tcphdr *th, struct socket *so, + struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, + int ti_locked, int no_unlock); +u_long ofp_tcp_maxmtu(struct in_conninfo *, int *); +u_long ofp_tcp_maxmtu6(struct in_conninfo *, int *); +void ofp_tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, + int *); +void ofp_tcp_mss(struct tcpcb *, int); +int ofp_tcp_mssopt(struct in_conninfo *); +struct inpcb * + ofp_tcp_drop_syn_sent(struct inpcb *, int); +struct inpcb * + ofp_tcp_mtudisc(struct inpcb *, int); +struct tcpcb * + ofp_tcp_newtcpcb(struct inpcb *); +int ofp_tcp_output(struct tcpcb *); +void ofp_tcp_respond(struct tcpcb *, void *, + struct ofp_tcphdr *, odp_packet_t , tcp_seq, tcp_seq, int); +void ofp_tcp_tw_init(void); +void ofp_tcp_tw_zone_change(void); +int ofp_tcp_twcheck(struct inpcb *, struct tcpopt *, struct ofp_tcphdr *, + odp_packet_t , int); +int ofp_tcp_twrespond(struct tcptw *, int); +void ofp_tcp_setpersist(struct tcpcb *); +void ofp_tcp_slowtimo(void *); +struct tcptemp * + ofp_tcpip_maketemplate(struct inpcb *); +void 
ofp_tcpip_fillheaders(struct inpcb *, void *, void *); +void ofp_tcp_timer_activate(struct tcpcb *, int, uint32_t); +int ofp_tcp_timer_active(struct tcpcb *, int); +void tcp_trace(short, short, struct tcpcb *, void *, struct ofp_tcphdr *, int); +/* + * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo) + */ +void tcp_hc_init(void); +void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *); +u_long tcp_hc_getmtu(struct in_conninfo *); +void tcp_hc_updatemtu(struct in_conninfo *, uint64_t); +//void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *); + +extern struct pr_usrreqs ofp_tcp_usrreqs; +extern uint64_t ofp_tcp_sendspace; +extern uint64_t ofp_tcp_recvspace; +tcp_seq ofp_tcp_new_isn(struct tcpcb *); + +void ofp_tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); +void ofp_tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); +void ofp_tcp_clean_sackreport(struct tcpcb *tp); +void ofp_tcp_sack_adjust(struct tcpcb *tp); +struct sackhole *ofp_tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); +void ofp_tcp_sack_partialack(struct tcpcb *, struct ofp_tcphdr *); +void ofp_tcp_free_sackholes(struct tcpcb *tp); +int tcp_newreno(struct tcpcb *, struct ofp_tcphdr *); +u_long tcp_seq_subtract(uint64_t, uint64_t ); + +void ofp_cc_cong_signal(struct tcpcb *tp, struct ofp_tcphdr *th, uint32_t type); + +#endif /* _NETINET_TCP_VAR_H_ */ diff --git a/include/ofpi_timer.h b/include/ofpi_timer.h new file mode 100644 index 00000000..b18f2e16 --- /dev/null +++ b/include/ofpi_timer.h @@ -0,0 +1,31 @@ +/*- + * Copyright (c) 2014 Nokia + * Copyright (c) 2014 ENEA Software AB + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_TIMER_H +#define _OFPI_TIMER_H + +#include "api/ofp_timer.h" + +#define OFP_TIMER_RESOLUTION_US 10000UL +#define OFP_TIMER_MIN_US 0UL +#define OFP_TIMER_MAX_US 10000000UL +#define OFP_TIMER_TMO_COUNT 1000UL + +#define HZ (1000000UL/OFP_TIMER_RESOLUTION_US) +#define hz HZ + 
+#define OFP_TIMER_ARG_LEN 256 + +/* Timer type */ +#define OFP_TIMER_SOCKET 0 + +int ofp_timer_init(int resolution_us, + int min_us, int max_us, + int tmo_count); +void ofp_timer_lookup_shared_memory(void); + +#endif diff --git a/include/ofpi_udp.h b/include/ofpi_udp.h new file mode 100644 index 00000000..28f2e4b8 --- /dev/null +++ b/include/ofpi_udp.h @@ -0,0 +1,12 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _OFPI_UDP_H_ +#define _OFPI_UDP_H_ + +#include "api/ofp_udp.h" + +#endif diff --git a/include/ofpi_udp6_var.h b/include/ofpi_udp6_var.h new file mode 100644 index 00000000..d9e82b6c --- /dev/null +++ b/include/ofpi_udp6_var.h @@ -0,0 +1,77 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet6/udp6_var.h 174510 2007-12-10 16:03:40Z obrien $ + */ + +#ifndef _NETINET6_UDP6_VAR_H_ +#define _NETINET6_UDP6_VAR_H_ + +#include + +#if 0 +SYSCTL_DECL(_net_inet6_udp6); +#endif + +extern struct pr_usrreqs ofp_udp6_usrreqs; + +void ofp_udp6_ctlinput(int, struct ofp_sockaddr *, void *); +int ofp_udp6_input(odp_packet_t, int *, int *); + +#endif /*_NETINET6_UDP6_VAR_H_*/ diff --git a/include/ofpi_udp_var.h b/include/ofpi_udp_var.h new file mode 100644 index 00000000..12bca160 --- /dev/null +++ b/include/ofpi_udp_var.h @@ -0,0 +1,133 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: release/9.1.0/sys/netinet/udp_var.h 234780 2012-04-29 08:50:50Z bz $ + */ + +#ifndef _NETINET_UDP_VAR_H_ +#define _NETINET_UDP_VAR_H_ + +#include "ofpi_sockopt.h" +#include "ofpi_ip_var.h" +#include "ofpi_udp.h" +#include "api/ofp_sysctl.h" + +struct mbuf; +//struct inpcb; + +/* + * UDP kernel structures and variables. + */ +struct udpiphdr { + struct ipovly ui_i; /* overlaid ip structure */ + struct ofp_udphdr ui_u; /* udp header */ +}; +#define ui_x1 ui_i.ih_x1 +#define ui_pr ui_i.ih_pr +#define ui_len ui_i.ih_len +#define ui_src ui_i.ih_src +#define ui_dst ui_i.ih_dst +#define ui_sport ui_u.uh_sport +#define ui_dport ui_u.uh_dport +#define ui_ulen ui_u.uh_ulen +#define ui_sum ui_u.uh_sum + +typedef void(*udp_tun_func_t)(odp_packet_t , int off, struct inpcb *); + +/* + * UDP control block; one per udp. + */ +struct udpcb { + udp_tun_func_t u_tun_func; /* UDP kernel tunneling callback. */ + uint32_t u_flags; /* Generic UDP flags. 
*/ +}; + +#define intoudpcb(ip) ((struct udpcb *)(ip)->inp_ppcb) +#define sotoudpcb(so) (intoudpcb(sotoinpcb(so))) + +struct ofp_udpstat { + /* input statistics: */ + uint64_t udps_ipackets; /* total input packets */ + uint64_t udps_hdrops; /* packet shorter than header */ + uint64_t udps_badsum; /* checksum error */ + uint64_t udps_nosum; /* no checksum */ + uint64_t udps_badlen; /* data length larger than packet */ + uint64_t udps_noport; /* no socket on port */ + uint64_t udps_noportbcast; /* of above, arrived as broadcast */ + uint64_t udps_fullsock; /* not delivered, input socket full */ + uint64_t udpps_pcbcachemiss; /* input packets missing pcb cache */ + uint64_t udpps_pcbhashmiss; /* input packets not for hashed pcb */ + /* output statistics: */ + uint64_t udps_opackets; /* total output packets */ + uint64_t udps_fastout; /* output packets on fast path */ + /* of no socket on port, arrived as multicast */ + uint64_t udps_noportmcast; + uint64_t udps_filtermcast; /* blocked by multicast filter */ +}; + +/* + * Names for UDP sysctl objects. 
+ */ +#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ +#define UDPCTL_STATS 2 /* statistics (read-only) */ +#define UDPCTL_MAXDGRAM 3 /* max datagram size */ +#define UDPCTL_RECVSPACE 4 /* default receive buffer space */ +#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */ +#define UDPCTL_MAXID 6 + +#define UDPCTL_NAMES { \ + { 0, 0 }, \ + { "checksum", OFP_CTLTYPE_INT }, \ + { "stats", OFP_CTLTYPE_STRUCT }, \ + { "maxdgram", OFP_CTLTYPE_INT }, \ + { "recvspace", OFP_CTLTYPE_INT }, \ + { "pcblist", OFP_CTLTYPE_STRUCT }, \ +} + +SYSCTL_DECL(_net_inet_udp); + +extern struct pr_usrreqs ofp_udp_usrreqs; +extern uint64_t ofp_udp_sendspace; +extern uint64_t ofp_udp_recvspace; + +int udp_newudpcb(struct inpcb *); +void udp_discardcb(struct udpcb *); + +struct ofp_sockaddr; +void ofp_udp_ctlinput(int, struct ofp_sockaddr *, void *); +int ofp_udp_ctloutput(struct socket *, struct sockopt *); +void ofp_udp_init(void); +int ofp_udp_input(odp_packet_t , int); +struct inpcb *ofp_udp_notify(struct inpcb *, int); +int ofp_udp_shutdown(struct socket *so); + +#endif diff --git a/include/ofpi_util.h b/include/ofpi_util.h new file mode 100644 index 00000000..6561ba23 --- /dev/null +++ b/include/ofpi_util.h @@ -0,0 +1,129 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _UTIL_H_ +#define _UTIL_H_ + +#include +#include +#include +#include +#include + +#include "api/ofp_utils.h" +#include "ofpi_timer.h" + +#define L2_HEADER_NO_VLAN_SIZE 14 + +#define KASSERT(x, y) do { if (!(x)) { printf("KASSERT %s:%d\n",__FILE__,__LINE__); \ + printf y ; printf("\n"); int *a = 0; *a = 3;}} while (0) + +extern int ofp_first_log_time; +#define TICS_PER_SEC (1000000/OFP_TIMER_RESOLUTION_US) + +#define OFP_LOG_TIME(a...) 
/*
 * Format a 32-bit flag word as a space-separated list of flag names.
 *
 * f  - flag word to decode.
 * or - non-zero: list the names of the bits that are SET in f;
 *      zero:     list the names of the bits that are CLEAR in f.
 *
 * Returns a pointer to a static buffer (overwritten by the next call, not
 * reentrant). Each listed name is preceded by one space; the unnamed bits
 * 14 and 15 therefore contribute a bare space.
 *
 * The previous implementation used sprintf() into a 128-byte buffer, but a
 * full listing needs 257 characters, so decoding a busy flag word (or any
 * sparse word with or == 0) overran the buffer. The buffer is now large
 * enough for every case and all writes are bounded.
 *
 * NOTE(review): only bits 0..28 are decoded although the table also names
 * bits 29 and 30; the loop bound is kept as it was - confirm whether that is
 * intended. "NODEALY" looks like a misspelling of NODELAY; the string is left
 * unchanged because it is emitted at runtime.
 */
static inline char *print_flags(uint32_t f, int or) {
	static const char *const t[] = {"ACKNOW", "DELACK", "NODEALY", "NOOPT", "SENTFIN",
			     "REQ_SCALE", "RCVD_SCALE", "REQ_TSTMP", "RCVD_TSTMP",
			     "SACK_PERMIT", "NEEDSYN", "NEEDFIN", "NOPUSH", "PREVVALID",
			     "", "",
			     "MORETOCOME", "LQ_OVERFLOW","LASTIDLE","RXWIN0SENT",
			     "FASTRECOVERY", "WASFRECOVERY", "SIGNATURE", "FORCEDATA",
			     "TSO", "TOE", "ECN_PERMIT", "ECN_SND_CWR",
			     "ECN_SND_ECE", "CONGRECOVERY", "WASCRECOVERY", ""};
	static char buf[512];
	uint32_t m = 1;
	size_t n = 0;
	int i;

	buf[0] = 0;
	for (i = 0; i < 29; i++, m <<= 1) {
		int bit_set = (f & m) != 0;
		int len;

		/* List the bit when its state matches the requested polarity. */
		if (bit_set != (or != 0))
			continue;

		len = snprintf(buf + n, sizeof(buf) - n, " %s", t[i]);
		if (len < 0 || (size_t)len >= sizeof(buf) - n)
			break;	/* cannot happen with the current table; stay bounded */
		n += (size_t)len;
	}
	return buf;
}
/*
 * Integer base-2 logarithm: index of the highest set bit of n,
 * i.e. floor(log2(n)) for n > 0.
 *
 * Returns -1 for n == 0. __builtin_clzll() has an undefined result for a
 * zero argument, so that case must be handled before calling it.
 */
static inline int ilog2(unsigned long long n)
{
	if (n == 0)
		return -1;

	return 63 - __builtin_clzll(n);
}
+ */ +#define VNET_NAME(n) n +#define VNET_DECLARE(t, n) extern t n +#define VNET_DEFINE(t, n) t n + +/* + * Virtualized global variable accessor macros. + */ +#define VNET_VNET_PTR(vnet, n) (&(n)) +#define VNET_VNET(vnet, n) (n) + +#define VNET_PTR(n) (&(n)) +#define VNET(n) (n) + +#endif /*__OFPI_VNET_H__*/ diff --git a/m4/ax_pthread.m4 b/m4/ax_pthread.m4 new file mode 100644 index 00000000..d383ad5c --- /dev/null +++ b/m4/ax_pthread.m4 @@ -0,0 +1,332 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_pthread.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro figures out how to build C programs using POSIX threads. It +# sets the PTHREAD_LIBS output variable to the threads library and linker +# flags, and the PTHREAD_CFLAGS output variable to any special C compiler +# flags that are needed. (The user can also force certain compiler +# flags/libs to be tested by setting these environment variables.) +# +# Also sets PTHREAD_CC to any special C compiler that is needed for +# multi-threaded programs (defaults to the value of CC otherwise). (This +# is necessary on AIX to use the special cc_r compiler alias.) +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also link it with them as well. e.g. you should link with +# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS +# +# If you are only building threads programs, you may wish to use these +# variables in your default LIBS, CFLAGS, and CC: +# +# LIBS="$PTHREAD_LIBS $LIBS" +# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +# CC="$PTHREAD_CC" +# +# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant +# has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name +# (e.g. PTHREAD_CREATE_UNDETACHED on AIX). 
+# +# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the +# PTHREAD_PRIO_INHERIT symbol is defined when compiling with +# PTHREAD_CFLAGS. +# +# ACTION-IF-FOUND is a list of shell commands to run if a threads library +# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it +# is not found. If ACTION-IF-FOUND is not specified, the default action +# will define HAVE_PTHREAD. +# +# Please let the authors know if this macro fails on any platform, or if +# you have any other suggestions or comments. This macro was based on work +# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help +# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by +# Alejandro Forero Cuervo to the autoconf macro repository. We are also +# grateful for the helpful feedback of numerous users. +# +# Updated for Autoconf 2.68 by Daniel Richard G. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# Copyright (c) 2011 Daniel Richard G. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. 
You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 21 + +AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) +AC_DEFUN([AX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +AC_LANG_PUSH([C]) +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on True64 or Sequent). +# It gets checked for in the link test anyway. + +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) + AC_TRY_LINK_FUNC([pthread_join], [ax_pthread_ok=yes]) + AC_MSG_RESULT([$ax_pthread_ok]) + if test x"$ax_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. 
Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case ${host_os} in + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) 
So, + # we'll just look for -pthreads and -lpthread first: + + ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" + ;; + + darwin*) + ax_pthread_flags="-pthread $ax_pthread_flags" + ;; +esac + +# Clang doesn't consider unrecognized options an error unless we specify +# -Werror. We throw in some extra Clang-specific options to ensure that +# this doesn't happen for GCC, which also accepts -Werror. + +AC_MSG_CHECKING([if compiler needs -Werror to reject unknown flags]) +save_CFLAGS="$CFLAGS" +ax_pthread_extra_flags="-Werror" +CFLAGS="$CFLAGS $ax_pthread_extra_flags -Wunknown-warning-option -Wsizeof-array-argument" +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([int foo(void);],[foo()])], + [AC_MSG_RESULT([yes])], + [ax_pthread_extra_flags= + AC_MSG_RESULT([no])]) +CFLAGS="$save_CFLAGS" + +if test x"$ax_pthread_ok" = xno; then +for flag in $ax_pthread_flags; do + + case $flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $flag]) + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) + if test x"$ax_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$flag]) + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS $ax_pthread_extra_flags" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. 
We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; }], + [pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */])], + [ax_pthread_ok=yes], + []) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + AC_MSG_RESULT([$ax_pthread_ok]) + if test "x$ax_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + AC_MSG_CHECKING([for joinable pthread attribute]) + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [int attr = $attr; return attr /* ; */])], + [attr_name=$attr; break], + []) + done + AC_MSG_RESULT([$attr_name]) + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], [$attr_name], + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + fi + + AC_MSG_CHECKING([if more special flags are required for pthreads]) + flag=no + case ${host_os} in + aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; + osf* | hpux*) flag="-D_REENTRANT";; + solaris*) + if test "$GCC" = "yes"; then + flag="-D_REENTRANT" + else + # TODO: What about Clang on Solaris? 
+ flag="-mt -D_REENTRANT" + fi + ;; + esac + AC_MSG_RESULT([$flag]) + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], + [ax_cv_PTHREAD_PRIO_INHERIT], [ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], + [[int i = PTHREAD_PRIO_INHERIT;]])], + [ax_cv_PTHREAD_PRIO_INHERIT=yes], + [ax_cv_PTHREAD_PRIO_INHERIT=no]) + ]) + AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"], + [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])]) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: compile with *_r variant + if test "x$GCC" != xyes; then + case $host_os in + aix*) + AS_CASE(["x/$CC"], + [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], + [#handle absolute path differently from PATH based program lookup + AS_CASE(["x$CC"], + [x/*], + [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], + [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) + ;; + esac + fi +fi + +test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" + +AC_SUBST([PTHREAD_LIBS]) +AC_SUBST([PTHREAD_CFLAGS]) +AC_SUBST([PTHREAD_CC]) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_pthread_ok" = xyes; then + ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) + : +else + ax_pthread_ok=no + $2 +fi +AC_LANG_POP +])dnl AX_PTHREAD diff --git a/scripts/reset_classifier.sh b/scripts/reset_classifier.sh new file mode 100755 index 00000000..5b8b629e --- /dev/null +++ b/scripts/reset_classifier.sh @@ -0,0 +1,16 @@ +#!/bin/bash -x + +intf=$1 +if test "X$intf" = "X"; then intf=eth0; fi + +killall classifier +sudo iptables -D FORWARD -i $intf -j DROP +sudo iptables -D INPUT -i $intf -j DROP +sudo ip6tables -D FORWARD -i $intf -j DROP +sudo ip6tables -D INPUT -i $intf -j DROP +sudo ifconfig $intf arp + +# restore DNS servers: +echo nameserver 8.8.8.8 |sudo tee /etc/resolv.conf +echo 
#!/bin/bash -x
#
# Usage: reset_device.sh [interface]      (interface defaults to eth0)
#
# Stops fpm, deletes the iptables/ip6tables DROP rules for the interface,
# turns ARP back on for it, requests a DHCP lease and rewrites
# /etc/resolv.conf with the two name servers listed below.

# Use the first argument, falling back to eth0 when it is missing or empty.
intf=${1:-eth0}
echo "Running FPM on interface $intf"

killall fpm
# The interface name is quoted everywhere so that an unusual name cannot be
# word-split or glob-expanded into extra arguments.
sudo iptables -D FORWARD -i "$intf" -j DROP
sudo iptables -D INPUT -i "$intf" -j DROP
sudo ip6tables -D FORWARD -i "$intf" -j DROP
sudo ip6tables -D INPUT -i "$intf" -j DROP
sudo ifconfig "$intf" arp
sudo dhclient -v "$intf"

# restore DNS servers:
#cat resolv.conf.reference |sudo tee /etc/resolv.conf
echo nameserver 8.8.8.8 |sudo tee /etc/resolv.conf
echo nameserver 127.0.1.1 |sudo tee -a /etc/resolv.conf
cat /etc/resolv.conf
b/scripts/start_conformance.sh new file mode 100755 index 00000000..52a2f915 --- /dev/null +++ b/scripts/start_conformance.sh @@ -0,0 +1,20 @@ +#!/bin/bash +./example/fpm/fpm -i vlan103,vlan104 -c 4 & + +sleep 3 +iptables -A FORWARD -i vlan103 -j DROP +iptables -A FORWARD -i vlan104 -j DROP +iptables -A INPUT -i vlan103 -j DROP +iptables -A INPUT -i vlan104 -j DROP +ifconfig vlan103 -arp +ifconfig vlan104 -arp +ip addr flush dev vlan103 +ip addr flush dev vlan104 +#sleep 1 +#sysctl -w net.ipv6.conf.fp_vlan103.autoconf=0 +#sysctl -w net.ipv6.conf.fp_vlan104.autoconf=0 +sleep 1 +ifconfig fp0 192.168.13.15 up +ifconfig fp1 192.168.14.15 up +# arp of ixia machine is required for sending ICMP Echo Req in tests 1.3 and 4.4 +arp -i fp0 -s 192.168.13.16 10:1F:74:36:29:9A diff --git a/scripts/start_device.sh b/scripts/start_device.sh new file mode 100755 index 00000000..7059c466 --- /dev/null +++ b/scripts/start_device.sh @@ -0,0 +1,21 @@ +#!/bin/bash +intf=$1 +if test "X$intf" = "X"; then intf=eth0; fi + +./example/fpm/fpm -i $intf -c 4 & + +sleep 3 +iptables -A FORWARD -i $intf -j DROP +iptables -A INPUT -i $intf -j DROP +ip6tables -A FORWARD -i $intf -j DROP +ip6tables -A INPUT -i $intf -j DROP +ifconfig $intf -arp +ip addr flush dev $intf +sleep 3 +sysctl -w net.ipv6.conf.fp0.autoconf=0 +dhclient -v fp0 +#sysctl -w net.ipv4.conf.fp0.forwarding=0 +#sysctl -w net.ipv4.conf.fp0.mc_forwarding=0 +#sysctl -w net.ipv4.conf.fp0.arp_filter=0 +#sysctl -w net.ipv4.conf.fp0.arp_accept=0 +#sysctl -w net.ipv4.conf.fp0.arp_announce=1 diff --git a/scripts/start_socket.sh b/scripts/start_socket.sh new file mode 100755 index 00000000..60e02974 --- /dev/null +++ b/scripts/start_socket.sh @@ -0,0 +1,16 @@ +#!/bin/bash -x + +intf=$1 +if test "X$intf" = "X"; then intf=eth0; fi + +./example/socket/socket -i $intf -c 2 -f ./example/socket/ofp.conf & + +sleep 3 +iptables -A FORWARD -i $intf -j DROP +iptables -A INPUT -i $intf -j DROP +ip6tables -A FORWARD -i $intf -j DROP +ip6tables -A INPUT -i 
$intf -j DROP +ifconfig $intf -arp +ip addr flush dev $intf +sleep 3 +sysctl -w net.ipv6.conf.fp_$intf.autoconf=0 diff --git a/scripts/stop_conformance.sh b/scripts/stop_conformance.sh new file mode 100755 index 00000000..2c794fde --- /dev/null +++ b/scripts/stop_conformance.sh @@ -0,0 +1,13 @@ +#!/bin/bash -x + +killall fpm + +sleep 3 + +sudo iptables -D FORWARD -i vlan103 -j DROP +sudo iptables -D FORWARD -i vlan104 -j DROP +sudo iptables -D INPUT -i vlan103 -j DROP +sudo iptables -D INPUT -i vlan104 -j DROP +sudo ifconfig vlan103 arp +sudo ifconfig vlan104 arp + diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 00000000..3618b120 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,106 @@ +LIB = $(top_builddir)/lib + +dist_pkgdata_DATA = $(LIB)/libofp.la + +VPATH = $(srcdir) $(builddir) + +lib_LTLIBRARIES = $(LIB)/libofp.la + +AM_CFLAGS += -I$(top_srcdir)/include +AM_CFLAGS += -I$(top_srcdir)/include/api +AM_CFLAGS += -DINET + +__LIB__libofp_la_SOURCES = \ +ofp_pkt_processing.c \ +ofp_avl.c \ +ofp_log.c \ +ofp_debug.c \ +ofp_debug_pcap.c \ +ofp_debug_print.c \ +cli/ofp_cli_route.c \ +cli/ofp_cli_debug.c \ +cli/ofp_cli_log.c \ +cli/ofp_cli_arp.c \ +cli/ofp_cli_alias.c \ +cli/ofp_cli_stat.c \ +cli/ofp_cli_ifconfig.c \ +cli/ofp_cli_sysctl.c \ +cli/ofp_cli.c \ +ofp_hash.c \ +ofp_icmp.c \ +ofp_init.c \ +ofp_inet.c \ +ofp_in_pcb.c \ +ofp_in_proto.c \ +ofp_ip_init.c \ +ofp_portconf.c \ +ofp_route.c \ +ofp_subr_hash.c \ +ofp_syscalls.c \ +ofp_timer.c \ +ofp_udp_usrreq.c \ +ofp_uipc_sockbuf.c \ +ofp_uipc_socket.c \ +ofp_uipc_domain.c \ +ofp_tcp_usrreq.c \ +ofp_tcp_subr.c \ +ofp_tcp_timer.c \ +ofp_tcp_output.c \ +ofp_tcp_input.c \ +ofp_tcp_sack.c \ +ofp_tcp_timewait.c \ +ofp_tcp_syncache.c \ +ofp_tcp_reass.c \ +ofp_gre.c \ +ofp_md5c.c \ +ofp_errno.c \ +ofp_stat.c \ +ofp_hook.c \ +ofp_util.c \ +ofp_reass.c \ +ofp_sys_socket.c \ +ofp_in.c \ +ofp_sysctl.c + +if OFP_USE_LIBCK +AM_CFLAGS += -DOFP_USE_LIBCK + +__LIB__libofp_la_SOURCES += \ +ofp_arp_ck.c +else 
+__LIB__libofp_la_SOURCES += \ +ofp_arp.c +endif + +if OFP_SP +AM_CFLAGS += -DSP + +__LIB__libofp_la_SOURCES += \ +ofp_netlink.c \ +ofp_quagga.c \ +ofp_tunthread.c +endif + +if OFP_IPv6 +AM_CFLAGS += -DINET6 + +__LIB__libofp_la_SOURCES += \ +ofp_ip6_init.c \ +ofp_in6_proto.c \ +ofp_in6.c \ +ofp_in6_pcb.c \ +ofp_in6_cksum.c \ +ofp_udp6_usrreq.c \ +ofp_icmp6.c \ +ofp_nd6.c +endif + +if OFP_MTRIE +AM_CFLAGS += -DMTRIE + +__LIB__libofp_la_SOURCES += \ +ofp_rt_mtrie_lookup.c +else +__LIB__libofp_la_SOURCES += \ +ofp_rt_lookup.c +endif diff --git a/src/cli/ofp_cli.c b/src/cli/ofp_cli.c new file mode 100644 index 00000000..216b070f --- /dev/null +++ b/src/cli/ofp_cli.c @@ -0,0 +1,1696 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ofpi_pkt_processing.h" +#include "ofpi_in.h" +#include "ofpi_cli.h" +#include "ofpi_log.h" +#include "ofpi_util.h" +#include "ofpi_portconf.h" + +/* + * Only core 0 runs this. 
/** Start CLI server thread
 * To be called by Application code to start the CLI server if needed.
 *
 * Builds a CPU mask containing only core_id and asks
 * odph_linux_pthread_create() to start cli_server() with that mask.
 *
 * @param core_id   int: the single CPU put into the mask given to the
 *                  thread-creation call
 * @param conf_file char*: passed unchanged as the argument of cli_server()
 * @return void
 *
 * NOTE(review): the value returned by odph_linux_pthread_create() is not
 * checked, and the thread handle lives in a local variable, so it is not
 * available to the caller after this function returns.
 */
void ofp_start_cli_thread(int core_id, char *conf_file)
{
	odph_linux_pthread_t cli_linux_pthread;
	odp_cpumask_t cpumask;

	/* Mask with exactly one CPU: core_id. */
	odp_cpumask_zero(&cpumask);
	odp_cpumask_set(&cpumask, core_id);

	odph_linux_pthread_create(&cli_linux_pthread,
				  &cpumask,
				  cli_server,
				  conf_file);

}
/*
 * Return 1 when val looks like an unsigned integer literal, else 0.
 *
 * Accepted forms: a run of decimal digits, or "0x"/"0X" followed by a run of
 * hexadecimal digits. The run may be empty, so "" and "0x" are accepted;
 * callers that need at least one digit check the length themselves.
 */
static int int_ok(char *val)
{
	if ((val[0] == '0') &&
	    (val[1] == 'x' || val[1] == 'X')) {
		val += 2;
		while (*val) {
			if (!((*val >= '0' && *val <= '9') ||
			      (*val >= 'a' && *val <= 'f') ||
			      (*val >= 'A' && *val <= 'F')))
				return 0;
			val++;
		}
		return 1;
	}

	while (*val) {
		if (*val < '0' || *val > '9')
			return 0;
		val++;
	}
	return 1;
}

/*
 * Return 1 when val has the shape of a dotted IPv4 address, else 0.
 *
 * The string must consist of exactly four '.'-separated fields, each 1 to 3
 * characters long and accepted by int_ok(). Field values are not
 * range-checked (e.g. "999.1.1.1" passes).
 *
 * The input is scanned in place. The previous version first strcpy()'d it
 * into a 100-byte stack buffer, which overflowed on longer input.
 */
static int ip4addr_ok(char *val)
{
	const char *p = val;
	char octet[4];
	size_t len;
	int i;

	for (i = 0; i < 4; i++) {
		len = strcspn(p, ".");
		if (len == 0 || len > 3)
			return 0;

		memcpy(octet, p, len);
		octet[len] = '\0';
		if (!int_ok(octet))
			return 0;

		p += len;
		if (i < 3) {
			/* Fields 1..3 must be followed by a separator. */
			if (*p != '.')
				return 0;
			p++;
		}
	}

	/* Anything after the fourth field (e.g. a fifth one) is an error. */
	return *p == '\0';
}
/*
 * Parse the textual IPv6 address in tk[0 .. tk_len-1] into addr.
 *
 * tk     - address text; does not need to be NUL-terminated.
 * tk_len - number of characters of tk to parse.
 * addr   - output, 16 bytes; it is zeroed first and may be partially
 *          written even when parsing fails.
 *
 * Groups are 1 to 4 hexadecimal digits separated by ':'. One "::" is
 * accepted; the groups that follow it are right-aligned in the address and
 * the gap is zero-filled.
 *
 * Returns 1 on success, 0 on a syntax error: an unexpected character, a
 * group longer than 4 digits, a second "::", or more than 8 groups. The
 * group-count check is what keeps every store inside the 16-byte output;
 * without it a ninth group was written past the end of addr.
 */
int ip6addr_get(const char *tk, int tk_len, uint8_t *addr)
{
	const char *it, *last;
	const char *last_colon;
	unsigned int value;
	unsigned int digit;
	int group_cnt;
	int group_len;
	int dbl_colon_pos;
	int i;
	char c;

	memset(addr, 0, 16);

	it = tk;
	last = it + tk_len;
	last_colon = NULL;
	group_cnt = 0;
	dbl_colon_pos = -1;

	while (it < last) {
		if ((*it) == ':') {
			if ((last_colon != NULL) &&
			    (it - 1 == last_colon)) {
				if (dbl_colon_pos != -1)
					return 0;
				/* Remember how many groups precede the "::". */
				dbl_colon_pos = group_cnt;
			}
			last_colon = it;
			it++;
			continue;
		}

		/* Accumulate one group of hex digits, most significant first. */
		value = 0;
		group_len = 0;
		while (it < last) {
			c = *it;
			if (c >= '0' && c <= '9')
				digit = (unsigned int)(c - '0');
			else if (c >= 'a' && c <= 'f')
				digit = (unsigned int)(c - 'a') + 10;
			else if (c >= 'A' && c <= 'F')
				digit = (unsigned int)(c - 'A') + 10;
			else
				break;

			if (++group_len > 4)
				return 0;
			value = (value << 4) | digit;
			it++;
		}

		if (group_len == 0)
			return 0;	/* neither ':' nor a hex digit */
		if (group_cnt >= 8)
			return 0;	/* a 9th group would not fit in addr */

		addr[group_cnt * 2] = (uint8_t)(value >> 8);
		addr[group_cnt * 2 + 1] = (uint8_t)(value & 0xff);
		group_cnt++;
	}

	if (dbl_colon_pos != -1) {
		/*
		 * Move the groups written after the "::" to the end of the
		 * address, highest byte first so the overlapping copy is
		 * safe, and clear everything between them and the "::".
		 */
		for (i = 0; i < 16 - (dbl_colon_pos * 2); i++) {
			if (i < (group_cnt - dbl_colon_pos) * 2)
				addr[15 - i] =
					addr[group_cnt * 2 - 1 - i];
			else
				addr[15 - i] = 0;
		}
	}

	return 1;
}
/*
 * Credential check for a CLI user.
 *
 * Currently a stub: both arguments are ignored and the function always
 * returns 1 (accepted). The code under "#if 0" is a disabled implementation
 * that looks the user up with getpwnam() and compares the crypt() of the
 * supplied password with the stored one; it is not compiled.
 *
 * NOTE(review): as written, any user/password pair is accepted - confirm
 * that this is acceptable wherever this function guards access.
 *
 * @param user   user name (unused)
 * @param passwd password (unused)
 * @return 1 always
 */
static int authenticate(const char *user, const char *passwd)
{
	(void)user;
	(void)passwd;
#if 0
	struct passwd *pw;
	char *epasswd;

	if ((pw = getpwnam(user)) == NULL) return 0;
	if (pw->pw_passwd == 0) return 1;
	epasswd = crypt(passwd, pw->pw_passwd);
	if (strcmp(epasswd, pw->pw_passwd)) return 0;
#endif
	return 1;
}
char IP6ADDR[] = ""; +static char IP6NET[] = ""; + +/** Check if the given word is a built-in "Parameter Keyword", + * and if so returns the Parameter string address, used as an identifier in the parser; + * + * @input str const char*: word to be checked + * @return char* + * @return NULL: the input word is not a Parameter + * @return else the Parameter string address + * + */ +static char *get_param_string(const char *str) +{ +#define IS_PARAM(str, param) (!strncmp(str, #param, strlen(#param))) + + if IS_PARAM(str, NUMBER) + return NUMBER; + if IS_PARAM(str, IP4ADDR) + return IP4ADDR; + if IS_PARAM(str, TOPNAME) + return TOPNAME; + if IS_PARAM(str, STRING) + return STRING; + if IS_PARAM(str, DEV) + return DEV; + if IS_PARAM(str, IP4NET) + return IP4NET; + if IS_PARAM(str, IP6NET) + return IP6NET; + if IS_PARAM(str, IP6ADDR) + return IP6ADDR; + +#undef IS_PARAM + return NULL; +} + +static struct cli_node end = {0, 0, 0, 0, 0, 0}; +static struct cli_node *start = &end; + +/* CLI Commands list */ + +/* Command Parameters are indicated by the following keywords: + * NUMBER,IP4ADDR,TOPNAME,STRING,DEV,IP4NET + */ + +struct cli_command commands[] = { + { + "exit", + "Quit the connection", + f_exit + }, + { + "show", + "Display information", + f_help_show + }, + { + "show help", + "Display information", + f_help_show + }, + { + "show arp", + NULL, + f_arp + }, + { + "show debug", + NULL, + f_debug_show + }, + { + "show loglevel", + NULL, + f_loglevel_show + }, + { + "show route", + NULL, + f_route_show + }, + { + "show alias", + NULL, + f_alias_show + }, + { + "show stat", + NULL, + f_stat_show + }, + { + "show ifconfig", + NULL, + f_ifconfig_show + }, + { + "debug", + "Print traffic to file (and console) or to a pcap file", + f_debug_show + }, + { + "debug NUMBER", + "Bit mask of categories whose traffic to print (15 or 0xf for everything)", + f_debug + }, + { + "debug help", + "Print help", + f_help_debug + }, + { + "debug show", + "Show debug settings", + f_debug_show 
+ }, + { + "debug capture NUMBER", + "Port mask whose traffic to save in pcap format (15 or 0xf for ports 0-3)", + f_debug_capture + }, + { + "debug capture info NUMBER", + "Non-zero = Include port number info by overwriting the first octet of dest MAC", + f_debug_info + }, + { + "debug capture file STRING", + "File to save captured packets", + f_debug_capture_file + }, + { + "loglevel", + "Show or set log level", + f_loglevel_show + }, + { + "loglevel set STRING", + "Set log level", + f_loglevel + }, + { + "loglevel help", + "Print help", + f_help_loglevel + }, + { + "loglevel show", + "Show log level", + f_loglevel_show + }, + { + "help", + NULL, + f_help + }, + { + "help exit", + NULL, + f_help_exit + }, + { + "help show", + NULL, + f_help_show + }, + { + "help debug", + NULL, + f_help_debug + }, + { + "help loglevel", + NULL, + f_help_loglevel + }, + { + "help route", + NULL, + f_help_route + }, + { + "help arp", + NULL, + f_help_arp + }, + { + "help alias", + NULL, + f_help_alias + }, + { + "help stat", + NULL, + f_help_stat + }, + { + "help ifconfig", + NULL, + f_help_ifconfig + }, + { + "arp", + "Show arp table", + f_arp + }, + { + "arp flush", + "Flush arp table", + f_arp_flush + }, + { + "arp cleanup", + "Clean old entries from arp table", + f_arp_cleanup + }, + { + "arp help", + NULL, + f_help_arp + }, + { + "route", + "Show route table", + f_route_show + }, + { + "route show", + "Show route table", + f_route_show + }, + { + "route add IP4NET gw IP4ADDR dev DEV", + "Add route", + f_route_add + }, + { + "route -A inet4 add IP4NET gw IP4ADDR dev DEV", + "Add route", + f_route_add + }, +#ifdef INET6 + { + "route -A inet6 add IP6NET gw IP6ADDR dev DEV", + "Add route", + f_route_add_v6 + }, +#endif /* INET6 */ + { + "route add vrf NUMBER IP4NET gw IP4ADDR dev DEV", + "Add route to VRF", + f_route_add_vrf + }, + { + "route -A inet4 add vrf NUMBER IP4NET gw IP4ADDR dev DEV", + "Add route to VRF", + f_route_add_vrf + }, + { + "route delete IP4NET", + "Delete 
route", + f_route_del + }, + { + "route -A inet4 delete IP4NET", + "Delete route", + f_route_del + }, + { + "route delete vrf NUMBER IP4NET", + "Delete route", + f_route_del_vrf + }, + { + "route -A inet4 delete vrf NUMBER IP4NET", + "Delete route", + f_route_del_vrf + }, +#ifdef INET6 + { + "route -A inet6 delete IP6NET", + "Delete route", + f_route_del_v6 + }, +#endif /* INET6 */ + { + "route add from DEV to DEV", + "Add route from interface to interface", + f_route_add_dev_to_dev + }, + { + "route help", + NULL, + f_help_route + }, + { + "ifconfig", + "Show interfaces", + f_ifconfig_show + }, + { + "ifconfig show", + NULL, + f_ifconfig_show + }, + { + "ifconfig DEV IP4NET", + "Create interface", + f_ifconfig + }, + { + "ifconfig -A inet4 DEV IP4NET", + "Create interface", + f_ifconfig + }, +#ifdef INET6 + { + "ifconfig -A inet6 DEV IP6NET", + "Create interface", + f_ifconfig_v6 + }, +#endif /* INET6 */ + { + "ifconfig DEV IP4NET vrf NUMBER", + "Create interface", + f_ifconfig + }, + { + "ifconfig -A inet4 DEV IP4NET vrf NUMBER", + "Create interface", + f_ifconfig + }, + { + "ifconfig tunnel gre DEV local IP4ADDR remote IP4ADDR peer IP4ADDR IP4ADDR", + "Create GRE tunnel interface", + f_ifconfig_tun + }, + { + "ifconfig tunnel gre DEV local IP4ADDR remote IP4ADDR peer IP4ADDR IP4ADDR vrf NUMBER", + "Create GRE tunnel interface", + f_ifconfig_tun + }, + { + "ifconfig DEV down", + "Delete interface", + f_ifconfig_down + }, + { + "ifconfig help", + NULL, + f_help_ifconfig + }, + { + "alias", + NULL, + f_alias_show + }, + { + "alias set STRING STRING", + "Define an alias", + f_alias_set + }, + { + "alias show", + NULL, + f_alias_show + }, + { + "alias help", + NULL, + f_help_alias + }, + { + "stat", + "Show statistics", + f_stat_show + }, + { + "stat show", + NULL, + f_stat_show + }, + { + "stat set NUMBER", + NULL, + f_stat_set + }, + { + "stat clear", + NULL, + f_stat_clear + }, + { + "stat help", + NULL, + f_help_stat + }, + { + "sysctl dump", + "Dump sysctl 
tree", + f_sysctl_dump + }, + { + "sysctl r STRING", + "Read sysctl variable", + f_sysctl_read + }, + { + "sysctl w STRING STRING", + "Set sysctl variable", + f_sysctl_write + }, + { NULL, NULL, NULL } +}; + +static void print_nodes(int fd, struct cli_node *node) +{ + struct cli_node *n; + static int depth = 0; + int i; + int ni = 0; + struct cli_node *stack[100]; + + if (node == &end) + return; + + for (i = 0; i < depth; i++) + ofp_sendf(fd, " "); + for (n = node; n != &end; n = n->nextword) { + depth += strlen(n->word) + 1; + stack[ni++] = n; + ofp_sendf(fd, "%s ", n->word); + } + + printf("\n"); + while (ni > 0) { + n = stack[--ni]; + depth -= strlen(n->word) + 1; + print_nodes(fd, n->nextpossibility); + } +} + +static struct cli_node *add_command(struct cli_node *root, struct cli_command *cc) +{ + struct cli_node *s; + struct cli_node *cn = root; + struct cli_node *new; + struct cli_node *n; + int nextpossibility = 0; + int len; + char *nw; + char *param; + const char *str; + const char *w; + + w = cc->command; + + s = cn; + while (cn != &end) { + nw = strchr(w, ' '); + + str = get_param_string(w); + if (!str) { + str = w; + if (nw) + len = nw - w; + else + len = strlen(w); + } else { + len = strlen(str); + } + + while (cn != &end && strncmp(str, cn->word, len)) { + s = cn; + cn = cn->nextpossibility; + } + + if (cn == &end) { + nextpossibility = 1; + } else { + if (!nw) + ofp_generate_coredump(); + w = nw + 1; + s = cn; + cn = cn->nextword; + } + } + + new = NULL; + cn = NULL; + while (w) { + n = malloc(sizeof(*cn)); + n->help = NULL; + n->func = NULL; + n->nextword = &end; + n->nextpossibility = &end; + + if (!new) + new = n; + + if (cn) + cn->nextword = n; + + cn = n; + param = get_param_string(w); + nw = strchr(w, ' '); + if (!nw) { + if (param) + n->word = param; + else + n->word = strdup(w); + break; + } + /* else */ + if (param) { + n->word = param; + } else { + n->word = malloc(nw - w + 1); + memcpy(n->word, w, nw - w); + n->word[nw - w] = '\0'; + } + w 
= nw + 1; + } + + cn->func = cc->func; + cn->help = cc->help; + + if (root == &end) + root = new; + else if (nextpossibility) + s->nextpossibility = new; + else + s->nextword = new; + + return root; +} + +static void f_run_alias(struct cli_conn *conn, const char *s) +{ + (void)s; + char *line = conn->inbuf; + int i; + + for (i = 0; i < ALIAS_TABLE_LEN; i++) { + if (alias_table[i].name == 0 || alias_table[i].cmd == 0) + continue; + if (strncmp(line, alias_table[i].name, + strlen(alias_table[i].name)) == 0) { + run_alias = i; + return; + } + } +} + +void f_add_alias_command(const char *name) +{ + struct cli_command a; + + a.command = name; + a.help = "Alias command"; + a.func = f_run_alias; + start = add_command(start, &a); +} + +static void cli_init_commands(void) +{ + unsigned i = 0; + static int initialized = 0; + struct cli_conn conn; + + if (initialized) + return; + + initialized = 1; + + /* virtual connection */ + memset(&conn, 0, sizeof(conn)); + conn.fd = 1; /* stdout */ + conn.status = CONNECTION_ON; /* no prompt */ + + + /* Initalize alias table*/ + for (i = 0; i < ALIAS_TABLE_LEN; i++) { + alias_table[i].name = NULL; + alias_table[i].cmd = NULL; + } + + /* Add regular commands */ + for (i = 0; commands[i].command; i++) + start = add_command(start, &commands[i]); + + /* Print nodes */ + if (OFP_IS_LOGLEVEL_DEBUG()) { + ofp_sendf(conn.fd, "CLI Command nodes:\n"); + print_nodes(conn.fd, start); + } +} + +static void cli_process_conf_file(char *config_file_name) +{ + FILE *f; + struct cli_conn conn; + + /* virtual connection */ + memset(&conn, 0, sizeof(conn)); + conn.fd = 1; /* stdout */ + conn.status = CONNECTION_ON; /* no prompt */ + + if (config_file_name != NULL) { + f = fopen(config_file_name, "r"); + if (!f) { + OFP_ERR("OFP configuration file not found.\n"); + return; + } + + while (fgets(conn.inbuf, sizeof(conn.inbuf), f)) { + if (conn.inbuf[0] == '#' || conn.inbuf[0] <= ' ') + continue; + ofp_sendf(conn.fd, "CONFIGURATION LINE: %s\n", + conn.inbuf); 
+ parse(&conn, 0); + } + + fclose(f); + } + else { + OFP_DBG("OFP configuration file not set.\n"); + } +} + +static void print_q(struct cli_conn *conn, struct cli_node *s, struct cli_node *ok) +{ + char sendbuf[200]; + + if (s == &end || (ok && ok->func)) { + sendstr(conn, "\r\n "); + //return; + } + while (s != &end) { + if (s->help) + sprintf(sendbuf, "\r\n %-20s(%.158s)", s->word, s->help); + else + sprintf(sendbuf, "\r\n %.178s", s->word); + sendstr(conn, sendbuf); + s = s->nextpossibility; + } + sendcrlf(conn); + return; +} + +static struct cli_node *find_next_vertical(struct cli_node *s, char *word) +{ + int foundcnt = 0, len = strlen(word); + struct cli_node *found = 0; + + while (s != &end) { + if ((strncmp(s->word, word, len) == 0) || + (s->word == NUMBER && int_ok(word)) || + (s->word == IP4ADDR && ip4addr_ok(word)) || + (s->word == TOPNAME && topname_ok(word)) || + (s->word == DEV && dev_ok(word)) || + (s->word == IP4NET && ip4net_ok(word)) || + (s->word == STRING) || + (s->word == IP6ADDR && ip6addr_ok(word)) || + (s->word == IP6NET && ip6net_ok(word))) { + foundcnt++; + if (foundcnt > 1) return 0; + found = s; + } + s = s->nextpossibility; + } + return found; +} + +static int is_parameter(struct cli_node *s) +{ + return ((s->word == NUMBER) || + (s->word == IP4ADDR) || + (s->word == TOPNAME) || + (s->word == DEV) || + (s->word == IP4NET) || + (s->word == STRING) || + (s->word == IP6ADDR) || + (s->word == IP6NET)); +} + +/** parse(): parse a Command line + * + * @param conn struct cli_conn* + * @param extra int + * @return void + * + */ +static void parse(struct cli_conn *conn, int extra) +{ + char **ap, *argv[50], **token, *msg, *lasttoken = 0; + char b[sizeof(conn->inbuf)]; + struct cli_node *p = start, *horpos = &end, *lastok = 0; + int paramlen; + char paramlist[100]; + char *line = conn->inbuf; + int linelen = strlen(line); + + if (linelen > 0 && line[linelen-1] == ' ' && extra) extra = '?'; + else if (linelen == 0 && extra) extra = '?'; + else if 
(extra) extra = '\t'; + + if (linelen == 0) { + print_q(conn, p, 0); + return; + } + + strcpy(b, line); + msg = b; + + for (ap = argv; (*ap = strsep(&msg, " \r\n")) != NULL;) { + if (**ap != '\0') { + if (++ap >= &argv[49]) + break; + + if (msg != NULL && *msg == '\"') { + msg += 1; + *ap = strsep(&msg, "\"\r\n"); + if (++ap >= &argv[49]) + break; + } + } + } + + token = argv; + + horpos = p; + paramlen = 0; + paramlist[0] = 0; + + while (*token && p != &end) { + struct cli_node *found; + found = find_next_vertical(p, *token); + if (found) { + lastok = found; + lasttoken = *token; + p = found->nextword; + horpos = p; + if ((found->word == NUMBER && int_ok(*token)) || + (found->word == IP4ADDR && ip4addr_ok(*token)) || + (found->word == TOPNAME && topname_ok(*token)) || + (found->word == DEV && dev_ok(*token)) || + (found->word == IP4NET && ip4net_ok(*token)) || + (found->word == STRING) || + (found->word == IP6ADDR && ip6addr_ok(*token)) || + (found->word == IP6NET && ip6net_ok(*token))) { + paramlen += sprintf(paramlist + paramlen, + "%s ", *token); + } + token++; + } else { + p = &end; + } + } + + if (extra && p == &end && *token == 0) { + if (is_parameter(lastok) || + strlen(lastok->word) == strlen(lasttoken)) { + sendstr(conn, "\r\n "); + sendcrlf(conn); + sendstr(conn, line); + } else { + addchars(conn, lastok->word + strlen(lasttoken)); + addchars(conn, " "); + sendstr(conn, lastok->word + strlen(lasttoken)); + sendstr(conn, " "); + } + return; + } + + if (lastok && lastok->func && extra == 0) { + lastok->func(conn, paramlist); + return; + } + + if (extra == '?') { + print_q(conn, horpos, lastok); + sendstr(conn, line); + return; + } + + if (extra == '\t') { + struct cli_node *found = 0; + + if (*token == NULL) { + addchars(conn, lastok->word + strlen(lasttoken)); + addchars(conn, " "); + sendstr(conn, lastok->word + strlen(lasttoken)); + sendstr(conn, " "); + return; + } + + found = find_next_vertical(horpos, *token); + + if (found) { + addchars(conn, 
found->word + strlen(*token)); + addchars(conn, " "); + sendstr(conn, found->word + strlen(*token)); + sendstr(conn, " "); + return; + } + + print_q(conn, horpos, lastok); + sendstr(conn, line); + return; + } + + sendstr(conn, "syntax error\r\n"); + sendcrlf(conn); + return; +} + +static char telnet_echo_off[] = { + 0xff, 0xfb, 0x01, /* IAC WILL ECHO */ + 0xff, 0xfb, 0x03, /* IAC WILL SUPPRESS_GO_AHEAD */ + 0xff, 0xfd, 0x03, /* IAC DO SUPPRESS_GO_AHEAD */ +}; + +static void addchars(struct cli_conn *conn, const char *s) +{ + strcat(conn->inbuf, s); + conn->pos += strlen(s); +} + + +static int cli_read(int fd) +{ + struct cli_conn *conn = &connection; + unsigned char c; + + //receive data from client + if (recv(fd, &c, 1, 0) <= 0) { + perror("recv"); + close_connection(conn); + return -1; + } + + //printf("ch = %02x = %c\n", c, c); + + if (conn->status & WAITING_PASSWD) { + unsigned int plen = strlen(conn->passwd); + if (c == 10 || c == 13) { + conn->status &= ~WAITING_PASSWD; + if (authenticate("admin", conn->passwd)) { + conn->status |= ENABLED_OK; + sendcrlf(conn); + } else { + sendstr(conn, "Your password fails!"); + sendcrlf(conn); + } + } else if (plen < (sizeof(conn->passwd)-1)) { + conn->passwd[plen] = c; + conn->passwd[plen+1] = 0; + } + return 0; + } else if (conn->status & WAITING_TELNET_1) { + conn->ch1 = c; + conn->status &= ~WAITING_TELNET_1; + conn->status |= WAITING_TELNET_2; + return 0; + } else if (conn->status & WAITING_TELNET_2) { + //printf("telnet: 0x%x 0x%x=%d\n", conn->ch1, c, c); + static int num_dsp_chars = 0; + static char dsp_chars[8]; + + if (num_dsp_chars) { + dsp_chars[6 - num_dsp_chars--] = c; + if (num_dsp_chars == 0) { + conn->status &= ~WAITING_TELNET_2; + cli_display_width = dsp_chars[1]; + cli_display_height = dsp_chars[3]; + //printf("display size = %dx%d\n", dsp_chars[1], dsp_chars[3]); + } + return 0; + } + + if (conn->ch1 == 0xfd && c == 0x01) conn->status |= DO_ECHO; + else if (conn->ch1 == 0xfd && c == 0x03) conn->status |= 
DO_SUPPRESS; + else if (conn->ch1 == 0xfb && c == 0x03) { + conn->status |= WILL_SUPPRESS; + // ask for display size + char com[] = {255, 253, 31}; + send(fd, com, sizeof(com), 0); + } else if (conn->ch1 == (unsigned char)0x251 && c == 31) { + // IAC WILL NAWS (display size) + } else if (conn->ch1 == 250 && c == 31) { // (display size info) + num_dsp_chars = 6; + return 0; + } + conn->status &= ~WAITING_TELNET_2; + return 0; + } else if (conn->status & WAITING_ESC_1) { + conn->ch1 = c; + conn->status &= ~WAITING_ESC_1; + conn->status |= WAITING_ESC_2; + return 0; + } else if (conn->status & WAITING_ESC_2) { + //printf("ESC ch1=0x%x ch2=0x%x\n", conn->ch1, c); + conn->status &= ~WAITING_ESC_2; + if (conn->ch1 != 0x5b) + return 0; + + switch (c) { + case 0x41: // up + c = 0x10; /* arrow up = ctl-P */ + break; + case 0x42: // down + c = 0x0e; /* arrow down = ctl-N */ + break; + case 0x44: // left + c = 8; /* arrow left = backspace */ + break; + case 0x31: // home + cli_curses = !cli_curses; + printf("CURSES=%d\n", cli_curses); + return 0; + case 0x32: // ins + case 0x33: // delete + case 0x34: // end + case 0x35: // pgup + case 0x36: // pgdn + case 0x43: // right + case 0x45: // 5 + return 0; + } + } + + if (c == 4) { /* ctl-D */ + close_connection(conn); + return 0; + } else if (c == 0x10 || c == 0x0e) { /* ctl-P or ctl-N */ + strcpy(conn->inbuf, conn->oldbuf[conn->old_get_cnt]); + if (c == 0x10) { + conn->old_get_cnt--; + if (conn->old_get_cnt < 0) + conn->old_get_cnt = NUM_OLD_BUFS - 1; + } else { + conn->old_get_cnt++; + if (conn->old_get_cnt >= NUM_OLD_BUFS) + conn->old_get_cnt = 0; + } + conn->pos = strlen(conn->inbuf); + sendstr(conn, "\r "); + sendprompt(conn); + sendstr(conn, conn->inbuf); + } else if (c == 0x1b) { + conn->status |= WAITING_ESC_1; + } else if (c == 0xff) { + /* telnet commands */ + conn->status |= WAITING_TELNET_1; + /* + unsigned char c1, c2; + recv(conn->fd, &c1, 1, 0); + recv(conn->fd, &c2, 1, 0); + if (c1 == 0xfd && c2 == 0x01) 
conn->status |= DO_ECHO; + else if (c1 == 0xfd && c2 == 0x03) conn->status |= DO_SUPPRESS; + else if (c1 == 0xfb && c2 == 0x03) conn->status |= WILL_SUPPRESS; + */ + } else if (c == 13 || c == 10) { + char nl[] = {13, 10}; + if (conn->status & DO_ECHO) + send(fd, nl, sizeof(nl), 0); + conn->inbuf[conn->pos] = 0; + if (0 && conn->pos == 0) { + strcpy(conn->inbuf, conn->oldbuf[conn->old_put_cnt]); + conn->pos = strlen(conn->inbuf); + sendstr(conn, conn->inbuf); + send(fd, nl, sizeof(nl), 0); + } else if (conn->pos > 0 && strcmp(conn->oldbuf[conn->old_put_cnt], conn->inbuf)) { + conn->old_put_cnt++; + if (conn->old_put_cnt >= NUM_OLD_BUFS) + conn->old_put_cnt = 0; + strcpy(conn->oldbuf[conn->old_put_cnt], conn->inbuf); + } + + if (conn->pos) { + parse(conn, 0); + + if (run_alias >= 0) { + strcpy(conn->inbuf, alias_table[run_alias].cmd); + run_alias = -1; + parse(conn, 0); + } + } else + sendcrlf(conn); + + conn->pos = 0; + conn->inbuf[0] = 0; + conn->old_get_cnt = conn->old_put_cnt; + } else if (c == 8 || c == 127) { + if (conn->pos > 0) { + char bs[] = {8, ' ', 8}; + if (conn->status & DO_ECHO) + send(fd, bs, sizeof(bs), 0); + conn->pos--; + conn->inbuf[conn->pos] = 0; + } + } else if (c == '?' 
|| c == '\t') { + parse(conn, c); + } else if (c >= ' ' && c < 127) { + if (conn->pos < (sizeof(conn->inbuf) - 1)) { + conn->inbuf[conn->pos++] = c; + conn->inbuf[conn->pos] = 0; + + if (conn->status & DO_ECHO) + send(fd, &c, 1, 0); + } + } + + return 0; +} + +static void cli_sa_accept(int fd) +{ + struct cli_conn *conn; + + conn = &connection; + bzero(conn, sizeof(*conn)); + conn->fd = fd; + send(fd, telnet_echo_off, sizeof(telnet_echo_off), 0); + + OFP_DBG("new sock %d opened\r\n", conn->fd); +} + +#define OFP_SERVER_PORT 2345 + +#define UFP_SERVER_PORT 2345 + +static int cli_serv_fd = -1, cli_tmp_fd = -1; + +/** CLI server thread + * + * @param arg void* + * @return void* + * + */ +static void *cli_server(void *arg) +{ + int alen; + struct sockaddr_in my_addr, caller; + int reuse = 1; + fd_set read_fd, fds; + char *config_file_name; + + close_cli = 0; + + config_file_name = (char *)arg; + + printf("CLI server started on core %i\n", odp_cpu_id()); + + odp_init_local(); + ofp_init_local(); + + cli_init_commands(); + + cli_process_conf_file(config_file_name); + + cli_serv_fd = socket(AF_INET, SOCK_STREAM, 0); + if (cli_serv_fd < 0) { + OFP_ERR("cli serv socket\n"); + return NULL; + } + + if (setsockopt(cli_serv_fd, SOL_SOCKET, + SO_REUSEADDR, (void *)&reuse, sizeof(reuse)) < 0) + OFP_ERR("cli setsockopt (SO_REUSEADDR)\n"); + + memset(&my_addr, 0, sizeof(my_addr)); + my_addr.sin_family = AF_INET; + my_addr.sin_port = htons(OFP_SERVER_PORT); + my_addr.sin_addr.s_addr = htonl(INADDR_ANY); + + if (bind(cli_serv_fd, (struct sockaddr *)&my_addr, + sizeof(struct sockaddr)) < 0) { + OFP_ERR("serv bind\n"); + return NULL; + } + + listen(cli_serv_fd, 1); + + FD_ZERO(&read_fd); + FD_SET(cli_serv_fd, &read_fd); + + while (1) { + struct timeval timeout; + int r; + + fds = read_fd; + + if (cli_tmp_fd > 0) + FD_SET(cli_tmp_fd, &fds); + + timeout.tv_sec = 1; + timeout.tv_usec = 0; + + r = select(FD_SETSIZE, &fds, NULL, NULL, &timeout); + + if (close_cli) { + if (cli_tmp_fd > 0) + 
close(cli_tmp_fd); + cli_tmp_fd = -1; + close_cli = 0; + OFP_DBG("CLI connection closed\r\n"); + } + + if (r < 0) + continue; + + if (FD_ISSET(cli_serv_fd, &fds)) { + close_cli = 0; + + if (cli_tmp_fd > 0) + close(cli_tmp_fd); + + alen = sizeof(caller); + cli_tmp_fd = accept(cli_serv_fd, + (struct sockaddr *)&caller, + (socklen_t *)&alen); + if (cli_tmp_fd < 0) { + OFP_ERR("cli serv accept"); + continue; + } + cli_sa_accept(cli_tmp_fd); + cli_send_welcome_banner(cli_tmp_fd); + OFP_DBG("CLI connection established\r\n"); + } + + if (cli_tmp_fd > 0 && FD_ISSET(cli_tmp_fd, &fds)) { + if (cli_read(cli_tmp_fd)) { + close(cli_tmp_fd); + cli_tmp_fd = -1; + OFP_DBG("CLI connection closed\r\n"); + } + } + } /* while (1) */ + + if (cli_tmp_fd > 0) + close(cli_tmp_fd); + cli_tmp_fd = -1; + + OFP_DBG("Connection closed\r\n"); + + close(cli_serv_fd); + cli_serv_fd = -1; +} + +/*end*/ diff --git a/src/cli/ofp_cli_alias.c b/src/cli/ofp_cli_alias.c new file mode 100644 index 00000000..169df4e5 --- /dev/null +++ b/src/cli/ofp_cli_alias.c @@ -0,0 +1,93 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_log.h" +#include "ofpi_cli.h" +#include "ofpi_util.h" + +struct alias_table_s alias_table[ALIAS_TABLE_LEN]; + +void f_alias_set(struct cli_conn *conn, const char *s) +{ + const char *name; + int name_len; + const char *line; + + int i; + + name = s; + while ((*s != ' ') && (*s != 0)) + s++; + name_len = s - name; + + line = NULL; + if (*s != 0) { + while (*s == ' ') + s++; + if (*s != 0) + line = s; + } + + for (i = 0; i < ALIAS_TABLE_LEN; i++) { + if (alias_table[i].name == 0) { + + alias_table[i].name = strndup(name, name_len); + alias_table[i].cmd = strdup(line); + f_add_alias_command(alias_table[i].name); + break; + } else { + if (strncmp(alias_table[i].name, name, name_len) == 0) { + if (alias_table[i].cmd) + free(alias_table[i].cmd); + alias_table[i].cmd 
= strdup(line); + break; + } + } + } + + sendcrlf(conn); +} + +void f_alias_show(struct cli_conn *conn, const char *s) +{ + int i; + + (void)s; + ofp_sendf(conn->fd, "Alias Command\r\n"); + for (i = 0; i < ALIAS_TABLE_LEN; i++) { + if (alias_table[i].name != 0) { + ofp_sendf(conn->fd, "%-10s %s\r\n", + alias_table[i].name, + alias_table[i].cmd); + } else + break; + } + sendcrlf(conn); +} + +void f_help_alias(struct cli_conn *conn, const char *s) +{ + (void)s; + ofp_sendf(conn->fd, + "Add an alias for a command:\r\n" + " alias set \"\"\r\n" + " Example:\r\n" + " alias set ll \"loglevel show\"\r\n\r\n"); + + ofp_sendf(conn->fd, + "Show alias table:\r\n" + " alias show\r\n\r\n"); + + ofp_sendf(conn->fd, + "Show (this) help:\r\n" + " alias help\r\n\r\n"); + sendcrlf(conn); +} diff --git a/src/cli/ofp_cli_arp.c b/src/cli/ofp_cli_arp.c new file mode 100644 index 00000000..c54a920a --- /dev/null +++ b/src/cli/ofp_cli_arp.c @@ -0,0 +1,66 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_log.h" +#include "ofpi_cli.h" +#include "ofpi_route.h" +#include "ofpi_arp.h" +#include "ofpi_util.h" + + +void f_arp(struct cli_conn *conn, const char *s) +{ + (void)s; + + ofp_show_routes(conn->fd, OFP_SHOW_ARP); + ofp_arp_show_saved_packets(conn->fd); + sendcrlf(conn); +} + +void f_arp_flush(struct cli_conn *conn, const char *s) +{ + (void)s; + + ofp_arp_init_tables(); + sendcrlf(conn); +} + +void f_arp_cleanup(struct cli_conn *conn, const char *s) +{ + int cli = 1; + + (void)s; + + ofp_arp_cleanup(&cli); + sendcrlf(conn); +} + +void f_help_arp(struct cli_conn *conn, const char *s) +{ + (void)s; + ofp_sendf(conn->fd, + "Show arp table:\r\n" + " arp\r\n\r\n"); + + ofp_sendf(conn->fd, + "Flush arp table:\r\n" + " arp flush\r\n\r\n"); + + ofp_sendf(conn->fd, + "Clean old entries from arp table:\r\n" + " arp cleanup\r\n\r\n"); + + ofp_sendf(conn->fd, + "Show (this) 
help:\r\n" + " arp help\r\n\r\n"); + + sendcrlf(conn); +} diff --git a/src/cli/ofp_cli_debug.c b/src/cli/ofp_cli_debug.c new file mode 100644 index 00000000..191705a7 --- /dev/null +++ b/src/cli/ofp_cli_debug.c @@ -0,0 +1,182 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_debug.h" +#include "ofpi_cli.h" +#include "ofpi_util.h" + +/* debug NUMBER */ +void f_debug(struct cli_conn *conn, const char *s) +{ + (void)s; + ofp_debug_flags = (ofp_debug_flags & + (~OFP_DEBUG_PCAP_PORT_MASK)) | + strtol(s, NULL, 0); + + if ((ofp_debug_flags & OFP_DEBUG_CAPTURE) && + (ofp_debug_capture_ports == 0)) { + + /*enable capture on first port*/ + ofp_debug_capture_ports = 0x1; + } + sendcrlf(conn); +} + +/* debug show */ +void f_debug_show(struct cli_conn *conn, const char *s) +{ + int i; + char filename[128]; + + (void)s; + + if (ofp_debug_flags & (OFP_DEBUG_PRINT_RECV_NIC | + OFP_DEBUG_PRINT_SEND_NIC | + OFP_DEBUG_PRINT_RECV_KNI | + OFP_DEBUG_PRINT_SEND_KNI)) { + ofp_sendf(conn->fd, + "Printing traffic on file%s:%s%s%s%s\r\n", + ofp_debug_flags & OFP_DEBUG_PRINT_CONSOLE ? + " (and console)" : "", + ofp_debug_flags & OFP_DEBUG_PRINT_RECV_NIC ? + " ODP-to-FP" : "", + ofp_debug_flags & OFP_DEBUG_PRINT_SEND_NIC ? + " FP-to-ODP" : "", + ofp_debug_flags & OFP_DEBUG_PRINT_RECV_KNI ? + " FP-to-SP" : "", + ofp_debug_flags & OFP_DEBUG_PRINT_SEND_KNI ? + " SP-to-ODP" : ""); + ofp_sendf(conn->fd, " Printing file: " + DEFAULT_DEBUG_TXT_FILE_NAME"\r\n"); + } else { + ofp_sendf(conn->fd, "Printing NO traffic.\r\n"); + } + + if (ofp_debug_flags & OFP_DEBUG_CAPTURE) { + ofp_sendf(conn->fd, + "Capturing traffic from ports%s:", + ofp_debug_capture_ports & + OFP_DEBUG_PCAP_CONF_ADD_INFO ? 
+ " (with info)" : ""); + + for (i = 0; i < 30; i++) + if (ofp_debug_capture_ports & (1 << i)) + ofp_sendf(conn->fd, " %d", i); + + ofp_sendf(conn->fd, "\r\n"); + + ofp_get_capture_file(filename, sizeof(filename)); + + ofp_sendf(conn->fd, " Capturing file: %s\r\n", filename); + } else { + ofp_sendf(conn->fd, "Capturing NO traffic.\r\n"); + } + + sendcrlf(conn); +} + +/* debug capture NUMBER */ +void f_debug_capture(struct cli_conn *conn, const char *s) +{ + ofp_debug_capture_ports = strtol(s, NULL, 0); + + if (ofp_debug_capture_ports) + ofp_debug_flags |= OFP_DEBUG_CAPTURE; + else + ofp_debug_flags &= ~OFP_DEBUG_CAPTURE; + + sendcrlf(conn); +} + +/* debug capture info NUMBER */ +void f_debug_info(struct cli_conn *conn, const char *s) +{ + if (atoi(s)) + ofp_debug_capture_ports |= OFP_DEBUG_PCAP_CONF_ADD_INFO; + else + ofp_debug_capture_ports &= ~OFP_DEBUG_PCAP_CONF_ADD_INFO; + + sendcrlf(conn); +} + + /* debug capture file STRING */ +void f_debug_capture_file(struct cli_conn *conn, const char *s) +{ + (void)s; + ofp_set_capture_file(s); + sendcrlf(conn); +} + +/* debug */ +/* debug help */ +/* help debug*/ +void f_help_debug(struct cli_conn *conn, const char *s) +{ + (void)s; + ofp_sendf(conn->fd, + "Show debug settings\r\n" + " debug show\r\n\r\n"); + + ofp_sendf(conn->fd, + "Set options for printing traffic on file" + " (and console) in text format and capturing traffic" + " on file in pcap format\r\n" + " debug \r\n" + " bit 0: print packets from ODP to FP\r\n" + " bit 1: print packets from FP to ODP\r\n" + " bit 2: print packets from FP to SP\r\n" + " bit 3: print packets from SP to ODP\r\n" + " bit 4: print packets to console\r\n" + " bit 6: capture packets to pcap file\r\n" + " - set/reset automatically by capture function\r\n" + " Default text file name: '" + DEFAULT_DEBUG_TXT_FILE_NAME"'\r\n" + " Default capture file name: '" + DEFAULT_DEBUG_PCAP_FILE_NAME"'\r\n" + " Example: Print SP traffic:\r\n" + " debug 0xc\r\n" + " (numbers can be in decimal or hex 
format)\r\n\r\n"); + + ofp_sendf(conn->fd, + "Set packet capture port(s).\r\n" + " debug capture \r\n" + " bit 0: port 0\r\n" + " bit 1: port 1\r\n" + " etc.\r\n" + " Note: \r\n" + " A zero value will disable packet capture.\r\n" + " A non-zero value will enable packet capture.\r\n" + " Default capture file is '"DEFAULT_DEBUG_PCAP_FILE_NAME"'\r\n" + " Old file is overwritten when the fastpath starts.\r\n" + " Example: Save traffic of ports 0, 2, and 3:\r\n" + " debug capture 0xd\r\n\r\n"); + + ofp_sendf(conn->fd, + "Set packet capture file or fifo\r\n" + " debug capture file \r\n" + " Example:\r\n" + " debug capture file /root/my-fifo\r\n\r\n"); + + ofp_sendf(conn->fd, + "Set the first octet of the destination MAC address " + "of captured packet to 'port info' value.\r\n" + " debug capture info <1 or 0>\r\n" + " 1: overwrite MAC address octet\r\n" + " 0: no overwriting\r\n" + " Port info format:\r\n" + " bits 0-5: port number\r\n" + " bit 6: 1 = SP traffic\r\n" + " bit 7: 0 = received, 1 = transmitted packet\r\n" + " Example: tcpdump line:\r\n" + " '11:36:56.851469 b4:b5:2f:63:05:e5 > c0:9d:67:1a:97:7e, ethe...'\r\n" + " 1st octet of dst = 0xc0 -> port = 0, tx via KNI\r\n\r\n"); + sendcrlf(conn); +} diff --git a/src/cli/ofp_cli_ifconfig.c b/src/cli/ofp_cli_ifconfig.c new file mode 100644 index 00000000..e04ac052 --- /dev/null +++ b/src/cli/ofp_cli_ifconfig.c @@ -0,0 +1,239 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_log.h" +#include "ofpi_cli.h" +#include "ofpi_portconf.h" +#include "ofpi_util.h" + + +/* "ifconfig" */ +/* "ifconfig show" */ +/* "show ifconfig" */ +void f_ifconfig_show(struct cli_conn *conn, const char *s) +{ + (void)s; + + ofp_show_interfaces(conn->fd); + + sendcrlf(conn); +} + +/* "ifconfig help" */ +/* "help ifconfig" */ +void f_help_ifconfig(struct cli_conn *conn, const char *s) +{ + (void)s; + + 
ofp_sendf(conn->fd, "Show interfaces:\r\n" + " ifconfig [show]\r\n\r\n"); + + ofp_sendf(conn->fd, "Create interface:\r\n" + " ifconfig [-A inet4] DEV IP4NET\r\n" + " DEV: ethernet interface name\r\n" + " IP4NET: network address in a.b.c.d/e format\r\n" + " Example:\r\n" + " ifconfig %s0 192.168.200.1/24\r\n\r\n", + OFP_IFNAME_PREFIX); + + ofp_sendf(conn->fd, "Create interface on virtual route table:\r\n" + " ifconfig [-A inet4] DEV IP4NET vrf VRF\r\n" + " DEV: ethernet interface name\r\n" + " IP4NET: network address in a.b.c.d/e format\r\n" + " VRF: number\r\n" + " Example:\r\n" + " ifconfig %s0 192.168.200.1/24 vrf 2\r\n\r\n", + OFP_IFNAME_PREFIX); + ofp_sendf(conn->fd, "Create GRE tunnel:\r\n" + " ifconfig tunnel gre DEV local IP4ADDR remote IP4ADDR peer IP4ADDR IP4ADDR\r\n" + " DEV: gre interface name\r\n" + " local: tunnel local ip address in a.b.c.d format\r\n" + " remote: tunnel remote ip address in a.b.c.d format\r\n" + " peer: pointtopoint ip address in a.b.c.d format\r\n" + " IP4ADDR: interface ip address in a.b.c.d format\r\n" + " Example:\r\n" + " ifconfig %s100 local 192.168.200.1 remote 192.168.200.2 peer 10.10.10.2 10.10.10.1\r\n\r\n", + OFP_GRE_IFNAME_PREFIX); + ofp_sendf(conn->fd, "Create GRE tunnel on virtual route table :\r\n" + " ifconfig tunnel gre DEV local IP4ADDR remote IP4ADDR peer IP4ADDR IP4ADDR vrf VRF\r\n" + " DEV: gre interface name\r\n" + " local: tunnel local ip address in a.b.c.d format\r\n" + " remote: tunnel remote ip address in a.b.c.d format\r\n" + " peer: pointtopoint ip address in a.b.c.d format\r\n" + " IP4ADDR: interface ip address in a.b.c.d format\r\n" + " vrf: number\r\n" + " Example:\r\n" + " ifconfig %s100 local 192.168.200.1 remote 192.168.200.2 peer 10.10.10.2 10.10.10.1 vrf 2\r\n\r\n", + OFP_GRE_IFNAME_PREFIX); +#ifdef INET6 + ofp_sendf(conn->fd, "Create IPv6 interface:\r\n" + " ifconfig -A inet6 DEV IP6NET\r\n" + " DEV: ethernet interface name\r\n" + " IP6NET: network address in a:b:c:d:e:f:g:h/n or" + " compressed 
format\r\n" + " Example:\r\n" + " ifconfig -A inet6 %s0 2000:1baf::/64\r\n\r\n", + OFP_IFNAME_PREFIX); +#endif /* INET6 */ + ofp_sendf(conn->fd, "Delete interface:\r\n" + " ifconfig DEV down\r\n" + " DEV: ethernet interface name\r\n" + " Example:\r\n" + " ifconfig %s0 down\r\n\r\n", + OFP_IFNAME_PREFIX); + + ofp_sendf(conn->fd, "Show (this) help:\r\n" + " ifconfig help\r\n\r\n"); + + sendcrlf(conn); +} + +/* "ifconfig [-A inet 4] DEV IP4NET";*/ +void f_ifconfig(struct cli_conn *conn, const char *s) +{ + + char dev[16]; + int port, a, b, c, d, m, vlan, vrf = 0; + uint32_t addr; + const char *err; + + if (sscanf(s, "%s %d.%d.%d.%d/%d %d", dev, &a, &b, + &c, &d, &m, &vrf) < 6) + return; + addr = odp_cpu_to_be_32((a << 24) | (b << 16) | (c << 8) | d); + port = ofp_name_to_port_vlan(dev, &vlan); + + if (port == GRE_PORTS) { + ofp_sendf(conn->fd, "Invalid device name.\r\n"); + return; + } + + err = ofp_config_interface_up_v4(port, vlan, vrf, + addr, m); + if (err != NULL) + ofp_sendf(conn->fd, err); + sendcrlf(conn); +} + +/* "ifconfig tunnel gre DEV local IP4ADDR remote IP4ADDR peer IP4ADDR IP4ADDR vrf NUMBER";*/ +void f_ifconfig_tun(struct cli_conn *conn, const char *s) +{ + char dev[16], loc[16], rem[16], ip[16], peer[16]; + uint32_t tun_loc, tun_rem, addr, p2p; + int port, vlan, vrf = 0, masklen = 32; + const char *err; + + if (sscanf(s, "%s %s %s %s %s %d", dev, loc, rem, peer, ip, &vrf) < 5) + return; + + port = ofp_name_to_port_vlan(dev, &vlan); + + if (port != GRE_PORTS) { + ofp_sendf(conn->fd, "Invalid device name.\r\n"); + sendcrlf(conn); + return; + } + + if (!ip4addr_get(loc, &tun_loc)) + return; + if (!ip4addr_get(rem, &tun_rem)) + return; + if (!ip4addr_get(peer, &p2p)) + return; + if (!ip4addr_get(ip, &addr)) + return; + + + err = ofp_config_interface_up_tun(port, vlan, vrf, tun_loc, tun_rem, + p2p, addr, masklen); + if (err != NULL) + ofp_sendf(conn->fd, err); + sendcrlf(conn); +} + +/* ifconfig -A inet6 DEV IP6NET */ +#ifdef INET6 +void 
f_ifconfig_v6(struct cli_conn *conn, const char *s) +{ + char dev[16]; + uint8_t addr6[16]; + int prefix, port, vlan; + const char *tk; + const char *tk_end; + const char *err; + + /*get DEV*/ + tk = s; + tk_end = strstr(tk, " "); + + if (!tk_end || ((int)(tk_end - tk) > (int)(sizeof(dev) - 1))) { + ofp_sendf(conn->fd, "Invalid device name.\r\n"); + sendcrlf(conn); + return; + } + memcpy(dev, tk, tk_end - tk); + dev[tk_end - tk] = 0; + + port = ofp_name_to_port_vlan(dev, &vlan); + if (port == -1 || port == GRE_PORTS) { + ofp_sendf(conn->fd, "Invalid device name.\r\n"); + sendcrlf(conn); + return; + } + + /*get IP6NET address*/ + tk = tk_end + 1; + tk_end = strstr(tk, "/"); + + if (!tk_end || tk_end - tk > 40) { + ofp_sendf(conn->fd, "Invalid IP6NET address.\r\n"); + sendcrlf(conn); + return; + } + + if (!ip6addr_get(tk, tk_end - tk, addr6)) { + ofp_sendf(conn->fd, "Invalid IP6NET address.\r\n"); + sendcrlf(conn); + return; + } + + /* get IP6NET prefix len*/ + tk = tk_end + 1; + if (sscanf(tk, "%d", &prefix) < 1) { + ofp_sendf(conn->fd, "Invalid IP6NET prefix.\r\n"); + sendcrlf(conn); + return; + } + + err = ofp_config_interface_up_v6(port, vlan, addr6, prefix); + if (err != NULL) + ofp_sendf(conn->fd, err); + sendcrlf(conn); +} +#endif /* INET6 */ + +void f_ifconfig_down(struct cli_conn *conn, const char *s) +{ + /* "ifconfig DEV down"; */ + char dev[16]; + int port, vlan; + const char *err; + + if (sscanf(s, "%s", dev) < 1) + return; + port = ofp_name_to_port_vlan(dev, &vlan); + + err = ofp_config_interface_down(port, vlan); + + if (err != NULL) + ofp_sendf(conn->fd, err); + sendcrlf(conn); +} diff --git a/src/cli/ofp_cli_log.c b/src/cli/ofp_cli_log.c new file mode 100644 index 00000000..ef28ee3f --- /dev/null +++ b/src/cli/ofp_cli_log.c @@ -0,0 +1,68 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_cli.h" +#include "ofpi_log.h" 
+#include "ofpi_util.h" + +const char *loglevel_descript[] = { + "abort", + "error", + "info", + "debug" +}; + +/* loglevel help */ +/* help loglevel */ +void f_help_loglevel(struct cli_conn *conn, const char *s) +{ + (void)s; + + ofp_sendf(conn->fd, "Show log level\r\n" + " loglevel show\r\n"); + ofp_sendf(conn->fd, "Set log level\r\n" + " loglevel set \r\n" + " Example: loglevel set debug\r\n"); + ofp_sendf(conn->fd, "Show log level help (this help)\r\n" + " loglevel help\r\n"); + + sendcrlf(conn); +} + +/* loglevel */ +/* loglevel show */ +void f_loglevel_show(struct cli_conn *conn, const char *s) +{ + (void)s; + ofp_sendf(conn->fd, "Log level: %s\r\n", + loglevel_descript[ofp_loglevel]); + + sendcrlf(conn); +} + +/* loglevel */ +void f_loglevel(struct cli_conn *conn, const char *s) +{ + int i; + + for (i = 0; i <= OFP_LOG_DBG; i++) { + if (strncmp(loglevel_descript[i], s, + strlen(loglevel_descript[i])) == 0) { + ofp_loglevel = i; + sendcrlf(conn); + return; + } + } + + ofp_sendf(conn->fd, "Invalid value!\r\nUsage:\r\n"); + + f_help_loglevel(conn, NULL); +} diff --git a/src/cli/ofp_cli_route.c b/src/cli/ofp_cli_route.c new file mode 100644 index 00000000..a43bcbea --- /dev/null +++ b/src/cli/ofp_cli_route.c @@ -0,0 +1,372 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_cli.h" +#include "ofpi_route.h" +#include "ofpi_util.h" +#include "ofpi_log.h" + +/* route show */ +void f_route_show(struct cli_conn *conn, const char *s) +{ + (void)s; + + ofp_show_routes(conn->fd, OFP_SHOW_ROUTES); + + sendcrlf(conn); +} + +/* route add IP4NET gw IP4ADDR dev DEV */ +/* route -A inet4 add IP4NET gw IP4ADDR dev DEV */ +void f_route_add(struct cli_conn *conn, const char *s) +{ + uint32_t gwaddr, destaddr; + int a, b, c, d, e, f, g, h, port, mlen, vlan; + char dev[16]; + struct ofp_route_msg msg; + + if (sscanf(s, "%d.%d.%d.%d/%d %d.%d.%d.%d %s", + &a, 
&b, &c, &d, &mlen, + &e, &f, &g, &h, dev) != 10) + return; + destaddr = odp_cpu_to_be_32((a << 24) | (b << 16) | (c << 8) | d); + gwaddr = odp_cpu_to_be_32((e << 24) | (f << 16) | (g << 8) | h); + + port = ofp_name_to_port_vlan(dev, &vlan); + if (port < 0 || port >= ofp_get_num_ports()) { + ofp_sendf(conn->fd, "Invalid port!\r\n"); + sendcrlf(conn); + return; + } + + msg.type = OFP_ROUTE_ADD; + msg.dst = destaddr; + msg.masklen = mlen; + msg.vrf = 0; + msg.gw = gwaddr; + msg.port = port; + msg.vlan = vlan; + + ofp_set_route(&msg); + + sendcrlf(conn); +} + +/* route add vrf NUMBER IP4NET gw IP4ADDR dev DEV */ +/* route -A inet4 add vrf NUMBER IP4NET gw IP4ADDR dev DEV */ +void f_route_add_vrf(struct cli_conn *conn, const char *s) +{ + uint32_t gwaddr, destaddr; + int a, b, c, d, e, f, g, h, port, mlen, vrf, vlan; + char dev[16]; + struct ofp_route_msg msg; + + if (sscanf(s, "%d %d.%d.%d.%d/%d %d.%d.%d.%d %s", + &vrf, &a, &b, &c, &d, &mlen, + &e, &f, &g, &h, dev) != 11) + return; + destaddr = odp_cpu_to_be_32((a << 24) | (b << 16) | (c << 8) | d); + gwaddr = odp_cpu_to_be_32((e << 24) | (f << 16) | (g << 8) | h); + + port = ofp_name_to_port_vlan(dev, &vlan); + if (port < 0 || port >= ofp_get_num_ports()) { + ofp_sendf(conn->fd, "Invalid port!\r\n"); + sendcrlf(conn); + return; + } + + msg.type = OFP_ROUTE_ADD; + msg.dst = destaddr; + msg.masklen = mlen; + msg.vrf = vrf; + msg.gw = gwaddr; + msg.port = port; + msg.vlan = vlan; + ofp_set_route(&msg); + + sendcrlf(conn); +} + +/* route -A inet6 add IP6NET gw IP6ADDR dev DEV */ +#ifdef INET6 +void f_route_add_v6(struct cli_conn *conn, const char *s) +{ + struct ofp_route_msg msg; + int port, vlan, mlen; + const char *tk; + const char *tk_end; + const char *last; + + memset(&msg, 0, sizeof(msg)); + last = s + strlen(s); + +/* get IP6NET address*/ + tk = s; + tk_end = strstr(tk, "/"); + if (!tk_end) { + ofp_sendf(conn->fd, "Invalid IP6NET\r\n"); + sendcrlf(conn); + return; + } + + if (!ip6addr_get(tk, tk_end - tk, 
msg.dst6)) { + ofp_sendf(conn->fd, "Invalid IP6NET\r\n"); + sendcrlf(conn); + return; + } + +/* get IP6NET prefix len*/ + tk = tk_end + 1; + if (tk >= last) { + ofp_sendf(conn->fd, "Invalid IP6NET\r\n"); + sendcrlf(conn); + return; + } + + tk_end = strstr(tk, " "); + if (!tk_end || (tk == tk_end)) { + ofp_sendf(conn->fd, "Invalid IP6NET\r\n"); + sendcrlf(conn); + return; + } + + mlen = atoi(tk); + +/* get IP6ADDR */ + tk = tk_end + 1; + if (tk >= last) { + ofp_sendf(conn->fd, "Invalid IP6NET\r\n"); + sendcrlf(conn); + return; + } + tk_end = strstr(tk, " "); + if (tk_end == NULL) { + ofp_sendf(conn->fd, "Invalid IP6ADDR\r\n"); + sendcrlf(conn); + return; + } + + if (!ip6addr_get(tk, tk_end - tk, msg.gw6)) { + ofp_sendf(conn->fd, "Invalid IP6NET\r\n"); + sendcrlf(conn); + return; + } + +/* get DEV */ + tk = tk_end + 1; + if (tk >= last) { + ofp_sendf(conn->fd, "Invalid DEV\r\n"); + sendcrlf(conn); + return; + } + tk_end = last; + + port = ofp_name_to_port_vlan(tk, &vlan); + if (port < 0 || port >= ofp_get_num_ports()) { + ofp_sendf(conn->fd, "Invalid port!\r\n"); + sendcrlf(conn); + return; + } + + + msg.type = OFP_ROUTE6_ADD; + msg.masklen = mlen; + msg.vrf = 0; + msg.port = port; + msg.vlan = vlan; + + ofp_set_route(&msg); + + sendcrlf(conn); +} +#endif /* INET6*/ + +/* route delete IP4NET */ +/* route -A inet4 delete IP4NET */ +void f_route_del(struct cli_conn *conn, const char *s) +{ + uint32_t destaddr; + int a, b, c, d, mlen; + struct ofp_route_msg msg; + + if (sscanf(s, "%d.%d.%d.%d/%d", + &a, &b, &c, &d, &mlen) != 5) + return; + destaddr = odp_cpu_to_be_32((a << 24) | (b << 16) | (c << 8) | d); + + msg.type = OFP_ROUTE_DEL; + msg.vrf = 0; + msg.dst = destaddr; + msg.masklen = mlen; + ofp_set_route(&msg); + + sendcrlf(conn); +} + +/* route delete vrf NUMBER IP4NET */ +/* route -A inet4 delete vrf NUMBER IP4NET */ +void f_route_del_vrf(struct cli_conn *conn, const char *s) +{ + uint32_t destaddr; + int a, b, c, d, mlen, vrf; + struct ofp_route_msg msg; + + if 
/*
 * CLI handler for "route add from DEV to DEV".
 *
 * s is parsed as two device names.  Both names are resolved with
 * ofp_name_to_port_vlan(), but nothing is done with the results: the
 * code that would install the forwarding entry is not implemented.
 */
void f_route_add_dev_to_dev(struct cli_conn *conn, const char *s)
{
	char dev[16], from[16];
	int from_port, to_port, vlan;

	/* %15s bounds each write to the 16-byte buffers, NUL included. */
	if (sscanf(s, "%15s %15s", from, dev) != 2)
		return;

	from_port = ofp_name_to_port_vlan(from, &vlan);
	to_port = ofp_name_to_port_vlan(dev, &vlan);

	/* Results are unused until the route setup is implemented. */
	(void)from_port;
	(void)to_port;

	sendcrlf(conn);
}
f_help_route(struct cli_conn *conn, const char *s) +{ + (void)s; + + ofp_sendf(conn->fd, "Show configured routes:\r\n" + " route show\r\n\r\n"); + + ofp_sendf(conn->fd, "Add IPv4 route:\r\n" + " route [-A inet4 ] add IP4NET gw IP4ADDR dev DEV\r\n" + " IP4NET: network address in a.b.c.d/e format\r\n" + " IP4ADDR: IP address in a.b.c.d format\r\n" + " DEV: ethernet interface name\r\n" + " Example:\r\n" + " route add 192.168.200.0/24 gw 192.168.100.1" + " dev %s0\r\n\r\n", OFP_IFNAME_PREFIX); + + ofp_sendf(conn->fd, "Delete IPv4 route:\r\n" + " route [-A inet4] delete IP4NET\r\n" + " IP4NET: network address in a.b.c.d/e format\r\n" + " Example:\r\n" + " route delete 192.168.200.0/24\r\n\r\n"); + + ofp_sendf(conn->fd, "Add IPv4 route to virtual route table:\r\n" + " route [-A inet4 ] add vrf VRF IP4NET gw IP4ADDR dev DEV\r\n" + " VRF: number\r\n" + " IP4NET: network address in a.b.c.d/n format\r\n" + " IP4ADDR: IP address in a.b.c.d format\r\n" + " DEV: ethernet interface name\r\n" + " Example:\r\n" + " route add vrf 2 192.168.200.0/24 gw 192.168.100.1" + " dev %s0\r\n\r\n", OFP_IFNAME_PREFIX); + + ofp_sendf(conn->fd, "Delete IPv4 route from virtual route table:\r\n" + " route [-A inet4] delete vrf VRF IP4NET\r\n" + " VRF: number\r\n" + " IP4NET: network address in a.b.c.d/e format\r\n" + " Example:\r\n" + " route del vrf 2 192.168.200.0/24\r\n\r\n"); +#ifdef INET6 + ofp_sendf(conn->fd, "Add IPv6 route:\r\n" + " route -A inet6 add IP6NET gw IP6ADDR dev DEV\r\n" + " IP6NET: network address in a:b:c:d:e:f:g:h/n or" + " compressed format\r\n" + " IP6ADDR: IPv6 address in a:b:c:d:e:f:g:h or" + " compressed format\r\n" + " DEV: ethernet interface name\r\n" + " Example:\r\n" + " route -A inet6 add 2000:1baf::/64 gw 2001:db8:0:f101:0:0:0:1" + " dev %s0\r\n\r\n", OFP_IFNAME_PREFIX); + + ofp_sendf(conn->fd, "Delete IPv6 route:\r\n" + " route -A inet6 delete IP6NET\r\n" + " IP6NET: network address in a:b:c:d:e:f:g:h/n or" + " compressed format\r\n" + " Example:\r\n" + " route -A 
inet6 delete 2000:1baf::/64\r\n\r\n"); +#endif /* INET6 */ + ofp_sendf(conn->fd, "Show (this) help.\r\n" + " route help\r\n\r\n"); + sendcrlf(conn); +} diff --git a/src/cli/ofp_cli_stat.c b/src/cli/ofp_cli_stat.c new file mode 100644 index 00000000..05e14a70 --- /dev/null +++ b/src/cli/ofp_cli_stat.c @@ -0,0 +1,151 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_log.h" +#include "ofpi_cli.h" +#include "ofpi_avl.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_stat.h" +#include "ofpi_util.h" + +static void print_latency_entry(struct cli_conn *conn, + struct ofp_packet_stat *st, int core, int entry) +{ + int j; + uint64_t input_latency = st->per_core[core].input_latency[entry]; + int input_latency_log = ilog2(input_latency); + + ofp_sendf(conn->fd, "\r\n%3d| ", entry); + + if (input_latency == 0) + return; + + if (input_latency < 10000) + ofp_sendf(conn->fd, "[%05d]", input_latency); + else + ofp_sendf(conn->fd, "[99999]"); + + for (j = 0; j < input_latency_log + 1; j++) + ofp_sendf(conn->fd, "*"); +} + +void f_stat_show(struct cli_conn *conn, const char *s) +{ + struct ofp_packet_stat *st = ofp_get_packet_statistics(); + int i, j, k; + int last_entry; + + (void)s; + + if (!st) + return; + + ofp_sendf(conn->fd, "Settings: \r\n" + " compute latency - %s\r\n\r\n", + ofp_stat_flags & OFP_STAT_COMPUTE_LATENCY ? 
"yes" : "no"); + +#define PRINT_STAT(_st, _s, _n) do { int i; \ + ofp_sendf(conn->fd, " %16s:", _s); \ + for (i = 0; i < odp_cpu_count(); i++) \ + ofp_sendf(conn->fd, " %10d", (_st)->per_core[i]._n); \ + ofp_sendf(conn->fd, "\r\n"); } \ + while (0) + + ofp_sendf(conn->fd, "Packets:\r\n Core:"); + + for (i = 0; i < odp_cpu_count(); i++) + ofp_sendf(conn->fd, " %10d", i); + ofp_sendf(conn->fd, "\r\n\r\n"); + + PRINT_STAT(st, "ODP to FP", rx_fp); + PRINT_STAT(st, "FP to ODP", tx_fp); + PRINT_STAT(st, "FP to SP", rx_sp); + PRINT_STAT(st, "SP to ODP", tx_sp); + + PRINT_STAT(st, "Tx frag", tx_eth_frag); + PRINT_STAT(st, "Rx IP frag", rx_ip_frag); + PRINT_STAT(st, "RX IP reass", rx_ip_reass); + +/*TODO: print interface related stats colected from ODP or linux IP stack*/ + + ofp_sendf(conn->fd, "\r\nAllocated memory:\r\n"); + ofp_print_avl_stat(conn->fd); + ofp_print_rt_stat(conn->fd); + + if (ofp_stat_flags & OFP_STAT_COMPUTE_LATENCY) { + ofp_sendf(conn->fd, "\r\n Latency graph | log/log scale | " + "X = occurrences, Y = cycles"); + + for (k = 0; k < odp_cpu_count(); k++) { + ofp_sendf(conn->fd, "\r\nCore %d:\r\n", k); + + /* Skip to the first entry where there's data */ + for (i = 0; i < 64; i++) + if (st->per_core[k].input_latency[i] != 0) + break; + + if (i == 64) + continue; + + /* Check what's the last entry with data */ + last_entry = i; + for (j = i; j < 64; j++) + if (st->per_core[k].input_latency[j]) + last_entry = j; + + /* Now we have cut the ends with zeros */ + for (; i < last_entry + 1; i++) + print_latency_entry(conn, st, k, i); + } + } + sendcrlf(conn); +} + +void f_stat_set(struct cli_conn *conn, const char *s) +{ + (void)s; + + ofp_stat_flags = strtol(s, NULL, 0); + + sendcrlf(conn); +} +void f_stat_clear(struct cli_conn *conn, const char *s) +{ + struct ofp_packet_stat *st = ofp_get_packet_statistics(); + + (void)s; + + memset(st, 0, sizeof(struct ofp_packet_stat)); + + sendcrlf(conn); +} + +void f_help_stat(struct cli_conn *conn, const char *s) +{ + 
(void)s; + + ofp_sendf(conn->fd, "Show statistics:\r\n" + " stat [show]\r\n\r\n"); + + ofp_sendf(conn->fd, "Set options for statistics:\r\n" + " stat set \r\n" + " bit 0: compute packets latency\r\n" + " Example:\r\n" + " stat set 0x1\r\n\r\n"); + + ofp_sendf(conn->fd, "Clear statistics:\r\n" + " stat clear\r\n\r\n"); + + ofp_sendf(conn->fd, "Show (this) help:\r\n" + " stat help\r\n\r\n"); + + sendcrlf(conn); +} diff --git a/src/cli/ofp_cli_sysctl.c b/src/cli/ofp_cli_sysctl.c new file mode 100644 index 00000000..bc744d6a --- /dev/null +++ b/src/cli/ofp_cli_sysctl.c @@ -0,0 +1,273 @@ +/*- + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 ENEA Software AB + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "ofpi_log.h" +#include "ofpi_cli.h" +#include "ofpi_route.h" +#include "ofpi_arp.h" +#include "ofpi_util.h" +#include "ofpi_sysctl.h" + + +void f_sysctl_dump(struct cli_conn *conn, const char *s) +{ + (void)s; + ofp_sysctl_write_tree(conn->fd); + sendcrlf(conn); +} + +void f_sysctl_read(struct cli_conn *conn, const char *s) +{ + int oid[OFP_CTL_MAXNAME]; + size_t oidlen; + uint64_t old[32]; + size_t oldlen; + size_t retval, plen; + int error; + int slen; + char str[128], *p; + struct ofp_sysctl_oid *noid; + int nindx; + struct ofp_sysctl_oid_list *l; + + strncpy(str, s, sizeof(str)); + str[sizeof(str)-1] = 0; + p = strchr(str, ' '); + if (p) + *p = 0; + slen = strlen(str); + + if (slen == 0) { + l = &sysctl__children; + goto err; + } + + if (!strncmp(s, "-a", 2)) { + ofp_sysctl_write_tree(conn->fd); + return; + } + + oid[0] = 0; /* sysctl internal magic */ + oid[1] = 3; /* name2oid */ + oidlen = sizeof(oid); + + error = ofp_kernel_sysctl(NULL, oid, 2, oid, &oidlen, + (const void *)str, slen, &plen, 0); + if (error) { + ofp_sendf(conn->fd, "Not valid string: '%s'\r\n", str); + str[0] = 0; + l = &sysctl__children; + goto err; + } + + plen /= sizeof(int); + + error = ofp_sysctl_find_oid(oid, plen, 
&noid, &nindx, NULL); + if (error) + return; + + if ((noid->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + ofp_sendf(conn->fd, "Not a variable.\r\n"); + l = noid->oid_arg1; + goto err; + } + + oldlen = sizeof(old) - 1; + + error = ofp_kernel_sysctl(NULL, oid, plen, old, &oldlen, + NULL, 0, &retval, 0); + + if (error) { + ofp_sendf(conn->fd, "Cannot access: '%s'", str); + sendcrlf(conn); + return; + } + + ofp_sendf(conn->fd, "%s = ", str); + + switch (noid->oid_kind & OFP_CTLTYPE) { + case OFP_CTLTYPE_INT: { + int *r = (int *)old; + ofp_sendf(conn->fd, "%d\r\n", *r); + break; + } + case OFP_CTLTYPE_UINT: { + unsigned int *r = (unsigned int *)old; + ofp_sendf(conn->fd, "%u\r\n", *r); + break; + } + case OFP_CTLTYPE_LONG: { + long int *r = (long int *)old; + ofp_sendf(conn->fd, "%ld\r\n", *r); + break; + } + case OFP_CTLTYPE_ULONG: { + unsigned long *r = (unsigned long *)old; + ofp_sendf(conn->fd, "%lu\r\n", *r); + break; + } + case OFP_CTLTYPE_STRING: { + char *r = (char *)old; + r[oldlen] = 0; + ofp_sendf(conn->fd, "%s\r\n", r); + break; + } + case OFP_CTLTYPE_U64: { + uint64_t *r = (uint64_t *)old; + ofp_sendf(conn->fd, "%lu\r\n", *r); + break; + } + case OFP_CTLTYPE_S64: { + int64_t *r = (int64_t *)old; + ofp_sendf(conn->fd, "%ld\r\n", *r); + break; + } + case OFP_CTLTYPE_OPAQUE: { + unsigned int i; + unsigned char *r = (unsigned char *)old; + for (i = 0; i < oldlen; i++) + ofp_sendf(conn->fd, " %02x", r[i]); + ofp_sendf(conn->fd, "\r\n"); + break; + } + default: ofp_sendf(conn->fd, "unknown type\r\n"); + } + + sendcrlf(conn); + return; + +err: + ofp_sendf(conn->fd, "Alternatives:\r\n"); + + struct ofp_sysctl_oid *oidp; + OFP_SLIST_FOREACH(oidp, l, oid_link) { + ofp_sendf(conn->fd, " %s%s%s (%s)\r\n", + str, str[0] ? "." 
: "", + oidp->oid_name, oidp->oid_descr); + } + + sendcrlf(conn); +} + +void f_sysctl_write(struct cli_conn *conn, const char *s) +{ + int oid[OFP_CTL_MAXNAME]; + size_t oidlen; + uint64_t new[32]; + size_t newlen; + size_t retval, plen; + int error; + int slen; + char str[128], *p, *p1; + struct ofp_sysctl_oid *noid; + int nindx; + struct ofp_sysctl_oid_list *l = &sysctl__children; + + strncpy(str, s, sizeof(str)); + str[sizeof(str)-1] = 0; + p = strchr(str, ' '); + if (p) { + *p = 0; + p++; + + p1 = strchr(p, ' '); + if (p1) + *p1 = 0; + } + + slen = strlen(str); + + if (slen == 0) { + l = &sysctl__children; + goto err; + } + + oid[0] = 0; /* sysctl internal magic */ + oid[1] = 3; /* name2oid */ + oidlen = sizeof(oid); + + error = ofp_kernel_sysctl(NULL, oid, 2, oid, &oidlen, + (const void *)str, slen, &plen, 0); + if (error) { + ofp_sendf(conn->fd, "Not valid string: '%s'\r\n", str); + str[0] = 0; + goto err; + } + + plen /= sizeof(int); + + error = ofp_sysctl_find_oid(oid, plen, &noid, &nindx, NULL); + if (error) + return; + + if ((noid->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + ofp_sendf(conn->fd, "Not a variable.\r\n"); + l = noid->oid_arg1; + goto err; + } + + switch (noid->oid_kind & OFP_CTLTYPE) { + case OFP_CTLTYPE_UINT: + case OFP_CTLTYPE_INT: { + int *r = (int *)new; + *r = atoi(p); + newlen = sizeof(int); + break; + } + case OFP_CTLTYPE_ULONG: + case OFP_CTLTYPE_LONG: { + long int *r = (long int *)new; + *r = atol(p); + newlen = sizeof(long int); + break; + } + case OFP_CTLTYPE_STRING: { + newlen = strlen(p); + if (newlen > sizeof(new) - 1) + newlen = sizeof(new) - 1; + p[newlen] = 0; + memcpy(new, p, newlen+1); + break; + } + case OFP_CTLTYPE_S64: + case OFP_CTLTYPE_U64: { + int64_t *r = (int64_t *)new; + *r = atoll(p); + newlen = sizeof(int64_t); + break; + } + default: ofp_sendf(conn->fd, "unsupported type for writing\r\n"); + goto err; + } + + error = ofp_kernel_sysctl(NULL, oid, plen, NULL, NULL, + new, newlen, &retval, 0); + + if (error) { 
+ ofp_sendf(conn->fd, "Cannot write: '%s'", str); + sendcrlf(conn); + return; + } + + sendcrlf(conn); + return; + +err: + ofp_sendf(conn->fd, "Alternatives:\r\n"); + + struct ofp_sysctl_oid *oidp; + OFP_SLIST_FOREACH(oidp, l, oid_link) { + ofp_sendf(conn->fd, " %s%s%s\r\n", + str, str[0] ? "." : "", oidp->oid_name); + } + + sendcrlf(conn); +} diff --git a/src/ofp_arp.c b/src/ofp_arp.c new file mode 100644 index 00000000..322a75f5 --- /dev/null +++ b/src/ofp_arp.c @@ -0,0 +1,642 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + */ + +#include +#include +#include +#include + +#include "ofpi_portconf.h" +#include "ofpi_timer.h" +#include "ofpi_arp.h" +#include "ofpi_hash.h" +#include "ofpi_log.h" +#include "ofpi_util.h" + +#define ARP_SANITY_CHECK 1 + +#define NUM_SETS 2048 /* Must be power of two */ +#define NUM_ARPS (NUM_SETS * 4) + +#define NUM_PKTS 2048 /* number of saved packets waiting for arp reply */ + +#define SEC_USEC 1000000UL +#define CLEANUP_TIMER_INTERVAL (60 * SEC_USEC) +#define ENTRY_TIMEOUT (1200 * ODP_TIME_SEC) /* 20 minutes */ +#define ENTRY_UPD_TIMEOUT (2 * SEC_USEC) +#define ENTRY_USETIME_INVALID 0xFFFFFFFF +#define SAVED_PKT_TIMEOUT (10 * SEC_USEC) + +#if (ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN) +#define hashfunc ofp_hashlittle +#else +#define hashfunc ofp_hashbig +#endif + +/* + * Data + */ + +struct arp_entry_list { + struct arp_entry *slh_first; +}; /* OFP_SLIST_HEAD */ + +struct _arp { + struct arp_entry entries[NUM_ARPS] ODP_ALIGNED_CACHE; + struct arp_entry_list free_entries; + struct arp_entry_list table[NUM_SETS] ODP_ALIGNED_CACHE; + struct arp_cache cache[NUM_SETS] ODP_ALIGNED_CACHE; + odp_rwlock_t table_rwlock[NUM_SETS]; + odp_rwlock_t fr_ent_rwlock; +}; + +struct _pkt { + struct pkt_entry entries[NUM_PKTS] ODP_ALIGNED_CACHE; + struct pkt_list free_entries; + odp_rwlock_t fr_ent_rwlock; +}; + +struct ofp_arp_mem { + struct _arp arp; + 
struct _pkt pkt; +}; + +static __thread struct ofp_arp_mem *shm; + +/* + * Private functions + */ + +static inline uint32_t ipv4_hash(struct arp_key *key) +{ + uint32_t set = hashfunc(key, sizeof(*key), 0) & (NUM_SETS - 1); + + return set; +} + +static inline uint32_t set_key_and_hash(uint32_t vrf, uint32_t ipv4_addr, + struct arp_key *key) +{ + uint32_t set; + + key->vrf = vrf; + key->ipv4_addr = ipv4_addr; + set = ipv4_hash(key); + + return set; +} + +static inline void *entry_alloc(void) +{ + odp_rwlock_write_lock(&shm->arp.fr_ent_rwlock); + + struct arp_entry *entry = OFP_SLIST_FIRST(&shm->arp.free_entries); + + if (entry) + OFP_SLIST_REMOVE_HEAD(&shm->arp.free_entries, next); + + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.fr_ent_rwlock); + + return entry; +} + +static inline void entry_free(struct arp_entry *entry) +{ + memset(entry, 0, sizeof(*entry)); + entry->pkt_tmo = ODP_TIMER_INVALID; + + odp_rwlock_write_lock(&shm->arp.fr_ent_rwlock); + OFP_SLIST_INSERT_HEAD(&shm->arp.free_entries, entry, next); + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.fr_ent_rwlock); +} + +static inline struct arp_entry *arp_lookup(int set, struct arp_key *key) +{ + struct arp_entry *new; + + OFP_SLIST_FOREACH(new, &shm->arp.table[set], next) { + if (odp_likely((new->key.ipv4_addr == key->ipv4_addr) && + (new->key.vrf == key->vrf))) + return new; + } + + return NULL; +} + +static inline void *insert_new_entry(int set, struct arp_key *key) +{ + struct arp_entry *new; + + new = arp_lookup(set, key); + + if (odp_likely(new == NULL)) { + new = entry_alloc(); + + if (odp_unlikely(new == NULL)) + return NULL; + + new->key.ipv4_addr = key->ipv4_addr; + new->key.vrf = key->vrf; + new->usetime_upd_tmo = ODP_TIMER_INVALID; + OFP_SLIST_INSERT_HEAD(&shm->arp.table[set], new, next); + } + + return new; +} + +static inline void remove_entry(int set, struct arp_entry *entry) +{ + struct arp_cache *cache; + +/* remove from set */ + OFP_SLIST_REMOVE(&shm->arp.table[set], 
entry, arp_entry, next); + +/* remove from set's cache */ + cache = &shm->arp.cache[set]; + + if (ARP_IN_CACHE(cache, &entry->key)) + ARP_DEL_CACHE(cache); + +/* kill update timer*/ + odp_rwlock_write_lock(&entry->usetime_rwlock); + + if (entry->usetime_upd_tmo != ODP_TIMER_INVALID) { + ofp_timer_cancel(entry->usetime_upd_tmo); + entry->usetime_upd_tmo = ODP_TIMER_INVALID; + } + + odp_rwlock_write_unlock(&entry->usetime_rwlock); + +/* free */ + entry_free(entry); +} + +static inline void show_arp_entry(int fd, struct arp_entry *entry) +{ + uint64_t t, diff; + + t = odp_time_cycles(); + diff = odp_time_diff_cycles(odp_atomic_load_u64(&entry->usetime), t); + ofp_sendf(fd, "%3d %-15s %-17s %4u\r\n", + entry->key.vrf, + ofp_print_ip_addr(entry->key.ipv4_addr), + ofp_print_mac((uint8_t *)&entry->macaddr), + odp_time_cycles_to_ns(diff) / ODP_TIME_SEC); +} + +static inline void *pkt_entry_alloc(void) +{ + struct pkt_entry *pktentry; + + odp_rwlock_write_lock(&shm->pkt.fr_ent_rwlock); + + pktentry = OFP_SLIST_FIRST(&shm->pkt.free_entries); + + if (pktentry) + OFP_SLIST_REMOVE_HEAD(&shm->pkt.free_entries, next); + + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->pkt.fr_ent_rwlock); + + return pktentry; +} + +static inline void pkt_entry_free(struct pkt_entry *pktentry) +{ + memset(pktentry, 0, sizeof(*pktentry)); + + odp_rwlock_write_lock(&shm->pkt.fr_ent_rwlock); + OFP_SLIST_INSERT_HEAD(&shm->pkt.free_entries, pktentry, next); + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->pkt.fr_ent_rwlock); +} + +/* + * Public functions + */ +int ofp_arp_ipv4_insert(uint32_t ipv4_addr, unsigned char *ll_addr, + struct ofp_ifnet *dev) +{ + struct arp_entry *new; + struct arp_key key; + struct pkt_entry *pktentry; + struct pkt_list send_list; + uint32_t set; + uint64_t tnow; + + OFP_SLIST_INIT(&send_list); + + set = set_key_and_hash(dev->vrf, ipv4_addr, &key); + + odp_rwlock_write_lock(&shm->arp.table_rwlock[set]); + + new = insert_new_entry(set, &key); + + if (new == NULL) { + 
odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + return -1; + } + + memcpy(&new->macaddr, ll_addr, OFP_ETHER_ADDR_LEN); + tnow = odp_time_cycles(); + odp_atomic_store_u64(&new->usetime, tnow); + + OFP_SLIST_SWAP(&send_list, &new->pkt_list_head, pkt_entry); + + if (OFP_SLIST_FIRST(&send_list)) { + ofp_timer_cancel(new->pkt_tmo); + new->pkt_tmo = ODP_TIMER_INVALID; + } + + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + + /* Send queued packets */ + pktentry = OFP_SLIST_FIRST(&send_list); + while (pktentry) { + OFP_DBG("***Sending saved packet %" PRIX64 " to %s\n", + odp_packet_to_u64(pktentry->pkt), + ofp_print_ip_addr(ipv4_addr)); + + if (ofp_ip_output(pktentry->pkt, pktentry->nh) == OFP_PKT_DROP) + odp_packet_free(pktentry->pkt); + + OFP_SLIST_REMOVE_HEAD(&send_list, next); + pkt_entry_free(pktentry); + + pktentry = OFP_SLIST_FIRST(&send_list); + } + + return 0; +} + +int ofp_arp_ipv4_remove(uint32_t ipv4_addr, struct ofp_ifnet *dev) +{ + struct arp_entry *entry; + struct arp_key key; + struct pkt_entry *pktentry; + int ret = -1; + uint32_t set; + + set = set_key_and_hash(dev->vrf, ipv4_addr, &key); + + odp_rwlock_write_lock(&shm->arp.table_rwlock[set]); + entry = arp_lookup(set, &key); + + if (odp_likely(entry != NULL)) { + while ((pktentry = OFP_SLIST_FIRST(&entry->pkt_list_head))) { + OFP_SLIST_REMOVE_HEAD(&entry->pkt_list_head, next); + pkt_entry_free(pktentry); + } + + remove_entry(set, entry); + ret = 0; + } + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + + return ret; +} + +static void ofp_arp_entry_usetime_tmo(void *arg) +{ + struct arp_entry *entry; + uint32_t entry_idx; + + entry_idx = *(uint32_t *)arg; + + entry = &shm->arp.entries[entry_idx]; + + odp_rwlock_write_lock(&entry->usetime_rwlock); + + entry->usetime_upd_tmo = ODP_TIMER_INVALID; + + odp_rwlock_write_unlock(&entry->usetime_rwlock); +} + +int ofp_ipv4_lookup_mac(uint32_t ipv4_addr, unsigned char *ll_addr, + struct ofp_ifnet 
*dev) +{ + struct arp_entry *entry; + struct arp_key key; + uint32_t set; + uint64_t tnew; + odp_bool_t usetime_is_old = FALSE; + uint32_t entry_idx; + struct arp_cache *cache; + + set = set_key_and_hash(dev->vrf, ipv4_addr, &key); + + cache = &shm->arp.cache[set]; + + if (ARP_IN_CACHE(cache, (&key))) + entry = ARP_GET_CACHE(cache); + else { + odp_rwlock_write_lock(&shm->arp.table_rwlock[set]); + + entry = arp_lookup(set, &key); + + if (odp_unlikely(entry == NULL) || + OFP_SLIST_FIRST(&entry->pkt_list_head)) { + odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + return -1; + } + + ARP_SET_CACHE(cache, (&key), entry); + + odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + } + + ofp_copy_mac_64(ll_addr, &entry->macaddr); + + if (entry->usetime_upd_tmo == ODP_TIMER_INVALID) + usetime_is_old = TRUE; + + if (odp_unlikely(usetime_is_old == TRUE)) { + odp_rwlock_write_lock(&entry->usetime_rwlock); + if (entry->usetime_upd_tmo == ODP_TIMER_INVALID) { + tnew = odp_time_cycles(); + odp_atomic_store_u64(&entry->usetime, tnew); + + entry_idx = entry - &shm->arp.entries[0]; + entry->usetime_upd_tmo = ofp_timer_start( + ENTRY_UPD_TIMEOUT, + ofp_arp_entry_usetime_tmo, + &entry_idx, sizeof(entry_idx)); + } + odp_rwlock_write_unlock(&entry->usetime_rwlock); + } + + return 0; +} + +struct cleanup_arg { + uint32_t ipv4_addr; + struct ofp_ifnet *dev; +}; + +static void ofp_arp_cleanup_pkt_list(void *arg) +{ + struct cleanup_arg *args; + + args = (struct cleanup_arg *)arg; + + OFP_DBG("***Arp reply did not arrive on time, %s\n", + ofp_print_ip_addr(args->ipv4_addr)); + ofp_arp_ipv4_remove(args->ipv4_addr, args->dev); +} + +int ofp_arp_save_ipv4_pkt(odp_packet_t pkt, struct ofp_nh_entry *nh_param, + uint32_t ipv4_addr, struct ofp_ifnet *dev) +{ + struct arp_entry *newarp; + struct arp_key key; + struct pkt_entry *newpkt; + uint32_t set; + struct cleanup_arg cl_arg; + + OFP_DBG("Saving packet %" PRIX64 " to %s\n", odp_packet_to_u64(pkt), + ofp_print_ip_addr(ipv4_addr)); + + set 
= set_key_and_hash(dev->vrf, ipv4_addr, &key); + + odp_rwlock_write_lock(&shm->arp.table_rwlock[set]); + +#if (ARP_SANITY_CHECK) + newarp = arp_lookup(set, &key); + if (newarp != NULL && *((uint8_t *)&newarp->macaddr + 5) != 0) + OFP_ERR("Saving packet to destination which has valid MAC\n"); +#endif + + newarp = insert_new_entry(set, &key); + if (newarp == NULL) { + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + OFP_ERR("ARP entry alloc failed, %" PRIX64 " to %s\n", + odp_packet_to_u64(pkt), + ofp_print_ip_addr(ipv4_addr)); + return OFP_PKT_DROP; + } + odp_atomic_store_u64(&newarp->usetime, ENTRY_USETIME_INVALID); + + newpkt = pkt_entry_alloc(); + if (newpkt == NULL) { + OFP_ERR("PKT entry alloc failed, %" PRIX64 " to %s\n", + odp_packet_to_u64(pkt), + ofp_print_ip_addr(ipv4_addr)); + if (OFP_SLIST_FIRST(&newarp->pkt_list_head) == NULL) + remove_entry(set, newarp); + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + return OFP_PKT_DROP; + } + newpkt->pkt = pkt; + newpkt->nh = nh_param; + + /* Start timer only when the first pkt is saved */ + if (OFP_SLIST_FIRST(&newarp->pkt_list_head) == NULL) { + cl_arg.ipv4_addr = ipv4_addr; + cl_arg.dev = dev; + newarp->pkt_tmo = ofp_timer_start(SAVED_PKT_TIMEOUT, + ofp_arp_cleanup_pkt_list, + &cl_arg, sizeof(cl_arg)); + } + + OFP_SLIST_INSERT_HEAD(&newarp->pkt_list_head, newpkt, next); + + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.table_rwlock[set]); + + return OFP_PKT_PROCESSED; +} + +void ofp_arp_cleanup(void *arg) +{ + struct arp_entry *entry, *next_entry; + int i, cli; + uint64_t now, cycles, ns, usetime; + + cli = *(int *)arg; + now = odp_time_cycles(); + + for (i = 0; i < NUM_SETS; ++i) { + odp_rwlock_write_lock(&shm->arp.table_rwlock[i]); + + entry = OFP_SLIST_FIRST(&shm->arp.table[i]); + while (entry) { + next_entry = OFP_SLIST_NEXT(entry, next); + if (OFP_SLIST_FIRST(&entry->pkt_list_head) == NULL) { + usetime = odp_atomic_load_u64(&entry->usetime); 
+ if (usetime < now) { + cycles = odp_time_diff_cycles( + usetime, + now); + ns = odp_time_cycles_to_ns(cycles); + if (ns > ENTRY_TIMEOUT) { + show_arp_entry(1, entry); + + remove_entry(i, entry); + } + } + } + entry = next_entry; + } + + odp_rwlock_write_unlock(&shm->arp.table_rwlock[i]); + } + + if (!cli) + ofp_timer_start(CLEANUP_TIMER_INTERVAL, ofp_arp_cleanup, + &cli, sizeof(cli)); +} + +void ofp_arp_show_table(int fd) +{ + int i; + + for (i = 0; i < NUM_ARPS; ++i) + if (shm->arp.entries[i].key.ipv4_addr && + OFP_SLIST_FIRST(&shm->arp.entries[i].pkt_list_head) == NULL) + show_arp_entry(fd, &shm->arp.entries[i]); +} + +void ofp_arp_show_saved_packets(int fd) +{ + int i; + struct pkt_entry *pktentry; + struct arp_entry *entry; + + ofp_sendf(fd, "Saved packets:\r\n"); + for (i = 0; i < NUM_ARPS; ++i) { + entry = &shm->arp.entries[i]; + if (entry->key.ipv4_addr && + OFP_SLIST_FIRST(&entry->pkt_list_head) != NULL) { + ofp_sendf(fd, "IP: %-15s: ", + ofp_print_ip_addr(entry->key.ipv4_addr)); + + OFP_SLIST_FOREACH(pktentry, &entry->pkt_list_head, next) + ofp_sendf(fd, "%" PRIX64 "\t", + odp_packet_to_u64(pktentry->pkt)); + + ofp_sendf(fd, "\r\n"); + } + } +} + +void ofp_arp_init_tables(void) +{ + int i; + + for (i = 0; i < NUM_SETS; ++i) + odp_rwlock_write_lock(&shm->arp.table_rwlock[i]); + odp_rwlock_write_lock(&shm->arp.fr_ent_rwlock); + odp_rwlock_write_lock(&shm->pkt.fr_ent_rwlock); + + for (i = 0; i < NUM_ARPS; ++i) { + if (shm->arp.entries[i].pkt_tmo != ODP_TIMER_INVALID) + ofp_timer_cancel(shm->arp.entries[i].pkt_tmo); + + odp_rwlock_write_lock(&shm->arp.entries[i].usetime_rwlock); + + if (shm->arp.entries[i].usetime_upd_tmo != ODP_TIMER_INVALID) { + ofp_timer_cancel(shm->arp.entries[i].usetime_upd_tmo); + shm->arp.entries[i].usetime_upd_tmo = ODP_TIMER_INVALID; + } + odp_rwlock_write_unlock(&shm->arp.entries[i].usetime_rwlock); + + shm->arp.entries[i].pkt_tmo = ODP_TIMER_INVALID; + + memset(&shm->arp.entries[i].key, 0, + sizeof(shm->arp.entries[i].key)); + 
shm->arp.entries[i].macaddr = 0; + memset(&shm->arp.entries[i].pkt_list_head, 0, + sizeof(shm->arp.entries[i].pkt_list_head)); + } + + memset(shm->arp.table, 0, sizeof(shm->arp.table)); + memset(shm->arp.cache, 0, sizeof(shm->arp.cache)); + memset(shm->pkt.entries, 0, sizeof(shm->pkt.entries)); + + OFP_SLIST_INIT(&shm->arp.free_entries); + OFP_SLIST_INIT(&shm->pkt.free_entries); + + for (i = NUM_ARPS - 1; i >= 0; --i) + OFP_SLIST_INSERT_HEAD(&shm->arp.free_entries, &shm->arp.entries[i], + next); + + for (i = NUM_PKTS - 1; i >= 0; --i) + OFP_SLIST_INSERT_HEAD(&shm->pkt.free_entries, &shm->pkt.entries[i], + next); + + for (i = 0; i < NUM_SETS; ++i) + odp_rwlock_write_unlock(&shm->arp.table_rwlock[i]); + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->arp.fr_ent_rwlock); + odp_rwlock_write_unlock(&shm->pkt.fr_ent_rwlock); +} + +void ofp_arp_global_init(void) +{ + int i; + int cli = 0; + + for (i = 0; i < NUM_SETS; ++i) + odp_rwlock_init(&shm->arp.table_rwlock[i]); + odp_rwlock_init(&shm->arp.fr_ent_rwlock); + odp_rwlock_init(&shm->pkt.fr_ent_rwlock); + + for (i = 0; i < NUM_ARPS; ++i) { + shm->arp.entries[i].pkt_tmo = ODP_TIMER_INVALID; + shm->arp.entries[i].usetime_upd_tmo = ODP_TIMER_INVALID; + odp_rwlock_init(&shm->arp.entries[i].usetime_rwlock); + + } + + ofp_arp_init_tables(); + + ofp_timer_start(CLEANUP_TIMER_INTERVAL, + ofp_arp_cleanup, &cli, sizeof(cli)); +} + +void ofp_arp_local_init(void) +{ +} + +void ofp_arp_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + /* Reserve memory for args from shared mem */ + shm_h = odp_shm_reserve("OfpArpShMem", + sizeof(*shm), ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpArpShMem shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); +} + +void ofp_arp_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpArpShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + 
OFP_ABORT("Error: OfpArpShMem shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} diff --git a/src/ofp_arp_ck.c b/src/ofp_arp_ck.c new file mode 100644 index 00000000..fcc677ef --- /dev/null +++ b/src/ofp_arp_ck.c @@ -0,0 +1,296 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + * SPMC ARP table in shmem + * Needs a writelock on writes to become MPMC for controlplane + * to scale properly. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ofpi_portconf.h" +#include "ofpi_arp.h" +#include "ofpi_hash.h" +#include "ofpi_log.h" +#include "ofpi_util.h" + +#include + +#define ENTRIES_PER_CACHE_LINE (ODP_CACHE_LINE_SIZE / sizeof(struct arp_entry)) +#define ENTRIES_PER_SET (ENTRIES_PER_CACHE_LINE * 4) +#define NUM_SETS 2048 /* Must be power of two */ + +#if (ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN) +#define hashfunc ofp_hashlittle +#else +#define hashfunc ofp_hashbig +#endif + +struct arp_tbl { + struct arp_entry *slh_first; +}; + +struct ofp_arp_mem { + struct arp_tbl arp_table[NUM_SETS] ODP_ALIGNED_CACHE; + struct arp_entry arp_entries[NUM_SETS][ENTRIES_PER_SET] ODP_ALIGNED_CACHE; +}; + +static __thread struct ofp_arp_mem *shm; + +static ck_epoch_t arp_epoch ODP_ALIGNED_CACHE; +static __thread ck_epoch_record_t record ODP_ALIGNED_CACHE; + +static inline uint32_t ipv4_hash(struct arp_key *key) +{ + uint32_t set; + + set = hashfunc(key, sizeof(*key), 0) & (NUM_SETS - 1); +#if 0 + printf("Hashed %x to %x\n", key->ipv4_addr, set); +#endif + return set; +} + +static inline void *arp_malloc(int index, struct arp_key *key) +{ + uint32_t i; + + for (i = 0; i < ENTRIES_PER_SET; i++) { + if (ck_pr_cas_32(&(shm->arp_entries[index][i].key.ipv4_addr), 0, + key->ipv4_addr)) { + ck_pr_store_32(&(shm->arp_entries[index][i].key.vrf), + key->vrf); + + /* Success */ + return (void 
*)&(shm->arp_entries[index][i]); + } + } + + OFP_ERR("Arp table bucket size error.\n"); + return NULL; +} + +static inline void arp_free(void *p) +{ + struct arp_entry *entry = (struct arp_entry *)p; + + memset(&entry->key, 0, sizeof(entry->key)); + odp_sync_stores(); +} + +static inline struct arp_entry *arp_lookup(struct arp_key *key) +{ + struct arp_entry *new; + int set; + + set = ipv4_hash(key); + + CK_SLIST_FOREACH(new, &(shm->arp_table[set]), next) { + if (odp_likely((new->key.ipv4_addr == key->ipv4_addr) && + (new->key.vrf == key->vrf))) { + return new; + } + } + + return NULL; +} + +inline int ofp_arp_ipv4_insert(uint32_t ipv4_addr, unsigned char *ll_addr, + struct ofp_ifnet *dev) +{ + struct arp_entry *new; + struct arp_key key; + int set; + + key.vrf = dev->vrf; + key.ipv4_addr = ipv4_addr; + set = ipv4_hash(&key); + + ck_epoch_begin(&arp_epoch, &record); + new = arp_lookup(&key); + /* + TODO: when mac is changing for an existing node and read while + changing. We should always alloc, and if we find an existing entry + we should swap the addresses atomically. 
+ */ + if (odp_unlikely(new != NULL)) { + new->ifx = dev->port; + memcpy(&new->macaddr, ll_addr, ETH_ALEN); + odp_sync_stores(); + ck_epoch_end(&arp_epoch, &record); + return 0; + } + + new = arp_malloc(set, &key); + if (odp_unlikely(new == NULL)) + return -1; + + new->ifx = dev->port; + memcpy(&new->macaddr, ll_addr, ETH_ALEN); + CK_SLIST_INSERT_HEAD(&(shm->arp_table[set]), new, next); + ck_epoch_end(&arp_epoch, &record); + + return 0; +} + +inline int ofp_arp_ipv4_remove(uint32_t ipv4_addr, struct ofp_ifnet *dev) +{ + struct arp_entry *new; + struct arp_key key; + int ret = -1; + int set; + + key.vrf = dev->vrf; + key.ipv4_addr = ipv4_addr; + set = ipv4_hash(&key); + + ck_epoch_begin(&arp_epoch, &record); + new = arp_lookup(&key); + + if (odp_likely(new != NULL)) { + CK_SLIST_REMOVE(&(shm->arp_table[set]), new, arp_entry, next); + ret = 0; + } + + ck_epoch_end(&arp_epoch, &record); + if (odp_likely(ret == 0)) { + /* Blocking RCU cleanup from controlplane side */ + ck_epoch_barrier(&arp_epoch, &record); + /* epoch has passed, we can now safely free object */ + arp_free(new); + } + + return ret; +} + +inline int ofp_ipv4_lookup_mac(uint32_t ipv4_addr, unsigned char *ll_addr, + struct ofp_ifnet *dev) +{ + struct arp_entry *new; + struct arp_key key; + int ret; + + key.vrf = dev->vrf; + key.ipv4_addr = ipv4_addr; + + ck_epoch_begin(&arp_epoch, &record); + new = arp_lookup(&key); + + if (odp_likely(new != NULL)) { + memcpy(ll_addr, &new->macaddr, ETH_ALEN); + ret = new->ifx; + } else { + ret = -1; + } + ck_epoch_end(&arp_epoch, &record); + + return ret; +} + +static inline void show_arp_entry(int fd, int s, int e) +{ + if (shm->arp_entries[s][e].key.ipv4_addr) + ofp_sendf(fd, "%3d %-15s %s\r\n", + shm->arp_entries[s][e].key.vrf, + ofp_print_ip_addr(shm->arp_entries[s][e].key.ipv4_addr), + ofp_print_mac((uint8_t *)&shm->arp_entries[s][e].macaddr)); +} + +void ofp_arp_show_table(int fd) +{ + uint32_t i, j; + + ck_epoch_begin(&arp_epoch, &record); + for (i = 0; i < 
NUM_SETS; ++i) + for (j = 0; j < ENTRIES_PER_SET; ++j) + show_arp_entry(fd, i, j); + ck_epoch_end(&arp_epoch, &record); +} + +/* + * TODO, stubs + */ +int ofp_arp_save_ipv4_pkt(odp_packet_t pkt, struct ofp_nh_entry *nh_param, + uint32_t ipv4_addr, struct ofp_ifnet *dev) +{ + (void) pkt; + (void) nh_param; + (void) ipv4_addr; + (void) dev; + + return OFP_DROP; +} + +void ofp_arp_cleanup(void *arg) +{ + (void) arg; +} + +void ofp_arp_show_saved_packets(int fd) +{ + (void) fd; +} + +void ofp_arp_init_tables(void) +{ +} + +/******************************************************************************/ + +void ofp_arp_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + /* Reserve memory for args from shared mem */ + shm_h = odp_shm_reserve("OfpArpShMem", + sizeof(*shm), ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpArpShMem shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); +} + +void ofp_arp_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpArpShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpArpShMem shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} + +void ofp_arp_global_init(void) +{ + memset((void *)&(shm->arp_table[0]), 0x0, sizeof(shm->arp_table)); + memset((void *)&(shm->arp_entries[0][0]), 0x0, + sizeof(shm->arp_entries)); + ck_epoch_init(&arp_epoch); + odp_sync_stores(); +} + +void ofp_arp_local_init(void) +{ + ck_epoch_register(&arp_epoch, &record); + odp_sync_stores(); +} diff --git a/src/ofp_avl.c b/src/ofp_avl.c new file mode 100644 index 00000000..59bec8b5 --- /dev/null +++ b/src/ofp_avl.c @@ -0,0 +1,1316 @@ +/* + * Copyright (C) 1995-1997 by Sam Rushing + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, 
provided that the above copyright notice appear in all + * copies and that both that copyright notice and this permission + * notice appear in supporting documentation, and that the name of Sam + * Rushing not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. + * + * SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN + * NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#include "ofpi_avl.h" +#include "ofpi_log.h" +#include "ofpi_util.h" + +/* + * Shared data + */ +struct ofp_avl_mem { +#define NUM_NODES 16384 + avl_node node_list[NUM_NODES]; + avl_node *free_nodes; +#define NUM_TREES 64 + avl_tree trees[NUM_TREES]; + avl_tree *free_trees; + int tree_cnt; + int nodes_allocated, max_nodes_allocated; +}; + +/* + * Data per core + */ +static __thread struct ofp_avl_mem *shm; + + +static void AVL_NODEFREE(avl_node *node) +{ + node->right = shm->free_nodes; + shm->free_nodes = node; + shm->nodes_allocated--; +} + +static avl_node *AVL_NODEALLOC(void) +{ + avl_node *p = shm->free_nodes; + if (shm->free_nodes) { + shm->free_nodes = shm->free_nodes->right; + shm->nodes_allocated++; + if (shm->nodes_allocated > shm->max_nodes_allocated) + shm->max_nodes_allocated = shm->nodes_allocated; + } + return p; +} + +static void AVL_TREEFREE(avl_tree *tree) +{ + tree->compare_arg = shm->free_trees; + shm->free_trees = tree; +} + +static avl_tree *AVL_TREEALLOC(void) +{ + avl_tree *p = shm->free_trees; + if (shm->free_trees) { + shm->free_trees = 
shm->free_trees->compare_arg; + } + return p; +} + +avl_node * +avl_node_new (void * key, + avl_node * parent) +{ + avl_node * node = AVL_NODEALLOC();//(avl_node *) malloc (sizeof (avl_node)); + + if (!node) { + return NULL; + } else { + node->parent = parent; + node->key = key; + node->left = NULL; + node->right = NULL; + node->rank_and_balance = 0; + AVL_SET_BALANCE (node, 0); + AVL_SET_RANK (node, 1); +#ifdef HAVE_AVL_NODE_LOCK + thread_rwlock_create(&node->rwlock); +#endif + return node; + } +} + +avl_tree * +avl_tree_new (avl_key_compare_fun_type compare_fun, + void * compare_arg) +{ + avl_tree * t = AVL_TREEALLOC(); + + if (!t) { + OFP_ERR("No more avl trees!\n"); + return NULL; + } else { + avl_node * root = avl_node_new((void *)NULL, (avl_node *) NULL); + if (!root) { + //XXXX free (t); + return NULL; + } else { + t->root = root; + t->height = 0; + t->length = 0; + t->compare_fun = compare_fun; + t->compare_arg = compare_arg; + odp_rwlock_init(&t->lock_rw); + thread_rwlock_create(&t->rwlock); + return t; + } + } +} + +static void +avl_tree_free_helper (avl_node * node, avl_free_key_fun_type free_key_fun) +{ + if (node->left) { + avl_tree_free_helper (node->left, free_key_fun); + } + if (free_key_fun) + free_key_fun (node->key); + if (node->right) { + avl_tree_free_helper (node->right, free_key_fun); + } +#ifdef HAVE_AVL_NODE_LOCK + thread_rwlock_destroy (&node->rwlock); +#endif + AVL_NODEFREE(node); +} + +void +avl_tree_free (avl_tree * tree, avl_free_key_fun_type free_key_fun) +{ + if (tree->length) { + avl_tree_free_helper (tree->root->right, free_key_fun); + } + if (tree->root) { +#ifdef HAVE_AVL_NODE_LOCK + thread_rwlock_destroy(&tree->root->rwlock); +#endif + AVL_NODEFREE(tree->root); + } + thread_rwlock_destroy(&tree->rwlock); + AVL_TREEFREE(tree); +} + +int +avl_insert (avl_tree * ob, + void * key) +{ + odp_rwlock_write_lock(&ob->lock_rw); + + if (!(ob->root->right)) { + avl_node * node = avl_node_new (key, ob->root); + if (!node) { + 
odp_rwlock_write_unlock(&ob->lock_rw); + return -1; + } else { + ob->root->right = node; + ob->length = ob->length + 1; + odp_rwlock_write_unlock(&ob->lock_rw); + return 0; + } + } else { /* not self.right == None */ + avl_node *t, *p, *s, *q, *r; + int a; + + t = ob->root; + s = p = t->right; + + while (1) { + if (ob->compare_fun (ob->compare_arg, key, p->key) < 1) { + /* move left */ + AVL_SET_RANK (p, (AVL_GET_RANK (p) + 1)); + q = p->left; + if (!q) { + /* insert */ + avl_node * q_node = avl_node_new (key, p); + if (!q_node) { + odp_rwlock_write_unlock(&ob->lock_rw); + return (-1); + } else { + q = q_node; + p->left = q; + break; + } + } else if (AVL_GET_BALANCE(q)) { + t = p; + s = q; + } + p = q; + } else { + /* move right */ + q = p->right; + if (!q) { + /* insert */ + avl_node * q_node = avl_node_new (key, p); + if (!q_node) { + odp_rwlock_write_unlock(&ob->lock_rw); + return -1; + } else { + q = q_node; + p->right = q; + break; + } + } else if (AVL_GET_BALANCE(q)) { + t = p; + s = q; + } + p = q; + } + } + + ob->length = ob->length + 1; + + /* adjust balance factors */ + if (ob->compare_fun (ob->compare_arg, key, s->key) < 1) { + r = p = s->left; + } else { + r = p = s->right; + } + while (p != q) { + if (ob->compare_fun (ob->compare_arg, key, p->key) < 1) { + AVL_SET_BALANCE (p, -1); + p = p->left; + } else { + AVL_SET_BALANCE (p, +1); + p = p->right; + } + } + + /* balancing act */ + + if (ob->compare_fun (ob->compare_arg, key, s->key) < 1) { + a = -1; + } else { + a = +1; + } + + if (AVL_GET_BALANCE (s) == 0) { + AVL_SET_BALANCE (s, a); + ob->height = ob->height + 1; + odp_rwlock_write_unlock(&ob->lock_rw); + return 0; + } else if (AVL_GET_BALANCE (s) == -a) { + AVL_SET_BALANCE (s, 0); + odp_rwlock_write_unlock(&ob->lock_rw); + return 0; + } else if (AVL_GET_BALANCE(s) == a) { + if (AVL_GET_BALANCE (r) == a) { + /* single rotation */ + p = r; + if (a == -1) { + s->left = r->right; + if (r->right) { + r->right->parent = s; + } + r->right = s; + s->parent 
= r; + AVL_SET_RANK (s, (AVL_GET_RANK (s) - AVL_GET_RANK (r))); + } else { + s->right = r->left; + if (r->left) { + r->left->parent = s; + } + r->left = s; + s->parent = r; + AVL_SET_RANK (r, (AVL_GET_RANK (r) + AVL_GET_RANK (s))); + } + AVL_SET_BALANCE (s, 0); + AVL_SET_BALANCE (r, 0); + } else if (AVL_GET_BALANCE (r) == -a) { + /* double rotation */ + if (a == -1) { + p = r->right; + r->right = p->left; + if (p->left) { + p->left->parent = r; + } + p->left = r; + r->parent = p; + s->left = p->right; + if (p->right) { + p->right->parent = s; + } + p->right = s; + s->parent = p; + AVL_SET_RANK (p, (AVL_GET_RANK (p) + AVL_GET_RANK (r))); + AVL_SET_RANK (s, (AVL_GET_RANK (s) - AVL_GET_RANK (p))); + } else { + p = r->left; + r->left = p->right; + if (p->right) { + p->right->parent = r; + } + p->right = r; + r->parent = p; + s->right = p->left; + if (p->left) { + p->left->parent = s; + } + p->left = s; + s->parent = p; + AVL_SET_RANK (r, (AVL_GET_RANK (r) - AVL_GET_RANK (p))); + AVL_SET_RANK (p, (AVL_GET_RANK (p) + AVL_GET_RANK (s))); + } + if (AVL_GET_BALANCE (p) == a) { + AVL_SET_BALANCE (s, -a); + AVL_SET_BALANCE (r, 0); + } else if (AVL_GET_BALANCE (p) == -a) { + AVL_SET_BALANCE (s, 0); + AVL_SET_BALANCE (r, a); + } else { + AVL_SET_BALANCE (s, 0); + AVL_SET_BALANCE (r, 0); + } + AVL_SET_BALANCE (p, 0); + } + /* finishing touch */ + if (s == t->right) { + t->right = p; + } else { + t->left = p; + } + p->parent = t; + } + } + odp_rwlock_write_unlock(&ob->lock_rw); + return 0; +} + +int +avl_get_by_index (avl_tree * tree, + unsigned long index, + void ** value_address) +{ + avl_node * p = tree->root->right; + unsigned long m = index + 1; + while (1) { + if (!p) { + return -1; + } + if (m < AVL_GET_RANK(p)) { + p = p->left; + } else if (m > AVL_GET_RANK(p)) { + m = m - AVL_GET_RANK(p); + p = p->right; + } else { + *value_address = p->key; + return 0; + } + } +} + +int +avl_get_by_key (avl_tree * tree, + void * key, + void **value_address) +{ + 
odp_rwlock_read_lock(&tree->lock_rw); + + avl_node * x = tree->root->right; + if (!x) { + odp_rwlock_read_unlock(&tree->lock_rw); + return -1; + } + while (1) { + int compare_result = tree->compare_fun (tree->compare_arg, key, x->key); + if (compare_result < 0) { + if (x->left) { + x = x->left; + } else { + odp_rwlock_read_unlock(&tree->lock_rw); + return -1; + } + } else if (compare_result > 0) { + if (x->right) { + x = x->right; + } else { + odp_rwlock_read_unlock(&tree->lock_rw); + return -1; + } + } else { + *value_address = x->key; + odp_rwlock_read_unlock(&tree->lock_rw); + return 0; + } + } +} + +int avl_delete(avl_tree *tree, void *key, avl_free_key_fun_type free_key_fun) +{ + avl_node *x, *y, *p, *q, *r, *top, *x_child; + int shortened_side, shorter; + + x = tree->root->right; + if (!x) { + return -1; + } + while (1) { + int compare_result = tree->compare_fun (tree->compare_arg, key, x->key); + if (compare_result < 0) { + /* move left + * We will be deleting from the left, adjust this node's + * rank accordingly + */ + AVL_SET_RANK (x, (AVL_GET_RANK(x) - 1)); + if (x->left) { + x = x->left; + } else { + /* Oops! now we have to undo the rank changes + * all the way up the tree + */ + AVL_SET_RANK(x, (AVL_GET_RANK (x) + 1)); + while (x != tree->root->right) { + if (x->parent->left == x) { + AVL_SET_RANK(x->parent, (AVL_GET_RANK (x->parent) + 1)); + } + x = x->parent; + } + return -1; /* key not in tree */ + } + } else if (compare_result > 0) { + /* move right */ + if (x->right) { + x = x->right; + } else { + AVL_SET_RANK(x, (AVL_GET_RANK (x) + 1)); + while (x != tree->root->right) { + if (x->parent->left == x) { + AVL_SET_RANK(x->parent, (AVL_GET_RANK (x->parent) + 1)); + } + x = x->parent; + } + return -1; /* key not in tree */ + } + } else { + break; + } + } + + if (x->left && x->right) { + void * temp_key; + + /* The complicated case. + * reduce this to the simple case where we are deleting + * a node with at most one child. 
+ */ + + /* find the immediate predecessor */ + y = x->left; + while (y->right) { + y = y->right; + } + /* swap with */ + temp_key = x->key; + x->key = y->key; + y->key = temp_key; + /* we know 's left subtree lost a node because that's + * where we took it from + */ + AVL_SET_RANK (x, (AVL_GET_RANK (x) - 1)); + x = y; + } + /* now has at most one child + * scoot this child into the place of + */ + if (x->left) { + x_child = x->left; + x_child->parent = x->parent; + } else if (x->right) { + x_child = x->right; + x_child->parent = x->parent; + } else { + x_child = NULL; + } + + /* now tell 's parent that a grandchild became a child */ + if (x == x->parent->left) { + x->parent->left = x_child; + shortened_side = -1; + } else { + x->parent->right = x_child; + shortened_side = +1; + } + + /* + * the height of the subtree + * has now been shortened. climb back up + * the tree, rotating when necessary to adjust + * for the change. + */ + shorter = 1; + p = x->parent; + + /* return the key and node to storage */ + if (free_key_fun) + free_key_fun (x->key); +#ifdef HAVE_AVL_NODE_LOCK + thread_rwlock_destroy (&x->rwlock); +#endif + AVL_NODEFREE(x); + + while (shorter && p->parent) { + + /* case 1: height unchanged */ + if (AVL_GET_BALANCE(p) == 0) { + if (shortened_side == -1) { + /* we removed a left child, the tree is now heavier + * on the right + */ + AVL_SET_BALANCE (p, +1); + } else { + /* we removed a right child, the tree is now heavier + * on the left + */ + AVL_SET_BALANCE (p, -1); + } + shorter = 0; + + } else if (AVL_GET_BALANCE (p) == shortened_side) { + /* case 2: taller subtree shortened, height reduced */ + AVL_SET_BALANCE (p, 0); + } else { + /* case 3: shorter subtree shortened */ + top = p->parent; + /* set to the taller of the two subtrees of

*/ + if (shortened_side == 1) { + q = p->left; + } else { + q = p->right; + } + if (AVL_GET_BALANCE (q) == 0) { + /* case 3a: height unchanged */ + if (shortened_side == -1) { + /* single rotate left */ + q->parent = p->parent; + p->right = q->left; + if (q->left) { + q->left->parent = p; + } + q->left = p; + p->parent = q; + AVL_SET_RANK (q, (AVL_GET_RANK (q) + AVL_GET_RANK (p))); + } else { + /* single rotate right */ + q->parent = p->parent; + p->left = q->right; + if (q->right) { + q->right->parent = p; + } + q->right = p; + p->parent = q; + AVL_SET_RANK (p, (AVL_GET_RANK (p) - AVL_GET_RANK (q))); + } + shorter = 0; + AVL_SET_BALANCE (q, shortened_side); + AVL_SET_BALANCE (p, (- shortened_side)); + } else if (AVL_GET_BALANCE (q) == AVL_GET_BALANCE (p)) { + /* case 3b: height reduced */ + if (shortened_side == -1) { + /* single rotate left */ + q->parent = p->parent; + p->right = q->left; + if (q->left) { + q->left->parent = p; + } + q->left = p; + p->parent = q; + AVL_SET_RANK (q, (AVL_GET_RANK (q) + AVL_GET_RANK (p))); + } else { + /* single rotate right */ + q->parent = p->parent; + p->left = q->right; + if (q->right) { + q->right->parent = p; + } + q->right = p; + p->parent = q; + AVL_SET_RANK (p, (AVL_GET_RANK (p) - AVL_GET_RANK (q))); + } + shorter = 1; + AVL_SET_BALANCE (q, 0); + AVL_SET_BALANCE (p, 0); + } else { + /* case 3c: height reduced, balance factors opposite */ + if (shortened_side == 1) { + /* double rotate right */ + /* first, a left rotation around q */ + r = q->right; + r->parent = p->parent; + q->right = r->left; + if (r->left) { + r->left->parent = q; + } + r->left = q; + q->parent = r; + /* now, a right rotation around p */ + p->left = r->right; + if (r->right) { + r->right->parent = p; + } + r->right = p; + p->parent = r; + AVL_SET_RANK (r, (AVL_GET_RANK (r) + AVL_GET_RANK (q))); + AVL_SET_RANK (p, (AVL_GET_RANK (p) - AVL_GET_RANK (r))); + } else { + /* double rotate left */ + /* first, a right rotation around q */ + r = q->left; + 
r->parent = p->parent; + q->left = r->right; + if (r->right) { + r->right->parent = q; + } + r->right = q; + q->parent = r; + /* now a left rotation around p */ + p->right = r->left; + if (r->left) { + r->left->parent = p; + } + r->left = p; + p->parent = r; + AVL_SET_RANK (q, (AVL_GET_RANK (q) - AVL_GET_RANK (r))); + AVL_SET_RANK (r, (AVL_GET_RANK (r) + AVL_GET_RANK (p))); + } + if (AVL_GET_BALANCE (r) == shortened_side) { + AVL_SET_BALANCE (q, (- shortened_side)); + AVL_SET_BALANCE (p, 0); + } else if (AVL_GET_BALANCE (r) == (- shortened_side)) { + AVL_SET_BALANCE (q, 0); + AVL_SET_BALANCE (p, shortened_side); + } else { + AVL_SET_BALANCE (q, 0); + AVL_SET_BALANCE (p, 0); + } + AVL_SET_BALANCE (r, 0); + q = r; + } + /* a rotation has caused (or in case 3c) to become + * the root. let

's former parent know this. + */ + if (top->left == p) { + top->left = q; + } else { + top->right = q; + } + /* end case 3 */ + p = q; + } + x = p; + p = x->parent; + /* shortened_side tells us which side we came up from */ + if (x == p->left) { + shortened_side = -1; + } else { + shortened_side = +1; + } + } /* end while(shorter) */ + /* when we're all done, we're one shorter */ + tree->length = tree->length - 1; + return (0); +} + +static int +avl_iterate_inorder_helper (avl_node * node, + avl_iter_fun_type iter_fun, + void * iter_arg) +{ + int result; + if (node->left) { + result = avl_iterate_inorder_helper (node->left, iter_fun, iter_arg); + if (result != 0) { + return result; + } + } + result = (iter_fun (node->key, iter_arg)); + if (result != 0) { + return result; + } + if (node->right) { + result = avl_iterate_inorder_helper (node->right, iter_fun, iter_arg); + if (result != 0) { + return result; + } + } + return 0; +} + +int +avl_iterate_inorder (avl_tree * tree, + avl_iter_fun_type iter_fun, + void * iter_arg) +{ + int result; + + odp_rwlock_read_lock(&tree->lock_rw); + if (tree->length) { + result = avl_iterate_inorder_helper (tree->root->right, iter_fun, iter_arg); + odp_rwlock_read_unlock(&tree->lock_rw); + return (result); + } else { + odp_rwlock_read_unlock(&tree->lock_rw); + return 0; + } +} + +avl_node *avl_get_first(avl_tree *tree) +{ + avl_node *node; + + node = tree->root->right; + if (node == NULL || node->key == NULL) return NULL; + + while (node->left) + node = node->left; + + return node; +} + +avl_node *avl_get_prev(avl_node *node) +{ + if (node->left) { + node = node->left; + while (node->right) { + node = node->right; + } + + return node; + } else { + avl_node *child = node; + while (node->parent && node->parent->key) { + node = node->parent; + if (child == node->right) { + return node; + } + child = node; + } + + return NULL; + } +} + +avl_node *avl_get_next(avl_node *node) +{ + if (node->right) { + node = node->right; + while 
(node->left) { + node = node->left; + } + + return node; + } else { + avl_node *child = node; + while (node->parent && node->parent->key) { + node = node->parent; + if (child == node->left) { + return node; + } + child = node; + } + + return NULL; + } +} + +/* iterate a function over a range of indices, using get_predecessor */ + +int +avl_iterate_index_range (avl_tree * tree, + avl_iter_index_fun_type iter_fun, + unsigned long low, + unsigned long high, + void * iter_arg) +{ + unsigned long m; + unsigned long num_left; + avl_node * node; + + if (high > tree->length) { + return -1; + } + num_left = (high - low); + /* find the th node */ + m = high; + node = tree->root->right; + while (1) { + if (m < AVL_GET_RANK (node)) { + node = node->left; + } else if (m > AVL_GET_RANK (node)) { + m = m - AVL_GET_RANK (node); + node = node->right; + } else { + break; + } + } + /* call on , , ... */ + while (num_left) { + num_left = num_left - 1; + if (iter_fun (num_left, node->key, iter_arg) != 0) { + return -1; + } + node = avl_get_prev (node); + } + return 0; +} + +/* If is present in the tree, return that key's node, and set <*index> + * appropriately. If not, return NULL, and set <*index> to the position + * representing the closest preceding value. 
+ */ + +static avl_node * +avl_get_index_by_key (avl_tree * tree, + void * key, + unsigned long * index) +{ + avl_node * x = tree->root->right; + unsigned long m; + + if (!x) { + return NULL; + } + m = AVL_GET_RANK (x); + + while (1) { + int compare_result = tree->compare_fun (tree->compare_arg, key, x->key); + if (compare_result < 0) { + if (x->left) { + m = m - AVL_GET_RANK(x); + x = x->left; + m = m + AVL_GET_RANK(x); + } else { + *index = m - 2; + return NULL; + } + } else if (compare_result > 0) { + if (x->right) { + x = x->right; + m = m + AVL_GET_RANK(x); + } else { + *index = m - 1; + return NULL; + } + } else { + *index = m - 1; + return x; + } + } +} + +/* return the (low index, high index) pair that spans the given key */ + +int +avl_get_span_by_key (avl_tree * tree, + void * key, + unsigned long * low, + unsigned long * high) +{ + unsigned long m, i, j; + avl_node * node; + + node = avl_get_index_by_key (tree, key, &m); + + /* did we find an exact match? + * if so, we have to search left and right + * to find the span, since we know nothing about + * the arrangement of like keys. 
+ */ + if (node) { + avl_node * left, * right; + /* search left */ + left = avl_get_prev (node); + i = m; + while (left && (i > 0) && (tree->compare_fun (tree->compare_arg, key, left->key) == 0)) { + left = avl_get_prev (left); + i = i - 1; + } + /* search right */ + right = avl_get_next (node); + j = m; + while (right && (j <= tree->length) && (tree->compare_fun (tree->compare_arg, key, right->key) == 0)) { + right = avl_get_next (right); + j = j + 1; + } + *low = i; + *high = j + 1; + return 0; + } else { + *low = *high = m; + } + return 0; +} + +/* return the (low index, high index) pair that spans the given key */ + +int +avl_get_span_by_two_keys (avl_tree * tree, + void * low_key, + void * high_key, + unsigned long * low, + unsigned long * high) +{ + unsigned long i, j; + avl_node * low_node, * high_node; + int order; + + /* we may need to swap them */ + order = tree->compare_fun (tree->compare_arg, low_key, high_key); + if (order > 0) { + void * temp = low_key; + low_key = high_key; + high_key = temp; + } + + low_node = avl_get_index_by_key (tree, low_key, &i); + high_node = avl_get_index_by_key (tree, high_key, &j); + + if (low_node) { + avl_node * left; + /* search left */ + left = avl_get_prev (low_node); + while (left && (i > 0) && (tree->compare_fun (tree->compare_arg, low_key, left->key) == 0)) { + left = avl_get_prev (left); + i = i - 1; + } + } else { + i = i + 1; + } + if (high_node) { + avl_node * right; + /* search right */ + right = avl_get_next (high_node); + while (right && (j <= tree->length) && (tree->compare_fun (tree->compare_arg, high_key, right->key) == 0)) { + right = avl_get_next (right); + j = j + 1; + } + } else { + j = j + 1; + } + + *low = i; + *high = j; + return 0; +} + + +int +avl_get_item_by_key_most (avl_tree * tree, + void * key, + void **value_address) +{ + avl_node * x = tree->root->right; + *value_address = NULL; + + if (!x) { + return -1; + } + while (1) { + int compare_result = tree->compare_fun (tree->compare_arg, key, 
x->key); + + if (compare_result == 0) { + *value_address = x->key; + return 0; + } else if (compare_result < 0) { + /* the given key is less than the current key */ + if (x->left) { + x = x->left; + } else { + if (*value_address) + return 0; + else + return -1; + } + } else { + /* the given key is more than the current key */ + /* save this value, it might end up being the right one! */ + *value_address = x->key; + if (x->right) { + /* there is a bigger entry */ + x = x->right; + } else { + if (*value_address) + return 0; + else + return -1; + } + } + } +} + +int +avl_get_item_by_key_least (avl_tree * tree, + void * key, + void **value_address) +{ + avl_node * x = tree->root->right; + *value_address = NULL; + + if (!x) { + return -1; + } + while (1) { + int compare_result = tree->compare_fun (tree->compare_arg, key, x->key); + if (compare_result == 0) { + *value_address = x->key; + return 0; /* exact match */ + } else if (compare_result < 0) { + /* the given key is less than the current key */ + /* save this value, it might end up being the right one! */ + *value_address = x->key; + if (x->left) { + x = x->left; + } else { + if (*value_address) /* we have found a valid entry */ + return 0; + else + return -1; + } + } else { + if (x->right) { + /* there is a bigger entry */ + x = x->right; + } else { + if (*value_address) /* we have found a valid entry */ + return 0; + else + return -1; + } + } + } +} + +#define AVL_MAX(X, Y) ((X) > (Y) ? 
/*
 * Recompute the height of the subtree rooted at <node> and compare it
 * with the balance factor stored in each node.
 *
 * Returns the subtree height, or 0 when <node> is NULL.  It also
 * returns 0 when a check fails (stored balance differs from
 * right height - left height, or the heights differ by more than one).
 * Because 0 is also the height of an empty subtree, the parent cannot
 * tell a failed subtree from an empty one.
 */
static long
avl_verify_balance (avl_node * node)
{
  if (!node) {
    return 0;
  } else {
    long lh = avl_verify_balance (node->left);
    long rh = avl_verify_balance (node->right);
    if ((rh - lh) != AVL_GET_BALANCE(node)) {
      return 0;
    }
    if (((lh - rh) > 1) || ((lh - rh) < -1)) {
      return 0;
    }
    return (1 + AVL_MAX (lh, rh));
  }
}

/*
 * Walk the subtree while each node's parent pointer equals <parent>.
 * On a mismatch the walk of that branch simply stops; nothing is
 * reported or recorded, so this function has no observable effect.
 */
static void
avl_verify_parent (avl_node * node, avl_node * parent)
{
  if (node->parent != parent) {
    return;
  }
  if (node->left) {
    avl_verify_parent (node->left, node);
  }
  if (node->right) {
    avl_verify_parent (node->right, node);
  }
}

/*
 * Count the nodes below <node> and check that every node's rank equals
 * the size of its left subtree plus one.
 *
 * Returns the node count of the subtree (0 for NULL).  A rank mismatch
 * prints a message to stderr and terminates the process with exit(1).
 * The message prints the key pointer cast to long.
 */
static long
avl_verify_rank (avl_node * node)
{
  if (!node) {
    return 0;
  } else {
    unsigned long num_left=0, num_right=0;
    if (node->left) {
      num_left = avl_verify_rank (node->left);
    }
    if (node->right) {
      num_right = avl_verify_rank (node->right);
    }
    if (AVL_GET_RANK (node) != num_left + 1) {
      fprintf (stderr, "invalid rank at node %ld\n", (long) node->key);
      exit (1);
    }
    return (num_left + num_right + 1);
  }
}

/*
 * Sanity-check the tree.
 *
 * Runs the balance, parent-pointer and rank checks on a non-empty tree.
 * Always returns 0: the results of the balance and parent checks are
 * discarded, so the only failure a caller can observe is the process
 * exit performed by avl_verify_rank.
 * NOTE(review): callers that test the return value never see a
 * failure -- confirm whether that is intended.
 */
int
avl_verify (avl_tree * tree)
{
  if (tree->length) {
    avl_verify_balance (tree->root->right);
    avl_verify_parent (tree->root->right, tree->root);
    avl_verify_rank (tree->root->right);
  }
  return (0);
}
/*
 * Print the indentation that precedes a node on its output line.
 *
 * <link> is the innermost element of a chain of link_node records that
 * live on the callers' stacks, one per ancestor.  The chain is printed
 * outermost first by recursing to the parent before printing this
 * level.  A level prints a '|' followed by padding when its direction
 * differs from its parent's (and the parent is not the top record);
 * otherwise it prints only padding, <width> columns wide.
 */
static void
print_connectors (link_node * link)
{
  if (link->parent) {
    print_connectors (link->parent);
  }
  if (link->parent && (link->parent->direction != link->direction) && (link->parent->parent)) {
    int i;
    fprintf (stdout, "|");
    for (i=0; i < (link->width - 1); i++) {
      fprintf (stdout, " ");
    }
  } else {
    int i;
    for (i=0; i < (link->width); i++) {
      fprintf (stdout, " ");
    }
  }
}

/*
 * Print <node> and its subtrees, right subtree first, then the node's
 * own line, then the left subtree.
 *
 * The key_printer callback writes a textual representation of the key
 * into the buffer it is given and returns the length of that text.
 * The buffer is a fixed 256-byte stack array and nothing here limits
 * how much the callback writes, so a callback that produces longer
 * text overruns it.
 *
 * Each child is printed with a link_node whose width is the key text
 * length plus 11.
 * NOTE(review): 11 appears to be the width of the fixed decoration
 * printed around the key -- confirm before changing the format string.
 * The stored balance factor plus one indexes balance_chars without a
 * range check.
 */
static void
print_node (avl_key_printer_fun_type key_printer,
            avl_node * node,
            link_node * link)
{
  char buffer[256];
  unsigned int width;
  width = key_printer (buffer, node->key);

  if (node->right) {
    link_node here;
    here.parent = link;
    here.direction = 1;
    here.width = width + 11;
    print_node (key_printer, node->right, &here);
  }
  print_connectors (link);
  fprintf (stdout, "+-[%c %s %03d]",
           balance_chars[AVL_GET_BALANCE(node)+1],
           buffer,
           (int)AVL_GET_RANK(node));
  if (node->left || node->right) {
    fprintf (stdout, "-|\n");
  } else {
    fprintf (stdout, "\n");
  }
  if (node->left) {
    link_node here;
    here.parent = link;
    here.direction = -1;
    here.width = width + 11;
    print_node (key_printer, node->left, &here);
  }
}

/*
 * Print the whole tree to stdout.
 *
 * A NULL key_printer selects default_key_printer.  For a tree whose
 * length is zero only a line break is written.
 */
void
avl_print_tree (avl_tree * tree, avl_key_printer_fun_type key_printer)
{
  link_node top = {NULL, 0, 0};
  if (!key_printer) {
    key_printer = default_key_printer;
  }
  if (tree->length) {
    print_node (key_printer, tree->root->right, &top);
  } else {
    fprintf (stdout, "\n");
  }
}
HAVE_AVL_NODE_LOCK +void avl_node_rlock(avl_node *node) +{ + thread_rwlock_rlock(&node->rwlock); +} + +void avl_node_wlock(avl_node *node) +{ + thread_rwlock_wlock(&node->rwlock); +} + +void avl_node_unlock(avl_node *node) +{ + thread_rwlock_unlock(&node->rwlock); +} +#endif + +void ofp_print_avl_stat(int fd) +{ + ofp_sendf(fd, "avl tree alloc now=%d max=%d total=%d\r\n", + shm->nodes_allocated, shm->max_nodes_allocated, NUM_NODES); +} + +void ofp_avl_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + uint32_t i; + + /* Reserve memory for args from shared mem */ + shm_h = odp_shm_reserve("OfpAvlShMem", sizeof(*shm), ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpAvlShMem shared mem alloc failed on core: %u.\n", odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); + + for (i = 0; i < NUM_NODES; i++) { + shm->node_list[i].right = (i == NUM_NODES - 1) ? NULL : &(shm->node_list[i+1]); + } + shm->free_nodes = &(shm->node_list[0]); + + for (i = 0; i < NUM_TREES; i++) { + shm->trees[i].compare_arg = (i == NUM_TREES - 1) ? 
NULL : &(shm->trees[i+1]); + } + shm->free_trees = &(shm->trees[0]); + shm->tree_cnt = 0; +} + + +void ofp_avl_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpAvlShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpAvlShMem shared mem lookup failed on core: %u.\n", odp_cpu_id()); + exit(EXIT_FAILURE); + } +} diff --git a/src/ofp_debug.c b/src/ofp_debug.c new file mode 100644 index 00000000..17885bc2 --- /dev/null +++ b/src/ofp_debug.c @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include "ofpi_debug.h" + +struct ofp_flag_descript_s ofp_flag_descript[] = { + {OFP_DEBUG_PRINT_RECV_NIC, "ODP to FP"}, + {OFP_DEBUG_PRINT_SEND_NIC, "FP to ODP"}, + {OFP_DEBUG_PRINT_RECV_KNI, "FP to SP"}, + {OFP_DEBUG_PRINT_SEND_KNI, "SP to ODP"} +}; + +int ofp_debug_flags = 0; +int ofp_debug_capture_ports = 0; + +void ofp_set_debug_flags(int flags) +{ + ofp_debug_flags = flags; +} +int ofp_get_debug_flags(void) +{ + return ofp_debug_flags; +} + +void ofp_set_debug_capture_ports(int ports) +{ + ofp_debug_capture_ports = ports; +} +int ofp_get_debug_capture_ports(void) +{ + return ofp_debug_capture_ports; +} diff --git a/src/ofp_debug_pcap.c b/src/ofp_debug_pcap.c new file mode 100644 index 00000000..59f6281c --- /dev/null +++ b/src/ofp_debug_pcap.c @@ -0,0 +1,195 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ofpi_debug.h" +#include "ofpi_log.h" + +#define shm shm_pcap + +struct ofp_pcap_mem { + odp_rwlock_t lock_pcap_rw; + FILE *pcap_fd; + int pcap_first; + int pcap_is_fifo; + char pcap_file_name[128]; +}; +static __thread struct ofp_pcap_mem *shm; + +#define IS_KNI(flag) \ + (flag == OFP_DEBUG_PRINT_RECV_KNI || \ + flag == OFP_DEBUG_PRINT_SEND_KNI) + 
+#define IS_TX(flag) \ + (flag == OFP_DEBUG_PRINT_SEND_NIC || \ + flag == OFP_DEBUG_PRINT_SEND_KNI) + +#define GET_PCAP_CONF_ADD_INFO(port, flag) \ + (port | \ + (IS_KNI(flag) ? OFP_DEBUG_PCAP_KNI : 0) | \ + (IS_TX(flag) ? OFP_DEBUG_PCAP_TX : 0)) + +/* PCAP */ +void ofp_save_packet_to_pcap_file(uint32_t flag, odp_packet_t pkt, int port) +{ +#define PUT16(x) do { \ +uint16_t val16 = x; fwrite(&val16, 2, 1, shm->pcap_fd); \ +} while (0) +#define PUT32(x) do { \ +uint32_t val32 = x; fwrite(&val32, 4, 1, shm->pcap_fd); \ +} while (0) + struct timeval t; + + if ((ofp_debug_capture_ports & + (1 << (port & OFP_DEBUG_PCAP_PORT_MASK))) == 0) + return; + + odp_rwlock_write_lock(&shm->lock_pcap_rw); + + if (shm->pcap_first) { + /*int n = ufp_get_num_ports(), i;*/ + struct stat st; + + shm->pcap_is_fifo = 0; + if (stat(shm->pcap_file_name, &st) == 0) + shm->pcap_is_fifo = (st.st_mode & S_IFIFO) != 0; + + shm->pcap_fd = fopen(shm->pcap_file_name, "w"); + if (!shm->pcap_fd) + goto out; + + /* Global header */ + PUT32(0xa1b2c3d4); /* Byte order magic */ + PUT16(2); PUT16(4); /* Version major & minor */ + PUT32(0); /* Timezone */ + PUT32(0); /* Accuracy */ + PUT32(0xffff); /* Snaplen */ + PUT32(1); /* Ethernet */ + + shm->pcap_first = 0; + } else if (shm->pcap_fd == NULL) { + shm->pcap_fd = fopen(shm->pcap_file_name, "a"); + if (!shm->pcap_fd) + goto out; + } + + /* Header */ + /* Timestamp */ + gettimeofday(&t, NULL); + PUT32(t.tv_sec); + PUT32(t.tv_usec); + + PUT32(odp_packet_len(pkt)); /* Saved packet len -- segment len */ + PUT32(odp_packet_len(pkt)); /* Captured packet len -- packet len */ + + /* Data */ + if (ofp_debug_capture_ports & OFP_DEBUG_PCAP_CONF_ADD_INFO) { + fputc(GET_PCAP_CONF_ADD_INFO(port, flag), shm->pcap_fd); + /* Packet data */ + fwrite((uint8_t *) odp_packet_data(pkt) + 1, 1, + odp_packet_len(pkt) - 1, shm->pcap_fd); + } else { + /* Packet data */ + fwrite(odp_packet_data(pkt), 1, + odp_packet_len(pkt), shm->pcap_fd); + } + + if (!shm->pcap_is_fifo) { + 
fclose(shm->pcap_fd); + shm->pcap_fd = NULL; + } else { + fflush(shm->pcap_fd); + } +out: + odp_rwlock_write_unlock(&shm->lock_pcap_rw); +} + +void ofp_set_capture_file(const char *filename) +{ + char *p; + + odp_rwlock_write_lock(&shm->lock_pcap_rw); + + strncpy(shm->pcap_file_name, filename, sizeof(shm->pcap_file_name)-1); + shm->pcap_file_name[sizeof(shm->pcap_file_name)-1] = 0; + + /* There may be trailing spaces. Remove. */ + for (p = shm->pcap_file_name; *p; p++) + if (*p == ' ') { + *p = 0; + break; + } + + if (shm->pcap_fd) { + fclose(shm->pcap_fd); + shm->pcap_fd = NULL; + } + shm->pcap_first = 1; + + odp_rwlock_write_unlock(&shm->lock_pcap_rw); +} + +void ofp_get_capture_file(char *filename, int max_size) +{ + odp_rwlock_write_lock(&shm->lock_pcap_rw); + + strncpy(filename, shm->pcap_file_name, max_size - 1); + filename[max_size - 1] = 0; + + odp_rwlock_write_unlock(&shm->lock_pcap_rw); +} + +static void sigpipe_handler(int s) +{ + (void) s; + if (shm->pcap_fd) { + fclose(shm->pcap_fd); + shm->pcap_fd = NULL; + shm->pcap_first = 1; + } +} + +void ofp_pcap_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_reserve("OfpPcapShMem", sizeof(*shm), + ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) + OFP_ABORT("Error: Util shared mem alloc failed" + " on core: %u.\n", + odp_cpu_id()); + + memset(shm, 0, sizeof(*shm)); + odp_rwlock_init(&shm->lock_pcap_rw); + strcpy(shm->pcap_file_name, DEFAULT_DEBUG_PCAP_FILE_NAME); + shm->pcap_first = 1; + signal(SIGPIPE, sigpipe_handler); +} + +void ofp_pcap_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpPcapShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) + OFP_ABORT("Error: Util shared mem lookup failed" + " on core: %u.\n", + odp_cpu_id()); +} diff --git a/src/ofp_debug_print.c b/src/ofp_debug_print.c new file mode 100644 index 00000000..c86f2a61 --- /dev/null +++ b/src/ofp_debug_print.c @@ -0,0 +1,380 @@ +/*- + * Copyright (c) 2014 ENEA 
Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include + +#include "ofpi.h" +#include "ofpi_log.h" +#include "ofpi_debug.h" +#include "ofpi_util.h" + +/** + * Helper function to print a work packet content. + * Only IP and ARP packets are supported. + * + * @param work Work queue entry. + */ +#define fprintf(a, b...) do { \ + if (ofp_debug_flags & OFP_DEBUG_PRINT_CONSOLE) {\ + printf(b); \ + } \ + fprintf(a, b); } \ + while (0) + +static void print_arp(FILE *f, char *p) +{ + fprintf(f, "ARP %d %s -> %s ", + p[7], /* opcode */ + ofp_print_ip_addr(*((uint32_t *)(p+14))), /* sender IP */ + ofp_print_ip_addr(*((uint32_t *)(p+24)))); /* target IP */ +} + +static void print_ipv6(FILE *f, char *p) +{ + struct ofp_ip6_hdr *ip6hdr = (struct ofp_ip6_hdr *)p; + struct ofp_icmp6_hdr *icmp; + struct ofp_udphdr *uh; + + if (ip6hdr->ofp_ip6_nxt == OFP_IPPROTO_UDP) { + uh = (struct ofp_udphdr *)(ip6hdr + 1); + + fprintf(f, "IPv6 UDP: len=%d %s port %d -> %s port %d ", + odp_be_to_cpu_16(uh->uh_ulen), + ofp_print_ip6_addr(ip6hdr->ip6_src.ofp_s6_addr), + odp_be_to_cpu_16(uh->uh_sport), + ofp_print_ip6_addr(ip6hdr->ip6_dst.ofp_s6_addr), + odp_be_to_cpu_16(uh->uh_dport)); + } else if (ip6hdr->ofp_ip6_nxt == OFP_IPPROTO_ICMPV6) { + icmp = (struct ofp_icmp6_hdr *)(ip6hdr + 1); + + fprintf(f, "IPv6 ICMP: len=%d", + odp_be_to_cpu_16(ip6hdr->ofp_ip6_plen)); + + switch (icmp->icmp6_type) { + case OFP_ND_ROUTER_SOLICIT: + fprintf(f, " type=Router-Solicitation"); + break; + case OFP_ND_ROUTER_ADVERT: + fprintf(f, " type=Router-Advertisement %s%s", + (icmp->ofp_icmp6_data8[1] & 0x80) ? "M" : "", + (icmp->ofp_icmp6_data8[1] & 0x40) ? 
"O" : ""); + break; + case OFP_ND_NEIGHBOR_SOLICIT: + fprintf(f, " type=Neighbor-Solicitation target=%s", + ofp_print_ip6_addr(icmp->ofp_icmp6_data8 + + 4)); + break; + case OFP_ND_NEIGHBOR_ADVERT: + fprintf(f, + " type=Neighbor-Advertisement %s%s%s target=%s", + (icmp->ofp_icmp6_data8[0] & 0x80) ? "R" : "", + (icmp->ofp_icmp6_data8[0] & 0x40) ? "S" : "", + (icmp->ofp_icmp6_data8[0] & 0x20) ? "O" : "", + ofp_print_ip6_addr(icmp->ofp_icmp6_data8 + + 4)); + break; + case OFP_ND_REDIRECT: + fprintf(f, " type=Redirect target=%s destination=%s", + ofp_print_ip6_addr(icmp->ofp_icmp6_data8 + + 4), + ofp_print_ip6_addr(icmp->ofp_icmp6_data8 + + 20)); + break; + default: + fprintf(f, " type=%d", icmp->icmp6_type); + } + + fprintf(f, " code=%d\n", icmp->icmp6_code); + fprintf(f, " %s -> %s ", + ofp_print_ip6_addr(ip6hdr->ip6_src.ofp_s6_addr), + ofp_print_ip6_addr(ip6hdr->ip6_dst.ofp_s6_addr)); + + } else { + fprintf(f, "IPv6 PKT: len=%d next=%d %s -> %s ", + odp_be_to_cpu_16(ip6hdr->ofp_ip6_plen), + ip6hdr->ofp_ip6_nxt, + ofp_print_ip6_addr(ip6hdr->ip6_src.ofp_s6_addr), + ofp_print_ip6_addr(ip6hdr->ip6_dst.ofp_s6_addr)); + } +} + +static void print_ipv4(FILE *f, char *p) +{ + struct ofp_ip *iphdr = (struct ofp_ip *)p; + struct ofp_icmp *icmp; + struct ofp_udphdr *uh; + struct ofp_tcphdr *th; + + if (iphdr->ip_p == OFP_IPPROTO_UDP) { + uh = (struct ofp_udphdr *)(((uint8_t *)iphdr) + + (iphdr->ip_hl<<2)); + + fprintf(f, "IP UDP PKT len=%d %s:%d -> %s:%d ", + odp_be_to_cpu_16(uh->uh_ulen), + ofp_print_ip_addr(iphdr->ip_src.s_addr), + odp_be_to_cpu_16(uh->uh_sport), + ofp_print_ip_addr(iphdr->ip_dst.s_addr), + odp_be_to_cpu_16(uh->uh_dport)); + + } else if (iphdr->ip_p == OFP_IPPROTO_TCP) { + th = (struct ofp_tcphdr *)(((uint8_t *)iphdr) + + (iphdr->ip_hl<<2)); + fprintf(f, "IP len=%d TCP %s:%d -> %s:%d\n" + " seq=0x%x ack=0x%x off=%d\n flags=", + odp_be_to_cpu_16(iphdr->ip_len), + ofp_print_ip_addr(iphdr->ip_src.s_addr), + odp_be_to_cpu_16(th->th_sport), + 
ofp_print_ip_addr(iphdr->ip_dst.s_addr), + odp_be_to_cpu_16(th->th_dport), + odp_be_to_cpu_32(th->th_seq), + odp_be_to_cpu_32(th->th_ack), + th->th_off); + if (th->th_flags & OFP_TH_FIN) + fprintf(f, "F"); + if (th->th_flags & OFP_TH_SYN) + fprintf(f, "S"); + if (th->th_flags & OFP_TH_RST) + fprintf(f, "R"); + if (th->th_flags & OFP_TH_PUSH) + fprintf(f, "P"); + if (th->th_flags & OFP_TH_ACK) + fprintf(f, "A"); + if (th->th_flags & OFP_TH_URG) + fprintf(f, "U"); + if (th->th_flags & OFP_TH_ECE) + fprintf(f, "E"); + if (th->th_flags & OFP_TH_CWR) + fprintf(f, "C"); + fprintf(f, " win=%u sum=0x%x urp=%u", + odp_be_to_cpu_16(th->th_win), + odp_be_to_cpu_16(th->th_sum), + odp_be_to_cpu_16(th->th_urp)); + int i; + int len = odp_be_to_cpu_16(iphdr->ip_len); +#if 0 + if (odp_be_to_cpu_16(th->th_win) == 0) { + /* wrong value */ + fprintf(f, "\n---- th_win == 0, quit\n"); + fflush(NULL); + int *a = 0; + *a = 8; + } +#endif + if (len > 2000) { + fprintf(f, "\nToo long data!\n"); + int *a = 0, b = 8, c = 9; + *a = b + c; + } else if (0) { + for (i = 0; i < len; i++) { + if ((i & 0xf) == 0) + fprintf(f, "\n"); + fprintf(f, " %02x", (uint8_t)p[i]); + } + } + } else if (iphdr->ip_p == OFP_IPPROTO_ICMP) { + icmp = (struct ofp_icmp *)(((uint8_t *)iphdr) + + (iphdr->ip_hl<<2)); + + switch (icmp->icmp_type) { + case OFP_ICMP_ECHOREPLY: + fprintf(f, + "IP ICMP: echo reply %s -> %s id=%d seq=%d", + ofp_print_ip_addr(iphdr->ip_src.s_addr), + ofp_print_ip_addr(iphdr->ip_dst.s_addr), + icmp->ofp_icmp_id, icmp->ofp_icmp_seq); + break; + case OFP_ICMP_UNREACH: + fprintf(f, "IP ICMP: dest unreachable %s -> %s ", + ofp_print_ip_addr(iphdr->ip_src.s_addr), + ofp_print_ip_addr(iphdr->ip_dst.s_addr)); + break; + case OFP_ICMP_ECHO: + fprintf(f, "IP ICMP: echo %s -> %s id=%d seq=%d", + ofp_print_ip_addr(iphdr->ip_src.s_addr), + ofp_print_ip_addr(iphdr->ip_dst.s_addr), + icmp->ofp_icmp_id, icmp->ofp_icmp_seq); + break; + default: + fprintf(f, "IP ICMP %d: code=%d %s -> %s ", + icmp->icmp_type, 
icmp->icmp_code, + ofp_print_ip_addr(iphdr->ip_src.s_addr), + ofp_print_ip_addr(iphdr->ip_dst.s_addr)); + } + } else { + fprintf(f, "IP PKT len=%d proto=%d %s -> %s ", + odp_be_to_cpu_16(iphdr->ip_len), + iphdr->ip_p, + ofp_print_ip_addr(iphdr->ip_src.s_addr), + ofp_print_ip_addr(iphdr->ip_dst.s_addr)); + } +} + +static int print_gre(FILE *f, char *p, uint16_t *proto) +{ + int len = 4; + struct ofp_gre_h *gre = (struct ofp_gre_h *)p; + + p += 4; + + fprintf(f, "GRE proto=0x%04x ", odp_be_to_cpu_16(gre->ptype)); + *proto = odp_be_to_cpu_16(gre->ptype); + + if ((gre->flags & OFP_GRE_CP) || + (gre->flags & OFP_GRE_RP)) { + len += 4; p += 4; + } + + if (gre->flags & OFP_GRE_KP) { + fprintf(f, "key=0x%02x%02x%02x%02x ", p[0], p[1], p[2], p[3]); + len += 4; p += 4; + } + + if (gre->flags & OFP_GRE_SP) { + fprintf(f, "seq=0x%02x%02x%02x%02x ", p[0], p[1], p[2], p[3]); + len += 4; p += 4; + } + + if (gre->flags & OFP_GRE_RP) + fprintf(f, "routing "); + + return len; +} + +#ifdef PRINT_PACKETS_BINARY +static void print_pkt_binary(odp_packet_t pkt) +{ + uint32_t i; + uint8_t *pnt = odp_packet_data(pkt); + + printf("PACKET:\n"); + for (i = 0; i < odp_packet_len(pkt); i++) { + printf("%02hhx ", pnt[i]); + } + printf("\n"); +} +#endif + +/* for local debug */ +void ofp_print_packet_buffer(const char *comment, uint8_t *p) +{ + static int first = 1; + FILE *f; + struct ofp_ip *ip; + uint16_t proto; + char *g; + +/* + * Filter "noise" + */ +#if 0 + if (p[12] == 0x00 && p[13] == 0x27) + return; + if (p[12] == 0x01 && p[13] == 0x98) + return; +#endif + if (first) { + f = fopen(DEFAULT_DEBUG_TXT_FILE_NAME, "w"); + fclose(f); + first = 0; + } + + f = fopen(DEFAULT_DEBUG_TXT_FILE_NAME, "a"); + + if (!f) + return; + + static struct timeval tv0; + struct timeval tv; + + gettimeofday(&tv, NULL); + if (tv0.tv_sec == 0) + tv0 = tv; + int ms = (tv.tv_sec*1000+tv.tv_usec/1000) - + (tv0.tv_sec*1000+tv0.tv_usec/1000); + + fprintf(f, "\n*************\n"); + fprintf(f, "[%d] %s: %d.%03d\n", 
odp_cpu_id(), comment, + ms/1000, ms%1000); + fprintf(f, "%s ->%s\n ", ofp_print_mac(p+6), ofp_print_mac(p)); + + if (p[12] == 0x81 && p[13] == 0x00) { + fprintf(f, "VLAN %d ", (p[14]<<8)|p[15]); + p += 4; + } + + if (p[12] == 0x88 && p[13] == 0x47) { + uint8_t *label = p+14; + int i; + + fprintf(f, "MPLS "); + while (1) { + fprintf(f, "[label=%d ttl=%d] ", + label[0]*16*256 + label[1]*16 + label[2]/16, + label[3]); + if (label[2] & 1) + break; + label += 4; + } + + if ((label[4] & 0xf0) == 0x40) { + label[2] = 0x08; /* ipv4 */ + label[3] = 0x00; + } else { + label[2] = 0x86; /* ipv6 */ + label[3] = 0xdd; + } + + label++; + for (i = 0; i < 12; i++) + *label-- = p[11 - i]; + p = label+1; + } + + if (p[12] == 0x08 && p[13] == 0x06) { + print_arp(f, (char *)(p + L2_HEADER_NO_VLAN_SIZE)); + } else if (p[12] == 0x86 && p[13] == 0xdd) { + print_ipv6(f, (char *)(p + L2_HEADER_NO_VLAN_SIZE)); + } else if (p[12] == 0x08 && p[13] == 0x00) { + ip = (struct ofp_ip *)(p + L2_HEADER_NO_VLAN_SIZE); + + if (ip->ip_p == 47) { /* GRE */ + g = ((char *)ip) + (ip->ip_hl << 2); + g += print_gre(f, g, &proto); + if (proto == 0x0800) + print_ipv4(f, g); + else if (proto == 0x86dd) + print_ipv6(f, g); + } else + print_ipv4(f, (char *)(p + L2_HEADER_NO_VLAN_SIZE)); + } else { + fprintf(f, "UNKNOWN ETH PACKET TYPE 0x%02x%02x ", + p[12], p[13]); + } + + fprintf(f, "\n"); + fclose(f); + fflush(stdout); +} + +void ofp_print_packet(const char *comment, odp_packet_t pkt) +{ + uint8_t *p; + uint32_t len; + + p = odp_packet_data(pkt); + len = odp_packet_len(pkt); + (void)len; + + ofp_print_packet_buffer(comment, p); + +#ifdef PRINT_PACKETS_BINARY + print_pkt_binary(pkt); +#endif +} diff --git a/src/ofp_errno.c b/src/ofp_errno.c new file mode 100644 index 00000000..0a28317f --- /dev/null +++ b/src/ofp_errno.c @@ -0,0 +1,140 @@ +/*- + * Copyright (c) 2014 Nokia + * Copyright (c) 2014 ENEA Software AB + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofpi_errno.h" + +static const char 
*ofp_errmsgs[] = { +"", +"Operation not permitted", /* OFP_EPERM */ +"No such file or directory", /* OFP_ENOENT */ +"No such process", /* OFP_ESRCH */ +"Interrupted system call", /* OFP_EINTR */ +"Input/output error", /* OFP_EIO */ +"Device not configured", /* OFP_ENXIO */ +"Argument list too long", /* OFP_E2BIG */ +"Exec format error", /* OFP_ENOEXEC */ +"Bad file descriptor", /* OFP_EBADF */ +"No child processes", /* OFP_ECHILD */ +"Resource deadlock avoided", /* OFP_EDEADLK */ + +"Cannot allocate memory", /* OFP_ENOMEM */ +"Permission denied", /* OFP_EACCES */ +"Bad address", /* OFP_EFAULT */ + +"Block device required", /* OFP_ENOTBLK */ + +"Device busy", /* OFP_EBUSY */ +"File exists", /* OFP_EEXIST */ +"Cross-device link", /* OFP_EXDEV */ +"Operation not supported by device", /* OFP_ENODEV */ +"Not a directory", /* OFP_ENOTDIR */ +"Is a directory", /* OFP_EISDIR */ +"Invalid argument", /* OFP_EINVAL */ +"Too many open files in system", /* OFP_ENFILE */ +"Too many open files", /* OFP_EMFILE */ +"Inappropriate ioctl for device", /* OFP_ENOTTY */ + +"Text file busy", /* OFP_ETXTBSY */ + +"File too large", /* OFP_EFBIG */ +"No space left on device", /* OFP_ENOSPC */ +"Illegal seek", /* OFP_ESPIPE */ +"Read-only filesystem", /* OFP_EROFS */ +"Too many links", /* OFP_EMLINK */ +"Broken pipe", /* OFP_EPIPE */ + +"Numerical argument out of domain", /* OFP_EDOM */ +"Result too large", /* OFP_ERANGE */ + +"Resource temporarily unavailable", /* OFP_EAGAIN */ + +"Operation now in progress", /* OFP_EINPROGRESS */ +"Operation already in progress", /* OFP_EALREADY */ + +"Socket operation on non-socket", /* OFP_ENOTSOCK */ +"Destination address required", /* OFP_EDESTADDRREQ */ +"Message too long", /* OFP_EMSGSIZE */ +"Protocol wrong type for socket", /* OFP_EPROTOTYPE */ +"Protocol not available", /* OFP_ENOPROTOOPT */ +"Protocol not supported", /* OFP_EPROTONOSUPPORT */ +"Socket type not supported", /* OFP_ESOCKTNOSUPPORT */ +"Operation not supported", /* OFP_ENOTSUP */ 
+"Protocol family not supported", /* OFP_EPFNOSUPPORT */ +"Address family not supported by protocol family", /* OFP_EAFNOSUPPORT */ +"Address already in use", /* OFP_EADDRINUSE */ +"Can't assign requested address", /* OFP_EADDRNOTAVAIL */ + +"Network is down", /* OFP_ENETDOWN */ +"Network is unreachable", /* OFP_ENETUNREACH */ +"Network dropped connection on reset", /* OFP_ENETRESET */ +"Software caused connection abort", /* OFP_ECONNABORTED */ +"Connection reset by peer", /* OFP_ECONNRESET */ +"No buffer space available", /* OFP_ENOBUFS */ +"Socket is already connected", /* OFP_EISCONN */ +"Socket is not connected", /* OFP_ENOTCONN */ +"Can't send after socket shutdown", /* OFP_ESHUTDOWN */ +"Too many references: can't splice", /* OFP_ETOOMANYREFS */ +"Operation timed out", /* OFP_ETIMEDOUT */ +"Connection refused", /* OFP_ECONNREFUSED */ + +"Too many levels of symbolic links", /* OFP_ELOOP */ + +"File name too long", /* OFP_ENAMETOOLONG */ + +"Host is down", /* OFP_EHOSTDOWN */ +"No route to host", /* OFP_EHOSTUNREACH */ + +"Directory not empty", /* OFP_ENOTEMPTY */ + +"Too many processes", /* OFP_EPROCLIM */ +"Too many users", /* OFP_EUSERS */ +"Disc quota exceeded", /* OFP_EDQUOT */ + +"Stale NFS file handle", /* OFP_ESTALE */ +"Too many levels of remote in path", /* OFP_EREMOTE */ +"RPC struct is bad", /* OFP_EBADRPC */ +"RPC version wrong", /* OFP_ERPCMISMATCH */ +"RPC prog. 
not avail", /* OFP_EPROGUNAVAIL */ +"Program version wrong", /* OFP_EPROGMISMATCH */ +"Bad procedure for program", /* OFP_EPROCUNAVAIL */ + +"No locks available", /* OFP_ENOLCK */ +"Function not implemented", /* OFP_ENOSYS */ + +"Inappropriate file type or format", /* OFP_EFTYPE */ +"Authentication error", /* OFP_EAUTH */ +"Need authenticator", /* OFP_ENEEDAUTH */ +"Identifier removed", /* OFP_EIDRM */ +"No message of desired type", /* OFP_ENOMSG */ +"Value too large to be stored in data type", /* OFP_EOVERFLOW */ +"Operation canceled", /* OFP_ECANCELED */ +"Illegal byte sequence", /* OFP_EILSEQ */ +"Attribute not found", /* OFP_ENOATTR */ + +"Programming error", /* OFP_EDOOFUS */ + +"Bad message", /* OFP_EBADMSG */ +"Multihop attempted", /* OFP_EMULTIHOP */ +"Link has been severed", /* OFP_ENOLINK */ +"Protocol error", /* OFP_EPROTO */ + +"Capabilities insufficient", /* OFP_ENOTCAPABLE */ +"Not permitted in capability mode", /* OFP_ECAPMODE */ +}; + +int ofp_errno; + +const char *ofp_strerror(int errnum) +{ + if (errnum < 0) + errnum = -errnum; + + if (errnum > OFP_ELAST) + return ""; + + return ofp_errmsgs[errnum]; +} diff --git a/src/ofp_gre.c b/src/ofp_gre.c new file mode 100644 index 00000000..1ccf9646 --- /dev/null +++ b/src/ofp_gre.c @@ -0,0 +1,115 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + * + */ + +#include +#include "api/ofp_types.h" +#include "api/ofp_pkt_processing.h" +#include "ofpi_in.h" +#include "ofpi_ip.h" +#include "ofpi_gre.h" +#include "ofpi_if_gre.h" +#include "ofpi_if_vlan.h" +#include "ofpi_ethernet.h" +#include "ofpi_portconf.h" +#include "ofpi_log.h" +#include "ofpi_hook.h" +#include "ofpi_util.h" + +int ofp_gre_input(odp_packet_t pkt, int off0) +{ + int res; + struct ofp_ifnet *dev, *dev_in; + struct ofp_ether_header *eth_hdr; + struct ofp_ether_vlan_header *eth_hdr_vlan; + struct ofp_greip *greip; + uint32_t grelen; + uint8_t eth_d_addr[OFP_ETHER_ADDR_LEN]; + uint8_t eth_s_addr[OFP_ETHER_ADDR_LEN]; + uint16_t ptype, offset, eth_hdr_len = OFP_ETHER_HDR_LEN; + + (void)off0; + + dev = odp_packet_user_ptr(pkt); + greip = odp_packet_l3_ptr(pkt, NULL); + + /* Validate tunnel */ + dev_in = ofp_get_ifnet_by_tunnel(greip->gi_dst.s_addr, + greip->gi_src.s_addr, dev->vrf); + if (dev_in == NULL) { + OFP_HOOK(OFP_HOOK_GRE, pkt, NULL, &res); + return res; + } + + /* save eth hdr data */ + if (dev->vlan) { + eth_hdr_vlan = odp_packet_l2_ptr(pkt, NULL); + memcpy(eth_d_addr, eth_hdr_vlan->evl_dhost, + OFP_ETHER_ADDR_LEN); + memcpy(eth_s_addr, eth_hdr_vlan->evl_shost, + OFP_ETHER_ADDR_LEN); + eth_hdr_len += OFP_ETHER_VLAN_ENCAP_LEN; + } else { + eth_hdr = odp_packet_l2_ptr(pkt, NULL); + memcpy(eth_d_addr, eth_hdr->ether_dhost, OFP_ETHER_ADDR_LEN); + memcpy(eth_s_addr, eth_hdr->ether_shost, OFP_ETHER_ADDR_LEN); + } + + /* Process gre header */ + ptype = greip->gi_ptype; + + grelen = 4; + if ((greip->gi_g.flags & OFP_GRE_CP) || + (greip->gi_g.flags & OFP_GRE_RP)) + grelen += 4; + if (greip->gi_g.flags & OFP_GRE_KP) + grelen += 4; + if (greip->gi_g.flags & OFP_GRE_SP) + grelen += 4; + + /* remove outerIP and GRE header */ + offset = odp_packet_l3_offset(pkt) + (greip->gi_i.ip_hl << 2) + grelen - + eth_hdr_len; + if (odp_packet_pull_head(pkt, offset) == NULL) { + OFP_ERR("Packet pull head failed\n"); + 
return OFP_PKT_DROP; + } + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, eth_hdr_len); + + /* Add eth header */ + if (dev->vlan) { + eth_hdr_vlan = odp_packet_l2_ptr(pkt, NULL); + memcpy(eth_hdr_vlan->evl_dhost, eth_d_addr, + OFP_ETHER_ADDR_LEN); + memcpy(eth_hdr_vlan->evl_dhost, eth_s_addr, + OFP_ETHER_ADDR_LEN); + eth_hdr_vlan->evl_encap_proto = odp_cpu_to_be_16(0x8100); + eth_hdr_vlan->evl_tag = odp_cpu_to_be_16(dev->vlan); + eth_hdr_vlan->evl_proto = ptype; + } else { + eth_hdr = odp_packet_l2_ptr(pkt, NULL); + memcpy(eth_hdr->ether_dhost, eth_d_addr, OFP_ETHER_ADDR_LEN); + memcpy(eth_hdr->ether_shost, eth_s_addr, OFP_ETHER_ADDR_LEN); + eth_hdr->ether_type = ptype; + } + + odp_packet_user_ptr_set(pkt, dev_in); + + switch (odp_be_to_cpu_16(ptype)) { + case OFP_ETHERTYPE_IP: + return ofp_ipv4_processing(pkt); +#ifdef INET6 + case OFP_ETHERTYPE_IPV6: + return ofp_ipv6_processing(pkt); +#endif /* INET6 */ + default: + return OFP_PKT_CONTINUE; + } + + return OFP_PKT_CONTINUE; +} diff --git a/src/ofp_hash.c b/src/ofp_hash.c new file mode 100644 index 00000000..9fefa674 --- /dev/null +++ b/src/ofp_hash.c @@ -0,0 +1,890 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. 
+------------------------------------------------------------------------------- +*/ + +#include /* defines printf for tests */ +#include /* defines time_t for timings in the test */ +#include /* defines uint32_t etc */ +#include + +#include "ofpi_hash.h" + +#if (defined(ODP_BYTE_ORDER) && defined(ODP_LITTLE_ENDIAN)) && \ + ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(ODP_BYTE_ORDER) && defined(ODP_BIG_ENDIAN)) && \ + ODP_BYTE_ORDER == ODP_BIG_ENDIAN +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. 
I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a, b, c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c, 16); c += b; \ + b -= a; b ^= rot(a, 19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. 
+ +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a, b, c) \ +{ \ + c ^= b; c -= rot(b, 14); \ + a ^= c; a -= rot(c, 11); \ + b ^= a; b -= rot(a, 25); \ + c ^= b; c -= rot(b, 16); \ + a ^= c; a -= rot(c, 4); \ + b ^= a; b -= rot(a, 14); \ + c ^= b; c -= rot(b, 24); \ +} + +/* +-------------------------------------------------------------------- + This works on all machines. To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function ofp_hashword() is identical to ofp_hashlittle() on little-endian + machines, and identical to ofp_hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. ofp_hashlittle() is more complicated than ofp_hashword() only because + ofp_hashlittle() has to dance around fitting the key bytes into registers. 
+Parameters:
+  k       - the key, an array of uint32_t values
+  length  - the length of the key, in uint32_ts
+  initval - the previous hash, or an arbitrary value
+Returns the 32-bit hash; for length == 0 this is the seeded state without
+any final mixing.
+--------------------------------------------------------------------
+*/
+uint32_t ofp_hashword(const uint32_t *k,
+		      size_t length,
+		      uint32_t initval)
+{
+	/* All three lanes start from the same length-and-seed value. */
+	const uint32_t seed = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+	uint32_t a = seed;
+	uint32_t b = seed;
+	uint32_t c = seed;
+
+	/* Consume full 3-word groups while more than 3 words remain. */
+	for (; length > 3; length -= 3, k += 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		mix(a, b, c);
+	}
+
+	/* Nothing left (only possible for an empty key): no final mixing. */
+	if (length == 0)
+		return c;
+
+	/* 1..3 words remain: fold in those that exist, then finalize. */
+	if (length == 3)
+		c += k[2];
+	if (length >= 2)
+		b += k[1];
+	a += k[0];
+	final(a, b, c);
+
+	/* report the result */
+	return c;
+}
+
+
+/*
+--------------------------------------------------------------------
+ofp_hashword2() -- same as ofp_hashword(), but take two seeds and return two
+32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
+both be initialized with seeds.  If you pass in (*pb)==0, the output
+(*pc) will be the same as the return value from ofp_hashword().
+Parameters:
+  k      - the key, an array of uint32_t values
+  length - the length of the key, in uint32_ts
+  pc     - IN: seed OUT: primary hash value
+  pb     - IN: more seed OUT: secondary hash value
+Both outputs are always written, also for length == 0.
+--------------------------------------------------------------------
+*/
+void ofp_hashword2(const uint32_t *k,
+		   size_t length,
+		   uint32_t *pc,
+		   uint32_t *pb)
+{
+	/* Primary seed feeds all three lanes; the second seed only lane c. */
+	const uint32_t seed = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
+	uint32_t a = seed;
+	uint32_t b = seed;
+	uint32_t c = seed + *pb;
+
+	/* Consume full 3-word groups while more than 3 words remain. */
+	for (; length > 3; length -= 3, k += 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		mix(a, b, c);
+	}
+
+	/* 1..3 trailing words get folded in and finalized; 0 words do not. */
+	if (length != 0) {
+		if (length == 3)
+			c += k[2];
+		if (length >= 2)
+			b += k[1];
+		a += k[0];
+		final(a, b, c);
+	}
+
+	/* report the result */
+	*pc = c;
+	*pb = b;
+}
+
+
+/*
+-------------------------------------------------------------------------------
+ofp_hashlittle() -- hash a variable-length key into a 32-bit value
+  k       : the key (the unaligned variable-length array of bytes)
+  length  : the length of the key, counting by bytes
+  initval : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = ofp_hashlittle(k[i], len[i], h);
+
+NOTE(review): everything from "i<n" above down to the first
+"while (length > 12)" below was lost when this patch was extracted.
+The function prologue has been rebuilt to mirror ofp_hashlittle2() and
+ofp_hashbig() further down; any remaining comment text that originally
+stood here is missing -- confirm against the original commit.
+-------------------------------------------------------------------------------
+*/
+uint32_t ofp_hashlittle(const void *key, size_t length, uint32_t initval)
+{
+	uint32_t a, b, c; /* internal state */
+	/* needed for Mac Powerbook G4 */
+	union { const void *ptr; size_t i; } u;
+
+	/* Set up the internal state */
+	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+	u.ptr = key;
+	if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+		/* read 32-bit chunks */
+		const uint32_t *k = (const uint32_t *)key;
+#ifdef VALGRIND
+		const uint8_t *k8;
+#endif
+		/* all but last block: aligned reads and
+		   affect 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
+			mix(a, b, c);
+			length -= 12;
+			k += 3;
+		}
+
+/* handle the last (probably partial) block */
+/*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read.  Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string.  Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this.  But VALGRIND will
+ * still catch it and complain.  The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#ifndef VALGRIND
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2]&0xffffff; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2]&0xffff; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2]&0xff; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1]&0xffffff; a += k[0]; break;
+		case 6:
+			b += k[1]&0xffff; a += k[0]; break;
+		case 5:
+			b += k[1]&0xff; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0]&0xffffff; break;
+		case 2:
+			a += k[0]&0xffff; break;
+		case 1:
+			a += k[0]&0xff; break;
+		case 0:
+			return c; /* zero length strings require no mixing */
+		}
+
+#else /* make valgrind happy */
+
+		k8 = (const uint8_t *)k;
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += ((uint32_t)k8[10])<<16; /* fall through */
+		case 10:
+			c += ((uint32_t)k8[9])<<8; /* fall through */
+		case 9:
+			c += k8[8]; /* fall through */
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += ((uint32_t)k8[6])<<16; /* fall through */
+		case 6:
+			b += ((uint32_t)k8[5])<<8; /* fall through */
+		case 5:
+			b += k8[4]; /* fall through */
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += ((uint32_t)k8[2])<<16; /* fall through */
+		case 2:
+			a += 
((uint32_t)k8[1])<<8; /* fall through */ + case 1: + a += k8[0]; break; + case 0: + return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + /* read 16-bit chunks */ + const uint16_t *k = (const uint16_t *)key; + const uint8_t *k8; + + /* all but last block: aligned reads and different mixing */ + while (length > 12) { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a, b, c); + length -= 12; + k += 6; + } + + /* handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch (length) { + case 12: + c += k[4]+(((uint32_t)k[5])<<16); + b += k[2]+(((uint32_t)k[3])<<16); + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 11: + c += ((uint32_t)k8[10])<<16; /* fall through */ + case 10: + c += k[4]; + b += k[2]+(((uint32_t)k[3])<<16); + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 9: + c += k8[8]; /* fall through */ + case 8: + b += k[2]+(((uint32_t)k[3])<<16); + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 7: + b += ((uint32_t)k8[6])<<16; /* fall through */ + case 6: + b += k[2]; + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 5: + b += k8[4]; /* fall through */ + case 4: + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 3: + a += ((uint32_t)k8[2])<<16; /* fall through */ + case 2: + a += k[0]; + break; + case 1: + a += k8[0]; + break; + case 0: + return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /* all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a, b, c); + length -= 12; + k += 12; + } 
+ + /* last block: affect all 32 bits of (c) */ + switch (length) { + case 12: + c += ((uint32_t)k[11])<<24; /* fall through */ + case 11: + c += ((uint32_t)k[10])<<16; /* fall through */ + case 10: + c += ((uint32_t)k[9])<<8; /* fall through */ + case 9: + c += k[8]; /* fall through */ + case 8: + b += ((uint32_t)k[7])<<24; /* fall through */ + case 7: + b += ((uint32_t)k[6])<<16; /* fall through */ + case 6: + b += ((uint32_t)k[5])<<8; /* fall through */ + case 5: + b += k[4]; /* fall through */ + case 4: + a += ((uint32_t)k[3])<<24; /* fall through */ + case 3: + a += ((uint32_t)k[2])<<16; /* fall through */ + case 2: + a += ((uint32_t)k[1])<<8; /* fall through */ + case 1: + a += k[0]; + break; + case 0: + return c; + } + } + + final(a, b, c); + return c; + } + + +/* + * ofp_hashlittle2: return 2 32-bit hash values + * + * This is identical to ofp_hashlittle(), except it returns two 32-bit hash + * values instead of just one. This is good enough for hash table + * lookup with 2^^64 buckets, or if you want a second hash if you're not + * happy with the first, or if you want a probably-unique 64-bit ID for + * the key. *pc is better mixed than *pb, so use *pc first. If you want + * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". 
+ * Parameters: + * key - the key to hash + * length - length of the key + * pc - IN: primary initval, OUT: primary hash + * pb - IN: secondary initval, OUT: secondary hash + */ +void ofp_hashlittle2(const void *key, + size_t length, + uint32_t *pc, + uint32_t *pb) +{ + uint32_t a, b, c; /* internal state */ + /* needed for Mac Powerbook G4 */ + union { const void *ptr; size_t i; } u; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; + c += *pb; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + /* read 32-bit chunks */ + const uint32_t *k = (const uint32_t *)key; +#ifdef VALGRIND + const uint8_t *k8; +#endif + /* all but last block: aligned reads and + affect 32 bits of (a, b, c) */ + while (length > 12) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a, b, c); + length -= 12; + k += 3; + } + +/* handle the last (probably partial) block */ +/* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch (length) { + case 12: + c += k[2]; b += k[1]; a += k[0]; break; + case 11: + c += k[2]&0xffffff; b += k[1]; a += k[0]; break; + case 10: + c += k[2]&0xffff; b += k[1]; a += k[0]; break; + case 9: + c += k[2]&0xff; b += k[1]; a += k[0]; break; + case 8: + b += k[1]; a += k[0]; break; + case 7: + b += k[1]&0xffffff; a += k[0]; break; + case 6: + b += k[1]&0xffff; a += k[0]; break; + case 5: + b += k[1]&0xff; a += k[0]; break; + case 4: + a += k[0]; break; + case 3: + a += k[0]&0xffffff; break; + case 2: + a += k[0]&0xffff; break; + case 1: + a += k[0]&0xff; break; + case 0: + /* zero length strings require no mixing */ + *pc = c; *pb = b; return; + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch (length) { + case 12: + c += k[2]; b += k[1]; a += k[0]; break; + case 11: + c += ((uint32_t)k8[10])<<16; /* fall through */ + case 10: + c += ((uint32_t)k8[9])<<8; /* fall through */ + case 9: + c += k8[8]; /* fall through */ + case 8: + b += k[1]; a += k[0]; break; + case 7: + b += ((uint32_t)k8[6])<<16; /* fall through */ + case 6: + b += ((uint32_t)k8[5])<<8; /* fall through */ + case 5: + b += k8[4]; /* fall through */ + case 4: + a += k[0]; break; + case 3: + a += ((uint32_t)k8[2])<<16; /* fall through */ + case 2: + a += ((uint32_t)k8[1])<<8; /* fall through */ + case 1: + a += k8[0]; break; + case 0: + /* zero length strings require no mixing */ + *pc = c; *pb = b; return; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + /* read 16-bit chunks */ + const uint16_t *k = (const uint16_t *)key; + const uint8_t *k8; + + /* all but last block: + aligned reads and different mixing */ + while (length > 12) { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a, b, c); + length -= 12; + k += 6; + } + + /* handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch (length) { + case 12: + c 
+= k[4]+(((uint32_t)k[5])<<16); + b += k[2]+(((uint32_t)k[3])<<16); + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 11: + c += ((uint32_t)k8[10])<<16; /* fall through */ + case 10: + c += k[4]; + b += k[2]+(((uint32_t)k[3])<<16); + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 9: + c += k8[8]; /* fall through */ + case 8: + b += k[2]+(((uint32_t)k[3])<<16); + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 7: + b += ((uint32_t)k8[6])<<16; /* fall through */ + case 6: + b += k[2]; + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 5: + b += k8[4]; /* fall through */ + case 4: + a += k[0]+(((uint32_t)k[1])<<16); + break; + case 3: + a += ((uint32_t)k8[2])<<16; /* fall through */ + case 2: + a += k[0]; + break; + case 1: + a += k8[0]; + break; + case 0: + /* zero length strings require no mixing */ + *pc = c; *pb = b; return; + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /* all but the last block: + affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a, b, c); + length -= 12; + k += 12; + } + + /* last block: affect all 32 bits of (c) */ + switch (length) { + case 12: + c += ((uint32_t)k[11])<<24; /* fall through */ + case 11: + c += ((uint32_t)k[10])<<16; /* fall through */ + case 10: + c += ((uint32_t)k[9])<<8; /* fall through */ + case 9: + c += k[8]; /* fall through */ + case 8: + b += ((uint32_t)k[7])<<24; /* fall through */ + case 7: + b += ((uint32_t)k[6])<<16; /* fall through */ + case 6: + b += ((uint32_t)k[5])<<8; /* fall through */ + case 5: + b += k[4]; /* fall through */ + case 4: + a += ((uint32_t)k[3])<<24; /* fall through */ + case 3: + a += ((uint32_t)k[2])<<16; /* fall 
through */ + case 2: + a += ((uint32_t)k[1])<<8; /* fall through */ + case 1: + a += k[0]; + break; + case 0: + /* zero length strings require no mixing */ + *pc = c; *pb = b; return; + } + } + + final(a, b, c); + *pc = c; *pb = b; +} + + + +/* + * ofp_hashbig(): + * This is the same as ofp_hashword() on big-endian machines. It is different + * from ofp_hashlittle() on all machines. ofp_hashbig() takes advantage of + * big-endian byte ordering. + */ +uint32_t ofp_hashbig(const void *key, size_t length, uint32_t initval) +{ + uint32_t a, b, c; + /* to cast key to (size_t) happily */ + union { const void *ptr; size_t i; } u; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + /* read 32-bit chunks */ + const uint32_t *k = (const uint32_t *)key; +#ifdef VALGRIND + const uint8_t *k8; +#endif + /* all but last block: + aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a, b, c); + length -= 12; + k += 3; + } + +/* handle the last (probably partial) block */ +/* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch (length) { + case 12: + c += k[2]; b += k[1]; a += k[0]; break; + case 11: + c += k[2]&0xffffff00; b += k[1]; a += k[0]; break; + case 10: + c += k[2]&0xffff0000; b += k[1]; a += k[0]; break; + case 9: + c += k[2]&0xff000000; b += k[1]; a += k[0]; break; + case 8: + b += k[1]; a += k[0]; break; + case 7: + b += k[1]&0xffffff00; a += k[0]; break; + case 6: + b += k[1]&0xffff0000; a += k[0]; break; + case 5: + b += k[1]&0xff000000; a += k[0]; break; + case 4: + a += k[0]; break; + case 3: + + a += k[0]&0xffffff00; break; + case 2: + a += k[0]&0xffff0000; break; + case 1: + a += k[0]&0xff000000; break; + case 0: + return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch (length) { /* all the case statements fall through */ + case 12: + c += k[2]; b += k[1]; a += k[0]; break; + case 11: + c += ((uint32_t)k8[10])<<8; /* fall through */ + case 10: + c += ((uint32_t)k8[9])<<16; /* fall through */ + case 9: + c += ((uint32_t)k8[8])<<24; /* fall through */ + case 8: + b += k[1]; a += k[0]; break; + case 7: + b += ((uint32_t)k8[6])<<8; /* fall through */ + case 6: + b += ((uint32_t)k8[5])<<16; /* fall through */ + case 5: + b += ((uint32_t)k8[4])<<24; /* fall through */ + case 4: + a += k[0]; break; + case 3: + a += ((uint32_t)k8[2])<<8; /* fall through */ + case 2: + a += ((uint32_t)k8[1])<<16; /* fall through */ + case 1: + a += ((uint32_t)k8[0])<<24; break; + case 0: + return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /* all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += 
((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a, b, c); + length -= 12; + k += 12; + } + + /* last block: affect all 32 bits of (c) */ + switch (length) { + case 12: + c += k[11]; /* fall through */ + case 11: + c += ((uint32_t)k[10])<<8; /* fall through */ + case 10: + c += ((uint32_t)k[9])<<16; /* fall through */ + case 9: + c += ((uint32_t)k[8])<<24; /* fall through */ + case 8: + b += k[7]; /* fall through */ + case 7: + b += ((uint32_t)k[6])<<8; /* fall through */ + case 6: + b += ((uint32_t)k[5])<<16; /* fall through */ + case 5: + b += ((uint32_t)k[4])<<24; /* fall through */ + case 4: + a += k[3]; /* fall through */ + case 3: + a += ((uint32_t)k[2])<<8; /* fall through */ + case 2: + a += ((uint32_t)k[1])<<16; /* fall through */ + case 1: + a += ((uint32_t)k[0])<<24; + break; + case 0: + return c; + } + } + + final(a, b, c); + return c; +} diff --git a/src/ofp_hook.c b/src/ofp_hook.c new file mode 100644 index 00000000..2a2c50f7 --- /dev/null +++ b/src/ofp_hook.c @@ -0,0 +1,56 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*
+ * NOTE(review): the header names of the three system #include lines below
+ * were lost when this patch was extracted; restore them from the original
+ * commit.
+ */
+#include
+#include
+
+#include
+
+#include "ofpi_log.h"
+#include "ofpi_hook.h"
+
+/* Layout of the "OfpHookShMem" region: one hook slot per hook id. */
+typedef struct {
+	ofp_pkt_hook pkt_hook[OFP_HOOK_MAX];
+} hook_shm_t;
+
+/* Per-thread pointer to the region; NULL until alloc/lookup has run. */
+static __thread hook_shm_t *shm_hook = NULL;
+
+/*
+ * Return the first element of the hook table, or NULL when the calling
+ * thread has not set up its pointer to the region yet.
+ */
+inline ofp_pkt_hook *ofp_get_packet_hooks(void)
+{
+	return shm_hook ? shm_hook->pkt_hook : NULL;
+}
+
+/*
+ * Reserve the "OfpHookShMem" region and fill it with the OFP_HOOK_MAX
+ * entries found at pkt_hook_init.  Calls OFP_ABORT if no address can be
+ * obtained for the region.
+ */
+void ofp_hook_alloc_shared_memory(ofp_pkt_hook* pkt_hook_init)
+{
+	odp_shm_t region = odp_shm_reserve("OfpHookShMem", sizeof(hook_shm_t),
+					   ODP_CACHE_LINE_SIZE, 0);
+
+	shm_hook = odp_shm_addr(region);
+	if (!shm_hook) {
+		OFP_ABORT("Error: Hook shared mem alloc failed on core: %u.\n",
+			  odp_cpu_id());
+	}
+
+	/* the array size equals OFP_HOOK_MAX * sizeof(ofp_pkt_hook) */
+	memcpy(shm_hook->pkt_hook, pkt_hook_init, sizeof(shm_hook->pkt_hook));
+}
+
+/*
+ * Look up the already reserved "OfpHookShMem" region and point this
+ * thread's shm_hook at it.  Calls OFP_ABORT if no address is returned.
+ */
+void ofp_hook_lookup_shared_memory(void)
+{
+	odp_shm_t region = odp_shm_lookup("OfpHookShMem");
+
+	shm_hook = odp_shm_addr(region);
+	if (!shm_hook) {
+		OFP_ABORT("Error: Hook shared mem lookup failed on core: %u.\n",
+			  odp_cpu_id());
+	}
+}
diff --git a/src/ofp_icmp.c b/src/ofp_icmp.c
new file mode 100644
index 00000000..bd6ac467
--- /dev/null
+++ b/src/ofp_icmp.c
@@ -0,0 +1,746 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + */ +#include "ofpi.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_protosw.h" +#include "ofpi_socket.h" +#include "ofpi_route.h" +#include "ofpi_portconf.h" +#include "ofpi_log.h" +#include "ofpi_util.h" +/* ODP should have support to get time and date like gettimeofday from Linux*/ +#include +/* +#include +__FBSDID("$FreeBSD: release/9.1.0/sys/netinet/ip_icmp.c 237913 2012-07-01 09:00:29Z tuexen $"); +*/ + +/* + * ICMP routines: error generation, receive packet processing, and + * routines to turnaround packets back to the originator, and + * host table maintenance routines. 
+ */ +/* +static VNET_DEFINE(int, icmplim) = 200; +#define V_icmplim VNET(icmplim) +SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW, + &VNET_NAME(icmplim), 0, + "Maximum number of ICMP responses per second"); + +static VNET_DEFINE(int, icmplim_output) = 1; +#define V_icmplim_output VNET(icmplim_output) +SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW, + &VNET_NAME(icmplim_output), 0, + "Enable rate limiting of ICMP responses"); + +#ifdef INET +VNET_DEFINE(struct icmpstat, icmpstat); +SYSCTL_VNET_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW, + &VNET_NAME(icmpstat), icmpstat, ""); + +static VNET_DEFINE(int, icmpmaskrepl) = 0; +#define V_icmpmaskrepl VNET(icmpmaskrepl) +SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW, + &VNET_NAME(icmpmaskrepl), 0, + "Reply to ICMP Address Mask Request packets."); + +static VNET_DEFINE(u_int, icmpmaskfake) = 0; +#define V_icmpmaskfake VNET(icmpmaskfake) +SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW, + &VNET_NAME(icmpmaskfake), 0, + "Fake reply to ICMP Address Mask Request packets."); + +static VNET_DEFINE(int, drop_redirect) = 0; +#define V_drop_redirect VNET(drop_redirect) +SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, + &VNET_NAME(drop_redirect), 0, + "Ignore ICMP redirects"); + +static VNET_DEFINE(int, log_redirect) = 0; +#define V_log_redirect VNET(log_redirect) +SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, + &VNET_NAME(log_redirect), 0, + "Log ICMP redirects to the console"); + +static VNET_DEFINE(char, reply_src[IFNAMSIZ]); +#define V_reply_src VNET(reply_src) +SYSCTL_VNET_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW, + &VNET_NAME(reply_src), IFNAMSIZ, + "icmp reply source for non-local packets."); + +static VNET_DEFINE(int, icmp_rfi) = 0; +#define V_icmp_rfi VNET(icmp_rfi) +SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW, + &VNET_NAME(icmp_rfi), 0, + 
"ICMP reply from incoming interface for non-local packets"); + +static VNET_DEFINE(int, icmp_quotelen) = 8; +#define V_icmp_quotelen VNET(icmp_quotelen) +SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_RW, + &VNET_NAME(icmp_quotelen), 0, + "Number of bytes from original packet to quote in ICMP reply"); +*/ +/* + * ICMP broadcast echo sysctl + */ +/* +static VNET_DEFINE(int, icmpbmcastecho) = 0; +#define V_icmpbmcastecho VNET(icmpbmcastecho) +SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, + &VNET_NAME(icmpbmcastecho), 0, + ""); +*/ + +#ifdef ICMPPRINTFS +int icmpprintfs = 0; +#endif + +static int icmp_reflect(odp_packet_t pkt); +static void icmp_send(odp_packet_t pkt, struct ofp_nh_entry *nh); + +extern struct protosw inetsw[]; + +/* + * Return milliseconds since 00:00 GMT in network format. + */ +static uint32_t +iptime(void) +{ + struct timeval tv; + uint32_t t; + gettimeofday(&tv, NULL); + + t = (tv.tv_sec % (24*60*60)) * 1000 + tv.tv_usec / 1000; + return (odp_cpu_to_be_32(t)); +} + + + +/* + * Kernel module interface for updating icmpstat. The argument is an index + * into icmpstat treated as an array of u_long. While this encodes the + * general layout of icmpstat into the caller, it doesn't encode its + * location, so that future changes to add, for example, per-CPU stats + * support won't cause binary compatibility problems for kernel modules. + */ +/* +void +kmod_icmpstat_inc(int statnum) +{ + + (*((u_long *)&V_icmpstat + statnum))++; +} +*/ + +/* + * Generate an error packet of type error + * in response to bad packet ip. 
+ */
+/*
+ * Parameters:
+ *   pkt_in - the offending packet; its IP header is read through the L3
+ *            pointer and its ip_sum field is recomputed in place.
+ *   type   - ICMP type of the error; a value above OFP_ICMP_MAXTYPE aborts.
+ *   code   - ICMP code; for OFP_ICMP_PARAMPROB it is stored as the
+ *            parameter pointer and the code field is set to 0.
+ *   dest   - gateway address, used only for OFP_ICMP_REDIRECT.
+ *   mtu    - next-hop MTU, used only for OFP_ICMP_UNREACH with
+ *            OFP_ICMP_UNREACH_NEEDFRAG and only when non-zero.
+ * Returns the result of icmp_reflect() on the newly built packet, or
+ * OFP_PKT_DROP when no error message is generated.
+ */
+int
+ofp_icmp_error(odp_packet_t pkt_in, int type, int code, uint32_t dest, int mtu)
+{
+	register struct ofp_ip *ip_in = (struct ofp_ip *)odp_packet_l3_ptr(pkt_in, NULL);
+	register unsigned ip_hlen = ip_in->ip_hl << 2;
+	/* ip header + icmp type+code+checksum(4B) + ip addr(4B) + ip header + 8B of original data */
+	const uint16_t icmp_len = (ip_hlen * 2) + 16;
+	ip_in->ip_sum = 0;
+	ip_in->ip_sum = ofp_in_cksum((uint16_t *)ip_in, ip_in->ip_hl<<2);
+
+	if((u_int16_t)type > OFP_ICMP_MAXTYPE)
+		OFP_ABORT("%s: illegal ICMP type", __func__);
+
+#ifdef ICMPPRINTFS
+	/* was printing the undeclared BSD name "oip"; ip_in is its equivalent */
+	if (icmpprintfs)
+		printf("icmp_error(%p, %x, %d)\n", (void *)ip_in, type, code);
+#endif
+/*	if (type != ICMP_REDIRECT)
+		ICMPSTAT_INC(icps_error);*/
+
+	/*
+	 * Don't send error:
+	 *  if the original packet was encrypted.
+	 *  if not the first fragment of message.
+	 *  in response to a multicast or broadcast packet.
+	 *  if the old packet protocol was an ICMP error message.
+	 */
+
+	if ((odp_be_to_cpu_16(ip_in->ip_off) & OFP_IP_OFFMASK))
+		goto freeit;
+/*	if (n->m_flags & (M_BCAST|M_MCAST))
+		goto freeit;*/
+	if (ip_in->ip_p == OFP_IPPROTO_ICMP && type != OFP_ICMP_REDIRECT &&
+	  odp_packet_len(pkt_in) >= ip_hlen + OFP_ICMP_MINLEN &&
+	  !OFP_ICMP_INFOTYPE(((struct ofp_icmp *)
+		((uintptr_t)ip_in + ip_hlen))->icmp_type)) {
+		/*ICMPSTAT_INC(icps_oldicmp);*/
+		goto freeit;
+	}
+	/*
+	 * Calculate length to quote from original packet and
+	 * prevent the ICMP mbuf from overflowing.
+	 * Unfortunately this is non-trivial since ip_forward()
+	 * sends us truncated packets.
+	 */
+/*	if (oip->ip_p == IPPROTO_TCP) {
+		struct tcphdr *th;
+		int tcphlen;
+
+		if (oiphlen + sizeof(struct tcphdr) > n->m_len &&
+		    n->m_next == NULL)
+			goto stdreply;
+		if (n->m_len < oiphlen + sizeof(struct tcphdr) &&
+		    ((n = m_pullup(n, oiphlen + sizeof(struct tcphdr))) == NULL))
+			goto freeit;
+		th = (struct tcphdr *)((caddr_t)oip + oiphlen);
+		tcphlen = th->th_off << 2;
+		if (tcphlen < sizeof(struct tcphdr))
+			goto freeit;
+		if (oip->ip_len < oiphlen + tcphlen)
+			goto freeit;
+		if (oiphlen + tcphlen > n->m_len && n->m_next == NULL)
+			goto stdreply;
+		if (n->m_len < oiphlen + tcphlen &&
+		    ((n = m_pullup(n, oiphlen + tcphlen)) == NULL))
+			goto freeit;
+		icmpelen = max(tcphlen, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+	} else
+stdreply:	icmpelen = max(8, min(V_icmp_quotelen, ip_in->ip_len - ip_hlen));
+#ifdef MAC
+	mac_netinet_icmp_reply(n, m);
+#endif
+*/
+	odp_packet_t pkt = odp_packet_alloc(odp_packet_pool(pkt_in),
+		icmp_len + odp_packet_l3_offset(pkt_in) -
+		odp_packet_l2_offset(pkt_in));
+	if (pkt == ODP_PACKET_INVALID)
+		goto freeit;
+	/*TODO Sometimes above odp_packet_alloc will invalidate the pkt_in*/
+	if (odp_packet_l3_ptr(pkt_in, NULL) == NULL) {
+		odp_packet_free(pkt);
+		goto freeit;
+	}
+
+	odp_packet_l2_offset_set(pkt, odp_packet_l2_offset(pkt_in));
+	odp_packet_l3_offset_set(pkt, odp_packet_l3_offset(pkt_in));
+
+	memcpy(odp_packet_l3_ptr(pkt, NULL),
+		odp_packet_l3_ptr(pkt_in, NULL),
+		icmp_len);
+
+	struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL);
+	struct ofp_icmp *icp = (struct ofp_icmp *)((uint8_t *)ip + ip_hlen);
+	/*
+	 * Copy the quotation into ICMP message and
+	 * convert quoted IP header back to network representation.
+	 */
+	memcpy(&icp->ofp_icmp_ip, ip_in, ip_hlen);
+	/*
+	 * Quote at most 8 bytes of the original payload.  The header is
+	 * still in network byte order here (cf. the ip_off test above), so
+	 * ip_len has to be converted before doing arithmetic on it, and a
+	 * total length that does not exceed the header length must yield 0
+	 * instead of wrapping around.
+	 */
+	const unsigned ip_in_len = odp_be_to_cpu_16(ip_in->ip_len);
+	unsigned quote_len = (ip_in_len > ip_hlen) ? ip_in_len - ip_hlen : 0;
+
+	if (quote_len > 8)
+		quote_len = 8;
+	memcpy((void *)((uintptr_t)(&icp->ofp_icmp_ip) + ip_hlen),
+		(void *)((uintptr_t)ip_in + ip_hlen),
+		quote_len);
+
+	icp->icmp_type = type;
+
+	if (type == OFP_ICMP_REDIRECT)
+		icp->ofp_icmp_gwaddr.s_addr = dest;
+	else {
+		icp->ofp_icmp_void = 0;
+		/*
+		 * The following assignments assume an overlay with the
+		 * just zeroed icmp_void field.
+		 */
+		if (type == OFP_ICMP_PARAMPROB) {
+			icp->ofp_icmp_pptr = code;
+			code = 0;
+		} else if (type == OFP_ICMP_UNREACH &&
+			code == OFP_ICMP_UNREACH_NEEDFRAG && mtu) {
+			icp->ofp_icmp_nextmtu = odp_cpu_to_be_16(mtu);
+		}
+	}
+	icp->icmp_code = code;
+
+	ip->ip_len = odp_cpu_to_be_16(icmp_len);
+	ip->ip_v = 4;
+	ip->ip_hl = 5;
+	ip->ip_p = OFP_IPPROTO_ICMP;
+	ip->ip_tos = 0;
+
+	/* the reply carries the same user pointer as the offending packet */
+	odp_packet_user_ptr_set(pkt, odp_packet_user_ptr(pkt_in));
+
+	return icmp_reflect(pkt);
+freeit:
+	return OFP_PKT_DROP;
+}
+
+/*
+ * Process a received ICMP message.
+ */
+int
+ofp_icmp_input(odp_packet_t pkt, int off)
+{
+	struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL);
+	struct ofp_icmp *icp = (struct ofp_icmp *)((uint8_t *)ip + off);
+	struct ofp_sockaddr_in icmpsrc, icmpdst, icmpgw;
+	int icmplen = odp_be_to_cpu_16(ip->ip_len);
+	int code;
+#ifdef PROMISCUOUS_INET
+	/* XXX ICMP plumbing is currently incomplete for promiscuous mode interfaces not in fib 0 */
+	if ((m->m_pkthdr.rcvif->if_flags & IFF_PROMISCINET) &&
+	    (M_GETFIB(m) > 0))
+		goto freeit;
+#endif
+
+	/*
+	 * Locate icmp structure in mbuf, and check
+	 * that not corrupted and of at least minimum length.
+ */ +#ifdef ICMPPRINTFS + if (icmpprintfs) { + char buf[4 * sizeof "123"]; + strcpy(buf, inet_ntoa(ip->ip_src)); + printf("icmp_input from %s to %s, len %d\n", + buf, inet_ntoa(ip->ip_dst), icmplen); + } +#endif + + if (icmplen < OFP_ICMP_MINLEN) { +/* ICMPSTAT_INC(icps_tooshort);*/ + goto freeit; + } + + if (ofp_cksum(pkt, odp_packet_l3_offset(pkt) + off, + icmplen - (ip->ip_hl << 2))) { + /*ICMPSTAT_INC(icps_checksum);*/ + goto freeit; + } + +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("icmp_input, type %d code %d\n", icp->icmp_type, + icp->icmp_code); +#endif + /* + * Message type specific processing. + */ + if (icp->icmp_type > OFP_ICMP_MAXTYPE) + goto raw; + + /* Initialize */ + bzero(&icmpsrc, sizeof(icmpsrc)); + icmpsrc.sin_len = sizeof(struct ofp_sockaddr_in); + icmpsrc.sin_family = OFP_AF_INET; + bzero(&icmpdst, sizeof(icmpdst)); + icmpdst.sin_len = sizeof(struct ofp_sockaddr_in); + icmpdst.sin_family = OFP_AF_INET; + bzero(&icmpgw, sizeof(icmpgw)); + icmpgw.sin_len = sizeof(struct ofp_sockaddr_in); + icmpgw.sin_family = OFP_AF_INET; + +/*TODO ICMP stats + ICMPSTAT_INC(icps_inhist[icp->icmp_type]);*/ + code = icp->icmp_code; + switch (icp->icmp_type) { + + case OFP_ICMP_UNREACH: + switch (code) { + case OFP_ICMP_UNREACH_NET: + case OFP_ICMP_UNREACH_HOST: + case OFP_ICMP_UNREACH_SRCFAIL: + case OFP_ICMP_UNREACH_NET_UNKNOWN: + case OFP_ICMP_UNREACH_HOST_UNKNOWN: + case OFP_ICMP_UNREACH_ISOLATED: + case OFP_ICMP_UNREACH_TOSNET: + case OFP_ICMP_UNREACH_TOSHOST: + case OFP_ICMP_UNREACH_HOST_PRECEDENCE: + case OFP_ICMP_UNREACH_PRECEDENCE_CUTOFF: + code = OFP_PRC_UNREACH_NET; + break; + + case OFP_ICMP_UNREACH_NEEDFRAG: + code = OFP_PRC_MSGSIZE; + break; + + /* + * RFC 1122, Sections 3.2.2.1 and 4.2.3.9. 
+ * Treat subcodes 2,3 as immediate RST + */ + case OFP_ICMP_UNREACH_PROTOCOL: + case OFP_ICMP_UNREACH_PORT: + code = OFP_PRC_UNREACH_PORT; + break; + + case OFP_ICMP_UNREACH_NET_PROHIB: + case OFP_ICMP_UNREACH_HOST_PROHIB: + case OFP_ICMP_UNREACH_FILTER_PROHIB: + code = OFP_PRC_UNREACH_ADMIN_PROHIB; + break; + + default: + goto badcode; + } + goto deliver; + + case OFP_ICMP_TIMXCEED: + if (code > 1) + goto badcode; + code += OFP_PRC_TIMXCEED_INTRANS; + goto deliver; + + case OFP_ICMP_PARAMPROB: + if (code > 1) + goto badcode; + code = OFP_PRC_PARAMPROB; + goto deliver; + + case OFP_ICMP_SOURCEQUENCH: + if (code) + goto badcode; + code = OFP_PRC_QUENCH; + deliver: + /* + * Problem with datagram; advise higher level routines. + */ + if (((unsigned int)icmplen) < OFP_ICMP_ADVLENMIN || icmplen < OFP_ICMP_ADVLEN(icp) || + icp->ofp_icmp_ip.ip_hl < (sizeof(struct ofp_ip) >> 2)) { +/* ICMPSTAT_INC(icps_badlen);*/ + goto freeit; + } + icp->ofp_icmp_ip.ip_len = odp_be_to_cpu_16(icp->ofp_icmp_ip.ip_len); + /* Discard ICMP's in response to multicast packets */ +/* if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) + goto badcode; +*/ +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); +#endif + icmpsrc.sin_addr = icp->ofp_icmp_ip.ip_dst; + /* + * XXX if the packet contains [IPv4 AH TCP], we can't make a + * notification to TCP layer. 
+ */ +/*TODO notify other protocols of the ICMP msg receive + ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; + if (ctlfunc) + (*ctlfunc)(code, (struct sockaddr *)&icmpsrc, + (void *)&icp->icmp_ip); +*/ + break; + + badcode: +/* ICMPSTAT_INC(icps_badcode); */ + break; + + case OFP_ICMP_ECHO: +/* if (!V_icmpbmcastecho + && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { + ICMPSTAT_INC(icps_bmcastecho); + break; + } +*/ + icp->icmp_type = OFP_ICMP_ECHOREPLY; + + goto reflect; + + case OFP_ICMP_TSTAMP: +/* + if (!V_icmpbmcastecho + && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { + ICMPSTAT_INC(icps_bmcasttstamp); + break; + } +*/ + if ((unsigned int)icmplen < OFP_ICMP_TSLEN) { +/* ICMPSTAT_INC(icps_badlen);*/ + break; + } + icp->icmp_type = OFP_ICMP_TSTAMPREPLY; + icp->ofp_icmp_rtime = iptime(); + icp->ofp_icmp_ttime = icp->ofp_icmp_rtime; /* bogus, do later! */ + + goto reflect; + + case OFP_ICMP_MASKREQ: +/*TODO if (V_icmpmaskrepl == 0)*/ + break; +reflect: +/* + ICMPSTAT_INC(icps_reflect); + ICMPSTAT_INC(icps_outhist[icp->icmp_type]); +*/ + return icmp_reflect(pkt); + + case OFP_ICMP_REDIRECT: + /*if (V_log_redirect)*/ { + u_long src, dst, gw; + + src = odp_be_to_cpu_32(ip->ip_src.s_addr); + dst = odp_be_to_cpu_32(icp->ofp_icmp_ip.ip_dst.s_addr); + gw = odp_be_to_cpu_32(icp->ofp_icmp_gwaddr.s_addr); + OFP_DBG("icmp redirect from %d.%d.%d.%d: " + "%d.%d.%d.%d => %d.%d.%d.%d\n", + (int)(src >> 24), (int)((src >> 16) & 0xff), + (int)((src >> 8) & 0xff), (int)(src & 0xff), + (int)(dst >> 24), (int)((dst >> 16) & 0xff), + (int)((dst >> 8) & 0xff), (int)(dst & 0xff), + (int)(gw >> 24), (int)((gw >> 16) & 0xff), + (int)((gw >> 8) & 0xff), (int)(gw & 0xff)); + } + /* + * RFC1812 says we must ignore ICMP redirects if we + * are acting as router. + */ +/*TODO if (V_drop_redirect || V_ipforwarding) */ + break; + /* + * Short circuit routing redirects to force + * immediate change in the kernel's routing + * tables. 
The message is also handed to anyone + * listening on a raw socket (e.g. the routing + * daemon for use in updating its tables). + */ + + /* + * No kernel processing for the following; + * just fall through to send to raw listener. + */ + case OFP_ICMP_ECHOREPLY: + case OFP_ICMP_ROUTERADVERT: + case OFP_ICMP_ROUTERSOLICIT: + case OFP_ICMP_TSTAMPREPLY: + case OFP_ICMP_IREQREPLY: + case OFP_ICMP_MASKREPLY: + default: + break; + } + +raw: +/*TODO pas to ip raw listener. What processing is done in raw listener? + rip_input(m, off); + return; +*/ + return OFP_PKT_DROP; + +freeit: + return OFP_PKT_DROP; +} + +/* + * Reflect the ip packet back to the source + */ +static int +icmp_reflect(odp_packet_t pkt) +{ + struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + struct ofp_in_addr t; + struct ofp_nh_entry *nh = NULL; + struct ofp_ifnet *dev_out, *ifp = odp_packet_user_ptr(pkt); + +/* if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || + IN_EXPERIMENTAL(ntohl(ip->ip_src.s_addr)) || + IN_ZERONET(ntohl(ip->ip_src.s_addr)) ) { + MPSTAT_INC(icps_badaddr); + goto done; +* Ip_output() will check for broadcast + } +*/ + if (ifp == NULL) + goto drop; + + t = ip->ip_dst; + ip->ip_dst = ip->ip_src; + + /* + * Source selection for ICMP replies: + * + * If the incoming packet was addressed directly to one of our + * own addresses, use dst as the src for the reply. + */ + if ((dev_out = ofp_get_ifnet_match(t.s_addr, ifp->vrf, ifp->vlan))) { + t.s_addr = dev_out->ip_addr; + goto match; + } + + /* + * If the incoming packet was addressed to one of our broadcast + * addresses, use the first non-broadcast address which corresponds + * to the incoming interface. 
+ */ +/* ifp = m->m_pkthdr.rcvif; + if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { + IF_ADDR_RLOCK(ifp); + OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + ia = ifatoia(ifa); + if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == + t.s_addr) { + t = IA_SIN(ia)->sin_addr; + IF_ADDR_RUNLOCK(ifp); + goto match; + } + } + IF_ADDR_RUNLOCK(ifp); + } +*/ + /* + * If the packet was transiting through us, use the address of + * the interface the packet came through in. If that interface + * doesn't have a suitable IP address, the normal selection + * criteria apply. + */ + t.s_addr = 0; + if (1 /*V_icmp_rfi*/) + t.s_addr = ifp->ip_addr; + /* + * If the packet was transiting through us, use the address of + * the interface that is the closest to the packet source. + * When we don't have a route back to the packet source, stop here + * and drop the packet. + */ + uint32_t flags; + nh = ofp_get_next_hop(ifp->vrf, ip->ip_dst.s_addr, &flags); + if (nh == NULL) { +/* ICMPSTAT_INC(icps_noroute);*/ + if (t.s_addr) + goto match; + else + goto drop; + + } + dev_out = ofp_get_ifnet(nh->port, nh->vlan); + t.s_addr = dev_out->ip_addr; +match: +#ifdef MAC + mac_netinet_icmp_replyinplace(m); +#endif + ip->ip_src = t; + ip->ip_ttl = 64; /*default ttl, from RFC 1340*/ + +/*TODO IP header optlen handling + if (optlen > 0) { + register u_char *cp; + int opt, cnt; + u_int len; + * Retrieve any source routing from the incoming packet; + * add on any record-route or timestamp options. 
+ cp = (u_char *) (ip + 1); + if ((opts = ip_srcroute(m)) == 0 && + (opts = m_gethdr(M_DONTWAIT, MT_DATA))) { + opts->m_len = sizeof(struct in_addr); + mtod(opts, struct in_addr *)->s_addr = 0; + } + if (opts) { +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("icmp_reflect optlen %d rt %d => ", + optlen, opts->m_len); +#endif + for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { + opt = cp[IPOPT_OPTVAL]; + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + len = 1; + else { + if (cnt < IPOPT_OLEN + sizeof(*cp)) + break; + len = cp[IPOPT_OLEN]; + if (len < IPOPT_OLEN + sizeof(*cp) || + len > cnt) + break; + } + * Should check for overflow, but it "can't happen" + if (opt == IPOPT_RR || opt == IPOPT_TS || + opt == IPOPT_SECURITY) { + bcopy((caddr_t)cp, + mtod(opts, caddr_t) + opts->m_len, len); + opts->m_len += len; + } + } + * Terminate & pad, if necessary + cnt = opts->m_len % 4; + if (cnt) { + for (; cnt < 4; cnt++) { + *(mtod(opts, caddr_t) + opts->m_len) = + IPOPT_EOL; + opts->m_len++; + } + } +#ifdef ICMPPRINTFS + if (icmpprintfs) + printf("%d\n", opts->m_len); +#endif + } + * Now strip out original options by copying rest of first + * mbuf's data back, and adjust the IP length. + ip->ip_len -= optlen; + ip->ip_v = IPVERSION; + ip->ip_hl = 5; + m->m_len -= optlen; + if (m->m_flags & M_PKTHDR) + m->m_pkthdr.len -= optlen; + optlen += sizeof(struct ip); + bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1), + (unsigned)(m->m_len - sizeof(struct ip))); + } +*/ + icmp_send(pkt, nh/*, opts*/); + return OFP_PKT_PROCESSED; +drop: + return OFP_PKT_DROP; +} + +/* + * Send an icmp packet back to the ip level, + * after supplying a checksum. 
+ */ +static void +icmp_send(odp_packet_t pkt, struct ofp_nh_entry *nh) +{ + register struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + register uint16_t hlen = ip->ip_hl << 2; + register struct ofp_icmp *icp = (struct ofp_icmp *)((uint8_t *)ip + hlen); + + icp->icmp_cksum = 0; + icp->icmp_cksum = ofp_cksum(pkt, odp_packet_l3_offset(pkt) + hlen, + odp_be_to_cpu_16(ip->ip_len) - hlen); + +#ifdef ICMPPRINTFS + if (icmpprintfs) { + char buf[4 * sizeof "123"]; + strcpy(buf, inet_ntoa(ip->ip_dst)); + printf("icmp_send dst %s src %s\n", + buf, inet_ntoa(ip->ip_src)); + } +#endif + (void) ofp_ip_output(pkt, nh); +} + diff --git a/src/ofp_icmp6.c b/src/ofp_icmp6.c new file mode 100644 index 00000000..20a5b014 --- /dev/null +++ b/src/ofp_icmp6.c @@ -0,0 +1,2806 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + */ + +#include "ofpi.h" +#include "ofpi_ip6.h" +#include "ofpi_icmp6.h" +#include "ofpi_log.h" +#include "ofpi_util.h" +#include "ofpi_protosw.h" +#include "ofpi_route.h" +#include "ofpi_ip6_var.h" +#include "ofpi_pkt_processing.h" + +#if 0 +#include +__FBSDID("$FreeBSD: release/9.1.0/sys/netinet6/icmp6.c 238242 2012-07-08 12:34:12Z bz $"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef IPSEC +#include +#include +#endif +#endif /* 0*/ + +#if 0 +extern struct domain inet6domain; + +VNET_DEFINE(struct icmp6stat, icmp6stat); + +VNET_DECLARE(struct inpcbinfo, ripcbinfo); +VNET_DECLARE(struct inpcbhead, ripcb); +VNET_DECLARE(int, icmp6errppslim); +static VNET_DEFINE(int, icmp6errpps_count) = 0; +static VNET_DEFINE(struct timeval, icmp6errppslim_last); +VNET_DECLARE(int, icmp6_nodeinfo); + +#define V_ripcbinfo VNET(ripcbinfo) +#define V_ripcb VNET(ripcb) +#define V_icmp6errppslim VNET(icmp6errppslim) +#define V_icmp6errpps_count VNET(icmp6errpps_count) +#define V_icmp6errppslim_last VNET(icmp6errppslim_last) +#define 
V_icmp6_nodeinfo VNET(icmp6_nodeinfo) + +static void icmp6_errcount(struct icmp6errstat *, int, int); +static int icmp6_rip6_input(struct mbuf **, int); +static int icmp6_ratelimit(const struct in6_addr *, const int, const int); +static const char *icmp6_redirect_diag __P((struct in6_addr *, + struct in6_addr *, struct in6_addr *)); +static struct mbuf *ni6_input(struct mbuf *, int); +static struct mbuf *ni6_nametodns(const char *, int, int); +static int ni6_dnsmatch(const char *, int, const char *, int); +static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *, + struct ifnet **, struct in6_addr *)); +static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, + struct ifnet *, int)); +static int icmp6_notify_error(struct mbuf **, int, int, int); +#endif /* 0*/ + + +#if 0 +/* + * Kernel module interface for updating icmp6stat. The argument is an index + * into icmp6stat treated as an array of u_quad_t. While this encodes the + * general layout of icmp6stat into the caller, it doesn't encode its + * location, so that future changes to add, for example, per-CPU stats + * support won't cause binary compatibility problems for kernel modules. 
+ */ +void +kmod_icmp6stat_inc(int statnum) +{ + + (*((u_quad_t *)&V_icmp6stat + statnum))++; +} + +static void +icmp6_errcount(struct icmp6errstat *stat, int type, int code) +{ + switch (type) { + case ICMP6_DST_UNREACH: + switch (code) { + case ICMP6_DST_UNREACH_NOROUTE: + stat->icp6errs_dst_unreach_noroute++; + return; + case ICMP6_DST_UNREACH_ADMIN: + stat->icp6errs_dst_unreach_admin++; + return; + case ICMP6_DST_UNREACH_BEYONDSCOPE: + stat->icp6errs_dst_unreach_beyondscope++; + return; + case ICMP6_DST_UNREACH_ADDR: + stat->icp6errs_dst_unreach_addr++; + return; + case ICMP6_DST_UNREACH_NOPORT: + stat->icp6errs_dst_unreach_noport++; + return; + } + break; + case ICMP6_PACKET_TOO_BIG: + stat->icp6errs_packet_too_big++; + return; + case ICMP6_TIME_EXCEEDED: + switch (code) { + case ICMP6_TIME_EXCEED_TRANSIT: + stat->icp6errs_time_exceed_transit++; + return; + case ICMP6_TIME_EXCEED_REASSEMBLY: + stat->icp6errs_time_exceed_reassembly++; + return; + } + break; + case ICMP6_PARAM_PROB: + switch (code) { + case ICMP6_PARAMPROB_HEADER: + stat->icp6errs_paramprob_header++; + return; + case ICMP6_PARAMPROB_NEXTHEADER: + stat->icp6errs_paramprob_nextheader++; + return; + case ICMP6_PARAMPROB_OPTION: + stat->icp6errs_paramprob_option++; + return; + } + break; + case ND_REDIRECT: + stat->icp6errs_redirect++; + return; + } + stat->icp6errs_unknown++; +} + +/* + * A wrapper function for icmp6_error() necessary when the erroneous packet + * may not contain enough scope zone information. 
+ */ +void +icmp6_error2(struct mbuf *m, int type, int code, int param, + struct ifnet *ifp) +{ + struct ip6_hdr *ip6; + + if (ifp == NULL) + return; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); +#else + if (m->m_len < sizeof(struct ip6_hdr)) { + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) + return; + } +#endif + + ip6 = mtod(m, struct ip6_hdr *); + + if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) + return; + if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) + return; + + icmp6_error(m, type, code, param); +} + +/* + * Generate an error packet of type error in response to bad IP6 packet. + */ +void +icmp6_error(struct mbuf *m, int type, int code, int param) +{ + struct ip6_hdr *oip6, *nip6; + struct icmp6_hdr *icmp6; + u_int preplen; + int off; + int nxt; + + ICMP6STAT_INC(icp6s_error); + + /* count per-type-code statistics */ + icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code); + +#ifdef M_DECRYPTED /*not openbsd*/ + if (m->m_flags & M_DECRYPTED) { + ICMP6STAT_INC(icp6s_canterror); + goto freeit; + } +#endif + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); +#else + if (m->m_len < sizeof(struct ip6_hdr)) { + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) + return; + } +#endif + oip6 = mtod(m, struct ip6_hdr *); + + /* + * If the destination address of the erroneous packet is a multicast + * address, or the packet was sent using link-layer multicast, + * we should basically suppress sending an error (RFC 2463, Section + * 2.4). + * We have two exceptions (the item e.2 in that section): + * - the Packet Too Big message can be sent for path MTU discovery. + * - the Parameter Problem Message that can be allowed an icmp6 error + * in the option type field. This check has been done in + * ip6_unknown_opt(), so we can just check the type and code. 
+ */ + if ((m->m_flags & (M_BCAST|M_MCAST) || + IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && + (type != ICMP6_PACKET_TOO_BIG && + (type != ICMP6_PARAM_PROB || + code != ICMP6_PARAMPROB_OPTION))) + goto freeit; + + /* + * RFC 2463, 2.4 (e.5): source address check. + * XXX: the case of anycast source? + */ + if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || + IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) + goto freeit; + + /* + * If we are about to send ICMPv6 against ICMPv6 error/redirect, + * don't do it. + */ + nxt = -1; + off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); + if (off >= 0 && nxt == IPPROTO_ICMPV6) { + struct icmp6_hdr *icp; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); + icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, + sizeof(*icp)); + if (icp == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + if (icp->icmp6_type < ICMP6_ECHO_REQUEST || + icp->icmp6_type == ND_REDIRECT) { + /* + * ICMPv6 error + * Special case: for redirect (which is + * informational) we must not send icmp6 error. + */ + ICMP6STAT_INC(icp6s_canterror); + goto freeit; + } else { + /* ICMPv6 informational - send the error */ + } + } else { + /* non-ICMPv6 - send the error */ + } + + oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ + + /* Finally, do rate limitation check. */ + if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { + ICMP6STAT_INC(icp6s_toofreq); + goto freeit; + } + + /* + * OK, ICMP6 can be generated. + */ + + if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) + m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); + + preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); + M_PREPEND(m, preplen, M_DONTWAIT); /* FIB is also copied over. 
*/ + if (m && m->m_len < preplen) + m = m_pullup(m, preplen); + if (m == NULL) { + nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); + return; + } + + nip6 = mtod(m, struct ip6_hdr *); + nip6->ip6_src = oip6->ip6_src; + nip6->ip6_dst = oip6->ip6_dst; + + in6_clearscope(&oip6->ip6_src); + in6_clearscope(&oip6->ip6_dst); + + icmp6 = (struct icmp6_hdr *)(nip6 + 1); + icmp6->icmp6_type = type; + icmp6->icmp6_code = code; + icmp6->icmp6_pptr = htonl((u_int32_t)param); + + /* + * icmp6_reflect() is designed to be in the input path. + * icmp6_error() can be called from both input and output path, + * and if we are in output path rcvif could contain bogus value. + * clear m->m_pkthdr.rcvif for safety, we should have enough scope + * information in ip header (nip6). + */ + m->m_pkthdr.rcvif = NULL; + + ICMP6STAT_INC(icp6s_outhist[type]); + icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ + + return; + + freeit: + /* + * If we can't tell whether or not we can generate ICMP6, free it. + */ + m_freem(m); +} +#endif

/*
 * Process a received ICMP6 message.
 *
 * m:    received packet; the IPv6 header is read through its L3 pointer.
 * offp: in - offset of the ICMPv6 header from the start of the IPv6
 *       header.
 * nxt:  out - always set to OFP_IPPROTO_DONE.
 *
 * Returns
 *   OFP_PKT_PROCESSED  for an echo request (turned into a reply in place
 *                      and passed to ofp_icmp6_reflect()) and for an
 *                      echo reply;
 *   OFP_PKT_CONTINUE   for neighbor solicitations/advertisements, after
 *                      the matching ofp_nd6_*_input() handler ran, and
 *                      for every type that is not handled here
 *                      ("send to SP");
 *   OFP_PKT_DROP       for a truncated message, a checksum failure, a
 *                      non-zero code or a too short ND message.
 *
 * Not ported from the FreeBSD 9.1 icmp6_input() this code is based on:
 * the multicast group membership and faith-prefix checks, all ICMP6
 * statistics, error notification to upper layers (destination
 * unreachable, packet too big, time exceeded, parameter problem), MLD,
 * node information (WRU/FQDN) queries, router solicitation and
 * advertisement, redirect and router renumbering.  Those message types
 * take the "unknown type" path below.
 */
int
ofp_icmp6_input(odp_packet_t m, int *offp, int *nxt)
{
	const int off = *offp;
	struct ofp_ip6_hdr *ip6;
	struct ofp_icmp6_hdr *icmp6;
	uint32_t icmp6len;
	int code;
	int sum;

	*nxt = OFP_IPPROTO_DONE;

	/*
	 * NOTE(review): presumably returns OFP_PKT_DROP from this function
	 * when fewer than off + header bytes are available - confirm
	 * against the macro definition.
	 */
	OFP_IP6_EXTHDR_CHECK(m, off, sizeof(struct ofp_icmp6_hdr),
			     OFP_PKT_DROP);

	/*
	 * Locate icmp6 structure in packet, and check
	 * that not corrupted and of at least minimum length
	 */
	ip6 = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(m, NULL);
	icmp6 = (struct ofp_icmp6_hdr *)((uint8_t *)ip6 + off);

	/* Everything from the ICMPv6 header to the end of the packet. */
	icmp6len = odp_packet_len(m) - odp_packet_l3_offset(m) - off;
	if (icmp6len < sizeof(struct ofp_icmp6_hdr)) {
		/*ICMP6STAT_INC(icp6s_tooshort);*/
		goto freeit;
	}

	code = icmp6->icmp6_code;

	/*
	 * calculate the checksum
	 */
	sum = ofp_in6_cksum(m, OFP_IPPROTO_ICMPV6, off, icmp6len);
	if (sum != 0) {
		OFP_ERR("ICMP6 checksum error(%d|%x) %s\n",
			icmp6->icmp6_type, sum,
			ofp_print_ip6_addr(&ip6->ip6_src.ofp_s6_addr[0]));
		/*ICMP6STAT_INC(icp6s_checksum);*/
		goto freeit;
	}

	switch (icmp6->icmp6_type) {
	case OFP_ICMP6_ECHO_REQUEST:
		ofp_icmp6_ifstat_inc(ifp, ifs6_in_echo);
		if (code != 0)
			goto badcode;

		/* Answer in place: same packet, type changed to reply. */
		icmp6->icmp6_type = OFP_ICMP6_ECHO_REPLY;
		icmp6->icmp6_code = 0;

		ofp_icmp6_reflect(m, off);

		return OFP_PKT_PROCESSED;

	case OFP_ICMP6_ECHO_REPLY:
		ofp_icmp6_ifstat_inc(ifp, ifs6_in_echoreply);
		if (code != 0)
			goto badcode;

		return OFP_PKT_PROCESSED;

	case OFP_ND_NEIGHBOR_SOLICIT:
		ofp_icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct ofp_nd_neighbor_solicit))
			goto badlen;
		ofp_nd6_ns_input(m, off, icmp6len);
		break;

	case OFP_ND_NEIGHBOR_ADVERT:
		ofp_icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert);
		if (code != 0)
			goto badcode;
		if (icmp6len < sizeof(struct ofp_nd_neighbor_advert))
			goto badlen;
		ofp_nd6_na_input(m, off, icmp6len);
		break;

	default:
		OFP_DBG("icmp6_input: unknown type %d(src=%s, dst=%s)\n",
			icmp6->icmp6_type,
			ofp_print_ip6_addr(&ip6->ip6_src.ofp_s6_addr[0]),
			ofp_print_ip6_addr(&ip6->ip6_dst.ofp_s6_addr[0]));
		break;
	}

	return OFP_PKT_CONTINUE; /* send to SP*/

badcode:
badlen:
freeit:
	/*
	 * NOTE(review): the packet is freed here AND OFP_PKT_DROP is
	 * returned, whereas the early exit taken through
	 * OFP_IP6_EXTHDR_CHECK() above is given OFP_PKT_DROP without an
	 * explicit free at this call site.  Confirm who owns the packet
	 * on OFP_PKT_DROP; if the caller frees it, this is a double free.
	 */
	odp_packet_free(m);
	return OFP_PKT_DROP;
}

+#if 0 +static int +icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) +{ + struct mbuf *m = *mp; + struct icmp6_hdr *icmp6; + struct ip6_hdr *eip6; + u_int32_t notifymtu; + struct sockaddr_in6 icmp6src, icmp6dst; + + if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { + ICMP6STAT_INC(icp6s_tooshort); + goto freeit; + } +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); + icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, + sizeof(*icmp6) + sizeof(struct ip6_hdr)); + if (icmp6 == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + eip6 = (struct ip6_hdr *)(icmp6 + 1); + + /* Detect the upper level protocol */ + { + void (*ctlfunc)(int, struct sockaddr *, void *); + u_int8_t nxt = eip6->ip6_nxt; + int eoff = off + sizeof(struct icmp6_hdr) + + sizeof(struct ip6_hdr); + struct ip6ctlparam ip6cp; + struct in6_addr *finaldst = NULL; + int icmp6type = icmp6->icmp6_type; + struct ip6_frag *fh; + struct ip6_rthdr *rth; + struct ip6_rthdr0 *rth0; + int rthlen; + + while (1) { /* XXX: should avoid infinite loop 
explicitly? */ + struct ip6_ext *eh; + + switch (nxt) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + case IPPROTO_AH: +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, + eoff + sizeof(struct ip6_ext), -1); + eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); +#else + IP6_EXTHDR_GET(eh, struct ip6_ext *, m, + eoff, sizeof(*eh)); + if (eh == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + + if (nxt == IPPROTO_AH) + eoff += (eh->ip6e_len + 2) << 2; + else + eoff += (eh->ip6e_len + 1) << 3; + nxt = eh->ip6e_nxt; + break; + case IPPROTO_ROUTING: + /* + * When the erroneous packet contains a + * routing header, we should examine the + * header to determine the final destination. + * Otherwise, we can't properly update + * information that depends on the final + * destination (e.g. path MTU). + */ +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); + rth = (struct ip6_rthdr *) + (mtod(m, caddr_t) + eoff); +#else + IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, + eoff, sizeof(*rth)); + if (rth == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + rthlen = (rth->ip6r_len + 1) << 3; + /* + * XXX: currently there is no + * officially defined type other + * than type-0. + * Note that if the segment left field + * is 0, all intermediate hops must + * have been passed. 
+ */ + if (rth->ip6r_segleft && + rth->ip6r_type == IPV6_RTHDR_TYPE_0) { + int hops; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); + rth0 = (struct ip6_rthdr0 *) + (mtod(m, caddr_t) + eoff); +#else + IP6_EXTHDR_GET(rth0, + struct ip6_rthdr0 *, m, + eoff, rthlen); + if (rth0 == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + /* just ignore a bogus header */ + if ((rth0->ip6r0_len % 2) == 0 && + (hops = rth0->ip6r0_len/2)) + finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); + } + eoff += rthlen; + nxt = rth->ip6r_nxt; + break; + case IPPROTO_FRAGMENT: +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, 0, eoff + + sizeof(struct ip6_frag), -1); + fh = (struct ip6_frag *)(mtod(m, caddr_t) + + eoff); +#else + IP6_EXTHDR_GET(fh, struct ip6_frag *, m, + eoff, sizeof(*fh)); + if (fh == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + /* + * Data after a fragment header is meaningless + * unless it is the first fragment, but + * we'll go to the notify label for path MTU + * discovery. + */ + if (fh->ip6f_offlg & IP6F_OFF_MASK) + goto notify; + + eoff += sizeof(struct ip6_frag); + nxt = fh->ip6f_nxt; + break; + default: + /* + * This case includes ESP and the No Next + * Header. In such cases going to the notify + * label does not have any meaning + * (i.e. ctlfunc will be NULL), but we go + * anyway since we might have to update + * path MTU information. + */ + goto notify; + } + } + notify: +#ifndef PULLDOWN_TEST + icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, + sizeof(*icmp6) + sizeof(struct ip6_hdr)); + if (icmp6 == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return (-1); + } +#endif + + /* + * retrieve parameters from the inner IPv6 header, and convert + * them into sockaddr structures. 
+ * XXX: there is no guarantee that the source or destination + * addresses of the inner packet are in the same scope as + * the addresses of the icmp packet. But there is no other + * way to determine the zone. + */ + eip6 = (struct ip6_hdr *)(icmp6 + 1); + + bzero(&icmp6dst, sizeof(icmp6dst)); + icmp6dst.sin6_len = sizeof(struct sockaddr_in6); + icmp6dst.sin6_family = AF_INET6; + if (finaldst == NULL) + icmp6dst.sin6_addr = eip6->ip6_dst; + else + icmp6dst.sin6_addr = *finaldst; + if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) + goto freeit; + bzero(&icmp6src, sizeof(icmp6src)); + icmp6src.sin6_len = sizeof(struct sockaddr_in6); + icmp6src.sin6_family = AF_INET6; + icmp6src.sin6_addr = eip6->ip6_src; + if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) + goto freeit; + icmp6src.sin6_flowinfo = + (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); + + if (finaldst == NULL) + finaldst = &eip6->ip6_dst; + ip6cp.ip6c_m = m; + ip6cp.ip6c_icmp6 = icmp6; + ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); + ip6cp.ip6c_off = eoff; + ip6cp.ip6c_finaldst = finaldst; + ip6cp.ip6c_src = &icmp6src; + ip6cp.ip6c_nxt = nxt; + + m_addr_changed(m); + + if (icmp6type == ICMP6_PACKET_TOO_BIG) { + notifymtu = ntohl(icmp6->icmp6_mtu); + ip6cp.ip6c_cmdarg = (void *)&notifymtu; + icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ + } + + ctlfunc = (void (*)(int, struct sockaddr *, void *)) + (inet6sw[ip6_protox[nxt]].pr_ctlinput); + if (ctlfunc) { + (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, + &ip6cp); + } + } + *mp = m; + return (0); + + freeit: + m_freem(m); + return (-1); +} + +void +icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) +{ + struct in6_addr *dst = ip6cp->ip6c_finaldst; + struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; + struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ + u_int mtu = ntohl(icmp6->icmp6_mtu); + struct in_conninfo inc; + +#if 0 + /* + * RFC2460 section 5, last paragraph. 
+ * even though minimum link MTU for IPv6 is IPV6_MMTU, + * we may see ICMPv6 too big with mtu < IPV6_MMTU + * due to packet translator in the middle. + * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for + * special handling. + */ + if (mtu < IPV6_MMTU) + return; +#endif + + /* + * we reject ICMPv6 too big with abnormally small value. + * XXX what is the good definition of "abnormally small"? + */ + if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) + return; + + if (!validated) + return; + + /* + * In case the suggested mtu is less than IPV6_MMTU, we + * only need to remember that it was for above mentioned + * "alwaysfrag" case. + * Try to be as close to the spec as possible. + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU - 8; + + bzero(&inc, sizeof(inc)); + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = *dst; + if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) + return; + + if (mtu < tcp_maxmtu6(&inc, NULL)) { + tcp_hc_updatemtu(&inc, mtu); + ICMP6STAT_INC(icp6s_pmtuchg); + } +} + +/* + * Process a Node Information Query packet, based on + * draft-ietf-ipngwg-icmp-name-lookups-07. 
+ * + * Spec incompatibilities: + * - IPv6 Subject address handling + * - IPv4 Subject address handling support missing + * - Proxy reply (answer even if it's not for me) + * - joins NI group address at in6_ifattach() time only, does not cope + * with hostname changes by sethostname(3) + */ +static struct mbuf * +ni6_input(struct mbuf *m, int off) +{ + struct icmp6_nodeinfo *ni6, *nni6; + struct mbuf *n = NULL; + struct prison *pr; + u_int16_t qtype; + int subjlen; + int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); + struct ni_reply_fqdn *fqdn; + int addrs; /* for NI_QTYPE_NODEADDR */ + struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ + struct in6_addr in6_subj; /* subject address */ + struct ip6_hdr *ip6; + int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ + char *subj = NULL; + struct in6_ifaddr *ia6 = NULL; + + ip6 = mtod(m, struct ip6_hdr *); +#ifndef PULLDOWN_TEST + ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); +#else + IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); + if (ni6 == NULL) { + /* m is already reclaimed */ + return (NULL); + } +#endif + + /* + * Validate IPv6 source address. + * The default configuration MUST be to refuse answering queries from + * global-scope addresses according to RFC4602. + * Notes: + * - it's not very clear what "refuse" means; this implementation + * simply drops it. + * - it's not very easy to identify global-scope (unicast) addresses + * since there are many prefixes for them. It should be safer + * and in practice sufficient to check "all" but loopback and + * link-local (note that site-local unicast was deprecated and + * ULA is defined as global scope-wise) + */ + if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && + !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && + !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) + goto bad; + + /* + * Validate IPv6 destination address. 
+ * + * The Responder must discard the Query without further processing + * unless it is one of the Responder's unicast or anycast addresses, or + * a link-local scope multicast address which the Responder has joined. + * [RFC4602, Section 5.] + */ + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) + goto bad; + /* else it's a link-local multicast, fine */ + } else { /* unicast or anycast */ + if ((ia6 = ip6_getdstifaddr(m)) == NULL) + goto bad; /* XXX impossible */ + + if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && + !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { + ifa_free(&ia6->ia_ifa); + nd6log((LOG_DEBUG, "ni6_input: ignore node info to " + "a temporary address in %s:%d", + __FILE__, __LINE__)); + goto bad; + } + ifa_free(&ia6->ia_ifa); + } + + /* validate query Subject field. */ + qtype = ntohs(ni6->ni_qtype); + subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); + switch (qtype) { + case NI_QTYPE_NOOP: + case NI_QTYPE_SUPTYPES: + /* 07 draft */ + if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) + break; + /* FALLTHROUGH */ + case NI_QTYPE_FQDN: + case NI_QTYPE_NODEADDR: + case NI_QTYPE_IPV4ADDR: + switch (ni6->ni_code) { + case ICMP6_NI_SUBJ_IPV6: +#if ICMP6_NI_SUBJ_IPV6 != 0 + case 0: +#endif + /* + * backward compatibility - try to accept 03 draft + * format, where no Subject is present. + */ + if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && + subjlen == 0) { + oldfqdn++; + break; + } +#if ICMP6_NI_SUBJ_IPV6 != 0 + if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) + goto bad; +#endif + + if (subjlen != sizeof(struct in6_addr)) + goto bad; + + /* + * Validate Subject address. + * + * Not sure what exactly "address belongs to the node" + * means in the spec, is it just unicast, or what? 
+ * + * At this moment we consider Subject address as + * "belong to the node" if the Subject address equals + * to the IPv6 destination address; validation for + * IPv6 destination address should have done enough + * check for us. + * + * We do not do proxy at this moment. + */ + /* m_pulldown instead of copy? */ + m_copydata(m, off + sizeof(struct icmp6_nodeinfo), + subjlen, (caddr_t)&in6_subj); + if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) + goto bad; + + subj = (char *)&in6_subj; + if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) + break; + + /* + * XXX if we are to allow other cases, we should really + * be careful about scope here. + * basically, we should disallow queries toward IPv6 + * destination X with subject Y, + * if scope(X) > scope(Y). + * if we allow scope(X) > scope(Y), it will result in + * information leakage across scope boundary. + */ + goto bad; + + case ICMP6_NI_SUBJ_FQDN: + /* + * Validate Subject name with gethostname(3). + * + * The behavior may need some debate, since: + * - we are not sure if the node has FQDN as + * hostname (returned by gethostname(3)). + * - the code does wildcard match for truncated names. + * however, we are not sure if we want to perform + * wildcard match, if gethostname(3) side has + * truncated hostname. + */ + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + n = ni6_nametodns(pr->pr_hostname, + strlen(pr->pr_hostname), 0); + mtx_unlock(&pr->pr_mtx); + if (!n || n->m_next || n->m_len == 0) + goto bad; + IP6_EXTHDR_GET(subj, char *, m, + off + sizeof(struct icmp6_nodeinfo), subjlen); + if (subj == NULL) + goto bad; + if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), + n->m_len)) { + goto bad; + } + m_freem(n); + n = NULL; + break; + + case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ + default: + goto bad; + } + break; + } + + /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
*/ + switch (qtype) { + case NI_QTYPE_FQDN: + if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) + goto bad; + break; + case NI_QTYPE_NODEADDR: + case NI_QTYPE_IPV4ADDR: + if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) + goto bad; + break; + } + + /* guess reply length */ + switch (qtype) { + case NI_QTYPE_NOOP: + break; /* no reply data */ + case NI_QTYPE_SUPTYPES: + replylen += sizeof(u_int32_t); + break; + case NI_QTYPE_FQDN: + /* XXX will append an mbuf */ + replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); + break; + case NI_QTYPE_NODEADDR: + addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); + if ((replylen += addrs * (sizeof(struct in6_addr) + + sizeof(u_int32_t))) > MCLBYTES) + replylen = MCLBYTES; /* XXX: will truncate pkt later */ + break; + case NI_QTYPE_IPV4ADDR: + /* unsupported - should respond with unknown Qtype? */ + break; + default: + /* + * XXX: We must return a reply with the ICMP6 code + * `unknown Qtype' in this case. However we regard the case + * as an FQDN query for backward compatibility. + * Older versions set a random value to this field, + * so it rarely varies in the defined qtypes. + * But the mechanism is not reliable... + * maybe we should obsolete older versions. + */ + qtype = NI_QTYPE_FQDN; + /* XXX will append an mbuf */ + replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); + oldfqdn++; + break; + } + + /* allocate an mbuf to reply. */ + MGETHDR(n, M_DONTWAIT, m->m_type); + if (n == NULL) { + m_freem(m); + return (NULL); + } + M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */ + if (replylen > MHLEN) { + if (replylen > MCLBYTES) { + /* + * XXX: should we try to allocate more? But MCLBYTES + * is probably much larger than IPV6_MMTU... 
+ */ + goto bad; + } + MCLGET(n, M_DONTWAIT); + if ((n->m_flags & M_EXT) == 0) { + goto bad; + } + } + n->m_pkthdr.len = n->m_len = replylen; + + /* copy mbuf header and IPv6 + Node Information base headers */ + bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); + nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); + bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); + + /* qtype dependent procedure */ + switch (qtype) { + case NI_QTYPE_NOOP: + nni6->ni_code = ICMP6_NI_SUCCESS; + nni6->ni_flags = 0; + break; + case NI_QTYPE_SUPTYPES: + { + u_int32_t v; + nni6->ni_code = ICMP6_NI_SUCCESS; + nni6->ni_flags = htons(0x0000); /* raw bitmap */ + /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ + v = (u_int32_t)htonl(0x0000000f); + bcopy(&v, nni6 + 1, sizeof(u_int32_t)); + break; + } + case NI_QTYPE_FQDN: + nni6->ni_code = ICMP6_NI_SUCCESS; + fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); + nni6->ni_flags = 0; /* XXX: meaningless TTL */ + fqdn->ni_fqdn_ttl = 0; /* ditto. */ + /* + * XXX do we really have FQDN in hostname? + */ + pr = curthread->td_ucred->cr_prison; + mtx_lock(&pr->pr_mtx); + n->m_next = ni6_nametodns(pr->pr_hostname, + strlen(pr->pr_hostname), oldfqdn); + mtx_unlock(&pr->pr_mtx); + if (n->m_next == NULL) + goto bad; + /* XXX we assume that n->m_next is not a chain */ + if (n->m_next->m_next != NULL) + goto bad; + n->m_pkthdr.len += n->m_next->m_len; + break; + case NI_QTYPE_NODEADDR: + { + int lenlim, copied; + + nni6->ni_code = ICMP6_NI_SUCCESS; + n->m_pkthdr.len = n->m_len = + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); + lenlim = M_TRAILINGSPACE(n); + copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); + /* XXX: reset mbuf length */ + n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + + sizeof(struct icmp6_nodeinfo) + copied; + break; + } + default: + break; /* XXX impossible! 
*/ + } + + nni6->ni_type = ICMP6_NI_REPLY; + m_freem(m); + return (n); + + bad: + m_freem(m); + if (n) + m_freem(n); + return (NULL); +} + +/* + * make a mbuf with DNS-encoded string. no compression support. + * + * XXX names with less than 2 dots (like "foo" or "foo.section") will be + * treated as truncated name (two \0 at the end). this is a wild guess. + * + * old - return pascal string if non-zero + */ +static struct mbuf * +ni6_nametodns(const char *name, int namelen, int old) +{ + struct mbuf *m; + char *cp, *ep; + const char *p, *q; + int i, len, nterm; + + if (old) + len = namelen + 1; + else + len = MCLBYTES; + + /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */ + MGET(m, M_DONTWAIT, MT_DATA); + if (m && len > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) + goto fail; + } + if (!m) + goto fail; + m->m_next = NULL; + + if (old) { + m->m_len = len; + *mtod(m, char *) = namelen; + bcopy(name, mtod(m, char *) + 1, namelen); + return m; + } else { + m->m_len = 0; + cp = mtod(m, char *); + ep = mtod(m, char *) + M_TRAILINGSPACE(m); + + /* if not certain about my name, return empty buffer */ + if (namelen == 0) + return m; + + /* + * guess if it looks like shortened hostname, or FQDN. + * shortened hostname needs two trailing "\0". + */ + i = 0; + for (p = name; p < name + namelen; p++) { + if (*p && *p == '.') + i++; + } + if (i < 2) + nterm = 2; + else + nterm = 1; + + p = name; + while (cp < ep && p < name + namelen) { + i = 0; + for (q = p; q < name + namelen && *q && *q != '.'; q++) + i++; + /* result does not fit into mbuf */ + if (cp + i + 1 >= ep) + goto fail; + /* + * DNS label length restriction, RFC1035 page 8. + * "i == 0" case is included here to avoid returning + * 0-length label on "foo..bar". 
+ */ + if (i <= 0 || i >= 64) + goto fail; + *cp++ = i; + bcopy(p, cp, i); + cp += i; + p = q; + if (p < name + namelen && *p == '.') + p++; + } + /* termination */ + if (cp + nterm >= ep) + goto fail; + while (nterm-- > 0) + *cp++ = '\0'; + m->m_len = cp - mtod(m, char *); + return m; + } + + panic("should not reach here"); + /* NOTREACHED */ + + fail: + if (m) + m_freem(m); + return NULL; +} + +/* + * check if two DNS-encoded string matches. takes care of truncated + * form (with \0\0 at the end). no compression support. + * XXX upper/lowercase match (see RFC2065) + */ +static int +ni6_dnsmatch(const char *a, int alen, const char *b, int blen) +{ + const char *a0, *b0; + int l; + + /* simplest case - need validation? */ + if (alen == blen && bcmp(a, b, alen) == 0) + return 1; + + a0 = a; + b0 = b; + + /* termination is mandatory */ + if (alen < 2 || blen < 2) + return 0; + if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') + return 0; + alen--; + blen--; + + while (a - a0 < alen && b - b0 < blen) { + if (a - a0 + 1 > alen || b - b0 + 1 > blen) + return 0; + + if ((signed char)a[0] < 0 || (signed char)b[0] < 0) + return 0; + /* we don't support compression yet */ + if (a[0] >= 64 || b[0] >= 64) + return 0; + + /* truncated case */ + if (a[0] == 0 && a - a0 == alen - 1) + return 1; + if (b[0] == 0 && b - b0 == blen - 1) + return 1; + if (a[0] == 0 || b[0] == 0) + return 0; + + if (a[0] != b[0]) + return 0; + l = a[0]; + if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) + return 0; + if (bcmp(a + 1, b + 1, l) != 0) + return 0; + + a += 1 + l; + b += 1 + l; + } + + if (a - a0 == alen && b - b0 == blen) + return 1; + else + return 0; +} + +/* + * calculate the number of addresses to be returned in the node info reply. 
+ */ +static int +ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, + struct in6_addr *subj) +{ + struct ifnet *ifp; + struct in6_ifaddr *ifa6; + struct ifaddr *ifa; + int addrs = 0, addrsofif, iffound = 0; + int niflags = ni6->ni_flags; + + if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { + switch (ni6->ni_code) { + case ICMP6_NI_SUBJ_IPV6: + if (subj == NULL) /* must be impossible... */ + return (0); + break; + default: + /* + * XXX: we only support IPv6 subject address for + * this Qtype. + */ + return (0); + } + } + + IFNET_RLOCK_NOSLEEP(); + TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + addrsofif = 0; + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ifa6 = (struct in6_ifaddr *)ifa; + + if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && + IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) + iffound = 1; + + /* + * IPv4-mapped addresses can only be returned by a + * Node Information proxy, since they represent + * addresses of IPv4-only nodes, which perforce do + * not implement this protocol. + * [icmp-name-lookups-07, Section 5.4] + * So we don't support NI_NODEADDR_FLAG_COMPAT in + * this function at this moment. + */ + + /* What do we have to do about ::1? */ + switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { + case IPV6_ADDR_SCOPE_LINKLOCAL: + if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_SITELOCAL: + if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_GLOBAL: + if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) + continue; + break; + default: + continue; + } + + /* + * check if anycast is okay. + * XXX: just experimental. not in the spec. 
+ */ + if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && + (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) + continue; /* we need only unicast addresses */ + if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && + (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { + continue; + } + addrsofif++; /* count the address */ + } + IF_ADDR_RUNLOCK(ifp); + if (iffound) { + *ifpp = ifp; + IFNET_RUNLOCK_NOSLEEP(); + return (addrsofif); + } + + addrs += addrsofif; + } + IFNET_RUNLOCK_NOSLEEP(); + + return (addrs); +} + +static int +ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, + struct ifnet *ifp0, int resid) +{ + struct ifnet *ifp; + struct in6_ifaddr *ifa6; + struct ifaddr *ifa; + struct ifnet *ifp_dep = NULL; + int copied = 0, allow_deprecated = 0; + u_char *cp = (u_char *)(nni6 + 1); + int niflags = ni6->ni_flags; + u_int32_t ltime; + + if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) + return (0); /* needless to copy */ + + IFNET_RLOCK_NOSLEEP(); + ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); + again: + + for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ifa6 = (struct in6_ifaddr *)ifa; + + if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && + allow_deprecated == 0) { + /* + * prefererred address should be put before + * deprecated addresses. + */ + + /* record the interface for later search */ + if (ifp_dep == NULL) + ifp_dep = ifp; + + continue; + } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && + allow_deprecated != 0) + continue; /* we now collect deprecated addrs */ + + /* What do we have to do about ::1? 
*/ + switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { + case IPV6_ADDR_SCOPE_LINKLOCAL: + if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_SITELOCAL: + if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) + continue; + break; + case IPV6_ADDR_SCOPE_GLOBAL: + if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) + continue; + break; + default: + continue; + } + + /* + * check if anycast is okay. + * XXX: just experimental. not in the spec. + */ + if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && + (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) + continue; + if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && + (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { + continue; + } + + /* now we can copy the address */ + if (resid < sizeof(struct in6_addr) + + sizeof(u_int32_t)) { + IF_ADDR_RUNLOCK(ifp); + /* + * We give up much more copy. + * Set the truncate flag and return. + */ + nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; + IFNET_RUNLOCK_NOSLEEP(); + return (copied); + } + + /* + * Set the TTL of the address. + * The TTL value should be one of the following + * according to the specification: + * + * 1. The remaining lifetime of a DHCP lease on the + * address, or + * 2. The remaining Valid Lifetime of a prefix from + * which the address was derived through Stateless + * Autoconfiguration. + * + * Note that we currently do not support stateful + * address configuration by DHCPv6, so the former + * case can't happen. 
+ */ + if (ifa6->ia6_lifetime.ia6t_expire == 0) + ltime = ND6_INFINITE_LIFETIME; + else { + if (ifa6->ia6_lifetime.ia6t_expire > + time_second) + ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second); + else + ltime = 0; + } + + bcopy(&ltime, cp, sizeof(u_int32_t)); + cp += sizeof(u_int32_t); + + /* copy the address itself */ + bcopy(&ifa6->ia_addr.sin6_addr, cp, + sizeof(struct in6_addr)); + in6_clearscope((struct in6_addr *)cp); /* XXX */ + cp += sizeof(struct in6_addr); + + resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); + copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); + } + IF_ADDR_RUNLOCK(ifp); + if (ifp0) /* we need search only on the specified IF */ + break; + } + + if (allow_deprecated == 0 && ifp_dep != NULL) { + ifp = ifp_dep; + allow_deprecated = 1; + + goto again; + } + + IFNET_RUNLOCK_NOSLEEP(); + + return (copied); +} + +/* + * XXX almost dup'ed code with rip6_input. + */ +static int +icmp6_rip6_input(struct mbuf **mp, int off) +{ + struct mbuf *m = *mp; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct inpcb *in6p; + struct inpcb *last = NULL; + struct sockaddr_in6 fromsa; + struct icmp6_hdr *icmp6; + struct mbuf *opts = NULL; + +#ifndef PULLDOWN_TEST + /* this is assumed to be safe. */ + icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); + if (icmp6 == NULL) { + /* m is already reclaimed */ + return (IPPROTO_DONE); + } +#endif + + /* + * XXX: the address may have embedded scope zone ID, which should be + * hidden from applications. 
+ */ + bzero(&fromsa, sizeof(fromsa)); + fromsa.sin6_family = AF_INET6; + fromsa.sin6_len = sizeof(struct sockaddr_in6); + fromsa.sin6_addr = ip6->ip6_src; + if (sa6_recoverscope(&fromsa)) { + m_freem(m); + return (IPPROTO_DONE); + } + + INP_INFO_RLOCK(&V_ripcbinfo); + LIST_FOREACH(in6p, &V_ripcb, inp_list) { + if ((in6p->inp_vflag & INP_IPV6) == 0) + continue; + if (in6p->inp_ip_p != IPPROTO_ICMPV6) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && + !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && + !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) + continue; + INP_RLOCK(in6p); + if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, + in6p->in6p_icmp6filt)) { + INP_RUNLOCK(in6p); + continue; + } + if (last != NULL) { + struct mbuf *n = NULL; + + /* + * Recent network drivers tend to allocate a single + * mbuf cluster, rather than to make a couple of + * mbufs without clusters. Also, since the IPv6 code + * path tries to avoid m_pullup(), it is highly + * probable that we still have an mbuf cluster here + * even though the necessary length can be stored in an + * mbuf's internal buffer. + * Meanwhile, the default size of the receive socket + * buffer for raw sockets is not so large. This means + * the possibility of packet loss is relatively higher + * than before. To avoid this scenario, we copy the + * received data to a separate mbuf that does not use + * a cluster, if possible. + * XXX: it is better to copy the data after stripping + * intermediate headers. 
+ */ + if ((m->m_flags & M_EXT) && m->m_next == NULL && + m->m_len <= MHLEN) { + MGET(n, M_DONTWAIT, m->m_type); + if (n != NULL) { + if (m_dup_pkthdr(n, m, M_NOWAIT)) { + bcopy(m->m_data, n->m_data, + m->m_len); + n->m_len = m->m_len; + } else { + m_free(n); + n = NULL; + } + } + } + if (n != NULL || + (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { + if (last->inp_flags & INP_CONTROLOPTS) + ip6_savecontrol(last, n, &opts); + /* strip intermediate headers */ + m_adj(n, off); + SOCKBUF_LOCK(&last->inp_socket->so_rcv); + if (sbappendaddr_locked( + &last->inp_socket->so_rcv, + (struct sockaddr *)&fromsa, n, opts) + == 0) { + /* should notify about lost packet */ + m_freem(n); + if (opts) { + m_freem(opts); + } + SOCKBUF_UNLOCK( + &last->inp_socket->so_rcv); + } else + sorwakeup_locked(last->inp_socket); + opts = NULL; + } + INP_RUNLOCK(last); + } + last = in6p; + } + INP_INFO_RUNLOCK(&V_ripcbinfo); + if (last != NULL) { + if (last->inp_flags & INP_CONTROLOPTS) + ip6_savecontrol(last, m, &opts); + /* strip intermediate headers */ + m_adj(m, off); + + /* avoid using mbuf clusters if possible (see above) */ + if ((m->m_flags & M_EXT) && m->m_next == NULL && + m->m_len <= MHLEN) { + struct mbuf *n; + + MGET(n, M_DONTWAIT, m->m_type); + if (n != NULL) { + if (m_dup_pkthdr(n, m, M_NOWAIT)) { + bcopy(m->m_data, n->m_data, m->m_len); + n->m_len = m->m_len; + + m_freem(m); + m = n; + } else { + m_freem(n); + n = NULL; + } + } + } + SOCKBUF_LOCK(&last->inp_socket->so_rcv); + if (sbappendaddr_locked(&last->inp_socket->so_rcv, + (struct sockaddr *)&fromsa, m, opts) == 0) { + m_freem(m); + if (opts) + m_freem(opts); + SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); + } else + sorwakeup_locked(last->inp_socket); + INP_RUNLOCK(last); + } else { + m_freem(m); + IP6STAT_DEC(ip6s_delivered); + } + return IPPROTO_DONE; +} +#endif /*0*/ +/* + * Reflect the ip6 packet back to the source. + * OFF points to the icmp6 header, counted from the top of the mbuf. 
+ */
+/*
+ * Contract notes for ofp_icmp6_reflect():
+ *
+ *  - "off" is measured from the start of the IPv6 header (the packet's L3
+ *    offset): it is compared against sizeof(struct ofp_ip6_hdr) and is
+ *    subtracted after odp_packet_l3_offset() when the ICMPv6 length is
+ *    computed.
+ *  - The packet is always consumed: it is either handed to ofp_ip6_output()
+ *    (whose return value is ignored) or released with odp_packet_free().
+ *  - Headers are rewritten in place through the pointer returned by
+ *    odp_packet_l3_ptr().
+ *    NOTE(review): the pointer arithmetic below assumes the IPv6 header, any
+ *    extension headers and the ICMPv6 message are contiguous in memory -
+ *    confirm for segmented packets.
+ */
+void
+ofp_icmp6_reflect(odp_packet_t m, size_t off)
+{
+	int plen;
+	struct ofp_ip6_hdr *ip6;
+	struct ofp_icmp6_hdr *icmp6;
+	int icmp6len;
+	struct ofp_in6_addr origdst, src, *srcp = NULL;
+	struct ofp_ifnet *outif = NULL;
+	struct ofp_nh6_entry *nh6 = NULL;
+
+	/* too short to reflect */
+	if (off < sizeof(struct ofp_ip6_hdr)) {
+		OFP_DBG("sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
+		    (u_long)off, (u_long)sizeof(struct ofp_ip6_hdr),
+		    __FILE__, __LINE__);
+		goto bad;
+	}
+
+	icmp6len = odp_packet_len(m) - odp_packet_l3_offset(m) - off;
+
+	/*
+	 * The checksum field of the ICMPv6 header is rewritten below, so the
+	 * fixed header must be present. This also rejects a negative length
+	 * caused by "off" pointing past the end of the packet.
+	 */
+	if (icmp6len < (int)sizeof(struct ofp_icmp6_hdr))
+		goto bad;
+
+	ip6 = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(m, NULL);
+	if (ip6 == NULL)
+		goto bad;
+
+	/*
+	 * If there are extra headers between IPv6 and ICMPv6, strip
+	 * off that header first: slide the ICMPv6 message down so that it
+	 * directly follows the IPv6 header, then trim the now unused tail.
+	 */
+	if (off > sizeof(struct ofp_ip6_hdr)) {
+		uint8_t *paddr = (uint8_t *)(ip6 + 1);
+		size_t l = off - sizeof(struct ofp_ip6_hdr);
+
+		/* source and destination overlap, hence memmove() */
+		memmove(paddr, paddr + l, (size_t)icmp6len);
+
+		/*
+		 * If the tail cannot be trimmed the packet length would no
+		 * longer match the shifted payload; drop instead of sending
+		 * a corrupt reply.
+		 */
+		if (odp_packet_pull_tail(m, l) == NULL)
+			goto bad;
+		off -= l;	/* off == sizeof(struct ofp_ip6_hdr) from here on */
+	}
+
+	/*
+	 * Payload length used for the checksum.
+	 * NOTE(review): ofp_ip6_plen in the header is not rewritten here after
+	 * stripping; presumably ofp_ip6_output() takes care of it - confirm.
+	 */
+	plen = odp_packet_len(m) - odp_packet_l3_offset(m) -
+		sizeof(struct ofp_ip6_hdr);
+
+	icmp6 = (struct ofp_icmp6_hdr *)(ip6 + 1);
+
+	origdst = ip6->ip6_dst;
+	/*
+	 * ip6_input() drops a packet if its src is multicast.
+	 * So, the src is never multicast.
+	 */
+	ip6->ip6_dst = ip6->ip6_src;
+
+	/*
+	 * Source address selection.
+	 *
+	 * If the incoming packet was not sent to a multicast address, look up
+	 * the next hop towards the new destination and use the IPv6 address
+	 * of the interface the reply will leave through.  (The FreeBSD
+	 * original re-used the packet's own destination address here.)
+	 *
+	 * The interface lookup result is checked before use: if no interface
+	 * is found for the next hop's port/vlan we fall through to the
+	 * generic source selection below instead of dereferencing NULL.
+	 */
+	if (!OFP_IN6_IS_ADDR_MULTICAST(&origdst)) {
+		nh6 = ofp_get_next_hop6(0, &ip6->ip6_dst.ofp_s6_addr[0], 0);
+		if (nh6)
+			outif = ofp_get_ifnet(nh6->port, nh6->vlan);
+		if (outif) {
+			memcpy(&src.ofp_s6_addr[0], outif->ip6_addr, 16);
+			srcp = &src;
+		}
+	}
+
+	if (srcp == NULL) {
+		int e;
+		struct ofp_sockaddr_in6 sin6;
+
+		/*
+		 * This case matches to multicasts, our anycast, or unicasts
+		 * that we do not own. Select a source address based on the
+		 * source address of the erroneous packet.
+		 */
+		bzero(&sin6, sizeof(sin6));
+		sin6.sin6_family = OFP_AF_INET6;
+		sin6.sin6_len = sizeof(sin6);
+		sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
+
+		e = ofp_in6_selectsrc(&sin6, NULL, NULL, NULL, NULL, &outif, &src);
+		if (e) {
+			OFP_DBG("icmp6_reflect: source can't be determined: "
+			    "dst=%s, error=%d\n",
+			    ofp_print_ip6_addr(
+				&sin6.sin6_addr.ofp_s6_addr[0]), e);
+			goto bad;
+		}
+		srcp = &src;
+	}
+
+	/* Rebuild the fixed IPv6 header fields for the reply. */
+	ip6->ip6_src = *srcp;
+	ip6->ofp_ip6_flow = 0;
+	ip6->ofp_ip6_vfc &= ~OFP_IPV6_VERSION_MASK;
+	ip6->ofp_ip6_vfc |= OFP_IPV6_VERSION;
+	ip6->ofp_ip6_nxt = OFP_IPPROTO_ICMPV6;
+	ip6->ofp_ip6_hlim = V_ip6_defhlim;
+
+	/* The checksum must be computed with the checksum field zeroed. */
+	icmp6->icmp6_cksum = 0;
+	icmp6->icmp6_cksum = ofp_in6_cksum(m, OFP_IPPROTO_ICMPV6,
+	    sizeof(struct ofp_ip6_hdr), plen);
+
+	/* nh6 may be NULL here (multicast original destination). */
+	(void) ofp_ip6_output(m, nh6);
+
+	return;
+
+ bad:
+	odp_packet_free(m);
+	return;
+}
+
+#if 0
+void
+icmp6_fasttimo(void)
+{
+
+	mld_fasttimo();
+}
+
+void
+icmp6_slowtimo(void)
+{
+
+	mld_slowtimo();
+}
+
+static const char *
+icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
+    struct in6_addr *tgt6)
+{
+	static char buf[1024];
+	char ip6bufs[INET6_ADDRSTRLEN];
+	char ip6bufd[INET6_ADDRSTRLEN];
+	char ip6buft[INET6_ADDRSTRLEN];
+	snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
+	    ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6),
+	    ip6_sprintf(ip6buft, tgt6));
+	return buf;
+}
+
+void
+icmp6_redirect_input(struct mbuf *m, int off)
+{
+	struct ifnet *ifp;
+	struct
ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct nd_redirect *nd_rd; + int icmp6len = ntohs(ip6->ip6_plen); + char *lladdr = NULL; + int lladdrlen = 0; + struct rtentry *rt = NULL; + int is_router; + int is_onlink; + struct in6_addr src6 = ip6->ip6_src; + struct in6_addr redtgt6; + struct in6_addr reddst6; + union nd_opts ndopts; + char ip6buf[INET6_ADDRSTRLEN]; + + M_ASSERTPKTHDR(m); + KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__)); + + ifp = m->m_pkthdr.rcvif; + + /* XXX if we are router, we don't update route by icmp6 redirect */ + if (V_ip6_forwarding) + goto freeit; + if (!V_icmp6_rediraccept) + goto freeit; + +#ifndef PULLDOWN_TEST + IP6_EXTHDR_CHECK(m, off, icmp6len,); + nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); + if (nd_rd == NULL) { + ICMP6STAT_INC(icp6s_tooshort); + return; + } +#endif + redtgt6 = nd_rd->nd_rd_target; + reddst6 = nd_rd->nd_rd_dst; + + if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || + in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { + goto freeit; + } + + /* validation */ + if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { + nd6log((LOG_ERR, + "ICMP6 redirect sent from %s rejected; " + "must be from linklocal\n", + ip6_sprintf(ip6buf, &src6))); + goto bad; + } + if (ip6->ip6_hlim != 255) { + nd6log((LOG_ERR, + "ICMP6 redirect sent from %s rejected; " + "hlim=%d (must be 255)\n", + ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); + goto bad; + } + { + /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ + struct sockaddr_in6 sin6; + struct in6_addr *gw6; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); + if (rt) { + if (rt->rt_gateway == NULL || + rt->rt_gateway->sa_family != AF_INET6) { + RTFREE_LOCKED(rt); + nd6log((LOG_ERR, + "ICMP6 redirect rejected; no 
route " + "with inet6 gateway found for redirect dst: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + + gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr); + if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) { + RTFREE_LOCKED(rt); + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "not equal to gw-for-src=%s (must be same): " + "%s\n", + ip6_sprintf(ip6buf, gw6), + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + } else { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "no route found for redirect dst: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + RTFREE_LOCKED(rt); + rt = NULL; + } + if (IN6_IS_ADDR_MULTICAST(&reddst6)) { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "redirect dst must be unicast: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + + is_router = is_onlink = 0; + if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) + is_router = 1; /* router case */ + if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) + is_onlink = 1; /* on-link destination case */ + if (!is_router && !is_onlink) { + nd6log((LOG_ERR, + "ICMP6 redirect rejected; " + "neither router case nor onlink case: %s\n", + icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + /* validation passed */ + + icmp6len -= sizeof(*nd_rd); + nd6_option_init(nd_rd + 1, icmp6len, &ndopts); + if (nd6_options(&ndopts) < 0) { + nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n", + __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + /* nd6_options have incremented stats */ + goto freeit; + } + + if (ndopts.nd_opts_tgt_lladdr) { + lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); + lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; + } + + if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s " + "(if %d, icmp6 packet %d): %s\n", + __func__, ip6_sprintf(ip6buf, &redtgt6), + ifp->if_addrlen, lladdrlen - 2, + 
icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + goto bad; + } + + /* RFC 2461 8.3 */ + nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, + is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); + + if (!is_onlink) { /* better router case. perform rtredirect. */ + /* perform rtredirect */ + struct sockaddr_in6 sdst; + struct sockaddr_in6 sgw; + struct sockaddr_in6 ssrc; + u_int fibnum; + + bzero(&sdst, sizeof(sdst)); + bzero(&sgw, sizeof(sgw)); + bzero(&ssrc, sizeof(ssrc)); + sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6; + sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len = + sizeof(struct sockaddr_in6); + bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); + bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); + bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) + in6_rtredirect((struct sockaddr *)&sdst, + (struct sockaddr *)&sgw, (struct sockaddr *)NULL, + RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&ssrc, + fibnum); + } + /* finally update cached route in each socket via pfctlinput */ + { + struct sockaddr_in6 sdst; + + bzero(&sdst, sizeof(sdst)); + sdst.sin6_family = AF_INET6; + sdst.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); + pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); +#ifdef IPSEC + key_sa_routechange((struct sockaddr *)&sdst); +#endif /* IPSEC */ + } + + freeit: + m_freem(m); + return; + + bad: + ICMP6STAT_INC(icp6s_badredirect); + m_freem(m); +} + +void +icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) +{ + struct ifnet *ifp; /* my outgoing interface */ + struct in6_addr *ifp_ll6; + struct in6_addr *router_ll6; + struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ + struct mbuf *m = NULL; /* newly allocated one */ + struct m_tag *mtag; + struct ip6_hdr *ip6; /* m as struct ip6_hdr */ + struct nd_redirect *nd_rd; + struct llentry *ln = NULL; + size_t maxlen; + u_char *p; + struct 
ifnet *outif = NULL; + struct sockaddr_in6 src_sa; + + icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0); + + /* if we are not router, we don't send icmp6 redirect */ + if (!V_ip6_forwarding) + goto fail; + + /* sanity check */ + if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) + goto fail; + + /* + * Address check: + * the source address must identify a neighbor, and + * the destination address must not be a multicast address + * [RFC 2461, sec 8.2] + */ + sip6 = mtod(m0, struct ip6_hdr *); + bzero(&src_sa, sizeof(src_sa)); + src_sa.sin6_family = AF_INET6; + src_sa.sin6_len = sizeof(src_sa); + src_sa.sin6_addr = sip6->ip6_src; + if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) + goto fail; + if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) + goto fail; /* what should we do here? */ + + /* rate limit */ + if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) + goto fail; + + /* + * Since we are going to append up to 1280 bytes (= IPV6_MMTU), + * we almost always ask for an mbuf cluster for simplicity. + * (MHLEN < IPV6_MMTU is almost always true) + */ +#if IPV6_MMTU >= MCLBYTES +# error assumption failed about IPV6_MMTU and MCLBYTES +#endif + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m && IPV6_MMTU >= MHLEN) + MCLGET(m, M_DONTWAIT); + if (!m) + goto fail; + M_SETFIB(m, rt->rt_fibnum); + m->m_pkthdr.rcvif = NULL; + m->m_len = 0; + maxlen = M_TRAILINGSPACE(m); + maxlen = min(IPV6_MMTU, maxlen); + /* just for safety */ + if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { + goto fail; + } + + { + /* get ip6 linklocal address for ifp(my outgoing interface). */ + struct in6_ifaddr *ia; + if ((ia = in6ifa_ifpforlinklocal(ifp, + IN6_IFF_NOTREADY| + IN6_IFF_ANYCAST)) == NULL) + goto fail; + ifp_ll6 = &ia->ia_addr.sin6_addr; + /* XXXRW: reference released prematurely. */ + ifa_free(&ia->ia_ifa); + } + + /* get ip6 linklocal address for the router. 
*/ + if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { + struct sockaddr_in6 *sin6; + sin6 = (struct sockaddr_in6 *)rt->rt_gateway; + router_ll6 = &sin6->sin6_addr; + if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) + router_ll6 = (struct in6_addr *)NULL; + } else + router_ll6 = (struct in6_addr *)NULL; + + /* ip6 */ + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + /* ip6->ip6_plen will be set later */ + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_hlim = 255; + /* ip6->ip6_src must be linklocal addr for my outgoing if. */ + bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); + bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); + + /* ND Redirect */ + nd_rd = (struct nd_redirect *)(ip6 + 1); + nd_rd->nd_rd_type = ND_REDIRECT; + nd_rd->nd_rd_code = 0; + nd_rd->nd_rd_reserved = 0; + if (rt->rt_flags & RTF_GATEWAY) { + /* + * nd_rd->nd_rd_target must be a link-local address in + * better router cases. + */ + if (!router_ll6) + goto fail; + bcopy(router_ll6, &nd_rd->nd_rd_target, + sizeof(nd_rd->nd_rd_target)); + bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, + sizeof(nd_rd->nd_rd_dst)); + } else { + /* make sure redtgt == reddst */ + bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, + sizeof(nd_rd->nd_rd_target)); + bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, + sizeof(nd_rd->nd_rd_dst)); + } + + p = (u_char *)(nd_rd + 1); + + if (!router_ll6) + goto nolladdropt; + + { + /* target lladdr option */ + int len; + struct nd_opt_hdr *nd_opt; + char *lladdr; + + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(router_ll6, 0, ifp); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) + goto nolladdropt; + + len = sizeof(*nd_opt) + ifp->if_addrlen; + len = (len + 7) & ~7; /* round by 8 */ + /* safety check */ + if (len + (p - (u_char *)ip6) > maxlen) + goto nolladdropt; + + if (ln->la_flags & LLE_VALID) { + nd_opt = (struct nd_opt_hdr *)p; + nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; + nd_opt->nd_opt_len = len >> 3; + 
lladdr = (char *)(nd_opt + 1); + bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen); + p += len; + } + } +nolladdropt: + if (ln != NULL) + LLE_RUNLOCK(ln); + + m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; + + /* just to be safe */ +#ifdef M_DECRYPTED /*not openbsd*/ + if (m0->m_flags & M_DECRYPTED) + goto noredhdropt; +#endif + if (p - (u_char *)ip6 > maxlen) + goto noredhdropt; + + { + /* redirected header option */ + int len; + struct nd_opt_rd_hdr *nd_opt_rh; + + /* + * compute the maximum size for icmp6 redirect header option. + * XXX room for auth header? + */ + len = maxlen - (p - (u_char *)ip6); + len &= ~7; + + /* This is just for simplicity. */ + if (m0->m_pkthdr.len != m0->m_len) { + if (m0->m_next) { + m_freem(m0->m_next); + m0->m_next = NULL; + } + m0->m_pkthdr.len = m0->m_len; + } + + /* + * Redirected header option spec (RFC2461 4.6.3) talks nothing + * about padding/truncate rule for the original IP packet. + * From the discussion on IPv6imp in Feb 1999, + * the consensus was: + * - "attach as much as possible" is the goal + * - pad if not aligned (original size can be guessed by + * original ip6 header) + * Following code adds the padding if it is simple enough, + * and truncates if not. 
+ */ + if (m0->m_next || m0->m_pkthdr.len != m0->m_len) + panic("assumption failed in %s:%d", __FILE__, + __LINE__); + + if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { + /* not enough room, truncate */ + m0->m_pkthdr.len = m0->m_len = len - + sizeof(*nd_opt_rh); + } else { + /* enough room, pad or truncate */ + size_t extra; + + extra = m0->m_pkthdr.len % 8; + if (extra) { + /* pad if easy enough, truncate if not */ + if (8 - extra <= M_TRAILINGSPACE(m0)) { + /* pad */ + m0->m_len += (8 - extra); + m0->m_pkthdr.len += (8 - extra); + } else { + /* truncate */ + m0->m_pkthdr.len -= extra; + m0->m_len -= extra; + } + } + len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); + m0->m_pkthdr.len = m0->m_len = len - + sizeof(*nd_opt_rh); + } + + nd_opt_rh = (struct nd_opt_rd_hdr *)p; + bzero(nd_opt_rh, sizeof(*nd_opt_rh)); + nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; + nd_opt_rh->nd_opt_rh_len = len >> 3; + p += sizeof(*nd_opt_rh); + m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; + + /* connect m0 to m */ + m_tag_delete_chain(m0, NULL); + m0->m_flags &= ~M_PKTHDR; + m->m_next = m0; + m->m_pkthdr.len = m->m_len + m0->m_len; + m0 = NULL; + } +noredhdropt:; + if (m0) { + m_freem(m0); + m0 = NULL; + } + + /* XXX: clear embedded link IDs in the inner header */ + in6_clearscope(&sip6->ip6_src); + in6_clearscope(&sip6->ip6_dst); + in6_clearscope(&nd_rd->nd_rd_target); + in6_clearscope(&nd_rd->nd_rd_dst); + + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + + nd_rd->nd_rd_cksum = 0; + nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, + sizeof(*ip6), ntohs(ip6->ip6_plen)); + + if (send_sendso_input_hook != NULL) { + mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), + M_NOWAIT); + if (mtag == NULL) + goto fail; + *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type; + m_tag_prepend(m, mtag); + } + + /* send the packet to outside... 
*/ + ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); + if (outif) { + icmp6_ifstat_inc(outif, ifs6_out_msg); + icmp6_ifstat_inc(outif, ifs6_out_redirect); + } + ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); + + return; + +fail: + if (m) + m_freem(m); + if (m0) + m_freem(m0); +} + +/* + * ICMPv6 socket option processing. + */ +int +icmp6_ctloutput(struct socket *so, struct sockopt *sopt) +{ + int error = 0; + int optlen; + struct inpcb *inp = sotoinpcb(so); + int level, op, optname; + + if (sopt) { + level = sopt->sopt_level; + op = sopt->sopt_dir; + optname = sopt->sopt_name; + optlen = sopt->sopt_valsize; + } else + level = op = optname = optlen = 0; + + if (level != IPPROTO_ICMPV6) { + return EINVAL; + } + + switch (op) { + case PRCO_SETOPT: + switch (optname) { + case ICMP6_FILTER: + { + struct icmp6_filter ic6f; + + if (optlen != sizeof(ic6f)) { + error = EMSGSIZE; + break; + } + error = sooptcopyin(sopt, &ic6f, optlen, optlen); + if (error == 0) { + INP_WLOCK(inp); + *inp->in6p_icmp6filt = ic6f; + INP_WUNLOCK(inp); + } + break; + } + + default: + error = ENOPROTOOPT; + break; + } + break; + + case PRCO_GETOPT: + switch (optname) { + case ICMP6_FILTER: + { + struct icmp6_filter ic6f; + + INP_RLOCK(inp); + ic6f = *inp->in6p_icmp6filt; + INP_RUNLOCK(inp); + error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); + break; + } + + default: + error = ENOPROTOOPT; + break; + } + break; + } + + return (error); +} + +/* + * Perform rate limit check. + * Returns 0 if it is okay to send the icmp6 packet. + * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate + * limitation. + * + * XXX per-destination/type check necessary? 
+ * + * dst - not used at this moment + * type - not used at this moment + * code - not used at this moment + */ +static int +icmp6_ratelimit(const struct in6_addr *dst, const int type, + const int code) +{ + int ret; + + ret = 0; /* okay to send */ + + /* PPS limit */ + if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, + V_icmp6errppslim)) { + /* The packet is subject to rate limit */ + ret++; + } + + return ret; +} + +#endif diff --git a/src/ofp_in.c b/src/ofp_in.c new file mode 100644 index 00000000..8749e024 --- /dev/null +++ b/src/ofp_in.c @@ -0,0 +1,370 @@ +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (C) 2001 WIDE Project. All rights reserved. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.c 8.4 (Berkeley) 1/9/95 + */ + +#include "odp.h" +#include "ofpi_in.h" +#include "ofpi_in_pcb.h" +#include "ofpi_udp.h" +#include "ofpi_protosw.h" +#include "ofpi_socketvar.h" +#include "ofpi_sockstate.h" +#include "ofpi_errno.h" +#include "ofpi_portconf.h" +#include "ofpi_socket.h" +#include "ofpi_ioctl.h" + +static int in_mask2len(struct ofp_in_addr *); +static void in_len2mask(struct ofp_in_addr *, int); +static int in_lifaddr_ioctl(struct socket *, uint64_t, char *, + struct ofp_ifnet *, struct thread *); + +#if 0 +static VNET_DEFINE(int, sameprefixcarponly); +#define V_sameprefixcarponly VNET(sameprefixcarponly) +SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW, + &VNET_NAME(sameprefixcarponly), 0, + "Refuse to create same prefixes on different interfaces"); + +VNET_DECLARE(struct inpcbinfo, ripcbinfo); +#define V_ripcbinfo VNET(ripcbinfo) + +VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ +#define V_arpstat VNET(arpstat) +#endif + +static int +in_mask2len(struct ofp_in_addr *mask) +{ + int x, y; + uint8_t *p; + + p = (uint8_t *)mask; + for (x = 0; x < (int)sizeof(*mask); x++) { + if (p[x] != 0xff) + break; + } + y = 0; + if (x < (int)sizeof(*mask)) { + for (y = 0; y < 8; y++) { + if ((p[x] & (0x80 >> y)) == 0) + break; + } + } + return (x * 8 + y); +} + +static void +in_len2mask(struct ofp_in_addr *mask, int len) +{ + int i; + uint8_t *p; + + p = (uint8_t *)mask; + 
bzero(mask, sizeof(*mask)); + for (i = 0; i < len / 8; i++) + p[i] = 0xff; + if (len % 8) + p[i] = (0xff00 >> (len % 8)) & 0xff; +} + +/* + * Generic internet control operations (ofp_ioctl's). + * + * ifp is NULL if not an interface-specific ofp_ioctl. + */ +/* ARGSUSED */ +int +ofp_in_control(struct socket *so, uint32_t cmd, char *data, struct ofp_ifnet *ifp, + struct thread *td) +{ + register struct ofp_ifreq *ifr = (struct ofp_ifreq *)data; + struct ofp_in_aliasreq *ifra = (struct ofp_in_aliasreq *)data; + struct ofp_in_tunreq *treq = (struct ofp_in_tunreq *)data; + int error; + + /* + * Filter out ioctls we implement directly; forward the rest on to + * in_lifaddr_ioctl() and ifp->if_ioctl(). + */ + switch (cmd) { + case OFP_SIOCGIFCONF: + ofp_get_interfaces((struct ofp_ifconf *)data); + return 0; + + case OFP_SIOCGIFTUN: + if (ifp == NULL) + return (OFP_EINVAL); + + treq->iftun_addr.sin_addr.s_addr = ifp->ip_addr; + treq->iftun_p2p_addr.sin_addr.s_addr = ifp->ip_p2p; + treq->iftun_local_addr.sin_addr.s_addr = ifp->ip_local; + treq->iftun_remote_addr.sin_addr.s_addr = ifp->ip_remote; + treq->iftun_vrf = ifp->vrf; + return 0; + + case OFP_SIOCAIFADDR: + case OFP_SIOCDIFADDR: + case OFP_SIOCGIFADDR: + case OFP_SIOCGIFBRDADDR: + case OFP_SIOCGIFDSTADDR: + case OFP_SIOCGIFNETMASK: + case OFP_SIOCSIFADDR: + case OFP_SIOCSIFBRDADDR: + case OFP_SIOCSIFDSTADDR: + case OFP_SIOCSIFNETMASK: + case OFP_SIOCGIFFIB: + case OFP_SIOCSIFFIB: + break; + + case OFP_SIOCALIFADDR: + if (ifp == NULL) + return (OFP_EINVAL); + return in_lifaddr_ioctl(so, cmd, data, ifp, td); + + case OFP_SIOCDLIFADDR: + if (ifp == NULL) + return (OFP_EINVAL); + return in_lifaddr_ioctl(so, cmd, data, ifp, td); + + case OFP_SIOCGLIFADDR: + if (ifp == NULL) + return (OFP_EINVAL); + return in_lifaddr_ioctl(so, cmd, data, ifp, td); + + default: + return (OFP_EOPNOTSUPP); + } + + if (ifp == NULL) + return (OFP_EADDRNOTAVAIL); + + error = 0; + + uint32_t if_addr = ifp->ip_addr; + uint32_t if_bcast = 
ifp->bcast_addr; + uint32_t if_p2p = ifp->ip_p2p; + int if_masklen = ifp->masklen; + int vrf = ifp->vrf; + + switch (cmd) { + case OFP_SIOCAIFADDR: + case OFP_SIOCSIFADDR: + if_addr = ifra->ifra_addr.sin_addr.s_addr; + if_masklen = in_mask2len(&ifra->ifra_mask.sin_addr); + break; + case OFP_SIOCSIFNETMASK: + if_masklen = in_mask2len(&ifra->ifra_mask.sin_addr); + break; + case OFP_SIOCSIFDSTADDR: + if_p2p = ifra->ifra_addr.sin_addr.s_addr; + break; + case OFP_SIOCSIFBRDADDR: + if_bcast = ((struct ofp_sockaddr_in *) + &ifr->ifr_broadaddr)->sin_addr.s_addr; + break; + case OFP_SIOCSIFFIB: + vrf = ifr->ifr_fib; + break; + } + + switch (cmd) { + case OFP_SIOCAIFADDR: + case OFP_SIOCSIFADDR: + case OFP_SIOCSIFNETMASK: + case OFP_SIOCSIFFIB: + if (ifp->port == GRE_PORTS) { + ofp_config_interface_up_tun + (ifp->port, ifp->vlan, + vrf, ifp->ip_local, + ifp->ip_remote, if_p2p, + if_addr, if_masklen); + } else { + ofp_config_interface_down(ifp->port, ifp->vlan); + ofp_config_interface_up_v4(ifp->port, ifp->vlan, vrf, + if_addr, if_masklen); + } + break; + case OFP_SIOCDIFADDR: + if (ifra->ifra_addr.sin_family == OFP_AF_INET) { + ofp_config_interface_down(ifp->port, ifp->vlan); + } + break; + case OFP_SIOCSIFDSTADDR: + ifp->ip_p2p = if_p2p; + break; + case OFP_SIOCSIFBRDADDR: + ifp->bcast_addr = if_bcast; + break; + } + + /* + * Most paths in this switch return directly or via out. Only paths + * that remove the address break in order to hit common removal code. 
+ */ + switch (cmd) { + case OFP_SIOCGIFADDR: + ((struct ofp_sockaddr_in *)&ifr->ifr_addr)->sin_addr.s_addr + = ifp->ip_addr; + goto out; + + case OFP_SIOCGIFBRDADDR: + ((struct ofp_sockaddr_in *)&ifr->ifr_dstaddr)->sin_addr.s_addr + = ifp->bcast_addr; + goto out; + + case OFP_SIOCGIFDSTADDR: +#if 0 // HJo + if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { + error = OFP_EINVAL; + goto out; + } +#endif + ((struct ofp_sockaddr_in *)&ifr->ifr_dstaddr)->sin_addr.s_addr + = ifp->ip_p2p; + goto out; + + case OFP_SIOCGIFNETMASK: + ((struct ofp_sockaddr_in *)&ifr->ifr_addr)->sin_addr.s_addr = + odp_cpu_to_be_32((~0)<<(32 - ifp->masklen)); + goto out; + + case OFP_SIOCGIFFIB: + ifr->ifr_fib = ifp->vrf; + goto out; + + case OFP_SIOCSIFADDR: + case OFP_SIOCSIFBRDADDR: + case OFP_SIOCSIFDSTADDR: + case OFP_SIOCSIFNETMASK: + case OFP_SIOCAIFADDR: + case OFP_SIOCDIFADDR: + case OFP_SIOCSIFFIB: + goto out; + + default: + panic("ofp_in_control: unsupported ofp_ioctl"); + } + +out: + return (error); +} + +/* + * SIOC[GAD]LIFADDR. + * SIOCGLIFADDR: get first address. (?!?) + * SIOCGLIFADDR with IFLR_PREFIX: + * get first address that matches the specified prefix. + * SIOCALIFADDR: add the specified address. + * SIOCALIFADDR with IFLR_PREFIX: + * OFP_EINVAL since we can't deduce hostid part of the address. + * SIOCDLIFADDR: delete the specified address. + * SIOCDLIFADDR with IFLR_PREFIX: + * delete the first address that matches the specified prefix. 
+ * return values: + * OFP_EINVAL on invalid parameters + * OFP_EADDRNOTAVAIL on prefix match failed/specified address not found + * other values may be returned from in_ioctl() + */ +static int +in_lifaddr_ioctl(struct socket *so, uint64_t cmd, char * data, + struct ofp_ifnet *ifp, struct thread *td) +{ + struct ofp_if_laddrreq *iflr = (struct ofp_if_laddrreq *)data; + + /* sanity checks */ + if (data == NULL || ifp == NULL) { + panic("invalid argument to in_lifaddr_ioctl"); + /*NOTRECHED*/ + } + + switch (cmd) { + case OFP_SIOCGLIFADDR: + /* address must be specified on GET with IFLR_PREFIX */ + if ((iflr->flags & IFLR_PREFIX) == 0) + break; + /*FALLTHROUGH*/ + case OFP_SIOCALIFADDR: + case OFP_SIOCDLIFADDR: + /* address must be specified on ADD and DELETE */ + if (iflr->addr.ss_family != OFP_AF_INET) + return (OFP_EINVAL); + if (iflr->addr.ss_len != sizeof(struct ofp_sockaddr_in)) + return (OFP_EINVAL); + /* XXX need improvement */ + if (iflr->dstaddr.ss_family + && iflr->dstaddr.ss_family != OFP_AF_INET) + return (OFP_EINVAL); + if (iflr->dstaddr.ss_family + && iflr->dstaddr.ss_len != sizeof(struct ofp_sockaddr_in)) + return (OFP_EINVAL); + break; + default: /*shouldn't happen*/ + return (OFP_EOPNOTSUPP); + } + if (sizeof(struct ofp_in_addr) * 8 < iflr->prefixlen) + return (OFP_EINVAL); + + switch (cmd) { + case OFP_SIOCALIFADDR: + { + struct ofp_in_aliasreq ifra; + + if (iflr->flags & IFLR_PREFIX) + return (OFP_EINVAL); + + /* copy args to in_aliasreq, perform ofp_ioctl(SIOCAIFADDR). 
*/ + bzero(&ifra, sizeof(ifra)); + bcopy(iflr->iflr_name, ifra.ifra_name, + sizeof(ifra.ifra_name)); + + bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len); + + if (iflr->dstaddr.ss_family) { /*XXX*/ + bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, + iflr->dstaddr.ss_len); + } + + ifra.ifra_mask.sin_family = OFP_AF_INET; + ifra.ifra_mask.sin_len = sizeof(struct ofp_sockaddr_in); + in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen); + + return (ofp_in_control(so, OFP_SIOCAIFADDR, (char *)&ifra, ifp, td)); + } + case OFP_SIOCGLIFADDR: + case OFP_SIOCDLIFADDR: + break; + } + + return (OFP_EOPNOTSUPP); /*just for safety*/ +} diff --git a/src/ofp_in6.c b/src/ofp_in6.c new file mode 100644 index 00000000..69b4e950 --- /dev/null +++ b/src/ofp_in6.c @@ -0,0 +1,2889 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.c 8.2 (Berkeley) 11/15/93 + */ +#if 0 +#include +__FBSDID("$FreeBSD: release/9.1.0/sys/netinet6/in6.c 238476 2012-07-15 11:13:09Z bz $"); + +#include "opt_compat.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#endif + +#include "ofpi_in.h" +#include "ofpi_in6.h" +#include "ofpi_ip6_var.h" +#include "ofpi_socket.h" +#include "api/ofp_types.h" +#include "api/ofp_route_arp.h" +#include "api/ofp_errno.h" +#include "ofpi_vnet.h" + +VNET_DEFINE(int, ip6_use_defzone) = 1; + +/* + * Definitions of some costant IP6 addresses. 
+ */ +const struct ofp_in6_addr ofp_in6addr_any = OFP_IN6ADDR_ANY_INIT; +const struct ofp_in6_addr ofp_in6addr_loopback = + OFP_IN6ADDR_LOOPBACK_INIT; +const struct ofp_in6_addr ofp_in6addr_nodelocal_allnodes = + OFP_IN6ADDR_NODELOCAL_ALLNODES_INIT; +const struct ofp_in6_addr ofp_in6addr_linklocal_allnodes = + OFP_IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct ofp_in6_addr ofp_in6addr_linklocal_allrouters = + OFP_IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct ofp_in6_addr ofp_in6addr_linklocal_allv2routers = + OFP_IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT; + +const struct ofp_in6_addr ofp_in6mask0 = OFP_IN6MASK0; +const struct ofp_in6_addr ofp_in6mask32 = OFP_IN6MASK32; +const struct ofp_in6_addr ofp_in6mask64 = OFP_IN6MASK64; +const struct ofp_in6_addr ofp_in6mask96 = OFP_IN6MASK96; +const struct ofp_in6_addr ofp_in6mask128 = OFP_IN6MASK128; + +#if 0 +const struct sockaddr_in6 sa6_any = + { sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 }; + +static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t, + struct ifnet *, struct thread *)); +static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *, + struct sockaddr_in6 *, int)); +static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); + +int (*faithprefix_p)(struct in6_addr *); + +#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) +#define ia62ifa(ia6) (&((ia6)->ia_ifa)) + +void +in6_ifaddloop(struct ifaddr *ifa) +{ + struct sockaddr_dl gateway; + struct sockaddr_in6 mask, addr; + struct rtentry rt; + struct in6_ifaddr *ia; + struct ifnet *ifp; + struct llentry *ln; + + ia = ifa2ia6(ifa); + ifp = ifa->ifa_ifp; + IF_AFDATA_LOCK(ifp); + ifa->ifa_rtrequest = nd6_rtrequest; + ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | + LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + if (ln != NULL) { + ln->la_expire = 0; /* for IPv6 this means permanent */ + ln->ln_state = ND6_LLINFO_REACHABLE; + /* + * initialize for rtmsg generation + */ + bzero(&gateway, 
sizeof(gateway)); + gateway.sdl_len = sizeof(gateway); + gateway.sdl_family = AF_LINK; + gateway.sdl_nlen = 0; + gateway.sdl_alen = 6; + memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned, + sizeof(ln->ll_addr)); + LLE_WUNLOCK(ln); + } + + bzero(&rt, sizeof(rt)); + rt.rt_gateway = (struct sockaddr *)&gateway; + memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); + memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); + rt_mask(&rt) = (struct sockaddr *)&mask; + rt_key(&rt) = (struct sockaddr *)&addr; + rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC; + /* Announce arrival of local address to all FIBs. */ + rt_newaddrmsg(RTM_ADD, ifa, 0, &rt); +} + +void +in6_ifremloop(struct ifaddr *ifa) +{ + struct sockaddr_dl gateway; + struct sockaddr_in6 mask, addr; + struct rtentry rt0; + struct in6_ifaddr *ia; + struct ifnet *ifp; + + ia = ifa2ia6(ifa); + ifp = ifa->ifa_ifp; + IF_AFDATA_LOCK(ifp); + lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + + /* + * initialize for rtmsg generation + */ + bzero(&gateway, sizeof(gateway)); + gateway.sdl_len = sizeof(gateway); + gateway.sdl_family = AF_LINK; + gateway.sdl_nlen = 0; + gateway.sdl_alen = ifp->if_addrlen; + bzero(&rt0, sizeof(rt0)); + rt0.rt_gateway = (struct sockaddr *)&gateway; + memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); + memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); + rt_mask(&rt0) = (struct sockaddr *)&mask; + rt_key(&rt0) = (struct sockaddr *)&addr; + rt0.rt_flags = RTF_HOST | RTF_STATIC; + /* Announce removal of local address to all FIBs. 
*/ + rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0); +} + +int +in6_mask2len(struct in6_addr *mask, u_char *lim0) +{ + int x = 0, y; + u_char *lim = lim0, *p; + + /* ignore the scope_id part */ + if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) + lim = (u_char *)mask + sizeof(*mask); + for (p = (u_char *)mask; p < lim; x++, p++) { + if (*p != 0xff) + break; + } + y = 0; + if (p < lim) { + for (y = 0; y < 8; y++) { + if ((*p & (0x80 >> y)) == 0) + break; + } + } + + /* + * when the limit pointer is given, do a stricter check on the + * remaining bits. + */ + if (p < lim) { + if (y != 0 && (*p & (0x00ff >> y)) != 0) + return (-1); + for (p = p + 1; p < lim; p++) + if (*p != 0) + return (-1); + } + + return x * 8 + y; +} + +#ifdef COMPAT_FREEBSD32 +struct in6_ndifreq32 { + char ifname[IFNAMSIZ]; + uint32_t ifindex; +}; +#define SIOCGDEFIFACE32_IN6 _IOWR('i', 86, struct in6_ndifreq32) +#endif + +int +in6_control(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct thread *td) +{ + struct in6_ifreq *ifr = (struct in6_ifreq *)data; + struct in6_ifaddr *ia = NULL; + struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; + struct sockaddr_in6 *sa6; + int error; + + switch (cmd) { + case SIOCGETSGCNT_IN6: + case SIOCGETMIFCNT_IN6: + /* + * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c. + * We cannot see how that would be needed, so do not adjust the + * KPI blindly; more likely should clean up the IPv4 variant. + */ + return (mrt6_ioctl ? 
mrt6_ioctl(cmd, data) : EOPNOTSUPP); + } + + switch(cmd) { + case SIOCAADDRCTL_POLICY: + case SIOCDADDRCTL_POLICY: + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_ADDRCTRL6); + if (error) + return (error); + } + return (in6_src_ioctl(cmd, data)); + } + + if (ifp == NULL) + return (EOPNOTSUPP); + + switch (cmd) { + case SIOCSNDFLUSH_IN6: + case SIOCSPFXFLUSH_IN6: + case SIOCSRTRFLUSH_IN6: + case SIOCSDEFIFACE_IN6: + case SIOCSIFINFO_FLAGS: + case SIOCSIFINFO_IN6: + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_ND6); + if (error) + return (error); + } + /* FALLTHROUGH */ + case OSIOCGIFINFO_IN6: + case SIOCGIFINFO_IN6: + case SIOCGDRLST_IN6: + case SIOCGPRLST_IN6: + case SIOCGNBRINFO_IN6: + case SIOCGDEFIFACE_IN6: + return (nd6_ioctl(cmd, data, ifp)); + +#ifdef COMPAT_FREEBSD32 + case SIOCGDEFIFACE32_IN6: + { + struct in6_ndifreq ndif; + struct in6_ndifreq32 *ndif32; + + error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif, + ifp); + if (error) + return (error); + ndif32 = (struct in6_ndifreq32 *)data; + ndif32->ifindex = ndif.ifindex; + return (0); + } +#endif + } + + switch (cmd) { + case SIOCSIFPREFIX_IN6: + case SIOCDIFPREFIX_IN6: + case SIOCAIFPREFIX_IN6: + case SIOCCIFPREFIX_IN6: + case SIOCSGIFPREFIX_IN6: + case SIOCGIFPREFIX_IN6: + log(LOG_NOTICE, + "prefix ioctls are now invalidated. 
" + "please use ifconfig.\n"); + return (EOPNOTSUPP); + } + + switch (cmd) { + case SIOCSSCOPE6: + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_SCOPE6); + if (error) + return (error); + } + return (scope6_set(ifp, + (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); + case SIOCGSCOPE6: + return (scope6_get(ifp, + (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); + case SIOCGSCOPE6DEF: + return (scope6_get_default((struct scope6_id *) + ifr->ifr_ifru.ifru_scope_id)); + } + + switch (cmd) { + case SIOCALIFADDR: + if (td != NULL) { + error = priv_check(td, PRIV_NET_ADDIFADDR); + if (error) + return (error); + } + return in6_lifaddr_ioctl(so, cmd, data, ifp, td); + + case SIOCDLIFADDR: + if (td != NULL) { + error = priv_check(td, PRIV_NET_DELIFADDR); + if (error) + return (error); + } + /* FALLTHROUGH */ + case SIOCGLIFADDR: + return in6_lifaddr_ioctl(so, cmd, data, ifp, td); + } + + /* + * Find address for this interface, if it exists. + * + * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation + * only, and used the first interface address as the target of other + * operations (without checking ifra_addr). This was because netinet + * code/API assumed at most 1 interface address per interface. + * Since IPv6 allows a node to assign multiple addresses + * on a single interface, we almost always look and check the + * presence of ifra_addr, and reject invalid ones here. + * It also decreases duplicated code among SIOC*_IN6 operations. 
+ */ + switch (cmd) { + case SIOCAIFADDR_IN6: + case SIOCSIFPHYADDR_IN6: + sa6 = &ifra->ifra_addr; + break; + case SIOCSIFADDR_IN6: + case SIOCGIFADDR_IN6: + case SIOCSIFDSTADDR_IN6: + case SIOCSIFNETMASK_IN6: + case SIOCGIFDSTADDR_IN6: + case SIOCGIFNETMASK_IN6: + case SIOCDIFADDR_IN6: + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + case SIOCGIFAFLAG_IN6: + case SIOCSNDFLUSH_IN6: + case SIOCSPFXFLUSH_IN6: + case SIOCSRTRFLUSH_IN6: + case SIOCGIFALIFETIME_IN6: + case SIOCSIFALIFETIME_IN6: + case SIOCGIFSTAT_IN6: + case SIOCGIFSTAT_ICMP6: + sa6 = &ifr->ifr_addr; + break; + default: + sa6 = NULL; + break; + } + if (sa6 && sa6->sin6_family == AF_INET6) { + if (sa6->sin6_scope_id != 0) + error = sa6_embedscope(sa6, 0); + else + error = in6_setscope(&sa6->sin6_addr, ifp, NULL); + if (error != 0) + return (error); + if (td != NULL && (error = prison_check_ip6(td->td_ucred, + &sa6->sin6_addr)) != 0) + return (error); + ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr); + } else + ia = NULL; + + switch (cmd) { + case SIOCSIFADDR_IN6: + case SIOCSIFDSTADDR_IN6: + case SIOCSIFNETMASK_IN6: + /* + * Since IPv6 allows a node to assign multiple addresses + * on a single interface, SIOCSIFxxx ioctls are deprecated. + */ + /* we decided to obsolete this command (20000704) */ + error = EINVAL; + goto out; + + case SIOCDIFADDR_IN6: + /* + * for IPv4, we look for existing in_ifaddr here to allow + * "ifconfig if0 delete" to remove the first IPv4 address on + * the interface. For IPv6, as the spec allows multiple + * interface address from the day one, we consider "remove the + * first one" semantics to be not preferable. + */ + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto out; + } + /* FALLTHROUGH */ + case SIOCAIFADDR_IN6: + /* + * We always require users to specify a valid IPv6 address for + * the corresponding operation. 
+ */ + if (ifra->ifra_addr.sin6_family != AF_INET6 || + ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) { + error = EAFNOSUPPORT; + goto out; + } + + if (td != NULL) { + error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ? + PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR); + if (error) + goto out; + } + break; + + case SIOCGIFADDR_IN6: + /* This interface is basically deprecated. use SIOCGIFCONF. */ + /* FALLTHROUGH */ + case SIOCGIFAFLAG_IN6: + case SIOCGIFNETMASK_IN6: + case SIOCGIFDSTADDR_IN6: + case SIOCGIFALIFETIME_IN6: + /* must think again about its semantics */ + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto out; + } + break; + + case SIOCSIFALIFETIME_IN6: + { + struct in6_addrlifetime *lt; + + if (td != NULL) { + error = priv_check(td, PRIV_NETINET_ALIFETIME6); + if (error) + goto out; + } + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto out; + } + /* sanity for overflow - beware unsigned */ + lt = &ifr->ifr_ifru.ifru_lifetime; + if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && + lt->ia6t_vltime + time_second < time_second) { + error = EINVAL; + goto out; + } + if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && + lt->ia6t_pltime + time_second < time_second) { + error = EINVAL; + goto out; + } + break; + } + } + + switch (cmd) { + case SIOCGIFADDR_IN6: + ifr->ifr_addr = ia->ia_addr; + if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0) + goto out; + break; + + case SIOCGIFDSTADDR_IN6: + if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { + error = EINVAL; + goto out; + } + /* + * XXX: should we check if ifa_dstaddr is NULL and return + * an error? 
+ */ + ifr->ifr_dstaddr = ia->ia_dstaddr; + if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0) + goto out; + break; + + case SIOCGIFNETMASK_IN6: + ifr->ifr_addr = ia->ia_prefixmask; + break; + + case SIOCGIFAFLAG_IN6: + ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags; + break; + + case SIOCGIFSTAT_IN6: + if (ifp == NULL) { + error = EINVAL; + goto out; + } + bzero(&ifr->ifr_ifru.ifru_stat, + sizeof(ifr->ifr_ifru.ifru_stat)); + ifr->ifr_ifru.ifru_stat = + *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat; + break; + + case SIOCGIFSTAT_ICMP6: + if (ifp == NULL) { + error = EINVAL; + goto out; + } + bzero(&ifr->ifr_ifru.ifru_icmp6stat, + sizeof(ifr->ifr_ifru.ifru_icmp6stat)); + ifr->ifr_ifru.ifru_icmp6stat = + *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat; + break; + + case SIOCGIFALIFETIME_IN6: + ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime; + if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { + time_t maxexpire; + struct in6_addrlifetime *retlt = + &ifr->ifr_ifru.ifru_lifetime; + + /* + * XXX: adjust expiration time assuming time_t is + * signed. + */ + maxexpire = (-1) & + ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); + if (ia->ia6_lifetime.ia6t_vltime < + maxexpire - ia->ia6_updatetime) { + retlt->ia6t_expire = ia->ia6_updatetime + + ia->ia6_lifetime.ia6t_vltime; + } else + retlt->ia6t_expire = maxexpire; + } + if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { + time_t maxexpire; + struct in6_addrlifetime *retlt = + &ifr->ifr_ifru.ifru_lifetime; + + /* + * XXX: adjust expiration time assuming time_t is + * signed. 
+ */ + maxexpire = (-1) & + ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); + if (ia->ia6_lifetime.ia6t_pltime < + maxexpire - ia->ia6_updatetime) { + retlt->ia6t_preferred = ia->ia6_updatetime + + ia->ia6_lifetime.ia6t_pltime; + } else + retlt->ia6t_preferred = maxexpire; + } + break; + + case SIOCSIFALIFETIME_IN6: + ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; + /* for sanity */ + if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_expire = + time_second + ia->ia6_lifetime.ia6t_vltime; + } else + ia->ia6_lifetime.ia6t_expire = 0; + if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_preferred = + time_second + ia->ia6_lifetime.ia6t_pltime; + } else + ia->ia6_lifetime.ia6t_preferred = 0; + break; + + case SIOCAIFADDR_IN6: + { + int i; + struct nd_prefixctl pr0; + struct nd_prefix *pr; + + /* + * first, make or update the interface address structure, + * and link it to the list. + */ + if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0) + goto out; + if (ia != NULL) + ifa_free(&ia->ia_ifa); + if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) + == NULL) { + /* + * this can happen when the user specify the 0 valid + * lifetime. + */ + break; + } + + /* + * then, make the prefix on-link on the interface. + * XXX: we'd rather create the prefix before the address, but + * we need at least one address to install the corresponding + * interface route, so we configure the address first. + */ + + /* + * convert mask to prefix length (prefixmask has already + * been validated in in6_update_ifa(). + */ + bzero(&pr0, sizeof(pr0)); + pr0.ndpr_ifp = ifp; + pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, + NULL); + if (pr0.ndpr_plen == 128) { + break; /* we don't need to install a host route. */ + } + pr0.ndpr_prefix = ifra->ifra_addr; + /* apply the mask for safety. 
*/ + for (i = 0; i < 4; i++) { + pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= + ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; + } + /* + * XXX: since we don't have an API to set prefix (not address) + * lifetimes, we just use the same lifetimes as addresses. + * The (temporarily) installed lifetimes can be overridden by + * later advertised RAs (when accept_rtadv is non 0), which is + * an intended behavior. + */ + pr0.ndpr_raf_onlink = 1; /* should be configurable? */ + pr0.ndpr_raf_auto = + ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0); + pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; + pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; + + /* add the prefix if not yet. */ + if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { + /* + * nd6_prelist_add will install the corresponding + * interface route. + */ + if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) + goto out; + if (pr == NULL) { + log(LOG_ERR, "nd6_prelist_add succeeded but " + "no prefix\n"); + error = EINVAL; + goto out; + } + } + + /* relate the address to the prefix */ + if (ia->ia6_ndpr == NULL) { + ia->ia6_ndpr = pr; + pr->ndpr_refcnt++; + + /* + * If this is the first autoconf address from the + * prefix, create a temporary address as well + * (when required). + */ + if ((ia->ia6_flags & IN6_IFF_AUTOCONF) && + V_ip6_use_tempaddr && pr->ndpr_refcnt == 1) { + int e; + if ((e = in6_tmpifadd(ia, 1, 0)) != 0) { + log(LOG_NOTICE, "in6_control: failed " + "to create a temporary address, " + "errno=%d\n", e); + } + } + } + + /* + * this might affect the status of autoconfigured addresses, + * that is, this address might make other addresses detached. + */ + pfxlist_onlink_check(); + if (error == 0 && ia) { + if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { + /* + * Try to clear the flag when a new + * IPv6 address is added onto an + * IFDISABLED interface and it + * succeeds. 
+ */ + struct in6_ndireq nd; + + memset(&nd, 0, sizeof(nd)); + nd.ndi.flags = ND_IFINFO(ifp)->flags; + nd.ndi.flags &= ~ND6_IFF_IFDISABLED; + if (nd6_ioctl(SIOCSIFINFO_FLAGS, + (caddr_t)&nd, ifp) < 0) + log(LOG_NOTICE, "SIOCAIFADDR_IN6: " + "SIOCSIFINFO_FLAGS for -ifdisabled " + "failed."); + /* + * Ignore failure of clearing the flag + * intentionally. The failure means + * address duplication was detected. + */ + } + EVENTHANDLER_INVOKE(ifaddr_event, ifp); + } + break; + } + + case SIOCDIFADDR_IN6: + { + struct nd_prefix *pr; + + /* + * If the address being deleted is the only one that owns + * the corresponding prefix, expire the prefix as well. + * XXX: theoretically, we don't have to worry about such + * relationship, since we separate the address management + * and the prefix management. We do this, however, to provide + * as much backward compatibility as possible in terms of + * the ioctl operation. + * Note that in6_purgeaddr() will decrement ndpr_refcnt. + */ + pr = ia->ia6_ndpr; + in6_purgeaddr(&ia->ia_ifa); + if (pr && pr->ndpr_refcnt == 0) + prelist_remove(pr); + EVENTHANDLER_INVOKE(ifaddr_event, ifp); + break; + } + + default: + if (ifp == NULL || ifp->if_ioctl == 0) { + error = EOPNOTSUPP; + goto out; + } + error = (*ifp->if_ioctl)(ifp, cmd, data); + goto out; + } + + error = 0; +out: + if (ia != NULL) + ifa_free(&ia->ia_ifa); + return (error); +} + + +/* + * Join necessary multicast groups. Factored out from in6_update_ifa(). + * This entire work should only be done once, for the default FIB. + */ +static int +in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol) +{ + char ip6buf[INET6_ADDRSTRLEN]; + struct sockaddr_in6 mltaddr, mltmask; + struct in6_addr llsol; + struct in6_multi_mship *imm; + struct rtentry *rt; + int delay, error; + + KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__)); + + /* Join solicited multicast addr for new host id. 
*/ + bzero(&llsol, sizeof(struct in6_addr)); + llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; + llsol.s6_addr32[1] = 0; + llsol.s6_addr32[2] = htonl(1); + llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; + llsol.s6_addr8[12] = 0xff; + if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { + /* XXX: should not happen */ + log(LOG_ERR, "%s: in6_setscope failed\n", __func__); + goto cleanup; + } + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * We need a random delay for DAD on the address being + * configured. It also means delaying transmission of the + * corresponding MLD report to avoid report collision. + * [RFC 4861, Section 6.3.7] + */ + delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); + } + imm = in6_joingroup(ifp, &llsol, &error, delay); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &llsol), + if_name(ifp), error)); + goto cleanup; + } + OFP_LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + *in6m_sol = imm->i6mm_maddr; + + bzero(&mltmask, sizeof(mltmask)); + mltmask.sin6_len = sizeof(struct sockaddr_in6); + mltmask.sin6_family = AF_INET6; + mltmask.sin6_addr = in6mask32; +#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ + + /* + * Join link-local all-nodes address. + */ + bzero(&mltaddr, sizeof(mltaddr)); + mltaddr.sin6_len = sizeof(struct sockaddr_in6); + mltaddr.sin6_family = AF_INET6; + mltaddr.sin6_addr = in6addr_linklocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto cleanup; /* XXX: should not fail */ + + /* + * XXX: do we really need this automatic routes? We should probably + * reconsider this stuff. Most applications actually do not need the + * routes, since they usually specify the outgoing interface. + */ + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); + if (rt != NULL) { + /* XXX: only works in !SCOPEDROUTING case. 
*/ + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (rt == NULL) { + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); + if (error) + goto cleanup; + } else + RTFREE_LOCKED(rt); + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + goto cleanup; + } + OFP_LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + + /* + * Join node information group address. + */ + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * The spec does not say anything about delay for this group, + * but the same logic should apply. + */ + delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); + } + if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { + /* XXX jinmei */ + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); + if (imm == NULL) + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + /* XXX not very fatal, go on... */ + else + OFP_LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + } + + /* + * Join interface-local all-nodes address. + * (ff01::1%ifN, and ff01::%ifN/32) + */ + mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto cleanup; /* XXX: should not fail */ + /* XXX: again, do we really need the route? 
*/ + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); + if (rt != NULL) { + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (rt == NULL) { + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); + if (error) + goto cleanup; + } else + RTFREE_LOCKED(rt); + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + goto cleanup; + } + OFP_LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); +#undef MLTMASK_LEN + +cleanup: + return (error); +} + +/* + * Update parameters of an IPv6 interface address. + * If necessary, a new entry is created and linked into address chains. + * This function is separated from in6_control(). + * XXX: should this be performed under splnet()? + */ +int +in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int flags) +{ + int error = 0, hostIsNew = 0, plen = -1; + struct sockaddr_in6 dst6; + struct in6_addrlifetime *lt; + struct in6_multi *in6m_sol; + int delay; + char ip6buf[INET6_ADDRSTRLEN]; + + /* Validate parameters */ + if (ifp == NULL || ifra == NULL) /* this maybe redundant */ + return (EINVAL); + + /* + * The destination address for a p2p link must have a family + * of AF_UNSPEC or AF_INET6. + */ + if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && + ifra->ifra_dstaddr.sin6_family != AF_INET6 && + ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) + return (EAFNOSUPPORT); + /* + * validate ifra_prefixmask. don't check sin6_family, netmask + * does not carry fields other than sin6_len. 
+ */ + if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) + return (EINVAL); + /* + * Because the IPv6 address architecture is classless, we require + * users to specify a (non 0) prefix length (mask) for a new address. + * We also require the prefix (when specified) mask is valid, and thus + * reject a non-consecutive mask. + */ + if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0) + return (EINVAL); + if (ifra->ifra_prefixmask.sin6_len != 0) { + plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, + (u_char *)&ifra->ifra_prefixmask + + ifra->ifra_prefixmask.sin6_len); + if (plen <= 0) + return (EINVAL); + } else { + /* + * In this case, ia must not be NULL. We just use its prefix + * length. + */ + plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); + } + /* + * If the destination address on a p2p interface is specified, + * and the address is a scoped one, validate/set the scope + * zone identifier. + */ + dst6 = ifra->ifra_dstaddr; + if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 && + (dst6.sin6_family == AF_INET6)) { + struct in6_addr in6_tmp; + u_int32_t zoneid; + + in6_tmp = dst6.sin6_addr; + if (in6_setscope(&in6_tmp, ifp, &zoneid)) + return (EINVAL); /* XXX: should be impossible */ + + if (dst6.sin6_scope_id != 0) { + if (dst6.sin6_scope_id != zoneid) + return (EINVAL); + } else /* user omit to specify the ID. */ + dst6.sin6_scope_id = zoneid; + + /* convert into the internal form */ + if (sa6_embedscope(&dst6, 0)) + return (EINVAL); /* XXX: should be impossible */ + } + /* + * The destination address can be specified only for a p2p or a + * loopback interface. If specified, the corresponding prefix length + * must be 128. 
+ */ + if (ifra->ifra_dstaddr.sin6_family == AF_INET6) { + if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) { + /* XXX: noisy message */ + nd6log((LOG_INFO, "in6_update_ifa: a destination can " + "be specified for a p2p or a loopback IF only\n")); + return (EINVAL); + } + if (plen != 128) { + nd6log((LOG_INFO, "in6_update_ifa: prefixlen should " + "be 128 when dstaddr is specified\n")); + return (EINVAL); + } + } + /* lifetime consistency check */ + lt = &ifra->ifra_lifetime; + if (lt->ia6t_pltime > lt->ia6t_vltime) + return (EINVAL); + if (lt->ia6t_vltime == 0) { + /* + * the following log might be noisy, but this is a typical + * configuration mistake or a tool's bug. + */ + nd6log((LOG_INFO, + "in6_update_ifa: valid lifetime is 0 for %s\n", + ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr))); + + if (ia == NULL) + return (0); /* there's nothing to do */ + } + + /* + * If this is a new address, allocate a new ifaddr and link it + * into chains. + */ + if (ia == NULL) { + hostIsNew = 1; + /* + * When in6_update_ifa() is called in a process of a received + * RA, it is called under an interrupt context. So, we should + * call malloc with M_NOWAIT. + */ + ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR, + M_NOWAIT); + if (ia == NULL) + return (ENOBUFS); + bzero((caddr_t)ia, sizeof(*ia)); + ifa_init(&ia->ia_ifa); + OFP_LIST_INIT(&ia->ia6_memberships); + /* Initialize the address and masks, and put time stamp */ + ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; + ia->ia_addr.sin6_family = AF_INET6; + ia->ia_addr.sin6_len = sizeof(ia->ia_addr); + ia->ia6_createtime = time_second; + if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { + /* + * XXX: some functions expect that ifa_dstaddr is not + * NULL for p2p interfaces. 
+ */ + ia->ia_ifa.ifa_dstaddr = + (struct sockaddr *)&ia->ia_dstaddr; + } else { + ia->ia_ifa.ifa_dstaddr = NULL; + } + ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; + ia->ia_ifp = ifp; + ifa_ref(&ia->ia_ifa); /* if_addrhead */ + IF_ADDR_WLOCK(ifp); + OFP_TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); + + ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */ + IN6_IFADDR_WLOCK(); + OFP_TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link); + IN6_IFADDR_WUNLOCK(); + } + + /* update timestamp */ + ia->ia6_updatetime = time_second; + + /* set prefix mask */ + if (ifra->ifra_prefixmask.sin6_len) { + /* + * We prohibit changing the prefix length of an existing + * address, because + * + such an operation should be rare in IPv6, and + * + the operation would confuse prefix management. + */ + if (ia->ia_prefixmask.sin6_len && + in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { + nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an" + " existing (%s) address should not be changed\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); + error = EINVAL; + goto unlink; + } + ia->ia_prefixmask = ifra->ifra_prefixmask; + } + + /* + * If a new destination address is specified, scrub the old one and + * install the new destination. Note that the interface must be + * p2p or loopback (see the check above.) + */ + if (dst6.sin6_family == AF_INET6 && + !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) { + int e; + + if ((ia->ia_flags & IFA_ROUTE) != 0 && + (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { + nd6log((LOG_ERR, "in6_update_ifa: failed to remove " + "a route to the old destination: %s\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); + /* proceed anyway... */ + } else + ia->ia_flags &= ~IFA_ROUTE; + ia->ia_dstaddr = dst6; + } + + /* + * Set lifetimes. 
We do not refer to ia6t_expire and ia6t_preferred + * to see if the address is deprecated or invalidated, but initialize + * these members for applications. + */ + ia->ia6_lifetime = ifra->ifra_lifetime; + if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_expire = + time_second + ia->ia6_lifetime.ia6t_vltime; + } else + ia->ia6_lifetime.ia6t_expire = 0; + if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { + ia->ia6_lifetime.ia6t_preferred = + time_second + ia->ia6_lifetime.ia6t_pltime; + } else + ia->ia6_lifetime.ia6t_preferred = 0; + + /* reset the interface and routing table appropriately. */ + if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0) + goto unlink; + + /* + * configure address flags. + */ + ia->ia6_flags = ifra->ifra_flags; + /* + * backward compatibility - if IN6_IFF_DEPRECATED is set from the + * userland, make it deprecated. + */ + if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) { + ia->ia6_lifetime.ia6t_pltime = 0; + ia->ia6_lifetime.ia6t_preferred = time_second; + } + /* + * Make the address tentative before joining multicast addresses, + * so that corresponding MLD responses would not have a tentative + * source address. + */ + ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ + if (hostIsNew && in6if_do_dad(ifp)) + ia->ia6_flags |= IN6_IFF_TENTATIVE; + + /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */ + if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) + ia->ia6_flags |= IN6_IFF_TENTATIVE; + + /* + * We are done if we have simply modified an existing address. + */ + if (!hostIsNew) + return (error); + + /* + * Beyond this point, we should call in6_purgeaddr upon an error, + * not just go to unlink. + */ + + /* Join necessary multicast groups. */ + in6m_sol = NULL; + if ((ifp->if_flags & IFF_MULTICAST) != 0) { + error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol); + if (error) + goto cleanup; + } + + /* + * Perform DAD, if needed. 
+ * XXX It may be of use, if we can administratively disable DAD. + */ + if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && + (ia->ia6_flags & IN6_IFF_TENTATIVE)) + { + int mindelay, maxdelay; + + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * We need to impose a delay before sending an NS + * for DAD. Check if we also needed a delay for the + * corresponding MLD message. If we did, the delay + * should be larger than the MLD delay (this could be + * relaxed a bit, but this simple logic is at least + * safe). + * XXX: Break data hiding guidelines and look at + * state for the solicited multicast group. + */ + mindelay = 0; + if (in6m_sol != NULL && + in6m_sol->in6m_state == MLD_REPORTING_MEMBER) { + mindelay = in6m_sol->in6m_timer; + } + maxdelay = MAX_RTR_SOLICITATION_DELAY * hz; + if (maxdelay - mindelay == 0) + delay = 0; + else { + delay = + (arc4random() % (maxdelay - mindelay)) + + mindelay; + } + } + nd6_dad_start((struct ifaddr *)ia, delay); + } + + KASSERT(hostIsNew, ("in6_update_ifa: !hostIsNew")); + ifa_free(&ia->ia_ifa); + return (error); + + unlink: + /* + * XXX: if a change of an existing address failed, keep the entry + * anyway. + */ + if (hostIsNew) { + in6_unlink_ifa(ia, ifp); + ifa_free(&ia->ia_ifa); + } + return (error); + + cleanup: + KASSERT(hostIsNew, ("in6_update_ifa: cleanup: !hostIsNew")); + ifa_free(&ia->ia_ifa); + in6_purgeaddr(&ia->ia_ifa); + return error; +} + +/* + * Leave multicast groups. Factored out from in6_purgeaddr(). + * This entire work should only be done once, for the default FIB. + */ +static int +in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) +{ + struct sockaddr_in6 mltaddr, mltmask; + struct in6_multi_mship *imm; + struct rtentry *rt; + struct sockaddr_in6 sin6; + int error; + + /* + * Leave from multicast groups we have joined for the interface. 
+ */ + while ((imm = OFP_LIST_FIRST(&ia->ia6_memberships)) != NULL) { + OFP_LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + + /* + * Remove the link-local all-nodes address. + */ + bzero(&mltmask, sizeof(mltmask)); + mltmask.sin6_len = sizeof(struct sockaddr_in6); + mltmask.sin6_family = AF_INET6; + mltmask.sin6_addr = in6mask32; + + bzero(&mltaddr, sizeof(mltaddr)); + mltaddr.sin6_len = sizeof(struct sockaddr_in6); + mltaddr.sin6_family = AF_INET6; + mltaddr.sin6_addr = in6addr_linklocal_allnodes; + + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + return (error); + + /* + * As for the mltaddr above, proactively prepare the sin6 to avoid + * rtentry un- and re-locking. + */ + if (ifa0 != NULL) { + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + memcpy(&sin6.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, + sizeof(sin6.sin6_addr)); + error = in6_setscope(&sin6.sin6_addr, ifa0->ifa_ifp, NULL); + if (error != 0) + return (error); + } + + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); + if (rt != NULL && rt->rt_gateway != NULL && + (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(ia->ia_addr.sin6_addr)) == 0)) { + /* + * If no more IPv6 address exists on this interface then + * remove the multicast address route. + */ + if (ifa0 == NULL) { + memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, + sizeof(mltaddr.sin6_addr)); + RTFREE_LOCKED(rt); + error = in6_rtrequest(RTM_DELETE, + (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); + if (error) + log(LOG_INFO, "%s: link-local all-nodes " + "multicast address deletion error\n", + __func__); + } else { + /* + * Replace the gateway of the route. 
+ */ + memcpy(rt->rt_gateway, &sin6, sizeof(sin6)); + RTFREE_LOCKED(rt); + } + } else { + if (rt != NULL) + RTFREE_LOCKED(rt); + } + + /* + * Remove the node-local all-nodes address. + */ + mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + return (error); + + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); + if (rt != NULL && rt->rt_gateway != NULL && + (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(ia->ia_addr.sin6_addr)) == 0)) { + /* + * If no more IPv6 address exists on this interface then + * remove the multicast address route. + */ + if (ifa0 == NULL) { + memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, + sizeof(mltaddr.sin6_addr)); + + RTFREE_LOCKED(rt); + error = in6_rtrequest(RTM_DELETE, + (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0, RT_DEFAULT_FIB); + if (error) + log(LOG_INFO, "%s: node-local all-nodes" + "multicast address deletion error\n", + __func__); + } else { + /* + * Replace the gateway of the route. 
+			 */
+			memcpy(rt->rt_gateway, &sin6, sizeof(sin6));
+			RTFREE_LOCKED(rt);
+		}
+	} else {
+		if (rt != NULL)
+			RTFREE_LOCKED(rt);
+	}
+
+	return (0);
+}
+
+/*
+ * Tear down the state attached to one IPv6 interface address and unlink it.
+ *
+ * In order: pick (and reference) another AF_INET6 address on the same
+ * interface to hand to in6_purgeaddr_mc(), delete the loopback route if
+ * IFA_RTSELF is set, stop DAD, remove the lltable entry, leave multicast
+ * groups, delete the destination route for a /128 address that has
+ * IFA_ROUTE set, and finally unlink the address via in6_unlink_ifa().
+ *
+ * Errors from the individual steps are logged or dropped; nothing is
+ * returned to the caller.
+ */
+void
+in6_purgeaddr(struct ifaddr *ifa)
+{
+	struct ifnet *ifp = ifa->ifa_ifp;
+	struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
+	int plen, error;
+	struct ifaddr *ifa0;
+
+	/*
+	 * find another IPv6 address as the gateway for the
+	 * link-local and node-local all-nodes multicast
+	 * address routes
+	 */
+	IF_ADDR_RLOCK(ifp);
+	OFP_TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) {
+		if ((ifa0->ifa_addr->sa_family != AF_INET6) ||
+		    memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr,
+			   &ia->ia_addr.sin6_addr,
+			   sizeof(struct in6_addr)) == 0)
+			continue;
+		else
+			break;
+	}
+	/* ifa0 is NULL here when the loop ran off the end of the list. */
+	if (ifa0 != NULL)
+		ifa_ref(ifa0);
+	IF_ADDR_RUNLOCK(ifp);
+
+	/*
+	 * Remove the loopback route to the interface address.
+	 * The check for the current setting of "nd6_useloopback"
+	 * is not needed.
+	 */
+	if (ia->ia_flags & IFA_RTSELF) {
+		error = ifa_del_loopback_route((struct ifaddr *)ia,
+				       (struct sockaddr *)&ia->ia_addr);
+		if (error == 0)
+			ia->ia_flags &= ~IFA_RTSELF;
+	}
+
+	/* stop DAD processing */
+	nd6_dad_stop(ifa);
+
+	/* Remove local address entry from lltable. */
+	in6_ifremloop(ifa);
+
+	/* Leave multicast groups. */
+	error = in6_purgeaddr_mc(ifp, ia, ifa0);
+
+	if (ifa0 != NULL)
+		ifa_free(ifa0);
+
+	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
+	if ((ia->ia_flags & IFA_ROUTE) && plen == 128) {
+		/*
+		 * The conditional must be parenthesized as a whole: "|" binds
+		 * tighter than "?:", so without the parentheses the expression
+		 * collapsed to "(ia_flags | cond) ? RTF_HOST : 0" and the
+		 * address flags were never passed to rtinit().
+		 */
+		error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags |
+		    ((ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0));
+		if (error != 0)
+			log(LOG_INFO, "%s: err=%d, destination address delete "
+			    "failed\n", __func__, error);
+		ia->ia_flags &= ~IFA_ROUTE;
+	}
+
+	in6_unlink_ifa(ia, ifp);
+}
+
+/*
+ * Remove ia from the per-interface address list and from the global
+ * V_in6_ifaddrhead list, dropping the reference each list held, and
+ * release its hold on the base prefix.
+ */
+static void
+in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
+{
+	int	s = splnet();
+
+	IF_ADDR_WLOCK(ifp);
+	OFP_TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
+	IF_ADDR_WUNLOCK(ifp);
+	ifa_free(&ia->ia_ifa);			/* if_addrhead */
+
+	/*
+	 * Defer the release of what might be the last reference to the
+	 * in6_ifaddr so that it can't be freed before the remainder of the
+	 * cleanup.
+	 */
+	IN6_IFADDR_WLOCK();
+	OFP_TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
+	IN6_IFADDR_WUNLOCK();
+
+	/*
+	 * Release the reference to the base prefix.  There should be a
+	 * positive reference.
+	 */
+	if (ia->ia6_ndpr == NULL) {
+		nd6log((LOG_NOTICE,
+		    "in6_unlink_ifa: autoconf'ed address "
+		    "%p has no prefix\n", ia));
+	} else {
+		ia->ia6_ndpr->ndpr_refcnt--;
+		ia->ia6_ndpr = NULL;
+	}
+
+	/*
+	 * Also, if the address being removed is autoconf'ed, call
+	 * pfxlist_onlink_check() since the release might affect the status of
+	 * other (detached) addresses.
+	 */
+	if ((ia->ia6_flags & IN6_IFF_AUTOCONF)) {
+		pfxlist_onlink_check();
+	}
+	ifa_free(&ia->ia_ifa);			/* in6_ifaddrhead */
+	splx(s);
+}
+
+/*
+ * Purge every AF_INET6 address configured on ifp, then detach the
+ * interface's IPv6 state with in6_ifdetach().  The _SAFE iterator is
+ * required because in6_purgeaddr() unlinks the current element.
+ */
+void
+in6_purgeif(struct ifnet *ifp)
+{
+	struct ifaddr *ifa, *nifa;
+
+	OFP_TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
+		if (ifa->ifa_addr->sa_family != AF_INET6)
+			continue;
+		in6_purgeaddr(ifa);
+	}
+
+	in6_ifdetach(ifp);
+}
+
+/*
+ * SIOC[GAD]LIFADDR.
+ *	SIOCGLIFADDR: get first address. (?)
+ *	SIOCGLIFADDR with IFLR_PREFIX:
+ *		get first address that matches the specified prefix.
+ *	SIOCALIFADDR: add the specified address.
+ *	SIOCALIFADDR with IFLR_PREFIX:
+ *		add the specified prefix, filling hostid part from
+ *		the first link-local address.  prefixlen must be <= 64.
+ *	SIOCDLIFADDR: delete the specified address.
+ *	SIOCDLIFADDR with IFLR_PREFIX:
+ *		delete the first address that matches the specified prefix.
+ * return values:
+ *	EINVAL on invalid parameters
+ *	EADDRNOTAVAIL on prefix match failed/specified address not found
+ *	other values may be returned from in6_ioctl()
+ *
+ * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64.
+ * this is to accommodate address naming scheme other than RFC2374,
+ * in the future.
+ * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
+ * address encoding scheme. (see figure on page 8)
+ *
+ * Implementation outline: the request is validated, then ADD is rewritten
+ * into an in6_aliasreq and forwarded to in6_control(SIOCAIFADDR_IN6);
+ * GET and DELETE first locate the matching address on ifp (taking a
+ * reference that is dropped on every exit path), after which GET fills
+ * the if_laddrreq in place and DELETE forwards an in6_aliasreq to
+ * in6_control(SIOCDIFADDR_IN6).
+ */
+static int
+in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
+    struct ifnet *ifp, struct thread *td)
+{
+	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
+	struct ifaddr *ifa;
+	struct sockaddr *sa;
+
+	/* sanity checks */
+	if (!data || !ifp) {
+		panic("invalid argument to in6_lifaddr_ioctl");
+		/* NOTREACHED */
+	}
+
+	switch (cmd) {
+	case SIOCGLIFADDR:
+		/* address must be specified on GET with IFLR_PREFIX */
+		if ((iflr->flags & IFLR_PREFIX) == 0)
+			break;
+		/* FALLTHROUGH */
+	case SIOCALIFADDR:
+	case SIOCDLIFADDR:
+		/* address must be specified on ADD and DELETE */
+		sa = (struct sockaddr *)&iflr->addr;
+		if (sa->sa_family != AF_INET6)
+			return EINVAL;
+		if (sa->sa_len != sizeof(struct sockaddr_in6))
+			return EINVAL;
+		/* XXX need improvement */
+		sa = (struct sockaddr *)&iflr->dstaddr;
+		if (sa->sa_family && sa->sa_family != AF_INET6)
+			return EINVAL;
+		if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6))
+			return EINVAL;
+		break;
+	default: /* shouldn't happen */
+#if 0
+		panic("invalid cmd to in6_lifaddr_ioctl");
+		/* NOTREACHED */
+#else
+		return EOPNOTSUPP;
+#endif
+	}
+	/* An IPv6 prefix can never be longer than the address itself. */
+	if (sizeof(struct in6_addr) * 8 < iflr->prefixlen)
+		return EINVAL;
+
+	switch (cmd) {
+	case SIOCALIFADDR:
+	    {
+		struct in6_aliasreq ifra;
+		struct in6_addr *hostid = NULL;
+		int prefixlen;
+
+		ifa = NULL;
+		if ((iflr->flags & IFLR_PREFIX) != 0) {
+			struct sockaddr_in6 *sin6;
+
+			/*
+			 * hostid is to fill in the hostid part of the
+			 * address.  hostid points to the first link-local
+			 * address attached to the interface.
+			 */
+			ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
+			if (!ifa)
+				return EADDRNOTAVAIL;
+			hostid = IFA_IN6(ifa);
+
+			/* prefixlen must be <= 64. */
+			if (64 < iflr->prefixlen) {
+				if (ifa != NULL)
+					ifa_free(ifa);
+				return EINVAL;
+			}
+			prefixlen = iflr->prefixlen;
+
+			/* hostid part must be zero. */
+			sin6 = (struct sockaddr_in6 *)&iflr->addr;
+			if (sin6->sin6_addr.s6_addr32[2] != 0 ||
+			    sin6->sin6_addr.s6_addr32[3] != 0) {
+				if (ifa != NULL)
+					ifa_free(ifa);
+				return EINVAL;
+			}
+		} else
+			prefixlen = iflr->prefixlen;
+
+		/* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
+		bzero(&ifra, sizeof(ifra));
+		bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name));
+
+		bcopy(&iflr->addr, &ifra.ifra_addr,
+		    ((struct sockaddr *)&iflr->addr)->sa_len);
+		if (hostid) {
+			/* fill in hostid part */
+			ifra.ifra_addr.sin6_addr.s6_addr32[2] =
+			    hostid->s6_addr32[2];
+			ifra.ifra_addr.sin6_addr.s6_addr32[3] =
+			    hostid->s6_addr32[3];
+		}
+
+		if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */
+			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
+			    ((struct sockaddr *)&iflr->dstaddr)->sa_len);
+			if (hostid) {
+				ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] =
+				    hostid->s6_addr32[2];
+				ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] =
+				    hostid->s6_addr32[3];
+			}
+		}
+		/* hostid has been copied out; the link-local ref can go. */
+		if (ifa != NULL)
+			ifa_free(ifa);
+
+		ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
+		in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);
+
+		ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
+		return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td);
+	    }
+	case SIOCGLIFADDR:
+	case SIOCDLIFADDR:
+	    {
+		struct in6_ifaddr *ia;
+		struct in6_addr mask, candidate, match;
+		struct sockaddr_in6 *sin6;
+		int cmp;
+
+		bzero(&mask, sizeof(mask));
+		if (iflr->flags & IFLR_PREFIX) {
+			/* lookup a prefix rather than address. */
+			in6_prefixlen2mask(&mask, iflr->prefixlen);
+
+			sin6 = (struct sockaddr_in6 *)&iflr->addr;
+			bcopy(&sin6->sin6_addr, &match, sizeof(match));
+			match.s6_addr32[0] &= mask.s6_addr32[0];
+			match.s6_addr32[1] &= mask.s6_addr32[1];
+			match.s6_addr32[2] &= mask.s6_addr32[2];
+			match.s6_addr32[3] &= mask.s6_addr32[3];
+
+			/* if you set extra bits, that's wrong */
+			if (bcmp(&match, &sin6->sin6_addr, sizeof(match)))
+				return EINVAL;
+
+			cmp = 1;
+		} else {
+			if (cmd == SIOCGLIFADDR) {
+				/* on getting an address, take the 1st match */
+				cmp = 0;	/* XXX */
+			} else {
+				/* on deleting an address, do exact match */
+				in6_prefixlen2mask(&mask, 128);
+				sin6 = (struct sockaddr_in6 *)&iflr->addr;
+				bcopy(&sin6->sin6_addr, &match, sizeof(match));
+
+				cmp = 1;
+			}
+		}
+
+		IF_ADDR_RLOCK(ifp);
+		OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr->sa_family != AF_INET6)
+				continue;
+			/* cmp == 0: the first AF_INET6 address wins. */
+			if (!cmp)
+				break;
+
+			/*
+			 * XXX: this is adhoc, but is necessary to allow
+			 * a user to specify fe80::/64 (not /10) for a
+			 * link-local address.
+			 */
+			bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
+			in6_clearscope(&candidate);
+			candidate.s6_addr32[0] &= mask.s6_addr32[0];
+			candidate.s6_addr32[1] &= mask.s6_addr32[1];
+			candidate.s6_addr32[2] &= mask.s6_addr32[2];
+			candidate.s6_addr32[3] &= mask.s6_addr32[3];
+			if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
+				break;
+		}
+		/* Keep the match alive after the list lock is dropped. */
+		if (ifa != NULL)
+			ifa_ref(ifa);
+		IF_ADDR_RUNLOCK(ifp);
+		if (!ifa)
+			return EADDRNOTAVAIL;
+		ia = ifa2ia6(ifa);
+
+		if (cmd == SIOCGLIFADDR) {
+			int error;
+
+			/* fill in the if_laddrreq structure */
+			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
+			error = sa6_recoverscope(
+			    (struct sockaddr_in6 *)&iflr->addr);
+			if (error != 0) {
+				ifa_free(ifa);
+				return (error);
+			}
+
+			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
+				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
+				    ia->ia_dstaddr.sin6_len);
+				error = sa6_recoverscope(
+				    (struct sockaddr_in6 *)&iflr->dstaddr);
+				if (error != 0) {
+					ifa_free(ifa);
+					return (error);
+				}
+			} else
+				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
+
+			iflr->prefixlen =
+			    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
+
+			iflr->flags = ia->ia6_flags;	/* XXX */
+			ifa_free(ifa);
+
+			return 0;
+		} else {
+			struct in6_aliasreq ifra;
+
+			/* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
+			bzero(&ifra, sizeof(ifra));
+			bcopy(iflr->iflr_name, ifra.ifra_name,
+			    sizeof(ifra.ifra_name));
+
+			bcopy(&ia->ia_addr, &ifra.ifra_addr,
+			    ia->ia_addr.sin6_len);
+			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
+				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
+				    ia->ia_dstaddr.sin6_len);
+			} else {
+				bzero(&ifra.ifra_dstaddr,
+				    sizeof(ifra.ifra_dstaddr));
+			}
+			/*
+			 * The mask belongs in ifra_prefixmask.  Copying it
+			 * into ifra_dstaddr clobbered the destination set
+			 * just above and left the request's mask all-zero.
+			 */
+			bcopy(&ia->ia_prefixmask, &ifra.ifra_prefixmask,
+			    ia->ia_prefixmask.sin6_len);
+
+			ifra.ifra_flags = ia->ia6_flags;
+			ifa_free(ifa);
+			return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra,
+			    ifp, td);
+		}
+	    }
+	}
+
+	return EOPNOTSUPP;	/* just for safety */
+}
+
+/*
+ * Initialize an interface's IPv6 address and routing table entry.
+ */ +static int +in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, + struct sockaddr_in6 *sin6, int newhost) +{ + int error = 0, plen, ifacount = 0; + int s = splimp(); + struct ifaddr *ifa; + + /* + * Give the interface a chance to initialize + * if this is its first address, + * and to validate the address if necessary. + */ + IF_ADDR_RLOCK(ifp); + OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ifacount++; + } + IF_ADDR_RUNLOCK(ifp); + + ia->ia_addr = *sin6; + + if (ifacount <= 1 && ifp->if_ioctl) { + error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); + if (error) { + splx(s); + return (error); + } + } + splx(s); + + ia->ia_ifa.ifa_metric = ifp->if_metric; + + /* we could do in(6)_socktrim here, but just omit it at this moment. */ + + /* + * Special case: + * If a new destination address is specified for a point-to-point + * interface, install a route to the destination as an interface + * direct route. + * XXX: the logic below rejects assigning multiple addresses on a p2p + * interface that share the same destination. + */ + plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ + if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && + ia->ia_dstaddr.sin6_family == AF_INET6) { + int rtflags = RTF_UP | RTF_HOST; + error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags); + if (error) + return (error); + ia->ia_flags |= IFA_ROUTE; + /* + * Handle the case for ::1 . + */ + if (ifp->if_flags & IFF_LOOPBACK) + ia->ia_flags |= IFA_RTSELF; + } + + /* + * add a loopback route to self + */ + if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) { + error = ifa_add_loopback_route((struct ifaddr *)ia, + (struct sockaddr *)&ia->ia_addr); + if (error == 0) + ia->ia_flags |= IFA_RTSELF; + } + + /* Add local address to lltable, if necessary (ex. on p2p link). 
*/ + if (newhost) + in6_ifaddloop(&(ia->ia_ifa)); + + return (error); +} + +/* + * Find an IPv6 interface link-local address specific to an interface. + * ifaddr is returned referenced. + */ +struct in6_ifaddr * +in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) +{ + struct ifaddr *ifa; + + IF_ADDR_RLOCK(ifp); + OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) { + if ((((struct in6_ifaddr *)ifa)->ia6_flags & + ignoreflags) != 0) + continue; + ifa_ref(ifa); + break; + } + } + IF_ADDR_RUNLOCK(ifp); + + return ((struct in6_ifaddr *)ifa); +} + + +/* + * find the internet address corresponding to a given interface and address. + * ifaddr is returned referenced. + */ +struct in6_ifaddr * +in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr) +{ + struct ifaddr *ifa; + + IF_ADDR_RLOCK(ifp); + OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) { + ifa_ref(ifa); + break; + } + } + IF_ADDR_RUNLOCK(ifp); + + return ((struct in6_ifaddr *)ifa); +} + +/* + * Convert IP6 address to printable (loggable) representation. Caller + * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long. 
+ */
+static char digits[] = "0123456789abcdef";
+/*
+ * Format addr as lowercase hex groups separated by ':' into ip6buf and
+ * return ip6buf.  Leading zeros inside a group are dropped, and the
+ * first longest run of two or more all-zero groups is written as "::".
+ * A lone zero group is always written as "0".
+ *
+ * Example: 0:0:1:0:2:0:0:0 is rendered as "0:0:1:0:2::".
+ */
+char *
+ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
+{
+	int i, cnt = 0, maxcnt = 0, idx = 0, index = 0;
+	char *cp;
+	const u_int16_t *a = (const u_int16_t *)addr;
+	const u_int8_t *d;
+	int dcolon = 0, zero = 0;
+
+	cp = ip6buf;
+
+	/*
+	 * Pass 1: find the first longest run of zero groups.
+	 * cnt/idx describe the run being scanned, maxcnt/index the best
+	 * run so far.  cnt has to be reset on every non-zero group, not
+	 * only when a new maximum is recorded; otherwise a later run
+	 * inherits the length of an earlier, shorter one and index ends
+	 * up pointing at the wrong run.
+	 */
+	for (i = 0; i < 8; i++) {
+		if (*(a + i) == 0) {
+			cnt++;
+			if (cnt == 1)
+				idx = i;
+		} else {
+			if (maxcnt < cnt) {
+				maxcnt = cnt;
+				index = idx;
+			}
+			cnt = 0;
+		}
+	}
+	/* The address may end inside a zero run. */
+	if (maxcnt < cnt) {
+		maxcnt = cnt;
+		index = idx;
+	}
+
+	/*
+	 * Pass 2: emit the groups.  dcolon is 0 before "::" has been
+	 * written, 1 while the compressed run is being skipped and 2
+	 * once it is behind us.
+	 */
+	for (i = 0; i < 8; i++) {
+		if (dcolon == 1) {
+			if (*a == 0) {
+				/*
+				 * The run reaches the end of the address:
+				 * add the ':' that the final "*--cp" below
+				 * is going to strip again.
+				 */
+				if (i == 7)
+					*cp++ = ':';
+				a++;
+				continue;
+			} else
+				dcolon = 2;
+		}
+		if (*a == 0) {
+			/*
+			 * Compress only at the start of the chosen run and
+			 * only if it spans at least two groups.  Testing
+			 * maxcnt instead of peeking at *(a + 1) avoids
+			 * reading past the eighth group when i == 7.
+			 */
+			if (dcolon == 0 && i == index && maxcnt > 1) {
+				if (i == 0)
+					*cp++ = ':';
+				*cp++ = ':';
+				dcolon = 1;
+			} else {
+				*cp++ = '0';
+				*cp++ = ':';
+			}
+			a++;
+			continue;
+		}
+		d = (const u_char *)a;
+		/* Try to eliminate leading zeros in printout like in :0001. */
+		zero = 1;
+		*cp = digits[*d >> 4];
+		if (*cp != '0') {
+			zero = 0;
+			cp++;
+		}
+		*cp = digits[*d++ & 0xf];
+		if (zero == 0 || (*cp != '0')) {
+			zero = 0;
+			cp++;
+		}
+		*cp = digits[*d >> 4];
+		if (zero == 0 || (*cp != '0')) {
+			zero = 0;
+			cp++;
+		}
+		*cp++ = digits[*d & 0xf];
+		*cp++ = ':';
+		a++;
+	}
+	/* Replace the trailing ':' with the terminator. */
+	*--cp = '\0';
+	return (ip6buf);
+}
+
+/*
+ * Return 1 if in6 is the loopback address, a link-local address, or
+ * falls inside the prefix of any address on V_in6_ifaddrhead;
+ * return 0 otherwise.
+ */
+int
+in6_localaddr(struct in6_addr *in6)
+{
+	struct in6_ifaddr *ia;
+
+	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
+		return 1;
+
+	IN6_IFADDR_RLOCK();
+	OFP_TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+		if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
+		    &ia->ia_prefixmask.sin6_addr)) {
+			IN6_IFADDR_RUNLOCK();
+			return 1;
+		}
+	}
+	IN6_IFADDR_RUNLOCK();
+
+	return (0);
+}
+
+/*
+ * Return 1 if an internet address is for the local host and configured
+ * on one of its interfaces.
+ */ +int +in6_localip(struct in6_addr *in6) +{ + struct in6_ifaddr *ia; + + IN6_IFADDR_RLOCK(); + OFP_TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) { + IN6_IFADDR_RUNLOCK(); + return (1); + } + } + IN6_IFADDR_RUNLOCK(); + return (0); +} + + +int +in6_is_addr_deprecated(struct sockaddr_in6 *sa6) +{ + struct in6_ifaddr *ia; + + IN6_IFADDR_RLOCK(); + OFP_TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, + &sa6->sin6_addr) && + (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) { + IN6_IFADDR_RUNLOCK(); + return (1); /* true */ + } + + /* XXX: do we still have to go thru the rest of the list? */ + } + IN6_IFADDR_RUNLOCK(); + + return (0); /* false */ +} + +/* + * return length of part which dst and src are equal + * hard coding... + */ +int +in6_matchlen(struct in6_addr *src, struct in6_addr *dst) +{ + int match = 0; + u_char *s = (u_char *)src, *d = (u_char *)dst; + u_char *lim = s + 16, r; + + while (s < lim) + if ((r = (*d++ ^ *s++)) != 0) { + while (r < 128) { + match++; + r <<= 1; + } + break; + } else + match += 8; + return match; +} + +/* XXX: to be scope conscious */ +int +in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len) +{ + int bytelen, bitlen; + + /* sanity check */ + if (0 > len || len > 128) { + log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n", + len); + return (0); + } + + bytelen = len / 8; + bitlen = len % 8; + + if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) + return (0); + if (bitlen != 0 && + p1->s6_addr[bytelen] >> (8 - bitlen) != + p2->s6_addr[bytelen] >> (8 - bitlen)) + return (0); + + return (1); +} + +void +in6_prefixlen2mask(struct in6_addr *maskp, int len) +{ + u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; + int bytelen, bitlen, i; + + /* sanity check */ + if (0 > len || len > 128) { + log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n", + len); + return; + } + + 
bzero(maskp, sizeof(*maskp)); + bytelen = len / 8; + bitlen = len % 8; + for (i = 0; i < bytelen; i++) + maskp->s6_addr[i] = 0xff; + if (bitlen) + maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; +} + +/* + * return the best address out of the same scope. if no address was + * found, return the first valid address from designated IF. + */ +struct in6_ifaddr * +in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) +{ + int dst_scope = in6_addrscope(dst), blen = -1, tlen; + struct ifaddr *ifa; + struct in6_ifaddr *besta = 0; + struct in6_ifaddr *dep[2]; /* last-resort: deprecated */ + + dep[0] = dep[1] = NULL; + + /* + * We first look for addresses in the same scope. + * If there is one, return it. + * If two or more, return one which matches the dst longest. + * If none, return one of global addresses assigned other ifs. + */ + IF_ADDR_RLOCK(ifp); + OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) + continue; /* XXX: is there any case to allow anycast? 
*/ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) + continue; /* don't use this interface */ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { + if (V_ip6_use_deprecated) + dep[0] = (struct in6_ifaddr *)ifa; + continue; + } + + if (dst_scope == in6_addrscope(IFA_IN6(ifa))) { + /* + * call in6_matchlen() as few as possible + */ + if (besta) { + if (blen == -1) + blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst); + tlen = in6_matchlen(IFA_IN6(ifa), dst); + if (tlen > blen) { + blen = tlen; + besta = (struct in6_ifaddr *)ifa; + } + } else + besta = (struct in6_ifaddr *)ifa; + } + } + if (besta) { + ifa_ref(&besta->ia_ifa); + IF_ADDR_RUNLOCK(ifp); + return (besta); + } + + OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) + continue; /* XXX: is there any case to allow anycast? */ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) + continue; /* don't use this interface */ + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) + continue; + if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { + if (V_ip6_use_deprecated) + dep[1] = (struct in6_ifaddr *)ifa; + continue; + } + + if (ifa != NULL) + ifa_ref(ifa); + IF_ADDR_RUNLOCK(ifp); + return (struct in6_ifaddr *)ifa; + } + + /* use the last-resort values, that are, deprecated addresses */ + if (dep[0]) { + ifa_ref((struct ifaddr *)dep[0]); + IF_ADDR_RUNLOCK(ifp); + return dep[0]; + } + if (dep[1]) { + ifa_ref((struct ifaddr *)dep[1]); + IF_ADDR_RUNLOCK(ifp); + return dep[1]; + } + + IF_ADDR_RUNLOCK(ifp); + return NULL; +} + +/* + * perform DAD when interface becomes IFF_UP. 
+ */ +void +in6_if_up(struct ifnet *ifp) +{ + struct ifaddr *ifa; + struct in6_ifaddr *ia; + + IF_ADDR_RLOCK(ifp); + OFP_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ia = (struct in6_ifaddr *)ifa; + if (ia->ia6_flags & IN6_IFF_TENTATIVE) { + /* + * The TENTATIVE flag was likely set by hand + * beforehand, implicitly indicating the need for DAD. + * We may be able to skip the random delay in this + * case, but we impose delays just in case. + */ + nd6_dad_start(ifa, + arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz)); + } + } + IF_ADDR_RUNLOCK(ifp); + + /* + * special cases, like 6to4, are handled in in6_ifattach + */ + in6_ifattach(ifp, NULL); +} + +int +in6if_do_dad(struct ifnet *ifp) +{ + if ((ifp->if_flags & IFF_LOOPBACK) != 0) + return (0); + + if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) + return (0); + + switch (ifp->if_type) { +#ifdef IFT_DUMMY + case IFT_DUMMY: +#endif + case IFT_FAITH: + /* + * These interfaces do not have the IFF_LOOPBACK flag, + * but loop packets back. We do not have to do DAD on such + * interfaces. We should even omit it, because loop-backed + * NS would confuse the DAD procedure. + */ + return (0); + default: + /* + * Our DAD routine requires the interface up and running. + * However, some interfaces can be up before the RUNNING + * status. Additionaly, users may try to assign addresses + * before the interface becomes up (or running). + * We simply skip DAD in such a case as a work around. + * XXX: we should rather mark "tentative" on such addresses, + * and do DAD after the interface becomes ready. + */ + if (!((ifp->if_flags & IFF_UP) && + (ifp->if_drv_flags & IFF_DRV_RUNNING))) + return (0); + + return (1); + } +} + +/* + * Calculate max IPv6 MTU through all the interfaces and store it + * to in6_maxmtu. 
+ */ +void +in6_setmaxmtu(void) +{ + unsigned long maxmtu = 0; + struct ifnet *ifp; + + IFNET_RLOCK_NOSLEEP(); + OFP_TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + /* this function can be called during ifnet initialization */ + if (!ifp->if_afdata[AF_INET6]) + continue; + if ((ifp->if_flags & IFF_LOOPBACK) == 0 && + IN6_LINKMTU(ifp) > maxmtu) + maxmtu = IN6_LINKMTU(ifp); + } + IFNET_RUNLOCK_NOSLEEP(); + if (maxmtu) /* update only when maxmtu is positive */ + V_in6_maxmtu = maxmtu; +} + +/* + * Provide the length of interface identifiers to be used for the link attached + * to the given interface. The length should be defined in "IPv6 over + * xxx-link" document. Note that address architecture might also define + * the length for a particular set of address prefixes, regardless of the + * link type. As clarified in rfc2462bis, those two definitions should be + * consistent, and those really are as of August 2004. + */ +int +in6_if2idlen(struct ifnet *ifp) +{ + switch (ifp->if_type) { + case IFT_ETHER: /* RFC2464 */ +#ifdef IFT_PROPVIRTUAL + case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */ +#endif +#ifdef IFT_L2VLAN + case IFT_L2VLAN: /* ditto */ +#endif +#ifdef IFT_IEEE80211 + case IFT_IEEE80211: /* ditto */ +#endif +#ifdef IFT_MIP + case IFT_MIP: /* ditto */ +#endif + case IFT_INFINIBAND: + return (64); + case IFT_FDDI: /* RFC2467 */ + return (64); + case IFT_ISO88025: /* RFC2470 (IPv6 over Token Ring) */ + return (64); + case IFT_PPP: /* RFC2472 */ + return (64); + case IFT_ARCNET: /* RFC2497 */ + return (64); + case IFT_FRELAY: /* RFC2590 */ + return (64); + case IFT_IEEE1394: /* RFC3146 */ + return (64); + case IFT_GIF: + return (64); /* draft-ietf-v6ops-mech-v2-07 */ + case IFT_LOOP: + return (64); /* XXX: is this really correct? */ + default: + /* + * Unknown link type: + * It might be controversial to use the today's common constant + * of 64 for these cases unconditionally. For full compliance, + * we should return an error in this case. 
On the other hand, + * if we simply miss the standard for the link type or a new + * standard is defined for a new link type, the IFID length + * is very likely to be the common constant. As a compromise, + * we always use the constant, but make an explicit notice + * indicating the "unknown" case. + */ + printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type); + return (64); + } +} + +#include + +struct in6_llentry { + struct llentry base; + struct sockaddr_in6 l3_addr6; +}; + +static struct llentry * +in6_lltable_new(const struct sockaddr *l3addr, u_int flags) +{ + struct in6_llentry *lle; + + lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, + M_DONTWAIT | M_ZERO); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr; + lle->base.lle_refcnt = 1; + LLE_LOCK_INIT(&lle->base); + callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock, + CALLOUT_RETURNUNLOCKED); + + return &lle->base; +} + +/* + * Deletes an address from the address table. + * This function is called by the timer functions + * such as arptimer() and nd6_llinfo_timer(), and + * the caller does the locking. 
+ */ +static void +in6_lltable_free(struct lltable *llt, struct llentry *lle) +{ + LLE_WUNLOCK(lle); + LLE_LOCK_DESTROY(lle); + free(lle, M_LLTABLE); +} + +static void +in6_lltable_prefix_free(struct lltable *llt, + const struct sockaddr *prefix, + const struct sockaddr *mask, + u_int flags) +{ + const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix; + const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask; + struct llentry *lle, *next; + register int i; + + /* + * (flags & LLE_STATIC) means deleting all entries + * including static ND6 entries + */ + for (i=0; i < LLTBL_HASHTBL_SIZE; i++) { + OFP_LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { + if (IN6_ARE_MASKED_ADDR_EQUAL( + &((struct sockaddr_in6 *)L3_ADDR(lle))->sin6_addr, + &pfx->sin6_addr, + &msk->sin6_addr) && + ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { + int canceled; + + canceled = callout_drain(&lle->la_timer); + LLE_WLOCK(lle); + if (canceled) + LLE_REMREF(lle); + llentry_free(lle); + } + } + } +} + +static int +in6_lltable_rtcheck(struct ifnet *ifp, + u_int flags, + const struct sockaddr *l3addr) +{ + struct rtentry *rt; + char ip6buf[INET6_ADDRSTRLEN]; + + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + /* Our local addresses are always only installed on the default FIB. */ + /* XXX rtalloc1 should take a const param */ + rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0, + RT_DEFAULT_FIB); + if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { + struct ifaddr *ifa; + /* + * Create an ND6 cache for an IPv6 neighbor + * that is not covered by our own prefix. 
+ */ + /* XXX ifaof_ifpforaddr should take a const param */ + ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp); + if (ifa != NULL) { + ifa_free(ifa); + if (rt != NULL) + RTFREE_LOCKED(rt); + return 0; + } + log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", + ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr)); + if (rt != NULL) + RTFREE_LOCKED(rt); + return EINVAL; + } + RTFREE_LOCKED(rt); + return 0; +} + +static struct llentry * +in6_lltable_lookup(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + struct llentries *lleh; + u_int hashkey; + + IF_AFDATA_LOCK_ASSERT(ifp); + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + hashkey = sin6->sin6_addr.s6_addr32[3]; + lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; + OFP_LIST_FOREACH(lle, lleh, lle_next) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle); + if (lle->la_flags & LLE_DELETED) + continue; + if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr, + sizeof(struct in6_addr)) == 0) + break; + } + + if (lle == NULL) { + if (!(flags & LLE_CREATE)) + return (NULL); + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. 
+ */ + if (!(flags & LLE_IFADDR) && + in6_lltable_rtcheck(ifp, flags, l3addr) != 0) + return NULL; + + lle = in6_lltable_new(l3addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + return NULL; + } + lle->la_flags = flags & ~LLE_CREATE; + if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { + bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); + lle->la_flags |= (LLE_VALID | LLE_STATIC); + } + + lle->lle_tbl = llt; + lle->lle_head = lleh; + OFP_LIST_INSERT_HEAD(lleh, lle, lle_next); + } else if (flags & LLE_DELETE) { + if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { + LLE_WLOCK(lle); + lle->la_flags = LLE_DELETED; + LLE_WUNLOCK(lle); +#ifdef DIAGNOSTIC + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); +#endif + } + lle = (void *)-1; + } + if (LLE_IS_VALID(lle)) { + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + } + return (lle); +} + +static int +in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) +{ + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + /* XXX stack use */ + struct { + struct rt_msghdr rtm; + struct sockaddr_in6 sin6; + /* + * ndp.c assumes that sdl is word aligned + */ +#ifdef __LP64__ + uint32_t pad; +#endif + struct sockaddr_dl sdl; + } ndpc; + int i, error; + + if (ifp->if_flags & IFF_LOOPBACK) + return 0; + + LLTABLE_LOCK_ASSERT(); + + error = 0; + for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { + OFP_LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { + struct sockaddr_dl *sdl; + + /* skip deleted or invalid entries */ + if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) + continue; + /* Skip if jailed and not a valid IP of the prison. 
*/ + if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) + continue; + /* + * produce a msg made of: + * struct rt_msghdr; + * struct sockaddr_in6 (IPv6) + * struct sockaddr_dl; + */ + bzero(&ndpc, sizeof(ndpc)); + ndpc.rtm.rtm_msglen = sizeof(ndpc); + ndpc.rtm.rtm_version = RTM_VERSION; + ndpc.rtm.rtm_type = RTM_GET; + ndpc.rtm.rtm_flags = RTF_UP; + ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; + ndpc.sin6.sin6_family = AF_INET6; + ndpc.sin6.sin6_len = sizeof(ndpc.sin6); + bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle)); + + /* publish */ + if (lle->la_flags & LLE_PUB) + ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; + + sdl = &ndpc.sdl; + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(*sdl); + sdl->sdl_alen = ifp->if_addrlen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + ndpc.rtm.rtm_rmx.rmx_expire = + lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; + ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); + if (lle->la_flags & LLE_STATIC) + ndpc.rtm.rtm_flags |= RTF_STATIC; + ndpc.rtm.rtm_index = ifp->if_index; + error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); + if (error) + break; + } + } + return error; +} + +void * +in6_domifattach(struct ifnet *ifp) +{ + struct in6_ifextra *ext; + + ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK); + bzero(ext, sizeof(*ext)); + + ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat), + M_IFADDR, M_WAITOK); + bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat)); + + ext->icmp6_ifstat = + (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat), + M_IFADDR, M_WAITOK); + bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat)); + + ext->nd_ifinfo = nd6_ifattach(ifp); + ext->scope6_id = scope6_ifattach(ifp); + ext->lltable = lltable_init(ifp, AF_INET6); + if (ext->lltable != NULL) { + ext->lltable->llt_free = in6_lltable_free; + ext->lltable->llt_prefix_free = in6_lltable_prefix_free; + ext->lltable->llt_lookup = in6_lltable_lookup; + 
ext->lltable->llt_dump = in6_lltable_dump; + } + + ext->mld_ifinfo = mld_domifattach(ifp); + + return ext; +} + +void +in6_domifdetach(struct ifnet *ifp, void *aux) +{ + struct in6_ifextra *ext = (struct in6_ifextra *)aux; + + mld_domifdetach(ifp); + scope6_ifdetach(ext->scope6_id); + nd6_ifdetach(ext->nd_ifinfo); + lltable_free(ext->lltable); + free(ext->in6_ifstat, M_IFADDR); + free(ext->icmp6_ifstat, M_IFADDR); + free(ext, M_IFADDR); +} +#endif +/* + * Convert sockaddr_in6 to sockaddr_in. Original sockaddr_in6 must be + * v4 mapped addr or v4 compat addr + */ +void +ofp_in6_sin6_2_sin(struct ofp_sockaddr_in *sin, struct ofp_sockaddr_in6 *sin6) +{ + + bzero(sin, sizeof(*sin)); + sin->sin_len = sizeof(struct ofp_sockaddr_in); + sin->sin_family = OFP_AF_INET; + sin->sin_port = sin6->sin6_port; + sin->sin_addr.s_addr = sin6->sin6_addr.ofp_s6_addr32[3]; +} + +/* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */ +void +ofp_in6_sin_2_v4mapsin6(struct ofp_sockaddr_in *sin, struct ofp_sockaddr_in6 *sin6) +{ + bzero(sin6, sizeof(*sin6)); + sin6->sin6_len = sizeof(struct ofp_sockaddr_in6); + sin6->sin6_family = OFP_AF_INET6; + sin6->sin6_port = sin->sin_port; + sin6->sin6_addr.ofp_s6_addr32[0] = 0; + sin6->sin6_addr.ofp_s6_addr32[1] = 0; + sin6->sin6_addr.ofp_s6_addr32[2] = OFP_IPV6_ADDR_INT32_SMP; + sin6->sin6_addr.ofp_s6_addr32[3] = sin->sin_addr.s_addr; +} + +/* Convert sockaddr_in6 into sockaddr_in. */ +void +ofp_in6_sin6_2_sin_in_sock(struct ofp_sockaddr *nam) +{ + struct ofp_sockaddr_in *sin_p; + struct ofp_sockaddr_in6 sin6; + + /* + * Save original sockaddr_in6 addr and convert it + * to sockaddr_in. + */ + sin6 = *(struct ofp_sockaddr_in6 *)nam; + sin_p = (struct ofp_sockaddr_in *)nam; + ofp_in6_sin6_2_sin(sin_p, &sin6); +} + +#if 0 +/* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. 
*/ +void +in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) +{ + struct sockaddr_in *sin_p; + struct sockaddr_in6 *sin6_p; + + sin6_p = malloc(sizeof *sin6_p, M_SONAME, + M_WAITOK); + sin_p = (struct sockaddr_in *)*nam; + in6_sin_2_v4mapsin6(sin_p, sin6_p); + free(*nam, M_SONAME); + *nam = (struct sockaddr *)sin6_p; +} +#endif /*0*/ + +uint32_t +ofp_ip6_randomid(void) +{ + uint32_t result = 0; + + odp_random_data((uint8_t *)&result, sizeof(result), 0); + + return result; +} + +uint32_t +ofp_ip6_randomflowlabel(void) +{ + uint32_t result = 0; + + odp_random_data((uint8_t *)&result, sizeof(result), 0); + + return result & 0xfffff; +} + +int +ofp_in6_selectsrc(struct ofp_sockaddr_in6 *dstsock, void *opts, + struct inpcb *inp, void *ro, struct ofp_ucred *cred, + struct ofp_ifnet **ifpp, struct ofp_in6_addr *srcp) +{ + struct ofp_nh6_entry* nh; + struct ofp_ifnet *ifp = NULL; + + (void)opts; + (void)ro; + (void)cred; + + /* if interface is specified and has IPv6 address, just use it*/ + if(ifpp) { + if(*ifpp != NULL && ofp_ip6_is_set((*ifpp)->ip6_addr)) { + memcpy(srcp->ofp_s6_addr, (*ifpp)->ip6_addr, 16); + return 0; + } + *ifpp = NULL; + } + + /* + * if the socket has already bound the source, just use it. + */ + if (inp != NULL && !OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + memcpy(srcp, &inp->in6p_laddr, sizeof(*srcp)); + return (0); + } + + /* + * if destination is loopback then source is loopback too. + */ + if (OFP_IN6_IS_ADDR_LOOPBACK(&(dstsock->sin6_addr))) { + *srcp = dstsock->sin6_addr; + return 0; + } + + /* + * If the address is not specified, choose the best one based on + * the outgoing interface and the destination address. 
+ */ + /* get the outgoing interface */ + + nh = ofp_get_next_hop6(0, dstsock->sin6_addr.ofp_s6_addr, NULL); + if (!nh) { + printf("route not found\n"); + return OFP_EHOSTUNREACH; + } + + ifp = ofp_get_ifnet(nh->port, nh->vlan); + if (ifp && ofp_ip6_is_set(ifp->ip6_addr)) { + memcpy(srcp->ofp_s6_addr, ifp->ip6_addr, 16); + if(ifpp) + *ifpp = ifp; + return 0; + } + + return OFP_EHOSTUNREACH; +} + +/* + * Return the scope identifier or zero. + */ +uint16_t +ofp_in6_getscope(struct ofp_in6_addr *in6) +{ + + if (OFP_IN6_IS_SCOPE_LINKLOCAL(in6) || + OFP_IN6_IS_ADDR_MC_INTFACELOCAL(in6)) + return (in6->ofp_s6_addr16[1]); + + return (0); +} + diff --git a/src/ofp_in6_cksum.c b/src/ofp_in6_cksum.c new file mode 100644 index 00000000..15e38b13 --- /dev/null +++ b/src/ofp_in6_cksum.c @@ -0,0 +1,184 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_cksum.c,v 1.10 2000/12/03 00:53:59 itojun Exp $ + */ + +/*- + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + + +#include "odp.h" +#include "ofpi_in.h" +#include "ofpi_in6.h" +#include "ofpi_ip6.h" +#include "ofpi_util.h" + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + */ + +#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) +#define REDUCE do { \ + l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum); \ +} while (0) + +static int +_ofp_in6_cksum_pseudo(struct ofp_ip6_hdr *ip6, uint32_t len, + uint8_t nxt, uint16_t csum) +{ + int sum; + uint16_t scope = 0, *w; + + union { + uint16_t phs[4]; + struct __attribute__ ((__packed__)) { + uint32_t ph_len; + uint8_t ph_zero[3]; + uint8_t ph_nxt; + } ph; + } uph; + + sum = csum; + + /* + * First create IP6 pseudo header and calculate a summary. + */ + uph.ph.ph_len = odp_cpu_to_be_32(len); + uph.ph.ph_zero[0] = uph.ph.ph_zero[1] = uph.ph.ph_zero[2] = 0; + uph.ph.ph_nxt = nxt; + + /* Payload length and upper layer identifier. */ + sum += uph.phs[0]; sum += uph.phs[1]; + sum += uph.phs[2]; sum += uph.phs[3]; + + /* IPv6 source address. 
*/ + scope = ofp_in6_getscope(&ip6->ip6_src); + w = (uint16_t *)&ip6->ip6_src; + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + if (scope != 0) + sum -= scope; + + /* IPv6 destination address. */ + scope = ofp_in6_getscope(&ip6->ip6_dst); + w = (uint16_t *)&ip6->ip6_dst; + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + if (scope != 0) + sum -= scope; + + return sum; +} + +int ofp_in6_cksum_pseudo(struct ofp_ip6_hdr *ip6, + uint32_t len, uint8_t nxt, uint16_t csum) +{ + int sum; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + sum = _ofp_in6_cksum_pseudo(ip6, len, nxt, csum); + REDUCE; + return sum; +} +int ofp_in6_cksum(odp_packet_t m, uint8_t nxt, uint32_t off, uint32_t len) +{ + int sum; + int tmp; + union { + uint16_t s[2]; + uint32_t l; + } l_util; + struct ofp_ip6_hdr *ip6 = odp_packet_l3_ptr(m, NULL); + +/*Pseudo header*/ + sum = _ofp_in6_cksum_pseudo(ip6, len, nxt, 0); + +/* Payload*/ + tmp = ofp_getsum(m, odp_packet_l3_offset(m) + + off, len); + sum += tmp; + + REDUCE; + return (~sum & 0xffff); +} +int ofp_ip6_cksum(odp_packet_t m, uint32_t len, uint8_t nxt, uint16_t csum) +{ + int sum; + int tmp; + union { + uint16_t s[2]; + uint32_t l; + } l_util; + struct ofp_ip6_hdr *ip6 = odp_packet_l3_ptr(m, NULL); + +/*Pseudo header*/ + sum = _ofp_in6_cksum_pseudo(ip6, len, nxt, csum); + +/* Payload*/ + tmp = ofp_getsum(m, odp_packet_l3_offset(m) + + sizeof(struct ofp_ip6_hdr), len); + sum += tmp; + + REDUCE; + return (~sum & 0xffff); +} diff --git a/src/ofp_in6_pcb.c b/src/ofp_in6_pcb.c new file mode 100644 index 00000000..2d184620 --- /dev/null +++ b/src/ofp_in6_pcb.c @@ -0,0 +1,1314 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * Portions of this software were developed by Robert N. M. 
Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 + */ + +#include +#include "ofpi_in.h" +#include "ofpi_in_pcb.h" +#include "ofpi_in6_pcb.h" +#include "ofpi_ip6.h" +#include "ofpi_systm.h" +#include "ofpi_socket.h" +#include "ofpi_socketvar.h" +#include "ofpi_tcp_var.h" +#include "ofpi_ip6_var.h" +#include "ofpi_portconf.h" +#include "ofpi_errno.h" + +#if 0 +#include +__FBSDID("$FreeBSD: release/9.1.0/sys/netinet6/in6_pcb.c 234279 2012-04-14 10:36:43Z glebius $"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" +#include "opt_pcbgroup.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct in6_addr zeroin6_addr; +#endif + +int +ofp_in6_pcbbind(register struct inpcb *inp, struct ofp_sockaddr *nam, + struct ofp_ucred *cred) +{ + + struct socket *so = inp->inp_socket; + struct ofp_sockaddr_in6 *sin6 = (struct ofp_sockaddr_in6 *)NULL; + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + u_short lport = 0; + int error, lookupflags = 0; + int reuseport = (so->so_options & OFP_SO_REUSEPORT); + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); + + if (OFP_TAILQ_EMPTY(ofp_get_ifaddr6head())) /* XXX broken! */ + return (OFP_EADDRNOTAVAIL); + + if (inp->inp_lport || !OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) + return (OFP_EINVAL); + if ((so->so_options & (OFP_SO_REUSEADDR|OFP_SO_REUSEPORT)) == 0) + lookupflags = INPLOOKUP_WILDCARD; + + if (nam == NULL) { +#if 0 + if ((error = prison_local_ip6(cred, &inp->in6p_laddr, + ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) + return (error); +#else + ; +#endif /*0*/ + } else { + sin6 = (struct ofp_sockaddr_in6 *)nam; + if (nam->sa_len != sizeof(*sin6)) + return (OFP_EINVAL); + /* + * family check. 
+ */ + if (nam->sa_family != OFP_AF_INET6) + return (OFP_EAFNOSUPPORT); +#if 0 + if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) + return(error); + + if ((error = prison_local_ip6(cred, &sin6->sin6_addr, + ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) + return (error); +#endif + + lport = sin6->sin6_port; + +/* Bogdan: no multicast, no anycast*/ + if (OFP_IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + /* + * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; + * allow compepte duplication of binding if + * SO_REUSEPORT is set, or if SO_REUSEADDR is set + * and a multicast address is bound on both + * new and duplicated sockets. + */ + if (so->so_options & OFP_SO_REUSEADDR) + reuseport = OFP_SO_REUSEADDR|OFP_SO_REUSEPORT; + } else if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + struct ofp_ifnet * ifa; + + sin6->sin6_port = 0; /* yech... */ + + ifa = ofp_ifaddr6_elem_get((uint8_t *)&sin6->sin6_addr); + if (ifa == NULL && + (inp->inp_flags & INP_BINDANY) == 0) { + return (OFP_EADDRNOTAVAIL); + } +#if 0 +/*Bogdan: No ia6_flags, no anycast*/ + /* + * XXX: bind to an anycast address might accidentally + * cause sending a packet with anycast source address. + * We should allow to bind to a deprecated address, since + * the application dares to use it. 
+ */ + if (ifa != NULL && + ((struct in6_ifaddr *)ifa)->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { + ifa_free(ifa); + return (OFP_EADDRNOTAVAIL); + } + if (ifa != NULL) + ifa_free(ifa); +#endif + } + + if (lport) { + struct inpcb *t; + struct tcptw *tw; + + /* GROSS */ +#if 0 +/*Bogdan: No credential check */ + if (odp_be_to_cpu_16(lport) <= V_ipport_reservedhigh && + odp_be_to_cpu_16(lport) >= V_ipport_reservedlow && + priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, + 0)) + return (OFP_EACCES); + + if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && + priv_check_cred(inp->inp_cred, + PRIV_NETINET_REUSEPORT, 0) != 0) { + t = ofp_in6_pcblookup_local(pcbinfo, + &sin6->sin6_addr, lport, + INPLOOKUP_WILDCARD, cred); + if (t && + ((t->inp_flags & INP_TIMEWAIT) == 0) && + (so->so_type != SOCK_STREAM || + IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && + (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || + (t->inp_flags2 & INP_REUSEPORT) == 0) && + (inp->inp_cred->cr_uid != + t->inp_cred->cr_uid)) + return (OFP_EADDRINUSE); + + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && + IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + struct sockaddr_in sin; + + in6_sin6_2_sin(&sin, sin6); + t = in_pcblookup_local(pcbinfo, + sin.sin_addr, lport, + INPLOOKUP_WILDCARD, cred); + if (t && + ((t->inp_flags & + INP_TIMEWAIT) == 0) && + (so->so_type != SOCK_STREAM || + ntohl(t->inp_faddr.s_addr) == + INADDR_ANY) && + (inp->inp_cred->cr_uid != + t->inp_cred->cr_uid)) + return (OFP_EADDRINUSE); + } + + } +#endif /* 0 */ + t = ofp_in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, + lport, lookupflags, cred); + if (t && (t->inp_flags & INP_TIMEWAIT)) { + /* + * XXXRW: If an incpb has had its timewait + * state recycled, we treat the address as + * being in use (for now). This is better + * than a panic, but not desirable. 
+ */ + tw = intotw(t); + if (tw == NULL || + (reuseport & tw->tw_so_options) == 0) + return (OFP_EADDRINUSE); + } else if (t && (reuseport == 0 || + (t->inp_flags2 & INP_REUSEPORT) == 0)) { + return (OFP_EADDRINUSE); + } + + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && + OFP_IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + struct ofp_sockaddr_in sin; + + ofp_in6_sin6_2_sin(&sin, sin6); + t = ofp_in_pcblookup_local(pcbinfo, sin.sin_addr, + lport, lookupflags, cred); + if (t && t->inp_flags & INP_TIMEWAIT) { + tw = intotw(t); + if (tw == NULL) + return (OFP_EADDRINUSE); + if ((reuseport & tw->tw_so_options) == 0 + && (odp_be_to_cpu_32(t->inp_laddr.s_addr) != + OFP_INADDR_ANY || ((inp->inp_vflag & + INP_IPV6PROTO) == + (t->inp_vflag & INP_IPV6PROTO)))) + return (OFP_EADDRINUSE); + } else if (t && (reuseport == 0 || + (t->inp_flags2 & INP_REUSEPORT) == 0) && + (odp_be_to_cpu_32(t->inp_laddr.s_addr) != OFP_INADDR_ANY || + (t->inp_vflag & INP_IPV6PROTO) != 0)) + return (OFP_EADDRINUSE); + } + } + + inp->in6p_laddr = sin6->sin6_addr; + } + + if (lport == 0) { + if ((error = ofp_in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) { + /* Undo an address bind that may have occurred. */ + inp->in6p_laddr = ofp_in6addr_any; + return (error); + } + } else { + inp->inp_lport = lport; + if (ofp_in_pcbinshash(inp) != 0) { + inp->in6p_laddr = ofp_in6addr_any; + inp->inp_lport = 0; + return (OFP_EAGAIN); + } + } + + return (0); +} + + +/* + * Transform old in6_pcbconnect() into an inner subroutine for new + * in6_pcbconnect(): Do some validity-checking on the remote + * address (in mbuf 'nam') and then determine local host address + * (i.e., which interface) to use to access that remote host. + * + * This preserves definition of in6_pcbconnect(), while supporting a + * slightly different version for T/TCP. (This is more than + * a bit of a kludge, but cleaning up the internal interfaces would + * have forced minor changes in every protocol). 
+ */ +int +ofp_in6_pcbladdr(register struct inpcb *inp, struct ofp_sockaddr *nam, + struct ofp_in6_addr *plocal_addr6) +{ + register struct ofp_sockaddr_in6 *sin6 = (struct ofp_sockaddr_in6 *)nam; + int error = 0; + struct ofp_ifnet *ifp = NULL; +#if 0 + int scope_ambiguous = 0; +#endif + struct ofp_in6_addr in6a; + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ + + if (nam->sa_len != sizeof (*sin6)) + return (OFP_EINVAL); + if (sin6->sin6_family != OFP_AF_INET6) + return (OFP_EAFNOSUPPORT); + if (sin6->sin6_port == 0) + return (OFP_EADDRNOTAVAIL); + +#if 0 + if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) + scope_ambiguous = 1; + + if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) + return(error); +#endif /* 0 */ + + if (!OFP_TAILQ_EMPTY(ofp_get_ifaddr6head())) { + /* + * If the destination address is UNSPECIFIED addr, + * use the loopback addr, e.g ::1. + */ + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { +#if 1 + OFP_IFNET_LOCK_READ(ifaddr6_list); + memcpy(sin6->sin6_addr.ofp_s6_addr, OFP_TAILQ_FIRST(ofp_get_ifaddr6head())->ip6_addr, 16); + OFP_IFNET_UNLOCK_READ(ifaddr6_list); +#else + sin6->sin6_addr = ofp_in6addr_loopback; +#endif + } + } +#if 0 + if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) + return (error); +#endif + + error = ofp_in6_selectsrc(sin6, inp->in6p_outputopts, + inp, NULL, inp->inp_cred, &ifp, &in6a); + if (error) + return (error); + +#if 0 + if (ifp && scope_ambiguous && + (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { + return(error); + } +#endif + + /* + * Do not update this earlier, in case we return with an error. + * + * XXX: this in6_selectsrc result might replace the bound local + * address with the address specified by setsockopt(IPV6_PKTINFO). + * Is it the intended behavior? + */ + *plocal_addr6 = in6a; + + /* + * Don't do pcblookup call here; return interface in + * plocal_addr6 + * and exit to caller, that will do the lookup. 
+ */ + + return (0); +} + +/* + * Outer subroutine: + * Connect from a socket to a specified address. + * Both address and port must be specified in argument sin. + * If don't have a local address for this socket yet, + * then pick one. + */ +int +ofp_in6_pcbconnect_mbuf(register struct inpcb *inp, struct ofp_sockaddr *nam, + struct ofp_ucred *cred, odp_packet_t m) +{ + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + register struct ofp_sockaddr_in6 *sin6 = + (struct ofp_sockaddr_in6 *)nam; + struct ofp_in6_addr addr6; + int error; + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); + + /* + * Call inner routine, to assign local interface address. + * in6_pcbladdr() may automatically fill in sin6_scope_id. + */ + if ((error = ofp_in6_pcbladdr(inp, nam, &addr6)) != 0) + return (error); + + if (ofp_in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, + sin6->sin6_port, + OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) + ? &addr6 : &inp->in6p_laddr, + inp->inp_lport, 0, NULL) != NULL) { + return (OFP_EADDRINUSE); + } + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (inp->inp_lport == 0) { + error = ofp_in6_pcbbind(inp, (struct ofp_sockaddr *)0, cred); + if (error) + return (error); + } + inp->in6p_laddr = addr6; + } + + inp->in6p_faddr = sin6->sin6_addr; + inp->inp_fport = sin6->sin6_port; + /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ + inp->inp_flow &= ~OFP_IPV6_FLOWLABEL_MASK; + + if (inp->inp_flags & IN6P_AUTOFLOWLABEL) + inp->inp_flow |= + (odp_cpu_to_be_32(ofp_ip6_randomflowlabel()) & + OFP_IPV6_FLOWLABEL_MASK); + ofp_in_pcbrehash_mbuf(inp, m); + return (0); +} + + +int +ofp_in6_pcbconnect(struct inpcb *inp, struct ofp_sockaddr *nam, struct ofp_ucred *cred) +{ + odp_packet_t m = ODP_PACKET_INVALID; + + return (ofp_in6_pcbconnect_mbuf(inp, nam, cred, m)); +} + + +void +ofp_in6_pcbdisconnect(struct inpcb *inp) +{ + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); + + bzero((caddr_t)&inp->in6p_faddr, 
sizeof(inp->in6p_faddr)); + inp->inp_fport = 0; + /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ + inp->inp_flow &= ~OFP_IPV6_FLOWLABEL_MASK; + ofp_in_pcbrehash(inp); +} + + +struct ofp_sockaddr * +ofp_in6_sockaddr(ofp_in_port_t port, struct ofp_in6_addr *addr_p) +{ + struct ofp_sockaddr_in6 *sin6; + + sin6 = malloc(sizeof (*sin6)); + bzero(sin6, sizeof *sin6); + sin6->sin6_family = OFP_AF_INET6; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_port = port; + sin6->sin6_addr = *addr_p; +#if 0 + (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ +#endif + return (struct ofp_sockaddr *)sin6; +} + +struct ofp_sockaddr * +ofp_in6_v4mapsin6_sockaddr(ofp_in_port_t port, struct ofp_in_addr *addr_p) +{ + struct ofp_sockaddr_in sin; + struct ofp_sockaddr_in6 *sin6_p; + + bzero(&sin, sizeof sin); + sin.sin_family = OFP_AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_port = port; + sin.sin_addr = *addr_p; + + sin6_p = malloc(sizeof (*sin6_p)); + + ofp_in6_sin_2_v4mapsin6(&sin, sin6_p); + + return (struct ofp_sockaddr *)sin6_p; +} + +#if 0 +int +in6_getsockaddr(struct socket *so, struct sockaddr **nam) +{ + register struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); + + INP_RLOCK(inp); + port = inp->inp_lport; + addr = inp->in6p_laddr; + INP_RUNLOCK(inp); + + *nam = in6_sockaddr(port, &addr); + return 0; +} + +int +in6_getpeeraddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); + + INP_RLOCK(inp); + port = inp->inp_fport; + addr = inp->in6p_faddr; + INP_RUNLOCK(inp); + + *nam = in6_sockaddr(port, &addr); + return 0; +} + +int +in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); + +#ifdef INET + 
if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { + error = in_getsockaddr(so, nam); + if (error == 0) + in6_sin_2_v4mapsin6_in_sock(nam); + } else +#endif + { + /* scope issues will be handled in in6_getsockaddr(). */ + error = in6_getsockaddr(so, nam); + } + + return error; +} + +int +in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); + +#ifdef INET + if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { + error = in_getpeeraddr(so, nam); + if (error == 0) + in6_sin_2_v4mapsin6_in_sock(nam); + } else +#endif + /* scope issues will be handled in in6_getpeeraddr(). */ + error = in6_getpeeraddr(so, nam); + + return error; +} + +/* + * Pass some notification to all connections of a protocol + * associated with address dst. The local address and/or port numbers + * may be specified to limit the search. The "usual action" will be + * taken, depending on the ctlinput cmd. The caller must filter any + * cmds that are uninteresting (e.g., no error in the map). + * Call the protocol specific routine (if any) to report + * any errors for each matching socket. + */ +void +in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, + u_int fport_arg, const struct sockaddr *src, u_int lport_arg, + int cmd, void *cmdarg, + struct inpcb *(*notify)(struct inpcb *, int)) +{ + struct inpcb *inp, *inp_temp; + struct sockaddr_in6 sa6_src, *sa6_dst; + u_short fport = fport_arg, lport = lport_arg; + u_int32_t flowinfo; + int errno; + + if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) + return; + + sa6_dst = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) + return; + + /* + * note that src can be NULL when we get notify by local fragmentation. + */ + sa6_src = (src == NULL) ? 
sa6_any : *(const struct sockaddr_in6 *)src; + flowinfo = sa6_src.sin6_flowinfo; + + /* + * Redirects go to all references to the destination, + * and use in6_rtchange to invalidate the route cache. + * Dead host indications: also use in6_rtchange to invalidate + * the cache, and deliver the error to all the sockets. + * Otherwise, if we have knowledge of the local port and address, + * deliver only to that socket. + */ + if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { + fport = 0; + lport = 0; + bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); + + if (cmd != PRC_HOSTDEAD) + notify = in6_rtchange; + } + errno = inet6ctlerrmap[cmd]; + INP_INFO_WLOCK(pcbinfo); + OFP_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { + INP_WLOCK(inp); + if ((inp->inp_vflag & INP_IPV6) == 0) { + INP_WUNLOCK(inp); + continue; + } + + /* + * If the error designates a new path MTU for a destination + * and the application (associated with this socket) wanted to + * know the value, notify. Note that we notify for all + * disconnected sockets if the corresponding application + * wanted. This is because some UDP applications keep sending + * sockets disconnected. + * XXX: should we avoid to notify the value to TCP sockets? + */ + if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && + (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { + ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, + (u_int32_t *)cmdarg); + } + + /* + * Detect if we should notify the error. If no source and + * destination ports are specifed, but non-zero flowinfo and + * local address match, notify the error. This is the case + * when the error is delivered with an encrypted buffer + * by ESP. Otherwise, just compare addresses and ports + * as usual. 
+ */ + if (lport == 0 && fport == 0 && flowinfo && + inp->inp_socket != NULL && + flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) + goto do_notify; + else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, + &sa6_dst->sin6_addr) || + inp->inp_socket == 0 || + (lport && inp->inp_lport != lport) || + (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, + &sa6_src.sin6_addr)) || + (fport && inp->inp_fport != fport)) { + INP_WUNLOCK(inp); + continue; + } + + do_notify: + if (notify) { + if ((*notify)(inp, errno)) + INP_WUNLOCK(inp); + } else + INP_WUNLOCK(inp); + } + INP_INFO_WUNLOCK(pcbinfo); +} +#endif +/* + * Lookup a PCB based on the local address and port. Caller must hold the + * hash lock. No inpcb locks or references are acquired. + */ +struct inpcb * +ofp_in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct ofp_in6_addr *laddr, + u_short lport, int lookupflags, struct ofp_ucred *cred) +{ + register struct inpcb *inp; + int matchwild = 3, wildcard; + + KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + + INP_HASH_WLOCK_ASSERT(pcbinfo); + + if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { + struct inpcbhead *head; + /* + * Look for an unconnected (wildcard foreign addr) PCB that + * matches the local address and port we're looking for. + */ + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(OFP_INADDR_ANY, lport, + 0, pcbinfo->ipi_hashmask)]; + OFP_LIST_FOREACH(inp, head, inp_hash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && + OFP_IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_lport == lport) { + /* Found. */ + if (cred == NULL /*|| + prison_equal_ip6(cred->cr_prison, + inp->inp_cred->cr_prison)*/) + return (inp); + } + } + /* + * Not found. 
+ */ + return (NULL); + } else { + struct inpcbporthead *porthash; + struct inpcbport *phd; + struct inpcb *match = NULL; + /* + * Best fit PCB lookup. + * + * First see if this local port is in use by looking on the + * port hash list. + */ + porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, + pcbinfo->ipi_porthashmask)]; + OFP_LIST_FOREACH(phd, porthash, phd_hash) { + if (phd->phd_port == lport) + break; + } + if (phd != NULL) { + /* + * Port is in use by one or more PCBs. Look for best + * fit. + */ + OFP_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { + wildcard = 0; + if (cred != NULL /*&& + !prison_equal_ip6(cred->cr_prison, + inp->inp_cred->cr_prison)*/) + continue; + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + wildcard++; + if (!OFP_IN6_IS_ADDR_UNSPECIFIED( + &inp->in6p_laddr)) { + if (OFP_IN6_IS_ADDR_UNSPECIFIED(laddr)) + wildcard++; + else if (!OFP_IN6_ARE_ADDR_EQUAL( + &inp->in6p_laddr, laddr)) + continue; + } else { + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(laddr)) + wildcard++; + } + if (wildcard < matchwild) { + match = inp; + matchwild = wildcard; + if (matchwild == 0) + break; + } + } + } + return (match); + } +} +#if 0 +void +in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) +{ + struct inpcb *in6p; + struct ip6_moptions *im6o; + int i, gap; + + INP_INFO_RLOCK(pcbinfo); + OFP_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { + INP_WLOCK(in6p); + im6o = in6p->in6p_moptions; + if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { + /* + * Unselect the outgoing ifp for multicast if it + * is being detached. + */ + if (im6o->im6o_multicast_ifp == ifp) + im6o->im6o_multicast_ifp = NULL; + /* + * Drop multicast group membership if we joined + * through the interface being detached. 
+ */ + gap = 0; + for (i = 0; i < im6o->im6o_num_memberships; i++) { + if (im6o->im6o_membership[i]->in6m_ifp == + ifp) { + in6_mc_leave(im6o->im6o_membership[i], + NULL); + gap++; + } else if (gap != 0) { + im6o->im6o_membership[i - gap] = + im6o->im6o_membership[i]; + } + } + im6o->im6o_num_memberships -= gap; + } + INP_WUNLOCK(in6p); + } + INP_INFO_RUNLOCK(pcbinfo); +} + +/* + * Check for alternatives when higher level complains + * about service problems. For now, invalidate cached + * routing information. If the route was created dynamically + * (by a redirect), time to try a default gateway again. + */ +void +in6_losing(struct inpcb *in6p) +{ + + /* + * We don't store route pointers in the routing table anymore + */ + return; +} + +/* + * After a routing change, flush old routing + * and allocate a (hopefully) better one. + */ +struct inpcb * +in6_rtchange(struct inpcb *inp, int errno) +{ + /* + * We don't store route pointers in the routing table anymore + */ + return inp; +} + +#ifdef PCBGROUP +/* + * Lookup PCB in hash list, using pcbgroup tables. + */ +static struct inpcb * +in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, + struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, + u_int lport_arg, int lookupflags, struct ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp, *tmpinp; + u_short fport = fport_arg, lport = lport_arg; + int faith; + + if (faithprefix_p != NULL) + faith = (*faithprefix_p)(laddr); + else + faith = 0; + + /* + * First look for an exact match. 
+ */ + tmpinp = NULL; + INP_GROUP_LOCK(pcbgroup); + head = &pcbgroup->ipg_hashbase[ + INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, + pcbgroup->ipg_hashmask)]; + OFP_LIST_FOREACH(inp, head, inp_pcbgrouphash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_fport == fport && + inp->inp_lport == lport) { + /* + * XXX We should be able to directly return + * the inp here, without any checks. + * Well unless both bound with SO_REUSEPORT? + */ + if (prison_flag(inp->inp_cred, PR_IP6)) + goto found; + if (tmpinp == NULL) + tmpinp = inp; + } + } + if (tmpinp != NULL) { + inp = tmpinp; + goto found; + } + + /* + * Then look for a wildcard match, if requested. + */ + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { + struct inpcb *local_wild = NULL, *local_exact = NULL; + struct inpcb *jail_wild = NULL; + int injail; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. 
+ */ + head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, + 0, pcbinfo->ipi_wildmask)]; + OFP_LIST_FOREACH(inp, head, inp_pcbgroup_wild) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + inp->inp_lport != lport) { + continue; + } + + /* XXX inp locking */ + if (faith && (inp->inp_flags & INP_FAITH) == 0) + continue; + + injail = prison_flag(inp->inp_cred, PR_IP6); + if (injail) { + if (prison_check_ip6(inp->inp_cred, + laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } + + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { + if (injail) + goto found; + else + local_exact = inp; + } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* OFP_LIST_FOREACH */ + + inp = jail_wild; + if (inp == NULL) + inp = jail_wild; + if (inp == NULL) + inp = local_exact; + if (inp == NULL) + inp = local_wild; + if (inp != NULL) + goto found; + } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ + INP_GROUP_UNLOCK(pcbgroup); + return (NULL); + +found: + in_pcbref(inp); + INP_GROUP_UNLOCK(pcbgroup); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("%s: locking buf", __func__); + return (inp); +} +#endif /* PCBGROUP */ +#endif + +/* + * XXX: this is borrowed from in6_pcbbind(). If possible, we should + * share this function by all *bsd*... 
+ */ +int +ofp_in6_pcbsetport(struct ofp_in6_addr *laddr, struct inpcb *inp, struct ofp_ucred *cred) +{ + struct socket *so = inp->inp_socket; + u_int16_t lport = 0; + int error, lookupflags = 0; +#ifdef INVARIANTS + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; +#endif + /* + * Give inp an anonymous local port: ofp_in_pcb_lport() picks the + * port and ofp_in_pcbinshash() hashes the PCB. laddr is unused here + * (the prison check that consumed it is compiled out). Returns 0, + * the error from ofp_in_pcb_lport(), or OFP_EAGAIN if hashing fails. + * pcbinfo only exists under INVARIANTS, so INP_HASH_WLOCK_ASSERT + * presumably expands to nothing otherwise -- TODO confirm. + */ + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); + + (void)laddr; +#if 0 + error = prison_local_ip6(cred, laddr, + ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); + if (error) + return(error); +#endif + /* XXX: this is redundant when called from in6_pcbbind */ + if ((so->so_options & (OFP_SO_REUSEADDR|OFP_SO_REUSEPORT)) == 0) + lookupflags = INPLOOKUP_WILDCARD; + /* Note: the flag stays set even if one of the error returns below is taken. */ + inp->inp_flags |= INP_ANONPORT; + + error = ofp_in_pcb_lport(inp, NULL, &lport, cred, lookupflags); + if (error != 0) + return (error); + + inp->inp_lport = lport; + if (ofp_in_pcbinshash(inp) != 0) { + inp->in6p_laddr = ofp_in6addr_any; + inp->inp_lport = 0; + return (OFP_EAGAIN); + } + + return (0); +} + +/* + * Lookup PCB in hash list. + * + * An exact 4-tuple match is tried first. If INPLOOKUP_WILDCARD is set, + * PCBs with an unspecified foreign address on the same local port are + * considered next, preferring an exact local address over an unspecified + * one. The hash lock is asserted via INP_HASH_LOCK_ASSERT; this function + * takes no reference and no inpcb lock on the PCB it returns. ifp is + * unused. Returns NULL when nothing matches. + */ +struct inpcb * +ofp_in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct ofp_in6_addr *faddr, + u_int fport_arg, struct ofp_in6_addr *laddr, u_int lport_arg, + int lookupflags, struct ofp_ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp; + + u_short fport = fport_arg, lport = lport_arg; + int faith = 0; + /* faith is never changed, so the INP_FAITH filter further down is inert. */ + (void)ifp; + KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + + INP_HASH_LOCK_ASSERT(pcbinfo); + + /* + * First look for an exact match. 
+ */ + head = &pcbinfo->ipi_hashbase[ + INP_PCBHASH(faddr->ofp_s6_addr32[3] /* XXX */, lport, fport, + pcbinfo->ipi_hashmask)]; + + OFP_LIST_FOREACH(inp, head, inp_hash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + + if (OFP_IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && + OFP_IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_fport == fport && + inp->inp_lport == lport) { + /* + * XXX We should be able to directly return + * the inp here, without any checks. + * Well unless both bound with SO_REUSEPORT? + */ + return (inp); + } + } + + /* + * Then look for a wildcard match, if requested. + */ + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { + struct inpcb *local_wild = NULL, *local_exact = NULL; + struct inpcb *jail_wild = NULL; + int injail = 0; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. + * + * NOTE: injail is only assigned inside the #if 0 block below, so it + * stays 0 and only cases 3 and 4 are reachable. With the + * local_exact guard compiled out as well, a later list entry + * replaces an earlier candidate of the same kind. + */ + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(OFP_INADDR_ANY, lport, + 0, pcbinfo->ipi_hashmask)]; + + OFP_LIST_FOREACH(inp, head, inp_hash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + inp->inp_lport != lport) { + continue; + } + + /* XXX inp locking */ + if (faith && (inp->inp_flags & INP_FAITH) == 0) + continue; +#if 0 + injail = prison_flag(inp->inp_cred, PR_IP6); + if (injail) { + if (prison_check_ip6(inp->inp_cred, + laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } +#endif + if (OFP_IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { + if (injail) + return (inp); + else + local_exact = inp; + } else if (OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* OFP_LIST_FOREACH */ + + if (jail_wild != NULL) + return (jail_wild); + if (local_exact != NULL) + return (local_exact); + if (local_wild != NULL) + return (local_wild); + } /* if 
((lookupflags & INPLOOKUP_WILDCARD) != 0) */ + + /* + * Not found. + */ + return (NULL); +} + +/* + * Lookup PCB in hash list, using pcbinfo tables. This variation locks the + * hash list lock, and will return the inpcb locked (i.e., requires + * INPLOOKUP_LOCKPCB). + * + * The lock-type flags are masked off before calling the _locked variant, + * which asserts that nothing but INPLOOKUP_WILDCARD is passed. A + * temporary reference (ofp_in_pcbref) is held while the hash lock is + * dropped and the inpcb lock is taken; if releasing that reference + * reports a non-zero result, NULL is returned instead of the PCB. + * Passing neither INPLOOKUP_WLOCKPCB nor INPLOOKUP_RLOCKPCB panics. + */ +static struct inpcb * +in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct ofp_in6_addr *faddr, + u_int fport, struct ofp_in6_addr *laddr, u_int lport, int lookupflags, + struct ofp_ifnet *ifp) +{ + + struct inpcb *inp = NULL; + + INP_HASH_RLOCK(pcbinfo); + inp = ofp_in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, + (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); + if (inp != NULL) { + ofp_in_pcbref(inp); + INP_HASH_RUNLOCK(pcbinfo); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (ofp_in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (ofp_in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("locking bug"); + } else + INP_HASH_RUNLOCK(pcbinfo); + return (inp); +} + +/* + * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf + * from which a pre-calculated hash value may be extracted. + * + * Possibly more of this logic should be in in6_pcbgroup.c. 
+ */ +struct inpcb * +ofp_in6_pcblookup(struct inpcbinfo *pcbinfo, struct ofp_in6_addr *faddr, u_int fport, + struct ofp_in6_addr *laddr, u_int lport, int lookupflags, struct ofp_ifnet *ifp) +{ + +#if 0 /*defined(PCBGROUP)*/ + struct inpcbgroup *pcbgroup; +#endif + + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, + ("%s: LOCKPCB not set", __func__)); + +#if 0 /*defined(PCBGROUP)*/ + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif + + return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, + lookupflags, ifp)); +} + + +struct inpcb * +ofp_in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct ofp_in6_addr *faddr, + u_int fport, struct ofp_in6_addr *laddr, u_int lport, int lookupflags, + struct ofp_ifnet *ifp, odp_packet_t m) +{ + (void)m; + +#if 0 /*def PCBGROUP*/ + struct inpcbgroup *pcbgroup; +#endif + + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, + ("%s: LOCKPCB not set", __func__)); + +#if 0 /*def PCBGROUP*/ + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid); + if (pcbgroup != NULL) + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, + fport, laddr, lport, lookupflags, ifp)); + pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif + + return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, + lookupflags, ifp)); +} + +void +ofp_init_sin6(struct ofp_sockaddr_in6 *sin6, odp_packet_t pkt) +{ + struct ofp_ip6_hdr *ip; + + 
ip = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(pkt, NULL); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = OFP_AF_INET6; + sin6->sin6_addr = ip->ip6_src; + + /*(void)sa6_recoverscope(sin6);*/ /* XXX: should catch errors... */ + + return; +} + diff --git a/src/ofp_in6_proto.c b/src/ofp_in6_proto.c new file mode 100644 index 00000000..dcb4c72d --- /dev/null +++ b/src/ofp_in6_proto.c @@ -0,0 +1,224 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 + */ + +#include "ofpi_in.h" +#include "ofpi_in_pcb.h" +#include "ofpi_protosw.h" +#include "ofpi_tcp_var.h" +#include "ofpi_ip6protosw.h" +#include "ofpi_udp6_var.h" +#include "ofpi_tcp6_var.h" +#include "ofpi_domain.h" +#include "ofpi_ip6.h" +#include "ofpi_ip6_var.h" +#include "ofpi_socket.h" +#include "ofpi_icmp6.h" + +/* + * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. + */ + +extern struct pr_usrreqs nousrreqs; + +#define PR_LISTEN 0 +#define PR_ABRTACPTDIS 0 + +/* Spacer for loadable protocols. */ +#define IP6PROTOSPACER \ +{ \ + .pr_domain = &ofp_inet6domain, \ + .pr_protocol = PROTO_SPACER, \ + .pr_usrreqs = &nousrreqs \ +} + +struct ip6protosw ofp_inet6sw[] = { +{ + .pr_type = 0, + .pr_domain = &ofp_inet6domain, + .pr_protocol = OFP_IPPROTO_IPV6, + .pr_init = ofp_ip6_init, +#ifdef VIMAGE + .pr_destroy = ofp_ip6_destroy, +#endif + .pr_input = ofp_ip6_input, + .pr_usrreqs = &nousrreqs +}, +{ + .pr_type = OFP_SOCK_DGRAM, + .pr_domain = &ofp_inet6domain, + .pr_protocol = OFP_IPPROTO_UDP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ofp_udp6_input, + .pr_ctlinput = ofp_udp6_ctlinput, + .pr_ctloutput = NULL, /*ip6_ctloutput,*/ +#ifndef INET /* Do not call initialization twice. 
*/ + .pr_init = ofp_udp_init, +#else + .pr_init = NULL, +#endif + .pr_usrreqs = &ofp_udp6_usrreqs, +}, +{ + .pr_type = OFP_SOCK_STREAM, + .pr_domain = &ofp_inet6domain, + .pr_protocol = OFP_IPPROTO_TCP, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN, + .pr_input = ofp_tcp6_input, +/* .pr_ctlinput = ofp_tcp6_ctlinput, + .pr_ctloutput = ofp_tcp_ctloutput,*/ +#ifndef INET /* don't call initialization and timeout routines twice */ + .pr_init = ofp_tcp_init, + .pr_slowtimo = ofp_tcp_slowtimo, +#else + .pr_init = NULL, + .pr_slowtimo = NULL, +#endif + .pr_drain = ofp_tcp_drain, + .pr_usrreqs = &ofp_tcp6_usrreqs, +}, +#ifdef SCTP +{ + .pr_type = SOCK_SEQPACKET, + .pr_domain = &ofp_inet6domain, + .pr_protocol = IPPROTO_SCTP, + .pr_flags = PR_WANTRCVD, + .pr_input = sctp6_input, + .pr_ctlinput = sctp6_ctlinput, + .pr_ctloutput = sctp_ctloutput, + .pr_drain = sctp_drain, +#ifndef INET /* Do not call initialization twice. */ + .pr_init = sctp_init, +#else + .pr_init = NULL, +#endif + .pr_usrreqs = &sctp6_usrreqs +}, +{ + .pr_type = SOCK_STREAM, + .pr_domain = &ofp_inet6domain, + .pr_protocol = IPPROTO_SCTP, + .pr_flags = PR_WANTRCVD, + .pr_input = sctp6_input, + .pr_ctlinput = sctp6_ctlinput, + .pr_ctloutput = sctp_ctloutput, + .pr_drain = sctp_drain, + .pr_usrreqs = &sctp6_usrreqs +}, +#endif /* SCTP */ +#ifdef RAW +{ + .pr_type = OFP_SOCK_RAW, + .pr_domain = &ofp_inet6domain, + .pr_protocol = OFP_IPPROTO_RAW, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = rip6_input, + .pr_output = rip6_output, + .pr_ctlinput = rip6_ctlinput, + .pr_ctloutput = rip6_ctloutput, +#ifndef INET /* Do not call initialization twice. 
*/ + .pr_init = rip_init, +#else + .pr_init = NULL, +#endif + .pr_usrreqs = &rip6_usrreqs +}, +#endif +{ + .pr_type = OFP_SOCK_RAW, + .pr_domain = &ofp_inet6domain, + .pr_protocol = OFP_IPPROTO_ICMPV6, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = ofp_icmp6_input, +/* .pr_output = rip6_output, + .pr_ctlinput = rip6_ctlinput, + .pr_ctloutput = rip6_ctloutput, + .pr_fasttimo = icmp6_fasttimo, + .pr_slowtimo = icmp6_slowtimo,*/ + .pr_usrreqs = &nousrreqs +}, +{ + .pr_type = OFP_SOCK_RAW, + .pr_domain = &ofp_inet6domain, + .pr_protocol = OFP_IPPROTO_NONE, + .pr_init = NULL, + .pr_destroy = NULL, + .pr_input = ofp_ip6_none_input, + .pr_usrreqs = &nousrreqs +}, +}; + +struct domain ofp_inet6domain = { + .dom_family = OFP_AF_INET6, + .dom_name = "internet", + .dom_init = NULL, + .dom_protosw = (struct protosw *)ofp_inet6sw, + .dom_protoswNPROTOSW = (struct protosw *) + &ofp_inet6sw[sizeof(ofp_inet6sw) / + sizeof(ofp_inet6sw[0])], +}; + +VNET_DEFINE(int, ip6_v6only) = 1; +VNET_DEFINE(int, ip6_auto_flowlabel) = 1; +VNET_DEFINE(int, ip6_defhlim) = OFP_IPV6_DEFHLIM; + +VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */ +VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */ diff --git a/src/ofp_in_pcb.c b/src/ofp_in_pcb.c new file mode 100644 index 00000000..e471edde --- /dev/null +++ b/src/ofp_in_pcb.c @@ -0,0 +1,1633 @@ +/*- + * Copyright (c) 1982, 1986, 1991, 1993, 1995 + * The Regents of the University of California. + * Copyright (c) 2007-2009 Robert N. M. Watson + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * All rights reserved. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include +#include +#include + +#include "ofpi_errno.h" +#include "ofpi_in.h" +#include "ofpi_in_pcb.h" +#include "ofpi_protosw.h" +#include "ofpi_socketvar.h" +#include "ofpi_tcp_var.h" +#include "ofpi_systm.h" +#include "ofpi_route.h" +#include "ofpi_ip6_var.h" +#ifdef INET6 +#include "ofpi_in6_pcb.h" +#endif /*INET6*/ + +#include "ofpi_pkt_processing.h" + +#include "ofpi_log.h" +#include "ofpi_util.h" + +#define HASH_NOWAIT 0x00000001 +#define HASH_WAITOK 0x00000002 +extern void *ofp_hashinit(int count, void *type, uint64_t *hashmask); +extern void *ofp_hashinit_flags(int elements, void *type, uint64_t *hashmask, int flags); +extern void *ofp_phashinit(int count, void *type, uint64_t *nentries); +extern void ofp_hashdestroy(void *vhashtbl, void *type, uint64_t hashmask); + +extern struct inpcbinfo ofp_udbinfo; + +static void in_pcbremlists(struct inpcb *inp); +static struct inpcb * +in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct ofp_in_addr faddr, + uint32_t fport_arg, struct ofp_in_addr laddr, + uint32_t lport_arg, int lookupflags, + struct ofp_ifnet *ifp); +/* + * Reference-count helpers on an odp_atomic_u32_t counter. + * refcount_init() stores the starting value. + */ +static __inline void +refcount_init(odp_atomic_u32_t *count, uint32_t value) +{ + odp_atomic_store_u32(count, value); +} +/* Take one additional reference. */ +static __inline void +refcount_acquire(odp_atomic_u32_t *count) +{ + odp_atomic_inc_u32(count); +} +/* + * Drop one reference. Returns non-zero when the value read by the + * fetch-and-subtract was 1, i.e. this call released the last reference. + * The KASSERT flags a release on a counter that was already 0. + */ +static __inline int +refcount_release(odp_atomic_u32_t *count) +{ + uint32_t old; + + old = odp_atomic_fetch_sub_u32(count, 1); + KASSERT(old > 0, ("negative refcount %p", count)); + return (old == 1); +} + +/* + * Initialize an inpcbinfo -- we should be able to reduce the number of + * arguments in time. 
+ */ +void +ofp_in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, + struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, + const char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, + uint32_t inpcbzone_flags, uint32_t hashfields) +{ + /* + * make compiler happy: these casts mark the parameters as used. + * hashfields is not referenced anywhere else in this function; the + * others are also passed to the lock/zone calls below (presumably + * macros that may drop their arguments -- TODO confirm). + */ + (void)name; + (void)inpcbzone_name; + (void)inpcbzone_init; + (void)inpcbzone_fini; + (void)inpcbzone_flags; + (void)hashfields; + /* + * Locks first, then the caller-supplied PCB list head, which is + * initialized here, and the PCB count. + */ + INP_INFO_LOCK_INIT(pcbinfo, name); + INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */ + pcbinfo->ipi_listhead = listhead; + OFP_LIST_INIT(pcbinfo->ipi_listhead); + pcbinfo->ipi_count = 0; + pcbinfo->ipi_hashbase = ofp_hashinit(hash_nelements, 0, + &pcbinfo->ipi_hashmask); + pcbinfo->ipi_porthashbase = ofp_hashinit(porthash_nelements, 0, + &pcbinfo->ipi_porthashmask); + /* + * Allocation zone for struct inpcb objects, limited to maxsockets. + */ + pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), + NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, + inpcbzone_flags); + uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); +} + +/* + * Destroy an inpcbinfo. 
+ */
+void
+ofp_in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
+{
+	/* Every PCB must already be gone before the tables are released. */
+	KASSERT(pcbinfo->ipi_count == 0,
+	    ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
+
+	ofp_hashdestroy(pcbinfo->ipi_hashbase, 0, pcbinfo->ipi_hashmask);
+	ofp_hashdestroy(pcbinfo->ipi_porthashbase, 0,
+	    pcbinfo->ipi_porthashmask);
+	uma_zdestroy(pcbinfo->ipi_zone);
+	/* INP_HASH_LOCK_DESTROY(pcbinfo);
+	INP_INFO_LOCK_DESTROY(pcbinfo);*/
+}
+
+/*
+ * Report chain-length statistics for the connection hash of pcbinfo.
+ *
+ * Walks buckets 0..ipi_hashmask with the hash write lock held and stores:
+ *   *min - the shortest chain length, counting empty buckets,
+ *   *avg - the mean chain length over the occupied buckets only
+ *          (integer division), or 0 when no bucket is occupied,
+ *   *max - the longest chain length.
+ */
+void
+ofp_in_pcbinfo_hashstats(struct inpcbinfo *pcbinfo, unsigned int *min,
+    unsigned int *avg, unsigned int *max)
+{
+	unsigned int bucket;
+	unsigned int bucket_count;
+	unsigned int occupied;
+	unsigned int lmin, lsum, lmax;
+	struct inpcb *inp;
+	struct inpcbhead *head;
+
+	INP_HASH_WLOCK(pcbinfo);
+
+	lmin = (unsigned int)-1;
+	lsum = 0;
+	lmax = 0;
+	occupied = 0;
+
+	for (bucket = 0; bucket <= pcbinfo->ipi_hashmask; bucket++) {
+
+		bucket_count = 0;
+
+		head = &pcbinfo->ipi_hashbase[bucket];
+		OFP_LIST_FOREACH(inp, head, inp_hash) {
+			bucket_count++;
+		}
+
+		if (bucket_count < lmin) lmin = bucket_count;
+
+		if (bucket_count > 0) {
+			lsum += bucket_count;
+			occupied++;
+		}
+		if (bucket_count > lmax) lmax = bucket_count;
+	}
+
+	*min = lmin;
+	/*
+	 * A table without any PCB has no occupied bucket; report an
+	 * average of 0 instead of dividing by zero.
+	 */
+	*avg = (occupied != 0) ? lsum / occupied : 0;
+	*max = lmax;
+
+	INP_HASH_WUNLOCK(pcbinfo);
+}
+
+/*
+ * Allocate a PCB and associate it with the socket.
+ * On success return with the PCB locked.
+ */ +int +ofp_in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) +{ + struct inpcb *inp; + int error; + /* + * The pcbinfo write lock is asserted below. Returns OFP_ENOBUFS if + * the zone allocation fails; otherwise the new inpcb is linked to so + * and to pcbinfo's list, write-locked, given one reference, and 0 is + * returned. + */ + INP_INFO_WLOCK_ASSERT(pcbinfo); + error = 0; + inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); + if (inp == NULL) + return (OFP_ENOBUFS); + bzero(inp, inp_zero_size); + inp->inp_pcbinfo = pcbinfo; + inp->inp_socket = so; + inp->inp_cred = so->so_cred; // HJo: ref inc removed + inp->inp_inc.inc_fibnum = so->so_fibnum; + inp->inp_options = ODP_PACKET_INVALID; +#ifdef INET6 + if (INP_SOCKAF(so) == OFP_AF_INET6) { + inp->inp_vflag |= INP_IPV6PROTO; + if (V_ip6_v6only) + inp->inp_flags |= IN6P_IPV6_V6ONLY; + } +#endif + OFP_LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); + pcbinfo->ipi_count++; + so->so_pcb = (char *)inp; +#ifdef INET6 + if (V_ip6_auto_flowlabel) + inp->inp_flags |= IN6P_AUTOFLOWLABEL; +#endif + INP_WLOCK(inp); + inp->inp_gencnt = ++pcbinfo->ipi_gencnt; + refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */ + return (error); +} +/* + * Bind the local side of an IPv4 PCB that is not bound yet. + * Fails with OFP_EINVAL if a local port or address is already set. + * The address and port are resolved by ofp_in_pcbbind_setup() and the + * PCB is then hashed with ofp_in_pcbinshash(); if hashing fails the + * local address and port are cleared again and OFP_EAGAIN is returned. + * INP_ANONPORT is set when nam is NULL or carries port 0. + */ +int +ofp_in_pcbbind(struct inpcb *inp, struct ofp_sockaddr *nam, struct ofp_ucred *cred) +{ + int anonport, error; + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); + + if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != OFP_INADDR_ANY) + return (OFP_EINVAL); + anonport = inp->inp_lport == 0 && + (nam == NULL || + ((struct ofp_sockaddr_in *)nam)->sin_port == 0); + error = ofp_in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, + &inp->inp_lport, cred); + if (error) + return (error); + if (ofp_in_pcbinshash(inp) != 0) { + inp->inp_laddr.s_addr = OFP_INADDR_ANY; + inp->inp_lport = 0; + return (OFP_EAGAIN); + } + if (anonport) + inp->inp_flags |= INP_ANONPORT; + return (0); +} + +/* HJo: FIX: sysctl variables */ +int ofp_ipport_hifirstauto = 1200; /* sysctl */ +int ofp_ipport_hilastauto = 40000; +int ofp_ipport_lowfirstauto = 1023; /* 1023 */ +int ofp_ipport_lowlastauto = 40000; /* 600 */ +int ofp_ipport_firstauto = 1023; /* sysctl */ +int ofp_ipport_lastauto = 40000; + +/* + 
* Reserved ports accessible only to root. There are significant + * security considerations that must be accounted for when changing these, + * but the security benefits can be great. Please be careful. + */ +VNET_DEFINE(int, ofp_ipport_reservedhigh) = OFP_IPPORT_RESERVED - 1; /* 1023 */ +VNET_DEFINE(int, ofp_ipport_reservedlow); + +/* Variables dealing with random ephemeral port allocation. */ +VNET_DEFINE(int, ofp_ipport_randomized) = 1; /* user controlled via sysctl */ +VNET_DEFINE(int, ofp_ipport_randomcps) = 10; /* user controlled via sysctl */ +VNET_DEFINE(int, ofp_ipport_randomtime) = 45; /* user controlled via sysctl */ +VNET_DEFINE(int, ofp_ipport_stoprandom); /* toggled by ipport_tick */ +VNET_DEFINE(int, ofp_ipport_tcpallocs); + +#define V_ipport_tcplastcount VNET(ipport_tcplastcount) + +#define RANGECHK(var, min, max) \ + if ((var) < (min)) { (var) = (min); } \ + else if ((var) > (max)) { (var) = (max); } + +static int +sysctl_net_ipport_check(OFP_SYSCTL_HANDLER_ARGS) +{ + int error; + + error = sysctl_handle_int(oidp, arg1, arg2, req); + + if (error == 0) { + RANGECHK(V_ipport_lowfirstauto, 1, OFP_IPPORT_RESERVED - 1); + RANGECHK(V_ipport_lowlastauto, 1, OFP_IPPORT_RESERVED - 1); + RANGECHK(V_ipport_firstauto, OFP_IPPORT_RESERVED, OFP_IPPORT_MAX); + RANGECHK(V_ipport_lastauto, OFP_IPPORT_RESERVED, OFP_IPPORT_MAX); + RANGECHK(V_ipport_hifirstauto, OFP_IPPORT_RESERVED, OFP_IPPORT_MAX); + RANGECHK(V_ipport_hilastauto, OFP_IPPORT_RESERVED, OFP_IPPORT_MAX); + } + return (error); +} + +#define SYSCTL_VNET_PROC OFP_SYSCTL_PROC +#define SYSCTL_VNET_INT OFP_SYSCTL_INT + +SYSCTL_DECL(_net_inet_ip); +OFP_SYSCTL_NODE(_net_inet_ip, OFP_IPPROTO_IP, portrange, OFP_CTLFLAG_RW, 0, "IP Ports"); + +SYSCTL_VNET_PROC(_net_inet_ip_portrange, OFP_OID_AUTO, lowfirst, + OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, &VNET_NAME(ofp_ipport_lowfirstauto), 0, + &sysctl_net_ipport_check, "I", ""); +SYSCTL_VNET_PROC(_net_inet_ip_portrange, OFP_OID_AUTO, lowlast, + OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, 
&VNET_NAME(ofp_ipport_lowlastauto), 0,
+	&sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OFP_OID_AUTO, first,
+	OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, &VNET_NAME(ofp_ipport_firstauto), 0,
+	&sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OFP_OID_AUTO, last,
+	OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, &VNET_NAME(ofp_ipport_lastauto), 0,
+	&sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OFP_OID_AUTO, hifirst,
+	OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, &VNET_NAME(ofp_ipport_hifirstauto), 0,
+	&sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_PROC(_net_inet_ip_portrange, OFP_OID_AUTO, hilast,
+	OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, &VNET_NAME(ofp_ipport_hilastauto), 0,
+	&sysctl_net_ipport_check, "I", "");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OFP_OID_AUTO, reservedhigh,
+	OFP_CTLFLAG_RW|OFP_CTLFLAG_SECURE, &VNET_NAME(ofp_ipport_reservedhigh), 0, "");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OFP_OID_AUTO, reservedlow,
+	OFP_CTLFLAG_RW|OFP_CTLFLAG_SECURE, &VNET_NAME(ofp_ipport_reservedlow), 0, "");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OFP_OID_AUTO, randomized, OFP_CTLFLAG_RW,
+	&VNET_NAME(ofp_ipport_randomized), 0, "Enable random port allocation");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OFP_OID_AUTO, randomcps, OFP_CTLFLAG_RW,
+	&VNET_NAME(ofp_ipport_randomcps), 0, "Maximum number of random port "
+	"allocations before switching to a sequental one");
+SYSCTL_VNET_INT(_net_inet_ip_portrange, OFP_OID_AUTO, randomtime, OFP_CTLFLAG_RW,
+	&VNET_NAME(ofp_ipport_randomtime), 0,
+	"Minimum time to keep sequental port "
+	"allocation before switching to a random one");
+
+
+/*
+ * Choose an unused local port for inp.
+ *
+ * The search range is taken from the ofp_ipport_* tunables selected by the
+ * INP_HIGHPORT / INP_LOWPORT flags of inp.  The per-range cursor kept in the
+ * pcbinfo is advanced (wrapping back to the start of the range) until the
+ * local lookup finds no PCB already using the candidate port.
+ *
+ * On success 0 is returned and *lportp holds the port as produced by
+ * odp_cpu_to_be_16().  OFP_EADDRNOTAVAIL is returned once every port in the
+ * range has been tried.  For an inp that is IPv4-only, laddrp must not be
+ * NULL: *laddrp is the address the candidates are checked against.
+ */
+int
+ofp_in_pcb_lport(struct inpcb *inp, struct ofp_in_addr *laddrp, uint16_t *lportp,
+    struct ofp_ucred *cred, int lookupflags)
+{
+	struct inpcbinfo *pcbinfo;
+	struct inpcb *tmpinp;
+	unsigned short *lastport;
+	int count, dorandom;
+	uint16_t aux, first, last, lport;
+	struct ofp_in_addr laddr;
+
+	/* make compiler happy */
+	(void)cred;
+	(void)lookupflags;
+
+	pcbinfo = inp->inp_pcbinfo;
+
+	/*
+	 * Because no actual state changes occur here, a global write lock on
+	 * the pcbinfo isn't required.
+	 */
+	INP_LOCK_ASSERT(inp);
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
+	/* Pick the port range and the matching "last port used" cursor. */
+	if (inp->inp_flags & INP_HIGHPORT) {
+		first = ofp_ipport_hifirstauto;	/* sysctl */
+		last = ofp_ipport_hilastauto;
+		lastport = &pcbinfo->ipi_lasthi;
+	} else if (inp->inp_flags & INP_LOWPORT) {
+		first = ofp_ipport_lowfirstauto;	/* 1023 */
+		last = ofp_ipport_lowlastauto;	/* 600 */
+		lastport = &pcbinfo->ipi_lastlow;
+	} else {
+		first = ofp_ipport_firstauto;	/* sysctl */
+		last = ofp_ipport_lastauto;
+		lastport = &pcbinfo->ipi_lastport;
+	}
+	/*
+	 * For UDP, use random port allocation as long as the user
+	 * allows it.  For TCP (and as of yet unknown) connections,
+	 * use random port allocation only if the user allows it AND
+	 * ipport_tick() allows it.
+	 *
+	 * NOTE: only the UDP case is implemented below; every other
+	 * pcbinfo always gets sequential allocation.
+	 */
+
+	if (ofp_ipport_randomized && pcbinfo == &ofp_udbinfo)
+		dorandom = 1;
+	else
+		dorandom = 0;
+
+	/*
+	 * It makes no sense to do random port allocation if
+	 * we have the only port available.
+	 */
+	if (first == last)
+		dorandom = 0;
+
+	/*
+	 * Instead of having two loops further down counting up or down
+	 * make sure that first is always <= last and go with only one
+	 * code path implementing all logic.
+	 */
+	if (first > last) {
+		aux = first;
+		first = last;
+		last = aux;
+	}
+
+	/* Make the compiler happy. */
+	laddr.s_addr = 0;
+	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) {
+		KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p",
+		    __func__, inp));
+		laddr = *laddrp;
+		(void)laddr; /* Compiler happy */
+	}
+
+	tmpinp = NULL;	/* Make compiler happy. */
+	lport = *lportp;
+
+	/* first != last is guaranteed here, so the modulus is never zero. */
+	if (dorandom)
+		*lastport = first + (random() % (last - first));
+
+	count = last - first;
+
+	do {
+		if (count-- < 0)	/* completely used? */
+			return (OFP_EADDRNOTAVAIL);
+		++*lastport;
+		if (*lastport < first || *lastport > last)
+			*lastport = first;
+		lport = odp_cpu_to_be_16(*lastport);
+
+#ifdef INET6
+		if ((inp->inp_vflag & INP_IPV6) != 0)
+			tmpinp = ofp_in6_pcblookup_local(pcbinfo,
+			    &inp->in6p_laddr, lport, lookupflags, cred);
+		else
+#endif
+			tmpinp = ofp_in_pcblookup_local(pcbinfo, laddr,
+			    lport, lookupflags, cred);
+	} while (tmpinp != NULL);
+
+	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4)
+		laddrp->s_addr = laddr.s_addr;
+
+	*lportp = lport;
+
+	return (0);
+}
+
+/*
+ * Set up a bind operation on a PCB, performing port allocation
+ * as required, but do not actually modify the PCB. Callers can
+ * either complete the bind by setting inp_laddr/inp_lport and
+ * calling ofp_in_pcbinshash(), or they can just use the resulting
+ * port and address to authorise the sending of a once-off packet.
+ *
+ * nam may be NULL; in that case the address in *laddrp is kept and a
+ * port is allocated only when *lportp is 0.
+ *
+ * Returns 0 on success, otherwise one of OFP_EADDRNOTAVAIL, OFP_EINVAL,
+ * OFP_EADDRINUSE or the error from ofp_in_pcb_lport().
+ *
+ * On error, the values of *laddrp and *lportp are not changed.
+ */
+int
+ofp_in_pcbbind_setup(struct inpcb *inp, struct ofp_sockaddr *nam,
+    ofp_in_addr_t *laddrp,
+    uint16_t *lportp, struct ofp_ucred *cred)
+{
+	struct socket *so = inp->inp_socket;
+	struct ofp_sockaddr_in *sin;
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct ofp_in_addr laddr;
+	uint16_t lport = 0;
+	int lookupflags = 0, reuseport = (so->so_options & OFP_SO_REUSEPORT);
+	int error;
+
+	/*
+	 * No state changes, so read locks are sufficient here.
+	 */
+	INP_LOCK_ASSERT(inp);
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
+	if (OFP_TAILQ_EMPTY(ofp_get_ifaddrhead())) /* XXX broken! */
+		return (OFP_EADDRNOTAVAIL);
+
+	laddr.s_addr = *laddrp;
+	/* An explicit address may not be given once one is already set. */
+	if (nam != NULL && laddr.s_addr != OFP_INADDR_ANY)
+		return (OFP_EINVAL);
+	if ((so->so_options & (OFP_SO_REUSEADDR|OFP_SO_REUSEPORT)) == 0)
+		lookupflags = INPLOOKUP_WILDCARD;
+	if (nam) {
+		sin = (struct ofp_sockaddr_in *)nam;
+		if (nam->sa_len != sizeof (*sin))
+			return (OFP_EINVAL);
+#ifdef notdef
+		/*
+		 * We should check the family, but old programs
+		 * incorrectly fail to initialize it.
+		 */
+		if (sin->sin_family != OFP_AF_INET)
+			return (OFP_EAFNOSUPPORT);
+#endif
+		if (sin->sin_port != *lportp) {
+			/* Don't allow the port to change. */
+			if (*lportp != 0)
+				return (OFP_EINVAL);
+			lport = sin->sin_port;
+		}
+		/* NB: lport is left as 0 if the port isn't being changed. */
+		if (OFP_IN_MULTICAST(odp_be_to_cpu_32(sin->sin_addr.s_addr))) {
+			/*
+			 * Treat OFP_SO_REUSEADDR as OFP_SO_REUSEPORT for multicast;
+			 * allow complete duplication of binding if
+			 * OFP_SO_REUSEPORT is set, or if OFP_SO_REUSEADDR is set
+			 * and a multicast address is bound on both
+			 * new and duplicated sockets.
+			 */
+			if (so->so_options & OFP_SO_REUSEADDR)
+				reuseport = OFP_SO_REUSEADDR|OFP_SO_REUSEPORT;
+		} else if (sin->sin_addr.s_addr != OFP_INADDR_ANY) {
+			sin->sin_port = 0;		/* yech... */
+			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
+
+			/*
+			 * Is the address a local IP address?
+			 * If INP_BINDANY is set, then the socket may be bound
+			 * to any endpoint address, local or not.
+			 */
+			if ((inp->inp_flags & INP_BINDANY) == 0 &&
+			    ofp_ifaddr_elem_get(
+				(uint8_t *)&(sin->sin_addr.s_addr)) == NULL)
+				return (OFP_EADDRNOTAVAIL);
+		}
+		laddr = sin->sin_addr;
+		if (lport) {
+			struct inpcb *t;
+			struct tcptw *tw;
+
+			if (!OFP_IN_MULTICAST(odp_be_to_cpu_32(sin->sin_addr.s_addr))) {
+				t = ofp_in_pcblookup_local(pcbinfo, sin->sin_addr,
+				    lport, INPLOOKUP_WILDCARD, cred);
+	/*
+	 * XXX
+	 * This entire block sorely needs a rewrite.
+	 * (HJo: this comment is from FreeBSD)
+	 */
+				if (t &&
+				    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
+				    (so->so_type != OFP_SOCK_STREAM ||
+				     odp_be_to_cpu_32(t->inp_faddr.s_addr) == OFP_INADDR_ANY) &&
+				    (odp_be_to_cpu_32(sin->sin_addr.s_addr) != OFP_INADDR_ANY ||
+				     odp_be_to_cpu_32(t->inp_laddr.s_addr) != OFP_INADDR_ANY ||
+				     (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+				    (inp->inp_cred->cr_uid !=
+				     t->inp_cred->cr_uid))
+					return (OFP_EADDRINUSE);
+			}
+			t = ofp_in_pcblookup_local(pcbinfo, sin->sin_addr,
+			    lport, lookupflags, cred);
+			if (t && (t->inp_flags & INP_TIMEWAIT)) {
+				/*
+				 * XXXRW: If an inpcb has had its timewait
+				 * state recycled, we treat the address as
+				 * being in use (for now).  This is better
+				 * than a panic, but not desirable.
+				 */
+				tw = intotw(t);
+				if (tw == NULL ||
+				    (reuseport & tw->tw_so_options) == 0)
+					return (OFP_EADDRINUSE);
+			} else if (t && (reuseport == 0 ||
+			    (t->inp_flags2 & INP_REUSEPORT) == 0)) {
+#ifdef INET6
+				if (odp_be_to_cpu_32(sin->sin_addr.s_addr) !=
+				    OFP_INADDR_ANY ||
+				    odp_be_to_cpu_32(t->inp_laddr.s_addr) !=
+				    OFP_INADDR_ANY ||
+				    (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
+				    (t->inp_vflag & INP_IPV6PROTO) == 0)
+#endif
+				return (OFP_EADDRINUSE);
+			}
+		}
+	}
+	/* A port that is already set always wins over the one in nam. */
+	if (*lportp != 0)
+		lport = *lportp;
+
+	if (lport == 0)
+	{
+		error = ofp_in_pcb_lport(inp, &laddr, &lport, cred, lookupflags);
+		if (error != 0)
+			return (error);
+	}
+	*laddrp = laddr.s_addr;
+	*lportp = lport;
+	return (0);
+}
+
+/*
+ * Connect from a socket to a specified address.
+ * Both address and port must be specified in argument sin.
+ * If don't have a local address for this socket yet,
+ * then pick one.
+ */
+/*
+ * Shared implementation of ofp_in_pcbconnect() and ofp_in_pcbconnect_mbuf():
+ * resolves the 4-tuple with ofp_in_pcbconnect_setup(), performs the initial
+ * hash insertion for a completely unbound inp and stores the resulting
+ * addresses and ports in inp.  Re-hashing is left to the caller because it
+ * is the only step in which the two entry points differ.
+ *
+ * *anonportp is set to non-zero when inp had no local port on entry.
+ * Returns 0, the error from ofp_in_pcbconnect_setup(), or OFP_EAGAIN when
+ * the initial hash insertion fails (inp is then left unbound again).
+ */
+static int
+in_pcbconnect_common(struct inpcb *inp, struct ofp_sockaddr *nam,
+    struct ofp_ucred *cred, int *anonportp)
+{
+	/* uint16_t matches the pointer types ofp_in_pcbconnect_setup() takes */
+	uint16_t lport, fport;
+	ofp_in_addr_t laddr, faddr;
+	int error;
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
+
+	lport = inp->inp_lport;
+	laddr = inp->inp_laddr.s_addr;
+	*anonportp = (lport == 0);
+	error = ofp_in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
+	    NULL, cred);
+	if (error)
+		return (error);
+
+	/* Do the initial binding of the local address if required. */
+	if (inp->inp_laddr.s_addr == OFP_INADDR_ANY && inp->inp_lport == 0) {
+		inp->inp_lport = lport;
+		inp->inp_laddr.s_addr = laddr;
+		if (ofp_in_pcbinshash(inp) != 0) {
+			/* Roll back so the inp is left exactly as it came in. */
+			inp->inp_laddr.s_addr = OFP_INADDR_ANY;
+			inp->inp_lport = 0;
+			return (OFP_EAGAIN);
+		}
+	}
+
+	/* Commit the remaining changes. */
+	inp->inp_lport = lport;
+	inp->inp_laddr.s_addr = laddr;
+	inp->inp_faddr.s_addr = faddr;
+	inp->inp_fport = fport;
+	return (0);
+}
+
+/*
+ * Connect inp to the address in nam and move it to its new hash bucket
+ * with ofp_in_pcbrehash_mbuf(inp, m).  INP_ANONPORT is set when the local
+ * port was not set on entry.  Returns 0 or an OFP_E* error code.
+ */
+int
+ofp_in_pcbconnect_mbuf(struct inpcb *inp, struct ofp_sockaddr *nam,
+    struct ofp_ucred *cred, odp_packet_t m)
+{
+	int anonport, error;
+
+	error = in_pcbconnect_common(inp, nam, cred, &anonport);
+	if (error)
+		return (error);
+
+	ofp_in_pcbrehash_mbuf(inp, m);
+
+	if (anonport)
+		inp->inp_flags |= INP_ANONPORT;
+	return (0);
+}
+
+/*
+ * Connect from a socket to a specified address.
+ * Both address and port must be specified in argument sin.
+ * If the socket does not have a local address yet,
+ * then pick one.
+ *
+ * Same as ofp_in_pcbconnect_mbuf() but re-hashes with ofp_in_pcbrehash().
+ */
+int
+ofp_in_pcbconnect(struct inpcb *inp, struct ofp_sockaddr *nam, struct ofp_ucred *cred)
+{
+	int anonport, error;
+
+	error = in_pcbconnect_common(inp, nam, cred, &anonport);
+	if (error)
+		return (error);
+
+	ofp_in_pcbrehash(inp);
+
+	if (anonport)
+		inp->inp_flags |= INP_ANONPORT;
+	return (0);
+}
+
+/*
+ * Do proper source address selection on an unbound socket in case
+ * of connect.
+ *
+ * The next hop towards *faddr is looked up and the IP address of the
+ * interface it leaves through is stored in *laddr.  Returns 0 on success
+ * and OFP_ENETUNREACH when there is no next hop or no matching interface.
+ * inp and cred are accepted for interface compatibility but not used.
+ */
+static int
+in_pcbladdr(struct inpcb *inp, struct ofp_in_addr *faddr,
+    struct ofp_in_addr *laddr, struct ofp_ucred *cred)
+{
+	struct ofp_nh_entry *nh;
+	struct ofp_ifnet *dev_out;
+	uint32_t flags;
+
+	(void)inp;
+	(void)cred;
+
+	KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
+
+	nh = ofp_get_next_hop(0, faddr->s_addr, &flags);
+	if (!nh)
+		return OFP_ENETUNREACH;
+
+	dev_out = ofp_get_ifnet(nh->port, nh->vlan);
+	if (!dev_out)
+		return OFP_ENETUNREACH;
+
+	laddr->s_addr = dev_out->ip_addr;
+	return 0;
+}
+
+/*
+ * Set up for a connect from a socket to the specified address.
+ * On entry, *laddrp and *lportp should contain the current local
+ * address and port for the PCB; these are updated to the values
+ * that should be placed in inp_laddr and inp_lport to complete
+ * the connect.
+ *
+ * On success, *faddrp and *fportp will be set to the remote address
+ * and port. These are not updated in the error case.
+ *
+ * If the operation fails because the connection already exists,
+ * *oinpp will be set to the PCB of that connection so that the
+ * caller can decide to override it. In all other cases, *oinpp
+ * is set to NULL.
+ */
+int
+ofp_in_pcbconnect_setup(struct inpcb *inp, struct ofp_sockaddr *nam,
+    ofp_in_addr_t *laddrp, uint16_t *lportp, ofp_in_addr_t *faddrp, uint16_t *fportp,
+    struct inpcb **oinpp, struct ofp_ucred *cred)
+{
+	struct ofp_sockaddr_in *sin = (struct ofp_sockaddr_in *)nam;
+	struct inpcb *oinp;
+	struct ofp_in_addr laddr, faddr;
+	uint16_t lport, fport;
+	int error;
+
+	/*
+	 * Because a global state change doesn't actually occur here, a read
+	 * lock is sufficient.
+	 */
+	INP_LOCK_ASSERT(inp);
+	INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
+
+	if (oinpp != NULL)
+		*oinpp = NULL;
+	/* Validate the destination: right size, IPv4, non-zero port. */
+	if (nam->sa_len != sizeof (*sin))
+		return (OFP_EINVAL);
+	if (sin->sin_family != OFP_AF_INET)
+		return (OFP_EAFNOSUPPORT);
+	if (sin->sin_port == 0)
+		return (OFP_EADDRNOTAVAIL);
+	laddr.s_addr = *laddrp;
+	lport = *lportp;
+	faddr = sin->sin_addr;
+	fport = sin->sin_port;
+
+	if (!OFP_TAILQ_EMPTY(ofp_get_ifaddrhead())) {
+		/*
+		 * If the destination address is OFP_INADDR_ANY,
+		 * use the primary local address.
+		 * If the supplied address is OFP_INADDR_BROADCAST,
+		 * and the primary interface supports broadcast,
+		 * choose the broadcast address for that interface.
+		 *
+		 * NOTE: the broadcast branch below is currently empty
+		 * (its body is commented out), so faddr is left unchanged.
+		 */
+		if (faddr.s_addr == OFP_INADDR_ANY) {
+			IN_IFADDR_RLOCK();
+			faddr.s_addr = OFP_TAILQ_FIRST(ofp_get_ifaddrhead())->ip_addr;
+			IN_IFADDR_RUNLOCK();
+		} else if (faddr.s_addr == (uint64_t)OFP_INADDR_BROADCAST) {
+			/* HJo: FIX
+			IN_IFADDR_RLOCK();
+			if (OFP_TAILQ_FIRST(ofp_get_ifaddrhead())->ia_ifp->if_flags &
+			    IFF_BROADCAST)
+				faddr = ((struct ofp_sockaddr_in *)(&OFP_TAILQ_FIRST
+				    (ofp_get_ifaddrhead())->
+				    ia_broadaddr))->sin_addr;
+			IN_IFADDR_RUNLOCK();
+			*/
+		}
+	}
+	if (laddr.s_addr == OFP_INADDR_ANY) {
+		/* No local address yet: derive one from the route to faddr. */
+		error = in_pcbladdr(inp, &faddr, &laddr, cred);
+		/*
+		 * If the destination address is multicast and an outgoing
+		 * interface has been set as a multicast option, prefer the
+		 * address of that interface as our source address.
+		 */
+#if 0
+		/* HJo: Multicast is not supported. */
+		if (IN_MULTICAST(odp_be_to_cpu_32(faddr.s_addr)) &&
+		    inp->inp_moptions != NULL) {
+			struct ip_moptions *imo;
+			struct ofp_ifnet *ifp;
+
+			imo = inp->inp_moptions;
+			if (imo->imo_multicast_ifp != NULL) {
+				ifp = imo->imo_multicast_ifp;
+				IN_IFADDR_RLOCK();
+				OFP_TAILQ_FOREACH(ia, ofp_get_ifaddrhead(), ia_link) {
+					if ((ia->ia_ifp == ifp) &&
+					    (cred == NULL ||
+					    prison_check_ip4(cred,
+					    &ia->ia_addr.sin_addr) == 0))
+						break;
+				}
+				if (ia == NULL)
+					error = OFP_EADDRNOTAVAIL;
+				else {
+					laddr = ia->ia_addr.sin_addr;
+					error = 0;
+				}
+				IN_IFADDR_RUNLOCK();
+			}
+		}
+#endif
+		if (error)
+			return (error);
+	}
+	/* Refuse the connect if this exact 4-tuple is already in the hash. */
+	oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport,
+	    laddr, lport, 0, NULL);
+
+	if (oinp != NULL) {
+		if (oinpp != NULL)
+			*oinpp = oinp;
+		return (OFP_EADDRINUSE);
+	}
+	if (lport == 0) {
+		/* nam == NULL: keep laddr, only allocate a local port. */
+		error = ofp_in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
+		    cred);
+		if (error)
+			return (error);
+	}
+	*laddrp = laddr.s_addr;
+	*lportp = lport;
+	*faddrp = faddr.s_addr;
+	*fportp = fport;
+
+	return (0);
+}
+
+/*
+ * Clear the foreign address and port of inp and move it to the hash
+ * bucket that matches the now-unconnected state.
+ */
+void
+ofp_in_pcbdisconnect(struct inpcb *inp)
+{
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
+
+	inp->inp_faddr.s_addr = OFP_INADDR_ANY;
+	inp->inp_fport = 0;
+	ofp_in_pcbrehash(inp);
+}
+
+/*
+ * ofp_in_pcbdetach() is responsible for disassociating a socket from an inpcb.
+ * For most protocols, this will be invoked immediately prior to calling
+ * ofp_in_pcbfree().  However, with TCP the inpcb may significantly outlive the
+ * socket, in which case ofp_in_pcbfree() is deferred.
+ */
+void
+ofp_in_pcbdetach(struct inpcb *inp)
+{
+	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
+
+	/* Break the link in both directions. */
+	inp->inp_socket->so_pcb = NULL;
+	inp->inp_socket = NULL;
+}
+
+/*
+ * ofp_in_pcbref() bumps the reference count on an inpcb in order to maintain
+ * stability of an inpcb pointer despite the inpcb lock being released.
This
+ * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
+ * but where the inpcb lock may already be held, or when acquiring a reference
+ * via a pcbgroup.
+ *
+ * ofp_in_pcbref() should be used only to provide brief memory stability, and
+ * must always be followed by a call to INP_WLOCK() and in_pcbrele() to
+ * garbage collect the inpcb if it has been ofp_in_pcbfree()'d from another
+ * context.  Until in_pcbrele() has returned that the inpcb is still valid,
+ * lock and rele are the *only* safe operations that may be performed on the
+ * inpcb.
+ *
+ * While the inpcb will not be freed, releasing the inpcb lock means that the
+ * connection's state may change, so the caller should be careful to
+ * revalidate any cached state on reacquiring the lock.  Drop the reference
+ * using in_pcbrele().
+ */
+void
+ofp_in_pcbref(struct inpcb *inp)
+{
+
+	/* A reference may only be added to an inpcb that still has one. */
+	KASSERT(inp->inp_refcount.v > 0, ("%s: refcount 0", __func__));
+
+	refcount_acquire(&inp->inp_refcount);
+}
+
+/*
+ * Drop a refcount on an inpcb elevated using ofp_in_pcbref(); because a call to
+ * ofp_in_pcbfree() may have been made between ofp_in_pcbref() and in_pcbrele(), we
+ * return a flag indicating whether or not the inpcb remains valid.  If it is
+ * valid, we return with the inpcb lock held.
+ *
+ * Notice that, unlike ofp_in_pcbref(), the inpcb lock must be held to drop a
+ * reference on an inpcb.  Historically more work was done here (actually, in
+ * in_pcbfree_internal()) but has been moved to ofp_in_pcbfree() to avoid the
+ * need for the pcbinfo lock in in_pcbrele().  Deferring the free is entirely
+ * about memory stability (and continued use of the write lock).
+ */
+/*
+ * Read-locked variant.  Returns 0 when other references remain (the read
+ * lock is still held) and 1 when this was the last reference, in which case
+ * the read lock has been dropped and inp has been returned to its zone.
+ */
+int
+ofp_in_pcbrele_rlocked(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo;
+
+	KASSERT(inp->inp_refcount.v > 0, ("%s: refcount 0", __func__));
+
+	INP_RLOCK_ASSERT(inp);
+
+	if (refcount_release(&inp->inp_refcount) == 0)
+		return (0);
+
+	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+
+	INP_RUNLOCK(inp);
+	pcbinfo = inp->inp_pcbinfo;
+	/* Keeps the build warning-free if uma_zfree() ignores its zone. */
+	(void)pcbinfo;
+	uma_zfree(pcbinfo->ipi_zone, inp);
+	return (1);
+}
+
+/*
+ * Write-locked variant of ofp_in_pcbrele_rlocked(): same return values, but
+ * the lock asserted on entry and dropped on the final release is the write
+ * lock.
+ */
+int
+ofp_in_pcbrele_wlocked(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo;
+
+	KASSERT(inp->inp_refcount.v > 0, ("%s: refcount 0", __func__));
+
+	INP_WLOCK_ASSERT(inp);
+
+	if (refcount_release(&inp->inp_refcount) == 0)
+		return (0);
+
+	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+
+	INP_WUNLOCK(inp);
+	pcbinfo = inp->inp_pcbinfo;
+	/* Keeps the build warning-free if uma_zfree() ignores its zone. */
+	(void)pcbinfo;
+	uma_zfree(pcbinfo->ipi_zone, inp);
+	return (1);
+}
+
+/*
+ * Unconditionally schedule an inpcb to be freed by decrementing its
+ * reference count, which should occur only after the inpcb has been detached
+ * from its socket.  If another thread holds a temporary reference (acquired
+ * using ofp_in_pcbref()) then the free is deferred until that reference is
+ * released using in_pcbrele(), but the inpcb is still unlocked.  Almost all
+ * work, including removal from global lists, is done in this context, where
+ * the pcbinfo lock is held.
+ */
+void
+ofp_in_pcbfree(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+
+	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+
+	INP_INFO_WLOCK_ASSERT(pcbinfo);
+	INP_WLOCK_ASSERT(inp);
+
+	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+	in_pcbremlists(inp);
+#ifdef INET6
+#if 0
+	if (inp->inp_vflag & INP_IPV6PROTO) {
+		ip6_freepcbopts(inp->in6p_outputopts);
+		if (inp->in6p_moptions != NULL)
+			ip6_freemoptions(inp->in6p_moptions);
+	}
+#endif /*0*/
+#endif
+	/* HJo: FIX
+	if (inp->inp_options)
+		(void)m_free(inp->inp_options);
+	*/
+	/* HJo: Multicast not supported
+	if (inp->inp_moptions != NULL)
+		inp_freemoptions(inp->inp_moptions);
+	*/
+
+	inp->inp_vflag = 0;
+	/* HJo: cred structure not used
+	crfree(inp->inp_cred);
+	*/
+
+	/* If references remain the inpcb is still locked: unlock it here. */
+	if (!ofp_in_pcbrele_wlocked(inp))
+		INP_WUNLOCK(inp);
+}
+
+/*
+ * ofp_in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
+ * port reservation, and preventing it from being returned by inpcb lookups.
+ *
+ * It is used by TCP to mark an inpcb as unused and avoid future packet
+ * delivery or event notification when a socket remains open but TCP has
+ * closed.  This might occur as a result of a shutdown()-initiated TCP close
+ * or a RST on the wire, and allows the port binding to be reused while still
+ * maintaining the invariant that so_pcb always points to a valid inpcb until
+ * ofp_in_pcbdetach().
+ *
+ * XXXRW: Possibly ofp_in_pcbdrop() should also prevent future notifications by
+ * in_pcbnotifyall() and in_pcbpurgeif0()?
+ */
+void
+ofp_in_pcbdrop(struct inpcb *inp)
+{
+	INP_WLOCK_ASSERT(inp);
+
+	/*
+	 * XXXRW: Possibly we should protect the setting of INP_DROPPED with
+	 * the hash lock...?
+	 */
+	inp->inp_flags |= INP_DROPPED;
+	if (inp->inp_flags & INP_INHASHLIST) {
+		struct inpcbport *phd = inp->inp_phd;
+
+		INP_HASH_WLOCK(inp->inp_pcbinfo);
+		OFP_LIST_REMOVE(inp, inp_hash);
+		OFP_LIST_REMOVE(inp, inp_portlist);
+		/* Last PCB on this port: release the per-port head as well. */
+		if (OFP_LIST_FIRST(&phd->phd_pcblist) == NULL) {
+			OFP_LIST_REMOVE(phd, phd_hash);
+			free(phd);
+		}
+		INP_HASH_WUNLOCK(inp->inp_pcbinfo);
+		inp->inp_flags &= ~INP_INHASHLIST;
+	}
+}
+
+/*
+ * Common routines to return the socket addresses associated with inpcbs.
+ *
+ * ofp_in_sockaddr() allocates and fills an OFP_AF_INET socket address for
+ * the given port and address.  The caller owns the returned memory.  NULL
+ * is returned when the allocation fails.
+ */
+struct ofp_sockaddr *
+ofp_in_sockaddr(ofp_in_port_t port, struct ofp_in_addr *addr_p)
+{
+	struct ofp_sockaddr_in *sin;
+
+	/*
+	 * calloc() rather than malloc(): sin_zero is not assigned below and
+	 * must not hand uninitialised heap bytes to the caller.
+	 */
+	sin = calloc(1, sizeof *sin);
+	if (sin == NULL)
+		return (NULL);
+
+	sin->sin_family = OFP_AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_addr = *addr_p;
+	sin->sin_port = port;
+
+	return (struct ofp_sockaddr *)sin;
+}
+
+/*
+ * Store a newly allocated copy of the local address/port of so's inpcb in
+ * *nam.  Returns 0 on success, or OFP_ENOBUFS (with *nam set to NULL) when
+ * the address cannot be allocated.
+ */
+int
+ofp_in_getsockaddr(struct socket *so, struct ofp_sockaddr **nam)
+{
+	struct inpcb *inp;
+	struct ofp_in_addr addr;
+	ofp_in_port_t port;
+
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("ofp_in_getsockaddr: inp == NULL"));
+
+	/* Snapshot under the lock; allocate only after it is released. */
+	INP_RLOCK(inp);
+	port = inp->inp_lport;
+	addr = inp->inp_laddr;
+	INP_RUNLOCK(inp);
+
+	*nam = ofp_in_sockaddr(port, &addr);
+	if (*nam == NULL)
+		return OFP_ENOBUFS;
+	return 0;
+}
+
+/*
+ * Store a newly allocated copy of the foreign address/port of so's inpcb in
+ * *nam.  Returns 0 on success, or OFP_ENOBUFS (with *nam set to NULL) when
+ * the address cannot be allocated.
+ */
+int
+ofp_in_getpeeraddr(struct socket *so, struct ofp_sockaddr **nam)
+{
+	struct inpcb *inp;
+	struct ofp_in_addr addr;
+	ofp_in_port_t port;
+
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("ofp_in_getpeeraddr: inp == NULL"));
+
+	/* Snapshot under the lock; allocate only after it is released. */
+	INP_RLOCK(inp);
+	port = inp->inp_fport;
+	addr = inp->inp_faddr;
+	INP_RUNLOCK(inp);
+
+	*nam = ofp_in_sockaddr(port, &addr);
+	if (*nam == NULL)
+		return OFP_ENOBUFS;
+	return 0;
+}
+
+/*
+ * Lookup a PCB based on the local address and port.  Caller must hold the
+ * hash lock.  No inpcb locks or references are acquired.
+ */
+#define INP_LOOKUP_MAPPED_PCB_COST	3
+struct inpcb *
+ofp_in_pcblookup_local(struct inpcbinfo *pcbinfo, struct ofp_in_addr laddr,
+    uint16_t lport, int lookupflags, struct ofp_ucred *cred)
+{
+	struct inpcb *inp;
+
+	(void)cred;
+	/*
+	 * matchwild starts above the largest cost a candidate can reach, so
+	 * the first acceptable PCB always becomes the initial best match.
+	 */
+#ifdef INET6
+	int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
+#else
+	int matchwild = 3;
+#endif
+	int wildcard;
+
+	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+	    ("%s: invalid lookup flags %d", __func__, lookupflags));
+
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
+	if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
+		struct inpcbhead *head;
+		/*
+		 * Look for an unconnected (wildcard foreign addr) PCB that
+		 * matches the local address and port we're looking for.
+		 */
+		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(OFP_INADDR_ANY, lport,
+		    0, pcbinfo->ipi_hashmask)];
+		OFP_LIST_FOREACH(inp, head, inp_hash) {
+#ifdef INET6
+			/* XXX inp locking */
+			if ((inp->inp_vflag & INP_IPV4) == 0)
+				continue;
+#endif
+			if (inp->inp_faddr.s_addr == OFP_INADDR_ANY &&
+			    inp->inp_laddr.s_addr == laddr.s_addr &&
+			    inp->inp_lport == lport) {
+				/*
+				 * Found
+				 */
+				return (inp);
+			}
+		}
+		/*
+		 * Not found.
+		 */
+		return (NULL);
+	} else {
+		struct inpcbporthead *porthash;
+		struct inpcbport *phd;
+		struct inpcb *match = NULL;
+		/*
+		 * Best fit PCB lookup.
+		 *
+		 * First see if this local port is in use by looking on the
+		 * port hash list.
+		 */
+		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
+		    pcbinfo->ipi_porthashmask)];
+		OFP_LIST_FOREACH(phd, porthash, phd_hash) {
+			if (phd->phd_port == lport)
+				break;
+		}
+		if (phd != NULL) {
+			/*
+			 * Port is in use by one or more PCBs. Look for best
+			 * fit.
+			 */
+			OFP_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+				/* Cost of this candidate; lower is better. */
+				wildcard = 0;
+#ifdef INET6
+				/* XXX inp locking */
+				if ((inp->inp_vflag & INP_IPV4) == 0)
+					continue;
+				/*
+				 * We never select the PCB that has
+				 * INP_IPV6 flag and is bound to :: if
+				 * we have another PCB which is bound
+				 * to 0.0.0.0.  If a PCB has the
+				 * INP_IPV6 flag, then we set its cost
+				 * higher than IPv4 only PCBs.
+				 *
+				 * Note that the case only happens
+				 * when a socket is bound to ::, under
+				 * the condition that the use of the
+				 * mapped address is allowed.
+				 */
+				if ((inp->inp_vflag & INP_IPV6) != 0)
+					wildcard += INP_LOOKUP_MAPPED_PCB_COST;
+#endif
+				if (inp->inp_faddr.s_addr != OFP_INADDR_ANY)
+					wildcard++;
+				if (inp->inp_laddr.s_addr != OFP_INADDR_ANY) {
+					if (laddr.s_addr == OFP_INADDR_ANY)
+						wildcard++;
+					else if (inp->inp_laddr.s_addr != laddr.s_addr)
+						continue;
+				} else {
+					if (laddr.s_addr != OFP_INADDR_ANY)
+						wildcard++;
+				}
+				if (wildcard < matchwild) {
+					match = inp;
+					matchwild = wildcard;
+					/* Cost 0 cannot be beaten. */
+					if (matchwild == 0)
+						break;
+				}
+			}
+		}
+		return (match);
+	}
+}
+
+/*
+ * Insert PCB onto various hash lists.
+ *
+ * inp is linked into the connection hash bucket for its current
+ * (faddr, lport, fport) and into the per-port list for its lport; a port
+ * head is allocated when none exists for that port yet.  Returns 0, or
+ * OFP_ENOBUFS when that allocation fails.  do_pcbgroup_update is ignored.
+ */
+static int
+in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
+{
+	struct inpcbhead *pcbhash;
+	struct inpcbporthead *pcbporthash;
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct inpcbport *phd;
+	uint32_t hashkey_faddr;
+	uint32_t hashkey;
+
+	(void)do_pcbgroup_update;
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(pcbinfo);
+
+	KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
+	    ("ofp_in_pcbinshash: INP_INHASHLIST"));
+
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6)
+		hashkey_faddr = inp->in6p_faddr.ofp_s6_addr32[3] /* XXX */;
+	else
+#endif /* INET6 */
+	hashkey_faddr = inp->inp_faddr.s_addr;
+
+	hashkey = INP_PCBHASH(hashkey_faddr,
+		 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask);
+
+	pcbhash = &pcbinfo->ipi_hashbase[hashkey];
+
+	hashkey = INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask);
+
+	pcbporthash = &pcbinfo->ipi_porthashbase[hashkey];
+	/*
+	 * Go through port list and look for a head for this lport.
+	 */
+	OFP_LIST_FOREACH(phd, pcbporthash, phd_hash) {
+		if (phd->phd_port == inp->inp_lport)
+			break;
+	}
+	/*
+	 * If none exists, malloc one and tack it on.
+	 */
+	if (phd == NULL) {
+		phd = malloc(sizeof(struct inpcbport));
+		if (phd == NULL) {
+			return (OFP_ENOBUFS); /* XXX */
+		}
+		phd->phd_port = inp->inp_lport;
+		OFP_LIST_INIT(&phd->phd_pcblist);
+		OFP_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
+	}
+	inp->inp_phd = phd;
+	OFP_LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
+	OFP_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+	inp->inp_flags |= INP_INHASHLIST;
+
+	return (0);
+}
+
+/*
+ * For now, there are two public interfaces to insert an inpcb into the hash
+ * lists -- one that does update pcbgroups, and one that doesn't.  The latter
+ * is used only in the TCP syncache, where ofp_in_pcbinshash is called before the
+ * full 4-tuple is set for the inpcb, and we don't want to install in the
+ * pcbgroup until later.
+ *
+ * XXXRW: This seems like a misfeature.  ofp_in_pcbinshash should always update
+ * connection groups, and partially initialised inpcbs should not be exposed
+ * to either reservation hash tables or pcbgroups.
+ *
+ * NOTE: in_pcbinshash_internal() ignores its pcbgroup argument, so the two
+ * entry points below currently behave identically.
+ */
+int
+ofp_in_pcbinshash(struct inpcb *inp)
+{
+
+	return (in_pcbinshash_internal(inp, 1));
+}
+
+int
+ofp_in_pcbinshash_nopcbgroup(struct inpcb *inp)
+{
+
+	return (in_pcbinshash_internal(inp, 0));
+}
+
+/*
+ * Lookup PCB in hash list, using pcbinfo tables.  This variation assumes
+ * that the caller has locked the hash list, and will not perform any further
+ * locking or reference operations on either the hash list or the connection.
+ *
+ * An exact 4-tuple match is tried first.  If none is found and
+ * INPLOOKUP_WILDCARD is set, unconnected PCBs on lport are considered.
+ * ifp is ignored.  Returns the matching inpcb or NULL.
+ */
+static struct inpcb *
+in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct ofp_in_addr faddr,
+    uint32_t fport_arg, struct ofp_in_addr laddr,\
+    uint32_t lport_arg, int lookupflags,
+    struct ofp_ifnet *ifp)
+{
+	struct inpcbhead *head;
+	struct inpcb *inp;
+	/* The ports arrive as uint32_t but are compared as 16-bit values. */
+	uint16_t fport = fport_arg, lport = lport_arg;
+
+	(void)ifp;
+
+	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+	    ("%s: invalid lookup flags %d", __func__, lookupflags));
+
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
+	/*
+	 * First look for an exact match.
+	 */
+	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+	    pcbinfo->ipi_hashmask)];
+	OFP_LIST_FOREACH(inp, head, inp_hash) {
+#ifdef INET6
+		/* XXX inp locking */
+		if ((inp->inp_vflag & INP_IPV4) == 0)
+			continue;
+#endif
+		if (inp->inp_faddr.s_addr == faddr.s_addr &&
+		    inp->inp_laddr.s_addr == laddr.s_addr &&
+		    inp->inp_fport == fport &&
+		    inp->inp_lport == lport) {
+			/*
+			 * XXX We should be able to directly return
+			 * the inp here, without any checks.
+			 * Well unless both bound with OFP_SO_REUSEPORT?
+			 */
+			return (inp);
+		}
+	}
+
+	/*
+	 * Then look for a wildcard match, if requested.
+	 *
+	 * NOTE(review): the guards in this part test _INET6 (leading
+	 * underscore) while the exact-match loop above tests INET6 --
+	 * confirm which spelling is intended.
+	 */
+	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+		struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef _INET6
+		struct inpcb *local_wild_mapped = NULL;
+#endif
+		struct inpcb *jail_wild = NULL;
+		int injail = 0;
+
+		/*
+		 * Order of socket selection - we always prefer jails.
+		 *      1. jailed, non-wild.
+		 *      2. jailed, wild.
+		 *      3. non-jailed, non-wild.
+		 *      4. non-jailed, wild.
+		 *
+		 * NOTE: the code that would set injail is compiled out
+		 * below, so injail stays 0 and only cases 3 and 4 can
+		 * actually be reached.
+		 */
+
+		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(OFP_INADDR_ANY, lport,
+		    0, pcbinfo->ipi_hashmask)];
+		OFP_LIST_FOREACH(inp, head, inp_hash) {
+#ifdef _INET6
+			/* XXX inp locking */
+			if ((inp->inp_vflag & INP_IPV4) == 0)
+				continue;
+#endif
+			if (inp->inp_faddr.s_addr != OFP_INADDR_ANY ||
+			    inp->inp_lport != lport)
+				continue;
+
+#if 0
+			/* XXX inp locking */
+			if (ifp && ifp->if_type == IFT_FAITH &&
+			    (inp->inp_flags & INP_FAITH) == 0)
+				continue;
+
+			injail = prison_flag(inp->inp_cred, PR_IP4);
+#endif
+			if (injail) {
+#if 0
+				if (prison_check_ip4(inp->inp_cred,
+				    &laddr) != 0)
+					continue;
+#endif
+			} else {
+				/* The first exact local match wins. */
+				if (local_exact != NULL)
+					continue;
+			}
+
+			if (inp->inp_laddr.s_addr == laddr.s_addr) {
+				if (injail)
+					return (inp);
+				else
+					local_exact = inp;
+			} else if (inp->inp_laddr.s_addr == OFP_INADDR_ANY) {
+#ifdef _INET6
+				/* XXX inp locking, NULL check */
+				if (inp->inp_vflag & INP_IPV6PROTO)
+					local_wild_mapped = inp;
+				else
+#endif /* INET6 */
+					if (injail)
+						jail_wild = inp;
+					else
+						local_wild = inp;
+			}
+		} /* OFP_LIST_FOREACH */
+		if (jail_wild != NULL) {
+			return (jail_wild);
+		}
+		if (local_exact != NULL) {
+			return (local_exact);
+		}
+		if (local_wild != NULL) {
+			return (local_wild);
+		}
+#ifdef _INET6
+		if (local_wild_mapped != NULL) {
+			return (local_wild_mapped);
+		}
+#endif /* defined(INET6) */
+	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
+
+	return (NULL);
+}
+
+/*
+ * Locking wrapper around in_pcblookup_hash_locked(): takes the hash read
+ * lock for the lookup, then returns the inpcb read- or write-locked as
+ * selected by INPLOOKUP_RLOCKPCB / INPLOOKUP_WLOCKPCB.  One of the two
+ * flags must be set, otherwise a successful lookup panics.  NULL is
+ * returned when nothing matches or when the inpcb was released for the
+ * last time while its lock was being acquired.
+ */
+static struct inpcb *
+in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct ofp_in_addr faddr,
+    uint32_t fport, struct ofp_in_addr laddr, uint32_t lport,
+    int lookupflags, struct ofp_ifnet *ifp)
+{
+	struct inpcb *inp;
+
+	INP_HASH_RLOCK(pcbinfo);
+
+	inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
+	    (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+	if (inp != NULL) {
+		/*
+		 * Hold a reference across the gap between dropping the hash
+		 * lock and obtaining the inpcb lock.
+		 */
+		ofp_in_pcbref(inp);
+		INP_HASH_RUNLOCK(pcbinfo);
+		if (lookupflags & INPLOOKUP_WLOCKPCB) {
+			INP_WLOCK(inp);
+			if (ofp_in_pcbrele_wlocked(inp)) {
+				return (NULL);
+			}
+		} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+			INP_RLOCK(inp);
+			if (ofp_in_pcbrele_rlocked(inp)) {
+				return (NULL);
+			}
+		} else
+			panic("locking bug");
+	} else
+		INP_HASH_RUNLOCK(pcbinfo);
+
+	return (inp);
+}
+
+/*
+ * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
+ * from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in_pcbgroup.c.
+ */
+struct inpcb *
+ofp_in_pcblookup(struct inpcbinfo *pcbinfo, struct ofp_in_addr faddr,
+    uint32_t fport, struct ofp_in_addr laddr, uint32_t lport,
+    int lookupflags, struct ofp_ifnet *ifp)
+{
+	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+	    ("%s: invalid lookup flags %d", __func__, lookupflags));
+	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+	    ("%s: LOCKPCB not set", __func__));
+
+	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+	    lookupflags, ifp));
+}
+
+/* Same as ofp_in_pcblookup(); the packet argument m is not used. */
+struct inpcb *
+ofp_in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct ofp_in_addr faddr,
+    uint32_t fport, struct ofp_in_addr laddr, uint32_t lport, int lookupflags,
+    struct ofp_ifnet *ifp, odp_packet_t m)
+{
+	(void)m;
+	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+	    ("%s: invalid lookup flags %d", __func__, lookupflags));
+	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+	    ("%s: LOCKPCB not set", __func__));
+
+	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+	    lookupflags, ifp));
+}
+
+/*
+ * Move PCB to the proper hash bucket when { faddr, fport } have been
+ * changed. NOTE: This does not handle the case of the lport changing (the
+ * hashed port list would have to be updated as well), so the lport must
+ * not change after ofp_in_pcbinshash() has been called.
+ */
+/*
+ * Packet-carrying variant of ofp_in_pcbrehash().  The packet is not used,
+ * so this simply forwards to ofp_in_pcbrehash(), which performs the lock
+ * and INP_INHASHLIST assertions.
+ */
+void
+ofp_in_pcbrehash_mbuf(struct inpcb *inp, odp_packet_t m)
+{
+	(void)m;
+
+	ofp_in_pcbrehash(inp);
+}
+
+/*
+ * Move PCB to the proper hash bucket when { faddr, fport } have been
+ * changed. NOTE: This does not handle the case of the lport changing (the
+ * hashed port list would have to be updated as well), so the lport must
+ * not change after ofp_in_pcbinshash() has been called.
+ */
+void
+ofp_in_pcbrehash(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct inpcbhead *head;
+	uint32_t hashkey_faddr;
+	uint32_t hashkey;
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(pcbinfo);
+
+	KASSERT(inp->inp_flags & INP_INHASHLIST,
+	    ("ofp_in_pcbrehash: !INP_INHASHLIST"));
+
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6)
+		hashkey_faddr = inp->in6p_faddr.ofp_s6_addr32[3] /* XXX */;
+	else
+#endif /* INET6 */
+	hashkey_faddr = inp->inp_faddr.s_addr;
+
+	hashkey = INP_PCBHASH(hashkey_faddr,
+		inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask);
+
+	head = &pcbinfo->ipi_hashbase[hashkey];
+
+	/* Unlink from the old bucket, then link into the new one. */
+	OFP_LIST_REMOVE(inp, inp_hash);
+	OFP_LIST_INSERT_HEAD(head, inp, inp_hash);
+}
+
+/*
+ * Remove PCB from various lists.
+ */ +static void +in_pcbremlists(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + + INP_INFO_WLOCK_ASSERT(pcbinfo); + INP_WLOCK_ASSERT(inp); + + inp->inp_gencnt = ++pcbinfo->ipi_gencnt; + if (inp->inp_flags & INP_INHASHLIST) { + struct inpcbport *phd = inp->inp_phd; + + INP_HASH_WLOCK(pcbinfo); + OFP_LIST_REMOVE(inp, inp_hash); + OFP_LIST_REMOVE(inp, inp_portlist); + if (OFP_LIST_FIRST(&phd->phd_pcblist) == NULL) { + OFP_LIST_REMOVE(phd, phd_hash); + free(phd); + } + INP_HASH_WUNLOCK(pcbinfo); + inp->inp_flags &= ~INP_INHASHLIST; + } + OFP_LIST_REMOVE(inp, inp_list); + pcbinfo->ipi_count--; +} + +/* + * A set label operation has occurred at the socket layer, propagate the + * label change into the in_pcb for the socket. + */ +void +ofp_in_pcbsosetlabel(struct socket *so) +{ + (void)so; +} + +/* + * in_pcb.c: manage the Protocol Control Blocks. + * + * NOTE: It is assumed that most of these functions will be called with + * the pcbinfo lock held, and often, the inpcb lock held, as these utility + * functions often modify hash chains or addresses in pcbs. + */ + diff --git a/src/ofp_in_proto.c b/src/ofp_in_proto.c new file mode 100644 index 00000000..d257c666 --- /dev/null +++ b/src/ofp_in_proto.c @@ -0,0 +1,179 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_proto.c 8.2 (Berkeley) 2/9/95 + */ + +#include +#include "ofpi_in.h" +#include "ofpi_sysctl.h" +#include "ofpi_icmp.h" +#include "ofpi_gre.h" +#include "ofpi_udp.h" +#include "ofpi_in_pcb.h" +#include "ofpi_domain.h" +#include "ofpi_protosw.h" +#include "ofpi_udp_var.h" +#include "ofpi_tcp_var.h" +#include "ofpi_socket.h" + +extern struct pr_usrreqs nousrreqs; + +/* + * TCP/IP protocol family: IP, ICMP, UDP, TCP. 
+ */ +struct protosw ofp_inetsw[] = { + { + .pr_type = 0, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_IP, + .pr_init = ofp_ip_init, +#ifdef VIMAGE + .pr_destroy = ofp_ip_destroy, +#endif + .pr_input = ofp_ip_input, + .pr_usrreqs = &nousrreqs + }, + { + .pr_type = OFP_SOCK_DGRAM, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_UDP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ofp_udp_input, + .pr_ctlinput = ofp_udp_ctlinput, + .pr_ctloutput = ofp_udp_ctloutput, + .pr_init = ofp_udp_init, + .pr_usrreqs = &ofp_udp_usrreqs + }, + { + .pr_type = OFP_SOCK_STREAM, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_TCP, + .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD, + .pr_input = ofp_tcp_input, + /*.pr_ctlinput = ofp_tcp_ctlinput, + .pr_ctloutput = ofp_tcp_ctloutput,*/ + .pr_init = ofp_tcp_init, + .pr_slowtimo = ofp_tcp_slowtimo, + .pr_drain = ofp_tcp_drain, + .pr_usrreqs = &ofp_tcp_usrreqs + }, +#ifdef SCTP + { + .pr_type = OFP_SOCK_SEQPACKET, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_SCTP, + .pr_flags = PR_WANTRCVD, + .pr_input = sctp_input, + .pr_ctlinput = sctp_ctlinput, + .pr_ctloutput = sctp_ctloutput, + .pr_init = sctp_init, + .pr_drain = sctp_drain, + .pr_usrreqs = &sctp_usrreqs + }, + { + .pr_type = OFP_SOCK_STREAM, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_SCTP, + .pr_flags = PR_WANTRCVD, + .pr_input = sctp_input, + .pr_ctlinput = sctp_ctlinput, + .pr_ctloutput = sctp_ctloutput, + .pr_drain = sctp_drain, + .pr_usrreqs = &sctp_usrreqs + }, +#endif /* SCTP */ +#ifdef RAW + /* raw wildcard */ + { + .pr_type = OFP_SOCK_RAW, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_RAW, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = rip_input, + .pr_ctloutput = rip_ctloutput, + .pr_init = rip_init, + .pr_usrreqs = &rip_usrreqs + }, +#endif + { + .pr_type = OFP_SOCK_RAW, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_GRE, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, 
+ .pr_input = ofp_gre_input, + /*.pr_ctloutput = rip_ctloutput,*/ + .pr_init = NULL, + .pr_usrreqs = &nousrreqs + }, + { + .pr_type = OFP_SOCK_RAW, + .pr_domain = &ofp_inetdomain, + .pr_protocol = OFP_IPPROTO_ICMP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = ofp_icmp_input, + .pr_init = NULL, +/* + .pr_ctloutput = rip_ctloutput, +*/ + .pr_usrreqs = &nousrreqs + } +}; + + +struct domain ofp_inetdomain = { + .dom_family = OFP_AF_INET, + .dom_name = "internet", + .dom_init = NULL, + .dom_protosw = ofp_inetsw, + .dom_protoswNPROTOSW = &ofp_inetsw[sizeof(ofp_inetsw) / + sizeof(ofp_inetsw[0])], +}; + +OFP_SYSCTL_NODE(_net, OFP_PF_INET, inet, OFP_CTLFLAG_RW, 0, + "Internet Family"); + +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_IP, ip, OFP_CTLFLAG_RW, 0, "IP"); +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_ICMP, icmp, OFP_CTLFLAG_RW, 0, "ICMP"); +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_UDP, udp, OFP_CTLFLAG_RW, 0, "UDP"); +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_TCP, tcp, OFP_CTLFLAG_RW, 0, "TCP"); +#ifdef SCTP +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_SCTP, sctp, OFP_CTLFLAG_RW, 0, "SCTP"); +#endif +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_IGMP, igmp, OFP_CTLFLAG_RW, 0, "IGMP"); +#ifdef IPSEC +/* XXX no protocol # to use, pick something "reserved" */ +OFP_SYSCTL_NODE(_net_inet, 253, ipsec, OFP_CTLFLAG_RW, 0, "IPSEC"); +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_AH, ah, OFP_CTLFLAG_RW, 0, "AH"); +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_ESP, esp, OFP_CTLFLAG_RW, 0, "ESP"); +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_IPCOMP, ipcomp, OFP_CTLFLAG_RW, 0, "IPCOMP"); +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_IPIP, ipip, OFP_CTLFLAG_RW, 0, "IPIP"); +#endif /* IPSEC */ +OFP_SYSCTL_NODE(_net_inet, OFP_IPPROTO_RAW, raw, OFP_CTLFLAG_RW, 0, "RAW"); diff --git a/src/ofp_inet.c b/src/ofp_inet.c new file mode 100644 index 00000000..f087d150 --- /dev/null +++ b/src/ofp_inet.c @@ -0,0 +1,26 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include + +#include "ofpi_inet.h" +#include "ofpi_domain.h" +#include "ofpi_protosw.h" +#include "ofpi_ip6protosw.h" + +int ofp_inet_init(void) +{ +#ifdef INET + domain_init(&ofp_inetdomain); +#endif /* INET */ + +#ifdef INET6 + domain_init(&ofp_inet6domain); +#endif /* INET6 */ + + return 0; +} diff --git a/src/ofp_init.c b/src/ofp_init.c new file mode 100644 index 00000000..8205b58e --- /dev/null +++ b/src/ofp_init.c @@ -0,0 +1,282 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/** + * @file + * + * @example + */ + +#include +#include +#include +#include +#include +#include + +#include "ofpi_sysctl.h" +#include "ofpi_util.h" +#include "ofpi_stat.h" +#include "config.h" +#include "ofpi_netlink.h" +#include "ofpi_portconf.h" +#include "ofpi_route.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_arp.h" +#include "ofpi_avl.h" +#include "ofpi_cli.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_timer.h" +#include "ofpi_hook.h" + +#include "ofpi_tcp_var.h" +#include "ofpi_socketvar.h" +#include "ofpi_socket.h" +#include "ofpi_reass.h" +#include "ofpi_inet.h" + +#include "ofpi_log.h" +#include "ofpi_debug.h" + +#define LINUX_THREADS_MAX 4 +#define SHM_PKT_POOL_SIZE (512*2048) +#define SHM_PKT_POOL_BUFFER_SIZE 1896 +#define SHM_PKT_NR_POOL (4080) + +int ofp_init_global(ofp_init_global_t *params) +{ + odp_pool_t pool; + int thr_id = 0; + int i, ret; + odp_queue_param_t qparam; + odp_pool_param_t pool_params; + char q_name[ODP_QUEUE_NAME_LEN]; + odp_cpumask_t cpumask; +#ifdef SP + odph_linux_pthread_t nl_thread; +#endif /* SP */ + + /* Init shared memories */ + ofp_register_sysctls(); + + ofp_portconf_alloc_shared_memory(); + ofp_route_alloc_shared_memory(); + ofp_rt_lookup_alloc_shared_memory(); + ofp_avl_alloc_shared_memory(); + ofp_reassembly_alloc_shared_memory(); + ofp_pcap_alloc_shared_memory(); + 
ofp_stat_alloc_shared_memory(); + ofp_arp_alloc_shared_memory(); + ofp_timer_init(OFP_TIMER_RESOLUTION_US, + OFP_TIMER_MIN_US, + OFP_TIMER_MAX_US, + OFP_TIMER_TMO_COUNT); + ofp_hook_alloc_shared_memory(&params->pkt_hook[0]); + ofp_arp_global_init(); + + ofp_init_ifnet_data(); + + /* Define pkt.seg_len so that l2/l3/l4 offset fits in first segment */ + pool_params.pkt.seg_len = SHM_PKT_POOL_BUFFER_SIZE; + pool_params.pkt.len = SHM_PKT_POOL_BUFFER_SIZE; + pool_params.pkt.num = SHM_PKT_NR_POOL; + pool_params.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet_pool", ODP_SHM_NULL, &pool_params); + + if (pool == ODP_POOL_INVALID) { + OFP_ERR("Error: packet pool create failed.\n"); + exit(EXIT_FAILURE); + } + odp_pool_print(pool); + + /* Socket memory needs pool */ + ofp_socket_alloc_shared_memory(pool); + ofp_inet_init(); + + /* cpu mask for slow path threads */ + odp_cpumask_zero(&cpumask); + odp_cpumask_set(&cpumask, params->linux_core_id); + + printf("Slow path threads will run on core %d\n", odp_cpumask_first(&cpumask)); + + /* Create interfaces */ + for (i = 0; i < params->if_count; ++i) { + int16_t port = i; + + if (port >= GRE_PORTS) { + OFP_ERR("BUG! 
Interfaces are depleted\n"); + break; + } + + OFP_DBG("if %s becomes %s%d, port %d\n", params->if_names[i], + OFP_IFNAME_PREFIX, port, port); + struct ofp_ifnet *ifnet = ofp_get_ifnet((uint16_t)port, 0); + + strncpy(ifnet->if_name, params->if_names[i], OFP_IFNAMSIZ); + ifnet->if_name[OFP_IFNAMSIZ-1] = 0; + ifnet->pkt_pool = pool; + + /* Open a packet IO instance for this device */ + ifnet->pktio = odp_pktio_open(ifnet->if_name, ifnet->pkt_pool); + if (ifnet->pktio == ODP_PKTIO_INVALID) { + OFP_ERR("Error: pktio create failed\n"); + abort(); + } + + + /* + * Create and set the default INPUT queue associated with the 'pktio' + * resource + */ + if (params->burst_recv_mode == 0) { + memset(&qparam, 0, sizeof(odp_queue_param_t)); + qparam.sched.prio = ODP_SCHED_PRIO_DEFAULT; + qparam.sched.sync = ODP_SCHED_SYNC_ATOMIC; + qparam.sched.group = ODP_SCHED_GROUP_DEFAULT; + snprintf(q_name, sizeof(q_name), "%" PRIu64 "-pktio_inq_def", + odp_pktio_to_u64(ifnet->pktio)); + q_name[ODP_QUEUE_NAME_LEN - 1] = '\0'; + + ifnet->inq_def = odp_queue_create(q_name, + ODP_QUEUE_TYPE_PKTIN, + &qparam); + if (ifnet->inq_def == ODP_QUEUE_INVALID) { + OFP_ERR(" [%02i] Error: pktio queue creation failed\n", + thr_id); + abort(); + } + + ret = odp_pktio_inq_setdef(ifnet->pktio, ifnet->inq_def); + if (ret != 0) { + OFP_ERR(" [%02i] Error: default input-Q setup\n", + thr_id); + abort(); + } + } + + ifnet->outq_def = odp_pktio_outq_getdef(ifnet->pktio); + if (ifnet->outq_def == ODP_QUEUE_INVALID) { + OFP_ERR(" [%02i] Error: default output-Q setup\n", thr_id); + abort(); + } + + /* Set device outq queue context */ + odp_queue_set_context(ifnet->outq_def, ifnet); + +#ifdef SP + /* Create VIF local input queue */ + memset(&qparam, 0, sizeof(odp_queue_param_t)); + qparam.sched.prio = ODP_SCHED_PRIO_DEFAULT; + qparam.sched.sync = ODP_SCHED_SYNC_ATOMIC; + qparam.sched.group = ODP_SCHED_GROUP_DEFAULT; + snprintf(q_name, sizeof(q_name), "%s_inq_def", ifnet->if_name); + q_name[ODP_QUEUE_NAME_LEN - 1] = 
'\0'; + + ifnet->spq_def = odp_queue_create(q_name, + ODP_QUEUE_TYPE_POLL, + &qparam); + + if (ifnet->spq_def == ODP_QUEUE_INVALID) { + OFP_ERR("Schedule queue create failed.\n"); + abort(); + } +#endif /*SP*/ + + /* Create loop queue */ + snprintf(q_name, sizeof(q_name), "%s_loopq_def", + ifnet->if_name); + q_name[ODP_QUEUE_NAME_LEN - 1] = '\0'; + + memset(&qparam, 0, sizeof(odp_queue_param_t)); + qparam.sched.prio = ODP_SCHED_PRIO_DEFAULT; + qparam.sched.sync = ODP_SCHED_SYNC_ATOMIC; + qparam.sched.group = ODP_SCHED_GROUP_DEFAULT; + + ifnet->loopq_def = odp_queue_create(q_name, + ODP_QUEUE_TYPE_SCHED, + &qparam); + if (ifnet->loopq_def == ODP_QUEUE_INVALID) { + OFP_ERR("Schedule queue create failed.\n"); + abort(); + } + + /* Set device loopq queue context */ + odp_queue_set_context(ifnet->loopq_def, ifnet); + + /* Set interface MTU*/ + ifnet->if_mtu = odp_pktio_mtu(ifnet->pktio); + OFP_DBG("device %s MTU %d\n", ifnet->if_name, ifnet->if_mtu); + + /* Set interface MAC address */ + if (odp_pktio_mac_addr(ifnet->pktio, ifnet->mac, + sizeof(ifnet->mac)) < 0) { + OFP_ERR("Failed to retrieve MAC address.\n"); + abort(); + } + OFP_DBG("device %s addr %s\n", ifnet->if_name, + ofp_print_mac((uint8_t *)ifnet->mac)); + +#ifdef SP + /* Create the kernel representation of the FP interface. 
*/ + ifnet->fd = sp_setup_device(ifnet); + + /* Maintain table to access ifnet from linux ifindex */ + ofp_update_ifindex_lookup_tab(ifnet); + +#ifdef INET6 + /* ifnet MAC was set in sp_setup_device() */ + ofp_mac_to_link_local(ifnet->mac, ifnet->link_local); +#endif /* INET6 */ + + /* Start VIF slowpath receiver thread */ + odph_linux_pthread_create(ifnet->rx_tbl, + &cpumask, + sp_rx_thread, + ifnet); + + /* Start VIF slowpath transmitter thread */ + odph_linux_pthread_create(ifnet->tx_tbl, + &cpumask, + sp_tx_thread, + ifnet); +#endif /* SP */ + } + + ofp_route_init(); + +#ifdef SP + /* Start Netlink server process */ + odph_linux_pthread_create(&nl_thread, + &cpumask, + START_NL_SERVER, + NULL); +#endif /* SP */ + + return 0; +} + + +int ofp_init_local(void) +{ + /* Lookup shared memories */ + ofp_portconf_lookup_shared_memory(); + ofp_route_lookup_shared_memory(); + ofp_rt_lookup_lookup_shared_memory(); + ofp_avl_lookup_shared_memory(); + ofp_reassembly_lookup_shared_memory(); + ofp_pcap_lookup_shared_memory(); + ofp_stat_lookup_shared_memory(); + ofp_socket_lookup_shared_memory(); + ofp_timer_lookup_shared_memory(); + ofp_hook_lookup_shared_memory(); + ofp_arp_lookup_shared_memory(); + + ofp_arp_local_init(); + + return 0; +} diff --git a/src/ofp_ip6_init.c b/src/ofp_ip6_init.c new file mode 100644 index 00000000..55ff991e --- /dev/null +++ b/src/ofp_ip6_init.c @@ -0,0 +1,116 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + */ + +#include +#include "api/ofp_types.h" +#include "ofpi_in.h" +#include "ofpi_ip6_var.h" +#include "ofpi_protosw.h" +#include "ofpi_ip6protosw.h" +#include "ofpi_log.h" +#include "ofpi_util.h" + +uint8_t ofp_ip6_protox[OFP_IPPROTO_MAX]; + +/* + * IP6 initialization: fill in IP6 protocol switch table. + * All protocols not implemented go to slow path. 
+ */ +void ofp_ip6_init(void) +{ + struct ip6protosw *pr; + int i; + + for (i = 0; i < OFP_IPPROTO_MAX; i++) + ofp_ip6_protox[i] = 0; + + for (pr = (struct ip6protosw *)ofp_inet6domain.dom_protosw; + pr < (struct ip6protosw *)ofp_inet6domain.dom_protoswNPROTOSW; + pr++) { + /* Be careful to only index valid IP protocols: ofp_ip6_protox[] has OFP_IPPROTO_MAX slots. */ + if (pr->pr_protocol < OFP_IPPROTO_MAX) + ofp_ip6_protox[pr->pr_protocol] = pr - + (struct ip6protosw *)ofp_inet6domain.dom_protosw; + } +} + +#ifdef VIMAGE +void ofp_ip6_destroy(void) +{ +} +#endif + +int ofp_ip6_input(odp_packet_t pkt, int *offp, int *nxt) +{ + (void)pkt; + (void)offp; + + *nxt = OFP_IPPROTO_SP; + return OFP_PKT_CONTINUE; +} + +int ofp_ip6_none_input(odp_packet_t pkt, int *offp, int *nxt) +{ + (void)pkt; + (void)offp; + + *nxt = OFP_IPPROTO_DONE; + return OFP_PKT_PROCESSED; +} diff --git a/src/ofp_ip_init.c b/src/ofp_ip_init.c new file mode 100644 index 00000000..42634843 --- /dev/null +++ b/src/ofp_ip_init.c @@ -0,0 +1,78 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + */ + +#include +#include "api/ofp_types.h" +#include "ofpi_in.h" +#include "ofpi_ip_var.h" +#include "ofpi_protosw.h" + + +uint8_t ofp_ip_protox[OFP_IPPROTO_MAX]; +uint8_t ofp_ip_protox_udp; +uint8_t ofp_ip_protox_tcp; +uint8_t ofp_ip_protox_gre; + +/* + * IP initialization: fill in IP protocol switch table. + * All protocols not implemented go to slow path. + */ +void ofp_ip_init(void) +{ + struct protosw *pr; + int i; + + for (i = 0; i < OFP_IPPROTO_MAX; i++) + ofp_ip_protox[i] = 0; + + for (pr = ofp_inetdomain.dom_protosw; + pr < ofp_inetdomain.dom_protoswNPROTOSW; pr++) + if (pr->pr_protocol < OFP_IPPROTO_MAX) + ofp_ip_protox[pr->pr_protocol] = pr - + ofp_inetdomain.dom_protosw; + ofp_ip_protox_udp = ofp_ip_protox[OFP_IPPROTO_UDP]; + ofp_ip_protox_tcp = ofp_ip_protox[OFP_IPPROTO_TCP]; + ofp_ip_protox_gre = ofp_ip_protox[OFP_IPPROTO_GRE]; +} + +#ifdef VIMAGE +void ofp_ip_destroy(void) +{ +} +#endif + +int ofp_ip_input(odp_packet_t pkt, int off) +{ + (void)pkt; + (void)off; + + return OFP_PKT_CONTINUE; +} diff --git a/src/ofp_log.c b/src/ofp_log.c new file mode 100644 index 00000000..3570fcd0 --- /dev/null +++ b/src/ofp_log.c @@ -0,0 +1,10 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ofpi_log.h" + +enum ofp_log_level_s ofp_loglevel = OFP_LOG_DBG; diff --git a/src/ofp_md5c.c b/src/ofp_md5c.c new file 
mode 100644 index 00000000..a5999fab --- /dev/null +++ b/src/ofp_md5c.c @@ -0,0 +1,326 @@ +/*- + * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm + * + * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All + * rights reserved. + * + * License to copy and use this software is granted provided that it + * is identified as the "RSA Data Security, Inc. MD5 Message-Digest + * Algorithm" in all material mentioning or referencing this software + * or this function. + * + * License is also granted to make and use derivative works provided + * that such works are identified as "derived from the RSA Data + * Security, Inc. MD5 Message-Digest Algorithm" in all material + * mentioning or referencing the derived work. + * + * RSA Data Security, Inc. makes no representations concerning either + * the merchantability of this software or the suitability of this + * software for any particular purpose. It is provided "as is" + * without express or implied warranty of any kind. + * + * These notices must be retained in any copies of any part of this + * documentation and/or software. + * + * This code is the same as the code published by RSA Inc. It has been + * edited for clarity and style only. + */ + +/* + * This file should be kept in sync with src/lib/libmd/md5c.c + */ +#include + +#include "odp.h" +#include "ofpi_md5.h" + +static void MD5Transform(uint32_t [4], const unsigned char [64]); + +#if (ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN) +#define Encode memcpy +#define Decode memcpy +#else + +/* + * Encodes input (u_int32_t) into output (unsigned char). Assumes len is + * a multiple of 4. + */ + +static void +Encode (unsigned char *output, u_int32_t *input, unsigned int len) +{ + unsigned int i; + uint32_t ip; + + for (i = 0; i < len / 4; i++) { + ip = input[i]; + *output++ = ip; + *output++ = ip >> 8; + *output++ = ip >> 16; + *output++ = ip >> 24; + } +} + +/* + * Decodes input (unsigned char) into output (u_int32_t). Assumes len is + * a multiple of 4. 
+ */ + +static void +Decode (u_int32_t *output, const unsigned char *input, unsigned int len) +{ + unsigned int i; + + /* Widen each byte before shifting: after int promotion, input[i+3] << 24 could overflow a signed int. */ + for (i = 0; i < len; i += 4) { + *output++ = (u_int32_t)input[i] | ((u_int32_t)input[i+1] << 8) | ((u_int32_t)input[i+2] << 16) | + ((u_int32_t)input[i+3] << 24); + } +} +#endif + +static unsigned char PADDING[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* F, G, H and I are basic MD5 functions. */ +#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) +#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | (~z))) + +/* ROTATE_LEFT rotates x left n bits. */ +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) + +/* + * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. + * Rotation is separate from addition to prevent recomputation. + */ +#define FF(a, b, c, d, x, s, ac) { \ + (a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define GG(a, b, c, d, x, s, ac) { \ + (a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define HH(a, b, c, d, x, s, ac) { \ + (a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define II(a, b, c, d, x, s, ac) { \ + (a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } + +/* MD5 initialization. Begins an MD5 operation, writing a new context. */ + +void +ofp_MD5Init (MD5_CTX *context) +{ + + context->count[0] = context->count[1] = 0; + + /* Load magic initialization constants. */ + context->state[0] = 0x67452301; + context->state[1] = 0xefcdab89; + context->state[2] = 0x98badcfe; + context->state[3] = 0x10325476; +} + +/* + * MD5 block update operation. 
Continues an MD5 message-digest + * operation, processing another message block, and updating the + * context. + */ + +void +ofp_MD5Update (MD5_CTX *context, + const void *in, + unsigned int inputLen) +{ + unsigned int i, index, partLen; + const unsigned char *input = in; + + /* Compute number of bytes mod 64 */ + index = (unsigned int)((context->count[0] >> 3) & 0x3F); + + /* Update number of bits */ + if ((context->count[0] += ((uint32_t)inputLen << 3)) + < ((uint32_t)inputLen << 3)) + context->count[1]++; + context->count[1] += ((uint32_t)inputLen >> 29); + + partLen = 64 - index; + + /* Transform as many times as possible. */ + if (inputLen >= partLen) { + memcpy((void *)&context->buffer[index], (const void *)input, + partLen); + MD5Transform (context->state, context->buffer); + + for (i = partLen; i + 63 < inputLen; i += 64) + MD5Transform (context->state, &input[i]); + + index = 0; + } + else + i = 0; + + /* Buffer remaining input */ + memcpy ((void *)&context->buffer[index], (const void *)&input[i], + inputLen-i); +} + +/* + * MD5 padding. Adds padding followed by original length. + */ + +static void +MD5Pad (MD5_CTX *context) +{ + unsigned char bits[8]; + unsigned int index, padLen; + + /* Save number of bits */ + Encode (bits, context->count, 8); + + /* Pad out to 56 mod 64. */ + index = (unsigned int)((context->count[0] >> 3) & 0x3f); + padLen = (index < 56) ? (56 - index) : (120 - index); + ofp_MD5Update (context, PADDING, padLen); + + /* Append length (before padding) */ + ofp_MD5Update (context, bits, 8); +} + +/* + * MD5 finalization. Ends an MD5 message-digest operation, writing the + * the message digest and zeroizing the context. + */ + +void +ofp_MD5Final (unsigned char digest[16], + MD5_CTX *context) +{ + /* Do padding. */ + MD5Pad (context); + + /* Store state in digest */ + Encode (digest, context->state, 16); + + /* Zeroize sensitive information. */ + memset ((void *)context, 0, sizeof (*context)); +} + +/* MD5 basic transformation. 
Transforms state based on block. */ + +static void +MD5Transform (uint32_t state[4], + const unsigned char block[64]) +{ + uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16]; + + Decode (x, block, 64); + + /* Round 1 */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ + FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ + FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ + FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ + FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ + FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ + FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ + FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ + FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ + FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ + FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ + FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ + FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ + FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ + FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ + FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ + + /* Round 2 */ +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 + GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ + GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ + GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ + GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ + GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ + GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ + GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ + GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ + GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ + GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ + GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ + GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ + GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ + GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ + GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ + GG (b, c, d, 
a, x[12], S24, 0x8d2a4c8a); /* 32 */ + + /* Round 3 */ +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ + HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ + HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ + HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ + HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ + HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ + HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ + HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ + HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ + HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ + HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ + HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ + HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ + HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ + HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ + HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ + + /* Round 4 */ +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ + II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ + II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ + II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ + II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ + II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ + II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ + II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ + II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ + II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ + II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ + II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ + II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ + II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ + II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ + II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ + + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + + /* Zeroize sensitive information. 
*/ + memset ((void *)x, 0, sizeof (x)); +} diff --git a/src/ofp_nd6.c b/src/ofp_nd6.c new file mode 100644 index 00000000..2330379a --- /dev/null +++ b/src/ofp_nd6.c @@ -0,0 +1,177 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + */ + +#include "ofpi.h" +#include "ofpi_ip6.h" +#include "ofpi_icmp6.h" +#include "ofpi_log.h" +#include "ofpi_util.h" +#include "ofpi_protosw.h" +#include "ofpi_route.h" +#include "ofpi_pkt_processing.h" /* send_pkt_out */ + + +void ofp_nd6_ns_input(odp_packet_t m, int off, int icmp6len) +{ + struct ofp_ether_header *eth; + struct ofp_ip6_hdr *ip6; + struct ofp_icmp6_hdr *icmp6; + struct ofp_ifnet *ifp; + + (void)icmp6len; + + ifp = odp_packet_user_ptr(m); + eth = (struct ofp_ether_header *) odp_packet_l2_ptr(m, NULL); + ip6 = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(m, NULL); + icmp6 = (struct ofp_icmp6_hdr *)((uint8_t *)ip6 + off); + + if (icmp6->ofp_icmp6_data8[20] == OFP_ND_OPT_SOURCE_LINKADDR && + !OFP_IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && + !OFP_IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) { + SET_ROUTE6(OFP_ROUTE6_ADD, + &ip6->ip6_src.ofp_s6_addr[0], + 128, + ofp_in6addr_any.ofp_s6_addr, + ifp->port, + ifp->vlan); + + ofp_add_mac6(ifp, + &ip6->ip6_src.ofp_s6_addr[0], + (uint8_t *)ð->ether_shost); + } +} + +enum ofp_return_code ofp_nd6_ns_output(struct ofp_ifnet *dev, + uint8_t *daddr6, uint8_t *taddr6) +{ + size_t size = 0; + size_t iter = 0; + struct ofp_ether_header *e1; + struct ofp_ether_vlan_header *e2; + struct ofp_ip6_hdr *ip6hdr; + struct ofp_icmp6_hdr *icmp; + odp_packet_t pkt; + + if (dev->vlan) + size = sizeof(struct ofp_ether_vlan_header); + else + size = sizeof(struct ofp_ether_header); + + size += sizeof(struct ofp_ip6_hdr) + sizeof(struct ofp_icmp6_hdr) + + 16 /*target addr*/ + 8; /* option*/ + + pkt = odp_packet_alloc(ofp_get_ifnet(dev->port, 0)->pkt_pool, size); + if (pkt == ODP_PACKET_INVALID) + return OFP_PKT_DROP; + 
+ odp_packet_has_eth_set(pkt, 1); + odp_packet_l2_offset_set(pkt, iter); + + if (dev->vlan) { + e2 = (struct ofp_ether_vlan_header *)odp_packet_l2_ptr(pkt, + NULL); + iter += sizeof(*e2); + + memset(e2->evl_dhost, 0xff, OFP_ETHER_ADDR_LEN); + memcpy(e2->evl_shost, dev->mac, OFP_ETHER_ADDR_LEN); + + e2->evl_encap_proto = odp_cpu_to_be_16(OFP_ETHERTYPE_VLAN); + e2->evl_tag = odp_cpu_to_be_16(dev->vlan); + e2->evl_proto = odp_cpu_to_be_16(OFP_ETHERTYPE_IPV6); + } else { + e1 = (struct ofp_ether_header *)odp_packet_l2_ptr(pkt, NULL); + iter += sizeof(*e1); + + memset(e1->ether_dhost, 0xff, OFP_ETHER_ADDR_LEN); + memcpy(e1->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN); + e1->ether_type = odp_cpu_to_be_16(OFP_ETHERTYPE_IPV6); + } + odp_packet_l3_offset_set(pkt, iter); + ip6hdr = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(pkt, NULL); + iter += sizeof(*ip6hdr); + + ip6hdr->ofp_ip6_flow = 0; + ip6hdr->ofp_ip6_vfc = 0x60; + ip6hdr->ofp_ip6_plen = odp_cpu_to_be_16(32); + /*sizeof(*icmp) + sizeof taddr + 8*/ + + /* for checksum calculation */ + ip6hdr->ofp_ip6_nxt = 0; + ip6hdr->ofp_ip6_hlim = OFP_IPPROTO_ICMPV6; + /* XXX should be multicast address*/ + memcpy(ip6hdr->ip6_src.ofp_s6_addr, dev->ip6_addr, 16); + if (ofp_ip6_is_set(daddr6)) + memcpy(ip6hdr->ip6_dst.ofp_s6_addr, daddr6, 16); + else { + /* Solicited-node multicast address */ + ip6hdr->ip6_dst.ofp_s6_addr16[0] = OFP_IPV6_ADDR_INT16_MLL; + ip6hdr->ip6_dst.ofp_s6_addr16[1] = 0; + ip6hdr->ip6_dst.ofp_s6_addr32[1] = 0; + ip6hdr->ip6_dst.ofp_s6_addr32[2] = OFP_IPV6_ADDR_INT32_ONE; + ip6hdr->ip6_dst.ofp_s6_addr32[3] = *((uint32_t *)taddr6 + 3); + ip6hdr->ip6_dst.ofp_s6_addr[12] = 0xff; + } + + odp_packet_l4_offset_set(pkt, iter); + icmp = (struct ofp_icmp6_hdr *)odp_packet_l4_ptr(pkt, NULL); + iter += sizeof(*icmp) + 8 /* option */; + + icmp->icmp6_type = OFP_ND_NEIGHBOR_SOLICIT; + icmp->icmp6_code = 0; + icmp->icmp6_cksum = 0; + icmp->ofp_icmp6_data32[0] = 0; /* Reserved */ + + memcpy(&icmp->ofp_icmp6_data8[4], taddr6, 16); + 
+ /* Option: Source link-layer address */ + icmp->ofp_icmp6_data8[20] = OFP_ND_OPT_SOURCE_LINKADDR; + icmp->ofp_icmp6_data8[21] = 1; /* 8 octets */ + memcpy(&icmp->ofp_icmp6_data8[22], dev->mac, 6); + + icmp->icmp6_cksum = + ofp_in_cksum((uint16_t *)&ip6hdr->ofp_ip6_plen, 68); + + ip6hdr->ofp_ip6_nxt = OFP_IPPROTO_ICMPV6; + ip6hdr->ofp_ip6_hlim = 255; + + if (send_pkt_out(dev, pkt) == OFP_PKT_DROP) { + OFP_ERR("Drop packet\n"); + odp_packet_free(pkt); + return OFP_PKT_DROP; + } + + return OFP_PKT_PROCESSED; +} + +void ofp_nd6_na_input(odp_packet_t m, int off, int icmp6len) +{ + struct ofp_ether_header *eth; + struct ofp_ip6_hdr *ip6; + struct ofp_icmp6_hdr *icmp6; + struct ofp_ifnet *ifp; + + (void)icmp6len; + + ifp = odp_packet_user_ptr(m); + eth = (struct ofp_ether_header *) odp_packet_l2_ptr(m, NULL); + ip6 = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(m, NULL); + icmp6 = (struct ofp_icmp6_hdr *)((uint8_t *)ip6 + off); + + if (icmp6->ofp_icmp6_data8[20] == OFP_ND_OPT_TARGET_LINKADDR) { + SET_ROUTE6(OFP_ROUTE6_ADD, + &icmp6->ofp_icmp6_data8[4], + 128, + ofp_in6addr_any.ofp_s6_addr, + ifp->port, + ifp->vlan); + + ofp_add_mac6(ifp, + &icmp6->ofp_icmp6_data8[4], + (uint8_t *)ð->ether_shost); + } +} diff --git a/src/ofp_netlink.c b/src/ofp_netlink.c new file mode 100644 index 00000000..a1fee5e7 --- /dev/null +++ b/src/ofp_netlink.c @@ -0,0 +1,768 @@ +/* Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "odp.h" +#include "ofpi_avl.h" +#include "ofpi_portconf.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_route.h" +#include "ofpi_log.h" +#include "ofpi_util.h" +#include "ofpi_netlink.h" + +#define BUFFER_SIZE 4096 +static char buffer[BUFFER_SIZE]; + +static int handle_ipv4v6_route(struct nlmsghdr *nlp) +{ + /* string to hold content of the route */ + /* table (i.e. one entry) */ + char *dsts = NULL, *gws = NULL; + char dsts_str[24], gws_str[24], ifs[16], ms[24]; + uint32_t destination = 0 , gateway = 0, ix = 0; + + int dst_len = 0, gw_len = 0; + char *dst6 = NULL, *gw6 = NULL; + struct rtmsg *rtp; + struct rtattr *rtap; + int rtl; + + + /* get route entry header */ + rtp = (struct rtmsg *) NLMSG_DATA(nlp); + + /* TABLE_MAIN is the one that stores + the routes needed for a forwarding router*/ + if (rtp->rtm_table != RT_TABLE_MAIN) + return 0; + + OFP_DBG("* ROUTE rtm_dst_len=%d\n", rtp->rtm_dst_len); + + /* init all the strings */ + bzero(dsts_str, sizeof(dsts_str)); + bzero(gws_str, sizeof(gws_str)); + bzero(ifs, sizeof(ifs)); + bzero(ms, sizeof(ms)); + /* inner loop: loop thru all the attributes of + one route entry */ + rtap = (struct rtattr *) RTM_RTA(rtp); + rtl = RTM_PAYLOAD(nlp); + + for (; RTA_OK(rtap, rtl); rtap = RTA_NEXT(rtap, rtl)) { +#ifdef NETLINK_DEBUG + OFP_DBG(" rta_type=%d data=%p len=%ld\n", + rtap->rta_type, RTA_DATA(rtap), RTA_PAYLOAD(rtap)); + ofp_print_hex(OFP_LOG_DBG, RTA_DATA(rtap), + RTA_PAYLOAD(rtap)); + OFP_LOG_NO_CTX(OFP_LOG_DBG, "\n"); +#endif + switch (rtap->rta_type) { + /* destination IPv4 address */ + case RTA_DST: + dst_len = RTA_PAYLOAD(rtap); + + if (dst_len == 4) { + destination = *((uint32_t *)(RTA_DATA(rtap))); + dsts = ofp_print_ip_addr(destination); + OFP_DBG("Dest: %s\n", dsts); + } else if (dst_len == 16) { + 
dst6 = RTA_DATA(rtap); + dsts = ofp_print_ip6_addr((uint8_t *)dst6); + OFP_DBG("Dest: %s\n", dsts); + } else + OFP_DBG("\n>>> RTA_DST: len=%d <<<\n\n", + dst_len); + + break; + + /* next hop IPv4 address */ + case RTA_GATEWAY: + gw_len = RTA_PAYLOAD(rtap); + if (gw_len == 4) { + gateway = *((uint32_t *)(RTA_DATA(rtap))); + gws = ofp_print_ip_addr(gateway); + OFP_DBG("Gateway: %s\n", gws); + } else if (gw_len == 16) { + gw6 = RTA_DATA(rtap); + gws = ofp_print_ip6_addr((uint8_t *)gw6); + OFP_DBG("Gateway: %s\n", gws); + } + + break; + /* unique ID associated with the network + interface */ + case RTA_OIF: + ix = *((uint32_t *) RTA_DATA(rtap)); + sprintf(ifs, "%d", *((int *) RTA_DATA(rtap))); + OFP_DBG("Interface: %d\n", ix); + default: + break; + } + } + if (dsts == NULL) + dsts = dsts_str; + if (gws == NULL) + gws = gws_str; + sprintf(ms, "%d", rtp->rtm_dst_len); + OFP_DBG("%s dst=%s/%s gw=%s if=%d dst_len=%d\n", + (nlp->nlmsg_type == RTM_NEWROUTE)?"New":"Del", + dsts, ms, gws, ix, dst_len); + + if (nlp->nlmsg_type == RTM_NEWROUTE) { + struct ofp_ifnet *dev = ofp_get_ifnet_by_linux_ifindex(ix); + + if (dev) { + struct ofp_route_msg msg; + + msg.vrf = 0; + if (dst_len == 4 || dst_len == 16) { + msg.port = dev->port; + msg.vlan = dev->vlan; + if (dst_len == 4) { + msg.type = OFP_ROUTE_ADD; + msg.dst = destination; + msg.masklen = rtp->rtm_dst_len; + msg.gw = gateway; + ofp_set_route(&msg); + } else if (dst6) { + msg.type = OFP_ROUTE6_ADD; + memcpy(msg.dst6, dst6, dst_len); + msg.masklen = rtp->rtm_dst_len; + if (gw6) + memcpy(msg.gw6, gw6, gw_len); + else + memset(msg.gw6, 0, 16); + ofp_set_route(&msg); + } + } else if (dst_len == 0) { + /* default route */ + msg.type = OFP_ROUTE_ADD; + msg.dst = 0; + msg.masklen = 0; + msg.gw = gateway; + msg.port = dev->port; + msg.vlan = dev->vlan; + ofp_set_route(&msg); + } + } else + OFP_DBG("*** CANNOT FIND DEV ix=%d!\n", ix); + } else { + struct ofp_route_msg msg; + + msg.vrf = 0; + if (dst_len == 0) { + /* default route */ + 
msg.type = OFP_ROUTE_DEL; + msg.dst = 0; + msg.masklen = 0; + } else { + msg.type = dst_len == 4 ? + OFP_ROUTE_DEL : OFP_ROUTE6_DEL; + if (dst6) + memcpy(msg.dst6, dst6, dst_len); + else + msg.dst = destination; + msg.masklen = rtp->rtm_dst_len; + } + ofp_set_route(&msg); + } + return 0; +} +static int add_ipv4v6_addr(struct ifaddrmsg *if_entry, struct ofp_ifnet *dev, + unsigned char *addr, unsigned char *bcast, + unsigned char *laddr) +{ + uint32_t namespace = 0; /* TODO: vrf in netlink */ + + if (if_entry->ifa_family == AF_INET) { + if (dev->port == GRE_PORTS) { + dev->ip_p2p = *((uint32_t *)addr); + dev->ip_addr = *((uint32_t *)laddr); + } else { + dev->ip_addr = *((uint32_t *)addr); + if(dev->vlan == 0) + ofp_ifaddr_elem_add(dev); + } + /* dev->linux_index = if_entry->ifa_index;*/ + dev->vrf = namespace; + dev->masklen = if_entry->ifa_prefixlen; + dev->bcast_addr = bcast ? *(uint32_t *)bcast : 0; + dev->sp_status = OFP_SP_UP; + /* update quick access table */ + ofp_update_ifindex_lookup_tab(dev); + } +#ifdef INET6 + else if (if_entry->ifa_family == AF_INET6) { + + /* dev->linux_index = if_entry->ifa_index;*/ + dev->vrf = namespace; + if (if_entry->ifa_scope == RT_SCOPE_LINK) { + memcpy(dev->link_local, addr, 16); + } else { + memcpy(dev->ip6_addr, addr, 16); + dev->ip6_prefix = if_entry->ifa_prefixlen; + dev->sp_status = OFP_SP_UP; + if(dev->vlan == 0) + ofp_ifaddr6_elem_add(dev); + } + /* update quick access table */ + ofp_update_ifindex_lookup_tab(dev); + } +#endif /* INET6 */ + + return 0; +} +static int del_ipv4v6_addr(struct ifaddrmsg *if_entry, struct ofp_ifnet *dev, + unsigned char *addr, unsigned char *laddr) +{ + (void)addr; + (void)laddr; + + if (if_entry->ifa_family == AF_INET) { + SET_ROUTE(OFP_ROUTE_DEL, dev->vrf, + dev->port == GRE_PORTS ? 
dev->ip_p2p : dev->ip_addr, + dev->masklen, 0, dev->port, dev->vlan); + dev->ip_addr = 0; + if (dev->port == GRE_PORTS) + dev->ip_p2p = 0; + else if (dev->vlan == 0) + ofp_ifaddr_elem_del(dev); + } +#ifdef INET6 + else if (if_entry->ifa_family == AF_INET6) { + uint8_t gw6[16]; + + memset(gw6, 0, 16); + + SET_ROUTE6(OFP_ROUTE6_DEL, dev->ip6_addr, dev->ip6_prefix, + (uint8_t *)gw6, + dev->port, dev->vlan); + memset(dev->ip6_addr, 0, 16); + + if (dev->vlan == 0) + ofp_ifaddr6_elem_del(dev); + } +#endif /* INET6 */ + return 0; +} +static int handle_ipv4v6_addr(struct nlmsghdr *nlh) +{ + /* msg RTM_NEWADDR / RTM_DELADDR contain an ifaddrmsg structure, + optionally followed by rtattr routing attributes */ + struct ifaddrmsg *if_entry; + char if_address[32] , *if_addr = NULL; + char *name = NULL; + unsigned char *addr = NULL , *bcast = NULL, *laddr = NULL; + struct rtattr *rtap; + int rtl; + struct ofp_ifnet *dev; + + memset(if_address, 0, sizeof(if_address)); + /* Get the addr data */ + if_entry = (struct ifaddrmsg *) NLMSG_DATA(nlh); + +/* note : problem with IFLA_() macros : should be used for RTM_GETLINK, +RTM_NEWLINK messages, which start with ifinfomsg. 
+The processed msg here RTM_NEWADDR, RTM_DELADDR start with ifaddrmsg +*/ + OFP_DBG("* INTERFACE: ifa_family=%d ifa_prefixlen=%d ifa_flags=0x%x" + " ifa_scope=%d ifa_index=%d\n", + if_entry->ifa_family, if_entry->ifa_prefixlen, + if_entry->ifa_flags, if_entry->ifa_scope, + if_entry->ifa_index); + + rtap = (struct rtattr *) IFA_RTA(if_entry); + rtl = IFA_PAYLOAD(nlh); + for (; RTA_OK(rtap, rtl); rtap = RTA_NEXT(rtap, rtl)) { +#ifdef NETLINK_DEBUG + OFP_DBG(" rta_type=%d data=%p len=%ld\n", rtap->rta_type, + RTA_DATA(rtap), RTA_PAYLOAD(rtap)); + ofp_print_hex(OFP_LOG_DBG, + RTA_DATA(rtap), RTA_PAYLOAD(rtap)); + OFP_LOG_NO_CTX(OFP_LOG_DBG, "\n"); +#endif + switch (rtap->rta_type) { + case IFA_LABEL: + name = RTA_DATA(rtap); + OFP_DBG("Interface name = %s\n", name); + break; + + case IFA_ADDRESS: + addr = RTA_DATA(rtap); + if (if_entry->ifa_family == AF_INET) { + if_addr = ofp_print_ip_addr( + *(uint32_t *)addr); + OFP_DBG("Addr = %s\n", if_addr); + } else if (if_entry->ifa_family == AF_INET6) { + if_addr = ofp_print_ip6_addr(addr); + OFP_DBG("IP6 Addr = %s\n", if_addr); + } + break; + + case IFA_LOCAL: + if (if_entry->ifa_family == AF_INET) { + /* For P2P Interfaces(GRE): + IFA_LOCAL is local address, + IFA_ADDR is destination address */ + laddr = RTA_DATA(rtap); + OFP_DBG("Local addr = %s\n", + ofp_print_ip_addr( + *(uint32_t *)laddr)); + } + break; + + case IFA_BROADCAST: + /* addr = bcast = RTA_DATA(rtap); */ + bcast = RTA_DATA(rtap); + OFP_DBG("Bcast = %s\n", + ofp_print_ip_addr(*(uint32_t *)bcast)); + break; + + default: + break; + } + } + + if (!addr) { + OFP_ERR("netlink_server: Address not received!\n"); + return -1; + } + + dev = ofp_get_ifnet_by_linux_ifindex(if_entry->ifa_index); + if (!dev) { + OFP_ERR("netlink_server: Interface not found!\n"); + return -1; + } + + if (dev->port == GRE_PORTS && if_entry->ifa_family == AF_INET) { + if (!laddr) { + OFP_ERR("Local address not received for GRE IF!"); + return -1; + } + if_addr = ofp_print_ip_addr(*(uint32_t 
*)laddr); + } + + if (!name) + name = ofp_port_vlan_to_ifnet_name(dev->port, dev->vlan); + + OFP_DBG("netlink_server: %s addr to ifx --> %s OIF %d name %s\n", + nlh->nlmsg_type == RTM_NEWADDR ? "Adding" : "Deleting", + if_addr, if_entry->ifa_index, name); + + if (nlh->nlmsg_type == RTM_DELADDR) + return del_ipv4v6_addr(if_entry, dev, addr, laddr); + else if (nlh->nlmsg_type == RTM_NEWADDR) + return add_ipv4v6_addr(if_entry, dev, addr, bcast, laddr); + + return 0; +} + +static int add_link(struct ifinfomsg *ifinfo_entry, int vlan, int link, + unsigned int mtu, uint32_t tun_loc, uint32_t tun_rem) +{ + struct ofp_ifnet *dev_root = NULL; + struct ofp_ifnet *dev = NULL; + struct ofp_ifnet key; + uint32_t vrf = 0; /* TODO: vrf in netlink */ + + if (vlan != -1) { + if (ifinfo_entry->ifi_type == ARPHRD_IPGRE) { + dev_root = ofp_get_ifnet(GRE_PORTS, 0); + if (ofp_get_ifnet_by_ip(tun_loc, vrf) == NULL) { + OFP_DBG("Tunnel local IP not configured. Interface ignored.\n"); + return -1; + } + } else + dev_root = ofp_get_ifnet_by_linux_ifindex(link); + + if (!dev_root) { + OFP_ERR("netlink_server: root interface " + "not found: %d\n", link); + return -1; + } + + key.vlan = vlan; + if (ofp_vlan_get_by_key( + dev_root->vlan_structs, + &key, + (void **)&dev)) { + + dev = malloc(sizeof(struct ofp_ifnet)); + memset(dev, 0, sizeof(struct ofp_ifnet)); + dev->port = dev_root->port; + dev->vlan = vlan; + dev->vrf = vrf; + memcpy(dev->mac, dev_root->mac, 6); + dev->sp_status = OFP_SP_UP; +#ifdef INET6 + memcpy(dev->link_local, dev_root->link_local, 16); +#endif /* INET6 */ + vlan_ifnet_insert(dev_root->vlan_structs, dev); + } + + /* Update linux index in case dev was created by portconf */ + /* when linux interface index was not available yet (cli) */ + if (!dev->linux_index) { + dev->linux_index = ifinfo_entry->ifi_index; + ofp_update_ifindex_lookup_tab(dev); + } else if (dev->linux_index == 0) { + dev->linux_index = ifinfo_entry->ifi_index; + ofp_update_ifindex_lookup_tab(dev); + } + + 
if (tun_loc) + dev->ip_local = tun_loc; + if (tun_rem) + dev->ip_remote = tun_rem; + + } else { + dev = ofp_get_ifnet_by_linux_ifindex(ifinfo_entry->ifi_index); + } + + if (mtu && dev != NULL) { + OFP_DBG("Interface updated OIF=%d MTU=%u\n", + ifinfo_entry->ifi_index, mtu); + dev->if_mtu = mtu; + } + + return 0; +} + +static int del_link(struct ifinfomsg *ifinfo_entry, int vlan, int link) +{ + struct ofp_ifnet *dev_root = NULL; + struct ofp_ifnet *dev = NULL; + struct ofp_ifnet key; + + ifinfo_entry = ifinfo_entry; + + if (vlan != -1) { + if (ifinfo_entry->ifi_type == ARPHRD_IPGRE) { + dev_root = ofp_get_ifnet(GRE_PORTS, 0); + } else + dev_root = ofp_get_ifnet_by_linux_ifindex(link); + + if (!dev_root) { + OFP_ERR("netlink_server: root interface " + "not found: %d\n", link); + return -1; + } + + key.vlan = vlan; + if (ofp_vlan_get_by_key(dev_root->vlan_structs, + &key, (void **)&dev)) { + OFP_DBG("netlink_server: vlan not found\n"); + return 0; + } + vlan_ifnet_delete( + dev_root->vlan_structs, + &key, + free_key); + OFP_DBG("Interface deleted port: %d, vlan: %d, OIF=%d\n", + dev_root->port, vlan, ifinfo_entry->ifi_index); + } + + return 0; +} + +static void _parse_ifla_link_info(struct rtattr *rt, int rl, + uint32_t *tun_loc, uint32_t *tun_rem) +{ + struct rtattr *rtap = rt; + int rtl = rl; + + if (RTA_OK(rtap, rtl) && rtap->rta_type == IFLA_INFO_KIND && + strncmp(RTA_DATA(rtap), "gre", sizeof("gre")) == 0) { + OFP_DBG("IFLA_INFO_KIND: %s\n", RTA_DATA(rtap)); + rtap = RTA_NEXT(rtap, rtl); + } else + return; + + if (RTA_OK(rtap, rtl) && rtap->rta_type == IFLA_INFO_DATA) { + OFP_DBG("IFLA_INFO_DATA\n"); + /* next level nest */ + rtl = RTA_PAYLOAD(rtap); + rtap = RTA_DATA(rtap); + } else + return; + + for (; RTA_OK(rtap, rtl); rtap = RTA_NEXT(rtap, rtl)) { +#ifdef NETLINK_DEBUG + OFP_DBG(" rta_type=%d data=%p len=%ld\n", rtap->rta_type, + RTA_DATA(rtap), RTA_PAYLOAD(rtap)); + ofp_print_hex(OFP_LOG_DBG, RTA_DATA(rtap), + RTA_PAYLOAD(rtap)); + 
OFP_LOG_NO_CTX(OFP_LOG_DBG, "\n"); +#endif + switch (rtap->rta_type) { + case IFLA_GRE_LOCAL: + *tun_loc = *(uint32_t *)RTA_DATA(rtap); + OFP_DBG("GRE tunnel local addr = %s\n", + ofp_print_ip_addr(*tun_loc)); + break; + case IFLA_GRE_REMOTE: + *tun_rem = *(uint32_t *)RTA_DATA(rtap); + OFP_DBG("GRE tunnel remote addr = %s\n", + ofp_print_ip_addr(*tun_rem)); + break; + default: + break; + } + } +} + +static int handle_ifinfo(struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifinfo_entry; + struct rtattr *rtap; + int rtl; + unsigned int mtu = 0; /* to match type in struct rtattr*/ + char *name = NULL; + int link = -1; + int vlan = -1; + uint32_t tun_loc = 0, tun_rem = 0; + char *vlan_txt = NULL; + + ifinfo_entry = (struct ifinfomsg *)NLMSG_DATA(nlh); + + OFP_DBG("* IFINFO: ifi_family=%u ifi_type=%u ifi_index=%d" + " ifi_flags=0x%x ifi_change=%u\n", + ifinfo_entry->ifi_family, ifinfo_entry->ifi_type, + ifinfo_entry->ifi_index, ifinfo_entry->ifi_flags, + ifinfo_entry->ifi_change); + + rtap = (struct rtattr *) IFLA_RTA(ifinfo_entry); + rtl = IFLA_PAYLOAD(nlh); + + for (; RTA_OK(rtap, rtl); rtap = RTA_NEXT(rtap, rtl)) { +#ifdef NETLINK_DEBUG + OFP_DBG(" rta_type=%d data=%p len=%ld\n", rtap->rta_type, + RTA_DATA(rtap), RTA_PAYLOAD(rtap)); + ofp_print_hex(OFP_LOG_DBG, RTA_DATA(rtap), + RTA_PAYLOAD(rtap)); + OFP_LOG_NO_CTX(OFP_LOG_DBG, "\n"); +#endif + switch (rtap->rta_type) { + case IFLA_MTU: + mtu = *(unsigned int *)RTA_DATA(rtap); + OFP_DBG("MTU = %u\n", mtu); + break; + case IFLA_LINK: + link = *(unsigned int *)RTA_DATA(rtap); + OFP_DBG("Link = %d\n", link); + break; + case IFLA_IFNAME: + name = RTA_DATA(rtap); + OFP_DBG("Interface name = %s\n", name); + break; + case IFLA_LINKINFO: + OFP_DBG("IFLA_LINKINFO\n"); + _parse_ifla_link_info(RTA_DATA(rtap), RTA_PAYLOAD(rtap), + &tun_loc, &tun_rem); + default: + break; + } + } + + OFP_DBG("%s received to interface OIF %d\n", + nlh->nlmsg_type == RTM_DELLINK ? 
"DELLINK" : "NEWLINK", + ifinfo_entry->ifi_index); + + if (ifinfo_entry->ifi_type == ARPHRD_IPGRE) { /* GRE */ + if (!name) { + OFP_ERR("netlink_server: interface name " + "not received: %d\n", + ifinfo_entry->ifi_index); + return -1; + } + if (strncmp(name, OFP_GRE_IFNAME_PREFIX, + strlen(OFP_GRE_IFNAME_PREFIX))) { + OFP_ERR("Invalid GRE interface name: %s\n", name); + return -1; + } + vlan = atoi(name + strlen(OFP_GRE_IFNAME_PREFIX)); + if (vlan == 0) { + OFP_ERR("Invalid tunnel id: %d\n", vlan); + return -1; + } + OFP_DBG("GRE id = %d\n", vlan); + } else if ((link != -1) && (link != ifinfo_entry->ifi_index)) {/*vlan*/ + if (!name) { + OFP_ERR("netlink_server: interface name " + "not received: %d\n", + ifinfo_entry->ifi_index); + return -1; + } + vlan_txt = strrchr(name, '.'); + if (!vlan_txt) { + OFP_ERR("netlink_server: interface vlan ID " + "not found: %d\n", + ifinfo_entry->ifi_index); + return -1; + } + vlan = atoi(vlan_txt + 1); + if (vlan == 0) { + OFP_ERR("Invalid vlan id: %d\n", vlan); + return -1; + } + OFP_DBG("vlan id = %d\n", vlan); + } + + if (nlh->nlmsg_type == RTM_DELLINK) + return del_link(ifinfo_entry, vlan, link); + else if (nlh->nlmsg_type == RTM_NEWLINK) + return add_link(ifinfo_entry, vlan, link, mtu, tun_loc, + tun_rem); + + return 0; +} + +static void route_read(int nll) +{ + struct nlmsghdr *nlh = (struct nlmsghdr *) buffer; + + for ( ; NLMSG_OK(nlh, nll); + nlh = NLMSG_NEXT(nlh, nll)) { + + switch (nlh->nlmsg_type) { + /* ARP now managed by ofp */ + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + break; + + case RTM_DELROUTE: + case RTM_NEWROUTE: + handle_ipv4v6_route(nlh); + break; + + case RTM_NEWADDR: + case RTM_DELADDR: + handle_ipv4v6_addr(nlh); + break; + + case RTM_NEWLINK: + case RTM_DELLINK: + handle_ifinfo(nlh); + break; + + default: + OFP_DBG("Unknown message type, %i!\n", + nlh->nlmsg_type); + break; + } + } +} + +static int route_recv(int route_fd, unsigned int nl_groups) +{ + struct nlmsghdr *nlp; + int rtn, nll; + char *p; + /* 
initialize the socket read buffer */ + bzero(buffer, BUFFER_SIZE); + p = buffer; + nll = 0; + /* read from the socket until the NLMSG_DONE is + returned in the type of the RTNETLINK message + or if it was a monitoring socket */ + while (1) { + rtn = recv(route_fd, p, BUFFER_SIZE - nll, 0); + if (rtn <= 0) { + OFP_DBG("ROUTE SOCK CLOSED!\n"); + break; + } + nlp = (struct nlmsghdr *) p; + if (nlp->nlmsg_type == NLMSG_DONE) + break; + /* increment the buffer pointer to place + next message */ + p += rtn; + /* TODO: sanity check code in case (p - buffer) > BUFFER_SIZE */ + /* increment the total size by the size of + the last received message */ + nll += rtn; + if ((nl_groups & RTMGRP_IPV4_ROUTE) + == RTMGRP_IPV4_ROUTE) + break; + } + return nll; +} + +void *start_netlink_nl_server(void *arg) +{ + int route_fd = -1; + int r , nll; + struct sockaddr_nl la; + (void)arg; + fd_set read_fd, fds; + struct timeval timeout; + + /* Lookup shared memories */ + ofp_init_local(); + + if ((route_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0) { + OFP_ERR("Cant Create socket\n"); + exit(-1); + } + bzero(&la, sizeof(la)); + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + la.nl_groups = RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR | RTMGRP_NOTIFY | +#ifdef INET6 + RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR | +#endif /* INET6 */ + RTMGRP_LINK; + if (bind(route_fd, (struct sockaddr *) &la, sizeof(la)) < 0) { + OFP_ERR("Cant bind to Netlink socket\n"); + exit(-1); + } + + FD_ZERO(&read_fd); + if (route_fd <= 0) { + OFP_ERR("Invalid route FD\n"); + return NULL; + } + FD_SET(route_fd, &read_fd); + + while (1) { + fds = read_fd; + + timeout.tv_sec = 0; + timeout.tv_usec = 10000; + + r = select(FD_SETSIZE, &fds, NULL, NULL, &timeout); + + if (r < 0) + continue; + + if (route_fd > 0 && FD_ISSET(route_fd, &fds)) { + nll = route_recv(route_fd, la.nl_groups); + route_read(nll); + } + + } + + + /* Close socket */ + if (route_fd > 0) + close(route_fd); + route_fd = -1; + OFP_DBG("NL server 
exiting\n"); + return 0; +} diff --git a/src/ofp_pkt_processing.c b/src/ofp_pkt_processing.c new file mode 100644 index 00000000..d6305960 --- /dev/null +++ b/src/ofp_pkt_processing.c @@ -0,0 +1,1051 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ofpi.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_portconf.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_route.h" +#include "ofpi_util.h" +#include "ofpi_stat.h" +#include "ofpi_debug.h" +#include "ofpi_avl.h" +#include "ofpi_protosw.h" +#include "ofpi_ip6protosw.h" +#include "ofpi_arp.h" +#include "ofpi_hook.h" +#include "ofpi_log.h" +#include "ofpi_reass.h" +#include "api/ofp_init.h" + +/*#define OFP_PERFORMANCE*/ +#define OFP_EVENT_BURST_SIZE 16 + +void *default_event_dispatcher(void *arg) +{ + odp_event_t ev; + odp_packet_t pkt; + odp_queue_t in_queue; + odp_event_t events[OFP_EVENT_BURST_SIZE]; + int event_idx = 0; + int event_cnt = 0; + ofp_pkt_processing_func pkt_func = (ofp_pkt_processing_func)arg; + + ofp_init_local(); + + /* PER CORE DISPATCHER */ + while (1) { + event_cnt = odp_schedule_multi(&in_queue, ODP_SCHED_WAIT, + events, OFP_EVENT_BURST_SIZE); + for (event_idx = 0; event_idx < event_cnt; event_idx++) { + ev = events[event_idx]; + + if (ev == ODP_EVENT_INVALID) + continue; + + if (odp_event_type(ev) == ODP_EVENT_TIMEOUT) { + ofp_timer_handle(ev); + continue; + } + + if (odp_event_type(ev) == ODP_EVENT_PACKET) { + pkt = odp_packet_from_event(ev); + + ofp_packet_input(pkt, in_queue, pkt_func); + continue; + } + + OFP_ERR("Event_dispatcher: " + "Error, unexpected event type: %u\n", + odp_event_type(ev)); + + /* Free events by type */ + if (odp_event_type(ev) == ODP_EVENT_BUFFER) { + odp_buffer_free(odp_buffer_from_event(ev)); + continue; + } + + if (odp_event_type(ev) == ODP_EVENT_CRYPTO_COMPL) { + odp_crypto_compl_free( + odp_crypto_compl_from_event(ev)); + continue; + } + } + } + + /* Never reached */ + return NULL; +} + + +enum ofp_return_code ofp_eth_vlan_processing(odp_packet_t pkt) +{ + uint16_t vlan = 0; + struct ofp_ether_header *eth; + struct ofp_ifnet *ifnet = odp_packet_user_ptr(pkt); + + eth = (struct ofp_ether_header 
*)odp_packet_l2_ptr(pkt, NULL); + if (odp_unlikely(eth == NULL)) + return OFP_PKT_DROP; + + if (odp_be_to_cpu_16(eth->ether_type) == OFP_ETHERTYPE_VLAN) { + struct ofp_ether_vlan_header *vlan_hdr; + + vlan_hdr = (struct ofp_ether_vlan_header *)eth; + vlan = OFP_EVL_VLANOFTAG(vlan_hdr->evl_tag); + } + + odp_packet_user_ptr_set(pkt, ofp_get_ifnet(ifnet->port, vlan)); + + /* network layer classifier */ + switch (odp_be_to_cpu_16(eth->ether_type)) { + /* STUB: except for ARP, just terminate all traffic to slowpath. + * FIXME: test/implement other cases */ +#ifdef INET + case OFP_ETHERTYPE_IP: + return ofp_ipv4_processing(pkt); +#endif /* INET */ +#ifdef INET6 + case OFP_ETHERTYPE_IPV6: + return ofp_ipv6_processing(pkt); +#endif /* INET6 */ +#if 0 + case OFP_ETHERTYPE_MPLS: + return OFP_PKT_DROP; +#endif + case OFP_ETHERTYPE_ARP: + return ofp_arp_processing(pkt); + default: + return OFP_PKT_CONTINUE; + } +} + + +enum ofp_return_code +ipv4_transport_classifier(odp_packet_t pkt, uint8_t ip_proto) +{ + struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + + return ofp_inetsw[ofp_ip_protox[ip_proto]].pr_input(pkt, + ip->ip_hl << 2); +} + +/* + * input function returns: + * ret == OFP_PKT_CONTINUE && nxt != OFP_IPPROTO_SP - process next header + * ret == OFP_PKT_CONTINUE && nxt == OFP_IPPROTO_SP - go to slow path + * ret != OFP_PKT_CONTINUE - perform default action + */ + +#ifdef INET6 +enum ofp_return_code +ipv6_transport_classifier(odp_packet_t pkt, uint8_t ip6_nxt) +{ + int nxt = ip6_nxt; + int offset = sizeof(struct ofp_ip6_hdr); + enum ofp_return_code ret = OFP_PKT_CONTINUE; + + while (ret == OFP_PKT_CONTINUE && nxt != OFP_IPPROTO_SP) + ret = ofp_inet6sw[ofp_ip6_protox[nxt]].pr_input(pkt, + &offset, &nxt); + + return ret; +} +#endif /*INET6*/ + +enum ofp_return_code ofp_udp4_processing(odp_packet_t pkt) +{ + struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + + if (odp_unlikely(ofp_in_cksum((uint16_t *) ip, ip->ip_hl<<2))) + return 
OFP_PKT_DROP; + + if (odp_be_to_cpu_16(ip->ip_off) & 0x3fff) { + OFP_UPDATE_PACKET_STAT(rx_ip_frag, 1); + + pkt = ofp_ip_reass(pkt); + if (pkt == ODP_PACKET_INVALID) + return OFP_PKT_ON_HOLD; + + OFP_UPDATE_PACKET_STAT(rx_ip_reass, 1); + + ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + } + + return ofp_inetsw[ofp_ip_protox_udp].pr_input(pkt, ip->ip_hl << 2); +} + +enum ofp_return_code ofp_tcp4_processing(odp_packet_t pkt) +{ + struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + + if (odp_unlikely(ofp_in_cksum((uint16_t *) ip, ip->ip_hl<<2))) + return OFP_PKT_DROP; + + if (odp_be_to_cpu_16(ip->ip_off) & 0x3fff) { + OFP_UPDATE_PACKET_STAT(rx_ip_frag, 1); + + pkt = ofp_ip_reass(pkt); + if (pkt == ODP_PACKET_INVALID) + return OFP_PKT_ON_HOLD; + + OFP_UPDATE_PACKET_STAT(rx_ip_reass, 1); + + ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + } + + return ofp_inetsw[ofp_ip_protox_tcp].pr_input(pkt, ip->ip_hl << 2); +} + +enum ofp_return_code ofp_ipv4_processing(odp_packet_t pkt) +{ + int res; + int protocol = IS_IPV4; + uint32_t flags; + struct ofp_ip *ip; + struct ofp_nh_entry *nh; + struct ofp_ifnet *dev = odp_packet_user_ptr(pkt); + + ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + + if (odp_unlikely(ip == NULL)) + return OFP_PKT_DROP; + +#ifndef OFP_PERFORMANCE + if (odp_unlikely(ip->ip_v != 4)) + return OFP_PKT_DROP; + if (odp_unlikely(ofp_in_cksum((uint16_t *) ip, ip->ip_hl<<2))) + return OFP_PKT_DROP; + + /* TODO: handle broadcast */ + if (dev->bcast_addr == ip->ip_dst.s_addr) + return OFP_PKT_DROP; +#endif + + if (dev->ip_addr == ip->ip_dst.s_addr /*|| app_is_ip_local?*/) { + if (odp_be_to_cpu_16(ip->ip_off) & 0x3fff) { + + OFP_UPDATE_PACKET_STAT(rx_ip_frag, 1); + + pkt = ofp_ip_reass(pkt); + if (pkt == ODP_PACKET_INVALID) + return OFP_PKT_ON_HOLD; + + OFP_UPDATE_PACKET_STAT(rx_ip_reass, 1); + + ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + } + + OFP_HOOK(OFP_HOOK_LOCAL, pkt, &protocol, &res); + if (res != OFP_PKT_CONTINUE) 
+ return res; + + return ipv4_transport_classifier(pkt, ip->ip_p); + } + + + OFP_HOOK(OFP_HOOK_FWD_IPv4, pkt, NULL, &res); + if (res != OFP_PKT_CONTINUE) + return res; + + nh = ofp_get_next_hop(dev->vrf, ip->ip_dst.s_addr, &flags); + if (nh == NULL) + return OFP_PKT_CONTINUE; + + if (ip->ip_ttl <= 1) { + ofp_icmp_error(pkt, OFP_ICMP_TIMXCEED, + OFP_ICMP_TIMXCEED_INTRANS, 0, 0); + return OFP_PKT_DROP; + } + + ip->ip_ttl--; + + if (ip->ip_p == OFP_IPPROTO_ICMP) + ofp_icmp_error(pkt, OFP_ICMP_REDIRECT, + OFP_ICMP_REDIRECT_HOST, nh->gw, 0); + + return ofp_ip_output(pkt, nh); +} + +#ifdef INET6 +enum ofp_return_code ofp_ipv6_processing(odp_packet_t pkt) +{ + int res; + int protocol = IS_IPV6; + uint32_t flags; + struct ofp_ip6_hdr *ipv6; + struct ofp_nh6_entry *nh; + struct ofp_ifnet *dev = odp_packet_user_ptr(pkt); + + ipv6 = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(pkt, NULL); + + if (odp_unlikely(ipv6 == NULL)) + return OFP_PKT_DROP; + + /* is ipv6->dst_addr one of my IPv6 addresses from this interface*/ + if (ofp_ip6_equal(dev->ip6_addr, ipv6->ip6_dst.ofp_s6_addr) || + OFP_IN6_IS_SOLICITED_NODE_MC(ipv6->ip6_dst, dev->ip6_addr) || + (memcmp((const void *)((uintptr_t)dev->link_local + 8), + (const void *)((uintptr_t)ipv6->ip6_dst.ofp_s6_addr + 8), + 2 * sizeof(uint32_t)) == 0)) { + + OFP_HOOK(OFP_HOOK_LOCAL, pkt, &protocol, &res); + if (res != OFP_PKT_CONTINUE) + return res; + + return ipv6_transport_classifier(pkt, ipv6->ofp_ip6_nxt); + + } + + OFP_HOOK(OFP_HOOK_FWD_IPv6, pkt, NULL, &res); + if (res != OFP_PKT_CONTINUE) + return res; + + nh = ofp_get_next_hop6(dev->vrf, ipv6->ip6_dst.ofp_s6_addr, &flags); + if (nh == NULL) + return OFP_PKT_CONTINUE; + + return ofp_ip6_output(pkt, nh); +} +#endif /* INET6 */ + +enum ofp_return_code ofp_gre_processing(odp_packet_t pkt) +{ + struct ofp_ip *ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + + if (odp_unlikely(ofp_in_cksum((uint16_t *) ip, ip->ip_hl<<2))) + return OFP_PKT_DROP; + + if (odp_be_to_cpu_16(ip->ip_off) & 
0x3fff) { + OFP_UPDATE_PACKET_STAT(rx_ip_frag, 1); + + pkt = ofp_ip_reass(pkt); + if (pkt == ODP_PACKET_INVALID) + return OFP_PKT_ON_HOLD; + + OFP_UPDATE_PACKET_STAT(rx_ip_reass, 1); + + ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + } + + return ofp_inetsw[ofp_ip_protox_gre].pr_input(pkt, ip->ip_hl << 2); +} + +enum ofp_return_code send_pkt_out(struct ofp_ifnet *dev, + odp_packet_t pkt) +{ + if (odp_queue_enq(ofp_get_ifnet(dev->port, 0)->outq_def, + odp_packet_to_event(pkt))) + return OFP_PKT_DROP; + + OFP_DEBUG_PACKET(OFP_DEBUG_PKT_SEND_NIC, pkt, dev->port); + + OFP_UPDATE_PACKET_STAT(tx_fp, 1); + + return OFP_PKT_PROCESSED; +} + +enum ofp_return_code send_pkt_loop(struct ofp_ifnet *dev, + odp_packet_t pkt) +{ + if (odp_queue_enq(ofp_get_ifnet(dev->port, 0)->loopq_def, + odp_packet_to_event(pkt))) + return OFP_PKT_DROP; + return OFP_PKT_PROCESSED; +} + +enum ofp_return_code ofp_arp_processing(odp_packet_t pkt) +{ + struct ofp_arphdr *arp; + struct ofp_ifnet *dev = odp_packet_user_ptr(pkt); + uint16_t vlan = dev->vlan; + + arp = (struct ofp_arphdr *)odp_packet_l3_ptr(pkt, NULL); + + /* save the received arp info */ + if (odp_be_to_cpu_16(arp->op) == OFP_ARPOP_REPLY) + ofp_add_mac(dev, arp->ip_src, arp->eth_src); + + /* on our interface an ARP request */ + if ((dev->ip_addr) && dev->ip_addr == (ofp_in_addr_t)(arp->ip_dst) && + odp_be_to_cpu_16(arp->op) == OFP_ARPOP_REQUEST) { + struct ofp_arphdr tmp; + struct ofp_ether_header tmp_eth; + struct ofp_ether_vlan_header tmp_eth_vlan; + void *l2_addr = odp_packet_l2_ptr(pkt, NULL); + struct ofp_ether_header *eth = + (struct ofp_ether_header *)l2_addr; + struct ofp_ether_vlan_header *eth_vlan = + (struct ofp_ether_vlan_header *)l2_addr; + + if (vlan) + tmp_eth_vlan = *eth_vlan; + else + tmp_eth = *eth; + + OFP_DBG("Reply to ARPOP_REQ from ip %s" +#ifdef SP + "on IF %d" +#endif + " mac %s ip %s\n", + ofp_print_ip_addr(arp->ip_src), +#ifdef SP + dev->linux_index, +#endif + ofp_print_mac(dev->mac), + 
ofp_print_ip_addr(arp->ip_dst)); + tmp = *arp; + tmp.ip_dst = arp->ip_src; + tmp.ip_src = arp->ip_dst; + memcpy(&tmp.eth_dst, &arp->eth_src, OFP_ETHER_ADDR_LEN); + memcpy(&tmp.eth_src, dev->mac, OFP_ETHER_ADDR_LEN); + tmp.op = odp_cpu_to_be_16(OFP_ARPOP_REPLY); + *arp = tmp; + + if (vlan) { + memcpy(tmp_eth_vlan.evl_dhost, &arp->eth_dst, + OFP_ETHER_ADDR_LEN); + memcpy(tmp_eth_vlan.evl_shost, &arp->eth_src, + OFP_ETHER_ADDR_LEN); + *eth_vlan = tmp_eth_vlan; + } else { + memcpy(tmp_eth.ether_dhost, &arp->eth_dst, + OFP_ETHER_ADDR_LEN); + memcpy(tmp_eth.ether_shost, &arp->eth_src, + OFP_ETHER_ADDR_LEN); + *eth = tmp_eth; + } + + return send_pkt_out(dev, pkt); + } + return OFP_PKT_CONTINUE; +} + +static void send_arp_request(struct ofp_ifnet *dev, uint32_t gw) +{ + char buf[sizeof(struct ofp_ether_vlan_header) + + sizeof(struct ofp_arphdr)]; + struct ofp_arphdr *arp; + struct ofp_ether_header *e1 = (struct ofp_ether_header *)buf; + struct ofp_ether_vlan_header *e2 = + (struct ofp_ether_vlan_header *)buf; + size_t size; + odp_packet_t pkt; + + memset(buf, 0, sizeof(buf)); + memset(e1->ether_dhost, 0xff, OFP_ETHER_ADDR_LEN); + memcpy(e1->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN); + + if (dev->vlan) { + arp = (struct ofp_arphdr *) (e2 + 1); + e2->evl_encap_proto = odp_cpu_to_be_16(OFP_ETHERTYPE_VLAN); + e2->evl_tag = odp_cpu_to_be_16(dev->vlan); + e2->evl_proto = odp_cpu_to_be_16(OFP_ETHERTYPE_ARP); + size = sizeof(*arp) + sizeof(*e2); + } else { + arp = (struct ofp_arphdr *) (e1 + 1); + e1->ether_type = odp_cpu_to_be_16(OFP_ETHERTYPE_ARP); + size = sizeof(*arp) + sizeof(*e1); + } + + arp->hrd = odp_cpu_to_be_16(OFP_ARPHDR_ETHER); + arp->pro = odp_cpu_to_be_16(OFP_ETHERTYPE_IP); + arp->hln = OFP_ETHER_ADDR_LEN; + arp->pln = sizeof(struct ofp_in_addr); + arp->op = odp_cpu_to_be_16(OFP_ARPOP_REQUEST); + memcpy(arp->eth_src, e1->ether_shost, OFP_ETHER_ADDR_LEN); + arp->ip_src = dev->ip_addr; + memcpy(arp->eth_dst, e1->ether_dhost, OFP_ETHER_ADDR_LEN); + arp->ip_dst = gw; + 
+ pkt = odp_packet_alloc(ofp_get_ifnet(dev->port, 0)->pkt_pool, size); + + if (pkt == ODP_PACKET_INVALID) { + OFP_ERR("Packet alloc failed\n"); + return; + } + + memcpy(odp_packet_data(pkt), buf, size); + + odp_packet_has_eth_set(pkt, 1); + odp_packet_has_arp_set(pkt, 1); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, size - sizeof(*arp)); + + if (send_pkt_out(dev, pkt) == OFP_PKT_DROP) + odp_packet_free(pkt); +} + +static enum ofp_return_code ofp_fragment_pkt(odp_packet_t pkt, + struct ofp_ifnet *dev_out, + uint8_t is_local_address) +{ + struct ofp_ip *ip, *ip_new; + uint16_t vlan = dev_out->vlan; + int tot_len, pl_len, seg_len, pl_pos, flen, hwlen; + uint16_t frag, frag_new; + uint8_t *payload_new; + uint32_t payload_offset; + odp_pool_t pkt_pool; + odp_packet_t pkt_new; + struct ofp_ether_header *eth, *eth_new; + struct ofp_ether_vlan_header *eth_vlan, *eth_new_vlan; + int ret = OFP_PKT_PROCESSED; + + if (!vlan) + eth = odp_packet_l2_ptr(pkt, NULL); + else + eth_vlan = odp_packet_l2_ptr(pkt, NULL); + + ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + + pkt_pool = ofp_get_ifnet(dev_out->port, 0)->pkt_pool; + tot_len = odp_be_to_cpu_16(ip->ip_len); + pl_len = tot_len - (ip->ip_hl<<2); + seg_len = (dev_out->if_mtu - sizeof(struct ofp_ip)) & 0xfff8; + pl_pos = 0; + frag = odp_be_to_cpu_16(ip->ip_off); + payload_offset = odp_packet_l3_offset(pkt) + (ip->ip_hl<<2); + + OFP_UPDATE_PACKET_STAT(tx_eth_frag, 1); + + while (pl_pos < pl_len) { + flen = (pl_len - pl_pos) > seg_len ? + seg_len : (pl_len - pl_pos); + hwlen = flen + sizeof(struct ofp_ip) + + (vlan ? 
sizeof(struct ofp_ether_vlan_header) : + sizeof(struct ofp_ether_header)); + + pkt_new = odp_packet_alloc(pkt_pool, hwlen); + if (pkt_new == ODP_PACKET_INVALID) { + OFP_ERR("Packet alloc failed\n"); + return OFP_PKT_DROP; + } + odp_packet_user_ptr_set(pkt_new, odp_packet_user_ptr(pkt)); + + odp_packet_l2_offset_set(pkt_new, 0); + if (vlan) { + eth_new_vlan = odp_packet_l2_ptr(pkt_new, NULL); + *eth_new_vlan = *eth_vlan; + ip_new = (struct ofp_ip *)(eth_new_vlan + 1); + odp_packet_l3_offset_set(pkt_new, + OFP_ETHER_HDR_LEN + + OFP_ETHER_VLAN_ENCAP_LEN); + } else { + eth_new = odp_packet_l2_ptr(pkt_new, NULL); + *eth_new = *eth; + ip_new = (struct ofp_ip *)(eth_new + 1); + odp_packet_l3_offset_set(pkt_new, + OFP_ETHER_HDR_LEN); + } + + *ip_new = *ip; + + payload_new = (uint8_t *)(ip_new + 1); + + if (odp_packet_copydata_out(pkt, payload_offset + pl_pos, + flen, payload_new) < 0) { + OFP_ERR("Packet data copy failed\n"); + return OFP_PKT_DROP; + }; + + ip_new->ip_len = odp_cpu_to_be_16(flen + sizeof(*ip_new)); + + frag_new = frag + pl_pos/8; + pl_pos += flen; + if (pl_pos < pl_len) + frag_new |= OFP_IP_MF; + ip_new->ip_off = odp_cpu_to_be_16(frag_new); + + ip_new->ip_sum = 0; + ip_new->ip_sum = ofp_in_cksum((uint16_t *)ip_new, + sizeof(*ip_new)); + + if (is_local_address) + ret = send_pkt_loop(dev_out, pkt_new); + else + ret = send_pkt_out(dev_out, pkt_new); + + if (ret == OFP_PKT_DROP) { + odp_packet_free(pkt_new); + return OFP_PKT_DROP; + } + } + + odp_packet_free(pkt); + return OFP_PKT_PROCESSED; +} + +static enum ofp_return_code ofp_output_ipv4_to_gre( + odp_packet_t pkt, struct ofp_ifnet *dev_gre, + uint16_t vrfid, struct ofp_nh_entry **nh_new) +{ + struct ofp_ip *ip; + struct ofp_greip *greip; + uint32_t flags; + uint8_t l2_size = 0; + int32_t offset; + + *nh_new = ofp_get_next_hop(vrfid, dev_gre->ip_remote, &flags); + + if (*nh_new == NULL) + return OFP_PKT_DROP; + + ip = odp_packet_l3_ptr(pkt, NULL); + + /* Remove eth header, prepend gre + ip */ + if 
(odp_packet_has_l2(pkt)) + l2_size = odp_packet_l3_offset(pkt) - odp_packet_l2_offset(pkt); + + offset = sizeof(*greip) - l2_size; + if (offset >= 0) + greip = odp_packet_push_head(pkt, offset); + else + greip = odp_packet_pull_head(pkt, -offset); + + odp_packet_has_l2_set(pkt, 0); + odp_packet_l3_offset_set(pkt, 0); + + if (!greip) + return OFP_PKT_DROP; + + greip->gi_flags = 0; + greip->gi_ptype = odp_cpu_to_be_16(OFP_GREPROTO_IP); + + greip->gi_i.ip_hl = 5; + greip->gi_i.ip_v = 4; + greip->gi_i.ip_tos = ip->ip_tos; + greip->gi_i.ip_len = + odp_cpu_to_be_16(odp_be_to_cpu_16(ip->ip_len) + + sizeof(*greip)); + greip->gi_i.ip_id = ip->ip_id; + greip->gi_i.ip_off = 0; + greip->gi_i.ip_ttl = ip->ip_ttl; + greip->gi_i.ip_p = OFP_IPPROTO_GRE; + greip->gi_i.ip_sum = 0; + greip->gi_i.ip_src.s_addr = dev_gre->ip_local; + greip->gi_i.ip_dst.s_addr = dev_gre->ip_remote; + + return OFP_PKT_CONTINUE; +} + +enum ofp_return_code ofp_ip_output(odp_packet_t pkt, + struct ofp_nh_entry *nh_param) +{ + struct ofp_ip *ip; + uint8_t l2_size = 0; + void *l2_addr; + uint32_t flags; + struct ofp_nh_entry *nh, *nh_new = NULL; + uint32_t gw; + uint16_t vlan; + int out_port; + struct ofp_ifnet *send_ctx = odp_packet_user_ptr(pkt); + struct ofp_ifnet *dev_out = NULL; + uint16_t vrf = send_ctx ? send_ctx->vrf : 0; + uint8_t is_local_address = 0; + + if (odp_packet_l3_offset(pkt) == ODP_PACKET_OFFSET_INVALID) + odp_packet_l3_offset_set(pkt, 0); + ip = (struct ofp_ip *) odp_packet_l3_ptr(pkt, NULL); + if (odp_unlikely(ip == NULL)) + return OFP_PKT_DROP; + + if (ip->ip_p == OFP_IPPROTO_TCP) { + /* Checksum calculation is done here. We don't know if + the hardware does this or is it our job. Either way, + there is only one place to modify. 
*/ + struct ofp_tcphdr *th = (struct ofp_tcphdr *) + ((uint8_t *)ip + (ip->ip_hl<<2)); + th->th_sum = 0; + th->th_sum = ofp_in4_cksum(pkt); + } + + if (nh_param) { + nh = nh_param; + } else { + nh = ofp_get_next_hop(vrf, ip->ip_dst.s_addr, &flags); + if (!nh) + return OFP_PKT_DROP; + } + + gw = nh->gw; + vlan = nh->vlan; + out_port = nh->port; + + dev_out = ofp_get_ifnet(out_port, vlan); + + if (!dev_out) + return OFP_PKT_DROP; + + /* GRE */ + if (out_port == GRE_PORTS) { + if (ofp_output_ipv4_to_gre(pkt, dev_out, vrf, + &nh_new) == OFP_PKT_DROP) + return OFP_PKT_DROP; + + nh = nh_new; + gw = nh->gw; + vlan = nh->vlan; + out_port = nh->port; + ip = odp_packet_l3_ptr(pkt, NULL); + + dev_out = ofp_get_ifnet(out_port, vlan); + if (!dev_out) + return OFP_PKT_DROP; + } + + if (!gw) /* link local */ + gw = ip->ip_dst.s_addr; + + if (!vlan) + l2_size = sizeof(struct ofp_ether_header); + else + l2_size = sizeof(struct ofp_ether_vlan_header); + + if (odp_packet_l2_offset(pkt) + l2_size == odp_packet_l3_offset(pkt)) { + l2_addr = odp_packet_l2_ptr(pkt, NULL); + } else if (odp_packet_l3_offset(pkt) >= l2_size) { + odp_packet_l2_offset_set(pkt, + odp_packet_l3_offset(pkt) - l2_size); + l2_addr = odp_packet_l2_ptr(pkt, NULL); + } else { + l2_addr = odp_packet_push_head(pkt, + l2_size - odp_packet_l3_offset(pkt)); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, l2_size); + odp_packet_l4_offset_set(pkt, l2_size + (ip->ip_hl<<2)); + } + + if (odp_unlikely(l2_addr == NULL)) + return OFP_PKT_DROP; + + if (!vlan) { + struct ofp_ether_header *eth = + (struct ofp_ether_header *)l2_addr; + + if (dev_out->ip_addr == ip->ip_dst.s_addr) { + is_local_address = 1; + ofp_copy_mac(eth->ether_dhost, &(dev_out->mac[0])); + } else if (ofp_get_mac(dev_out, gw, eth->ether_dhost) < 0) { + send_arp_request(dev_out, gw); + return ofp_arp_save_ipv4_pkt(pkt, nh, gw, dev_out); + } + + ofp_copy_mac(eth->ether_shost, dev_out->mac); + eth->ether_type = odp_cpu_to_be_16(OFP_ETHERTYPE_IP); 
+ } else { + struct ofp_ether_vlan_header *eth_vlan = + (struct ofp_ether_vlan_header *)l2_addr; + + if (dev_out->ip_addr == ip->ip_dst.s_addr) { + is_local_address = 1; + ofp_copy_mac(eth_vlan->evl_dhost, dev_out->mac); + } else if (ofp_get_mac(dev_out, + gw, eth_vlan->evl_dhost) < 0) { + send_arp_request(dev_out, gw); + return ofp_arp_save_ipv4_pkt(pkt, nh, gw, dev_out); + } + + ofp_copy_mac(eth_vlan->evl_shost, dev_out->mac); + eth_vlan->evl_encap_proto = odp_cpu_to_be_16( + OFP_ETHERTYPE_VLAN); + eth_vlan->evl_tag = odp_cpu_to_be_16(vlan); + eth_vlan->evl_proto = odp_cpu_to_be_16(OFP_ETHERTYPE_IP); + } + + /* Fragmentation */ + if (odp_be_to_cpu_16(ip->ip_len) > dev_out->if_mtu) { + if (odp_be_to_cpu_16(ip->ip_off) & OFP_IP_DF) { + ofp_icmp_error(pkt, OFP_ICMP_UNREACH, + OFP_ICMP_UNREACH_NEEDFRAG, + 0, dev_out->if_mtu); + return OFP_PKT_DROP; + } + return ofp_fragment_pkt(pkt, dev_out, is_local_address); + } + +#ifndef OFP_PERFORMANCE + ip->ip_sum = 0; + ip->ip_sum = ofp_in_cksum((uint16_t *)ip, ip->ip_hl<<2); +#endif + + if (is_local_address) + return send_pkt_loop(dev_out, pkt); + else + return send_pkt_out(dev_out, pkt); +} + +#ifdef INET6 +static enum ofp_return_code ofp_output_ipv6_to_gre( + odp_packet_t pkt, struct ofp_ifnet *dev_gre, + uint16_t vrfid, struct ofp_nh_entry **nh_new) +{ + struct ofp_ip6_hdr *ip6; + struct ofp_greip *greip; + uint32_t flags; + uint8_t l2_size = 0; + int32_t offset; + static uint16_t id = 0; + + *nh_new = ofp_get_next_hop(vrfid, dev_gre->ip_remote, &flags); + + if (*nh_new == NULL) + return OFP_PKT_DROP; + + ip6 = odp_packet_l3_ptr(pkt, NULL); + + /* Remove eth header, prepend gre + ip */ + if (odp_packet_has_l2(pkt)) + l2_size = odp_packet_l3_offset(pkt) - odp_packet_l2_offset(pkt); + + offset = sizeof(*greip) - l2_size; + if (offset >= 0) + greip = odp_packet_push_head(pkt, offset); + else + greip = odp_packet_pull_head(pkt, -offset); + + odp_packet_has_l2_set(pkt, 0); + odp_packet_l3_offset_set(pkt, 0); + + if (!greip) + 
return OFP_PKT_DROP; + + greip->gi_flags = 0; + greip->gi_ptype = odp_cpu_to_be_16(OFP_ETHERTYPE_IPV6); + + greip->gi_i.ip_hl = 5; + greip->gi_i.ip_v = 4; + greip->gi_i.ip_tos = 0; + greip->gi_i.ip_len = odp_cpu_to_be_16( + odp_be_to_cpu_16(ip6->ofp_ip6_plen) + + sizeof(*ip6) + sizeof(*greip)); + greip->gi_i.ip_id = odp_cpu_to_be_16(id++); + greip->gi_i.ip_off = 0; + greip->gi_i.ip_ttl = ip6->ofp_ip6_hlim; + greip->gi_i.ip_p = OFP_IPPROTO_GRE; + greip->gi_i.ip_sum = 0; + greip->gi_i.ip_src.s_addr = dev_gre->ip_local; + greip->gi_i.ip_dst.s_addr = dev_gre->ip_remote; + + odp_packet_has_ipv6_set(pkt, 0); + odp_packet_has_ipv4_set(pkt, 1); + + return OFP_PKT_CONTINUE; +} + +enum ofp_return_code ofp_ip6_output(odp_packet_t pkt, + struct ofp_nh6_entry *nh_param) +{ + struct ofp_ip6_hdr *ip6; + uint8_t l2_size; + void *l2_addr; + uint32_t flags; + struct ofp_nh_entry *nh4 = NULL; + struct ofp_nh6_entry *nh; + uint16_t vlan; + int out_port; + struct ofp_ifnet *send_ctx = odp_packet_user_ptr(pkt); + struct ofp_ifnet *dev_out = NULL; + int vrf = send_ctx ? 
send_ctx->vrf : 0; + uint8_t is_local_address = 0; + uint8_t *mac = NULL; + + if (odp_packet_l3_offset(pkt) == ODP_PACKET_OFFSET_INVALID) + odp_packet_l3_offset_set(pkt, 0); + ip6 = (struct ofp_ip6_hdr *) odp_packet_l3_ptr(pkt, NULL); + if (odp_unlikely(ip6 == NULL)) + return OFP_PKT_DROP; + + if (nh_param) { + nh = nh_param; + vlan = nh->vlan; + out_port = nh->port; + } else { + nh = ofp_get_next_hop6(vrf, + ip6->ip6_dst.ofp_s6_addr, &flags); + if (nh) { + vlan = nh->vlan; + out_port = nh->port; + } else + return OFP_PKT_DROP; + } + + dev_out = ofp_get_ifnet(out_port, vlan); + + if (!dev_out) + return OFP_PKT_DROP; + + /* GRE */ + if (out_port == GRE_PORTS) { + if (ofp_output_ipv6_to_gre(pkt, dev_out, vrf, + &nh4) == OFP_PKT_DROP) + return OFP_PKT_DROP; + + return ofp_ip_output(pkt, nh4); + } + + if (!vlan) + l2_size = sizeof(struct ofp_ether_header); + else + l2_size = sizeof(struct ofp_ether_vlan_header); + + if (odp_packet_l3_offset(pkt) >= l2_size) { + odp_packet_l2_offset_set(pkt, + odp_packet_l3_offset(pkt) - l2_size); + l2_addr = odp_packet_l2_ptr(pkt, NULL); + } else { + int hlen = 0; + + if (odp_packet_l4_offset(pkt) != ODP_PACKET_OFFSET_INVALID) + hlen = odp_packet_l4_offset(pkt) - + odp_packet_l3_offset(pkt); + + l2_addr = odp_packet_push_head(pkt, + l2_size - odp_packet_l3_offset(pkt)); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, l2_size); + odp_packet_l4_offset_set(pkt, l2_size + hlen); + } + + if (odp_unlikely(l2_addr == NULL)) + return OFP_PKT_DROP; + + /* MAC address for the destination */ + if (ofp_ip6_equal(dev_out->ip6_addr, ip6->ip6_dst.ofp_s6_addr)) { + is_local_address = 1; + mac = dev_out->mac; + } else { + mac = nh->mac; + + if (!(((uint32_t *)mac)[0] || mac[4] || mac[5])) { + SET_ROUTE6(OFP_ROUTE6_ADD, + &ip6->ip6_dst.ofp_s6_addr[0], + 128, + ofp_in6addr_any.ofp_s6_addr, + dev_out->port, + dev_out->vlan); + + if (ofp_nd6_ns_output(dev_out, nh->gw, + ip6->ip6_dst.ofp_s6_addr) == OFP_PKT_DROP) { + OFP_ERR("MAC not 
set: gw = %x %x\n", nh->gw[0], + nh->gw[15]); + odp_packet_free(pkt); + return OFP_PKT_DROP; + } + return ofp_route_save_ipv6_pkt(pkt, + &ip6->ip6_dst.ofp_s6_addr[0], + dev_out); + } + } + + if (!vlan) { + struct ofp_ether_header *eth = + (struct ofp_ether_header *)l2_addr; + + memcpy(eth->ether_dhost, mac, OFP_ETHER_ADDR_LEN); + memcpy(eth->ether_shost, dev_out->mac, OFP_ETHER_ADDR_LEN); + eth->ether_type = odp_cpu_to_be_16(OFP_ETHERTYPE_IPV6); + } else { + struct ofp_ether_vlan_header *eth_vlan = + (struct ofp_ether_vlan_header *)l2_addr; + + memcpy(eth_vlan->evl_dhost, mac, OFP_ETHER_ADDR_LEN); + memcpy(eth_vlan->evl_shost, dev_out->mac, OFP_ETHER_ADDR_LEN); + eth_vlan->evl_encap_proto = + odp_cpu_to_be_16(OFP_ETHERTYPE_VLAN); + eth_vlan->evl_tag = odp_cpu_to_be_16(vlan); + eth_vlan->evl_proto = odp_cpu_to_be_16(OFP_ETHERTYPE_IPV6); + } + + if (is_local_address) + return send_pkt_loop(dev_out, pkt); + else + return send_pkt_out(dev_out, pkt); +} +#endif /* INET6 */ + +enum ofp_return_code ofp_packet_input(odp_packet_t pkt, + odp_queue_t in_queue, ofp_pkt_processing_func pkt_func) +{ + struct ofp_ifnet *ifnet; + odp_pktio_t pktio; + int res; + + pktio = odp_packet_input(pkt); + if (pktio != ODP_PKTIO_INVALID) /* pkt received from interface */ + ifnet = (struct ofp_ifnet *)odp_queue_get_context( + odp_pktio_outq_getdef(pktio)); + else { /* loopback and cunit*/ + ifnet = (struct ofp_ifnet *)odp_queue_get_context(in_queue); + if (!ifnet) { + odp_packet_free(pkt); + return OFP_PKT_DROP; + } + } + + odp_packet_user_ptr_set(pkt, ifnet); + + OFP_DEBUG_PACKET(OFP_DEBUG_PKT_RECV_NIC, pkt, ifnet->port); + + OFP_UPDATE_PACKET_STAT(rx_fp, 1); + + OFP_UPDATE_PACKET_LATENCY_STAT(1); + + /* data link layer processing */ + res = pkt_func(pkt); + + if (res == OFP_PKT_DROP) + odp_packet_free(pkt); + + if (res != OFP_PKT_CONTINUE) + return res; + + /* Enqueue the packet for slowpath */ + return ofp_sp_input(pkt, ifnet); +} + +enum ofp_return_code ofp_sp_input(odp_packet_t pkt, + 
struct ofp_ifnet *ifnet) +{ +#ifdef SP + odp_queue_enq(ifnet->spq_def, odp_packet_to_event(pkt)); + return OFP_PKT_PROCESSED; +#else + (void)pkt; + (void)ifnet; + + return OFP_PKT_DROP; +#endif +} diff --git a/src/ofp_portconf.c b/src/ofp_portconf.c new file mode 100644 index 00000000..6168acd0 --- /dev/null +++ b/src/ofp_portconf.c @@ -0,0 +1,1222 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include +#include +#include + +#include "ofpi.h" +#include "ofpi_portconf.h" +#include "ofpi_route.h" +#include "ofpi_util.h" +#include "ofpi_avl.h" + +#include "ofpi_queue.h" +#include "ofpi_ioctl.h" + +#include "ofpi_log.h" + +#ifdef SP +#define NUM_LINUX_INTERFACES 512 +#endif /*SP*/ + +#define PORT_UNDEF 0xFFFF + +/* + * Shared data + */ +struct ofp_portconf_mem { + struct ofp_ifnet ofp_ifnet_data[NUM_PORTS]; + int ofp_num_ports; + + struct in_ifaddrhead in_ifaddrhead; +#ifdef INET6 + struct in_ifaddrhead in_ifaddr6head; +#endif /* INET6 */ + +#ifdef SP + struct { + uint16_t port; + uint16_t vlan; + } linux_interface_table[NUM_LINUX_INTERFACES]; +#endif /* SP */ +}; + + +/* + * Data per core + */ +static __thread struct ofp_portconf_mem *shm; +struct ofp_ifnet_locks_str *ofp_ifnet_locks_shm; + +/*Wrapper functions over AVL tree*/ +static void *new_vlan( + int (*compare_fun)(void *compare_arg, void *a, void *b), + void *compare_arg) +{ + return avl_tree_new(compare_fun, compare_arg); +} + +static int vlan_iterate_inorder(void *root, + int (*iterate_fun)(void *key, void *iter_arg), + void *iter_arg) +{ + return avl_iterate_inorder(root, iterate_fun, iter_arg); +} + +int vlan_ifnet_insert(void *root, void *elem) +{ + return avl_insert((avl_tree *)root, elem); +} + +int vlan_ifnet_delete(void *root, void *elem, + int (*free_key_fun)(void *arg)) +{ + return avl_delete(root, elem, free_key_fun); +} + +int ofp_vlan_get_by_key( + void *root, + void *key, + void 
**value_address + ) +{ + return avl_get_by_key(root, key, value_address); +} + +int ofp_get_num_ports(void) +{ + return shm->ofp_num_ports; +} + +static int vlan_ifnet_compare(void *compare_arg, void *a, void *b) +{ + struct ofp_ifnet *a1 = a; + struct ofp_ifnet *b1 = b; + + compare_arg = compare_arg; + + return (a1->vlan - b1->vlan); +} + +void ofp_init_ifnet_data(void) +{ + int i; + + memset(&shm->ofp_ifnet_data, 0, sizeof(shm->ofp_ifnet_data)); + for (i = 0; i < NUM_PORTS; i++) { + shm->ofp_ifnet_data[i].vlan_structs = + new_vlan(vlan_ifnet_compare, NULL); + shm->ofp_ifnet_data[i].port = i; + shm->ofp_ifnet_data[i].if_type = OFP_IFT_ETHER; + /*TODO get if_mtu from Linux/SDK*/ + shm->ofp_ifnet_data[i].if_mtu = 1500; + } + + shm->ofp_num_ports = NUM_PORTS; +#ifdef SP + for (i = 0; i < NUM_LINUX_INTERFACES; ++i) + shm->linux_interface_table[i].port = PORT_UNDEF; +#endif /* SP */ + + OFP_TAILQ_INIT(&shm->in_ifaddrhead); + odp_rwlock_init(&ofp_ifnet_locks_shm->lock_ifaddr_list_rw); +#ifdef INET6 + OFP_TAILQ_INIT(&shm->in_ifaddr6head); + odp_rwlock_init(&ofp_ifnet_locks_shm->lock_ifaddr6_list_rw); +#endif /* INET6 */ +} + +static int vlan_match_ip(void *key, void *iter_arg) +{ + struct ofp_ifnet *iface = key; + uint32_t ip = *((uint32_t *)iter_arg); + + if (iface->ip_addr == ip) + return iface->vlan; + else + return 0; +} + + + +static int iter_vlan(void *key, void *iter_arg) +{ + struct ofp_ifnet *iface = key; + char buf[16]; + int fd = *((int *)iter_arg); + + uint32_t mask = ~0; + + mask = odp_cpu_to_be_32(mask << (32 - iface->masklen)); + + if (iface->port == GRE_PORTS && iface->vlan) { +#ifdef SP + ofp_sendf(fd, "gre%d (%d) slowpath: %s\r\n", iface->vlan, + iface->linux_index, + iface->sp_status ? 
"on" : "off"); +#else + ofp_sendf(fd, "gre%d\r\n", iface->vlan); +#endif + + if (iface->vrf) + ofp_sendf(fd, " VRF: %d\r\n", iface->vrf); + + ofp_sendf(fd, + " Link encap:Ethernet HWaddr: %s\r\n" + " inet addr:%s P-t-P:%s Mask:%s\r\n" +#ifdef INET6 + " inet6 addr: %s\r\n" +#endif /* INET6 */ + " MTU: %d\r\n", + ofp_print_mac(iface->mac), + ofp_print_ip_addr(iface->ip_addr), + ofp_print_ip_addr(iface->ip_p2p), + ofp_print_ip_addr(mask), +#ifdef INET6 + ofp_print_ip6_addr(iface->link_local), +#endif /* INET6 */ + iface->if_mtu); + + ofp_sendf(fd, + " Local: %s Remote: %s\r\n\r\n", + ofp_print_ip_addr(iface->ip_local), + ofp_print_ip_addr(iface->ip_remote)); + return 0; + } else if (iface->port == GRE_PORTS && !iface->vlan) { + ofp_sendf(fd, "gre%d\r\n" + " Link not configured\r\n\r\n", + iface->vlan); + return 0; + } + + snprintf(buf, sizeof(buf), ".%d", iface->vlan); + + if (ofp_has_mac(iface->mac)) { +#ifdef SP + ofp_sendf(fd, + "%s%d%s (%d) (%s) slowpath: %s\r\n", + OFP_IFNAME_PREFIX, + iface->port, + iface->vlan ? buf : "", + iface->linux_index, + iface->if_name, + iface->sp_status ? "on" : "off"); +#else + ofp_sendf(fd, + "%s%d%s (%s)\r\n", + OFP_IFNAME_PREFIX, + iface->port, + iface->vlan ? 
buf : "", + iface->if_name); +#endif + + if (iface->vrf) + ofp_sendf(fd, " VRF: %d\r\n", iface->vrf); + + ofp_sendf(fd, + " Link encap:Ethernet HWaddr: %s\r\n", + ofp_print_mac(iface->mac)); + + if (iface->ip_addr) + ofp_sendf(fd, + " inet addr:%s Bcast:%s Mask:%s\r\n", + ofp_print_ip_addr(iface->ip_addr), + ofp_print_ip_addr(iface->bcast_addr), + ofp_print_ip_addr(mask)); + +#ifdef INET6 + ofp_sendf(fd, + " inet6 addr: %s Scope:Link\r\n", + ofp_print_ip6_addr(iface->link_local)); + + if (ofp_ip6_is_set(iface->ip6_addr)) + ofp_sendf(fd, + " inet6 addr: %s/%d\r\n", + ofp_print_ip6_addr(iface->ip6_addr), + iface->ip6_prefix); +#endif /* INET6 */ + + ofp_sendf(fd, + " MTU: %d\r\n\r\n", + iface->if_mtu); + } else { + ofp_sendf(fd, "%s%d%s\r\n" + " Link not configured\r\n\r\n", + OFP_IFNAME_PREFIX, + iface->port, iface->vlan ? buf : ""); + } + + return 0; +} + +void ofp_show_interfaces(int fd) +{ + int i; + + /* fp interfaces */ + for (i = 0; i < shm->ofp_num_ports - 1; i++) { + iter_vlan(&shm->ofp_ifnet_data[i], &fd); + vlan_iterate_inorder(shm->ofp_ifnet_data[i].vlan_structs, + iter_vlan, &fd); + } + + /* gre interfaces */ + if (avl_get_first(shm->ofp_ifnet_data[GRE_PORTS].vlan_structs)) + vlan_iterate_inorder( + shm->ofp_ifnet_data[GRE_PORTS].vlan_structs, + iter_vlan, &fd); + else + ofp_sendf(fd, "gre\r\n" + " Link not configured\r\n\r\n"); +} + +int free_key(void *key) +{ + free(key); + return 1; +} + +static inline int exec_sys_call_depending_on_vrf(char *cmd, uint16_t vrf) +{ + if (vrf == 0) { + OFP_DBG("%s\n", cmd); + return system(cmd); + } + + return 0; +} + +const char *ofp_config_interface_up_v4(int port, uint16_t vlan, uint16_t vrf, + uint32_t addr, int masklen) +{ +#ifdef SP + char cmd[200]; + int ret = 0; +#endif /* SP */ + struct ofp_ifnet *data; + uint32_t mask; + +#ifdef SP + (void)ret; +#endif + + if (port < 0 || port >= shm->ofp_num_ports - 1) + return "Wrong port number"; + + mask = ~0; + mask = odp_cpu_to_be_32(mask << (32 - masklen)); + + data = 
ofp_get_ifnet(port, vlan); + + if (data && data->vrf != vrf) { + ofp_config_interface_down(data->port, data->vlan); + data = ofp_get_ifnet(port, vlan); + } + + if (vlan) { + if (data == NULL) { + data = ofp_get_create_ifnet(port, vlan); +#ifdef SP + snprintf(cmd, sizeof(cmd), "vconfig add %s %d", + ofp_port_vlan_to_ifnet_name(port, 0), vlan); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /* SP */ + } else { + SET_ROUTE(OFP_ROUTE_DEL, data->vrf, data->ip_addr, + data->masklen, 0, port, vlan); + } + data->vrf = vrf; + data->ip_addr = addr; + data->masklen = masklen; + data->bcast_addr = addr | ~mask; + SET_ROUTE(OFP_ROUTE_ADD, + data->vrf, + data->ip_addr, + data->masklen, + 0, + port, + vlan); +#ifdef SP + if (vrf == 0) + data->sp_status = OFP_SP_UP; + else + data->sp_status = OFP_SP_DOWN; + + snprintf(cmd, sizeof(cmd), "ifconfig %s %s/%d up", + ofp_port_vlan_to_ifnet_name(port, vlan), + ofp_print_ip_addr(addr), masklen); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /* SP */ + } else { + if (data->ip_addr) { + SET_ROUTE(OFP_ROUTE_DEL, + data->vrf, + data->ip_addr, + data->masklen, + 0, + port, + 0); + } + + data->vrf = vrf; + data->ip_addr = addr; + data->masklen = masklen; + data->bcast_addr = addr | ~mask; + + /* Add interface to the if_addr v4 queue */ + ofp_ifaddr_elem_add(data); +#ifdef INET6 + ofp_mac_to_link_local(data->mac, data->link_local); +#endif /* INET6 */ + + SET_ROUTE(OFP_ROUTE_ADD, + data->vrf, + data->ip_addr, + data->masklen, + 0, + port, + 0); +#ifdef SP + if (vrf == 0) + data->sp_status = OFP_SP_UP; + else + data->sp_status = OFP_SP_DOWN; + + snprintf(cmd, sizeof(cmd), "ifconfig %s %s/%d up", + ofp_port_vlan_to_ifnet_name(port, 0), + ofp_print_ip_addr(addr), masklen); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /* SP */ + } + + return NULL; +} + +const char *ofp_config_interface_up_tun(int port, uint16_t greid, + uint16_t vrf, uint32_t tun_loc, + uint32_t tun_rem, uint32_t p2p, + uint32_t addr, int mlen) +{ 
+#ifdef SP + char cmd[200]; + int ret = 0, new = 0; +#endif /* SP */ + struct ofp_ifnet *data, *dev_root; + +#ifdef SP + (void)ret; + (void)new; +#endif + + if (port != GRE_PORTS || greid == 0) + return "Wrong port number or tunnel ID."; + + dev_root = ofp_get_ifnet_by_ip(tun_loc, vrf); + if (dev_root == NULL) + return "Tunnel local ip not configured."; + + data = ofp_get_ifnet(port, greid); + + if (data && data->vrf != vrf) { + ofp_config_interface_down(data->port, data->vlan); + data = NULL; + } + + if (data == NULL) { +#ifdef SP + new = 1; +#endif /* SP */ + data = ofp_get_create_ifnet(port, greid); + data->if_type = OFP_IFT_GRE; + } else { + SET_ROUTE(OFP_ROUTE_DEL, data->vrf, data->ip_p2p, + data->masklen, 0, port, greid); +#ifdef SP + snprintf(cmd, sizeof(cmd), + "ip addr del dev %s %s peer %s", + ofp_port_vlan_to_ifnet_name(port, greid), + ofp_print_ip_addr(data->ip_addr), + ofp_print_ip_addr(data->ip_p2p)); + ret = exec_sys_call_depending_on_vrf(cmd, data->vrf); +#endif /* SP */ + } + + data->vrf = vrf; + data->ip_local = tun_loc; + data->ip_remote = tun_rem; + data->ip_p2p = p2p; + data->ip_addr = addr; + data->masklen = mlen; + data->if_mtu = dev_root->if_mtu - sizeof(struct ofp_greip); + + SET_ROUTE(OFP_ROUTE_ADD, + data->vrf, + data->ip_p2p, + data->masklen, + 0, + port, + greid); + +#ifdef SP + if (vrf == 0) + data->sp_status = OFP_SP_UP; + else + data->sp_status = OFP_SP_DOWN; + + snprintf(cmd, sizeof(cmd), + "ip tunnel %s %s mode gre local %s remote %s ttl 255", + (new ? 
"add" : "change"), + ofp_port_vlan_to_ifnet_name(port, greid), + ofp_print_ip_addr(tun_loc), ofp_print_ip_addr(tun_rem)); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); + + snprintf(cmd, sizeof(cmd), + "ip link set dev %s up", + ofp_port_vlan_to_ifnet_name(port, greid)); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); + + snprintf(cmd, sizeof(cmd), + "ip addr add dev %s %s peer %s", + ofp_port_vlan_to_ifnet_name(port, greid), + ofp_print_ip_addr(addr), ofp_print_ip_addr(p2p)); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /* SP */ + return NULL; +} + +#ifdef INET6 +const char *ofp_config_interface_up_v6(int port, uint16_t vlan, + uint8_t *addr, int masklen) +{ +#ifdef SP + char cmd[200]; + int ret = 0; +#endif /* SP */ + uint8_t gw6[16]; + struct ofp_ifnet *data; + +#ifdef SP + (void)ret; +#endif + memset(gw6, 0, 16); + + if (port < 0 || port >= shm->ofp_num_ports - 1) + return "Wrong port number"; + + data = ofp_get_ifnet(port, vlan); + + if (vlan) { + if (data == NULL) { + data = ofp_get_create_ifnet(port, vlan); + data->vrf = 0; +#ifdef SP + snprintf(cmd, sizeof(cmd), "vconfig add %s %d", + ofp_port_vlan_to_ifnet_name(port, 0), vlan); + ret = exec_sys_call_depending_on_vrf(cmd, data->vrf); +#endif /* SP */ + } else { + if (ofp_ip6_is_set(data->ip6_addr)) { + SET_ROUTE6(OFP_ROUTE6_DEL, + data->ip6_addr, + data->ip6_prefix, + (uint8_t *)gw6, + port, + vlan); + } + } + + memcpy(data->ip6_addr, addr, 16); + data->ip6_prefix = masklen; + SET_ROUTE6(OFP_ROUTE6_ADD, + data->ip6_addr, + data->ip6_prefix, + (uint8_t *)gw6, + port, + vlan); +#ifdef SP + if (data->vrf == 0) + data->sp_status = OFP_SP_UP; + else + data->sp_status = OFP_SP_DOWN; + + snprintf(cmd, sizeof(cmd), + "ifconfig %s inet6 add %s/%d up", + ofp_port_vlan_to_ifnet_name(port, vlan), + ofp_print_ip6_addr(addr), masklen); + ret = exec_sys_call_depending_on_vrf(cmd, data->vrf); +#endif /*SP*/ + } else { + if (ofp_ip6_is_set(data->ip6_addr)) { + SET_ROUTE6(OFP_ROUTE6_DEL, + (uint8_t 
*)data->ip6_addr, + data->ip6_prefix, + (uint8_t *)gw6, + port, + 0); + } + memcpy(data->ip6_addr, addr, 16); + data->ip6_prefix = masklen; + + ofp_mac_to_link_local(data->mac, data->link_local); + + /* Add interface to the if_addr v6 queue */ + ofp_ifaddr6_elem_add(data); + + SET_ROUTE6(OFP_ROUTE6_ADD, + (uint8_t *)data->ip6_addr, + data->ip6_prefix, + gw6, + port, + 0); +#ifdef SP + if (data->vrf == 0) + data->sp_status = OFP_SP_UP; + else + data->sp_status = OFP_SP_DOWN; + + snprintf(cmd, sizeof(cmd), + "ifconfig %s inet6 add %s/%d up", + ofp_port_vlan_to_ifnet_name(port, 0), + ofp_print_ip6_addr(addr), masklen); + + ret = exec_sys_call_depending_on_vrf(cmd, data->vrf); +#endif /* SP */ + } + + return NULL; +} +#endif /* INET6 */ + +const char *ofp_config_interface_down(int port, uint16_t vlan) +{ +#ifdef SP + char cmd[200]; + int ret = 0; + uint16_t vrf; +#endif /* SP */ + uint8_t gw6[16]; + struct ofp_ifnet *data; + +#ifdef SP + (void)ret; +#endif + memset(gw6, 0, 16); + + if (port < 0 || port >= shm->ofp_num_ports) + return "Wrong port number"; + + if (vlan) { + struct ofp_ifnet key; + + key.vlan = vlan; + if (ofp_vlan_get_by_key( + shm->ofp_ifnet_data[port].vlan_structs, + &key, + (void *)&data)) + return "Unknown interface"; + +#ifdef SP + vrf = data->vrf; +#endif + if (data->ip_addr) { + SET_ROUTE(OFP_ROUTE_DEL, data->vrf, + (data->port == GRE_PORTS ? 
+ data->ip_p2p : data->ip_addr), + data->masklen, + 0, port, vlan); +#ifdef SP + snprintf(cmd, sizeof(cmd), + "ifconfig %s 0.0.0.0", + ofp_port_vlan_to_ifnet_name(port, vlan)); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /*SP*/ + } +#ifdef INET6 + if (ofp_ip6_is_set(data->ip6_addr)) { + SET_ROUTE6(OFP_ROUTE6_DEL, + data->ip6_addr, + data->ip6_prefix, + (uint8_t *)gw6, + port, + vlan); +#ifdef SP + snprintf(cmd, sizeof(cmd), + "ifconfig %s inet6 del %s/%d", + ofp_port_vlan_to_ifnet_name(port, vlan), + ofp_print_ip6_addr(data->ip6_addr), + data->ip6_prefix); + + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /* SP */ + } +#endif /* INET6 */ + vlan_ifnet_delete( + shm->ofp_ifnet_data[port].vlan_structs, + &key, + free_key); +#ifdef SP + if (data->port == GRE_PORTS) + snprintf(cmd, sizeof(cmd), "ip tunnel del %s", + ofp_port_vlan_to_ifnet_name(port, vlan)); + else + snprintf(cmd, sizeof(cmd), "vconfig rem %s", + ofp_port_vlan_to_ifnet_name(port, vlan)); + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /*SP*/ + } else { + data = ofp_get_ifnet(port, vlan); + +#ifdef SP + vrf = data->vrf; +#endif + if (data->ip_addr) { + SET_ROUTE(OFP_ROUTE_DEL, + data->vrf, + data->ip_addr, + data->masklen, + 0, + port, + 0); +#ifdef SP + snprintf(cmd, sizeof(cmd), + "ifconfig %s 0.0.0.0", + ofp_port_vlan_to_ifnet_name(port, 0)); + + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif /* SP */ + data->ip_addr = 0; + /* Remove interface from the if_addr v4 queue */ + ofp_ifaddr_elem_del(data); + } +#ifdef INET6 + if (ofp_ip6_is_set(data->ip6_addr)) { + SET_ROUTE6(OFP_ROUTE6_DEL, + data->ip6_addr, + data->ip6_prefix, + (uint8_t *)gw6, + port, + 0); +#ifdef SP + snprintf(cmd, sizeof(cmd), + "ifconfig %s inet6 del %s/%d", + ofp_port_vlan_to_ifnet_name(port, vlan), + ofp_print_ip6_addr(data->ip6_addr), + data->ip6_prefix); + + ret = exec_sys_call_depending_on_vrf(cmd, vrf); +#endif + memset(data->ip6_addr, 0, 16); + + /* Remove interface from the if_addr v4 
queue */ + ofp_ifaddr6_elem_del(data); + } +#endif /* INET6 */ + } + + return NULL; +} + +struct ofp_ifnet *ofp_get_ifnet(int port, uint16_t vlan) +{ + if (vlan) { + struct ofp_ifnet key, *data; + + key.vlan = vlan; + if (ofp_vlan_get_by_key( + shm->ofp_ifnet_data[port].vlan_structs, + &key, + (void *)&data)) + return NULL; + + return data; + } + + if (port != PORT_UNDEF) + return &(shm->ofp_ifnet_data[port]); + else + return NULL; +} + +struct ofp_ifnet *ofp_get_create_ifnet(int port, uint16_t vlan) +{ + if (vlan) { + struct ofp_ifnet key, *data; + + key.vlan = vlan; + if (ofp_vlan_get_by_key( + shm->ofp_ifnet_data[port].vlan_structs, + &key, + (void *)&data)) { + data = malloc(sizeof(*data)); + memset(data, 0, sizeof(*data)); + data->port = port; + data->vlan = vlan; + memcpy(data->mac, shm->ofp_ifnet_data[port].mac, 6); + data->if_mtu = shm->ofp_ifnet_data[port].if_mtu; +#ifdef INET6 + memcpy(data->link_local, + shm->ofp_ifnet_data[port].link_local, 16); +#endif /* INET6 */ + vlan_ifnet_insert( + shm->ofp_ifnet_data[port].vlan_structs, data); + } + return data; + } + + return &(shm->ofp_ifnet_data[port]); +} + +int ofp_delete_ifnet(int port, uint16_t vlan) +{ + if (vlan) { + struct ofp_ifnet key, *data; + + key.vlan = vlan; + if (ofp_vlan_get_by_key( + shm->ofp_ifnet_data[port].vlan_structs, + &key, + (void *)&data)) + return 0; /* vlan not found (deleted already)*/ + + vlan_ifnet_delete( + shm->ofp_ifnet_data[port].vlan_structs, + &key, + free_key); + return 0; + } + return -1; +} + +#ifdef SP +struct iter_str { + int ix; + struct ofp_ifnet *dev; +}; + +static int iter_vlan_1(void *key, void *iter_arg) +{ + struct ofp_ifnet *iface = key; + struct iter_str *data = iter_arg; + + if (iface->linux_index == data->ix) { + data->dev = key; + return 1; + } + + return 0; +} + +struct ofp_ifnet *ofp_get_ifnet_by_linux_ifindex(int ix) +{ + int i; + struct iter_str data; + + if (odp_likely(ix < NUM_LINUX_INTERFACES)) + return ofp_get_ifnet( + 
shm->linux_interface_table[ix].port, + shm->linux_interface_table[ix].vlan); + + /* Iterate through other index values */ + data.ix = ix; + data.dev = NULL; + + for (i = 0; i < shm->ofp_num_ports && data.dev == NULL; i++) { + if (shm->ofp_ifnet_data[i].linux_index == ix) + return &(shm->ofp_ifnet_data[i]); + + vlan_iterate_inorder(shm->ofp_ifnet_data[i].vlan_structs, + iter_vlan_1, &data); + } + + return data.dev; +} + +void ofp_update_ifindex_lookup_tab(struct ofp_ifnet *ifnet) +{ + /* quick access table */ + if (ifnet->linux_index < NUM_LINUX_INTERFACES) { + shm->linux_interface_table[ifnet->linux_index].port = + ifnet->port; + shm->linux_interface_table[ifnet->linux_index].vlan = + ifnet->vlan; + } +} +#endif /* SP */ + +struct ofp_ifnet *ofp_get_ifnet_match(uint32_t ip, + uint16_t vrf, + uint16_t vlan) +{ + uint16_t port; + + if (vlan == 0) { + for (port = 0; port < shm->ofp_num_ports; port++) { + struct ofp_ifnet *ifnet = + &shm->ofp_ifnet_data[port]; + + if (ifnet->ip_addr == ip && ifnet->vrf == vrf && !vlan) + return ifnet; + } + } else { + for (port = 0; port < shm->ofp_num_ports; port++) { + uint16_t vlan_id = vlan_iterate_inorder( + shm->ofp_ifnet_data[port].vlan_structs, + vlan_match_ip, &ip); + + if (vlan_id) + return ofp_get_ifnet(port, vlan); + } + } + return NULL; +} + +static int iter_interface(void *key, void *iter_arg) +{ + struct ofp_ifnet *iface = key; + struct ofp_ifconf *ifc = iter_arg; + int len = ifc->ifc_current_len; + struct ofp_ifreq *ifr = (struct ofp_ifreq *)(((uint8_t *)ifc->ifc_buf) + len); + + if (len + (int)sizeof(struct ofp_ifreq) > ifc->ifc_len) + return 1; + + ifc->ifc_current_len += sizeof(struct ofp_ifreq); + + ((struct ofp_sockaddr_in *)&ifr->ifr_addr)->sin_addr.s_addr = + iface->ip_addr; + ifr->ifr_addr.sa_family = OFP_AF_INET; + + if (iface->port == GRE_PORTS) + snprintf(ifr->ifr_name, OFP_IFNAMSIZ, + "gre%d", iface->vlan); + else if (iface->vlan) + snprintf(ifr->ifr_name, OFP_IFNAMSIZ, + "fp%d.%d", iface->port, 
iface->vlan); + else + snprintf(ifr->ifr_name, OFP_IFNAMSIZ, + "fp%d", iface->port); + + return 0; +} + +void ofp_get_interfaces(struct ofp_ifconf *ifc) +{ + int i; + + ifc->ifc_current_len = 0; + + /* fp interfaces */ + for (i = 0; i < shm->ofp_num_ports - 1; i++) { + iter_interface(&shm->ofp_ifnet_data[i], ifc); + vlan_iterate_inorder(shm->ofp_ifnet_data[i].vlan_structs, + iter_interface, ifc); + } + + /* gre interfaces */ + if (avl_get_first(shm->ofp_ifnet_data[GRE_PORTS].vlan_structs)) + vlan_iterate_inorder( + shm->ofp_ifnet_data[GRE_PORTS].vlan_structs, + iter_interface, ifc); + + ifc->ifc_len = ifc->ifc_current_len; +} + +struct iter_ip { + uint32_t addr; + uint16_t vrf; +}; + +static int vlan_match_ip_vrf(void *key, void *iter_arg) +{ + struct ofp_ifnet *iface = key; + struct iter_ip *iterdata = (struct iter_ip *)iter_arg; + + if (iface->ip_addr == iterdata->addr && + iface->vrf == iterdata->vrf) + return iface->vlan; + else + return 0; +} + +struct ofp_ifnet *ofp_get_ifnet_by_ip(uint32_t ip, uint16_t vrf) +{ + uint16_t port; + struct ofp_ifnet *ifnet; + uint16_t vlan; + struct iter_ip iterdata; + + for (port = 0; port < shm->ofp_num_ports - 1; ++port) { + ifnet = &shm->ofp_ifnet_data[port]; + if (ifnet->ip_addr == ip && ifnet->vrf == vrf) + return ifnet; + } + + iterdata.addr = ip; + iterdata.vrf = vrf; + + for (port = 0; port < shm->ofp_num_ports - 1; ++port) { + vlan = vlan_iterate_inorder( + shm->ofp_ifnet_data[port].vlan_structs, + vlan_match_ip_vrf, &iterdata); + if (vlan) + return ofp_get_ifnet(port, vlan); + } + + return NULL; +} + +struct iter_tun { + uint32_t tun_loc; + uint32_t tun_rem; + uint16_t vrf; +}; + +static int vlan_match_tun(void *key, void *iter_arg) +{ + struct ofp_ifnet *iface = key; + struct iter_tun *tundata = iter_arg; + + if (iface->ip_local == tundata->tun_loc && + iface->ip_remote == tundata->tun_rem && + iface->vrf == tundata->vrf) + return iface->vlan; + else + return 0; +} + +struct ofp_ifnet 
*ofp_get_ifnet_by_tunnel(uint32_t tun_loc, + uint32_t tun_rem, uint16_t vrf) +{ + uint16_t port = GRE_PORTS; + uint16_t greid; + struct iter_tun tundata; + + tundata.tun_loc = tun_loc; + tundata.tun_rem = tun_rem; + tundata.vrf = vrf; + + greid = vlan_iterate_inorder( + shm->ofp_ifnet_data[port].vlan_structs, + vlan_match_tun, &tundata); + + if (greid) + return ofp_get_ifnet(port, greid); + + return NULL; +} + +struct ofp_ifnet *ofp_get_ifnet_pktio(odp_pktio_t pktio) +{ + return (struct ofp_ifnet *)odp_queue_get_context( + odp_pktio_outq_getdef(pktio)); +} +odp_queue_t ofp_pktio_spq_get(odp_pktio_t pktio) +{ +#ifdef SP + struct ofp_ifnet *ifnet = ofp_get_ifnet_pktio(pktio); + + return ifnet->spq_def; +#else + (void)pktio; + + return ODP_QUEUE_INVALID; +#endif +} + +odp_queue_t ofp_pktio_loopq_get(odp_pktio_t pktio) +{ + struct ofp_ifnet *ifnet = ofp_get_ifnet_pktio(pktio); + + return ifnet->loopq_def; +} + +odp_pktio_t ofp_port_pktio_get(int port) +{ + struct ofp_ifnet *ifnet = ofp_get_ifnet(port, 0); + + return ifnet->pktio; +} + +void ofp_portconf_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + /* Reserve memory for args from shared mem */ + shm_h = odp_shm_reserve("OfpPortconfShMem", + sizeof(*shm), ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + shm_h = odp_shm_reserve("OfpPortconfLocksShMem", + sizeof(*ofp_ifnet_locks_shm), + ODP_CACHE_LINE_SIZE, 0); + + ofp_ifnet_locks_shm = odp_shm_addr(shm_h); + + if (shm == NULL || ofp_ifnet_locks_shm == NULL) { + OFP_ABORT("Error: OfpPortconfShMem shared mem alloc" + " failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); +} + +void ofp_portconf_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpPortconfShMem"); + shm = odp_shm_addr(shm_h); + + shm_h = odp_shm_lookup("OfpPortconfLocksShMem"); + ofp_ifnet_locks_shm = odp_shm_addr(shm_h); + + if (shm == NULL || ofp_ifnet_locks_shm == NULL) { + OFP_ABORT("Error: OfpPortconfShMem shared 
mem lookup" + " failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} + +struct in_ifaddrhead *ofp_get_ifaddrhead(void) +{ + return &shm->in_ifaddrhead; +} + +void ofp_ifaddr_elem_add(struct ofp_ifnet *ifnet) +{ + struct ofp_ifnet *ia; + + OFP_IFNET_LOCK_WRITE(ifaddr_list); + + OFP_TAILQ_FOREACH(ia, ofp_get_ifaddrhead(), ia_link) { + if (ia == ifnet) + break; + } + + if (!ia) + OFP_TAILQ_INSERT_TAIL(ofp_get_ifaddrhead(), ifnet, ia_link); + + OFP_IFNET_UNLOCK_WRITE(ifaddr_list); +} + +void ofp_ifaddr_elem_del(struct ofp_ifnet *ifnet) +{ + struct ofp_ifnet *ia; + + OFP_IFNET_LOCK_WRITE(ifaddr_list); + + OFP_TAILQ_FOREACH(ia, ofp_get_ifaddrhead(), ia_link) { + if (ia == ifnet) + break; + } + + if (ia) + OFP_TAILQ_REMOVE(ofp_get_ifaddrhead(), ifnet, ia_link); + + OFP_IFNET_UNLOCK_WRITE(ifaddr_list); +} + +struct ofp_ifnet *ofp_ifaddr_elem_get(uint8_t *addr) +{ + struct ofp_ifnet *ifa; + + OFP_IFNET_LOCK_WRITE(ifaddr_list); + + OFP_TAILQ_FOREACH(ifa, ofp_get_ifaddrhead(), ia_link) { + if (ifa->ip_addr == *(uint32_t *)addr) + break; + } + + OFP_IFNET_UNLOCK_WRITE(ifaddr_list); + return ifa; +} + +uint32_t ofp_port_get_ipv4_addr(int port, uint16_t vlan, + enum ofp_portconf_ip_type type) +{ + struct ofp_ifnet *dev = ofp_get_ifnet(port, vlan); + uint32_t addr = 0; + + switch (type) { + case OFP_PORTCONF_IP_TYPE_IP_ADDR: + addr = dev->ip_addr; + break; + case OFP_PORTCONF_IP_TYPE_P2P: + addr = dev->ip_p2p; + break; + case OFP_PORTCONF_IP_TYPE_TUN_LOCAL: + addr = dev->ip_local; + break; + case OFP_PORTCONF_IP_TYPE_TUN_REM: + addr = dev->ip_remote; + break; + default: + addr = 0; + break; + } + + return addr; +} + +#ifdef INET6 +struct in_ifaddrhead *ofp_get_ifaddr6head(void) +{ + return &shm->in_ifaddr6head; +} + +void ofp_ifaddr6_elem_add(struct ofp_ifnet *ifnet) +{ + struct ofp_ifnet *ia6; + + OFP_IFNET_LOCK_WRITE(ifaddr6_list); + + OFP_TAILQ_FOREACH(ia6, ofp_get_ifaddr6head(), ia6_link) { + if (ia6 == ifnet) + break; + } + + if (!ia6) + 
OFP_TAILQ_INSERT_TAIL(ofp_get_ifaddr6head(), ifnet, ia6_link); + + OFP_IFNET_UNLOCK_WRITE(ifaddr6_list); +} + +void ofp_ifaddr6_elem_del(struct ofp_ifnet *ifnet) +{ + struct ofp_ifnet *ia6; + + OFP_IFNET_LOCK_WRITE(ifaddr6_list); + + OFP_TAILQ_FOREACH(ia6, ofp_get_ifaddr6head(), ia6_link) { + if (ia6 == ifnet) + break; + } + + if (ia6) + OFP_TAILQ_REMOVE(ofp_get_ifaddr6head(), ifnet, ia6_link); + + OFP_IFNET_UNLOCK_WRITE(ifaddr6_list); +} + +struct ofp_ifnet *ofp_ifaddr6_elem_get(uint8_t *addr6) +{ + struct ofp_ifnet *ifa6 = NULL; + + OFP_IFNET_LOCK_WRITE(ifaddr6_list); + + OFP_TAILQ_FOREACH(ifa6, ofp_get_ifaddr6head(), ia6_link) { + if (!memcmp(ifa6->ip6_addr, addr6, 16)) + break; + } + + OFP_IFNET_UNLOCK_WRITE(ifaddr6_list); + return ifa6; +} +#endif /* INET6 */ diff --git a/src/ofp_quagga.c b/src/ofp_quagga.c new file mode 100644 index 00000000..d76c4b53 --- /dev/null +++ b/src/ofp_quagga.c @@ -0,0 +1,630 @@ +/* + * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#ifdef HAVE_QUAGGA + +#include "fpm/fpm.h" + +typedef struct glob_t_ +{ + int server_sock; + int sock; +} glob_t; + +glob_t glob_space; +glob_t *glob = &glob_space; + +int log_level = 1; + +#define log(level, format...) \ + do { \ + if (level <= log_level) { \ + fprintf(stderr, format); \ + fprintf(stderr, "\n"); \ + } \ + } while (0); + +#define NUM_OF(x) (sizeof(x) / sizeof(x[0])) + +#define warn_msg(format...) log(0, format) +#define err_msg(format...) log(-1, format) +#define trace log + +/* + * get_print_buf + */ +static char * get_print_buf (size_t *buf_len) +{ + static char print_bufs[16][128]; + static int counter; + + counter++; + if (counter >= 16) { + counter = 0; + } + + *buf_len = 128; + return &print_bufs[counter][0]; +} + +/* + * create_listen_sock + */ +static int create_listen_sock (int port, int *sock_p) +{ + int sock; + struct sockaddr_in addr; + int reuse; + + sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sock < 0) { + err_msg(0, "Failed to create socket: %s", strerror(errno)); + return 0; + } + + reuse = 1; + if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) + { + warn_msg("Failed to set reuse addr option: %s", strerror(errno)); + } + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = htons(port); + + if (bind(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + err_msg("Failed to bind to port %d: %s", port, strerror(errno)); + close(sock); + return 0; + } + + if (listen(sock, 5)) { + err_msg("Failed to listen on socket: %s", strerror(errno)); + close(sock); + return 0; + } + + *sock_p = sock; + return 1; +} + +/* + * accept_conn + */ +static int accept_conn (int listen_sock) +{ + int sock; + struct sockaddr_in client_addr; + unsigned int client_len; + + while (1) { + trace(1, "Waiting for client 
connection..."); + client_len = sizeof(client_addr); + sock = accept(listen_sock, (struct sockaddr *) &client_addr, + &client_len); + + if (sock >= 0) { + trace(1, "Accepted client %s", inet_ntoa(client_addr.sin_addr)); + return sock; + } + + err_msg("Failed to accept socket: %s", strerror(errno)); + } + +} + +/* + * read_fpm_msg + */ +static fpm_msg_hdr_t * read_fpm_msg (char *buf, size_t buf_len) +{ + char *cur, *end; + int need_len, bytes_read, have_len; + fpm_msg_hdr_t *hdr; + int reading_full_msg; + + end = buf + buf_len; + cur = buf; + hdr = (fpm_msg_hdr_t *) buf; + + while (1) { + reading_full_msg = 0; + + have_len = cur - buf; + + if (have_len < FPM_MSG_HDR_LEN) { + need_len = FPM_MSG_HDR_LEN - have_len; + } else { + need_len = fpm_msg_len(hdr) - have_len; + assert(need_len >= 0 && need_len < (end - cur)); + + if (!need_len) + return hdr; + + reading_full_msg = 1; + } + + trace(3, "Looking to read %d bytes", need_len); + bytes_read = read(glob->sock, cur, need_len); + + if (bytes_read <= 0) { + err_msg("Error reading from socket: %s", strerror(errno)); + return NULL; + } + + trace(3, "Read %d bytes", bytes_read); + cur += bytes_read; + + if (bytes_read < need_len) { + continue; + } + + assert(bytes_read == need_len); + + if (reading_full_msg) + return hdr; + + if (!fpm_msg_ok(hdr, buf_len)) + { + assert(0); + err_msg("Malformed fpm message"); + return NULL; + } + } + +} + +/* + * netlink_msg_type_to_s + */ +static const char * netlink_msg_type_to_s (uint16_t type) +{ + switch (type) { + + case RTM_NEWROUTE: + return "New route"; + + case RTM_DELROUTE: + return "Del route"; + + default: + return "Unknown"; + } +} + +/* + * netlink_prot_to_s + */ +const char * +netlink_prot_to_s (unsigned char prot) +{ + switch (prot) { + + case RTPROT_KERNEL: + return "Kernel"; + + case RTPROT_BOOT: + return "Boot"; + + case RTPROT_STATIC: + return "Static"; + + case RTPROT_ZEBRA: + return "Zebra"; + + case RTPROT_DHCP: + return "Dhcp"; + + default: + return "Unknown"; + } 
+} + +#define MAX_NHS 16 + +typedef struct netlink_nh_t { + struct rtattr *gateway; + int if_index; +} netlink_nh_t; + +typedef struct netlink_msg_ctx_t_ { + struct nlmsghdr *hdr; + + /* + * Stuff pertaining to route messages. + */ + struct rtmsg *rtmsg; + struct rtattr *rtattrs[RTA_MAX + 1]; + + /* + * Nexthops. + */ + struct netlink_nh_t nhs[MAX_NHS]; + int num_nhs; + + struct rtattr *dest; + struct rtattr *src; + int *metric; + + const char *err_msg; +} netlink_msg_ctx_t; + +/* + * netlink_msg_ctx_init + */ +static inline void +netlink_msg_ctx_init (netlink_msg_ctx_t *ctx) +{ + memset(ctx, 0, sizeof(*ctx)); +} + +/* + * netlink_msg_ctx_set_err + */ +static inline void +netlink_msg_ctx_set_err (netlink_msg_ctx_t *ctx, const char *err_msg) +{ + if (ctx->err_msg) { + return; + } + ctx->err_msg = err_msg; +} + +/* + * netlink_msg_ctx_cleanup + */ +static inline void +netlink_msg_ctx_cleanup (netlink_msg_ctx_t *ctx) +{ + return; +} + +/* + * parse_rtattrs_ + */ +static int parse_rtattrs_ (struct rtattr *rta, size_t len, struct rtattr**rtas, + int num_rtas, const char **err_msg) +{ + memset(rtas, 0, num_rtas * sizeof(rtas[0])); + + for (; len > 0; rta = RTA_NEXT(rta, len)) { + if (!RTA_OK(rta, len)) { + *err_msg = "Malformed rta"; + return 0; + } + + if (rta->rta_type >= num_rtas) { + warn("Unknown rtattr type %d", rta->rta_type); + continue; + } + + rtas[rta->rta_type] = rta; + } +} + +/* + * parse_rtattrs + */ +static int parse_rtattrs (netlink_msg_ctx_t *ctx, struct rtattr *rta, size_t len) +{ + const char *err_msg; + + err_msg = NULL; + + if (!parse_rtattrs_(rta, len, ctx->rtattrs, NUM_OF(ctx->rtattrs), + &err_msg)) { + netlink_msg_ctx_set_err(ctx, err_msg); + return 0; + } + + return 1; +} + +/* + * netlink_msg_ctx_add_nh + */ +static int netlink_msg_ctx_add_nh (netlink_msg_ctx_t *ctx, int if_index, + struct rtattr *gateway) +{ + netlink_nh_t *nh; + + if (ctx->num_nhs + 1 >= NUM_OF(ctx->nhs)) { + warn("Too many next hops"); + return 0; + } + nh = 
&ctx->nhs[ctx->num_nhs]; + ctx->num_nhs++; + + nh->gateway = gateway; + nh->if_index = if_index; + return 1; +} + +/* + * parse_multipath_attr + */ +static int parse_multipath_attr (netlink_msg_ctx_t *ctx, struct rtattr *mpath_rtattr) +{ + size_t len, attr_len; + struct rtnexthop *rtnh; + struct rtattr *rtattrs[RTA_MAX + 1]; + struct rtattr *rtattr, *gateway; + int if_index; + const char *err_msg; + + rtnh = RTA_DATA(mpath_rtattr); + len = RTA_PAYLOAD(mpath_rtattr); + + for (; len > 0; + len -= NLMSG_ALIGN(rtnh->rtnh_len), rtnh = RTNH_NEXT(rtnh)) { + + if (!RTNH_OK(rtnh, len)) { + netlink_msg_ctx_set_err(ctx, "Malformed nh"); + return 0; + } + + if (rtnh->rtnh_len <= sizeof(*rtnh)) { + netlink_msg_ctx_set_err(ctx, "NH len too small"); + return 0; + } + + /* + * Parse attributes included in the nexthop. + */ + err_msg = NULL; + if (!parse_rtattrs_(RTNH_DATA(rtnh), rtnh->rtnh_len - sizeof(*rtnh), + rtattrs, NUM_OF(rtattrs), &err_msg)) { + netlink_msg_ctx_set_err(ctx, err_msg); + return 0; + } + + gateway = rtattrs[RTA_GATEWAY]; + netlink_msg_ctx_add_nh(ctx, rtnh->rtnh_ifindex, gateway); + } + + return 1; +} + +/* + * parse_route_msg + */ +static int parse_route_msg (netlink_msg_ctx_t *ctx) +{ + int len; + struct rtattr **rtattrs, *rtattr, *gateway, *oif; + int if_index; + + ctx->rtmsg = NLMSG_DATA(ctx->hdr); + + len = ctx->hdr->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg)); + if (len < 0) { + netlink_msg_ctx_set_err(ctx, "Bad message length"); + return 0; + } + + if (!parse_rtattrs(ctx, RTM_RTA(ctx->rtmsg), len)) { + return 0; + } + + rtattrs = ctx->rtattrs; + + ctx->dest = rtattrs[RTA_DST]; + ctx->src = rtattrs[RTA_PREFSRC]; + + rtattr = rtattrs[RTA_PRIORITY]; + if (rtattr) { + ctx->metric = (int *) RTA_DATA(rtattr); + } + + gateway = rtattrs[RTA_GATEWAY]; + oif = rtattrs[RTA_OIF]; + if (gateway || oif) { + if_index = 0; + if (oif) { + if_index = *((int *) RTA_DATA(oif)); + } + netlink_msg_ctx_add_nh(ctx, if_index, gateway); + } + + rtattr = rtattrs[RTA_MULTIPATH]; 
+ if (rtattr) { + parse_multipath_attr(ctx, rtattr); + } + + return 1; +} + +/* + * addr_to_s + */ +static const char * addr_to_s (unsigned char family, void *addr) +{ + size_t buf_len; + char *buf; + + buf = get_print_buf(&buf_len); + + return inet_ntop(family, addr, buf, buf_len); +} + +/* + * netlink_msg_ctx_print + */ +static int netlink_msg_ctx_snprint (netlink_msg_ctx_t *ctx, char *buf, size_t buf_len) +{ + struct nlmsghdr *hdr; + struct rtmsg *rtmsg; + netlink_nh_t *nh; + char *cur, *end; + int i; + + hdr = ctx->hdr; + rtmsg = ctx->rtmsg; + + cur = buf; + end = buf + buf_len; + + cur += snprintf(cur, end - cur, "%s %s/%d, Prot: %s", + netlink_msg_type_to_s(hdr->nlmsg_type), + addr_to_s(rtmsg->rtm_family, RTA_DATA(ctx->dest)), + rtmsg->rtm_dst_len, + netlink_prot_to_s(rtmsg->rtm_protocol)); + + if (ctx->metric) { + cur += snprintf(cur, end - cur, ", Metric: %d", *ctx->metric); + } + + for (i = 0; i < ctx->num_nhs; i++) { + cur += snprintf(cur, end - cur, "\n "); + nh = &ctx->nhs[i]; + + if (nh->gateway) { + cur += snprintf(cur, end - cur, " %s", + addr_to_s(rtmsg->rtm_family, RTA_DATA(nh->gateway))); + } + + if (nh->if_index) { + cur += snprintf(cur, end - cur, " via interface %d", nh->if_index); + } + } + + return cur - buf; +} + +/* + * print_netlink_msg_ctx + */ +static void print_netlink_msg_ctx (netlink_msg_ctx_t *ctx) +{ + char buf[1024]; + + netlink_msg_ctx_snprint(ctx, buf, sizeof(buf)); + printf("%s\n", buf); +} + +/* + * parse_netlink_msg + */ +static void parse_netlink_msg (char *buf, size_t buf_len) +{ + netlink_msg_ctx_t ctx_space, *ctx; + struct nlmsghdr *hdr; + int status; + int len; + + ctx = &ctx_space; + + hdr = (struct nlmsghdr *) buf; + len = buf_len; + for (; NLMSG_OK (hdr, len); hdr = NLMSG_NEXT(hdr, len)) { + + netlink_msg_ctx_init(ctx); + ctx->hdr = (struct nlmsghdr *) buf; + + switch (hdr->nlmsg_type) { + + case RTM_DELROUTE: + case RTM_NEWROUTE: + + parse_route_msg(ctx); + if (ctx->err_msg) { + err_msg("Error parsing route message: 
%s", ctx->err_msg); + } + + print_netlink_msg_ctx(ctx); + break; + + default: + trace(1, "Ignoring unknown netlink message - Type: %d", hdr->nlmsg_type); + } + + netlink_msg_ctx_cleanup(ctx); + } +} + +/* + * process_fpm_msg + */ +static void process_fpm_msg (fpm_msg_hdr_t *hdr) +{ + trace(1, "FPM message - Type: %d, Length %d", hdr->msg_type, + ntohs(hdr->msg_len)); + + if (hdr->msg_type != FPM_MSG_TYPE_NETLINK) { + warn("Unknown fpm message type %u", hdr->msg_type); + return; + } + + parse_netlink_msg (fpm_msg_data (hdr), fpm_msg_data_len (hdr)); +} + +/* + * fpm_serve + */ +static void fpm_serve () +{ + char buf[FPM_MAX_MSG_LEN]; + fpm_msg_hdr_t *hdr; + + while (1) { + + hdr = read_fpm_msg(buf, sizeof(buf)); + if (!hdr) { + return; + } + + process_fpm_msg(hdr); + } +} + +void * start_quagga_nl_server(void *arg) +{ + int sock; + + memset(glob, 0, sizeof(*glob)); + + if (!create_listen_sock(FPM_DEFAULT_PORT, &glob->server_sock)) { + exit(1); + } + + /* + * Server forever. + */ + while (1) { + glob->sock = accept_conn(glob->server_sock); + fpm_serve(); + trace(1, "Done serving client"); + } + + /* Never reached */ + return NULL; +} +#endif diff --git a/src/ofp_reass.c b/src/ofp_reass.c new file mode 100644 index 00000000..228751f5 --- /dev/null +++ b/src/ofp_reass.c @@ -0,0 +1,516 @@ +/*- + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 ENEA Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ofpi.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_portconf.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_route.h" +#include "ofpi_util.h" +#include "ofpi_stat.h" +#include "ofpi_debug.h" +#include "ofpi_avl.h" +#include "ofpi_protosw.h" +#include "ofpi_ip6protosw.h" +#include "ofpi_arp.h" +#include "ofpi_hook.h" +#include "ofpi_log.h" +#include "ofpi_socketvar.h" +#include "ofpi_queue.h" +#include "ofpi_reass.h" + +#define IPREASS_NHASH_LOG2 6 +#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) +#define IPREASS_HMASK (IPREASS_NHASH - 1) +#define IPREASS_HASH(x,y) \ + (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) + +/* + * Chain is an IP fragment queue. Chains are linked together via the first + * packet. Packet headroom is used to save pointer information. 
+ */ +#define NEXT_CHAIN(_f) ((_f)->next_chain) +#define NEXT_FRAG(_f) ((_f)->next_frag) +#define NEXT_TMO(_f) ((_f)->next_tmo) + +#define SET_NEXT_CHAIN(_f, _v) (_f)->next_chain = _v +#define SET_NEXT_FRAG(_f, _v) (_f)->next_frag = _v +#define SET_NEXT_TMO(_f, _v) (_f)->next_tmo = _v + +struct frag { + struct frag *next_chain; + struct frag *next_frag; + struct frag *next_tmo; + odp_packet_t pkt; + uint16_t off_hashix; + uint8_t nfrags; + uint8_t ipq_ttl; +}; + +struct ofp_reassembly_mem { + int maxnipq, nipq; + int maxfragsperpacket; + struct frag *ipq[IPREASS_NHASH]; + odp_spinlock_t ipqlock; + odp_timer_t timer; +}; + +static struct ofp_reassembly_mem *shm; + +static void ip_freef(struct frag **head, struct frag *chain); +static void slow_tmo(void *arg); + +static inline struct ofp_ip *FRAG_IP(struct frag *f) +{ + struct ofp_ip *ip; + /* Packet is pulled for frag struct */ + char *l3 = odp_packet_l3_ptr(f->pkt, NULL); + ip = (struct ofp_ip *)(l3 + sizeof(struct frag)); + return ip; +} + +void ofp_reassembly_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_reserve("OfpIpShMem", sizeof(*shm), + ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: Ip shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); + shm->maxnipq = 1024; + shm->maxfragsperpacket = 16; + shm->timer = ODP_TIMER_INVALID; + odp_spinlock_init(&shm->ipqlock); +} + +void ofp_reassembly_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpIpShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: Ip shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} + +/* IP fragment reassembly functionality*/ +odp_packet_t ofp_ip_reass(odp_packet_t pkt) +{ + struct ofp_ip *pkt_ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + struct ofp_ip *frag_ip, *chain_ip; + int hlen = pkt_ip->ip_hl << 2; + 
uint8_t ttl = pkt_ip->ip_ttl; + uint16_t hash; + odp_packet_t ret; + struct frag **head, *chain = NULL, *frag, *pkt_p, *last, + *c1 = NULL, *c2 = NULL; + + if (shm->timer == ODP_TIMER_INVALID) + shm->timer = ofp_timer_start(1000000, slow_tmo, NULL, 0); + + if (shm->nipq > shm->maxnipq) + goto dropfrag; + + /* To host byte order */ + pkt_ip->ip_len = odp_be_to_cpu_16(pkt_ip->ip_len); + pkt_ip->ip_off = odp_be_to_cpu_16(pkt_ip->ip_off); + hash = IPREASS_HASH(pkt_ip->ip_src.s_addr, pkt_ip->ip_id); + head = &shm->ipq[hash]; + odp_spinlock_lock(&shm->ipqlock); + + /* + * Make space for frag header. + */ + pkt_p = odp_packet_push_head(pkt, sizeof(struct frag)); + if (!pkt_p) + goto dropfrag; + + /* + * Save data to frag header. + */ + pkt_p->pkt = pkt; + pkt_p->off_hashix = (pkt_ip->ip_off & ~OFP_IP_OFFMASK) | hash; + SET_NEXT_CHAIN(pkt_p, NULL); + SET_NEXT_FRAG(pkt_p, NULL); + SET_NEXT_TMO(pkt_p, NULL); + pkt_p->nfrags = 1; + + /* + * Look for queue of fragments + * of this datagram. + */ + chain = *head; + while (chain) { + chain_ip = FRAG_IP(chain); + if (pkt_ip->ip_id == chain_ip->ip_id && + pkt_ip->ip_src.s_addr == chain_ip->ip_src.s_addr && + pkt_ip->ip_dst.s_addr == chain_ip->ip_dst.s_addr && + pkt_ip->ip_p == chain_ip->ip_p) + goto found; + c1 = chain; + chain = NEXT_CHAIN(chain); + } + + chain = NULL; + + /* + * Attempt to trim the number of allocated fragment queues if it + * exceeds the administrative limit. + */ + if ((shm->nipq > shm->maxnipq) && (shm->maxnipq > 0)) { + } + +found: + /* + * Adjust ip_len to not reflect header, + * convert offset of this to bytes. + */ + pkt_ip->ip_len -= hlen; + if (pkt_ip->ip_off & OFP_IP_MF) { + /* + * Make sure that fragments have a data length + * that's a non-zero multiple of 8 bytes. + */ + if (pkt_ip->ip_len == 0 || (pkt_ip->ip_len & 0x7) != 0) { + goto dropfrag; + } + } + pkt_ip->ip_off <<= 3; + + /* + * If first fragment to arrive, create a reassembly queue. 
+ */ + if (chain == NULL) { + shm->nipq++; + pkt_p->ipq_ttl = ttl < 15 ? 15 : ttl; + SET_NEXT_CHAIN(pkt_p, *head); + *head = pkt_p; + goto done; + } else { + chain->nfrags++; + c2 = NEXT_CHAIN(chain); + if (ttl > chain->ipq_ttl) + chain->ipq_ttl = ttl; + } + + /* + * Find a segment which begins after this one does. + */ + struct frag *prev = NULL; + frag = last = chain; + while (frag) { + last = frag; + frag_ip = FRAG_IP(frag); + if (pkt_ip->ip_off <= frag_ip->ip_off) + break; + prev = frag; + frag = NEXT_FRAG(frag); + } + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us, otherwise + * stick new segment in the proper place. + * + * If some of the data is dropped from the preceding + * segment, then it's checksum is invalidated. + */ + if (frag) { + if (prev) { // not first in list + int over; + struct ofp_ip *prev_ip = FRAG_IP(prev); + over = prev_ip->ip_off + prev_ip->ip_len - pkt_ip->ip_off; + if (over > 0) { + if (over >= pkt_ip->ip_len) + goto dropfrag; + memmove((char *)pkt_ip + hlen, + (char *)pkt_ip + hlen + over, + pkt_ip->ip_len - over); + pkt_ip->ip_off += over; + pkt_ip->ip_len -= over; + } + SET_NEXT_FRAG(pkt_p, frag); + SET_NEXT_FRAG(prev, pkt_p); + } else { // new first in chain + pkt_p->nfrags = frag->nfrags; + SET_NEXT_FRAG(pkt_p, frag); + SET_NEXT_CHAIN(pkt_p, NEXT_CHAIN(chain)); + if (c1) { + SET_NEXT_CHAIN(c1, pkt_p); + } else { + *head = pkt_p; + } + chain = pkt_p; + } + } else { // append to chain + SET_NEXT_FRAG(last, pkt_p); + } + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. 
+ */ + prev = pkt_p; + struct frag *fr = NEXT_FRAG(prev); + while (fr) { + struct ofp_ip *prev_ip = FRAG_IP(prev); + struct ofp_ip *fr_ip = FRAG_IP(fr); + int over = prev_ip->ip_off + prev_ip->ip_len - fr_ip->ip_off; + if (over > 0) { + if (over >= fr_ip->ip_len) { + odp_packet_t tmp = fr->pkt; + SET_NEXT_FRAG(prev, NEXT_FRAG(fr)); + fr = prev; + chain->nfrags--; + odp_packet_free(tmp); + } else { + int off = fr_ip->ip_hl << 2; + memmove((char *)fr_ip + off, + (char *)fr_ip + off + over, + fr_ip->ip_len - over); + fr_ip->ip_off += over; + fr_ip->ip_len -= over; + } + } + prev = fr; + fr = NEXT_FRAG(fr); + } + + /* + * Check for complete reassembly and perform frag per packet + * limiting. + * + * Frag limiting is performed here so that the nth frag has + * a chance to complete the packet before we drop the packet. + * As a result, n+1 frags are actually allowed per packet, but + * only n will ever be stored. (n = maxfragsperpacket.) + * + */ + uint16_t saved_off; + int next = 0; + frag = chain; + while (frag) { + saved_off = frag->off_hashix; + frag_ip = FRAG_IP(frag); + if (frag_ip->ip_off != next) { + if (chain->nfrags > shm->maxfragsperpacket) + ip_freef(head, chain); + goto done; + } + next += frag_ip->ip_len; + frag = NEXT_FRAG(frag); + } + + /* Make sure the last packet didn't have the IP_MF flag */ + if (saved_off & OFP_IP_MF) { + if (chain->nfrags > shm->maxfragsperpacket) + ip_freef(head, chain); + goto done; + } + + /* + * Reassembly is complete. Make sure the packet is a sane size. + */ + if (next + hlen > 65535) { + ip_freef(head, chain); + goto done; + } + + /* + * Concatenate fragments. 
+ */ + if (c1) + SET_NEXT_CHAIN(c1, c2); + else + *head = c2; + + shm->nipq--; + frag = NEXT_FRAG(chain); + chain_ip = FRAG_IP(chain); + ret = chain->pkt; + odp_packet_pull_head(ret, sizeof(struct frag)); + int len = (chain_ip->ip_hl << 2) + chain_ip->ip_len; + int nextoff = odp_packet_l3_offset(ret) + len; + + while (frag) { + frag_ip = FRAG_IP(frag); + int fraghlen = frag_ip->ip_hl<<2; + int fraglen = frag_ip->ip_len; + ret = odp_packet_add_data(ret, nextoff, fraglen); + odp_packet_copydata_in(ret, nextoff, fraglen, + (char *)(frag_ip) + fraghlen); + nextoff += fraglen; + len += fraglen; + odp_packet_t tmp = frag->pkt; + frag = NEXT_FRAG(frag); + odp_packet_free(tmp); + } + + chain_ip = odp_packet_l3_ptr(ret, NULL); + chain_ip->ip_sum = 0; + chain_ip->ip_off = 0; + chain_ip->ip_len = odp_cpu_to_be_16(len); + chain_ip->ip_sum = ofp_in_cksum((uint16_t *)chain_ip, + chain_ip->ip_hl << 2); + odp_spinlock_unlock(&shm->ipqlock); + return ret; + +dropfrag: + if (chain) + chain->nfrags--; + odp_packet_free(pkt); +done: + odp_spinlock_unlock(&shm->ipqlock); + return ODP_PACKET_INVALID; +} + +/* + * Free a fragment reassembly header and all + * associated datagrams. + */ +static void +ip_freef(struct frag **head, struct frag *chain) +{ + struct frag *c1, *c2; + + c1 = *head; + c2 = NEXT_CHAIN(chain); + + if (chain == c1) { + *head = c2; + } else { + while (c1 && NEXT_CHAIN(c1) != chain) { + c1 = NEXT_CHAIN(c1); + } + if (c1) + SET_NEXT_CHAIN(c1, c2); + else { + OFP_ERR("Error: Chain not found!\n"); + exit(1); + } + } + + while (chain) { + odp_packet_t tmp = chain->pkt; + chain = NEXT_FRAG(chain); + odp_packet_free(tmp); + } +} + +static void slow_tmo(void *arg) +{ + int i; + struct frag *chain, *frag, *prev, *next; + (void)arg; + + odp_spinlock_lock(&shm->ipqlock); + + for (i = 0; i < IPREASS_NHASH; i++) { + prev = NULL; + chain = shm->ipq[i]; + while (chain) { + next = NEXT_CHAIN(chain); + if (! 
--chain->ipq_ttl) { + if (!prev) + shm->ipq[i] = next; + else + SET_NEXT_CHAIN(prev, next); + frag = chain; + + odp_packet_pull_head(frag->pkt, sizeof(struct frag)); + ofp_icmp_error(frag->pkt, OFP_ICMP_TIMXCEED, OFP_ICMP_TIMXCEED_REASS, 0 , 0); + odp_packet_push_head(frag->pkt, sizeof(struct frag)); + + while (frag) { + odp_packet_t tmp = frag->pkt; + frag = NEXT_FRAG(frag); + odp_packet_free(tmp); + } + } else + prev = chain; + chain = next; + } + } + + odp_spinlock_unlock(&shm->ipqlock); + shm->timer = ofp_timer_start(1000000, slow_tmo, NULL, 0); +} + +#if 0 +/* For debugging purposes */ +void ofp_print_reass_queue(void) +{ + int i; + struct frag *frag, *chain; + struct ofp_ip *frag_ip, *chain_ip; + + printf("\nREASS QUEUES:\n"); + for (i = 0; i < IPREASS_NHASH; i++) { + chain = shm->ipq[i]; + while (chain) { + chain_ip = FRAG_IP(chain); + printf("Chain i=%d chain=%p src=%x dst=%x p=%d id=%d:\n", + i, chain, + chain_ip->ip_src.s_addr, + chain_ip->ip_dst.s_addr, + chain_ip->ip_p, + chain_ip->ip_p); + frag = chain; + while (frag) { + frag_ip = FRAG_IP(frag); + printf(" [frag=%p off=%d len=%d]\n", + frag, + frag_ip->ip_off, + frag_ip->ip_len); + frag = NEXT_FRAG(frag); + } + printf("\n"); + chain = NEXT_CHAIN(chain); + } + } + printf("\n"); +} +#endif diff --git a/src/ofp_route.c b/src/ofp_route.c new file mode 100644 index 00000000..8aa9b6a6 --- /dev/null +++ b/src/ofp_route.c @@ -0,0 +1,569 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include + +#include "ofpi.h" +#include "odp/rwlock.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_route.h" + +#include "ofpi_util.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_arp.h" +#include "ofpi_avl.h" +#include "ofpi_portconf.h" +#include "ofpi_log.h" + +#define USE_RW_LOCK 1 + +/* number of saved packets waiting for Neighbor Advertisement */ +#define NUM_PKTS 2048 + +/* + * Structure definitions + */ +struct routes_by_vrf { + uint16_t vrf; + struct ofp_rtl_tree routes; +}; + +struct pkt6_entry { + odp_packet_t pkt; + + OFP_SLIST_ENTRY(pkt6_entry) next; +}; + +struct _pkt6 { + struct pkt6_entry entries[NUM_PKTS] ODP_ALIGNED_CACHE; + struct pkt6_list free_entries; + odp_rwlock_t fr_ent_rwlock; +}; + + +/* + * Shared data + */ +struct ofp_route_mem { + avl_tree *vrf_routes; + struct ofp_rtl_tree default_routes; + struct ofp_rtl6_tree default_routes_6; + struct _pkt6 pkt6; +}; + +/* + * Data per core + */ + +static __thread struct ofp_route_mem *shm; +struct ofp_locks_str *ofp_locks_shm; + +static int routes_avl_compare(void *compare_arg, void *a, void *b) +{ + (void) compare_arg; + struct routes_by_vrf *a1 = a; + struct routes_by_vrf *b1 = b; + + return (a1->vrf - b1->vrf); +} + +void ofp_route_init(void) +{ + int i; + + odp_rwlock_init(&ofp_locks_shm->lock_config_rw); + odp_rwlock_init(&ofp_locks_shm->lock_route_rw); + + /*avl_tree_new(routes_avl_compare, NULL);*/ + ofp_rtl_init(&shm->default_routes); + ofp_rtl6_init(&shm->default_routes_6); + shm->vrf_routes = avl_tree_new(routes_avl_compare, NULL); + + odp_rwlock_init(&shm->pkt6.fr_ent_rwlock); + memset(shm->pkt6.entries, 0, sizeof(shm->pkt6.entries)); + OFP_SLIST_INIT(&shm->pkt6.free_entries); + for (i = NUM_PKTS - 1; i >= 0; --i) + OFP_SLIST_INSERT_HEAD(&shm->pkt6.free_entries, + &shm->pkt6.entries[i], next); +} + +static inline void *pkt6_entry_alloc(void) +{ + struct pkt6_entry *pktentry; + + 
odp_rwlock_write_lock(&shm->pkt6.fr_ent_rwlock); + + pktentry = OFP_SLIST_FIRST(&shm->pkt6.free_entries); + + if (pktentry) + OFP_SLIST_REMOVE_HEAD(&shm->pkt6.free_entries, next); + + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->pkt6.fr_ent_rwlock); + + return pktentry; +} + +static inline void pkt6_entry_free(struct pkt6_entry *pktentry) +{ + memset(pktentry, 0, sizeof(*pktentry)); + + odp_rwlock_write_lock(&shm->pkt6.fr_ent_rwlock); + OFP_SLIST_INSERT_HEAD(&shm->pkt6.free_entries, pktentry, next); + odp_sync_stores(); + odp_rwlock_write_unlock(&shm->pkt6.fr_ent_rwlock); +} + + +/* ARP related functions */ +int ofp_add_mac(struct ofp_ifnet *dev, uint32_t addr, uint8_t *mac) +{ + int ret; + + OFP_DBG("ofp_add_mac() port %d vlan %d vrf %d add_mac ip %s - MAC %s\n", + dev->port, dev->vlan, dev->vrf, + ofp_print_ip_addr(addr), ofp_print_mac(mac)); + + ret = ofp_arp_ipv4_insert(addr, mac, dev); + + return ret; +} + +int ofp_get_mac(struct ofp_ifnet *dev, uint32_t addr, uint8_t *mac_out) +{ + int ifindex; + + ifindex = ofp_ipv4_lookup_mac(addr, mac_out, dev); + return ifindex; +} + +int ofp_del_mac(struct ofp_ifnet *dev, uint32_t addr, uint8_t *mac) +{ + int ret; + + printf("del_mac() port %d vlan %d vrf %d add_mac ip %s - MAC %s\n" , + dev->port, dev->vlan , dev->vrf , + ofp_print_ip_addr(addr) , ofp_print_mac(mac)); + ret = ofp_arp_ipv4_remove(addr, dev); + return ret; +} + +struct ofp_nh6_entry *ofp_get_next_hop6(uint16_t vrf, + uint8_t *addr, uint32_t *flags) +{ + struct ofp_nh6_entry *nh6; + + (void) vrf; + (void) flags; + + OFP_LOCK_READ(route); + nh6 = ofp_rtl_search6(&shm->default_routes_6, addr); + OFP_UNLOCK_READ(route); + + return nh6; +} + +void ofp_add_mac6(struct ofp_ifnet *dev, uint8_t *addr, uint8_t *mac) +{ + struct ofp_nh6_entry *nh; + struct pkt6_entry *pktentry; + struct pkt6_list pkt6_send; + + OFP_LOCK_READ(route); + nh = ofp_rtl_search6(&shm->default_routes_6, addr); + if (!nh) { + OFP_DBG("ofp_add_mac6: cannot add mac for %s\n", + 
ofp_print_ip6_addr(addr)); + OFP_UNLOCK_READ(route); + return; + } + + OFP_DBG("ofp_add_mac6 : mac added for %s (%s)\n", + ofp_print_ip6_addr(addr), + ofp_port_vlan_to_ifnet_name(dev->port, dev->vlan)); + + memcpy(nh->mac, mac, 6); + + /* We need to + - copy pkt list to release lock while sending packets + - reverse pkt list to send data in proper order*/ + OFP_SLIST_INIT(&pkt6_send); + while ((pktentry = OFP_SLIST_FIRST(&nh->pkt6_hold))) { + OFP_SLIST_REMOVE_HEAD(&nh->pkt6_hold, next); + OFP_SLIST_INSERT_HEAD(&pkt6_send, pktentry, next); + } + OFP_SLIST_INIT(&nh->pkt6_hold); + OFP_UNLOCK_READ(route); + + while ((pktentry = OFP_SLIST_FIRST(&pkt6_send))) { + OFP_SLIST_REMOVE_HEAD(&pkt6_send, next); + if (ofp_ip6_output(pktentry->pkt, nh) == OFP_PKT_DROP) + odp_packet_free(pktentry->pkt); + pkt6_entry_free(pktentry); + } +} + +static int add_route(struct ofp_route_msg *msg) +{ + struct ofp_nh_entry tmp; + + OFP_LOCK_WRITE(route); + + tmp.gw = msg->gw; + tmp.port = msg->port; + tmp.vlan = msg->vlan; + + OFP_DBG("Adding route vrf=%d addr=%s/%d\n", msg->vrf, + ofp_print_ip_addr(msg->dst), msg->masklen); + if (msg->vrf) { + struct routes_by_vrf key, *data; + + key.vrf = msg->vrf; + if (avl_get_by_key(shm->vrf_routes, &key, (void *)&data)) { + printf(" vrf doesn't exist yet\n"); + data = malloc(sizeof(*data)); + memset(data, 0, sizeof(*data)); + data->vrf = msg->vrf; + ofp_rtl_root_init(&(data->routes), msg->vrf); + avl_insert(shm->vrf_routes, data); + } + if (ofp_rtl_insert(&(data->routes), msg->dst, + msg->masklen, &tmp)) + OFP_DBG(" insert error: data exists\n"); + } else { + if (ofp_rtl_insert(&shm->default_routes, msg->dst, + msg->masklen, &tmp)) + OFP_DBG(" insert error: data exists\n"); + } +#ifdef MTRIE + ofp_rt_rule_add(msg->vrf, msg->dst, msg->masklen, &tmp); +#endif + + OFP_UNLOCK_WRITE(route); + + return 0; +} + +static int del_route(struct ofp_route_msg *msg) +{ + OFP_DBG("Deleting route vrf=%d addr=%s/%d\n", msg->vrf, + ofp_print_ip_addr(msg->dst), 
msg->masklen); + + OFP_LOCK_WRITE(route); + + if (msg->vrf) { + struct routes_by_vrf key, *data; + + key.vrf = msg->vrf; + if (avl_get_by_key(shm->vrf_routes, &key, (void *)&data)) { + OFP_DBG("del_route: vrf does not exist!\n"); + OFP_UNLOCK_WRITE(route); + return -1; + } + if (!ofp_rtl_remove(&(data->routes), msg->dst, msg->masklen)) + OFP_DBG(" delete error: data does not exist in vrf\n"); + } else { + if (!ofp_rtl_remove(&shm->default_routes, msg->dst, msg->masklen)) + OFP_DBG(" delete error: data does not exist\n"); + } +#ifdef MTRIE + ofp_rt_rule_remove(msg->vrf, msg->dst, msg->masklen); +#endif + + OFP_UNLOCK_WRITE(route); + + return 0; +} +#ifdef INET6 +static int add_route6(struct ofp_route_msg *msg) +{ + struct ofp_nh6_entry tmp; + + memset(&tmp, 0, sizeof(tmp)); + + OFP_LOCK_WRITE(route); + + memcpy(tmp.gw, msg->gw6, 16); + tmp.port = msg->port; + tmp.vlan = msg->vlan; + OFP_SLIST_INIT(&tmp.pkt6_hold); + + OFP_DBG("Adding ipv6 route vrf=%d addr=%s/%d gw=%s\n", msg->vrf, + ofp_print_ip6_addr(msg->dst6), msg->masklen, + ofp_print_ip6_addr(msg->gw6)); + + if (ofp_rtl_insert6(&shm->default_routes_6, msg->dst6, + msg->masklen, &tmp)) + OFP_DBG(" insert error: data exists\n"); + + OFP_UNLOCK_WRITE(route); + + return 0; +} + +static int del_route6(struct ofp_route_msg *msg) +{ + struct ofp_nh6_entry *nh6; + struct pkt6_entry *pktentry; + + OFP_DBG("Deleting route vrf=%d addr=%s/%d\n", msg->vrf, + ofp_print_ip6_addr(msg->dst6), msg->masklen); + + OFP_LOCK_WRITE(route); + + nh6 = ofp_rtl_remove6(&shm->default_routes_6, msg->dst6, msg->masklen); + + if (nh6) { + while ((pktentry = OFP_SLIST_FIRST(&nh6->pkt6_hold))) { + OFP_SLIST_REMOVE_HEAD(&nh6->pkt6_hold, next); + odp_packet_free(pktentry->pkt); + pkt6_entry_free(pktentry); + } + } else + OFP_DBG(" delete error: data does not exist\n"); + + OFP_UNLOCK_WRITE(route); + + return 0; +} + +int ofp_route_save_ipv6_pkt(odp_packet_t pkt, + uint8_t *addr, struct ofp_ifnet *dev) +{ + struct ofp_nh6_entry *nh6 = NULL; + 
struct pkt6_entry *pktentry; + + (void)dev; + + OFP_LOCK_READ(route); + nh6 = ofp_rtl_search6(&shm->default_routes_6, addr); + if (!nh6) { + OFP_UNLOCK_READ(route); + return OFP_PKT_DROP; + } + + pktentry = pkt6_entry_alloc(); + if (!pktentry) { + OFP_UNLOCK_READ(route); + return OFP_PKT_DROP; + } + pktentry->pkt = pkt; + + OFP_SLIST_INSERT_HEAD(&nh6->pkt6_hold, pktentry, next); + + OFP_UNLOCK_READ(route); + return OFP_PKT_PROCESSED; +} +#endif /* INET6 */ + +static void show_routes(int fd, uint32_t key, int level, struct ofp_nh_entry *data) +{ + char buf[24]; + snprintf(buf, sizeof(buf), "%s/%d", ofp_print_ip_addr(odp_cpu_to_be_32(key)), level); + ofp_sendf(fd, "%-18s %-15s %s\r\n", + buf, + ofp_print_ip_addr(data->gw), + ofp_port_vlan_to_ifnet_name(data->port, data->vlan)); +} + +#ifdef INET6 +static void show_routes6(int fd, uint8_t *key, int level, struct ofp_nh6_entry *data) +{ + char buf[128]; + snprintf(buf, sizeof(buf), "%s/%d", ofp_print_ip6_addr(key), level); + ofp_sendf(fd, "%-30s %-28s %s\r\n", + buf, + ofp_print_ip6_addr(data->gw), + ofp_port_vlan_to_ifnet_name(data->port, data->vlan)); +} +#endif /* INET6 */ + +static int iter_routes(void * key, void * iter_arg) +{ + struct routes_by_vrf *rbv = key; + int fd = *((int *)iter_arg); + ofp_sendf(fd, "VRF: %d\r\n", rbv->vrf); +#ifdef MTRIE + ofp_rt_rule_print(fd, rbv->vrf, show_routes); +#else + ofp_rtl_traverse(fd, &(rbv->routes), show_routes); +#endif + return 0; +} + +void ofp_show_routes(int fd, int what) +{ + switch (what) { + case OFP_SHOW_ARP: + ofp_sendf(fd, + "VRF ADDRESS MAC AGE\r\n"); + ofp_arp_show_table(fd); /* ofp_rtl_traverse(fd, &shm->default_routes, show_arp); */ + break; + case OFP_SHOW_ROUTES: + ofp_sendf(fd, "Destination Gateway Iface\r\n"); +#ifdef MTRIE + ofp_rt_rule_print(fd, 0, show_routes); +#else + ofp_rtl_traverse(fd, &shm->default_routes, show_routes); +#endif + avl_iterate_inorder(shm->vrf_routes, iter_routes, &fd); +#ifdef INET6 + ofp_sendf(fd, "\r\nIPv6 routes\r\n"); + 
ofp_rtl_traverse6(fd, &shm->default_routes_6, show_routes6); +#endif /* INET6 */ + break; + } +} + +struct ofp_nh_entry *ofp_get_next_hop(uint16_t vrf, uint32_t addr, uint32_t *flags) +{ + (void) flags; + struct ofp_nh_entry *node; + + if (vrf) { + struct routes_by_vrf key, *data; + + key.vrf = vrf; + if (avl_get_by_key(shm->vrf_routes, &key, (void *)&data)) { + printf("%s(): VRF %d not defined!\n", __FUNCTION__, vrf); + return NULL; + } + OFP_LOCK_READ(route); + node = ofp_rtl_search(&(data->routes), addr); + OFP_UNLOCK_READ(route); + } else { + OFP_LOCK_READ(route); + node = ofp_rtl_search(&shm->default_routes, addr); + OFP_UNLOCK_READ(route); + } + + return node; +} + +static int add_local_interface(struct ofp_route_msg *msg) +{ + msg->masklen = 32; + return add_route(msg); +} + +static int del_local_interface(struct ofp_route_msg *msg) +{ + OFP_LOCK_WRITE(route); + if (!ofp_rtl_remove(&shm->default_routes, msg->dst, 32)) + OFP_DBG(" delete error: data does not exist\n"); + OFP_UNLOCK_WRITE(route); + + return 0; +} + +int32_t ofp_set_route(struct ofp_route_msg *msg) +{ + /*printf("routing msg: type = %d, dst=%x/%d, gw=%x\n", + msg->type, msg->dst, msg->masklen, msg->gw);*/ + if (msg->type == OFP_ROUTE_ADD) + return add_route(msg); + + if (msg->type == OFP_ROUTE_DEL) + return del_route(msg); +#ifdef INET6 + if (msg->type == OFP_ROUTE6_ADD) + return add_route6(msg); + + if (msg->type == OFP_ROUTE6_DEL) + return del_route6(msg); +#endif /* INET6 */ +/* +TODO hash implementation for OFP_MOBILE_ROUTE_ADD,OFP_MOBILE_ROUTE_DEL +*/ + if (msg->type == OFP_LOCAL_INTERFACE_ADD) + return add_local_interface(msg); + + if (msg->type == OFP_LOCAL_INTERFACE_DEL) + return del_local_interface(msg); + + return -1; +} + +int32_t ofp_is_mobile(uint32_t addr) +{ + (void) addr; + /* TODO find other hash implementation + */ + return 0; +} + +struct find_vlan_data { + uint32_t addr; + uint16_t vlan; +}; + +static int iter_vrfs(void *key, void *iter_arg) +{ + struct routes_by_vrf *rbv = 
key; + struct find_vlan_data *data = iter_arg; + struct ofp_nh_entry *node = ofp_rtl_search(&(rbv->routes), data->addr); + + if (node) { + data->vlan = node->vlan; + return 1; + } + return 0; +} + +uint16_t ofp_get_probable_vlan(int port, uint32_t addr) +{ + (void) port; + struct ofp_nh_entry *node; + struct find_vlan_data data; + + node = ofp_rtl_search(&shm->default_routes, addr); + if (node) + return node->vlan; + + data.addr = addr; + data.vlan = 0; + + avl_iterate_inorder(shm->vrf_routes, iter_vrfs, &data); + return data.vlan; +} + +void ofp_route_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + /* Reserve memory for args from shared mem */ + shm_h = odp_shm_reserve("OfpRouteShMem", + sizeof(*shm), ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + shm_h = odp_shm_reserve("OfpLocksShMem", + sizeof(*ofp_locks_shm), ODP_CACHE_LINE_SIZE, 0); + ofp_locks_shm = odp_shm_addr(shm_h); + + if (shm == NULL || ofp_locks_shm == NULL) { + OFP_ABORT("Error: OfpRouteShMem shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); +} + +void ofp_route_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpRouteShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpRouteShMem shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} diff --git a/src/ofp_rt_lookup.c b/src/ofp_rt_lookup.c new file mode 100644 index 00000000..1188ad38 --- /dev/null +++ b/src/ofp_rt_lookup.c @@ -0,0 +1,629 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/** \file + * \anchor rt_lookup + * + * Radix tree contains all the routing data for the simple executive. 
+ * Data has two types: + * - MAC addresses + * - Gateway addresses + * + * For a given address the first hit may be a gateway address, whose + * MAC address is looked up using a second lookup. + * + */ + +#include +#include +#include +#include "ofpi_util.h" +#include "ofpi.h" +#include "odp/rwlock.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_log.h" + +/* + * Shared data + */ +struct ofp_rt_lookup_mem { + struct ofp_rtl_node *global_stack[65]; +#define NUM_NODES 65536 + struct ofp_rtl_node node_list[NUM_NODES]; + struct ofp_rtl_node *free_nodes; + int nodes_allocated, max_nodes_allocated; + + struct ofp_rtl6_node *global_stack6[129]; +#define NUM_NODES_6 65536 + struct ofp_rtl6_node node_list6[NUM_NODES_6]; + struct ofp_rtl6_node *free_nodes6; + int nodes_allocated6, max_nodes_allocated6; +}; + +/* + * Data per core + */ +static __thread struct ofp_rt_lookup_mem *shm; + +static void NODEFREE(struct ofp_rtl_node *node) +{ + node->left = NULL; + node->right = shm->free_nodes; + if (shm->free_nodes) shm->free_nodes->left = node; + shm->free_nodes = node; + shm->nodes_allocated--; +} + +static struct ofp_rtl_node *NODEALLOC(void) +{ + struct ofp_rtl_node *p = shm->free_nodes; + if (shm->free_nodes) { + shm->free_nodes->left = NULL; + shm->free_nodes = shm->free_nodes->right; + shm->nodes_allocated++; + if (shm->nodes_allocated > shm->max_nodes_allocated) + shm->max_nodes_allocated = shm->nodes_allocated; + } + return p; +} + +static void NODEFREE6(struct ofp_rtl6_node *node) +{ + node->left = NULL; + node->right = shm->free_nodes6; + if (shm->free_nodes6) shm->free_nodes6->left = node; + shm->free_nodes6 = node; + shm->nodes_allocated6--; +} + +static struct ofp_rtl6_node *NODEALLOC6(void) +{ + struct ofp_rtl6_node *p = shm->free_nodes6; + if (shm->free_nodes6) { + shm->free_nodes6->left = NULL; + shm->free_nodes6 = shm->free_nodes6->right; + shm->nodes_allocated6++; + if (shm->nodes_allocated6 > shm->max_nodes_allocated6) + shm->max_nodes_allocated6 = 
shm->nodes_allocated6; + } + return p; +} + +#define OFP_OOPS(_s) printf(_s) + +int ofp_rtl_init(struct ofp_rtl_tree *tree) +{ + int i; + + for (i = 0; i < NUM_NODES; i++) { + shm->node_list[i].left = (i == 0) ? NULL : &(shm->node_list[i-1]); + shm->node_list[i].right = (i == NUM_NODES - 1) ? NULL : &(shm->node_list[i+1]); + } + shm->free_nodes = shm->node_list; + + return ofp_rtl_root_init(tree, 0); +} + +int ofp_rtl6_init(struct ofp_rtl6_tree *tree) +{ + int i; + + for (i = 0; i < NUM_NODES_6; i++) { + shm->node_list6[i].left = (i == 0) ? NULL : &(shm->node_list6[i-1]); + shm->node_list6[i].right = (i == NUM_NODES_6 - 1) ? NULL : &(shm->node_list6[i+1]); + } + shm->free_nodes6 = &(shm->node_list6[0]); + + tree->root = NODEALLOC6(); + if (!tree->root) { + printf("%s(): allocation failed!\n", __FUNCTION__); + return -1; + } + + tree->root->flags = 0; + tree->root->left = NULL; + tree->root->right = NULL; + + return 0; +} + +int ofp_rtl_root_init(struct ofp_rtl_tree *tree, uint16_t vrf) +{ + tree->root = NODEALLOC(); + if (!tree->root) { + printf("%s(): allocation failed!\n", __FUNCTION__); + return -1; + } + + tree->root->flags = 0; + tree->root->left = NULL; + tree->root->right = NULL; + tree->vrf = vrf; + + return 0; +} + + +/* __attribute__((optimize("O0"))) */ +struct ofp_nh_entry * +ofp_rtl_insert(struct ofp_rtl_tree *tree, uint32_t addr_be, + uint32_t masklen, struct ofp_nh_entry *data) +{ + struct ofp_rtl_node *node; + struct ofp_rtl_node *last = NULL; + uint32_t depth; + uint32_t mask = 0x80000000; + uint32_t addr = (odp_be_to_cpu_32(addr_be)) & ((~0)<<(32-masklen)); + + depth = 0; + node = tree->root; + while (depth < masklen && node) { + last = node; + if (addr & mask) { + node = node->right; + } else { + node = node->left; + } + depth++; + mask >>= 1; + } + + if (node) { + node->data[0] = *data; + node->flags = OFP_RTL_FLAGS_VALID_DATA; + return NULL; + } + + node = NODEALLOC(); + if (!node) + return NULL;//tree; + memset(node, 0, sizeof(*node)); + + 
node->left = NULL; + node->right = NULL; + node->flags = OFP_RTL_FLAGS_VALID_DATA; + node->data[0] = *data; + + mask = (1 << (32 - masklen)); + while (depth < masklen) { + struct ofp_rtl_node *tmp; + + tmp = NODEALLOC(); + if (!tmp) + goto nomem; + memset(tmp, 0, sizeof(*tmp)); + + if (addr & mask) { + tmp->right = node; + tmp->left = NULL; + } else { + tmp->left = node; + tmp->right = NULL; + } + node = tmp; + mask <<= 1; + depth++; + } + + if (!last) OFP_OOPS("!last"); + if (addr & mask) { + last->right = node; + } else { + last->left = node; + } + + return NULL; + + nomem: + while(node) { + struct ofp_rtl_node *tmp; + + mask >>= 1; + if (addr & mask) { + tmp = node->right; + NODEFREE(node); + } else { + tmp = node->left; + NODEFREE(node); + } + node = tmp; + } + + return NULL; //tree; +} + + +struct ofp_nh_entry * +ofp_rtl_search_exact(struct ofp_rtl_tree *tree, uint32_t addr_be, uint32_t masklen) +{ + struct ofp_rtl_node *node; + uint32_t depth; + uint32_t mask = 0x80000000; + uint32_t addr = odp_be_to_cpu_32(addr_be); + + depth = 0; + node = tree->root; + while (depth < masklen && node) { + shm->global_stack[depth] = node; + if (addr & mask) { + node = node->right; + } else { + node = node->left; + } + depth++; + mask >>= 1; + } + + if (!node) + return NULL; + + return &node->data[0]; +} + +struct ofp_nh_entry * +ofp_rtl_remove(struct ofp_rtl_tree *tree, uint32_t addr_be, uint32_t masklen) +{ + struct ofp_rtl_node *node; + struct ofp_rtl_node **stack = shm->global_stack; + uint32_t depth; + uint32_t mask = 0x80000000; + void *data; + uint32_t addr = odp_be_to_cpu_32(addr_be); + + depth = 0; + node = tree->root; + while (depth < masklen && node) { + stack[depth] = node; + if (addr & mask) { + node = node->right; + } else { + node = node->left; + } + depth++; + mask >>= 1; + } + + if (!node || !(node->flags & OFP_RTL_FLAGS_VALID_DATA)) + return NULL; + + data = &node->data; + node->flags = 0; + + if (node->left || node->right) { + return data; + } + + if 
(!depth) + return data; + + NODEFREE(node); + + mask = 1 << (32 - depth); + depth--; + do { + if (addr & mask) { + stack[depth]->right = NULL; + if (stack[depth]->left || (stack[depth]->flags & OFP_RTL_FLAGS_VALID_DATA)) { + break; + } + } else { + stack[depth]->left = NULL; + if (stack[depth]->right || (stack[depth]->flags & OFP_RTL_FLAGS_VALID_DATA)) { + break; + } + } + + if (depth == 0) + break; + + NODEFREE(stack[depth]); + depth--; + mask <<= 1; + } while (1); + + return data; +} + +void ofp_rtl_destroy(struct ofp_rtl_tree *tree, void (*func)(void *data)) +{ + struct ofp_rtl_node *stack[OFP_RTL_MAXDEPTH + 1]; + struct ofp_rtl_node *node; + int depth = 0; + + node = tree->root; + + for (;;) { + if (depth == OFP_RTL_MAXDEPTH + 1) OFP_OOPS("rtl maxdetph exceeded"); + + if (!node->left && !node->right) { + if (func && (node->flags & OFP_RTL_FLAGS_VALID_DATA)) + func(&node->data); + NODEFREE(node); + depth--; + if (depth < 0) break; + + if (stack[depth]->left == node) + stack[depth]->left = NULL; + else if (stack[depth]->right == node) + stack[depth]->right = NULL; + + node = stack[depth]; + } else { + stack[depth++] = node; + + if (node->left) + node = node->left; + else + node = node->right; + } + } + + tree->root = NULL; +} + +static void traverse(int fd, struct ofp_rtl_node *node, + void (*func)(int fd, uint32_t key, int level, struct ofp_nh_entry *data), + uint32_t key, int level) +{ + if (!node) + return; + + //printf("leaf=%p flags=0x%x\n", node, node->flags); + if (func && (node->flags & OFP_RTL_FLAGS_VALID_DATA)) + func(fd, key, level, &(node->data[0])); + + //printf("left=%p right=%p\n", node->left, node->right); + traverse(fd, node->left, func, key, level+1); + if (node->right) key |= 0x80000000 >> level; + traverse(fd, node->right, func, key, level+1); +} + +void ofp_rtl_traverse(int fd, struct ofp_rtl_tree *tree, + void (*func)(int fd, uint32_t key, int level, struct ofp_nh_entry *data)) +{ + traverse(fd, tree->root, func, 0, 0); +} + +struct 
ofp_nh6_entry * +ofp_rtl_insert6(struct ofp_rtl6_tree *tree, uint8_t *addr, + uint32_t masklen, struct ofp_nh6_entry *data) +{ + struct ofp_rtl6_node *node; + struct ofp_rtl6_node *last = NULL; + uint32_t depth; + uint32_t bit = 0; + + depth = 0; + node = tree->root; + while (depth < masklen && node) { + last = node; + if (ofp_rt_bit_set(addr, bit)) { + node = node->right; + } else { + node = node->left; + } + depth++; + bit++; + } + + if (node) + return &node->data; + + node = NODEALLOC6(); + if (!node) + return NULL;//tree; + memset(node, 0, sizeof(*node)); + + node->left = NULL; + node->right = NULL; + node->flags = OFP_RTL_FLAGS_VALID_DATA; + node->data = *data; + + bit = masklen - 1; + while (depth < masklen) { + struct ofp_rtl6_node *tmp; + + tmp = NODEALLOC6(); + if (!tmp) + goto nomem; + memset(tmp, 0, sizeof(*tmp)); + + if (ofp_rt_bit_set(addr, bit)) { + tmp->right = node; + tmp->left = NULL; + } else { + tmp->left = node; + tmp->right = NULL; + } + node = tmp; + bit--; + depth++; + } + + if (!last) OFP_OOPS("!last"); + if (ofp_rt_bit_set(addr, bit)) { + last->right = node; + } else { + last->left = node; + } + + return NULL; + + nomem: + while(node) { + struct ofp_rtl6_node *tmp; + + bit++; + if (ofp_rt_bit_set(addr, bit)) { + tmp = node->right; + NODEFREE6(node); + } else { + tmp = node->left; + NODEFREE6(node); + } + node = tmp; + } + + return NULL; //tree; +} + +struct ofp_nh6_entry * +ofp_rtl_remove6(struct ofp_rtl6_tree *tree, uint8_t *addr, uint32_t masklen) +{ + struct ofp_rtl6_node *node; + struct ofp_rtl6_node **stack = shm->global_stack6; + uint32_t depth; + void *data; + int bit = 0; + + depth = 0; + node = tree->root; + while (depth < masklen && node) { + stack[depth] = node; + if (ofp_rt_bit_set(addr, bit)) { + node = node->right; + } else { + node = node->left; + } + depth++; + bit++; + } + + if (!node || !(node->flags & OFP_RTL_FLAGS_VALID_DATA)) + return NULL; + + data = &node->data; + node->flags = 0; + + if (node->left || node->right) { 
+ return data; + } + + if (!depth) + return data; + + NODEFREE6(node); + + bit = masklen - 1; + depth--; + do { + if (ofp_rt_bit_set(addr, bit)) { + stack[depth]->right = NULL; + if (stack[depth]->left || (stack[depth]->flags & OFP_RTL_FLAGS_VALID_DATA)) { + break; + } + } else { + stack[depth]->left = NULL; + if (stack[depth]->right || (stack[depth]->flags & OFP_RTL_FLAGS_VALID_DATA)) { + break; + } + } + + if (depth == 0) + break; + + NODEFREE6(stack[depth]); + depth--; + bit--; + } while (1); + + return data; +} + +#if 0 +static void tr(int fd, struct ofp_rtl6_node *n, int level) +{ + ofp_sendf(fd, "level=%d node=%d left=%d right=%d flags=%d\r\n", level, NUM(n), + NUM(n->left), NUM(n->right), n->flags); + if (n->left) { + tr(fd, n->left, level+1); + } + if (n->right) { + tr(fd, n->right, level+1); + } +} +#endif + +void ofp_rtl_traverse6(int fd, struct ofp_rtl6_tree *tree, + void (*func)(int fd, uint8_t *key, int level, struct ofp_nh6_entry *data)) +{ + char key[16]; + memset(key, 0, sizeof(key)); +#define VISITED_LEFT 1 +#define VISITED_RIGHT 2 + char visited[129]; + struct ofp_rtl6_node *stack[129]; + struct ofp_rtl6_node *node = tree->root; + int depth = 0; + + memset(key, 0, sizeof(key)); + memset(visited, 0, sizeof(visited)); + + for (;;) { + if (func && (node->flags & OFP_RTL_FLAGS_VALID_DATA) && visited[depth] == 0) { + func(fd, (uint8_t*)key, depth, &(node->data)); + } + + stack[depth] = node; + if (node->left && (visited[depth] & VISITED_LEFT) == 0) { + node = node->left; + ofp_rt_reset_bit((uint8_t*)key, depth); + visited[depth++] = VISITED_LEFT; + } else if (node->right && (visited[depth] & VISITED_RIGHT) == 0) { + node = node->right; + ofp_rt_set_bit((uint8_t*)key, depth); + visited[depth++] |= VISITED_RIGHT; + } else { + visited[depth] = 0; + ofp_rt_reset_bit((uint8_t*)key, depth); + depth--; + if (depth < 0) + break; + node = stack[depth]; + } + } +} + +void ofp_print_rt_stat(int fd) +{ + ofp_sendf(fd, "rt tree alloc now=%d max=%d total=%d\r\n", + 
shm->nodes_allocated, shm->max_nodes_allocated, NUM_NODES); + ofp_sendf(fd, "rt6 tree alloc now=%d max=%d total=%d\r\n", + shm->nodes_allocated6, shm->max_nodes_allocated6, NUM_NODES_6); +} + +void ofp_rt_lookup_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + /* Reserve memory for args from shared mem */ + shm_h = odp_shm_reserve("OfpRtlookupShMem", + sizeof(*shm), ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpRtlookupShMem shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); +} + +void ofp_rt_lookup_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpRtlookupShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpRtlookupShMem shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} diff --git a/src/ofp_rt_mtrie_lookup.c b/src/ofp_rt_mtrie_lookup.c new file mode 100644 index 00000000..7b3cb5fd --- /dev/null +++ b/src/ofp_rt_mtrie_lookup.c @@ -0,0 +1,623 @@ +/* Copyright (c) 2014, ENEA Software AB + * Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* + * + * MTRIE data structure contains forwarding information. 
+ * + */ + +#include +#include +#include +#include "ofpi_util.h" +#include "ofpi.h" +#include "odp/rwlock.h" +#include "ofpi_rt_lookup.h" +#include "ofpi_log.h" + +/* + * Shared data + */ +struct ofp_rt_lookup_mem { +#define NUM_NODES 1024 +#define NUM_NODES_LARGE 128 + struct ofp_rtl_node small_list[NUM_NODES][1<root == 0) { + node->next = shm->free_small; + shm->free_small = node; + shm->nodes_allocated--; + } +} + +static struct ofp_rtl_node *NODEALLOC(void) +{ + struct ofp_rtl_node *p = shm->free_small; + if (shm->free_small) { + shm->free_small = shm->free_small->next; + shm->nodes_allocated++; + } + if (shm->nodes_allocated > shm->max_nodes_allocated) + shm->max_nodes_allocated = shm->nodes_allocated; + + p->root = 0; + p->ref = 0; + + return p; +} + +int ofp_rtl_init(struct ofp_rtl_tree *tree) +{ + int i; + + for (i = 0; i < NUM_NODES; i++) { + shm->small_list[i][0].next = (i == NUM_NODES - 1) ? NULL : &(shm->small_list[i+1][0]); + } + shm->free_small = shm->small_list[0]; + + for (i = 0; i < NUM_NODES_LARGE; i++) { + shm->large_list[i][0].next = (i == NUM_NODES_LARGE - 1) ? 
NULL : &(shm->large_list[i+1][0]); + } + shm->free_large = shm->large_list[0]; + + + return ofp_rtl_root_init(tree, 0); +} + +int ofp_rtl_root_init(struct ofp_rtl_tree *tree, uint16_t vrf) +{ + tree->root = shm->free_large; + if (shm->free_large) + shm->free_large = shm->free_large->next; + + if (!tree->root) { + printf("%s(): allocation failed!\n", __FUNCTION__); + return -1; + } + + tree->root->flags = 0; + tree->root->next = NULL; + tree->root->root = 1; + tree->root->ref = 0; + tree->vrf = vrf; + + return 0; +} + +static void NODEFREE6(struct ofp_rtl6_node *node) +{ + node->left = NULL; + node->right = shm->free_nodes6; + if (shm->free_nodes6) shm->free_nodes6->left = node; + shm->free_nodes6 = node; + shm->nodes_allocated6--; +} + +static struct ofp_rtl6_node *NODEALLOC6(void) +{ + struct ofp_rtl6_node *p = shm->free_nodes6; + if (shm->free_nodes6) { + shm->free_nodes6->left = NULL; + shm->free_nodes6 = shm->free_nodes6->right; + shm->nodes_allocated6++; + if (shm->nodes_allocated6 > shm->max_nodes_allocated6) + shm->max_nodes_allocated6 = shm->nodes_allocated6; + } + return p; +} + +#define OFP_OOPS(_s) printf(_s) + + +int ofp_rtl6_init(struct ofp_rtl6_tree *tree) +{ + int i; + + for (i = 0; i < NUM_NODES_6; i++) { + shm->node_list6[i].left = (i == 0) ? NULL : &(shm->node_list6[i-1]); + shm->node_list6[i].right = (i == NUM_NODES_6 - 1) ? 
NULL : &(shm->node_list6[i+1]); + } + shm->free_nodes6 = &(shm->node_list6[0]); + + tree->root = NODEALLOC6(); + if (!tree->root) { + printf("%s(): allocation failed!\n", __FUNCTION__); + return -1; + } + + tree->root->flags = 0; + tree->root->left = NULL; + tree->root->right = NULL; + + return 0; +} + + +static int16_t ofp_rt_rule_search(uint16_t vrf, uint32_t addr, uint32_t masklen) { + uint32_t index; + for (index = 0; index < ROUTE_LIST_SIZE; index++) + if (shm->rules[index].used && + shm->rules[index].vrf == vrf && + shm->rules[index].addr == addr && + shm->rules[index].masklen == masklen) + return index; + + return -1; +} + +void ofp_rt_rule_add(uint16_t vrf, uint32_t addr, uint32_t masklen, struct ofp_nh_entry *data) +{ + uint32_t index; + int32_t reserved = -1; + if ((reserved = ofp_rt_rule_search(vrf, addr, masklen)) == -1) { + for (index = 0; index < ROUTE_LIST_SIZE; index++) + if (shm->rules[index].used == 0) { + reserved = index; + break; + } + } + + if (reserved == -1) { + printf("%s(): route rule allocation failed!\n", __FUNCTION__); + return; + } + + shm->rules[reserved].used = 1; + shm->rules[reserved].masklen = masklen; + shm->rules[reserved].addr = addr; + shm->rules[reserved].vrf = vrf; + shm->rules[reserved].data[0] = *data; +} + +void ofp_rt_rule_remove(uint16_t vrf, uint32_t addr, uint32_t masklen) +{ + int32_t reserved = ofp_rt_rule_search(vrf, addr, masklen); + + if (reserved == -1) { + printf("%s(): route rule remove failed!\n", __FUNCTION__); + return; + } + + shm->rules[reserved].used = 0; +} + + +void ofp_rt_rule_print(int fd, uint16_t vrf, + void (*func)(int fd, uint32_t key, int level, struct ofp_nh_entry *data)) +{ + uint32_t index; + for (index = 0; index < ROUTE_LIST_SIZE; index++) + if (shm->rules[index].used && shm->rules[index].vrf == vrf) + func(fd, odp_be_to_cpu_32(shm->rules[index].addr), + shm->rules[index].masklen, + &shm->rules[index].data[0]); +} + +int32_t ofp_rt_rule_find_prefix_match(uint16_t vrf, uint32_t addr, uint8_t 
masklen, uint8_t low) { + uint32_t index; + uint8_t low_int = low + 1; + int32_t reserved = -1; + for (index = 0; index < ROUTE_LIST_SIZE; index++) { + if (shm->rules[index].vrf == vrf && + shm->rules[index].masklen >= low_int && + masklen >= shm->rules[index].masklen && + shm->rules[index].addr >> (IPV4_LENGTH - shm->rules[index].masklen) == + addr >> (IPV4_LENGTH - shm->rules[index].masklen)) + { + /* search route rule with prefix_len in the same interval, + * largest prefix_len that is smaller or equal than what we removed, + * same route ipv4 address prefix */ + low_int = shm->rules[index].masklen; + reserved = index; + } + } + return reserved; +} + + +static inline uint32_t get_use_reference(struct ofp_rtl_node *node) +{ + return node->ref; +} + +static inline void inc_use_reference(struct ofp_rtl_node *node) +{ + node->ref++; +} + +static inline void dec_use_reference(struct ofp_rtl_node *node) +{ + if (--node->ref == 0) + NODEFREE(node); +} + + +struct ofp_nh_entry * +ofp_rtl_insert(struct ofp_rtl_tree *tree, uint32_t addr_be, + uint32_t masklen, struct ofp_nh_entry *data) +{ + struct ofp_rtl_node *elem, *node = tree->root; + uint32_t addr = (odp_be_to_cpu_32(addr_be)) & ((~0)<<(32-masklen)); + uint32_t low = 0, high = IPV4_FIRST_LEVEL; + + for (; high <= IPV4_LENGTH; low = high, high += IPV4_LEVEL) { + inc_use_reference(node); + if (masklen <= high) { + uint32_t addr_be_right = addr >> + (IPV4_LENGTH - masklen); + uint32_t shift_left = IPV4_LENGTH - masklen + low; + uint32_t shift_right = low + IPV4_LENGTH - high; + uint32_t index = + addr_be_right << shift_left >> shift_right; + + uint32_t index_end = + (addr_be_right + 1) << shift_left >> shift_right; + + if (index_end == 0) + index_end = 1 << ( high - low ); + + for (; index < index_end; index++) { + if (node[index].masklen <= masklen || node[index].masklen > high) { + node[index].data[0] = *data; + node[index].masklen = masklen; + } + } + break; + } + elem = &node[(addr << low) >> (low + IPV4_LENGTH - 
high)]; + + if (elem->next == NULL) + elem->next = NODEALLOC(); + + if (elem->masklen == 0) + elem->masklen = masklen; + + node = elem->next; + } + odp_sync_stores(); + + return 0; +} + +struct ofp_nh_entry * +ofp_rtl_remove(struct ofp_rtl_tree *tree, uint32_t addr_be, uint32_t masklen) +{ + struct ofp_rtl_node *elem, *node = tree->root; + uint32_t addr = (odp_be_to_cpu_32(addr_be)) & ((~0)<<(32-masklen)); + struct ofp_nh_entry *data; + uint32_t low = 0, high = IPV4_FIRST_LEVEL; + int32_t reserved = ofp_rt_rule_search(tree->vrf, addr_be, masklen); + int32_t insert = -1; + + if (reserved == -1) + return NULL; + + data = &shm->rules[reserved].data[0]; + + for (; high <= IPV4_LENGTH ; low = high, high += IPV4_LEVEL) { + dec_use_reference(node); + if (masklen <= high) { + uint32_t addr_be_right = addr >> + (IPV4_LENGTH - masklen); + uint32_t shift_left = IPV4_LENGTH - masklen + low; + uint32_t shift_right = low + IPV4_LENGTH - high; + uint32_t index = + addr_be_right << shift_left >> shift_right; + + uint32_t index_end = + (addr_be_right + 1) << shift_left >> shift_right; + + for (; index < index_end; index++) { + if (node[index].masklen == masklen && + !memcmp(&node[index].data, data, + sizeof(struct ofp_nh_entry))) { + if (node[index].next == NULL) + node[index].masklen = 0; + else + node[index].masklen = high + 1; + } + } + /* if exists, re-insert previous route that was overwritten, after cleanup*/ + insert = ofp_rt_rule_find_prefix_match(tree->vrf, addr, masklen, low); + break; + } + + elem = &node[(addr << low) >> (low + IPV4_LENGTH - high)]; + + if (elem->masklen != 0 /*&& elem->next != NULL*/) { + node = elem->next; + if (get_use_reference(node) == 1 && elem->masklen > high) { + /* next level will be freed so we update prefix_len to 0, + * if there is no leaf stored on the current elem */ + elem->masklen = 0; + elem->next = NULL; + } + } else + return NULL; + + } + odp_sync_stores(); + + if (insert != -1) + ofp_rtl_insert(tree, + shm->rules[insert].addr, + 
shm->rules[insert].masklen, + &shm->rules[insert].data[0]); + + return data; +} + + +inline struct ofp_nh_entry *ofp_rtl_search(struct ofp_rtl_tree *tree, uint32_t addr_be) +{ + struct ofp_nh_entry *nh = NULL; + struct ofp_rtl_node *elem, *node = tree->root; + uint32_t addr = odp_be_to_cpu_32(addr_be); + uint32_t low = 0, high = IPV4_FIRST_LEVEL; + + for (; high <= IPV4_LENGTH ; low = high, high += IPV4_LEVEL) { + elem = &node[(addr << low) >> (low + IPV4_LENGTH - high)]; + + if (elem->masklen == 0) + return nh; + else if (elem->masklen <= high) + nh = &elem->data[0]; + + if ((node = elem->next) == NULL) + return nh; + } + + return nh; +} + +struct ofp_nh6_entry * +ofp_rtl_insert6(struct ofp_rtl6_tree *tree, uint8_t *addr, + uint32_t masklen, struct ofp_nh6_entry *data) +{ + struct ofp_rtl6_node *node; + struct ofp_rtl6_node *last = NULL; + uint32_t depth; + uint32_t bit = 0; + + depth = 0; + node = tree->root; + while (depth < masklen && node) { + last = node; + if (ofp_rt_bit_set(addr, bit)) { + node = node->right; + } else { + node = node->left; + } + depth++; + bit++; + } + + if (node) + return &node->data; + + node = NODEALLOC6(); + if (!node) + return NULL;//tree; + memset(node, 0, sizeof(*node)); + + node->left = NULL; + node->right = NULL; + node->flags = OFP_RTL_FLAGS_VALID_DATA; + node->data = *data; + + bit = masklen - 1; + while (depth < masklen) { + struct ofp_rtl6_node *tmp; + + tmp = NODEALLOC6(); + if (!tmp) + goto nomem; + memset(tmp, 0, sizeof(*tmp)); + + if (ofp_rt_bit_set(addr, bit)) { + tmp->right = node; + tmp->left = NULL; + } else { + tmp->left = node; + tmp->right = NULL; + } + node = tmp; + bit--; + depth++; + } + + if (!last) OFP_OOPS("!last"); + if (ofp_rt_bit_set(addr, bit)) { + last->right = node; + } else { + last->left = node; + } + + return NULL; + + nomem: + while(node) { + struct ofp_rtl6_node *tmp; + + bit++; + if (ofp_rt_bit_set(addr, bit)) { + tmp = node->right; + NODEFREE6(node); + } else { + tmp = node->left; + 
/*
 * Remove the IPv6 route addr/masklen from the binary tree.
 *
 * Walks masklen bits down from the root, recording the visited nodes in
 * shm->global_stack6, clears the valid-data flag of the node reached and
 * then frees that node and every ancestor that is left without children
 * and without valid data. The root is never freed.
 *
 * Returns a pointer to the data of the removed node, or NULL when no
 * node with valid data exists at that position.
 *
 * NOTE(review): when the node is freed the returned pointer refers to a
 * node that is already back on the free list - confirm that callers use
 * it before the next allocation.
 * NOTE(review): the walk uses the shared shm->global_stack6 as scratch
 * space; whether callers serialise access is not visible here.
 */
struct ofp_nh6_entry *
ofp_rtl_remove6(struct ofp_rtl6_tree *tree, uint8_t *addr, uint32_t masklen)
{
	struct ofp_rtl6_node *node;
	struct ofp_rtl6_node **stack = shm->global_stack6;
	uint32_t depth;
	void *data;
	int bit = 0;

	/* Descend one address bit per level, remembering the path. */
	depth = 0;
	node = tree->root;
	while (depth < masklen && node) {
		stack[depth] = node;
		if (ofp_rt_bit_set(addr, bit)) {
			node = node->right;
		} else {
			node = node->left;
		}
		depth++;
		bit++;
	}

	if (!node || !(node->flags & OFP_RTL_FLAGS_VALID_DATA))
		return NULL;

	data = &node->data;
	node->flags = 0;

	/* Still needed as an interior node: keep it, only the data is gone. */
	if (node->left || node->right) {
		return data;
	}

	/* depth 0 means the route lived in the root, which stays allocated. */
	if (!depth)
		return data;

	NODEFREE6(node);

	/* Unlink the freed node from its parent and prune empty ancestors. */
	bit = masklen - 1;
	depth--;
	do {
		if (ofp_rt_bit_set(addr, bit)) {
			stack[depth]->right = NULL;
			if (stack[depth]->left || (stack[depth]->flags & OFP_RTL_FLAGS_VALID_DATA)) {
				break;
			}
		} else {
			stack[depth]->left = NULL;
			if (stack[depth]->right || (stack[depth]->flags & OFP_RTL_FLAGS_VALID_DATA)) {
				break;
			}
		}

		/* Reached the root: stop without freeing it. */
		if (depth == 0)
			break;

		NODEFREE6(stack[depth]);
		depth--;
		bit--;
	} while (1);

	return data;
}
VISITED_RIGHT) == 0) { + node = node->right; + ofp_rt_set_bit((uint8_t*)key, depth); + visited[depth++] |= VISITED_RIGHT; + } else { + visited[depth] = 0; + ofp_rt_reset_bit((uint8_t*)key, depth); + depth--; + if (depth < 0) + break; + node = stack[depth]; + } + } +} + +void ofp_print_rt_stat(int fd) +{ + ofp_sendf(fd, "rt tree alloc now=%d max=%d total=%d\r\n", + shm->nodes_allocated, shm->max_nodes_allocated, NUM_NODES); + ofp_sendf(fd, "rt6 tree alloc now=%d max=%d total=%d\r\n", + shm->nodes_allocated6, shm->max_nodes_allocated6, NUM_NODES_6); +} + +void ofp_rt_lookup_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + /* Reserve memory for args from shared mem */ + shm_h = odp_shm_reserve("OfpRtlookupShMem", + sizeof(*shm), ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpRtlookupShMem shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); +} + +void ofp_rt_lookup_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpRtlookupShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: OfpRtlookupShMem shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} diff --git a/src/ofp_stat.c b/src/ofp_stat.c new file mode 100644 index 00000000..57d30547 --- /dev/null +++ b/src/ofp_stat.c @@ -0,0 +1,66 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include + +#include + +#include "ofpi_log.h" +#include "ofpi_stat.h" + +typedef struct { + struct ofp_packet_stat ofp_packet_statistics; +} stat_shm_t; + +static __thread stat_shm_t *shm_stat = NULL; + +unsigned long int ofp_stat_flags = 0; + +struct ofp_packet_stat *ofp_get_packet_statistics(void) +{ + if (!shm_stat) + return NULL; + + return &(shm_stat->ofp_packet_statistics); +} + +void ofp_stat_alloc_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_reserve("OfpStatShMem", sizeof(*shm_stat), + ODP_CACHE_LINE_SIZE, 0); + shm_stat = odp_shm_addr(shm_h); + + if (shm_stat == NULL) + OFP_ABORT("Error: Stat shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + + memset(shm_stat, 0, sizeof(*shm_stat)); +} + +void ofp_stat_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpStatShMem"); + shm_stat = odp_shm_addr(shm_h); + + if (shm_stat == NULL) + OFP_ABORT("Error: Stat shared mem lookup failed on core: %u.\n", + odp_cpu_id()); +} + +void ofp_set_stat_flags(unsigned long int flags) +{ + ofp_stat_flags = flags; +} +unsigned long int ofp_get_stat_flags(void) +{ + return ofp_stat_flags; +} diff --git a/src/ofp_subr_hash.c b/src/ofp_subr_hash.c new file mode 100644 index 00000000..9a97c37d --- /dev/null +++ b/src/ofp_subr_hash.c @@ -0,0 +1,133 @@ +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + */ + +#include "odp.h" +#include "ofpi_queue.h" +#include "ofpi_socket.h" +#include "ofpi_systm.h" +#include "ofpi_util.h" + +/* + * General routine to allocate a hash table with control of memory flags. 
+ */ +void * +ofp_hashinit_flags(int elements, void *type, uint64_t *hashmask, + int flags) +{ + long hashsize; + OFP_LIST_HEAD(generic, generic) *hashtbl; + int i; + + (void)type; + + KASSERT(elements > 0, ("%s: bad elements", __func__)); + /* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */ + KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT), + ("Bad flags (0x%x) passed to ofp_hashinit_flags", flags)); + + for (hashsize = 1; hashsize <= elements; hashsize <<= 1) + continue; + hashsize >>= 1; + + if (flags & HASH_NOWAIT) + hashtbl = malloc((uint64_t)hashsize * sizeof(*hashtbl)); + else + hashtbl = malloc((uint64_t)hashsize * sizeof(*hashtbl)); + + if (hashtbl != NULL) { + for (i = 0; i < hashsize; i++) + OFP_LIST_INIT(&hashtbl[i]); + *hashmask = hashsize - 1; + } + return (hashtbl); +} + +/* + * Allocate and initialize a hash table with default flag: may sleep. + */ +void * +ofp_hashinit(int elements, void *type, uint64_t *hashmask) +{ + + return (ofp_hashinit_flags(elements, type, hashmask, HASH_WAITOK)); +} + +void +ofp_hashdestroy(void *vhashtbl, void *type, uint64_t hashmask) +{ + (void)type; + + OFP_LIST_HEAD(generic, generic) *hashtbl, *hp; + + hashtbl = vhashtbl; + for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) + KASSERT(OFP_LIST_EMPTY(hp), ("%s: hash not empty", __func__)); + free(hashtbl); +} + +static const int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, + 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, + 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; +#define NPRIMES (sizeof(primes) / sizeof(primes[0])) + +/* + * General routine to allocate a prime number sized hash table. 
+ */ +void * +ofp_phashinit(int elements, void *type, uint64_t *nentries) +{ + long hashsize; + OFP_LIST_HEAD(generic, generic) *hashtbl; + int i; + + (void)type; + + KASSERT(elements > 0, ("%s: bad elements", __func__)); + for (i = 1, hashsize = primes[1]; hashsize <= elements;) { + i++; + if (i == NPRIMES) + break; + hashsize = primes[i]; + } + hashsize = primes[i - 1]; + hashtbl = malloc((uint64_t)hashsize * sizeof(*hashtbl)); + for (i = 0; i < hashsize; i++) + OFP_LIST_INIT(&hashtbl[i]); + *nentries = hashsize; + return (hashtbl); +} diff --git a/src/ofp_sys_socket.c b/src/ofp_sys_socket.c new file mode 100644 index 00000000..bf05e116 --- /dev/null +++ b/src/ofp_sys_socket.c @@ -0,0 +1,143 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)sys_socket.c 8.1 (Berkeley) 6/10/93 + */ + +#include "ofpi_protosw.h" +#include "ofpi_socketvar.h" +#include "ofpi_ioctl.h" +#include "ofpi_sockstate.h" +#include "ofpi_errno.h" + +int +ofp_soo_ioctl(struct socket *so, uint32_t cmd, void *data, struct ofp_ucred *active_cred, + struct thread *td) +{ + int error = 0; + (void)active_cred; + + switch (cmd) { + case OFP_FIONBIO: + OFP_SOCK_LOCK(so); + if (*(int *)data) + so->so_state |= SS_NBIO; + else + so->so_state &= ~SS_NBIO; + OFP_SOCK_UNLOCK(so); + break; + + case OFP_FIOASYNC: + /* + * XXXRW: This code separately acquires OFP_SOCK_LOCK(so) and + * SOCKBUF_LOCK(&so->so_rcv) even though they are the same + * mutex to avoid introducing the assumption that they are + * the same. + */ + if (*(int *)data) { + OFP_SOCK_LOCK(so); + so->so_state |= SS_ASYNC; + OFP_SOCK_UNLOCK(so); + SOCKBUF_LOCK(&so->so_rcv); + so->so_rcv.sb_flags |= SB_ASYNC; + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_flags |= SB_ASYNC; + SOCKBUF_UNLOCK(&so->so_snd); + } else { + OFP_SOCK_LOCK(so); + so->so_state &= ~SS_ASYNC; + OFP_SOCK_UNLOCK(so); + SOCKBUF_LOCK(&so->so_rcv); + so->so_rcv.sb_flags &= ~SB_ASYNC; + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_flags &= ~SB_ASYNC; + SOCKBUF_UNLOCK(&so->so_snd); + } + break; + + case OFP_FIONREAD: + /* Unlocked read. */ + *(int *)data = so->so_rcv.sb_cc; + break; + + case OFP_FIONWRITE: + /* Unlocked read. 
*/ + *(int *)data = so->so_snd.sb_cc; + break; + + case OFP_FIONSPACE: + if ((so->so_snd.sb_hiwat < so->so_snd.sb_cc) || + (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt)) + *(int *)data = 0; + else + *(int *)data = sbspace(&so->so_snd); + break; +#if 0 + case OFP_FIOSETOWN: + error = fsetown(*(int *)data, &so->so_sigio); + break; + + case OFP_FIOGETOWN: + *(int *)data = fgetown(&so->so_sigio); + break; + + case OFP_SIOCSPGRP: + error = fsetown(-(*(int *)data), &so->so_sigio); + break; + + case OFP_SIOCGPGRP: + *(int *)data = -fgetown(&so->so_sigio); + break; +#endif + case OFP_SIOCATMARK: + /* Unlocked read. */ + *(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0; + break; + + /* Interface specific ioctls */ + case OFP_SIOCGIFCONF: + case OFP_OSIOCGIFCONF: + case OFP_SIOCIFCREATE: + case OFP_SIOCIFCREATE2: + case OFP_SIOCIFDESTROY: + case OFP_SIOCIFGCLONERS: + case OFP_SIOCGIFGMEMB: + error = OFP_EOPNOTSUPP; + break; + + default: + error = ((*so->so_proto->pr_usrreqs->pru_control) + (so, cmd, data, 0, td)); + break; + } + return (error); +} diff --git a/src/ofp_syscalls.c b/src/ofp_syscalls.c new file mode 100644 index 00000000..0d33b6b4 --- /dev/null +++ b/src/ofp_syscalls.c @@ -0,0 +1,686 @@ +/* + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +#include "odp.h" + +#include "ofpi_errno.h" +#include "ofpi_log.h" +#include "ofpi_util.h" +#include "ofpi_in.h" +#include "ofpi_ip.h" +#include "ofpi_ip6.h" +#include "ofpi_udp.h" +#include "ofpi_icmp.h" + +#include "ofpi_socketvar.h" +#include "ofpi_sockbuf.h" +#include "ofpi_socket.h" +#include "ofpi_sockstate.h" +#include "ofpi_in_pcb.h" +#include "ofpi_udp_var.h" +#include "ofpi_protosw.h" +#include "ofpi_ioctl.h" +#include "ofpi_route.h" +#include "api/ofp_types.h" + +int +ofp_socket(int domain, int type, int protocol) +{ + struct socket *so; + int error; + struct thread td; + + td.td_proc.p_fibnum = 0; + td.td_ucred = NULL; + error = ofp_socreate(domain, &so, type, protocol, &td); + if (error) { + ofp_errno = error; + return -1; + } + return so->so_number; +} + +int +ofp_close(int sockfd) +{ + struct socket *so = ofp_get_sock_by_fd(sockfd); + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + ofp_errno = ofp_soclose(so); + return ofp_errno ? -1 : 0; +} + +int +ofp_shutdown(int sockfd, int how) +{ + struct socket *so = ofp_get_sock_by_fd(sockfd); + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + ofp_errno = ofp_soshutdown(so, how); + return ofp_errno ? -1 : 0; +} + +int +ofp_bind(int sockfd, const struct ofp_sockaddr *addr, ofp_socklen_t addrlen) +{ + struct thread td; + union ofp_sockaddr_store nonconstaddr; + struct socket *so = ofp_get_sock_by_fd(sockfd); + + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + memcpy(&nonconstaddr, addr, addrlen); + + td.td_proc.p_fibnum = 0; + td.td_ucred = NULL; + ofp_errno = ofp_sobind(so, (struct ofp_sockaddr *)&nonconstaddr, + &td); + return ofp_errno ? 
-1 : 0; +} + +int +ofp_connect(int sockfd, const struct ofp_sockaddr *addr, ofp_socklen_t addrlen) +{ + struct thread td; + union ofp_sockaddr_store nonconstaddr; + struct socket *so = ofp_get_sock_by_fd(sockfd); + + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + if (!addr || !addrlen) { + ofp_errno = OFP_EINVAL; + return -1; + } + + memcpy(&nonconstaddr, addr, addrlen); + + td.td_proc.p_fibnum = 0; + td.td_ucred = NULL; + ofp_errno = ofp_soconnect(so, (struct ofp_sockaddr *)&nonconstaddr, &td); + return ofp_errno ? -1 : 0; +} + +ofp_ssize_t +ofp_sendto(int sockfd, const void *buf, size_t len, int flags, + const struct ofp_sockaddr *dest_addr, ofp_socklen_t addrlen) +{ + struct ofp_iovec iovec; + struct uio uio; + struct thread td; + union ofp_sockaddr_store nonconstaddr; + struct socket *so = ofp_get_sock_by_fd(sockfd); + + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + if (dest_addr && addrlen) + memcpy(&nonconstaddr, dest_addr, addrlen); + + uio.uio_iov = &iovec; + uio.uio_iovcnt = 1; + uio.uio_resid = len; + uio.uio_offset = 0; + iovec.iov_base = (void *)(uintptr_t)buf; + iovec.iov_len = len; + + td.td_proc.p_fibnum = 0; + td.td_ucred = NULL; + + ofp_errno = ofp_sosend(so, + (dest_addr && addrlen)? 
(struct ofp_sockaddr *)&nonconstaddr : NULL, + &uio, ODP_PACKET_INVALID, ODP_PACKET_INVALID, flags, &td); + + if (ofp_errno) + return -1; + + return len - uio.uio_resid; +} + +ofp_ssize_t +ofp_send(int sockfd, const void *buf, size_t len, int flags) +{ + return ofp_sendto(sockfd, buf, len, flags, NULL, 0); +} + +ofp_ssize_t +ofp_recvfrom(int sockfd, void *buf, size_t len, int flags, + struct ofp_sockaddr *src_addr, ofp_socklen_t *addrlen) +{ + struct ofp_iovec iovec; + struct uio uio; + struct socket *so = ofp_get_sock_by_fd(sockfd); + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + uio.uio_iov = &iovec; + uio.uio_iovcnt = 1; + uio.uio_resid = len; + iovec.iov_base = buf; + iovec.iov_len = len; + + ofp_errno = ofp_soreceive(so, &src_addr, &uio, NULL, NULL, &flags); + if (ofp_errno) + return -1; + if (src_addr && addrlen) + *addrlen = src_addr->sa_len; + return (len - uio.uio_resid); +} + +ofp_ssize_t +ofp_recv(int sockfd, void *buf, size_t len, int flags) +{ + return ofp_recvfrom(sockfd, buf, len, flags, NULL, 0); +} + +int +ofp_listen(int sockfd, int backlog) +{ + struct thread td; + struct socket *so = ofp_get_sock_by_fd(sockfd); + + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + td.td_proc.p_fibnum = 0; + td.td_ucred = NULL; + + ofp_errno = ofp_solisten(so, backlog, &td); + if (ofp_errno) + return -1; + return 0; +} + +int +ofp_accept(int sockfd, struct ofp_sockaddr *addr, ofp_socklen_t *addrlen) +{ + struct ofp_sockaddr *sa = NULL; + struct socket *so, *head = ofp_get_sock_by_fd(sockfd); + if (!head) { + ofp_errno = OFP_EBADF; + return -1; + } + + if ((head->so_options & OFP_SO_ACCEPTCONN) == 0) { + ofp_errno = OFP_EINVAL; + return -1; + } + + ACCEPT_LOCK(); + if ((head->so_state & SS_NBIO) && OFP_TAILQ_EMPTY(&head->so_comp)) { + ACCEPT_UNLOCK(); + ofp_errno = OFP_EWOULDBLOCK; + return -1; + } + + while (OFP_TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { + if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { + head->so_error = 
OFP_ECONNABORTED; + break; + } + if (ofp_msleep(&head->so_timeo, ofp_accept_mtx(), 0, + "accept", 0)) { + ACCEPT_UNLOCK(); + return -1; + } + } + + if (head->so_error) { + ofp_errno = head->so_error; + head->so_error = 0; + ACCEPT_UNLOCK(); + return -1; + } + so = OFP_TAILQ_FIRST(&head->so_comp); + KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); + KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); + + /* + * Before changing the flags on the socket, we have to bump the + * reference count. Otherwise, if the protocol calls ofp_sofree(), + * the socket will be released due to a zero refcount. + */ + OFP_SOCK_LOCK(so); /* soref() and so_state update */ + soref(so); /* file descriptor reference */ + + OFP_TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + so->so_state |= (head->so_state & SS_NBIO); + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + + OFP_SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + + /* connection has been removed from the listen queue */ + /*KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);*/ + + sa = 0; + ofp_errno = ofp_soaccept(so, &sa); + if (ofp_errno) { + /* + * return a namelen of zero for older code which might + * ignore the return value from accept. 
+ */ + if (addr) + *addrlen = 0; + return -1; + } + + if (sa == NULL) { + if (addr) + *addrlen = 0; + return so->so_number; + } + + if (addr) { + /* check sa_len before it is destroyed */ + if (*addrlen > sa->sa_len) + *addrlen = sa->sa_len; + memcpy(addr, sa, *addrlen); + sa = NULL; + } + + free(sa); + return so->so_number; +} + +int +ofp_select(int nfds, ofp_fd_set *_readfds, ofp_fd_set *writefds, + ofp_fd_set *exceptfds, struct ofp_timeval *timeout) +{ + struct selinfo *sel; + int ret = 0; + struct ofp_fdset *readfds = OFP_GET_FD_SET(_readfds); + + (void)nfds; + (void)writefds; + (void)exceptfds; + + OFP_LIST_FOREACH(sel, readfds, si_list) { + struct socket *so = ofp_get_sock_by_fd(sel->si_socket); + if (so) + so->so_rcv.sb_flags |= SB_SEL; + } + + ofp_msleep((void *)readfds, NULL, 0, "select", + timeout->tv_sec*1000000 + timeout->tv_usec); + + OFP_LIST_FOREACH(sel, readfds, si_list) { + struct socket *so = ofp_get_sock_by_fd(sel->si_socket); + if (!so) + continue; + + so->so_rcv.sb_flags &= ~SB_SEL; + + if (so->so_options & OFP_SO_ACCEPTCONN) { + /* accepting socket */ + if (!(OFP_TAILQ_EMPTY(&so->so_comp))) + ret++; + } else { + /* listening socket */ + if (so->so_rcv.sb_cc > 0) + ret++; + } + } + + return ret; +} + +void +OFP_FD_CLR(int fd, ofp_fd_set *_set) +{ + struct selinfo *sel; + struct ofp_fdset *set = OFP_GET_FD_SET(_set); + + OFP_LIST_FOREACH(sel, set, si_list) { + if (sel->si_socket == fd) { + OFP_LIST_REMOVE(sel, si_list); + return; + } + } +} + +int +OFP_FD_ISSET(int fd, ofp_fd_set *_set) +{ + struct selinfo *sel; + struct ofp_fdset *set = OFP_GET_FD_SET(_set); + + OFP_LIST_FOREACH(sel, set, si_list) { + if (sel->si_socket == fd) { + struct socket *so = ofp_get_sock_by_fd(fd); + if (!so) + return 0; + + if (so->so_options & OFP_SO_ACCEPTCONN) { + /* accepting socket */ + return !(OFP_TAILQ_EMPTY(&so->so_comp)); + } else { + /* listening socket */ + return (so->so_rcv.sb_cc > 0); + } + } + } + return 0; +} + +void +OFP_FD_SET(int fd, ofp_fd_set 
*_set) +{ + struct selinfo *sel; + struct ofp_fdset *set = OFP_GET_FD_SET(_set); + struct socket *so = ofp_get_sock_by_fd(fd); + if (!so) + return; + + sel = &so->so_rcv.sb_sel; + sel->si_socket = fd; + sel->si_wakeup_channel = set; + OFP_LIST_INSERT_HEAD(set, sel, si_list); +} + +void +OFP_FD_ZERO(ofp_fd_set *_set) +{ + struct ofp_fdset *set = OFP_GET_FD_SET(_set); + + OFP_LIST_INIT(set); +} + +void *ofp_udp_packet_parse(odp_packet_t pkt, int *length, + struct ofp_sockaddr *addr, + ofp_socklen_t *addrlen) +{ + struct ofp_sockaddr *src_addr = NULL; + ofp_socklen_t src_len = 0; + struct ofp_udphdr *uh = + (struct ofp_udphdr *)odp_packet_l4_ptr(pkt, NULL); + int udplen = odp_be_to_cpu_16(uh->uh_ulen) - sizeof(*uh); + uint8_t *data = (uint8_t *)(uh + 1); + uint8_t *start = odp_packet_data(pkt); + + if (addr && addrlen) { + src_addr = (struct ofp_sockaddr *)odp_packet_l2_ptr(pkt, + NULL); + if (src_addr->sa_family == OFP_AF_INET) + src_len = sizeof(struct ofp_sockaddr_in); + else if (src_addr->sa_family == OFP_AF_INET6) + src_len = sizeof(struct ofp_sockaddr_in6); + else + return NULL; + + memcpy(addr, src_addr, min(*addrlen, src_len)); + *addrlen = src_len; + } + if (data > start) + odp_packet_pull_head(pkt, (uint32_t)(data - start)); + int pktlen = odp_packet_len(pkt); + if (pktlen > udplen) + odp_packet_pull_tail(pkt, (uint32_t)(pktlen - udplen)); + if (length) + *length = udplen; + + return data; +} + +ofp_ssize_t +ofp_udp_pkt_sendto(int sockfd, odp_packet_t pkt, + const struct ofp_sockaddr *dest_addr, ofp_socklen_t addrlen) +{ + struct ofp_sockaddr *addr = + (struct ofp_sockaddr *)(uintptr_t)dest_addr; + struct socket *so = ofp_get_sock_by_fd(sockfd); + (void)addrlen; + + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + ofp_errno = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, pkt, addr, ODP_PACKET_INVALID, NULL); + + if (ofp_errno) + return -1; + + return 0; +} + +int ofp_socket_sigevent(struct ofp_sigevent *ev) +{ + struct ofp_sock_sigval *ss = 
ev->ofp_sigev_value.sival_ptr; + struct socket *so = ofp_get_sock_by_fd(ss->sockfd); + + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + switch (ev->ofp_sigev_notify) { + case OFP_SIGEV_NONE: + return 0; + case OFP_SIGEV_HOOK: + break; + default: + ofp_errno = OFP_EINVAL; + return -1; + }; + + so->so_sigevent = *ev; + so->so_rcv.sb_socket = so; + so->so_snd.sb_socket = so; + + return 0; +} + +int ofp_getsockopt(int sockfd, int level, int optname, + void *optval, ofp_socklen_t *optlen) +{ + struct sockopt sopt; + struct socket *so = ofp_get_sock_by_fd(sockfd); + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = level; + sopt.sopt_name = optname; + sopt.sopt_val = (void *)(uintptr_t)optval; + sopt.sopt_valsize = *optlen; + + ofp_errno = ofp_sogetopt(so, &sopt); + + *optlen = sopt.sopt_valsize; + + if (ofp_errno) + return -1; + + return 0; +} + +int ofp_setsockopt(int sockfd, int level, int optname, + const void *optval, ofp_socklen_t optlen) +{ + struct sockopt sopt; + struct socket *so = ofp_get_sock_by_fd(sockfd); + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = level; + sopt.sopt_name = optname; + sopt.sopt_val = (void *)(uintptr_t)optval; + sopt.sopt_valsize = optlen; + + ofp_errno = ofp_sosetopt(so, &sopt); + if (ofp_errno) + return -1; + + return 0; +} + +static int get_port_vlan_by_name(const char *name, int *port, int *vlan) +{ + if (strncmp(name, OFP_IFNAME_PREFIX, + strlen(OFP_IFNAME_PREFIX)) == 0) { + int i; + const char *p = NULL; + for (i = 0; i < OFP_IFNAMSIZ && name[i]; i++) + if (name[i] == '.') { + p = &name[i]; + break; + } + if (p) + *vlan = atoi(p+1); + else + *vlan = 0; + + *port = atoi(name + strlen(OFP_IFNAME_PREFIX)); + return 0; + } else if (strncmp(name, OFP_GRE_IFNAME_PREFIX, + strlen(OFP_GRE_IFNAME_PREFIX)) == 0) { + *port = GRE_PORTS; + *vlan = atoi(name + strlen(OFP_GRE_IFNAME_PREFIX)); + return 0; + } + return -1; +} + 
+int ofp_ioctl(int sockfd, int request, ...) +{ + va_list ap; + void *data; + struct ofp_ifnet *iface = NULL; + struct socket *so = ofp_get_sock_by_fd(sockfd); + if (!so) { + ofp_errno = OFP_EBADF; + return -1; + } + + va_start(ap, request); + data = va_arg(ap, void *); + va_end(ap); + + if (request == (int)(OFP_SIOCGIFCONF)) { + ofp_errno = ((*so->so_proto->pr_usrreqs->pru_control) + (so, request, data, NULL, NULL)); + } else if (OFP_IOCGROUP(request) == 'i') { + /* All the interface requests start with interface name */ + int port, vlan = 0; + char *name = data; + + if (get_port_vlan_by_name(name, &port, &vlan) < 0) { + ofp_errno = OFP_EBADF; + return -1; + } + + if (request == (int)(OFP_SIOCSIFTUN)) { + struct ofp_in_tunreq *treq = data; + const char *retstr = + ofp_config_interface_up_tun + (port, vlan, treq->iftun_vrf, + treq->iftun_local_addr.sin_addr.s_addr, + treq->iftun_remote_addr.sin_addr.s_addr, + treq->iftun_p2p_addr.sin_addr.s_addr, + treq->iftun_addr.sin_addr.s_addr, 30); + if (!retstr) + ofp_errno = 0; + else + ofp_errno = OFP_EBADMSG; + } else { + iface = ofp_get_ifnet(port, vlan); + + if (so->so_proto->pr_usrreqs->pru_control) + ofp_errno = ((*so->so_proto->pr_usrreqs->pru_control) + (so, request, data, iface, NULL)); + else + ofp_errno = OFP_EOPNOTSUPP; + } + } else if (OFP_IOCGROUP(request) == 'r') { + int port = 0, vlan = 0; + struct ofp_rtentry *rt = data; + struct ofp_route_msg msg; + uint32_t dst = ((struct ofp_sockaddr_in *)&rt->rt_dst)->sin_addr.s_addr; + uint32_t mask = ((struct ofp_sockaddr_in *)&rt->rt_genmask)->sin_addr.s_addr; + uint32_t gw = ((struct ofp_sockaddr_in *)&rt->rt_gateway)->sin_addr.s_addr; + uint32_t maskcpu = odp_be_to_cpu_32(mask); + + if (request != (int)OFP_SIOCADDRT && + request != (int)OFP_SIOCDELRT) { + ofp_errno = OFP_EBADF; + return -1; + } + + if (request == (int)OFP_SIOCADDRT) { + if (rt->rt_dev) { + if (get_port_vlan_by_name(rt->rt_dev, &port, &vlan) < 0) { + ofp_errno = OFP_EBADF; + return -1; + } + } else { 
+ uint32_t flags; + struct ofp_nh_entry *nh = + ofp_get_next_hop(rt->rt_vrf, gw, &flags); + if (!nh) { + ofp_errno = OFP_EBADF; + return -1; + } + port = nh->port; + vlan = nh->vlan; + } + } + + msg.vrf = rt->rt_vrf; + msg.dst = dst; + msg.masklen = 0; + + while (maskcpu) { + msg.masklen++; + maskcpu <<= 1; + } + + msg.gw = gw; + msg.port = port; + msg.vlan = vlan; + if (request == (int)OFP_SIOCADDRT) + msg.type = OFP_ROUTE_ADD; + else if (request == (int)OFP_SIOCDELRT) + msg.type = OFP_ROUTE_DEL; + ofp_set_route(&msg); + } else { + ofp_errno = ofp_soo_ioctl(so, request, data, NULL, NULL); + } + + if (ofp_errno) + return -1; + + return 0; +} diff --git a/src/ofp_sysctl.c b/src/ofp_sysctl.c new file mode 100644 index 00000000..6ef83100 --- /dev/null +++ b/src/ofp_sysctl.c @@ -0,0 +1,1705 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2015, Nokia Solutions and Networks + * Copyright (c) 2015, ENEA Software AB + * + * This code is derived from software contributed to Berkeley by + * Mike Karels at Berkeley Software Design, Inc. + * + * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD + * project, to make these variables more userfriendly. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 + */ + +#include + +#include "odp.h" +#include "ofpi.h" +#include "ofpi_log.h" +#include "ofpi_util.h" +#include "ofpi_errno.h" +#include "ofpi_sysctl.h" +#include "ofpi_socketvar.h" + +#define SYSCTL_DEBUG 1 + +#define malloc(_a, _b, _c) malloc(_a) +#define free(_a, _b) free(_a) + +//static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); +//static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); +//static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); + +/* + * The sysctllock protects the MIB tree. It also protects sysctl + * contexts used with dynamic sysctls. The sysctl_register_oid() and + * sysctl_unregister_oid() routines require the sysctllock to already + * be held, so the sysctl_lock() and sysctl_unlock() routines are + * provided for the few places in the kernel which need to use that + * API rather than using the dynamic API. Use of the dynamic API is + * strongly encouraged for most code. + * + * The sysctlmemlock is used to limit the amount of user memory wired for + * sysctl requests. 
This is implemented by serializing any userland + * sysctl requests larger than a single page via an exclusive lock. + */ +static odp_spinlock_t sysctllock; +static odp_spinlock_t sysctlmemlock; + +#define SYSCTL_XLOCK() odp_spinlock_lock(&sysctllock) +#define SYSCTL_XUNLOCK() odp_spinlock_unlock(&sysctllock) +#define SYSCTL_ASSERT_XLOCKED() //sx_assert(&sysctllock, SA_XLOCKED) +#define SYSCTL_INIT() odp_spinlock_init(&sysctllock) +#define SYSCTL_SLEEP(ch, wmesg, timo) do {} while (0) + // sx_sleep(ch, &sysctllock, 0, wmesg, timo) + +static int sysctl_root(OFP_SYSCTL_HANDLER_ARGS); + +struct ofp_sysctl_oid_list sysctl__children; /* root list */ + +OFP_SYSCTL_NODE(, 0, sysctl, OFP_CTLFLAG_RW, 0, + "Sysctl internal magic"); +OFP_SYSCTL_NODE(, OFP_CTL_NET, net, OFP_CTLFLAG_RW, 0, + "Network, (see socket.h)"); + +#if 0 +static int sysctl_remove_oid_locked(struct ofp_sysctl_oid *oidp, int del, + int recurse); +static int +copyout(const void *src, void *dst, size_t len) +{ + memcpy(dst, src, len); + return 0; +} + +static int +copyin(const void *src, void *dst, size_t len) +{ + memcpy(dst, src, len); + return 0; +} +#endif + +static struct ofp_sysctl_oid * +sysctl_find_oidname(const char *name, struct ofp_sysctl_oid_list *list) +{ + struct ofp_sysctl_oid *oidp; + + SYSCTL_ASSERT_XLOCKED(); + OFP_SLIST_FOREACH(oidp, list, oid_link) { + if (strcmp(oidp->oid_name, name) == 0) { + return (oidp); + } + } + return (NULL); +} + +/* + * Initialization of the MIB tree. + * + * Order by number in each list. + */ + +static void +sysctl_register_oid(struct ofp_sysctl_oid *oidp) +{ + struct ofp_sysctl_oid_list *parent = oidp->oid_parent; + struct ofp_sysctl_oid *p; + struct ofp_sysctl_oid *q; + + /* + * First check if another oid with the same name already + * exists in the parent's list. 
+ */ + SYSCTL_ASSERT_XLOCKED(); + p = sysctl_find_oidname(oidp->oid_name, parent); + if (p != NULL) { + if ((p->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + p->oid_refcnt++; + return; + } else { + OFP_ERR("can't re-use a leaf (%s)!\n", p->oid_name); + return; + } + } + /* + * If this oid has a number OFP_OID_AUTO, give it a number which + * is greater than any current oid. + * NOTE: DO NOT change the starting value here, change it in + * , and make sure it is at least 256 to + * accomodate e.g. net.inet.raw as a static sysctl node. + */ + if (oidp->oid_number == OFP_OID_AUTO) { + static int newoid = OFP_CTL_AUTO_START; + + oidp->oid_number = newoid++; + if (newoid == 0x7fffffff) + panic("out of oids"); + } +#if 0 + else if (oidp->oid_number >= OFP_CTL_AUTO_START) { + /* do not panic; this happens when unregistering sysctl sets */ + OFP_ERR("static sysctl oid too high: %d", oidp->oid_number); + } +#endif + + /* + * Insert the oid into the parent's list in order. + */ + q = NULL; + OFP_SLIST_FOREACH(p, parent, oid_link) { + if (oidp->oid_number < p->oid_number) + break; + q = p; + } + if (q) + OFP_SLIST_INSERT_AFTER(q, oidp, oid_link); + else + OFP_SLIST_INSERT_HEAD(parent, oidp, oid_link); +} + +#if 0 +static void +sysctl_unregister_oid(struct ofp_sysctl_oid *oidp) +{ + struct ofp_sysctl_oid *p; + int error; + + SYSCTL_ASSERT_XLOCKED(); + error = OFP_ENOENT; + if (oidp->oid_number == OFP_OID_AUTO) { + error = OFP_EINVAL; + } else { + OFP_SLIST_FOREACH(p, oidp->oid_parent, oid_link) { + if (p == oidp) { + OFP_SLIST_REMOVE(oidp->oid_parent, oidp, + ofp_sysctl_oid, oid_link); + error = 0; + break; + } + } + } + + /* + * This can happen when a module fails to register and is + * being unloaded afterwards. It should not be a panic() + * for normal use. + */ + if (error) + OFP_ERR("%s: failed to unregister sysctl\n", __func__); +} + +/* Initialize a new context to keep track of dynamically added sysctls. 
*/ +static int +sysctl_ctx_init(struct sysctl_ctx_list *c) +{ + + if (c == NULL) { + return (OFP_EINVAL); + } + + /* + * No locking here, the caller is responsible for not adding + * new nodes to a context until after this function has + * returned. + */ + OFP_TAILQ_INIT(c); + return (0); +} + +/* Free the context, and destroy all dynamic oids registered in this context */ +static int +sysctl_ctx_free(struct sysctl_ctx_list *clist) +{ + struct sysctl_ctx_entry *e, *e1; + int error; + + error = 0; + /* + * First perform a "dry run" to check if it's ok to remove oids. + * XXX FIXME + * XXX This algorithm is a hack. But I don't know any + * XXX better solution for now... + */ + SYSCTL_XLOCK(); + OFP_TAILQ_FOREACH(e, clist, link) { + error = sysctl_remove_oid_locked(e->entry, 0, 0); + if (error) + break; + } + /* + * Restore deregistered entries, either from the end, + * or from the place where error occured. + * e contains the entry that was not unregistered + */ + if (error) + e1 = OFP_TAILQ_PREV(e, sysctl_ctx_list, link); + else + e1 = OFP_TAILQ_LAST(clist, sysctl_ctx_list); + while (e1 != NULL) { + sysctl_register_oid(e1->entry); + e1 = OFP_TAILQ_PREV(e1, sysctl_ctx_list, link); + } + if (error) { + SYSCTL_XUNLOCK(); + return(OFP_EBUSY); + } + /* Now really delete the entries */ + e = OFP_TAILQ_FIRST(clist); + while (e != NULL) { + e1 = OFP_TAILQ_NEXT(e, link); + error = sysctl_remove_oid_locked(e->entry, 1, 0); + if (error) + panic("sysctl_remove_oid: corrupt tree\n"); + free(e, M_SYSCTLOID); + e = e1; + } + SYSCTL_XUNLOCK(); + return (error); +} +#endif + +/* Add an entry to the context */ +static struct sysctl_ctx_entry * +sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct ofp_sysctl_oid *oidp) +{ + struct sysctl_ctx_entry *e; + + SYSCTL_ASSERT_XLOCKED(); + if (clist == NULL || oidp == NULL) + return(NULL); + e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); + e->entry = oidp; + OFP_TAILQ_INSERT_HEAD(clist, e, link); + return (e); +} + 
+#if 0 +/* Find an entry in the context */ +static struct sysctl_ctx_entry * +sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct ofp_sysctl_oid *oidp) +{ + struct sysctl_ctx_entry *e; + + SYSCTL_ASSERT_XLOCKED(); + if (clist == NULL || oidp == NULL) + return(NULL); + OFP_TAILQ_FOREACH(e, clist, link) { + if(e->entry == oidp) + return(e); + } + return (e); +} + +/* + * Delete an entry from the context. + * NOTE: this function doesn't free oidp! You have to remove it + * with sysctl_remove_oid(). + */ +static int +sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct ofp_sysctl_oid *oidp) +{ + struct sysctl_ctx_entry *e; + + if (clist == NULL || oidp == NULL) + return (OFP_EINVAL); + SYSCTL_XLOCK(); + e = sysctl_ctx_entry_find(clist, oidp); + if (e != NULL) { + OFP_TAILQ_REMOVE(clist, e, link); + SYSCTL_XUNLOCK(); + free(e, M_SYSCTLOID); + return (0); + } else { + SYSCTL_XUNLOCK(); + return (OFP_ENOENT); + } +} + + +/* + * Remove dynamically created sysctl trees. + * oidp - top of the tree to be removed + * del - if 0 - just deregister, otherwise free up entries as well + * recurse - if != 0 traverse the subtree to be deleted + */ +static int +sysctl_remove_oid(struct ofp_sysctl_oid *oidp, int del, int recurse) +{ + int error; + + SYSCTL_XLOCK(); + error = sysctl_remove_oid_locked(oidp, del, recurse); + SYSCTL_XUNLOCK(); + return (error); +} + +static int +sysctl_remove_name(struct ofp_sysctl_oid *parent, const char *name, + int del, int recurse) +{ + struct ofp_sysctl_oid *p, *tmp; + int error; + + error = OFP_ENOENT; + SYSCTL_XLOCK(); + OFP_SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { + if (strcmp(p->oid_name, name) == 0) { + error = sysctl_remove_oid_locked(p, del, recurse); + break; + } + } + SYSCTL_XUNLOCK(); + + return (error); +} + + +static int +sysctl_remove_oid_locked(struct ofp_sysctl_oid *oidp, int del, int recurse) +{ + struct ofp_sysctl_oid *p, *tmp; + int error; + + SYSCTL_ASSERT_XLOCKED(); + if (oidp == NULL) + 
return(OFP_EINVAL); + if ((oidp->oid_kind & OFP_CTLFLAG_DYN) == 0) { + OFP_ERR("can't remove non-dynamic nodes!\n"); + return (OFP_EINVAL); + } + /* + * WARNING: normal method to do this should be through + * sysctl_ctx_free(). Use recursing as the last resort + * method to purge your sysctl tree of leftovers... + * However, if some other code still references these nodes, + * it will panic. + */ + if ((oidp->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + if (oidp->oid_refcnt == 1) { + OFP_SLIST_FOREACH_SAFE(p, + SYSCTL_CHILDREN(oidp), oid_link, tmp) { + if (!recurse) + return (OFP_ENOTEMPTY); + error = sysctl_remove_oid_locked(p, del, + recurse); + if (error) + return (error); + } + if (del) + free(SYSCTL_CHILDREN(oidp), M_SYSCTLOID); + } + } + if (oidp->oid_refcnt > 1 ) { + oidp->oid_refcnt--; + } else { + if (oidp->oid_refcnt == 0) { + OFP_ERR("Warning: bad oid_refcnt=%u (%s)!\n", + oidp->oid_refcnt, oidp->oid_name); + return (OFP_EINVAL); + } + sysctl_unregister_oid(oidp); + if (del) { + /* + * Wait for all threads running the handler to drain. + * This preserves the previous behavior when the + * sysctl lock was held across a handler invocation, + * and is necessary for module unload correctness. + */ + while (oidp->oid_running > 0) { + oidp->oid_kind |= OFP_CTLFLAG_DYING; + SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); + } + if (oidp->oid_descr) + free((void *)(uintptr_t)(const void *)oidp->oid_descr, M_SYSCTLOID); + free((void *)(uintptr_t)(const void *)oidp->oid_name, + M_SYSCTLOID); + free(oidp, M_SYSCTLOID); + } + } + return (0); +} +#endif +/* + * Create new sysctls at run time. + * clist may point to a valid context initialized with sysctl_ctx_init(). 
+ */ +struct ofp_sysctl_oid * +ofp_sysctl_add_oid(struct sysctl_ctx_list *clist, struct ofp_sysctl_oid_list *parent, + int number, const char *name, int kind, void *arg1, intptr_t arg2, + int (*handler)(OFP_SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) +{ + struct ofp_sysctl_oid *oidp; + ssize_t len; + char *newname; + + /* You have to hook up somewhere.. */ + if (parent == NULL) + return(NULL); + /* Check if the node already exists, otherwise create it */ + SYSCTL_XLOCK(); + oidp = sysctl_find_oidname(name, parent); + if (oidp != NULL) { + if ((oidp->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + oidp->oid_refcnt++; + /* Update the context */ + if (clist != NULL) + sysctl_ctx_entry_add(clist, oidp); + SYSCTL_XUNLOCK(); + return (oidp); + } else { + SYSCTL_XUNLOCK(); + OFP_ERR("can't re-use a leaf (%s)!\n", name); + return (NULL); + } + } + oidp = malloc(sizeof(struct ofp_sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); + oidp->oid_parent = parent; + OFP_SLIST_NEXT(oidp, oid_link) = NULL; + oidp->oid_number = number; + oidp->oid_refcnt = 1; + len = strlen(name); + newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); + bcopy(name, newname, len + 1); + newname[len] = '\0'; + oidp->oid_name = newname; + oidp->oid_handler = handler; + oidp->oid_kind = OFP_CTLFLAG_DYN | kind; + if ((kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + /* Allocate space for children */ + SYSCTL_CHILDREN_SET(oidp, malloc(sizeof(struct ofp_sysctl_oid_list), + M_SYSCTLOID, M_WAITOK)); + OFP_SLIST_INIT(SYSCTL_CHILDREN(oidp)); + oidp->oid_arg2 = arg2; + } else { + oidp->oid_arg1 = arg1; + oidp->oid_arg2 = arg2; + } + oidp->oid_fmt = fmt; + if (descr) { + int len = strlen(descr) + 1; + oidp->oid_descr = malloc(len, M_SYSCTLOID, M_WAITOK); + if (oidp->oid_descr) + strcpy((char *)(uintptr_t)(const void *)oidp->oid_descr, descr); + } + /* Update the context, if used */ + if (clist != NULL) + sysctl_ctx_entry_add(clist, oidp); + /* Register this oid */ + sysctl_register_oid(oidp); + 
SYSCTL_XUNLOCK(); + return (oidp); +} + +#if 0 +/* + * Rename an existing oid. + */ +static void +sysctl_rename_oid(struct ofp_sysctl_oid *oidp, const char *name) +{ + ssize_t len; + char *newname; + void *oldname; + + len = strlen(name); + newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); + bcopy(name, newname, len + 1); + newname[len] = '\0'; + SYSCTL_XLOCK(); + oldname = (void *)(uintptr_t)(const void *)oidp->oid_name; + oidp->oid_name = newname; + SYSCTL_XUNLOCK(); + free(oldname, M_SYSCTLOID); +} + +/* + * Reparent an existing oid. + */ +static int +sysctl_move_oid(struct ofp_sysctl_oid *oid, struct ofp_sysctl_oid_list *parent) +{ + struct ofp_sysctl_oid *oidp; + + SYSCTL_XLOCK(); + if (oid->oid_parent == parent) { + SYSCTL_XUNLOCK(); + return (0); + } + oidp = sysctl_find_oidname(oid->oid_name, parent); + if (oidp != NULL) { + SYSCTL_XUNLOCK(); + return (OFP_EEXIST); + } + sysctl_unregister_oid(oid); + oid->oid_parent = parent; + oid->oid_number = OFP_OID_AUTO; + sysctl_register_oid(oid); + SYSCTL_XUNLOCK(); + return (0); +} +#endif + +/* + * Register the kernel's oids on startup. + */ +SET_DECLARE(sysctl_set, struct ofp_sysctl_oid); + +static void +sysctl_register_all(void *arg) +{ + struct ofp_sysctl_oid **oidp; + (void)arg; + + odp_spinlock_init(&sysctlmemlock); + SYSCTL_INIT(); + SYSCTL_XLOCK(); + SET_FOREACH(oidp, sysctl_set) + sysctl_register_oid(*oidp); + SYSCTL_XUNLOCK(); +} + +SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0); + +/* + * "Staff-functions" + * + * These functions implement a presently undocumented interface + * used by the sysctl program to walk the tree, and get the type + * so it can print the value. + * This interface is under work and consideration, and should probably + * be killed with a big axe by the first person who can find the time. + * (be aware though, that the proper interface isn't as obvious as it + * may seem, there are various conflicting requirements. + * + * {0,0} printf the entire MIB-tree. 
+ * {0,1,...} return the name of the "..." OID. + * {0,2,...} return the next OID. + * {0,3} return the OID of the name in "new" + * {0,4,...} return the kind & format info for the "..." OID. + * {0,5,...} return the description the "..." OID. + */ + +#ifdef SYSCTL_DEBUG +#include + +static void +sysctl_sysctl_debug_dump_node(int fd, struct ofp_sysctl_oid_list *l, int i) +{ + int k; + struct ofp_sysctl_oid *oidp; + + SYSCTL_ASSERT_XLOCKED(); + OFP_SLIST_FOREACH(oidp, l, oid_link) { + + for (k=0; koid_number, oidp->oid_name); + + ofp_sendf(fd, "%c%c", + oidp->oid_kind & OFP_CTLFLAG_RD ? 'R':' ', + oidp->oid_kind & OFP_CTLFLAG_WR ? 'W':' '); + + /*if (oidp->oid_handler) + ofp_sendf(fd, " *Handler");*/ + + switch (oidp->oid_kind & OFP_CTLTYPE) { + case OFP_CTLTYPE_NODE: + ofp_sendf(fd, " Node (%s)\r\n", oidp->oid_descr); + if (!oidp->oid_handler) { + sysctl_sysctl_debug_dump_node(fd, + oidp->oid_arg1, i+2); + } + break; + case OFP_CTLTYPE_INT: + ofp_sendf(fd, " int (%s)\r\n", oidp->oid_descr); + break; + case OFP_CTLTYPE_UINT: + ofp_sendf(fd, " u_int (%s)\r\n", oidp->oid_descr); + break; + case OFP_CTLTYPE_LONG: + ofp_sendf(fd, " long (%s)\r\n", oidp->oid_descr); + break; + case OFP_CTLTYPE_ULONG: + ofp_sendf(fd, " u_long (%s)\r\n", oidp->oid_descr); + break; + case OFP_CTLTYPE_STRING: + ofp_sendf(fd, " string (%s)\r\n", oidp->oid_descr); + break; + case OFP_CTLTYPE_U64: + ofp_sendf(fd, " uint64_t (%s)\r\n", oidp->oid_descr); + break; + case OFP_CTLTYPE_S64: + ofp_sendf(fd, " int64_t (%s)\r\n", oidp->oid_descr); + break; + case OFP_CTLTYPE_OPAQUE: + ofp_sendf(fd, " opaque/struct (%s)\r\n", oidp->oid_descr); + break; + } + } +} + +static int +sysctl_sysctl_debug(OFP_SYSCTL_HANDLER_ARGS) +{ + (void)oidp; + (void)arg1; + (void)arg2; + (void)req; +#if 0 /* HJo */ + int error; + error = priv_check(req->td, PRIV_SYSCTL_DEBUG); + if (error) + return (error); +#endif + SYSCTL_XLOCK(); + sysctl_sysctl_debug_dump_node(1, &sysctl__children, 0); + SYSCTL_XUNLOCK(); + return 
(OFP_ENOENT); +} + +OFP_SYSCTL_PROC(_sysctl, 0, debug, OFP_CTLTYPE_STRING|OFP_CTLFLAG_RD, + 0, 0, sysctl_sysctl_debug, "-", ""); + +#endif +#if 0 +static int +sysctl_sysctl_name(OFP_SYSCTL_HANDLER_ARGS) +{ + int *name = (int *) arg1; + u_int namelen = arg2; + int error = 0; + struct ofp_sysctl_oid *oid; + struct ofp_sysctl_oid_list *lsp = &sysctl__children, *lsp2; + char buf[10]; + (void) oidp; + + SYSCTL_XLOCK(); + while (namelen) { + if (!lsp) { + snprintf(buf,sizeof(buf),"%d",*name); + if (req->oldidx) + error = SYSCTL_OUT(req, ".", 1); + if (!error) + error = SYSCTL_OUT(req, buf, strlen(buf)); + if (error) + goto out; + namelen--; + name++; + continue; + } + lsp2 = 0; + OFP_SLIST_FOREACH(oid, lsp, oid_link) { + if (oid->oid_number != *name) + continue; + + if (req->oldidx) + error = SYSCTL_OUT(req, ".", 1); + if (!error) + error = SYSCTL_OUT(req, oid->oid_name, + strlen(oid->oid_name)); + if (error) + goto out; + + namelen--; + name++; + + if ((oid->oid_kind & OFP_CTLTYPE) != OFP_CTLTYPE_NODE) + break; + + if (oid->oid_handler) + break; + + lsp2 = SYSCTL_CHILDREN(oid); + break; + } + lsp = lsp2; + } + error = SYSCTL_OUT(req, "", 1); + out: + SYSCTL_XUNLOCK(); + return (error); +} +#endif +/* + * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in + * capability mode. 
+ */ +/* +static OFP_SYSCTL_NODE(_sysctl, 1, name, OFP_CTLFLAG_RD | OFP_CTLFLAG_CAPRD, + sysctl_sysctl_name, ""); +*/ + +static int +sysctl_sysctl_next_ls(struct ofp_sysctl_oid_list *lsp, int *name, u_int namelen, + int *next, int *len, int level, struct ofp_sysctl_oid **oidpp) +{ + struct ofp_sysctl_oid *oidp; + + SYSCTL_ASSERT_XLOCKED(); + *len = level; + OFP_SLIST_FOREACH(oidp, lsp, oid_link) { + *next = oidp->oid_number; + *oidpp = oidp; + + if (oidp->oid_kind & OFP_CTLFLAG_SKIP) + continue; + + if (!namelen) { + if ((oidp->oid_kind & OFP_CTLTYPE) != OFP_CTLTYPE_NODE) + return (0); + if (oidp->oid_handler) + /* We really should call the handler here...*/ + return (0); + lsp = SYSCTL_CHILDREN(oidp); + if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, + len, level+1, oidpp)) + return (0); + goto emptynode; + } + + if (oidp->oid_number < *name) + continue; + + if (oidp->oid_number > *name) { + if ((oidp->oid_kind & OFP_CTLTYPE) != OFP_CTLTYPE_NODE) + return (0); + if (oidp->oid_handler) + return (0); + lsp = SYSCTL_CHILDREN(oidp); + if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, + next+1, len, level+1, oidpp)) + return (0); + goto next; + } + if ((oidp->oid_kind & OFP_CTLTYPE) != OFP_CTLTYPE_NODE) + continue; + + if (oidp->oid_handler) + continue; + + lsp = SYSCTL_CHILDREN(oidp); + if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, + len, level+1, oidpp)) + return (0); + next: + namelen = 1; + emptynode: + *len = level; + } + return (1); +} + +static int +sysctl_sysctl_next(OFP_SYSCTL_HANDLER_ARGS) +{ + int *name = (int *) arg1; + u_int namelen = arg2; + int i, j, error; + struct ofp_sysctl_oid *oid; + struct ofp_sysctl_oid_list *lsp = &sysctl__children; + int newoid[OFP_CTL_MAXNAME]; + (void) oidp; + + SYSCTL_XLOCK(); + i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); + OFP_LOG("name=%p namelen=%d i=%d\n", name, namelen, i); + SYSCTL_XUNLOCK(); + if (i) + return (OFP_ENOENT); + error = SYSCTL_OUT(req, newoid, j * sizeof (int)); + return 
(error); +} + +/* + * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in + * capability mode. + */ +static OFP_SYSCTL_NODE(_sysctl, 2, next, OFP_CTLFLAG_RD | OFP_CTLFLAG_CAPRD, + sysctl_sysctl_next, ""); + +static int +name2oid(char *name, int *oid, int *len, struct ofp_sysctl_oid **oidpp) +{ + int i; + struct ofp_sysctl_oid *oidp; + struct ofp_sysctl_oid_list *lsp = &sysctl__children; + char *p; + + SYSCTL_ASSERT_XLOCKED(); + + if (!*name) + return (OFP_ENOENT); + + p = name + strlen(name) - 1 ; + if (*p == '.') + *p = '\0'; + + *len = 0; + + for (p = name; *p && *p != '.'; p++) + ; + i = *p; + if (i == '.') + *p = '\0'; + + oidp = OFP_SLIST_FIRST(lsp); + + while (oidp && *len < OFP_CTL_MAXNAME) { + if (strcmp(name, oidp->oid_name)) { + oidp = OFP_SLIST_NEXT(oidp, oid_link); + continue; + } + *oid++ = oidp->oid_number; + (*len)++; + + if (!i) { + if (oidpp) + *oidpp = oidp; + return (0); + } + + if ((oidp->oid_kind & OFP_CTLTYPE) != OFP_CTLTYPE_NODE) + break; + + if (oidp->oid_handler) + break; + + lsp = SYSCTL_CHILDREN(oidp); + oidp = OFP_SLIST_FIRST(lsp); + name = p+1; + for (p = name; *p && *p != '.'; p++) + ; + i = *p; + if (i == '.') + *p = '\0'; + } + return (OFP_ENOENT); +} + +#define MAXPATHLEN 1024 + +static int +sysctl_sysctl_name2oid(OFP_SYSCTL_HANDLER_ARGS) +{ + char *p; + int error, oid[OFP_CTL_MAXNAME], len = 0; + struct ofp_sysctl_oid *op = 0; + (void)arg1; + (void)arg2; + (void)oidp; + + if (!req->newlen) + return (OFP_ENOENT); + if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ + return (OFP_ENAMETOOLONG); + + p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); + + error = SYSCTL_IN(req, p, req->newlen); + if (error) { + free(p, M_SYSCTL); + return (error); + } + + p [req->newlen] = '\0'; + + SYSCTL_XLOCK(); + error = name2oid(p, oid, &len, &op); + SYSCTL_XUNLOCK(); + + free(p, M_SYSCTL); + + if (error) + return (error); + + error = SYSCTL_OUT(req, oid, len * sizeof *oid); + return (error); +} + +/* + * XXXRW/JA: 
Shouldn't return name2oid data for nodes that we don't permit in + * capability mode. + */ +OFP_SYSCTL_PROC(_sysctl, 3, name2oid, + OFP_CTLTYPE_INT | OFP_CTLFLAG_RW | OFP_CTLFLAG_ANYBODY | OFP_CTLFLAG_MPSAFE + | OFP_CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); + +static int +sysctl_sysctl_oidfmt(OFP_SYSCTL_HANDLER_ARGS) +{ + struct ofp_sysctl_oid *oid; + int error; + (void)oidp; + + SYSCTL_XLOCK(); + error = ofp_sysctl_find_oid(arg1, arg2, &oid, NULL, req); + if (error) + goto out; + + if (oid->oid_fmt == NULL) { + error = OFP_ENOENT; + goto out; + } + error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); + if (error) + goto out; + error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); + out: + SYSCTL_XUNLOCK(); + return (error); +} + + +static OFP_SYSCTL_NODE(_sysctl, 4, oidfmt, OFP_CTLFLAG_RD|OFP_CTLFLAG_MPSAFE|OFP_CTLFLAG_CAPRD, + sysctl_sysctl_oidfmt, ""); + +static int +sysctl_sysctl_oiddescr(OFP_SYSCTL_HANDLER_ARGS) +{ + struct ofp_sysctl_oid *oid; + int error; + (void)oidp; + + SYSCTL_XLOCK(); + error = ofp_sysctl_find_oid(arg1, arg2, &oid, NULL, req); + if (error) + goto out; + + if (oid->oid_descr == NULL) { + error = OFP_ENOENT; + goto out; + } + error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); + out: + SYSCTL_XUNLOCK(); + return (error); +} + +static OFP_SYSCTL_NODE(_sysctl, 5, oiddescr, OFP_CTLFLAG_RD|OFP_CTLFLAG_CAPRD, + sysctl_sysctl_oiddescr, ""); + +/* + * Default "handler" functions. + */ + +/* + * Handle an int, signed or unsigned. + * Two cases: + * a variable: point arg1 at it. + * a constant: pass it in arg2. + */ + +int +sysctl_handle_int(OFP_SYSCTL_HANDLER_ARGS) +{ + int tmpout, error = 0; + (void)oidp; + + /* + * Attempt to get a coherent snapshot by making a copy of the data. 
+ */ + if (arg1) + tmpout = *(int *)arg1; + else + tmpout = arg2; + error = SYSCTL_OUT(req, &tmpout, sizeof(int)); + + if (error || !req->newptr) + return (error); + + if (!arg1) + error = OFP_EPERM; + else + error = SYSCTL_IN(req, arg1, sizeof(int)); + return (error); +} + +/* + * Based on sysctl_handle_int(): report the tick count stored at arg1 as + * milliseconds and, on a write, convert the new millisecond value back + * into ticks. A new value that converts to less than one tick is rejected + * with OFP_EINVAL and the stored value is left unchanged. + * Note: this is used by TCP. + */ + +int +sysctl_msec_to_ticks(OFP_SYSCTL_HANDLER_ARGS) +{ + int error, s, tt; + (void)arg2; + + tt = *(int *)arg1; + s = (int)((int64_t)tt * 1000 / hz); + + error = sysctl_handle_int(oidp, &s, 0, req); + if (error || !req->newptr) + return (error); + + tt = (int)((int64_t)s * hz / 1000); + if (tt < 1) + return (OFP_EINVAL); + + *(int *)arg1 = tt; + return (0); +} + + +/* + * Handle a long, signed or unsigned. arg1 points to it. + * Returns OFP_EINVAL when arg1 is NULL. When SCTL_MASK32 is defined and + * set in req->flags, the value is transferred as an int instead of a long. + */ + +int +sysctl_handle_long(OFP_SYSCTL_HANDLER_ARGS) +{ + int error = 0; + long tmplong; +#ifdef SCTL_MASK32 + int tmpint; +#endif + (void)arg2; + (void)oidp; + /* + * Attempt to get a coherent snapshot by making a copy of the data. + */ + if (!arg1) + return (OFP_EINVAL); + tmplong = *(long *)arg1; +#ifdef SCTL_MASK32 + if (req->flags & SCTL_MASK32) { + tmpint = tmplong; + error = SYSCTL_OUT(req, &tmpint, sizeof(int)); + } else +#endif + error = SYSCTL_OUT(req, &tmplong, sizeof(long)); + + if (error || !req->newptr) + return (error); + +#ifdef SCTL_MASK32 + if (req->flags & SCTL_MASK32) { + error = SYSCTL_IN(req, &tmpint, sizeof(int)); + *(long *)arg1 = (long)tmpint; + } else +#endif + error = SYSCTL_IN(req, arg1, sizeof(long)); + return (error); +} + +/* + * Handle a 64 bit int, signed or unsigned. arg1 points to it. + */ +int +sysctl_handle_64(OFP_SYSCTL_HANDLER_ARGS) +{ + int error = 0; + uint64_t tmpout; + (void)oidp; + (void)arg2; + + /* + * Attempt to get a coherent snapshot by making a copy of the data. 
+ */ + if (!arg1) + return (OFP_EINVAL); + tmpout = *(uint64_t *)arg1; + error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); + + if (error || !req->newptr) + return (error); + + error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); + return (error); +} + +static size_t strlcpy(char *dst, const char *src, size_t size) +{ + strncpy(dst, src, size); + dst[size-1] = 0; + return strlen(src); +} + +/* + * Handle our generic '\0' terminated 'C' string. + * Two cases: + * a variable string: point arg1 at it, arg2 is max length. + * a constant string: point arg1 at it, arg2 is zero. + */ + +int +sysctl_handle_string(OFP_SYSCTL_HANDLER_ARGS) +{ + int error=0; + char *tmparg; + size_t outlen; + (void)oidp; + + /* + * Attempt to get a coherent snapshot by copying to a + * temporary kernel buffer. + */ +retry: + outlen = strlen((char *)arg1)+1; + tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK); + + if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) { + free(tmparg, M_SYSCTLTMP); + goto retry; + } + + error = SYSCTL_OUT(req, tmparg, outlen); + free(tmparg, M_SYSCTLTMP); + + if (error || !req->newptr) + return (error); + + if ((int)(req->newlen - req->newidx) >= arg2) { + error = OFP_EINVAL; + } else { + arg2 = (req->newlen - req->newidx); + error = SYSCTL_IN(req, arg1, arg2); + ((char *)arg1)[arg2] = '\0'; + } + + return (error); +} + +/* + * Handle any kind of opaque data. + * arg1 points to it, arg2 is the size. + */ + +int +sysctl_handle_opaque(OFP_SYSCTL_HANDLER_ARGS) +{ + int error, tries; + int generation; + struct ofp_sysctl_req req2; + (void)oidp; + + /* + * Attempt to get a coherent snapshot, by using the thread + * pre-emption counter updated from within mi_switch() to + * determine if we were pre-empted during a bcopy() or + * copyout(). Make 3 attempts at doing this before giving up. + * If we encounter an error, stop immediately. 
+ */ + tries = 0; + req2 = *req; +retry: + generation = odp_cpu_id(); + error = SYSCTL_OUT(req, arg1, arg2); + if (error) + return (error); + tries++; + if (generation != odp_cpu_id() && tries < 3) { + *req = req2; + goto retry; + } + + error = SYSCTL_IN(req, arg1, arg2); + + return (error); +} + +/* + * Transfer functions to/from kernel space. + * XXX: rather untested at this point + */ +static int +sysctl_old_kernel(struct ofp_sysctl_req *req, const void *p, size_t l) +{ + size_t i = 0; + + if (req->oldptr) { + i = l; + if (req->oldlen <= req->oldidx) + i = 0; + else + if (i > req->oldlen - req->oldidx) + i = req->oldlen - req->oldidx; + if (i > 0) + bcopy(p, (char *)req->oldptr + req->oldidx, i); + } + req->oldidx += l; + if (req->oldptr && i != l) + return (OFP_ENOMEM); + return (0); +} + +static int +sysctl_new_kernel(struct ofp_sysctl_req *req, void *p, size_t l) +{ + if (!req->newptr) + return (0); + if (req->newlen - req->newidx < l) + return (OFP_EINVAL); + bcopy((const char *)req->newptr + req->newidx, p, l); + req->newidx += l; + return (0); +} + +int +ofp_kernel_sysctl(struct thread *td, const int *name, u_int namelen, void *old, + size_t *oldlenp, const void *new, size_t newlen, size_t *retval, int flags) +{ + int error = 0; + struct ofp_sysctl_req req; + + bzero(&req, sizeof req); + + req.td = td; + req.flags = flags; + + if (oldlenp) { + req.oldlen = *oldlenp; + } + req.validlen = req.oldlen; + + if (old) { + req.oldptr= old; + } + + if (new != NULL) { + req.newlen = newlen; + req.newptr = new; + } + + req.oldfunc = sysctl_old_kernel; + req.newfunc = sysctl_new_kernel; + req.lock = REQ_UNWIRED; + + SYSCTL_XLOCK(); + error = sysctl_root(0, (void *)(intptr_t)name, namelen, &req); + SYSCTL_XUNLOCK(); + +#if 0 /* HJo: FIX */ + if (req.lock == REQ_WIRED && req.validlen > 0) + vsunlock(req.oldptr, req.validlen); +#endif + if (error && error != OFP_ENOMEM) + return (error); + + if (retval) { + if (req.oldptr && req.oldidx > req.validlen) + *retval = 
req.validlen; + else + *retval = req.oldidx; + } + return (error); +} + +static int +kernel_sysctlbyname(struct thread *td, const char *name, void *old, size_t *oldlenp, + const void *new, size_t newlen, size_t *retval, int flags) +{ + int oid[OFP_CTL_MAXNAME]; + size_t oidlen, plen; + int error; + + oid[0] = 0; /* sysctl internal magic */ + oid[1] = 3; /* name2oid */ + oidlen = sizeof(oid); + + error = ofp_kernel_sysctl(td, oid, 2, oid, &oidlen, + (const void *)name, strlen(name), &plen, flags); + if (error) + return (error); + + error = ofp_kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, + new, newlen, retval, flags); + return (error); +} + +#if 0 +/* + * Transfer function to/from user space. + */ +static int +sysctl_old_user(struct ofp_sysctl_req *req, const void *p, size_t l) +{ + size_t i, len, origidx; + int error; + + origidx = req->oldidx; + req->oldidx += l; + if (req->oldptr == NULL) + return (0); + i = l; + len = req->validlen; + if (len <= origidx) + i = 0; + else { + if (i > len - origidx) + i = len - origidx; + error = copyout(p, (char *)req->oldptr + origidx, i); + if (error != 0) + return (error); + } + if (i < l) + return (OFP_ENOMEM); + return (0); +} + +static int +sysctl_new_user(struct ofp_sysctl_req *req, void *p, size_t l) +{ + int error = 0; + + if (!req->newptr) + return (0); + if (req->newlen - req->newidx < l) + return (OFP_EINVAL); + /* HJo + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "sysctl_new_user()"); + */ + error = copyin((const char *)req->newptr + req->newidx, p, l); + req->newidx += l; + return (error); +} + +/* + * Wire the user space destination buffer. If set to a value greater than + * zero, the len parameter limits the maximum amount of wired memory. + */ +static int +sysctl_wire_old_buffer(struct ofp_sysctl_req *req, size_t len) +{ + size_t wiredlen; + + wiredlen = (len > 0 && len < req->oldlen) ? 
len : req->oldlen; + + if (req->lock != REQ_WIRED && req->oldptr && + req->oldfunc == sysctl_old_user) { +#if 0 /* HJo */ + if (wiredlen != 0) { + int ret = 0; + ret = vslock(req->oldptr, wiredlen); + if (ret != 0) { + if (ret != OFP_ENOMEM) + return (ret); + wiredlen = 0; + } + } +#endif + req->lock = REQ_WIRED; + req->validlen = wiredlen; + } + return (0); +} +#endif + +int +ofp_sysctl_find_oid(const int *name, u_int namelen, struct ofp_sysctl_oid **noid, + int *nindx, struct ofp_sysctl_req *req) +{ + struct ofp_sysctl_oid_list *lsp; + struct ofp_sysctl_oid *oid; + int indx; + (void)req; + + SYSCTL_ASSERT_XLOCKED(); + lsp = &sysctl__children; + indx = 0; + while (indx < OFP_CTL_MAXNAME) { + OFP_SLIST_FOREACH(oid, lsp, oid_link) { + if (oid->oid_number == name[indx]) + break; + } + if (oid == NULL) + return (OFP_ENOENT); + + indx++; + if ((oid->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + if (oid->oid_handler != NULL || indx == (int)namelen) { + *noid = oid; + if (nindx != NULL) + *nindx = indx; + KASSERT((oid->oid_kind & OFP_CTLFLAG_DYING) == 0, + ("%s found DYING node %p", __func__, oid)); + return (0); + } + lsp = SYSCTL_CHILDREN(oid); + } else if (indx == (int)namelen) { + *noid = oid; + if (nindx != NULL) + *nindx = indx; + KASSERT((oid->oid_kind & OFP_CTLFLAG_DYING) == 0, + ("%s found DYING node %p", __func__, oid)); + return (0); + } else { + return (OFP_ENOTDIR); + } + } + return (OFP_ENOENT); +} + +/* + * Traverse our tree, and find the right node, execute whatever it points + * to, and return the resulting error code. + */ + +static int +sysctl_root(OFP_SYSCTL_HANDLER_ARGS) +{ + struct ofp_sysctl_oid *oid; + int error, indx; + (void)oidp; + + SYSCTL_ASSERT_XLOCKED(); + + error = ofp_sysctl_find_oid(arg1, arg2, &oid, &indx, req); + if (error) + return (error); + + if ((oid->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + /* + * You can't call a sysctl when it's a node, but has + * no handler. Inform the user that it's a node. 
+ * The indx may or may not be the same as namelen. + */ + if (oid->oid_handler == NULL) + return (OFP_EISDIR); + } + + /* Is this sysctl writable? */ + if (req->newptr && !(oid->oid_kind & OFP_CTLFLAG_WR)) + return (OFP_EPERM); + + //KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); + +#ifdef CAPABILITY_MODE + /* + * If the process is in capability mode, then don't permit reading or + * writing unless specifically granted for the node. + */ + if (IN_CAPABILITY_MODE(req->td)) { + if (req->oldptr && !(oid->oid_kind & OFP_CTLFLAG_CAPRD)) + return (OFP_EPERM); + if (req->newptr && !(oid->oid_kind & OFP_CTLFLAG_CAPWR)) + return (OFP_EPERM); + } +#endif + +#if 0 /* HJo */ + /* Is this sysctl sensitive to securelevels? */ + if (req->newptr && (oid->oid_kind & OFP_CTLFLAG_SECURE)) { + lvl = (oid->oid_kind & OFP_CTLMASK_SECURE) >> CTLSHIFT_SECURE; + error = securelevel_gt(req->td->td_ucred, lvl); + if (error) + return (error); + } + + /* Is this sysctl writable by only privileged users? 
*/ + if (req->newptr && !(oid->oid_kind & OFP_CTLFLAG_ANYBODY)) { + int priv; + + if (oid->oid_kind & OFP_CTLFLAG_PRISON) + priv = PRIV_SYSCTL_WRITEJAIL; + else + priv = PRIV_SYSCTL_WRITE; + error = priv_check(req->td, priv); + if (error) + return (error); + } +#endif + + if (!oid->oid_handler) + return (OFP_EINVAL); + + if ((oid->oid_kind & OFP_CTLTYPE) == OFP_CTLTYPE_NODE) { + arg1 = (int *)arg1 + indx; + arg2 -= indx; + } else { + arg1 = oid->oid_arg1; + arg2 = oid->oid_arg2; + } + oid->oid_running++; + SYSCTL_XUNLOCK(); + +#if 0 /* HJo */ + if (!(oid->oid_kind & OFP_CTLFLAG_MPSAFE)) + mtx_lock(&Giant); +#endif + error = oid->oid_handler(oid, arg1, arg2, req); +#if 0 /* HJo */ + if (!(oid->oid_kind & OFP_CTLFLAG_MPSAFE)) + mtx_unlock(&Giant); +#endif +#ifndef UINET + /* HJo KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); */ +#endif + + SYSCTL_XLOCK(); + oid->oid_running--; + if (oid->oid_running == 0 && (oid->oid_kind & OFP_CTLFLAG_DYING) != 0) + ofp_wakeup(&oid->oid_running); + return (error); +} + +#if 0 +/* + * This is used from various compatibility syscalls too. That's why name + * must be in kernel space. 
+ */ +static int +userland_sysctl(struct thread *td, const int *name, u_int namelen, void *old, + size_t *oldlenp, int inkernel, const void *new, size_t newlen, size_t *retval, + int flags) +{ + int error = 0; + struct ofp_sysctl_req req; + + bzero(&req, sizeof req); + + req.td = td; + req.flags = flags; + + if (oldlenp) { + if (inkernel) { + req.oldlen = *oldlenp; + } else { + error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); + if (error) + return (error); + } + } + req.validlen = req.oldlen; + + if (old) { + /* HJo + if (!useracc(old, req.oldlen, VM_PROT_WRITE)) + return (OFP_EFAULT); + */ + req.oldptr= old; + } + + if (new != NULL) { + /* HJo + if (!useracc(new, newlen, VM_PROT_READ)) + return (OFP_EFAULT); + */ + req.newlen = newlen; + req.newptr = new; + } + + req.oldfunc = sysctl_old_user; + req.newfunc = sysctl_new_user; + req.lock = REQ_UNWIRED; + +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_SYSCTL)) + ktrsysctl(name, namelen); +#endif +#if 0 /* HJo */ + if (req.oldlen > PAGE_SIZE) { + memlocked = 1; + sx_xlock(&sysctlmemlock); + } else + memlocked = 0; +#endif + + for (;;) { + req.oldidx = 0; + req.newidx = 0; + SYSCTL_XLOCK(); + error = sysctl_root(0, (void *)(intptr_t)name, namelen, &req); + SYSCTL_XUNLOCK(); + if (error != OFP_EAGAIN) + break; + /* HJo kern_yield(PRI_USER); */ + } +#if 0 /* HJo */ + if (req.lock == REQ_WIRED && req.validlen > 0) + vsunlock(req.oldptr, req.validlen); + if (memlocked) + sx_xunlock(&sysctlmemlock); +#endif + if (error && error != OFP_ENOMEM) + return (error); + + if (retval) { + if (req.oldptr && req.oldidx > req.validlen) + *retval = req.validlen; + else + *retval = req.oldidx; + } + return (error); +} + +/* + * Drain into a sysctl struct. The user buffer should be wired if a page + * fault would cause issue. 
+ */ +static int +sbuf_sysctl_drain(void *arg, const char *data, int len) +{ + struct ofp_sysctl_req *req = arg; + int error; + + error = SYSCTL_OUT(req, data, len); + KASSERT(error >= 0, ("Got unexpected negative value %d", error)); + return (error == 0 ? len : -error); +} + +struct sbuf * +sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, + struct ofp_sysctl_req *req) +{ + + s = sbuf_new(s, buf, length, SBUF_FIXEDLEN); + sbuf_set_drain(s, sbuf_sysctl_drain, req); + return (s); +} +#endif + +//extern void *__start_set_sysctl_set; +//extern void *__stop_set_sysctl_set; + +void +ofp_register_sysctls(void) +{ + sysctl_register_all(NULL); + sysctl_sysctl_debug(NULL, NULL, 0, NULL); +} + +void +ofp_sysctl_write_tree(int fd) +{ + SYSCTL_XLOCK(); + sysctl_sysctl_debug_dump_node(fd, &sysctl__children, 0); + SYSCTL_XUNLOCK(); +} + +int +ofp_sysctl(const char *name, void *old, size_t *oldlenp, + const void *new, size_t newlen, size_t *retval) +{ + return kernel_sysctlbyname(NULL, name, old, oldlenp, + new, newlen, retval, 0); +} diff --git a/src/ofp_tcp_input.c b/src/ofp_tcp_input.c new file mode 100644 index 00000000..00b2e29f --- /dev/null +++ b/src/ofp_tcp_input.c @@ -0,0 +1,3622 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2007-2008,2010 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. 
+ * + * Portions of this software were developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart, + * James Healy and David Hayes, made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ofp_tcp_input.c 8.12 (Berkeley) 5/24/95 + */ +#include + +#include "ofpi_errno.h" +#include "ofpi.h" +#include "ofpi_util.h" +#include "ofpi_debug.h" +#include "ofpi_systm.h" +#include "ofpi_protosw.h" +#include "ofpi_sysctl.h" +#include "ofpi_socketvar.h" + +#define TCPSTATES /* for logging */ + +#include "ofpi_timer.h" +#include "ofpi_in.h" +#include "ofpi_in_pcb.h" +#include "ofpi_ip.h" +#include "ofpi_ip6.h" +#include "ofpi_icmp6.h" +#include "ofpi_in6_pcb.h" +#include "ofpi_tcp_fsm.h" +#include "ofpi_tcp_seq.h" +#include "ofpi_tcp_timer.h" +#include "ofpi_tcp_var.h" +#include "ofpi_tcp6_var.h" +#include "ofpi_tcp.h" +#include "ofpi_tcp_syncache.h" +#include "ofpi_icmp.h" +#include "ofpi_sockstate.h" + +#define log(a, f...) 
OFP_LOG(f) + +const int ofp_tcprexmtthresh = 3; + +VNET_DEFINE(struct ofp_tcpstat, ofp_tcpstat); + +int ofp_tcp_log_in_vain = 0; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, log_in_vain, OFP_CTLFLAG_RW, + &ofp_tcp_log_in_vain, 0, + "Log all incoming TCP segments to closed ports"); + +VNET_DEFINE(int, ofp_blackhole) = 0; +#define V_blackhole VNET(ofp_blackhole) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, blackhole, OFP_CTLFLAG_RW, + &ofp_blackhole, 0, + "Do not send RST on segments to closed ports"); + +VNET_DEFINE(int, ofp_tcp_delack_enabled) = 0; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, delayed_ack, OFP_CTLFLAG_RW, + &ofp_tcp_delack_enabled, 0, + "Delay ACK to try and piggyback it onto a data packet"); + +VNET_DEFINE(int, ofp_drop_synfin) = 0; +#define V_drop_synfin VNET(ofp_drop_synfin) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, drop_synfin, OFP_CTLFLAG_RW, + &ofp_drop_synfin, 0, + "Drop TCP packets with SYN+FIN set"); + +VNET_DEFINE(int, ofp_tcp_do_rfc3042) = 0; +#define V_tcp_do_rfc3042 VNET(ofp_tcp_do_rfc3042) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, rfc3042, OFP_CTLFLAG_RW, + &ofp_tcp_do_rfc3042, 0, + "Enable RFC 3042 (Limited Transmit)"); + +VNET_DEFINE(int, ofp_tcp_do_rfc3390) = 1; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, rfc3390, OFP_CTLFLAG_RW, + &ofp_tcp_do_rfc3390, 0, + "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); + +VNET_DEFINE(int, ofp_tcp_do_rfc3465) = 1; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, rfc3465, OFP_CTLFLAG_RW, + &ofp_tcp_do_rfc3465, 0, + "Enable RFC 3465 (Appropriate Byte Counting)"); + +VNET_DEFINE(int, ofp_tcp_abc_l_var) = 2; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, abc_l_var, OFP_CTLFLAG_RW, + &ofp_tcp_abc_l_var, 2, + "Cap the max cwnd increment during slow-start to this number of segments"); + +OFP_SYSCTL_NODE(_net_inet_tcp, OFP_OID_AUTO, ecn, OFP_CTLFLAG_RW, 0, "TCP ECN"); + +VNET_DEFINE(int, ofp_tcp_do_ecn) = 0; +OFP_SYSCTL_INT(_net_inet_tcp_ecn, OFP_OID_AUTO, enable, OFP_CTLFLAG_RW, + 
&ofp_tcp_do_ecn, 0, + "TCP ECN support"); + +VNET_DEFINE(int, ofp_tcp_ecn_maxretries) = 1; +OFP_SYSCTL_INT(_net_inet_tcp_ecn, OFP_OID_AUTO, maxretries, OFP_CTLFLAG_RW, + &ofp_tcp_ecn_maxretries, 0, + "Max retries before giving up on ECN"); + +VNET_DEFINE(int, ofp_tcp_insecure_rst) = 0; +#define V_tcp_insecure_rst VNET(ofp_tcp_insecure_rst) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, insecure_rst, OFP_CTLFLAG_RW, + &ofp_tcp_insecure_rst, 0, + "Follow the old (insecure) criteria for accepting RST packets"); + +VNET_DEFINE(int, ofp_tcp_do_autorcvbuf) = 1; +#define V_tcp_do_autorcvbuf VNET(ofp_tcp_do_autorcvbuf) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, recvbuf_auto, OFP_CTLFLAG_RW, + &ofp_tcp_do_autorcvbuf, 0, + "Enable automatic receive buffer sizing"); + +VNET_DEFINE(int, ofp_tcp_autorcvbuf_inc) = 16*1024; +#define V_tcp_autorcvbuf_inc VNET(ofp_tcp_autorcvbuf_inc) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, recvbuf_inc, OFP_CTLFLAG_RW, + &ofp_tcp_autorcvbuf_inc, 0, + "Incrementor step size of automatic receive buffer"); + +VNET_DEFINE(int, ofp_tcp_autorcvbuf_max) = 2*1024*1024; +#define V_tcp_autorcvbuf_max VNET(ofp_tcp_autorcvbuf_max) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, recvbuf_max, OFP_CTLFLAG_RW, + &ofp_tcp_autorcvbuf_max, 0, + "Max size of automatic receive buffer"); + +VNET_DEFINE(int, ofp_tcp_passive_trace) = 0; +#define V_tcp_passive_trace VNET(ofp_tcp_passive_trace) +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, passive_trace, OFP_CTLFLAG_RW, + &ofp_tcp_passive_trace, 0, + "Enable temporary passive debug traces"); + +VNET_DEFINE(struct inpcbhead, ofp_tcb); +#define tcb6 ofp_tcb /* for KAME src sync over BSD*'s */ + +VNET_DEFINE(struct inpcbinfo, ofp_tcbinfo); +const char *ofp_tcbinfo_locked_by_file = NULL; +int ofp_tcbinfo_locked_by_line; + +static void tcp_dooptions(struct tcpopt *, uint8_t *, int, int); +static void tcp_dropwithreset(odp_packet_t , struct ofp_tcphdr *, + struct tcpcb *, int, int); +static void tcp_pulloutofband(struct socket *, 
+ struct ofp_tcphdr *, odp_packet_t , int); +static void tcp_xmit_timer(struct tcpcb *, int); +static void tcp_newreno_partial_ack(struct tcpcb *, struct ofp_tcphdr *); +inline static void tcp_fields_to_host(struct ofp_tcphdr *); +#ifdef TCP_SIGNATURE +inline static void tcp_fields_to_net(struct ofp_tcphdr *); +inline static int tcp_signature_verify_input(odp_packet_t , int, int, + int, struct tcpopt *, struct ofp_tcphdr *, uint32_t); +#endif +inline static void cc_ack_received(struct tcpcb *tp, struct ofp_tcphdr *th, + uint16_t type); +inline static void cc_conn_init(struct tcpcb *tp); +inline static void cc_post_recovery(struct tcpcb *tp, struct ofp_tcphdr *th); +inline static void hhook_run_tcp_est_in(struct tcpcb *tp, + struct ofp_tcphdr *th, struct tcpopt *to); + +/* + * Kernel module interface for updating ofp_tcpstat. The argument is an index + * into ofp_tcpstat treated as an array of uint64_t. While this encodes the + * general layout of ofp_tcpstat into the caller, it doesn't encode its location, + * so that future changes to add, for example, per-CPU stats support won't + * cause binary compatibility problems for kernel modules. + */ +void +ofp_kmod_tcpstat_inc(int statnum) +{ + (*((uint64_t *)&V_tcpstat + statnum))++; +} + +/* + * Wrapper for the TCP established input helper hook. 
+ */ +static inline void +hhook_run_tcp_est_in(struct tcpcb *tp, struct ofp_tcphdr *th, struct tcpopt *to) +{ + (void)tp; + (void)th; + (void)to; +#if 0 /* HJo: No helpers */ + struct tcp_hhook_data hhook_data; + + if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) { + hhook_data.tp = tp; + hhook_data.th = th; + hhook_data.to = to; + + hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data, + tp->osd); + } +#endif +} + +/* + * CC wrapper hook functions + */ +static inline void +cc_ack_received(struct tcpcb *tp, struct ofp_tcphdr *th, uint16_t type) +{ + (void)tp; (void)th; (void)type; +#if 0 /* HJo */ + INP_WLOCK_ASSERT(tp->t_inpcb); + + tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); + if (tp->snd_cwnd == min(tp->snd_cwnd, tp->snd_wnd)) + tp->ccv->flags |= CCF_CWND_LIMITED; + else + tp->ccv->flags &= ~CCF_CWND_LIMITED; + + if (type == CC_ACK) { + if (tp->snd_cwnd > tp->snd_ssthresh) { + tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, + V_tcp_abc_l_var * tp->t_maxseg); + if (tp->t_bytes_acked >= tp->snd_cwnd) { + tp->t_bytes_acked -= tp->snd_cwnd; + tp->ccv->flags |= CCF_ABC_SENTAWND; + } + } else { + tp->ccv->flags &= ~CCF_ABC_SENTAWND; + tp->t_bytes_acked = 0; + } + } + + if (CC_ALGO(tp)->ack_received != NULL) { + /* XXXLAS: Find a way to live without this */ + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->ack_received(tp->ccv, type); + } +#endif +} + +static inline void +cc_conn_init(struct tcpcb *tp) +{ + (void)tp; +#if 0 /* HJo */ + struct hc_metrics_lite metrics; + struct inpcb *inp = tp->t_inpcb; + int rtt; +#ifdef INET6 + int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 
1 : 0; +#endif + + INP_WLOCK_ASSERT(tp->t_inpcb); + + tcp_hc_get(&inp->inp_inc, &metrics); + + if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { + tp->t_srtt = rtt; + tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; + TCPSTAT_INC(tcps_usedrtt); + if (metrics.rmx_rttvar) { + tp->t_rttvar = metrics.rmx_rttvar; + TCPSTAT_INC(tcps_usedrttvar); + } else { + /* default variation is +- 1 rtt */ + tp->t_rttvar = + tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; + } + TCPT_RANGESET(tp->t_rxtcur, + ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, + tp->t_rttmin, TCPTV_REXMTMAX); + } + if (metrics.rmx_ssthresh) { + /* + * There's some sort of gateway or interface + * buffer limit on the path. Use this to set + * the slow start threshhold, but set the + * threshold to no less than 2*mss. + */ + tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh); + TCPSTAT_INC(tcps_usedssthresh); + } + + /* + * Set the slow-start flight size depending on whether this + * is a local network or not. + * + * Extend this so we cache the cwnd too and retrieve it here. + * Make cwnd even bigger than RFC3390 suggests but only if we + * have previous experience with the remote host. Be careful + * not make cwnd bigger than remote receive window or our own + * send socket buffer. Maybe put some additional upper bound + * on the retrieved cwnd. Should do incremental updates to + * hostcache when cwnd collapses so next connection doesn't + * overloads the path again. + * + * XXXAO: Initializing the CWND from the hostcache is broken + * and in its current form not RFC conformant. It is disabled + * until fixed or removed entirely. + * + * RFC3390 says only do this if SYN or SYN/ACK didn't got lost. + * We currently check only in syncache_socket for that. 
+ */ +/* #define TCP_METRICS_CWND */ +#ifdef TCP_METRICS_CWND + if (metrics.rmx_cwnd) + tp->snd_cwnd = max(tp->t_maxseg, min(metrics.rmx_cwnd / 2, + min(tp->snd_wnd, so->so_snd.sb_hiwat))); + else +#endif + if (V_tcp_do_rfc3390) + tp->snd_cwnd = min(4 * tp->t_maxseg, + max(2 * tp->t_maxseg, 4380)); +#ifdef INET6 + else if (isipv6 /*&& in6_localaddr(&inp->in6p_faddr)*/) + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local; +#endif +#if defined(INET) && defined(INET6) + else if (!isipv6 /*&& in_localaddr(inp->inp_faddr)*/) + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local; +#endif +#ifdef INET + else if (in_localaddr(inp->inp_faddr)) + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local; +#endif + else + tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz; + + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); +#endif +} + +inline void +ofp_cc_cong_signal(struct tcpcb *tp, struct ofp_tcphdr *th, uint32_t type) +{ + (void)tp; (void)th; (void)type; +#if 0 /* HJo */ + INP_WLOCK_ASSERT(tp->t_inpcb); + + switch(type) { + case CC_NDUPACK: + if (!IN_FASTRECOVERY(tp->t_flags)) { + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_ECN_PERMIT) + t_flags_or(tp->t_flags, TF_ECN_SND_CWR); + } + break; + case CC_ECN: + if (!IN_CONGRECOVERY(tp->t_flags)) { + TCPSTAT_INC(tcps_ecn_rcwnd); + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_ECN_PERMIT) + t_flags_or(tp->t_flags, TF_ECN_SND_CWR); + } + break; + case CC_RTO: + tp->t_dupacks = 0; + tp->t_bytes_acked = 0; + EXIT_RECOVERY(tp->t_flags); + tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / + tp->t_maxseg) * tp->t_maxseg; + tp->snd_cwnd = tp->t_maxseg; + break; + case CC_RTO_ERR: + TCPSTAT_INC(tcps_sndrexmitbad); + /* RTO was unnecessary, so reset everything. 
*/ + tp->snd_cwnd = tp->snd_cwnd_prev; + tp->snd_ssthresh = tp->snd_ssthresh_prev; + tp->snd_recover = tp->snd_recover_prev; + if (tp->t_flags & TF_WASFRECOVERY) + ENTER_FASTRECOVERY(tp->t_flags); + if (tp->t_flags & TF_WASCRECOVERY) + ENTER_CONGRECOVERY(tp->t_flags); + tp->snd_nxt = tp->snd_max; + t_flags_and(tp->t_flags, ~TF_PREVVALID); + tp->t_badrxtwin = 0; + break; + } + + if (CC_ALGO(tp)->cong_signal != NULL) { + if (th != NULL) + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->cong_signal(tp->ccv, type); + } +#endif +} + +static inline void +cc_post_recovery(struct tcpcb *tp, struct ofp_tcphdr *th) +{ + (void)tp; (void)th; +#if 0 /* HJo */ + INP_WLOCK_ASSERT(tp->t_inpcb); + + /* XXXLAS: KASSERT that we're in recovery? */ + + if (CC_ALGO(tp)->post_recovery != NULL) { + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->post_recovery(tp->ccv); + } + /* XXXLAS: EXIT_RECOVERY ? */ + tp->t_bytes_acked = 0; +#endif +} + +static inline void +tcp_fields_to_host(struct ofp_tcphdr *th) +{ + th->th_seq = odp_be_to_cpu_32(th->th_seq); + th->th_ack = odp_be_to_cpu_32(th->th_ack); + th->th_win = odp_be_to_cpu_16(th->th_win); + th->th_urp = odp_be_to_cpu_16(th->th_urp); +} + +#ifdef TCP_SIGNATURE +static inline void +tcp_fields_to_net(struct ofp_tcphdr *th) +{ + th->th_seq = odp_cpu_to_be_32(th->th_seq); + th->th_ack = odp_cpu_to_be_32(th->th_ack); + th->th_win = odp_cpu_to_be_16(th->th_win); + th->th_urp = odp_cpu_to_be_16(th->th_urp); +} + +static inline int +tcp_signature_verify_input(odp_packet_t m, int off0, int tlen, int optlen, + struct tcpopt *to, struct ofp_tcphdr *th, uint32_t tcpbflag) +{ + int ret; + + tcp_fields_to_net(th); + ret = tcp_signature_verify(m, off0, tlen, optlen, to, th, tcpbflag); + tcp_fields_to_host(th); + return (ret); +} +#endif + +/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. 
*/ +#if 0 /*NDP*/ +#ifdef INET6 +#define ND6_HINT(tp) \ +do { \ + if ((tp) && (tp)->t_inpcb && \ + ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \ + nd6_nud_hint(NULL, NULL, 0); \ +} while (0) +#else +#define ND6_HINT(tp) +#endif +#endif /*NDP*/ + +/* + * Indicate whether this ack should be delayed. We can delay the ack if + * - there is no delayed ack timer in progress and + * - our last ack wasn't a 0-sized window. We never want to delay + * the ack that opens up a 0-sized window and + * - delayed acks are enabled or + * - this is a half-synchronized T/TCP connection. + */ +#define DELAY_ACK(tp) \ + ((!ofp_tcp_timer_active(tp, TT_DELACK) && \ + (tp->t_flags & TF_RXWIN0SENT) == 0) && \ + (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) + +/* + * TCP input handling is split into multiple parts: + * tcp6_input is a thin wrapper around ofp_tcp_input for the extended + * ip6_protox[] call format in ip6_input + * ofp_tcp_input handles primary segment validation, inpcb lookup and + * SYN processing on listen sockets + * ofp_tcp_do_segment processes the ACK and text of the segment for + * establishing, established and closing connections + */ +#ifdef INET6 +int +ofp_tcp6_input(odp_packet_t m, int *offp, int *nxt) +{ + enum ofp_return_code ret = OFP_PKT_PROCESSED; + *nxt = OFP_IPPROTO_DONE; + + OFP_IP6_EXTHDR_CHECK(m, *offp, sizeof(struct ofp_tcphdr), + OFP_PKT_DROP); + +#if 0 + struct in6_ifaddr *ia6; + + /* + * draft-itojun-ipv6-tcp-to-anycast + * better place to put this in? 
+ */ + ia6 = ip6_getdstifaddr(m); + if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) { + struct ip6_hdr *ip6; + + ifa_free(&ia6->ia_ifa); + ip6 = (struct ip6_hdr *)odp_packet_data(m); + icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, + (char *)&ip6->ip6_dst - (char *)ip6); + *nxt = OFP_IPPROTO_DONE; + return OFP_DONE; + } + if (ia6) + ifa_free(&ia6->ia_ifa); +#endif + + ret = ofp_tcp_input(m, *offp); + if (ret == OFP_PKT_CONTINUE) + *nxt = OFP_IPPROTO_SP; + return ret; +} +#endif /* INET6 */ + +int +ofp_tcp_input(odp_packet_t m, int off0) +{ + struct ofp_tcphdr *th = NULL; + struct ofp_ip *ip = NULL; + struct inpcb *inp = NULL; + struct tcpcb *tp = NULL; + struct socket *so = NULL; + uint8_t *optp = NULL; + int optlen = 0; + int tlen = 0, off; + int drop_hdrlen; + int thflags; + int rstreason = 0; /* For badport_bandlim accounting purposes */ + uint8_t iptos = 0; +#ifdef INET6 + struct ofp_ip6_hdr *ip6 = NULL; + int isipv6; +#else + const void *ip6 = NULL; +#endif /* INET6 */ + struct tcpopt to; /* options in this segment */ + char *s = NULL; /* address and port logging */ + int ti_locked; +#ifdef TCPDEBUG + /* + * The size of tcp_saveipgen must be the size of the max ip header, + * now IPv6. + */ + uint8_t tcp_saveipgen[IP6_HDR_LEN]; + struct ofp_tcphdr tcp_savetcp; + short ostate = 0; +#endif + /* HJo: remove vlan hdr */ + odp_packet_pull_head(m, odp_packet_l3_offset(m)); + odp_packet_l2_offset_set(m, 0); + odp_packet_l3_offset_set(m, 0); + +#ifdef INET6 + isipv6 = (((struct ofp_ip *)odp_packet_data(m))->ip_v == 6) ? 1 : 0; +#endif + + to.to_flags = 0; + TCPSTAT_INC(tcps_rcvtotal); + +#ifdef INET6 + if (isipv6) { + /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). 
*/ +#if 0 + if (odp_packet_get_len(m) < (sizeof(*ip6) + sizeof(*th))) { + + m = odp_packet_ensure_contiguous(m, sizeof(*ip6) + sizeof(*th)); + if (m == NULL) { + TCPSTAT_INC(tcps_rcvshort); + return OFP_DONE; + } + } +#endif + + ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m); + th = (struct ofp_tcphdr *)((char *)ip6 + off0); + tlen = sizeof(*ip6) + odp_be_to_cpu_16(ip6->ofp_ip6_plen) - off0; + +#if 0 /* bopi: no csum check */ + if (odp_packet_csum_flags(m) & CSUM_DATA_VALID_IPV6) { + if (odp_packet_csum_flags(m) & CSUM_PSEUDO_HDR) + th->th_sum = odp_packet_csum_data(m); + else + th->th_sum = in6_cksum_pseudo(ip6, tlen, + OFP_IPPROTO_TCP, odp_packet_csum_data(m)); + th->th_sum ^= 0xffff; + } else +#endif + th->th_sum = ofp_in6_cksum(m, OFP_IPPROTO_TCP, off0, tlen); + if (th->th_sum) { + TCPSTAT_INC(tcps_rcvbadsum); + goto drop; + } + + /* + * Be proactive about unspecified IPv6 address in source. + * As we use all-zero to indicate unbounded/unconnected pcb, + * unspecified IPv6 address can be used to confuse us. + * + * Note that packets with unspecified IPv6 destination is + * already dropped in ip6_input. + */ + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { + /* XXX stat */ + goto drop; + } + } + else +#endif + {/* OK */ + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ + /* HJo: FIX + if (off0 > (int) sizeof (struct ofp_ip)) { + ip_stripoptions(m, (odp_packet_t )0); + off0 = sizeof(struct ofp_ip); + } + if (odp_packet_len(m) < sizeof (struct tcpiphdr)) { + if ((m = odp_packet_ensure_contiguous(m, sizeof (struct tcpiphdr))) + == NULL) { + TCPSTAT_INC(tcps_rcvshort); + return OFP_DONE; + } + } + */ + + ip = (struct ofp_ip *)odp_packet_data(m); + /* + * Convert fields to host representation. 
+ */ + ip->ip_len = odp_be_to_cpu_16(ip->ip_len); + ip->ip_off = odp_be_to_cpu_16(ip->ip_off); + /* HJo: see bsd ip_input() */ + ip->ip_len -= ip->ip_hl << 2; + + th = (struct ofp_tcphdr *)((char *)ip + off0); + tlen = ip->ip_len; +#if 0 /* HJo: no csum check */ + if (odp_packet_csum_flags(m) & CSUM_DATA_VALID) { + if (odp_packet_csum_flags(m) & CSUM_PSEUDO_HDR) + th->th_sum = odp_packet_csum_data(m); + else + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + odp_cpu_to_be_32(odp_packet_csum_data(m) + + ip->ip_len + + OFP_IPPROTO_TCP)); + th->th_sum ^= 0xffff; + } else { + /* + * Checksum extended TCP header and data. + */ + len = sizeof (struct ofp_ip) + tlen; + th->th_sum = in_cksum(m, len); + } + if (th->th_sum) { + TCPSTAT_INC(tcps_rcvbadsum); + goto drop; + } +#endif // HJo + /* Re-initialization for later version check */ + ip->ip_v = OFP_IPVERSION; + } + +#ifdef INET6 + if (isipv6) + iptos = (odp_be_to_cpu_32(ip6->ofp_ip6_flow) >> 20) & 0xff; + else +#endif + iptos = ip->ip_tos; + + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. 
XXX + */ + off = th->th_off << 2; + if (off < (int)sizeof (struct ofp_tcphdr) || off > tlen) {/* OK */ + TCPSTAT_INC(tcps_rcvbadoff); + goto drop; + } + tlen -= off; /* tlen is used instead of ti->ti_len */ + if (off > (int)sizeof (struct ofp_tcphdr)) {/* OK */ +#ifdef INET6 + if (isipv6) { + OFP_IP6_EXTHDR_CHECK(m, off0, off, OFP_PKT_DROP); + ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m); + th = (struct ofp_tcphdr *)((char *)ip6 + off0); + } + else +#endif + {/* OK */ + /* HJo + if (odp_packet_len(m) < sizeof(struct ofp_ip) + off) { + if ((m = odp_packet_ensure_contiguous(m, sizeof (struct ofp_ip) + off)) + == NULL) { + TCPSTAT_INC(tcps_rcvshort); + return OFP_DONE; + } + ip = (struct ofp_ip *)odp_packet_data(m); + ipov = (struct ipovly *)ip; + th = (struct ofp_tcphdr *)((char *)ip + off0); + } + */ + } + + optlen = off - sizeof (struct ofp_tcphdr); + optp = (uint8_t *)(th + 1); + } + thflags = th->th_flags; + + /* + * Convert TCP protocol specific fields to host format. + */ + tcp_fields_to_host(th); + + /* + * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options. + */ + drop_hdrlen = off0 + off; + + /* + * Locate pcb for segment; if we're likely to add or remove a + * connection then first acquire pcbinfo lock. There are two cases + * where we might discover later we need a write lock despite the + * flags: ACKs moving a connection out of the syncache, and ACKs for + * a connection in TIMEWAIT. 
+ */ + if ((thflags & (OFP_TH_SYN | OFP_TH_FIN | OFP_TH_RST)) != 0) {/* OK */ + INP_INFO_WLOCK(&V_tcbinfo); + ti_locked = TI_WLOCKED; + } else + ti_locked = TI_UNLOCKED; + +findpcb: + +#ifdef INET6 + if (isipv6) + inp = ofp_in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, + th->th_sport, &ip6->ip6_dst, th->th_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, + ofp_packet_interface(m), m); + else +#endif + inp = ofp_in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, + th->th_sport, ip->ip_dst, th->th_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, + ofp_packet_interface(m), m); + + /* + * If the INPCB does not exist then all data in the incoming + * segment is discarded and an appropriate RST is sent back. + * XXX MRT Send RST using which routing table? + */ + if (inp == NULL) { + /* + * Log communication attempts to ports that are not + * in use. + */ + + if ((ofp_tcp_log_in_vain == 1 && (thflags & OFP_TH_SYN)) || + ofp_tcp_log_in_vain == 2) { + if ((s = ofp_tcp_log_vain(NULL, th, (void *)ip, ip6))) + log(LOG_INFO, "%s; %s: Connection attempt " + "to closed port\n", s, __func__); + } + /* + * When blackholing do not respond with a RST but + * completely ignore the segment and drop it. + */ + if ((V_blackhole == 1 && (thflags & OFP_TH_SYN)) || + V_blackhole == 2) + goto dropunlock; + + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } + + return OFP_PKT_CONTINUE; /* process it on slow path */ + } + + INP_WLOCK_ASSERT(inp); + if (!(inp->inp_flags & INP_HW_FLOWID) + /* HJo: FIX: + && (odp_packet_flags(m) & M_FLOWID) + */ + && ((inp->inp_socket == NULL) + || !(inp->inp_socket->so_options & OFP_SO_ACCEPTCONN))) {/* OK */ + inp->inp_flags |= INP_HW_FLOWID; + inp->inp_flags &= ~INP_SW_FLOWID; + /* HJo: FIX: + inp->inp_flowid = m->m_pkthdr.flowid; + */ + } + + /* + * Check the minimum TTL for socket. 
+ */ + if (inp->inp_ip_minttl != 0) { +#ifdef INET6 + if (isipv6 && inp->inp_ip_minttl > ip6->ofp_ip6_hlim) + goto dropunlock; + else +#endif + if (inp->inp_ip_minttl > ip->ip_ttl) + goto dropunlock; + } + + /* + * A previous connection in TIMEWAIT state is supposed to catch stray + * or duplicate segments arriving late. If this segment was a + * legitimate new connection attempt the old INPCB gets removed and + * we can try again to find a listening socket. + * + * At this point, due to earlier optimism, we may hold only an inpcb + * lock, and not the inpcbinfo write lock. If so, we need to try to + * acquire it, or if that fails, acquire a reference on the inpcb, + * drop all locks, acquire a global write lock, and then re-acquire + * the inpcb lock. We may at that point discover that another thread + * has tried to free the inpcb, in which case we need to loop back + * and try to find a new inpcb to deliver to. + * + * XXXRW: It may be time to rethink timewait locking. + */ +relocked: + if (inp->inp_flags & INP_TIMEWAIT) { + if (ti_locked == TI_UNLOCKED) { + if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { + ofp_in_pcbref(inp); + INP_WUNLOCK(inp); + INP_INFO_WLOCK(&V_tcbinfo); + ti_locked = TI_WLOCKED; + INP_WLOCK(inp); + if (ofp_in_pcbrele_wlocked(inp)) { + inp = NULL; + goto findpcb; + } + } else + ti_locked = TI_WLOCKED; + } + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + if (thflags & OFP_TH_SYN) + tcp_dooptions(&to, optp, optlen, TO_SYN); + /* + * NB: ofp_tcp_twcheck unlocks the INP and frees the mbuf. + */ + if (ofp_tcp_twcheck(inp, &to, th, m, tlen)) + goto findpcb; + + INP_INFO_WUNLOCK(&V_tcbinfo); + return OFP_PKT_PROCESSED; + } + + /* + * The TCPCB may no longer exist if the connection is winding + * down or it is in the CLOSED state. Either way we drop the + * segment and send an appropriate response. 
+ */ + tp = intotcpcb(inp); + if (tp == NULL || tp->t_state == TCPS_CLOSED) { + rstreason = BANDLIM_RST_CLOSEDPORT; + goto dropwithreset; + } + + /* + * We've identified a valid inpcb, but it could be that we need an + * inpcbinfo write lock but don't hold it. In this case, attempt to + * acquire using the same strategy as the TIMEWAIT case above. If we + * relock, we have to jump back to 'relocked' as the connection might + * now be in TIMEWAIT. + */ + if (tp->t_state != TCPS_ESTABLISHED) {/* OK */ + if (ti_locked == TI_UNLOCKED) { + if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { + ofp_in_pcbref(inp); + INP_WUNLOCK(inp); + INP_INFO_WLOCK(&V_tcbinfo); + ti_locked = TI_WLOCKED; + INP_WLOCK(inp); + if (ofp_in_pcbrele_wlocked(inp)) { + inp = NULL; + goto findpcb; + } + goto relocked; + } else + ti_locked = TI_WLOCKED; + } + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + } + + so = inp->inp_socket; + KASSERT(so != NULL, ("%s: so == NULL", __func__)); +#ifdef TCPDEBUG + if (so->so_options & OFP_SO_DEBUG) { + ostate = tp->t_state; +#ifdef INET6 + if (isipv6) { + bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6)); + } else +#endif + bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip)); + tcp_savetcp = *th; + } +#endif /* TCPDEBUG */ + + /* + * When the socket is accepting connections (the INPCB is in LISTEN + * state) we look into the SYN cache if this is a new connection + * attempt or the completion of a previous one. Because listen + * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be + * held in this case. 
+ */ + if (so->so_options & OFP_SO_ACCEPTCONN) { + struct in_conninfo inc; + + KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " + "tp not listening", __func__)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + bzero(&inc, sizeof(inc)); +#ifdef INET6 + if (isipv6) { + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = ip6->ip6_src; + inc.inc6_laddr = ip6->ip6_dst; + } else +#endif + { + inc.inc_faddr = ip->ip_src; + inc.inc_laddr = ip->ip_dst; + } + inc.inc_fport = th->th_sport; + inc.inc_lport = th->th_dport; + inc.inc_fibnum = so->so_fibnum; + + /* + * Check for an existing connection attempt in syncache if + * the flag is only ACK. A successful lookup creates a new + * socket appended to the listen queue in SYN_RECEIVED state. + */ + if ((thflags & (OFP_TH_RST|OFP_TH_ACK|OFP_TH_SYN)) == OFP_TH_ACK) { + /* + * Parse the TCP options here because + * syncookies need access to the reflected + * timestamp. + */ + tcp_dooptions(&to, optp, optlen, 0); + /* + * NB: ofp_syncache_expand() doesn't unlock + * inp and tcpinfo locks. + */ + if (!ofp_syncache_expand(&inc, &to, th, &so, m)) { + /* + * No syncache entry or ACK was not + * for our SYN/ACK. Send a RST. + * NB: syncache did its own logging + * of the failure cause. + */ + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + if (so == NULL) { + /* + * We completed the 3-way handshake + * but could not allocate a socket + * either due to memory shortage, + * listen queue length limits or + * global socket limits. Send RST + * or wait and have the remote end + * retransmit the ACK for another + * try. + */ + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Socket allocation failed due to " + "limits or memory shortage, %s\n", + s, __func__, + V_tcp_sc_rst_sock_fail ? 
+ "sending RST" : "try again"); + if (V_tcp_sc_rst_sock_fail) { + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } else { + goto dropunlock; + } + } + + /* + * Socket is created in state SYN_RECEIVED. + * Unlock the listen socket, lock the newly + * created socket and update the tp variable. + */ + INP_WUNLOCK(inp); /* listen socket */ + inp = sotoinpcb(so); + INP_WLOCK(inp); /* new connection */ + tp = intotcpcb(inp); + KASSERT(tp->t_state == TCPS_SYN_RECEIVED, + ("%s: ", __func__)); + + /* + * Process the segment and the data it + * contains. ofp_tcp_do_segment() consumes + * the mbuf chain and unlocks the inpcb. + */ + ofp_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, + iptos, ti_locked, 0); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + return OFP_PKT_PROCESSED; + } + + /* + * Segment flag validation for new connection attempts: + * + * Our (SYN|ACK) response was rejected. + * Check with syncache and remove entry to prevent + * retransmits. + * + * NB: ofp_syncache_chkrst does its own logging of failure + * causes. + */ + if (thflags & OFP_TH_RST) { + ofp_syncache_chkrst(&inc, th); + goto dropunlock; + } + + /* + * We can't do anything without SYN. + */ + if ((thflags & OFP_TH_SYN) == 0) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "SYN is missing, segment ignored\n", + s, __func__); + TCPSTAT_INC(tcps_badsyn); + goto dropunlock; + } + + /* + * (SYN|ACK) is bogus on a listen socket. + */ + if (thflags & OFP_TH_ACK) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "SYN|ACK invalid, segment rejected\n", + s, __func__); + ofp_syncache_badack(&inc); /* XXX: Not needed! */ + TCPSTAT_INC(tcps_badsyn); + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + + /* + * If the ofp_drop_synfin option is enabled, drop all + * segments with both the SYN and FIN bits set. + * This prevents e.g. nmap from identifying the + * TCP/IP stack. 
+ * XXX: Poor reasoning. nmap has other methods + * and is constantly refining its stack detection + * strategies. + * XXX: This is a violation of the TCP specification + * and was used by RFC1644. + */ + if ((thflags & OFP_TH_FIN) && V_drop_synfin) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "SYN|FIN segment ignored (based on " + "sysctl setting)\n", s, __func__); + TCPSTAT_INC(tcps_badsyn); + goto dropunlock; + } + + /* + * Segment's flags are (SYN) or (SYN|FIN). + * + * OFP_TH_PUSH, OFP_TH_URG, OFP_TH_ECE, OFP_TH_CWR are ignored + * as they do not affect the state of the TCP FSM. + * The data pointed to by OFP_TH_URG and th_urp is ignored. + */ + KASSERT((thflags & (OFP_TH_RST|OFP_TH_ACK)) == 0, + ("%s: Listen socket: OFP_TH_RST or OFP_TH_ACK set", __func__)); + KASSERT(thflags & (OFP_TH_SYN), + ("%s: Listen socket: OFP_TH_SYN not set", __func__)); +#ifdef INET6 + /* + * If deprecated address is forbidden, + * we do not accept SYN to deprecated interface + * address to prevent any new inbound connection from + * getting established. + * When we do not accept SYN, we send a TCP RST, + * with deprecated source address (instead of dropping + * it). We compromise it as it is much better for peer + * to send a RST, and RST will be the final packet + * for the exchange. + * + * If we do not forbid deprecated addresses, we accept + * the SYN packet. RFC2462 does not suggest dropping + * SYN in this case. + * If we decipher RFC2462 5.5.4, it says like this: + * 1. use of deprecated addr with existing + * communication is okay - "SHOULD continue to be + * used" + * 2. use of it with new communication: + * (2a) "SHOULD NOT be used if alternate address + * with sufficient scope is available" + * (2b) nothing mentioned otherwise. + * Here we fall into (2b) case as we have no choice in + * our source address selection - we must obey the peer. 
+ * + * The wording in RFC2462 is confusing, and there are + * multiple description text for deprecated address + * handling - worse, they are not exactly the same. + * I believe 5.5.4 is the best one, so we follow 5.5.4. + */ +#if 0 + if (isipv6 && !V_ip6_use_deprecated) { + struct in6_ifaddr *ia6; + + ia6 = ip6_getdstifaddr(m); + if (ia6 != NULL && + (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { + ifa_free(&ia6->ia_ifa); + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt to deprecated " + "IPv6 address rejected\n", + s, __func__); + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + if (ia6) + ifa_free(&ia6->ia_ifa); + } +#endif +#endif /* INET6 */ + /* + * Basic sanity checks on incoming SYN requests: + * Don't respond if the destination is a link layer + * broadcast according to RFC1122 4.2.3.10, p. 104. + * If it is from this socket it must be forged. + * Don't respond if the source or destination is a + * global or subnet broad- or multicast address. + * Note that it is quite possible to receive unicast + * link-layer packets with a broadcast IP address. Use + * in_broadcast() to find them. 
+ */ + + if (odp_packet_is_bcast(m) || odp_packet_is_mcast(m)) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt from broad- or multicast " + "link layer address ignored\n", s, __func__); + goto dropunlock; + } + +#ifdef INET6 + if (isipv6) { + if (th->th_dport == th->th_sport && + OFP_IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt to/from self " + "ignored\n", s, __func__); + goto dropunlock; + } + if (OFP_IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || + OFP_IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt from/to multicast " + "address ignored\n", s, __func__); + goto dropunlock; + } + } + else +#endif + { + if (th->th_dport == th->th_sport && + ip->ip_dst.s_addr == ip->ip_src.s_addr) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt from/to self " + "ignored\n", s, __func__); + goto dropunlock; + } +/* HJo: FIX */ +#define in_broadcast(in, ifp) (in.s_addr == OFP_INADDR_BROADCAST || \ + in.s_addr == OFP_INADDR_ANY) + + if (OFP_IN_MULTICAST(odp_be_to_cpu_32(ip->ip_dst.s_addr)) || + OFP_IN_MULTICAST(odp_be_to_cpu_32(ip->ip_src.s_addr)) || + ip->ip_src.s_addr == odp_cpu_to_be_32(OFP_INADDR_BROADCAST) || + in_broadcast(ip->ip_dst, odp_packet_interface(m))) { + if ((s = ofp_tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt from/to broad- " + "or multicast address ignored\n", + s, __func__); + goto dropunlock; + } + } + /* + * SYN appears to be valid. Create compressed TCP state + * for syncache. 
+ */ +#ifdef TCPDEBUG + if (so->so_options & OFP_SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, + (void *)tcp_saveipgen, &tcp_savetcp, 0); +#endif + tcp_dooptions(&to, optp, optlen, TO_SYN); + ofp_syncache_add(&inc, &to, th, inp, &so, m, -1); + /* + * Entry added to syncache and mbuf consumed. + * Everything already unlocked by ofp_syncache_add(). + */ + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + return OFP_PKT_PROCESSED; + } + + /* + * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later + * state. ofp_tcp_do_segment() always consumes the mbuf chain, unlocks + * the inpcb, and unlocks pcbinfo. + */ + ofp_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked, 0); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + return OFP_PKT_PROCESSED; + +dropwithreset: + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } + + if (inp != NULL) { + tcp_dropwithreset(m, th, tp, tlen, rstreason); + INP_WUNLOCK(inp); + } else + tcp_dropwithreset(m, th, NULL, tlen, rstreason); + m = ODP_PACKET_INVALID; /* mbuf chain got consumed. */ + goto drop; + +dropunlock: + + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } + + if (inp != NULL) + INP_WUNLOCK(inp); + +drop: + + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + /* HJo: FIX: + if (s != NULL) + free(s, M_TCPLOG); + */ + if (m == ODP_PACKET_INVALID) + return OFP_PKT_PROCESSED; + + return OFP_PKT_DROP; +} + +/* + * no_unlock is a total hack designed to get around locking issues with + * how libuinet uses ofp_tcp_do_segment(). + * + * By default it'll unlock the held inp lock and if it's held, the + * ofp_tcbinfo lock. + * + * But the libuinet passive mode uses ofp_tcp_do_segment() with an assembled + * synack to setup the passive peer! Here, it can't drop the damned + * locks or it'll confuse the following code that assumes the locks + * are still held. 
+ * + * So this option is just a hack for the specific code path that + * the passive receive socket creation code uses. Eventually the + * relevant bits of ofp_tcp_do_segment() should be refactored out and + * used as appropriate. + */ +void +ofp_tcp_do_segment(odp_packet_t m, struct ofp_tcphdr *th, struct socket *so, + struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, + int ti_locked, int no_unlock) +{ + int thflags, acked, ourfinisacked, needoutput = 0; + int rstreason, todrop, win; + uint64_t tiwin; + struct tcpopt to; + +#ifdef TCPDEBUG + /* + * The size of tcp_saveipgen must be the size of the max ip header, + * now IPv6. + */ + uint8_t tcp_saveipgen[IP6_HDR_LEN]; + struct ofp_tcphdr tcp_savetcp; + short ostate = 0; +#endif + thflags = th->th_flags; + tp->sackhint.last_sack_ack = 0; + + /* + * If this is either a state-changing packet or current state isn't + * established, we require a write lock on ofp_tcbinfo. Otherwise, we + * allow either a read lock or a write lock, as we may have acquired + * a write lock due to a race. + * + * Require a global write lock for SYN/FIN/RST segments or + * non-established connections; otherwise accept either a read or + * write lock, as we may have conservatively acquired a write lock in + * certain cases in ofp_tcp_input() (is this still true?). Currently we + * will never enter with no lock, so we try to drop it quickly in the + * common pure ack/pure data cases. 
+ */ + if ((thflags & (OFP_TH_SYN | OFP_TH_FIN | OFP_TH_RST)) != 0 || + tp->t_state != TCPS_ESTABLISHED) { + KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for " + "SYN/FIN/RST/!EST", __func__, ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + } else { +#ifdef INVARIANTS + if (ti_locked == TI_WLOCKED) + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + else { + KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " + "ti_locked: %d", __func__, ti_locked)); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + } +#endif + } + INP_WLOCK_ASSERT(tp->t_inpcb); + KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", + __func__)); + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", + __func__)); + + /* + * Segment received on connection. + * Reset idle time and keep-alive timer. + * XXX: This should be done after segment + * validation to ignore broken/spoofed segs. + */ + tp->t_rcvtime = ticks; + if (TCPS_HAVEESTABLISHED(tp->t_state)) + ofp_tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); + + /* + * Unscale the window into a 32-bit value. + * For the SYN_SENT state the scale is zero. + */ + tiwin = th->th_win << tp->snd_scale; + + /* + * TCP ECN processing. + */ + if (tp->t_flags & TF_ECN_PERMIT) { + if (thflags & OFP_TH_CWR) + t_flags_and(tp->t_flags, ~TF_ECN_SND_ECE); + switch (iptos & OFP_IPTOS_ECN_MASK) { + case OFP_IPTOS_ECN_CE: + t_flags_or(tp->t_flags, TF_ECN_SND_ECE); + TCPSTAT_INC(tcps_ecn_ce); + break; + case OFP_IPTOS_ECN_ECT0: + TCPSTAT_INC(tcps_ecn_ect0); + break; + case OFP_IPTOS_ECN_ECT1: + TCPSTAT_INC(tcps_ecn_ect1); + break; + } + /* Congestion experienced. */ + if (thflags & OFP_TH_ECE) { + /* HJo: FIX: + ofp_cc_cong_signal(tp, th, CC_ECN); + */ + } + } + + /* + * Parse options on any incoming segment. + */ + tcp_dooptions(&to, (uint8_t *)(th + 1), + (th->th_off << 2) - sizeof(struct ofp_tcphdr), + (thflags & OFP_TH_SYN) ? TO_SYN : 0); + + /* + * If echoed timestamp is later than the current time, + * fall back to non RFC1323 RTT calculation. 
Normalize + * timestamp if syncookies were used when this connection + * was established. + */ + if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { + to.to_tsecr -= tp->ts_offset; + if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) + to.to_tsecr = 0; + } + + /* + * Process options only when we get SYN/ACK back. The SYN case + * for incoming connections is handled in tcp_syncache. + * According to RFC1323 the window field in a SYN (i.e., a + * or ) segment itself is never scaled. + * XXX this is traditional behavior, may need to be cleaned up. + */ + if (tp->t_state == TCPS_SYN_SENT && (thflags & OFP_TH_SYN)) { + if ((to.to_flags & TOF_SCALE) && + (tp->t_flags & TF_REQ_SCALE)) { + t_flags_or(tp->t_flags, TF_RCVD_SCALE); + tp->snd_scale = to.to_wscale; + } + /* + * Initial send window. It will be updated with + * the next incoming segment to the scaled value. + */ + tp->snd_wnd = th->th_win; + if (to.to_flags & TOF_TS) { + t_flags_or(tp->t_flags, TF_RCVD_TSTMP); + tp->ts_recent = to.to_tsval; + tp->ts_recent_age = tcp_ts_getticks(); + } + if (to.to_flags & TOF_MSS) + ofp_tcp_mss(tp, to.to_mss); + if ((tp->t_flags & TF_SACK_PERMIT) && + (to.to_flags & TOF_SACKPERM) == 0) + t_flags_and(tp->t_flags, ~TF_SACK_PERMIT); + } + + /* + * Header prediction: check for the two common cases + * of a uni-directional data xfer. If the packet has + * no control flags, is in-sequence, the window didn't + * change and we're not retransmitting, it's a + * candidate. If the length is zero and the ack moved + * forward, we're the sender side of the xfer. Just + * free the data acked & wake any higher level process + * that was blocked waiting for space. If the length + * is non-zero and the ack didn't move, we're the + * receiver side. If we're getting packets in-order + * (the reassembly queue is empty), add the data to + * the socket buffer and note that we need a delayed ack. + * Make sure that the hidden state-flags are also off. 
+ * Since we check for TCPS_ESTABLISHED first, it can only + * be OFP_TH_NEEDSYN. + */ + if (tp->t_state == TCPS_ESTABLISHED && + th->th_seq == tp->rcv_nxt && + (thflags & (OFP_TH_SYN|OFP_TH_FIN|OFP_TH_RST|OFP_TH_URG|OFP_TH_ACK)) == OFP_TH_ACK && + tp->snd_nxt == tp->snd_max && + tiwin && tiwin == tp->snd_wnd && + ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && + OFP_LIST_EMPTY(&tp->t_segq) && + ((to.to_flags & TOF_TS) == 0 || + TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { + + /* + * If last ACK falls within this segment's sequence numbers, + * record the timestamp. + * NOTE that the test is modified according to the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). + */ + if ((to.to_flags & TOF_TS) != 0 && + SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_ts_getticks(); + tp->ts_recent = to.to_tsval; + } + + if (tlen == 0) { + if (SEQ_GT(th->th_ack, tp->snd_una) && + SEQ_LEQ(th->th_ack, tp->snd_max) && + !IN_RECOVERY(tp->t_flags) && + (to.to_flags & TOF_SACK) == 0 && + OFP_TAILQ_EMPTY(&tp->snd_holes)) { + + /* + * This is a pure ack for outstanding data. + */ + if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + + TCPSTAT_INC(tcps_predack); + + /* + * "bad retransmit" recovery. + */ + if (tp->t_rxtshift == 1 && + tp->t_flags & TF_PREVVALID && + (int)(ticks - tp->t_badrxtwin) < 0) { + /* HJo: FIX: + ofp_cc_cong_signal(tp, th, CC_RTO_ERR); + */ + } + + /* + * Recalculate the transmit timer / rtt. + * + * Some boxes send broken timestamp replies + * during the SYN+ACK phase, ignore + * timestamps of 0 or we could calculate a + * huge RTT and blow up the retransmit timer. 
+ */ + if ((to.to_flags & TOF_TS) != 0 && + to.to_tsecr) { + uint32_t t; + + t = tcp_ts_getticks() - to.to_tsecr; + if (!tp->t_rttlow || tp->t_rttlow > (int)t) + tp->t_rttlow = t; + tcp_xmit_timer(tp, + TCP_TS_TO_TICKS(t) + 1); + } else if (tp->t_rtttime && + SEQ_GT(th->th_ack, tp->t_rtseq)) { + if (!tp->t_rttlow || + tp->t_rttlow > (int)(ticks - tp->t_rtttime)) + tp->t_rttlow = ticks - tp->t_rtttime; + tcp_xmit_timer(tp, + ticks - tp->t_rtttime); + } + acked = BYTES_THIS_ACK(tp, th); + + /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ + hhook_run_tcp_est_in(tp, th, &to); + + TCPSTAT_INC(tcps_rcvackpack); + TCPSTAT_ADD(tcps_rcvackbyte, acked); + ofp_sbdrop(&so->so_snd, acked); + if (SEQ_GT(tp->snd_una, tp->snd_recover) && + SEQ_LEQ(th->th_ack, tp->snd_recover)) + tp->snd_recover = th->th_ack - 1; + + /* + * Let the congestion control algorithm update + * congestion control related information. This + * typically means increasing the congestion + * window. + */ + /* HJo: FIX: + cc_ack_received(tp, th, CC_ACK); + */ + + tp->snd_una = th->th_ack; + /* + * Pull snd_wl2 up to prevent seq wrap relative + * to th_ack. + */ + tp->snd_wl2 = th->th_ack; + tp->t_dupacks = 0; + odp_packet_free(m); + ND6_HINT(tp); /* Some progress has been made. */ + + /* + * If all outstanding data are acked, stop + * retransmit timer, otherwise restart timer + * using current (possibly backed-off) value. + * If process is waiting for space, + * wakeup/selwakeup/signal. If data + * are ready to send, let ofp_tcp_output + * decide between more output or persist. 
+ */ +#ifdef TCPDEBUG + if (so->so_options & OFP_SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, + (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif + if (tp->snd_una == tp->snd_max) + ofp_tcp_timer_activate(tp, TT_REXMT, 0); + else if (!ofp_tcp_timer_active(tp, TT_PERSIST)) + ofp_tcp_timer_activate(tp, TT_REXMT, + tp->t_rxtcur); + sowwakeup(so); + + if (so->so_snd.sb_cc) + (void) ofp_tcp_output(tp); + + goto check_delack; + } + } else if (th->th_ack == tp->snd_una && + tlen <= sbspace(&so->so_rcv)) { + int newsize = 0; /* automatic sockbuf scaling */ + + /* + * This is a pure, in-sequence data packet with + * nothing on the reassembly queue and we have enough + * buffer space to take it. + */ + if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + + /* Clean receiver SACK report if present */ + if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) + ofp_tcp_clean_sackreport(tp); + TCPSTAT_INC(tcps_preddat); + tp->rcv_nxt += tlen; + + /* + * Pull snd_wl1 up to prevent seq wrap relative to + * th_seq. + */ + tp->snd_wl1 = th->th_seq; + /* + * Pull rcv_up up to prevent seq wrap relative to + * rcv_nxt. + */ + tp->rcv_up = tp->rcv_nxt; + TCPSTAT_INC(tcps_rcvpack); + TCPSTAT_ADD(tcps_rcvbyte, tlen); + ND6_HINT(tp); /* Some progress has been made */ +#ifdef TCPDEBUG + if (so->so_options & OFP_SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, + (void *)tcp_saveipgen, &tcp_savetcp, 0); +#endif + /* + * Automatic sizing of receive socket buffer. Often the send + * buffer size is not optimally adjusted to the actual network + * conditions at hand (delay bandwidth product). Setting the + * buffer size too small limits throughput on links with high + * bandwidth and high delay (eg. trans-continental/oceanic links). + * + * On the receive side the socket buffer memory is only rarely + * used to any significant extent. This allows us to be much + * more aggressive in scaling the receive socket buffer. 
For + * the case that the buffer space is actually used to a large + * extent and we run out of kernel memory we can simply drop + * the new segments; TCP on the sender will just retransmit it + * later. Setting the buffer size too big may only consume too + * much kernel memory if the application doesn't read() from + * the socket or packet loss or reordering makes use of the + * reassembly queue. + * + * The criteria to step up the receive buffer one notch are: + * 1. the number of bytes received during the time it takes + * one timestamp to be reflected back to us (the RTT); + * 2. received bytes per RTT is within seven eighth of the + * current socket buffer size; + * 3. receive buffer size has not hit maximal automatic size; + * + * This algorithm does one step per RTT at most and only if + * we receive a bulk stream w/o packet losses or reorderings. + * Shrinking the buffer during idle times is not necessary as + * it doesn't consume any memory when idle. + * + * TODO: Only step up if the application is actually serving + * the buffer to better manage the socket buffer resources. + */ + if (V_tcp_do_autorcvbuf && + to.to_tsecr && + (so->so_rcv.sb_flags & SB_AUTOSIZE)) { + if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) && + to.to_tsecr - tp->rfbuf_ts < hz) { + if (tp->rfbuf_cnt > + (int)(so->so_rcv.sb_hiwat / 8 * 7) && + (int)so->so_rcv.sb_hiwat < + V_tcp_autorcvbuf_max) { + newsize = + min(so->so_rcv.sb_hiwat + + V_tcp_autorcvbuf_inc, + V_tcp_autorcvbuf_max); + } + /* Start over with next RTT. */ + tp->rfbuf_ts = 0; + tp->rfbuf_cnt = 0; + } else + tp->rfbuf_cnt += tlen; /* add up */ + } + + /* Add data to socket buffer. */ + SOCKBUF_LOCK(&so->so_rcv); + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { + odp_packet_free(m); + } else { + /* + * Set new socket buffer size. + * Give up when limit is reached. 
+ */ + if (newsize) + if (!ofp_sbreserve_locked(&so->so_rcv, + newsize, so, NULL)) + so->so_rcv.sb_flags &= ~SB_AUTOSIZE; + odp_packet_pull_head(m, drop_hdrlen); /* delayed header drop */ + ofp_sbappendstream_locked(&so->so_rcv, m); + } + /* NB: sorwakeup_locked() does an implicit unlock. */ + sorwakeup_locked(so); + if (DELAY_ACK(tp)) { + t_flags_or(tp->t_flags, TF_DELACK); + } else { + t_flags_or(tp->t_flags, TF_ACKNOW); + ofp_tcp_output(tp); + } + + goto check_delack; + } + } + + /* + * Calculate amount of space in receive window, + * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. + */ + win = sbspace(&so->so_rcv); + if (win < 0) + win = 0; + tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); + + /* Reset receive buffer auto scaling when not in bulk receive mode. */ + tp->rfbuf_ts = 0; + tp->rfbuf_cnt = 0; + + switch (tp->t_state) { + /* + * If the state is SYN_RECEIVED: + * if seg contains an ACK, but not for our SYN/ACK, send a RST. + */ + case TCPS_SYN_RECEIVED: + + if ((thflags & OFP_TH_ACK) && + (SEQ_LEQ(th->th_ack, tp->snd_una) || + SEQ_GT(th->th_ack, tp->snd_max))) { + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + break; + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. + * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if seg contains an ECE and ECN support is enabled, the stream + * is ECN capable. 
+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + + if ((thflags & OFP_TH_ACK) && + (SEQ_LEQ(th->th_ack, tp->iss) || + SEQ_GT(th->th_ack, tp->snd_max))) { + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } + if ((thflags & (OFP_TH_ACK|OFP_TH_RST)) == (OFP_TH_ACK|OFP_TH_RST)) + tp = ofp_tcp_drop(tp, OFP_ECONNREFUSED); + if (thflags & OFP_TH_RST) + goto drop; + if (!(thflags & OFP_TH_SYN)) + goto drop; + + tp->irs = th->th_seq; + tcp_rcvseqinit(tp); + if (thflags & OFP_TH_ACK) { + TCPSTAT_INC(tcps_connects); + ofp_soisconnected(so); +#ifdef MAC + mac_socketpeer_set_from_mbuf(m, so); +#endif + /* Do window scaling on this connection? */ + if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + (TF_RCVD_SCALE|TF_REQ_SCALE)) { + tp->rcv_scale = tp->request_r_scale; + } + tp->rcv_adv += imin(tp->rcv_wnd, + OFP_TCP_MAXWIN << tp->rcv_scale); + tp->snd_una++; /* SYN is acked */ + /* + * If there's data, delay ACK; if there's also a FIN + * ACKNOW will be turned on later. + */ + // HJo XXXXXX + /* #define DELAY_ACK(tp) + ((!ofp_tcp_timer_active(tp, TT_DELACK) && + (tp->t_flags & TF_RXWIN0SENT) == 0) && + (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) */ + + if (DELAY_ACK(tp) && tlen != 0) + ofp_tcp_timer_activate(tp, TT_DELACK, + ofp_tcp_delacktime); + else + t_flags_or(tp->t_flags, TF_ACKNOW); + + if ((thflags & OFP_TH_ECE) && V_tcp_do_ecn) { + t_flags_or(tp->t_flags, TF_ECN_PERMIT); + TCPSTAT_INC(tcps_ecn_shs); + } + + /* + * Received in SYN_SENT[*] state. 
+ * Transitions: + * SYN_SENT --> ESTABLISHED + * SYN_SENT* --> FIN_WAIT_1 + */ + tp->t_starttime = ticks; + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + t_flags_and(tp->t_flags, ~TF_NEEDFIN); + thflags &= ~OFP_TH_SYN; + } else { + tp->t_state = TCPS_ESTABLISHED; + cc_conn_init(tp); + ofp_tcp_timer_activate(tp, TT_KEEP, + TP_KEEPIDLE(tp)); + } + } else { + /* + * Received initial SYN in SYN-SENT[*] state => + * simultaneous open. If segment contains CC option + * and there is a cached CC, apply TAO test. + * If it succeeds, connection is * half-synchronized. + * Otherwise, do 3-way handshake: + * SYN-SENT -> SYN-RECEIVED + * SYN-SENT* -> SYN-RECEIVED* + * If there was no CC option, clear cached CC value. + */ + + t_flags_or(tp->t_flags, (TF_ACKNOW | TF_NEEDSYN)); + ofp_tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_state = TCPS_SYN_RECEIVED; + } + + KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: " + "ti_locked %d", __func__, ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(tp->t_inpcb); + + /* + * Advance th->th_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + th->th_seq++; + if (tlen > (int)tp->rcv_wnd) { + todrop = tlen - tp->rcv_wnd; + odp_packet_pull_tail(m, todrop); + tlen = tp->rcv_wnd; + thflags &= ~OFP_TH_FIN; + TCPSTAT_INC(tcps_rcvpackafterwin); + TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); + } + tp->snd_wl1 = th->th_seq - 1; + tp->rcv_up = th->th_seq; + /* + * Client side of transaction: already sent SYN and data. + * If the remote host used T/TCP to validate the SYN, + * our data will be ACK'd; if so, enter normal data segment + * processing in the middle of step 5, ack processing. + * Otherwise, goto step 6. + */ + + if (thflags & OFP_TH_ACK) + goto process_ACK; + + goto step6; + + /* + * If the state is LAST_ACK or CLOSING or TIME_WAIT: + * do normal processing. + * + * NB: Leftover from RFC1644 T/TCP. Cases to be reused later. 
+ */ + case TCPS_LAST_ACK: + case TCPS_CLOSING: + + break; /* continue normal processing */ + } + + /* + * States other than LISTEN or SYN_SENT. + * First check the RST flag and sequence number since reset segments + * are exempt from the timestamp and connection count tests. This + * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix + * below which allowed reset segments in half the sequence space + * to fall though and be processed (which gives forged reset + * segments with a random sequence number a 50 percent chance of + * killing a connection). + * Then check timestamp, if present. + * Then check the connection count, if present. + * Then check that at least some bytes of segment are within + * receive window. If segment begins before rcv_nxt, + * drop leading data (and SYN); if nothing left, just ack. + * + * + * If the RST bit is set, check the sequence number to see + * if this is a valid reset segment. + * RFC 793 page 37: + * In all states except SYN-SENT, all reset (RST) segments + * are validated by checking their SEQ-fields. A reset is + * valid if its sequence number is in the window. + * Note: this does not take into account delayed ACKs, so + * we should test against last_ack_sent instead of rcv_nxt. + * The sequence number in the reset segment is normally an + * echo of our outgoing acknowlegement numbers, but some hosts + * send a reset with the sequence number at the rightmost edge + * of our receive window, and we have to handle this case. + * Note 2: Paul Watson's paper "Slipping in the Window" has shown + * that brute force RST attacks are possible. To combat this, + * we use a much stricter check while in the ESTABLISHED state, + * only accepting RSTs where the sequence number is equal to + * last_ack_sent. In all other states (the states in which a + * RST is more likely), the more permissive check is used. 
+ * If we have multiple segments in flight, the initial reset + * segment sequence numbers will be to the left of last_ack_sent, + * but they will eventually catch up. + * In any case, it never made sense to trim reset segments to + * fit the receive window since RFC 1122 says: + * 4.2.2.12 RST Segment: RFC-793 Section 3.4 + * + * A TCP SHOULD allow a received RST segment to include data. + * + * DISCUSSION + * It has been suggested that a RST segment could contain + * ASCII text that encoded and explained the cause of the + * RST. No standard has yet been established for such + * data. + * + * If the reset segment passes the sequence number test examine + * the state: + * SYN_RECEIVED STATE: + * If passive open, return to LISTEN state. + * If active open, inform user that connection was refused. + * ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES: + * Inform user that connection was reset, and close ofp_tcb. + * CLOSING, LAST_ACK STATES: + * Close the ofp_tcb. + * TIME_WAIT STATE: + * Drop the segment - see Stevens, vol. 2, p. 964 and + * RFC 1337. 
+ */ + if (thflags & OFP_TH_RST) { + if (SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) && + SEQ_LEQ(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { + switch (tp->t_state) { + case TCPS_SYN_RECEIVED: + so->so_error = OFP_ECONNREFUSED; + goto close; + + case TCPS_ESTABLISHED: + if (V_tcp_insecure_rst == 0 && + !(SEQ_GEQ(th->th_seq, tp->rcv_nxt - 1) && + SEQ_LEQ(th->th_seq, tp->rcv_nxt + 1)) && + !(SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) && + SEQ_LEQ(th->th_seq, tp->last_ack_sent + 1))) { + TCPSTAT_INC(tcps_badrst); + goto drop; + } + /* FALLTHROUGH */ + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + so->so_error = OFP_ECONNRESET; + close: + KASSERT(ti_locked == TI_WLOCKED, + ("ofp_tcp_do_segment: OFP_TH_RST 1 ti_locked %d", + ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + tp->t_state = TCPS_CLOSED; + TCPSTAT_INC(tcps_drops); + tp = ofp_tcp_close(tp); + break; + + case TCPS_CLOSING: + case TCPS_LAST_ACK: + KASSERT(ti_locked == TI_WLOCKED, + ("ofp_tcp_do_segment: OFP_TH_RST 2 ti_locked %d", + ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + tp = ofp_tcp_close(tp); + break; + } + } + + goto drop; + } + + /* + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ + if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && + TSTMP_LT(to.to_tsval, tp->ts_recent)) { + /* Check to see if ts_recent is over 24 days old. */ + if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { + /* + * Invalidate ts_recent. If this segment updates + * ts_recent, the age will be reset later and ts_recent + * will get a valid value. If it does not, setting + * ts_recent to zero will at least satisfy the + * requirement that zero be placed in the timestamp + * echo reply when ts_recent isn't valid. The + * age isn't reset until we get a valid ts_recent + * because we don't want out-of-order segments to be + * dropped when ts_recent is old. 
+ */ + tp->ts_recent = 0; + } else { + TCPSTAT_INC(tcps_rcvduppack); + TCPSTAT_ADD(tcps_rcvdupbyte, tlen); + TCPSTAT_INC(tcps_pawsdrop); + if (tlen) { + if (V_tcp_passive_trace) + printf(">>>>>>. drop after ack (1)\n"); + goto dropafterack; + } + goto drop; + } + } + + /* + * In the SYN-RECEIVED state, validate that the packet belongs to + * this connection before trimming the data to fit the receive + * window. Check the sequence number versus IRS since we know + * the sequence numbers haven't wrapped. This is a partial fix + * for the "LAND" DoS attack. + */ + if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + + todrop = tp->rcv_nxt - th->th_seq; + if (todrop > 0) { + if (thflags & OFP_TH_SYN) { + thflags &= ~OFP_TH_SYN; + th->th_seq++; + if (th->th_urp > 1) + th->th_urp--; + else + thflags &= ~OFP_TH_URG; + todrop--; + } + /* + * Following if statement from Stevens, vol. 2, p. 960. + */ + if (todrop > tlen + || (todrop == tlen && (thflags & OFP_TH_FIN) == 0)) { + /* + * Any valid FIN must be to the left of the window. + * At this point the FIN must be a duplicate or out + * of sequence; drop it. + */ + thflags &= ~OFP_TH_FIN; + + /* + * Send an ACK to resynchronize and drop any data. + * But keep on processing for RST or ACK. + */ + t_flags_or(tp->t_flags, TF_ACKNOW); + todrop = tlen; + TCPSTAT_INC(tcps_rcvduppack); + TCPSTAT_ADD(tcps_rcvdupbyte, todrop); + } else { + TCPSTAT_INC(tcps_rcvpartduppack); + TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); + } + drop_hdrlen += todrop; /* drop from the top afterwards */ + th->th_seq += todrop; + tlen -= todrop; + if (th->th_urp > todrop) + th->th_urp -= todrop; + else { + thflags &= ~OFP_TH_URG; + th->th_urp = 0; + } + } + + /* + * If new data are received on a connection after the + * user processes are gone, then RST the other end. 
+ */ + if ((so->so_state & SS_NOFDREF) && + tp->t_state > TCPS_CLOSE_WAIT && tlen) { + char *s; + + KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && " + "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + if ((s = ofp_tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket " + "was closed, sending RST and removing tcpcb\n", + s, __func__, tcpstates[tp->t_state], tlen); + free(s); + } + tp = ofp_tcp_close(tp); + TCPSTAT_INC(tcps_rcvafterclose); + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } + + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); + if (todrop > 0) { + TCPSTAT_INC(tcps_rcvpackafterwin); + if (todrop >= tlen) { + TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen); + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. Continue processing, but + * remember to ack. Otherwise, drop segment + * and ack. + */ + if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { + t_flags_or(tp->t_flags, TF_ACKNOW); + TCPSTAT_INC(tcps_rcvwinprobe); + } else { + if (V_tcp_passive_trace) + printf(">>>>>>. drop after ack (2) wnd=%lu seq=%u next=%u\n", tp->rcv_wnd, th->th_seq, tp->rcv_nxt); + goto dropafterack; + } + } else { + if (V_tcp_passive_trace) + printf(">>>>>>>>>>>>>>>>>. dropping %u bytes after window\n", todrop); + TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); + } + odp_packet_pull_tail(m, todrop); + tlen -= todrop; + thflags &= ~(OFP_TH_PUSH|OFP_TH_FIN); + } + + /* + * If last ACK falls within this segment's sequence numbers, + * record its timestamp. + * NOTE: + * 1) That the test incorporates suggestions from the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). 
+ * 2) That updating only on newer timestamps interferes with + * our earlier PAWS tests, so this check should be solely + * predicated on the sequence space of this segment. + * 3) That we modify the segment boundary check to be + * Last.ACK.Sent <= SEG.SEQ + SEG.Len + * instead of RFC1323's + * Last.ACK.Sent < SEG.SEQ + SEG.Len, + * This modified check allows us to overcome RFC1323's + * limitations as described in Stevens TCP/IP Illustrated + * Vol. 2 p.869. In such cases, we can still calculate the + * RTT correctly when RCV.NXT == Last.ACK.Sent. + */ + if ((to.to_flags & TOF_TS) != 0 && + SEQ_LEQ(th->th_seq, tp->last_ack_sent) && + SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + + ((thflags & (OFP_TH_SYN|OFP_TH_FIN)) != 0))) { + tp->ts_recent_age = tcp_ts_getticks(); + tp->ts_recent = to.to_tsval; + } + + /* + * If a SYN is in the window, then this is an + * error and we send an RST and drop the connection. + */ + if (thflags & OFP_TH_SYN) { + KASSERT(ti_locked == TI_WLOCKED, + ("ofp_tcp_do_segment: OFP_TH_SYN ti_locked %d", ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + tp = ofp_tcp_drop(tp, OFP_ECONNRESET); + rstreason = BANDLIM_UNLIMITED; + goto drop; + } + + /* + * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN + * flag is on (half-synchronized state), then queue data for + * later processing; else drop segment and return. + */ + if ((thflags & OFP_TH_ACK) == 0) { + if (tp->t_state == TCPS_SYN_RECEIVED || + (tp->t_flags & TF_NEEDSYN)) + goto step6; + else if (tp->t_flags & TF_ACKNOW) { + if (V_tcp_passive_trace) + printf(">>>>>>. drop after ack (3)\n"); + goto dropafterack; + } else + goto drop; + } + + /* + * Ack processing. + */ + switch (tp->t_state) { + /* + * In SYN_RECEIVED state, the ack ACKs our SYN, so enter + * ESTABLISHED state and continue processing. + * The ACK was checked above. + */ + case TCPS_SYN_RECEIVED: + + TCPSTAT_INC(tcps_connects); + ofp_soisconnected(so); + /* Do window scaling? 
*/ + if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + (TF_RCVD_SCALE|TF_REQ_SCALE)) { + tp->rcv_scale = tp->request_r_scale; + tp->snd_wnd = tiwin; + } + /* + * Make transitions: + * SYN-RECEIVED -> ESTABLISHED + * SYN-RECEIVED* -> FIN-WAIT-1 + */ + tp->t_starttime = ticks; + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + t_flags_and(tp->t_flags, ~TF_NEEDFIN); + } else { + tp->t_state = TCPS_ESTABLISHED; + cc_conn_init(tp); + ofp_tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); + } + /* + * If segment contains data or ACK, will call ofp_tcp_reass() + * later; if not, do so now to pass queued data to user. + */ + if (tlen == 0 && (thflags & OFP_TH_FIN) == 0) + (void) ofp_tcp_reass(tp, (struct ofp_tcphdr *)0, 0, + (odp_packet_t )0); + tp->snd_wl1 = th->th_seq - 1; + /* FALLTHROUGH */ + + /* + * In ESTABLISHED state: drop duplicate ACKs; ACK out of range + * ACKs. If the ack is in the range + * tp->snd_una < th->th_ack <= tp->snd_max + * then advance tp->snd_una to th->th_ack and drop + * data from the retransmission queue. If this ACK reflects + * more up to date window information we update our window information. + */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + +#ifdef PASSIVE_INET + if (tp->t_inpcb->inp_flags2 & INP_PASSIVE) { + if (SEQ_LT(tp->snd_max, th->th_ack)) + tp->snd_max = th->th_ack; + } +#endif + if (SEQ_GT(th->th_ack, tp->snd_max)) { + TCPSTAT_INC(tcps_rcvacktoomuch); + if (V_tcp_passive_trace) + printf(">>>>>>. drop after ack (4)\n"); + goto dropafterack; + } + if ((tp->t_flags & TF_SACK_PERMIT) && + ((to.to_flags & TOF_SACK) || + !OFP_TAILQ_EMPTY(&tp->snd_holes))) + ofp_tcp_sack_doack(tp, &to, th->th_ack); + + /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. 
*/ + hhook_run_tcp_est_in(tp, th, &to); + + if (SEQ_LEQ(th->th_ack, tp->snd_una)) { + if (tlen == 0 && tiwin == tp->snd_wnd) { + TCPSTAT_INC(tcps_rcvdupack); + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change), the ack is the biggest we've + * seen and we've seen exactly our rexmt + * threshhold of them, assume a packet + * has been dropped and retransmit it. + * Kludge snd_nxt & the congestion + * window so we send only this one + * packet. + * + * We know we're losing at the current + * window size so do congestion avoidance + * (set ssthresh to half the current window + * and pull our congestion window back to + * the new ssthresh). + * + * Dup acks mean that packets have left the + * network (they're now cached at the receiver) + * so bump cwnd by the amount in the receiver + * to keep a constant cwnd packets in the + * network. + * + * When using TCP ECN, notify the peer that + * we reduced the cwnd. + */ + if (!ofp_tcp_timer_active(tp, TT_REXMT) || + th->th_ack != tp->snd_una) + tp->t_dupacks = 0; + else if (++tp->t_dupacks > ofp_tcprexmtthresh || + IN_FASTRECOVERY(tp->t_flags)) { + /* HJo: FIX: + cc_ack_received(tp, th, CC_DUPACK); + */ + if ((tp->t_flags & TF_SACK_PERMIT) && + IN_FASTRECOVERY(tp->t_flags)) { + int awnd; + + /* + * Compute the amount of data in flight first. + * We can inject new data into the pipe iff + * we have less than 1/2 the original window's + * worth of data in flight. + */ + awnd = (tp->snd_nxt - tp->snd_fack) + + tp->sackhint.sack_bytes_rexmit; + if (awnd < (int)tp->snd_ssthresh) { + tp->snd_cwnd += tp->t_maxseg; + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + } + } else + tp->snd_cwnd += tp->t_maxseg; + (void) ofp_tcp_output(tp); + goto drop; + } else if (tp->t_dupacks == ofp_tcprexmtthresh) { + tcp_seq onxt = tp->snd_nxt; + + /* + * If we're doing sack, check to + * see if we're already in sack + * recovery. 
If we're not doing sack, + * check to see if we're in newreno + * recovery. + */ + if (tp->t_flags & TF_SACK_PERMIT) { + if (IN_FASTRECOVERY(tp->t_flags)) { + tp->t_dupacks = 0; + break; + } + } else { + if (SEQ_LEQ(th->th_ack, + tp->snd_recover)) { + tp->t_dupacks = 0; + break; + } + } + /* Congestion signal before ack. */ + /* HJo: FIX: + ofp_cc_cong_signal(tp, th, CC_NDUPACK); + cc_ack_received(tp, th, CC_DUPACK); + */ + ofp_tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rtttime = 0; + if (tp->t_flags & TF_SACK_PERMIT) { + TCPSTAT_INC( + tcps_sack_recovery_episode); + tp->sack_newdata = tp->snd_nxt; + tp->snd_cwnd = tp->t_maxseg; + (void) ofp_tcp_output(tp); + goto drop; + } + tp->snd_nxt = th->th_ack; + tp->snd_cwnd = tp->t_maxseg; + (void) ofp_tcp_output(tp); + KASSERT(tp->snd_limited <= 2, + ("%s: tp->snd_limited too big", + __func__)); + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * + (tp->t_dupacks - tp->snd_limited); + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (V_tcp_do_rfc3042) { + /* HJo: FIX: + cc_ack_received(tp, th, CC_DUPACK); + */ + uint64_t oldcwnd = tp->snd_cwnd; + tcp_seq oldsndmax = tp->snd_max; + uint32_t sent; + + KASSERT(tp->t_dupacks == 1 || + tp->t_dupacks == 2, + ("%s: dupacks not 1 or 2", + __func__)); + if (tp->t_dupacks == 1) + tp->snd_limited = 0; + tp->snd_cwnd = + (tp->snd_nxt - tp->snd_una) + + (tp->t_dupacks - tp->snd_limited) * + tp->t_maxseg; + (void) ofp_tcp_output(tp); + sent = tp->snd_max - oldsndmax; + if (sent > tp->t_maxseg) { + KASSERT((tp->t_dupacks == 2 && + tp->snd_limited == 0) || + (sent == tp->t_maxseg + 1 && + tp->t_flags & TF_SENTFIN), + ("%s: sent too much", + __func__)); + tp->snd_limited = 2; + } else if (sent > 0) + ++tp->snd_limited; + tp->snd_cwnd = oldcwnd; + goto drop; + } + } else + tp->t_dupacks = 0; + break; + } + + KASSERT(SEQ_GT(th->th_ack, tp->snd_una), + ("%s: th_ack <= snd_una", __func__)); + + /* + * If the congestion window was inflated to account + * for the 
other side's cached packets, retract it. + */ + if (IN_FASTRECOVERY(tp->t_flags)) { + if (SEQ_LT(th->th_ack, tp->snd_recover)) { + if (tp->t_flags & TF_SACK_PERMIT) + ofp_tcp_sack_partialack(tp, th); + else + tcp_newreno_partial_ack(tp, th); + } else + cc_post_recovery(tp, th); + } + tp->t_dupacks = 0; + /* + * If we reach this point, ACK is not a duplicate, + * i.e., it ACKs something we sent. + */ + if (tp->t_flags & TF_NEEDSYN) { + /* + * T/TCP: Connection was half-synchronized, and our + * SYN has been ACK'd (so connection is now fully + * synchronized). Go to non-starred state, + * increment snd_una for ACK of SYN, and check if + * we can do window scaling. + */ + t_flags_and(tp->t_flags, ~TF_NEEDSYN); + tp->snd_una++; + /* Do window scaling? */ + if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + (TF_RCVD_SCALE|TF_REQ_SCALE)) { + tp->rcv_scale = tp->request_r_scale; + /* Send window already scaled. */ + } + } + +process_ACK: + INP_WLOCK_ASSERT(tp->t_inpcb); + + acked = BYTES_THIS_ACK(tp, th); + TCPSTAT_INC(tcps_rcvackpack); + TCPSTAT_ADD(tcps_rcvackbyte, acked); + + /* + * If we just performed our first retransmit, and the ACK + * arrives within our recovery window, then it was a mistake + * to do the retransmit in the first place. Recover our + * original cwnd and ssthresh, and proceed to transmit where + * we left off. + */ + /* HJo: FIX: + if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && + (int)(ticks - tp->t_badrxtwin) < 0) + ofp_cc_cong_signal(tp, th, CC_RTO_ERR); + */ + /* + * If we have a timestamp reply, update smoothed + * round trip time. If no timestamp is present but + * transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + * Since we now have an rtt measurement, cancel the + * timer backoff (cf., Phil Karn's retransmit alg.). + * Recompute the initial retransmit timer. 
+ * + * Some boxes send broken timestamp replies + * during the SYN+ACK phase, ignore + * timestamps of 0 or we could calculate a + * huge RTT and blow up the retransmit timer. + */ + if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { + uint32_t t; + + t = tcp_ts_getticks() - to.to_tsecr; + if (!tp->t_rttlow || tp->t_rttlow > (int)t) + tp->t_rttlow = t; + tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); + } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { + if (!tp->t_rttlow || tp->t_rttlow > (int)(ticks - tp->t_rtttime)) + tp->t_rttlow = ticks - tp->t_rtttime; + tcp_xmit_timer(tp, ticks - tp->t_rtttime); + } + + /* + * If all outstanding data is acked, stop retransmit + * timer and remember to restart (more output or persist). + * If there is more data to be acked, restart retransmit + * timer, using current (possibly backed-off) value. + */ + if (th->th_ack == tp->snd_max) { + ofp_tcp_timer_activate(tp, TT_REXMT, 0); + needoutput = 1; + } else if (!ofp_tcp_timer_active(tp, TT_PERSIST)) + ofp_tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + + /* + * If no data (only SYN) was ACK'd, + * skip rest of ACK processing. + */ + if (acked == 0) + goto step6; + + /* + * Let the congestion control algorithm update congestion + * control related information. This typically means increasing + * the congestion window. + */ + /* HJo: FIX + cc_ack_received(tp, th, CC_ACK); + */ + SOCKBUF_LOCK(&so->so_snd); + if (acked > (int)so->so_snd.sb_cc) { + tp->snd_wnd -= so->so_snd.sb_cc; + ofp_sbdrop_locked(&so->so_snd, (int)so->so_snd.sb_cc); + ourfinisacked = 1; + } else { + ofp_sbdrop_locked(&so->so_snd, acked); + tp->snd_wnd -= acked; + ourfinisacked = 0; + } + /* NB: sowwakeup_locked() does an implicit unlock. */ + sowwakeup_locked(so); + /* Detect una wraparound. */ + if (!IN_RECOVERY(tp->t_flags) && + SEQ_GT(tp->snd_una, tp->snd_recover) && + SEQ_LEQ(th->th_ack, tp->snd_recover)) + tp->snd_recover = th->th_ack - 1; + /* XXXLAS: Can this be moved up into cc_post_recovery? 
*/ + if (IN_RECOVERY(tp->t_flags) && + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_RECOVERY(tp->t_flags); + } + tp->snd_una = th->th_ack; + if (tp->t_flags & TF_SACK_PERMIT) { + if (SEQ_GT(tp->snd_una, tp->snd_recover)) + tp->snd_recover = tp->snd_una; + } + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + switch (tp->t_state) { + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + + if (ourfinisacked) { + /* + * If we can't receive any more + * data, then closing user can proceed. + * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + * + * XXXjl: + * we should release the tp also, and use a + * compressed state. + */ + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { + ofp_soisdisconnected(so); + ofp_tcp_timer_activate(tp, TT_2MSL, + (ofp_tcp_fast_finwait2_recycle ? + ofp_tcp_finwait2_timeout : + TP_MAXIDLE(tp))); + } + tp->t_state = TCPS_FIN_WAIT_2; + } + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. + */ + case TCPS_CLOSING: + + if (ourfinisacked) { + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + ofp_tcp_twstart(tp); + INP_INFO_WUNLOCK(&V_tcbinfo); + odp_packet_free(m); + if (V_tcp_passive_trace) + printf(">>>>>>>>>>>>>>>>>>> CLOSING finisacked tlen=%u\n", tlen); + return; + } + break; + + /* + * In LAST_ACK, we may still be waiting for data to drain + * and/or to be acked, as well as for the ack of our FIN. + * If our FIN is now acknowledged, delete the TCB, + * enter the closed state and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) { + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + tp = ofp_tcp_close(tp); + goto drop; + } + break; + } + } + +step6: + INP_WLOCK_ASSERT(tp->t_inpcb); + + /* + * Update window information. 
+ * Don't look at window if no ACK: TAC's send garbage on first SYN. + */ + if ((thflags & OFP_TH_ACK) && + (SEQ_LT(tp->snd_wl1, th->th_seq) || + (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || + (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { + /* keep track of pure window updates */ + if (tlen == 0 && + tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) + TCPSTAT_INC(tcps_rcvwinupd); + tp->snd_wnd = tiwin; + tp->snd_wl1 = th->th_seq; + tp->snd_wl2 = th->th_ack; + if (tp->snd_wnd > tp->max_sndwnd) + tp->max_sndwnd = tp->snd_wnd; + needoutput = 1; + } + + /* + * Process segments with URG. + */ + if ((thflags & OFP_TH_URG) && th->th_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive and accept + * random urgent pointers, we'll crash in + * ofp_soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + SOCKBUF_LOCK(&so->so_rcv); + if (th->th_urp + so->so_rcv.sb_cc > ofp_sb_max) { + th->th_urp = 0; /* XXX */ + thflags &= ~OFP_TH_URG; /* XXX */ + SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ + goto dodata; /* XXX */ + } + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + * + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section as the original + * spec states (in one of two places). 
+ */ + if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { + tp->rcv_up = th->th_seq + th->th_urp; + so->so_oobmark = so->so_rcv.sb_cc + + (tp->rcv_up - tp->rcv_nxt) - 1; + if (so->so_oobmark == 0) + so->so_rcv.sb_state |= SBS_RCVATMARK; + ofp_sohasoutofband(so); + tp->t_oobflags &= ~(OFP_TCPOOB_HAVEDATA | OFP_TCPOOB_HADDATA); + } + SOCKBUF_UNLOCK(&so->so_rcv); + /* + * Remove out of band data so doesn't get presented to user. + * This can happen independent of advancing the URG pointer, + * but if two URG's are pending at once, some out-of-band + * data may creep in... ick. + */ + if (th->th_urp <= (uint64_t)tlen && + !(so->so_options & OFP_SO_OOBINLINE)) { + /* hdr drop is delayed */ + tcp_pulloutofband(so, th, m, drop_hdrlen); + } + } else { + /* + * If no out of band data is expected, + * pull receive urgent pointer along + * with the receive window. + */ + if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) + tp->rcv_up = tp->rcv_nxt; + } + +dodata: /* XXX */ + INP_WLOCK_ASSERT(tp->t_inpcb); + + if (tlen && (tp->t_state == TCPS_CLOSE_WAIT || + tp->t_state == TCPS_SYN_SENT || + tp->t_state == TCPS_CLOSING || + tp->t_state == TCPS_LAST_ACK || + tp->t_state == TCPS_TIME_WAIT)) { + /* + * TCP MUST ignore a data segment in SYNSENT, CLOSE-WAIT, + * CLOSING, LAST-ACK or TIME-WAIT state. + */ + goto drop; + } + /* + * Process the segment text, merging it into the TCP sequencing queue, + * and arranging for acknowledgment of receipt if necessary. + * This process logically involves adjusting tp->rcv_wnd as data + * is presented to the user (this happens in tcp_usrreq.c, + * case OFP_PRU_RCVD). If a FIN has already been received on this + * connection then we just ignore the text. + */ + if ((tlen || (thflags & OFP_TH_FIN)) && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + tcp_seq save_start = th->th_seq; + odp_packet_pull_head(m, drop_hdrlen); /* delayed header drop */ + /* + * Insert segment which includes th into TCP reassembly queue + * with control block tp. 
Set thflags to whether reassembly now + * includes a segment with FIN. This handles the common case + * inline (segment is the next to be received on an established + * connection, and the queue is empty), avoiding linkage into + * and removal from the queue and repetition of various + * conversions. + * Set DELACK for segments received in order, but ack + * immediately when segments are out of order (so + * fast retransmit can work). + */ + if (th->th_seq == tp->rcv_nxt && + OFP_LIST_EMPTY(&tp->t_segq) && + TCPS_HAVEESTABLISHED(tp->t_state)) { + if (DELAY_ACK(tp)) + t_flags_or(tp->t_flags, TF_DELACK); + else + t_flags_or(tp->t_flags, TF_ACKNOW); + tp->rcv_nxt += tlen; + thflags = th->th_flags & OFP_TH_FIN; + TCPSTAT_INC(tcps_rcvpack); + TCPSTAT_ADD(tcps_rcvbyte, tlen); + ND6_HINT(tp); + SOCKBUF_LOCK(&so->so_rcv); + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) + odp_packet_free(m); + else + ofp_sbappendstream_locked(&so->so_rcv, m); + /* NB: sorwakeup_locked() does an implicit unlock. */ + sorwakeup_locked(so); + } else { + /* + * XXX: Due to the header drop above "th" is + * theoretically invalid by now. Fortunately + * m_adj() doesn't actually frees any mbufs + * when trimming from the head. + */ + thflags = ofp_tcp_reass(tp, th, &tlen, m); + t_flags_or(tp->t_flags, TF_ACKNOW); + } + if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT)) + ofp_tcp_update_sack_list(tp, save_start, save_start + tlen); +#if 0 + /* + * Note the amount of data that peer has sent into + * our window, in order to estimate the sender's + * buffer size. + * XXX: Unused. + */ + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) + len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); + else + len = so->so_rcv.sb_hiwat; +#endif + } else { + odp_packet_free(m); + thflags &= ~OFP_TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. 
+ */ + if (thflags & OFP_TH_FIN) { + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { + ofp_socantrcvmore(so); + /* + * If connection is half-synchronized + * (ie NEEDSYN flag on) then delay ACK, + * so it may be piggybacked when SYN is sent. + * Otherwise, since we received a FIN then no + * more input can be expected, send ACK now. + */ + if (tp->t_flags & TF_NEEDSYN) + t_flags_or(tp->t_flags, TF_DELACK); + else + t_flags_or(tp->t_flags, TF_ACKNOW); + tp->rcv_nxt++; + } + switch (tp->t_state) { + /* + * In SYN_RECEIVED and ESTABLISHED STATES + * enter the CLOSE_WAIT state. + */ + case TCPS_SYN_RECEIVED: + + tp->t_starttime = ticks; + /* FALLTHROUGH */ + case TCPS_ESTABLISHED: + + tp->t_state = TCPS_CLOSE_WAIT; + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + + tp->t_state = TCPS_CLOSING; + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata " + "TCP_FIN_WAIT_2 ti_locked: %d", __func__, + ti_locked)); + ofp_tcp_twstart(tp); + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + } + if (no_unlock == 0 && ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } + +#ifdef TCPDEBUG + if (so->so_options & OFP_SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif + + /* + * Return any desired output. 
+ */ + if (needoutput || (tp->t_flags & TF_ACKNOW)) { + /* HJo: FIX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXxxx*/ + tp->t_flags |= TF_ACKNOW; + /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ + if (no_unlock) { + //printf("%s: no_unlock set; but calling ofp_tcp_output?\n", __func__); + } + (void) ofp_tcp_output(tp); + } + +check_delack: + KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", + __func__, ti_locked)); + if (no_unlock == 0) + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(tp->t_inpcb); + + if (tp->t_flags & TF_DELACK) { + if (no_unlock == 0) { + //printf("%s: no_unlock set; but calling ofp_tcp_timer_activate()?\n", __func__); + } + t_flags_and(tp->t_flags, ~TF_DELACK); + ofp_tcp_timer_activate(tp, TT_DELACK, ofp_tcp_delacktime); + } + if (no_unlock == 0) + INP_WUNLOCK(tp->t_inpcb); + + return; + +dropafterack: + + if (V_tcp_passive_trace) { + printf(">>>>>>. drop after ack tlen=%d\n", tlen); + if (thflags & OFP_TH_FIN) + printf (">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>. DROPPING FIN\n"); + } + /* + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. + * + * We can now skip the test for the RST flag since all + * paths to this code happen after packets containing + * RST have been dropped. + * + * In the SYN-RECEIVED state, don't send an ACK unless the + * segment we received passes the SYN-RECEIVED ACK test. + * If it fails send a RST. This breaks the loop in the + * "LAND" DoS attack, and also prevents an ACK storm + * between two listening ports that have been sent forged + * SYN segments, each with the source address of the other. 
+ */ + if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & OFP_TH_ACK) && + (SEQ_GT(tp->snd_una, th->th_ack) || + SEQ_GT(th->th_ack, tp->snd_max)) ) { + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } +#ifdef TCPDEBUG + if (so->so_options & OFP_SO_DEBUG) + tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif + if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + + t_flags_or(tp->t_flags, TF_ACKNOW); + (void) ofp_tcp_output(tp); + INP_WUNLOCK(tp->t_inpcb); + odp_packet_free(m); + + return; + +dropwithreset: + + if (V_tcp_passive_trace) { + printf(">>>>>>. drop with reset tlen=%d\n", tlen); + if (thflags & OFP_TH_FIN) + printf (">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>. DROPPING FIN (2)\n"); + } + if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + + if (tp != NULL) { + tcp_dropwithreset(m, th, tp, tlen, rstreason); + INP_WUNLOCK(tp->t_inpcb); +#ifdef PASSIVE_INET + } else if (so->so_options & OFP_SO_PASSIVE) { + odp_packet_free(m); + return; +#endif + } else + tcp_dropwithreset(m, th, NULL, tlen, rstreason); + + return; + +drop: + + if (V_tcp_passive_trace) { + printf(">>>>>>. drop tlen=%d\n", tlen); + if (thflags & OFP_TH_FIN) + printf (">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>. DROPPING FIN (3)\n"); + } + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } +#ifdef INVARIANTS + else + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); +#endif + + /* + * Drop space held by incoming segment and return. + */ +#ifdef TCPDEBUG + if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & OFP_SO_DEBUG)) + tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif + if (tp != NULL) + INP_WUNLOCK(tp->t_inpcb); + odp_packet_free(m); + +} + +/* + * Issue RST and make ACK acceptable to originator of segment. + * The mbuf must still include the original packet header. + * tp may be NULL. 
+ *
+ * The packet m is consumed on every path: it is either handed to
+ * ofp_tcp_respond() or freed at the drop label.
+ *
+ * No reset is sent (the packet is only freed) when
+ *  - the pcb has INP_PASSIVE set (PASSIVE_INET builds only),
+ *  - the segment itself has RST set,
+ *  - ODP marks the packet as broadcast or multicast,
+ *  - the IPv6 source or destination address is multicast, or
+ *  - for IPv4, the source or destination address is multicast, the
+ *    source is OFP_INADDR_BROADCAST, or in_broadcast() reports the
+ *    destination as broadcast for the packet's interface.
+ *
+ * When tp is not NULL its inpcb lock is checked with INP_WLOCK_ASSERT().
+ * tlen is the segment length used to compute the acknowledged sequence
+ * number; rstreason is currently unused.
+ */
+static void
+tcp_dropwithreset(odp_packet_t m, struct ofp_tcphdr *th, struct tcpcb *tp,
+    int tlen, int rstreason)
+{
+	/* Only referenced by the disabled badport_bandlim() call below. */
+	(void)rstreason;
+#ifdef INET
+	struct ofp_ip *ip;
+#endif
+#ifdef INET6
+	struct ofp_ip6_hdr *ip6;
+#endif
+
+	if (tp != NULL) {
+		INP_WLOCK_ASSERT(tp->t_inpcb);
+
+#ifdef PASSIVE_INET
+		if (tp->t_inpcb->inp_flags2 & INP_PASSIVE)
+			goto drop;
+#endif
+	}
+
+	/* Don't bother if destination was broadcast/multicast. */
+	if ((th->th_flags & OFP_TH_RST) ||
+	    odp_packet_is_bcast(m) || odp_packet_is_mcast(m))
+		goto drop;
+#ifdef INET6
+	if (((struct ofp_ip *)odp_packet_data(m))->ip_v == 6) {
+		ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m);
+		if (OFP_IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+		    OFP_IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+			goto drop;
+		/* IPv6 anycast check is done at tcp6_input() */
+	}
+#endif
+#if defined(INET) && defined(INET6)
+	else	/* pairs with the ip_v == 6 test above when both families are built */
+#endif
+#ifdef INET
+	{
+		ip = (struct ofp_ip *)odp_packet_data(m);
+		if (OFP_IN_MULTICAST(odp_be_to_cpu_32(ip->ip_dst.s_addr)) ||
+		    OFP_IN_MULTICAST(odp_be_to_cpu_32(ip->ip_src.s_addr)) ||
+		    ip->ip_src.s_addr == odp_cpu_to_be_32(OFP_INADDR_BROADCAST) ||
+		    in_broadcast(ip->ip_dst, odp_packet_interface(m)))
+			goto drop;
+	}
+#endif
+
+	/* Perform bandwidth limiting. */
+	/* HJo: FIX
+	if (badport_bandlim(rstreason) < 0)
+		goto drop;
+	*/
+
+	/*
+	 * ofp_tcp_respond consumes the mbuf chain.
+	 * NOTE(review): the two arguments after m are presumably (ack, seq)
+	 * as in BSD tcp_respond(): a segment carrying ACK is answered with
+	 * a bare RST whose sequence number is th_ack, anything else with
+	 * RST|ACK acknowledging th_seq + tlen (plus one when SYN was set).
+	 * Confirm against ofp_tcp_respond().
+	 */
+	if (th->th_flags & OFP_TH_ACK) {
+		ofp_tcp_respond(tp, (void *)odp_packet_data(m), th, m, (tcp_seq)0,
+		    th->th_ack, OFP_TH_RST);
+	} else {
+		if (th->th_flags & OFP_TH_SYN)
+			tlen++;
+		ofp_tcp_respond(tp, (void *)odp_packet_data(m), th, m, th->th_seq+tlen,
+		    (tcp_seq)0, OFP_TH_RST|OFP_TH_ACK);
+	}
+	return;
+drop:
+	odp_packet_free(m);
+}
+
+/*
+ * Parse TCP options and place in tcpopt.
+ *
+ * to    - result; to_flags is cleared first and then receives one TOF_*
+ *         bit per option that was accepted.
+ * cp    - first byte of the option area.
+ * cnt   - number of option bytes available at cp.
+ * flags - TO_SYN when the options come from a SYN segment: MSS, window
+ *         scale and SACK-permitted are only accepted with TO_SYN, SACK
+ *         blocks only without it.
+ *
+ * Parsing stops at EOL or at the first option whose length byte is
+ * missing, smaller than 2 or larger than the bytes left.  A known option
+ * with an unexpected length, and any unknown option, is skipped.
+ * to_signature and to_sacks point into the cp buffer; nothing is copied.
+ */
+static void
+tcp_dooptions(struct tcpopt *to, uint8_t *cp, int cnt, int flags)
+{
+	int opt, optlen;
+
+	to->to_flags = 0;
+	/* "continue" inside the switch still advances by optlen. */
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == OFP_TCPOPT_EOL)
+			break;
+		if (opt == OFP_TCPOPT_NOP)
+			optlen = 1;
+		else {
+			if (cnt < 2)
+				break;
+			optlen = cp[1];
+			if (optlen < 2 || optlen > cnt)
+				break;
+		}
+		switch (opt) {
+		case OFP_TCPOPT_MAXSEG:
+			if (optlen != OFP_TCPOLEN_MAXSEG)
+				continue;
+			if (!(flags & TO_SYN))
+				continue;
+			to->to_flags |= TOF_MSS;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_mss, sizeof(to->to_mss));
+			to->to_mss = odp_be_to_cpu_16(to->to_mss);
+			break;
+		case OFP_TCPOPT_WINDOW:
+			if (optlen != OFP_TCPOLEN_WINDOW)
+				continue;
+			if (!(flags & TO_SYN))
+				continue;
+			to->to_flags |= TOF_SCALE;
+			/* Shift count is capped at OFP_TCP_MAX_WINSHIFT. */
+			to->to_wscale = min(cp[2], OFP_TCP_MAX_WINSHIFT);
+			break;
+		case OFP_TCPOPT_TIMESTAMP:
+			if (optlen != OFP_TCPOLEN_TIMESTAMP)
+				continue;
+			to->to_flags |= TOF_TS;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_tsval, sizeof(to->to_tsval));
+			to->to_tsval = odp_be_to_cpu_32(to->to_tsval);
+			bcopy((char *)cp + 6,
+			    (char *)&to->to_tsecr, sizeof(to->to_tsecr));
+			to->to_tsecr = odp_be_to_cpu_32(to->to_tsecr);
+			break;
+#ifdef TCP_SIGNATURE
+		/*
+		 * XXX In order to reply to a host which has set the
+		 * TCP_SIGNATURE option in its initial SYN, we have to
+		 * record the fact that the option was observed here
+		 * for the syncache code to perform the correct response.
+		 */
+		case OFP_TCPOPT_SIGNATURE:
+			if (optlen != OFP_TCPOLEN_SIGNATURE)
+				continue;
+			to->to_flags |= TOF_SIGNATURE;
+			to->to_signature = cp + 2;
+			break;
+#endif
+		case OFP_TCPOPT_SACK_PERMITTED:
+			if (optlen != OFP_TCPOLEN_SACK_PERMITTED)
+				continue;
+			if (!(flags & TO_SYN))
+				continue;
+			if (!V_tcp_do_sack)
+				continue;
+			to->to_flags |= TOF_SACKPERM;
+			break;
+		case OFP_TCPOPT_SACK:
+			if (optlen <= 2 || (optlen - 2) % OFP_TCPOLEN_SACK != 0)
+				continue;
+			if (flags & TO_SYN)
+				continue;
+			to->to_flags |= TOF_SACK;
+			to->to_nsacks = (optlen - 2) / OFP_TCPOLEN_SACK;
+			to->to_sacks = cp + 2;
+			TCPSTAT_INC(tcps_sack_rcv_blocks);
+			break;
+		default:
+			continue;
+		}
+	}
+}
+
+/*
+ * Pull out of band byte out of a segment so
+ * it doesn't appear in the user's data queue.
+ * It is still reflected in the segment length for
+ * sequencing purposes.
+ *
+ * off is the number of header bytes still in front of the payload in m,
+ * so the urgent byte sits at packet offset off + th_urp - 1.  The byte is
+ * saved in tp->t_iobc, OFP_TCPOOB_HAVEDATA is set and the byte is removed
+ * from the packet.  If that offset is not inside the packet nothing is
+ * changed.
+ */
+static void
+tcp_pulloutofband(struct socket *so, struct ofp_tcphdr *th, odp_packet_t m,
+    int off)
+{
+	int cnt = off + th->th_urp - 1;
+	char *cp = (char *)odp_packet_offset(m, cnt, NULL, NULL);
+	struct tcpcb *tp = sototcpcb(so);
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+	/*
+	 * odp_packet_offset() returns NULL when the offset lies beyond the
+	 * end of the packet; leave the OOB state untouched in that case
+	 * instead of dereferencing NULL.
+	 */
+	if (cp == NULL)
+		return;
+	tp->t_iobc = *cp;
+	tp->t_oobflags |= OFP_TCPOOB_HAVEDATA;
+	/*
+	 * NOTE(review): the result of odp_packet_rem_data() is ignored;
+	 * confirm against the ODP version in use whether it can fail or
+	 * hand back a different packet handle.
+	 */
+	odp_packet_rem_data(m, cnt, 1);
+}
+
+/*
+ * Collect new round-trip time estimate
+ * and update averages and current timeout.
+ *
+ * rtt is the new sample.  Updates t_srtt, t_rttvar, t_rttbest and
+ * t_rxtcur, counts the update, and clears t_rtttime, t_rxtshift and
+ * t_softerror.
+ */
+static void
+tcp_xmit_timer(struct tcpcb *tp, int rtt)
+{
+	int delta;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	TCPSTAT_INC(tcps_rttupdated);
+	tp->t_rttupdated++;
+	if (tp->t_srtt != 0) {
+		/*
+		 * srtt is stored as fixed point with 5 bits after the
+		 * binary point (i.e., scaled by 8).  The following magic
+		 * is equivalent to the smoothing algorithm in rfc793 with
+		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+		 * point).  Adjust rtt to origin 0.
+		 *
+		 * NOTE(review): 5 fractional bits would be a scale of 32,
+		 * not 8; check the sentence above against TCP_RTT_SHIFT.
+		 */
+		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+		if ((tp->t_srtt += delta) <= 0)
+			tp->t_srtt = 1;
+
+		/*
+		 * We accumulate a smoothed rtt variance (actually, a
+		 * smoothed mean difference), then set the retransmit
+		 * timer to smoothed rtt + 4 times the smoothed variance.
+		 * rttvar is stored as fixed point with 4 bits after the
+		 * binary point (scaled by 16).  The following is
+		 * equivalent to rfc793 smoothing with an alpha of .75
+		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
+		 * rfc793's wired-in beta.
+		 */
+		if (delta < 0)
+			delta = -delta;
+		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+		if ((tp->t_rttvar += delta) <= 0)
+			tp->t_rttvar = 1;
+		if ((int)tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
+			tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
+	} else {
+		/*
+		 * No rtt measurement yet - use the unsmoothed rtt.
+		 * Set the variance to half the rtt (so our first
+		 * retransmit happens at 3*rtt).
+		 */
+		tp->t_srtt = rtt << TCP_RTT_SHIFT;
+		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
+	}
+	tp->t_rtttime = 0;
+	tp->t_rxtshift = 0;
+
+	/*
+	 * the retransmit should happen at rtt + 4 * rttvar.
+	 * Because of the way we do the smoothing, srtt and rttvar
+	 * will each average +1/2 tick of bias.  When we compute
+	 * the retransmit timer, we want 1/2 tick of rounding and
+	 * 1 extra tick because of +-1/2 tick uncertainty in the
+	 * firing of the timer.  The bias will give us exactly the
+	 * 1.5 tick we need.  But, because the bias is
+	 * statistical, we have to test that we don't drop below
+	 * the minimum feasible timer (which is 2 ticks).
+	 */
+	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+
+	/*
+	 * We received an ack for a packet that wasn't retransmitted;
+	 * it is probably safe to discard any error indications we've
+	 * received recently.  This isn't quite right, but close enough
+	 * for now (a route might have failed after we sent a segment,
+	 * and the return path might not be symmetrical).
+	 */
+	tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs.  If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks.  We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * Also take into account the space needed for options that we
+ * send regularly.  Make maxseg shorter by that amount to assure
+ * that we can send maxseg amount of data even when the options
+ * are present.  Store the upper limit of the length of options plus
+ * data in maxopd.
+ *
+ * NOTE that this routine is only called when we process an incoming
+ * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
+ * settings are handled in ofp_tcp_mssopt().
+ *
+ * offer     - MSS announced by the peer; 0 means the SYN carried no MSS
+ *             option, -1 means no SYN has been received yet.
+ * mtuoffer  - MTU to derive the offer from, or -1.  When given, offer
+ *             must be -1 and becomes mtuoffer - min_protoh.
+ * metricptr - if not NULL, receives the metrics that were used.  They
+ *             are currently always zero because the tcp_hc_get() call
+ *             is disabled.
+ * mtuflags  - passed through to ofp_tcp_maxmtu()/ofp_tcp_maxmtu6().
+ *
+ * Results are stored in tp->t_maxopd and tp->t_maxseg.
+ */
+void
+ofp_tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
+    struct hc_metrics_lite *metricptr, int *mtuflags)
+{
+	int mss = 0;
+	uint64_t maxmtu = 0;
+	struct inpcb *inp = tp->t_inpcb;
+	struct hc_metrics_lite metrics;
+	int origoffer;
+#ifdef INET6
+	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+	size_t min_protoh = isipv6 ?
+			    sizeof (struct ofp_ip6_hdr) + sizeof (struct ofp_tcphdr) :
+			    sizeof (struct tcpiphdr);
+#else
+	const size_t min_protoh = sizeof(struct tcpiphdr);
+#endif
+	(void)mtuflags;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	if (mtuoffer != -1) {
+		KASSERT(offer == -1, ("%s: conflict", __func__));
+		offer = mtuoffer - min_protoh;
+	}
+	/* Remember the caller's value; offer itself is adjusted below. */
+	origoffer = offer;
+
+	/* Initialize. */
+#ifdef INET6
+	if (isipv6) {
+		maxmtu = ofp_tcp_maxmtu6(&inp->inp_inc, mtuflags);
+		tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+	}
+	else
+#endif
+	{
+		maxmtu = ofp_tcp_maxmtu(&inp->inp_inc, mtuflags);
+		tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+	}
+
+	/*
+	 * No route to sender, stay with default mss and return.
+	 */
+	if (maxmtu == 0) {
+		/*
+		 * In case we return early we need to initialize metrics
+		 * to a defined state as tcp_hc_get() would do for us
+		 * if there was no cache hit.
+		 */
+		if (metricptr != NULL)
+			bzero(metricptr, sizeof(struct hc_metrics_lite));
+		return;
+	}
+
+	/* What have we got? */
+	switch (offer) {
+		case 0:
+			/*
+			 * Offer == 0 means that there was no MSS on the SYN
+			 * segment, in this case we use ofp_tcp_mssdflt as
+			 * already assigned to t_maxopd above.
+			 */
+			offer = tp->t_maxopd;
+			break;
+
+		case -1:
+			/*
+			 * Offer == -1 means that we didn't receive SYN yet.
+			 */
+			/* FALLTHROUGH */
+
+		default:
+			/*
+			 * Prevent DoS attack with too small MSS. Round up
+			 * to at least minmss.
+			 */
+			offer = max(offer, V_tcp_minmss);
+	}
+
+	/*
+	 * rmx information is now retrieved from tcp_hostcache.
+	 */
+	bzero(&metrics, sizeof(metrics));
+	/* HJo: FIX tcp_hc_get(&inp->inp_inc, &metrics); */
+	if (metricptr != NULL)
+		bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));
+
+	/*
+	 * If there's a discovered mtu in the tcp hostcache, use it
+	 * else, use the link mtu.
+	 */
+	if (metrics.rmx_mtu)
+		mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
+	else {
+#ifdef INET6
+		if (isipv6) {
+			mss = maxmtu - min_protoh;
+			if (!V_path_mtu_discovery /*&&
+			    !in6_localaddr(&inp->in6p_faddr)*/)
+				mss = min(mss, V_tcp_v6mssdflt);
+		}
+		else
+#endif
+		{
+			mss = maxmtu - min_protoh;
+			if (!V_path_mtu_discovery /* HJo: FIX &&
+			    !in_localaddr(inp->inp_faddr)*/)
+				mss = min(mss, V_tcp_mssdflt);
+		}
+
+		/*
+		 * XXX - The above conditional (mss = maxmtu - min_protoh)
+		 * probably violates the TCP spec.
+		 * The problem is that, since we don't know the
+		 * other end's MSS, we are supposed to use a conservative
+		 * default.  But, if we do that, then MTU discovery will
+		 * never actually take place, because the conservative
+		 * default is much less than the MTUs typically seen
+		 * on the Internet today.  For the moment, we'll sweep
+		 * this under the carpet.
+		 *
+		 * The conservative default might not actually be a problem
+		 * if the only case this occurs is when sending an initial
+		 * SYN with options and data to a host we've never talked
+		 * to before.  Then, they will reply with an MSS value which
+		 * will get recorded and the new parameters should get
+		 * recomputed.  For Further Study.
+		 */
+	}
+	mss = min(mss, offer);
+
+	/*
+	 * Sanity check: make sure that maxopd will be large
+	 * enough to allow some data on segments even if
+	 * all the option space is used (40bytes).  Otherwise
+	 * funny things may happen in ofp_tcp_output.
+	 */
+	mss = max(mss, 64);
+
+	/*
+	 * maxopd stores the maximum length of data AND options
+	 * in a segment; maxseg is the amount of data in a normal
+	 * segment.  We need to store this value (maxopd) apart
+	 * from maxseg, because now every segment carries options
+	 * and thus we normally have somewhat less data in segments.
+	 */
+	tp->t_maxopd = mss;
+
+	/*
+	 * origoffer==-1 indicates that no segments were received yet.
+	 * In this case we just guess.
+	 */
+	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+	    (origoffer == -1 ||
+	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
+		mss -= OFP_TCPOLEN_TSTAMP_APPA;
+
+	/* Round down to a multiple of MCLBYTES (mask if it is a power of 2). */
+#if	(MCLBYTES & (MCLBYTES - 1)) == 0
+	if (mss > MCLBYTES)
+		mss &= ~(MCLBYTES-1);
+#else
+	if (mss > MCLBYTES)
+		mss = mss / MCLBYTES * MCLBYTES;
+#endif
+	tp->t_maxseg = mss;
+}
+
+/*
+ * Set the MSS of tp from the peer's offer and size the socket buffers
+ * to match.
+ *
+ * The MSS itself is computed by ofp_tcp_mss_update().  The send buffer
+ * is then rounded up to a multiple of the MSS (capped at ofp_sb_max), or
+ * the MSS is lowered to the buffer size when the buffer is smaller.  The
+ * receive buffer is rounded up the same way when it is larger than the
+ * MSS.  TF_TSO is set when mtuflags reports CSUM_TSO.
+ */
+void
+ofp_tcp_mss(struct tcpcb *tp, int offer)
+{
+	int mss;
+	uint64_t bufsize;
+	struct inpcb *inp;
+	struct socket *so;
+	struct hc_metrics_lite metrics;
+	int mtuflags = 0;
+
+	KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
+
+	/* Fills metrics on every path, including its early return. */
+	ofp_tcp_mss_update(tp, offer, -1, &metrics, &mtuflags);
+
+	mss = tp->t_maxseg;
+	inp = tp->t_inpcb;
+
+	/*
+	 * If there's a pipesize, change the socket buffer to that size,
+	 * don't change if sb_hiwat is different than default (then it
+	 * has been changed on purpose with setsockopt).
+	 * Make the socket buffers an integral number of mss units;
+	 * if the mss is larger than the socket buffer, decrease the mss.
+	 */
+	so = inp->inp_socket;
+	SOCKBUF_LOCK(&so->so_snd);
+	if ((so->so_snd.sb_hiwat == ofp_tcp_sendspace) && metrics.rmx_sendpipe)
+		bufsize = metrics.rmx_sendpipe;
+	else
+		bufsize = so->so_snd.sb_hiwat;
+	if ((int)bufsize < mss)
+		mss = bufsize;
+	else {
+#define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))  /* to any y */
+		bufsize = roundup(bufsize, mss);
+		if (bufsize > ofp_sb_max)
+			bufsize = ofp_sb_max;
+		if (bufsize > so->so_snd.sb_hiwat)
+			(void)ofp_sbreserve_locked(&so->so_snd, bufsize, so, NULL);
+	}
+	SOCKBUF_UNLOCK(&so->so_snd);
+	tp->t_maxseg = mss;
+
+	SOCKBUF_LOCK(&so->so_rcv);
+	if ((so->so_rcv.sb_hiwat == ofp_tcp_recvspace) && metrics.rmx_recvpipe)
+		bufsize = metrics.rmx_recvpipe;
+	else
+		bufsize = so->so_rcv.sb_hiwat;
+	if ((int)bufsize > mss) {
+		bufsize = roundup(bufsize, mss);
+		if (bufsize > ofp_sb_max)
+			bufsize = ofp_sb_max;
+		if (bufsize > so->so_rcv.sb_hiwat)
+			(void)ofp_sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
+	}
+	SOCKBUF_UNLOCK(&so->so_rcv);
+
+	/* Check the interface for TSO capabilities. */
+	if (mtuflags & CSUM_TSO)
+		t_flags_or(tp->t_flags, TF_TSO);
+}
+
+/*
+ * Determine the MSS option to send on an outgoing SYN.
+ *
+ * Returns the MTU reported by ofp_tcp_maxmtu()/ofp_tcp_maxmtu6() minus
+ * the minimal protocol header size (min_protoh).  When no MTU is known
+ * (maxmtu == 0) the default MSS of the address family is returned.
+ * The host cache lookup is disabled (thcmtu is always 0), so the
+ * min(maxmtu, thcmtu) branch is currently never taken.
+ */
+int
+ofp_tcp_mssopt(struct in_conninfo *inc)
+{
+	int mss = 0;
+	uint64_t maxmtu = 0;
+	uint64_t thcmtu = 0;
+	size_t min_protoh;
+
+	KASSERT(inc != NULL, ("ofp_tcp_mssopt with NULL in_conninfo pointer"));
+
+#ifdef INET6
+	if (inc->inc_flags & INC_ISIPV6) {
+		mss = V_tcp_v6mssdflt;
+		maxmtu = ofp_tcp_maxmtu6(inc, NULL);
+		min_protoh = sizeof(struct ofp_ip6_hdr) + sizeof(struct ofp_tcphdr);
+	}
+	else
+#endif
+	{
+		mss = V_tcp_mssdflt;
+		maxmtu = ofp_tcp_maxmtu(inc, NULL);
+		min_protoh = sizeof(struct tcpiphdr);
+	}
+	thcmtu = 0 /* HJo: FIX tcp_hc_getmtu(inc)*/; /* IPv4 and IPv6 */
+
+	/* Both known: use the smaller one.  One known: use that one. */
+	if (maxmtu && thcmtu)
+		mss = min(maxmtu, thcmtu) - min_protoh;
+	else if (maxmtu || thcmtu)
+		mss = max(maxmtu, thcmtu) - min_protoh;
+
+	return (mss);
+}
+
+/*
+ * When a partial ack arrives, force the retransmission of the
+ * next unacknowledged segment.  Do not clear tp->t_dupacks.
+ * By setting snd_nxt to ti_ack, this forces retransmission timer to
+ * be started again.
+ *
+ * snd_cwnd is changed only for the duration of the ofp_tcp_output()
+ * call and then restored from ocwnd before it is deflated; snd_nxt is
+ * put back to its old value if that was further ahead.
+ */
+static void
+tcp_newreno_partial_ack(struct tcpcb *tp, struct ofp_tcphdr *th)
+{
+	tcp_seq onxt = tp->snd_nxt;
+	uint64_t ocwnd = tp->snd_cwnd;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	ofp_tcp_timer_activate(tp, TT_REXMT, 0);
+	tp->t_rtttime = 0;
+	tp->snd_nxt = th->th_ack;
+	/*
+	 * Set snd_cwnd to one segment beyond acknowledged offset.
+	 * (tp->snd_una has not yet been updated when this function is called.)
+	 */
+	tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
+	t_flags_or(tp->t_flags, TF_ACKNOW);
+	(void) ofp_tcp_output(tp);
+	tp->snd_cwnd = ocwnd;
+	if (SEQ_GT(onxt, tp->snd_nxt))
+		tp->snd_nxt = onxt;
+	/*
+	 * Partial window deflation.  Relies on fact that tp->snd_una
+	 * not updated yet.
+	 */
+	if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th))
+		tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
+	else
+		tp->snd_cwnd = 0;
+	tp->snd_cwnd += tp->t_maxseg;
+}
diff --git a/src/ofp_tcp_output.c b/src/ofp_tcp_output.c
new file mode 100644
index 00000000..7e3d3d42
--- /dev/null
+++ b/src/ofp_tcp_output.c
@@ -0,0 +1,1525 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *	The Regents of the University of California.
+ * Copyright (c) 2015 Nokia Solutions and Networks
+ * Copyright (c) 2015 Enea Software AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ofp_tcp_output.c	8.4 (Berkeley) 5/24/95
+ */
+#include
+
+#include "ofpi_errno.h"
+#include "ofpi_protosw.h"
+#include "ofpi_sysctl.h"
+#include "ofpi_socketvar.h"
+
+#include "ofpi_in.h"
+#include "ofpi_ip.h"
+#ifdef INET6
+#include "ofpi_ip6.h"
+#include "ofpi_ip6_var.h"
+#endif /* INET6 */
+#define OFP_TCPOUTFLAGS
+
+#include "ofpi_util.h"
+#include "ofpi_pkt_processing.h"
+#include "ofpi_systm.h"
+#include "ofpi_timer.h"
+#include "ofpi_tcp.h"
+#include "ofpi_tcp_fsm.h"
+#include "ofpi_tcp_seq.h"
+#include "ofpi_tcp_timer.h"
+#include "ofpi_tcp_var.h"
+//#include "ofp_tcpip.h"
+
+
+#ifdef TCPDEBUG
+#include
+#endif
+
+//#include
+
+extern int ofp_max_linkhdr;
+
+VNET_DEFINE(int, ofp_path_mtu_discovery) = 1;
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, path_mtu_discovery, OFP_CTLFLAG_RW,
+	&ofp_path_mtu_discovery, 1,
+	"Enable Path MTU Discovery");
+
+VNET_DEFINE(int, ofp_ss_fltsz) = 1;
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, slowstart_flightsize, OFP_CTLFLAG_RW,
+	&ofp_ss_fltsz, 1,
+	"Slow start flight size");
+
+VNET_DEFINE(int, ofp_ss_fltsz_local) = 4;
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, local_slowstart_flightsize,
+	OFP_CTLFLAG_RW, &ofp_ss_fltsz_local, 1,
+	"Slow start flight size for local networks");
+
+VNET_DEFINE(int, ofp_tcp_do_tso) = 1;
+#define	V_tcp_do_tso		VNET(ofp_tcp_do_tso)
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, tso, OFP_CTLFLAG_RW,
+	&ofp_tcp_do_tso, 0,
+	"Enable TCP Segmentation Offload");
+
+VNET_DEFINE(int, ofp_tcp_do_autosndbuf) = 1;
+#define	V_tcp_do_autosndbuf	VNET(ofp_tcp_do_autosndbuf)
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, sendbuf_auto, OFP_CTLFLAG_RW,
+	&ofp_tcp_do_autosndbuf, 0,
+	"Enable automatic send buffer sizing");
+
+VNET_DEFINE(int, ofp_tcp_autosndbuf_inc) = 8*1024;
+#define	V_tcp_autosndbuf_inc	VNET(ofp_tcp_autosndbuf_inc)
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, sendbuf_inc, OFP_CTLFLAG_RW,
+	&ofp_tcp_autosndbuf_inc, 0,
+	"Incrementor step size of automatic send buffer");
+
+VNET_DEFINE(int, ofp_tcp_autosndbuf_max) = 2*1024*1024;
+#define	V_tcp_autosndbuf_max	VNET(ofp_tcp_autosndbuf_max)
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, sendbuf_max, OFP_CTLFLAG_RW,
+	&ofp_tcp_autosndbuf_max, 0,
+	"Max size of automatic send buffer");
+
+/*
+static inline void	hhook_run_tcp_est_out(struct tcpcb *tp,
+			    struct ofp_tcphdr *th, struct tcpopt *to,
+			    long len, int tso);
+*/
+static inline void	cc_after_idle(struct tcpcb *tp);
+
+/*
+ * Wrapper for the TCP established output helper hook (compiled out).
+ */
+#if 0
+static void inline
+hhook_run_tcp_est_out(struct tcpcb *tp, struct ofp_tcphdr *th,
+    struct tcpopt *to, long len, int tso)
+{
+	struct tcp_hhook_data hhook_data;
+
+	if (V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks > 0) {
+		hhook_data.tp = tp;
+		hhook_data.th = th;
+		hhook_data.to = to;
+		hhook_data.len = len;
+		hhook_data.tso = tso;
+
+		hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data,
+		    tp->osd);
+	}
+}
+#endif
+/*
+ * CC wrapper hook functions.  cc_after_idle() is a no-op for now: its body is under #if 0.
+ */
+static inline void
+cc_after_idle(struct tcpcb *tp)
+{
+	(void)tp;
+#if 0
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	if (CC_ALGO(tp)->after_idle != NULL)
+		CC_ALGO(tp)->after_idle(tp->ccv);
+#endif
+}
+
+/*
+ * Tcp output routine: figure out what should be sent and send it.
+ */ +int +ofp_tcp_output(struct tcpcb *tp) +{ + struct socket *so = tp->t_inpcb->inp_socket; + long len, recwin, sendwin; + int off, flags, error = 0; /* Keep compiler happy */ + odp_packet_t m; + struct ofp_ip *ip = NULL; + struct ipovly *ipov = NULL; + struct ofp_tcphdr *th; + uint8_t opt[OFP_TCP_MAXOLEN]; + unsigned ipoptlen, optlen, hdrlen; + int idle, sendalot; + int sack_rxmit, sack_bytes_rxmt; + struct sackhole *p; + int tso; + struct tcpopt to; + + ipov = ipov; +#if 0 + int maxburst = OFP_TCP_MAXBURST; +#endif +#ifdef INET6 + struct ofp_ip6_hdr *ip6 = NULL; + int isipv6; + + (void)ipov; + + isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; +#endif + INP_WLOCK_ASSERT(tp->t_inpcb); + + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ + idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); + if (idle && (int)(ofp_timer_ticks(0) - tp->t_rcvtime) >= tp->t_rxtcur) + cc_after_idle(tp); + t_flags_and(tp->t_flags, ~TF_LASTIDLE); + if (idle) {/* OK */ + if (tp->t_flags & TF_MORETOCOME) { + t_flags_or(tp->t_flags, TF_LASTIDLE); + idle = 0; + } + } +again: + /* + * If we've recently taken a timeout, snd_max will be greater than + * snd_nxt. There may be SACK information that allows us to avoid + * resending already delivered data. Adjust snd_nxt accordingly. + */ + if ((tp->t_flags & TF_SACK_PERMIT) && + SEQ_LT(tp->snd_nxt, tp->snd_max)) + ofp_tcp_sack_adjust(tp); + sendalot = 0; + tso = 0; + off = tp->snd_nxt - tp->snd_una; +#ifndef min +#define min(a, b) (a>b ? b : a) +#endif + sendwin = min(tp->snd_wnd, tp->snd_cwnd); + + flags = tcp_outflags[tp->t_state]; + /* + * Send any SACK-generated retransmissions. If we're explicitly trying + * to send out new data (when sendalot is 1), bypass this function. 
+ * If we retransmit in fast recovery mode, decrement snd_cwnd, since + * we're replacing a (future) new transmission with a retransmission + * now, and we previously incremented snd_cwnd in ofp_tcp_input(). + */ + /* + * Still in sack recovery , reset rxmit flag to zero. + */ + sack_rxmit = 0; + sack_bytes_rxmt = 0; + len = 0; + p = NULL; + if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && + (p = ofp_tcp_sack_output(tp, &sack_bytes_rxmt))) { + long cwin; + + cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; + if (cwin < 0) + cwin = 0; + /* Do not retransmit SACK segments beyond snd_recover */ + if (SEQ_GT(p->end, tp->snd_recover)) { + /* + * (At least) part of sack hole extends beyond + * snd_recover. Check to see if we can rexmit data + * for this hole. + */ + if (SEQ_GEQ(p->rxmit, tp->snd_recover)) { + /* + * Can't rexmit any more data for this hole. + * That data will be rexmitted in the next + * sack recovery episode, when snd_recover + * moves past p->rxmit. + */ + p = NULL; + goto after_sack_rexmit; + } else { + /* Can rexmit part of the current hole */ + len = ((long)ulmin(cwin, + tp->snd_recover - p->rxmit)); + } + } else { + len = ((long)ulmin(cwin, p->end - p->rxmit)); + } + off = p->rxmit - tp->snd_una; + KASSERT(off >= 0,("%s: sack block to the left of una : %d", + __func__, off)); + if (len > 0) { + sack_rxmit = 1; + sendalot = 1; + TCPSTAT_INC(tcps_sack_rexmits); + TCPSTAT_ADD(tcps_sack_rexmit_bytes, + min(len, tp->t_maxseg)); + } + } +after_sack_rexmit: + + /* + * Get standard flags, and add SYN or FIN if requested by 'hidden' + * state flags. + */ + if (tp->t_flags & TF_NEEDFIN) + flags |= OFP_TH_FIN; + if (tp->t_flags & TF_NEEDSYN) + flags |= OFP_TH_SYN; + + SOCKBUF_LOCK(&so->so_snd); + /* + * If in persist timeout with window of 0, send 1 byte. + * Otherwise, if window is small but nonzero + * and timer expired, we will send what we can + * and go to transmit state. 
+ */ + if (tp->t_flags & TF_FORCEDATA) { + if (sendwin == 0) { + /* + * If we still have some data to send, then + * clear the FIN bit. Usually this would + * happen below when it realizes that we + * aren't sending all the data. However, + * if we have exactly 1 byte of unsent data, + * then it won't clear the FIN bit below, + * and if we are in persist state, we wind + * up sending the packet without recording + * that we sent the FIN bit. + * + * We can't just blindly clear the FIN bit, + * because if we don't have any more data + * to send then the probe will be the FIN + * itself. + */ + if (off < (int)so->so_snd.sb_cc) + flags &= ~OFP_TH_FIN; + sendwin = 1; + } else { + ofp_tcp_timer_activate(tp, TT_PERSIST, 0); + tp->t_rxtshift = 0; + } + } + + /* + * If snd_nxt == snd_max and we have transmitted a FIN, the + * offset will be > 0 even if so_snd.sb_cc is 0, resulting in + * a negative length. This can also occur when TCP opens up + * its congestion window while receiving additional duplicate + * acks after fast-retransmit because TCP will reset snd_nxt + * to snd_max after the fast-retransmit. + * + * In the normal retransmit-FIN-only case, however, snd_nxt will + * be set to snd_una, the offset will be 0, and the length may + * wind up 0. + * + * If sack_rxmit is true we are retransmitting from the scoreboard + * in which case len is already set. + */ + + if (sack_rxmit == 0) {/* OK */ + if (sack_bytes_rxmt == 0) {/* OK */ + len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off); + } else { + long cwin; + + /* + * We are inside of a SACK recovery episode and are + * sending new data, having retransmitted all the + * data possible in the scoreboard. + */ + len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd) + - off); + /* + * Don't remove this (len > 0) check ! + * We explicitly check for len > 0 here (although it + * isn't really necessary), to work around a gcc + * optimization issue - to force gcc to compute + * len above. 
Without this check, the computation + * of len is bungled by the optimizer. + */ + if (len > 0) { + cwin = tp->snd_cwnd - + (tp->snd_nxt - tp->sack_newdata) - + sack_bytes_rxmt; + if (cwin < 0) + cwin = 0; + len = lmin(len, cwin); + } + } + } + + /* + * Lop off SYN bit if it has already been sent. However, if this + * is SYN-SENT state and if segment contains data and if we don't + * know that foreign host supports TAO, suppress sending segment. + */ + if ((flags & OFP_TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { + if (tp->t_state != TCPS_SYN_RECEIVED) + flags &= ~OFP_TH_SYN; + off--, len++; + } + + /* + * Be careful not to send data and/or FIN on SYN segments. + * This measure is needed to prevent interoperability problems + * with not fully conformant TCP implementations. + */ + if ((flags & OFP_TH_SYN) && (tp->t_flags & TF_NOOPT)) { + len = 0; + flags &= ~OFP_TH_FIN; + } + + if (len < 0) { + /* + * If FIN has been sent but not acked, + * but we haven't been called to retransmit, + * len will be < 0. Otherwise, window shrank + * after we sent into it. If window shrank to 0, + * cancel pending retransmit, pull snd_nxt back + * to (closed) window, and set the persist timer + * if it isn't already going. If the window didn't + * close completely, just wait for an ACK. + */ + len = 0; + if (sendwin == 0) { + ofp_tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rxtshift = 0; + tp->snd_nxt = tp->snd_una; + if (!ofp_tcp_timer_active(tp, TT_PERSIST)) + ofp_tcp_setpersist(tp); + } + } + + /* len will be >= 0 after this point. */ + KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); + + /* + * Automatic sizing of send socket buffer. Often the send buffer + * size is not optimally adjusted to the actual network conditions + * at hand (delay bandwidth product). Setting the buffer size too + * small limits throughput on links with high bandwidth and high + * delay (eg. trans-continental/oceanic links). 
Setting the + * buffer size too big consumes too much real kernel memory, + * especially with many connections on busy servers. + * + * The criteria to step up the send buffer one notch are: + * 1. receive window of remote host is larger than send buffer + * (with a fudge factor of 5/4th); + * 2. send buffer is filled to 7/8th with data (so we actually + * have data to make use of it); + * 3. send buffer fill has not hit maximal automatic size; + * 4. our send window (slow start and cogestion controlled) is + * larger than sent but unacknowledged data in send buffer. + * + * The remote host receive window scaling factor may limit the + * growing of the send buffer before it reaches its allowed + * maximum. + * + * It scales directly with slow start or congestion window + * and does at most one step per received ACK. This fast + * scaling has the drawback of growing the send buffer beyond + * what is strictly necessary to make full use of a given + * delay*bandwith product. However testing has shown this not + * to be much of an problem. At worst we are trading wasting + * of available bandwith (the non-use of it) for wasting some + * socket buffer memory. + * + * TODO: Shrink send buffer during idle periods together + * with congestion window. Requires another timer. Has to + * wait for upcoming tcp timer rewrite. + */ +#if 0 /* HJo: FIX */ + if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) { + if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat && + so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) && + so->so_snd.sb_cc < V_tcp_autosndbuf_max && + sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) { + if (!ofp_sbreserve_locked(&so->so_snd, + min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc, + V_tcp_autosndbuf_max), so, curthread)) + so->so_snd.sb_flags &= ~SB_AUTOSIZE; + } + } +#endif + /* + * Decide if we can use TCP Segmentation Offloading (if supported by + * hardware). + * + * TSO may only be used if we are in a pure bulk sending state. 
The + * presence of TCP-MD5, SACK retransmits, SACK advertizements and + * IP options prevent using TSO. With TSO the TCP header is the same + * (except for the sequence number) for all generated packets. This + * makes it impossible to transmit any options which vary per generated + * segment or packet. + */ + if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && + ((tp->t_flags & TF_SIGNATURE) == 0) && + tp->rcv_numsacks == 0 && sack_rxmit == 0 && + tp->t_inpcb->inp_options == ODP_PACKET_INVALID && + tp->t_inpcb->in6p_options == ODP_PACKET_INVALID) + tso = 1; + + if (sack_rxmit) { + if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~OFP_TH_FIN; + } else {/* OK */ + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~OFP_TH_FIN; + } + + recwin = sbspace(&so->so_rcv); + + /* + * Sender silly window avoidance. We transmit under the following + * conditions when len is non-zero: + * + * - We have a full segment (or more with TSO) + * - This is the last buffer in a write()/send() and we are + * either idle or running NODELAY + * - we've timed out (e.g. persist timer) + * - we have more then 1/2 the maximum send window's worth of + * data (receiver may be limited the window size) + * - we need to retransmit + */ + if (len) {/* OK */ + if (len >= tp->t_maxseg) + goto send; + /* + * NOTE! on localhost connections an 'ack' from the remote + * end may occur synchronously with the output and cause + * us to flush a buffer queued with moretocome. XXX + * + * note: the len + off check is almost certainly unnecessary. + */ + if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ + (idle || (tp->t_flags & TF_NODELAY)) && + len + off >= so->so_snd.sb_cc && + (tp->t_flags & TF_NOPUSH) == 0) { + goto send; + } + if (tp->t_flags & TF_FORCEDATA) /* typ. 
timeout case */ + goto send; + if (len >= (int)(tp->max_sndwnd / 2) && tp->max_sndwnd > 0) + goto send; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */ + goto send; + if (sack_rxmit) + goto send; + + } + + /* + * Compare available window to amount of window + * known to peer (as advertised window less + * next expected input). If the difference is at least two + * max size segments, or at least 50% of the maximum possible + * window, then want to send a window update to peer. + * Skip this if the connection is in T/TCP half-open state. + * Don't send pure window updates when the peer has closed + * the connection and won't ever send more data. + */ + if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) && + !TCPS_HAVERCVDFIN(tp->t_state)) { + /* + * "adv" is the amount we can increase the window, + * taking into account that we are limited by + * OFP_TCP_MAXWIN << tp->rcv_scale. + */ + long adv; + int oldwin; + + adv = min(recwin, (long)OFP_TCP_MAXWIN << tp->rcv_scale); + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { + oldwin = (tp->rcv_adv - tp->rcv_nxt); + adv -= oldwin; + } else + oldwin = 0; + + /* + * If the new window size ends up being the same as the old + * size when it is scaled, then don't force a window update. + */ + if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale) + goto dontupdate; + if (adv >= (long) (2 * tp->t_maxseg)) + goto send; + if (2 * adv >= (long) so->so_rcv.sb_hiwat) + goto send; + } +dontupdate: + /* + * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW + * is also a catch-all for the retransmit timer timeout case. + */ + if (tp->t_flags & TF_ACKNOW) + goto send; + if ((flags & OFP_TH_RST) || + ((flags & OFP_TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) + goto send; + if (SEQ_GT(tp->snd_up, tp->snd_una)) + goto send; + + /* + * If our state indicates that FIN should be sent + * and we have not yet done so, then we need to send. 
+ */ + if (flags & OFP_TH_FIN && + ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) + goto send; + + /* + * In SACK, it is possible for ofp_tcp_output to fail to send a segment + * after the retransmission timer has been turned off. Make sure + * that the retransmission timer is set. + */ + if ((tp->t_flags & TF_SACK_PERMIT) && + SEQ_GT(tp->snd_max, tp->snd_una) && + !ofp_tcp_timer_active(tp, TT_REXMT) && + !ofp_tcp_timer_active(tp, TT_PERSIST)) { + ofp_tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + goto just_return; + } + + /* + * TCP window updates are not reliable, rather a polling protocol + * using ``persist'' packets is used to insure receipt of window + * updates. The three ``states'' for the output side are: + * idle not doing retransmits or persists + * persisting to move a small or zero window + * (re)transmitting and thereby not persisting + * + * ofp_tcp_timer_active(tp, TT_PERSIST) + * is true when we are in persist state. + * (tp->t_flags & TF_FORCEDATA) + * is set when we are called to send a persist packet. + * ofp_tcp_timer_active(tp, TT_REXMT) + * is set when we are retransmitting + * The output side is idle when both timers are zero. + * + * If send window is too small, there is data to transmit, and no + * retransmit or persist is pending, then go to persist state. + * If nothing happens soon, send when timer expires: + * if window is nonzero, transmit what we can, + * otherwise force out a byte. + */ + if (so->so_snd.sb_cc && !ofp_tcp_timer_active(tp, TT_REXMT) && + !ofp_tcp_timer_active(tp, TT_PERSIST)) { + tp->t_rxtshift = 0; + ofp_tcp_setpersist(tp); + } + + /* + * No reason to send a segment, just return. + */ +just_return: + SOCKBUF_UNLOCK(&so->so_snd); + return (0); + +send: + SOCKBUF_LOCK_ASSERT(&so->so_snd); + /* + * Before ESTABLISHED, force sending of initial options + * unless TCP set not to do any options. 
+ * NOTE: we assume that the IP/TCP header plus TCP options + * always fit in a single mbuf, leaving room for a maximum + * link header, i.e. + * ofp_max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES + */ + optlen = 0; +#ifdef INET6 + if (isipv6) + hdrlen = sizeof (struct ofp_ip6_hdr) + sizeof (struct ofp_tcphdr); + else +#endif + hdrlen = sizeof (struct tcpiphdr); + /* + * Compute options for segment. + * We only have to care about SYN and established connection + * segments. Options for SYN-ACK segments are handled in TCP + * syncache. + */ + + if ((tp->t_flags & TF_NOOPT) == 0) {/* OK */ + to.to_flags = 0; + /* Maximum segment size. */ + if (flags & OFP_TH_SYN) { + tp->snd_nxt = tp->iss; + to.to_mss = ofp_tcp_mssopt(&tp->t_inpcb->inp_inc); + to.to_flags |= TOF_MSS; + } + /* Window scaling. */ + if ((flags & OFP_TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { + to.to_wscale = tp->request_r_scale; + to.to_flags |= TOF_SCALE; + } + /* Timestamps. */ + if ((tp->t_flags & TF_RCVD_TSTMP) || + ((flags & OFP_TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {/* OK */ + to.to_tsval = tcp_ts_getticks() + tp->ts_offset; + to.to_tsecr = tp->ts_recent; + to.to_flags |= TOF_TS; + /* Set receive buffer autosizing timestamp. */ + if (tp->rfbuf_ts == 0 && + (so->so_rcv.sb_flags & SB_AUTOSIZE)) + tp->rfbuf_ts = tcp_ts_getticks(); + } + /* Selective ACK's. */ + if (tp->t_flags & TF_SACK_PERMIT) {/* OK */ + if (flags & OFP_TH_SYN) + to.to_flags |= TOF_SACKPERM; + else if (TCPS_HAVEESTABLISHED(tp->t_state) && + (tp->t_flags & TF_SACK_PERMIT) && + tp->rcv_numsacks > 0) { + to.to_flags |= TOF_SACK; + to.to_nsacks = tp->rcv_numsacks; + to.to_sacks = (uint8_t *)tp->sackblks; + } + } +#ifdef TCP_SIGNATURE + /* TCP-MD5 (RFC2385). */ + if (tp->t_flags & TF_SIGNATURE) + to.to_flags |= TOF_SIGNATURE; +#endif /* TCP_SIGNATURE */ + + /* Processing the options. 
*/ + hdrlen += optlen = ofp_tcp_addoptions(&to, opt); + } + +#ifdef INET6 + if (isipv6) /*Bogdan: no options*/ + ipoptlen = 0;/*ip6_optlen(tp->t_inpcb); */ + else +#endif + if (tp->t_inpcb->inp_options != ODP_PACKET_INVALID) + ipoptlen = odp_packet_len(tp->t_inpcb->inp_options) - + offsetof(struct ipoption, ipopt_list); + else + ipoptlen = 0; +#ifdef IPSEC + ipoptlen += ipsec_optlen; +#endif + + /* + * Adjust data length if insertion of options will + * bump the packet length beyond the t_maxopd length. + * Clear the FIN bit because we cut off the tail of + * the segment. + */ + if (len + optlen + ipoptlen > tp->t_maxopd) {/* OK */ + flags &= ~OFP_TH_FIN; + + if (tso) { + KASSERT(ipoptlen == 0, + ("%s: TSO can't do IP options", __func__)); + + /* + * Limit a burst to OFP_IP_MAXPACKET minus IP, + * TCP and options length to keep ip->ip_len + * from overflowing. + */ + if (len > OFP_IP_MAXPACKET - hdrlen) { + len = OFP_IP_MAXPACKET - hdrlen; + sendalot = 1; + } + + /* + * Prevent the last segment from being + * fractional unless the send sockbuf can + * be emptied. + */ + if (sendalot && off + len < so->so_snd.sb_cc) { + len -= len % (tp->t_maxopd - optlen); + sendalot = 1; + } + + /* + * Send the FIN in a separate segment + * after the bulk sending is done. + * We don't trust the TSO implementations + * to clear the FIN flag on all but the + * last segment. + */ + if (tp->t_flags & TF_NEEDFIN) + sendalot = 1; + + } else {/* OK */ + len = tp->t_maxopd - optlen - ipoptlen; + sendalot = 1; + } + } else + tso = 0; + + KASSERT(len + hdrlen + ipoptlen <= OFP_IP_MAXPACKET, + ("%s: len > OFP_IP_MAXPACKET", __func__)); + +/*#ifdef DIAGNOSTIC*/ +#if 0 +#ifdef INET6 + if (ofp_max_linkhdr + hdrlen > MCLBYTES) +#else + if (ofp_max_linkhdr + hdrlen > MHLEN) +#endif + panic("tcphdr too big"); +#endif +/*#endif*/ + + /* + * This KASSERT is here to catch edge cases at a well defined place. + * Before, those had triggered (random) panic conditions further down. 
+ */ + KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); + + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ + + if (len) {/* OK */ + if ((tp->t_flags & TF_FORCEDATA) && len == 1) + TCPSTAT_INC(tcps_sndprobe); + else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { + tp->t_sndrexmitpack++; + TCPSTAT_INC(tcps_sndrexmitpack); + TCPSTAT_ADD(tcps_sndrexmitbyte, len); + } else {/* OK */ + TCPSTAT_INC(tcps_sndpack); + TCPSTAT_ADD(tcps_sndbyte, len); + } + + m = ofp_packet_alloc(hdrlen + len); + + if (m == ODP_PACKET_INVALID) { + SOCKBUF_UNLOCK(&so->so_snd); + error = OFP_ENOBUFS; + goto out; + } + +#if 0 + if (MHLEN < hdrlen + ofp_max_linkhdr) { + MCLGET(m, M_DONTWAIT); + if ((odp_packet_flags(m) & M_EXT) == 0) { + SOCKBUF_UNLOCK(&so->so_snd); + odp_packet_free(m); + error = OFP_ENOBUFS; + goto out; + } + } +#endif + + odp_packet_l3_offset_set(m, 0); + odp_packet_l4_offset_set(m, +#ifdef INET6 + isipv6? sizeof(struct ofp_ip6_hdr) + ipoptlen : +#endif /*INET6*/ + sizeof(struct ofp_ip)); + + ofp_sockbuf_copy_out(&so->so_snd, off, len, + (char *)odp_packet_data(m) + hdrlen); + /* + odp_packet_t src = so->so_snd.sb_mb[so->so_snd.sb_get]; + memcpy((uint8_t *)odp_packet_data(m) + hdrlen, + odp_packet_data(src), len); + */ + /* + * If we're sending everything we've got, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) 
+ */ + if (off + len == so->so_snd.sb_cc) + flags |= OFP_TH_PUSH; + SOCKBUF_UNLOCK(&so->so_snd); + } else { + SOCKBUF_UNLOCK(&so->so_snd); + if (tp->t_flags & TF_ACKNOW) + TCPSTAT_INC(tcps_sndacks); + else if (flags & (OFP_TH_SYN|OFP_TH_FIN|OFP_TH_RST)) + TCPSTAT_INC(tcps_sndctrl); + else if (SEQ_GT(tp->snd_up, tp->snd_una)) + TCPSTAT_INC(tcps_sndurg); + else + TCPSTAT_INC(tcps_sndwinup); + + m = ofp_packet_alloc(hdrlen); + + if (m == ODP_PACKET_INVALID) { + SOCKBUF_UNLOCK(&so->so_snd); + error = OFP_ENOBUFS; + goto out; + } + +#if 0 + if (isipv6 && (MHLEN < hdrlen + ofp_max_linkhdr) && + MHLEN >= hdrlen) { + MH_ALIGN(m, hdrlen); + } else +#endif + odp_packet_l3_offset_set(m, 0); + odp_packet_l4_offset_set(m, +#ifdef INET6 + isipv6? sizeof(struct ofp_ip6_hdr) + ipoptlen : +#endif /*INET6*/ + sizeof(struct ofp_ip)); + } + SOCKBUF_UNLOCK_ASSERT(&so->so_snd); + odp_packet_user_ptr_set(m, NULL); + +#ifdef MAC + mac_inpcb_create_mbuf(tp->t_inpcb, m); +#endif +#ifdef INET6 + if (isipv6) { + ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m); + th = (struct ofp_tcphdr *)odp_packet_l4_ptr(m, NULL); + ofp_tcpip_fillheaders(tp->t_inpcb, ip6, th); + } else +#endif /* INET6 */ + {/* OK */ + ip = (struct ofp_ip *)(odp_packet_data(m)); + ipov = (struct ipovly *)ip; + th = (struct ofp_tcphdr *)(ip + 1); + ofp_tcpip_fillheaders(tp->t_inpcb, ip, th); + } + + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + * If resending a FIN, be sure not to use a new sequence number. + */ + if (flags & OFP_TH_FIN && tp->t_flags & TF_SENTFIN && + tp->snd_nxt == tp->snd_max) + tp->snd_nxt--; + /* + * If we are starting a connection, send ECN setup + * SYN packet. If we are on a retransmit, we may + * resend those bits a number of times as per + * RFC 3168. 
+ */ + + if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) { + if (tp->t_rxtshift >= 1) { + if (tp->t_rxtshift <= V_tcp_ecn_maxretries) + flags |= OFP_TH_ECE|OFP_TH_CWR; + } else + flags |= OFP_TH_ECE|OFP_TH_CWR; + } + + if (tp->t_state == TCPS_ESTABLISHED && + (tp->t_flags & TF_ECN_PERMIT)) { + /* + * If the peer has ECN, mark data packets with + * ECN capable transmission (ECT). + * Ignore pure ack packets, retransmissions and window probes. + */ + if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && + !((tp->t_flags & TF_FORCEDATA) && len == 1)) { +#ifdef INET6 + if (isipv6) + ip6->ofp_ip6_flow |= odp_cpu_to_be_32(OFP_IPTOS_ECN_ECT0 << 20); + else +#endif + ip->ip_tos |= OFP_IPTOS_ECN_ECT0; + TCPSTAT_INC(tcps_ecn_ect0); + } + + /* + * Reply with proper ECN notifications. + */ + if (tp->t_flags & TF_ECN_SND_CWR) { + flags |= OFP_TH_CWR; + t_flags_and(tp->t_flags, ~TF_ECN_SND_CWR); + } + if (tp->t_flags & TF_ECN_SND_ECE) + flags |= OFP_TH_ECE; + } + + /* + * If we are doing retransmissions, then snd_nxt will + * not reflect the first unsent octet. For ACK only + * packets, we do not want the sequence number of the + * retransmitted packet, we want the sequence number + * of the next unsent octet. So, if there is no data + * (and no SYN or FIN), use snd_max instead of snd_nxt + * when filling in ti_seq. But if we are in persist + * state, snd_max might reflect one byte beyond the + * right edge of the window, so use snd_nxt in that + * case, since we know we aren't doing a retransmission. + * (retransmit and persist are mutually exclusive...) 
+ */ + + if (sack_rxmit == 0) {/* OK */ + if (len || (flags & (OFP_TH_SYN|OFP_TH_FIN)) || + ofp_tcp_timer_active(tp, TT_PERSIST)) + th->th_seq = odp_cpu_to_be_32(tp->snd_nxt); + else + th->th_seq = odp_cpu_to_be_32(tp->snd_max); + } else { + th->th_seq = odp_cpu_to_be_32(p->rxmit); + p->rxmit += len; + tp->sackhint.sack_bytes_rexmit += len; + } + th->th_ack = odp_cpu_to_be_32(tp->rcv_nxt); + if (optlen) {/* OK */ + bcopy(opt, th + 1, optlen); + th->th_off = (sizeof (struct ofp_tcphdr) + optlen) >> 2; + } + th->th_flags = flags; + /* + * Calculate receive window. Don't shrink window, + * but avoid silly window syndrome. + */ + if (recwin < (long)(so->so_rcv.sb_hiwat / 4) && + recwin < (long)tp->t_maxseg) + recwin = 0; + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && + recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) + recwin = (long)(tp->rcv_adv - tp->rcv_nxt); + if (recwin > (long)OFP_TCP_MAXWIN << tp->rcv_scale) + recwin = (long)OFP_TCP_MAXWIN << tp->rcv_scale; + + /* + * According to RFC1323 the window field in a SYN (i.e., a + * or ) segment itself is never scaled. The + * case is handled in syncache. + */ + if (flags & OFP_TH_SYN) + th->th_win = odp_cpu_to_be_16((uint16_t) + (min(sbspace(&so->so_rcv), OFP_TCP_MAXWIN))); + else + th->th_win = odp_cpu_to_be_16((uint16_t)(recwin >> tp->rcv_scale)); + /* + * Adjust the RXWIN0SENT flag - indicate that we have advertised + * a 0 window. This may cause the remote transmitter to stall. This + * flag tells ofp_soreceive() to disable delayed acknowledgements when + * draining the buffer. This can occur if the receiver is attempting + * to read more data than can be buffered prior to transmitting on + * the connection. 
+ */ + if (th->th_win == 0) { + tp->t_sndzerowin++; + t_flags_or(tp->t_flags, TF_RXWIN0SENT); + } else + t_flags_and(tp->t_flags, ~TF_RXWIN0SENT); + if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { + th->th_urp = odp_cpu_to_be_16((uint16_t)(tp->snd_up - tp->snd_nxt)); + th->th_flags |= OFP_TH_URG; + } else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. + */ + tp->snd_up = tp->snd_una; /* drag it along */ + +#ifdef TCP_SIGNATURE + if (tp->t_flags & TF_SIGNATURE) { + int sigoff = to.to_signature - opt; + tcp_signature_compute(m, 0, len, optlen, + (uint8_t *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND); + } +#endif + + /* + * Put TCP length in extended header, and then + * checksum extended header and data. + */ + + odp_packet_set_csum_data(m, offsetof(struct ofp_tcphdr, th_sum)); +#ifdef INET6 + if (isipv6) { + /* + * ip6_plen is not need to be filled now, and will be filled + * in ip6_output. + */ + odp_packet_set_csum_flags(m, CSUM_TCP_IPV6); + th->th_sum = 0; + th->th_sum = ofp_ip6_cksum(m, sizeof(struct ofp_tcphdr) + + optlen + len, OFP_IPPROTO_TCP, 0); + } + else +#endif + {/* OK */ + //odp_packet_set_csum_flags(m, CSUM_TCP); + + /* HJo: FIX: + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + odp_cpu_to_be_16(sizeof(struct ofp_tcphdr) + OFP_IPPROTO_TCP + len + optlen)); + */ + + /* IP version must be set here for ipv4/ipv6 checking later */ + KASSERT(ip->ip_v == OFP_IPVERSION, + ("%s: IP version incorrect: %d", __func__, ip->ip_v)); + } + + /* + * Enable TSO and specify the size of the segments. + * The TCP pseudo header checksum is always provided. + * XXX: Fixme: This is currently not the case for IPv6. 
+ */ + if (tso) { + KASSERT(len > tp->t_maxopd - optlen, + ("%s: len <= tso_segsz", __func__)); + odp_packet_set_csum_flags(m, odp_packet_csum_flags(m) | + CSUM_TSO); + /* HJo: FIX: + m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; + */ + } + + KASSERT(len + hdrlen + ipoptlen == (int)odp_packet_len(m), + ("%s: mbuf chain shorter than expected: %ld + %u + %d != %d", + __func__, len, hdrlen, ipoptlen, odp_packet_len(m))); + + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, just set snd_max. + */ + + if ((tp->t_flags & TF_FORCEDATA) == 0 || + !ofp_tcp_timer_active(tp, TT_PERSIST)) {/* OK */ + tcp_seq startseq = tp->snd_nxt; + + /* + * Advance snd_nxt over sequence space of this segment. + */ + if (flags & (OFP_TH_SYN|OFP_TH_FIN)) { + if (flags & OFP_TH_SYN) + tp->snd_nxt++; + if (flags & OFP_TH_FIN) { + tp->snd_nxt++; + t_flags_or(tp->t_flags, TF_SENTFIN); + } + } + if (sack_rxmit) + goto timer; + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {/* OK */ + tp->snd_max = tp->snd_nxt; + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + */ + if (tp->t_rtttime == 0) {/* OK */ + tp->t_rtttime = ticks; + tp->t_rtseq = startseq; + TCPSTAT_INC(tcps_segstimed); + } + } + + /* + * Set retransmit timer if not currently set, + * and not doing a pure ack or a keep-alive probe. + * Initial value for retransmit timer is smoothed + * round-trip time + 2 * round-trip time variance. + * Initialize shift counter which is used for backoff + * of retransmit time. 
+ */ +timer: + if (!ofp_tcp_timer_active(tp, TT_REXMT) && + ((sack_rxmit && tp->snd_nxt != tp->snd_max) || + (tp->snd_nxt != tp->snd_una))) {/* OK */ + if (ofp_tcp_timer_active(tp, TT_PERSIST)) { + ofp_tcp_timer_activate(tp, TT_PERSIST, 0); + tp->t_rxtshift = 0; + } + ofp_tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + } + } else { + /* + * Persist case, update snd_max but since we are in + * persist mode (no window) we do not update snd_nxt. + */ + int xlen = len; + if (flags & OFP_TH_SYN) + ++xlen; + if (flags & OFP_TH_FIN) { + ++xlen; + t_flags_or(tp->t_flags, TF_SENTFIN); + } + if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) + tp->snd_max = tp->snd_nxt + len; + } + + /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */ + /* HJo: FIX + hhook_run_tcp_est_out(tp, th, &to, len, tso); + */ + +#ifdef TCPDEBUG + /* + * Trace. + */ + if (so->so_options & OFP_SO_DEBUG) { + uint16_t save = 0; +#ifdef INET6 + if (!isipv6) +#endif + { + save = ipov->ih_len; + ipov->ih_len = odp_cpu_to_be_16(odp_packet_get_len(m) /* - hdrlen + (th->th_off << 2) */); + } + tcp_trace(TA_OUTPUT, tp->t_state, tp, (void *)odp_packet_data(m), th, 0); +#ifdef INET6 + if (!isipv6) +#endif + ipov->ih_len = save; + } +#endif /* TCPDEBUG */ + + /* + * Fill in IP length and desired time to live and + * send to IP level. There should be a better way + * to handle ttl and tos; we could keep them in + * the template, but need a way to checksum without them. + */ + /* + * odp_packet_get_len(m) should have been set before cksum calcuration, + * because in6_cksum() need it. + */ +#ifdef INET6 + if (isipv6) { + ip6->ofp_ip6_plen = odp_cpu_to_be_16(odp_packet_len(m) - + sizeof (struct ofp_ip6_hdr)); + /* + * we separately set hoplimit for every segment, since the + * user might want to change the value via setsockopt. + * Also, desired default hop limit might be changed via + * Neighbor Discovery. 
+ */ + ip6->ofp_ip6_hlim = V_ip6_defhlim;/* in6_selecthlim(tp->t_inpcb, NULL);*/ + + /* TODO: IPv6 IP6TOS_ECT bit on */ +#if 0 + error = ip6_output(m, + tp->t_inpcb->in6p_outputopts, NULL, + ((so->so_options & OFP_SO_DONTROUTE) ? + IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb); +#else + error = ofp_ip6_output(m, NULL); +#endif + } + else +#endif + + {/* OK */ + ip->ip_len = odp_cpu_to_be_16(odp_packet_len(m)); +#ifdef INET6 + if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO) + ip->ip_ttl = V_ip6_defhlim;/*in6_selecthlim(tp->t_inpcb, NULL);*/ +#endif /* INET6 */ + /* + * If we do path MTU discovery, then we set DF on every packet. + * This might not be the best thing to do according to RFC3390 + * Section 2. However the tcp hostcache migitates the problem + * so it affects only the first tcp connection with a host. + * + * NB: Don't set DF on small MTU/MSS to have a safe fallback. + */ + if (V_path_mtu_discovery && (int)tp->t_maxopd > V_tcp_minmss) + ip->ip_off |= OFP_IP_DF; + + ip->ip_off = odp_cpu_to_be_16(ip->ip_off); + ip->ip_sum = ofp_in_cksum((uint16_t *)ip, sizeof(*ip)); + th->th_sum = 0; + /* th->th_sum = ofp_in4_cksum(m); output function takes care of csum */ + error = ofp_ip_output(m, NULL); + } + + if (error != OFP_PKT_PROCESSED) { + /* + * We know that the packet was lost, so back out the + * sequence number advance, if any. + * + * If the error is OFP_EPERM the packet got blocked by the + * local firewall. Normally we should terminate the + * connection but the blocking may have been spurious + * due to a firewall reconfiguration cycle. So we treat + * it like a packet loss and let the retransmit timer and + * timeouts do their work over time. + * XXX: It is a POLA question whether calling ofp_tcp_drop right + * away would be the really correct behavior instead. 
+ */ + if (((tp->t_flags & TF_FORCEDATA) == 0 || + !ofp_tcp_timer_active(tp, TT_PERSIST)) && + ((flags & OFP_TH_SYN) == 0) && + (error != OFP_EPERM)) { + if (sack_rxmit) { + p->rxmit -= len; + tp->sackhint.sack_bytes_rexmit -= len; + KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, + ("sackhint bytes rtx >= 0")); + } else + tp->snd_nxt -= len; + } +out: + SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */ +#if 0 + switch (error) { + case OFP_EPERM: + tp->t_softerror = error; + return (error); + case OFP_ENOBUFS: + if (!ofp_tcp_timer_active(tp, TT_REXMT) && + !ofp_tcp_timer_active(tp, TT_PERSIST)) + ofp_tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + tp->snd_cwnd = tp->t_maxseg; + return (0); + case OFP_EMSGSIZE: + /* + * For some reason the interface we used initially + * to send segments changed to another or lowered + * its MTU. + * + * ofp_tcp_mtudisc() will find out the new MTU and as + * its last action, initiate retransmission, so it + * is important to not do so here. + * + * If TSO was active we either got an interface + * without TSO capabilits or TSO was turned off. + * Disable it for this connection as too and + * immediatly retry with MSS sized segments generated + * by this function. + */ + if (tso) + t_flags_and(tp->t_flags, ~TF_TSO); + ofp_tcp_mtudisc(tp->t_inpcb, -1); + return (0); + case OFP_EHOSTDOWN: + case OFP_EHOSTUNREACH: + case OFP_ENETDOWN: + case OFP_ENETUNREACH: + if (TCPS_HAVERCVDSYN(tp->t_state)) { + tp->t_softerror = error; + return (0); + } + /* FALLTHROUGH */ + default: + return (error); + } +#endif + } + TCPSTAT_INC(tcps_sndtotal); + + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Any pending ACK has now been sent. 
+ */ + if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + recwin; + tp->last_ack_sent = tp->rcv_nxt; + t_flags_and(tp->t_flags, ~(TF_ACKNOW | TF_DELACK)); + if (ofp_tcp_timer_active(tp, TT_DELACK)) + ofp_tcp_timer_activate(tp, TT_DELACK, 0); +#if 0 + /* + * This completely breaks TCP if newreno is turned on. What happens + * is that if delayed-acks are turned on on the receiver, this code + * on the transmitter effectively destroys the TCP window, forcing + * it to four packets (1.5Kx4 = 6K window). + */ + if (sendalot && --maxburst) + goto again; +#endif + if (sendalot) + goto again; + + return (0); +} + +/* + * Arm (or re-arm) the persist timer for tp. + * + * Clears TF_PREVVALID, then activates TT_PERSIST with an interval built + * from t_srtt, t_rttvar and the backoff table entry for the current + * t_rxtshift, and finally increments t_rxtshift unless it has already + * reached TCP_MAXRXTSHIFT. + * + * Panics if the retransmit timer (TT_REXMT) is active on entry. + */ +void +ofp_tcp_setpersist(struct tcpcb *tp) +{ + int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; /* base interval before backoff */ + int tt; /* interval actually handed to the persist timer */ + + t_flags_and(tp->t_flags, ~TF_PREVVALID); + /* Refuse to start persisting while a retransmit is still pending. */ + if (ofp_tcp_timer_active(tp, TT_REXMT)) + panic("ofp_tcp_setpersist: retransmit pending"); + /* + * Start/restart persistence timer. The base interval is multiplied by + * ofp_tcp_backoff[t_rxtshift]; TCPT_RANGESET is defined elsewhere and + * presumably clamps the product into [TCPTV_PERSMIN, TCPTV_PERSMAX] + * before storing it in tt (confirm against its definition). + */ + TCPT_RANGESET(tt, t * ofp_tcp_backoff[tp->t_rxtshift], + TCPTV_PERSMIN, TCPTV_PERSMAX); + ofp_tcp_timer_activate(tp, TT_PERSIST, tt); + /* Step the backoff index for the next call, up to TCP_MAXRXTSHIFT. */ + if (tp->t_rxtshift < TCP_MAXRXTSHIFT) + tp->t_rxtshift++; +} + +/* + * Insert TCP options according to the supplied parameters to the place + * optp in a consistent way. Can handle unaligned destinations. + * + * The order of the option processing is crucial for optimal packing and + * alignment for the scarce option space. + * + * The optimal order for a SYN/SYN-ACK segment is: + * MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) + + * Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40. + * + * The SACK options should be last. SACK blocks consume 8*n+2 bytes. + * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks). + * At minimum we need 10 bytes (to generate 1 SACK block). If both + * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present, + * we only have 10 bytes for SACK options (40 - (12 + 18)).
+ */ +/* + * Arguments: + * to - options to emit, selected by to->to_flags. to_mss, to_tsval + * and to_tsecr are converted to big-endian in place, and + * to_signature is pointed at the signature bytes in the output + * when TOF_SIGNATURE is emitted. + * optp - destination buffer. No size is passed in: writes are bounded + * only by the OFP_TCP_MAXOLEN checks below, so the caller + * presumably supplies at least OFP_TCP_MAXOLEN bytes (confirm + * against callers). + * + * Returns the number of option bytes written. The value is a multiple of + * 4 and the final KASSERT expects it to be at most OFP_TCP_MAXOLEN. + * + * An option that no longer fits is skipped, but NOP padding already + * written for it is kept. + * + * NOTE(review): padding is written before the space check, and that check + * subtracts the unsigned optlen from OFP_TCP_MAXOLEN. If padding ever + * pushed optlen past OFP_TCP_MAXOLEN the difference would presumably + * wrap and the check would pass; confirm no flag combination gets there. + */ +int +ofp_tcp_addoptions(struct tcpopt *to, uint8_t *optp) +{ + uint32_t mask, optlen = 0; /* mask: flag bit under test; optlen: option bytes written so far */ + + /* Visit the TOF_* flag bits from lowest to highest; that is the emission order. */ + for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) { + if ((to->to_flags & mask) != mask) + continue; /* option not requested */ + if (optlen == OFP_TCP_MAXOLEN) + break; /* option space is full */ + switch (to->to_flags & mask) { + case TOF_MSS: + /* Pad to a 4 byte boundary. */ + while (optlen % 4) { + optlen += OFP_TCPOLEN_NOP; + *optp++ = OFP_TCPOPT_NOP; + } + if (OFP_TCP_MAXOLEN - optlen < OFP_TCPOLEN_MAXSEG) + continue; + optlen += OFP_TCPOLEN_MAXSEG; + *optp++ = OFP_TCPOPT_MAXSEG; + *optp++ = OFP_TCPOLEN_MAXSEG; + /* Byte-swaps to->to_mss itself, then copies it out bytewise. */ + to->to_mss = odp_cpu_to_be_16(to->to_mss); + bcopy((uint8_t *)&to->to_mss, optp, sizeof(to->to_mss)); + optp += sizeof(to->to_mss); + break; + case TOF_SCALE: + /* Pad until the option starts at an odd offset (one NOP if it is first). */ + while (!optlen || optlen % 2 != 1) { + optlen += OFP_TCPOLEN_NOP; + *optp++ = OFP_TCPOPT_NOP; + } + if (OFP_TCP_MAXOLEN - optlen < OFP_TCPOLEN_WINDOW) + continue; + optlen += OFP_TCPOLEN_WINDOW; + *optp++ = OFP_TCPOPT_WINDOW; + *optp++ = OFP_TCPOLEN_WINDOW; + *optp++ = to->to_wscale; + break; + case TOF_SACKPERM: + /* Pad to an even offset. */ + while (optlen % 2) { + optlen += OFP_TCPOLEN_NOP; + *optp++ = OFP_TCPOPT_NOP; + } + if (OFP_TCP_MAXOLEN - optlen < OFP_TCPOLEN_SACK_PERMITTED) + continue; + optlen += OFP_TCPOLEN_SACK_PERMITTED; + *optp++ = OFP_TCPOPT_SACK_PERMITTED; + *optp++ = OFP_TCPOLEN_SACK_PERMITTED; + break; + case TOF_TS: + /* Pad until the option starts at offset 2 modulo 4. */ + while (!optlen || optlen % 4 != 2) { + optlen += OFP_TCPOLEN_NOP; + *optp++ = OFP_TCPOPT_NOP; + } + if (OFP_TCP_MAXOLEN - optlen < OFP_TCPOLEN_TIMESTAMP) + continue; + optlen += OFP_TCPOLEN_TIMESTAMP; + *optp++ = OFP_TCPOPT_TIMESTAMP; + *optp++ = OFP_TCPOLEN_TIMESTAMP; + /* Both timestamp fields are byte-swapped in *to before being copied out. */ + to->to_tsval = odp_cpu_to_be_32(to->to_tsval); + to->to_tsecr = odp_cpu_to_be_32(to->to_tsecr); + bcopy((uint8_t *)&to->to_tsval, optp, sizeof(to->to_tsval)); + optp += sizeof(to->to_tsval); + bcopy((uint8_t *)&to->to_tsecr, optp, sizeof(to->to_tsecr)); + optp += sizeof(to->to_tsecr); + break; + case TOF_SIGNATURE: + { + int siglen = OFP_TCPOLEN_SIGNATURE - 2; /* payload bytes: option length minus kind and length bytes */ + + /* Pad until the option starts at offset 2 modulo 4. */ + while (!optlen || optlen % 4 != 2) { + optlen +=
OFP_TCPOLEN_NOP; + *optp++ = OFP_TCPOPT_NOP; + } + if (OFP_TCP_MAXOLEN - optlen < OFP_TCPOLEN_SIGNATURE) + continue; + optlen += OFP_TCPOLEN_SIGNATURE; + *optp++ = OFP_TCPOPT_SIGNATURE; + *optp++ = OFP_TCPOLEN_SIGNATURE; + /* Record where the signature bytes sit, then zero-fill them. */ + to->to_signature = optp; + while (siglen--) + *optp++ = 0; + break; + } + case TOF_SACK: + { + int sackblks = 0; + struct sackblk *sack = (struct sackblk *)to->to_sacks; + tcp_seq sack_seq; /* scratch for one big-endian sequence number */ + + /* Pad until the option starts at offset 2 modulo 4. */ + while (!optlen || optlen % 4 != 2) { + optlen += OFP_TCPOLEN_NOP; + *optp++ = OFP_TCPOPT_NOP; + } + /* Need room for the option header plus at least one block. */ + if (OFP_TCP_MAXOLEN - optlen < OFP_TCPOLEN_SACKHDR + OFP_TCPOLEN_SACK) + continue; + optlen += OFP_TCPOLEN_SACKHDR; + *optp++ = OFP_TCPOPT_SACK; + /* Emit as many of the supplied blocks as still fit. */ + sackblks = min(to->to_nsacks, + (OFP_TCP_MAXOLEN - optlen) / OFP_TCPOLEN_SACK); + *optp++ = OFP_TCPOLEN_SACKHDR + sackblks * OFP_TCPOLEN_SACK; + /* Each block is written as start then end, both big-endian. */ + while (sackblks--) { + sack_seq = odp_cpu_to_be_32(sack->start); + bcopy((uint8_t *)&sack_seq, optp, sizeof(sack_seq)); + optp += sizeof(sack_seq); + sack_seq = odp_cpu_to_be_32(sack->end); + bcopy((uint8_t *)&sack_seq, optp, sizeof(sack_seq)); + optp += sizeof(sack_seq); + optlen += OFP_TCPOLEN_SACK; + sack++; + } + /* Bumped once per SACK option emitted, not once per block. */ + TCPSTAT_INC(tcps_sack_send_blocks); + break; + } + default: + panic("unknown TCP option type"); + break; + } + } + + /* Terminate and pad TCP options to a 4 byte boundary. */ + if (optlen % 4) { + optlen += OFP_TCPOLEN_EOL; + *optp++ = OFP_TCPOPT_EOL; + } + /* + * According to RFC 793 (STD0007): + * "The content of the header beyond the End-of-Option option + * must be header padding (i.e., zero)." + * and later: "The padding is composed of zeros."
+ */ + while (optlen % 4) { + optlen += OFP_TCPOLEN_PAD; + *optp++ = OFP_TCPOPT_PAD; + } + + /* optlen is a multiple of 4 here. */ + KASSERT(optlen <= OFP_TCP_MAXOLEN, ("%s: TCP options too long", __func__)); + return (optlen); +} diff --git a/src/ofp_tcp_reass.c b/src/ofp_tcp_reass.c new file mode 100644 index 00000000..548d907a --- /dev/null +++ b/src/ofp_tcp_reass.c @@ -0,0 +1,365 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ofp_tcp_input.c 8.12 (Berkeley) 5/24/95 + */ + +#include + +#include "ofpi_util.h" +#include "ofpi_in.h" +#include "ofpi_ip_var.h" +#include "ofpi_ip.h" +#include "ofpi_sysctl.h" +#include "ofpi_socketvar.h" +#include "ofpi_tcp_var.h" +#include "ofpi_socket.h" +#include "ofpi_icmp6.h" +#include "ofpi_ethernet.h" +#include "ofpi_if_arp.h" +#include "ofpi_icmp.h" +#include "ofpi_udp.h" +#include "ofpi_tcp_offload.h" +#include "ofpi_in_pcb.h" +#include "ofpi_in6.h" +#include "ofpi_portconf.h" +#include "ofpi_sockbuf.h" +#include "ofpi_domain.h" +#include "ofpi_sockopt.h" +#include "ofpi_udp_var.h" +#include "ofpi_sockstate.h" +#include "ofpi_ip6.h" +#include "ofpi_systm.h" +#include "ofpi_callout.h" +#include "ofpi_tcp_fsm.h" +#include "ofpi_route.h" +#include "ofpi_tcp_syncache.h" +#include "ofpi_queue.h" +#include "ofpi_if_vlan.h" +#include "ofpi_timer.h" +#include "ofpi_tcp.h" +#include "ofpi_if_gre.h" +#include "ofpi_tcp_timer.h" +#include "ofpi_tcp_seq.h" +#include "ofpi_protosw.h" + +#define SYSCTL_VNET_INT(_a...) OFP_SYSCTL_INT(_a) +#define SYSCTL_VNET_PROC(_a...) 
OFP_SYSCTL_PROC(_a) + +static int tcp_reass_sysctl_maxseg(OFP_SYSCTL_HANDLER_ARGS); +static int tcp_reass_sysctl_qsize(OFP_SYSCTL_HANDLER_ARGS); + +OFP_SYSCTL_NODE(_net_inet_tcp, OFP_OID_AUTO, reass, OFP_CTLFLAG_RW, 0, + "TCP Segment Reassembly Queue"); + +static VNET_DEFINE(int, tcp_reass_maxseg) = 0; +#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg) +SYSCTL_VNET_PROC(_net_inet_tcp_reass, OFP_OID_AUTO, maxsegments, + OFP_CTLTYPE_INT | OFP_CTLFLAG_RDTUN, + &VNET_NAME(tcp_reass_maxseg), 0, &tcp_reass_sysctl_maxseg, "I", + "Global maximum number of TCP Segments in Reassembly Queue"); + +static VNET_DEFINE(int, tcp_reass_qsize) = 0; +#define V_tcp_reass_qsize VNET(tcp_reass_qsize) +SYSCTL_VNET_PROC(_net_inet_tcp_reass, OFP_OID_AUTO, cursegments, + OFP_CTLTYPE_INT | OFP_CTLFLAG_RD, + &VNET_NAME(tcp_reass_qsize), 0, &tcp_reass_sysctl_qsize, "I", + "Global number of TCP Segments currently in Reassembly Queue"); + +static VNET_DEFINE(int, tcp_reass_overflows) = 0; +#define V_tcp_reass_overflows VNET(tcp_reass_overflows) +SYSCTL_VNET_INT(_net_inet_tcp_reass, OFP_OID_AUTO, overflows, + OFP_CTLTYPE_INT | OFP_CTLFLAG_RD, + &VNET_NAME(tcp_reass_overflows), 0, + "Global number of TCP Segment Reassembly Queue Overflows"); + +static VNET_DEFINE(uma_zone_t, tcp_reass_zone); +#define V_tcp_reass_zone VNET(tcp_reass_zone) + +int ofp_nmbclusters = 1024; + +/* Initialize TCP reassembly queue */ +#if 0 +static void +tcp_reass_zone_change(void *tag) +{ + (void)tag; + /* HJo + V_tcp_reass_maxseg = ofp_nmbclusters / 16; + uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg); + */ +} +#endif + +void +ofp_tcp_reass_init(void) +{ + V_tcp_reass_maxseg = ofp_nmbclusters / 16; + V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg); +} + +void +ofp_tcp_reass_flush(struct tcpcb *tp) +{ + struct tseg_qent *qe; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + while ((qe = 
OFP_LIST_FIRST(&tp->t_segq)) != NULL) { + OFP_LIST_REMOVE(qe, tqe_q); + uma_zfree(V_tcp_reass_zone, qe); + tp->t_segqlen--; + } + + KASSERT((tp->t_segqlen == 0), + ("TCP reass queue %p segment count is %d instead of 0 after flush.", + tp, tp->t_segqlen)); +} + +static int +tcp_reass_sysctl_maxseg(OFP_SYSCTL_HANDLER_ARGS) +{ + /* HJo V_tcp_reass_maxseg = uma_zone_get_max(V_tcp_reass_zone);*/ + return (sysctl_handle_int(oidp, arg1, arg2, req)); +} + +static int +tcp_reass_sysctl_qsize(OFP_SYSCTL_HANDLER_ARGS) +{ + /* HJo V_tcp_reass_qsize = uma_zone_get_cur(V_tcp_reass_zone);*/ + return (sysctl_handle_int(oidp, arg1, arg2, req)); +} + +int +ofp_tcp_reass(struct tcpcb *tp, struct ofp_tcphdr *th, int *tlenp, odp_packet_t m) +{ + struct tseg_qent *q; + struct tseg_qent *p = NULL; + struct tseg_qent *nq; + struct tseg_qent *te = NULL; + struct socket *so = tp->t_inpcb->inp_socket; + char *s = NULL; + int flags; + struct tseg_qent tqs; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + /* + * XXX: ofp_tcp_reass() is rather inefficient with its data structures + * and should be rewritten (see NetBSD for optimizations). + */ + + /* + * Call with th==NULL after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (th == NULL) + goto present; + + /* + * Limit the number of segments that can be queued to reduce the + * potential for mbuf exhaustion. For best performance, we want to be + * able to queue a full window's worth of segments. The size of the + * socket receive buffer determines our advertised window and grows + * automatically when socket buffer autotuning is enabled. Use it as the + * basis for our queue limit. + * Always let the missing segment through which caused this queue. + * NB: Access to the socket buffer is left intentionally unlocked as we + * can tolerate stale information here. + * + * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat + * should work but causes packets to be dropped when they shouldn't. 
+ * Investigate why and re-evaluate the below limit after the behaviour + * is understood. + */ + + if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) && + tp->t_segqlen >= (int)(so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) { + V_tcp_reass_overflows++; + TCPSTAT_INC(tcps_rcvmemdrop); + odp_packet_free(m); + *tlenp = 0; + if ((s = ofp_tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { + OFP_LOG("%s; %s: queue limit reached, " + "segment dropped\n", s, __func__); + free(s); + } + return (0); + } + + /* + * Allocate a new queue entry. If we can't, or hit the zone limit + * just drop the pkt. + * + * Use a temporary structure on the stack for the missing segment + * when the zone is exhausted. Otherwise we may get stuck. + */ + te = uma_zalloc(V_tcp_reass_zone, M_NOWAIT); + if (te == NULL) { + if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) { + TCPSTAT_INC(tcps_rcvmemdrop); + odp_packet_free(m); + *tlenp = 0; + if ((s = ofp_tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, + NULL))) { + OFP_LOG("%s; %s: global zone limit " + "reached, segment dropped\n", s, __func__); + free(s); + } + return (0); + } + + bzero(&tqs, sizeof(struct tseg_qent)); + te = &tqs; + if ((s = ofp_tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, + NULL))) { + OFP_LOG( + "%s; %s: global zone limit reached, using " + "stack for missing segment\n", s, __func__); + free(s); + } + } + tp->t_segqlen++; + + /* + * Find a segment which begins after this one does. + */ + OFP_LIST_FOREACH(q, &tp->t_segq, tqe_q) { + if (SEQ_GT(q->tqe_th->th_seq, th->th_seq)) + break; + p = q; + } + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. 
+ */ + if (p != NULL) { + int i; + /* conversion to int (in i) handles seq wraparound */ + i = p->tqe_th->th_seq + p->tqe_len - th->th_seq; + if (i > 0) { + if (i >= *tlenp) { + TCPSTAT_INC(tcps_rcvduppack); + TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp); + odp_packet_free(m); + if (te != &tqs) + uma_zfree(V_tcp_reass_zone, te); + tp->t_segqlen--; + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + goto present; /* ??? */ + } + odp_packet_pull_head(m, i); + *tlenp -= i; + th->th_seq += i; + } + } + tp->t_rcvoopack++; + TCPSTAT_INC(tcps_rcvoopack); + TCPSTAT_ADD(tcps_rcvoobyte, *tlenp); + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (q) { + int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq; + if (i <= 0) + break; + if (i < q->tqe_len) { + q->tqe_th->th_seq += i; + q->tqe_len -= i; + odp_packet_pull_head(q->tqe_m, i); + break; + } + + nq = OFP_LIST_NEXT(q, tqe_q); + OFP_LIST_REMOVE(q, tqe_q); + odp_packet_free(q->tqe_m); + uma_zfree(V_tcp_reass_zone, q); + tp->t_segqlen--; + q = nq; + } + + /* Insert the new segment queue entry into place. */ + te->tqe_m = m; + te->tqe_th = th; + te->tqe_len = *tlenp; + + if (p == NULL) { + OFP_LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q); + } else { + KASSERT(te != &tqs, ("%s: temporary stack based entry not " + "first element in queue", __func__)); + OFP_LIST_INSERT_AFTER(p, te, tqe_q); + } + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. 
+ */ + if (!TCPS_HAVEESTABLISHED(tp->t_state)) + return (0); + q = OFP_LIST_FIRST(&tp->t_segq); + if (!q || q->tqe_th->th_seq != tp->rcv_nxt) + return (0); + + SOCKBUF_LOCK(&so->so_rcv); + + do { + tp->rcv_nxt += q->tqe_len; + flags = q->tqe_th->th_flags & OFP_TH_FIN; + nq = OFP_LIST_NEXT(q, tqe_q); + OFP_LIST_REMOVE(q, tqe_q); + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) + odp_packet_free(q->tqe_m); + else + ofp_sbappendstream_locked(&so->so_rcv, q->tqe_m); + if (q != &tqs) { + uma_zfree(V_tcp_reass_zone, q); + } + tp->t_segqlen--; + q = nq; + } while (q && q->tqe_th->th_seq == tp->rcv_nxt); + + ND6_HINT(tp); + sorwakeup_locked(so); + return (flags); +} diff --git a/src/ofp_tcp_sack.c b/src/ofp_tcp_sack.c new file mode 100644 index 00000000..31458fc0 --- /dev/null +++ b/src/ofp_tcp_sack.c @@ -0,0 +1,677 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95 + */ + +/*- + * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995 + * + * NRL grants permission for redistribution and use in source and binary + * forms, with or without modification, of the software and documentation + * created at NRL provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgements: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * This product includes software developed at the Information + * Technology Division, US Naval Research Laboratory. + * 4. 
Neither the name of the NRL nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation + * are those of the authors and should not be interpreted as representing + * official policies, either expressed or implied, of the US Naval + * Research Laboratory (NRL). 
+ */
+
+#include
+
+#include "ofpi_util.h"
+#include "ofpi_in.h"
+#include "ofpi_ip_var.h"
+#include "ofpi_ip.h"
+#include "ofpi_sysctl.h"
+#include "ofpi_socketvar.h"
+#include "ofpi_tcp_var.h"
+#include "ofpi_socket.h"
+#include "ofpi_icmp6.h"
+#include "ofpi_ethernet.h"
+#include "ofpi_if_arp.h"
+#include "ofpi_icmp.h"
+#include "ofpi_udp.h"
+#include "ofpi_tcp_offload.h"
+#include "ofpi_in_pcb.h"
+#include "ofpi_in6.h"
+#include "ofpi_portconf.h"
+#include "ofpi_sockbuf.h"
+#include "ofpi_domain.h"
+#include "ofpi_sockopt.h"
+#include "ofpi_udp_var.h"
+#include "ofpi_sockstate.h"
+#include "ofpi_ip6.h"
+#include "ofpi_systm.h"
+#include "ofpi_callout.h"
+#include "ofpi_tcp_fsm.h"
+#include "ofpi_route.h"
+#include "ofpi_tcp_syncache.h"
+#include "ofpi_queue.h"
+#include "ofpi_if_vlan.h"
+#include "ofpi_timer.h"
+#include "ofpi_tcp.h"
+#include "ofpi_if_gre.h"
+#include "ofpi_tcp_timer.h"
+#include "ofpi_tcp_seq.h"
+#include "ofpi_protosw.h"
+
+#define SYSCTL_VNET_INT OFP_SYSCTL_INT	/* BSD macro name mapped onto the OFP sysctl macro */
+
+VNET_DECLARE(uma_zone_t, ofp_sack_hole_zone);	/* zone for struct sackhole; defined outside this file */
+#define V_sack_hole_zone VNET(ofp_sack_hole_zone)
+
+OFP_SYSCTL_NODE(_net_inet_tcp, OFP_OID_AUTO, sack, OFP_CTLFLAG_RW, 0, "TCP SACK");
+VNET_DEFINE(int, ofp_tcp_do_sack) = 1;
+#define V_tcp_do_sack VNET(ofp_tcp_do_sack)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OFP_OID_AUTO, enable, OFP_CTLFLAG_RW,
+    &VNET_NAME(ofp_tcp_do_sack), 0, "Enable/Disable TCP SACK support");
+
+VNET_DEFINE(int, ofp_tcp_sack_maxholes) = 128;
+#define V_tcp_sack_maxholes VNET(ofp_tcp_sack_maxholes)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OFP_OID_AUTO, maxholes, OFP_CTLFLAG_RW,
+    &VNET_NAME(ofp_tcp_sack_maxholes), 0,
+    "Maximum number of TCP SACK holes allowed per connection");
+
+VNET_DEFINE(int, ofp_tcp_sack_globalmaxholes) = 65536;
+#define V_tcp_sack_globalmaxholes VNET(ofp_tcp_sack_globalmaxholes)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OFP_OID_AUTO, globalmaxholes, OFP_CTLFLAG_RW,
+    &VNET_NAME(ofp_tcp_sack_globalmaxholes), 0,
+    "Global maximum number of TCP SACK holes");
+
+VNET_DEFINE(int, ofp_tcp_sack_globalholes) = 0;
+#define V_tcp_sack_globalholes VNET(ofp_tcp_sack_globalholes)
+SYSCTL_VNET_INT(_net_inet_tcp_sack, OFP_OID_AUTO, globalholes, OFP_CTLFLAG_RD,
+    &VNET_NAME(ofp_tcp_sack_globalholes), 0,
+    "Global number of TCP SACK holes currently allocated");
+
+/*
+ * This function is called upon receipt of new valid data (while not in
+ * header prediction mode), and it updates the ordered list of sacks.
+ *
+ * tp         connection whose receiver-side list (tp->sackblks[],
+ *            tp->rcv_numsacks) is rebuilt; its inpcb must be write-locked.
+ * rcv_start  first sequence number of the newly received data.
+ * rcv_end    end of the newly received data; must be after rcv_start.
+ */
+void
+ofp_tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
+{
+	/*
+	 * First reported block MUST be the most recent one.  Subsequent
+	 * blocks SHOULD be in the order in which they arrived at the
+	 * receiver.  These two conditions make the implementation fully
+	 * compliant with RFC 2018.
+	 */
+	struct sackblk head_blk, saved_blks[OFP_MAX_SACK_BLKS];
+	int num_head, num_saved, i;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	/* Check arguments. */
+	KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
+
+	/* SACK block for the received segment. */
+	head_blk.start = rcv_start;
+	head_blk.end = rcv_end;
+
+	/*
+	 * Merge updated SACK blocks into head_blk, and save unchanged SACK
+	 * blocks into saved_blks[].  num_saved will have the number of the
+	 * saved SACK blocks.
+	 */
+	num_saved = 0;
+	for (i = 0; i < tp->rcv_numsacks; i++) {
+		tcp_seq start = tp->sackblks[i].start;
+		tcp_seq end = tp->sackblks[i].end;
+		if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
+			/*
+			 * Discard this SACK block: it is empty or inverted,
+			 * or it starts at or before rcv_nxt.
+			 */
+		} else if (SEQ_LEQ(head_blk.start, end) &&
+			   SEQ_GEQ(head_blk.end, start)) {
+			/*
+			 * Merge this SACK block into head_blk.  This SACK
+			 * block itself will be discarded.  Blocks that merely
+			 * touch head_blk are merged too (the comparisons are
+			 * inclusive).
+			 */
+			if (SEQ_GT(head_blk.start, start))
+				head_blk.start = start;
+			if (SEQ_LT(head_blk.end, end))
+				head_blk.end = end;
+		} else {
+			/*
+			 * Save this SACK block.
+			 */
+			saved_blks[num_saved].start = start;
+			saved_blks[num_saved].end = end;
+			num_saved++;
+		}
+	}
+
+	/*
+	 * Update SACK list in tp->sackblks[].
+	 */
+	num_head = 0;
+	if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
+		/*
+		 * The received data segment is an out-of-order segment.  Put
+		 * head_blk at the top of SACK list.
+		 */
+		tp->sackblks[0] = head_blk;
+		num_head = 1;
+		/*
+		 * If the number of saved SACK blocks exceeds its limit,
+		 * discard the last SACK block.
+		 */
+		if (num_saved >= OFP_MAX_SACK_BLKS)
+			num_saved--;
+	}
+	if (num_saved > 0) {
+		/*
+		 * Copy the saved SACK blocks back.
+		 */
+		bcopy(saved_blks, &tp->sackblks[num_head],
+		      sizeof(struct sackblk) * num_saved);
+	}
+
+	/* Save the number of SACK blocks. */
+	tp->rcv_numsacks = num_head + num_saved;
+}
+
+/*
+ * Delete all receiver-side SACK information: the block count is reset and
+ * every slot of tp->sackblks[] is zeroed.
+ */
+void
+ofp_tcp_clean_sackreport(struct tcpcb *tp)
+{
+	int i;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+	tp->rcv_numsacks = 0;
+	for (i = 0; i < OFP_MAX_SACK_BLKS; i++)
+		tp->sackblks[i].start = tp->sackblks[i].end=0;
+}
+
+/*
+ * Allocate struct sackhole.
+ *
+ * Returns a hole covering [start, end) with rxmit set to start, or NULL
+ * when the per-connection or global hole limit is reached (counted in
+ * tcps_sack_sboverflow) or the zone allocation fails.  On success both the
+ * per-connection and the global hole counters are incremented.
+ */
+static struct sackhole *
+tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end)
+{
+	struct sackhole *hole;
+
+	if (tp->snd_numholes >= V_tcp_sack_maxholes ||
+	    V_tcp_sack_globalholes >= V_tcp_sack_globalmaxholes) {
+		TCPSTAT_INC(tcps_sack_sboverflow);
+		return NULL;
+	}
+
+	hole = (struct sackhole *)uma_zalloc(V_sack_hole_zone, M_NOWAIT);
+	if (hole == NULL)
+		return NULL;
+
+	hole->start = start;
+	hole->end = end;
+	hole->rxmit = start;
+
+	tp->snd_numholes++;
+	odp_atomic_inc_u32((odp_atomic_u32_t *)&V_tcp_sack_globalholes);	/* NOTE(review): the counter is defined as a plain int and cast here -- confirm the layouts match */
+
+	return hole;
+}
+
+/*
+ * Free struct sackhole.
+ *
+ * Returns the hole to the zone and decrements the per-connection and the
+ * global hole counters.  The hole is not unlinked from any list here.
+ */
+static void
+tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
+{
+
+	uma_zfree(V_sack_hole_zone, hole);
+
+	tp->snd_numholes--;
+	odp_atomic_dec_u32((odp_atomic_u32_t *)&V_tcp_sack_globalholes);
+
+	KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes >= 0"));
+	KASSERT(V_tcp_sack_globalholes >= 0, ("ofp_tcp_sack_globalholes >= 0"));
+}
+
+/*
+ * Insert new SACK hole into scoreboard.
+ *
+ * The hole covering [start, end) is linked after "after", or appended at
+ * the tail of tp->snd_holes when "after" is NULL.  Returns the new hole, or
+ * NULL when it could not be allocated (the scoreboard is then unchanged).
+ */
+static struct sackhole *
+tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
+    struct sackhole *after)
+{
+	struct sackhole *hole;
+
+	/* Allocate a new SACK hole. */
+	hole = tcp_sackhole_alloc(tp, start, end);
+	if (hole == NULL)
+		return NULL;
+
+	/* Insert the new SACK hole into scoreboard. */
+	if (after != NULL)
+		OFP_TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink);
+	else
+		OFP_TAILQ_INSERT_TAIL(&tp->snd_holes, hole, scblink);
+
+	/* Update SACK hint. */
+	if (tp->sackhint.nexthole == NULL)
+		tp->sackhint.nexthole = hole;
+
+	return hole;
+}
+
+/*
+ * Remove SACK hole from scoreboard.
+ *
+ * If the retransmit hint points at this hole it is moved to the following
+ * hole first; the hole is then unlinked and freed.
+ */
+static void
+tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
+{
+
+	/* Update SACK hint. */
+	if (tp->sackhint.nexthole == hole)
+		tp->sackhint.nexthole = OFP_TAILQ_NEXT(hole, scblink);
+
+	/* Remove this SACK hole. */
+	OFP_TAILQ_REMOVE(&tp->snd_holes, hole, scblink);
+
+	/* Free this SACK hole. */
+	tcp_sackhole_free(tp, hole);
+}
+
+/*
+ * Process cumulative ACK and the TCP SACK option to update the scoreboard.
+ * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
+ * the sequence space).
+ *
+ * tp      connection whose scoreboard, snd_fack and sackhint are updated;
+ *         its inpcb must be write-locked.
+ * to      parsed options of the incoming segment; the SACK blocks are read
+ *         from to->to_sacks in network byte order when TOF_SACK is set.
+ * th_ack  acknowledgment number of the incoming segment.
+ */
+void
+ofp_tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
+{
+	struct sackhole *cur, *temp;
+	struct sackblk sack, sack_blocks[OFP_TCP_MAX_SACK + 1], *sblkp;	/* +1: room for the [SND.UNA, SEG.ACK) pseudo block */
+	int i, j, num_sack_blks;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	num_sack_blks = 0;
+	/*
+	 * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
+	 * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
+	 */
+	if (SEQ_LT(tp->snd_una, th_ack) && !OFP_TAILQ_EMPTY(&tp->snd_holes)) {
+		sack_blocks[num_sack_blks].start = tp->snd_una;
+		sack_blocks[num_sack_blks++].end = th_ack;
+	}
+	/*
+	 * Append received valid SACK blocks to sack_blocks[], but only if we
+	 * received new blocks from the other side.
+	 *
+	 * NOTE(review): to->to_nsacks is not bounded against
+	 * OFP_TCP_MAX_SACK here; presumably the option parser guarantees
+	 * it -- confirm.
+	 */
+	if (to->to_flags & TOF_SACK) {
+		for (i = 0; i < to->to_nsacks; i++) {
+			bcopy((to->to_sacks + i * OFP_TCPOLEN_SACK),
+			    &sack, sizeof(sack));
+			sack.start = odp_be_to_cpu_32(sack.start);
+			sack.end = odp_be_to_cpu_32(sack.end);
+			if (SEQ_GT(sack.end, sack.start) &&
+			    SEQ_GT(sack.start, tp->snd_una) &&
+			    SEQ_GT(sack.start, th_ack) &&
+			    SEQ_LT(sack.start, tp->snd_max) &&
+			    SEQ_GT(sack.end, tp->snd_una) &&
+			    SEQ_LEQ(sack.end, tp->snd_max))
+				sack_blocks[num_sack_blks++] = sack;
+		}
+	}
+	/*
+	 * Return if SND.UNA is not advanced and no valid SACK block is
+	 * received.
+	 */
+	if (num_sack_blks == 0)
+		return;
+
+	/*
+	 * Sort the SACK blocks so we can update the scoreboard with just one
+	 * pass. The overhead of sorting up to 4+1 elements is less than
+	 * making up to 4+1 passes over the scoreboard.  The blocks end up in
+	 * ascending order of their end sequence number.
+	 */
+	for (i = 0; i < num_sack_blks; i++) {
+		for (j = i + 1; j < num_sack_blks; j++) {
+			if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
+				sack = sack_blocks[i];
+				sack_blocks[i] = sack_blocks[j];
+				sack_blocks[j] = sack;
+			}
+		}
+	}
+	if (OFP_TAILQ_EMPTY(&tp->snd_holes))
+		/*
+		 * Empty scoreboard. Need to initialize snd_fack (it may be
+		 * uninitialized or have a bogus value). Scoreboard holes
+		 * (from the sack blocks received) are created later below
+		 * (in the logic that adds holes to the tail of the
+		 * scoreboard).
+		 */
+		tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
+	/*
+	 * In the while-loop below, incoming SACK blocks (sack_blocks[]) and
+	 * SACK holes (snd_holes) are traversed from their tails with just
+	 * one pass in order to reduce the number of compares especially when
+	 * the bandwidth-delay product is large.
+	 *
+	 * Note: Typically, in the first RTT of SACK recovery, the highest
+	 * three or four SACK blocks with the same ack number are received.
+	 * In the second RTT, if retransmitted data segments are not lost,
+	 * the highest three or four SACK blocks with ack number advancing
+	 * are received.
+	 */
+	sblkp = &sack_blocks[num_sack_blks - 1];	/* Last SACK block */
+	tp->sackhint.last_sack_ack = sblkp->end;
+	if (SEQ_LT(tp->snd_fack, sblkp->start)) {
+		/*
+		 * The highest SACK block is beyond fack.  Append new SACK
+		 * hole at the tail.  If the second or later highest SACK
+		 * blocks are also beyond the current fack, they will be
+		 * inserted by way of hole splitting in the while-loop below.
+		 */
+		temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
+		if (temp != NULL) {
+			tp->snd_fack = sblkp->end;
+			/* Go to the previous sack block. */
+			sblkp--;
+		} else {
+			/*
+			 * We failed to add a new hole based on the current
+			 * sack block.  Skip over all the sack blocks that
+			 * fall completely to the right of snd_fack and
+			 * proceed to trim the scoreboard based on the
+			 * remaining sack blocks.  This also trims the
+			 * scoreboard for th_ack (which is sack_blocks[0]).
+			 */
+			while (sblkp >= sack_blocks &&
+			       SEQ_LT(tp->snd_fack, sblkp->start))
+				sblkp--;
+			if (sblkp >= sack_blocks &&
+			    SEQ_LT(tp->snd_fack, sblkp->end))
+				tp->snd_fack = sblkp->end;
+		}
+	} else if (SEQ_LT(tp->snd_fack, sblkp->end))
+		/* fack is advanced. */
+		tp->snd_fack = sblkp->end;
+	/* We must have at least one SACK hole in scoreboard. */
+	KASSERT(!OFP_TAILQ_EMPTY(&tp->snd_holes),
+	    ("SACK scoreboard must not be empty"));
+	cur = OFP_TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
+	/*
+	 * Since the incoming sack blocks are sorted, we can process them
+	 * making one sweep of the scoreboard.
+	 */
+	while (sblkp >= sack_blocks  && cur != NULL) {
+		if (SEQ_GEQ(sblkp->start, cur->end)) {
+			/*
+			 * SACKs data beyond the current hole.  Go to the
+			 * previous sack block.
+			 */
+			sblkp--;
+			continue;
+		}
+		if (SEQ_LEQ(sblkp->end, cur->start)) {
+			/*
+			 * SACKs data before the current hole.  Go to the
+			 * previous hole.
+			 */
+			cur = OFP_TAILQ_PREV(cur, sackhole_head, scblink);
+			continue;
+		}
+		tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);	/* take this hole's share out; re-added below once the hole is adjusted */
+		KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
+		    ("sackhint bytes rtx >= 0"));
+		if (SEQ_LEQ(sblkp->start, cur->start)) {
+			/* Data acks at least the beginning of hole. */
+			if (SEQ_GEQ(sblkp->end, cur->end)) {
+				/* Acks entire hole, so delete hole. */
+				temp = cur;
+				cur = OFP_TAILQ_PREV(cur, sackhole_head, scblink);
+				tcp_sackhole_remove(tp, temp);
+				/*
+				 * The sack block may ack all or part of the
+				 * next hole too, so continue onto the next
+				 * hole.
+				 */
+				continue;
+			} else {
+				/* Move start of hole forward. */
+				cur->start = sblkp->end;
+				cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
+			}
+		} else {
+			/* Data acks at least the end of hole. */
+			if (SEQ_GEQ(sblkp->end, cur->end)) {
+				/* Move end of hole backward. */
+				cur->end = sblkp->start;
+				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
+			} else {
+				/*
+				 * ACKs some data in middle of a hole; need
+				 * to split current hole.  If no new hole can
+				 * be allocated the current hole is left as
+				 * it is.
+				 */
+				temp = tcp_sackhole_insert(tp, sblkp->end,
+				    cur->end, cur);
+				if (temp != NULL) {
+					if (SEQ_GT(cur->rxmit, temp->rxmit)) {
+						temp->rxmit = cur->rxmit;
+						tp->sackhint.sack_bytes_rexmit
+						    += (temp->rxmit
+						    - temp->start);
+					}
+					cur->end = sblkp->start;
+					cur->rxmit = SEQ_MIN(cur->rxmit,
+					    cur->end);
+				}
+			}
+		}
+		tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
+		/*
+		 * Testing sblkp->start against cur->start tells us whether
+		 * we're done with the sack block or the sack hole.
+		 * Accordingly, we advance one or the other.
+		 */
+		if (SEQ_LEQ(sblkp->start, cur->start))
+			cur = OFP_TAILQ_PREV(cur, sackhole_head, scblink);
+		else
+			sblkp--;
+	}
+}
+
+/*
+ * Free all SACK holes to clear the scoreboard.
+ */ +void +ofp_tcp_free_sackholes(struct tcpcb *tp) +{ + struct sackhole *q; + + INP_WLOCK_ASSERT(tp->t_inpcb); + while ((q = OFP_TAILQ_FIRST(&tp->snd_holes)) != NULL) + tcp_sackhole_remove(tp, q); + tp->sackhint.sack_bytes_rexmit = 0; + + KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes == 0")); + KASSERT(tp->sackhint.nexthole == NULL, + ("tp->sackhint.nexthole == NULL")); +} + +/* + * Partial ack handling within a sack recovery episode. Keeping this very + * simple for now. When a partial ack is received, force snd_cwnd to a value + * that will allow the sender to transmit no more than 2 segments. If + * necessary, a better scheme can be adopted at a later point, but for now, + * the goal is to prevent the sender from bursting a large amount of data in + * the midst of sack recovery. + */ +void +ofp_tcp_sack_partialack(struct tcpcb *tp, struct ofp_tcphdr *th) +{ + int num_segs = 1; + + INP_WLOCK_ASSERT(tp->t_inpcb); + ofp_tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rtttime = 0; + /* Send one or 2 segments based on how much new data was acked. */ + if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2) + num_segs = 2; + tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + + (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg); + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + t_flags_or(tp->t_flags, TF_ACKNOW); + (void) ofp_tcp_output(tp); +} + +#if 0 +/* + * Debug version of ofp_tcp_sack_output() that walks the scoreboard. Used for + * now to sanity check the hint. 
+ */
+static struct sackhole *
+tcp_sack_output_debug(struct tcpcb *tp, int *sack_bytes_rexmt)
+{
+	struct sackhole *hole;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+	*sack_bytes_rexmt = 0;
+	OFP_TAILQ_FOREACH(hole, &tp->snd_holes, scblink) {
+		/* Non-zero while part of this hole is still to be retransmitted. */
+		int pending = SEQ_LT(hole->rxmit, hole->end);
+
+		if (pending && SEQ_LT(hole->rxmit, tp->snd_una))
+			continue;	/* old SACK hole */
+		*sack_bytes_rexmt += (hole->rxmit - hole->start);
+		if (pending)
+			break;
+	}
+	return (hole);
+}
+#endif
+
+/*
+ * Returns the next hole to retransmit and the number of retransmitted bytes
+ * from the scoreboard.  We store both the next hole and the number of
+ * retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK
+ * reception).  This avoids scoreboard traversals completely.
+ *
+ * The loop here will traverse *at most* one link.  Here's the argument.  For
+ * the loop to traverse more than 1 link before finding the next hole to
+ * retransmit, we would need to have at least 1 node following the current
+ * hint with (rxmit == end).  But, for all holes following the current hint,
+ * (start == rxmit), since we have not yet retransmitted from them.
+ * Therefore, in order to traverse more than 1 link in the loop below, we
+ * need to have at least one node following the current hint with
+ * (start == rxmit == end).  But that can't happen, (start == end) means
+ * that all the data in that hole has been sacked, in which case, the hole
+ * would have been removed from the scoreboard.
+ */ +struct sackhole * +ofp_tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) +{ + struct sackhole *hole = NULL; + + INP_WLOCK_ASSERT(tp->t_inpcb); + *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit; + hole = tp->sackhint.nexthole; + if (hole == NULL || SEQ_LT(hole->rxmit, hole->end)) + goto out; + while ((hole = OFP_TAILQ_NEXT(hole, scblink)) != NULL) { + if (SEQ_LT(hole->rxmit, hole->end)) { + tp->sackhint.nexthole = hole; + break; + } + } +out: + return (hole); +} + +/* + * After a timeout, the SACK list may be rebuilt. This SACK information + * should be used to avoid retransmitting SACKed data. This function + * traverses the SACK list to see if snd_nxt should be moved forward. + */ +void +ofp_tcp_sack_adjust(struct tcpcb *tp) +{ + struct sackhole *p, *cur = OFP_TAILQ_FIRST(&tp->snd_holes); + + INP_WLOCK_ASSERT(tp->t_inpcb); + if (cur == NULL) + return; /* No holes */ + if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) + return; /* We're already beyond any SACKed blocks */ + /*- + * Two cases for which we want to advance snd_nxt: + * i) snd_nxt lies between end of one hole and beginning of another + * ii) snd_nxt lies between end of last hole and snd_fack + */ + while ((p = OFP_TAILQ_NEXT(cur, scblink)) != NULL) { + if (SEQ_LT(tp->snd_nxt, cur->end)) + return; + if (SEQ_GEQ(tp->snd_nxt, p->start)) + cur = p; + else { + tp->snd_nxt = p->start; + return; + } + } + if (SEQ_LT(tp->snd_nxt, cur->end)) + return; + tp->snd_nxt = tp->snd_fack; +} diff --git a/src/ofp_tcp_subr.c b/src/ofp_tcp_subr.c new file mode 100644 index 00000000..e48dde40 --- /dev/null +++ b/src/ofp_tcp_subr.c @@ -0,0 +1,2343 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
+ */
+
+#include	/* NOTE(review): the header name is missing in this copy; restore it from the original source */
+
+#include "ofpi_pkt_processing.h"
+#include "ofpi_errno.h"
+
+#include "odp/spinlock.h"
+#include "odp/time.h"
+
+#include "ofpi_sysctl.h"
+#include "ofpi_socketvar.h"
+#include "ofpi_sockstate.h"
+#include "ofpi_systm.h"
+#include "ofpi_protosw.h"
+#include "ofpi_in.h"
+#include "ofpi_ip.h"
+#include "ofpi_icmp.h"
+#include "ofpi_tcp.h"
+#include "ofpi_in_pcb.h"
+#ifdef INET6
+#include "ofpi_ip6.h"
+#include "ofpi_ip6_var.h"
+#endif
+
+#include "ofpi_tcp_fsm.h"
+#include "ofpi_tcp_seq.h"
+#include "ofpi_tcp_timer.h"
+#include "ofpi_tcp_var.h"
+#ifdef INET6
+#include "ofpi_tcp6_var.h"
+#endif
+#include "ofpi_tcp_syncache.h"
+#include "ofpi_md5.h"
+
+//#include "ofp_tcpip.h"
+#ifdef TCPDEBUG
+#include	/* NOTE(review): header name missing here as well; restore it from the original source */
+#endif
+
+
+#define SYSCTL_VNET_INT OFP_SYSCTL_INT	/* the "VNET" sysctl flavour maps straight onto the plain OFP macro */
+
+unsigned int ofp_max_protohdr = 0;	/* raised in ofp_tcp_init() to at least the TCP/IP header size */
+int ofp_max_linkhdr = 64;	/* ofp_tcp_init() panics if this plus the TCP/IP headers exceeds SHM_PKT_POOL_BUF_SIZE */
+
+VNET_DEFINE(int, ofp_tcp_mssdflt) = OFP_TCP_MSS;
+#ifdef INET6
+VNET_DEFINE(int, ofp_tcp_v6mssdflt) = OFP_TCP6_MSS;
+#endif
+
+#if 0	/* compiled out: sysctl handler that refuses a default MSS below OFP_TCP_MINMSS */
+static int
+sysctl_net_inet_tcp_mss_check(OFP_SYSCTL_HANDLER_ARGS)
+{
+	int error, new;
+
+	new = V_tcp_mssdflt;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {	/* only validate when a new value was supplied */
+		if (new < OFP_TCP_MINMSS)
+			error = OFP_EINVAL;
+		else
+			V_tcp_mssdflt = new;
+	}
+	return (error);
+}
+#endif
+
+#ifdef _INET6	/* NOTE(review): tests _INET6 (leading underscore) while the matching #endif comment and the rest of the file use INET6 - confirm this is intentional */
+static int
+sysctl_net_inet_tcp_mss_v6_check(OFP_SYSCTL_HANDLER_ARGS)
+{
+	int error, new;
+
+	new = V_tcp_v6mssdflt;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {	/* same validation as the IPv4 handler above */
+		if (new < OFP_TCP_MINMSS)
+			error = OFP_EINVAL;
+		else
+			V_tcp_v6mssdflt = new;
+	}
+	return (error);
+}
+
+SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
+    OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
+    &sysctl_net_inet_tcp_mss_v6_check, "I",
+   "Default TCP Maximum Segment Size for IPv6");
+#endif /* INET6 */
+
+/*
+ * Minimum MSS we accept and use.
This prevents DoS attacks where
+ * we are forced to a ridiculous low MSS like 20 and send hundreds
+ * of packets instead of one. The effect scales with the available
+ * bandwidth and quickly saturates the CPU and network interface
+ * with packet generation and sending. Set to zero to disable MINMSS
+ * checking. This setting prevents us from sending too small packets.
+ */
+VNET_DEFINE(int, ofp_tcp_minmss) = OFP_TCP_MINMSS;
+VNET_DEFINE(int, ofp_tcp_do_rfc1323) = 1;	/* when non-zero, ofp_tcp_newtcpcb() requests window scaling and timestamps */
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, OFP_CTLFLAG_RW,
+    &VNET_NAME(ofp_tcp_do_rfc1323), 0,
+    "Enable rfc1323 (high performance TCP) extensions");
+
+static int	tcp_log_debug = 0;
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, log_debug, OFP_CTLFLAG_RW,
+    &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
+
+static int	tcp_tcbhashsize = 0;	/* overwritten in ofp_tcp_init() with the hash size actually used */
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, tcbhashsize, OFP_CTLFLAG_RDTUN,
+    &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
+
+static int	do_tcpdrain = 1;	/* tested by ofp_tcp_drain(), which currently does nothing else */
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, do_tcpdrain, OFP_CTLFLAG_RW, &do_tcpdrain, 0,
+    "Enable tcp_drain routine for extra help when low on mbufs");
+
+static VNET_DEFINE(int, icmp_may_rst) = 1;
+#define	V_icmp_may_rst			VNET(icmp_may_rst)
+SYSCTL_VNET_INT(_net_inet_tcp, OFP_OID_AUTO, icmp_may_rst, OFP_CTLFLAG_RW,
+    &VNET_NAME(icmp_may_rst), 0,
+    "Certain ICMP unreachable messages may abort connections in SYN_SENT");
+
+static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
+#define	V_tcp_isn_reseed_interval	VNET(tcp_isn_reseed_interval)
+SYSCTL_VNET_INT(_net_inet_tcp, OFP_OID_AUTO, isn_reseed_interval, OFP_CTLFLAG_RW,
+    &VNET_NAME(tcp_isn_reseed_interval), 0,
+    "Seconds between reseeding of ISN secret");
+
+static int	tcp_soreceive_stream = 0;
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, soreceive_stream, OFP_CTLFLAG_RDTUN,
+    &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets");
+
+VNET_DEFINE(uma_zone_t, ofp_sack_hole_zone);	/* zone of struct sackhole items, created in ofp_tcp_init() */
+#define	V_sack_hole_zone		VNET(ofp_sack_hole_zone)
+
+VNET_DEFINE(struct hhook_head *, ofp_tcp_hhh[HHOOK_TCP_LAST+1]);	/* helper-hook heads; their registration in ofp_tcp_init() is compiled out */
+
+static char *	tcp_log_addr(struct in_conninfo *inc, struct ofp_tcphdr *th,
+		    void *ip4hdr, const void *ip6hdr);
+
+/*
+ * Target size of TCP PCB hash tables. Must be a power of two.
+ *
+ * Note that this can be overridden by the kernel environment
+ * variable net.inet.tcp.tcbhashsize
+ *
+ * NOTE(review): in this port the TUNABLE_INT_FETCH() that implements the
+ * override, and the power-of-two check, are compiled out (#if 0) in
+ * ofp_tcp_init(), so only a build-time definition of TCBHASHSIZE has an
+ * effect.
+ */
+#ifndef TCBHASHSIZE
+#define TCBHASHSIZE	512
+#endif
+
+
+/*
+ * Wrapper around transport structs that contain same-named congestion
+ * control variables. Allows algos to be shared amongst multiple CC aware
+ * transports.
+ */
+struct cc_var {
+	void		*cc_data; /* Per-connection private CC algorithm data. */
+	int		bytes_this_ack; /* # bytes acked by the current ACK. */
+	tcp_seq		curack; /* Most recent ACK. */
+	uint32_t	flags; /* Flags for cc_var (see below) */
+	int		type; /* Indicates which ptr is valid in ccvc. */
+	union ccv_container {
+		struct tcpcb		*tcp;	/* set by ofp_tcp_newtcpcb(), with type = OFP_IPPROTO_TCP */
+		struct sctp_nets	*sctp;
+	} ccvc;
+};
+
+/*
+ * Lock key:
+ *   (c) container lock (e.g. jail's pr_mtx) and/or osd_object_lock
+ *   (l) osd_list_lock
+ */
+struct osd {
+	uint32_t	  osd_nslots;	/* (c) */
+	void		**osd_slots;	/* (c) */
+	OFP_LIST_ENTRY(osd) osd_next;	/* (l) */
+};
+
+/*
+ * XXX
+ * Callouts should be moved into struct tcp directly. They are currently
+ * separate because the tcpcb structure is exported to userland for sysctl
+ * parsing purposes, which do not know about callouts.
+ */
+struct tcpcb_mem {
+	struct	tcpcb		ofp_tcb;	/* first member: ofp_tcp_discardcb() hands this tcpcb pointer back to the zone */
+	struct	tcp_timer	tt;		/* becomes tp->t_timers in ofp_tcp_newtcpcb() */
+	struct	cc_var		ccv;		/* becomes tp->ccv in ofp_tcp_newtcpcb() */
+	struct	osd		osd;		/* only referenced from compiled-out (#if 0) code in ofp_tcp_newtcpcb() */
+};
+
+static VNET_DEFINE(uma_zone_t, tcpcb_zone);	/* zone of struct tcpcb_mem items, created in ofp_tcp_init() */
+#define	V_tcpcb_zone			VNET(tcpcb_zone)
+
+static odp_spinlock_t isn_mtx;	/* spinlock behind the ISN_LOCK*() macros below */
+
+#if 0
+#define	ISN_LOCK_INIT()	mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
+#define	ISN_LOCK()	mtx_lock(&isn_mtx)
+#define	ISN_UNLOCK()	mtx_unlock(&isn_mtx)
+#else
+/*
+ * Active variant, built on the ODP spinlock.  Note that ISN_LOCK_INIT()
+ * also printf()s a file:line trace; the traces in ISN_LOCK()/ISN_UNLOCK()
+ * are commented out.
+ */
+#define	ISN_LOCK_INIT()	do { printf("%s:%d: isn lock init\n",__FILE__,__LINE__); \
+		odp_spinlock_init(&isn_mtx); } while(0)
+#define	ISN_LOCK()	do { /*printf("%s:%d: isn lock\n",__FILE__,__LINE__);*/ \
+		odp_spinlock_lock(&isn_mtx); } while(0)
+#define	ISN_UNLOCK()	do { /*printf("%s:%d: isn unlock\n",__FILE__,__LINE__);*/ \
+		odp_spinlock_unlock(&isn_mtx); } while(0)
+#endif
+
+
+/*
+ * Item-init callback handed to ofp_in_pcbinfo_init() by ofp_tcp_init():
+ * initialises the lock of the inpcb stored at mem.  size and flags are
+ * unused.  Always returns 0.
+ */
+static int
+tcp_inpcb_init(void *mem, int size, int flags)
+{
+	struct inpcb *inp = mem;
+	(void)size;
+	(void)flags;
+
+	INP_LOCK_INIT(inp, "inp", "tcpinp");
+	return (0);
+}
+
+/*
+ * Report hash statistics for the TCP PCB table by forwarding min, avg and
+ * max to ofp_in_pcbinfo_hashstats() on V_tcbinfo.
+ */
+void
+ofp_tcp_tcbinfo_hashstats(unsigned int *min, unsigned int *avg, unsigned int *max)
+{
+	ofp_in_pcbinfo_hashstats(&V_tcbinfo, min, avg, max);
+}
+
+/*
+ * Initialise the TCP layer.
+ *
+ * In order: sets up the PCB info with TCBHASHSIZE as both hash sizes,
+ * creates the tcpcb zone (capped at maxsockets), initialises the time-wait,
+ * syncache and reassembly code, creates the SACK-hole zone, loads the timer
+ * variables from the TCPTV_* constants, raises ofp_max_protohdr to the
+ * TCP/IP header size if it is smaller, initialises the ISN lock and starts
+ * a timer for ofp_tcp_slowtimo.
+ *
+ * Panics if ofp_max_linkhdr plus the TCP/IP headers exceeds
+ * SHM_PKT_POOL_BUF_SIZE.  The results of uma_zcreate() are not checked here.
+ */
+void
+ofp_tcp_init(void)
+{
+	int hashsize;
+
+#if 0
+	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
+	    &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
+		printf("%s: WARNING: unable to register helper hook\n", __func__);
+	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
+	    &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
+		printf("%s: WARNING: unable to register helper hook\n", __func__);
+#endif
+	hashsize = TCBHASHSIZE;
+#if 0 /* We trust size is power of 2. */
+	TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
+	if (!powerof2(hashsize)) {
+		printf("WARNING: TCB hash size not a power of 2\n");
+		hashsize = 512; /* safe default */
+	}
+#endif
+
+	ofp_in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
+	    "tcp_inpcb", tcp_inpcb_init, NULL, 0,
+	    IPI_HASHFIELDS_4TUPLE);
+
+	/*
+	 * These have to be type stable for the benefit of the timers.
+	 */
+	V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	uma_zone_set_max(V_tcpcb_zone, maxsockets);
+
+	ofp_tcp_tw_init();
+	ofp_syncache_init();
+	/* tcp_hc_init(); */
+	ofp_tcp_reass_init();
+
+	//TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
+	V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+
+	/* XXX virtualize those below? */
+	ofp_tcp_delacktime = TCPTV_DELACK;
+	ofp_tcp_keepinit = TCPTV_KEEP_INIT;
+	ofp_tcp_keepidle = TCPTV_KEEP_IDLE;
+	ofp_tcp_keepintvl = TCPTV_KEEPINTVL;
+	ofp_tcp_maxpersistidle = TCPTV_KEEP_IDLE;
+	ofp_tcp_msl = TCPTV_MSL;
+	ofp_tcp_rexmit_min = TCPTV_MIN;
+	if (ofp_tcp_rexmit_min < 1)	/* keep the minimum retransmit time at 1 or more */
+		ofp_tcp_rexmit_min = 1;
+	ofp_tcp_rexmit_slop = TCPTV_CPU_VAR;
+#ifdef PASSIVE_INET
+	tcp_reassdl = TCPTV_REASSDL;
+#endif
+	ofp_tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
+	tcp_tcbhashsize = hashsize;	/* publish the size actually used through the tcbhashsize sysctl variable */
+
+	/* TCP_MINPROTOHDR is a local helper macro, undefined again right after use. */
+#ifdef INET6
+#define TCP_MINPROTOHDR (sizeof(struct ofp_ip6_hdr) + sizeof(struct ofp_tcphdr))
+#else /* INET6 */
+#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
+#endif /* INET6 */
+	if (ofp_max_protohdr < TCP_MINPROTOHDR)
+		ofp_max_protohdr = TCP_MINPROTOHDR;
+	if (ofp_max_linkhdr + TCP_MINPROTOHDR > SHM_PKT_POOL_BUF_SIZE)
+		panic("ofp_tcp_init");
+#undef TCP_MINPROTOHDR
+
+	ISN_LOCK_INIT();
+#if 0
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, ofp_tcp_fini, NULL,
+		SHUTDOWN_PRI_DEFAULT);
+	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
+		EVENTHANDLER_PRI_ANY);
+#endif
+
+	/* NOTE(review): 500000 is presumably a microsecond interval (500 ms) - confirm against ofp_timer_start(). */
+	ofp_timer_start(500000, ofp_tcp_slowtimo, NULL, 0);
+}
+
+/*
+ * Shutdown hook.  Currently a no-op: xtp is ignored, and the
+ * EVENTHANDLER_REGISTER() call that would install it is compiled out in
+ * ofp_tcp_init().
+ */
+void
+ofp_tcp_fini(void *xtp)
+{
+	(void)xtp;
+}
+
+/*
+ * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb.
+ * tcp_template used to store this data in mbufs, but we now recopy it out
+ * of the tcpcb each time to conserve mbufs.
+ *
+ * ip_ptr receives an IPv6 header when INET6 is defined and the inpcb has
+ * INP_IPV6 set, otherwise an IPv4 header; tcp_ptr receives the TCP header.
+ * Addresses, ports, TOS and TTL come from inp; sequence, ack, window,
+ * flags and checksums are zeroed.
+ *
+ * Note that the IPv6 branch keeps the bits of ofp_ip6_flow and ofp_ip6_vfc
+ * that lie outside the flow-info / version masks, so the caller's buffer
+ * must hold defined values before the call.
+ */
+void
+ofp_tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
+{
+	struct ofp_tcphdr *th = (struct ofp_tcphdr *)tcp_ptr;
+
+	INP_WLOCK_ASSERT(inp);
+
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0) {
+		struct ofp_ip6_hdr *ip6;
+
+		ip6 = (struct ofp_ip6_hdr *)ip_ptr;
+		ip6->ofp_ip6_flow = (ip6->ofp_ip6_flow & ~OFP_IPV6_FLOWINFO_MASK) |
+			(inp->inp_flow & OFP_IPV6_FLOWINFO_MASK);
+		ip6->ofp_ip6_vfc = (ip6->ofp_ip6_vfc & ~OFP_IPV6_VERSION_MASK) |
+			(OFP_IPV6_VERSION & OFP_IPV6_VERSION_MASK);
+		ip6->ofp_ip6_nxt = OFP_IPPROTO_TCP;
+		ip6->ofp_ip6_plen = odp_cpu_to_be_16(sizeof(struct ofp_tcphdr));	/* payload is a bare TCP header */
+		ip6->ip6_src = inp->in6p_laddr;
+		ip6->ip6_dst = inp->in6p_faddr;
+	}
+	else
+#endif
+	{
+		struct ofp_ip *ip;
+
+		ip = (struct ofp_ip *)ip_ptr;
+		ip->ip_v = OFP_IPVERSION;
+		ip->ip_hl = 5;	/* header length in 32-bit words: no IP options */
+		ip->ip_tos = inp->inp_ip_tos;
+		ip->ip_len = 0;
+		ip->ip_id = 0;
+		ip->ip_off = 0;
+		ip->ip_ttl = inp->inp_ip_ttl;
+		ip->ip_sum = 0;
+		ip->ip_p = OFP_IPPROTO_TCP;
+		ip->ip_src = inp->inp_laddr;
+		ip->ip_dst = inp->inp_faddr;
+	}
+
+	th->th_sport = inp->inp_lport;
+	th->th_dport = inp->inp_fport;
+	th->th_seq = 0;
+	th->th_ack = 0;
+	th->th_x2 = 0;
+	th->th_off = 5;	/* data offset in 32-bit words: no TCP options */
+	th->th_flags = 0;
+	th->th_win = 0;
+	th->th_urp = 0;
+	th->th_sum = 0;		/* in_pseudo() is called later for ipv4 */
+}
+
+/*
+ * Create template to be used to send tcp packets on a connection.
+ * Allocates an mbuf and fills in a skeletal tcp/ip header. The only
+ * use for this function is in keepalives, which use ofp_tcp_respond.
+ */ +struct tcptemp * +ofp_tcpip_maketemplate(struct inpcb *inp) +{ + struct tcptemp *t; + + t = malloc(sizeof(*t)); + if (t == NULL) + return (NULL); + ofp_tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); + return (t); +} + +/* + * Send a single message to the TCP at address specified by + * the given TCP/IP header. If m == NULL, then we make a copy + * of the tcpiphdr at ti and send directly to the addressed host. + * This is used to force keep alive messages out using the TCP + * template for a connection. If flags are given then we send + * a message back to the TCP which originated the * segment ti, + * and discard the mbuf containing it and any other attached mbufs. + * + * In any case the ack and sequence number of the transmitted + * segment are as specified by the parameters. + * + * NOTE: If m != NULL, then ti must point to *inside* the mbuf. + */ +void +ofp_tcp_respond(struct tcpcb *tp, void *ipgen, struct ofp_tcphdr *th, odp_packet_t m, + tcp_seq ack, tcp_seq seq, int flags) +{ + int tlen; + int win = 0; + struct ofp_ip *ip; + struct ofp_tcphdr *nth; +#ifdef INET6 + struct ofp_ip6_hdr *ip6; + int isipv6; +#endif /* INET6 */ + int ipflags = 0; + struct inpcb *inp; + (void)ipflags; + + KASSERT(tp != NULL || m != ODP_PACKET_INVALID, ("ofp_tcp_respond: tp and m both NULL")); + +#ifdef INET6 + isipv6 = ((struct ofp_ip *)ipgen)->ip_v == (OFP_IPV6_VERSION >> 4); + ip6 = ipgen; +#endif /* INET6 */ + ip = ipgen; + + if (tp != NULL) { + inp = tp->t_inpcb; + KASSERT(inp != NULL, ("tcp control block w/o inpcb")); + INP_WLOCK_ASSERT(inp); + } else + inp = NULL; + + if (tp != NULL) { + if (!(flags & OFP_TH_RST)) { + win = sbspace(&inp->inp_socket->so_rcv); + + if (win > (long)OFP_TCP_MAXWIN << tp->rcv_scale) + win = (long)OFP_TCP_MAXWIN << tp->rcv_scale; + } + } + + int valid_m = m != ODP_PACKET_INVALID; + + if (!valid_m) { +#ifdef INET6 + if (isipv6) { + m = ofp_packet_alloc(sizeof(struct ofp_ip6_hdr) + + sizeof(struct ofp_tcphdr)); + + if (m == 
ODP_PACKET_INVALID) + return; + + odp_packet_l3_offset_set(m, 0); + odp_packet_l4_offset_set(m, sizeof(struct ofp_ip6_hdr)); + } else +#else + { + m = ofp_packet_alloc(sizeof(struct ofp_ip) + + sizeof(struct ofp_tcphdr)); + + if (m == ODP_PACKET_INVALID) + return; + + odp_packet_l3_offset_set(m, 0); + odp_packet_l4_offset_set(m, sizeof(struct ofp_ip)); + } +#endif + flags = OFP_TH_ACK; + } + + tlen = 0; + +#ifdef INET6 + if (isipv6) { + bcopy((char *)ip6, (char *)odp_packet_data(m), + sizeof(struct ofp_ip6_hdr)); + ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m); + nth = (struct ofp_tcphdr *)(ip6 + 1); + } else +#endif /* INET6 */ + { + bcopy((char *)ip, (char *)odp_packet_data(m), + sizeof(struct ofp_ip)); + ip = (struct ofp_ip *)odp_packet_data(m); + nth = (struct ofp_tcphdr *)(ip + 1); + } + + bcopy((char *)th, (char *)nth, sizeof(struct ofp_tcphdr)); + + if (valid_m) { +#define xchg(a,b,type) { type t; t=a; a=b; b=t; } +#ifdef INET6 + if (isipv6) { + xchg(ip6->ip6_dst, ip6->ip6_src, struct ofp_in6_addr); + } else +#endif /* INET6 */ + { + xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); + xchg(nth->th_dport, nth->th_sport, uint16_t); + } + +#undef xchg + } /* valid_m */ + +#ifdef INET6 + if (isipv6) { + ip6->ofp_ip6_flow = 0; + ip6->ofp_ip6_vfc = OFP_IPV6_VERSION; + ip6->ofp_ip6_nxt = OFP_IPPROTO_TCP; + ip6->ofp_ip6_plen = odp_cpu_to_be_16(sizeof(struct ofp_tcphdr)); + tlen += sizeof (struct ofp_ip6_hdr) + sizeof (struct ofp_tcphdr); + } +#endif +#ifdef INET6 + else +#endif + { + tlen += sizeof (struct tcpiphdr); + ip->ip_len = tlen; + ip->ip_ttl = V_ip_defttl; + if (V_path_mtu_discovery) + ip->ip_off |= OFP_IP_DF; + } + + odp_packet_user_ptr_set(m, NULL); + nth->th_seq = odp_cpu_to_be_32(seq); + nth->th_ack = odp_cpu_to_be_32(ack); + nth->th_x2 = 0; + nth->th_off = sizeof (struct ofp_tcphdr) >> 2; + nth->th_flags = flags; + if (tp != NULL) + nth->th_win = odp_cpu_to_be_16((uint16_t) (win >> tp->rcv_scale)); + else + nth->th_win = 
odp_cpu_to_be_16((uint16_t)win); + nth->th_urp = 0; + + /* HJo FIX + odp_packet_csum_data(m) = offsetof(struct ofp_tcphdr, th_sum); + */ +#ifdef INET6 + if (isipv6) { + odp_packet_set_csum_flags(m, CSUM_TCP_IPV6); + nth->th_sum = 0; + nth->th_sum = ofp_ip6_cksum(m, + tlen - sizeof(struct ofp_ip6_hdr), OFP_IPPROTO_TCP, 0); + ip6->ofp_ip6_hlim = V_ip6_defhlim; /*in6_selecthlim(tp != NULL ? tp->t_inpcb : + NULL, NULL);*/ + } +#endif /* INET6 */ +#ifdef INET6 + else +#endif + { + /* HJo FIX + odp_packet_csum_flags(m) = CSUM_TCP; + nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + odp_cpu_to_be_16((uint16_t)(tlen - sizeof(struct ofp_ip) + ip->ip_p))); + */ + } +#ifdef TCPDEBUG + if (tp == NULL || (inp->inp_socket->so_options & OFP_SO_DEBUG)) + tcp_trace(TA_OUTPUT, 0, tp, (void *)odp_packet_data(m), th, 0); +#endif +#ifdef INET6 + if (isipv6) + (void) ofp_ip6_output(m, NULL); + else +#endif + { + ip->ip_len = odp_cpu_to_be_16(ip->ip_len); + ip->ip_off = odp_cpu_to_be_16(ip->ip_off); + nth->th_sum = 0; + /* nth->th_sum = ofp_in4_cksum(m); output calculates csum */ + (void) ofp_ip_output(m, NULL);// HJo , NULL, ipflags, NULL, inp); + } +} + +/* + * Create a new TCP control block, making an + * empty reassembly queue and hooking it to the argument + * protocol control block. The `inp' parameter must have + * come from the zone allocator set up in ofp_tcp_init(). + */ +struct tcpcb * +ofp_tcp_newtcpcb(struct inpcb *inp) +{ + struct tcpcb_mem *tm; + struct tcpcb *tp; +#ifdef INET6 + int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; +#endif /* INET6 */ + + (void)isipv6; + + tm = uma_zalloc(V_tcpcb_zone, 0); + if (tm == NULL) + return (NULL); + tp = &tm->ofp_tcb; + + /* Initialise cc_var struct for this tcpcb. */ + tp->ccv = &tm->ccv; + tp->ccv->type = OFP_IPPROTO_TCP; + tp->ccv->ccvc.tcp = tp; + +#if 0 /* HJo FIX */ + /* + * Use the current system default CC algorithm. 
+	 */
+	CC_LIST_RLOCK();
+	KASSERT(!OFP_STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
+	CC_ALGO(tp) = CC_DEFAULT();
+	CC_LIST_RUNLOCK();
+
+	if (CC_ALGO(tp)->cb_init != NULL)
+		if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
+			uma_zfree(V_tcpcb_zone, tm);
+			return (NULL);
+		}
+
+	tp->osd = &tm->osd;
+	if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
+		uma_zfree(V_tcpcb_zone, tm);
+		return (NULL);
+	}
+#endif	/* HJo FIX: end of the compiled-out CC-algorithm and khelp/osd setup */
+
+	tp->t_timers = &tm->tt;	/* timers live next to the tcpcb inside the same zone item */
+	/*	OFP_LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
+	tp->t_maxseg = tp->t_maxopd =
+#ifdef INET6
+		isipv6 ? V_tcp_v6mssdflt :
+#endif /* INET6 */
+		V_tcp_mssdflt;
+
+	/* Set up our timeouts. */
+	callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
+	callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
+	callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
+	callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
+	callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
+
+	/*
+	 * NOTE(review): the first assignment below overwrites t_flags while the
+	 * second ORs into it, so a disabled rfc1323 leaves whatever the zone
+	 * item held.  uma_zalloc() was called with flags 0 earlier in this
+	 * function - confirm that items come back zero-filled.
+	 */
+	if (V_tcp_do_rfc1323)
+		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
+	if (V_tcp_do_sack)
+		t_flags_or(tp->t_flags, TF_SACK_PERMIT);
+	OFP_TAILQ_INIT(&tp->snd_holes);	/* empty SACK scoreboard */
+	tp->t_inpcb = inp;	/*  XXX */
+	/*
+	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
+	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
+	 * reasonable initial retransmit time.
+	 */
+	tp->t_srtt = TCPTV_SRTTBASE;
+	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+	tp->t_rttmin = ofp_tcp_rexmit_min;
+	tp->t_rxtcur = TCPTV_RTOBASE;
+	tp->snd_cwnd = OFP_TCP_MAXWIN << OFP_TCP_MAX_WINSHIFT;	/* maximum window at the maximum scale shift */
+	tp->snd_ssthresh = OFP_TCP_MAXWIN << OFP_TCP_MAX_WINSHIFT;	/* same starting value as snd_cwnd */
+	tp->t_rcvtime = ofp_timer_ticks(0);
+	/*
+	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
+	 * because the socket may be bound to an IPv6 wildcard address,
+	 * which may match an IPv4-mapped IPv6 address.
+	 */
+	inp->inp_ip_ttl = V_ip_defttl;
+	inp->inp_ppcb = tp;	/* back-link: the inpcb now points at its tcpcb */
+	return (tp);		/* XXX */
+}
+
+#if 0 /* HJo */
+/*
+ * Switch the congestion control algorithm back to NewReno for any active
+ * control blocks using an algorithm which is about to go away.
+ * This ensures the CC framework can allow the unload to proceed without leaving
+ * any dangling pointers which would trigger a panic.
+ * Returning non-zero would inform the CC framework that something went wrong
+ * and it would be unsafe to allow the unload to proceed. However, there is no
+ * way for this to occur with this implementation so we always return zero.
+ */
+int
+tcp_ccalgounload(struct cc_algo *unload_algo)
+{
+	struct cc_algo *tmpalgo;
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	VNET_ITERATOR_DECL(vnet_iter);
+
+	/*
+	 * Check all active control blocks across all network stacks and change
+	 * any that are using "unload_algo" back to NewReno. If "unload_algo"
+	 * requires cleanup code to be run, call it.
+	 */
+	VNET_LIST_RLOCK();
+	VNET_FOREACH(vnet_iter) {
+		CURVNET_SET(vnet_iter);
+		INP_INFO_RLOCK(&V_tcbinfo);
+		/*
+		 * New connections already part way through being initialised
+		 * with the CC algo we're removing will not race with this code
+		 * because the INP_INFO_WLOCK is held during initialisation. We
+		 * therefore don't enter the loop below until the connection
+		 * list has stabilised.
+		 */
+		OFP_LIST_FOREACH(inp, &V_tcb, inp_list) {
+			INP_WLOCK(inp);
+			/* Important to skip tcptw structs. */
+			if (!(inp->inp_flags & INP_TIMEWAIT) &&
+			    (tp = intotcpcb(inp)) != NULL) {
+				/*
+				 * By holding INP_WLOCK here, we are assured
+				 * that the connection is not currently
+				 * executing inside the CC module's functions
+				 * i.e. it is safe to make the switch back to
+				 * NewReno.
+				 */
+				if (CC_ALGO(tp) == unload_algo) {
+					tmpalgo = CC_ALGO(tp);
+					/* NewReno does not require any init.
*/
+					CC_ALGO(tp) = &newreno_cc_algo;
+					if (tmpalgo->cb_destroy != NULL)
+						tmpalgo->cb_destroy(tp->ccv);
+				}
+			}
+			INP_WUNLOCK(inp);
+		}
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		CURVNET_RESTORE();
+	}
+	VNET_LIST_RUNLOCK();
+
+	return (0);
+}
+#endif
+
+/*
+ * Drop a TCP connection, reporting
+ * the specified error.  If connection is synchronized,
+ * then send a RST to peer.
+ *
+ * Returns whatever ofp_tcp_close() returns: tp, or NULL when the close
+ * released the socket.
+ */
+struct tcpcb *
+ofp_tcp_drop(struct tcpcb *tp, int err)
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	if (TCPS_HAVERCVDSYN(tp->t_state)) {
+		tp->t_state = TCPS_CLOSED;
+		(void) ofp_tcp_output(tp);	/* output runs with the state already CLOSED; presumably this emits the RST - confirm */
+		TCPSTAT_INC(tcps_drops);
+	} else
+		TCPSTAT_INC(tcps_conndrops);
+	if (err == OFP_ETIMEDOUT && tp->t_softerror)	/* a recorded soft error replaces a plain timeout */
+		err = tp->t_softerror;
+	so->so_error = err;
+	return (ofp_tcp_close(tp));
+}
+/*
+ * Tear down a TCP control block: stop its timers, record RTT/ssthresh
+ * metrics through tcp_hc_update() once at least 4 RTT updates were seen,
+ * flush the reassembly queue and the SACK holes, unlink the tcpcb from its
+ * inpcb and return it to the tcpcb zone.  tp is freed on return.
+ */
+void
+ofp_tcp_discardcb(struct tcpcb *tp)
+{
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+#ifdef INET6
+	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+
+	INP_WLOCK_ASSERT(inp);
+
+	/*
+	 * Make sure that all of our timers are stopped before we delete the
+	 * PCB.
+	 *
+	 * XXXRW: Really, we would like to use callout_drain() here in order
+	 * to avoid races experienced in tcp_timer.c where a timer is already
+	 * executing at this point.  However, we can't, both because we're
+	 * running in a context where we can't sleep, and also because we
+	 * hold locks required by the timers.  What we instead need to do is
+	 * test to see if callout_drain() is required, and if so, defer some
+	 * portion of the remainder of ofp_tcp_discardcb() to an asynchronous
+	 * context that can callout_drain() and then continue.  Some care
+	 * will be required to ensure that no further processing takes place
+	 * on the tcpcb, even though it hasn't been freed (a flag?).
+	 */
+	callout_stop(&tp->t_timers->tt_rexmt);
+	callout_stop(&tp->t_timers->tt_persist);
+	callout_stop(&tp->t_timers->tt_keep);
+	callout_stop(&tp->t_timers->tt_2msl);
+	callout_stop(&tp->t_timers->tt_delack);
+#ifdef PASSIVE_INET
+	callout_stop(&tp->t_timers->tt_reassdl);
+#endif
+	/*
+	 * If we got enough samples through the srtt filter,
+	 * save the rtt and rttvar in the routing entry.
+	 * 'Enough' is arbitrarily defined as 4 rtt samples.
+	 * 4 samples is enough for the srtt filter to converge
+	 * to within enough % of the correct value; fewer samples
+	 * and we could save a bogus rtt. The danger is not high
+	 * as tcp quickly recovers from everything.
+	 * XXX: Works very well but needs some more statistics!
+	 */
+	if (tp->t_rttupdated >= 4) {
+		struct hc_metrics_lite metrics;
+		uint64_t ssthresh;
+
+		bzero(&metrics, sizeof(metrics));
+		/*
+		 * Update the ssthresh always when the conditions below
+		 * are satisfied. This gives us better new start value
+		 * for the congestion avoidance for new connections.
+		 * ssthresh is only set if packet loss occurred on a session.
+		 *
+		 * XXXRW: 'so' may be NULL here, and/or socket buffer may be
+		 * being torn down.  Ideally this code would not use 'so'.
+		 */
+		ssthresh = tp->snd_ssthresh;
+		if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {	/* NOTE(review): dereferences 'so' with no NULL check, see XXXRW above */
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (ssthresh < 2)
+				ssthresh = 2;
+			ssthresh *= (uint64_t)(tp->t_maxseg +
+#ifdef INET6
+			    (isipv6 ? sizeof (struct ofp_ip6_hdr) +
+			       sizeof (struct ofp_tcphdr) :
+#endif
+			     sizeof (struct tcpiphdr)
+#ifdef INET6
+			    )
+#endif
+			    );
+		} else
+			ssthresh = 0;
+		metrics.rmx_ssthresh = ssthresh;
+
+		metrics.rmx_rtt = tp->t_srtt;
+		metrics.rmx_rttvar = tp->t_rttvar;
+		metrics.rmx_cwnd = tp->snd_cwnd;
+		metrics.rmx_sendpipe = 0;
+		metrics.rmx_recvpipe = 0;
+
+		tcp_hc_update(&inp->inp_inc, &metrics);	/* NOTE(review): tcp_hc_init() is commented out in ofp_tcp_init() - confirm what this call does in this port */
+	}
+
+	/* free the reassembly queue, if any */
+	ofp_tcp_reass_flush(tp);
+#if 0 /* HJo */
+	/* Disconnect offload device, if any. */
+	tcp_offload_detach(tp);
+#endif
+	ofp_tcp_free_sackholes(tp);
+
+#if 0 /* HJo */
+	/* Allow the CC algorithm to clean up after itself. */
+	if (CC_ALGO(tp)->cb_destroy != NULL)
+		CC_ALGO(tp)->cb_destroy(tp->ccv);
+	khelp_destroy_osd(tp->osd);
+
+	CC_ALGO(tp) = NULL;
+#endif
+	inp->inp_ppcb = NULL;	/* break the inpcb <-> tcpcb links in both directions */
+	tp->t_inpcb = NULL;
+
+	uma_zfree(V_tcpcb_zone, tp);	/* the zone item is a struct tcpcb_mem; tp is the address of its first member */
+}
+
+/*
+ * Attempt to close a TCP control block, marking it as dropped, and freeing
+ * the socket if we hold the only reference.
+ *
+ * Returns NULL when INP_SOCKREF was set and the socket reference was
+ * released here through ofp_sofree(); the inpcb write lock has been
+ * dropped on that path.  Otherwise returns tp.
+ */
+struct tcpcb *
+ofp_tcp_close(struct tcpcb *tp)
+{
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so;
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(inp);
+
+	/* Notify any offload devices of listener close */
+	ofp_in_pcbdrop(inp);
+	TCPSTAT_INC(tcps_closed);
+	KASSERT(inp->inp_socket != NULL, ("ofp_tcp_close: inp_socket NULL"));
+	so = inp->inp_socket;
+
+	ofp_soisdisconnected(so);
+	if (inp->inp_flags & INP_SOCKREF) {
+		KASSERT(so->so_state & SS_PROTOREF,
+		    ("ofp_tcp_close: !SS_PROTOREF"));
+		inp->inp_flags &= ~INP_SOCKREF;
+		INP_WUNLOCK(inp);	/* released before the accept and socket locks are taken */
+		ACCEPT_LOCK();
+		OFP_SOCK_LOCK(so);
+		so->so_state &= ~SS_PROTOREF;
+		ofp_sofree(so);	/* no unlock follows here; presumably ofp_sofree() releases both locks - confirm */
+		return (NULL);
+	}
+	return (tp);
+}
+/*
+ * Memory-pressure hook.  Currently a no-op: it only tests do_tcpdrain and
+ * returns in either case.
+ */
+void
+ofp_tcp_drain(void)
+{
+	if (!do_tcpdrain)
+		return;
+}
+
+#if 0
+/*
+ * Notify a tcp user of an asynchronous error;
+ * store error as soft error, but wake up user
+ * (for now, won't do anything until can select for soft error).
+ * + * Do not wake up user since there currently is no mechanism for + * reporting soft errors (yet - a kqueue filter may be added). + */ +static struct inpcb * +tcp_notify(struct inpcb *inp, int error) +{ + struct tcpcb *tp; + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(inp); + + if ((inp->inp_flags & INP_TIMEWAIT) || + (inp->inp_flags & INP_DROPPED)) + return (inp); + + tp = intotcpcb(inp); + KASSERT(tp != NULL, ("tcp_notify: tp == NULL")); + + /* + * Ignore some errors if we are hooked up. + * If connection hasn't completed, has retransmitted several times, + * and receives a second error, give up now. This is better + * than waiting a long time to establish a connection that + * can never complete. + */ + if (tp->t_state == TCPS_ESTABLISHED && + (error == OFP_EHOSTUNREACH || error == OFP_ENETUNREACH || + error == OFP_EHOSTDOWN)) { + return (inp); + } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && + tp->t_softerror) { + tp = ofp_tcp_drop(tp, error); + if (tp != NULL) + return (inp); + else + return (NULL); + } else { + tp->t_softerror = error; + return (inp); + } + + wakeup( &so->so_timeo); + sorwakeup(so); + sowwakeup(so); +} +#endif + +#if 0 /* HJo */ +static int +tcp_pcblist(OFP_SYSCTL_HANDLER_ARGS) +{ + int error, i, m, n, pcb_count; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == NULL) { + n = V_tcbinfo.ipi_count + ofp_syncache_pcbcount(); + n += imax(n / 8, 10); + req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb); + return (0); + } + + if (req->newptr != NULL) + return (OFP_EPERM); + + /* + * OK, now we're committed to doing something. 
+ */ + INP_INFO_RLOCK(&V_tcbinfo); + gencnt = V_tcbinfo.ipi_gencnt; + n = V_tcbinfo.ipi_count; + INP_INFO_RUNLOCK(&V_tcbinfo); + + m = ofp_syncache_pcbcount(); + + error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + + (n + m) * sizeof(struct xtcpcb)); + if (error != 0) + return (error); + + xig.xig_len = sizeof xig; + xig.xig_count = n + m; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) + return (error); + + error = syncache_pcblist(req, m, &pcb_count); + if (error) + return (error); + + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == NULL) + return (OFP_ENOMEM); + + INP_INFO_RLOCK(&V_tcbinfo); + for (inp = OFP_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; + inp != NULL && i < n; inp = OFP_LIST_NEXT(inp, inp_list)) { + INP_WLOCK(inp); + if (inp->inp_gencnt <= gencnt) { + /* + * XXX: This use of cr_cansee(), introduced with + * TCP state changes, is not quite right, but for + * now, better than nothing. + */ + if (inp->inp_flags & INP_TIMEWAIT) { + if (intotw(inp) != NULL) + error = cr_cansee(req->td->td_ucred, + intotw(inp)->tw_cred); + else + error = OFP_EINVAL; /* Skip this inp. 
*/ + } else + error = cr_canseeinpcb(req->td->td_ucred, inp); + if (error == 0) { + ofp_in_pcbref(inp); + inp_list[i++] = inp; + } + } + INP_WUNLOCK(inp); + } + INP_INFO_RUNLOCK(&V_tcbinfo); + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + INP_RLOCK(inp); + if (inp->inp_gencnt <= gencnt) { + struct xtcpcb xt; + void *inp_ppcb; + + bzero(&xt, sizeof(xt)); + xt.xt_len = sizeof xt; + /* XXX should avoid extra copy */ + bcopy(inp, &xt.xt_inp, sizeof *inp); + inp_ppcb = inp->inp_ppcb; + if (inp_ppcb == NULL) + bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + else if (inp->inp_flags & INP_TIMEWAIT) { + bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + xt.xt_tp.t_state = TCPS_TIME_WAIT; + } else { + bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); + if (xt.xt_tp.t_timers) + tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); + } + if (inp->inp_socket != NULL) + sotoxsocket(inp->inp_socket, &xt.xt_socket); + else { + bzero(&xt.xt_socket, sizeof xt.xt_socket); + xt.xt_socket.xso_protocol = OFP_IPPROTO_TCP; + } + xt.xt_inp.inp_gencnt = inp->inp_gencnt; + INP_RUNLOCK(inp); + error = SYSCTL_OUT(req, &xt, sizeof xt); + } else + INP_RUNLOCK(inp); + } + INP_INFO_WLOCK(&V_tcbinfo); + for (i = 0; i < n; i++) { + inp = inp_list[i]; + INP_RLOCK(inp); + if (!ofp_in_pcbrele_rlocked(inp)) + INP_RUNLOCK(inp); + } + INP_INFO_WUNLOCK(&V_tcbinfo); + + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. 
+ */ + INP_INFO_RLOCK(&V_tcbinfo); + xig.xig_gen = V_tcbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = V_tcbinfo.ipi_count + pcb_count; + INP_INFO_RUNLOCK(&V_tcbinfo); + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + free(inp_list, M_TEMP); + return (error); +} + +OFP_SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, + OFP_CTLTYPE_OPAQUE | OFP_CTLFLAG_RD, NULL, 0, + tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); + +static int +tcp_getcred(OFP_SYSCTL_HANDLER_ARGS) +{ + struct xucred xuc; + struct ofp_sockaddr_in addrs[2]; + struct inpcb *inp; + int error; + + error = priv_check(req->td, PRIV_NETINET_GETCRED); + if (error) + return (error); + error = SYSCTL_IN(req, addrs, sizeof(addrs)); + if (error) + return (error); + inp = ofp_in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, + addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL); + if (inp != NULL) { + if (inp->inp_socket == NULL) + error = OFP_ENOENT; + if (error == 0) + error = cr_canseeinpcb(req->td->td_ucred, inp); + if (error == 0) + cru2x(inp->inp_cred, &xuc); + INP_RUNLOCK(inp); + } else + error = OFP_ENOENT; + if (error == 0) + error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); + return (error); +} + +#ifdef _INET6 +static int +tcp6_getcred(OFP_SYSCTL_HANDLER_ARGS) +{ + struct xucred xuc; + struct ofp_sockaddr_in6 addrs[2]; + struct inpcb *inp; + int error; +#ifdef INET + int mapped = 0; +#endif + + error = priv_check(req->td, PRIV_NETINET_GETCRED); + if (error) + return (error); + error = SYSCTL_IN(req, addrs, sizeof(addrs)); + if (error) + return (error); + if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || + (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { + return (error); + } + if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { +#ifdef INET + if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) + mapped = 1; + else +#endif + return (OFP_EINVAL); + } + +#ifdef INET + if (mapped == 1) + inp = ofp_in_pcblookup(&V_tcbinfo, 
+ *(struct ofp_in_addr *)&addrs[1].sin6_addr.s6_addr[12], + addrs[1].sin6_port, + *(struct ofp_in_addr *)&addrs[0].sin6_addr.s6_addr[12], + addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); + else +#endif + inp = in6_pcblookup(&V_tcbinfo, + &addrs[1].sin6_addr, addrs[1].sin6_port, + &addrs[0].sin6_addr, addrs[0].sin6_port, + INPLOOKUP_RLOCKPCB, NULL); + if (inp != NULL) { + if (inp->inp_socket == NULL) + error = OFP_ENOENT; + if (error == 0) + error = cr_canseeinpcb(req->td->td_ucred, inp); + if (error == 0) + cru2x(inp->inp_cred, &xuc); + INP_RUNLOCK(inp); + } else + error = OFP_ENOENT; + if (error == 0) + error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); + return (error); +} + +OFP_SYSCTL_PROC(_net_inet6_tcp6, OFP_OID_AUTO, getcred, + OFP_CTLTYPE_OPAQUE|OFP_CTLFLAG_RW|OFP_CTLFLAG_PRISON, 0, 0, + tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection"); +#endif /* INET6 */ +#endif /* HJo */ + + +#if 0 +/* + * Return the next larger or smaller MTU plateau (table from RFC 1191) + * given current value MTU. If DIR is less than zero, a larger plateau + * is returned; otherwise, a smaller value is returned. 
+ */ +static int +ip_next_mtu(int mtu, int dir) +{ + static int mtutab[] = { + 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508, + 296, 68, 0 + }; + int i, size; + + size = (sizeof mtutab) / (sizeof mtutab[0]); + if (dir >= 0) { + for (i = 0; i < size; i++) + if (mtu > mtutab[i]) + return mtutab[i]; + } else { + for (i = size - 1; i >= 0; i--) + if (mtu < mtutab[i]) + return mtutab[i]; + if (mtu == mtutab[0]) + return mtutab[0]; + } + return 0; +} +#endif + +#ifdef INET +void +ofp_tcp_ctlinput(int cmd, struct ofp_sockaddr *sa, void *vip) +{ + (void)cmd; + (void)sa; + (void)vip; + OFP_LOG("UNIMPLEMENTED FUNCTION CALLED!\n"); +#if 0 /* HJo: FIX */ + struct ofp_ip *ip = vip; + struct ofp_tcphdr *th; + struct ofp_in_addr faddr; + struct inpcb *inp; + struct tcpcb *tp; + struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; + struct ofp_icmp *icp; + struct in_conninfo inc; + tcp_seq icmp_tcp_seq; + int mtu; + + faddr = ((struct ofp_sockaddr_in *)sa)->sin_addr; + if (sa->sa_family != OFP_AF_INET || faddr.s_addr == OFP_INADDR_ANY) + return; + + if (cmd == PRC_MSGSIZE) + notify = tcp_mtudisc_notify; + else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || + cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) + notify = ofp_tcp_drop_syn_sent; + /* + * Redirects don't need to be handled up here. + */ + else if (PRC_IS_REDIRECT(cmd)) + return; + /* + * Source quench is depreciated. + */ + else if (cmd == PRC_QUENCH) + return; + /* + * Hostdead is ugly because it goes linearly through all PCBs. + * XXX: We never get this from ICMP, otherwise it makes an + * excellent DoS attack on machines with many connections. 
+ */ + else if (cmd == PRC_HOSTDEAD) + ip = NULL; + else if ((unsigned)cmd >= PRC_NCMDS || ofp_inetctlerrmap[cmd] == 0) + return; + if (ip != NULL) { + icp = (struct ofp_icmp *)((char *)ip + - offsetof(struct ofp_icmp, ofp_icmp_ip)); + th = (struct ofp_tcphdr *)((char *)ip + + (ip->ip_hl << 2)); + INP_INFO_WLOCK(&V_tcbinfo); + inp = ofp_in_pcblookup(&V_tcbinfo, faddr, th->th_dport, + ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); + if (inp != NULL) { + if (!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED) && + !(inp->inp_socket == NULL)) { + icmp_tcp_seq = odp_cpu_to_be_32(th->th_seq); + tp = intotcpcb(inp); + if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && + SEQ_LT(icmp_tcp_seq, tp->snd_max)) { + if (cmd == PRC_MSGSIZE) { + /* + * MTU discovery: + * If we got a needfrag set the MTU + * in the route to the suggested new + * value (if given) and then notify. + */ + bzero(&inc, sizeof(inc)); + inc.inc_faddr = faddr; + inc.inc_fibnum = + inp->inp_inc.inc_fibnum; + + mtu = odp_be_to_cpu_16(icp->ofp_icmp_nextmtu); + /* + * If no alternative MTU was + * proposed, try the next smaller + * one. ip->ip_len has already + * been swapped in icmp_input(). + */ + if (!mtu) + mtu = ip_next_mtu(ip->ip_len, + 1); + if (mtu < (int)(V_tcp_minmss + + sizeof(struct tcpiphdr))) + mtu = V_tcp_minmss + + sizeof(struct tcpiphdr); + /* + * Only cache the MTU if it + * is smaller than the interface + * or route MTU. ofp_tcp_mtudisc() + * will do right thing by itself. 
+ */ + if (mtu <= (int)ofp_tcp_maxmtu(&inc, NULL)) + tcp_hc_updatemtu(&inc, mtu); + ofp_tcp_mtudisc(inp, mtu); + } else + inp = (*notify)(inp, + ofp_inetctlerrmap[cmd]); + } + } + if (inp != NULL) + INP_WUNLOCK(inp); + } else { + bzero(&inc, sizeof(inc)); + inc.inc_fport = th->th_dport; + inc.inc_lport = th->th_sport; + inc.inc_faddr = faddr; + inc.inc_laddr = ip->ip_src; + ofp_syncache_unreach(&inc, th); + } + INP_INFO_WUNLOCK(&V_tcbinfo); + } else + in_pcbnotifyall(&V_tcbinfo, faddr, ofp_inetctlerrmap[cmd], notify); +#endif +} +#endif /* INET */ + +#ifdef _INET6 +void +tcp6_ctlinput(int cmd, struct ofp_sockaddr *sa, void *d) +{ + struct ofp_tcphdr th; + struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; + struct ip6_hdr *ip6; + odp_packet_t m; + struct ip6ctlparam *ip6cp = NULL; + const struct ofp_sockaddr_in6 *sa6_src = NULL; + int off; + struct tcp_portonly { + uint16_t th_sport; + uint16_t th_dport; + } *thp; + + if (sa->sa_family != OFP_AF_INET6 || + sa->sa_len != sizeof(struct ofp_sockaddr_in6)) + return; + + if (cmd == PRC_MSGSIZE) + notify = tcp_mtudisc_notify; + else if (!PRC_IS_REDIRECT(cmd) && + ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) + return; + /* Source quench is depreciated. */ + else if (cmd == PRC_QUENCH) + return; + + /* if the parameter is from icmp6, decode it. */ + if (d != NULL) { + ip6cp = (struct ip6ctlparam *)d; + m = ip6cp->ip6c_m; + ip6 = ip6cp->ip6c_ip6; + off = ip6cp->ip6c_off; + sa6_src = ip6cp->ip6c_src; + } else { + m = NULL; + ip6 = NULL; + off = 0; /* fool gcc */ + sa6_src = &sa6_any; + } + + if (ip6 != NULL) { + struct in_conninfo inc; + /* + * XXX: We assume that when IPV6 is non NULL, + * M and OFF are valid. 
+ */ + + /* check if we can safely examine src and dst ports */ + if (odp_packet_get_len(m) < off + sizeof(*thp)) + return; + + bzero(&th, sizeof(th)); + m_copydata(m, off, sizeof(*thp), (char *)&th); + + in6_pcbnotify(&V_tcbinfo, sa, th.th_dport, + (struct ofp_sockaddr *)ip6cp->ip6c_src, + th.th_sport, cmd, NULL, notify); + + bzero(&inc, sizeof(inc)); + inc.inc_fport = th.th_dport; + inc.inc_lport = th.th_sport; + inc.inc6_faddr = ((struct ofp_sockaddr_in6 *)sa)->sin6_addr; + inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; + inc.inc_flags |= INC_ISIPV6; + INP_INFO_WLOCK(&V_tcbinfo); +#ifdef PROMISCUOUS_INET + /* XXX need to pass mbuf here */ + ofp_syncache_unreach(&inc, &th, NULL); +#else + ofp_syncache_unreach(&inc, &th); +#endif /* PROMISCUOUS_INET */ + INP_INFO_WUNLOCK(&V_tcbinfo); + } else + in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct ofp_sockaddr *)sa6_src, + 0, cmd, NULL, notify); +} +#endif /* INET6 */ + + +/* + * Following is where TCP initial sequence number generation occurs. + * + * There are two places where we must use initial sequence numbers: + * 1. In SYN-ACK packets. + * 2. In SYN packets. + * + * All ISNs for SYN-ACK packets are generated by the syncache. See + * tcp_syncache.c for details. + * + * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling + * depends on this property. In addition, these ISNs should be + * unguessable so as to prevent connection hijacking. To satisfy + * the requirements of this situation, the algorithm outlined in + * RFC 1948 is used, with only small modifications. + * + * Implementation details: + * + * Time is based off the system timer, and is corrected so that it + * increases by one megabyte per second. This allows for proper + * recycling on high speed LANs while still leaving over an hour + * before rollover. + * + * As reading the *exact* system time is too expensive to be done + * whenever setting up a TCP connection, we increment the time + * offset in two ways. 
First, a small random positive increment + * is added to isn_offset for each connection that is set up. + * Second, the function tcp_isn_tick fires once per clock tick + * and increments isn_offset as necessary so that sequence numbers + * are incremented at approximately ISN_BYTES_PER_SECOND. The + * random positive increments serve only to ensure that the same + * exact sequence number is never sent out twice (as could otherwise + * happen when a port is recycled in less than the system tick + * interval.) + * + * net.inet.tcp.isn_reseed_interval controls the number of seconds + * between seeding of isn_secret. This is normally set to zero, + * as reseeding should not be necessary. + * + * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, + * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In + * general, this means holding an exclusive (write) lock. + */ + +#define ISN_BYTES_PER_SECOND 1048576 +#define ISN_STATIC_INCREMENT 4096 +#define ISN_RANDOM_INCREMENT (4096 - 1) + +static VNET_DEFINE(uint8_t, isn_secret[32]); +static VNET_DEFINE(int, isn_last); +static VNET_DEFINE(int, isn_last_reseed); +static VNET_DEFINE(uint32_t, isn_offset); +static VNET_DEFINE(uint32_t, isn_offset_old); + +#define V_isn_secret VNET(isn_secret) +#define V_isn_last VNET(isn_last) +#define V_isn_last_reseed VNET(isn_last_reseed) +#define V_isn_offset VNET(isn_offset) +#define V_isn_offset_old VNET(isn_offset_old) + +tcp_seq +ofp_tcp_new_isn(struct tcpcb *tp) +{ + MD5_CTX isn_ctx; + uint32_t md5_buffer[4]; + tcp_seq new_isn; + uint32_t projected_offset; + uint64_t cpucycles; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + ISN_LOCK(); + /* Seed if this is the first use, reseed if requested. 
*/ + if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) && + (((uint32_t)V_isn_last_reseed + (uint32_t)V_tcp_isn_reseed_interval*hz) + < (uint32_t)ofp_timer_ticks(0)))) { + /* HJo: read_random(&V_isn_secret, sizeof(V_isn_secret));*/ + cpucycles = ofp_timer_ticks(0); + bcopy(&cpucycles, V_isn_secret, sizeof(cpucycles)); + V_isn_last_reseed = ofp_timer_ticks(0); + } + + /* Compute the md5 hash and return the ISN. */ + ofp_MD5Init(&isn_ctx); + ofp_MD5Update(&isn_ctx, (uint8_t *) &tp->t_inpcb->inp_fport, sizeof(uint16_t)); + ofp_MD5Update(&isn_ctx, (uint8_t *) &tp->t_inpcb->inp_lport, sizeof(uint16_t)); +#ifdef INET6 + if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { + ofp_MD5Update(&isn_ctx, (uint8_t *) &tp->t_inpcb->in6p_faddr, + sizeof(struct ofp_in6_addr)); + ofp_MD5Update(&isn_ctx, (uint8_t *) &tp->t_inpcb->in6p_laddr, + sizeof(struct ofp_in6_addr)); + } else +#endif + { + ofp_MD5Update(&isn_ctx, (uint8_t *) &tp->t_inpcb->inp_faddr, + sizeof(struct ofp_in_addr)); + ofp_MD5Update(&isn_ctx, (uint8_t *) &tp->t_inpcb->inp_laddr, + sizeof(struct ofp_in_addr)); + } + ofp_MD5Update(&isn_ctx, (uint8_t *) &V_isn_secret, sizeof(V_isn_secret)); + ofp_MD5Final((uint8_t *) &md5_buffer, &isn_ctx); + new_isn = (tcp_seq) md5_buffer[0]; + V_isn_offset += ISN_STATIC_INCREMENT + /* + (arc4random() & ISN_RANDOM_INCREMENT)*/; + if (ofp_timer_ticks(0) != V_isn_last) { + projected_offset = V_isn_offset_old + + ISN_BYTES_PER_SECOND / hz * (ofp_timer_ticks(0) - V_isn_last); +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) + if (SEQ_GT(projected_offset, V_isn_offset)) + V_isn_offset = projected_offset; + V_isn_offset_old = V_isn_offset; + V_isn_last = ofp_timer_ticks(0); + } + new_isn += V_isn_offset; + ISN_UNLOCK(); + return (new_isn); +} + +/* + * When a specific ICMP unreachable message is received and the + * connection state is SYN-SENT, drop the connection. This behavior + * is controlled by the icmp_may_rst sysctl. 
+ */ +struct inpcb * +ofp_tcp_drop_syn_sent(struct inpcb *inp, int err) +{ + struct tcpcb *tp; + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(inp); + + if ((inp->inp_flags & INP_TIMEWAIT) || + (inp->inp_flags & INP_DROPPED)) + return (inp); + + tp = intotcpcb(inp); + if (tp->t_state != TCPS_SYN_SENT) + return (inp); + + tp = ofp_tcp_drop(tp, err); + if (tp != NULL) + return (inp); + else + return (NULL); +} + +#if 0 +/* + * When `need fragmentation' ICMP is received, update our idea of the MSS + * based on the new value. Also nudge TCP to send something, since we + * know the packet we just sent was dropped. + * This duplicates some code in the ofp_tcp_mss() function in ofp_tcp_input.c. + */ +static struct inpcb * +tcp_mtudisc_notify(struct inpcb *inp, int err) +{ + (void)err; + return (ofp_tcp_mtudisc(inp, -1)); +} +#endif + +struct inpcb * +ofp_tcp_mtudisc(struct inpcb *inp, int mtuoffer) +{ + struct tcpcb *tp; + struct socket *so; + + INP_WLOCK_ASSERT(inp); + if ((inp->inp_flags & INP_TIMEWAIT) || + (inp->inp_flags & INP_DROPPED)) + return (inp); + + tp = intotcpcb(inp); + KASSERT(tp != NULL, ("ofp_tcp_mtudisc: tp == NULL")); + + ofp_tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); + + so = inp->inp_socket; + SOCKBUF_LOCK(&so->so_snd); + /* If the mss is larger than the socket buffer, decrease the mss. */ + if (so->so_snd.sb_hiwat < tp->t_maxseg) + tp->t_maxseg = so->so_snd.sb_hiwat; + SOCKBUF_UNLOCK(&so->so_snd); + + TCPSTAT_INC(tcps_mturesent); + tp->t_rtttime = 0; + tp->snd_nxt = tp->snd_una; + ofp_tcp_free_sackholes(tp); + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_SACK_PERMIT) + EXIT_FASTRECOVERY(tp->t_flags); + ofp_tcp_output(tp); + return (inp); +} + +#ifdef INET +/* + * Look-up the routing entry to the peer of this inpcb. If no route + * is found and it cannot be allocated, then return 0. This routine + * is called by TCP routines that access the rmx structure and by + * ofp_tcp_mss_update to get the peer/interface MTU. 
+ */ +u_long +ofp_tcp_maxmtu(struct in_conninfo *inc, int *flags) +{ + (void)inc; + (void)flags; + return 1000; +#if 0 /* HJo: FIX */ + struct route sro; + struct ofp_sockaddr_in *dst; + struct ifnet *ifp = NULL; + uint64_t maxmtu = 0; + + KASSERT(inc != NULL, ("ofp_tcp_maxmtu with NULL in_conninfo pointer")); + + bzero(&sro, sizeof(sro)); + if (inc->inc_faddr.s_addr != OFP_INADDR_ANY) { + dst = (struct ofp_sockaddr_in *)&sro.ro_dst; + dst->sin_family = OFP_AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = inc->inc_faddr; + in_rtalloc_ign(&sro, 0, inc->inc_fibnum); + } + if (sro.ro_rt != NULL) { + ifp = sro.ro_rt->rt_ifp; + if (sro.ro_rt->rt_rmx.rmx_mtu == 0) + maxmtu = ifp->if_mtu; + else + maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu); + + RTFREE(sro.ro_rt); + } + + /* Report additional interface capabilities. */ + if (ifp && (flags != NULL)) { + if (ifp->if_capenable & IFCAP_TSO4 && + ifp->if_hwassist & CSUM_TSO) + *flags |= CSUM_TSO; + } + + + return (maxmtu); +#endif +} +#endif /* INET */ + +#ifdef INET6 +u_long +ofp_tcp_maxmtu6(struct in_conninfo *inc, int *flags) +{ +#if 0 + struct route_in6 sro6; + struct ifnet *ifp; +#endif + uint64_t maxmtu = 1000; + + (void)inc; + (void)flags; + + KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); +#if 0 + bzero(&sro6, sizeof(sro6)); + if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { + sro6.ro_dst.sin6_family = OFP_AF_INET6; + sro6.ro_dst.sin6_len = sizeof(struct ofp_sockaddr_in6); + sro6.ro_dst.sin6_addr = inc->inc6_faddr; + in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum); + } + if (sro6.ro_rt != NULL) { + ifp = sro6.ro_rt->rt_ifp; + if (sro6.ro_rt->rt_rmx.rmx_mtu == 0) + maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp); + else + maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu, + IN6_LINKMTU(sro6.ro_rt->rt_ifp)); + + /* Report additional interface capabilities. 
*/ + if (flags != NULL) { + if (ifp->if_capenable & IFCAP_TSO6 && + ifp->if_hwassist & CSUM_TSO) + *flags |= CSUM_TSO; + } + RTFREE(sro6.ro_rt); + } +#endif + return (maxmtu); +} +#endif /* INET6 */ + +#ifdef IPSEC +/* compute ESP/AH header size for TCP, including outer IP header. */ +size_t +ipsec_hdrsiz_tcp(struct tcpcb *tp) +{ + struct inpcb *inp; + odp_packet_t m; + size_t hdrsiz; + struct ofp_ip *ip; +#ifdef _INET6 + struct ip6_hdr *ip6; +#endif + struct ofp_tcphdr *th; + + if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL)) + return (0); + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (!m) + return (0); + +#ifdef _INET6 + if ((inp->inp_vflag & INP_IPV6) != 0) { + ip6 = (struct ip6_hdr *)odp_packet_data(m); + th = (struct ofp_tcphdr *)(ip6 + 1); + odp_packet_get_len(m) = odp_packet_get_len(m) = + sizeof(struct ip6_hdr) + sizeof(struct ofp_tcphdr); + ofp_tcpip_fillheaders(inp, ip6, th); + hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); + } else +#endif /* INET6 */ + { + ip = (struct ofp_ip *)odp_packet_data(m); + th = (struct ofp_tcphdr *)(ip + 1); + odp_packet_get_len(m) = odp_packet_get_len(m) = sizeof(struct tcpiphdr); + ofp_tcpip_fillheaders(inp, ip, th); + hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); + } + + m_free(m); + return (hdrsiz); +} +#endif /* IPSEC */ + +#ifdef TCP_SIGNATURE +/* + * Callback function invoked by m_apply() to digest TCP segment data + * contained within an mbuf chain. + */ +static int +tcp_signature_apply(void *fstate, void *data, uint32_t len) +{ + + ofp_MD5Update(fstate, (uint8_t *)data, len); + return (0); +} + +/* + * Compute TCP-MD5 hash of a TCP segment. (RFC2385) + * + * Parameters: + * m pointer to head of mbuf chain + * _unused + * len length of TCP segment data, excluding options + * optlen length of TCP segment options + * buf pointer to storage for computed MD5 digest + * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) + * + * We do this over ip, tcphdr, segment data, and the key in the SADB. 
+ * When called from ofp_tcp_input(), we can be sure that th_sum has been + * zeroed out and verified already. + * + * Return 0 if successful, otherwise return -1. + * + * XXX The key is retrieved from the system's OFP_PF_KEY SADB, by keying a + * search with the destination IP address, and a 'magic SPI' to be + * determined by the application. This is hardcoded elsewhere to 1179 + * right now. Another branch of this code exists which uses the SPD to + * specify per-application flows but it is unstable. + */ +int +tcp_signature_compute(odp_packet_t m, int _unused, int len, int optlen, + uint8_t *buf, uint32_t direction) +{ + union sockaddr_union dst; +#ifdef INET + struct ofp_ippseudo ippseudo; +#endif + MD5_CTX ctx; + int doff; + struct ofp_ip *ip; +#ifdef INET + struct ipovly *ipovly; +#endif + struct secasvar *sav; + struct ofp_tcphdr *th; +#ifdef INET6 + struct ofp_ip6_hdr *ip6; + struct ofp_in6_addr in6; + char ip6buf[INET6_ADDRSTRLEN]; + uint32_t plen; + uint16_t nhdr; +#endif + uint16_t savecsum; + + KASSERT(m != NULL, ("NULL mbuf chain")); + KASSERT(buf != NULL, ("NULL signature pointer")); + + /* Extract the destination from the IP header in the mbuf. */ + bzero(&dst, sizeof(union sockaddr_union)); + ip = (struct ofp_ip *)odp_packet_data(m); +#ifdef INET6 + ip6 = NULL; /* Make the compiler happy. */ +#endif + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + dst.sa.sa_len = sizeof(struct ofp_sockaddr_in); + dst.sa.sa_family = OFP_AF_INET; + dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ? + ip->ip_src : ip->ip_dst; + break; +#endif +#ifdef INET6 + case (OFP_IPV6_VERSION >> 4): + ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m); + dst.sa.sa_len = sizeof(struct ofp_sockaddr_in6); + dst.sa.sa_family = OFP_AF_INET6; + dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ? + ip6->ip6_src : ip6->ip6_dst; + break; +#endif + default: + return (OFP_EINVAL); + /* NOTREACHED */ + break; + } + + /* Look up an SADB entry which matches the address of the peer. 
*/ + sav = KEY_ALLOCSA(&dst, OFP_IPPROTO_TCP, odp_cpu_to_be_32(TCP_SIG_SPI)); + if (sav == NULL) { + ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__, + (ip->ip_v == IPVERSION) ? ofp_inet_ntoa(dst.sin.sin_addr) : +#ifdef _INET6 + (ip->ip_v == (IPV6_VERSION >> 4)) ? + ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) : +#endif + "(unsupported)")); + return (OFP_EINVAL); + } + + ofp_MD5Init(&ctx); + /* + * Step 1: Update MD5 hash with IP(v6) pseudo-header. + * + * XXX The ippseudo header MUST be digested in network byte order, + * or else we'll fail the regression test. Assume all fields we've + * been doing arithmetic on have been in host byte order. + * XXX One cannot depend on ipovly->ih_len here. When called from + * ofp_tcp_output(), the underlying ip_len member has not yet been set. + */ + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + ipovly = (struct ipovly *)ip; + ippseudo.ippseudo_src = ipovly->ih_src; + ippseudo.ippseudo_dst = ipovly->ih_dst; + ippseudo.ippseudo_pad = 0; + ippseudo.ippseudo_p = OFP_IPPROTO_TCP; + ippseudo.ippseudo_len = odp_cpu_to_be_16(len + sizeof(struct ofp_tcphdr) + + optlen); + ofp_MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ofp_ippseudo)); + + th = (struct ofp_tcphdr *)((uint8_t *)ip + sizeof(struct ofp_ip)); + doff = sizeof(struct ofp_ip) + sizeof(struct ofp_tcphdr) + optlen; + break; +#endif +#ifdef _INET6 + /* + * RFC 2385, 2.0 Proposal + * For IPv6, the pseudo-header is as described in RFC 2460, namely the + * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero- + * extended next header value (to form 32 bits), and 32-bit segment + * length. + * Note: Upper-Layer Packet Length comes before Next Header. 
+ */ + case (IPV6_VERSION >> 4): + in6 = ip6->ip6_src; + in6_clearscope(&in6); + ofp_MD5Update(&ctx, (char *)&in6, sizeof(struct ofp_in6_addr)); + in6 = ip6->ip6_dst; + in6_clearscope(&in6); + ofp_MD5Update(&ctx, (char *)&in6, sizeof(struct ofp_in6_addr)); + plen = odp_cpu_to_be_32(len + sizeof(struct ofp_tcphdr) + optlen); + ofp_MD5Update(&ctx, (char *)&plen, sizeof(uint32_t)); + nhdr = 0; + ofp_MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); + ofp_MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); + ofp_MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); + nhdr = OFP_IPPROTO_TCP; + ofp_MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); + + th = (struct ofp_tcphdr *)((uint8_t *)ip6 + sizeof(struct ip6_hdr)); + doff = sizeof(struct ip6_hdr) + sizeof(struct ofp_tcphdr) + optlen; + break; +#endif + default: + return (OFP_EINVAL); + /* NOTREACHED */ + break; + } + + + /* + * Step 2: Update MD5 hash with TCP header, excluding options. + * The TCP checksum must be set to zero. + */ + savecsum = th->th_sum; + th->th_sum = 0; + ofp_MD5Update(&ctx, (char *)th, sizeof(struct ofp_tcphdr)); + th->th_sum = savecsum; + + /* + * Step 3: Update MD5 hash with TCP segment data. + * Use m_apply() to avoid an early odp_packet_ensure_contiguous(). + */ + if (len > 0) + m_apply(m, doff, len, tcp_signature_apply, &ctx); + + /* + * Step 4: Update MD5 hash with shared secret. + */ + ofp_MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); + ofp_MD5Final(buf, &ctx); + + key_sa_recordxfer(sav, m); + KEY_FREESAV(&sav); + return (0); +} + +/* + * Verify the TCP-MD5 hash of a TCP segment. (RFC2385) + * + * Parameters: + * m pointer to head of mbuf chain + * len length of TCP segment data, excluding options + * optlen length of TCP segment options + * buf pointer to storage for computed MD5 digest + * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) + * + * Return 1 if successful, otherwise return 0. 
+ */ +int +tcp_signature_verify(odp_packet_t m, int off0, int tlen, int optlen, + struct tcpopt *to, struct ofp_tcphdr *th, uint32_t tcpbflag) +{ + char tmpdigest[TCP_SIGLEN]; + + if (tcp_sig_checksigs == 0) + return (1); + if ((tcpbflag & TF_SIGNATURE) == 0) { + if ((to->to_flags & TOF_SIGNATURE) != 0) { + + /* + * If this socket is not expecting signature but + * the segment contains signature just fail. + */ + TCPSTAT_INC(tcps_sig_err_sigopt); + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + + /* Signature is not expected, and not present in segment. */ + return (1); + } + + /* + * If this socket is expecting signature but the segment does not + * contain any just fail. + */ + if ((to->to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], + IPSEC_DIR_INBOUND) == -1) { + TCPSTAT_INC(tcps_sig_err_buildsig); + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + + if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { + TCPSTAT_INC(tcps_sig_rcvbadsig); + return (0); + } + TCPSTAT_INC(tcps_sig_rcvgoodsig); + return (1); +} +#endif /* TCP_SIGNATURE */ + +#if 0 /* HJo */ +static int +sysctl_drop(OFP_SYSCTL_HANDLER_ARGS) +{ + /* addrs[0] is a foreign socket, addrs[1] is a local one. 
*/ + struct sockaddr_storage addrs[2]; + struct inpcb *inp; + struct tcpcb *tp; + struct tcptw *tw; + struct ofp_sockaddr_in *fin, *lin; +#ifdef _INET6 + struct ofp_sockaddr_in6 *fin6, *lin6; +#endif + int error; + + inp = NULL; + fin = lin = NULL; +#ifdef _INET6 + fin6 = lin6 = NULL; +#endif + error = 0; + + if (req->oldptr != NULL || req->oldlen != 0) + return (OFP_EINVAL); + if (req->newptr == NULL) + return (OFP_EPERM); + if (req->newlen < sizeof(addrs)) + return (OFP_ENOMEM); + error = SYSCTL_IN(req, &addrs, sizeof(addrs)); + if (error) + return (error); + + switch (addrs[0].ss_family) { +#ifdef _INET6 + case OFP_AF_INET6: + fin6 = (struct ofp_sockaddr_in6 *)&addrs[0]; + lin6 = (struct ofp_sockaddr_in6 *)&addrs[1]; + if (fin6->sin6_len != sizeof(struct ofp_sockaddr_in6) || + lin6->sin6_len != sizeof(struct ofp_sockaddr_in6)) + return (OFP_EINVAL); + if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) { + if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr)) + return (OFP_EINVAL); + in6_sin6_2_sin_in_sock((struct ofp_sockaddr *)&addrs[0]); + in6_sin6_2_sin_in_sock((struct ofp_sockaddr *)&addrs[1]); + fin = (struct ofp_sockaddr_in *)&addrs[0]; + lin = (struct ofp_sockaddr_in *)&addrs[1]; + break; + } + error = sa6_embedscope(fin6, V_ip6_use_defzone); + if (error) + return (error); + error = sa6_embedscope(lin6, V_ip6_use_defzone); + if (error) + return (error); + break; +#endif +#ifdef INET + case OFP_AF_INET: + fin = (struct ofp_sockaddr_in *)&addrs[0]; + lin = (struct ofp_sockaddr_in *)&addrs[1]; + if (fin->sin_len != sizeof(struct ofp_sockaddr_in) || + lin->sin_len != sizeof(struct ofp_sockaddr_in)) + return (OFP_EINVAL); + break; +#endif + default: + return (OFP_EINVAL); + } + INP_INFO_WLOCK(&V_tcbinfo); + switch (addrs[0].ss_family) { +#ifdef _INET6 + case OFP_AF_INET6: + inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, + fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, + INPLOOKUP_WLOCKPCB, NULL); + break; +#endif +#ifdef INET + case OFP_AF_INET: + inp = 
ofp_in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, + lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); + break; +#endif + } + if (inp != NULL) { + if (inp->inp_flags & INP_TIMEWAIT) { + /* + * XXXRW: There currently exists a state where an + * inpcb is present, but its timewait state has been + * discarded. For now, don't allow dropping of this + * type of inpcb. + */ + tw = intotw(inp); + if (tw != NULL) + ofp_tcp_twclose(tw, 0); + else + INP_WUNLOCK(inp); + } else if (!(inp->inp_flags & INP_DROPPED) && + !(inp->inp_socket->so_options & OFP_SO_ACCEPTCONN)) { + tp = intotcpcb(inp); + tp = ofp_tcp_drop(tp, OFP_ECONNABORTED); + if (tp != NULL) + INP_WUNLOCK(inp); + } else + INP_WUNLOCK(inp); + } else + error = OFP_ESRCH; + INP_INFO_WUNLOCK(&V_tcbinfo); + return (error); +} + +OFP_SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop, + OFP_CTLTYPE_STRUCT|OFP_CTLFLAG_WR|OFP_CTLFLAG_SKIP, NULL, + 0, sysctl_drop, "", "Drop TCP connection"); +#endif /* HJo */ + +/* + * Generate a standardized TCP log line for use throughout the + * tcp subsystem. Memory allocation is done with M_NOWAIT to + * allow use in the interrupt context. + * + * NB: The caller MUST free(s, M_TCPLOG) the returned string. + * NB: The function may return NULL if memory allocation failed. + * + * Due to header inclusion and ordering limitations the struct ip + * and ip6_hdr pointers have to be passed as void pointers. + */ +char * +ofp_tcp_log_vain(struct in_conninfo *inc, struct ofp_tcphdr *th, void *ip4hdr, + const void *ip6hdr) +{ + + /* Is logging enabled? */ + if (ofp_tcp_log_in_vain == 0) + return (NULL); + + return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); +} + +char * +ofp_tcp_log_addrs(struct in_conninfo *inc, struct ofp_tcphdr *th, void *ip4hdr, + const void *ip6hdr) +{ + + /* Is logging enabled? 
*/ + if (tcp_log_debug == 0) + return (NULL); + + return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); +} + +char *ofp_inet_ntoa(struct ofp_in_addr ina); +char *ofp_inet_ntoa_r(struct ofp_in_addr ina, char *buf); + +char * +ofp_inet_ntoa(struct ofp_in_addr ina) +{ + static char buf[4*sizeof "123"]; + unsigned char *ucp = (unsigned char *)&ina; + + sprintf(buf, "%d.%d.%d.%d", + ucp[0] & 0xff, + ucp[1] & 0xff, + ucp[2] & 0xff, + ucp[3] & 0xff); + return buf; +} + +char * +ofp_inet_ntoa_r(struct ofp_in_addr ina, char *buf) +{ + unsigned char *ucp = (unsigned char *)&ina; + + sprintf(buf, "%d.%d.%d.%d", + ucp[0] & 0xff, + ucp[1] & 0xff, + ucp[2] & 0xff, + ucp[3] & 0xff); + return buf; +} + +#pragma GCC diagnostic ignored "-Wformat" +#pragma GCC diagnostic ignored "-Wformat-extra-args" +#pragma GCC diagnostic ignored "-Wcast-qual" +static char * +tcp_log_addr(struct in_conninfo *inc, struct ofp_tcphdr *th, void *ip4hdr, + const void *ip6hdr) +{ + char *s, *sp; + size_t size; + struct ofp_ip *ip; +#ifdef INET6 + struct ofp_ip6_hdr *ip6; + + ip6 = (struct ofp_ip6_hdr *)ip6hdr; +#else + (void)ip6hdr; +#endif /* INET6 */ + ip = (struct ofp_ip *)ip4hdr; + (void)ip; + + /* + * The log line looks like this: + * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2" + */ + size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") + + sizeof(OFP_PRINT_TH_FLAGS) + 1 + +#ifdef INET6 + 2 * OFP_INET6_ADDRSTRLEN; +#else + 2 * OFP_INET_ADDRSTRLEN; +#endif /* INET6 */ + + s = malloc(size); + if (s == NULL) + return (NULL); + + strcat(s, "TCP: ["); + sp = s + strlen(s); + + if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) { + ofp_inet_ntoa_r(inc->inc_faddr, sp); + sp = s + strlen(s); + sprintf(sp, "]:%i to [", odp_be_to_cpu_16(inc->inc_fport)); + sp = s + strlen(s); + ofp_inet_ntoa_r(inc->inc_laddr, sp); + sp = s + strlen(s); + sprintf(sp, "]:%i", odp_be_to_cpu_16(inc->inc_lport)); +#ifdef INET6 + } else if (inc) { + sprintf(sp, "%s", ofp_print_ip6_addr((uint8_t *)&inc->inc6_faddr)); + sp = s + 
strlen(s); + sprintf(sp, "]:%i to [", odp_be_to_cpu_16(inc->inc_fport)); + sp = s + strlen(s); + sprintf(sp, "%s", ofp_print_ip6_addr((uint8_t *)&inc->inc6_laddr)); + sp = s + strlen(s); + sprintf(sp, "]:%i", odp_be_to_cpu_16(inc->inc_lport)); + } else if (ip6 && th) { + sprintf(sp, "%s", ofp_print_ip6_addr((uint8_t *)&ip6->ip6_src)); + sp = s + strlen(s); + sprintf(sp, "]:%i to [", odp_be_to_cpu_16(th->th_sport)); + sp = s + strlen(s); + sprintf(sp, "%s", ofp_print_ip6_addr((uint8_t *)&ip6->ip6_dst)); + sp = s + strlen(s); + sprintf(sp, "]:%i", odp_be_to_cpu_16(th->th_dport)); +#endif /* INET6 */ +#ifdef INET + } else if (ip && th) { + ofp_inet_ntoa_r(ip->ip_src, sp); + sp = s + strlen(s); + sprintf(sp, "]:%i to [", odp_be_to_cpu_16(th->th_sport)); + sp = s + strlen(s); + ofp_inet_ntoa_r(ip->ip_dst, sp); + sp = s + strlen(s); + sprintf(sp, "]:%i", odp_be_to_cpu_16(th->th_dport)); +#endif /* INET */ + } else { + free(s); + return (NULL); + } + sp = s + strlen(s); + if (th) + sprintf(sp, " tcpflags 0x%b", th->th_flags, OFP_PRINT_TH_FLAGS); + if (*(s + size - 1) != '\0') + panic("string too long"); + return (s); +} +#pragma GCC diagnostic error "-Wformat" +#pragma GCC diagnostic error "-Wformat-extra-args" +#pragma GCC diagnostic warning "-Wcast-qual" diff --git a/src/ofp_tcp_syncache.c b/src/ofp_tcp_syncache.c new file mode 100644 index 00000000..bf8e7853 --- /dev/null +++ b/src/ofp_tcp_syncache.c @@ -0,0 +1,1593 @@ +/*- + * Copyright (c) 2001 McAfee, Inc. + * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Jonathan Lemon + * and McAfee Research, the Security Research Division of McAfee, Inc. under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the + * DARPA CHATS research program. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include "odp.h" + +#include "ofpi_util.h" +#include "ofpi_errno.h" +#include "ofpi_in.h" +#include "ofpi_ip.h" +#include "ofpi_sysctl.h" +#include "ofpi_in_pcb.h" +#include "ofpi_socketvar.h" +#ifdef INET6 +#include "ofpi_ip6.h" +#include "ofpi_icmp6.h" +#include "ofpi_ip6_var.h" +#include "ofpi_in6_pcb.h" +# if 0 +# include +# endif /*0*/ +#endif + +#include "ofpi_tcp.h" +#include "ofpi_tcp_fsm.h" +#include "ofpi_tcp_seq.h" +#include "ofpi_tcp_timer.h" +#include "ofpi_tcp_var.h" +#include "ofpi_tcp_syncache.h" +#ifdef INET6 +#include "ofpi_tcp6_var.h" +#endif +#include "ofpi_pkt_processing.h" +#include "ofpi_md5.h" + +extern int ofp_max_linkhdr; + +#define log(_a, _b...) 
OFP_DBG(_b) + +#define SYSCTL_VNET_INT OFP_SYSCTL_INT +#define SYSCTL_VNET_UINT OFP_SYSCTL_UINT + +static VNET_DEFINE(int, tcp_syncookies) = 1; +#define V_tcp_syncookies VNET(tcp_syncookies) +SYSCTL_VNET_INT(_net_inet_tcp, OFP_OID_AUTO, syncookies, OFP_CTLFLAG_RW, + &VNET_NAME(tcp_syncookies), 0, + "Use TCP SYN cookies if the syncache overflows"); + +static VNET_DEFINE(int, tcp_syncookiesonly) = 0; +#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly) +SYSCTL_VNET_INT(_net_inet_tcp, OFP_OID_AUTO, syncookies_only, OFP_CTLFLAG_RW, + &VNET_NAME(tcp_syncookiesonly), 0, + "Use only TCP SYN cookies"); + +#ifdef TCP_OFFLOAD_DISABLE +#define TOEPCB_ISSET(sc) (0) +#else +#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL) +#endif + +static void syncache_drop(struct syncache *, struct syncache_head *); +static void syncache_free(struct syncache *); +static void syncache_insert(struct syncache *, struct syncache_head *, int); +struct syncache *ofp_syncache_lookup(struct in_conninfo *, struct syncache_head **); +static int syncache_respond(struct syncache *); +static struct socket *syncache_socket(struct syncache *, struct socket *, + odp_packet_t m, struct tcpopt *to); +static void syncache_timeout(struct syncache *sc, struct syncache_head *sch, + int docallout, int timeout_ticks); +/* static void syncache_timer(void *); */ +static void syncookie_generate(struct syncache_head *, struct syncache *, + uint32_t *); +static struct syncache + *syncookie_lookup(struct in_conninfo *, struct syncache_head *, + struct syncache *, struct tcpopt *, struct ofp_tcphdr *, + struct socket *); + +/* + * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies. + * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds, + * the odds are that the user has given up attempting to connect by then. 
+ */ +#define SYNCACHE_MAXREXMTS 3 + +/* Arbitrary values */ +#define TCP_SYNCACHE_HASHSIZE 512 +#define TCP_SYNCACHE_BUCKETLIMIT 30 + +static VNET_DEFINE(struct tcp_syncache, tcp_syncache); +#define V_tcp_syncache VNET(tcp_syncache) + +OFP_SYSCTL_NODE(_net_inet_tcp, OFP_OID_AUTO, syncache, OFP_CTLFLAG_RW, 0, "TCP SYN cache"); + +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OFP_OID_AUTO, bucketlimit, OFP_CTLFLAG_RDTUN, + &VNET_NAME(tcp_syncache.bucket_limit), 0, + "Per-bucket hash limit for syncache"); + +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OFP_OID_AUTO, cachelimit, OFP_CTLFLAG_RDTUN, + &VNET_NAME(tcp_syncache.cache_limit), 0, + "Overall entry limit for syncache"); + +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OFP_OID_AUTO, count, OFP_CTLFLAG_RD, + &VNET_NAME(tcp_syncache.cache_count), 0, + "Current number of entries in syncache"); + +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OFP_OID_AUTO, hashsize, OFP_CTLFLAG_RDTUN, + &VNET_NAME(tcp_syncache.hashsize), 0, + "Size of TCP syncache hashtable"); + +SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OFP_OID_AUTO, rexmtlimit, OFP_CTLFLAG_RW, + &VNET_NAME(tcp_syncache.rexmt_limit), 0, + "Limit on SYN/ACK retransmissions"); + +VNET_DEFINE(int, ofp_tcp_sc_rst_sock_fail) = 1; +SYSCTL_VNET_INT(_net_inet_tcp_syncache, OFP_OID_AUTO, rst_on_sock_fail, + OFP_CTLFLAG_RW, &VNET_NAME(ofp_tcp_sc_rst_sock_fail), 0, + "Send reset on socket allocation failure"); + +//static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache"); + +#define SYNCACHE_HASH(inc, mask) \ + ((V_tcp_syncache.hash_secret ^ \ + (inc)->inc_faddr.s_addr ^ \ + ((inc)->inc_faddr.s_addr >> 16) ^ \ + (inc)->inc_fport ^ (inc)->inc_lport) & mask) + +#define SYNCACHE_HASH6(inc, mask) \ + ((V_tcp_syncache.hash_secret ^ \ + (inc)->inc6_faddr.ofp_s6_addr32[0] ^ \ + (inc)->inc6_faddr.ofp_s6_addr32[3] ^ \ + (inc)->inc_fport ^ (inc)->inc_lport) & mask) + +#define ENDPTS_EQ(a, b) ( \ + (a)->ie_fport == (b)->ie_fport && \ + (a)->ie_lport == (b)->ie_lport && \ + (a)->ie_faddr.s_addr == 
(b)->ie_faddr.s_addr && \ + (a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr \ +) + +#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0) + +#define SCH_LOCK(sch) odp_spinlock_lock(&(sch)->sch_mtx) +#define SCH_UNLOCK(sch) odp_spinlock_unlock(&(sch)->sch_mtx) +#define SCH_LOCK_ASSERT(sch) //mtx_assert(&(sch)->sch_mtx, MA_OWNED) + +/* + * Requires the syncache entry to be already removed from the bucket list. + */ +static void +syncache_free(struct syncache *sc) +{ + + if (sc->sc_ipopts != ODP_PACKET_INVALID) + (void) odp_packet_free(sc->sc_ipopts); + /* HJo + if (sc->sc_cred) + crfree(sc->sc_cred); + */ + uma_zfree(V_tcp_syncache.zone, sc); +} + +void +ofp_syncache_init(void) +{ + int i; + + V_tcp_syncache.cache_count = 0; + V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; + V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; + V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; + V_tcp_syncache.hash_secret = 11235 /*arc4random()*/; + V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1; + + /* Set limits. */ + V_tcp_syncache.cache_limit = + V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit; + + /* Allocate the hash table. */ + V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize * + sizeof(struct syncache_head)); + + /* Initialize the hash buckets. */ + for (i = 0; i < (int)V_tcp_syncache.hashsize; i++) { + OFP_TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket); + odp_spinlock_init(&V_tcp_syncache.hashbase[i].sch_mtx); + /* + callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer, + &V_tcp_syncache.hashbase[i].sch_mtx, 0); + */ + V_tcp_syncache.hashbase[i].sch_length = 0; + } + + /* Create the syncache entry zone. */ + V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit); +} + +/* + * Inserts a syncache entry into the specified bucket row. + * Locks and unlocks the syncache_head autonomously. 
+ */ +static void +syncache_insert(struct syncache *sc, struct syncache_head *sch, int initial_timeout) +{ + struct syncache *sc2; + + SCH_LOCK(sch); + + /* + * Make sure that we don't overflow the per-bucket limit. + * If the bucket is full, toss the oldest element. + */ + if (sch->sch_length >= V_tcp_syncache.bucket_limit) { + KASSERT(!OFP_TAILQ_EMPTY(&sch->sch_bucket), + ("sch->sch_length incorrect")); + sc2 = OFP_TAILQ_LAST(&sch->sch_bucket, sch_head); + syncache_drop(sc2, sch); + TCPSTAT_INC(tcps_sc_bucketoverflow); + } + + /* Put it into the bucket. */ + OFP_TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash); + sch->sch_length++; + + /* Reinitialize the bucket row's timer. */ + if (sch->sch_length == 1) + sch->sch_nextc = ticks + INT_MAX; + syncache_timeout(sc, sch, 1, initial_timeout); + + SCH_UNLOCK(sch); + + V_tcp_syncache.cache_count++; + TCPSTAT_INC(tcps_sc_added); +} + +/* + * Remove and free entry from syncache bucket row. + * Expects locked syncache head. + */ +static void +syncache_drop(struct syncache *sc, struct syncache_head *sch) +{ + + SCH_LOCK_ASSERT(sch); + + OFP_TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); + sch->sch_length--; + +#ifndef TCP_OFFLOAD_DISABLE + if (sc->sc_tu) + sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb); +#endif + syncache_free(sc); + V_tcp_syncache.cache_count--; +} + +/* + * Engage/reengage time on bucket row. 
+ */ +static void +syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout, + int timeout_ticks) +{ + (void)docallout; + + if (timeout_ticks > 0) + sc->sc_rxttime = ticks + timeout_ticks; + else + sc->sc_rxttime = ticks + + TCPTV_RTOBASE * (ofp_tcp_backoff[sc->sc_rxmits]); + + sc->sc_rxmits++; + if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) { + sch->sch_nextc = sc->sc_rxttime; + /* HJo + if (docallout) + callout_reset(&sch->sch_timer, sch->sch_nextc - ticks, + syncache_timer, (void *)sch); + */ + } +} + +#if 0 +/* + * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted. + * If we have retransmitted an entry the maximum number of times, expire it. + * One separate timer for each bucket row. + */ +static void +syncache_timer(void *xsch) +{ + struct syncache_head *sch = (struct syncache_head *)xsch; + struct syncache *sc, *nsc; + int tick = ticks; + //char *s; + + /* NB: syncache_head has already been locked by the callout. */ + SCH_LOCK_ASSERT(sch); + + /* + * In the following cycle we may remove some entries and/or + * advance some timeouts, so re-initialize the bucket timer. + */ + sch->sch_nextc = tick + INT_MAX; + + OFP_TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) { + /* + * We do not check if the listen socket still exists + * and accept the case where the listen socket may be + * gone by the time we resend the SYN/ACK. We do + * not expect this to happens often. If it does, + * then the RST will be sent by the time the remote + * host does the SYN/ACK->ACK. 
+ */ + if (TSTMP_GT(sc->sc_rxttime, tick)) { + if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) + sch->sch_nextc = sc->sc_rxttime; + continue; + } + if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) { + OFP_DBG("Retransmits exhausted, " + "giving up and removing syncache entry\n"); + syncache_drop(sc, sch); + TCPSTAT_INC(tcps_sc_stale); + continue; + } + OFP_DBG("Response timeout, " + "retransmitting SYN|ACK\n"); + + (void) syncache_respond(sc); + TCPSTAT_INC(tcps_sc_retransmitted); + syncache_timeout(sc, sch, 0, -1); + } + if (!OFP_TAILQ_EMPTY(&(sch)->sch_bucket)) + callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick, + syncache_timer, (void *)(sch)); +} +#endif + +/* + * Find an entry in the syncache. + * Returns always with locked syncache_head plus a matching entry or NULL. + */ +struct syncache * +ofp_syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) +{ + struct syncache *sc; + struct syncache_head *sch; + uint32_t hashkey; + +#ifdef INET6 + if (inc->inc_flags & INC_ISIPV6) { + hashkey = SYNCACHE_HASH6(inc, V_tcp_syncache.hashmask); + sch = &V_tcp_syncache.hashbase[hashkey]; + *schp = sch; + + SCH_LOCK(sch); + + /* Circle through bucket row to find matching entry. */ + OFP_TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { + if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) + return (sc); + } + } else +#endif + { + hashkey = SYNCACHE_HASH(inc, V_tcp_syncache.hashmask); + sch = &V_tcp_syncache.hashbase[hashkey]; + *schp = sch; + + SCH_LOCK(sch); + + /* Circle through bucket row to find matching entry. */ + OFP_TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { + +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) + continue; +#endif + if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) + return (sc); + } + } + SCH_LOCK_ASSERT(*schp); + return (NULL); /* always returns with locked sch */ +} + +/* + * This function is called when we get a RST for a + * non-existent connection, so that we can see if the + * connection is in the syn cache. 
If it is, zap it. + */ +void +ofp_syncache_chkrst(struct in_conninfo *inc, struct ofp_tcphdr *th) +{ + struct syncache *sc; + struct syncache_head *sch; + char *s = NULL; + + sc = ofp_syncache_lookup(inc, &sch); /* returns locked sch */ + SCH_LOCK_ASSERT(sch); + + /* + * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags. + * See RFC 793 page 65, section SEGMENT ARRIVES. + */ + if (th->th_flags & (OFP_TH_ACK|OFP_TH_SYN|OFP_TH_FIN)) { + if ((s = ofp_tcp_log_addrs(inc, th, NULL, NULL))) + OFP_DBG("%s; %s: Spurious RST with ACK, SYN or " + "FIN flag set, segment ignored\n", s, __func__); + TCPSTAT_INC(tcps_badrst); + goto done; + } + + /* + * No corresponding connection was found in syncache. + * If syncookies are enabled and possibly exclusively + * used, or we are under memory pressure, a valid RST + * may not find a syncache entry. In that case we're + * done and no SYN|ACK retransmissions will happen. + * Otherwise the RST was misdirected or spoofed. + */ + if (sc == NULL) { + if ((s = ofp_tcp_log_addrs(inc, th, NULL, NULL))) + OFP_DBG("%s; %s: Spurious RST without matching " + "syncache entry (possibly syncookie only), " + "segment ignored\n", s, __func__); + TCPSTAT_INC(tcps_badrst); + goto done; + } + + /* + * If the RST bit is set, check the sequence number to see + * if this is a valid reset segment. + * RFC 793 page 37: + * In all states except SYN-SENT, all reset (RST) segments + * are validated by checking their SEQ-fields. A reset is + * valid if its sequence number is in the window. + * + * The sequence number in the reset segment is normally an + * echo of our outgoing acknowlegement numbers, but some hosts + * send a reset with the sequence number at the rightmost edge + * of our receive window, and we have to handle this case. 
+ */ + if (SEQ_GEQ(th->th_seq, sc->sc_irs) && + SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) { + syncache_drop(sc, sch); + if ((s = ofp_tcp_log_addrs(inc, th, NULL, NULL))) + OFP_DBG("%s; %s: Our SYN|ACK was rejected, " + "connection attempt aborted by remote endpoint\n", + s, __func__); + TCPSTAT_INC(tcps_sc_reset); + } else { + if ((s = ofp_tcp_log_addrs(inc, th, NULL, NULL))) + OFP_DBG("%s; %s: RST with invalid SEQ %u != " + "IRS %u (+WND %u), segment ignored\n", + s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd); + TCPSTAT_INC(tcps_badrst); + } + +done: + SCH_UNLOCK(sch); +} + +void +ofp_syncache_badack(struct in_conninfo *inc) +{ + struct syncache *sc; + struct syncache_head *sch; + + sc = ofp_syncache_lookup(inc, &sch); /* returns locked sch */ + SCH_LOCK_ASSERT(sch); + if (sc != NULL) { + syncache_drop(sc, sch); + TCPSTAT_INC(tcps_sc_badack); + } + SCH_UNLOCK(sch); +} + +void +ofp_syncache_unreach(struct in_conninfo *inc, struct ofp_tcphdr *th) +{ + struct syncache *sc; + struct syncache_head *sch; + + sc = ofp_syncache_lookup(inc, &sch); /* returns locked sch */ + SCH_LOCK_ASSERT(sch); + if (sc == NULL) + goto done; + + /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ + if (odp_be_to_cpu_32(th->th_seq) != sc->sc_iss) + goto done; + + /* + * If we've rertransmitted 3 times and this is our second error, + * we remove the entry. Otherwise, we allow it to continue on. + * This prevents us from incorrectly nuking an entry during a + * spurious network outage. + * + * See tcp_notify(). + */ + if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) { + sc->sc_flags |= SCF_UNREACH; + goto done; + } + syncache_drop(sc, sch); + TCPSTAT_INC(tcps_sc_unreach); +done: + SCH_UNLOCK(sch); +} + + +/* + * Build a new TCP socket structure from a syncache entry. 
+ */ +static struct socket * +syncache_socket(struct syncache *sc, struct socket *lso, odp_packet_t m, struct tcpopt *to) +{ + struct inpcb *inp = NULL; + struct socket *so = NULL; + struct tcpcb *tp; + int error; + (void)to; + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + /* + * Ok, create the full blown connection, and set things up + * as they would have been set up if we had created the + * connection when the SYN arrived. If we can't create + * the connection, abort it. + */ + so = ofp_sonewconn(lso, 0); + if (so == NULL) { + /* + * Drop the connection; we will either send a RST or + * have the peer retransmit its SYN again after its + * RTO and try again. + */ + TCPSTAT_INC(tcps_listendrop); + OFP_DBG("Socket create failed " + "due to limits or memory shortage\n"); + goto abort2; + } + + inp = sotoinpcb(so); + inp->inp_inc.inc_fibnum = so->so_fibnum; + INP_WLOCK(inp); + INP_HASH_WLOCK(&V_tcbinfo); + + /* Insert new socket into PCB hash list. */ + inp->inp_inc.inc_flags = sc->sc_inc.inc_flags; +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) { + inp->in6p_laddr = sc->sc_inc.inc6_laddr; + } else { + inp->inp_vflag &= ~INP_IPV6; + inp->inp_vflag |= INP_IPV4; +#endif + inp->inp_laddr = sc->sc_inc.inc_laddr; +#ifdef INET6 + } +#endif + + /* + * Install in the reservation hash table for now, but don't yet + * install a connection group since the full 4-tuple isn't yet + * configured. + */ + inp->inp_lport = sc->sc_inc.inc_lport; + if ((error = ofp_in_pcbinshash_nopcbgroup(inp)) != 0) { + /* + * Undo the assignments above if we failed to + * put the PCB on the hash lists. 
+ */ +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) + inp->in6p_laddr = ofp_in6addr_any; + else +#endif + inp->inp_laddr.s_addr = OFP_INADDR_ANY; + inp->inp_lport = 0; + OFP_DBG("ofp_in_pcbinshash failed " + "with error %i\n", error); + INP_HASH_WUNLOCK(&V_tcbinfo); + goto abort; + } +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) { + struct inpcb *oinp = sotoinpcb(lso); + struct ofp_in6_addr laddr6; + struct ofp_sockaddr_in6 sin6; + /* + * Inherit socket options from the listening socket. + * Note that in6p_inputopts are not (and should not be) + * copied, since it stores previously received options and is + * used to detect if each new option is different than the + * previous one and hence should be passed to a user. + * If we copied in6p_inputopts, a user would not be able to + * receive options just after calling the accept system call. + */ + inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS; +#if 0 + if (oinp->in6p_outputopts) + inp->in6p_outputopts = + ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT); +#endif + + sin6.sin6_family = OFP_AF_INET6; + sin6.sin6_len = sizeof(sin6); + sin6.sin6_addr = sc->sc_inc.inc6_faddr; + sin6.sin6_port = sc->sc_inc.inc_fport; + sin6.sin6_flowinfo = sin6.sin6_scope_id = 0; + laddr6 = inp->in6p_laddr; + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) + inp->in6p_laddr = sc->sc_inc.inc6_laddr; + if ((error = ofp_in6_pcbconnect_mbuf(inp, (struct ofp_sockaddr *)&sin6, + NULL, m)) != 0) { + inp->in6p_laddr = laddr6; + OFP_DBG("in6_pcbconnect failed with error %d\n", error); + INP_HASH_WUNLOCK(&V_tcbinfo); + goto abort; + } + /* Override flowlabel from in6_pcbconnect. */ + inp->inp_flow &= ~OFP_IPV6_FLOWLABEL_MASK; + inp->inp_flow |= sc->sc_flowlabel; + } + else +#endif + { + struct ofp_in_addr laddr; + struct ofp_sockaddr_in sin; + + /* HJo: FIX + inp->inp_options = (m) ? 
ip_srcroute(m) : NULL; + */ + inp->inp_options = ODP_PACKET_INVALID; + + if (inp->inp_options == ODP_PACKET_INVALID) { + inp->inp_options = sc->sc_ipopts; + sc->sc_ipopts = ODP_PACKET_INVALID; + } + + sin.sin_family = OFP_AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_addr = sc->sc_inc.inc_faddr; + sin.sin_port = sc->sc_inc.inc_fport; + bzero((char *)sin.sin_zero, sizeof(sin.sin_zero)); + laddr = inp->inp_laddr; + if (inp->inp_laddr.s_addr == OFP_INADDR_ANY) + inp->inp_laddr = sc->sc_inc.inc_laddr; + if ((error = ofp_in_pcbconnect_mbuf(inp, (struct ofp_sockaddr *)&sin, + NULL, m)) != 0) { + inp->inp_laddr = laddr; + OFP_DBG("ofp_in_pcbconnect failed " + "with error %i\n", error); + INP_HASH_WUNLOCK(&V_tcbinfo); + goto abort; + } + } + + INP_HASH_WUNLOCK(&V_tcbinfo); + tp = intotcpcb(inp); + tp->t_state = TCPS_SYN_RECEIVED; + tp->iss = sc->sc_iss; + tp->irs = sc->sc_irs; + tcp_rcvseqinit(tp); + tcp_sendseqinit(tp); + tp->snd_wl1 = sc->sc_irs; + tp->snd_max = tp->iss + 1; + tp->snd_nxt = tp->iss + 1; + tp->rcv_up = sc->sc_irs + 1; + tp->rcv_wnd = sc->sc_wnd; + tp->rcv_adv += tp->rcv_wnd; + tp->last_ack_sent = tp->rcv_nxt; + + tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY); + if (sc->sc_flags & SCF_NOOPT) + tp->t_flags |= TF_NOOPT; + else { + if (sc->sc_flags & SCF_WINSCALE) { + tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE; + tp->snd_scale = sc->sc_requested_s_scale; + tp->request_r_scale = sc->sc_requested_r_scale; + } + if (sc->sc_flags & SCF_TIMESTAMP) { + tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP; + tp->ts_recent = sc->sc_tsreflect; + tp->ts_recent_age = tcp_ts_getticks(); + tp->ts_offset = sc->sc_tsoff; + } + if (sc->sc_flags & SCF_SACK) + tp->t_flags |= TF_SACK_PERMIT; + } + + if (sc->sc_flags & SCF_ECN) + tp->t_flags |= TF_ECN_PERMIT; + + /* + * Set up MSS and get cached values from tcp_hostcache. + * This might overwrite some of the defaults we just set. 
+ */ + ofp_tcp_mss(tp, sc->sc_peer_mss); + + /* + * If the SYN,ACK was retransmitted, reset cwnd to 1 segment. + * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits. + */ + if (sc->sc_rxmits > 1) + tp->snd_cwnd = tp->t_maxseg; + + /* + * Copy and activate timers. + */ + tp->t_keepinit = sototcpcb(lso)->t_keepinit; + tp->t_keepidle = sototcpcb(lso)->t_keepidle; + tp->t_keepintvl = sototcpcb(lso)->t_keepintvl; + tp->t_keepcnt = sototcpcb(lso)->t_keepcnt; + ofp_tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); + + INP_WUNLOCK(inp); + + TCPSTAT_INC(tcps_accepts); + return (so); + +abort: + INP_WUNLOCK(inp); +abort2: + if (so != NULL) + ofp_soabort(so); + return (NULL); +} + +/* + * This function gets called when we receive an ACK for a + * socket in the LISTEN state. We look up the connection + * in the syncache, and if its there, we pull it out of + * the cache and turn it into a full-blown connection in + * the SYN-RECEIVED state. + */ +int +ofp_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct ofp_tcphdr *th, + struct socket **lsop, odp_packet_t m) +{ + struct syncache *sc; + struct syncache_head *sch; + struct syncache scs; + + /* + * Global TCP locks are held because we manipulate the PCB lists + * and create a new socket. + */ + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + KASSERT((th->th_flags & (OFP_TH_RST|OFP_TH_ACK|OFP_TH_SYN)) == OFP_TH_ACK, + ("%s: can handle only ACK", __func__)); + + sc = ofp_syncache_lookup(inc, &sch); /* returns locked sch */ + SCH_LOCK_ASSERT(sch); + if (sc == NULL) { + /* + * There is no syncache entry, so see if this ACK is + * a returning syncookie. To do this, first: + * A. See if this socket has had a syncache entry dropped in + * the past. We don't want to accept a bogus syncookie + * if we've never received a SYN. + * B. check that the syncookie is valid. If it is, then + * cobble up a fake syncache entry, and return. 
+ */ + if (!V_tcp_syncookies) { + SCH_UNLOCK(sch); + OFP_DBG("Spurious ACK, " + "segment rejected (syncookies disabled)\n"); + goto failed; + } + bzero(&scs, sizeof(scs)); + sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop); + SCH_UNLOCK(sch); + if (sc == NULL) { + OFP_DBG("Segment failed " + "SYNCOOKIE authentication, segment rejected " + "(probably spoofed)\n"); + goto failed; + } + } else { + /* Pull out the entry to unlock the bucket row. */ + OFP_TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); + sch->sch_length--; + V_tcp_syncache.cache_count--; + SCH_UNLOCK(sch); + } + + /* + * Segment validation: + * ACK must match our initial sequence number + 1 (the SYN|ACK). + */ + if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) { + OFP_DBG("ACK != ISS+1 segment " + "rejected\n"); + goto failed; + } + + /* + * The SEQ must fall in the window starting at the received + * initial receive sequence number + 1 (the SYN). + */ + if ((SEQ_LEQ(th->th_seq, sc->sc_irs) || + SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) && + !TOEPCB_ISSET(sc)) { + OFP_DBG("SEQ %u != IRS+1 %u, segment " + "rejected\n", th->th_seq, sc->sc_irs); + goto failed; + } + + if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) { + OFP_DBG("Timestamp not expected, " + "segment rejected\n"); + goto failed; + } + /* + * If timestamps were negotiated the reflected timestamp + * must be equal to what we actually sent in the SYN|ACK. + */ + if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts && + !TOEPCB_ISSET(sc)) { + OFP_DBG("TSECR %u != TS %u, " + "segment rejected\n", + to->to_tsecr, sc->sc_ts); + goto failed; + } + + *lsop = syncache_socket(sc, *lsop, m, to); + + if (*lsop == NULL) + TCPSTAT_INC(tcps_sc_aborted); + else + TCPSTAT_INC(tcps_sc_completed); + +/* how do we find the inp for the new socket? 
*/ + if (sc != &scs) + syncache_free(sc); + return (1); +failed: + if (sc != NULL && sc != &scs) + syncache_free(sc); + *lsop = NULL; + return (0); +} + +/* + * Given a LISTEN socket and an inbound SYN request, add + * this to the syn cache, and send back a segment: + * + * to the source. + * + * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN. + * Doing so would require that we hold onto the data and deliver it + * to the application. However, if we are the target of a SYN-flood + * DoS attack, an attacker could send data which would eventually + * consume all available buffer space if it were ACKed. By not ACKing + * the data, we avoid this DoS scenario. + */ +static void +_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct ofp_tcphdr *th, + struct inpcb *inp, struct socket **lsop, odp_packet_t m, + struct toe_usrreqs *tu, void *toepcb, int initial_timeout) +{ + struct tcpcb *tp; + struct socket *so; + struct syncache *sc = NULL; + struct syncache_head *sch; + odp_packet_t ipopts = ODP_PACKET_INVALID; + uint32_t flowtmp = 0; + uint32_t ltflags; + int win, sb_hiwat, ip_ttl, ip_tos; +#ifdef INET6 + int autoflowlabel = 0; +#endif + struct syncache scs; + (void)tu; + (void)toepcb; + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(inp); /* listen socket */ + KASSERT((th->th_flags & (OFP_TH_RST|OFP_TH_ACK|OFP_TH_SYN)) == OFP_TH_SYN, + ("%s: unexpected tcp flags", __func__)); + + /* + * Combine all so/tp operations very early to drop the INP lock as + * soon as possible. + */ + so = *lsop; + tp = sototcpcb(so); + +#ifdef INET6 + if ((inc->inc_flags & INC_ISIPV6) && + (inp->inp_flags & IN6P_AUTOFLOWLABEL)) + autoflowlabel = 1; +#endif + ip_ttl = inp->inp_ip_ttl; + ip_tos = inp->inp_ip_tos; + win = sbspace(&so->so_rcv); + sb_hiwat = so->so_rcv.sb_hiwat; + sb_hiwat = sb_hiwat; + ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE)); + + /* By the time we drop the lock these should no longer be used. 
*/ + so = NULL; + tp = NULL; + + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + + /* + * Remember the IP options, if any. + */ +#ifdef INET6 + if (!(inc->inc_flags & INC_ISIPV6)) +#endif + ipopts = /*HJo (m) ? ip_srcroute(m) :*/ ODP_PACKET_INVALID; + + /* + * See if we already have an entry for this connection. + * If we do, resend the SYN,ACK, and reset the retransmit timer. + * + * XXX: should the syncache be re-initialized with the contents + * of the new SYN here (which may have different options?) + * + * XXX: We do not check the sequence number to see if this is a + * real retransmit or a new connection attempt. The question is + * how to handle such a case; either ignore it as spoofed, or + * drop the current entry and create a new one? + */ + sc = ofp_syncache_lookup(inc, &sch); /* returns locked entry */ + SCH_LOCK_ASSERT(sch); + if (sc != NULL) { +#ifndef TCP_OFFLOAD_DISABLE + if (sc->sc_tu) + sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT, + sc->sc_toepcb); +#endif + TCPSTAT_INC(tcps_sc_dupsyn); + if (ipopts != ODP_PACKET_INVALID) { + /* + * If we were remembering a previous source route, + * forget it and use the new one we've been given. + */ + if (sc->sc_ipopts != ODP_PACKET_INVALID) + odp_packet_free(sc->sc_ipopts); + sc->sc_ipopts = ipopts; + } + /* + * Update timestamp if present. + */ + if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) + sc->sc_tsreflect = to->to_tsval; + else + sc->sc_flags &= ~SCF_TIMESTAMP; + /* Retransmit SYN|ACK and reset retransmit count. 
*/ + OFP_DBG("Received duplicate SYN, " + "resetting timer and retransmitting SYN|ACK\n"); + if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) { + if (!(sc->sc_flags & SCF_NO_TIMEOUT_RESET)) { + sc->sc_rxmits = 0; + syncache_timeout(sc, sch, 1, -1); + } + TCPSTAT_INC(tcps_sndacks); + TCPSTAT_INC(tcps_sndtotal); + } + SCH_UNLOCK(sch); + goto done; + } + + sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); + if (sc == NULL) { + /* + * The zone allocator couldn't provide more entries. + * Treat this as if the cache was full; drop the oldest + * entry and insert the new one. + */ + if ((sc = OFP_TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) + syncache_drop(sc, sch); + sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); + if (sc == NULL) { + if (V_tcp_syncookies) { + bzero(&scs, sizeof(scs)); + sc = &scs; + } else { + SCH_UNLOCK(sch); + if (ipopts != ODP_PACKET_INVALID) + odp_packet_free(ipopts); + goto done; + } + } + } + + /* + * Fill in the syncache values. + */ + sc->sc_ipopts = ipopts; + bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); +#ifdef INET6 + if (!(inc->inc_flags & INC_ISIPV6)) +#endif + { + sc->sc_ip_tos = ip_tos; + sc->sc_ip_ttl = ip_ttl; + } +#ifndef TCP_OFFLOAD_DISABLE + sc->sc_tu = tu; + sc->sc_toepcb = toepcb; +#endif + sc->sc_irs = th->th_seq; + sc->sc_iss = 31415 /* HJo: arc4random()*/; + sc->sc_flags = 0; + sc->sc_flowlabel = 0; + + /* + * Initial receive window: clip sbspace to [0 .. OFP_TCP_MAXWIN]. + * win was derived from socket earlier in the function. + */ + win = imax(win, 0); + win = imin(win, OFP_TCP_MAXWIN); + sc->sc_wnd = win; + + if (V_tcp_do_rfc1323) { + /* + * A timestamp received in a SYN makes + * it ok to send timestamp requests and replies. 
+ */ + if (to->to_flags & TOF_TS) { + sc->sc_tsreflect = to->to_tsval; + sc->sc_ts = tcp_ts_getticks(); + sc->sc_flags |= SCF_TIMESTAMP; + } + if (to->to_flags & TOF_SCALE) { + int wscale = 0; + + /* + * Pick the smallest possible scaling factor that + * will still allow us to scale up to ofp_sb_max, aka + * kern.ipc.maxsockbuf. + * + * We do this because there are broken firewalls that + * will corrupt the window scale option, leading to + * the other endpoint believing that our advertised + * window is unscaled. At scale factors larger than + * 5 the unscaled window will drop below 1500 bytes, + * leading to serious problems when traversing these + * broken firewalls. + * + * With the default maxsockbuf of 256K, a scale factor + * of 3 will be chosen by this algorithm. Those who + * choose a larger maxsockbuf should watch out + * for the compatiblity problems mentioned above. + * + * RFC1323: The Window field in a SYN (i.e., a + * or ) segment itself is never scaled. + */ + while (wscale < OFP_TCP_MAX_WINSHIFT && + (OFP_TCP_MAXWIN << wscale) < (int)ofp_sb_max) + wscale++; + sc->sc_requested_r_scale = wscale; + sc->sc_requested_s_scale = to->to_wscale; + sc->sc_flags |= SCF_WINSCALE; + } + } + if (to->to_flags & TOF_SACKPERM) + sc->sc_flags |= SCF_SACK; + if (to->to_flags & TOF_MSS) + sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ + if (ltflags & TF_NOOPT) + sc->sc_flags |= SCF_NOOPT; + if ((th->th_flags & (OFP_TH_ECE|OFP_TH_CWR)) && V_tcp_do_ecn) + sc->sc_flags |= SCF_ECN; + + if (V_tcp_syncookies) { + syncookie_generate(sch, sc, &flowtmp); +#ifdef INET6 + if (autoflowlabel) + sc->sc_flowlabel = flowtmp; +#endif + } else { +#ifdef INET6 + if (autoflowlabel) + sc->sc_flowlabel = + (odp_cpu_to_be_32(ofp_ip6_randomflowlabel()) & + OFP_IPV6_FLOWLABEL_MASK); +#endif + } + + SCH_UNLOCK(sch); + + /* + * Do a standard 3-way handshake. 
+ */ + if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) { + if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs) + syncache_free(sc); + else if (sc != &scs) + syncache_insert(sc, sch, initial_timeout); /* locks and unlocks sch */ + TCPSTAT_INC(tcps_sndacks); + TCPSTAT_INC(tcps_sndtotal); + } else { + if (sc != &scs) + syncache_free(sc); + TCPSTAT_INC(tcps_sc_dropped); + } + +done: + if (m != ODP_PACKET_INVALID) { + *lsop = NULL; + odp_packet_free(m); + } +} + +static int +syncache_respond(struct syncache *sc) +{ + struct ofp_ip *ip = NULL; + odp_packet_t m; + struct ofp_tcphdr *th = NULL; + int optlen, error = 0; /* Make compiler happy */ + uint16_t hlen, tlen, mssopt; + struct tcpopt to; +#ifdef INET6 + struct ofp_ip6_hdr *ip6 = NULL; +#endif + + hlen = +#ifdef INET6 + (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ofp_ip6_hdr) : +#endif + sizeof(struct ofp_ip); + tlen = hlen + sizeof(struct ofp_tcphdr); + + /* Determine MSS we advertize to other end of connection. */ + mssopt = ofp_tcp_mssopt(&sc->sc_inc); + if (sc->sc_peer_mss) + mssopt = max( min(sc->sc_peer_mss, mssopt), V_tcp_minmss); + + /* Create the IP+TCP header from scratch. 
*/ + m = ofp_packet_alloc(tlen); + + if (m == ODP_PACKET_INVALID) + return OFP_ENOBUFS; + + odp_packet_l3_offset_set(m, 0); + odp_packet_l4_offset_set(m, hlen); + +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) { + ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m); + ip6->ofp_ip6_vfc = OFP_IPV6_VERSION; + ip6->ofp_ip6_nxt = OFP_IPPROTO_TCP; + ip6->ip6_src = sc->sc_inc.inc6_laddr; + ip6->ip6_dst = sc->sc_inc.inc6_faddr; + ip6->ofp_ip6_plen = odp_cpu_to_be_16(tlen - hlen); + /* ip6_hlim is set after checksum */ + ip6->ofp_ip6_flow &= ~OFP_IPV6_FLOWLABEL_MASK; + ip6->ofp_ip6_flow |= sc->sc_flowlabel; + + th = (struct ofp_tcphdr *)(ip6 + 1); + } + else +#endif + { + ip = (struct ofp_ip *)odp_packet_data(m); + ip->ip_v = OFP_IPVERSION; + ip->ip_hl = sizeof(struct ofp_ip) >> 2; + ip->ip_len = tlen; + ip->ip_id = 0; + ip->ip_off = 0; + ip->ip_sum = 0; + ip->ip_p = OFP_IPPROTO_TCP; + ip->ip_src = sc->sc_inc.inc_laddr; + ip->ip_dst = sc->sc_inc.inc_faddr; + ip->ip_ttl = sc->sc_ip_ttl; + ip->ip_tos = sc->sc_ip_tos; + + /* + * See if we should do MTU discovery. Route lookups are + * expensive, so we will only unset the DF bit if: + * + * 1) ofp_path_mtu_discovery is disabled + * 2) the SCF_UNREACH flag has been set + */ + if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0)) + ip->ip_off |= OFP_IP_DF; + + th = (struct ofp_tcphdr *)(ip + 1); + } + + th->th_sport = sc->sc_inc.inc_lport; + th->th_dport = sc->sc_inc.inc_fport; + + th->th_seq = odp_cpu_to_be_32(sc->sc_iss); + th->th_ack = odp_cpu_to_be_32(sc->sc_irs + 1); + th->th_off = sizeof(struct ofp_tcphdr) >> 2; + th->th_x2 = 0; + th->th_flags = OFP_TH_SYN|OFP_TH_ACK; + th->th_win = odp_cpu_to_be_16(sc->sc_wnd); + th->th_urp = 0; + + if (sc->sc_flags & SCF_ECN) { + th->th_flags |= OFP_TH_ECE; + TCPSTAT_INC(tcps_ecn_shs); + } + + /* Tack on the TCP options. 
*/ + if ((sc->sc_flags & SCF_NOOPT) == 0) { + to.to_flags = 0; + + to.to_mss = mssopt; + to.to_flags = TOF_MSS; + if (sc->sc_flags & SCF_WINSCALE) { + to.to_wscale = sc->sc_requested_r_scale; + to.to_flags |= TOF_SCALE; + } + if (sc->sc_flags & SCF_TIMESTAMP) { + /* Virgin timestamp or TCP cookie enhanced one. */ + to.to_tsval = sc->sc_ts; + to.to_tsecr = sc->sc_tsreflect; + to.to_flags |= TOF_TS; + } + if (sc->sc_flags & SCF_SACK) + to.to_flags |= TOF_SACKPERM; + optlen = ofp_tcp_addoptions(&to, (uint8_t *)(th + 1)); + /* This is done in wrong order. */ + odp_packet_push_tail(m, optlen); + + /* Adjust headers by option size. */ + th->th_off = (sizeof(struct ofp_tcphdr) + optlen) >> 2; + +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) + ip6->ofp_ip6_plen = odp_cpu_to_be_16(odp_be_to_cpu_16(ip6->ofp_ip6_plen) + optlen); + else +#endif + ip->ip_len += optlen; + } else + optlen = 0; + + //HJo M_SETFIB(m, sc->sc_inc.inc_fibnum); + //odp_packet_csum_data(m) = offsetof(struct ofp_tcphdr, th_sum); +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) { + odp_packet_set_csum_flags(m, CSUM_TCP_IPV6); + th->th_sum = 0; + th->th_sum = ofp_ip6_cksum(m, tlen + optlen - hlen, OFP_IPPROTO_TCP, 0); + ip6->ofp_ip6_hlim = V_ip6_defhlim; /*in6_selecthlim(NULL, NULL);*/ + error = ofp_ip6_output(m, NULL); + } + else +#endif + { + ip->ip_len = odp_cpu_to_be_16(ip->ip_len); + ip->ip_off = odp_cpu_to_be_16(ip->ip_off); + ip->ip_sum = ofp_in_cksum((uint16_t *)ip, sizeof(*ip)); + th->th_sum = 0; + /* th->th_sum = ofp_in4_cksum(m); output calculates csum */ + + error = ofp_ip_output(m, NULL); + } + + return (error); +} + +void +ofp_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct ofp_tcphdr *th, + struct inpcb *inp, struct socket **lsop, odp_packet_t m, int initial_timeout) +{ + _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL, initial_timeout); +} + + +/* + * The purpose of SYN cookies is to avoid keeping track of all SYN's we + * receive and to be able to handle 
SYN floods from bogus source addresses + * (where we will never receive any reply). SYN floods try to exhaust all + * our memory and available slots in the SYN cache table to cause a denial + * of service to legitimate users of the local host. + * + * The idea of SYN cookies is to encode and include all necessary information + * about the connection setup state within the SYN-ACK we send back and thus + * to get along without keeping any local state until the ACK to the SYN-ACK + * arrives (if ever). Everything we need to know should be available from + * the information we encoded in the SYN-ACK. + * + * More information about the theory behind SYN cookies and its first + * discussion and specification can be found at: + * http://cr.yp.to/syncookies.html (overview) + * http://cr.yp.to/syncookies/archive (gory details) + * + * This implementation extends the original idea and first implementation + * of FreeBSD by using not only the initial sequence number field to store + * information but also the timestamp field if present. This way we can + * keep track of the entire state we need to know to recreate the session in + * its original form. Almost all TCP speakers implement RFC1323 timestamps + * these days. For those that do not we still have to live with the known + * shortcomings of the ISN only SYN cookies.
+ * + * Cookie layers: + * + * Initial sequence number we send: + * 31|................................|0 + * DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP + * D = MD5 Digest (first dword) + * M = MSS index + * R = Rotation of secret + * P = Odd or Even secret + * + * The MD5 Digest is computed over the following parameters: + * a) randomly rotated secret + * b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6) + * c) the received initial sequence number from remote host + * d) the rotation offset and odd/even bit + * + * Timestamp we send: + * 31|................................|0 + * DDDDDDDDDDDDDDDDDDDDDDRRRRSSSSA5 + * D = MD5 Digest (third dword) (only as filler) + * R = Requested receive window scale + * S = Requested send window scale + * A = SACK allowed + * 5 = TCP-MD5 enabled (not implemented yet) + * XORed with MD5 Digest (fourth dword) + * + * The timestamp isn't cryptographically secure and doesn't need to be. + * The double use of the MD5 digest dwords ties it to a specific remote/ + * local host/port, remote initial sequence number and our local time + * limited secret. A received timestamp is reverted (XORed) and then + * the contained MD5 dword is compared to the computed one to ensure the + * timestamp belongs to the SYN-ACK we sent. The other parameters may + * have been tampered with but this isn't different from supplying bogus + * values in the SYN in the first place. + * + * Some problems with SYN cookies remain however: + * Consider the problem of a recreated (and retransmitted) cookie. If the + * original SYN was accepted, the connection is established. The second + * SYN is inflight, and if it arrives with an ISN that falls within the + * receive window, the connection is killed. + * + * Notes: + * A heuristic to determine when to accept syn cookies is not necessary. + * An ACK flood would cause the syncookie verification to be attempted, + * but a SYN flood causes syncookies to be generated.
Both are of equal + * cost, so there's no point in trying to optimize the ACK flood case. + * Also, if you don't process certain ACKs for some reason, then all someone + * would have to do is launch a SYN and ACK flood at the same time, which + * would stop cookie verification and defeat the entire purpose of syncookies. + */ +static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 }; + +#define time_uptime (ofp_timer_ticks(0)/(1000000/OFP_TIMER_RESOLUTION_US)) + +static void +syncookie_generate(struct syncache_head *sch, struct syncache *sc, + uint32_t *flowlabel) +{ + MD5_CTX ctx; + u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)]; + uint32_t data; + uint32_t *secbits; + uint32_t off, pmss, mss; + int i; + uint32_t cryptobuffer[SYNCOOKIE_SECRET_SIZE]; + uint8_t *cryptoptr = (uint8_t *)cryptobuffer; + ssize_t cryptolen = sizeof(cryptobuffer); + + (void)flowlabel; + + odp_random_data(cryptoptr, cryptolen, 0); + + SCH_LOCK_ASSERT(sch); + + /* Which of the two secrets to use. */ + secbits = sch->sch_oddeven ? + sch->sch_secbits_odd : sch->sch_secbits_even; + + /* Reseed secret if too old. */ + if (sch->sch_reseed < time_uptime) { + sch->sch_oddeven = sch->sch_oddeven ? 0 : 1; /* toggle */ + secbits = sch->sch_oddeven ? + sch->sch_secbits_odd : sch->sch_secbits_even; + for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++) + secbits[i] = cryptobuffer[i]; + sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME; + } + + /* Secret rotation offset. */ + off = sc->sc_iss & 0x7; /* iss was randomized before */ + + /* Maximum segment size calculation. */ + pmss = + max( min(sc->sc_peer_mss, ofp_tcp_mssopt(&sc->sc_inc)), V_tcp_minmss); + for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--) + if (tcp_sc_msstab[mss] <= (int)pmss) + break; + + /* Fold parameters and MD5 digest into the ISN we will send. 
*/ + data = sch->sch_oddeven;/* odd or even secret, 1 bit */ + data |= off << 1; /* secret offset, derived from iss, 3 bits */ + data |= mss << 4; /* mss, 3 bits */ + ofp_MD5Init(&ctx); + ofp_MD5Update(&ctx, ((u_int8_t *)secbits) + off, + SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off); + ofp_MD5Update(&ctx, secbits, off); + ofp_MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc)); + ofp_MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs)); + ofp_MD5Update(&ctx, &data, sizeof(data)); + ofp_MD5Final((u_int8_t *)&md5_buffer, &ctx); + + data |= (md5_buffer[0] << 7); + sc->sc_iss = data; + +#ifdef INET6 + *flowlabel = md5_buffer[1] & OFP_IPV6_FLOWLABEL_MASK; +#endif + + /* Additional parameters are stored in the timestamp if present. */ + if (sc->sc_flags & SCF_TIMESTAMP) { + data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */ + data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */ + data |= sc->sc_requested_s_scale << 2; /* SWIN scale, 4 bits */ + data |= sc->sc_requested_r_scale << 6; /* RWIN scale, 4 bits */ + data |= md5_buffer[2] << 10; /* more digest bits */ + data ^= md5_buffer[3]; + sc->sc_ts = data; + sc->sc_tsoff = data - ofp_timer_ticks(0); /* after XOR */ + } + + TCPSTAT_INC(tcps_sc_sendcookie); +} + +static struct syncache * +syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, + struct syncache *sc, struct tcpopt *to, struct ofp_tcphdr *th, + struct socket *so) +{ + MD5_CTX ctx; + uint32_t md5_buffer[8]; + uint32_t data = 0; + uint32_t *secbits; + tcp_seq ack, seq; + int off, mss, wnd, flags; + + SCH_LOCK_ASSERT(sch); + + /* + * Pull information out of SYN-ACK/ACK and + * revert sequence number advances. + */ + ack = th->th_ack - 1; + seq = th->th_seq - 1; + off = (ack >> 1) & 0x7; + mss = (ack >> 4) & 0x7; + flags = ack & 0x7f; + + /* Which of the two secrets to use. */ + secbits = (flags & 0x1) ? 
sch->sch_secbits_odd : sch->sch_secbits_even; + + /* + * The secret wasn't updated for the lifetime of a syncookie, + * so this SYN-ACK/ACK is either too old (replay) or totally bogus. + */ + if ((sch->sch_reseed + SYNCOOKIE_LIFETIME) < time_uptime) { + return (NULL); + } + + /* Recompute the digest so we can compare it. */ + ofp_MD5Init(&ctx); + ofp_MD5Update(&ctx, ((u_int8_t *)secbits) + off, + SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off); + ofp_MD5Update(&ctx, secbits, off); + ofp_MD5Update(&ctx, inc, sizeof(*inc)); + ofp_MD5Update(&ctx, &seq, sizeof(seq)); + ofp_MD5Update(&ctx, &flags, sizeof(flags)); + ofp_MD5Final((u_int8_t *)&md5_buffer, &ctx); + + /* Does the digest part of or ACK'ed ISS match? */ + if ((ack & (~0x7f)) != (md5_buffer[0] << 7)) { + return (NULL); + } + + /* Does the digest part of our reflected timestamp match? */ + if (to->to_flags & TOF_TS) { + data = md5_buffer[3] ^ to->to_tsecr; + if ((data & (~0x3ff)) != (md5_buffer[2] << 10)) { + return (NULL); + } + } + + /* Fill in the syncache values. */ + bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); + sc->sc_ipopts = ODP_PACKET_INVALID; + + sc->sc_irs = seq; + sc->sc_iss = ack; + +#ifdef INET6 + if (inc->inc_flags & INC_ISIPV6) { + if (sotoinpcb(so)->inp_flags & IN6P_AUTOFLOWLABEL) + sc->sc_flowlabel = md5_buffer[1] & OFP_IPV6_FLOWLABEL_MASK; + } else +#endif + { + sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl; + sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos; + } + + /* Additional parameters that were encoded in the timestamp. */ + if (data) { + sc->sc_flags |= SCF_TIMESTAMP; + sc->sc_tsreflect = to->to_tsval; + sc->sc_ts = to->to_tsecr; + sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks(); + sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0; + sc->sc_flags |= ((data >> 1) & 0x1) ? 
SCF_SACK : 0; + sc->sc_requested_s_scale = min((data >> 2) & 0xf, + OFP_TCP_MAX_WINSHIFT); + sc->sc_requested_r_scale = min((data >> 6) & 0xf, + OFP_TCP_MAX_WINSHIFT); + if (sc->sc_requested_s_scale || sc->sc_requested_r_scale) + sc->sc_flags |= SCF_WINSCALE; + } else + sc->sc_flags |= SCF_NOOPT; + + wnd = sbspace(&so->so_rcv); + wnd = imax(wnd, 0); + wnd = imin(wnd, OFP_TCP_MAXWIN); + sc->sc_wnd = wnd; + + sc->sc_rxmits = 0; + sc->sc_peer_mss = tcp_sc_msstab[mss]; + + TCPSTAT_INC(tcps_sc_recvcookie); + return (sc); +} + +/* + * Returns the current number of syncache entries. This number + * will probably change before you get around to calling + * syncache_pcblist. + */ + +int +ofp_syncache_pcbcount(void) +{ + struct syncache_head *sch; + int count, i; + + for (count = 0, i = 0; i < (int)V_tcp_syncache.hashsize; i++) { + /* No need to lock for a read. */ + sch = &V_tcp_syncache.hashbase[i]; + count += sch->sch_length; + } + return count; +} + diff --git a/src/ofp_tcp_timer.c b/src/ofp_tcp_timer.c new file mode 100644 index 00000000..f90da73e --- /dev/null +++ b/src/ofp_tcp_timer.c @@ -0,0 +1,683 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. + * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 + */ + +#include + +#include "odp.h" +#include "ofpi_errno.h" +#include "ofpi_protosw.h" +#include "ofpi_sysctl.h" +#include "ofpi_socketvar.h" +#include "ofpi_sockstate.h" +#include "ofpi_in_pcb.h" +#include "ofpi_in.h" +#include "ofpi_callout.h" +#ifdef INET6 +/*#include "ofpi_in6_pcb.h"*/ +#endif +#include "ofpi_tcp_fsm.h" +#include "ofpi_tcp_timer.h" +#include "ofpi_tcp_var.h" +#include "ofpi_tcp.h" +#ifdef TCPDEBUG +#include +#endif + +int ofp_tcp_keepinit; +OFP_SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + &ofp_tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); + +int ofp_tcp_keepidle; +OFP_SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + &ofp_tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); + +int ofp_tcp_keepintvl; +OFP_SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + 
&ofp_tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); + +int ofp_tcp_delacktime; +OFP_SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + &ofp_tcp_delacktime, 0, sysctl_msec_to_ticks, "I", + "Time before a delayed ACK is sent"); + +int ofp_tcp_msl; +OFP_SYSCTL_PROC(_net_inet_tcp, OFP_OID_AUTO, msl, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + &ofp_tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); + +int ofp_tcp_rexmit_min; +OFP_SYSCTL_PROC(_net_inet_tcp, OFP_OID_AUTO, rexmit_min, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + &ofp_tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", + "Minimum Retransmission Timeout"); + +int ofp_tcp_rexmit_slop; +OFP_SYSCTL_PROC(_net_inet_tcp, OFP_OID_AUTO, rexmit_slop, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + &ofp_tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", + "Retransmission Timer Slop"); + +static int always_keepalive = 1; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, always_keepalive, OFP_CTLFLAG_RW, + &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); + +int ofp_tcp_fast_finwait2_recycle = 0; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, fast_finwait2_recycle, OFP_CTLFLAG_RW, + &ofp_tcp_fast_finwait2_recycle, 0, + "Recycle closed FIN_WAIT_2 connections faster"); + +int ofp_tcp_finwait2_timeout; +OFP_SYSCTL_PROC(_net_inet_tcp, OFP_OID_AUTO, finwait2_timeout, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW, + &ofp_tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); + +int ofp_tcp_keepcnt = TCPTV_KEEPCNT; +OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, keepcnt, OFP_CTLFLAG_RW, &ofp_tcp_keepcnt, 0, + "Number of keepalive probes to send"); + + /* max idle probes */ +int ofp_tcp_maxpersistidle; + + +//static int per_cpu_timers = 0; + +#if 0 /* HJo */ +#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? 
\ + ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) +#else +#define INP_CPU(inp) 0 +#endif + +/* + * Tcp protocol timeout routine called every 500 ms. + * Updates timestamps used for TCP + * causes finite state machine actions if timers expire. + */ +void +ofp_tcp_slowtimo(void *notused) +{ + (void)notused; + INP_INFO_WLOCK(&V_tcbinfo); + (void) ofp_tcp_tw_2msl_scan(0); + INP_INFO_WUNLOCK(&V_tcbinfo); + ofp_timer_start(500000, ofp_tcp_slowtimo, NULL, 0); +} + +int ofp_tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; + +int ofp_tcp_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; + +static int tcp_totbackoff = 2559; /* sum of ofp_tcp_backoff[] */ + +static int tcp_timer_race; + +/* + * TCP timer processing. + */ + +void +ofp_tcp_timer_delack(void *xtp) +{ + struct tcpcb *tp = xtp; + struct inpcb *inp; + + if (tp->t_timers) + tp->t_timers->tt_delack.odptmo = ODP_TIMER_INVALID; + + inp = tp->t_inpcb; + /* + * XXXRW: While this assert is in fact correct, bugs in the tcpcb + * tear-down mean we need it as a work-around for races between + * timers and ofp_tcp_discardcb(). + * + * KASSERT(inp != NULL, ("ofp_tcp_timer_delack: inp == NULL")); + */ + if (inp == NULL) { + tcp_timer_race++; + return; + } + INP_WLOCK(inp); + if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack) + || !callout_active(&tp->t_timers->tt_delack)) { + INP_WUNLOCK(inp); + return; + } + callout_deactivate(&tp->t_timers->tt_delack); + + t_flags_or(tp->t_flags, TF_ACKNOW); + TCPSTAT_INC(tcps_delack); + (void) ofp_tcp_output(tp); + INP_WUNLOCK(inp); +} + +void +ofp_tcp_timer_2msl(void *xtp) +{ + struct tcpcb *tp = xtp; + struct inpcb *inp; +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif + /* + * XXXRW: Does this actually happen? 
+ */ + INP_INFO_WLOCK(&V_tcbinfo); + inp = tp->t_inpcb; + /* + * XXXRW: While this assert is in fact correct, bugs in the tcpcb + * tear-down mean we need it as a work-around for races between + * timers and ofp_tcp_discardcb(). + * + * KASSERT(inp != NULL, ("ofp_tcp_timer_2msl: inp == NULL")); + */ + if (inp == NULL) { + tcp_timer_race++; + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + INP_WLOCK(inp); + ofp_tcp_free_sackholes(tp); + if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) || + !callout_active(&tp->t_timers->tt_2msl)) { + INP_WUNLOCK(tp->t_inpcb); + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + callout_deactivate(&tp->t_timers->tt_2msl); + /* + * 2 MSL timeout in shutdown went off. If we're closed but + * still waiting for peer to close and connection has been idle + * too long, or if 2MSL time is up from TIME_WAIT, delete connection + * control block. Otherwise, check again in a bit. + * + * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, + * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. + * Ignore fact that there were recent incoming segments. 
+ */ + if (ofp_tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && + tp->t_inpcb && tp->t_inpcb->inp_socket && + (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { + TCPSTAT_INC(tcps_finwait2_drops); + tp = ofp_tcp_close(tp); + } else { + if (tp->t_state != TCPS_TIME_WAIT && + (int)(ofp_timer_ticks(0) - tp->t_rcvtime) <= TP_MAXIDLE(tp)) + callout_reset_on(&tp->t_timers->tt_2msl, + TP_KEEPINTVL(tp), ofp_tcp_timer_2msl, tp, INP_CPU(inp)); + else + tp = ofp_tcp_close(tp); + } + +#ifdef TCPDEBUG + if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & OFP_SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct ofp_tcphdr *)0, + OFP_PRU_SLOWTIMO); +#endif + if (tp != NULL) + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); +} + +void +ofp_tcp_timer_keep(void *xtp) +{ + struct tcpcb *tp = xtp; + struct tcptemp *t_template; + struct inpcb *inp; +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif + INP_INFO_WLOCK(&V_tcbinfo); + inp = tp->t_inpcb; + /* + * XXXRW: While this assert is in fact correct, bugs in the tcpcb + * tear-down mean we need it as a work-around for races between + * timers and ofp_tcp_discardcb(). + * + * KASSERT(inp != NULL, ("ofp_tcp_timer_keep: inp == NULL")); + */ + if (inp == NULL) { + tcp_timer_race++; + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + INP_WLOCK(inp); + if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep) + || !callout_active(&tp->t_timers->tt_keep)) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + callout_deactivate(&tp->t_timers->tt_keep); + /* + * Keep-alive timer went off; send something + * or drop connection if idle for too long. 
+ */ + TCPSTAT_INC(tcps_keeptimeo); + if (tp->t_state < TCPS_ESTABLISHED) + goto dropit; + if ((always_keepalive || inp->inp_socket->so_options & OFP_SO_KEEPALIVE) && + tp->t_state <= TCPS_CLOSING) { + if ((int)(ofp_timer_ticks(0) - tp->t_rcvtime) >= + TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) + goto dropit; + /* + * Send a packet designed to force a response + * if the peer is up and reachable: + * either an ACK if the connection is still alive, + * or an RST if the peer has closed the connection + * due to timeout or reboot. + * Using sequence number tp->snd_una-1 + * causes the transmitted zero-length segment + * to lie outside the receive window; + * by the protocol spec, this requires the + * correspondent TCP to respond. + */ + TCPSTAT_INC(tcps_keepprobe); + t_template = ofp_tcpip_maketemplate(inp); + if (t_template) { + ofp_tcp_respond(tp, t_template->tt_ipgen, + &t_template->tt_t, + (odp_packet_t )ODP_PACKET_INVALID, + tp->rcv_nxt, tp->snd_una - 1, 0); + free(t_template); + } + callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), + ofp_tcp_timer_keep, tp, INP_CPU(inp)); + } else + callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), + ofp_tcp_timer_keep, tp, INP_CPU(inp)); + +#ifdef TCPDEBUG + if (inp->inp_socket->so_options & OFP_SO_DEBUG) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct ofp_tcphdr *)0, + OFP_PRU_SLOWTIMO); +#endif + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + +dropit: + TCPSTAT_INC(tcps_keepdrops); + tp = ofp_tcp_drop(tp, OFP_ETIMEDOUT); + +#ifdef TCPDEBUG + if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & OFP_SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct ofp_tcphdr *)0, + OFP_PRU_SLOWTIMO); +#endif + if (tp != NULL) + INP_WUNLOCK(tp->t_inpcb); + INP_INFO_WUNLOCK(&V_tcbinfo); +} + +void +ofp_tcp_timer_persist(void *xtp) +{ + struct tcpcb *tp = xtp; + struct inpcb *inp; +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif + INP_INFO_WLOCK(&V_tcbinfo); + inp = tp->t_inpcb; + /* + 
* XXXRW: While this assert is in fact correct, bugs in the tcpcb + * tear-down mean we need it as a work-around for races between + * timers and ofp_tcp_discardcb(). + * + * KASSERT(inp != NULL, ("ofp_tcp_timer_persist: inp == NULL")); + */ + if (inp == NULL) { + tcp_timer_race++; + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + INP_WLOCK(inp); + if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist) + || !callout_active(&tp->t_timers->tt_persist)) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + callout_deactivate(&tp->t_timers->tt_persist); + /* + * Persistance timer into zero window. + * Force a byte to be output, if possible. + */ + TCPSTAT_INC(tcps_persisttimeo); + /* + * Hack: if the peer is dead/unreachable, we do not + * time out if the window is closed. After a full + * backoff, drop the connection if the idle time + * (no responses to probes) reaches the maximum + * backoff that we would use if retransmitting. + */ + if (tp->t_rxtshift == TCP_MAXRXTSHIFT && + ((int)(ofp_timer_ticks(0) - tp->t_rcvtime) >= ofp_tcp_maxpersistidle || + ofp_timer_ticks(0) - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { + TCPSTAT_INC(tcps_persistdrop); + tp = ofp_tcp_drop(tp, OFP_ETIMEDOUT); + goto out; + } + ofp_tcp_setpersist(tp); + t_flags_or(tp->t_flags, TF_FORCEDATA); + (void) ofp_tcp_output(tp); + t_flags_and(tp->t_flags, ~TF_FORCEDATA); + +out: +#ifdef TCPDEBUG + if (tp != NULL && tp->t_inpcb->inp_socket->so_options & OFP_SO_DEBUG) + tcp_trace(TA_USER, ostate, tp, NULL, NULL, OFP_PRU_SLOWTIMO); +#endif + if (tp != NULL) + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); +} + +void +ofp_tcp_timer_rexmt(void * xtp) +{ + struct tcpcb *tp = xtp; + int rexmt; + int headlocked; + struct inpcb *inp; +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif + INP_INFO_RLOCK(&V_tcbinfo); + inp = tp->t_inpcb; + /* + * XXXRW: While this assert is in fact correct, bugs in the tcpcb + * tear-down mean we need it as a 
work-around for races between + * timers and ofp_tcp_discardcb(). + * + * KASSERT(inp != NULL, ("ofp_tcp_timer_rexmt: inp == NULL")); + */ + if (inp == NULL) { + tcp_timer_race++; + INP_INFO_RUNLOCK(&V_tcbinfo); + return; + } + INP_WLOCK(inp); + if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt) + || !callout_active(&tp->t_timers->tt_rexmt)) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + return; + } + callout_deactivate(&tp->t_timers->tt_rexmt); + ofp_tcp_free_sackholes(tp); + /* + * Retransmission timer went off. Message has not + * been acked within retransmit interval. Back off + * to a longer retransmit interval and retransmit one segment. + */ + if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { + tp->t_rxtshift = TCP_MAXRXTSHIFT; + TCPSTAT_INC(tcps_timeoutdrop); + ofp_in_pcbref(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + INP_WUNLOCK(inp); + INP_INFO_WLOCK(&V_tcbinfo); + INP_WLOCK(inp); + if (ofp_in_pcbrele_wlocked(inp)) { + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + if (inp->inp_flags & INP_DROPPED) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + return; + } + + tp = ofp_tcp_drop(tp, tp->t_softerror ? + tp->t_softerror : OFP_ETIMEDOUT); + headlocked = 1; + goto out; + } + INP_INFO_RUNLOCK(&V_tcbinfo); + headlocked = 0; + if (tp->t_rxtshift == 1) { + /* + * first retransmit; record ssthresh and cwnd so they can + * be recovered if this turns out to be a "bad" retransmit. + * A retransmit is considered "bad" if an ACK for this + * segment is received within RTT/2 interval; the assumption + * here is that the ACK was already in flight. See + * "On Estimating End-to-End Network Path Properties" by + * Allman and Paxson for more details. 
+ */ + tp->snd_cwnd_prev = tp->snd_cwnd; + tp->snd_ssthresh_prev = tp->snd_ssthresh; + tp->snd_recover_prev = tp->snd_recover; + if (IN_FASTRECOVERY(tp->t_flags)) + t_flags_or(tp->t_flags, TF_WASFRECOVERY); + else + t_flags_and(tp->t_flags, ~TF_WASFRECOVERY); + if (IN_CONGRECOVERY(tp->t_flags)) + t_flags_or(tp->t_flags, TF_WASCRECOVERY); + else + t_flags_and(tp->t_flags, ~TF_WASCRECOVERY); + tp->t_badrxtwin = ofp_timer_ticks(0) + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); + t_flags_or(tp->t_flags, TF_PREVVALID); + } else + t_flags_and(tp->t_flags, ~TF_PREVVALID); + TCPSTAT_INC(tcps_rexmttimeo); + if (tp->t_state == TCPS_SYN_SENT) + rexmt = TCP_REXMTVAL(tp) * ofp_tcp_syn_backoff[tp->t_rxtshift]; + else + rexmt = TCP_REXMTVAL(tp) * ofp_tcp_backoff[tp->t_rxtshift]; + TCPT_RANGESET(tp->t_rxtcur, rexmt, + tp->t_rttmin, TCPTV_REXMTMAX); + /* + * Disable rfc1323 if we haven't got any response to + * our third SYN to work-around some broken terminal servers + * (most of which have hopefully been retired) that have bad VJ + * header compression code which trashes TCP segments containing + * unknown-to-them TCP options. + */ + if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) + t_flags_and(tp->t_flags, ~(TF_REQ_SCALE|TF_REQ_TSTMP)); + /* + * If we backed off this far, our srtt estimate is probably bogus. + * Clobber it so we'll take the next rtt measurement as our srtt; + * move the current srtt into rttvar to keep the current + * retransmit times until then. + */ + if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { + tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); + tp->t_srtt = 0; + } + tp->snd_nxt = tp->snd_una; + tp->snd_recover = tp->snd_max; + /* + * Force a segment to be sent. + */ + t_flags_or(tp->t_flags, TF_ACKNOW); + /* + * If timing a segment in this window, stop the timer. 
+ */ + tp->t_rtttime = 0; + /* HJo: FIX + ofp_cc_cong_signal(tp, NULL, CC_RTO); + */ + (void) ofp_tcp_output(tp); + +out: +#ifdef TCPDEBUG + if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & OFP_SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct ofp_tcphdr *)0, + OFP_PRU_SLOWTIMO); +#endif + if (tp != NULL) + INP_WUNLOCK(inp); + if (headlocked) + INP_INFO_WUNLOCK(&V_tcbinfo); +} + +#ifdef PASSIVE_INET +void +tcp_timer_reassdl(void *xtp) +{ + struct tcpcb *tp = xtp; + struct inpcb *inp; + + inp = tp->t_inpcb; + /* + * XXXRW: While this assert is in fact correct, bugs in the tcpcb + * tear-down mean we need it as a work-around for races between + * timers and ofp_tcp_discardcb(). + * + * KASSERT(inp != NULL, ("tcp_timer_reassdl: inp == NULL")); + */ + if (inp == NULL) { + tcp_timer_race++; + return; + } + INP_WLOCK(inp); + if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_reassdl) + || !callout_active(&tp->t_timers->tt_reassdl)) { + INP_WUNLOCK(inp); + return; + } + callout_deactivate(&tp->t_timers->tt_reassdl); + + tcp_reass_deliver_holes(tp); + + INP_WUNLOCK(inp); +} +#endif /* PASSIVE_INET */ + +void +ofp_tcp_timer_activate(struct tcpcb *tp, int timer_type, uint32_t delta) +{ + struct callout *t_callout = NULL; + void *f_callout = NULL; + struct inpcb *inp = tp->t_inpcb; + int cpu = INP_CPU(inp); + (void)cpu; + (void)inp; + + switch (timer_type) { + case TT_DELACK: + t_callout = &tp->t_timers->tt_delack; + f_callout = ofp_tcp_timer_delack; + break; + case TT_REXMT: + t_callout = &tp->t_timers->tt_rexmt; + f_callout = ofp_tcp_timer_rexmt; + break; + case TT_PERSIST: + t_callout = &tp->t_timers->tt_persist; + f_callout = ofp_tcp_timer_persist; + break; + case TT_KEEP: + if (delta > 6000) delta = 6000; + t_callout = &tp->t_timers->tt_keep; + f_callout = ofp_tcp_timer_keep; + break; + case TT_2MSL: + t_callout = &tp->t_timers->tt_2msl; + f_callout = ofp_tcp_timer_2msl; + break; + default: + panic("bad timer_type"); + } + if 
(delta == 0) { + callout_stop(t_callout); + } else { + callout_reset_on(t_callout, delta, f_callout, tp, cpu); + } +} + +int +ofp_tcp_timer_active(struct tcpcb *tp, int timer_type) +{ + struct callout *t_callout = NULL; + + switch (timer_type) { + case TT_DELACK: + t_callout = &tp->t_timers->tt_delack; + break; + case TT_REXMT: + t_callout = &tp->t_timers->tt_rexmt; + break; + case TT_PERSIST: + t_callout = &tp->t_timers->tt_persist; + break; + case TT_KEEP: + t_callout = &tp->t_timers->tt_keep; + break; + case TT_2MSL: + t_callout = &tp->t_timers->tt_2msl; + break; +#ifdef PASSIVE_INET + case TT_REASSDL: + t_callout = &tp->t_timers->tt_reassdl; + break; +#endif + default: + panic("bad timer_type"); + } + return callout_active(t_callout); +} + +#define ticks_to_msecs(t) (OFP_TIMER_RESOLUTION_US/1000*(t)) +#if 0 +void +tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer) +{ + bzero(xtimer, sizeof(struct xtcp_timer)); + if (timer == NULL) + return; + if (callout_active(&timer->tt_delack)) + xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks); + if (callout_active(&timer->tt_rexmt)) + xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks); + if (callout_active(&timer->tt_persist)) + xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks); + if (callout_active(&timer->tt_keep)) + xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks); + if (callout_active(&timer->tt_2msl)) + xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks); + xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); +} +#endif diff --git a/src/ofp_tcp_timewait.c b/src/ofp_tcp_timewait.c new file mode 100644 index 00000000..467fb459 --- /dev/null +++ b/src/ofp_tcp_timewait.c @@ -0,0 +1,554 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. 
+ * Copyright (c) 2015 Nokia Solutions and Networks + * Copyright (c) 2015 Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
+ */
+
+#include "odp.h"
+
+#include "ofpi_pkt_processing.h"
+#include "ofpi_errno.h"
+#include "ofpi_sysctl.h"
+#include "ofpi_socketvar.h"
+#include "ofpi_sockstate.h"
+#include "ofpi_protosw.h"
+#include "ofpi_in.h"
+#include "ofpi_in_pcb.h"
+#include "ofpi_ip.h"
+#include "ofpi_tcp.h"
+#include "ofpi_tcp_fsm.h"
+#include "ofpi_tcp_seq.h"
+#include "ofpi_tcp_timer.h"
+#include "ofpi_tcp_var.h"
+#ifdef INET6
+#include "ofpi_ip6.h"
+#include "ofpi_ip6_var.h"
+#include "ofpi_tcp6_var.h"
+#endif /*INET6*/
+#ifdef TCPDEBUG
+#include
+#endif
+
+static VNET_DEFINE(uma_zone_t, tcptw_zone);
+#define	V_tcptw_zone	VNET(tcptw_zone)
+static int	maxtcptw;
+
+/*
+ * The timed wait queue contains references to each of the TCP sessions
+ * currently in the TIME_WAIT state.  The queue pointers, including the
+ * queue pointers in each tcptw structure, are protected using the global
+ * ofp_tcbinfo lock, which must be held over queue iteration and modification.
+ */
+static VNET_DEFINE(OFP_TAILQ_HEAD(, tcptw), twq_2msl);
+#define	V_twq_2msl	VNET(twq_2msl)
+
+static void	tcp_tw_2msl_reset(struct tcptw *, int);
+static void	tcp_tw_2msl_stop(struct tcptw *);
+/*
+ * Default limit for the tcptw zone: half of the distance between
+ * V_ipport_firstauto and V_ipport_lastauto, raised to at least 32 and
+ * capped at maxsockets / 5.
+ */
+static int
+tcptw_auto_size(void)
+{
+	int halfrange;
+
+	/*
+	 * Max out at half the ephemeral port range so that TIME_WAIT
+	 * sockets don't tie up too many ephemeral ports.
+	 */
+	if (V_ipport_lastauto > V_ipport_firstauto)
+		halfrange = (V_ipport_lastauto - V_ipport_firstauto) / 2;
+	else
+		halfrange = (V_ipport_firstauto - V_ipport_lastauto) / 2;
+	/* Protect against goofy port ranges smaller than 32. */
+	return (imin(imax(halfrange, 32), maxsockets / 5));
+}
+/*
+ * Sysctl handler for maxtcptw.
+ *
+ * Reports tcptw_auto_size() while maxtcptw is 0, otherwise maxtcptw.
+ * A written value is applied (stored in maxtcptw and pushed to the zone
+ * with uma_zone_set_max()) only when it is at least 32; smaller values
+ * are ignored without an error.  Returns what sysctl_handle_int()
+ * returned.
+ */
+static int
+sysctl_maxtcptw(OFP_SYSCTL_HANDLER_ARGS)
+{
+	int error, new;
+	(void)arg1;
+	(void)arg2;
+
+	if (maxtcptw == 0)
+		new = tcptw_auto_size();
+	else
+		new = maxtcptw;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr)
+		if (new >= 32) {
+			maxtcptw = new;
+			uma_zone_set_max(V_tcptw_zone, maxtcptw);
+		}
+	return (error);
+}
+
+OFP_SYSCTL_PROC(_net_inet_tcp, OFP_OID_AUTO, maxtcptw, OFP_CTLTYPE_INT|OFP_CTLFLAG_RW,
+    &maxtcptw, 0, sysctl_maxtcptw, "IU",
+    "Maximum number of compressed TCP TIME_WAIT entries");
+
+VNET_DEFINE(int, ofp_nolocaltimewait) = 0;
+#define	V_nolocaltimewait	VNET(ofp_nolocaltimewait)
+OFP_SYSCTL_INT(_net_inet_tcp, OFP_OID_AUTO, nolocaltimewait, OFP_CTLFLAG_RW,
+    &ofp_nolocaltimewait, 0,
+    "Do not create compressed TCP TIME_WAIT entries for local connections");
+/*
+ * Hook for re-sizing the tcptw zone.  Currently does nothing: the
+ * resize code in the body is commented out.
+ */
+void
+ofp_tcp_tw_zone_change(void)
+{
+	/* HJo
+	if (maxtcptw == 0)
+		uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
+	*/
+}
+/*
+ * Create the "tcptw" zone, set its limit (tcptw_auto_size() while
+ * maxtcptw is 0, otherwise maxtcptw) and initialise the 2MSL queue.
+ */
+void
+ofp_tcp_tw_init(void)
+{
+	V_tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	/* TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw); */
+	if (maxtcptw == 0)
+		uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
+	else
+		uma_zone_set_max(V_tcptw_zone, maxtcptw);
+
+	OFP_TAILQ_INIT(&V_twq_2msl);
+}
+
+
+/*
+ * Move a TCP connection into TIME_WAIT state.
+ *    ofp_tcbinfo is locked.
+ *    inp is locked, and is unlocked before returning.
+ *
+ * If no tcptw can be allocated from the zone, the entry at the head of
+ * the 2MSL queue is recycled through ofp_tcp_tw_2msl_scan(1); if that
+ * yields nothing either, the connection is closed with ofp_tcp_close()
+ * instead of entering TIME_WAIT.
+ *
+ * The V_nolocaltimewait branch is currently inert: both local-address
+ * checks are commented out, so 'error' is always 0 there.
+ */
+void
+ofp_tcp_twstart(struct tcpcb *tp)
+{
+	struct tcptw *tw;
+	struct inpcb *inp = tp->t_inpcb;
+	int acknow;
+	struct socket *so;
+#ifdef INET6
+	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
+#endif
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);	/* tcp_tw_2msl_reset(). */
+	INP_WLOCK_ASSERT(inp);
+
+	if (V_nolocaltimewait) {
+		int error = 0;
+#ifdef INET6
+		if (isipv6)
+			error = 0 /*in6_localaddr(&inp->in6p_faddr)*/;
+		else
+#endif
+			error = 0 /* HJo: FIX in_localip(inp->inp_faddr)*/;
+
+		if (error) {
+			tp = ofp_tcp_close(tp);
+			if (tp != NULL)
+				INP_WUNLOCK(inp);
+			return;
+		}
+	}
+
+	tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
+	if (tw == NULL) {
+		tw = ofp_tcp_tw_2msl_scan(1);
+		if (tw == NULL) {
+			tp = ofp_tcp_close(tp);
+			if (tp != NULL)
+				INP_WUNLOCK(inp);
+			return;
+		}
+	}
+	tw->tw_inpcb = inp;
+
+	/*
+	 * Recover last window size sent.
+	 */
+	if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
+		tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
+	else
+		tw->last_win = 0;
+
+	/*
+	 * Set t_recent if timestamps are used on the connection.
+	 */
+	if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
+	    (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
+		tw->t_recent = tp->ts_recent;
+		tw->ts_offset = tp->ts_offset;
+	} else {
+		tw->t_recent = 0;
+		tw->ts_offset = 0;
+	}
+
+	tw->snd_nxt = tp->snd_nxt;
+	tw->rcv_nxt = tp->rcv_nxt;
+	tw->iss = tp->iss;
+	tw->irs = tp->irs;
+	tw->t_starttime = tp->t_starttime;
+	tw->tw_time = 0;
+
+/* XXX
+ * If this code will
+ * be used for fin-wait-2 state also, then we may need
+ * a ts_recent from the last segment.
+ */
+	acknow = tp->t_flags & TF_ACKNOW;
+
+	/*
+	 * First, discard tcpcb state, which includes stopping its timers and
+	 * freeing it.  ofp_tcp_discardcb() used to also release the inpcb, but
+	 * that work is now done in the caller.
+	 *
+	 * Note: ofp_soisdisconnected() call used to be made in ofp_tcp_discardcb(),
+	 * and might not be needed here any longer.
+	 */
+	ofp_tcp_discardcb(tp);
+	so = inp->inp_socket;
+	ofp_soisdisconnected(so);
+	/* HJo tw->tw_cred = crhold(so->so_cred);*/
+	OFP_SOCK_LOCK(so);
+	tw->tw_so_options = so->so_options;
+	OFP_SOCK_UNLOCK(so);
+	if (acknow)
+		ofp_tcp_twrespond(tw, OFP_TH_ACK);
+	inp->inp_ppcb = tw;
+	inp->inp_flags |= INP_TIMEWAIT;
+	tcp_tw_2msl_reset(tw, 0);
+
+	/*
+	 * If the inpcb owns the sole reference to the socket, then we can
+	 * detach and free the socket as it is not needed in time wait.
+	 */
+	if (inp->inp_flags & INP_SOCKREF) {
+		KASSERT(so->so_state & SS_PROTOREF,
+		    ("ofp_tcp_twstart: !SS_PROTOREF"));
+		inp->inp_flags &= ~INP_SOCKREF;
+		INP_WUNLOCK(inp);
+		ACCEPT_LOCK();
+		OFP_SOCK_LOCK(so);
+		so->so_state &= ~SS_PROTOREF;
+		ofp_sofree(so);
+	} else
+		INP_WUNLOCK(inp);
+}
+
+
+/*
+ * Returns 1 if the TIME_WAIT state was killed and we should start over,
+ * looking for a pcb in the listen state.  Returns 0 otherwise.
+ *
+ * Every return-0 path unlocks inp and frees the packet m.  The return-1
+ * path closes the tcptw with ofp_tcp_twclose(tw, 0) and returns without
+ * freeing m.  The 'to' argument is unused.
+ */
+int
+ofp_tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct ofp_tcphdr *th,
+    odp_packet_t m, int tlen)
+{
+	struct tcptw *tw;
+	int thflags;
+	tcp_seq seq;
+	(void)to;
+
+	/* ofp_tcbinfo lock required for ofp_tcp_twclose(), tcp_tw_2msl_reset(). */
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(inp);
+
+	/*
+	 * XXXRW: Time wait state for inpcb has been recycled, but inpcb is
+	 * still present.  This is undesirable, but temporarily necessary
+	 * until we work out how to handle inpcb's who's timewait state has
+	 * been removed.
+	 */
+	tw = intotw(inp);
+	if (tw == NULL)
+		goto drop;
+
+	thflags = th->th_flags;
+
+	/*
+	 * NOTE: for FIN_WAIT_2 (to be added later),
+	 * must validate sequence number before accepting RST
+	 */
+
+	/*
+	 * If the segment contains RST:
+	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
+	 *      RFC 1337.
+	 */
+	if (thflags & OFP_TH_RST)
+		goto drop;
+
+	/*
+	 * If a new connection request is received
+	 * while in TIME_WAIT, drop the old connection
+	 * and start over if the sequence numbers
+	 * are above the previous ones.
+	 */
+	if ((thflags & OFP_TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
+		ofp_tcp_twclose(tw, 0);
+		return (1);
+	}
+
+	/*
+	 * Drop the segment if it does not contain an ACK.
+	 */
+	if ((thflags & OFP_TH_ACK) == 0)
+		goto drop;
+
+	/*
+	 * Reset the 2MSL timer if this is a duplicate FIN.
+	 */
+	if (thflags & OFP_TH_FIN) {
+		seq = th->th_seq + tlen + (thflags & OFP_TH_SYN ? 1 : 0);
+		if (seq + 1 == tw->rcv_nxt)
+			tcp_tw_2msl_reset(tw, 1);
+	}
+
+	/*
+	 * Acknowledge segments with control flags and no data.
+	 */
+	if (thflags != OFP_TH_ACK || tlen == 0 ||
+	    th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
+		ofp_tcp_twrespond(tw, OFP_TH_ACK);
+drop:
+	INP_WUNLOCK(inp);
+	odp_packet_free(m);
+	return (0);
+}
+/*
+ * Tear down the TIME_WAIT state 'tw'.
+ *
+ * The caller holds the tcbinfo write lock and the write lock of the
+ * attached inpcb (both asserted).  The tcptw is unlinked from the 2MSL
+ * queue and from its inpcb, and the inpcb is dropped.  On return the
+ * inpcb has been either unlocked (a socket is still attached) or handed
+ * to ofp_in_pcbfree() (no socket).
+ *
+ * When 'reuse' is non-zero the tcptw itself is not returned to the zone,
+ * so the caller can recycle it; otherwise it is freed here.
+ */
+void
+ofp_tcp_twclose(struct tcptw *tw, int reuse)
+{
+	struct socket *so;
+	struct inpcb *inp;
+
+	/*
+	 * At this point, we are in one of two situations:
+	 *
+	 * (1) We have no socket, just an inpcb<->twtcp pair.  We can free
+	 *     all state.
+	 *
+	 * (2) We have a socket -- if we own a reference, release it and
+	 *     notify the socket layer.
+	 */
+	inp = tw->tw_inpcb;
+	KASSERT((inp->inp_flags & INP_TIMEWAIT), ("ofp_tcp_twclose: !timewait"));
+	KASSERT(intotw(inp) == tw, ("ofp_tcp_twclose: inp_ppcb != tw"));
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);	/* tcp_tw_2msl_stop(). */
+	INP_WLOCK_ASSERT(inp);
+
+	tw->tw_inpcb = NULL;
+	tcp_tw_2msl_stop(tw);
+	inp->inp_ppcb = NULL;
+	ofp_in_pcbdrop(inp);
+
+	so = inp->inp_socket;
+	if (so != NULL) {
+		/*
+		 * If there's a socket, handle two cases: first, we own a
+		 * strong reference, which we will now release, or we don't
+		 * in which case another reference exists (XXXRW: think
+		 * about this more), and we don't need to take action.
+		 */
+		if (inp->inp_flags & INP_SOCKREF) {
+			inp->inp_flags &= ~INP_SOCKREF;
+			INP_WUNLOCK(inp);
+			ACCEPT_LOCK();
+			OFP_SOCK_LOCK(so);
+			KASSERT(so->so_state & SS_PROTOREF,
+			    ("ofp_tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
+			so->so_state &= ~SS_PROTOREF;
+			ofp_sofree(so);
+		} else {
+			/*
+			 * If we don't own the only reference, the socket and
+			 * inpcb need to be left around to be handled by
+			 * tcp_usr_detach() later.
+			 */
+			INP_WUNLOCK(inp);
+		}
+	} else
+		ofp_in_pcbfree(inp);
+	TCPSTAT_INC(tcps_closed);
+	/* crfree(tw->tw_cred);*/
+	tw->tw_cred = NULL;
+	if (reuse)
+		return;
+	uma_zfree(V_tcptw_zone, tw);
+}
+/*
+ * Build and send a data-less TCP segment carrying 'flags' on behalf of
+ * the TIME_WAIT connection 'tw'.
+ *
+ * The inpcb of tw must be write-locked (asserted).  A timestamp option
+ * is added only when tw->t_recent is set and flags is exactly
+ * OFP_TH_ACK.  Returns OFP_ENOBUFS when no packet can be allocated,
+ * otherwise the result of ofp_ip_output() / ofp_ip6_output().
+ */
+int
+ofp_tcp_twrespond(struct tcptw *tw, int flags)
+{
+	struct inpcb *inp = tw->tw_inpcb;
+	struct ofp_tcphdr *th = NULL;
+
+	odp_packet_t m;
+	struct ofp_ip *ip = NULL;
+	uint32_t hdrlen, optlen;
+	int error = 0;			/* Keep compiler happy */
+	struct tcpopt to;
+#ifdef INET6
+	struct ofp_ip6_hdr *ip6 = NULL;
+	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
+#endif
+
+	INP_WLOCK_ASSERT(inp);
+
+#ifdef INET6
+	if (isipv6) {
+		hdrlen = sizeof(struct ofp_ip6_hdr) + sizeof(struct ofp_tcphdr);
+
+		m = ofp_packet_alloc(hdrlen);
+
+		if (m == ODP_PACKET_INVALID)
+			return (OFP_ENOBUFS);
+
+		odp_packet_l3_offset_set(m, 0);
+		odp_packet_l4_offset_set(m, sizeof(struct ofp_ip6_hdr));
+
+		ip6 = (struct ofp_ip6_hdr *)odp_packet_data(m);
+		th = (struct ofp_tcphdr *)(ip6 + 1);
+		ofp_tcpip_fillheaders(inp, ip6, th);
+	}
+	else
+#endif
+	{
+		hdrlen = sizeof(struct tcpiphdr);
+
+		m = ofp_packet_alloc(hdrlen);
+
+		if (m == ODP_PACKET_INVALID)
+			return (OFP_ENOBUFS);
+
+		odp_packet_l3_offset_set(m, 0);
+		odp_packet_l4_offset_set(m, sizeof(struct ofp_ip));
+
+		ip = (struct ofp_ip *)odp_packet_data(m);
+		th = (struct ofp_tcphdr *)(ip + 1);
+		ofp_tcpip_fillheaders(inp, ip, th);
+	}
+
+	to.to_flags = 0;
+
+	/*
+	 * Send a timestamp and echo-reply if both our side and our peer
+	 * have sent timestamps in our SYN's and this is not a RST.
+	 */
+	if (tw->t_recent && flags == OFP_TH_ACK) {
+		to.to_flags |= TOF_TS;
+		to.to_tsval = tcp_ts_getticks() + tw->ts_offset;
+		to.to_tsecr = tw->t_recent;
+	}
+	optlen = ofp_tcp_addoptions(&to, (uint8_t *)(th + 1));
+	/*
+	 * This is done in wrong order.
+	 *
+	 * NOTE(review): the options are written behind the TCP header
+	 * before the packet is grown with odp_packet_push_tail(), and the
+	 * result of the push is not checked -- confirm that the allocated
+	 * packet always has enough tailroom for optlen bytes.
+	 */
+	odp_packet_push_tail(m, optlen);
+
+	th->th_seq = odp_cpu_to_be_32(tw->snd_nxt);
+	th->th_ack = odp_cpu_to_be_32(tw->rcv_nxt);
+	th->th_off = (sizeof(struct ofp_tcphdr) + optlen) >> 2;
+	th->th_flags = flags;
+	th->th_win = odp_cpu_to_be_16(tw->last_win);
+
+#ifdef INET6
+	if (isipv6) {
+		odp_packet_set_csum_flags(m, CSUM_TCP_IPV6);
+		th->th_sum = 0;
+		th->th_sum = ofp_ip6_cksum(m,
+			sizeof(struct ofp_tcphdr) + optlen, OFP_IPPROTO_TCP, 0);
+		ip6->ofp_ip6_hlim = V_ip6_defhlim;/*in6_selecthlim(inp, NULL);*/
+		ip6->ofp_ip6_plen = odp_cpu_to_be_16(odp_packet_len(m) -
+			sizeof (struct ofp_ip6_hdr));
+
+		error = ofp_ip6_output(m, NULL);
+	}
+	else
+#endif
+	{
+		// HJo odp_packet_csum_flags(m) = CSUM_TCP;
+		ip->ip_len = odp_cpu_to_be_16(odp_packet_len(m));
+		if (V_path_mtu_discovery)
+			ip->ip_off |= OFP_IP_DF;
+
+		ip->ip_off = odp_cpu_to_be_16(ip->ip_off);
+		ip->ip_sum = ofp_in_cksum((uint16_t *)ip, sizeof(*ip));
+		th->th_sum = 0;
+		/* th->th_sum = ofp_in4_cksum(m); output calculates csum */
+
+		error = ofp_ip_output(m, NULL);
+	}
+
+	if (flags & OFP_TH_ACK)
+		TCPSTAT_INC(tcps_sndacks);
+	else
+		TCPSTAT_INC(tcps_sndctrl);
+	TCPSTAT_INC(tcps_sndtotal);
+	return (error);
+}
+/*
+ * (Re)start the 2MSL timeout of 'tw': set tw_time to ticks plus twice
+ * ofp_tcp_msl and append tw to the tail of the 2MSL queue.  A non-zero
+ * 'rearm' means tw is already queued and is removed first.
+ */
+static void
+tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
+{
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(tw->tw_inpcb);
+	if (rearm)
+		OFP_TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
+	tw->tw_time = ticks + 2 * ofp_tcp_msl;
+	OFP_TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
+}
+/*
+ * Remove 'tw' from the 2MSL queue; the tcbinfo write lock must be held.
+ */
+static void
+tcp_tw_2msl_stop(struct tcptw *tw)
+{
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	OFP_TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
+}
+/*
+ * Close entries starting at the head of the 2MSL queue.
+ *
+ * With reuse == 0, entries are closed (and freed) until the queue is
+ * empty or the head entry's tw_time is still ahead of ticks; the result
+ * is NULL.  With reuse != 0, the head entry is closed regardless of its
+ * tw_time and returned to the caller without being freed; NULL is
+ * returned if the queue is empty.
+ */
+struct tcptw *
+ofp_tcp_tw_2msl_scan(int reuse)
+{
+	struct tcptw *tw;
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	for (;;) {
+		tw = OFP_TAILQ_FIRST(&V_twq_2msl);
+		if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0))
+			break;
+		INP_WLOCK(tw->tw_inpcb);
+		ofp_tcp_twclose(tw, reuse);
+		if (reuse)
+			return (tw);
+	}
+	return (NULL);
+}
diff --git a/src/ofp_tcp_usrreq.c b/src/ofp_tcp_usrreq.c
new file mode 100644
index 00000000..f1ba6bd9
--- /dev/null
+++ b/src/ofp_tcp_usrreq.c
@@ -0,0 +1,2103 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.
+ * Copyright (c) 2006-2007 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2015 Nokia Solutions and Networks
+ * Copyright (c) 2015 Enea Software AB
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 + */ +#include +#include + +#include "odp.h" + +#include "ofpi_errno.h" +#include "ofpi_in.h" +#include "ofpi_ip.h" +#include "ofpi_ip6.h" +#include "ofpi_ip6_var.h" +#include "ofpi_udp.h" +#include "ofpi_icmp.h" +#include "ofpi_sysctl.h" +#include "ofpi_socketvar.h" +//#include "ofpi_socket.h" + +//#include "ofp_packet.h" + +#include "ofpi_in_pcb.h" +#include "ofpi_tcp.h" +#include "ofpi_tcp_var.h" +#include "ofpi_socketvar.h" +#include "ofpi_ip_var.h" +#include "ofpi_sockbuf.h" +#include "ofpi_socket.h" +#include "ofpi_sockstate.h" +#include "ofpi_protosw.h" +#include "ofpi_ethernet.h" +#include "ofpi_tcp_timer.h" +#include "ofpi_tcp_fsm.h" +#include "ofpi_tcp_offload.h" +#include "ofpi_ioctl.h" + +#ifdef INET6 +#include "ofpi_in6_pcb.h" +#endif + +#include "ofpi_util.h" + +#define tick (1000000/HZ) + +/* + * Macros to initialize tcp sequence numbers for + * send and receive from initial send and receive + * sequence numbers. + */ +#define tcp_rcvseqinit(tp) \ + (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 + +#define tcp_sendseqinit(tp) \ + (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \ + (tp)->snd_recover = (tp)->iss + + +/* + * TCP protocol interface to socket abstraction. 
+ */ +static int tcp_attach(struct socket *); +#ifdef INET +static int tcp_connect(struct tcpcb *, struct ofp_sockaddr *, + struct thread *td); +#endif /* INET */ +#ifdef INET6 +static int tcp6_connect(struct tcpcb *, struct ofp_sockaddr *, + struct thread *td); +#endif /* INET6 */ +static void tcp_disconnect(struct tcpcb *); +static void tcp_usrclosed(struct tcpcb *); +/*static void tcp_fill_info(struct tcpcb *, struct tcp_info *);*/ + +#ifdef TCPDEBUG +#define TCPDEBUG0 int ostate = 0 +#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 +#define TCPDEBUG2(req) if (tp && (so->so_options & OFP_SO_DEBUG)) \ + tcp_trace(TA_USER, ostate, tp, 0, 0, req) +#else +#define TCPDEBUG0 +#define TCPDEBUG1() +#define TCPDEBUG2(req) +#endif + +/* + * TCP attaches to socket via pru_attach(), reserving space, + * and an internet control block. + */ +static int +tcp_usr_attach(struct socket *so, int proto, struct thread *td) +{ + struct inpcb *inp; + struct tcpcb *tp = NULL; + int error; + TCPDEBUG0; + + (void)tp; + (void)proto; + (void)td; + + inp = sotoinpcb(so); + KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); + TCPDEBUG1(); + + error = tcp_attach(so); + if (error) + goto out; + + if ((so->so_options & OFP_SO_LINGER) && so->so_linger == 0) + so->so_linger = TCP_LINGERTIME; + + inp = sotoinpcb(so); + tp = intotcpcb(inp); +out: + TCPDEBUG2(OFP_PRU_ATTACH); + return error; +} + +/* + * tcp_detach is called when the socket layer loses its final reference + * to the socket, be it a file descriptor reference, a reference from TCP, + * etc. At this point, there is only one case in which we will keep around + * inpcb state: time wait. + * + * This function can probably be re-absorbed back into tcp_usr_detach() now + * that there is a single detach path. 
+ */
+static void
+tcp_detach(struct socket *so, struct inpcb *inp)
+{
+	struct tcpcb *tp;
+	int free_pcb;	/* non-zero: the inpcb is freed, not just unlocked */
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(inp);
+
+	KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
+	KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
+
+	tp = intotcpcb(inp);
+
+	if (inp->inp_flags & INP_TIMEWAIT) {
+		/*
+		 * Time wait, two cases.  With INP_DROPPED the time wait
+		 * state is being discarded, so everything that is left
+		 * goes away (the tcptw itself has already been discarded
+		 * by the timewait close code further up the call stack).
+		 * Without it the connection stays in timewait: only the
+		 * socket is detached and the pcb lives on until timewait
+		 * ends.
+		 *
+		 * XXXRW: Would it be cleaner to free the tcptw here?
+		 */
+		free_pcb = (inp->inp_flags & INP_DROPPED) != 0;
+		if (free_pcb) {
+			KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
+			    "INP_DROPPED && tp != NULL"));
+		}
+	} else {
+		/*
+		 * Not in timewait, again two cases.  A dropped or
+		 * embryonic connection needs no further processing and
+		 * is torn down completely.  Otherwise TCP is not done
+		 * yet but no longer needs the socket, so the pcb
+		 * persists for the time being.
+		 *
+		 * XXXRW: Does the second case still occur?
+		 */
+		free_pcb = (inp->inp_flags & INP_DROPPED ||
+		    tp->t_state < TCPS_SYN_SENT);
+		if (free_pcb)
+			ofp_tcp_discardcb(tp);
+	}
+
+	/* The socket is detached in every case; only the pcb's fate differs. */
+	ofp_in_pcbdetach(inp);
+	if (free_pcb)
+		ofp_in_pcbfree(inp);
+	else
+		INP_WUNLOCK(inp);
+}
+
+/*
+ * pru_detach() detaches the TCP protocol from the socket.
+ * If the protocol state is non-embryonic, then can't
+ * do this directly: have to initiate a pru_disconnect(),
+ * which may finish later; embryonic TCB's can just
+ * be discarded here.
+ */ +static void +tcp_usr_detach(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); + INP_INFO_WLOCK(&V_tcbinfo); + INP_WLOCK(inp); + KASSERT(inp->inp_socket != NULL, + ("tcp_usr_detach: inp_socket == NULL")); + tcp_detach(so, inp); + INP_INFO_WUNLOCK(&V_tcbinfo); +} + +#ifdef INET +/* + * Give the socket an address. + */ +static int +tcp_usr_bind(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + int error = 0; + struct inpcb *inp; + struct tcpcb *tp = NULL; + struct ofp_sockaddr_in *sinp; + (void)tp; + + sinp = (struct ofp_sockaddr_in *)nam; + if (nam->sa_len != sizeof (*sinp)) + return (OFP_EINVAL); + /* + * Must check for multicast addresses and disallow binding + * to them. + */ + if (sinp->sin_family == OFP_AF_INET && + OFP_IN_MULTICAST(odp_be_to_cpu_32(sinp->sin_addr.s_addr))) + return (OFP_EAFNOSUPPORT); + + TCPDEBUG0; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_EINVAL; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); + INP_HASH_WLOCK(&V_tcbinfo); + error = ofp_in_pcbbind(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); +out: + TCPDEBUG2(OFP_PRU_BIND); + INP_WUNLOCK(inp); + + return (error); +} +#endif /* INET */ + +#ifdef INET6 +static int +tcp6_usr_bind(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + int error = 0; + struct inpcb *inp; + struct tcpcb *tp = NULL; + struct ofp_sockaddr_in6 *sin6p; + + sin6p = (struct ofp_sockaddr_in6 *)nam; + if (nam->sa_len != sizeof (*sin6p)) + return (OFP_EINVAL); + /* + * Must check for multicast addresses and disallow binding + * to them. 
+ */ + if (sin6p->sin6_family == OFP_AF_INET6 && + OFP_IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) + return (OFP_EAFNOSUPPORT); + + TCPDEBUG0; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_EINVAL; + goto out; + } + + tp = intotcpcb(inp); + (void)tp; + TCPDEBUG1(); + INP_HASH_WLOCK(&V_tcbinfo); + inp->inp_vflag &= ~INP_IPV4; + inp->inp_vflag |= INP_IPV6; + + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) + inp->inp_vflag |= INP_IPV4; + else if (OFP_IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { + struct ofp_sockaddr_in sin; + + ofp_in6_sin6_2_sin(&sin, sin6p); + inp->inp_vflag |= INP_IPV4; + inp->inp_vflag &= ~INP_IPV6; + error = ofp_in_pcbbind(inp, (struct ofp_sockaddr *)&sin, + td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); + goto out; + } + } + error = ofp_in6_pcbbind(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); +out: + TCPDEBUG2(OFP_PRU_BIND); + INP_WUNLOCK(inp); + return (error); +} +#endif /* INET6 */ + +#ifdef INET +/* + * Prepare to accept connections. 
+ */ +static int +tcp_usr_listen(struct socket *so, int backlog, struct thread *td) +{ + int error = 0; + struct inpcb *inp; + struct tcpcb *tp = NULL; + + TCPDEBUG0; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_EINVAL; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); + OFP_SOCK_LOCK(so); + error = ofp_solisten_proto_check(so); + INP_HASH_WLOCK(&V_tcbinfo); + if (error == 0 && inp->inp_lport == 0) + error = ofp_in_pcbbind(inp, (struct ofp_sockaddr *)0, td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); + if (error == 0) { + tp->t_state = TCPS_LISTEN; + ofp_solisten_proto(so, backlog); + tcp_offload_listen_open(tp); + } + OFP_SOCK_UNLOCK(so); + +out: + TCPDEBUG2(OFP_PRU_LISTEN); + INP_WUNLOCK(inp); + return (error); +} +#endif /* INET */ + +#ifdef INET6 +static int +tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) +{ + int error = 0; + struct inpcb *inp; + struct tcpcb *tp = NULL; + + TCPDEBUG0; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); + INP_WLOCK(inp); + + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_EINVAL; + goto out; + } + + tp = intotcpcb(inp); + TCPDEBUG1(); + OFP_SOCK_LOCK(so); + + error = ofp_solisten_proto_check(so); + + INP_HASH_WLOCK(&V_tcbinfo); + if (error == 0 && inp->inp_lport == 0) { + inp->inp_vflag &= ~INP_IPV4; + + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) + inp->inp_vflag |= INP_IPV4; + error = ofp_in6_pcbbind(inp, (struct ofp_sockaddr *)0, td->td_ucred); + } + INP_HASH_WUNLOCK(&V_tcbinfo); + + if (error == 0) { + tp->t_state = TCPS_LISTEN; + ofp_solisten_proto(so, backlog); + } + OFP_SOCK_UNLOCK(so); +out: + TCPDEBUG2(OFP_PRU_LISTEN); + INP_WUNLOCK(inp); + return (error); +} +#endif /* INET6 */ + +#ifdef INET +/* + * Initiate connection to peer. + * Create a template for use in transmissions on this connection. 
+ * Enter SYN_SENT state, and mark socket as connecting.
+ * Start keep-alive timer, and seed output sequence space.
+ * Send initial segment on connection.
+ */
+static int
+tcp_usr_connect(struct socket *so, struct ofp_sockaddr *nam, struct thread *td)
+{	/* Returns 0 or an error code; the address checks run before any lock is taken. */
+	int error = 0;
+	struct inpcb *inp;
+	struct tcpcb *tp = NULL;
+	struct ofp_sockaddr_in *sinp;
+
+	sinp = (struct ofp_sockaddr_in *)nam;
+	if (nam->sa_len != sizeof (*sinp))	/* length must match struct ofp_sockaddr_in exactly */
+		return (OFP_EINVAL);
+	/*
+	 * Must disallow TCP ``connections'' to multicast addresses.
+	 */
+	if (sinp->sin_family == OFP_AF_INET
+	    && OFP_IN_MULTICAST(odp_be_to_cpu_32(sinp->sin_addr.s_addr)))
+		return (OFP_EAFNOSUPPORT);
+#if 0
+	if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
+		return (error);
+#endif
+	TCPDEBUG0;
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
+	INP_WLOCK(inp);		/* released at "out" on every path */
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		error = OFP_EINVAL;	/* pcb is in TIME_WAIT or was dropped */
+		goto out;
+	}
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	if ((error = tcp_connect(tp, nam, td)) != 0)	/* set up the pcb and enter SYN_SENT */
+		goto out;
+	error = tcp_output_connect(so, nam);	/* only reached when tcp_connect() succeeded */
+out:
+	TCPDEBUG2(OFP_PRU_CONNECT);
+	INP_WUNLOCK(inp);
+	return (error);
+}
+#endif /* INET */
+
+#ifdef INET6
+static int
+tcp6_usr_connect(struct socket *so, struct ofp_sockaddr *nam, struct thread *td)
+{	/* IPv6 connect handler; a v4-mapped destination is routed through tcp_connect(). */
+	int error = 0;
+	struct inpcb *inp;
+	struct tcpcb *tp = NULL;
+	struct ofp_sockaddr_in6 *sin6p;
+
+	TCPDEBUG0;
+
+	sin6p = (struct ofp_sockaddr_in6 *)nam;
+	if (nam->sa_len != sizeof (*sin6p))	/* length must match struct ofp_sockaddr_in6 exactly */
+		return (OFP_EINVAL);
+	/*
+	 * Must disallow TCP ``connections'' to multicast addresses.
+	 */
+	if (sin6p->sin6_family == OFP_AF_INET6
+	    && OFP_IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+		return (OFP_EAFNOSUPPORT);
+
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
+	INP_WLOCK(inp);		/* released at "out" on every path */
+
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		error = OFP_EINVAL;	/* pcb is in TIME_WAIT or was dropped */
+		goto out;
+	}
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+
+#ifdef INET
+	/*
+	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
+	 * therefore probably require the hash lock, which isn't held here.
+	 * Is this a significant problem?
+	 */
+	if (OFP_IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+		struct ofp_sockaddr_in sin;
+
+		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
+			error = OFP_EINVAL;	/* V6ONLY socket cannot use a v4-mapped peer */
+			goto out;
+		}
+
+		ofp_in6_sin6_2_sin(&sin, sin6p);
+		inp->inp_vflag |= INP_IPV4;	/* flags are switched before tcp_connect() and not restored if it fails */
+		inp->inp_vflag &= ~INP_IPV6;
+#if 0
+		if ((error = prison_remote_ip4(td->td_ucred,
+		    &sin.sin_addr)) != 0)
+			goto out;
+#endif
+		if ((error = tcp_connect(tp, (struct ofp_sockaddr *)&sin, td)) != 0)
+			goto out;
+		error = tcp_output_connect(so, nam);	/* passes the original nam, not the converted sin */
+		goto out;
+	}
+#endif
+	inp->inp_vflag &= ~INP_IPV4;	/* native IPv6 destination from here on */
+	inp->inp_vflag |= INP_IPV6;
+	inp->inp_inc.inc_flags |= INC_ISIPV6;
+#if 0
+	if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
+		goto out;
+#endif
+	if ((error = tcp6_connect(tp, nam, td)) != 0)
+		goto out;
+	error = tcp_output_connect(so, nam);
+out:
+	TCPDEBUG2(OFP_PRU_CONNECT);
+	INP_WUNLOCK(inp);
+	return (error);
+}
+#endif /* INET6 */
+
+/*
+ * Initiate disconnect from peer.
+ * If connection never passed embryonic stage, just drop;
+ * else if don't need to let data drain, then can just drop anyways,
+ * else have to begin TCP shutdown process: mark socket disconnecting,
+ * drain unread data, state switch to reflect user close, and
+ * send segment (e.g. FIN) to peer. Socket will be really disconnected
+ * when peer sends FIN and acks ours.
+ *
+ * SHOULD IMPLEMENT LATER OFP_PRU_CONNECT VIA REALLOC TCPCB.
+ */ +static int +tcp_usr_disconnect(struct socket *so) +{ + struct inpcb *inp; + struct tcpcb *tp = NULL; + int error = 0; + + TCPDEBUG0; + INP_INFO_WLOCK(&V_tcbinfo); + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_ECONNRESET; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); + tcp_disconnect(tp); +out: + TCPDEBUG2(OFP_PRU_DISCONNECT); + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + return (error); +} + +#ifdef INET +/* + * Accept a connection. Essentially all the work is done at higher levels; + * just return the address of the peer, storing through addr. + * + * The rationale for acquiring the ofp_tcbinfo lock here is somewhat complicated, + * and is described in detail in the commit log entry for r175612. Acquiring + * it delays an accept(2) racing with ofp_sonewconn(), which inserts the socket + * before the inpcb address/port fields are initialized. A better fix would + * prevent the socket from being placed in the listen queue until all fields + * are fully initialized. + */ +static int +tcp_usr_accept(struct socket *so, struct ofp_sockaddr **nam) +{ + int error = 0; + struct inpcb *inp = NULL; + struct tcpcb *tp = NULL; + struct ofp_in_addr addr; + ofp_in_port_t port = 0; + TCPDEBUG0; + (void)tp; + + if (so->so_state & SS_ISDISCONNECTED) + return (OFP_ECONNABORTED); + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); + if (!(so->so_state & SS_EVENT)) /* Already locked in event state. */ + INP_INFO_RLOCK(&V_tcbinfo); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_ECONNABORTED; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); + + /* + * We inline ofp_in_getpeeraddr and COMMON_END here, so that we can + * copy the data of interest and defer the malloc until after we + * release the lock. 
+ */ + port = inp->inp_fport; + addr = inp->inp_faddr; + +out: + TCPDEBUG2(OFP_PRU_ACCEPT); + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + if (error == 0) + *nam = ofp_in_sockaddr(port, &addr); + return error; +} +#endif /* INET */ + +#ifdef INET6 +static int +tcp6_usr_accept(struct socket *so, struct ofp_sockaddr **nam) +{ + struct inpcb *inp = NULL; + int error = 0; + struct tcpcb *tp = NULL; + struct ofp_in_addr addr; + struct ofp_in6_addr addr6; + ofp_in_port_t port = 0; + int v4 = 0; + TCPDEBUG0; + + if (so->so_state & SS_ISDISCONNECTED) + return (OFP_ECONNABORTED); + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); + INP_INFO_RLOCK(&V_tcbinfo); + INP_WLOCK(inp); + + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_ECONNABORTED; + goto out; + } + tp = intotcpcb(inp); + (void)tp; + TCPDEBUG1(); + + /* + * We inline in6_mapped_peeraddr and COMMON_END here, so that we can + * copy the data of interest and defer the malloc until after we + * release the lock. + */ + if (inp->inp_vflag & INP_IPV4) { + v4 = 1; + port = inp->inp_fport; + addr = inp->inp_faddr; + } else { + port = inp->inp_fport; + addr6 = inp->in6p_faddr; + } + +out: + TCPDEBUG2(OFP_PRU_ACCEPT); + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + + if (error == 0) { + if (v4) + *nam = ofp_in6_v4mapsin6_sockaddr(port, &addr); + else + *nam = ofp_in6_sockaddr(port, &addr6); + } + return error; +} +#endif /* INET6 */ + +/* + * Mark the connection as being incapable of further output. 
+ */
+static int
+tcp_usr_shutdown(struct socket *so)
+{	/* Returns 0, OFP_ECONNRESET if the pcb is gone, or the result of tcp_output_disconnect(). */
+	int error = 0;
+	struct inpcb *inp;
+	struct tcpcb *tp = NULL;
+
+	TCPDEBUG0;
+	INP_INFO_WLOCK(&V_tcbinfo);	/* tcp_usrclosed() asserts this lock */
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("inp == NULL"));
+	INP_WLOCK(inp);
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		error = OFP_ECONNRESET;
+		goto out;
+	}
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	ofp_socantsendmore(so);
+	tcp_usrclosed(tp);
+	if (!(inp->inp_flags & INP_DROPPED))	/* re-check: the flag is tested again after tcp_usrclosed() */
+		error = tcp_output_disconnect(tp);
+
+out:
+	TCPDEBUG2(OFP_PRU_SHUTDOWN);
+	INP_WUNLOCK(inp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+
+	return (error);
+}
+
+/*
+ * After a receive, possibly send window update to peer.
+ *
+ * The flags argument is unused. Returns 0, or OFP_ECONNRESET if the
+ * inpcb is in TIME_WAIT or has been dropped; the result of
+ * tcp_output_rcvd() is not propagated.
+ */
+static int
+tcp_usr_rcvd(struct socket *so, int flags)
+{
+	struct inpcb *inp;
+	struct tcpcb *tp = NULL;
+	int error = 0;
+	(void)flags;
+
+	TCPDEBUG0;
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
+	INP_WLOCK(inp);
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		error = OFP_ECONNRESET;
+		goto out;
+	}
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	tcp_output_rcvd(tp);
+
+out:
+	TCPDEBUG2(OFP_PRU_RCVD);
+	INP_WUNLOCK(inp);
+	return (error);
+}
+
+/*
+ * Do a send by putting data in output queue and updating urgent
+ * marker if URG set. Possibly send more data. Unlike the other
+ * pru_*() routines, the mbuf chains are our responsibility. We
+ * must either enqueue them or free them. The other pru_* routines
+ * generally are caller-frees.
+ *
+ * Packet ownership as implemented here: when the inpcb is gone or a
+ * non-empty control packet is supplied, both m and control are freed and
+ * an error is returned. An empty control packet is always freed. Once m
+ * has been appended to so_snd it is not freed here, even if the implied
+ * connect fails afterwards.
+ *
+ * The pcbinfo write lock is taken only when PRUS_EOF is set; info_locked
+ * records that so the exit path releases exactly what was acquired.
+ */
+static int
+tcp_usr_send(struct socket *so, int flags, odp_packet_t m,
+    struct ofp_sockaddr *nam, odp_packet_t control, struct thread *td)
+{
+	int error = 0;
+	struct inpcb *inp;
+	struct tcpcb *tp = NULL;
+#ifdef INET6
+	int isipv6;
+#endif
+	int info_locked = 0;
+	TCPDEBUG0;
+	(void)td;
+	/*
+	 * We require the pcbinfo lock if we will close the socket as part of
+	 * this call.
+	 */
+	if (flags & PRUS_EOF) {
+		INP_INFO_WLOCK(&V_tcbinfo);
+		info_locked = 1;
+	}
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
+	INP_WLOCK(inp);
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {	/* pcb gone: free both packets and fail */
+		if (control != ODP_PACKET_INVALID)
+			odp_packet_free(control);
+		if (m != ODP_PACKET_INVALID)
+			odp_packet_free(m);
+		error = OFP_ECONNRESET;
+		goto out;
+	}
+#ifdef INET6
+	isipv6 = nam && nam->sa_family == OFP_AF_INET6;	/* selects tcp6_connect() for the implied connect */
+#endif /* INET6 */
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	if (control != ODP_PACKET_INVALID) {
+		/* TCP doesn't do control messages (rights, creds, etc) */
+		if (odp_packet_len(control)) {
+			odp_packet_free(control);
+			if (m != ODP_PACKET_INVALID)
+				odp_packet_free(m);
+			error = OFP_EINVAL;
+			goto out;
+		}
+		odp_packet_free(control);	/* empty control, just free it */
+	}
+	if (!(flags & PRUS_OOB)) {
+		ofp_sbappendstream(&so->so_snd, m);	/* from here on m belongs to so_snd */
+		if (nam && tp->t_state < TCPS_SYN_SENT) {
+			/*
+			 * Do implied connect if not yet connected,
+			 * initialize window to default value, and
+			 * initialize maxseg/maxopd using peer's cached
+			 * MSS.
+			 */
+#ifdef INET6
+			if (isipv6)
+				error = tcp6_connect(tp, nam, td);
+			else
+#endif
+			error = tcp_connect(tp, nam, td);
+			if (error)
+				goto out;
+			tp->snd_wnd = OFP_TTCP_CLIENT_SND_WND;
+			ofp_tcp_mss(tp, -1);
+		}
+		if (flags & PRUS_EOF) {
+			/*
+			 * Close the send side of the connection after
+			 * the data is sent.
+			 */
+			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+			ofp_socantsendmore(so);
+			tcp_usrclosed(tp);
+		}
+		if (!(inp->inp_flags & INP_DROPPED)) {
+			if (flags & PRUS_MORETOCOME)	/* TF_MORETOCOME is set only for this one output call */
+				t_flags_or(tp->t_flags, TF_MORETOCOME);
+			error = tcp_output_send(tp);
+			if (flags & PRUS_MORETOCOME)
+				t_flags_and(tp->t_flags, ~TF_MORETOCOME);
+		}
+	} else {
+		/*
+		 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
+		 */
+		SOCKBUF_LOCK(&so->so_snd);
+		if (sbspace(&so->so_snd) < -512) {
+			SOCKBUF_UNLOCK(&so->so_snd);
+			odp_packet_free(m);	/* NOTE(review): no ODP_PACKET_INVALID check here, unlike the paths above */
+			error = OFP_ENOBUFS;
+			goto out;
+		}
+		/*
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data. We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section.
+		 * Otherwise, snd_up should be one lower.
+		 */
+		ofp_sbappendstream_locked(&so->so_snd, m);
+		SOCKBUF_UNLOCK(&so->so_snd);
+		if (nam && tp->t_state < TCPS_SYN_SENT) {
+			/*
+			 * Do implied connect if not yet connected,
+			 * initialize window to default value, and
+			 * initialize maxseg/maxopd using peer's cached
+			 * MSS.
+			 */
+#ifdef INET6
+			if (isipv6)
+				error = tcp6_connect(tp, nam, td);
+			else
+#endif
+			error = tcp_connect(tp, nam, td);
+			if (error)
+				goto out;
+			tp->snd_wnd = OFP_TTCP_CLIENT_SND_WND;
+			ofp_tcp_mss(tp, -1);
+		}
+		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;	/* urgent pointer: just past everything queued */
+		t_flags_or(tp->t_flags, TF_FORCEDATA);	/* TF_FORCEDATA is set only for this one output call */
+		error = tcp_output_send(tp);
+		t_flags_and(tp->t_flags, ~TF_FORCEDATA);
+	}
+out:
+	TCPDEBUG2((flags & PRUS_OOB) ? OFP_PRU_SENDOOB :
+	    ((flags & PRUS_EOF) ? OFP_PRU_SEND_EOF : OFP_PRU_SEND));
+#ifdef PASSIVE_INET
+	if (inp->inp_flags2 & INP_PASSIVE)
+		in_passive_release_locks(so);
+	else
+#endif
+	INP_WUNLOCK(inp);
+	if (info_locked)
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+	return (error);
+}
+
+/*
+ * Abort the TCP. Drop the connection abruptly.
+ *
+ * Calls ofp_tcp_drop() with OFP_ECONNABORTED when the inpcb is neither in
+ * TIME_WAIT nor dropped. If INP_DROPPED is still clear afterwards, sets
+ * SS_PROTOREF on the socket and INP_SOCKREF on the inpcb.
+ */
+static void
+tcp_usr_abort(struct socket *so)
+{
+	struct inpcb *inp;
+	struct tcpcb *tp = NULL;
+	TCPDEBUG0;
+
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
+
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(inp);
+	KASSERT(inp->inp_socket != NULL,
+	    ("tcp_usr_abort: inp_socket == NULL"));
+
+	/*
+	 * If we still have full TCP state, and we're not dropped, drop.
+	 */
+	if (!(inp->inp_flags & INP_TIMEWAIT) &&
+	    !(inp->inp_flags & INP_DROPPED)) {
+		tp = intotcpcb(inp);
+		TCPDEBUG1();
+		ofp_tcp_drop(tp, OFP_ECONNABORTED);
+		TCPDEBUG2(OFP_PRU_ABORT);
+	}
+	if (!(inp->inp_flags & INP_DROPPED)) {	/* tested again after the possible drop above */
+		OFP_SOCK_LOCK(so);
+		so->so_state |= SS_PROTOREF;
+		OFP_SOCK_UNLOCK(so);
+		inp->inp_flags |= INP_SOCKREF;
+	}
+	INP_WUNLOCK(inp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+}
+
+/*
+ * TCP socket is closed. Start friendly disconnect.
+ *
+ * Same shape as tcp_usr_abort(), but calls tcp_disconnect() instead of
+ * ofp_tcp_drop().
+ */
+static void
+tcp_usr_close(struct socket *so)
+{
+	struct inpcb *inp;
+	struct tcpcb *tp = NULL;
+	TCPDEBUG0;
+
+	inp = sotoinpcb(so);
+
+	KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL so=%p", so));
+
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(inp);
+
+	KASSERT(inp->inp_socket != NULL,
+	    ("tcp_usr_close: inp_socket == NULL, inp=%p", inp));
+
+	/*
+	 * If we still have full TCP state, and we're not dropped, initiate
+	 * a disconnect.
+	 */
+	if (!(inp->inp_flags & INP_TIMEWAIT) &&
+	    !(inp->inp_flags & INP_DROPPED)) {
+		tp = intotcpcb(inp);
+		TCPDEBUG1();
+		tcp_disconnect(tp);
+		TCPDEBUG2(OFP_PRU_CLOSE);
+	}
+	if (!(inp->inp_flags & INP_DROPPED)) {	/* tested again after the possible disconnect above */
+		OFP_SOCK_LOCK(so);
+		so->so_state |= SS_PROTOREF;
+		OFP_SOCK_UNLOCK(so);
+		inp->inp_flags |= INP_SOCKREF;
+	}
+	INP_WUNLOCK(inp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+}
+
+/*
+ * Receive out-of-band data.
+ */ +static int +tcp_usr_rcvoob(struct socket *so, odp_packet_t m, int flags) +{ + int error = 0; + struct inpcb *inp; + struct tcpcb *tp = NULL; + (void)m; + (void)flags; + + TCPDEBUG0; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = OFP_ECONNRESET; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); + if ((so->so_oobmark == 0 && + (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || + so->so_options & OFP_SO_OOBINLINE || + tp->t_oobflags & OFP_TCPOOB_HADDATA) { + error = OFP_EINVAL; + goto out; + } + if ((tp->t_oobflags & OFP_TCPOOB_HAVEDATA) == 0) { + error = OFP_EWOULDBLOCK; + goto out; + } + /* HJo: FIX: + odp_packet_len(m) = 1; + *(char *)odp_packet_data(m) = tp->t_iobc; + if ((flags & OFP_MSG_PEEK) == 0) + tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); + */ + +out: + TCPDEBUG2(OFP_PRU_RCVOOB); + INP_WUNLOCK(inp); + return (error); +} + +struct pr_usrreqs ofp_tcp_usrreqs = { + .pru_abort = tcp_usr_abort, + .pru_accept = tcp_usr_accept, + .pru_attach = tcp_usr_attach, + .pru_bind = tcp_usr_bind, + .pru_connect = tcp_usr_connect, + .pru_control = ofp_in_control, + .pru_detach = tcp_usr_detach, + .pru_disconnect = tcp_usr_disconnect, + .pru_listen = tcp_usr_listen, + .pru_peeraddr = ofp_in_getpeeraddr, + .pru_rcvd = tcp_usr_rcvd, + .pru_rcvoob = tcp_usr_rcvoob, + .pru_send = tcp_usr_send, + .pru_shutdown = tcp_usr_shutdown, + .pru_sockaddr = ofp_in_getsockaddr, + .pru_sosetlabel = ofp_in_pcbsosetlabel, + .pru_close = tcp_usr_close, + .pru_sosend = ofp_sosend_generic, + .pru_soreceive = ofp_soreceive_generic, +}; + +#ifdef INET6 +struct pr_usrreqs ofp_tcp6_usrreqs = { + .pru_abort = tcp_usr_abort, + .pru_accept = tcp6_usr_accept, + .pru_attach = tcp_usr_attach, + .pru_bind = tcp6_usr_bind, + .pru_connect = tcp6_usr_connect, + .pru_control = NULL/*in6_control*/, + .pru_detach = tcp_usr_detach, + .pru_disconnect = tcp_usr_disconnect, + .pru_listen 
= tcp6_usr_listen, + .pru_peeraddr = NULL/*in6_mapped_peeraddr*/, + .pru_rcvd = tcp_usr_rcvd, + .pru_rcvoob = tcp_usr_rcvoob, + .pru_send = tcp_usr_send, + .pru_shutdown = tcp_usr_shutdown, + .pru_sockaddr = NULL/*in6_mapped_sockaddr*/, + .pru_sosetlabel = ofp_in_pcbsosetlabel, + .pru_close = tcp_usr_close, + .pru_sosend = ofp_sosend_generic, + .pru_soreceive = ofp_soreceive_generic, +}; +#endif /* INET6 */ + +#ifdef INET +/* + * Common subroutine to open a TCP connection to remote host specified + * by struct ofp_sockaddr_in in mbuf *nam. Call ofp_in_pcbbind to assign a local + * port number if needed. Call ofp_in_pcbconnect_setup to do the routing and + * to choose a local host address (interface). If there is an existing + * incarnation of the same connection in TIME-WAIT state and if the remote + * host was sending CC options and if the connection duration was < MSL, then + * truncate the previous TIME-WAIT state and proceed. + * Initialize connection parameters and enter SYN-SENT state. + */ +static int +tcp_connect(struct tcpcb *tp, struct ofp_sockaddr *nam, struct thread *td) +{ + struct inpcb *inp = tp->t_inpcb, *oinp; + struct socket *so = inp->inp_socket; + struct ofp_in_addr laddr; + uint16_t lport; + int error; + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK(&V_tcbinfo); + + if (inp->inp_lport == 0) { + error = ofp_in_pcbbind(inp, (struct ofp_sockaddr *)0, td->td_ucred); + if (error) + goto out; + } + + /* + * Cannot simply call ofp_in_pcbconnect, because there might be an + * earlier incarnation of this same connection still in + * TIME_WAIT state, creating an ADDRINUSE error. 
+ */ + laddr = inp->inp_laddr; + lport = inp->inp_lport; + error = ofp_in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, + &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); + if (error && oinp == NULL) + goto out; + if (oinp) { + error = OFP_EADDRINUSE; + goto out; + } + inp->inp_laddr = laddr; + ofp_in_pcbrehash(inp); + INP_HASH_WUNLOCK(&V_tcbinfo); + + /* + * Compute window scaling to request: + * Scale to fit into sweet spot. See tcp_syncache.c. + * XXX: This should move to ofp_tcp_output(). + */ + while (tp->request_r_scale < OFP_TCP_MAX_WINSHIFT && + ((uint64_t)OFP_TCP_MAXWIN << tp->request_r_scale) < ofp_sb_max) + tp->request_r_scale++; + + ofp_soisconnecting(so); + TCPSTAT_INC(tcps_connattempt); + tp->t_state = TCPS_SYN_SENT; + ofp_tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); + tp->iss = ofp_tcp_new_isn(tp); + tcp_sendseqinit(tp); + + return 0; + +out: + INP_HASH_WUNLOCK(&V_tcbinfo); + return (error); +} +#endif /* INET */ + +#ifdef INET6 +static int +tcp6_connect(struct tcpcb *tp, struct ofp_sockaddr *nam, struct thread *td) +{ + struct inpcb *inp = tp->t_inpcb, *oinp; + struct socket *so = inp->inp_socket; + struct ofp_sockaddr_in6 *sin6 = (struct ofp_sockaddr_in6 *)nam; + struct ofp_in6_addr addr6; + int error; + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK(&V_tcbinfo); + + if (inp->inp_lport == 0) { + error = ofp_in6_pcbbind(inp, (struct ofp_sockaddr *)0, td->td_ucred); + if (error) + goto out; + } + + /* + * Cannot simply call ofp_in_pcbconnect, because there might be an + * earlier incarnation of this same connection still in + * TIME_WAIT state, creating an ADDRINUSE error. + * in6_pcbladdr() also handles scope zone IDs. + * + * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked() + * outside of in6_pcb.c if there were an in6_pcbconnect_setup(). 
+ */ + error = ofp_in6_pcbladdr(inp, nam, &addr6); + if (error) + goto out; + oinp = ofp_in6_pcblookup_hash_locked(inp->inp_pcbinfo, + &sin6->sin6_addr, sin6->sin6_port, + OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) + ? &addr6 + : &inp->in6p_laddr, + inp->inp_lport, 0, NULL); + if (oinp) { + printf("OFP_EADDRINUSE\n"); + error = OFP_EADDRINUSE; + goto out; + } + + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) + inp->in6p_laddr = addr6; + + inp->in6p_faddr = sin6->sin6_addr; + inp->inp_fport = sin6->sin6_port; + /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ + inp->inp_flow &= ~OFP_IPV6_FLOWLABEL_MASK; + if (inp->inp_flags & IN6P_AUTOFLOWLABEL) + inp->inp_flow |= + (odp_cpu_to_be_32(ofp_ip6_randomflowlabel()) + & OFP_IPV6_FLOWLABEL_MASK); + ofp_in_pcbrehash(inp); + INP_HASH_WUNLOCK(&V_tcbinfo); + + /* Compute window scaling to request. */ + while (tp->request_r_scale < OFP_TCP_MAX_WINSHIFT && + (uint64_t)(OFP_TCP_MAXWIN << tp->request_r_scale) < ofp_sb_max) + tp->request_r_scale++; + + ofp_soisconnecting(so); + TCPSTAT_INC(tcps_connattempt); + tp->t_state = TCPS_SYN_SENT; + ofp_tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); + tp->iss = ofp_tcp_new_isn(tp); + tcp_sendseqinit(tp); + + return 0; + +out: + INP_HASH_WUNLOCK(&V_tcbinfo); + return error; +} +#endif /* INET6 */ + +#if 0 +/* + * Export TCP internal state information via a struct tcp_info, based on the + * Linux 2.6 API. Not ABI compatible as our constants are mapped differently + * (TCP state machine, etc). We export all information using FreeBSD-native + * constants -- for example, the numeric values for tcpi_state will differ + * from Linux. 
+ */
+static void
+tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
+{	/* Zeroes *ti, then copies a snapshot of tp into it; currently compiled out by the enclosing #if 0. */
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);	/* caller must hold the inpcb write lock */
+	bzero(ti, sizeof(*ti));
+
+	ti->tcpi_state = tp->t_state;
+	if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))	/* reported only if both requested and received */
+		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+	if (tp->t_flags & TF_SACK_PERMIT)
+		ti->tcpi_options |= TCPI_OPT_SACK;
+	if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {	/* reported only if both requested and received */
+		ti->tcpi_options |= TCPI_OPT_WSCALE;
+		ti->tcpi_snd_wscale = tp->snd_scale;
+		ti->tcpi_rcv_wscale = tp->rcv_scale;
+	}
+
+	ti->tcpi_rto = tp->t_rxtcur * tick;	/* timer values are scaled by `tick` (defined elsewhere) */
+	ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
+	ti->tcpi_rtt = ((uint64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
+	ti->tcpi_rttvar = ((uint64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
+
+	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
+	ti->tcpi_snd_cwnd = tp->snd_cwnd;
+
+	/*
+	 * FreeBSD-specific extension fields for tcp_info.
+	 */
+	ti->tcpi_rcv_space = tp->rcv_wnd;
+	ti->tcpi_rcv_nxt = tp->rcv_nxt;
+	ti->tcpi_snd_wnd = tp->snd_wnd;
+	ti->tcpi_snd_bwnd = 0;		/* Unused, kept for compat. */
+	ti->tcpi_snd_nxt = tp->snd_nxt;
+	ti->tcpi_snd_mss = tp->t_maxseg;	/* send and receive MSS both come from t_maxseg */
+	ti->tcpi_rcv_mss = tp->t_maxseg;
+	if (tp->t_flags & TF_TOE)
+		ti->tcpi_options |= TCPI_OPT_TOE;
+	ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
+	ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
+	ti->tcpi_snd_zerowin = tp->t_sndzerowin;
+}
+#endif
+
+/*
+ * ofp_tcp_ctloutput() must drop the inpcb lock before performing copyin on
+ * socket option arguments. When it re-acquires the lock after the copy, it
+ * has to revalidate that the connection is still valid for the socket
+ * option.
+ */
+#define INP_WLOCK_RECHECK(inp) do {					\
+	INP_WLOCK(inp);							\
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {		\
+		INP_WUNLOCK(inp);					\
+		return (OFP_ECONNRESET);				\
+	}								\
+	tp = intotcpcb(inp);						\
+} while(0)	/* note: returns from the enclosing function and assigns the caller's local `tp` */
+
+int
+ofp_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
+{	/* The option handling below is compiled out (#if 0); the live #else branch returns 0 for every request. */
+#if 0
+	int	error, opt, optval;
+	uint32_t	ui;
+	struct	inpcb *inp;
+	struct	tcpcb *tp;
+	struct	tcp_info ti;
+	char buf[TCP_CA_NAME_MAX];
+	struct cc_algo *algo;
+
+	error = 0;
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("ofp_tcp_ctloutput: inp == NULL"));
+	INP_WLOCK(inp);
+	if (sopt->sopt_level != OFP_IPPROTO_TCP) {	/* not a TCP-level option: hand off to the IP layer */
+#ifdef INET6
+		if (inp->inp_vflag & INP_IPV6PROTO) {
+			INP_WUNLOCK(inp);
+			error = ip6_ctloutput(so, sopt);
+		}
+#endif /* INET6 */
+#if defined(INET6) && defined(INET)
+		else
+#endif
+#ifdef INET
+		{
+			INP_WUNLOCK(inp);
+			error = ip_ctloutput(so, sopt);
+		}
+#endif
+		return (error);
+	}
+	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+		INP_WUNLOCK(inp);
+		return (OFP_ECONNRESET);
+	}
+
+	switch (sopt->sopt_dir) {
+	case SOPT_SET:	/* each case unlocks, copies the argument in, then relocks via INP_WLOCK_RECHECK */
+		switch (sopt->sopt_name) {
+#ifdef TCP_SIGNATURE
+		case TCP_MD5SIG:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+			    sizeof optval);
+			if (error)
+				return (error);
+
+			INP_WLOCK_RECHECK(inp);
+			if (optval > 0)
+				t_flags_or(tp->t_flags, TF_SIGNATURE);
+			else
+				t_flags_and(tp->t_flags, ~TF_SIGNATURE);
+			INP_WUNLOCK(inp);
+			break;
+#endif /* TCP_SIGNATURE */
+		case TCP_NODELAY:
+		case TCP_NOOPT:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+			    sizeof optval);
+			if (error)
+				return (error);
+
+			INP_WLOCK_RECHECK(inp);
+			switch (sopt->sopt_name) {	/* map the option name to its t_flags bit */
+			case TCP_NODELAY:
+				opt = TF_NODELAY;
+				break;
+			case TCP_NOOPT:
+				opt = TF_NOOPT;
+				break;
+			default:
+				opt = 0; /* dead code to fool gcc */
+				break;
+			}
+
+			if (optval)
+				t_flags_or(tp->t_flags, opt);
+			else
+				t_flags_and(tp->t_flags, ~opt);
+			INP_WUNLOCK(inp);
+			break;
+
+		case TCP_NOPUSH:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+			    sizeof optval);
+			if (error)
+				return (error);
+
+			INP_WLOCK_RECHECK(inp);
+			if (optval)
+				t_flags_or(tp->t_flags, TF_NOPUSH);
+			else if (tp->t_flags & TF_NOPUSH) {	/* clearing a set flag: push output if established */
+				t_flags_and(tp->t_flags, ~TF_NOPUSH);
+				if (TCPS_HAVEESTABLISHED(tp->t_state))
+					error = ofp_tcp_output(tp);
+			}
+			INP_WUNLOCK(inp);
+			break;
+
+		case TCP_MAXSEG:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+			    sizeof optval);
+			if (error)
+				return (error);
+
+			INP_WLOCK_RECHECK(inp);
+			if (optval > 0 && optval <= tp->t_maxseg &&	/* may only shrink t_maxseg, never grow it */
+			    optval + 40 >= V_tcp_minmss)
+				tp->t_maxseg = optval;
+			else
+				error = OFP_EINVAL;
+			INP_WUNLOCK(inp);
+			break;
+
+		case TCP_INFO:	/* TCP_INFO cannot be set */
+			INP_WUNLOCK(inp);
+			error = OFP_EINVAL;
+			break;
+
+		case TCP_CONGESTION:
+			INP_WUNLOCK(inp);
+			bzero(buf, sizeof(buf));
+			error = sooptcopyin(sopt, &buf, sizeof(buf), 1);
+			if (error)
+				break;
+			INP_WLOCK_RECHECK(inp);
+			/*
+			 * Return OFP_EINVAL if we can't find the requested cc algo.
+			 */
+			error = OFP_EINVAL;
+			CC_LIST_RLOCK();
+			OFP_STAILQ_FOREACH(algo, &cc_list, entries) {
+				if (strncmp(buf, algo->name, TCP_CA_NAME_MAX)
+				    == 0) {
+					/* We've found the requested algo. */
+					error = 0;
+					/*
+					 * We hold a write lock over the ofp_tcb
+					 * so it's safe to do these things
+					 * without ordering concerns.
+					 */
+					if (CC_ALGO(tp)->cb_destroy != NULL)
+						CC_ALGO(tp)->cb_destroy(tp->ccv);
+					CC_ALGO(tp) = algo;
+					/*
+					 * If something goes pear shaped
+					 * initialising the new algo,
+					 * fall back to newreno (which
+					 * does not require initialisation).
+					 */
+					if (algo->cb_init != NULL)
+						if (algo->cb_init(tp->ccv) > 0) {
+							CC_ALGO(tp) = &newreno_cc_algo;
+							/*
+							 * The only reason init
+							 * should fail is
+							 * because of malloc.
+							 */
+							error = OFP_ENOMEM;
+						}
+					break; /* Break the OFP_STAILQ_FOREACH. */
+				}
+			}
+			CC_LIST_RUNLOCK();
+			INP_WUNLOCK(inp);
+			break;
+
+		case TCP_KEEPIDLE:
+		case TCP_KEEPINTVL:
+		case TCP_KEEPINIT:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+			if (error)
+				return (error);
+
+			if (ui > (UINT_MAX / hz)) {	/* reject values that would overflow the multiply below */
+				error = OFP_EINVAL;
+				break;
+			}
+			ui *= hz;
+
+			INP_WLOCK_RECHECK(inp);
+			switch (sopt->sopt_name) {
+			case TCP_KEEPIDLE:
+				tp->t_keepidle = ui;
+				/*
+				 * XXX: better check current remaining
+				 * timeout and "merge" it with new value.
+				 */
+				if ((tp->t_state > TCPS_LISTEN) &&
+				    (tp->t_state <= TCPS_CLOSING))
+					ofp_tcp_timer_activate(tp, TT_KEEP,
+					    TP_KEEPIDLE(tp));
+				break;
+			case TCP_KEEPINTVL:
+				tp->t_keepintvl = ui;
+				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+				    (TP_MAXIDLE(tp) > 0))
+					ofp_tcp_timer_activate(tp, TT_2MSL,
+					    TP_MAXIDLE(tp));
+				break;
+			case TCP_KEEPINIT:
+				tp->t_keepinit = ui;
+				if (tp->t_state == TCPS_SYN_RECEIVED ||
+				    tp->t_state == TCPS_SYN_SENT)
+					ofp_tcp_timer_activate(tp, TT_KEEP,
+					    TP_KEEPINIT(tp));
+				break;
+			}
+			INP_WUNLOCK(inp);
+			break;
+
+#ifdef PASSIVE_INET
+		case TCP_REASSDL:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+			if (error)
+				return (error);
+
+			if (ui > (UINT_MAX / hz)) {
+				error = OFP_EINVAL;
+				break;
+			}
+			ui *= hz;
+
+			INP_WLOCK_RECHECK(inp);
+			tp->t_reassdl = ui / 1000;
+			if (tp->t_reassdl == 0 && ui != 0)	/* round a non-zero request up to at least 1 */
+				tp->t_reassdl = 1;
+			INP_WUNLOCK(inp);
+			break;
+#endif
+
+		case TCP_KEEPCNT:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+			if (error)
+				return (error);
+
+			INP_WLOCK_RECHECK(inp);
+			tp->t_keepcnt = ui;
+			if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+			    (TP_MAXIDLE(tp) > 0))
+				ofp_tcp_timer_activate(tp, TT_2MSL,
+				    TP_MAXIDLE(tp));
+			INP_WUNLOCK(inp);
+			break;
+
+		default:
+			INP_WUNLOCK(inp);
+			error = OFP_ENOPROTOOPT;
+			break;
+		}
+		break;
+
+	case SOPT_GET:	/* each case reads under the lock, unlocks, then copies the value out */
+		tp = intotcpcb(inp);
+		switch (sopt->sopt_name) {
+#ifdef TCP_SIGNATURE
+		case TCP_MD5SIG:
+			optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+#endif
+
+		case TCP_NODELAY:
+			optval = tp->t_flags & TF_NODELAY;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+		case TCP_MAXSEG:
+			optval = tp->t_maxseg;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+		case TCP_NOOPT:
+			optval = tp->t_flags & TF_NOOPT;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+		case TCP_NOPUSH:
+			optval = tp->t_flags & TF_NOPUSH;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+		case TCP_INFO:
+			tcp_fill_info(tp, &ti);
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &ti, sizeof ti);
+			break;
+		case TCP_CONGESTION:
+			bzero(buf, sizeof(buf));
+			strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
+			break;
+		default:
+			INP_WUNLOCK(inp);
+			error = OFP_ENOPROTOOPT;
+			break;
+		}
+		break;
+	}
+	return (error);
+#else
+	(void)so;
+	(void)sopt;
+	return 0;	/* NOTE(review): reports success without applying or returning any option value */
+#endif
+}
+#undef INP_WLOCK_RECHECK
+
+/*
+ * ofp_tcp_sendspace and ofp_tcp_recvspace are the default send and receive window
+ * sizes, respectively. These are obsolescent (this information should
+ * be set by the route).
+ */
+uint64_t	ofp_tcp_sendspace = 1024*32;
+OFP_SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, OFP_CTLFLAG_RW,
+    &ofp_tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
+uint64_t	ofp_tcp_recvspace = 1024*64;
+OFP_SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, OFP_CTLFLAG_RW,
+    &ofp_tcp_recvspace , 0, "Maximum incoming TCP datagram size");
+
+/*
+ * Attach TCP protocol to socket, allocating
+ * internet protocol control block, tcp control block,
+ * bufer space, and entering LISTEN state if to accept connections.
+ */
+static int
+tcp_attach(struct socket *so)
+{	/* Returns 0, the error from ofp_soreserve()/ofp_in_pcballoc(), or OFP_ENOBUFS. */
+	struct tcpcb *tp;
+	struct inpcb *inp;
+	int error;
+
+	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {	/* reserve default buffer space if either side has none */
+		error = ofp_soreserve(so, ofp_tcp_sendspace, ofp_tcp_recvspace);
+		if (error)
+			return (error);
+	}
+	so->so_rcv.sb_flags |= SB_AUTOSIZE;
+	so->so_snd.sb_flags |= SB_AUTOSIZE;
+	INP_INFO_WLOCK(&V_tcbinfo);
+	error = ofp_in_pcballoc(so, &V_tcbinfo);
+	if (error) {
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		return (error);
+	}
+	inp = sotoinpcb(so);	/* presumably returned write-locked by ofp_in_pcballoc(): it is unlocked below without a lock call here; confirm */
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6PROTO) {
+		inp->inp_vflag |= INP_IPV6;
+		inp->in6p_hops = V_ip6_defhlim;
+	}
+	else
+#endif
+	inp->inp_vflag |= INP_IPV4;
+	tp = ofp_tcp_newtcpcb(inp);
+	if (tp == NULL) {	/* no tcpcb: undo the pcb allocation */
+		ofp_in_pcbdetach(inp);
+		ofp_in_pcbfree(inp);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
+		return (OFP_ENOBUFS);
+	}
+	tp->t_state = TCPS_CLOSED;	/* the new connection always starts in CLOSED */
+	INP_WUNLOCK(inp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+	return (0);
+}
+
+/*
+ * Initiate (or continue) disconnect.
+ * If embryonic state, just send reset (once).
+ * If in ``let data drain'' option and linger null, just drop.
+ * Otherwise (hard), mark socket disconnecting and drop
+ * current input data; switch states based on user close, and
+ * send segment to peer (with FIN).
+ *
+ * The caller must hold both the pcbinfo write lock and the inpcb write
+ * lock; both are asserted on entry.
+ */
+static void
+tcp_disconnect(struct tcpcb *tp)
+{
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(inp);
+
+	/*
+	 * Neither ofp_tcp_close() nor ofp_tcp_drop() should return NULL, as the
+	 * socket is still open.
+	 */
+	if (tp->t_state < TCPS_ESTABLISHED) {	/* never established: close outright */
+		tp = ofp_tcp_close(tp);
+		KASSERT(tp != NULL,
+		    ("tcp_disconnect: ofp_tcp_close() returned NULL"));
+	} else if ((so->so_options & OFP_SO_LINGER) && so->so_linger == 0) {	/* linger set with zero timeout: drop */
+		tp = ofp_tcp_drop(tp, 0);
+		KASSERT(tp != NULL,
+		    ("tcp_disconnect: ofp_tcp_drop() returned NULL"));
+	} else {	/* orderly shutdown */
+		ofp_soisdisconnecting(so);
+		ofp_sbflush(&so->so_rcv);
+		tcp_usrclosed(tp);
+		if (!(inp->inp_flags & INP_DROPPED))
+			tcp_output_disconnect(tp);
+	}
+}
+
+/*
+ * User issued close, and wish to trail through shutdown states:
+ * if never received SYN, just forget it. If got a SYN from peer,
+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
+ * If already got a FIN from peer, then almost done; go to LAST_ACK
+ * state. In all other cases, have already sent FIN to peer (e.g.
+ * after OFP_PRU_SHUTDOWN), and just have to play tedious game waiting
+ * for peer to send FIN or not respond to keep-alives, etc.
+ * We can let the user exit from the close as soon as the FIN is acked.
+ *
+ * The caller must hold both the pcbinfo write lock and the inpcb write
+ * lock; both are asserted on entry.
+ */
+static void
+tcp_usrclosed(struct tcpcb *tp)
+{
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef PASSIVE_INET
+again:
+#endif
+	switch (tp->t_state) {
+	case TCPS_LISTEN:
+		tcp_offload_listen_close(tp);
+		/* FALLTHROUGH */
+	case TCPS_CLOSED:
+		tp->t_state = TCPS_CLOSED;
+		tp = ofp_tcp_close(tp);
+		/*
+		 * ofp_tcp_close() should never return NULL here as the socket is
+		 * still open.
+		 */
+		KASSERT(tp != NULL,
+		    ("tcp_usrclosed: ofp_tcp_close() returned NULL"));
+		break;
+
+	case TCPS_SYN_SENT:
+	case TCPS_SYN_RECEIVED:
+		t_flags_or(tp->t_flags, TF_NEEDFIN);	/* state is left unchanged; only the flag is set */
+		break;
+
+	case TCPS_ESTABLISHED:
+		tp->t_state = TCPS_FIN_WAIT_1;
+		break;
+
+	case TCPS_CLOSE_WAIT:
+#ifdef PASSIVE_INET
+		/* Passive sockets don't wait for an ack. */
+		if (tp->t_inpcb->inp_flags2 & INP_PASSIVE) {
+			tp->t_state = TCPS_CLOSED;
+			goto again;
+		}
+#endif
+		tp->t_state = TCPS_LAST_ACK;
+		break;
+	}
+	if (tp->t_state >= TCPS_FIN_WAIT_2) {	/* NOTE(review): dereferences tp even after the ofp_tcp_close() path above */
+		ofp_soisdisconnected(tp->t_inpcb->inp_socket);
+		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
+		if (tp->t_state == TCPS_FIN_WAIT_2) {
+			int timeout;
+
+			timeout = (ofp_tcp_fast_finwait2_recycle) ?
+			    ofp_tcp_finwait2_timeout : TP_MAXIDLE(tp);
+			ofp_tcp_timer_activate(tp, TT_2MSL, timeout);
+		}
+	}
+}
+
+#ifdef DDB
+static void
+db_print_indent(int indent)
+{	/* Emits the literal below `indent` times. */
+	int i;
+
+	for (i = 0; i < indent; i++)
+		db_printf(" ");
+}
+
+static void
+db_print_tstate(int t_state)
+{	/* Prints the TCPS_* name for t_state, or "unknown". */
+
+	switch (t_state) {
+	case TCPS_CLOSED:
+		db_printf("TCPS_CLOSED");
+		return;
+
+	case TCPS_LISTEN:
+		db_printf("TCPS_LISTEN");
+		return;
+
+	case TCPS_SYN_SENT:
+		db_printf("TCPS_SYN_SENT");
+		return;
+
+	case TCPS_SYN_RECEIVED:
+		db_printf("TCPS_SYN_RECEIVED");
+		return;
+
+	case TCPS_ESTABLISHED:
+		db_printf("TCPS_ESTABLISHED");
+		return;
+
+	case TCPS_CLOSE_WAIT:
+		db_printf("TCPS_CLOSE_WAIT");
+		return;
+
+	case TCPS_FIN_WAIT_1:
+		db_printf("TCPS_FIN_WAIT_1");
+		return;
+
+	case TCPS_CLOSING:
+		db_printf("TCPS_CLOSING");
+		return;
+
+	case TCPS_LAST_ACK:
+		db_printf("TCPS_LAST_ACK");
+		return;
+
+	case TCPS_FIN_WAIT_2:
+		db_printf("TCPS_FIN_WAIT_2");
+		return;
+
+	case TCPS_TIME_WAIT:
+		db_printf("TCPS_TIME_WAIT");
+		return;
+
+	default:
+		db_printf("unknown");
+		return;
+	}
+}
+
+static void
+db_print_tflags(uint32_t t_flags)
+{	/* Prints the names of the TF_* bits set in t_flags, separated by ", ". */
+	int comma;
+
+	comma = 0;	/* becomes 1 once a name has been printed, so later names get a separator */
+	if (t_flags & TF_ACKNOW) {
+		db_printf("%sTF_ACKNOW", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_DELACK) {
+		db_printf("%sTF_DELACK", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_NODELAY) {
+		db_printf("%sTF_NODELAY", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_NOOPT) {
+		db_printf("%sTF_NOOPT", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_SENTFIN) {
+		db_printf("%sTF_SENTFIN", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_REQ_SCALE) {
+		db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_RCVD_SCALE) {
+		db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");	/* NOTE(review): label reads "RECVD" but the flag is TF_RCVD_SCALE */
+		comma = 1;
+	}
+	if (t_flags & TF_REQ_TSTMP) {
+		db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_RCVD_TSTMP) {
+		db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_SACK_PERMIT) {
+		db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_NEEDSYN) {
+		db_printf("%sTF_NEEDSYN", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_NEEDFIN) {
+		db_printf("%sTF_NEEDFIN", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_NOPUSH) {
+		db_printf("%sTF_NOPUSH", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_MORETOCOME) {
+		db_printf("%sTF_MORETOCOME", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_LQ_OVERFLOW) {
+		db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_LASTIDLE) {
+		db_printf("%sTF_LASTIDLE", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_RXWIN0SENT) {
+		db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_FASTRECOVERY) {
+		db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_CONGRECOVERY) {
+		db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_WASFRECOVERY) {
+		db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_SIGNATURE) {
+		db_printf("%sTF_SIGNATURE", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_FORCEDATA) {
+		db_printf("%sTF_FORCEDATA", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_TSO) {
+		db_printf("%sTF_TSO", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_flags & TF_ECN_PERMIT) {
+		db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
+		comma = 1;
+	}
+}
+
+static void
+db_print_toobflags(char t_oobflags)
+{	/* Prints the names of the TCPOOB_* bits set in t_oobflags, separated by ", ". */
+	int comma;
+
+	comma = 0;
+	if (t_oobflags & TCPOOB_HAVEDATA) {	/* NOTE(review): unprefixed name; tcp_usr_rcvoob() tests OFP_TCPOOB_HAVEDATA */
+		db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
+		comma = 1;
+	}
+	if (t_oobflags & TCPOOB_HADDATA) {
+		db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
+		comma = 1;
+	}
+}
+
+static void
+db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
+{
+
+	db_print_indent(indent);
+	db_printf("%s at %p\n", name, tp);
+
+	indent += 2;
+
+	db_print_indent(indent);
+	db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
+	   OFP_LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+
+	db_print_indent(indent);
+	db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
+	    &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
+
+	db_print_indent(indent);
+	db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl,
+	    &tp->t_timers->tt_delack, tp->t_inpcb);
+
+	db_print_indent(indent);
+	db_printf("t_state: %d (", tp->t_state);
+	db_print_tstate(tp->t_state);
+	db_printf(")\n");
+
+	db_print_indent(indent);
+	db_printf("t_flags: 0x%x (", tp->t_flags);
+	db_print_tflags(tp->t_flags);
+	db_printf(")\n");
+
+	db_print_indent(indent);
+	db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n",
+	    tp->snd_una, tp->snd_max, tp->snd_nxt);
+
+	db_print_indent(indent);
+	db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n",
+	   tp->snd_up, tp->snd_wl1, tp->snd_wl2);
+
+	db_print_indent(indent);
+	db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n",
+	    tp->iss, tp->irs, tp->rcv_nxt);
+
+	db_print_indent(indent);
+	db_printf("rcv_adv: 0x%08x rcv_wnd: %lu rcv_up: 0x%08x\n",
+	    tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
+
+	db_print_indent(indent);
+	db_printf("snd_wnd: %lu snd_cwnd: %lu\n",
+	   tp->snd_wnd, tp->snd_cwnd);
+
+	db_print_indent(indent);
+	db_printf("snd_ssthresh: %lu snd_recover: "
+	    "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
+
+	db_print_indent(indent);
+
db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n", + tp->t_maxopd, tp->t_rcvtime, tp->t_starttime); + + db_print_indent(indent); + db_printf("t_rttime: %u t_rtsq: 0x%08x\n", + tp->t_rtttime, tp->t_rtseq); + + db_print_indent(indent); + db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n", + tp->t_rxtcur, tp->t_maxseg, tp->t_srtt); + + db_print_indent(indent); + db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u " + "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin, + tp->t_rttbest); + + db_print_indent(indent); + db_printf("t_rttupdated: %lu max_sndwnd: %lu t_softerror: %d\n", + tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); + + db_print_indent(indent); + db_printf("t_oobflags: 0x%x (", tp->t_oobflags); + db_print_toobflags(tp->t_oobflags); + db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); + + db_print_indent(indent); + db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", + tp->snd_scale, tp->rcv_scale, tp->request_r_scale); + + db_print_indent(indent); + db_printf("ts_recent: %u ts_recent_age: %u\n", + tp->ts_recent, tp->ts_recent_age); + + db_print_indent(indent); + db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: " + "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev); + + db_print_indent(indent); + db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x " + "t_badrxtwin: %u\n", tp->snd_ssthresh_prev, + tp->snd_recover_prev, tp->t_badrxtwin); + + db_print_indent(indent); + db_printf("snd_numholes: %d snd_holes first: %p\n", + tp->snd_numholes, OFP_TAILQ_FIRST(&tp->snd_holes)); + + db_print_indent(indent); + db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: " + "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata); + + /* Skip sackblks, sackhint. 
*/ + + db_print_indent(indent); + db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", + tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt); +} + +DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) +{ + struct tcpcb *tp; + + if (!have_addr) { + db_printf("usage: show tcpcb \n"); + return; + } + tp = (struct tcpcb *)addr; + + db_print_tcpcb(tp, "tcpcb", 0); +} +#endif diff --git a/src/ofp_timer.c b/src/ofp_timer.c new file mode 100644 index 00000000..3196821b --- /dev/null +++ b/src/ofp_timer.c @@ -0,0 +1,357 @@ +/*- + * Copyright (c) 2014 Nokia + * Copyright (c) 2014 ENEA Software AB + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include + +#include "odp.h" + +#include "ofpi_util.h" +#include "ofpi_log.h" + +#include "ofpi_timer.h" + +struct ofp_timer_internal { + struct ofp_timer_internal *next; + odp_buffer_t buf; + odp_event_t t_ev; + uint32_t id; + ofp_timer_callback callback; + char arg[OFP_TIMER_ARG_LEN]; +}; + +struct ofp_timer_long_internal { + struct ofp_timer_internal tmo; +}; + +#define TIMER_POOL_SIZE (1024*1024) /* Timer pool size */ +#define TIMER_NUM_TIMERS 10000 +#define TIMER_LONG_SHIFT 13 +#define TIMER_NUM_LONG_SLOTS (1<lock); + shm->sec_counter = (shm->sec_counter + 1) & TIMER_LONG_MASK; + bufdata = shm->long_table[shm->sec_counter]; + shm->long_table[shm->sec_counter] = NULL; + odp_spinlock_unlock(&shm->lock); + + while (bufdata) { + struct ofp_timer_internal *next = bufdata->next; + bufdata->callback(&bufdata->arg); + odp_buffer_free(bufdata->buf); + bufdata = next; + } + + /* Start one second timeout */ + ofp_timer_start(1000000UL, one_sec, NULL, 0); +} + +int ofp_timer_init(int resolution_us, + int min_us, int max_us, + int tmo_count) +{ + odp_shm_t shm_h; + odp_queue_param_t param; + odp_pool_param_t pool_params; + odp_timer_pool_param_t timer_params; + + /* For later tuning. 
*/ + (void)tmo_count; + + /* SHM */ + shm_h = odp_shm_reserve("OfpTimerShMem", sizeof(*shm), + ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + /* Timout pool */ + memset(&pool_params, 0, sizeof(pool_params)); + pool_params.tmo.num = TIMER_NUM_TIMERS; + pool_params.type = ODP_POOL_TIMEOUT; + + shm->pool = odp_pool_create("TimeoutPool", ODP_SHM_NULL, &pool_params); + + if (shm->pool == ODP_POOL_INVALID) { + OFP_ERR("Timeout pool create failed.\n"); + exit(EXIT_FAILURE); + } + + /* Buffer pool */ + memset(&pool_params, 0, sizeof(pool_params)); + pool_params.buf.size = sizeof(struct ofp_timer_internal); + pool_params.buf.align = 0; + pool_params.buf.num = TIMER_NUM_TIMERS; + pool_params.type = ODP_POOL_BUFFER; + + shm->buf_pool = odp_pool_create("TimeoutBufferPool", ODP_SHM_NULL, + &pool_params); + + if (shm->buf_pool == ODP_POOL_INVALID) { + OFP_ERR("Buffer pool create failed.\n"); + exit(EXIT_FAILURE); + } + + /* Timer pool */ + memset(&timer_params, 0, sizeof(timer_params)); + timer_params.res_ns = resolution_us*ODP_TIME_USEC; + timer_params.min_tmo = min_us*ODP_TIME_USEC; + timer_params.max_tmo = max_us*ODP_TIME_USEC; + timer_params.num_timers = TIMER_NUM_TIMERS; + timer_params.priv = 0; /* Shared */ + timer_params.clk_src = ODP_CLOCK_CPU; + shm->socket_timer_pool = odp_timer_pool_create("TmrPool", + &timer_params); + + if (shm->socket_timer_pool == ODP_TIMER_POOL_INVALID) { + OFP_ERR("Timer pool create failed.\n"); + exit(EXIT_FAILURE); + } + + odp_shm_print_all(); + + odp_timer_pool_start(); + + /* + * Create a queue + */ + memset(¶m, 0, sizeof(param)); + param.sched.prio = ODP_SCHED_PRIO_DEFAULT; + param.sched.sync = ODP_SCHED_SYNC_NONE; + param.sched.group = ODP_SCHED_GROUP_DEFAULT; + + shm->queue = odp_queue_create("TimerQueue", ODP_QUEUE_TYPE_SCHED, + ¶m); + + if (shm->queue == ODP_QUEUE_INVALID) { + OFP_ERR("Timer queue create failed.\n"); + exit(EXIT_FAILURE); + } + + odp_spinlock_init(&shm->lock); + + /* Start one second timeouts */ + 
ofp_timer_start(1000000UL, one_sec, NULL, 0); + + OFP_LOG("Timer init\n"); + return 0; +} + +void ofp_timer_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpTimerShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: Timer shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + OFP_LOG("Timer lookup\n"); +} + +odp_timer_t ofp_timer_start(uint64_t tmo_us, ofp_timer_callback callback, + void *arg, int arglen) +{ + uint64_t tick; + uint64_t period; + uint64_t period_ns; + struct ofp_timer_internal *bufdata; + odp_buffer_t buf; + odp_timer_set_t t; + odp_timeout_t tmo; + + /* Init shm if not done yet. */ + if (shm == NULL) + ofp_timer_lookup_shared_memory(); + + /* If shm is still NULL we have a problem. */ + if (shm == NULL) { + OFP_LOG("Cannot lookup timer shared memory.\n"); + exit(1); + } + + /* Alloc user buffer */ + buf = odp_buffer_alloc(shm->buf_pool); + if (buf == ODP_BUFFER_INVALID) { + OFP_LOG("Cannot allocate user buffer\n"); + exit(1); + } + + bufdata = (struct ofp_timer_internal *)odp_buffer_addr(buf); + bufdata->callback = callback; + bufdata->buf = buf; + bufdata->t_ev = ODP_EVENT_INVALID; + bufdata->next = NULL; + bufdata->id = 0; + if (arg && arglen) + memcpy(bufdata->arg, arg, arglen); + + if (tmo_us >= OFP_TIMER_MAX_US) { + /* Long 1 s resolution timeout */ + uint64_t sec = tmo_us/1000000UL; + if (sec > TIMER_NUM_LONG_SLOTS) { + OFP_LOG("Timeout too long = %"PRIu64"s\n", sec); + while (1) { } + } + + odp_spinlock_lock(&shm->lock); + int ix = (shm->sec_counter + sec) & TIMER_LONG_MASK; + bufdata->id = ((shm->id++)<next = shm->long_table[ix]; + shm->long_table[ix] = bufdata; + odp_spinlock_unlock(&shm->lock); + + return (odp_timer_t) bufdata->id; + } else { + /* Short 10 ms resolution timeout */ + + /* Alloc timout event */ + tmo = odp_timeout_alloc(shm->pool); + if (tmo == ODP_TIMEOUT_INVALID) { + OFP_ERR("Failed to allocate timeout\n"); + exit(1); + } + 
bufdata->t_ev = odp_timeout_to_event(tmo); + + period_ns = tmo_us*ODP_TIME_USEC; + period = odp_timer_ns_to_tick(shm->socket_timer_pool, period_ns); + tick = odp_timer_current_tick(shm->socket_timer_pool); + tick += period; + + shm->socket_timer = odp_timer_alloc(shm->socket_timer_pool, + shm->queue, bufdata); + if (shm->socket_timer == ODP_TIMER_INVALID) { + OFP_ERR("Failed to allocate timer\n"); + exit(1); + } + + t = odp_timer_set_abs(shm->socket_timer, tick, &bufdata->t_ev); + + if (t != ODP_TIMER_SUCCESS) { + OFP_LOG("Timeout request failed\n"); + exit(1); + } + + return shm->socket_timer; + } + return ODP_TIMER_INVALID; +} + +int ofp_timer_cancel(odp_timer_t tim) +{ + odp_event_t timeout_event = ODP_EVENT_INVALID; + odp_timeout_t tmo; + uint32_t t = (uint32_t)tim; + struct ofp_timer_internal *bufdata; + struct ofp_timer_internal *prev = NULL; + + if (tim == ODP_TIMER_INVALID) + return 0; + + if (t & 0x80000000) { + /* long timeout */ + odp_spinlock_lock(&shm->lock); + bufdata = shm->long_table[t & TIMER_LONG_MASK]; + + while (bufdata) { + struct ofp_timer_internal *next = bufdata->next; + if (bufdata->id == t) { + if (prev == NULL) + shm->long_table[t & TIMER_LONG_MASK] = next; + else + prev->next = next; + odp_buffer_free(bufdata->buf); + odp_spinlock_unlock(&shm->lock); + return 0; + } + prev = bufdata; + bufdata = next; + } + odp_spinlock_unlock(&shm->lock); + return -1; + } + else { + if (odp_timer_cancel(tim, &timeout_event) < 0) + { + OFP_LOG("Timeout already expired or inactive\n"); + return -1; + } + + if (timeout_event != ODP_EVENT_INVALID) { + tmo = odp_timeout_from_event(timeout_event); + bufdata = odp_timeout_user_ptr(tmo); + odp_buffer_free(bufdata->buf); + odp_timeout_free(tmo); + } else { + OFP_LOG("Lost timeout buffer at timer cancel\n"); + return -1; + } + + if (odp_timer_free(tim) != ODP_EVENT_INVALID) { + OFP_LOG("odp_timer_free failed in ofp_timer_cancel"); + return -1; + } + } + + return 0; +} + +void ofp_timer_handle(odp_event_t ev) +{ + 
struct ofp_timer_internal *bufdata; + odp_timeout_t tmo = odp_timeout_from_event(ev); + odp_timer_t tim = odp_timeout_timer(tmo); + + bufdata = (struct ofp_timer_internal *)odp_timeout_user_ptr(tmo); + fflush(NULL); + bufdata->callback(&bufdata->arg); + + odp_buffer_free(bufdata->buf); + odp_timeout_free(tmo); + odp_timer_free(tim); +} + +/* timer_num defines the timer type. At the moment + there is only one timer. */ +int ofp_timer_ticks(int timer_num) +{ + (void)timer_num; + if (!shm) + return 0; + return odp_timer_current_tick(shm->socket_timer_pool); +} + +odp_timer_pool_t ofp_timer(int timer_num) +{ + (void)timer_num; + return shm->socket_timer_pool; +} diff --git a/src/ofp_tunthread.c b/src/ofp_tunthread.c new file mode 100644 index 00000000..c7f9c456 --- /dev/null +++ b/src/ofp_tunthread.c @@ -0,0 +1,294 @@ +/* Copyright (c) 2013, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "ofpi_portconf.h" +#include "ofpi_if_vlan.h" +#include "ofpi_debug.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_stat.h" +#include "ofpi_log.h" +#include "ofpi_util.h" + +#define FPM_DEBUG + +static int tap_alloc(char *dev, int flags) { + + struct ifreq ifr; + int fd, err; + const char *clonedev = "/dev/net/tun"; + + /* Arguments taken by the function: + * + * char *dev: the name of an interface (or '\0'). MUST have enough + * space to hold the interface name if '\0' is passed + * int flags: interface flags (eg, IFF_TUN etc.) 
+ */ + + /* open the clone device */ + if( (fd = open(clonedev, O_RDWR)) < 0 ) { + printf("Cant open clone device\n"); + return fd; + } + + /* preparation of the struct ifr, of type "struct ifreq" */ + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = flags; /* IFF_TUN or IFFemacs_TAP, plus maybe IFF_NO_PI */ + + if (*dev) { + /* if a device name was specified, put it in the structure; otherwise, + * the kernel will try to allocate the "next" device of the + * specified type */ + strncpy(ifr.ifr_name, dev, IFNAMSIZ); + } + + /* try to create the device */ + if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { + printf("Cant create TUN device\n"); + close(fd); + return -1; + } + + /* if the operation was successful, write back the name of the + * interface to the variable "dev", so the caller can know + * it. Note that the caller MUST reserve space in *dev (see calling + * code below) */ + strcpy(dev, ifr.ifr_name); + + /* Let hardware do checksum */ + ioctl(fd, TUNSETNOCSUM, 1); + + /* DEBUG */ + ioctl(fd, TUNSETDEBUG, 1); + + + /* this is the special file descriptor that the caller will use to talk + * with the virtual interface */ + return fd; +} + +/* Return the fd of the tap */ +int sp_setup_device(struct ofp_ifnet *ifnet) { + int fd; + struct ifreq ifr; + int gen_fd; + char fp_name[IFNAMSIZ]; + struct sockaddr hwaddr; + + memset(&hwaddr, 0x0, sizeof(hwaddr)); + + /* Prepare FP device name*/ + snprintf(fp_name, IFNAMSIZ, "fp%d", ifnet->port); + fp_name[IFNAMSIZ - 1] = 0; + + /* Create device */ + fd = tap_alloc(fp_name, IFF_TAP | IFF_NO_PI); + if (fd < 0) { + printf("Error when creating TAP device"); + exit(-1); + } + + hwaddr.sa_family = AF_UNIX; + memcpy(hwaddr.sa_data, ifnet->mac, sizeof(ifnet->mac)); + + /* Set the same MAC address as reported by ODP */ + memset(&ifr, 0x0, sizeof(ifr)); + strncpy(ifr.ifr_name, fp_name, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ - 1] = 0; + memcpy(&ifr.ifr_hwaddr, &hwaddr, sizeof(ifr.ifr_hwaddr)); + + OFP_DBG("Fastpath device %s 
addr %s\n", + fp_name, ofp_print_mac((uint8_t *)ifr.ifr_hwaddr.sa_data)); + + /* Setting HW address of FP kernel representation */ + if (ioctl(fd, SIOCSIFHWADDR, &ifr) < 0) { + perror("SIOCSIFHWADDR"); + close(fd); + exit(-1); + } + + gen_fd = socket(PF_INET, SOCK_DGRAM, 0); + + /* Setting MTU of FP kernel representation */ + memset(&ifr, 0x0, sizeof(ifr)); + strncpy(ifr.ifr_name, fp_name, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ - 1] = 0; + ifr.ifr_mtu = ifnet->if_mtu; + OFP_DBG("Fastpath device %s MTU %i\n", fp_name, ifr.ifr_mtu); + + if (ioctl(gen_fd, SIOCSIFMTU, &ifr) < 0) { + perror("SIOCSIFMTU"); + close(gen_fd); + close(fd); + exit(-1); + } + + /* Get flags */ + memset(&ifr, 0x0, sizeof(ifr)); + strncpy(ifr.ifr_name, fp_name, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ - 1] = 0; + if (ioctl(gen_fd, SIOCGIFFLAGS, &ifr) < 0) { + perror("SIOCGIFFLAGS"); + close(gen_fd); + close(fd); + exit(-1); + } + + /* Set flags - ifconfig up*/ + if (!(ifr.ifr_flags & IFF_UP)) { + /* ifconfig up */ + ifr.ifr_flags |= IFF_UP; + if (ioctl(gen_fd, SIOCSIFFLAGS, &ifr) < 0) { + perror("SIOCSIFFLAGS"); + close(gen_fd); + close(fd); + exit(-1); + } + } + + /* Get interface index */ + memset(&ifr, 0x0, sizeof(ifr)); + strncpy(ifr.ifr_name, fp_name, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ - 1] = 0; + if (ioctl(gen_fd, SIOCGIFINDEX, &ifr) < 0) { + perror("SIOCSIFINDEX"); + close(gen_fd); + close(fd); + exit(-1); + } + + /* Store ifindex in viu and create table */ + ifnet->linux_index = ifr.ifr_ifindex; + ifnet->sp_status = OFP_SP_UP; + + close(gen_fd); + return fd; +} + +void * sp_rx_thread(void *ifnet_void) { + struct ofp_ifnet *ifnet = (struct ofp_ifnet *) ifnet_void; + struct ofp_ifnet *pkt_ifnet; + struct ofp_ether_header *eth; + struct ofp_ether_vlan_header *vlan_hdr; + uint16_t vlan = 0; + odp_packet_t pkt; + odp_event_t ev; + int len; + + (void) len; + + ofp_init_local(); + + while(1) { + ev = odp_queue_deq(ifnet->spq_def); + + if (ev == ODP_EVENT_INVALID || + odp_event_type(ev) != 
ODP_EVENT_PACKET) { + /* FIXME: Blocking queue popping in ODP ? */ + usleep(2000); + continue; + } + pkt = odp_packet_from_event(ev); + + if (ifnet->sp_status != OFP_SP_UP) { + odp_packet_free(pkt); + continue; + } + + eth = odp_packet_l2_ptr(pkt, NULL); + if (odp_be_to_cpu_16(eth->ether_type) == OFP_ETHERTYPE_VLAN) { + vlan_hdr = (struct ofp_ether_vlan_header *)eth; + vlan = OFP_EVL_VLANOFTAG(vlan_hdr->evl_tag); + } else { + vlan = 0; + } + + pkt_ifnet = ofp_get_ifnet(ifnet->port, vlan); + if (pkt_ifnet == NULL || + pkt_ifnet->sp_status != OFP_SP_UP){ + odp_packet_free(pkt); + continue; + } + + OFP_DEBUG_PACKET(OFP_DEBUG_PKT_RECV_KNI, pkt, ifnet->port); + + OFP_UPDATE_PACKET_STAT(rx_sp, 1); + + len = write(ifnet->fd, + (void *)odp_packet_l2_ptr(pkt, NULL), + (size_t)odp_packet_len(pkt)); + + odp_packet_free(pkt); + } + + /* Never returns */ + return NULL; +} + +void * sp_tx_thread(void *ifnet_void) { + int len; + odp_packet_t pkt; + uint8_t *buf_pnt; + struct ofp_ifnet *ifnet = (struct ofp_ifnet *)ifnet_void; + + ofp_init_local(); + + while (1) { + /* FIXME: coalese syscalls and speed this up */ + + pkt = odp_packet_alloc(ifnet->pkt_pool, + ifnet->if_mtu + OFP_ETHER_HDR_LEN + + OFP_ETHER_VLAN_ENCAP_LEN); + + if (pkt == ODP_PACKET_INVALID) { + OFP_ERR("packet alloc failed\n"); + usleep(1000); + continue; + } + + buf_pnt = odp_packet_data(pkt); + + /* Blocking read */ + drop_pkg: + len = read(ifnet->fd, buf_pnt, odp_packet_len(pkt)); + if (len <= 0) { + OFP_ERR("Slowpath read error\n"); + goto drop_pkg; + } + + if (len > 0) { + odp_packet_reset(pkt, (size_t)len); + odp_packet_l2_offset_set(pkt, 0); + + OFP_DEBUG_PACKET(OFP_DEBUG_PKT_SEND_KNI, pkt, + ifnet->port); + + OFP_UPDATE_PACKET_STAT(tx_sp, 1); + + /* Enqueue the packet to + fastpath device */ + odp_queue_enq(ifnet->outq_def, odp_packet_to_event(pkt)); + } + } + + /* Never returns */ + return NULL; +} diff --git a/src/ofp_udp6_usrreq.c b/src/ofp_udp6_usrreq.c new file mode 100644 index 00000000..db0225c1 --- 
/dev/null +++ b/src/ofp_udp6_usrreq.c @@ -0,0 +1,1249 @@ +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ + * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ + */ + +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 + */ +#if 0 +#include +__FBSDID("$FreeBSD: release/9.1.0/sys/netinet6/udp6_usrreq.c 238247 2012-07-08 14:21:36Z bz $"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipfw.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef IPSEC +#include +#include +#endif /* IPSEC */ + +#include +#endif + + +#include +#include "api/ofp_types.h" +#include "api/ofp_errno.h" +#include "ofpi_socket.h" +#include "ofpi_socketvar.h" +#include "ofpi_sockopt.h" +#include "ofpi_sockstate.h" +#include "ofpi_protosw.h" +#include "ofpi_in.h" +#include "ofpi_in_pcb.h" +#include "ofpi_udp.h" +#include "ofpi_udp_var.h" +#include "ofpi_ip.h" + +#include "ofpi_ip6protosw.h" +#include "ofpi_ip6_var.h" +#include "ofpi_in6_pcb.h" +#include "ofpi_ip6.h" +#include "ofpi_udp6_var.h" +#include "ofpi_pkt_processing.h" +#include "ofpi_hook.h" +#include "ofpi_util.h" + +/* + * UDP protocol implementation. + * Per RFC 768, August, 1980. + */ + +#define UDPSTAT_INC(x) + +#define log(...) + +extern struct protosw ofp_inetsw[]; +extern struct inpcbinfo ofp_udbinfo; + +extern int ofp_udp_log_in_vain; +extern int ofp_udp_blackhole; + +#if 0 +static void udp6_detach(struct socket *so); +#endif + +static void +udp6_append(struct inpcb *inp, odp_packet_t pkt, int off, + struct ofp_sockaddr_in6 *fromsa) +{ + struct socket *so; + odp_packet_t opts = ODP_PACKET_INVALID; + + (void)off; + + INP_LOCK_ASSERT(inp); + +#ifdef IPSEC + /* Check AH/ESP integrity. 
*/ + if (ipsec6_in_reject(n, inp)) { + m_freem(n); + V_ipsec6stat.in_polvio++; + return; + } +#endif /* IPSEC */ +#ifdef MAC + if (mac_inpcb_check_deliver(inp, n) != 0) { + m_freem(n); + return; + } +#endif + +#if 0 + if (inp->inp_flags & INP_CONTROLOPTS || + inp->inp_socket->so_options & SO_TIMESTAMP) + ip6_savecontrol(inp, n, &opts); + m_adj(n, off + sizeof(struct udphdr)); +#endif + + so = inp->inp_socket; + + SOCKBUF_RLOCK(&so->so_rcv); + /* save sender data where L2 & L3 headers used to be */ + memcpy(odp_packet_l2_ptr(pkt, NULL), fromsa, ((struct ofp_sockaddr *)fromsa)->sa_len); + + /* Offer to event function */ + if (packet_accepted_as_event_rlocked(&so->so_rcv, pkt)) { + SOCKBUF_RUNLOCK(&so->so_rcv); + return; + } + SOCKBUF_RUNLOCK(&so->so_rcv); + + SOCKBUF_LOCK(&so->so_rcv); + if (ofp_sbappendaddr_locked(&so->so_rcv, pkt, opts) == 0) { + SOCKBUF_UNLOCK(&so->so_rcv); + odp_packet_free(pkt); + if (opts != ODP_PACKET_INVALID) + odp_packet_free(opts); + UDPSTAT_INC(udps_fullsock); + } else + sorwakeup_locked(so); +} + +int +ofp_udp6_input(odp_packet_t pkt, int *offp, int *nxt) +{ + int off = *offp; + int protocol = IS_IPV6_UDP; + struct ofp_ifnet *ifp; + struct ofp_ip6_hdr *ip6; + struct ofp_udphdr *uh; + int res = OFP_PKT_CONTINUE; + int plen, ulen; + /*uint16_t uh_sum;*/ + struct ofp_sockaddr_in6 fromsa; + struct inpcb *inp; + struct udpcb *up; + int uh_sum; + +#if 0 +#ifdef IPFIREWALL_FORWARD + struct m_tag *fwd_tag; +#endif +#endif + *nxt = OFP_IPPROTO_DONE; + + OFP_HOOK(OFP_HOOK_LOCAL, pkt, &protocol, &res); + if (res != OFP_PKT_CONTINUE) + return res; + + ifp = odp_packet_user_ptr(pkt); + ip6 = (struct ofp_ip6_hdr *)odp_packet_l3_ptr(pkt, NULL); + if (odp_packet_len(pkt) < off + sizeof(struct ofp_udphdr)) + return OFP_PKT_DROP; + + odp_packet_l4_offset_set(pkt, odp_packet_l3_offset(pkt) + off); + + uh = (struct ofp_udphdr *)((uint8_t *)ip6 + off); + + UDPSTAT_INC(udps_ipackets); + + /* + * Destination port of 0 is illegal, based on RFC768. 
+ */ + if (uh->uh_dport == 0) + goto badunlocked; + + plen = odp_be_to_cpu_16(ip6->ofp_ip6_plen) - off + sizeof(*ip6); + ulen = odp_be_to_cpu_16((u_short)uh->uh_ulen); + + if (plen != ulen) { + UDPSTAT_INC(udps_badlen); + goto badunlocked; + } + + /* + * Checksum extended UDP header and data. + */ + if (uh->uh_sum == 0) { + UDPSTAT_INC(udps_nosum); + goto badunlocked; + } + + uh_sum = ofp_ip6_cksum(pkt, ulen, OFP_IPPROTO_UDP, 0); + if (uh_sum != 0) { + UDPSTAT_INC(udps_badsum); + goto badunlocked; + } + /* + * Construct sockaddr format source address. + */ + ofp_init_sin6(&fromsa, pkt); + fromsa.sin6_port = uh->uh_sport; + +#if 0 + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + struct inpcb *last; + struct ip6_moptions *imo; + + INP_INFO_RLOCK(&V_udbinfo); + /* + * In the event that laddr should be set to the link-local + * address (this happens in RIPng), the multicast address + * specified in the received packet will not match laddr. To + * handle this situation, matching is relaxed if the + * receiving interface is the same as one specified in the + * socket and if the destination multicast address matches + * one of the multicast groups specified in the socket. + */ + + /* + * KAME note: traditionally we dropped udpiphdr from mbuf + * here. We need udphdr for IPsec processing so we do that + * later. 
+ */ + last = NULL; + OFP_LIST_FOREACH(inp, &V_udb, inp_list) { + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (inp->inp_lport != uh->uh_dport) + continue; + if (inp->inp_fport != 0 && + inp->inp_fport != uh->uh_sport) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, + &ip6->ip6_dst)) + continue; + } + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, + &ip6->ip6_src) || + inp->inp_fport != uh->uh_sport) + continue; + } + + /* + * XXXRW: Because we weren't holding either the inpcb + * or the hash lock when we checked for a match + * before, we should probably recheck now that the + * inpcb lock is (supposed to be) held. + */ + + /* + * Handle socket delivery policy for any-source + * and source-specific multicast. [RFC3678] + */ + imo = inp->in6p_moptions; + if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + struct sockaddr_in6 mcaddr; + int blocked; + + INP_RLOCK(inp); + + bzero(&mcaddr, sizeof(struct sockaddr_in6)); + mcaddr.sin6_len = sizeof(struct sockaddr_in6); + mcaddr.sin6_family = AF_INET6; + mcaddr.sin6_addr = ip6->ip6_dst; + + blocked = im6o_mc_filter(imo, ifp, + (struct sockaddr *)&mcaddr, + (struct sockaddr *)&fromsa); + if (blocked != MCAST_PASS) { + if (blocked == MCAST_NOTGMEMBER) + IP6STAT_INC(ip6s_notmember); + if (blocked == MCAST_NOTSMEMBER || + blocked == MCAST_MUTED) + UDPSTAT_INC(udps_filtermcast); + INP_RUNLOCK(inp); /* XXX */ + continue; + } + + INP_RUNLOCK(inp); + } + if (last != NULL) { + struct mbuf *n; + + if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { + INP_RLOCK(last); + up = intoudpcb(last); + if (up->u_tun_func == NULL) { + udp6_append(last, n, off, &fromsa); + } else { + /* + * Engage the tunneling + * protocol we will have to + * leave the info_lock up, + * since we are hunting + * through multiple UDP's. 
+ * + */ + (*up->u_tun_func)(n, off, last); + } + INP_RUNLOCK(last); + } + } + last = inp; + /* + * Don't look for additional matches if this one does + * not have either the SO_REUSEPORT or SO_REUSEADDR + * socket options set. This heuristic avoids + * searching through all pcbs in the common case of a + * non-shared port. It assumes that an application + * will never clear these options after setting them. + */ + if ((last->inp_socket->so_options & + (SO_REUSEPORT|SO_REUSEADDR)) == 0) + break; + } + + if (last == NULL) { + /* + * No matching pcb found; discard datagram. (No need + * to send an ICMP Port Unreachable for a broadcast + * or multicast datgram.) + */ + UDPSTAT_INC(udps_noport); + UDPSTAT_INC(udps_noportmcast); + goto badheadlocked; + } + INP_RLOCK(last); + INP_INFO_RUNLOCK(&V_udbinfo); + up = intoudpcb(last); + if (up->u_tun_func == NULL) { + udp6_append(last, m, off, &fromsa); + } else { + /* + * Engage the tunneling protocol. + */ + (*up->u_tun_func)(m, off, last); + } + INP_RUNLOCK(last); + return (IPPROTO_PKT_PROCESSED); + } +#endif + /* + * Locate pcb for datagram. + */ +#if 0 +#ifdef IPFIREWALL_FORWARD + /* + * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. + */ + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag != NULL) { + struct sockaddr_in6 *next_hop6; + + next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); + + /* + * Transparently forwarded. Pretend to be the destination. + * Already got one like this? + */ + inp = in6_pcblookup_mbuf(&V_udbinfo, + &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, + INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); + if (!inp) { + /* + * It's new. Try to find the ambushing socket. + * Because we've rewritten the destination address, + * any hardware-generated hash is ignored. + */ + inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src, + uh->uh_sport, &next_hop6->sin6_addr, + next_hop6->sin6_port ? 
htons(next_hop6->sin6_port) : + uh->uh_dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif); + } + /* Remove the tag from the packet. We don't need it anymore. */ + m_tag_delete(m, fwd_tag); + } else +#endif /* IPFIREWALL_FORWARD */ +#endif + + inp = ofp_in6_pcblookup(&ofp_udbinfo, &ip6->ip6_src, + uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp); + + if (inp == NULL) { + if (ofp_udp_log_in_vain) { + OFP_INFO( + "Connection attempt to UDP [%s]:%d from [%s]:%d\n", + ofp_print_ip6_addr((uint8_t *)&ip6->ip6_dst), + odp_be_to_cpu_16(uh->uh_dport), + ofp_print_ip6_addr((uint8_t *)&ip6->ip6_src), + odp_be_to_cpu_16(uh->uh_sport)); + } + + UDPSTAT_INC(udps_noport); +#if 0 + if (m->m_flags & M_MCAST) { + printf("UDP6: M_MCAST is set in a unicast packet.\n"); + UDPSTAT_INC(udps_noportmcast); + goto badunlocked; + } +#endif + + if (ofp_udp_blackhole) + goto badunlocked; + +#if 0 + if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) + goto badunlocked; +#endif + +#ifndef SP + /* + icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); + */ + *nxt = OFP_IPPROTO_DONE; + return OFP_PKT_PROCESSED; +#else + *nxt = OFP_IPPROTO_SP; + return OFP_PKT_CONTINUE; +#endif + } + INP_RLOCK_ASSERT(inp); + + up = intoudpcb(inp); + if (up->u_tun_func == NULL) { + udp6_append(inp, pkt, off, &fromsa); + } else { + /* + * Engage the tunneling protocol. 
+ */ + + (*up->u_tun_func)(pkt, off, inp); + } + + INP_RUNLOCK(inp); + return OFP_PKT_PROCESSED; + +#if 0 +badheadlocked: + INP_INFO_RUNLOCK(&ofp_udbinfo); +#endif +badunlocked: + return OFP_PKT_DROP; +} + +void +ofp_udp6_ctlinput(int cmd, struct ofp_sockaddr *sa, void *d) +{ + (void)cmd; + (void)sa; + (void)d; +#if 0 + struct udphdr uh; + struct ip6_hdr *ip6; + struct mbuf *m; + int off = 0; + struct ip6ctlparam *ip6cp = NULL; + const struct sockaddr_in6 *sa6_src = NULL; + void *cmdarg; + struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; + struct udp_portonly { + u_int16_t uh_sport; + u_int16_t uh_dport; + } *uhp; + + if (sa->sa_family != AF_INET6 || + sa->sa_len != sizeof(struct sockaddr_in6)) + return; + + if ((unsigned)cmd >= PRC_NCMDS) + return; + if (PRC_IS_REDIRECT(cmd)) + notify = in6_rtchange, d = NULL; + else if (cmd == PRC_HOSTDEAD) + d = NULL; + else if (inet6ctlerrmap[cmd] == 0) + return; + + /* if the parameter is from icmp6, decode it. */ + if (d != NULL) { + ip6cp = (struct ip6ctlparam *)d; + m = ip6cp->ip6c_m; + ip6 = ip6cp->ip6c_ip6; + off = ip6cp->ip6c_off; + cmdarg = ip6cp->ip6c_cmdarg; + sa6_src = ip6cp->ip6c_src; + } else { + m = NULL; + ip6 = NULL; + cmdarg = NULL; + sa6_src = &sa6_any; + } + + if (ip6) { + /* + * XXX: We assume that when IPV6 is non NULL, + * M and OFF are valid. + */ + + /* Check if we can safely examine src and dst ports. 
*/ + if (m->m_pkthdr.len < off + sizeof(*uhp)) + return; + + bzero(&uh, sizeof(uh)); + m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); + + (void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport, + (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, + cmdarg, notify); + } else + (void) in6_pcbnotify(&V_udbinfo, sa, 0, + (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); +#endif +} + +#if 0 +static int +udp6_getcred(OFP_SYSCTL_HANDLER_ARGS) +{ + struct xucred xuc; + struct sockaddr_in6 addrs[2]; + struct inpcb *inp; + int error; + + error = priv_check(req->td, PRIV_NETINET_GETCRED); + if (error) + return (error); + + if (req->newlen != sizeof(addrs)) + return (OFP_EINVAL); + if (req->oldlen != sizeof(struct xucred)) + return (OFP_EINVAL); + error = SYSCTL_IN(req, addrs, sizeof(addrs)); + if (error) + return (error); + if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || + (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { + return (error); + } + inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, + addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, + INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); + if (inp != NULL) { + INP_RLOCK_ASSERT(inp); + if (inp->inp_socket == NULL) + error = OFP_ENOENT; + if (error == 0) + error = cr_canseesocket(req->td->td_ucred, + inp->inp_socket); + if (error == 0) + cru2x(inp->inp_cred, &xuc); + INP_RUNLOCK(inp); + } else + error = OFP_ENOENT; + if (error == 0) + error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); + return (error); +} + +OFP_SYSCTL_PROC(_net_inet6_udp6, OFP_OID_AUTO, getcred, OFP_CTLTYPE_OPAQUE|OFP_CTLFLAG_RW, 0, + 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); +#endif + +static int +udp6_output(struct inpcb *inp, odp_packet_t m, struct ofp_sockaddr *addr6, + odp_packet_t control, struct thread *td) +{ + uint32_t ulen = (uint16_t)odp_packet_len(m); + uint32_t plen = sizeof(struct ofp_udphdr) + ulen; + struct ofp_ip6_hdr *ip6; + struct ofp_udphdr *udp6; + 
struct ofp_in6_addr *laddr, *faddr, in6a; + struct ofp_sockaddr_in6 *sin6 = NULL; + struct ofp_ifnet *oifp = NULL; + /*int scope_ambiguous = 0;*/ + u_short fport; + int error = 0; + /*struct ofp_ip6_pktopts *optp, opt;*/ + int af = OFP_AF_INET6, hlen = sizeof(struct ofp_ip6_hdr); + struct ofp_sockaddr_in6 tmp; + + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); + + if (addr6) { + /* addr6 has been validated in udp6_send(). */ + sin6 = (struct ofp_sockaddr_in6 *)addr6; + + /* protect *sin6 from overwrites */ + tmp = *sin6; + sin6 = &tmp; + + /* + * Application should provide a proper zone ID or the use of + * default zone IDs should be enabled. Unfortunately, some + * applications do not behave as it should, so we need a + * workaround. Even if an appropriate ID is not determined, + * we'll see if we can determine the outgoing interface. If we + * can, determine the zone ID based on the interface below. + */ +#if 0 /* No scope check */ + if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) + scope_ambiguous = 1; + if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) + return (error); +#endif + } +#if 0 /* no packet options*/ + if (control != ODP_PACKET_INVALID) { + if ((error = ip6_setpktopts(control, &opt, + inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0) + goto release; + optp = &opt; + } else + optp = inp->in6p_outputopts; +#endif + + if (sin6) { + faddr = &sin6->sin6_addr; + + /* + * IPv4 version of udp_output calls in_pcbconnect in this case, + * which needs splnet and affects performance. + * Since we saw no essential reason for calling in_pcbconnect, + * we get rid of such kind of logic, and call in6_selectsrc + * and in6_pcbsetport in order to fill in the local address + * and the local port. + */ + if (sin6->sin6_port == 0) { + error = OFP_EADDRNOTAVAIL; + goto release; + } + + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + /* how about ::ffff:0.0.0.0 case? 
*/ + error = OFP_EISCONN; + goto release; + } + + fport = sin6->sin6_port; /* allow 0 port */ + + if (OFP_IN6_IS_ADDR_V4MAPPED(faddr)) { + if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { + /* + * I believe we should explicitly discard the + * packet when mapped addresses are disabled, + * rather than send the packet as an IPv6 one. + * If we chose the latter approach, the packet + * might be sent out on the wire based on the + * default route, the situation which we'd + * probably want to avoid. + * (20010421 jinmei@kame.net) + */ + error = OFP_EINVAL; + goto release; + } + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && + !OFP_IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { + /* + * when remote addr is an IPv4-mapped address, + * local addr should not be an IPv6 address, + * since you cannot determine how to map IPv6 + * source address to IPv4. + */ + error = OFP_EINVAL; + goto release; + } + + af = OFP_AF_INET; + } + + if (!OFP_IN6_IS_ADDR_V4MAPPED(faddr)) { + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + OFP_IFNET_LOCK_READ(ifaddr6_list); + if (!OFP_TAILQ_EMPTY(ofp_get_ifaddr6head())) { + memcpy(in6a.ofp_s6_addr, + OFP_TAILQ_FIRST(ofp_get_ifaddr6head())->ip6_addr, + 16); + faddr = &in6a; + error = 0; + } + OFP_IFNET_UNLOCK_READ(ifaddr6_list); + } else + error = ofp_in6_selectsrc(sin6, NULL, inp, + NULL, td->td_ucred, &oifp, &in6a); + if (error) + goto release; + (void)oifp; +#if 0 + if (oifp && scope_ambiguous && + (error = in6_setscope(&sin6->sin6_addr, + oifp, NULL))) { + goto release; + } +#endif + laddr = &in6a; + } else + laddr = &inp->in6p_laddr; /* XXX */ + + if (laddr == NULL) { + if (error == 0) + error = OFP_EADDRNOTAVAIL; + goto release; + } + if (inp->inp_lport == 0 && + (error = ofp_in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { + /* Undo an address bind that may have occurred. 
*/ + inp->in6p_laddr = ofp_in6addr_any; + goto release; + } + } else { + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + error = OFP_ENOTCONN; + goto release; + } + if (OFP_IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { + if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { + /* + * XXX: this case would happen when the + * application sets the V6ONLY flag after + * connecting the foreign address. + * Such applications should be fixed, + * so we bark here. + */ + log(LOG_INFO, "udp6_output: IPV6_V6ONLY " + "option was set for a connected socket\n"); + error = OFP_EINVAL; + goto release; + } else + af = OFP_AF_INET; + } + laddr = &inp->in6p_laddr; + faddr = &inp->in6p_faddr; + fport = inp->inp_fport; + } + + if (af == OFP_AF_INET) + hlen = sizeof(struct ofp_ip); + + switch (af) { + case OFP_AF_INET6: +/* fill ipv6 header */ + ip6 = odp_packet_push_head(m, sizeof(struct ofp_udphdr) + hlen); + if (!ip6) { + error = OFP_ENOBUFS; + goto release; + } + odp_packet_l3_offset_set(m, 0); + odp_packet_l4_offset_set(m, hlen); + + ip6->ofp_ip6_flow = inp->inp_flow & OFP_IPV6_FLOWINFO_MASK; + ip6->ofp_ip6_vfc = 0; + ip6->ofp_ip6_vfc &= ~OFP_IPV6_VERSION_MASK; + ip6->ofp_ip6_vfc |= OFP_IPV6_VERSION; + ip6->ofp_ip6_plen = odp_cpu_to_be_16((uint16_t) plen); + ip6->ofp_ip6_nxt = OFP_IPPROTO_UDP; + ip6->ofp_ip6_hlim = inp->in6p_hops; + ip6->ip6_src = *laddr; + ip6->ip6_dst = *faddr; + +/* fill udp header */ + udp6 = (struct ofp_udphdr *) (ip6 + 1); + udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ + udp6->uh_dport = fport; + if (plen <= 0xffff) + udp6->uh_ulen = odp_cpu_to_be_16((uint16_t)plen); + else + udp6->uh_ulen = 0; + udp6->uh_sum = 0; + + udp6->uh_sum = (uint16_t)ofp_ip6_cksum(m, plen, OFP_IPPROTO_UDP, 0); + UDPSTAT_INC(udps_opackets); +#if 0 + error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions, + NULL, inp); +#else + if (ofp_ip6_output(m, NULL) == OFP_PKT_DROP) + error = OFP_EIO; + else + error = 0; +#endif + break; + case OFP_AF_INET: + error = 
OFP_EAFNOSUPPORT; + goto release; + } + goto releaseopt; +release: + odp_packet_free(m); +releaseopt: + if (control != ODP_PACKET_INVALID) { +#if 0 + ip6_clearpktopts(&opt, -1); +#endif + odp_packet_free(control); + } + + return (error); +} + + +static void +udp6_abort(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); + + if (inp->inp_vflag & INP_IPV4) { + struct pr_usrreqs *pru; + + pru = ofp_inetsw[ofp_ip_protox[OFP_IPPROTO_UDP]].pr_usrreqs; + (*pru->pru_abort)(so); + return; + } + + INP_WLOCK(inp); + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + INP_HASH_WLOCK(&ofp_udbinfo); + ofp_in6_pcbdisconnect(inp); + inp->in6p_laddr = ofp_in6addr_any; + INP_HASH_WUNLOCK(&ofp_udbinfo); + ofp_soisdisconnected(so); + } + INP_WUNLOCK(inp); +} + +static int +udp6_attach(struct socket *so, int proto, struct thread *td) +{ + struct inpcb *inp; + int error; + + (void)proto; + (void)td; + + inp = sotoinpcb(so); + KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); + + /* Constant space reserved. ?? + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + error = soreserve(so, udp_sendspace, udp_recvspace); + if (error) + return (error); + }*/ + + INP_INFO_WLOCK(&ofp_udbinfo); + + error = ofp_in_pcballoc(so, &ofp_udbinfo); + if (error) { + INP_INFO_WUNLOCK(&ofp_udbinfo); + return (error); + } + + inp = (struct inpcb *)so->so_pcb; + inp->inp_vflag |= INP_IPV6; + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) + inp->inp_vflag |= INP_IPV4; + + inp->in6p_hops = V_ip6_defhlim; + inp->in6p_cksum = -1; /* just to be sure */ + /* + * XXX: ugly!! + * IPv4 TTL initialization is necessary for an IPv6 socket as well, + * because the socket may be bound to an IPv6 wildcard address, + * which may match an IPv4-mapped IPv6 address. + */ + inp->inp_ip_ttl = V_ip_defttl; + + /* Replaced by static allocation. 
+ error = udp_newudpcb(inp); + if (error) { + in_pcbdetach(inp); + in_pcbfree(inp); + INP_INFO_WUNLOCK(&V_udbinfo); + return (error); + } + */ + inp->inp_ppcb = &inp->ppcb_space.udp_ppcb; + + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&ofp_udbinfo); + + return (0); +} + + +static int +udp6_bind(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); + + INP_WLOCK(inp); + INP_HASH_WLOCK(&ofp_udbinfo); + inp->inp_vflag &= ~INP_IPV4; + inp->inp_vflag |= INP_IPV6; + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { + struct ofp_sockaddr_in6 *sin6_p; + + sin6_p = (struct ofp_sockaddr_in6 *)nam; + + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) + inp->inp_vflag |= INP_IPV4; + else if (OFP_IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { + struct ofp_sockaddr_in sin; + + ofp_in6_sin6_2_sin(&sin, sin6_p); + inp->inp_vflag |= INP_IPV4; + inp->inp_vflag &= ~INP_IPV6; + error = ofp_in_pcbbind(inp, + (struct ofp_sockaddr *)&sin, + td->td_ucred); + goto out; + } + } + + error = ofp_in6_pcbbind(inp, nam, td->td_ucred); +out: + INP_HASH_WUNLOCK(&ofp_udbinfo); + INP_WUNLOCK(inp); + return (error); +} + +static void +udp6_close(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_close: inp == NULL")); + + if (inp->inp_vflag & INP_IPV4) { + struct pr_usrreqs *pru; + + pru = ofp_inetsw[ofp_ip_protox[OFP_IPPROTO_UDP]].pr_usrreqs; + (*pru->pru_disconnect)(so); + return; + } + + INP_WLOCK(inp); + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + INP_HASH_WLOCK(&ofp_udbinfo); + ofp_in6_pcbdisconnect(inp); + inp->in6p_laddr = ofp_in6addr_any; + INP_HASH_WUNLOCK(&ofp_udbinfo); + ofp_soisdisconnected(so); + } + INP_WUNLOCK(inp); +} + +static int +udp6_connect(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + struct inpcb *inp; + struct ofp_sockaddr_in6 *sin6; + int error; + + inp = sotoinpcb(so); + sin6 = 
(struct ofp_sockaddr_in6 *)nam; + KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); + + /* + * XXXRW: Need to clarify locking of v4/v6 flags. + */ + INP_WLOCK(inp); + + if (OFP_IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + struct ofp_sockaddr_in sin; + + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { + error = OFP_EINVAL; + goto out; + } + if (inp->inp_faddr.s_addr != OFP_INADDR_ANY) { + error = OFP_EISCONN; + goto out; + } + ofp_in6_sin6_2_sin(&sin, sin6); + inp->inp_vflag |= INP_IPV4; + inp->inp_vflag &= ~INP_IPV6; +#if 0 + error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); + if (error != 0) + goto out; +#endif /* 0 */ + INP_HASH_WLOCK(&ofp_udbinfo); + error = ofp_in_pcbconnect(inp, (struct ofp_sockaddr *)&sin, + td->td_ucred); + INP_HASH_WUNLOCK(&ofp_udbinfo); + if (error == 0) + ofp_soisconnected(so); + goto out; + } + + if (!OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + error = OFP_EISCONN; + goto out; + } + inp->inp_vflag &= ~INP_IPV4; + inp->inp_vflag |= INP_IPV6; +#if 0 + error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); + if (error != 0) + goto out; +#endif + INP_HASH_WLOCK(&ofp_udbinfo); + error = ofp_in6_pcbconnect(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&ofp_udbinfo); + if (error == 0) + ofp_soisconnected(so); +out: + INP_WUNLOCK(inp); + return (error); +} + +static void +udp6_detach(struct socket *so) +{ + struct inpcb *inp; + struct udpcb *up; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); + + INP_INFO_WLOCK(&ofp_udbinfo); + INP_WLOCK(inp); + up = intoudpcb(inp); + KASSERT(up != NULL, ("%s: up == NULL", __func__)); + ofp_in_pcbdetach(inp); + ofp_in_pcbfree(inp); + INP_INFO_WUNLOCK(&ofp_udbinfo); +} + + +static int +udp6_disconnect(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); + + if (inp->inp_vflag & INP_IPV4) { + struct pr_usrreqs *pru; + + pru = ofp_inetsw[ofp_ip_protox[OFP_IPPROTO_UDP]].pr_usrreqs; + 
(void)(*pru->pru_disconnect)(so); + return (0); + } + + + INP_WLOCK(inp); + + if (OFP_IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + INP_WUNLOCK(inp); + return OFP_ENOTCONN; + } + + INP_HASH_WLOCK(&ofp_udbinfo); + ofp_in6_pcbdisconnect(inp); + inp->in6p_laddr = ofp_in6addr_any; + INP_HASH_WUNLOCK(&ofp_udbinfo); + OFP_SOCK_LOCK(so); + so->so_state &= ~SS_ISCONNECTED; /* XXX */ + OFP_SOCK_UNLOCK(so); + INP_WUNLOCK(inp); + return (0); +} + +static int +udp6_send(struct socket *so, int flags, odp_packet_t m, + struct ofp_sockaddr *addr, odp_packet_t control, struct thread *td) +{ + struct inpcb *inp; + int error = 0; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp6_send: inp == NULL")); + + INP_WLOCK(inp); + if (addr) { + if (addr->sa_len != sizeof(struct ofp_sockaddr_in6)) { + error = OFP_EINVAL; + goto bad; + } + if (addr->sa_family != OFP_AF_INET6) { + error = OFP_EAFNOSUPPORT; + goto bad; + } + } + + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { + int hasv4addr; + struct ofp_sockaddr_in6 *sin6 = 0; + + if (addr == 0) + hasv4addr = (inp->inp_vflag & INP_IPV4); + else { + sin6 = (struct ofp_sockaddr_in6 *)addr; + hasv4addr = OFP_IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) + ? 1 : 0; + } + if (hasv4addr) { + struct pr_usrreqs *pru; + + /* + * XXXRW: We release UDP-layer locks before calling + * udp_send() in order to avoid recursion. However, + * this does mean there is a short window where inp's + * fields are unstable. Could this lead to a + * potential race in which the factors causing us to + * select the UDPv4 output routine are invalidated? + */ + INP_WUNLOCK(inp); + if (sin6) + ofp_in6_sin6_2_sin_in_sock(addr); + pru = ofp_inetsw[ + ofp_ip_protox[OFP_IPPROTO_UDP]].pr_usrreqs; + /* addr will just be freed in sendit(). 
*/ + return ((*pru->pru_send)(so, flags, m, addr, control, + td)); + + } + } + + INP_HASH_WLOCK(&ofp_udbinfo); + error = udp6_output(inp, m, addr, control, td); + INP_HASH_WUNLOCK(&ofp_udbinfo); + + INP_WUNLOCK(inp); + return (error); + +bad: + INP_WUNLOCK(inp); + odp_packet_free(m); + return (error); +} + +struct pr_usrreqs ofp_udp6_usrreqs = { + .pru_abort = udp6_abort, + .pru_attach = udp6_attach, + .pru_bind = udp6_bind, + .pru_connect = udp6_connect, + .pru_control = NULL, /*in6_control,*/ + .pru_detach = udp6_detach, + .pru_disconnect = udp6_disconnect, + .pru_peeraddr = NULL, /*in6_mapped_peeraddr,*/ + .pru_send = udp6_send, + .pru_shutdown = ofp_udp_shutdown, + .pru_sockaddr = NULL, /*in6_mapped_sockaddr,*/ + .pru_soreceive = ofp_soreceive_dgram, + .pru_sosend = ofp_sosend_dgram, + .pru_sosetlabel = NULL, /*in_pcbsosetlabel,*/ + .pru_close = udp6_close +}; diff --git a/src/ofp_udp_usrreq.c b/src/ofp_udp_usrreq.c new file mode 100644 index 00000000..3740ab21 --- /dev/null +++ b/src/ofp_udp_usrreq.c @@ -0,0 +1,1398 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. + * Copyright (c) 2008 Robert N. M. Watson + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * All rights reserved. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 + */ + +#include +#include + +#include "ofpi_errno.h" +#include "odp.h" + +#include "ofpi_errno.h" +#include "ofpi_in.h" +#include "ofpi_ip.h" +#include "ofpi_ip6.h" +#include "ofpi_udp.h" +#include "ofpi_icmp.h" +//#include "ofpi_socket.h" + +//#include "ofp_packet.h" + +#include "ofpi_sysctl.h" +#include "ofpi_in_pcb.h" +#include "ofpi_udp_var.h" +#include "ofpi_socketvar.h" +#include "ofpi_ip_var.h" +#include "ofpi_sockbuf.h" +#include "ofpi_socket.h" +#include "ofpi_sockstate.h" +#include "ofpi_protosw.h" +#include "ofpi_ethernet.h" +#include "ofpi_ioctl.h" + +#include "ofpi_pkt_processing.h" +#include "ofpi_log.h" +#include "ofpi_debug.h" +#include "ofpi_hook.h" +#include "ofpi_util.h" + +#define RETURN(_r) do { if (_r) OFP_LOG("RETURN %d\n", _r); return (_r); } while (0) + +#define UDPSTAT_INC(x) +#define log(...) 
+ +#ifndef UDBHASHSIZE +#define UDBHASHSIZE 128 +#endif + +#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ +#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ + +#define M_BCAST 0x00000200 /* send/received as link-level broadcast */ +#define M_MCAST 0x00000400 /* send/received as link-level multicast */ + + +int ofp_udp_cksum = 1; +int ofp_udp_log_in_vain = 0; +int ofp_udp_blackhole = 0; +uint64_t ofp_udp_sendspace = 9216; /* really max datagram size */ +uint64_t ofp_udp_recvspace = 40 * (1024 + sizeof(struct ofp_sockaddr_in6)); +int ofp_max_linkhdr; +VNET_DEFINE(int, ofp_ip_defttl) = 255; + +struct inpcbhead ofp_udb; /* from udp_var.h */ +struct inpcbinfo ofp_udbinfo; +struct ofp_udpstat ofp_udpstat; /* from udp_var.h */ + +static void udp_detach(struct socket *so); +static int udp_output(struct inpcb *, odp_packet_t , struct ofp_sockaddr *, + odp_packet_t , struct thread *); + +uint8_t ofp_inetctlerrmap[OFP_PRC_NCMDS] = { + 0, 0, 0, 0, + 0, OFP_EMSGSIZE, OFP_EHOSTDOWN, OFP_EHOSTUNREACH, + OFP_EHOSTUNREACH, OFP_EHOSTUNREACH, OFP_ECONNREFUSED, OFP_ECONNREFUSED, + OFP_EMSGSIZE, OFP_EHOSTUNREACH, 0, 0, + 0, 0, OFP_EHOSTUNREACH, 0, + OFP_ENOPROTOOPT, OFP_ECONNREFUSED +}; + +OFP_SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, OFP_CTLFLAG_RW, + &ofp_udp_cksum, 0, "compute udp checksum"); + +OFP_SYSCTL_INT(_net_inet_udp, OFP_OID_AUTO, log_in_vain, OFP_CTLFLAG_RW, + &ofp_udp_log_in_vain, 0, "Log all incoming UDP packets"); + +OFP_SYSCTL_INT(_net_inet_udp, OFP_OID_AUTO, blackhole, OFP_CTLFLAG_RW, + &ofp_udp_blackhole, 0, + "Do not send port unreachables for refused connects"); + +OFP_SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, OFP_CTLFLAG_RW, + &ofp_udp_sendspace, 0, "Maximum outgoing UDP datagram size"); + +OFP_SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, OFP_CTLFLAG_RW, + &ofp_udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); + +static int +udp_inpcb_init(void *mem, int size, int flags) +{ + struct inpcb 
*inp; + + (void)size; + (void)flags; + + inp = mem; + INP_LOCK_INIT(inp, "inp", "udpinp"); + return (0); +} + +void +ofp_udp_init(void) +{ + INP_INFO_LOCK_INIT(&ofp_udbinfo, 0); + + ofp_in_pcbinfo_init(&ofp_udbinfo, "udp", &ofp_udb, UDBHASHSIZE, UDBHASHSIZE, + "udp_inpcb", udp_inpcb_init, NULL, 0, + IPI_HASHFIELDS_2TUPLE); +} + +/* + * Subroutine of ofp_udp_input(), which appends the provided mbuf chain to the + * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that + * contains the source address. If the socket ends up being an IPv6 socket, + * udp_append() will convert to a sockaddr_in6 before passing the address + * into the socket code. + */ +static void +udp_append(struct inpcb *inp, struct ofp_ip *ip, odp_packet_t n, int off, + struct ofp_sockaddr_in *udp_in) +{ + struct ofp_sockaddr *append_sa; + struct socket *so; + odp_packet_t opts = ODP_PACKET_INVALID; + struct ofp_sockaddr_in6 udp_in6; + struct udpcb *up; + + (void)ip; + (void)udp_in6; + + INP_LOCK_ASSERT(inp); + + /* + * Engage the tunneling protocol. 
+ */ + up = intoudpcb(inp); + if (up->u_tun_func != NULL) { + (*up->u_tun_func)(n, off, inp); + return; + } + + if (n == ODP_PACKET_INVALID) { + OFP_LOG("n == ODP_PACKET_INVALID\n"); + return; + } + + off += sizeof(struct ofp_udphdr); + + if (inp->inp_flags & INP_CONTROLOPTS || + inp->inp_socket->so_options & (OFP_SO_TIMESTAMP | OFP_SO_BINTIME)) { +#ifdef _INET6 + if (inp->inp_vflag & INP_IPV6) + (void)ip6_savecontrol_v4(inp, n, &opts, NULL); + else + ip_savecontrol(inp, &opts, ip, n); +#endif + } +#ifdef _INET6 + if (inp->inp_vflag & INP_IPV6) { + bzero(&udp_in6, sizeof(udp_in6)); + udp_in6.sin6_len = sizeof(udp_in6); + udp_in6.sin6_family = OFP_AF_INET6; + in6_sin_2_v4mapsin6(udp_in, &udp_in6); + append_sa = (struct ofp_sockaddr *)&udp_in6; + } else +#endif + append_sa = (struct ofp_sockaddr *)udp_in; + //odp_packet_seg_pull_head(n, odp_packet_seg(n, 0), off); + //odp_packet_adj(n, off); + + so = inp->inp_socket; + + SOCKBUF_RLOCK(&so->so_rcv); + /* save sender data where L2 & L3 headers used to be */ + memcpy(odp_packet_l2_ptr(n, NULL), append_sa, append_sa->sa_len); + + /* Offer to event function */ + if (packet_accepted_as_event_rlocked(&so->so_rcv, n)) { + SOCKBUF_RUNLOCK(&so->so_rcv); + return; + } + SOCKBUF_RUNLOCK(&so->so_rcv); + + SOCKBUF_LOCK(&so->so_rcv); + if (ofp_sbappendaddr_locked(&so->so_rcv, n, opts) == 0) { + SOCKBUF_UNLOCK(&so->so_rcv); + odp_packet_free(n); + if (opts != ODP_PACKET_INVALID) + odp_packet_free(opts); + UDPSTAT_INC(udps_fullsock); + } else { + sorwakeup_locked(so); + } +} + +int +ofp_udp_input(odp_packet_t m, int off) +{ + + int iphlen = off; + int protocol = IS_IPV4_UDP; + struct ofp_ip *ip; + struct ofp_udphdr *uh; + struct ofp_ifnet *ifp; + struct inpcb *inp; + //int len; + int res; +#ifndef SP + struct ofp_ip save_ip; +#endif /* SP*/ + struct ofp_sockaddr_in udp_in; + + OFP_HOOK(OFP_HOOK_LOCAL, m, &protocol, &res); + if (res != OFP_PKT_CONTINUE) + return res; + + ifp = odp_packet_user_ptr(m); + UDPSTAT_INC(udps_ipackets); + + 
/* + * Strip IP options, if any; should skip this, make available to + * user, and use on returned packets, but we don't yet have a way to + * check the checksum with options still present. + */ +#if 0 /* HJo: FIX */ + if (iphlen > sizeof (struct ofp_ip)) { + ip_stripoptions(m, (odp_packet_t )0); + iphlen = sizeof(struct ofp_ip); + } +#endif + /* + * Get IP and UDP header together in first mbuf. + */ + ip = (struct ofp_ip *)odp_packet_l3_ptr(m, NULL); + if (odp_packet_len(m) < iphlen + sizeof(struct ofp_udphdr)) { +#if 0 + if ((m = odp_packet_ensure_contiguous(m, iphlen + + sizeof(struct ofp_udphdr))) == 0) { + UDPSTAT_INC(udps_hdrops); + return; + } + ip = (struct ofp_ip *)odp_packet_data(m); +#else + return OFP_PKT_CONTINUE; +#endif + } + uh = (struct ofp_udphdr *)((char *)ip + iphlen); + + /* + * Destination port of 0 is illegal, based on RFC768. + */ + if (uh->uh_dport == 0) + goto badunlocked; + + /* + * Construct ofp_sockaddr format source address. Stuff source address + * and datagram in user buffer. + */ + bzero(&udp_in, sizeof(udp_in)); + udp_in.sin_len = sizeof(udp_in); + udp_in.sin_family = OFP_AF_INET; + udp_in.sin_port = uh->uh_sport; + udp_in.sin_addr = ip->ip_src; + + /* + * Make mbuf data length reflect UDP length. If not enough data to + * reflect UDP length, drop. + */ +#if 0 + len = odp_be_to_cpu_16((uint16_t)uh->uh_ulen); + + if (ip->ip_len != len) { + if (len > ip->ip_len || len < sizeof(struct ofp_udphdr)) { + UDPSTAT_INC(udps_badlen); + goto badunlocked; + } + //odp_packet_seg_pull_head(m, odp_packet_seg(m, 0), len - ip->ip_len); + odp_packet_adj(m, len - ip->ip_len); + /* ip->ip_len = len; */ + } +#endif + +#ifndef SP + /* + * Save a copy of the IP header in case we want restore it for + * sending an ICMP error message in response. + */ + if (!ofp_udp_blackhole) + save_ip = *ip; + else + memset(&save_ip, 0, sizeof(save_ip)); +#endif /*SP*/ + /* + * Checksum extended UDP header and data. 
+ */ + if (uh->uh_sum) { +#if 0 + uint16_t uh_sum; + + if (odp_packet_csum_flags(m) & CSUM_DATA_VALID) { + if (odp_packet_csum_flags(m) & CSUM_PSEUDO_HDR) + uh_sum = odp_packet_csum_data(m); + else + uh_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, odp_cpu_to_be_32((uint16_t)len + + odp_packet_csum_data(m) + OFP_IPPROTO_UDP)); + uh_sum ^= 0xffff; + } else { + char b[9]; + + bcopy(((struct ipovly *)ip)->ih_x1, b, 9); + bzero(((struct ipovly *)ip)->ih_x1, 9); + ((struct ipovly *)ip)->ih_len = uh->uh_ulen; + uh_sum = in_cksum(m, len + sizeof (struct ofp_ip)); + bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); + } + if (uh_sum) { + UDPSTAT_INC(udps_badsum); + odp_packet_free(m)); + return(0); + } +#endif + } else { + UDPSTAT_INC(udps_nosum); + } + +#if 0 + if (OFP_IN_MULTICAST(odp_be_to_cpu_32(ip->ip_dst.s_addr)) || + in_broadcast(ip->ip_dst, ifp)) { + struct inpcb *last; + struct ip_moptions *imo; + + INP_INFO_RLOCK(&ofp_udbinfo); + last = NULL; + OFP_LIST_FOREACH(inp, &ofp_udb, inp_list) { + if (inp->inp_lport != uh->uh_dport) + continue; +#ifdef _INET6 + if ((inp->inp_vflag & INP_IPV4) == 0) + continue; +#endif + if (inp->inp_laddr.s_addr != OFP_INADDR_ANY && + inp->inp_laddr.s_addr != ip->ip_dst.s_addr) + continue; + if (inp->inp_faddr.s_addr != OFP_INADDR_ANY && + inp->inp_faddr.s_addr != ip->ip_src.s_addr) + continue; + if (inp->inp_fport != 0 && + inp->inp_fport != uh->uh_sport) + continue; + + INP_RLOCK(inp); + + /* + * XXXRW: Because we weren't holding either the inpcb + * or the hash lock when we checked for a match + * before, we should probably recheck now that the + * inpcb lock is held. + */ + +#if 0 + /* + * Handle socket delivery policy for any-source + * and source-specific multicast. 
[RFC3678] + */ + imo = inp->inp_moptions; + if (IN_MULTICAST(odp_be_to_cpu_32(ip->ip_dst.s_addr))) { + struct ofp_sockaddr_in group; + int blocked; + if (imo == NULL) { + INP_RUNLOCK(inp); + continue; + } + bzero(&group, sizeof(struct ofp_sockaddr_in)); + group.sin_len = sizeof(struct ofp_sockaddr_in); + group.sin_family = OFP_AF_INET; + group.sin_addr = ip->ip_dst; + + blocked = imo_multi_filter(imo, ifp, + (struct ofp_sockaddr *)&group, + (struct ofp_sockaddr *)&udp_in); + if (blocked != MCAST_PASS) { + if (blocked == MCAST_NOTGMEMBER) + IPSTAT_INC(ips_notmember); + if (blocked == MCAST_NOTSMEMBER || + blocked == MCAST_MUTED) + UDPSTAT_INC(udps_filtermcast); + INP_RUNLOCK(inp); + continue; + } + } +#endif + +#if 0 + if (last != NULL) { + odp_packet_t n; + + n = m_copy(m, 0, M_COPYALL); + udp_append(last, ip, n, iphlen, &udp_in); + INP_RUNLOCK(last); + } +#endif + last = inp; + /* + * Don't look for additional matches if this one does + * not have either the OFP_SO_REUSEPORT or OFP_SO_REUSEADDR + * socket options set. This heuristic avoids + * searching through all pcbs in the common case of a + * non-shared port. It assumes that an application + * will never clear these options after setting them. + */ + if ((last->inp_socket->so_options & + (OFP_SO_REUSEPORT|OFP_SO_REUSEADDR)) == 0) + break; + } + + if (last == NULL) { + /* + * No matching pcb found; discard datagram. (No need + * to send an ICMP Port Unreachable for a broadcast + * or multicast datgram.) + */ + UDPSTAT_INC(udps_noportbcast); + if (inp) + INP_RUNLOCK(inp); + INP_INFO_RUNLOCK(&ofp_udbinfo); + goto badunlocked; + } + udp_append(last, ip, m, iphlen, &udp_in); + INP_RUNLOCK(last); + INP_INFO_RUNLOCK(&ofp_udbinfo); + return; + } /* Multicast */ +#endif + + /* + * Locate pcb for datagram. 
+ */ + inp = ofp_in_pcblookup(&ofp_udbinfo, ip->ip_src, uh->uh_sport, + ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, ifp); + + if (inp == NULL) { + if (ofp_udp_log_in_vain) { + /* LOG */ + OFP_LOG("Connection attempt to UDP %s:%d from %s:%d\n", + ofp_print_ip_addr(ip->ip_dst.s_addr), + odp_be_to_cpu_16(uh->uh_dport), + ofp_print_ip_addr(ip->ip_src.s_addr), + odp_be_to_cpu_16(uh->uh_sport)); + } + UDPSTAT_INC(udps_noport); + /* HJo + if (odp_packet_flags(m) & (M_BCAST | M_MCAST)) { + UDPSTAT_INC(udps_noportbcast); + goto badunlocked; + } + */ + if (ofp_udp_blackhole) + goto badunlocked; +#if 0 + if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) + goto badunlocked; +#endif + +#ifndef SP + *ip = save_ip; + ip->ip_len += iphlen; + ofp_icmp_error(m, OFP_ICMP_UNREACH, OFP_ICMP_UNREACH_PORT, 0, 0); + return OFP_PKT_PROCESSED; +#else + return OFP_PKT_CONTINUE; +#endif /* SP */ + } + + /* + * Check the minimum TTL for socket. + */ + INP_RLOCK_ASSERT(inp); + if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { + INP_RUNLOCK(inp); + odp_packet_free(m); + return OFP_PKT_PROCESSED; + } + + udp_append(inp, ip, m, iphlen, &udp_in); + INP_RUNLOCK(inp); + return OFP_PKT_PROCESSED; + +badunlocked: + OFP_LOG("badunlocked!\n"); + return OFP_PKT_DROP; +} + +/* + * Notify a udp user of an asynchronous error; just wake up so that they can + * collect error status. + */ +struct inpcb * +ofp_udp_notify(struct inpcb *inp, int err) +{ + /* + * While ofp_udp_ctlinput() always calls ofp_udp_notify() with a read lock + * when invoking it directly, in_pcbnotifyall() currently uses write + * locks due to sharing code with TCP. For now, accept either a read + * or a write lock, but a read lock is sufficient. 
+ */ + INP_LOCK_ASSERT(inp); + + inp->inp_socket->so_error = err; + sorwakeup(inp->inp_socket); + sowwakeup(inp->inp_socket); + return (inp); +} + +void +ofp_udp_ctlinput(int cmd, struct ofp_sockaddr *sa, void *vip) +{ + (void)cmd; + (void)sa; + (void)vip; + +#if 0 + struct ofp_ip *ip = vip; + struct ofp_udphdr *uh; + struct ofp_in_addr faddr; + struct inpcb *inp; + + faddr = ((struct ofp_sockaddr_in *)sa)->sin_addr; + if (sa->sa_family != OFP_AF_INET || faddr.s_addr == OFP_INADDR_ANY) + return; + + /* + * Redirects don't need to be handled up here. + */ + if (PRC_IS_REDIRECT(cmd)) + return; + + /* + * Hostdead is ugly because it goes linearly through all PCBs. + * + * XXX: We never get this from ICMP, otherwise it makes an excellent + * DoS attack on machines with many connections. + */ + if (cmd == PRC_HOSTDEAD) + ip = NULL; + else if ((unsigned)cmd >= PRC_NCMDS || ofp_inetctlerrmap[cmd] == 0) + return; + if (ip != NULL) { + uh = (struct ofp_udphdr *)((char *)ip + (ip->ip_hl << 2)); + inp = ofp_in_pcblookup(&ofp_udbinfo, faddr, uh->uh_dport, + ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL); + if (inp != NULL) { + INP_RLOCK_ASSERT(inp); + if (inp->inp_socket != NULL) { +#if 0 + ofp_udp_notify(inp, ofp_inetctlerrmap[cmd]); +#endif + } + INP_RUNLOCK(inp); + } + } +#if 0 + else + in_pcbnotifyall(&ofp_udbinfo, faddr, ofp_inetctlerrmap[cmd], + ofp_udp_notify); +#endif +#endif +} + +#if 0 +static int +udp_pcblist(OFP_SYSCTL_HANDLER_ARGS) +{ + int error, i, n; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the PCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == 0) { + n = ofp_udbinfo.ipi_count; + n += imax(n / 8, 10); + req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); + return (0); + } + + if (req->newptr != 0) + return (OFP_EPERM); + + /* + * OK, now we're committed to doing something. 
+ */ + INP_INFO_RLOCK(&ofp_udbinfo); + gencnt = V_udbinfo.ipi_gencnt; + n = V_udbinfo.ipi_count; + INP_INFO_RUNLOCK(&ofp_udbinfo); + + error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + + n * sizeof(struct xinpcb)); + if (error != 0) + return (error); + + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) + return (error); + + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) + return (OFP_ENOMEM); + + INP_INFO_RLOCK(&ofp_udbinfo); + for (inp = OFP_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; + inp = OFP_LIST_NEXT(inp, inp_list)) { + INP_WLOCK(inp); + if (inp->inp_gencnt <= gencnt && + cr_canseeinpcb(req->td->td_ucred, inp) == 0) { + ofp_in_pcbref(inp); + inp_list[i++] = inp; + } + INP_WUNLOCK(inp); + } + INP_INFO_RUNLOCK(&ofp_udbinfo); + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + INP_RLOCK(inp); + if (inp->inp_gencnt <= gencnt) { + struct xinpcb xi; + + bzero(&xi, sizeof(xi)); + xi.xi_len = sizeof xi; + /* XXX should avoid extra copy */ + bcopy(inp, &xi.xi_inp, sizeof *inp); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xi.xi_socket); + xi.xi_inp.inp_gencnt = inp->inp_gencnt; + INP_RUNLOCK(inp); + error = SYSCTL_OUT(req, &xi, sizeof xi); + } else + INP_RUNLOCK(inp); + } + INP_INFO_WLOCK(&ofp_udbinfo); + for (i = 0; i < n; i++) { + inp = inp_list[i]; + INP_RLOCK(inp); + if (!ofp_in_pcbrele_rlocked(inp)) + INP_RUNLOCK(inp); + } + INP_INFO_WUNLOCK(&ofp_udbinfo); + + if (!error) { + /* + * Give the user an updated idea of our state. If the + * generation differs from what we told her before, she knows + * that something happened while we were processing this + * request, and it might be necessary to retry. 
+ */ + INP_INFO_RLOCK(&ofp_udbinfo); + xig.xig_gen = V_udbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = V_udbinfo.ipi_count; + INP_INFO_RUNLOCK(&ofp_udbinfo); + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + free(inp_list, M_TEMP); + return (error); +} + +OFP_SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, + OFP_CTLTYPE_OPAQUE | OFP_CTLFLAG_RD, NULL, 0, + udp_pcblist, "S,xinpcb", "List of active UDP sockets"); +#endif + +#if 0 +static int +udp_getcred(OFP_SYSCTL_HANDLER_ARGS) +{ + struct xucred xuc; + struct ofp_sockaddr_in addrs[2]; + struct inpcb *inp; + int error; + + error = priv_check(req->td, PRIV_NETINET_GETCRED); + if (error) + return (error); + error = SYSCTL_IN(req, addrs, sizeof(addrs)); + if (error) + return (error); + inp = ofp_in_pcblookup(&ofp_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, + addrs[0].sin_addr, addrs[0].sin_port, + INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); + if (inp != NULL) { + INP_RLOCK_ASSERT(inp); + if (inp->inp_socket == NULL) + error = OFP_ENOENT; + if (error == 0) + error = cr_canseeinpcb(req->td->td_ucred, inp); + if (error == 0) + cru2x(inp->inp_cred, &xuc); + INP_RUNLOCK(inp); + } else + error = OFP_ENOENT; + if (error == 0) + error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); + return (error); +} + +OFP_SYSCTL_PROC(_net_inet_udp, OFP_OID_AUTO, getcred, + OFP_CTLTYPE_OPAQUE|OFP_CTLFLAG_RW|OFP_CTLFLAG_PRISON, 0, 0, + udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); +#endif + +int +ofp_udp_ctloutput(struct socket *so, struct sockopt *sopt) +{ + int error = 0; + (void)so; + (void)sopt; +#if 0 + int optval; + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); + INP_WLOCK(inp); + if (sopt->sopt_level != OFP_IPPROTO_UDP) { + if (INP_CHECK_SOCKAF(so, OFP_AF_INET6)) { + INP_WUNLOCK(inp); + error = ip6_ctloutput(so, sopt); + } + else + { + INP_WUNLOCK(inp); + error = ip_ctloutput(so, sopt); + } + return (error); + } + + switch 
(sopt->sopt_dir) { + case SOPT_SET: + switch (sopt->sopt_name) { + case OFP_UDP_ENCAP: + INP_WUNLOCK(inp); + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + break; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); + INP_WLOCK(inp); + switch (optval) { + case 0: + /* Clear all UDP encap. */ + break; + default: + error = OFP_EINVAL; + break; + } + INP_WUNLOCK(inp); + break; + default: + INP_WUNLOCK(inp); + error = OFP_ENOPROTOOPT; + break; + } + break; + case SOPT_GET: + switch (sopt->sopt_name) { + default: + INP_WUNLOCK(inp); + error = OFP_ENOPROTOOPT; + break; + } + break; + } +#endif + return (error); +} + +#define goto_release do { if (1) OFP_LOG("GOTO release\n"); goto release; } while (0) + +#define UH_WLOCKED 2 +#define UH_RLOCKED 1 +#define UH_UNLOCKED 0 +static int +udp_output(struct inpcb *inp, odp_packet_t m, struct ofp_sockaddr *addr, + odp_packet_t control, struct thread *td) +{ + int error = 0; + int len = odp_packet_len(m); + struct ofp_in_addr faddr, laddr; + struct ofp_cmsghdr *cm; + struct ofp_sockaddr_in *sin, src; + int ipflags; + uint16_t fport, lport; + int unlock_udbinfo; + uint8_t tos; + + /* + * udp_output() may need to temporarily bind or connect the current + * inpcb. As such, we don't know up front whether we will need the + * pcbinfo lock or not. Do any work to decide what is needed up + * front before acquiring any locks. + */ + if (len + sizeof(struct udpiphdr) > OFP_IP_MAXPACKET) { + if (control != ODP_PACKET_INVALID) + odp_packet_free(control); + odp_packet_free(m); + return (OFP_EMSGSIZE); + } + + src.sin_family = 0; + INP_RLOCK(inp); + tos = inp->inp_ip_tos; + if (control != ODP_PACKET_INVALID) { + /* + * XXX: Currently, we assume all the optional information is + * stored in a single mbuf. 
+ */ + + uint8_t *ctl_p = odp_packet_data(control); + unsigned int ctl_len = odp_packet_len(control); + + for (; ctl_len > 0; + ctl_p += OFP_CMSG_ALIGN(cm->cmsg_len), + ctl_len -= OFP_CMSG_ALIGN(cm->cmsg_len)) { + cm = (struct ofp_cmsghdr *)ctl_p; + if (ctl_len < sizeof(*cm) || cm->cmsg_len == 0 + || cm->cmsg_len > ctl_len) { + error = OFP_EINVAL; + break; + } + if (cm->cmsg_level != OFP_IPPROTO_IP) + continue; + + switch (cm->cmsg_type) { + case OFP_IP_SENDSRCADDR: + if (cm->cmsg_len != + OFP_CMSG_LEN(sizeof(struct ofp_in_addr))) { + error = OFP_EINVAL; + break; + } + bzero(&src, sizeof(src)); + src.sin_family = OFP_AF_INET; + src.sin_len = sizeof(src); + src.sin_port = inp->inp_lport; + src.sin_addr = + *(struct ofp_in_addr *)OFP_CMSG_DATA(cm); + break; + + case OFP_IP_TOS: + if (cm->cmsg_len != OFP_CMSG_LEN(sizeof(uint8_t))) { + error = OFP_EINVAL; + break; + } + tos = *(uint8_t *)OFP_CMSG_DATA(cm); + break; + + default: + error = OFP_ENOPROTOOPT; + break; + } + if (error) + break; + } + + odp_packet_free(control); + } + if (error) { + INP_RUNLOCK(inp); + odp_packet_free(m); + return (error); + } + + /* + * Depending on whether or not the application has bound or connected + * the socket, we may have to do varying levels of work. The optimal + * case is for a connected UDP socket, as a global lock isn't + * required at all. + * + * In order to decide which we need, we require stability of the + * inpcb binding, which we ensure by acquiring a read lock on the + * inpcb. This doesn't strictly follow the lock order, so we play + * the trylock and retry game; note that we may end up with more + * conservative locks than required the second time around, so later + * assertions have to accept that. Further analysis of the number of + * misses under contention is required. + * + * XXXRW: Check that hash locking update here is correct. 
+ */ + sin = (struct ofp_sockaddr_in *)addr; + if (sin != NULL && + (inp->inp_laddr.s_addr == OFP_INADDR_ANY && inp->inp_lport == 0)) { + INP_RUNLOCK(inp); + INP_WLOCK(inp); + INP_HASH_WLOCK(&ofp_udbinfo); + unlock_udbinfo = UH_WLOCKED; + } else if ((sin != NULL && ( + (sin->sin_addr.s_addr == OFP_INADDR_ANY) || + (sin->sin_addr.s_addr == OFP_INADDR_BROADCAST) || + (inp->inp_laddr.s_addr == OFP_INADDR_ANY) || + (inp->inp_lport == 0))) || + (src.sin_family == OFP_AF_INET)) { + INP_HASH_RLOCK(&ofp_udbinfo); + unlock_udbinfo = UH_RLOCKED; + } else + unlock_udbinfo = UH_UNLOCKED; + + /* + * If the IP_SENDSRCADDR control message was specified, override the + * source address for this datagram. Its use is invalidated if the + * address thus specified is incomplete or clobbers other inpcbs. + */ + laddr = inp->inp_laddr; + lport = inp->inp_lport; + if (src.sin_family == OFP_AF_INET) { + INP_HASH_LOCK_ASSERT(&ofp_udbinfo); + if ((lport == 0) || + (laddr.s_addr == OFP_INADDR_ANY && + src.sin_addr.s_addr == OFP_INADDR_ANY)) { + error = OFP_EINVAL; + goto_release; + } + error = ofp_in_pcbbind_setup(inp, (struct ofp_sockaddr *)&src, + &laddr.s_addr, &lport, td->td_ucred); + if (error) + goto_release; + } + + /* + * If a UDP socket has been connected, then a local address/port will + * have been selected and bound. + * + * If a UDP socket has not been connected to, then an explicit + * destination address must be used, in which case a local + * address/port may not have been selected and bound. + */ + if (sin != NULL) { + INP_LOCK_ASSERT(inp); + if (inp->inp_faddr.s_addr != OFP_INADDR_ANY) { + error = OFP_EISCONN; + goto_release; + } + + /* + * If a local address or port hasn't yet been selected, or if + * the destination address needs to be rewritten due to using + * a special INADDR_ constant, invoke ofp_in_pcbconnect_setup() + * to do the heavy lifting. 
Once a port is selected, we + * commit the binding back to the socket; we also commit the + * binding of the address if in jail. + * + * If we already have a valid binding and we're not + * requesting a destination address rewrite, use a fast path. + */ + if (inp->inp_laddr.s_addr == OFP_INADDR_ANY || + inp->inp_lport == 0 || + sin->sin_addr.s_addr == OFP_INADDR_ANY || + sin->sin_addr.s_addr == OFP_INADDR_BROADCAST) { + INP_HASH_LOCK_ASSERT(&ofp_udbinfo); + error = ofp_in_pcbconnect_setup(inp, addr, &laddr.s_addr, + &lport, &faddr.s_addr, &fport, NULL, + td->td_ucred); + if (error) + goto_release; + + /* + * XXXRW: Why not commit the port if the address is + * !OFP_INADDR_ANY? + */ + /* Commit the local port if newly assigned. */ + if (inp->inp_laddr.s_addr == OFP_INADDR_ANY && + inp->inp_lport == 0) { + INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(&ofp_udbinfo); +#if 0 + /* + * Remember addr if jailed, to prevent + * rebinding. + */ + if (prison_flag(td->td_ucred, PR_IP4)) + inp->inp_laddr = laddr; +#endif + inp->inp_lport = lport; + if (ofp_in_pcbinshash(inp) != 0) { + inp->inp_lport = 0; + error = OFP_EAGAIN; + goto_release; + } + inp->inp_flags |= INP_ANONPORT; + } + } else { + faddr = sin->sin_addr; + fport = sin->sin_port; + } + } else { + INP_LOCK_ASSERT(inp); + faddr = inp->inp_faddr; + fport = inp->inp_fport; + if (faddr.s_addr == OFP_INADDR_ANY) { + error = OFP_ENOTCONN; + goto_release; + } + } + + /* + * Calculate data length and get a mbuf for UDP, IP, and possible + * link-layer headers. Immediate slide the data pointer back forward + * since we won't use that space at this layer. 
+ */ + struct ofp_ip *ip = odp_packet_push_head(m, sizeof(struct udpiphdr)); + if (!ip) { + error = OFP_ENOBUFS; + goto release; + } + + odp_packet_l3_offset_set(m, 0); + odp_packet_l4_offset_set(m, sizeof(struct ofp_ip)); + + struct ofp_udphdr *udp = (struct ofp_udphdr *) (ip + 1); + static uint16_t id = 0; + + ip->ip_hl = 5; + ip->ip_v = 4; + ip->ip_tos = tos; + ip->ip_len = odp_cpu_to_be_16(len + sizeof(struct ofp_ip) + + sizeof(struct ofp_udphdr)); + ip->ip_id = odp_cpu_to_be_16(id++); + ip->ip_off = 0; + ip->ip_ttl = inp->inp_ip_ttl; + ip->ip_p = OFP_IPPROTO_UDP; + ip->ip_src.s_addr = laddr.s_addr; + ip->ip_dst.s_addr = faddr.s_addr; + ip->ip_sum = 0; + + udp->uh_sport = lport; + udp->uh_dport = fport; + udp->uh_ulen = odp_cpu_to_be_16(len + sizeof(struct ofp_udphdr)); + udp->uh_sum = 0; + + /* + * Set the Don't Fragment bit in the IP header. + */ + if (inp->inp_flags & INP_DONTFRAG) + ip->ip_off |= OFP_IP_DF; + + ipflags = 0; + + if (inp->inp_socket->so_options & OFP_SO_DONTROUTE) + ipflags |= IP_ROUTETOIF; + if (inp->inp_socket->so_options & OFP_SO_BROADCAST) + ipflags |= IP_ALLOWBROADCAST; + if (inp->inp_flags & INP_ONESBCAST) + ipflags |= IP_SENDONES; + + /* + * Set up checksum and output datagram. 
+ */ + ip->ip_sum = ofp_in_cksum((uint16_t *)ip, sizeof(*ip)); + udp->uh_sum = ofp_in4_cksum(m); +#if 0 + if (ofp_udp_cksum) { + if (inp->inp_flags & INP_ONESBCAST) + faddr.s_addr = OFP_INADDR_BROADCAST; + ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, + odp_cpu_to_be_16((uint16_t)len + sizeof(struct ofp_udphdr) + OFP_IPPROTO_UDP)); + + odp_packet_csum_flags(m) = CSUM_UDP; + odp_packet_set_csum_data(m, offsetof(struct ofp_udphdr, uh_sum)); + } else +#endif + UDPSTAT_INC(udps_opackets); + + if (unlock_udbinfo == UH_WLOCKED) + INP_HASH_WUNLOCK(&ofp_udbinfo); + else if (unlock_udbinfo == UH_RLOCKED) + INP_HASH_RUNLOCK(&ofp_udbinfo); + +#if 0 + error = ofp_ip_output(m, inp->inp_options, NULL, ipflags, + inp->inp_moptions, inp); +#else + if (ofp_ip_output(m, NULL) == OFP_PKT_DROP) + error = OFP_EIO; + else + error = 0; +#endif + if (unlock_udbinfo == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); + return (error); + +release: + if (unlock_udbinfo == UH_WLOCKED) { + INP_HASH_WUNLOCK(&ofp_udbinfo); + INP_WUNLOCK(inp); + } else if (unlock_udbinfo == UH_RLOCKED) { + INP_HASH_RUNLOCK(&ofp_udbinfo); + INP_RUNLOCK(inp); + } else + INP_RUNLOCK(inp); + odp_packet_free(m); + + return (error); +} + +static void +udp_abort(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_abort: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_faddr.s_addr != OFP_INADDR_ANY) { + INP_HASH_WLOCK(&ofp_udbinfo); + ofp_in_pcbdisconnect(inp); + inp->inp_laddr.s_addr = OFP_INADDR_ANY; + INP_HASH_WUNLOCK(&ofp_udbinfo); + ofp_soisdisconnected(so); + } + INP_WUNLOCK(inp); +} + +static int +udp_attach(struct socket *so, int proto, struct thread *td) +{ + struct inpcb *inp; + int error; + + (void)proto; + (void)td; + + inp = sotoinpcb(so); + KASSERT(inp == NULL, ("udp_attach: inp != NULL")); + + /* HJo: Constant space reserved. 
+ error = ofp_soreserve(so, ofp_udp_sendspace, ofp_udp_recvspace); + if (error) + return (error); + */ + + INP_INFO_WLOCK(&ofp_udbinfo); + + error = ofp_in_pcballoc(so, &ofp_udbinfo); + if (error) { + INP_INFO_WUNLOCK(&ofp_udbinfo); + return (error); + } + + inp = sotoinpcb(so); + inp->inp_vflag |= INP_IPV4; + inp->inp_ip_ttl = ofp_ip_defttl; + + /* HJo: Replaced by static allocation. + error = udp_newudpcb(inp); + if (error) { + ofp_in_pcbdetach(inp); + ofp_in_pcbfree(inp); + INP_INFO_WUNLOCK(&ofp_udbinfo); + return (error); + } + */ + inp->inp_ppcb = &inp->ppcb_space.udp_ppcb; + + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&ofp_udbinfo); + return (0); +} + +#if 0 +int +udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f) +{ + struct inpcb *inp; + struct udpcb *up; + + KASSERT(so->so_type == OFP_SOCK_DGRAM, + ("udp_set_kernel_tunneling: !dgram")); + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL")); + INP_WLOCK(inp); + up = intoudpcb(inp); + if (up->u_tun_func != NULL) { + INP_WUNLOCK(inp); + return (OFP_EBUSY); + } + up->u_tun_func = f; + INP_WUNLOCK(inp); + return (0); +} +#endif + +static int +udp_bind(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + struct inpcb *inp; + int error; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_bind: inp == NULL")); + INP_WLOCK(inp); + INP_HASH_WLOCK(&ofp_udbinfo); + error = ofp_in_pcbbind(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&ofp_udbinfo); + INP_WUNLOCK(inp); + return (error); +} + +static void +udp_close(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_close: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_faddr.s_addr != OFP_INADDR_ANY) { + INP_HASH_WLOCK(&ofp_udbinfo); + ofp_in_pcbdisconnect(inp); + inp->inp_laddr.s_addr = OFP_INADDR_ANY; + INP_HASH_WUNLOCK(&ofp_udbinfo); + ofp_soisdisconnected(so); + } + INP_WUNLOCK(inp); +} + +static int +udp_connect(struct socket *so, struct ofp_sockaddr *nam, struct 
thread *td) +{ + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_connect: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_faddr.s_addr != OFP_INADDR_ANY) { + INP_WUNLOCK(inp); + return (OFP_EISCONN); + } + + /* HJo: + error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); + if (error != 0) { + INP_WUNLOCK(inp); + return (error); + } + */ + INP_HASH_WLOCK(&ofp_udbinfo); + error = ofp_in_pcbconnect(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&ofp_udbinfo); + if (error == 0) + ofp_soisconnected(so); + INP_WUNLOCK(inp); + return (error); +} + +static void +udp_detach(struct socket *so) +{ + struct inpcb *inp; + struct udpcb *up; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_detach: inp == NULL")); + KASSERT(inp->inp_faddr.s_addr == OFP_INADDR_ANY, + ("udp_detach: not disconnected")); + INP_INFO_WLOCK(&ofp_udbinfo); + INP_WLOCK(inp); + up = intoudpcb(inp); + KASSERT(up != NULL, ("%s: up == NULL", __func__)); + inp->inp_ppcb = NULL; + ofp_in_pcbdetach(inp); + ofp_in_pcbfree(inp); + INP_INFO_WUNLOCK(&ofp_udbinfo); +} + +static int +udp_disconnect(struct socket *so) +{ + struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_faddr.s_addr == OFP_INADDR_ANY) { + INP_WUNLOCK(inp); + return (OFP_ENOTCONN); + } + INP_HASH_WLOCK(&ofp_udbinfo); + ofp_in_pcbdisconnect(inp); + inp->inp_laddr.s_addr = OFP_INADDR_ANY; + INP_HASH_WUNLOCK(&ofp_udbinfo); + OFP_SOCK_LOCK(so); +#if 1 /* HJo: FIX */ + so->so_state &= ~SS_ISCONNECTED; /* XXX */ +#endif + OFP_SOCK_UNLOCK(so); + INP_WUNLOCK(inp); + return (0); +} + +static int +udp_send(struct socket *so, int flags, odp_packet_t m, struct ofp_sockaddr *addr, + odp_packet_t control, struct thread *td) +{ + struct inpcb *inp; + + (void)flags; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_send: inp == NULL")); + return (udp_output(inp, m, addr, control, td)); +} + +int +ofp_udp_shutdown(struct socket *so) +{ + 
struct inpcb *inp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("ofp_udp_shutdown: inp == NULL")); + INP_WLOCK(inp); + ofp_socantsendmore(so); + INP_WUNLOCK(inp); + return (0); +} + +struct pr_usrreqs ofp_udp_usrreqs = { + .pru_abort = udp_abort, + .pru_attach = udp_attach, + .pru_bind = udp_bind, + .pru_connect = udp_connect, + .pru_control = ofp_in_control, + .pru_detach = udp_detach, + .pru_disconnect = udp_disconnect, + .pru_peeraddr = ofp_in_getpeeraddr, + .pru_send = udp_send, + .pru_soreceive = ofp_soreceive_dgram, + .pru_sosend = ofp_sosend_dgram, + .pru_shutdown = ofp_udp_shutdown, + .pru_sockaddr = ofp_in_getsockaddr, + .pru_sosetlabel = ofp_in_pcbsosetlabel, + .pru_close = udp_close, +}; diff --git a/src/ofp_uipc_domain.c b/src/ofp_uipc_domain.c new file mode 100644 index 00000000..050405e6 --- /dev/null +++ b/src/ofp_uipc_domain.c @@ -0,0 +1,554 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93 + */ +#if 0 +#include +__FBSDID("$FreeBSD: release/9.1.0/sys/kern/uipc_domain.c 237296 2012-06-20 09:38:35Z brueffer $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#endif + +#include +#include "ofpi_domain.h" +#include "ofpi_protosw.h" +#include "ofpi_ip6protosw.h" +#include "ofpi_socket.h" +#include "ofpi_util.h" + +#if 0 +/* + * System initialization + * + * Note: domain initialization takes place on a per domain basis + * as a result of traversing a SYSINIT linker set. Most likely, + * each domain would want to call DOMAIN_SET(9) itself, which + * would cause the domain to be added just after domaininit() + * is called during startup. + * + * See DOMAIN_SET(9) for details on its use. 
+ */ + +static void domaininit(void *); +SYSINIT(domain, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, domaininit, NULL); + +static void domainfinalize(void *); +SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize, + NULL); + +static struct callout pffast_callout; +static struct callout pfslow_callout; + +static void pffasttimo(void *); +static void pfslowtimo(void *); + +struct domain *domains; /* registered protocol domains */ +int domain_init_status = 0; +static struct mtx dom_mtx; /* domain list lock */ +MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF); +#endif +/* + * Dummy protocol specific user requests function pointer array. + * All functions return OFP_EOPNOTSUPP. + */ +struct pr_usrreqs nousrreqs = { + .pru_accept = ofp_pru_accept_notsupp, + .pru_attach = ofp_pru_attach_notsupp, + .pru_bind = ofp_pru_bind_notsupp, + .pru_connect = ofp_pru_connect_notsupp, + .pru_connect2 = ofp_pru_connect2_notsupp, + .pru_control = ofp_pru_control_notsupp, + .pru_disconnect = ofp_pru_disconnect_notsupp, + .pru_listen = ofp_pru_listen_notsupp, + .pru_peeraddr = ofp_pru_peeraddr_notsupp, + .pru_rcvd = ofp_pru_rcvd_notsupp, + .pru_rcvoob = ofp_pru_rcvoob_notsupp, + .pru_send = ofp_pru_send_notsupp, + .pru_sense = ofp_pru_sense_null, + .pru_shutdown = ofp_pru_shutdown_notsupp, + .pru_sockaddr = ofp_pru_sockaddr_notsupp, + .pru_sosend = ofp_pru_sosend_notsupp, + .pru_soreceive = ofp_pru_soreceive_notsupp, + .pru_sopoll = ofp_pru_sopoll_notsupp, +}; + +static void +protosw_init(struct protosw *pr) +{ + struct pr_usrreqs *pu; + + pu = pr->pr_usrreqs; + KASSERT(pu != NULL, ("protosw_init: %ssw[%d] has no usrreqs!", + pr->pr_domain->dom_name, + (int)(pr - pr->pr_domain->dom_protosw))); + + /* + * Protocol switch methods fall into three categories: mandatory, + * mandatory but protosw_init() provides a default, and optional. 
+ * + * For true protocols (i.e., pru_attach != NULL), KASSERT truly + * mandatory methods with no defaults, and initialize defaults for + * other mandatory methods if the protocol hasn't defined an + * implementation (NULL function pointer). + */ +#if 0 + if (pu->pru_attach != NULL) { + KASSERT(pu->pru_abort != NULL, + ("protosw_init: %ssw[%d] pru_abort NULL", + pr->pr_domain->dom_name, + (int)(pr - pr->pr_domain->dom_protosw))); + KASSERT(pu->pru_send != NULL, + ("protosw_init: %ssw[%d] pru_send NULL", + pr->pr_domain->dom_name, + (int)(pr - pr->pr_domain->dom_protosw))); + } +#endif + +#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar) + DEFAULT(pu->pru_accept, ofp_pru_accept_notsupp); + DEFAULT(pu->pru_bind, ofp_pru_bind_notsupp); + DEFAULT(pu->pru_connect, ofp_pru_connect_notsupp); + DEFAULT(pu->pru_connect2, ofp_pru_connect2_notsupp); + DEFAULT(pu->pru_control, ofp_pru_control_notsupp); + DEFAULT(pu->pru_disconnect, ofp_pru_disconnect_notsupp); + DEFAULT(pu->pru_listen, ofp_pru_listen_notsupp); + DEFAULT(pu->pru_peeraddr, ofp_pru_peeraddr_notsupp); + DEFAULT(pu->pru_rcvd, ofp_pru_rcvd_notsupp); + DEFAULT(pu->pru_rcvoob, ofp_pru_rcvoob_notsupp); + DEFAULT(pu->pru_sense, ofp_pru_sense_null); + DEFAULT(pu->pru_shutdown, ofp_pru_shutdown_notsupp); + DEFAULT(pu->pru_sockaddr, ofp_pru_sockaddr_notsupp); + DEFAULT(pu->pru_sosend, ofp_pru_sosend_notsupp); + DEFAULT(pu->pru_soreceive, ofp_pru_soreceive_notsupp); + DEFAULT(pu->pru_sopoll, ofp_pru_sopoll_notsupp); +#undef DEFAULT + if (pr->pr_init) + (*pr->pr_init)(); +} + +/* + * Add a new protocol domain to the list of supported domains + * Note: you cant unload it again because a socket may be using it. + * XXX can't fail at this time. 
+ */ +void +domain_init(void *arg) +{ + struct domain *dp = arg; + struct protosw *pr; + + if (dp->dom_init) + (*dp->dom_init)(); + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + protosw_init(pr); +#if 0 + /* + * update global information about maximums + */ + max_hdr = max_linkhdr + max_protohdr; + max_datalen = MHLEN - max_hdr; + if (max_datalen < 1) + panic("%s: max_datalen < 1", __func__); +#endif +} + +#if 0 +#ifdef VIMAGE +void +vnet_domain_init(void *arg) +{ + + /* Virtualized case is no different -- call init functions. */ + domain_init(arg); +} + +void +vnet_domain_uninit(void *arg) +{ + struct domain *dp = arg; + struct protosw *pr; + + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_destroy) + (*pr->pr_destroy)(); + if (dp->dom_destroy) + (*dp->dom_destroy)(); +} +#endif + +/* + * Add a new protocol domain to the list of supported domains + * Note: you cant unload it again because a socket may be using it. + * XXX can't fail at this time. + */ +void +domain_add(void *data) +{ + struct domain *dp; + + dp = (struct domain *)data; + mtx_lock(&dom_mtx); + dp->dom_next = domains; + domains = dp; + + KASSERT(domain_init_status >= 1, + ("attempt to domain_add(%s) before domaininit()", + dp->dom_name)); +#ifndef INVARIANTS + if (domain_init_status < 1) + printf("WARNING: attempt to domain_add(%s) before " + "domaininit()\n", dp->dom_name); +#endif +#ifdef notyet + KASSERT(domain_init_status < 2, + ("attempt to domain_add(%s) after domainfinalize()", + dp->dom_name)); +#else + if (domain_init_status >= 2) + printf("WARNING: attempt to domain_add(%s) after " + "domainfinalize()\n", dp->dom_name); +#endif + mtx_unlock(&dom_mtx); +} + +static void +socket_zone_change(void *tag) +{ + + uma_zone_set_max(socket_zone, maxsockets); +} + +/* ARGSUSED*/ +static void +domaininit(void *dummy) +{ + + /* + * Before we do any setup, make sure to initialize the + * zone allocator we get struct sockets from. 
+ */ + socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(socket_zone, maxsockets); + EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, + EVENTHANDLER_PRI_FIRST); + + if (max_linkhdr < 16) /* XXX */ + max_linkhdr = 16; + + callout_init(&pffast_callout, CALLOUT_MPSAFE); + callout_init(&pfslow_callout, CALLOUT_MPSAFE); + + mtx_lock(&dom_mtx); + KASSERT(domain_init_status == 0, ("domaininit called too late!")); + domain_init_status = 1; + mtx_unlock(&dom_mtx); +} + +/* ARGSUSED*/ +static void +domainfinalize(void *dummy) +{ + + mtx_lock(&dom_mtx); + KASSERT(domain_init_status == 1, ("domainfinalize called too late!")); + domain_init_status = 2; + mtx_unlock(&dom_mtx); + + callout_reset(&pffast_callout, 1, pffasttimo, NULL); + callout_reset(&pfslow_callout, 1, pfslowtimo, NULL); +} +struct protosw * +pffindtype(int family, int type) +{ + struct domain *dp; + struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (0); +found: + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_type && pr->pr_type == type) + return (pr); + return (0); +} +#endif /* 0 */ + +struct protosw * +ofp_pffindproto(int family, int protocol, int type) +{ + struct protosw *pr = 0; + struct protosw *pr_end = 0; + + if (family == 0) + return 0; + + if (family == OFP_AF_INET) { + pr = ofp_inetdomain.dom_protosw; + pr_end = ofp_inetdomain.dom_protoswNPROTOSW; + } +#ifdef INET6 + else if (family == OFP_AF_INET6) { + pr = ofp_inet6domain.dom_protosw; + pr_end = ofp_inet6domain.dom_protoswNPROTOSW; + } +#endif /*INET6*/ + else + return 0; + + while (pr < pr_end) { + if ((pr->pr_type == type) && + ((protocol && (pr->pr_protocol == protocol)) || + (!protocol))) + return pr; + pr++; + } + return 0; +} +#if 0 +/* + * The caller must make sure that the new protocol is fully set up and ready to + * accept requests 
before it is registered. + */ +int +pf_proto_register(int family, struct protosw *npr) +{ + VNET_ITERATOR_DECL(vnet_iter); + struct domain *dp; + struct protosw *pr, *fpr; + + /* Sanity checks. */ + if (family == 0) + return (OFP_EPFNOSUPPORT); + if (npr->pr_type == 0) + return (OFP_EPROTOTYPE); + if (npr->pr_protocol == 0) + return (OFP_EPROTONOSUPPORT); + if (npr->pr_usrreqs == NULL) + return (OFP_ENXIO); + + /* Try to find the specified domain based on the family. */ + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (OFP_EPFNOSUPPORT); + +found: + /* Initialize backpointer to struct domain. */ + npr->pr_domain = dp; + fpr = NULL; + + /* + * Protect us against races when two protocol registrations for + * the same protocol happen at the same time. + */ + mtx_lock(&dom_mtx); + + /* The new protocol must not yet exist. */ + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { + if ((pr->pr_type == npr->pr_type) && + (pr->pr_protocol == npr->pr_protocol)) { + mtx_unlock(&dom_mtx); + return (OFP_EEXIST); /* XXX: Check only protocol? */ + } + /* While here, remember the first free spacer. */ + if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER)) + fpr = pr; + } + + /* If no free spacer is found we can't add the new protocol. */ + if (fpr == NULL) { + mtx_unlock(&dom_mtx); + return (OFP_ENOMEM); + } + + /* Copy the new struct protosw over the spacer. */ + bcopy(npr, fpr, sizeof(*fpr)); + + /* Job is done, no more protection required. */ + mtx_unlock(&dom_mtx); + + /* Initialize and activate the protocol. */ + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET_QUIET(vnet_iter); + protosw_init(fpr); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + + return (0); +} + +/* + * The caller must make sure the protocol and its functions correctly shut down + * all sockets and release all locks and memory references. 
+ */ +int +pf_proto_unregister(int family, int protocol, int type) +{ + struct domain *dp; + struct protosw *pr, *dpr; + + /* Sanity checks. */ + if (family == 0) + return (OFP_EPFNOSUPPORT); + if (protocol == 0) + return (OFP_EPROTONOSUPPORT); + if (type == 0) + return (OFP_EPROTOTYPE); + + /* Try to find the specified domain based on the family type. */ + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (OFP_EPFNOSUPPORT); + +found: + dpr = NULL; + + /* Lock out everyone else while we are manipulating the protosw. */ + mtx_lock(&dom_mtx); + + /* The protocol must exist and only once. */ + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { + if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) { + if (dpr != NULL) { + mtx_unlock(&dom_mtx); + return (OFP_EMLINK); /* Should not happen! */ + } else + dpr = pr; + } + } + + /* Protocol does not exist. */ + if (dpr == NULL) { + mtx_unlock(&dom_mtx); + return (OFP_EPROTONOSUPPORT); + } + + /* De-orbit the protocol and make the slot available again. */ + dpr->pr_type = 0; + dpr->pr_domain = dp; + dpr->pr_protocol = PROTO_SPACER; + dpr->pr_flags = 0; + dpr->pr_input = NULL; + dpr->pr_output = NULL; + dpr->pr_ctlinput = NULL; + dpr->pr_ctloutput = NULL; + dpr->pr_init = NULL; + dpr->pr_fasttimo = NULL; + dpr->pr_slowtimo = NULL; + dpr->pr_drain = NULL; + dpr->pr_usrreqs = &nousrreqs; + + /* Job is done, not more protection required. 
*/ + mtx_unlock(&dom_mtx); + + return (0); +} + +void +pfctlinput(int cmd, struct sockaddr *sa) +{ + struct domain *dp; + struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_ctlinput) + (*pr->pr_ctlinput)(cmd, sa, (void *)0); +} + +void +pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam) +{ + struct domain *dp; + struct protosw *pr; + + if (!sa) + return; + for (dp = domains; dp; dp = dp->dom_next) { + /* + * the check must be made by xx_ctlinput() anyways, to + * make sure we use data item pointed to by ctlparam in + * correct way. the following check is made just for safety. + */ + if (dp->dom_family != sa->sa_family) + continue; + + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_ctlinput) + (*pr->pr_ctlinput)(cmd, sa, ctlparam); + } +} + +static void +pfslowtimo(void *arg) +{ + struct domain *dp; + struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_slowtimo) + (*pr->pr_slowtimo)(); + callout_reset(&pfslow_callout, hz/2, pfslowtimo, NULL); +} + +static void +pffasttimo(void *arg) +{ + struct domain *dp; + struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_fasttimo) + (*pr->pr_fasttimo)(); + callout_reset(&pffast_callout, hz/5, pffasttimo, NULL); +} +#endif + diff --git a/src/ofp_uipc_sockbuf.c b/src/ofp_uipc_sockbuf.c new file mode 100644 index 00000000..462afc03 --- /dev/null +++ b/src/ofp_uipc_sockbuf.c @@ -0,0 +1,639 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. 
+ * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 + */ + +#include + +#include "ofpi_errno.h" +#include "ofpi_systm.h" +#include "ofpi_socketvar.h" +#include "ofpi_sockstate.h" +#include "ofpi_in_pcb.h" +#include "ofpi_in.h" +#include "ofpi_log.h" + + +/* + * Primitive routines for operating on socket buffers + */ + +/* + * Constants related to network buffer management. + * MCLBYTES must be no larger than PAGE_SIZE. 
+ */ +#if 1 +#ifndef MSIZE +#define MSIZE 256 /* size of an mbuf */ +#endif /* MSIZE */ + +#ifndef MCLSHIFT +#define MCLSHIFT 11 /* convert bytes to mbuf clusters */ +#endif /* MCLSHIFT */ + +//#define MCLBYTES (1 << MCLSHIFT) /* size of an mbuf cluster */ +#endif + +uint64_t ofp_sb_max = SB_MAX; +uint64_t ofp_sb_max_adj = + (int64_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted ofp_sb_max */ + +static uint64_t sb_efficiency = 8; /* parameter for ofp_sbreserve() */ + +int packet_accepted_as_event_rlocked(struct sockbuf *sb, odp_packet_t pkt) +{ + struct socket *so_rcv_sock = sb->sb_socket; + struct ofp_sigevent *ev = &so_rcv_sock->so_sigevent; + struct ofp_sock_sigval ss_temp, *ss; + union ofp_sigval sv; + + if (!so_rcv_sock) + return 0; + + ss = &ss_temp; + sv.sival_ptr = ss; + + if (ev->ofp_sigev_notify) { + ss->pkt = pkt; + ss->event = OFP_EVENT_RECV; + ss->sockfd = so_rcv_sock->so_number; + ev->ofp_sigev_notify_function(sv); + if (ss->pkt == ODP_PACKET_INVALID) { + /* Callback function accepted the packet. */ + return 1; + } + } + return 0; +} + +static int packet_accepted_as_event(struct sockbuf *sb, odp_packet_t pkt) +{ + struct socket *so = sb->sb_socket; + if (!so) + return 0; + + struct ofp_sigevent *ev = &so->so_sigevent; + struct ofp_sock_sigval *ss = ev->ofp_sigev_value.sival_ptr; + + if (ev->ofp_sigev_notify && &(so->so_rcv) == sb) { + ss->pkt = pkt; + ss->event = OFP_EVENT_RECV; + ss->sockfd = so->so_number; + so->so_state |= SS_EVENT; + ev->ofp_sigev_notify_function(ev->ofp_sigev_value); + so->so_state &= ~SS_EVENT; + if (ss->pkt == ODP_PACKET_INVALID) { + /* Callback function accepted the packet. */ + return 1; + } + } + return 0; +} + +int ofp_sockbuf_put_last(struct sockbuf *sb, odp_packet_t pkt) +{ + /* Offer to event function */ + if (packet_accepted_as_event(sb, pkt)) + return 0; + + int next = sb->sb_put + 1; + if (next >= SOCKBUF_LEN) + next = 0; + + if (next == sb->sb_get) { + /* No more room. 
*/ + ofp_sockbuf_packet_free(pkt); + OFP_LOG("NO MORE ROOM (next=%d)!\n", next); + return -1; + } + + sb->sb_mb[sb->sb_put] = pkt; + sb->sb_put = next; + sballoc(sb, pkt); + return 0; +} + +odp_packet_t ofp_sockbuf_get_first(struct sockbuf *sb) +{ + if (sb->sb_get == sb->sb_put) + return ODP_PACKET_INVALID; + + return sb->sb_mb[sb->sb_get]; +} + +odp_packet_t ofp_sockbuf_remove_first(struct sockbuf *sb) +{ + odp_packet_t pkt = ODP_PACKET_INVALID; + + if (sb->sb_get != sb->sb_put) { + pkt = sb->sb_mb[sb->sb_get]; + if (++sb->sb_get >= SOCKBUF_LEN) + sb->sb_get = 0; + } + return pkt; +} + +void ofp_sockbuf_packet_free(odp_packet_t pkt) +{ + odp_packet_free(pkt); +} + +void ofp_sockbuf_copy_out(struct sockbuf *sb, int off, int len, char *dst) +{ + int i = sb->sb_get, dstoff = 0; + + while (i != sb->sb_put) { + int plen = odp_packet_len(sb->sb_mb[i]); + if (off >= plen) { + off -= plen; + if (++i >= SOCKBUF_LEN) + i = 0; + } else + break; + } + + while (len && i != sb->sb_put) { + int plen = odp_packet_len(sb->sb_mb[i]) - off; + if (plen > len) + plen = len; + odp_packet_copydata_out(sb->sb_mb[i], off, plen, dst + dstoff); + off = 0; + len -= plen; + dstoff += plen; + + if (++i >= SOCKBUF_LEN) + i = 0; + } +} + +/* + * Append address and data, and optionally, control (ancillary) data to the + * receive queue of a socket. If present, m0 must include a packet header + * with total length. Returns 0 if no space in sockbuf or insufficient + * mbufs. + */ +int +ofp_sbappendaddr_locked(struct sockbuf *sb, + odp_packet_t pkt, odp_packet_t control) +{ + SOCKBUF_LOCK_ASSERT(sb); + + if (control != ODP_PACKET_INVALID) + odp_packet_free(control); + + sb->sb_mb[sb->sb_put++] = pkt; + if (sb->sb_put >= SOCKBUF_LEN) + sb->sb_put = 0; + + if (sb->sb_put == sb->sb_get) { + sb->sb_put--; + if (sb->sb_put < 0) + sb->sb_put = SOCKBUF_LEN-1; + OFP_LOG("Buffers full (sb_get=%d)! 
Max num = %d\n", + sb->sb_get, SOCKBUF_LEN); + return 0; /* buffers full */ + } + + sballoc(sb, pkt); + return (1); +} + +/* + * Free all mbufs in a sockbuf. Check that all resources are reclaimed. + */ +static void +sbflush_internal(struct sockbuf *sb) +{ + while (sb->sb_get != sb->sb_put) { + odp_packet_free(sb->sb_mb[sb->sb_get]); + if (++sb->sb_get >= SOCKBUF_LEN) + sb->sb_get = 0; + } +} + +void +ofp_sbflush_locked(struct sockbuf *sb) +{ + SOCKBUF_LOCK_ASSERT(sb); + sbflush_internal(sb); +} + +void +ofp_sbflush(struct sockbuf *sb) +{ + SOCKBUF_LOCK(sb); + ofp_sbflush_locked(sb); + SOCKBUF_UNLOCK(sb); +} + +/* + * This version of sbappend() should only be used when the caller absolutely + * knows that there will never be more than one record in the socket buffer, + * that is, a stream protocol (such as TCP). + */ +void +ofp_sbappendstream_locked(struct sockbuf *sb, odp_packet_t m) +{ + SOCKBUF_LOCK_ASSERT(sb); + + SBLASTMBUFCHK(sb); + + sb->sb_lastrecord = sb->sb_put; + ofp_sbcompress(sb, m, sb->sb_mbtail); + + SBLASTRECORDCHK(sb); +} + +/* + * This version of sbappend() should only be used when the caller absolutely + * knows that there will never be more than one record in the socket buffer, + * that is, a stream protocol (such as TCP). + */ +void +ofp_sbappendstream(struct sockbuf *sb, odp_packet_t m) +{ + SOCKBUF_LOCK(sb); + ofp_sbappendstream_locked(sb, m); + SOCKBUF_UNLOCK(sb); + } + +/* + * Append the data in mbuf chain (m) into the socket buffer sb following mbuf + * (n). If (n) is NULL, the buffer is presumed empty. + * + * When the data is compressed, mbufs in the chain may be handled in one of + * three ways: + * + * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no + * record boundary, and no change in data type). + * + * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into + * an mbuf already in the socket buffer. 
This can occur if an + * appropriate mbuf exists, there is room, and no merging of data types + * will occur. + * + * (3) The mbuf may be appended to the end of the existing mbuf chain. + * + * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as + * end-of-record. + */ +void +ofp_sbcompress(struct sockbuf *sb, odp_packet_t pkt, int n) +{ + (void)n; + SOCKBUF_LOCK_ASSERT(sb); + ofp_sockbuf_put_last(sb, pkt); +} + +/* + * Drop data from (the front of) a sockbuf. + */ +static void +sbdrop_internal(struct sockbuf *sb, int len) +{ + odp_packet_t pkt; + + while (len > 0) { + pkt = ofp_sockbuf_get_first(sb); + if (pkt == ODP_PACKET_INVALID) + return; + + int buflen = odp_packet_len(pkt); + if (buflen > len) { + odp_packet_pull_head(pkt, len); + sb->sb_cc -= len; + if (sb->sb_sndptroff != 0) + sb->sb_sndptroff -= len; + break; + } + len -= buflen; + pkt = ofp_sockbuf_remove_first(sb); + sbfree(sb, pkt); + ofp_sockbuf_packet_free(pkt); + } +} + +/* + * Drop data from (the front of) a sockbuf. + */ +void +ofp_sbdrop_locked(struct sockbuf *sb, int len) +{ + SOCKBUF_LOCK_ASSERT(sb); + + sbdrop_internal(sb, len); +} + +void +ofp_sbdrop(struct sockbuf *sb, int len) +{ + SOCKBUF_LOCK(sb); + ofp_sbdrop_locked(sb, len); + SOCKBUF_UNLOCK(sb); +} + +/* + * Drop a record off the front of a sockbuf and move the next record to the + * front. 
+ */
+void
+ofp_sbdroprecord_locked(struct sockbuf *sb)
+{
+	odp_packet_t pkt;
+
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	/* Exactly one packet is taken off the head of the ring, if any. */
+	pkt = ofp_sockbuf_remove_first(sb);
+	if (pkt != ODP_PACKET_INVALID) {
+		sbfree(sb, pkt);
+		odp_packet_free(pkt);
+	}
+}
+
+/*
+ * Mark the send buffer as unable to send more data and wake up writers.
+ * The caller holds the send buffer lock.
+ */
+void
+ofp_socantsendmore_locked(struct socket *so)
+{
+	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+
+	so->so_snd.sb_state |= SBS_CANTSENDMORE;
+	sowwakeup_locked(so);
+}
+
+/*
+ * Locking variant of ofp_socantsendmore_locked().
+ * NOTE(review): the lock taken here is not released in this function;
+ * presumably sowwakeup_locked() drops it -- confirm.
+ */
+void
+ofp_socantsendmore(struct socket *so)
+{
+	SOCKBUF_LOCK(&so->so_snd);
+	ofp_socantsendmore_locked(so);
+}
+
+/*
+ * Mark the receive buffer as unable to receive more data and wake up
+ * readers.  The caller holds the receive buffer lock.
+ */
+void
+ofp_socantrcvmore_locked(struct socket *so)
+{
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
+	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
+	sorwakeup_locked(so);
+}
+
+/*
+ * Locking variant of ofp_socantrcvmore_locked().
+ * NOTE(review): the lock taken here is not released in this function;
+ * presumably sorwakeup_locked() drops it -- confirm.
+ */
+void
+ofp_socantrcvmore(struct socket *so)
+{
+	SOCKBUF_LOCK(&so->so_rcv);
+	ofp_socantrcvmore_locked(so);
+}
+
+/*
+ * Wait for data to arrive at/drain from a socket buffer.
+ *
+ * Sets SB_WAIT and sleeps on &sb->sb_cc via ofp_msleep(), passing the
+ * sockbuf mutex and a timeout derived from sb_timeo.  The result of
+ * ofp_msleep() is returned unchanged.
+ */
+
+/* NOTE(review): sleep() is declared here but not called in the visible code. */
+extern unsigned int sleep(unsigned int seconds);
+int
+ofp_sbwait(struct sockbuf *sb)
+{
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	sb->sb_flags |= SB_WAIT;
+	/* Timeout: sb_timeo scaled by 1000000/HZ (presumably ticks to us). */
+	return (ofp_msleep(&sb->sb_cc, &sb->sb_mtx,
+			     0 /*HJo (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH*/,
+			     "sbwait",
+			     1000000UL/HZ*sb->sb_timeo));
+}
+
+/*
+ * Acquire the sockbuf I/O lock (sb_sx).
+ *
+ * With SBL_WAIT and either SB_NOINTR on the sockbuf or SBL_NOINTR in flags,
+ * the spinlock is taken and 0 is returned.  With SBL_WAIT alone, 0 is
+ * returned without taking the lock.  Without SBL_WAIT, OFP_EWOULDBLOCK is
+ * returned if the lock is currently held, otherwise 0 -- again without
+ * taking it.
+ *
+ * NOTE(review): two of the three success paths return 0 without holding
+ * sb_sx, while ofp_sbunlock() unlocks unconditionally -- confirm that this
+ * is intended.
+ */
+int
+ofp_sblock(struct sockbuf *sb, int flags)
+{
+	KASSERT((flags & SBL_VALID) == flags,
+	    ("ofp_sblock: flags invalid (0x%x)", flags));
+
+	if (flags & SBL_WAIT) {
+		if ((sb->sb_flags & SB_NOINTR) ||
+		    (flags & SBL_NOINTR)) {
+			odp_spinlock_lock(&sb->sb_sx);
+			return (0);
+		}
+		//OFP_LOG("lock: dont know what to do\n");
+		//odp_spinlock_lock(&sb->sb_sx);
+		return 0;
+		/* HJo: What is this? (sx_xlock_sig(&sb->sb_sx));*/
+	} else {
+		if (odp_spinlock_is_locked(&sb->sb_sx))
+			return (OFP_EWOULDBLOCK);
+		return (0);
+	}
+}
+
+/* Release the sockbuf I/O lock (sb_sx). */
+void
+ofp_sbunlock(struct sockbuf *sb)
+{
+	odp_spinlock_unlock(&sb->sb_sx);
+}
+
+/*
+ * Wake up anything waiting on a socket buffer.
+ *
+ * ofp_wakeup(NULL) is always called; ofp_wakeup(&sb->sb_cc) is called in
+ * addition when SB_WAIT is set.  SB_WAIT itself is not cleared here.  The
+ * sockbuf lock is released before returning.
+ *
+ * NOTE(review): SOCKBUF_UNLOCK(sb) is issued both on entry and before
+ * return, so sb_flags is read after the first unlock and the lock is
+ * released twice -- confirm whether the first unlock is intentional.
+ */
+void
+ofp_sowakeup(struct socket *so, struct sockbuf *sb)
+{
+	(void)so;
+	SOCKBUF_UNLOCK(sb);
+
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	/*HJo selwakeuppri(&sb->sb_sel, PSOCK);*/
+	ofp_wakeup(NULL);
+#if 0
+	if (!SEL_WAITING(&sb->sb_sel))
+		sb->sb_flags &= ~SB_SEL;
+#endif
+
+	if (sb->sb_flags & SB_WAIT) {
+		ofp_wakeup(&sb->sb_cc);
+	}
+#if 0
+	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
+	if (sb->sb_upcall != NULL) {
+		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_DONTWAIT);
+		if (ret == SU_ISCONNECTED) {
+			KASSERT(sb == &so->so_rcv,
+			    ("OFP_SO_SND upcall returned SU_ISCONNECTED"));
+			ofp_soupcall_clear(so, OFP_SO_RCV);
+		}
+	} else
+		ret = SU_OK;
+	if (sb->sb_flags & SB_AIO)
+		aio_swake(so, sb);
+#endif
+
+	SOCKBUF_UNLOCK(sb);
+#if 0
+	if (ret == SU_ISCONNECTED)
+		ofp_soisconnected(so);
+	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
+		pgsigio(&so->so_sigio, SIGIO, 0);
+	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
+#endif
+}
+
+/*
+ * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
+ * become limiting if buffering efficiency is near the normal case.
+ *
+ * Returns 0 when cc exceeds ofp_sb_max_adj, otherwise sets sb_hiwat and
+ * sb_mbmax, clamps sb_lowat to the new sb_hiwat and returns 1.  The so and
+ * td arguments are unused.
+ */
+int
+ofp_sbreserve_locked(struct sockbuf *sb, uint64_t cc, struct socket *so,
+    struct thread *td)
+{
+	(void)so;
+	(void)td;
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	/*
+	 * When a thread is passed, we take into account the thread's socket
+	 * buffer size limit.  The caller will generally pass curthread, but
+	 * in the TCP input path, NULL will be passed to indicate that no
+	 * appropriate thread resource limits are available.  In that case,
+	 * we don't apply a process limit.
+	 */
+	if (cc > ofp_sb_max_adj)
+		return (0);
+	sb->sb_hiwat = cc;
+	sb->sb_mbmax = min(cc * sb_efficiency, ofp_sb_max);
+	if (sb->sb_lowat > (int)sb->sb_hiwat)
+		sb->sb_lowat = sb->sb_hiwat;
+	return (1);
+}
+
+/*
+ * Locking wrapper around ofp_sbreserve_locked().  Despite the name of the
+ * local variable, the value returned is 1 on success and 0 on failure.
+ */
+int
+ofp_sbreserve(struct sockbuf *sb, uint64_t cc, struct socket *so,
+    struct thread *td)
+{
+	int error;
+
+	SOCKBUF_LOCK(sb);
+	error = ofp_sbreserve_locked(sb, cc, so, td);
+	SOCKBUF_UNLOCK(sb);
+	return (error);
+}
+
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and one for
+ * receiving data.  Each buffer contains a queue of mbufs, information about
+ * the number of mbufs and amount of data in the queue, and other fields
+ * allowing select() statements and notification on data availability to be
+ * implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.  Each
+ * record is a list of mbufs chained together with the m_next field.  Records
+ * are chained together with the m_nextpkt field. The upper level routine
+ * ofp_soreceive() expects the following conventions to be observed when placing
+ * information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's name,
+ *    then a record containing that name must be present before any
+ *    associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really just
+ *    additional data associated with the message), and there are ``rights''
+ *    to be received, then a record containing this data should be present
+ *    (mbuf's must be of type MT_RIGHTS).
+ * 3. If a name or rights record exists, then it must be followed by a data
+ *    record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space to the socket, by calling ofp_sbreserve().
This should commit + * some of the available buffer space in the system buffer pool for the + * socket (currently, it does nothing but enforce limits). The space should + * be released by calling ofp_sbrelease() when the socket is destroyed. + */ +int +ofp_soreserve(struct socket *so, uint64_t sndcc, uint64_t rcvcc) +{ + struct thread *td = NULL /* HJo curthread*/; + + SOCKBUF_LOCK(&so->so_snd); + SOCKBUF_LOCK(&so->so_rcv); + if (ofp_sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) + goto bad; + if (ofp_sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) + goto bad2; + if (so->so_rcv.sb_lowat == 0) + so->so_rcv.sb_lowat = 1; + if (so->so_snd.sb_lowat == 0) + so->so_snd.sb_lowat = MCLBYTES; + if (so->so_snd.sb_lowat > (int)so->so_snd.sb_hiwat) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_snd); + return (0); +bad2: + ofp_sbrelease_locked(&so->so_snd, so); +bad: + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_snd); + return (OFP_ENOBUFS); +} + +/* + * Free mbufs held by a socket, and reserved mbuf space. + */ +void +ofp_sbrelease_internal(struct sockbuf *sb, struct socket *so) +{ + (void)so; + + sbflush_internal(sb); +#if 0 /* HJo */ + (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, + RLIM_INFINITY); +#else + sb->sb_hiwat = 0; +#endif + sb->sb_mbmax = 0; +} + +void +ofp_sbrelease_locked(struct sockbuf *sb, struct socket *so) +{ + SOCKBUF_LOCK_ASSERT(sb); + + ofp_sbrelease_internal(sb, so); +} + +void +ofp_sbrelease(struct sockbuf *sb, struct socket *so) +{ + SOCKBUF_LOCK(sb); + ofp_sbrelease_locked(sb, so); + SOCKBUF_UNLOCK(sb); +} + +void +ofp_sbdestroy(struct sockbuf *sb, struct socket *so) +{ + ofp_sbrelease_internal(sb, so); +} diff --git a/src/ofp_uipc_socket.c b/src/ofp_uipc_socket.c new file mode 100644 index 00000000..6784b1f3 --- /dev/null +++ b/src/ofp_uipc_socket.c @@ -0,0 +1,2843 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. 
+ * Copyright (c) 2004 The FreeBSD Foundation + * Copyright (c) 2004-2008 Robert N. M. Watson + * Copyright (c) 2014, Nokia + * Copyright (c) 2014, Enea Software AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 + */ + +/* + * Comments on the socket life cycle: + * + * soalloc() sets up socket layer state for a socket, called only by + * ofp_socreate() and ofp_sonewconn(). Socket layer private. 
+ * + * sodealloc() tears down socket layer state for a socket, called only by + * ofp_sofree() and ofp_sonewconn(). Socket layer private. + * + * pru_attach() associates protocol layer state with an allocated socket; + * called only once, may fail, aborting socket allocation. This is called + * from ofp_socreate() and ofp_sonewconn(). Socket layer private. + * + * pru_detach() disassociates protocol layer state from an attached socket, + * and will be called exactly once for sockets in which pru_attach() has + * been successfully called. If pru_attach() returned an error, + * pru_detach() will not be called. Socket layer private. + * + * pru_abort() and pru_close() notify the protocol layer that the last + * consumer of a socket is starting to tear down the socket, and that the + * protocol should terminate the connection. Historically, pru_abort() also + * detached protocol state from the socket state, but this is no longer the + * case. + * + * ofp_socreate() creates a socket and attaches protocol state. This is a public + * interface that may be used by socket layer consumers to create new + * sockets. + * + * ofp_sonewconn() creates a socket and attaches protocol state. This is a + * public interface that may be used by protocols to create new sockets when + * a new connection is received and will be available for accept() on a + * listen socket. + * + * ofp_soclose() destroys a socket after possibly waiting for it to disconnect. + * This is a public interface that socket consumers should use to close and + * release a socket when done with it. + * + * ofp_soabort() destroys a socket without waiting for it to disconnect (used + * only for incoming connections that are already partially or fully + * connected). 
This is used internally by the socket layer when clearing + * listen socket queues (due to overflow or close on the listen socket), but + * is also a public interface protocols may use to abort connections in + * their incomplete listen queues should they no longer be required. Sockets + * placed in completed connection listen queues should not be aborted for + * reasons described in the comment above the ofp_soclose() implementation. This + * is not a general purpose close routine, and except in the specific + * circumstances described here, should not be used. + * + * ofp_sofree() will free a socket and its protocol state if all references on + * the socket have been released, and is the public interface to attempt to + * free a socket when a reference is removed. This is a socket layer private + * interface. + * + * NOTE: In addition to ofp_socreate() and ofp_soclose(), which provide a single + * socket reference to the consumer to be managed as required, there are two + * calls to explicitly manage socket references, soref(), and sorele(). + * Currently, these are generally required only when transitioning a socket + * from a listen queue to a file descriptor, in order to prevent garbage + * collection of the socket at an untimely moment. For a number of reasons, + * these interfaces are not preferred, and should be avoided. + * + * NOTE: With regard to VNETs the general rule is that callers do not set + * curvnet. Exceptions to this rule include ofp_soabort(), ofp_sodisconnect(), + * ofp_sofree() (and with that sorele(), sotryfree()), as well as ofp_sonewconn() + * and sorflush(), which are usually called from a pre-set VNET context. + * sopoll() currently does not need a VNET context to be set. 
+ */ + +#include +#include + +#include "odp.h" + +#include "ofpi_errno.h" +#include "ofpi_timer.h" +#include "ofpi_in.h" +#include "ofpi_ip.h" +#include "ofpi_ip6.h" +#include "ofpi_udp.h" +#include "ofpi_icmp.h" + +#include "ofpi_util.h" + +#include "ofpi_socketvar.h" +#include "ofpi_socket.h" +#include "ofpi_in_pcb.h" +#include "ofpi_domain.h" +#include "ofpi_protosw.h" +#include "ofpi_ip6protosw.h" +#include "ofpi_sockstate.h" +#include "ofpi_log.h" + +#define OFP_SOCK_NUM_OFFSET 100 +#define OFP_NUM_SOCKET_POOLS 32 + +/* + * Shared data + */ +struct ofp_socket_mem { +#define NUM_SOCKETS maxsockets + struct socket socket_list[NUM_SOCKETS]; + struct socket *free_sockets; + int sockets_allocated, max_sockets_allocated; + int socket_zone; + + odp_rwlock_t so_global_mtx; + odp_rwlock_t ofp_accept_mtx; + int somaxconn; + odp_pool_t pool; + odp_pool_t pools[OFP_NUM_SOCKET_POOLS]; + int num_pools; + + struct sleeper { + struct sleeper *next; + void *channel; + const char *wmesg; + int go; + odp_timer_t tmo; + int woke_by_timer; + } *sleep_list; + odp_spinlock_t sleep_lock; +}; + +/* + * Data per core + */ +//static __thread struct ofp_socket_mem *shm; +static struct ofp_socket_mem *shm; + +#if 0 +/* For debugging */ +void print_open_conns(void); +void ofp_print_long_counters(void); +void ofp_print_sockets(void) +{ + int i; + for (i = 0; i < NUM_SOCKETS; i++) { + struct socket *so = &shm->socket_list[i]; + if (!so->so_proto) + continue; + OFP_LOG("Socket %d: rcv.put=%d rcv.get=%d snd.put=%d snd.get=%d\n", + so->so_number, so->so_rcv.sb_put, so->so_rcv.sb_get, + so->so_snd.sb_put, so->so_snd.sb_get); + } + + struct sleeper *s = shm->sleep_list; + while (s) { + OFP_LOG("Sleeper %s, tmo=%x go=%d timer=%d\n", + s->wmesg, s->tmo, s->go, s->woke_by_timer); + s = s->next; + } + print_open_conns(); +} + +struct cli_conn; +void f_sockets(struct cli_conn *conn, const char *s) +{ + ofp_print_sockets(); +} +#endif + +int ofp_socket_pool_create(const char *name, int size) +{ + 
odp_pool_param_t pool_params; + odp_pool_t pool; + uma_zone_t zone; + + OFP_LOG("POOL: Creating pool [%d] %s, size=%d\n", + shm->num_pools, name, size); + + pool_params.buf.size = size + 8; /* HJo: FIX */ + pool_params.buf.align = 0; + pool_params.buf.num = NUM_SOCKETS; + pool_params.type = ODP_POOL_BUFFER; + + if (shm->num_pools >= OFP_NUM_SOCKET_POOLS) { + OFP_ERR("POOL: Too many pools!\n"); + return -1; + } + + pool = odp_pool_create(name, ODP_SHM_NULL, &pool_params); + if (pool == ODP_POOL_INVALID) { + OFP_ERR("POOL: Cannot allocate pool!\n"); + return -1; + } + + zone = shm->num_pools++; + shm->pools[zone] = pool; + + OFP_LOG("POOL: Pools created = %d\n", shm->num_pools); + return zone; +} + +struct sock_pool_data { + union { + odp_buffer_t buffer; + uint8_t dummy[8]; + }; + uint8_t data[8]; +}; + +void *ofp_socket_pool_alloc(int zone) +{ + odp_buffer_t buffer; + struct sock_pool_data *addr; + static int gdb_visit_num = 0; + if (zone == 1) gdb_visit_num++; + + if (zone < 0 || zone >= shm->num_pools) { + OFP_ERR("POOL: Wrong zone %d!\n", zone); + return NULL; + } + + buffer = odp_buffer_alloc(shm->pools[zone]); + if (buffer == ODP_BUFFER_INVALID) { + OFP_ERR("POOL: Cannot allocate buffer!\n"); + return NULL; + } + + addr = odp_buffer_addr(buffer); + addr->buffer = buffer; + + return &addr->data; +} + +void ofp_socket_pool_free(void *item) +{ + struct sock_pool_data *addr = (struct sock_pool_data *) + ((uint8_t *)item - sizeof(addr->dummy)); + + odp_buffer_free(addr->buffer); +} + +odp_packet_t ofp_packet_alloc(uint32_t len) +{ + return odp_packet_alloc(shm->pool, len); +} + +odp_rwlock_t *ofp_accept_mtx(void) +{ + return &shm->ofp_accept_mtx; +} + +void ofp_accept_lock(void) +{ + odp_rwlock_write_lock(&shm->ofp_accept_mtx); +} + +void ofp_accept_unlock(void) +{ + odp_rwlock_write_unlock(&shm->ofp_accept_mtx); +} + +void ofp_socket_alloc_shared_memory(odp_pool_t pool) +{ + odp_shm_t shm_h; + uint32_t i; + + /* Reserve memory for args from shared mem */ + + shm_h 
= odp_shm_reserve("OfpSocketShMem", sizeof(*shm), + ODP_CACHE_LINE_SIZE, 0); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: Util shared mem alloc failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } + + memset(shm, 0, sizeof(*shm)); + + for (i = 0; i < NUM_SOCKETS; i++) { + shm->socket_list[i].next = (i == NUM_SOCKETS - 1) ? + NULL : &(shm->socket_list[i+1]); + shm->socket_list[i].so_number = i + OFP_SOCK_NUM_OFFSET; + } + shm->free_sockets = &(shm->socket_list[0]); + + //shm->socket_zone = ofp_socket_pool_create("socket", sizeof(struct socket)); + shm->somaxconn = SOMAXCONN; + shm->pool = pool; + odp_rwlock_init(&shm->so_global_mtx); + odp_rwlock_init(&shm->ofp_accept_mtx); + odp_spinlock_init(&shm->sleep_lock); +} + + +void ofp_socket_lookup_shared_memory(void) +{ + odp_shm_t shm_h; + + shm_h = odp_shm_lookup("OfpSocketShMem"); + shm = odp_shm_addr(shm_h); + + if (shm == NULL) { + OFP_ABORT("Error: Util shared mem lookup failed on core: %u.\n", + odp_cpu_id()); + exit(EXIT_FAILURE); + } +} + +struct socket *ofp_get_sock_by_fd(int fd) +{ + return &shm->socket_list[fd - OFP_SOCK_NUM_OFFSET]; +} + +/* + * Get a socket structure from our zone, and initialize it. + * Allocate socket and PCB at the same time. + * + * soalloc() returns a socket with a ref count of 0. 
+ */ +static struct socket *soalloc(void) +{ +#if 1 + odp_rwlock_write_lock(&shm->so_global_mtx); + struct socket *so = shm->free_sockets; + if (shm->free_sockets) { + shm->free_sockets = shm->free_sockets->next; + shm->sockets_allocated++; + if (shm->sockets_allocated > shm->max_sockets_allocated) + shm->max_sockets_allocated = shm->sockets_allocated; + } + odp_rwlock_write_unlock(&shm->so_global_mtx); +#else + struct socket *so = ofp_socket_pool_alloc(shm->socket_zone); +#endif + + if (so == NULL) { + OFP_ERR("Cannot allocate socket!\n"); + return (NULL); + } + + /* clean socket memory */ + int number = so->so_number; + memset(so, 0, sizeof(*so)); + so->so_number = number; + + SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); + SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); + odp_spinlock_init(&so->so_snd.sb_sx); + odp_spinlock_init(&so->so_rcv.sb_sx); + + return (so); +} + + +/* + * Free the storage associated with a socket at the socket layer, tear down + * locks, labels, etc. All protocol state is assumed already to have been + * torn down (and possibly never set up) by the caller. + */ +static void +sodealloc(struct socket *so) +{ + KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); + KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); + + so->so_proto = 0; + odp_rwlock_write_lock(&shm->so_global_mtx); + so->next = shm->free_sockets; + shm->free_sockets = so; + shm->sockets_allocated--; + odp_rwlock_write_unlock(&shm->so_global_mtx); +} + +/* + * ofp_socreate returns a socket with a ref count of 1. The socket should be + * closed with ofp_soclose(). 
+ */ +int +ofp_socreate(int dom, struct socket **aso, int type, int proto, struct thread *td) +{ + struct protosw *prp; + struct socket *so; + int error; + + prp = ofp_pffindproto(dom, proto, type); + + if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL || + prp->pr_usrreqs->pru_attach == ofp_pru_attach_notsupp) + return (OFP_EPROTONOSUPPORT); + + if (prp->pr_type == 0) + return (OFP_EPROTONOSUPPORT); + + if (prp->pr_type != type) + return (OFP_EPROTOTYPE); + + so = soalloc(); + + if (so == NULL) + return (OFP_ENOBUFS); + + OFP_TAILQ_INIT(&so->so_incomp); + OFP_TAILQ_INIT(&so->so_comp); + so->so_type = type; + // HJo: FIX: so->so_cred = crhold(cred); + so->so_cred = &so->so_cred_space; + + so->so_fibnum = td->td_proc.p_fibnum; + so->so_proto = prp; +#if 0 + knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); + knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); +#endif + so->so_count = 1; + + error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); + if (error) { + KASSERT(so->so_count == 1, ("ofp_socreate: so_count %d", + so->so_count)); + so->so_count = 0; + sodealloc(so); + return (error); + } + + *aso = so; + return (0); +} + +/* + * When an attempt at a new connection is noted on a socket which accepts + * connections, ofp_sonewconn is called. If the connection is possible (subject + * to space constraints, etc.) then we allocate a new structure, properly + * linked into the data structure of the original socket, and return this. + * Connstatus may be 0, or OFP_SO_ISCONFIRMING, or OFP_SO_ISCONNECTED. + * + * Note: the ref count on the socket is 0 on return. 
+ */ +struct socket * +ofp_sonewconn(struct socket *head, int connstatus) +{ + struct socket *so; + int over; + + ACCEPT_LOCK(); + over = (head->so_qlen > 3 * head->so_qlimit / 2); + ACCEPT_UNLOCK(); + if (over) + return (NULL); + so = soalloc(); + if (so == NULL) + return (NULL); + if ((head->so_options & OFP_SO_ACCEPTFILTER) != 0) + connstatus = 0; + so->so_head = head; + so->so_type = head->so_type; + so->so_options = head->so_options &~ (OFP_SO_ACCEPTCONN|OFP_SO_PASSIVE); + so->so_linger = head->so_linger; + so->so_state = head->so_state | SS_NOFDREF; + so->so_fibnum = head->so_fibnum; + so->so_proto = head->so_proto; + //HJo so->so_cred = crhold(head->so_cred); + //knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); + //knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); + if (ofp_soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || + (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { + sodealloc(so); + return (NULL); + } + so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; + so->so_snd.sb_lowat = head->so_snd.sb_lowat; + so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; + so->so_snd.sb_timeo = head->so_snd.sb_timeo; + so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; + so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; + so->so_state |= connstatus; + + so->so_sigevent = head->so_sigevent; + so->so_rcv.sb_socket = so; + so->so_snd.sb_socket = NULL; + + ACCEPT_LOCK(); + if (connstatus) { + OFP_TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); + so->so_qstate |= SQ_COMP; + head->so_qlen++; + } else { + /* + * Keep removing sockets from the head until there's room for + * us to insert on the tail. In pre-locking revisions, this + * was a simple if(), but as we could be racing with other + * threads and ofp_soabort() requires dropping locks, we must + * loop waiting for the condition to be true. 
+ */ + while (head->so_incqlen > head->so_qlimit) { + struct socket *sp; + sp = OFP_TAILQ_FIRST(&head->so_incomp); + OFP_TAILQ_REMOVE(&head->so_incomp, sp, so_list); + head->so_incqlen--; + sp->so_qstate &= ~SQ_INCOMP; + sp->so_head = NULL; + ACCEPT_UNLOCK(); + ofp_soabort(sp); + ACCEPT_LOCK(); + } + OFP_TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); + so->so_qstate |= SQ_INCOMP; + head->so_incqlen++; + } + ACCEPT_UNLOCK(); + if (connstatus) { + sorwakeup(head); + ofp_wakeup_one(&head->so_timeo); + } + return (so); +} + +int +ofp_sobind(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + int error; + + error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td); + return error; +} + +/* + * ofp_solisten() transitions a socket from a non-listening state to a listening + * state, but can also be used to update the listen queue depth on an + * existing listen socket. The protocol will call back into the sockets + * layer using ofp_solisten_proto_check() and ofp_solisten_proto() to check and set + * socket-layer listen state. Call backs are used so that the protocol can + * acquire both protocol and socket layer locks in whatever order is required + * by the protocol. + * + * Protocol implementors are advised to hold the socket lock across the + * socket-layer test and set to avoid races at the socket layer. 
+ */ +int +ofp_solisten(struct socket *so, int backlog, struct thread *td) +{ + int error; + + error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td); + return error; +} + +int +ofp_solisten_proto_check(struct socket *so) +{ + OFP_SOCK_LOCK_ASSERT(so); + + if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | + SS_ISDISCONNECTING)) + return (OFP_EINVAL); + return (0); +} + +void +ofp_solisten_proto(struct socket *so, int backlog) +{ + OFP_SOCK_LOCK_ASSERT(so); + + if (backlog < 0 || backlog > shm->somaxconn) + backlog = shm->somaxconn; + so->so_qlimit = backlog; + so->so_options |= OFP_SO_ACCEPTCONN; +} + + +static void +sofree_dequeue(struct socket *so) +{ + struct socket *head; + + head = so->so_head; + if (head != NULL) { + KASSERT((so->so_qstate & SQ_COMP) != 0 || + (so->so_qstate & SQ_INCOMP) != 0, + ("ofp_sofree: so_head != NULL, but neither SQ_COMP nor " + "SQ_INCOMP")); + KASSERT((so->so_qstate & SQ_COMP) == 0 || + (so->so_qstate & SQ_INCOMP) == 0, + ("ofp_sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); + OFP_TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + so->so_qstate &= ~SQ_INCOMP; + so->so_head = NULL; + } + KASSERT((so->so_qstate & SQ_COMP) == 0 && + (so->so_qstate & SQ_INCOMP) == 0, + ("ofp_sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", + so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); + if (so->so_options & OFP_SO_ACCEPTCONN) { + KASSERT((OFP_TAILQ_EMPTY(&so->so_comp)), ("ofp_sofree: so_comp populated")); + KASSERT((OFP_TAILQ_EMPTY(&so->so_incomp)), ("ofp_sofree: so_comp populated")); + } +} + +static void +sofree_dispose(struct socket *so) +{ + struct protosw *pr = so->so_proto; +#if 0 + if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) + (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); +#endif + if (pr->pr_usrreqs->pru_detach != NULL) + (*pr->pr_usrreqs->pru_detach)(so); + + /* + * From this point on, we assume that no other references to this + * socket exist 
anywhere else in the stack. Therefore, no locks need + * to be acquired or held. + * + * We used to do a lot of socket buffer and socket locking here, as + * well as invoke sorflush() and perform wakeups. The direct call to + * dom_dispose() and ofp_sbrelease_internal() are an inlining of what was + * necessary from sorflush(). + * + * Notice that the socket buffer and kqueue state are torn down + * before calling pru_detach. This means that protocols shold not + * assume they can perform socket wakeups, etc, in their detach code. + */ + + ofp_sbdestroy(&so->so_snd, so); + ofp_sbdestroy(&so->so_rcv, so); +#if 0 + seldrain(&so->so_snd.sb_sel); + seldrain(&so->so_rcv.sb_sel); + knlist_destroy(&so->so_rcv.sb_sel.si_note); + knlist_destroy(&so->so_snd.sb_sel.si_note); +#endif + sodealloc(so); +} + +static int +sohasrefs(const struct socket *so) +{ + return ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 || + (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)); +} + +/* + * Evaluate the reference count and named references on a socket; if no + * references remain, free it. This should be called whenever a reference is + * released, such as in sorele(), but also when named reference flags are + * cleared in socket or protocol code. + * + * ofp_sofree() will free the socket if: + * + * - There are no outstanding file descriptor references or related consumers + * (so_count == 0). + * + * - The socket has been closed by user space, if ever open (SS_NOFDREF). + * + * - The protocol does not have an outstanding strong reference on the socket + * (SS_PROTOREF). + * + * - The socket is not in a completed connection queue, so a process has been + * notified that it is present. If it is removed, the user process may + * block in accept() despite select() saying the socket was ready. 
+ */ +void +ofp_sofree(struct socket *so) +{ + ACCEPT_LOCK_ASSERT(); + OFP_SOCK_LOCK_ASSERT(so); + + if (sohasrefs(so)) { + OFP_SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + return; + } + + sofree_dequeue(so); + + OFP_SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + + sofree_dispose(so); +} + +/* + * Close a socket on last file table reference removal. Initiate disconnect + * if connected. Free socket when disconnect complete. + * + * This function will sorele() the socket. Note that ofp_soclose() may be called + * prior to the ref count reaching zero. The actual socket structure will + * not be freed until the ref count reaches zero. + */ +int +ofp_soclose(struct socket *so) +{ + int error = 0; + + KASSERT(!(so->so_state & SS_NOFDREF), ("ofp_soclose: SS_NOFDREF on enter")); + + //funsetown(&so->so_sigio); + if (so->so_state & SS_ISCONNECTED) { + if ((so->so_state & SS_ISDISCONNECTING) == 0) { + error = ofp_sodisconnect(so); + if (error) { + if (error == OFP_ENOTCONN) + error = 0; + goto drop; + } + } + if (so->so_options & OFP_SO_LINGER) { + if ((so->so_state & SS_ISDISCONNECTING) && + (so->so_state & SS_NBIO)) + goto drop; + + while (so->so_state & SS_ISCONNECTED) { + /* HJo: was tsleep */ + error = ofp_msleep(&so->so_timeo, NULL, + 0, "soclos", so->so_linger*1000000); + if (error) + break; + } + } + } + +drop: + if (so->so_proto->pr_usrreqs->pru_close != NULL) + (*so->so_proto->pr_usrreqs->pru_close)(so); + if (so->so_options & OFP_SO_ACCEPTCONN) { + struct socket *sp; + ACCEPT_LOCK(); + while ((sp = OFP_TAILQ_FIRST(&so->so_incomp)) != NULL) { + OFP_TAILQ_REMOVE(&so->so_incomp, sp, so_list); + so->so_incqlen--; + sp->so_qstate &= ~SQ_INCOMP; + sp->so_head = NULL; + ACCEPT_UNLOCK(); + ofp_soabort(sp); + ACCEPT_LOCK(); + } + while ((sp = OFP_TAILQ_FIRST(&so->so_comp)) != NULL) { + OFP_TAILQ_REMOVE(&so->so_comp, sp, so_list); + so->so_qlen--; + sp->so_qstate &= ~SQ_COMP; + sp->so_head = NULL; + ACCEPT_UNLOCK(); + ofp_soabort(sp); + ACCEPT_LOCK(); + } + ACCEPT_UNLOCK(); + } + 
ACCEPT_LOCK(); + OFP_SOCK_LOCK(so); + KASSERT((so->so_state & SS_NOFDREF) == 0, ("ofp_soclose: NOFDREF")); + so->so_state |= SS_NOFDREF; + sorele(so); + return (error); +} + +void +sorflush(struct socket *so) +{ + struct sockbuf *sb = &so->so_rcv; + /*struct protosw *pr = so->so_proto;*/ + struct sockbuf asb; + + /* + * In order to avoid calling dom_dispose with the socket buffer mutex + * held, and in order to generally avoid holding the lock for a long + * time, we make a copy of the socket buffer and clear the original + * (except locks, state). The new socket buffer copy won't have + * initialized locks so we can only call routines that won't use or + * assert those locks. + * + * Dislodge threads currently blocked in receive and wait to acquire + * a lock against other simultaneous readers before clearing the + * socket buffer. Don't let our acquire be interrupted by a signal + * despite any existing socket disposition on interruptable waiting. + */ + ofp_socantrcvmore(so); + (void) ofp_sblock(sb, SBL_WAIT | SBL_NOINTR); + + /* + * Invalidate/clear most of the sockbuf structure, but leave selinfo + * and mutex data unchanged. + */ + SOCKBUF_LOCK(sb); + bzero(&asb, offsetof(struct sockbuf, sb_startzero)); + bcopy(&sb->sb_startzero, &asb.sb_startzero, + sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); + bzero(&sb->sb_startzero, + sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); + SOCKBUF_UNLOCK(sb); + ofp_sbunlock(sb); + + /* + * Dispose of special rights and flush the socket buffer. Don't call + * any unsafe routines (that rely on locks being initialized) on asb. 
+ */ + /*if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) + (*pr->pr_domain->dom_dispose)(asb.sb_mb);*/ + ofp_sbrelease_internal(&asb, so); +} + +int +ofp_soshutdown(struct socket *so, int how) +{ + struct protosw *pr = so->so_proto; + int error; + + if (!(how == OFP_SHUT_RD || how == OFP_SHUT_WR || how == OFP_SHUT_RDWR)) + return (OFP_EINVAL); + + if (pr->pr_usrreqs->pru_flush != NULL) { + (*pr->pr_usrreqs->pru_flush)(so, how); + } + if (how != OFP_SHUT_WR) + sorflush(so); + if (how != OFP_SHUT_RD) { + error = (*pr->pr_usrreqs->pru_shutdown)(so); + return (error); + } + return (0); +} + +/* + * ofp_soabort() is used to abruptly tear down a connection, such as when a + * resource limit is reached (listen queue depth exceeded), or if a listen + * socket is closed while there are sockets waiting to be accepted. + * + * This interface is tricky, because it is called on an unreferenced socket, + * and must be called only by a thread that has actually removed the socket + * from the listen queue it was on, or races with other threads are risked. + * + * This interface will call into the protocol code, so must not be called + * with any socket locks held. Protocols do call it while holding their own + * recursible protocol mutexes, but this is something that should be subject + * to review in the future. + */ +void +ofp_soabort(struct socket *so) +{ + /* + * In as much as is possible, assert that no references to this + * socket are held. This is not quite the same as asserting that the + * current thread is responsible for arranging for no references, but + * is as close as we can get for now. 
+ */ + KASSERT(so->so_count == 0, ("ofp_soabort: so_count")); + KASSERT((so->so_state & SS_PROTOREF) == 0, ("ofp_soabort: SS_PROTOREF")); + KASSERT(so->so_state & SS_NOFDREF, ("ofp_soabort: !SS_NOFDREF")); + KASSERT((so->so_state & SQ_COMP) == 0, ("ofp_soabort: SQ_COMP")); + KASSERT((so->so_state & SQ_INCOMP) == 0, ("ofp_soabort: SQ_INCOMP")); + + if (so->so_proto->pr_usrreqs->pru_abort != NULL) + (*so->so_proto->pr_usrreqs->pru_abort)(so); + + ACCEPT_LOCK(); + OFP_SOCK_LOCK(so); + ofp_sofree(so); +} + +int +ofp_soaccept(struct socket *so, struct ofp_sockaddr **nam) +{ + int error; + + OFP_SOCK_LOCK(so); + KASSERT((so->so_state & SS_NOFDREF) != 0, ("ofp_soaccept: !NOFDREF")); + so->so_state &= ~SS_NOFDREF; + OFP_SOCK_UNLOCK(so); + error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); + return (error); +} + +int +ofp_soconnect(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + int error; + + if (so->so_options & OFP_SO_ACCEPTCONN) + return (OFP_EOPNOTSUPP); + + /* + * If protocol is connection-based, can only connect once. + * Otherwise, if connected, try to disconnect first. This allows + * user to disconnect by connecting to, e.g., a null address. + */ + if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && + ((so->so_proto->pr_flags & PR_CONNREQUIRED) || + (error = ofp_sodisconnect(so)))) { + error = OFP_EISCONN; + } else { + /* + * Prevent accumulated error from previous connection from + * biting us. + */ + so->so_error = 0; + error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); + } + + return (error); +} + +int +ofp_sodisconnect(struct socket *so) +{ + int error; + + if ((so->so_state & SS_ISCONNECTED) == 0) + return (OFP_ENOTCONN); + if (so->so_state & SS_ISDISCONNECTING) + return (OFP_EALREADY); + error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); + return (error); +} + +#define SBLOCKWAIT(f) (((f) & OFP_MSG_DONTWAIT) ? 
0 : SBL_WAIT) + +int +ofp_sosend_dgram(struct socket *so, struct ofp_sockaddr *addr, struct uio *uio, + odp_packet_t top, odp_packet_t control, int flags, struct thread *td) +{ + long space = 0; + ofp_ssize_t resid; + int clen = 0, error, dontroute; + const uint8_t *data; + //size_t len; + + KASSERT(so->so_type == OFP_SOCK_DGRAM, ("sodgram_send: !OFP_SOCK_DGRAM")); + KASSERT(so->so_proto->pr_flags & PR_ATOMIC, + ("sodgram_send: !PR_ATOMIC")); + + + if (uio != NULL) { + data = uio->uio_iov->iov_base; + resid = uio->uio_iov->iov_len; + } else { + data = odp_packet_data(top); + resid = odp_packet_len(top); + } + + dontroute = + (flags & OFP_MSG_DONTROUTE) && (so->so_options & OFP_SO_DONTROUTE) == 0; + /* HJo + if (td != NULL) + td->td_ru.ru_msgsnd++; + */ + if (control != ODP_PACKET_INVALID) + clen = odp_packet_len(control); + + SOCKBUF_LOCK(&so->so_snd); + if (so->so_snd.sb_state & SBS_CANTSENDMORE) { + SOCKBUF_UNLOCK(&so->so_snd); + error = OFP_EPIPE; + goto out; + } + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + SOCKBUF_UNLOCK(&so->so_snd); + goto out; + } + if ((so->so_state & SS_ISCONNECTED) == 0) { + /* + * `sendto' and `sendmsg' is allowed on a connection-based + * socket if it supports implied connect. Return OFP_ENOTCONN if + * not connected and no address is supplied. 
+ */ + if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && + (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { + if ((so->so_state & SS_ISCONFIRMING) == 0 && + !(resid == 0 && clen != 0)) { + SOCKBUF_UNLOCK(&so->so_snd); + error = OFP_ENOTCONN; + goto out; + } + } else if (addr == NULL) { + if (so->so_proto->pr_flags & PR_CONNREQUIRED) + error = OFP_ENOTCONN; + else + error = OFP_EDESTADDRREQ; + SOCKBUF_UNLOCK(&so->so_snd); + goto out; + } + } + + SOCKBUF_UNLOCK(&so->so_snd); + + if (uio != NULL) { + uint8_t *p; + error = OFP_ENOBUFS; + + top = ofp_packet_alloc(resid); + + if (top == ODP_PACKET_INVALID) + goto out; + + odp_packet_user_ptr_set(top, NULL); + + error = 0; + + p = odp_packet_data(top); + + memcpy(p, data, resid); +/*Bogdan: ToDo chain of buffers for multiple uio_iov*/ + } + + resid = 0; + + KASSERT(resid == 0, ("ofp_sosend_dgram: resid != 0")); + /* + * XXXRW: Frobbing OFP_SO_DONTROUTE here is even worse without ofp_sblock + * than with. + */ + if (dontroute) { + OFP_SOCK_LOCK(so); + so->so_options |= OFP_SO_DONTROUTE; + OFP_SOCK_UNLOCK(so); + } + /* + * XXX all the SBS_CANTSENDMORE checks previously done could be out + * of date. We could have recieved a reset packet in an interrupt or + * maybe we slept while doing page faults in uiomove() etc. We could + * probably recheck again inside the locking protection here, but + * there are probably other places that this also happens. We must + * rethink this. + */ + error = (*so->so_proto->pr_usrreqs->pru_send)(so, + (flags & OFP_MSG_OOB) ? PRUS_OOB : + /* + * If the user set OFP_MSG_EOF, the protocol understands this flag and + * nothing left to send then use OFP_PRU_SEND_EOF instead of OFP_PRU_SEND. + */ + ((flags & OFP_MSG_EOF) && + (so->so_proto->pr_flags & PR_IMPLOPCL) && + (resid <= 0)) ? + PRUS_EOF : + /* If there is more to send set PRUS_MORETOCOME */ + (resid > 0 && space > 0) ? 
PRUS_MORETOCOME : 0, + top, addr, control, td); + if (dontroute) { + OFP_SOCK_LOCK(so); + so->so_options &= ~OFP_SO_DONTROUTE; + OFP_SOCK_UNLOCK(so); + } + clen = 0; + control = ODP_PACKET_INVALID; + top = ODP_PACKET_INVALID; +out: + if (top != ODP_PACKET_INVALID) + odp_packet_free(top); + if (control != ODP_PACKET_INVALID) + odp_packet_free(control); + return (error); +} + +/* + * Send on a socket. If send must go all at once and message is larger than + * send buffering, then hard error. Lock against other senders. If must go + * all at once and not enough room now, then inform user that this would + * block and do nothing. Otherwise, if nonblocking, send as much as + * possible. The data to be sent is described by "uio" if nonzero, otherwise + * by the mbuf chain "top" (which must be null if uio is not). Data provided + * in mbuf chain must be small enough to send all at once. + * + * Returns nonzero on error, timeout or signal; callers must check for short + * counts if OFP_EINTR/OFP_ERESTART are returned. Data and control buffers are freed + * on return. + */ +int +ofp_sosend_generic(struct socket *so, struct ofp_sockaddr *addr, struct uio *uio, + odp_packet_t top, odp_packet_t control, int flags, struct thread *td) +{ + long space; + ofp_ssize_t resid; + int clen = 0, error, dontroute; + int atomic = sosendallatonce(so) || top; + + if (uio != NULL) + resid = uio->uio_resid; + else + resid = odp_packet_len(top); + /* + * In theory resid should be unsigned. However, space must be + * signed, as it might be less than 0 if we over-committed, and we + * must use a signed comparison of space and resid. On the other + * hand, a negative resid causes us to loop sending 0-length + * segments to the protocol. + * + * Also check to make sure that OFP_MSG_EOR isn't used on OFP_SOCK_STREAM + * type sockets since that's an error. 
+ */ + if (resid < 0 || (so->so_type == OFP_SOCK_STREAM && (flags & OFP_MSG_EOR))) { + error = OFP_EINVAL; + goto out; + } + + dontroute = + (flags & OFP_MSG_DONTROUTE) && (so->so_options & OFP_SO_DONTROUTE) == 0 && + (so->so_proto->pr_flags & PR_ATOMIC); + /* HJo + if (td != NULL) + td->td_ru.ru_msgsnd++; + */ + if (control != ODP_PACKET_INVALID) + clen = odp_packet_len(control); + + error = ofp_sblock(&so->so_snd, SBLOCKWAIT(flags)); + if (error) + goto out; +restart: + + do { + SOCKBUF_LOCK(&so->so_snd); + if (so->so_snd.sb_state & SBS_CANTSENDMORE) { + SOCKBUF_UNLOCK(&so->so_snd); + error = OFP_EPIPE; + + goto release; + } + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + SOCKBUF_UNLOCK(&so->so_snd); + + goto release; + } + if ((so->so_state & SS_ISCONNECTED) == 0) { + + /* + * `sendto' and `sendmsg' is allowed on a connection- + * based socket if it supports implied connect. + * Return OFP_ENOTCONN if not connected and no address is + * supplied. + */ + if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && + (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { + if ((so->so_state & SS_ISCONFIRMING) == 0 && + !(resid == 0 && clen != 0)) { + SOCKBUF_UNLOCK(&so->so_snd); + error = OFP_ENOTCONN; + goto release; + } + } else if (addr == NULL) { + SOCKBUF_UNLOCK(&so->so_snd); + if (so->so_proto->pr_flags & PR_CONNREQUIRED) + error = OFP_ENOTCONN; + else + error = OFP_EDESTADDRREQ; + goto release; + } + } + + space = sbspace(&so->so_snd); + if (flags & OFP_MSG_OOB) + space += 1024; + if ((atomic && resid > so->so_snd.sb_hiwat) || + clen > (int)so->so_snd.sb_hiwat) { + SOCKBUF_UNLOCK(&so->so_snd); + error = OFP_EMSGSIZE; + + goto release; + } + + if (space < resid + clen && + (atomic || space < so->so_snd.sb_lowat || space < clen)) { + + if ((so->so_state & SS_NBIO) || (flags & OFP_MSG_NBIO)) { + if (so->so_upcallprep.soup_send) { + so->so_upcallprep.soup_send(so, + so->so_upcallprep.soup_send_arg, + resid); + } + SOCKBUF_UNLOCK(&so->so_snd); + error = 
OFP_EWOULDBLOCK; + goto release; + } + + error = ofp_sbwait(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); + if (error) + goto release; + goto restart; + } + + SOCKBUF_UNLOCK(&so->so_snd); + space -= clen; + do { + + if (uio == NULL) { + + resid = 0; + /* HJo: FIX + if (flags & OFP_MSG_EOR) + odp_packet_flags(top) |= M_EOR; + */ + } else { + + top = odp_packet_alloc(shm->pool, 1); + error = OFP_ENOBUFS; + + if (top == ODP_PACKET_INVALID) + goto release; + + int cancopy = resid; + if (cancopy > SHM_PKT_POOL_BUF_SIZE) + cancopy = SHM_PKT_POOL_BUF_SIZE; + if (cancopy > space) + cancopy = space; + odp_packet_reset(top, cancopy); + odp_packet_user_ptr_set(top, NULL); + uint8_t *p = odp_packet_data(top); + memcpy(p, uio->uio_iov->iov_base, cancopy); + uio->uio_iov->iov_base = cancopy + + (uint8_t *)uio->uio_iov->iov_base; + uio->uio_resid -= cancopy; + space -= resid - uio->uio_resid; + resid = uio->uio_resid; + } + if (dontroute) { + OFP_SOCK_LOCK(so); + so->so_options |= OFP_SO_DONTROUTE; + OFP_SOCK_UNLOCK(so); + } + /* + * XXX all the SBS_CANTSENDMORE checks previously + * done could be out of date. We could have recieved + * a reset packet in an interrupt or maybe we slept + * while doing page faults in uiomove() etc. We + * could probably recheck again inside the locking + * protection here, but there are probably other + * places that this also happens. We must rethink + * this. + */ + error = (*so->so_proto->pr_usrreqs->pru_send)(so, + (flags & OFP_MSG_OOB) ? PRUS_OOB : + /* + * If the user set OFP_MSG_EOF, the protocol understands + * this flag and nothing left to send then use + * OFP_PRU_SEND_EOF instead of OFP_PRU_SEND. + */ + ((flags & OFP_MSG_EOF) && + (so->so_proto->pr_flags & PR_IMPLOPCL) && + (resid <= 0)) ? + PRUS_EOF : + /* If there is more to send set PRUS_MORETOCOME. */ + (resid > 0 && space > 0) ? 
PRUS_MORETOCOME : 0, + top, addr, control, td); + if (dontroute) { + OFP_SOCK_LOCK(so); + so->so_options &= ~OFP_SO_DONTROUTE; + OFP_SOCK_UNLOCK(so); + } + clen = 0; + control = ODP_PACKET_INVALID; + top = ODP_PACKET_INVALID; + + if (error) + goto release; + } while (resid && space > 0); + } while (resid); + +release: + ofp_sbunlock(&so->so_snd); +out: + + if (top != ODP_PACKET_INVALID) + odp_packet_free(top); + if (control != ODP_PACKET_INVALID) + odp_packet_free(control); + return (error); +} + +int +ofp_sosend(struct socket *so, struct ofp_sockaddr *addr, struct uio *uio, + odp_packet_t top, odp_packet_t control, int flags, struct thread *td) +{ + int error; + + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))) { + printf("state = %x\n", so->so_state); + return OFP_ENOTCONN; + } else if (addr) + return OFP_EISCONN; + } + + error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top, + control, flags, td); + return (error); +} + +/* + * Implement receive operations on a socket. We depend on the way that + * records are added to the sockbuf by sbappend. In particular, each record + * (mbufs linked through m_next) must begin with an address if the protocol + * so specifies, followed by an optional mbuf or mbufs containing ancillary + * data, and then zero or more mbufs of data. In order to allow parallelism + * between network receive and copying to user space, as well as avoid + * sleeping with a mutex held, we release the socket buffer mutex during the + * user space copy. Although the sockbuf is locked, new data may still be + * appended, and thus we must maintain consistency of the sockbuf during that + * time. + * + * The caller may receive the data as a single mbuf chain by supplying an + * mbuf **mp0 for use in returning the chain. The uio is then used only for + * the count in uio_resid. 
+ */ +int +ofp_soreceive_generic(struct socket *so, struct ofp_sockaddr **psa, struct uio *uio, + odp_packet_t *mp0, odp_packet_t *controlp, int *flagsp) +{ + odp_packet_t m, *mp; + int flags, error, offset; + ofp_ssize_t len; + struct protosw *pr = so->so_proto; + int moff, /* type = 0, last_m_flags,*/ hole_break = 0; + ofp_ssize_t orig_resid = uio->uio_resid; + + mp = mp0; + if (psa != NULL) + *psa = NULL; + if (controlp != NULL) + *controlp = ODP_PACKET_INVALID; + if (flagsp != NULL) { + hole_break = *flagsp & OFP_MSG_HOLE_BREAK; + *flagsp &= ~OFP_MSG_HOLE_BREAK; + flags = *flagsp &~ OFP_MSG_EOR; + } else + flags = 0; + + hole_break = hole_break; + + /* HJo: FIX + if (flags & OFP_MSG_OOB) + return (soreceive_rcvoob(so, uio, flags)); + */ + if (mp != NULL) + *mp = ODP_PACKET_INVALID; + if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) + && uio->uio_resid) { + (*pr->pr_usrreqs->pru_rcvd)(so, 0); + } + + error = ofp_sblock(&so->so_rcv, SBLOCKWAIT(flags)); + if (error) + return (error); +restart: + SOCKBUF_LOCK(&so->so_rcv); + m = ofp_sockbuf_get_first(&so->so_rcv); + /* + * If we have less data than requested, block awaiting more (subject + * to any timeout) if: + * 1. the current count is less than the low water mark, or + * 2. OFP_MSG_WAITALL is set, and it is possible to do the entire + * receive operation at once if we block (resid <= hiwat). + * 3. OFP_MSG_DONTWAIT is not set + * If OFP_MSG_WAITALL is set but resid is larger than the receive buffer, + * we have to do the receive in sections, and thus risk returning a + * short count if a timeout or signal occurs after we start. 
+ */ + if (m == ODP_PACKET_INVALID || + (((flags & OFP_MSG_DONTWAIT) == 0 && + so->so_rcv.sb_cc < uio->uio_resid) && + ((int)so->so_rcv.sb_cc < so->so_rcv.sb_lowat || + ((flags & OFP_MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + (pr->pr_flags & PR_ATOMIC) == 0)) { + KASSERT(m != ODP_PACKET_INVALID || !so->so_rcv.sb_cc, + ("receive: so->so_rcv.sb_cc == %u", + so->so_rcv.sb_cc)); + if (so->so_error) { + if (m != ODP_PACKET_INVALID) + goto dontblock; + error = so->so_error; + if ((flags & OFP_MSG_PEEK) == 0) + so->so_error = 0; + SOCKBUF_UNLOCK(&so->so_rcv); + goto release; + } + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { + if (m == ODP_PACKET_INVALID) { + SOCKBUF_UNLOCK(&so->so_rcv); + goto release; + } else + goto dontblock; + } + /* HJo: FIX: + for (; m != NULL; m = m->m_next) + if (m->m_type == MT_OOBDATA || (odp_packet_flags(m) & M_EOR)) { + m = so->so_rcv.sb_mb; + goto dontblock; + } + */ + if (m != ODP_PACKET_INVALID) + goto dontblock; + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) { + SOCKBUF_UNLOCK(&so->so_rcv); + error = OFP_ENOTCONN; + goto release; + } + if (uio->uio_resid == 0) { + SOCKBUF_UNLOCK(&so->so_rcv); + goto release; + } + if ((so->so_state & SS_NBIO) || + (flags & (OFP_MSG_DONTWAIT|OFP_MSG_NBIO))) { + if (so->so_upcallprep.soup_receive != NULL) { + so->so_upcallprep.soup_receive(so, + so->so_upcallprep.soup_receive_arg, + orig_resid - uio->uio_resid, uio->uio_resid); + } + SOCKBUF_UNLOCK(&so->so_rcv); + error = OFP_EWOULDBLOCK; + goto release; + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + error = ofp_sbwait(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_rcv); + if (error) + goto release; + goto restart; + } +dontblock: + /* + * From this point onward, we maintain 'nextrecord' as a cache of the + * pointer to the next record in the socket buffer. 
We must keep the + * various socket buffer pointers and local stack versions of the + * pointers in sync, pushing out modifications before dropping the + * socket buffer mutex, and re-reading them when picking it up. + * + * Otherwise, we will race with the network stack appending new data + * or records onto the socket buffer by using inconsistent/stale + * versions of the field, possibly resulting in socket buffer + * corruption. + * + * By holding the high-level ofp_sblock(), we prevent simultaneous + * readers from pulling off the front of the socket buffer. + */ + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + /* HJo + if (uio->uio_td) + uio->uio_td->td_ru.ru_msgrcv++; + KASSERT(m == so->so_rcv.sb_mb, ("ofp_soreceive: m != so->so_rcv.sb_mb")); + */ + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + if (pr->pr_flags & PR_ADDR) { + /* HJo + KASSERT(m->m_type == MT_SONAME, + ("m->m_type == %d", m->m_type)); + */ + orig_resid = 0; + /* HJo: FIX + if (psa != NULL) + *psa = sodupsockaddr((struct ofp_sockaddr *)odp_packet_data(m), + M_NOWAIT); + */ + if (flags & OFP_MSG_PEEK) { + /* HJo m = m->m_next; */ + } else { + /* HJo + sbfree(&so->so_rcv, m); + odp_packet_free(m); + m = ofp_sockbuf_remove_first(&so->so_rcv); + */ + /* sockbuf_pushsync(&so->so_rcv, nextrecord);*/ + } + } + +#if 0 /* HJo: FIX */ + /* + * Process one or more MT_CONTROL mbufs present before any data mbufs + * in the first mbuf chain on the socket buffer. If OFP_MSG_PEEK, we + * just copy the data; if !OFP_MSG_PEEK, we call into the protocol to + * perform externalization (or freeing if controlp == NULL). 
+ */ + if (m != NULL && m->m_type == MT_CONTROL) { + odp_packet_t cm = NULL, *cmn; + odp_packet_t *cme = &cm; + + do { + if (flags & OFP_MSG_PEEK) { + if (controlp != NULL) { + *controlp = m_copy(m, 0, odp_packet_get_len(m)); + controlp = &(*controlp)->m_next; + } + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + so->so_rcv.sb_mb = m->m_next; + m->m_next = NULL; + *cme = m; + cme = &(*cme)->m_next; + m = so->so_rcv.sb_mb; + } + } while (m != NULL && m->m_type == MT_CONTROL); + /* + if ((flags & OFP_MSG_PEEK) == 0) + sockbuf_pushsync(&so->so_rcv, nextrecord); + */ + while (cm != NULL) { + cmn = cm->m_next; + cm->m_next = NULL; + if (pr->pr_domain->dom_externalize != NULL) { + SOCKBUF_UNLOCK(&so->so_rcv); + VNET_SO_ASSERT(so); + error = (*pr->pr_domain->dom_externalize) + (cm, controlp); + SOCKBUF_LOCK(&so->so_rcv); + } else if (controlp != NULL) + *controlp = cm; + else + odp_packet_free(cm)); + if (controlp != NULL) { + orig_resid = 0; + while (*controlp != NULL) + controlp = &(*controlp)->m_next; + } + cm = cmn; + } + /* + if (m != NULL) + nextrecord = so->so_rcv.sb_mb->m_nextpkt; + else + nextrecord = so->so_rcv.sb_mb; + */ + orig_resid = 0; + } + if (m != NULL) { + if ((flags & OFP_MSG_PEEK) == 0) { + /* + KASSERT(m->m_nextpkt == nextrecord, + ("ofp_soreceive: post-control, nextrecord !sync")); + if (nextrecord == NULL) { + KASSERT(so->so_rcv.sb_mb == m, + ("ofp_soreceive: post-control, sb_mb!=m")); + KASSERT(so->so_rcv.sb_lastrecord == m, + ("ofp_soreceive: post-control, lastrecord!=m")); + } + */ + } + type = m->m_type; + if (type == MT_OOBDATA) + flags |= OFP_MSG_OOB; + last_m_flags = odp_packet_flags(m); + if (hole_break && (odp_packet_flags(m) & M_HOLE)) + flags |= OFP_MSG_HOLE_BREAK; + } else { + if ((flags & OFP_MSG_PEEK) == 0) { + /* + KASSERT(so->so_rcv.sb_mb == nextrecord, + ("ofp_soreceive: sb_mb != nextrecord")); + if (so->so_rcv.sb_mb == NULL) { + KASSERT(so->so_rcv.sb_lastrecord == NULL, + ("ofp_soreceive: sb_lastercord != NULL")); + } + */ + 
} + } +#endif /* HJo */ + + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + + /* + * Now continue to read any data mbufs off of the head of the socket + * buffer until the read request is satisfied. Note that 'type' is + * used to store the type of any mbuf reads that have happened so far + * such that ofp_soreceive() can stop reading if the type changes, which + * causes ofp_soreceive() to return only one of regular data and inline + * out-of-band data in a single socket receive operation. + */ + moff = 0; + offset = 0; + while (m != ODP_PACKET_INVALID && uio->uio_resid > 0 && error == 0) { + /* + * If the type of mbuf has changed since the last mbuf + * examined ('type'), end the receive operation. + */ + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + /* HJo: FIX + if (hole_break && + ((odp_packet_flags(m) ^ last_m_flags) & M_HOLE)) + break; + last_m_flags = odp_packet_flags(m); + if (m->m_type == MT_OOBDATA) { + if (type != MT_OOBDATA) + break; + } else if (type == MT_OOBDATA) + break; + else + KASSERT(m->m_type == MT_DATA, + ("m->m_type == %d", m->m_type)); + */ + + so->so_rcv.sb_state &= ~SBS_RCVATMARK; + len = uio->uio_resid; + if (so->so_oobmark && len > (int)(so->so_oobmark - offset)) + len = so->so_oobmark - offset; + if (len > odp_packet_len(m) - moff) + len = odp_packet_len(m) - moff; + /* + * If mp is set, just pass back the mbufs. Otherwise copy + * them out via the uio, then free. Sockbuf must be + * consistent here (points to current mbuf, it points to next + * record) when we drop priority; we must note any additions + * to the sockbuf when we block interrupts again. 
+ */ + if (mp == NULL) { + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_rcv); + + if (!odp_packet_copydata_out(m, moff, len, + uio->uio_iov->iov_base)) { + uio->uio_resid -= len; + } + + SOCKBUF_LOCK(&so->so_rcv); + if (error) { + /* + * The MT_SONAME mbuf has already been removed + * from the record, so it is necessary to + * remove the data mbufs, if any, to preserve + * the invariant in the case of PR_ADDR that + * requires MT_SONAME mbufs at the head of + * each record. + */ + if (m != ODP_PACKET_INVALID && + pr->pr_flags & PR_ATOMIC && + ((flags & OFP_MSG_PEEK) == 0)) + (void)ofp_sbdroprecord_locked(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_rcv); + goto release; + } + } else + uio->uio_resid -= len; + + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + if (len == odp_packet_len(m) - moff) { + /* HJo + if (odp_packet_flags(m) & M_EOR) + flags |= OFP_MSG_EOR; + */ + if (flags & OFP_MSG_PEEK) { + /* HJo m = m->m_next; */ + moff = 0; + } else { + /* HJo nextrecord = m->m_nextpkt; */ + sbfree(&so->so_rcv, m); + if (mp != NULL) { + *mp = m; + /* HJo + mp = &m->m_next; + so->so_rcv.sb_mb = m = m->m_next; + *mp = NULL; + */ + } else { + ofp_sockbuf_remove_first(&so->so_rcv); + m = ofp_sockbuf_get_first(&so->so_rcv); + } + /* + sockbuf_pushsync(&so->so_rcv, nextrecord); + */ + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + } + } else { + if (flags & OFP_MSG_PEEK) + moff += len; + else { + if (mp != NULL) { + int copy_flag; + +#define M_WAIT 1 +#define M_DONTWAIT 2 + if (flags & OFP_MSG_DONTWAIT) + copy_flag = M_DONTWAIT; + else + copy_flag = M_WAIT; + if (copy_flag == M_WAIT) + SOCKBUF_UNLOCK(&so->so_rcv); + *mp = odp_packet_copy(m, shm->pool); + if (copy_flag == M_WAIT) + SOCKBUF_LOCK(&so->so_rcv); + if (*mp == ODP_PACKET_INVALID) { + /* + * m_copym() couldn't + * allocate an mbuf. Adjust + * uio_resid back (it was + * adjusted down by len + * bytes, which we didn't end + * up "copying" over). 
+ */ + uio->uio_resid += len; + break; + } + } + /* HJo + if ((odp_packet_flags(m) & M_HOLE) == 0) + m->m_data += len; + odp_packet_get_len(m) -= len; + */ + odp_packet_pull_head(m, len); + so->so_rcv.sb_cc -= len; + } + } + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + if (so->so_oobmark) { + if ((flags & OFP_MSG_PEEK) == 0) { + so->so_oobmark -= len; + if (so->so_oobmark == 0) { + so->so_rcv.sb_state |= SBS_RCVATMARK; + break; + } + } else { + offset += len; + if (offset == (int)so->so_oobmark) + break; + } + } + if (flags & OFP_MSG_EOR) + break; + /* + * If the OFP_MSG_WAITALL flag is set (for non-atomic socket), we + * must not quit until "uio->uio_resid == 0" or an error + * termination. If a signal/timeout occurs, return with a + * short count but without error. Keep sockbuf locked + * against other readers. + */ + while (flags & OFP_MSG_WAITALL && m == ODP_PACKET_INVALID && + uio->uio_resid > 0 && + !sosendallatonce(so) /* && nextrecord == NULL*/) { + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE) + break; + /* + * Notify the protocol that some data has been + * drained before blocking. + */ + if (pr->pr_flags & PR_WANTRCVD) { + SOCKBUF_UNLOCK(&so->so_rcv); + (*pr->pr_usrreqs->pru_rcvd)(so, flags); + SOCKBUF_LOCK(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + /* + * We could receive some data while was notifying + * the protocol. Skip blocking in this case. + */ + if (so->so_rcv.sb_mb == NULL) { + error = ofp_sbwait(&so->so_rcv); + if (error) { + SOCKBUF_UNLOCK(&so->so_rcv); + goto release; + } + } + m = ofp_sockbuf_remove_first(&so->so_rcv); + } + } + + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + if (m != ODP_PACKET_INVALID && pr->pr_flags & PR_ATOMIC) { + flags |= OFP_MSG_TRUNC; + if ((flags & OFP_MSG_PEEK) == 0) + (void) ofp_sbdroprecord_locked(&so->so_rcv); + } + if ((flags & OFP_MSG_PEEK) == 0) { + if (m == ODP_PACKET_INVALID) { + /* + * First part is an inline SB_EMPTY_FIXUP(). 
Second + * part makes sure sb_lastrecord is up-to-date if + * there is still data in the socket buffer. + */ + if (uio->uio_resid > 0 && orig_resid != uio->uio_resid + && !sosendallatonce(so) /* && nextrecord == NULL */) { + if (so->so_upcallprep.soup_receive != NULL) { + so->so_upcallprep.soup_receive(so, + so->so_upcallprep.soup_receive_arg, + orig_resid - uio->uio_resid, uio->uio_resid); + } + } + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + /* + * If ofp_soreceive() is being done from the socket callback, + * then don't need to generate ACK to peer to update window, + * since ACK will be generated on return to TCP. + */ + if (!(flags & OFP_MSG_SOCALLBCK) && + (pr->pr_flags & PR_WANTRCVD)) { + SOCKBUF_UNLOCK(&so->so_rcv); + (*pr->pr_usrreqs->pru_rcvd)(so, flags); + SOCKBUF_LOCK(&so->so_rcv); + } + } + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + if (orig_resid == uio->uio_resid && orig_resid && + (flags & OFP_MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { + SOCKBUF_UNLOCK(&so->so_rcv); + goto restart; + } + SOCKBUF_UNLOCK(&so->so_rcv); + + if (flagsp != NULL) + *flagsp |= flags; +release: + ofp_sbunlock(&so->so_rcv); + return (error); +} + +/* + * Optimized version of ofp_soreceive() for simple datagram cases from userspace. + * Unlike in the stream case, we're able to drop a datagram if copyout() + * fails, and because we handle datagrams atomically, we don't need to use a + * sleep lock to prevent I/O interlacing. + */ +int +ofp_soreceive_dgram(struct socket *so, struct ofp_sockaddr **psa, struct uio *uio, + odp_packet_t *mp0, odp_packet_t *controlp, int *flagsp) +{ + int flags, error; + size_t len; + struct protosw *pr = so->so_proto; + + (void)mp0; + + /* HJo: Originally psa will be allocated. We want it set beforehand. 
+ if (psa != NULL) + *psa = NULL; + */ + if (controlp != NULL) + *controlp = ODP_PACKET_INVALID; + if (flagsp != NULL) + flags = *flagsp &~ OFP_MSG_EOR; + else + flags = 0; + + /* + * For any complicated cases, fall back to the full + * ofp_soreceive_generic(). + */ +#if 0 + if (mp0 != NULL || (flags & OFP_MSG_PEEK) || (flags & OFP_MSG_OOB)) + return (ofp_soreceive_generic(so, psa, uio, mp0, controlp, + flagsp)); +#endif + /* + * Enforce restrictions on use. + */ + KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, + ("ofp_soreceive_dgram: wantrcvd")); + KASSERT(pr->pr_flags & PR_ATOMIC, ("ofp_soreceive_dgram: !atomic")); + KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, + ("ofp_soreceive_dgram: SBS_RCVATMARK")); + KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, + ("ofp_soreceive_dgram: P_CONNREQUIRED")); + + /* + * Loop blocking while waiting for a datagram. + */ + SOCKBUF_LOCK(&so->so_rcv); + while (so->so_rcv.sb_put == so->so_rcv.sb_get) { + KASSERT(so->so_rcv.sb_cc == 0, + ("ofp_soreceive_dgram: sb_mb NULL but sb_cc %u", + so->so_rcv.sb_cc)); + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + SOCKBUF_UNLOCK(&so->so_rcv); + return (error); + } + if (so->so_rcv.sb_state & SBS_CANTRCVMORE || + uio->uio_resid == 0) { + SOCKBUF_UNLOCK(&so->so_rcv); + return (0); + } + if ((so->so_state & SS_NBIO) || + (flags & (OFP_MSG_DONTWAIT|OFP_MSG_NBIO))) { + SOCKBUF_UNLOCK(&so->so_rcv); + return (OFP_EWOULDBLOCK); + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + error = ofp_sbwait(&so->so_rcv); + if (error) { + SOCKBUF_UNLOCK(&so->so_rcv); + return (error); + } + } + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + + odp_packet_t pkt = so->so_rcv.sb_mb[so->so_rcv.sb_get]; + sbfree(&so->so_rcv, pkt); + if (++so->so_rcv.sb_get >= SOCKBUF_LEN) + so->so_rcv.sb_get = 0; + + SOCKBUF_UNLOCK(&so->so_rcv); + + struct ofp_udphdr *uh = (struct ofp_udphdr *)odp_packet_l4_ptr(pkt, NULL); + uint8_t *data = (uint8_t *)(uh + 1); + len = 
odp_be_to_cpu_16(uh->uh_ulen) - sizeof(*uh); + if (len > uio->uio_iov->iov_len) { + len = uio->uio_iov->iov_len; + flags |= OFP_MSG_TRUNC; + } + + memcpy(uio->uio_iov->iov_base, data, len); + + if (psa && *psa) { + if (pr->pr_flags & PR_ADDR) { + /* address is save on L2 & L3 */ + struct ofp_sockaddr *sa = + (struct ofp_sockaddr *)odp_packet_l2_ptr(pkt, NULL); + memcpy(*psa, sa, sa->sa_len); + } else + (*psa)->sa_len = 0; + } + + odp_packet_free(pkt); + uio->uio_resid -= len; + + if (flagsp != NULL) + *flagsp |= flags; + + return (0); +} + +int +ofp_soreceive(struct socket *so, struct ofp_sockaddr **psa, struct uio *uio, + odp_packet_t *mp0, odp_packet_t *controlp, int *flagsp) +{ + int error; + + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))) + return OFP_ENOTCONN; + } + + error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, + controlp, flagsp)); + + return (error); +} + +static int +sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) +{ + size_t valsize; + + /* + * If the user gives us more than we wanted, we ignore it, but if we + * don't get the minimum length the caller wants, we return OFP_EINVAL. + * On success, sopt->sopt_valsize is set to however much we actually + * retrieved. 
+ */ + if ((valsize = sopt->sopt_valsize) < minlen) + return OFP_EINVAL; + if (valsize > len) + sopt->sopt_valsize = valsize = len; + + bcopy(sopt->sopt_val, buf, valsize); + return (0); +} + +int +ofp_sosetopt(struct socket *so, struct sockopt *sopt) +{ + int error, optval = 0; + struct ofp_linger l; + struct ofp_timeval tv; + uint64_t val; + uint32_t val32; + + error = 0; + if (sopt->sopt_level != OFP_SOL_SOCKET) { + if (so->so_proto->pr_ctloutput != NULL) { + error = (*so->so_proto->pr_ctloutput)(so, sopt); + return (error); + } + error = OFP_ENOPROTOOPT; + } else { + switch (sopt->sopt_name) { + case OFP_SO_ACCEPTFILTER: + error = OFP_EOPNOTSUPP; + break; + case OFP_SO_LINGER: + error = sooptcopyin(sopt, &l, sizeof l, sizeof l); + if (error) + goto bad; + + OFP_SOCK_LOCK(so); + so->so_linger = l.l_linger; + if (l.l_onoff) + so->so_options |= OFP_SO_LINGER; + else + so->so_options &= ~OFP_SO_LINGER; + OFP_SOCK_UNLOCK(so); + break; + + case OFP_SO_DEBUG: + case OFP_SO_KEEPALIVE: + case OFP_SO_DONTROUTE: + case OFP_SO_USELOOPBACK: + case OFP_SO_BROADCAST: + case OFP_SO_REUSEADDR: + case OFP_SO_REUSEPORT: + case OFP_SO_OOBINLINE: + case OFP_SO_TIMESTAMP: + case OFP_SO_BINTIME: + case OFP_SO_NOSIGPIPE: + case OFP_SO_NO_DDP: + case OFP_SO_NO_OFFLOAD: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + goto bad; + OFP_SOCK_LOCK(so); + if (optval) + so->so_options |= sopt->sopt_name; + else + so->so_options &= ~sopt->sopt_name; + OFP_SOCK_UNLOCK(so); + break; + + case OFP_SO_SETFIB: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (optval < 0 || optval >= 4096 /* HJo rt_numfibs*/) { + error = OFP_EINVAL; + goto bad; + } + if (((so->so_proto->pr_domain->dom_family == OFP_PF_INET) || + (so->so_proto->pr_domain->dom_family == OFP_PF_INET6))) { + so->so_fibnum = optval; + /* Note: ignore error */ + if (so->so_proto->pr_ctloutput) + (*so->so_proto->pr_ctloutput)(so, sopt); + } else { + so->so_fibnum = 0; + } + 
break; + + case OFP_SO_ALTFIB: + error = OFP_EOPNOTSUPP; + break; + + case OFP_SO_USER_COOKIE: + error = sooptcopyin(sopt, &val32, sizeof val32, + sizeof val32); + if (error) + goto bad; + so->so_user_cookie = val32; + break; + + case OFP_SO_L2INFO: + error = OFP_EOPNOTSUPP; + break; + + case OFP_SO_PASSIVE: + case OFP_SO_PROMISC: + error = OFP_EOPNOTSUPP; + break; + + case OFP_SO_SNDBUF: + case OFP_SO_RCVBUF: + case OFP_SO_SNDLOWAT: + case OFP_SO_RCVLOWAT: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + goto bad; + + /* + * Values < 1 make no sense for any of these options, + * so disallow them. + */ + if (optval < 1) { + error = OFP_EINVAL; + goto bad; + } + + switch (sopt->sopt_name) { + case OFP_SO_SNDBUF: + case OFP_SO_RCVBUF: + if (ofp_sbreserve(sopt->sopt_name == OFP_SO_SNDBUF ? + &so->so_snd : &so->so_rcv, (uint64_t)optval, + so, NULL) == 0) { + error = OFP_ENOBUFS; + goto bad; + } + (sopt->sopt_name == OFP_SO_SNDBUF ? &so->so_snd : + &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE; + break; + + /* + * Make sure the low-water is never greater than the + * high-water. + */ + case OFP_SO_SNDLOWAT: + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_lowat = + (optval > (int)so->so_snd.sb_hiwat) ? + (int)so->so_snd.sb_hiwat : optval; + SOCKBUF_UNLOCK(&so->so_snd); + break; + case OFP_SO_RCVLOWAT: + SOCKBUF_LOCK(&so->so_rcv); + so->so_rcv.sb_lowat = + (optval > (int)so->so_rcv.sb_hiwat) ? 
+ (int)so->so_rcv.sb_hiwat : optval; + SOCKBUF_UNLOCK(&so->so_rcv); + break; + } + break; + + case OFP_SO_SNDTIMEO: + case OFP_SO_RCVTIMEO: + error = sooptcopyin(sopt, &tv, sizeof tv, + sizeof tv); + if (error) + goto bad; + + /* assert(hz > 0); */ + if (tv.tv_sec > (int32_t)(INT_MAX / hz) || + tv.tv_usec >= 1000000) { + error = OFP_EDOM; + goto bad; + } + /* assert(tick > 0); */ + /* assert(ULONG_MAX - INT_MAX >= 1000000); */ +#define tick (1000000/HZ) + val = (uint64_t)(tv.tv_sec * hz) + tv.tv_usec / tick; + if (val > INT_MAX) { + error = OFP_EDOM; + goto bad; + } + if (val == 0 && tv.tv_usec != 0) + val = 1; + + switch (sopt->sopt_name) { + case OFP_SO_SNDTIMEO: + so->so_snd.sb_timeo = val; + break; + case OFP_SO_RCVTIMEO: + so->so_rcv.sb_timeo = val; + break; + } + break; + + case OFP_SO_LABEL: + error = OFP_EOPNOTSUPP; + break; + + default: + error = OFP_ENOPROTOOPT; + break; + } + if (error == 0 && so->so_proto->pr_ctloutput != NULL) + (void)(*so->so_proto->pr_ctloutput)(so, sopt); + } +bad: + return (error); +} + +static int +sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) +{ + int error; + size_t valsize; + + error = 0; + + /* + * Documented get behavior is that we always return a value, possibly + * truncated to fit in the user's buffer. Traditional behavior is + * that we always tell the user precisely how much we copied, rather + * than something useful like the total amount we had available for + * her. Note that this interface is not idempotent; the entire + * answer must generated ahead of time. 
+ */ + valsize = min(len, sopt->sopt_valsize); + sopt->sopt_valsize = valsize; + if (sopt->sopt_val != NULL) { + bcopy(buf, sopt->sopt_val, valsize); + } + return (error); +} + +int +ofp_sogetopt(struct socket *so, struct sockopt *sopt) +{ + int error, optval; + struct ofp_linger l; + struct timeval tv; + + error = 0; + if (sopt->sopt_level != OFP_SOL_SOCKET) { + if (so->so_proto->pr_ctloutput != NULL) + error = (*so->so_proto->pr_ctloutput)(so, sopt); + else + error = OFP_ENOPROTOOPT; + return (error); + } else { + switch (sopt->sopt_name) { + case OFP_SO_ACCEPTFILTER: + error = OFP_EOPNOTSUPP; + break; + case OFP_SO_LINGER: + OFP_SOCK_LOCK(so); + l.l_onoff = so->so_options & OFP_SO_LINGER; + l.l_linger = so->so_linger; + OFP_SOCK_UNLOCK(so); + error = sooptcopyout(sopt, &l, sizeof l); + break; + + case OFP_SO_USELOOPBACK: + case OFP_SO_DONTROUTE: + case OFP_SO_DEBUG: + case OFP_SO_KEEPALIVE: + case OFP_SO_REUSEADDR: + case OFP_SO_REUSEPORT: + case OFP_SO_BROADCAST: + case OFP_SO_OOBINLINE: + case OFP_SO_ACCEPTCONN: + case OFP_SO_TIMESTAMP: + case OFP_SO_BINTIME: + case OFP_SO_NOSIGPIPE: + optval = so->so_options & sopt->sopt_name; +integer: + error = sooptcopyout(sopt, &optval, sizeof optval); + break; + + case OFP_SO_TYPE: + optval = so->so_type; + goto integer; + + case OFP_SO_PROTOCOL: + optval = so->so_proto->pr_protocol; + goto integer; + + case OFP_SO_ERROR: + OFP_SOCK_LOCK(so); + optval = so->so_error; + so->so_error = 0; + OFP_SOCK_UNLOCK(so); + goto integer; + + case OFP_SO_L2INFO: + error = OFP_EOPNOTSUPP; + break; + + case OFP_SO_SNDBUF: + optval = so->so_snd.sb_hiwat; + goto integer; + + case OFP_SO_RCVBUF: + optval = so->so_rcv.sb_hiwat; + goto integer; + + case OFP_SO_SNDLOWAT: + optval = so->so_snd.sb_lowat; + goto integer; + + case OFP_SO_RCVLOWAT: + optval = so->so_rcv.sb_lowat; + goto integer; + + case OFP_SO_SNDTIMEO: + case OFP_SO_RCVTIMEO: + optval = (sopt->sopt_name == OFP_SO_SNDTIMEO ? 
+ so->so_snd.sb_timeo : so->so_rcv.sb_timeo); + + tv.tv_sec = optval / hz; + tv.tv_usec = (optval % hz) * tick; + error = sooptcopyout(sopt, &tv, sizeof tv); + break; + + case OFP_SO_LABEL: + error = OFP_EOPNOTSUPP; + break; + + case OFP_SO_PEERLABEL: + error = OFP_EOPNOTSUPP; + break; + + case OFP_SO_LISTENQLIMIT: + optval = so->so_qlimit; + goto integer; + + case OFP_SO_LISTENQLEN: + optval = so->so_qlen; + goto integer; + + case OFP_SO_LISTENINCQLEN: + optval = so->so_incqlen; + goto integer; + + default: + error = OFP_ENOPROTOOPT; + break; + } + } + + return (error); +} + +/* + * These functions are used by protocols to notify the socket layer (and its + * consumers) of state changes in the sockets driven by protocol-side events. + */ + +/* + * Procedures to manipulate state flags of socket and do appropriate wakeups. + * + * Normal sequence from the active (originating) side is that + * ofp_soisconnecting() is called during processing of connect() call, resulting + * in an eventual call to ofp_soisconnected() if/when the connection is + * established. When the connection is torn down ofp_soisdisconnecting() is + * called during processing of disconnect() call, and ofp_soisdisconnected() is + * called when the connection to the peer is totally severed. The semantics + * of these routines are such that connectionless protocols can call + * ofp_soisconnected() and ofp_soisdisconnected() only, bypassing the in-progress + * calls when setting up a ``connection'' takes no time. + * + * From the passive side, a socket is created with two queues of sockets: + * so_incomp for connections in progress and so_comp for connections already + * made and awaiting user acceptance. As a protocol is preparing incoming + * connections, it creates a socket structure queued on so_incomp by calling + * ofp_sonewconn(). When the connection is established, ofp_soisconnected() is + * called, and transfers the socket structure to so_comp, making it available + * to accept(). 
+ * + * If a socket is closed with sockets on either so_incomp or so_comp, these + * sockets are dropped. + * + * If higher-level protocols are implemented in the kernel, the wakeups done + * here will sometimes cause software-interrupt process scheduling. + */ +void +ofp_soisconnecting(struct socket *so) +{ + + OFP_SOCK_LOCK(so); + so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); + so->so_state |= SS_ISCONNECTING; + OFP_SOCK_UNLOCK(so); +} + +void +ofp_soisconnected(struct socket *so) +{ + struct socket *head; + + ACCEPT_LOCK(); + OFP_SOCK_LOCK(so); + so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); + so->so_state |= SS_ISCONNECTED; + head = so->so_head; + if (head != NULL && (so->so_qstate & SQ_INCOMP)) { + if ((so->so_options & OFP_SO_ACCEPTFILTER) == 0) { + OFP_SOCK_UNLOCK(so); + OFP_TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + so->so_qstate &= ~SQ_INCOMP; + OFP_TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); + head->so_qlen++; + so->so_qstate |= SQ_COMP; + ACCEPT_UNLOCK(); + ofp_send_sock_event(head, so, OFP_EVENT_ACCEPT); + sorwakeup(head); + ofp_wakeup_one(&head->so_timeo); + } else { + ACCEPT_UNLOCK(); + ofp_soupcall_set(so, OFP_SO_RCV, + head->so_accf->so_accept_filter->accf_callback, + head->so_accf->so_accept_filter_arg); + so->so_options &= ~OFP_SO_ACCEPTFILTER; + /* HJo: FIX + ret = head->so_accf->so_accept_filter->accf_callback(so, + head->so_accf->so_accept_filter_arg, M_DONTWAIT); + if (ret == SU_ISCONNECTED) + ofp_soupcall_clear(so, OFP_SO_RCV); + */ + OFP_SOCK_UNLOCK(so); + /* HJo + if (ret == SU_ISCONNECTED) + goto restart; + */ + } + return; + } + OFP_SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + ofp_wakeup(&so->so_timeo); + sorwakeup(so); + sowwakeup(so); +} + +void +ofp_soisdisconnecting(struct socket *so) +{ + + /* + * Note: This code assumes that OFP_SOCK_LOCK(so) and + * SOCKBUF_LOCK(&so->so_rcv) are the same. + */ + /* Socket handled by event and already locked? 
*/ + if (!(so->so_state & SS_EVENT)) + SOCKBUF_LOCK(&so->so_rcv); + so->so_state &= ~SS_ISCONNECTING; + so->so_state |= SS_ISDISCONNECTING; + so->so_rcv.sb_state |= SBS_CANTRCVMORE; + sorwakeup_locked(so); + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_state |= SBS_CANTSENDMORE; + sowwakeup_locked(so); + ofp_wakeup(&so->so_timeo); +} + +void +ofp_soisdisconnected(struct socket *so) +{ + + /* + * Note: This code assumes that OFP_SOCK_LOCK(so) and + * SOCKBUF_LOCK(&so->so_rcv) are the same. + */ + SOCKBUF_LOCK(&so->so_rcv); + so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); + so->so_state |= SS_ISDISCONNECTED; + so->so_rcv.sb_state |= SBS_CANTRCVMORE; + sorwakeup_locked(so); + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_state |= SBS_CANTSENDMORE; + ofp_sbdrop_locked(&so->so_snd, so->so_snd.sb_cc); + sowwakeup_locked(so); + ofp_wakeup(&so->so_timeo); +} + +/* + * Register per-socket buffer upcalls. + */ +void +ofp_soupcall_set(struct socket *so, int which, + int (*func)(struct socket *, void *, int), void *arg) +{ + struct sockbuf *sb = NULL; + + switch (which) { + case OFP_SO_RCV: + sb = &so->so_rcv; + break; + case OFP_SO_SND: + sb = &so->so_snd; + break; + default: + panic("ofp_soupcall_set: bad which"); + } + SOCKBUF_LOCK_ASSERT(sb); +#if 0 + /* XXX: accf_http actually wants to do this on purpose. 
*/ + KASSERT(sb->sb_upcall == NULL, ("ofp_soupcall_set: overwriting upcall")); +#endif + sb->sb_upcall = func; + sb->sb_upcallarg = arg; + sb->sb_flags |= SB_UPCALL; +} + +void +ofp_soupcall_clear(struct socket *so, int which) +{ + struct sockbuf *sb = NULL; + + switch (which) { + case OFP_SO_RCV: + sb = &so->so_rcv; + break; + case OFP_SO_SND: + sb = &so->so_snd; + break; + default: + panic("ofp_soupcall_clear: bad which"); + } + SOCKBUF_LOCK_ASSERT(sb); + KASSERT(sb->sb_upcall != NULL, ("ofp_soupcall_clear: no upcall to clear")); + sb->sb_upcall = NULL; + sb->sb_upcallarg = NULL; + sb->sb_flags &= ~SB_UPCALL; +} + +/* + * ofp_sohasoutofband(): protocol notifies socket layer of the arrival of new + * out-of-band data, which will then notify socket consumers. + */ +void +ofp_sohasoutofband(struct socket *so) +{ + (void)so; + /* HJo: No sig + if (so->so_sigio != NULL) + pgsigio(&so->so_sigio, SIGURG, 0); + */ + /* HJo: FIX + selwakeuppri(&so->so_rcv.sb_sel, PSOCK); + */ + ofp_wakeup(&so->so_rcv.sb_sel); +} + +/* Emulation for BSD ofp_wakeup */ + +static int _ofp_wakeup(void *channel, int one, int tmo); + +struct voidarg { + void *p; +}; + +static void +sleep_timeout(void *arg) +{ + struct voidarg *arg1 = arg; + _ofp_wakeup(arg1->p, 1, 1); +} + +int +ofp_msleep(void *channel, odp_rwlock_t *mtx, int priority, const char *wmesg, + uint32_t timeout) +{ + struct sleeper sleepy; + struct voidarg arg; + (void)mtx; + (void)priority; + + odp_spinlock_lock(&shm->sleep_lock); + sleepy.next = shm->sleep_list; + sleepy.channel = channel; + sleepy.wmesg = wmesg; + sleepy.go = 0; + sleepy.woke_by_timer = 0; + sleepy.tmo = ODP_TIMER_INVALID; + shm->sleep_list = &sleepy; + if (timeout) { + arg.p = channel; + sleepy.tmo = ofp_timer_start(timeout, sleep_timeout, &arg, sizeof(arg)); + } + odp_spinlock_unlock(&shm->sleep_lock); + + while (sleepy.go == 0) { + if (mtx) { + odp_rwlock_write_unlock(mtx); + } + usleep(1000); + if (mtx) { + odp_rwlock_write_lock(mtx); + } + } + + if 
(sleepy.tmo != ODP_TIMER_INVALID) + ofp_timer_cancel(sleepy.tmo); + + return (sleepy.woke_by_timer ? OFP_EWOULDBLOCK : 0); +} + +static int +_ofp_wakeup(void *channel, int one, int tmo) +{ + struct sleeper *p, *prev = NULL, *next; + + odp_spinlock_lock(&shm->sleep_lock); + + p = shm->sleep_list; + while (p) { + next = p->next; + if (channel == p->channel) { + if (prev) + prev->next = p->next; + else + shm->sleep_list = p->next; + if (tmo) { + p->tmo = ODP_TIMER_INVALID; + p->woke_by_timer = 1; + } + p->go = 1; + if (one) + break; + } else + prev = p; + p = next; + } + + odp_spinlock_unlock(&shm->sleep_lock); + return -1; +} + +int +ofp_wakeup_one(void *channel) +{ + /* wake up selects */ + if (channel) + _ofp_wakeup(NULL, 0, 0); + return _ofp_wakeup(channel, 1, 0); +} + +int +ofp_wakeup(void *channel) +{ + /* wake up selects */ + if (channel) + _ofp_wakeup(NULL, 0, 0); + return _ofp_wakeup(channel, 0, 0); +} + + +int +ofp_pru_accept_notsupp(struct socket *so, struct ofp_sockaddr **nam) +{ + (void)so; + (void)nam; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_attach_notsupp(struct socket *so, int proto, struct thread *td) +{ + (void)so; + (void)proto; + (void)td; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_bind_notsupp(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + (void)so; + (void)nam; + (void)td; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_connect_notsupp(struct socket *so, struct ofp_sockaddr *nam, struct thread *td) +{ + (void)so; + (void)nam; + (void)td; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_connect2_notsupp(struct socket *so1, struct socket *so2) +{ + (void)so1; + (void)so2; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_control_notsupp(struct socket *so, uint32_t cmd, char * data, + struct ofp_ifnet *ifp, struct thread *td) +{ + (void)so; + (void)cmd; + (void)data; + (void)ifp; + (void)td; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_disconnect_notsupp(struct socket *so) +{ + (void)so; + return OFP_EOPNOTSUPP; +} + +int 
+ofp_pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) +{ + (void)so; + (void)backlog; + (void)td; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_peeraddr_notsupp(struct socket *so, struct ofp_sockaddr **nam) +{ + (void)so; + (void)nam; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_rcvd_notsupp(struct socket *so, int flags) +{ + (void)so; + (void)flags; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_rcvoob_notsupp(struct socket *so, odp_packet_t m, int flags) +{ + (void)so; + (void)m; + (void)flags; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_send_notsupp(struct socket *so, int flags, odp_packet_t m, + struct ofp_sockaddr *addr, odp_packet_t control, struct thread *td) +{ + (void)so; + (void)m; + (void)flags; + (void)addr; + (void)control; + (void)td; + return OFP_EOPNOTSUPP; +} + +/* + * This isn't really a ``null'' operation, but it's the default one and + * doesn't do anything destructive. + */ +int +ofp_pru_sense_null(struct socket *so, struct stat *sb) +{ + + /*sb->st_blksize = so->so_snd.sb_hiwat;*/ + (void)so; + (void)sb; + return 0; +} + +int +ofp_pru_shutdown_notsupp(struct socket *so) +{ + (void)so; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_sockaddr_notsupp(struct socket *so, struct ofp_sockaddr **nam) +{ + (void)so; + (void)nam; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_sosend_notsupp(struct socket *so, struct ofp_sockaddr *addr, + struct uio *uio, odp_packet_t top, odp_packet_t control, int flags, + struct thread *td) +{ + (void)so; + (void)addr; + (void)uio; + (void)top; + (void)control; + (void)flags; + (void)td; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_soreceive_notsupp(struct socket *so, struct ofp_sockaddr **paddr, + struct uio *uio, odp_packet_t *mp0, odp_packet_t *controlp, int *flagsp) +{ + (void)so; + (void)paddr; + (void)uio; + (void)mp0; + (void)controlp; + (void)flagsp; + return OFP_EOPNOTSUPP; +} + +int +ofp_pru_sopoll_notsupp(struct socket *so, int events, struct ofp_ucred *cred, + struct thread *td) +{ + (void)so; + 
(void)events; + (void)cred; + (void)td; + return OFP_EOPNOTSUPP; +} + +int +ofp_send_sock_event(struct socket *head, struct socket *so, int event) +{ + struct ofp_sigevent *ev = &head->so_sigevent; + + if (ev->ofp_sigev_notify) { + struct ofp_sock_sigval *ss = ev->ofp_sigev_value.sival_ptr; + ss->event = event; + ss->sockfd = head->so_number; + ss->sockfd2 = so->so_number; + so->so_state |= SS_EVENT; + head->so_state |= SS_EVENT; + ev->ofp_sigev_notify_function(ev->ofp_sigev_value); + so->so_state &= ~SS_EVENT; + head->so_state &= ~SS_EVENT; + } + return 0; +} diff --git a/src/ofp_util.c b/src/ofp_util.c new file mode 100644 index 00000000..31f866e8 --- /dev/null +++ b/src/ofp_util.c @@ -0,0 +1,388 @@ +/* Copyright (c) 2014, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ofpi.h" +#include "ofpi_util.h" +#include "ofpi_log.h" + +int ofp_first_log_time = 0; + +uint16_t ofp_in_cksum(register uint16_t *addr, register int len) +{ + register int nleft = len; + register uint16_t *w = addr; + register uint16_t answer; + register int sum = 0; + + /* + * Our algorithm is simple, using a 32 bit accumulator (sum), + * we add sequential 16 bit words to it, and at the end, fold + * back all the carry bits from the top 16 bits into the lower + * 16 bits. + */ + while (nleft > 1) { + sum += *w++; + nleft -= 2; + } + + /* mop up an odd byte, if necessary */ + if (nleft == 1) + sum += odp_cpu_to_be_16(*(u_char *)w << 8); + + /* + * add back carry outs from top 16 bits to low 16 bits + */ + sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ + sum += (sum >> 16); /* add carry */ + answer = ~sum; /* truncate to 16 bits */ + return answer; +} + +#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) +#define REDUCE do { \ +l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum); \ +} while (0) + +static int __ofp_cksum(const odp_packet_t pkt, unsigned int off, + unsigned int len) +{ + int sum = 0; + uint16_t tmp = 0; + odp_packet_seg_t seg; + uint32_t seglen, cksum_len, done = 0; + uint8_t *cksum_data; + union { + uint16_t s[2]; + uint32_t l; + } l_util; + + seg = odp_packet_first_seg(pkt); + while (seg != ODP_PACKET_SEG_INVALID) { + seglen = odp_packet_seg_data_len(pkt, seg); + + if (off >= seglen) { + off -= seglen; + continue; + } + + cksum_len = seglen - off; + if (cksum_len > len) + cksum_len = len; + + cksum_data = (uint8_t *)odp_packet_seg_data(pkt, seg) + off; + tmp = ~ofp_in_cksum((uint16_t *)cksum_data, cksum_len); + + /* swap bytes on odd boundary */ + if (done % 2) + tmp = ((tmp&0x00ff) << 8) | ((tmp&0xff00) >> 8); + + sum += tmp; + off = 0; + done += cksum_len; + + if (done == len) + break; + + seg = odp_packet_next_seg(pkt, seg); + } + + REDUCE; + return sum; +} + +int ofp_cksum(const odp_packet_t pkt, unsigned int off, unsigned int len) +{ + return (~__ofp_cksum(pkt, off, len)) & 0xffff; +} + +int ofp_getsum(const odp_packet_t pkt, unsigned int off, unsigned int len) +{ + return __ofp_cksum(pkt, off, len); +} + +struct ofp_ipovly { + uint8_t ih_x1[9]; /* (unused) */ + uint8_t ih_pr; /* protocol */ + uint16_t ih_len; /* protocol length */ + struct ofp_in_addr ih_src; /* source internet address */ + struct ofp_in_addr ih_dst; /* destination internet address */ +} __attribute__((__packed__)); + +static inline int __ofp_in4_cksum(const odp_packet_t pkt) +{ + struct ofp_ip *ip; + int off, len, sum = 0; + uint16_t *w, tmp; + union { + uint16_t s[2]; + uint32_t l; + } l_util; + union { + struct ofp_ipovly ipov; + uint16_t w[10]; + } u; + + ip = (struct ofp_ip *)odp_packet_l3_ptr(pkt, NULL); + off = ip->ip_hl << 2; + /* pseudo header used to compute UDP checksum */ + memset(&u.ipov, 0, sizeof(u.ipov)); + u.ipov.ih_len = 
odp_cpu_to_be_16(odp_be_to_cpu_16(ip->ip_len) - off); + u.ipov.ih_pr = ip->ip_p; + u.ipov.ih_src = ip->ip_src; + u.ipov.ih_dst = ip->ip_dst; + w = u.w; + /* assumes sizeof(ipov) == 20 */ + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; + sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; + + len = odp_be_to_cpu_16(ip->ip_len) - off; + tmp = ~ofp_cksum(pkt, odp_packet_l3_offset(pkt) + off, len); + sum += tmp; + REDUCE; + return (~sum & 0xffff); +} + +int ofp_in4_cksum(const odp_packet_t pkt) +{ + return __ofp_in4_cksum(pkt); +} + +/** + * Helper function to print MAC address. + */ +char *ofp_print_mac(uint8_t *mac) +{ + static char buf[2][24]; + static int sel = 0; + int i, n = 0; + + sel = sel ^ 1; + for (i = 0; i < 6; i++) + n += sprintf(&buf[sel][n], + "%c%02x", i == 0 ? ' ' : ':', mac[i]); + return buf[sel]; +} + +/** + * Helper function to print IP address. + */ +char *ofp_print_ip_addr(uint32_t addr) +{ + static char buf[4][24]; + static int sel = 0; + uint32_t ip = odp_be_to_cpu_32(addr); + + sel++; + if (sel > 3) + sel = 0; + sprintf(buf[sel], "%d.%d.%d.%d", + ip>>24, (ip>>16)&0xff, (ip>>8)&0xff, ip&0xff); + + return buf[sel]; +} + +char *ofp_print_ip6_addr(uint8_t *addr) +{ + int i, n = 0; + static char buf[2][OFP_INET6_ADDRSTRLEN]; + static int sel = 0; + + sel = sel ^ 1; + for (i = 0; i < 16; i += 2) + n += sprintf(buf[sel] + n, "%s%02x%02x", + i == 0 ? "" : ":", addr[i], addr[i+1]); + + return buf[sel]; +} + +void ofp_print_hex(uint8_t log_level, + unsigned char *data, int len) +{ + int i; + + if (!data) { + OFP_LOG_NO_CTX(log_level, "* ofp_print_hex: no data!*"); + return; + } + + for (i = 0; i < len; i++) + OFP_LOG_NO_CTX(log_level, "%02x ", data[i]); +} + +/* + * In develepment environment this will generate a core dump. 
+ * In production environment this should be re-defined to + * product specific function + */ +void ofp_generate_coredump(void) +{ + int a = 0; + int b = 7; + int c = b/a; + + a = c; +} + +int ofp_hex_to_num(char *s) +{ + int n = 0; + + while (s && *s) { + if (*s >= '0' && *s <= '9') + n = (n << 4) | (*s - '0'); + else if (*s >= 'a' && *s <= 'f') + n = (n << 4) | (*s - 'a' + 10); + else if (*s >= 'A' && *s <= 'F') + n = (n << 4) | (*s - 'A' + 10); + else + break; + s++; + } + + return n; +} + +void ofp_mac_to_link_local(uint8_t *mac, uint8_t *lladdr) +{ + memset(lladdr, 0, 16); + memcpy(lladdr + 8, mac, 3); + memcpy(lladdr + 13, mac + 3, 3); + lladdr[8] ^= 0x02; + lladdr[11] = 0xff; + lladdr[12] = 0xfe; + lladdr[0] = 0xfe; + lladdr[1] = 0x80; +} + +int ofp_has_mac(uint8_t *mac) +{ + int i; + + for (i = 0; i < OFP_ETHER_ADDR_LEN; ++i) + if (mac[i]) + return 1; + + return 0; +} + +void ofp_ip6_masklen_to_mask(int masklen, uint8_t *mask) +{ + int i; + int bytes = masklen/8; + int bits = 8 - (masklen%8); + + for (i = 0; i < 16; i++) + mask[i] = 0; + + for (i = 0; i < bytes; i++) + mask[i] = 0xff; + + if (i < 16 && bits < 8) + mask[i] = (~0) << bits; +} + +/* + * mask in little endian order + */ +int ofp_mask_length(int masklen, uint8_t *mask) +{ + int i, j, m, ml = masklen; + + for (i = 0; i < masklen/8; i++) { + for (j = 0; j < 8; j++) { + m = 1 << j; + if (mask[i] & m) + return ml; + ml--; + } + } + return 0; +} + +int ofp_name_to_port_vlan(const char *dev, int *vlan) +{ + int port = -1; + char *p; + + if (!dev) + return -1; + + /* gre */ + if (strncmp(dev, OFP_GRE_IFNAME_PREFIX, + strlen(OFP_GRE_IFNAME_PREFIX)) == 0) { + *vlan = atoi(dev + strlen(OFP_GRE_IFNAME_PREFIX)); + return GRE_PORTS; + } + + /* fp */ + if (strncmp(dev, OFP_IFNAME_PREFIX, strlen(OFP_IFNAME_PREFIX))) + return -1; + + port = atoi(dev + strlen(OFP_IFNAME_PREFIX)); + + p = strchr(dev, '.'); + + if (p) + *vlan = atoi(p+1); + else + *vlan = 0; + + return port; +} + +char 
*ofp_port_vlan_to_ifnet_name(int port, int vlan) +{ + static char buf[2][18]; + static int sel = 0; + + sel = sel ^ 1; + + if (vlan) + if (port == GRE_PORTS) + sprintf(buf[sel], "%s%d", + OFP_GRE_IFNAME_PREFIX, vlan); + else + sprintf(buf[sel], "%s%d.%d", + OFP_IFNAME_PREFIX, port, vlan); + else + sprintf(buf[sel], "%s%d", OFP_IFNAME_PREFIX, port); + + return buf[sel]; +} + +int ofp_sendf(int fd, const char *fmt, ...) +{ + char buf[1024]; + int ret, n; + va_list ap; + struct stat statbuf; + + va_start(ap, fmt); + n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + fstat(fd, &statbuf); + if (S_ISSOCK(fd)) + ret = send(fd, buf, n, 0); + else + ret = write(fd, buf, n); + + return ret; +} diff --git a/test/Makefile.am b/test/Makefile.am new file mode 100644 index 00000000..b2c4a95e --- /dev/null +++ b/test/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = cunit diff --git a/test/cunit/Makefile.am b/test/cunit/Makefile.am new file mode 100644 index 00000000..c43cb827 --- /dev/null +++ b/test/cunit/Makefile.am @@ -0,0 +1,45 @@ +LIB = $(top_builddir)/lib +LDADD = $(LIB)/libofp.la + +DEFAULT_INCLUDES=-I. 
+ +AM_CFLAGS += -DINET + +if OFP_IPv6 +AM_CFLAGS += -DINET6 +endif + +if OFP_SP +AM_CFLAGS += -DSP +endif + +if OFP_MTRIE +AM_CFLAGS += -DMTRIE +endif + +AM_CFLAGS += \ + -I$(srcdir) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/include/api \ + -I$(CUNIT_PATH)/include + +AM_LDFLAGS += \ + -L$(LIB) \ + -L$(CUNIT_PATH)/lib + +if OFP_CUNIT_ENABLED +TESTS = ${check_PROGRAMS} +check_PROGRAMS = ofp_test_util ofp_test_stat ofp_test_packet_input \ + ofp_test_packet_output ofp_test_debug_pcap \ + ofp_test_debug_print ofp_test_fragmentation \ + ofp_test_port_conf +bin_PROGRAMS = ${check_PROGRAMS} +ofp_test_util_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +ofp_test_stat_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +ofp_test_packet_input_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +ofp_test_packet_output_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +ofp_test_debug_pcap_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +ofp_test_debug_print_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +ofp_test_fragmentation_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +ofp_test_port_conf_LDFLAGS = $(AM_LDFLAGS) -static -lcunit +endif diff --git a/test/cunit/cksum_packets.h b/test/cunit/cksum_packets.h new file mode 100644 index 00000000..845dd7bb --- /dev/null +++ b/test/cunit/cksum_packets.h @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#ifndef __CKSUM_PACKETS_H__ +#define __CKSUM_PACKETS_H__ + +/* Frame (91 bytes) */ +static uint8_t odd_len_icmp[91] = { +0x00, 0x00, 0x5e, 0x00, 0x01, 0x07, 0x84, 0x34, /* ..^....4 */ +0x97, 0x21, 0x3a, 0x7d, 0x08, 0x00, 0x45, 0x00, /* .!:}..E. */ +0x00, 0x4d, 0x89, 0xcd, 0x40, 0x00, 0x40, 0x01, /* .M..@.@. */ +0x52, 0xfc, 0x0a, 0x90, 0xa4, 0xc5, 0x0a, 0x90, /* R....... */ +0xa4, 0x01, 0x08, 0x00, 0xf7, 0x84, 0x4a, 0xf1, /* ......J. */ +0x00, 0x01, 0x69, 0xa5, 0xf5, 0x54, 0x00, 0x00, /* ..i..T.. */ +0x00, 0x00, 0x30, 0x8c, 0x04, 0x00, 0x00, 0x00, /* ..0..... 
*/ +0x00, 0x00, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30 /* ./0 */ +}; + +/* Frame (667 bytes) */ +static uint8_t udp_packet[667] = { +0x84, 0x34, 0x97, 0x21, 0x3a, 0x7d, 0x6c, 0x3b, /* .4.!:}l; */ +0xe5, 0xf2, 0x0e, 0xf7, 0x08, 0x00, 0x45, 0x00, /* ......E. */ +0x02, 0x8d, 0x35, 0x48, 0x00, 0x00, 0x80, 0x11, /* ..5H.... */ +0xa4, 0x8a, 0x0a, 0x90, 0xa4, 0xa8, 0x0a, 0x90, /* ........ */ +0xa4, 0xc5, 0xfe, 0x8b, 0x08, 0x00, 0x02, 0x79, /* .......y */ +0x2d, 0x4d, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, /* -Mabcdef */ +0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, /* ghijklmn */ +0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, /* opqrstuv */ +0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, /* wxyzabcd */ +0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, /* efghijkl */ +0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, /* mnopqrst */ +0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, /* uvwxyzab */ +0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, /* cdefghij */ +0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, /* klmnopqr */ +0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, /* stuvwxyz */ +0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, /* abcdefgh */ +0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, /* ijklmnop */ +0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* qrstuvwx */ +0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, /* yzabcdef */ +0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, /* ghijklmn */ +0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, /* opqrstuv */ +0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, /* wxyzabcd */ +0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, /* efghijkl */ +0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, /* mnopqrst */ +0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, /* uvwxyzab */ +0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 
0x6a, /* cdefghij */ +0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, /* klmnopqr */ +0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, /* stuvwxyz */ +0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, /* abcdefgh */ +0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, /* ijklmnop */ +0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* qrstuvwx */ +0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, /* yzabcdef */ +0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, /* ghijklmn */ +0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, /* opqrstuv */ +0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, /* wxyzabcd */ +0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, /* efghijkl */ +0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, /* mnopqrst */ +0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, /* uvwxyzab */ +0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, /* cdefghij */ +0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, /* klmnopqr */ +0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, /* stuvwxyz */ +0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, /* abcdefgh */ +0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, /* ijklmnop */ +0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* qrstuvwx */ +0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, /* yzabcdef */ +0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, /* ghijklmn */ +0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, /* opqrstuv */ +0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, /* wxyzabcd */ +0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, /* efghijkl */ +0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, /* mnopqrst */ +0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, /* uvwxyzab */ +0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, /* cdefghij */ +0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, /* klmnopqr */ +0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, /* stuvwxyz */ +0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, /* abcdefgh */ +0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, /* ijklmnop */ +0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* 
qrstuvwx */ +0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, /* yzabcdef */ +0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, /* ghijklmn */ +0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, /* opqrstuv */ +0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, /* wxyzabcd */ +0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, /* efghijkl */ +0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, /* mnopqrst */ +0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, /* uvwxyzab */ +0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, /* cdefghij */ +0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, /* klmnopqr */ +0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, /* stuvwxyz */ +0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, /* abcdefgh */ +0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, /* ijklmnop */ +0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* qrstuvwx */ +0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, /* yzabcdef */ +0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, /* ghijklmn */ +0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, /* opqrstuv */ +0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, 0x63, 0x64, /* wxyzabcd */ +0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, /* efghijkl */ +0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, /* mnopqrst */ +0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x61, 0x62, /* uvwxyzab */ +0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, /* cdefghij */ +0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, /* klmnopqr */ +0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, /* stuvwxyz */ +0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, /* abcdefgh */ +0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, /* ijklmnop */ +0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* qrstuvwx */ +0x79, 0x7a, 0x0a /* yz. 
*/ +}; + +#endif /* __CKSUM_PACKETS_H__ */ diff --git a/test/cunit/fragmented_packet.h b/test/cunit/fragmented_packet.h new file mode 100644 index 00000000..b5cc5ee2 --- /dev/null +++ b/test/cunit/fragmented_packet.h @@ -0,0 +1,664 @@ +/*- + * Copyright (c) 2014 ENEA Software AB + * Copyright (c) 2014 Nokia + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#ifndef __FRAGMENTED_PACKET_H__ +#define __FRAGMENTED_PACKET_H__ + +/* Frame (1514 bytes) */ +static uint8_t pkt1_frag1[1514] = { +0x00, 0x00, 0x5e, 0x00, 0x01, 0x07, 0x84, 0x34, /* ..^....4 */ +0x97, 0x21, 0x3a, 0x7d, 0x08, 0x00, 0x45, 0x00, /* .!:}..E. */ +0x05, 0xdc, 0xdb, 0x3b, 0x20, 0x00, 0x40, 0x01, /* ...; .@. */ +0x1c, 0x19, 0x0a, 0x90, 0xa4, 0xab, 0x0a, 0x90, /* ........ */ +0xa4, 0x01, 0x08, 0x00, 0x12, 0x57, 0x61, 0x19, /* .....Wa. */ +0x00, 0x01, 0x93, 0x5d, 0xe4, 0x54, 0x00, 0x00, /* ...].T.. */ +0x00, 0x00, 0x37, 0x35, 0x04, 0x00, 0x00, 0x00, /* ..75.... */ +0x00, 0x00, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ 
*/ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... 
*/ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. 
!"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ 
*/ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ 
*/ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ 
*/ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf /* .. */ +}; + +/* Frame (1062 bytes) */ +static uint8_t pkt1_frag2[1062] = { +0x00, 0x00, 0x5e, 0x00, 0x01, 0x07, 0x84, 0x34, /* ..^....4 */ +0x97, 0x21, 0x3a, 0x7d, 0x08, 0x00, 0x45, 0x00, /* .!:}..E. */ +0x04, 0x18, 0xdb, 0x3b, 0x00, 0xb9, 0x40, 0x01, /* ...;..@. */ +0x3d, 0x24, 0x0a, 0x90, 0xa4, 0xab, 0x0a, 0x90, /* =$...... */ +0xa4, 0x01, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ 
*/ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ 
*/ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ 
*/ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ */ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ 
*/ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* ........ */ +0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, /* ........ */ +0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, /* ........ */ +0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, /* ........ */ +0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* ........ */ +0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, /* ........ */ +0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, /* ........ */ +0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, /* ........ */ +0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, /* ........ */ +0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, /* ........ */ +0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, /* ........ */ +0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, /* ........ */ +0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* .. !"#$% */ +0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* &'()*+,- */ +0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, /* ./012345 */ +0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* 6789:;<= */ +0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, /* >?@ABCDE */ +0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, /* FGHIJKLM */ +0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, /* NOPQRSTU */ +0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, /* VWXYZ[\] */ +0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, /* ^_`abcde */ +0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, /* fghijklm */ +0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, /* nopqrstu */ +0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, /* vwxyz{|} */ +0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, /* ~....... */ +0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, /* ........ */ +0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, /* ........ */ +0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, /* ........ */ +0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, /* ........ */ +0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, /* ........ */ +0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, /* ........ 
*/ +0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, /* ........ */ +0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3 /* ...... */ +}; + +/* Reassembled IPv4 (2508 bytes) */ +static uint8_t pkt1_full[2542] = { +0x00, 0x00, 0x5e, 0x00, 0x01, 0x07, 0x84, 0x34, /* ..^....4 */ +0x97, 0x21, 0x3a, 0x7d, 0x08, 0x00, 0x45, 0x00, /* .!:}..E. */ +0x09, 0xe0, 0xdb, 0x3b, 0x00, 0x00, 0x40, 0x01, /* ...; .@. */ +0x1c, 0x19, 0x0a, 0x90, 0xa4, 0xab, 0x0a, 0x90, /* ........ */ +0xa4, 0x01, +0x08, 0x00, 0x12, 0x57, 0x61, 0x19, 0x00, 0x01, /* ...Wa... */ +0x93, 0x5d, 0xe4, 0x54, 0x00, 0x00, 0x00, 0x00, /* .].T.... */ +0x37, 0x35, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, /* 75...... */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ 
*/ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ 
*/ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ 
*/ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ 
*/ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ 
*/ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ 
*/ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ 
*/ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. 
*/ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ */ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? 
*/ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* ........ */ +0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* ........ */ +0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* ........ */ +0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* ........ */ +0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* ........ */ +0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* ........ */ +0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* ........ */ +0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* ........ */ +0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ........ */ +0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ........ */ +0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* ........ */ +0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* ........ 
*/ +0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* !"#$%&' */ +0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ()*+,-./ */ +0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 01234567 */ +0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 89:;<=>? */ +0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* @ABCDEFG */ +0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* HIJKLMNO */ +0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* PQRSTUVW */ +0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* XYZ[\]^_ */ +0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* `abcdefg */ +0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* hijklmno */ +0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* pqrstuvw */ +0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* xyz{|}~. */ +0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* ........ */ +0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* ........ */ +0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* ........ */ +0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* ........ */ +0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* ........ */ +0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* ........ */ +0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* ........ */ +0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* ........ */ +0xc0, 0xc1, 0xc2, 0xc3 /* .... */ +}; + +#endif /* __FRAGMENTED_PACKET_H__ */ diff --git a/test/cunit/ofp_test_debug_pcap.c b/test/cunit/ofp_test_debug_pcap.c new file mode 100644 index 00000000..79a08143 --- /dev/null +++ b/test/cunit/ofp_test_debug_pcap.c @@ -0,0 +1,294 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef OFP_TESTMODE_AUTO +#define OFP_TESTMODE_AUTO 1 +#endif + +#include +#include +#include +#include +#include + +#if OFP_TESTMODE_AUTO +#include +#else +#include +#endif + +#include +#include +#include "test_raw_frames.h" +#include "ofpi.h" +#include "ofpi_log.h" +#include "ofpi_debug.h" + +/* + * Test data + */ +char testFileName[] = "testbuf.txt"; +char pcap_file_name[] = "test.pcap"; +uint32_t ipaddr = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */ +uint8_t ip6addr[16] = { +0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, +0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef +}; +uint8_t macaddr[6] = { 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA }; +uint8_t pcap_header[24] = { +0xd4, 0xc3, 0xb2, 0xa1, 0x02, 0x00, 0x04, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xff, 0xff, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 +}; + +/* + * INIT + */ +#define SHM_PKT_POOL_SIZE (32*2048) +#define SHM_PKT_POOL_BUF_SIZE 1856 + +static int +init_suite(void) +{ + odp_pool_param_t pool_params; + odp_pool_t pool; + + /* Init ODP before calling anything else */ + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local()) { + OFP_ERR("Error: ODP local init failed.\n"); + return -1; + } + + ofp_pcap_alloc_shared_memory(); + + pool_params.pkt.seg_len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.num = SHM_PKT_POOL_SIZE/SHM_PKT_POOL_BUF_SIZE; + pool_params.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet_pool", ODP_SHM_NULL, + &pool_params); + if (pool == ODP_POOL_INVALID) { + OFP_ERR("Error: packet pool create failed.\n"); + return -1; + } + + odp_shm_print_all(); + odp_pool_print(pool); + + return 0; +} + +static int +clean_suite(void) +{ + return 0; +} + +/* + * Helpers + */ +#define fail_with_odp(msg) do { OFP_ERR(msg); CU_FAIL(msg); } while (0) + +static int 
+create_odp_packet_ip4(odp_packet_t *opkt, uint8_t *pkt_data, int plen) +{ + odp_pool_t pool; + uint8_t *buf; + odp_packet_t pkt; + struct ofp_ip *iphdr; + + pool = odp_pool_lookup("packet_pool"); + if (pool == ODP_POOL_INVALID) { + fail_with_odp("ODP packet_pool not found\n"); + return -1; + } + + pkt = odp_packet_alloc(pool, plen); + if (pkt == ODP_PACKET_INVALID) { + fail_with_odp("ODP packet alloc failed"); + return -1; + } + + buf = odp_packet_data(pkt); + + memcpy(buf, pkt_data, plen); + + iphdr = (struct ofp_ip *)&buf[14]; + + odp_packet_has_eth_set(pkt, 1); + odp_packet_has_ipv4_set(pkt, 1); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, OFP_ETHER_HDR_LEN); + odp_packet_l4_offset_set(pkt, OFP_ETHER_HDR_LEN + (iphdr->ip_hl<<2)); + + *opkt = pkt; + + return 0; +} + +#define PCAP_TIMESTAMP_LEN 8 +#define PCAP_PKT_SIZE_LEN 8 + +static int +assert_pcap_pkt(uint8_t *buf, unsigned buf_size, unsigned *offset, + const uint8_t *ref_buf, unsigned ref_len) +{ + union{ + uint32_t ul[2]; + uint8_t b[8]; + } pkt_size; + + if (*offset + PCAP_TIMESTAMP_LEN + PCAP_PKT_SIZE_LEN + ref_len > + buf_size) { + CU_FAIL("PCAP dump failed - buf_size to small"); + return -1; + } + + /* don't check timestamp */ + *offset += PCAP_TIMESTAMP_LEN; + + pkt_size.ul[0] = ref_len; + pkt_size.ul[1] = ref_len; + if (memcmp(&buf[*offset], pkt_size.b, PCAP_PKT_SIZE_LEN)) { + CU_FAIL("PCAP dump failed - pkt_size"); + return -1; + } + CU_PASS("PCAP dump"); + + *offset += PCAP_PKT_SIZE_LEN; + + if (memcmp(&buf[*offset], ref_buf, ref_len)) { + CU_FAIL("PCAP dump failed - ref_buf"); + return -1; + } + CU_PASS("PCAP dump"); + + *offset += ref_len; + + return 0; +} + +/* + * Testcases + */ + +static void +test_pcap(void) +{ + odp_packet_t pkt; + int port = 22; + unsigned fsize, l, offset = 0; + uint8_t *buf; + + /* INIT */ + ofp_debug_capture_ports = 1 << port; + ofp_debug_flags = OFP_DEBUG_PRINT_RECV_NIC | + OFP_DEBUG_PRINT_SEND_NIC | + OFP_DEBUG_CAPTURE; + + /* TEST */ + 
ofp_set_capture_file(pcap_file_name); + + if (create_odp_packet_ip4(&pkt, tcp_frame, sizeof(tcp_frame))) + goto err; + ofp_save_packet_to_pcap_file(OFP_DEBUG_PRINT_RECV_NIC, pkt, port); + + if (create_odp_packet_ip4(&pkt, arp_frame, sizeof(arp_frame))) + goto err; + ofp_save_packet_to_pcap_file(OFP_DEBUG_PRINT_RECV_NIC, pkt, port); + + if (create_odp_packet_ip4(&pkt, icmp_frame, sizeof(icmp_frame))) + goto err; + ofp_save_packet_to_pcap_file(OFP_DEBUG_PRINT_RECV_NIC, pkt, port); + + (void)ip6udp_frame; + (void)icmp6_frame; + + /* ASSERT */ + FILE *f = fopen(pcap_file_name, "rb"); + + fseek(f, 0, SEEK_END); + fsize = ftell(f); + fseek(f, 0, SEEK_SET); + + buf = (uint8_t *)malloc(fsize); + l = fread(buf, 1, fsize, f); + + fclose(f); + + if (l < sizeof(pcap_header) || + memcmp(&buf[offset], pcap_header, sizeof(pcap_header))) { + CU_FAIL("PCAP header failed") + goto err; + } else { + CU_PASS("PCAP header passed") + } + offset += sizeof(pcap_header); + + if (assert_pcap_pkt(buf, fsize, &offset, tcp_frame, sizeof(tcp_frame))) + goto err; + + if (assert_pcap_pkt(buf, fsize, &offset, arp_frame, sizeof(arp_frame))) + goto err; + + if (assert_pcap_pkt(buf, fsize, &offset, icmp_frame, + sizeof(icmp_frame))) + goto err; + +err: + ofp_debug_capture_ports = 0; + ofp_debug_flags = ofp_debug_flags ^ OFP_DEBUG_CAPTURE; +} + +/* + * Main + */ +int +main(void) +{ + CU_pSuite ptr_suite = NULL; + int nr_of_failed_tests = 0; + int nr_of_failed_suites = 0; + + /* Initialize the CUnit test registry */ + if (CUE_SUCCESS != CU_initialize_registry()) + return CU_get_error(); + + /* add a suite to the registry */ + ptr_suite = CU_add_suite("ofp capture", init_suite, clean_suite); + if (NULL == ptr_suite) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, test_pcap)) { + CU_cleanup_registry(); + return CU_get_error(); + } + +#if OFP_TESTMODE_AUTO + CU_set_output_filename("CUnit-PCAP"); + CU_automated_run_tests(); +#else + /* Run all tests using the 
CUnit Basic interface */ + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); +#endif + + nr_of_failed_tests = CU_get_number_of_tests_failed(); + nr_of_failed_suites = CU_get_number_of_suites_failed(); + CU_cleanup_registry(); + + return (nr_of_failed_suites > 0 ? + nr_of_failed_suites : nr_of_failed_tests); +} diff --git a/test/cunit/ofp_test_debug_print.c b/test/cunit/ofp_test_debug_print.c new file mode 100644 index 00000000..d85c900d --- /dev/null +++ b/test/cunit/ofp_test_debug_print.c @@ -0,0 +1,390 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef OFP_TESTMODE_AUTO +#define OFP_TESTMODE_AUTO 1 +#endif + +#include +#include +#include +#include + +#if OFP_TESTMODE_AUTO +#include +#else +#include +#endif + +#include +#include +#include "test_raw_frames.h" +#include "ofpi.h" +#include "ofpi_debug.h" +#include "../../src/ofp_debug_print.c" + + +/* + * Test data + */ +char testFileName[] = "testbuf.txt"; +char pcap_file_name[] = "test.pcap"; +uint32_t ipaddr = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */ +uint8_t ip6addr[16] = { +0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, +0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef +}; +uint8_t macaddr[6] = { 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA }; +uint8_t pcap_header[24] = { +0xd4, 0xc3, 0xb2, 0xa1, 0x02, 0x00, 0x04, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xff, 0xff, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 +}; + +/* + * INIT + */ +#define SHM_PKT_POOL_SIZE (32*2048) +#define SHM_PKT_POOL_BUF_SIZE 1856 + +static int +init_suite(void) +{ + odp_pool_param_t pool_params; + odp_pool_t pool; + + /* Init ODP before calling anything else */ + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local()) { + OFP_ERR("Error: ODP local init failed.\n"); + return -1; + } + + + pool_params.pkt.seg_len = 
SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.num = SHM_PKT_POOL_SIZE/SHM_PKT_POOL_BUF_SIZE; + pool_params.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet_pool", ODP_SHM_NULL, + &pool_params); + if (pool == ODP_POOL_INVALID) { + OFP_ERR("Error: packet pool create failed.\n"); + return -1; + } + + odp_pool_print(pool); + + return 0; +} + +static int +clean_suite(void) +{ + return 0; +} + +/* + * Helpers + */ +#define fail_with_odp(msg) do { OFP_ERR(msg); CU_FAIL(msg); } while (0) + +static int +create_odp_packet_ip4(odp_packet_t *opkt, uint8_t *pkt_data, int plen) +{ + odp_pool_t pool; + uint8_t *buf; + odp_packet_t pkt; + struct ofp_ip *iphdr; + + pool = odp_pool_lookup("packet_pool"); + if (pool == ODP_POOL_INVALID) { + fail_with_odp("ODP packet_pool not found\n"); + return -1; + } + + pkt = odp_packet_alloc(pool, plen); + if (pkt == ODP_PACKET_INVALID) { + fail_with_odp("ODP packet alloc failed"); + return -1; + } + + buf = odp_packet_data(pkt); + + memcpy(buf, pkt_data, plen); + + iphdr = (struct ofp_ip *)&buf[14]; + + odp_packet_has_eth_set(pkt, 1); + odp_packet_has_ipv4_set(pkt, 1); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, OFP_ETHER_HDR_LEN); + odp_packet_l4_offset_set(pkt, OFP_ETHER_HDR_LEN + (iphdr->ip_hl<<2)); + + *opkt = pkt; + + return 0; +} + +static char *get_packet_start(char *buff_txt) +{ + char *pkt_txt; + + pkt_txt = strstr(buff_txt, "CUnit-Util"); + if (pkt_txt == NULL) + return NULL; + + while ((*pkt_txt != '\n') && + (*pkt_txt != 0)) + pkt_txt++; + + if (*pkt_txt == 0) + pkt_txt = NULL; + else + pkt_txt++; + + return pkt_txt; +} + +/* + * Testcases + */ +static void +test_print_arp(void) +{ +#define BUFLEN 46 + char res[BUFLEN]; + FILE *f; + + memset(res, 0x0, BUFLEN); + + f = fopen(testFileName, "w"); + print_arp(f, (char *)(&arp_frame[L2_HEADER_NO_VLAN_SIZE])); + fclose(f); + + f = fopen(testFileName, "r"); + if (fgets(res, BUFLEN, f) != NULL) { + 
CU_ASSERT_STRING_EQUAL( + res, + "ARP 1 192.168.56.101 -> 192.168.56.102 "); + } else { + CU_FAIL("Cannot read output file."); + } + fclose(f); +#undef BUFLEN +} + +static void +test_print_ipv6__ip6udp_frame(void) +{ +#define BUFLEN 120 + char res[BUFLEN]; + FILE *f; + + memset(res, 0x0, BUFLEN); + + f = fopen(testFileName, "w"); + print_ipv6(f, (char *)(&ip6udp_frame[L2_HEADER_NO_VLAN_SIZE])); + fclose(f); + + f = fopen(testFileName, "r"); + if (fgets(res, BUFLEN, f) != NULL) { + CU_ASSERT_STRING_EQUAL( + res, + "IPv6 UDP: len=44 fe80:0000:0000:0000:0222:68ff:fe0f:" + "ba87 port 5353 -> ff02:0000:0000:0000:0000:0000:0000:" + "00fb port 535"); + } else { + CU_FAIL("Cannot read output file."); + } + fclose(f); +#undef BUFLEN +} + +static void +test_print_ipv6__icmp6_frame(void) +{ /* Writes icmp6_frame through print_ipv6() to testFileName and compares the first BUFLEN bytes read back. */ +#define BUFLEN 190 + char res[BUFLEN + 1]; /* +1: fread() does not NUL-terminate, keep room for the terminator */ + FILE *f; + + memset(res, 0x0, sizeof(res)); /* zero the whole array so res stays a valid C string after a full BUFLEN-byte read */ + + f = fopen(testFileName, "w"); + print_ipv6(f, (char *)(&icmp6_frame[L2_HEADER_NO_VLAN_SIZE])); + fclose(f); + + f = fopen(testFileName, "r"); + if (fread(res, 1, BUFLEN, f)) { + CU_ASSERT_STRING_EQUAL( + res, + "IPv6 ICMP: len=24 type=Neighbor-Solicitation target=fe80:0000:0000:0000:c51b:dd4f:db50:54d7 code=0\n" + " 0000:0000:0000:0000:0000:0000:0000:0000 -> ff02:0000:0000:0000:0000:0001:ff50:54d7 "); + } else { + CU_FAIL("Cannot read output file."); + } + fclose(f); +#undef BUFLEN +} + +static void +test_print_ipv4__tcpframe(void) +{ +#define BUFLEN 200 + char res[BUFLEN]; + FILE *f; + + memset(res, 0x0, BUFLEN); + + f = fopen(testFileName, "w"); + print_ipv4(f, (char *)(&tcp_frame[L2_HEADER_NO_VLAN_SIZE])); + fclose(f); + + f = fopen(testFileName, "r"); + if (fgets(res, BUFLEN, f) != NULL) { + CU_ASSERT_STRING_EQUAL( + res, + "IP len=92 TCP 192.168.56.101:53662 -> 192.168.56.102:22\n"); + } else { + CU_FAIL("Cannot read output file."); + } + fclose(f); +#undef BUFLEN +} + +static void +test_print_ipv4__icmpframe(void) +{ +#define BUFLEN 70 + char res[BUFLEN]; + FILE *f; + + memset(res, 0x0, BUFLEN); + + 
f = fopen(testFileName, "w"); + print_ipv4(f, (char *)(&icmp_frame[L2_HEADER_NO_VLAN_SIZE])); + fclose(f); + + f = fopen(testFileName, "r"); + if (fgets(res, BUFLEN, f) != NULL) { + CU_ASSERT_STRING_EQUAL( + res, + "IP ICMP: echo 192.168.56.101 -> 192.168.56.102 id=256 seq=15616"); + } else { + CU_FAIL("Cannot read output file."); + } + fclose(f); +#undef BUFLEN +} + +static void +test_ofp_print_packet(void) +{ /* Builds an ODP packet from tcp_frame, prints it with ofp_print_packet() and compares the text found after the "CUnit-Util" line of the debug file. */ +#define BUFLEN 250 + char res[BUFLEN + 1]; /* +1: fread() does not NUL-terminate, keep room for the terminator */ + char *pkt_txt; + FILE *f; + odp_packet_t pkt; + + memset(res, 0x0, sizeof(res)); /* clear res[BUFLEN] too, so strstr()/strcmp() below never run past the array */ + + if (create_odp_packet_ip4(&pkt, tcp_frame, sizeof(tcp_frame))) { + CU_FAIL("Cannot create packet."); + return; + } + + /* outputs to packets.txt */ + ofp_print_packet("CUnit-Util", pkt); + + f = fopen(DEFAULT_DEBUG_TXT_FILE_NAME, "r"); + if (fread(res, 1, BUFLEN, f)) { + pkt_txt = get_packet_start(res); + if (pkt_txt == NULL) { + CU_FAIL("Packet not found."); + fclose(f); + return; + } + CU_ASSERT_STRING_EQUAL( + pkt_txt, + " 08:00:27:00:a8:1e -> 08:00:27:ae:3e:d3\n" + " IP len=92 TCP 192.168.56.101:53662 " + "-> 192.168.56.102:22\n" + " seq=0x3fc97a8a ack=0xee1651e9 off=5\n" + " flags=PA win=16383 sum=0xb5f0 urp=0\n"); + } else { + CU_FAIL("Cannot read output file."); + } + fclose(f); +#undef BUFLEN +} + +/* + * Main + */ +int +main(void) +{ + CU_pSuite ptr_suite = NULL; + int nr_of_failed_tests = 0; + int nr_of_failed_suites = 0; + + /* Initialize the CUnit test registry */ + if (CUE_SUCCESS != CU_initialize_registry()) + return CU_get_error(); + + /* add a suite to the registry */ + ptr_suite = CU_add_suite("ofp debug print", init_suite, clean_suite); + if (NULL == ptr_suite) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, test_print_arp)) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, test_print_ipv6__ip6udp_frame)) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, test_print_ipv6__icmp6_frame)) { + 
CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, test_print_ipv4__tcpframe)) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, test_print_ipv4__icmpframe)) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_print_packet)) { + CU_cleanup_registry(); + return CU_get_error(); + } + +#if OFP_TESTMODE_AUTO + CU_set_output_filename("CUnit-Debug-print"); + CU_automated_run_tests(); +#else + /* Run all tests using the CUnit Basic interface */ + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); +#endif + + nr_of_failed_tests = CU_get_number_of_tests_failed(); + nr_of_failed_suites = CU_get_number_of_suites_failed(); + CU_cleanup_registry(); + + return (nr_of_failed_suites > 0 ? + nr_of_failed_suites : nr_of_failed_tests); +} diff --git a/test/cunit/ofp_test_fragmentation.c b/test/cunit/ofp_test_fragmentation.c new file mode 100644 index 00000000..8f0ecda3 --- /dev/null +++ b/test/cunit/ofp_test_fragmentation.c @@ -0,0 +1,681 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef OFP_TESTMODE_AUTO +#define OFP_TESTMODE_AUTO 1 +#endif + +#include +#include +#include +#include + +#if OFP_TESTMODE_AUTO +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fragmented_packet.h" + +#define fail_with_odp(msg) do { OFP_ERR(msg); CU_FAIL(msg); } while (0) + +/* + * Test data + */ + +#define SHM_PKT_POOL_SIZE (32*2048) +#define SHM_PKT_POOL_BUF_SIZE 3000 + +static uint32_t port = 0, vlan = 0, vrf = 0, def_mtu = 1500; +static uint32_t dev_ip = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */ +static uint8_t dev_mac[6] = {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; +static uint32_t dst_ipaddr = 0x660AA8C0; /* C0.A8.0A.66 = 192.168.10.102 */ +static uint8_t dst_mac[6] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; +static uint8_t orig_pkt_data[SHM_PKT_POOL_BUF_SIZE]; +static struct ofp_nh_entry nexthop; +static struct ofp_ifnet *dev; + +/* + * Helpers + */ + +static void init_ifnet(void) +{ + char str[256]; + + ofp_config_interface_up_v4(port, vlan, vrf, dev_ip, 24); + + dev = ofp_get_ifnet(port, vlan); + memcpy(dev->mac, dev_mac, OFP_ETHER_ADDR_LEN); + dev->if_mtu = def_mtu; +#ifdef SP + dev->linux_index = port + 3; /* an if index of Linux != port val */ + ofp_update_ifindex_lookup_tab(dev); +#endif /* SP */ + + dev->pkt_pool = odp_pool_lookup("packet_pool"); + + sprintf(str, "out default queue:%d", port); + dev->outq_def = odp_queue_create(str, + ODP_QUEUE_TYPE_POLL, + NULL); + if (dev->outq_def == ODP_QUEUE_INVALID) { + fail_with_odp("Out default queue create failed.\n"); + return; + } +} + +static int +init_suite(void) +{ + odp_pool_t pool; + odp_pool_param_t pool_params; + ofp_pkt_hook pkt_hook[OFP_HOOK_MAX]; + + /* Init ODP before calling anything else */ + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + return -1; + } + + /* Init this thread 
*/ + if (odp_init_local()) { + OFP_ERR("Error: ODP local init failed.\n"); + return -1; + } + + ofp_portconf_alloc_shared_memory(); + ofp_route_alloc_shared_memory(); + ofp_rt_lookup_alloc_shared_memory(); + ofp_avl_alloc_shared_memory(); + ofp_arp_alloc_shared_memory(); + ofp_timer_init(OFP_TIMER_RESOLUTION_US, + OFP_TIMER_MIN_US, + OFP_TIMER_MAX_US, + OFP_TIMER_TMO_COUNT); + + memset(pkt_hook, 0, sizeof(pkt_hook)); + ofp_hook_alloc_shared_memory(&pkt_hook[0]); + + ofp_init_ifnet_data(); + ofp_route_init(); + ofp_arp_global_init(); + ofp_arp_local_init(); + + pool_params.pkt.seg_len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.num = SHM_PKT_POOL_SIZE/SHM_PKT_POOL_BUF_SIZE; + pool_params.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet_pool", ODP_SHM_NULL, + &pool_params); + + if (pool == ODP_POOL_INVALID) { + OFP_ERR("Error: packet pool create failed.\n"); + return -1; + } + + odp_shm_print_all(); + odp_pool_print(pool); + + init_ifnet(); + + ofp_arp_ipv4_insert(dst_ipaddr, dst_mac, dev); + + nexthop.gw = dst_ipaddr; + nexthop.vlan = vlan; + nexthop.port = port; + + return 0; +} + +static int +clean_suite(void) +{ + return 0; +} + +static int +create_odp_packet_ip4(odp_packet_t *opkt, uint8_t *pkt_data, int plen, + uint32_t dst_addr) +{ + odp_pool_t pool; + uint8_t *buf; + odp_packet_t pkt = ODP_PACKET_INVALID; + struct ofp_ip *iphdr; + + memset(orig_pkt_data, 0x0, sizeof(orig_pkt_data)); + + pool = odp_pool_lookup("packet_pool"); + if (pool == ODP_POOL_INVALID) { + fail_with_odp("ODP packet_pool not found\n"); + return -1; + } + + pkt = odp_packet_alloc(pool, plen); + if (pkt == ODP_PACKET_INVALID) { + fail_with_odp("ODP packet alloc failed"); + return -1; + } + + buf = odp_packet_data(pkt); + + if (odp_packet_copydata_in(pkt, 0, plen, pkt_data) < 0) { + fail_with_odp("Packet data copy failed\n"); + return -1; + }; + + iphdr = (struct ofp_ip *)&buf[OFP_ETHER_HDR_LEN]; + + /* changes to the default packet. 
Recalculate ip checksum */ + if (dst_addr) { + iphdr->ip_dst.s_addr = dst_addr; + iphdr->ip_sum = 0; + iphdr->ip_sum = + ofp_in_cksum((uint16_t *)iphdr, iphdr->ip_hl<<2); + } + /* END OF changes to the default packet */ + + odp_packet_has_eth_set(pkt, 1); + odp_packet_has_ipv4_set(pkt, 1); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, OFP_ETHER_HDR_LEN); + odp_packet_l4_offset_set(pkt, OFP_ETHER_HDR_LEN + (iphdr->ip_hl<<2)); + + *opkt = pkt; + + memcpy(orig_pkt_data, pkt_data, plen); + + return 0; +} + +static void assert_ip_header(struct ofp_ip *ip, struct ofp_ip *ip_orig, + uint16_t len, uint16_t mf, uint16_t fr_off) +{ + CU_ASSERT_EQUAL(ip->ip_hl, ip_orig->ip_hl); + CU_ASSERT_EQUAL(ip->ip_v, ip_orig->ip_v); + CU_ASSERT_EQUAL(ip->ip_tos, ip_orig->ip_tos); + CU_ASSERT_EQUAL(ip->ip_len, odp_cpu_to_be_16(len)); + CU_ASSERT_EQUAL(ip->ip_id, ip_orig->ip_id); + if (mf) + CU_ASSERT((odp_be_to_cpu_16(ip->ip_off) & OFP_IP_MF) > 0) + else + CU_ASSERT((odp_be_to_cpu_16(ip->ip_off) & OFP_IP_MF) == 0) + CU_ASSERT_EQUAL(odp_be_to_cpu_16(ip->ip_off) & OFP_IP_OFFMASK, + fr_off); + CU_ASSERT_EQUAL(ip->ip_ttl, ip_orig->ip_ttl); + CU_ASSERT_EQUAL(ip->ip_p, ip_orig->ip_p); + CU_ASSERT_EQUAL(ip->ip_src.s_addr, ip_orig->ip_src.s_addr); + CU_ASSERT_EQUAL(ip->ip_dst.s_addr, ip_orig->ip_dst.s_addr); +} + +/* + * Tests + */ + +static void test_packet_size_is_less_then_mtu(void) +{ + odp_packet_t pkt_orig, pkt_sent; + odp_event_t ev; + int res; + struct ofp_ether_header *eth; + + if (create_odp_packet_ip4(&pkt_orig, pkt1_frag1, + sizeof(pkt1_frag1), 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_ip_output(pkt_orig, &nexthop); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + CU_ASSERT_EQUAL_FATAL(odp_queue_deq(dev->outq_def), ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), sizeof(pkt1_frag1)); + + 
eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + if (memcmp(odp_packet_l3_ptr(pkt_sent, NULL), + &orig_pkt_data[OFP_ETHER_HDR_LEN], + sizeof(pkt1_frag1) - OFP_ETHER_HDR_LEN)) + CU_FAIL("corrupt l3 + data forwarded"); + CU_PASS("Correct packet"); + + odp_packet_free(pkt_sent); +} + +static void test_dont_fragment_set_pkt_dropped(void) +{ + odp_packet_t pkt; + odp_event_t ev; + int res; + struct ofp_ip *ip; + + if (create_odp_packet_ip4(&pkt, pkt1_full, + sizeof(pkt1_full), 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + ip = odp_packet_l3_ptr(pkt, NULL); + ip->ip_off |= odp_cpu_to_be_16(OFP_IP_DF); + + res = ofp_ip_output(pkt, &nexthop); + CU_ASSERT_EQUAL(res, OFP_PKT_DROP); + + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_EQUAL(ev, ODP_EVENT_INVALID); + + odp_packet_free(pkt); +} + + +static void test_packet_to_two_fragments(void) +{ + odp_packet_t pkt_orig, pkt_sent; + odp_event_t ev; + int res; + struct ofp_ether_header *eth; + struct ofp_ip *ip; + struct ofp_ip *ip_orig; + uint16_t pl_pos, pl_len, orig_pl_len, pktlen; + + if (create_odp_packet_ip4(&pkt_orig, pkt1_full, sizeof(pkt1_full), 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_ip_output(pkt_orig, &nexthop); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + /* ASSERT 1st fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), + dev->if_mtu + OFP_ETHER_HDR_LEN); + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, 
NULL); + ip_orig = (struct ofp_ip *)(&orig_pkt_data[OFP_ETHER_HDR_LEN]); + orig_pl_len = odp_be_to_cpu_16(ip_orig->ip_len) - (ip_orig->ip_hl<<2); + + assert_ip_header(ip, ip_orig, dev->if_mtu, 1, 0); /* MF, off=0 */ + + pl_len = dev->if_mtu - (ip->ip_hl<<2); + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2), + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + pl_pos = pl_len; + CU_PASS("Correct packet"); + + odp_packet_free(pkt_sent); + + /* ASSERT 2nd fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + pl_len = orig_pl_len - pl_pos; + pktlen = pl_len + OFP_ETHER_HDR_LEN + sizeof(struct ofp_ip); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), pktlen); /* 1062 */ + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, NULL); + + assert_ip_header(ip, ip_orig, pl_len + sizeof(struct ofp_ip), + 0, pl_pos/8); /* 1048, MF, 1480 */ + + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2) + pl_pos, + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + CU_PASS("Correct packet"); + + odp_packet_free(pkt_sent); + + /* no more fragments */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_EQUAL(ev, ODP_EVENT_INVALID); +} + +static void test_packet_to_many_fragments(void) +{ + odp_packet_t pkt_orig, pkt_sent; + odp_event_t ev; + int res; + struct ofp_ether_header *eth; + struct ofp_ip *ip; + struct ofp_ip *ip_orig; + uint16_t pl_pos, pl_len, orig_pl_len, pktlen, seglen; + + dev->if_mtu = 820; + seglen = dev->if_mtu - sizeof(struct ofp_ip); + + if (create_odp_packet_ip4(&pkt_orig, pkt1_full, sizeof(pkt1_full), 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = 
ofp_ip_output(pkt_orig, &nexthop); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + /* ASSERT 1st fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), + dev->if_mtu + OFP_ETHER_HDR_LEN); + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, NULL); + ip_orig = (struct ofp_ip *)(&orig_pkt_data[OFP_ETHER_HDR_LEN]); + orig_pl_len = odp_be_to_cpu_16(ip_orig->ip_len) - (ip_orig->ip_hl<<2); + + assert_ip_header(ip, ip_orig, dev->if_mtu, 1, 0); + + pl_len = dev->if_mtu - (ip->ip_hl<<2); + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2), + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + pl_pos = pl_len; + CU_PASS("Correct packet"); + + odp_packet_free(pkt_sent); + + /* ASSERT 2nd fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + pl_len = orig_pl_len - pl_pos; + pl_len = (pl_len < seglen) ? 
pl_len : seglen; + pktlen = pl_len + OFP_ETHER_HDR_LEN + sizeof(struct ofp_ip); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), pktlen); + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, NULL); + + assert_ip_header(ip, ip_orig, pl_len + sizeof(struct ofp_ip), + 1, pl_pos/8); + + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2) + pl_pos, + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + CU_PASS("Correct packet"); + pl_pos += pl_len; + + odp_packet_free(pkt_sent); + + /* ASSERT 3rd fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + pl_len = orig_pl_len - pl_pos; + pl_len = (pl_len < seglen) ? pl_len : seglen; + pktlen = pl_len + OFP_ETHER_HDR_LEN + sizeof(struct ofp_ip); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), pktlen); + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, NULL); + + assert_ip_header(ip, ip_orig, pl_len + sizeof(struct ofp_ip), + 1, pl_pos/8); + + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2) + pl_pos, + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + CU_PASS("Correct packet"); + pl_pos += pl_len; + + odp_packet_free(pkt_sent); + + /* ASSERT 4th fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + pl_len = orig_pl_len - pl_pos; + pl_len = (pl_len < seglen) ? 
pl_len : seglen; + pktlen = pl_len + OFP_ETHER_HDR_LEN + sizeof(struct ofp_ip); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), pktlen); + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, NULL); + + assert_ip_header(ip, ip_orig, pl_len + sizeof(struct ofp_ip), + 0, pl_pos/8); + + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2) + pl_pos, + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + CU_PASS("Correct packet"); + pl_pos += pl_len; + + odp_packet_free(pkt_sent); + + /* no more fragments */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_EQUAL(ev, ODP_EVENT_INVALID); + + dev->if_mtu = def_mtu; +} + +static void test_fragment_fragmented_to_two(void) +{ + odp_packet_t pkt_orig, pkt_sent; + odp_event_t ev; + int res; + struct ofp_ether_header *eth; + struct ofp_ip *ip; + struct ofp_ip *ip_orig; + uint16_t pl_pos, pl_len, orig_pl_len, pktlen, start_offset; + + dev->if_mtu = 620; + + if (create_odp_packet_ip4(&pkt_orig, pkt1_frag2, + sizeof(pkt1_frag2), 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_ip_output(pkt_orig, &nexthop); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + /* ASSERT 1st fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), + dev->if_mtu + OFP_ETHER_HDR_LEN); + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, NULL); + ip_orig = (struct ofp_ip *)(&orig_pkt_data[OFP_ETHER_HDR_LEN]); + 
orig_pl_len = odp_be_to_cpu_16(ip_orig->ip_len) - (ip_orig->ip_hl<<2); + start_offset = odp_be_to_cpu_16(ip_orig->ip_off) & OFP_IP_OFFMASK; + + assert_ip_header(ip, ip_orig, dev->if_mtu, 1, start_offset); + + pl_len = dev->if_mtu - (ip->ip_hl<<2); + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2), + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + pl_pos = pl_len; + CU_PASS("Correct packet"); + + odp_packet_free(pkt_sent); + + /* ASSERT 2nd fragment */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt_sent = odp_packet_from_event(ev); + pl_len = orig_pl_len - pl_pos; + pktlen = pl_len + OFP_ETHER_HDR_LEN + sizeof(struct ofp_ip); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt_sent), pktlen); + + eth = odp_packet_l2_ptr(pkt_sent, NULL); + if (memcmp(eth->ether_dhost, dst_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt_sent, NULL); + + assert_ip_header(ip, ip_orig, pl_len + sizeof(struct ofp_ip), + 0, start_offset + pl_pos/8); + + if (memcmp((uint8_t *)ip + (ip->ip_hl<<2), + (uint8_t *)ip_orig + (ip_orig->ip_hl<<2) + pl_pos, + pl_len)) + CU_FAIL("corrupt l3 + data forwarded"); + CU_PASS("Correct packet"); + + odp_packet_free(pkt_sent); + + /* no more fragments */ + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_EQUAL(ev, ODP_EVENT_INVALID); + + dev->if_mtu = def_mtu; +} + +/* + * Main + */ +int +main(void) +{ + CU_pSuite ptr_suite = NULL; + int nr_of_failed_tests = 0; + int nr_of_failed_suites = 0; + + /* Initialize the CUnit test registry */ + if (CUE_SUCCESS != CU_initialize_registry()) + return CU_get_error(); + + /* add a suite to the registry */ + ptr_suite = CU_add_suite("ofp fragmentation", init_suite, clean_suite); + if (NULL == ptr_suite) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == 
CU_ADD_TEST(ptr_suite, + test_packet_size_is_less_then_mtu)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_dont_fragment_set_pkt_dropped)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_packet_to_two_fragments)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_packet_to_many_fragments)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_fragment_fragmented_to_two)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + +#if OFP_TESTMODE_AUTO + CU_set_output_filename("CUnit-fragmentation"); + CU_automated_run_tests(); +#else + /* Run all tests using the CUnit Basic interface */ + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); +#endif + + nr_of_failed_tests = CU_get_number_of_tests_failed(); + nr_of_failed_suites = CU_get_number_of_suites_failed(); + CU_cleanup_registry(); + + return (nr_of_failed_suites > 0 ? + nr_of_failed_suites : nr_of_failed_tests); +} diff --git a/test/cunit/ofp_test_packet_input.c b/test/cunit/ofp_test_packet_input.c new file mode 100644 index 00000000..7d8a5927 --- /dev/null +++ b/test/cunit/ofp_test_packet_input.c @@ -0,0 +1,808 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef OFP_TESTMODE_AUTO +#define OFP_TESTMODE_AUTO 1 +#endif + +#include +#include +#include +#include + +#if OFP_TESTMODE_AUTO +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Test data + */ +static uint32_t dst_ipaddr = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */ +/* Frame IP/UDP/SNMP (140 bytes) */ +static uint8_t test_frame[140] = { +0x40, 0x01, 0xec, 0x36, 0x93, 0x18, 0xc8, 0x35, +0xb8, 0x28, 0x91, 0x3e, 0x08, 0x00, 0x45, 0x00, +0x00, 0x7a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, +0xf0, 0x71, 0x0a, 0x00, 0x1a, 0x01, 0x0a, 0x00, +0x1c, 0x01, 0x00, 0xa1, 0xff, 0xfe, 0x00, 0x66, +0xd4, 0xc3, 0x30, 0x5c, 0x02, 0x01, 0x01, 0x04, +0x06, 0x4e, 0x45, 0x54, 0x4d, 0x41, 0x4e, 0xa2, +0x4f, 0x02, 0x03, 0x0f, 0xb0, 0xc7, 0x02, 0x01, +0x00, 0x02, 0x01, 0x00, 0x30, 0x42, 0x30, 0x14, +0x06, 0x0f, 0x2b, 0x06, 0x01, 0x04, 0x01, 0x81, +0x41, 0x81, 0x31, 0x01, 0x02, 0x02, 0x01, 0x07, +0x00, 0x02, 0x01, 0x01, 0x30, 0x14, 0x06, 0x0f, +0x2b, 0x06, 0x01, 0x04, 0x01, 0x81, 0x41, 0x81, +0x31, 0x01, 0x02, 0x02, 0x01, 0x08, 0x00, 0x02, +0x01, 0x00, 0x30, 0x14, 0x06, 0x0f, 0x2b, 0x06, +0x01, 0x04, 0x01, 0x81, 0x41, 0x81, 0x31, 0x01, +0x02, 0x02, 0x01, 0x09, 0x00, 0x02, 0x01, 0x00, +0xa9, 0x59, 0xcd, 0x58 +}; + +/* Frame IP/GRE/IP/ICMP (138 bytes) */ +static uint8_t gre_frame[138] = { +0xc2, 0x01, 0x57, 0x75, 0x00, 0x00, 0xc2, 0x00, /* ..Wu.... */ +0x57, 0x75, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, /* Wu....E. */ +0x00, 0x7c, 0x00, 0x0a, 0x00, 0x00, 0xff, 0x2f, /* .|...../ */ +0xa7, 0x46, 0x0a, 0x00, 0x00, 0x01, 0x0a, 0x00, /* .F...... */ +0x00, 0x02, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, /* ......E. */ +0x00, 0x64, 0x00, 0x0a, 0x00, 0x00, 0xff, 0x01, /* .d...... */ +0xb5, 0x89, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, /* ........ */ +0x02, 0x02, 0x08, 0x00, 0xbf, 0xd4, 0x00, 0x02, /* ........ 
*/ +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, /* ........ */ +0xbe, 0x70, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* .p...... */ +0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* ........ */ +0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* ........ */ +0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* ........ */ +0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* ........ */ +0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* ........ */ +0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* ........ */ +0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd, /* ........ */ +0xab, 0xcd /* .. */ +}; + +/* + * Helpers + */ +#define fail_with_odp(msg) do { OFP_ERR(msg); CU_FAIL(msg); } while (0) + +#define OFP_TEST_FAIL 0xFFFF +#define OFP_TEST_LOCAL_HOOK 0xFF01 + +#define TEST_LOCAL_HOOK 0x8001 +#define TEST_FORWARD_HOOK 0x8002 +#define TEST_LOCAL_HOOK_GRE 0x8003 +#define TEST_LOCAL_HOOK_GRE_APP 0x8004 +#define TEST_GRE_HOOK 0x8005 +/* global identifier for a testcase */ +static int my_test_val; +/* save the packet that was sent as input to ofp_packet_input */ +static uint8_t in_pkt_data[1024]; +static struct ofp_ifnet *ifnet; +static odp_queue_t interface_queue[16]; +static uint32_t port, vlan, vrf, local_ip; +static uint32_t tun_rem_ip = 0x660AA8C0; /* C0.A8.0A.66 = 192.168.10.102 */ +static uint32_t tun_addr = 0x010A0A0A; /* 0A.0A.0A.01 = 10.10.10.1 */ +static uint32_t tun_p2p = 0x020A0A0A; /* 0A.0A.0A.02 = 10.10.10.2 */ +static uint16_t tun_mask = 32; /* p-t-p */ + +static enum ofp_return_code fastpath_ip4_forward_hook(odp_packet_t pkt, + void *nh) +{ + (void) pkt; + (void) nh; + if (my_test_val == TEST_FORWARD_HOOK) { + CU_PASS("fastpath_ip4_forward_hook\n"); + return OFP_PKT_CONTINUE; + } else + return OFP_TEST_FAIL; +} + +static enum ofp_return_code fastpath_ip6_forward_hook(odp_packet_t pkt, + void *nh) +{ + (void) pkt; + (void) nh; + return OFP_TEST_FAIL; +} + +static enum ofp_return_code fastpath_local_hook(odp_packet_t pkt, + void *arg) +{ + int protocol = 
*(int *)arg; + (void) pkt; + if (my_test_val == TEST_LOCAL_HOOK) { + CU_ASSERT_EQUAL(protocol, IS_IPV4); + + CU_ASSERT_EQUAL(odp_packet_len(pkt), sizeof(test_frame)); + if (memcmp((uint8_t *)odp_packet_data(pkt) + + odp_packet_l3_offset(pkt), + in_pkt_data + OFP_ETHER_HDR_LEN, + odp_packet_len(pkt) - OFP_ETHER_HDR_LEN)) + CU_FAIL("Corrupt data"); + + return OFP_TEST_LOCAL_HOOK; + } else if (my_test_val == TEST_LOCAL_HOOK_GRE) { + /* GRE packet is offered to local hook, then + after processing to forward hook */ + my_test_val = TEST_FORWARD_HOOK; + return OFP_PKT_CONTINUE; + } else if (my_test_val == TEST_LOCAL_HOOK_GRE_APP) { + /* GRE packet is offered to local hook, then + after tunnel is not found to GRE hook */ + my_test_val = TEST_GRE_HOOK; + return OFP_PKT_CONTINUE; + } else + return OFP_TEST_FAIL; +} + +static enum ofp_return_code fastpath_gre_hook(odp_packet_t pkt, void *nh) +{ + (void) pkt; + (void) nh; + if (my_test_val == TEST_GRE_HOOK) { + CU_PASS("fastpath_GRE_hook\n"); + return OFP_PKT_CONTINUE; + } else { + return OFP_TEST_FAIL; + } +} +/* + * INIT + */ +static void +test_init_ifnet(void) +{ + char str[256]; + + ofp_config_interface_up_v4(port, vlan, vrf, local_ip, 24); + + ifnet = ofp_get_ifnet(port, vlan); + ifnet->pkt_pool = odp_pool_lookup("packet_pool"); + +#ifdef SP + ifnet->linux_index = port + 3; /* an if index of Linux != port val */ + ofp_update_ifindex_lookup_tab(ifnet); + + sprintf(str, "slow path stack port:%d", port); + ifnet->spq_def = odp_queue_create(str, + ODP_QUEUE_TYPE_POLL, + NULL); + if (ifnet->spq_def == ODP_QUEUE_INVALID) { + fail_with_odp("Slow path queue create failed.\n"); + return; + } +#endif + + sprintf(str, "out default queue:%d", port); + ifnet->outq_def = odp_queue_create(str, + ODP_QUEUE_TYPE_POLL, + NULL); + if (ifnet->outq_def == ODP_QUEUE_INVALID) { + fail_with_odp("Out default queue create failed.\n"); + return; + } + + sprintf(str, "interface queue:%d", port); + interface_queue[port] = + odp_queue_create(str, 
ODP_QUEUE_TYPE_POLL, NULL); + if (interface_queue[port] == ODP_QUEUE_INVALID) { + OFP_ERR("Poll queue create failed.\n"); + return; + } + odp_queue_set_context(interface_queue[port], ifnet); + + ofp_config_interface_up_tun(GRE_PORTS, 100 + port, vrf, local_ip, + tun_rem_ip, tun_p2p, tun_addr, + tun_mask); +} + +#define SHM_PKT_POOL_SIZE (32*2048) +#define SHM_PKT_POOL_BUF_SIZE 1856 + +static int +init_suite(void) +{ + odp_pool_t pool; + odp_pool_param_t pool_params; + ofp_pkt_hook pkt_hook[OFP_HOOK_MAX]; + + /* Init ODP before calling anything else */ + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local()) { + OFP_ERR("Error: ODP local init failed.\n"); + return -1; + } + + ofp_portconf_alloc_shared_memory(); + ofp_route_alloc_shared_memory(); + ofp_rt_lookup_alloc_shared_memory(); + ofp_avl_alloc_shared_memory(); + ofp_arp_alloc_shared_memory(); + ofp_pcap_alloc_shared_memory(); + ofp_timer_init(OFP_TIMER_RESOLUTION_US, + OFP_TIMER_MIN_US, + OFP_TIMER_MAX_US, + OFP_TIMER_TMO_COUNT); + + memset(pkt_hook, 0, sizeof(pkt_hook)); + pkt_hook[OFP_HOOK_LOCAL] = fastpath_local_hook; + pkt_hook[OFP_HOOK_FWD_IPv4] = fastpath_ip4_forward_hook; + pkt_hook[OFP_HOOK_FWD_IPv6] = fastpath_ip6_forward_hook; + pkt_hook[OFP_HOOK_GRE] = fastpath_gre_hook; + ofp_hook_alloc_shared_memory(&pkt_hook[0]); + + ofp_init_ifnet_data(); + ofp_route_init(); + ofp_arp_global_init(); + ofp_arp_local_init(); + + pool_params.pkt.seg_len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.num = SHM_PKT_POOL_SIZE/SHM_PKT_POOL_BUF_SIZE; + pool_params.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet_pool", ODP_SHM_NULL, + &pool_params); + + if (pool == ODP_POOL_INVALID) { + OFP_ERR("Error: packet pool create failed.\n"); + return -1; + } + + ofp_ip_init(); + + odp_shm_print_all(); + odp_pool_print(pool); + + return 0; +} + +static int +clean_suite(void) +{ + 
return 0; +} + +static int +create_odp_packet_ip4(odp_packet_t *opkt, uint8_t *pkt_data, int plen, + uint32_t dst_addr, uint32_t src_addr) +{ + odp_pool_t pool; + uint8_t *buf; + odp_packet_t pkt = ODP_PACKET_INVALID; + struct ofp_ip *iphdr; + + pool = odp_pool_lookup("packet_pool"); + if (pool == ODP_POOL_INVALID) { + fail_with_odp("ODP packet_pool not found\n"); + return -1; + } + + pkt = odp_packet_alloc(pool, plen); + if (pkt == ODP_PACKET_INVALID) { + fail_with_odp("ODP packet alloc failed"); + return -1; + } + + buf = odp_packet_data(pkt); + + memcpy(buf, pkt_data, plen); + + iphdr = (struct ofp_ip *)&buf[14]; + + /* changes to the default packet. Recalculate ip checksum */ + if (dst_addr) + iphdr->ip_dst.s_addr = dst_addr; + if (src_addr) + iphdr->ip_src.s_addr = src_addr; + if (dst_addr || src_addr) { + iphdr->ip_sum = 0; + iphdr->ip_sum = + ofp_in_cksum((uint16_t *)iphdr, iphdr->ip_hl<<2); + } + /* END OF changes to the default packet */ + + odp_packet_has_eth_set(pkt, 1); + odp_packet_has_ipv4_set(pkt, 1); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, OFP_ETHER_HDR_LEN); + odp_packet_l4_offset_set(pkt, OFP_ETHER_HDR_LEN + (iphdr->ip_hl<<2)); + + *opkt = pkt; + + memcpy(in_pkt_data, buf, plen); + return 0; +} + +static void +test_ofp_add_route(uint32_t port, uint32_t vrf, uint32_t vlan, + uint32_t destination, uint32_t mask_len, + uint32_t rt_dst_len, uint32_t gw) +{ + struct ofp_route_msg msg; + /* add/test only IPv4 routes and not IPv6 or DEFAULT routes(rt_dst=0)*/ + CU_ASSERT_EQUAL(rt_dst_len, 4); + if (rt_dst_len == 4) { + msg.vrf = vrf; + msg.port = port; + msg.vlan = vlan; + + msg.type = OFP_ROUTE_ADD; + msg.dst = destination; + msg.masklen = mask_len; + msg.gw = gw; + ofp_set_route(&msg); + } + + + uint32_t flags; + struct ofp_nh_entry *node = + ofp_get_next_hop(vrf, destination, &flags); + + CU_ASSERT_EQUAL(node->gw, gw); + CU_ASSERT_EQUAL(node->port, port); + CU_ASSERT_EQUAL(node->vlan, vlan); +} + + +static void 
+test_ofp_packet_input_local_hook(void) +{ + odp_packet_t pkt; + int res; + + /* Call ofp_packet_input with a pkt with destination ip + * that matches the local ip on ifnet. + * The packet is terminated in local hook */ + my_test_val = TEST_LOCAL_HOOK; + ifnet->ip_addr = dst_ipaddr; + if (create_odp_packet_ip4(&pkt, test_frame, sizeof(test_frame), + dst_ipaddr, 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_packet_input(pkt, interface_queue[port], + ofp_eth_vlan_processing); + CU_ASSERT_EQUAL(res, OFP_TEST_LOCAL_HOOK); +#ifdef SP + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->spq_def), ODP_EVENT_INVALID); +#endif /* SP */ + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->outq_def), ODP_EVENT_INVALID); + ifnet->ip_addr = 0; + CU_PASS("ofp_packet_input_local_hook"); +} + +#ifdef SP +static void +test_ofp_packet_input_to_sp(void) +{ + odp_packet_t pkt; + odp_event_t ev; + int res; + + my_test_val = TEST_FORWARD_HOOK; + /* Call ofp_packet_input using a pkt with destination ip + * that does NOT match the local ip on ifnet and NO route is found. + * The packet is forwarded to slow path queue. 
*/ + if (create_odp_packet_ip4(&pkt, test_frame, sizeof(test_frame), 0, 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_packet_input(pkt, interface_queue[port], + ofp_eth_vlan_processing); + + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + CU_ASSERT_NOT_EQUAL(ev = odp_queue_deq(ifnet->spq_def), + ODP_EVENT_INVALID); + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->spq_def), ODP_EVENT_INVALID); + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->outq_def), ODP_EVENT_INVALID); + + if (memcmp(odp_packet_data(odp_packet_from_event(ev)), + in_pkt_data, sizeof(test_frame))) + CU_FAIL("corrupt data sent to slow path"); + odp_packet_free(odp_packet_from_event(ev)); + CU_PASS("ofp_packet_input_to_sp"); +} +#endif /* SP */ + +static void +test_ofp_packet_input_send_arp(void) +{ + odp_packet_t pkt; + odp_event_t ev; + int res; + + /* Call ofp_packet_input using a pkt with destination ip + * that does NOT match the local ip on ifnet and a route is found. + * No ARP is found for gateway IP so an ARP req is sent. 
+ * Function returns OFP_PKT_DROP and packet can be reused.*/ + my_test_val = TEST_FORWARD_HOOK; + + test_ofp_add_route(port, vrf, vlan, dst_ipaddr, 24, 4, + dst_ipaddr + 1); + + if (create_odp_packet_ip4(&pkt, test_frame, sizeof(test_frame), + dst_ipaddr, 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_packet_input(pkt, interface_queue[port], + ofp_eth_vlan_processing); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + odp_packet_free(pkt); + + CU_ASSERT_NOT_EQUAL(ev = odp_queue_deq(ifnet->outq_def), + ODP_EVENT_INVALID); + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->outq_def), ODP_EVENT_INVALID); +#ifdef SP + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->spq_def), ODP_EVENT_INVALID); +#endif /* SP */ + + pkt = odp_packet_from_event(ev); + CU_ASSERT_NOT_EQUAL(pkt, ODP_PACKET_INVALID); + CU_ASSERT_EQUAL(odp_packet_has_arp(pkt), 1); + CU_ASSERT_EQUAL(odp_packet_has_vlan(pkt), 0); + CU_ASSERT_EQUAL(odp_packet_len(pkt), sizeof(struct ofp_arphdr) + + sizeof(struct ofp_ether_header)); + odp_packet_free(odp_packet_from_event(ev)); + ofp_arp_init_tables(); /* to clean saved packet */ + CU_PASS("ofp_packet_input_send_arp"); +} + +static void +test_ofp_packet_input_forwarding_to_output(void) +{ + odp_packet_t pkt; + odp_event_t ev; + int res; + + /* Call ofp_packet_input using a pkt with destination ip + * that does NOT match the local ip on ifnet and a route is found. + * ARP is found for gateway IP. 
+ * Function returns OFP_PKT_PROCESSED and + * packet is forwarded to ofp_ip_output.*/ + unsigned char ll_addr[13] = "123456789012"; + + my_test_val = TEST_FORWARD_HOOK; + + CU_ASSERT_EQUAL( + ofp_ipv4_lookup_mac(dst_ipaddr + 1, ll_addr, ifnet), -1); + CU_ASSERT_EQUAL( + ofp_arp_ipv4_insert(dst_ipaddr + 1, ll_addr, ifnet), 0); + + if (create_odp_packet_ip4(&pkt, test_frame, sizeof(test_frame), + dst_ipaddr, 0)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_packet_input(pkt, interface_queue[port], + ofp_eth_vlan_processing); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + CU_ASSERT_NOT_EQUAL(ev = odp_queue_deq(ifnet->outq_def), + ODP_EVENT_INVALID); + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->outq_def), ODP_EVENT_INVALID); + +#ifdef SP + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->spq_def), ODP_EVENT_INVALID); +#endif /* SP */ + + CU_ASSERT_EQUAL(odp_packet_len(pkt), sizeof(test_frame)); + + pkt = odp_packet_from_event(ev); + struct ofp_ip *ip_in_pkt_data = (struct ofp_ip *)(in_pkt_data + 14); + (ip_in_pkt_data)->ip_ttl--; + ip_in_pkt_data->ip_sum = 0; + ip_in_pkt_data->ip_sum = ofp_in_cksum((uint16_t *)ip_in_pkt_data, + ip_in_pkt_data->ip_hl<<2); + + if (memcmp((uint8_t *)odp_packet_data(pkt) + odp_packet_l3_offset(pkt), + in_pkt_data + OFP_ETHER_HDR_LEN, + sizeof(test_frame) - OFP_ETHER_HDR_LEN)) + CU_FAIL("corrupt l3 + data forwarded"); + struct ofp_ether_header *eth = + (struct ofp_ether_header *)odp_packet_l2_ptr(pkt, NULL); + + if (memcmp(eth->ether_dhost, ll_addr, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address on the forwarded packet"); + CU_ASSERT_EQUAL(eth->ether_type, odp_cpu_to_be_16(OFP_ETHERTYPE_IP)); + + CU_PASS("ofp_packet_input_forwarding_to_output"); +} + +static void +test_ofp_packet_input_gre_processed_inner_pkt_forwarded(void) +{ + odp_packet_t pkt; + odp_event_t ev; + int res; + struct ofp_ether_header *eth; + struct ofp_ip *ip; + struct ofp_ip *ip_encap; + uint32_t dst_ip; + uint8_t dst_mac_addr[6] = {0x11, 0x22, 0x33, 0x44, 
0x55, 0x66}; + + my_test_val = TEST_LOCAL_HOOK_GRE; + /* Call ofp_packet_input using a GRE pkt with destination ip + * that matches the local ip on ifnet, tunnel found, GRE processed. + * Inner packet does not match local ip, route found, + * packet forwarded */ + + ifnet->ip_addr = local_ip; + if (create_odp_packet_ip4(&pkt, gre_frame, sizeof(gre_frame), + local_ip, tun_rem_ip)) { + CU_FAIL("Fail to create packet"); + return; + } + + ip_encap = (struct ofp_ip *)&in_pkt_data[38]; + + dst_ip = local_ip + 10; + test_ofp_add_route(port, vrf, vlan, ip_encap->ip_dst.s_addr, 24, 4, + dst_ip); + ofp_arp_ipv4_insert(dst_ip, dst_mac_addr, ifnet); + + res = ofp_packet_input(pkt, interface_queue[port], + ofp_eth_vlan_processing); + + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + CU_ASSERT_NOT_EQUAL_FATAL(ev = odp_queue_deq(ifnet->outq_def), + ODP_EVENT_INVALID); +#ifdef SP + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->spq_def), ODP_EVENT_INVALID); +#endif + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->outq_def), ODP_EVENT_INVALID); + + pkt = odp_packet_from_event(ev); + eth = odp_packet_data(pkt); + ip = odp_packet_l3_ptr(pkt, NULL); + + if (memcmp(eth->ether_dhost, dst_mac_addr, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, ifnet->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + CU_ASSERT_EQUAL(ip->ip_src.s_addr, ip_encap->ip_src.s_addr); + CU_ASSERT_EQUAL(ip->ip_dst.s_addr, ip_encap->ip_dst.s_addr); + + if (memcmp(ip + (ip->ip_hl << 2), ip_encap + (ip->ip_hl << 2), + odp_be_to_cpu_16(ip_encap->ip_len) - (ip->ip_hl << 2))) + CU_FAIL("corrupt l3 + data"); + + odp_packet_free(odp_packet_from_event(ev)); + ifnet->ip_addr = 0; + CU_PASS("ofp_packet_input_gre_processed_inner_pkt_to_sp"); +} + +static void test_ofp_packet_input_gre_orig_pkt_to_sp(void) +{ + odp_packet_t pkt; + int res; +#ifdef SP + odp_event_t ev; +#endif + + my_test_val = TEST_LOCAL_HOOK_GRE_APP; + /* Call ofp_packet_input using a GRE pkt with destination ip + * 
that matches the local ip on ifnet, tunnel not found, + * packet offered to GRE hook, returns continue. + * Full packet sent to slowpath */ + + ifnet->ip_addr = local_ip; + if (create_odp_packet_ip4(&pkt, gre_frame, sizeof(gre_frame), + local_ip, tun_rem_ip + 1)) { + CU_FAIL("Fail to create packet"); + return; + } + + res = ofp_packet_input(pkt, interface_queue[port], + ofp_eth_vlan_processing); + +#ifdef SP + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + CU_ASSERT_NOT_EQUAL_FATAL(ev = odp_queue_deq(ifnet->spq_def), + ODP_EVENT_INVALID); + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->spq_def), ODP_EVENT_INVALID); + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->outq_def), ODP_EVENT_INVALID); + + if (memcmp(odp_packet_data(odp_packet_from_event(ev)), + in_pkt_data, sizeof(gre_frame))) + CU_FAIL("corrupt data sent to slow path"); + + odp_packet_free(odp_packet_from_event(ev)); + ifnet->ip_addr = 0; + CU_PASS("ofp_packet_input_gre_orig_pkt_to_sp"); +#else + CU_ASSERT_EQUAL(res, OFP_PKT_DROP); + CU_ASSERT_EQUAL(odp_queue_deq(ifnet->outq_def), ODP_EVENT_INVALID); +#endif +} + +static void test_init_packet_input_basic(void) +{ + port = 0; + vlan = 0; + vrf = 0; + local_ip = dst_ipaddr; +} + +static void test_init_packet_input_vrf(void) +{ + port = 1; + vlan = 0; + vrf = 1; + local_ip = dst_ipaddr / 2; + tun_rem_ip += 1; + tun_p2p += 1; + tun_addr += 1; +} + + +/* + * Main + */ +int +main(void) +{ + CU_pSuite ptr_suite = NULL; + int nr_of_failed_tests = 0; + int nr_of_failed_suites = 0; + + (void)gre_frame; + + /* Initialize the CUnit test registry */ + if (CUE_SUCCESS != CU_initialize_registry()) + return CU_get_error(); + + /* add a suite to the registry */ + ptr_suite = CU_add_suite("ofp packet input", init_suite, clean_suite); + if (NULL == ptr_suite) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_init_packet_input_basic)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + 
test_init_ifnet)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_local_hook)) { + CU_cleanup_registry(); + return CU_get_error(); + } +#ifdef SP + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_to_sp)) { + CU_cleanup_registry(); + return CU_get_error(); + } +#endif /* SP */ + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_send_arp)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_forwarding_to_output)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_gre_processed_inner_pkt_forwarded)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_gre_orig_pkt_to_sp)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + ptr_suite = CU_add_suite("test VRF", NULL , NULL); + if (NULL == ptr_suite) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, + test_init_packet_input_vrf)) { + CU_cleanup_registry(); + return CU_get_error(); + } + if (NULL == CU_ADD_TEST(ptr_suite, + test_init_ifnet)) { + CU_cleanup_registry(); + return CU_get_error(); + } + +#ifdef SP + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_to_sp)) { + CU_cleanup_registry(); + return CU_get_error(); + } +#endif /* SP */ + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_send_arp)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_forwarding_to_output)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_gre_processed_inner_pkt_forwarded)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_ofp_packet_input_gre_orig_pkt_to_sp)) { + CU_cleanup_registry(); + return 
CU_get_error(); + } + +#if OFP_TESTMODE_AUTO + CU_set_output_filename("CUnit-PKT-IN"); + CU_automated_run_tests(); +#else + /* Run all tests using the CUnit Basic interface */ + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); +#endif + + nr_of_failed_tests = CU_get_number_of_tests_failed(); + nr_of_failed_suites = CU_get_number_of_suites_failed(); + CU_cleanup_registry(); + + return (nr_of_failed_suites > 0 ? + nr_of_failed_suites : nr_of_failed_tests); +} diff --git a/test/cunit/ofp_test_packet_output.c b/test/cunit/ofp_test_packet_output.c new file mode 100644 index 00000000..b7da8206 --- /dev/null +++ b/test/cunit/ofp_test_packet_output.c @@ -0,0 +1,488 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef OFP_TESTMODE_AUTO +#define OFP_TESTMODE_AUTO 1 +#endif + +#include +#include +#include +#include + +#if OFP_TESTMODE_AUTO +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_raw_frames.h" + +#define fail_with_odp(msg) do { OFP_ERR(msg); CU_FAIL(msg); } while (0) + +/* + * Test data + */ +static uint8_t test_frame[140] = { +0x40, 0x01, 0xec, 0x36, 0x93, 0x18, 0xc8, 0x35, +0xb8, 0x28, 0x91, 0x3e, 0x08, 0x00, 0x45, 0x00, +0x00, 0x7a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, +0xf0, 0x71, 0x0a, 0x00, 0x1a, 0x01, 0x0a, 0x00, +0x1c, 0x01, 0x00, 0xa1, 0xff, 0xfe, 0x00, 0x66, +0xd4, 0xc3, 0x30, 0x5c, 0x02, 0x01, 0x01, 0x04, +0x06, 0x4e, 0x45, 0x54, 0x4d, 0x41, 0x4e, 0xa2, +0x4f, 0x02, 0x03, 0x0f, 0xb0, 0xc7, 0x02, 0x01, +0x00, 0x02, 0x01, 0x00, 0x30, 0x42, 0x30, 0x14, +0x06, 0x0f, 0x2b, 0x06, 0x01, 0x04, 0x01, 0x81, +0x41, 0x81, 0x31, 0x01, 0x02, 0x02, 0x01, 0x07, +0x00, 0x02, 0x01, 0x01, 0x30, 0x14, 0x06, 0x0f, +0x2b, 0x06, 0x01, 0x04, 0x01, 0x81, 0x41, 0x81, +0x31, 0x01, 0x02, 0x02, 0x01, 0x08, 0x00, 0x02, +0x01, 0x00, 0x30, 
0x14, 0x06, 0x0f, 0x2b, 0x06, +0x01, 0x04, 0x01, 0x81, 0x41, 0x81, 0x31, 0x01, +0x02, 0x02, 0x01, 0x09, 0x00, 0x02, 0x01, 0x00, +0xa9, 0x59, 0xcd, 0x58 +}; + +#define SHM_PKT_POOL_SIZE (32*2048) +#define SHM_PKT_POOL_BUF_SIZE 3000 + +static uint32_t port = 0, vlan = 0, vrf = 0, def_mtu = 1500; +static uint32_t dev_ip = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */ +static uint8_t dev_mac[6] = {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; +static struct ofp_ifnet *dev; +static uint8_t orig_pkt_data[SHM_PKT_POOL_BUF_SIZE]; +uint16_t greid = 100; +static uint32_t tun_rem_ip = 0x660AA8C0; /* C0.A8.0A.66 = 192.168.10.102 */ +static uint8_t tun_rem_mac[6] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; +static uint32_t tun_addr = 0x010A0A0A; /* 0A.0A.0A.01 = 10.10.10.1 */ +static uint32_t tun_p2p = 0x020A0A0A; /* 0A.0A.0A.02 = 10.10.10.2 */ +static uint16_t tun_mask = 32; /* p-t-p */ + +/* + * INIT + */ +static void init_ifnet(void) +{ + char str[256]; + + ofp_config_interface_up_v4(port, vlan, vrf, dev_ip, 24); + + dev = ofp_get_ifnet(port, vlan); + memcpy(dev->mac, dev_mac, OFP_ETHER_ADDR_LEN); + dev->if_mtu = def_mtu; +#ifdef SP + dev->linux_index = port + 3; /* an if index of Linux != port val */ + ofp_update_ifindex_lookup_tab(dev); +#endif /* SP */ + + dev->pkt_pool = odp_pool_lookup("packet_pool"); + + sprintf(str, "out default queue:%d", port); + dev->outq_def = odp_queue_create(str, + ODP_QUEUE_TYPE_POLL, + NULL); + if (dev->outq_def == ODP_QUEUE_INVALID) { + fail_with_odp("Out default queue create failed.\n"); + return; + } + + ofp_config_interface_up_tun(GRE_PORTS, 100, 0, dev_ip, tun_rem_ip, + tun_p2p, tun_addr, tun_mask); + + /* No nexthop for tunnel remote address */ + ofp_config_interface_up_tun(GRE_PORTS, 200, 0, dev_ip, 0x08070605, + tun_p2p + 1, tun_addr + 1, tun_mask); +} + +static int +init_suite(void) +{ + odp_pool_t pool; + odp_pool_param_t pool_params; + ofp_pkt_hook pkt_hook[OFP_HOOK_MAX]; + + /* Init ODP before calling anything else */ + if 
(odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local()) { + OFP_ERR("Error: ODP local init failed.\n"); + return -1; + } + + ofp_portconf_alloc_shared_memory(); + ofp_route_alloc_shared_memory(); + ofp_rt_lookup_alloc_shared_memory(); + ofp_avl_alloc_shared_memory(); + ofp_arp_alloc_shared_memory(); + ofp_pcap_alloc_shared_memory(); + ofp_timer_init(OFP_TIMER_RESOLUTION_US, + OFP_TIMER_MIN_US, + OFP_TIMER_MAX_US, + OFP_TIMER_TMO_COUNT); + + memset(pkt_hook, 0, sizeof(pkt_hook)); + ofp_hook_alloc_shared_memory(&pkt_hook[0]); + + ofp_init_ifnet_data(); + ofp_route_init(); + ofp_arp_global_init(); + ofp_arp_local_init(); + + pool_params.pkt.seg_len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.num = SHM_PKT_POOL_SIZE/SHM_PKT_POOL_BUF_SIZE; + pool_params.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet_pool", ODP_SHM_NULL, + &pool_params); + + if (pool == ODP_POOL_INVALID) { + OFP_ERR("Error: packet pool create failed.\n"); + return -1; + } + + odp_shm_print_all(); + odp_pool_print(pool); + + init_ifnet(); + + ofp_arp_ipv4_insert(tun_rem_ip, tun_rem_mac, dev); + + return 0; +} + +static int +clean_suite(void) +{ + return 0; +} + +static int +create_odp_packet_ip4(odp_packet_t *opkt, uint8_t *pkt_data, int plen, + uint32_t dst_addr) +{ + odp_pool_t pool; + uint8_t *buf; + odp_packet_t pkt = ODP_PACKET_INVALID; + struct ofp_ip *iphdr; + + memset(orig_pkt_data, 0x0, sizeof(orig_pkt_data)); + + pool = odp_pool_lookup("packet_pool"); + if (pool == ODP_POOL_INVALID) { + fail_with_odp("ODP packet_pool not found\n"); + return -1; + } + + pkt = odp_packet_alloc(pool, plen); + if (pkt == ODP_PACKET_INVALID) { + fail_with_odp("ODP packet alloc failed"); + return -1; + } + + buf = odp_packet_data(pkt); + + if (odp_packet_copydata_in(pkt, 0, plen, pkt_data) < 0) { + fail_with_odp("Packet data copy failed\n"); + return -1; + }; + + iphdr = 
(struct ofp_ip *)&buf[OFP_ETHER_HDR_LEN]; + + /* changes to the default packet. Recalculate ip checksum */ + if (dst_addr) { + iphdr->ip_dst.s_addr = dst_addr; + iphdr->ip_sum = 0; + iphdr->ip_sum = + ofp_in_cksum((uint16_t *)iphdr, iphdr->ip_hl<<2); + } + /* END OF changes to the default packet */ + + odp_packet_has_eth_set(pkt, 1); + odp_packet_has_l2_set(pkt, 1); + odp_packet_has_ipv4_set(pkt, 1); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, OFP_ETHER_HDR_LEN); + odp_packet_l4_offset_set(pkt, OFP_ETHER_HDR_LEN + (iphdr->ip_hl<<2)); + + *opkt = pkt; + + odp_packet_copydata_out(pkt, 0, plen, orig_pkt_data); + + return 0; +} + +static int +create_odp_packet_ip6(odp_packet_t *opkt, uint8_t *pkt_data, int plen) +{ + odp_pool_t pool; + odp_packet_t pkt = ODP_PACKET_INVALID; + + memset(orig_pkt_data, 0x0, sizeof(orig_pkt_data)); + + pool = odp_pool_lookup("packet_pool"); + if (pool == ODP_POOL_INVALID) { + fail_with_odp("ODP packet_pool not found\n"); + return -1; + } + + pkt = odp_packet_alloc(pool, plen); + if (pkt == ODP_PACKET_INVALID) { + fail_with_odp("ODP packet alloc failed"); + return -1; + } + + if (odp_packet_copydata_in(pkt, 0, plen, pkt_data) < 0) { + fail_with_odp("Packet data copy failed\n"); + return -1; + }; + + odp_packet_has_eth_set(pkt, 1); + odp_packet_has_l2_set(pkt, 1); + odp_packet_has_ipv6_set(pkt, 1); + odp_packet_l2_offset_set(pkt, 0); + odp_packet_l3_offset_set(pkt, OFP_ETHER_HDR_LEN); + + *opkt = pkt; + + odp_packet_copydata_out(pkt, 0, plen, orig_pkt_data); + + return 0; +} + +/* + * Tests + */ +static void +test_packet_output_gre(void) +{ + odp_packet_t pkt = ODP_PACKET_INVALID; + odp_event_t ev; + int res; + struct ofp_ether_header *eth; + struct ofp_ip *ip; + struct ofp_ip *ip_orig; + struct ofp_greip *greip; + + if (create_odp_packet_ip4(&pkt, test_frame, sizeof(test_frame), + tun_p2p)) { + CU_FAIL("Fail to create packet"); + return; + } + + ofp_set_debug_flags(0x1F); + + /* + * Packet's destination is GRE 
tunnel's p2p address, next hop is GRE + * interface. GRE+IP header is prepended. Packet's new destination is + * link local. Packet is put into output queue. + */ + res = ofp_ip_output(pkt, NULL); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt = odp_packet_from_event(ev); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt), + sizeof(test_frame) + 20 + 4); + + eth = odp_packet_l2_ptr(pkt, NULL); + if (memcmp(eth->ether_dhost, tun_rem_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt, NULL); + CU_ASSERT_EQUAL(ip->ip_src.s_addr, dev_ip); + CU_ASSERT_EQUAL(ip->ip_dst.s_addr, tun_rem_ip); + CU_ASSERT_EQUAL(ip->ip_p, OFP_IPPROTO_GRE); + + greip = (struct ofp_greip *)ip; + CU_ASSERT_EQUAL(greip->gi_g.flags, 0); + CU_ASSERT_EQUAL(greip->gi_g.ptype, + odp_cpu_to_be_16(OFP_ETHERTYPE_IP)); + + /* inner ip */ + ip = (struct ofp_ip *)(greip + 1); + ip_orig = (struct ofp_ip *)(&orig_pkt_data[OFP_ETHER_HDR_LEN]); + if (memcmp(ip, ip_orig, odp_be_to_cpu_16(ip_orig->ip_len))) + CU_FAIL("Inner IP packet error."); +} + +static void +test_packet_output_gre_no_nexthop(void) +{ + odp_packet_t pkt = ODP_PACKET_INVALID; + odp_event_t ev; + int res; + + if (create_odp_packet_ip4(&pkt, test_frame, sizeof(test_frame), + tun_p2p + 1)) { + CU_FAIL("Fail to create packet"); + return; + } + + /* + * Packet's destination is GRE tunnel's p2p address, no next hop + * is found for tunnel destination address, packet is dropped. 
+ */ + res = ofp_ip_output(pkt, NULL); + CU_ASSERT_EQUAL(res, OFP_PKT_DROP); + + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); +} + +#ifdef INET6 +static void +test_packet_output_ipv6_to_gre(void) +{ + odp_packet_t pkt = ODP_PACKET_INVALID; + odp_event_t ev; + int res; + struct ofp_route_msg msg; + struct ofp_in6_addr addr6; + struct ofp_ether_header *eth; + struct ofp_ip6_hdr *ip6, *ip6_orig; + struct ofp_ip *ip; + struct ofp_greip *greip; + + (void)tcp_frame; + (void)icmp_frame; + (void)arp_frame; + (void)icmp6_frame; + + if (create_odp_packet_ip6(&pkt, ip6udp_frame, sizeof(ip6udp_frame))) { + CU_FAIL("Fail to create packet"); + return; + } + + ip6 = odp_packet_l3_ptr(pkt, NULL); + addr6.__u6_addr.__u6_addr16[0] = + ip6->ip6_dst.__u6_addr.__u6_addr16[0]; + + memset(&msg, 0, sizeof(msg)); + msg.type = OFP_ROUTE6_ADD; + msg.vrf = 0; + memcpy(msg.dst6, &addr6, 16); + msg.masklen = 64; + /* gw = 0 */ + msg.port = GRE_PORTS; + msg.vlan = 100; + ofp_set_route(&msg); + + res = ofp_ip6_output(pkt, NULL); + CU_ASSERT_EQUAL(res, OFP_PKT_PROCESSED); + + ev = odp_queue_deq(dev->outq_def); + CU_ASSERT_NOT_EQUAL_FATAL(ev, ODP_EVENT_INVALID); + + pkt = odp_packet_from_event(ev); + CU_ASSERT_EQUAL_FATAL(odp_packet_len(pkt), + sizeof(ip6udp_frame) + 20 + 4); + + eth = odp_packet_l2_ptr(pkt, NULL); + if (memcmp(eth->ether_dhost, tun_rem_mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad destination mac address."); + if (memcmp(eth->ether_shost, dev->mac, OFP_ETHER_ADDR_LEN)) + CU_FAIL("Bad source mac address."); + + ip = odp_packet_l3_ptr(pkt, NULL); + CU_ASSERT_EQUAL(ip->ip_src.s_addr, dev_ip); + CU_ASSERT_EQUAL(ip->ip_dst.s_addr, tun_rem_ip); + CU_ASSERT_EQUAL(ip->ip_p, OFP_IPPROTO_GRE); + + greip = (struct ofp_greip *)ip; + CU_ASSERT_EQUAL(greip->gi_g.flags, 0); + CU_ASSERT_EQUAL(greip->gi_g.ptype, + odp_cpu_to_be_16(OFP_ETHERTYPE_IPV6)); + + /* inner ip */ + ip6 = (struct ofp_ip6_hdr *)(greip + 1); + ip6_orig = (struct ofp_ip6_hdr *) + 
(&orig_pkt_data[OFP_ETHER_HDR_LEN]); + if (memcmp(ip6, ip6_orig, + odp_be_to_cpu_16(ip6_orig->ofp_ip6_plen) + sizeof(*ip6))) + CU_FAIL("Inner IP packet error."); +} +#endif +/* + * Main + */ +int +main(void) +{ + CU_pSuite ptr_suite = NULL; + int nr_of_failed_tests = 0; + int nr_of_failed_suites = 0; + + /* Initialize the CUnit test registry */ + if (CUE_SUCCESS != CU_initialize_registry()) + return CU_get_error(); + + /* add a suite to the registry */ + ptr_suite = CU_add_suite("ofp packet out", init_suite, clean_suite); + if (NULL == ptr_suite) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_packet_output_gre)) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (NULL == CU_ADD_TEST(ptr_suite, + test_packet_output_gre_no_nexthop)) { + CU_cleanup_registry(); + return CU_get_error(); + } +#ifdef INET6 + if (NULL == CU_ADD_TEST(ptr_suite, + test_packet_output_ipv6_to_gre)) { + CU_cleanup_registry(); + return CU_get_error(); + } +#endif + +#if OFP_TESTMODE_AUTO + CU_set_output_filename("CUnit-PKT-OUT"); + CU_automated_run_tests(); +#else + /* Run all tests using the CUnit Basic interface */ + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); +#endif + + nr_of_failed_tests = CU_get_number_of_tests_failed(); + nr_of_failed_suites = CU_get_number_of_suites_failed(); + CU_cleanup_registry(); + + return (nr_of_failed_suites > 0 ? + nr_of_failed_suites : nr_of_failed_tests); +} diff --git a/test/cunit/ofp_test_port_conf.c b/test/cunit/ofp_test_port_conf.c new file mode 100644 index 00000000..3ebedb82 --- /dev/null +++ b/test/cunit/ofp_test_port_conf.c @@ -0,0 +1,342 @@ +/* Copyright (c) 2014, Nokia + * Copyright (c) 2014, ENEA Software AB + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef OFP_TESTMODE_AUTO +#define OFP_TESTMODE_AUTO 1 +#endif + +#include +#include +#include +#include + +#if OFP_TESTMODE_AUTO +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Test data + */ +#define SHM_PKT_POOL_SIZE (32*2048) +#define SHM_PKT_POOL_BUF_SIZE 3000 + +uint8_t ifmac[6] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; +uint32_t ifmtu = 1480; +uint8_t link_local[16]; +#ifdef SP +int sp_status = OFP_SP_DOWN; +#endif + +/* + * INIT + */ +static int +init_suite(void) +{ + odp_pool_t pool; + odp_pool_param_t pool_params; + ofp_pkt_hook pkt_hook[OFP_HOOK_MAX]; + struct ofp_ifnet *dev; + + /* Init ODP before calling anything else */ + if (odp_init_global(NULL, NULL)) { + OFP_ERR("Error: ODP global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local()) { + OFP_ERR("Error: ODP local init failed.\n"); + return -1; + } + + ofp_portconf_alloc_shared_memory(); + ofp_route_alloc_shared_memory(); + ofp_rt_lookup_alloc_shared_memory(); + ofp_avl_alloc_shared_memory(); + ofp_arp_alloc_shared_memory(); + ofp_pcap_alloc_shared_memory(); + ofp_timer_init(OFP_TIMER_RESOLUTION_US, + OFP_TIMER_MIN_US, + OFP_TIMER_MAX_US, + OFP_TIMER_TMO_COUNT); + + memset(pkt_hook, 0, sizeof(pkt_hook)); + ofp_hook_alloc_shared_memory(&pkt_hook[0]); + + ofp_init_ifnet_data(); + ofp_route_init(); + ofp_arp_global_init(); + ofp_arp_local_init(); + + pool_params.pkt.seg_len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.len = SHM_PKT_POOL_BUF_SIZE; + pool_params.pkt.num = SHM_PKT_POOL_SIZE/SHM_PKT_POOL_BUF_SIZE; + pool_params.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet_pool", ODP_SHM_NULL, + &pool_params); + + if (pool == ODP_POOL_INVALID) { + OFP_ERR("Error: packet pool create failed.\n"); + return -1; + } + + odp_shm_print_all(); + odp_pool_print(pool); + + dev = ofp_get_ifnet(0, 
0); + dev->if_mtu = ifmtu; + memcpy(dev->mac, ifmac, OFP_ETHER_ADDR_LEN); + ofp_mac_to_link_local(ifmac, link_local); + + return 0; +} + +static int +clean_suite(void) +{ + return 0; +} + +static void assert_next_hop(struct ofp_nh_entry *nh, uint32_t gw, + uint16_t port, uint16_t vlan) +{ + CU_ASSERT_PTR_NOT_NULL_FATAL(nh); + CU_ASSERT_EQUAL(nh->gw, gw); + CU_ASSERT_EQUAL(nh->port, port); + CU_ASSERT_EQUAL(nh->vlan, vlan); +} + +static void assert_dev(struct ofp_ifnet *dev, int port, uint16_t vlan, + uint16_t vrf, uint32_t ifaddr, uint32_t ifmtu, + int masklen, uint32_t bcast, uint8_t *link_local) +{ + CU_ASSERT_PTR_NOT_NULL_FATAL(dev); + CU_ASSERT_EQUAL(dev->port, port); + CU_ASSERT_EQUAL(dev->vlan, vlan); + CU_ASSERT_EQUAL(dev->vrf, vrf); + CU_ASSERT_EQUAL(dev->if_mtu, ifmtu); + CU_ASSERT_EQUAL(dev->ip_addr, ifaddr); + CU_ASSERT_EQUAL(dev->bcast_addr, bcast); + CU_ASSERT_EQUAL(dev->masklen, masklen); +#ifdef SP + CU_ASSERT_EQUAL(dev->sp_status, sp_status); +#endif +#ifdef INET6 + if (memcmp(dev->link_local, link_local, 16)) + CU_FAIL("Link local address"); +#endif /* INET6 */ +} + +/* + * Tests + */ + +static void +test_sinlge_port_basic(void) +{ + int port = 0; + uint16_t vlan = 0; + uint16_t vrf = 1; + uint32_t ifaddr = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */ + int masklen = 24; + uint32_t bcast = ifaddr | odp_cpu_to_be_32(0xFF); + struct ofp_ifnet *dev; + struct ofp_nh_entry *nh; + const char *res; + + res = ofp_config_interface_up_v4(port, vlan, vrf, ifaddr, masklen); + CU_ASSERT_PTR_NULL_FATAL(res); + + dev = ofp_get_ifnet(port, vlan); + assert_dev(dev, port, vlan, vrf, ifaddr, ifmtu, masklen, bcast, + link_local); + nh = ofp_get_next_hop(vrf, ifaddr, NULL); + assert_next_hop(nh, 0, port, vlan); + + + res = ofp_config_interface_down(port, vlan); + CU_ASSERT_PTR_NULL_FATAL(res); + + dev = ofp_get_ifnet(port, vlan); + assert_dev(dev, port, vlan, vrf, 0, ifmtu, masklen, bcast, link_local); + nh = ofp_get_next_hop(vrf, ifaddr, NULL); + 
CU_ASSERT_PTR_NULL(nh);
+}
+
+static void
+test_two_ports_vlan(void)
+{
+	int port = 0;
+	uint16_t vlan = 0, vlan1 = 100;
+	uint16_t vrf = 1, vrf1 = 2;
+	uint32_t ifaddr = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */
+	uint32_t ifaddr1 = 0x650AA8C1;
+	int masklen = 24, masklen1 = 20;
+	uint32_t bcast = ifaddr | odp_cpu_to_be_32(0xFF);
+	uint32_t bcast1 = ifaddr1 | odp_cpu_to_be_32(0xFFF);
+	struct ofp_ifnet *dev;
+	struct ofp_nh_entry *nh;
+	const char *res;
+
+	res = ofp_config_interface_up_v4(port, vlan, vrf, ifaddr, masklen);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+	res = ofp_config_interface_up_v4(port, vlan1, vrf1, ifaddr1, masklen1);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+
+	dev = ofp_get_ifnet(port, vlan);
+	CU_ASSERT_PTR_NOT_NULL_FATAL(dev);
+	assert_dev(dev, port, vlan, vrf, ifaddr, ifmtu, masklen, bcast,
+		   link_local);
+	nh = ofp_get_next_hop(vrf, ifaddr, NULL);
+	assert_next_hop(nh, 0, port, vlan);
+
+	dev = ofp_get_ifnet(port, vlan1);
+	assert_dev(dev, port, vlan1, vrf1, ifaddr1, ifmtu, masklen1, bcast1,
+		   link_local);
+	nh = ofp_get_next_hop(vrf1, ifaddr1, NULL);
+	assert_next_hop(nh, 0, port, vlan1);
+
+	res = ofp_config_interface_down(port, vlan);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+	res = ofp_config_interface_down(port, vlan1);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+
+	dev = ofp_get_ifnet(port, vlan);
+	assert_dev(dev, port, vlan, vrf, 0, ifmtu, masklen, bcast, link_local);
+	nh = ofp_get_next_hop(vrf, ifaddr, NULL);
+	CU_ASSERT_PTR_NULL(nh);
+
+	dev = ofp_get_ifnet(port, vlan1);
+	CU_ASSERT_PTR_NULL_FATAL(dev);
+	nh = ofp_get_next_hop(vrf1, ifaddr1, NULL);
+	CU_ASSERT_PTR_NULL(nh);
+}
+
+static void
+test_gre_port(void)
+{
+	int port = 0;
+	uint16_t vlan = 10;
+	uint16_t vrf = 1;
+	uint32_t ifaddr = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */
+	int masklen = 24, gre_ml = 32;
+	uint16_t greid = 100;
+	uint32_t greaddr = 0x010A0A0A;
+	uint32_t grep2p = 0x020A0A0A;
+	struct ofp_ifnet *dev;
+	struct ofp_nh_entry *nh;
+	const char *res;
+
+	res = ofp_config_interface_up_v4(port, vlan, vrf, ifaddr, masklen);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+
+	/* Non-existent endpoint in vrf */
+	res = ofp_config_interface_up_tun(GRE_PORTS, greid, vrf + 1, ifaddr,
+					  ifaddr + 1, greaddr, grep2p,
+					  gre_ml);
+	CU_ASSERT_PTR_NOT_NULL_FATAL(res);
+	dev = ofp_get_ifnet(GRE_PORTS, greid);
+	CU_ASSERT_PTR_NULL_FATAL(dev);
+
+	/* Successful test */
+	res = ofp_config_interface_up_tun(GRE_PORTS, greid, vrf, ifaddr,
+					  ifaddr + 1, grep2p, greaddr,
+					  gre_ml);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+	dev = ofp_get_ifnet(GRE_PORTS, greid);
+	CU_ASSERT_PTR_NOT_NULL_FATAL(dev);
+	CU_ASSERT_EQUAL(dev->ip_local, ifaddr);
+	CU_ASSERT_EQUAL(dev->ip_remote, ifaddr + 1);
+	CU_ASSERT_EQUAL(dev->ip_addr, greaddr);
+	CU_ASSERT_EQUAL(dev->ip_p2p, grep2p);
+	CU_ASSERT_EQUAL(dev->masklen, gre_ml);
+	CU_ASSERT_EQUAL(dev->if_mtu, ifmtu - 24);
+
+	nh = ofp_get_next_hop(vrf, grep2p, NULL);
+	assert_next_hop(nh, 0, GRE_PORTS, greid);
+
+	res = ofp_config_interface_down(port, vlan);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+	res = ofp_config_interface_down(GRE_PORTS, greid);
+	CU_ASSERT_PTR_NULL_FATAL(res);
+	dev = ofp_get_ifnet(GRE_PORTS, greid);
+	CU_ASSERT_PTR_NULL_FATAL(dev);
+}
+
+/*
+ * Main
+ */
+int
+main(void)
+{
+	CU_pSuite ptr_suite = NULL;
+	int nr_of_failed_tests = 0;
+	int nr_of_failed_suites = 0;
+
+	/* Initialize the CUnit test registry */
+	if (CUE_SUCCESS != CU_initialize_registry())
+		return CU_get_error();
+
+	/* add a suite to the registry */
+	ptr_suite = CU_add_suite("ofp port config", init_suite, clean_suite);
+	if (NULL == ptr_suite) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	if (NULL == CU_ADD_TEST(ptr_suite,
+				test_sinlge_port_basic)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	if (NULL == CU_ADD_TEST(ptr_suite,
+				test_two_ports_vlan)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	if (NULL == CU_ADD_TEST(ptr_suite,
+				test_gre_port)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+#if OFP_TESTMODE_AUTO
+	CU_set_output_filename("CUnit-Port-conf");
+	CU_automated_run_tests();
+#else
+	/* Run all tests using the CUnit Basic interface */
+	CU_basic_set_mode(CU_BRM_VERBOSE);
+	CU_basic_run_tests();
+#endif
+
+	nr_of_failed_tests = CU_get_number_of_tests_failed();
+	nr_of_failed_suites = CU_get_number_of_suites_failed();
+	CU_cleanup_registry();
+
+	return (nr_of_failed_suites > 0 ?
+		nr_of_failed_suites : nr_of_failed_tests);
+}
diff --git a/test/cunit/ofp_test_stat.c b/test/cunit/ofp_test_stat.c
new file mode 100644
index 00000000..be9e2d75
--- /dev/null
+++ b/test/cunit/ofp_test_stat.c
@@ -0,0 +1,149 @@
+/* Copyright (c) 2014, Nokia
+ * Copyright (c) 2014, ENEA Software AB
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef OFP_TESTMODE_AUTO
+#define OFP_TESTMODE_AUTO 1
+#endif
+
+/*
+ * NOTE(review): the <...> targets of the bare #include lines in this file
+ * were lost when the patch was flattened; restore them from the repository.
+ */
+#include
+#include
+
+#if OFP_TESTMODE_AUTO
+#include <CUnit/Automated.h>
+#else
+#include <CUnit/Basic.h>
+#endif
+
+#include
+#include "../../src/ofp_stat.c"
+
+/*
+ * INIT
+ */
+
+/*
+ * Suite setup: bring up ODP globally and for this thread, then allocate the
+ * statistics shared memory. Returns 0 on success, -1 if an ODP init fails.
+ */
+static int
+init_suite(void)
+{
+	/* Init ODP before calling anything else */
+	if (odp_init_global(NULL, NULL)) {
+		OFP_ERR("Error: ODP global init failed.\n");
+		return -1;
+	}
+
+	/* Init this thread */
+	if (odp_init_local()) {
+		OFP_ERR("Error: ODP local init failed.\n");
+		return -1;
+	}
+
+	ofp_stat_alloc_shared_memory();
+
+	odp_shm_print_all();
+
+	return 0;
+}
+
+/* Suite teardown: no cleanup is performed; always returns 0. */
+static int
+clean_suite(void)
+{
+	return 0;
+}
+
+/*
+ * Testcases
+ */
+
+/* The lookup returns nothing to check; reaching CU_PASS is the test. */
+static void
+test_ofp_stat_lookup_shared_memory(void)
+{
+	ofp_stat_lookup_shared_memory();
+	/* ODP_ABORT if problem happens */
+
+	CU_PASS("Stat shm lookup successful");
+}
+
+/*
+ * The rx_fp counter of the calling core must start at 0 and read back 4
+ * after OFP_UPDATE_PACKET_STAT(rx_fp, 4).
+ */
+static void
+test_packet_statistics(void)
+{
+	struct ofp_packet_stat *st;
+
+	st = ofp_get_packet_statistics();
+	/* Stop here instead of dereferencing a NULL statistics pointer. */
+	CU_ASSERT_PTR_NOT_NULL_FATAL(st);
+	CU_ASSERT_EQUAL(st->per_core[odp_cpu_id()].rx_fp, 0);
+
+	OFP_UPDATE_PACKET_STAT(rx_fp, 4);
+	CU_ASSERT_EQUAL(st->per_core[odp_cpu_id()].rx_fp, 4);
+}
+
+/*
+ * Main
+ */
+
+/*
+ * Register the "ofp stat" suite, run it, and return the number of failed
+ * suites if any failed, otherwise the number of failed tests.
+ */
+int
+main(void)
+{
+	CU_pSuite ptr_suite = NULL;
+	int nr_of_failed_tests = 0;
+	int nr_of_failed_suites = 0;
+
+	/* Initialize the CUnit test registry */
+	if (CUE_SUCCESS != CU_initialize_registry())
+		return CU_get_error();
+
+	/* add a suite to the registry */
+	ptr_suite = CU_add_suite("ofp stat", init_suite, clean_suite);
+	if (NULL == ptr_suite) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	if (NULL == CU_ADD_TEST(ptr_suite,
+				test_ofp_stat_lookup_shared_memory)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_packet_statistics)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+
+#if OFP_TESTMODE_AUTO
+	CU_set_output_filename("CUnit-Stat");
+	CU_automated_run_tests();
+#else
+	/* Run all tests using the CUnit Basic interface */
+	CU_basic_set_mode(CU_BRM_VERBOSE);
+	CU_basic_run_tests();
+#endif
+
+	nr_of_failed_tests = CU_get_number_of_tests_failed();
+	nr_of_failed_suites = CU_get_number_of_suites_failed();
+	CU_cleanup_registry();
+
+	return (nr_of_failed_suites > 0 ?
+		nr_of_failed_suites : nr_of_failed_tests);
+}
diff --git a/test/cunit/ofp_test_util.c b/test/cunit/ofp_test_util.c
new file mode 100644
index 00000000..681d880b
--- /dev/null
+++ b/test/cunit/ofp_test_util.c
@@ -0,0 +1,585 @@
+/* Copyright (c) 2014, Nokia
+ * Copyright (c) 2014, ENEA Software AB
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef OFP_TESTMODE_AUTO
+#define OFP_TESTMODE_AUTO 1
+#endif
+
+/*
+ * NOTE(review): the <...> targets of the bare #include lines in this file
+ * were lost when the patch was flattened; restore them from the repository.
+ */
+#include
+#include
+#include
+#include
+
+#if OFP_TESTMODE_AUTO
+#include <CUnit/Automated.h>
+#else
+#include <CUnit/Basic.h>
+#endif
+
+#include
+#include
+#include "ofpi_log.h"
+#include "../../src/ofp_util.c"
+#include "fragmented_packet.h"
+#include "cksum_packets.h"
+
+/*
+ * Test data
+ */
+char testFileName[] = "testbuf.txt";
+uint32_t ipaddr = 0x650AA8C0; /* C0.A8.0A.65 = 192.168.10.101 */
+uint8_t ip6addr[16] = {
+0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef
+};
+uint8_t macaddr[6] = { 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA };
+
+/*
+ * INIT
+ */
+#define SHM_PKT_POOL_SIZE (32*2048)
+#define SHM_PKT_POOL_SEG_SIZE 1856
+#define SHM_PKT_POOL_BUF_SIZE 3000
+
+/*
+ * Suite setup: initialise ODP globally and for this thread, then create the
+ * "packet_pool" packet pool that create_odp_packet_ip4() allocates from.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+init_suite(void)
+{
+	odp_pool_param_t pool_params;
+	odp_pool_t pool;
+
+	/* Presumably references the arrays only to avoid unused warnings. */
+	(void)pkt1_frag1;
+	(void)pkt1_frag2;
+
+	/* Init ODP before calling anything else */
+	if (odp_init_global(NULL, NULL)) {
+		OFP_ERR("Error: ODP global init failed.\n");
+		return -1;
+	}
+
+	/* Init this thread */
+	if (odp_init_local()) {
+		OFP_ERR("Error: ODP local init failed.\n");
+		return -1;
+	}
+
+	/* Zero the whole struct first: only some fields are set below. */
+	memset(&pool_params, 0, sizeof(pool_params));
+	pool_params.pkt.seg_len = SHM_PKT_POOL_SEG_SIZE;
+	pool_params.pkt.len = SHM_PKT_POOL_BUF_SIZE;
+	pool_params.pkt.num = SHM_PKT_POOL_SIZE/SHM_PKT_POOL_BUF_SIZE;
+	pool_params.type = ODP_POOL_PACKET;
+
+	pool = odp_pool_create("packet_pool", ODP_SHM_NULL,
+			       &pool_params);
+	if (pool == ODP_POOL_INVALID) {
+		OFP_ERR("Error: packet pool create failed.\n");
+		return -1;
+	}
+
+	odp_pool_print(pool);
+
+	return 0;
+}
+
+/* Suite teardown: no cleanup is performed; always returns 0. */
+static int
+clean_suite(void)
+{
+	return 0;
+}
+
+/*
+ * Helpers
+ */
+/* Log msg through OFP_ERR and record a CUnit failure with the same text. */
+#define fail_with_odp(msg) do { OFP_ERR(msg); CU_FAIL(msg); } while (0)
+
+/*
+ * Allocate a packet from "packet_pool", copy plen bytes of pkt_data into it
+ * and mark it as Ethernet + IPv4 (L2 at offset 0, L3 at OFP_ETHER_HDR_LEN).
+ * On success the packet is stored in *opkt and 0 is returned; on failure a
+ * CUnit failure is recorded and -1 is returned.
+ */
+static int
+create_odp_packet_ip4(odp_packet_t *opkt, uint8_t *pkt_data, int plen)
+{
+	odp_pool_t pool;
+	odp_packet_t pkt = ODP_PACKET_INVALID;
+
+	pool = odp_pool_lookup("packet_pool");
+	if (pool == ODP_POOL_INVALID) {
+		fail_with_odp("ODP packet_pool not found\n");
+		return -1;
+	}
+
+	pkt = odp_packet_alloc(pool, plen);
+	if (pkt == ODP_PACKET_INVALID) {
+		fail_with_odp("ODP packet alloc failed");
+		return -1;
+	}
+
+	if (odp_packet_copydata_in(pkt, 0, plen, pkt_data) < 0) {
+		fail_with_odp("Packet data copy failed\n");
+		/* The packet never reaches the caller, so release it here. */
+		odp_packet_free(pkt);
+		return -1;
+	}
+
+	odp_packet_has_eth_set(pkt, 1);
+	odp_packet_has_ipv4_set(pkt, 1);
+	odp_packet_l2_offset_set(pkt, 0);
+	odp_packet_l3_offset_set(pkt, OFP_ETHER_HDR_LEN);
+
+	*opkt = pkt;
+
+	return 0;
+}
+
+/*
+ * Testcases
+ */
+
+/* ofp_in_cksum() over the four bytes of ipaddr must give 0xF234. */
+static void
+test_ofp_ofp_in_cksum__ip4_addr(void)
+{
+	uint16_t res = ofp_in_cksum((uint16_t *)&ipaddr, sizeof(ipaddr));
+
+	CU_ASSERT_EQUAL(res, 0xF234);
+}
+
+/*
+ * Zero the ICMP checksum field of the odd_len_icmp packet and checksum the
+ * ICMP part (IP total length minus IP header length) with ofp_in_cksum().
+ */
+static void
+test_ofp_in_cksum_odd_len_icmp(void)
+{
+	odp_packet_t pkt;
+	struct ofp_ip *ip;
+	struct ofp_icmp *icmp;
+	uint16_t res, ip_hl;
+
+	if (create_odp_packet_ip4(&pkt, odd_len_icmp, sizeof(odd_len_icmp))) {
+		CU_FAIL("Fail to create packet");
+		return;
+	}
+
+	ip = odp_packet_l3_ptr(pkt, NULL);
+	ip_hl = ip->ip_hl << 2;
+	icmp = (struct ofp_icmp *)((uint8_t *)ip + ip_hl);
+	icmp->icmp_cksum = 0;
+
+	res = ofp_in_cksum((uint16_t *)icmp,
+			   odp_be_to_cpu_16(ip->ip_len) - ip_hl);
+
+	CU_ASSERT_EQUAL(res, 0x84F7);
+	odp_packet_free(pkt);
+}
+
+/* ICMP checksum of pkt1_full via __ofp_cksum(packet, offset, length). */
+static void
+test___ofp_cksum(void)
+{
+	odp_packet_t pkt;
+	struct ofp_ip *ip;
+	struct ofp_icmp *icmp;
+	uint16_t res, ip_hl;
+
+	if (create_odp_packet_ip4(&pkt, pkt1_full, sizeof(pkt1_full))) {
+		CU_FAIL("Fail to create packet");
+		return;
+	}
+
+	ip = odp_packet_l3_ptr(pkt, NULL);
+	ip_hl = ip->ip_hl << 2;
+	icmp = (struct ofp_icmp *)((uint8_t *)ip + ip_hl);
+	icmp->icmp_cksum = 0;
+
+	res = __ofp_cksum(pkt,
+			  odp_packet_l3_offset(pkt) + ip_hl,
+			  odp_be_to_cpu_16(ip->ip_len) - ip_hl);
+
+	CU_ASSERT_EQUAL(res, 0xA8ED);
+	odp_packet_free(pkt);
+}
+
+/* Same packet and range as above, checked through ofp_cksum(). */
+static void
+test_ofp_cksum(void)
+{
+	odp_packet_t pkt;
+	struct ofp_ip *ip;
+	struct ofp_icmp *icmp;
+	uint16_t res, ip_hl;
+
+	if (create_odp_packet_ip4(&pkt, pkt1_full, sizeof(pkt1_full))) {
+		CU_FAIL("Fail to create packet");
+		return;
+	}
+
+	ip = odp_packet_l3_ptr(pkt, NULL);
+	ip_hl = ip->ip_hl << 2;
+	icmp = (struct ofp_icmp *)((uint8_t *)ip + ip_hl);
+	icmp->icmp_cksum = 0;
+
+	res = ofp_cksum(pkt,
+			odp_packet_l3_offset(pkt) + ip_hl,
+			odp_be_to_cpu_16(ip->ip_len) - ip_hl);
+
+	CU_ASSERT_EQUAL(res, 0x5712);
+	odp_packet_free(pkt);
+}
+
+/*
+ * UDP checksum of udp_packet, computed with uh_sum zeroed first.
+ * NOTE(review): despite its name this calls ofp_in4_cksum(), exactly like
+ * test_ofp_in4_cksum() below; confirm which function was meant.
+ */
+static void
+test___ofp_in4_cksum(void)
+{
+	odp_packet_t pkt;
+	struct ofp_ip *ip;
+	struct ofp_udphdr *udp;
+	uint16_t res, ip_hl;
+
+	if (create_odp_packet_ip4(&pkt, udp_packet, sizeof(udp_packet))) {
+		CU_FAIL("Fail to create packet");
+		return;
+	}
+
+	ip = odp_packet_l3_ptr(pkt, NULL);
+	ip_hl = ip->ip_hl << 2;
+	udp = (struct ofp_udphdr *)((uint8_t *)ip + ip_hl);
+	udp->uh_sum = 0;
+
+	res = ofp_in4_cksum(pkt);
+
+	CU_ASSERT_EQUAL(res, 0x4d2d);
+	odp_packet_free(pkt);
+}
+
+/* ofp_in4_cksum() on udp_packet with uh_sum zeroed must give 0x4d2d. */
+static void
+test_ofp_in4_cksum(void)
+{
+	odp_packet_t pkt;
+	struct ofp_ip *ip;
+	struct ofp_udphdr *udp;
+	uint16_t res, ip_hl;
+
+	if (create_odp_packet_ip4(&pkt, udp_packet, sizeof(udp_packet))) {
+		CU_FAIL("Fail to create packet");
+		return;
+	}
+
+	ip = odp_packet_l3_ptr(pkt, NULL);
+	ip_hl = ip->ip_hl << 2;
+	udp = (struct ofp_udphdr *)((uint8_t *)ip + ip_hl);
+	udp->uh_sum = 0;
+
+	res = ofp_in4_cksum(pkt);
+
+	CU_ASSERT_EQUAL(res, 0x4d2d);
+	odp_packet_free(pkt);
+}
+
+/* ofp_print_mac() output for macaddr is compared as a string. */
+static void
+test_ofp_print_mac(void)
+{
+	char *res = ofp_print_mac(macaddr);
+
+	CU_ASSERT_STRING_EQUAL(res, " ff:ee:dd:cc:bb:aa");
+}
+
+/* ofp_print_ip_addr() must render ipaddr as dotted decimal. */
+static void
+test_ofp_print_ip_addr(void)
+{
+	char *res = ofp_print_ip_addr(ipaddr);
+
+	CU_ASSERT_STRING_EQUAL(res, "192.168.10.101");
+}
+
+/* ofp_print_ip6_addr() must render ip6addr as eight hex groups. */
+static void
+test_ofp_print_ip6_addr(void)
+{
+	char *res = ofp_print_ip6_addr(ip6addr);
+
+	CU_ASSERT_STRING_EQUAL(res, "dead:beef:dead:beef:dead:beef:dead:beef");
+}
+
+/* Mixed-case hex digits with a leading zero must parse to 0x8F7e0. */
+static void
+test_ofp_hex_to_num(void)
+{
+	int res;
+	char str[] = "08F7e0";
+
+	res = ofp_hex_to_num(str);
+	CU_ASSERT_EQUAL(res, 0x8F7e0);
+}
+
+/* The link-local address built from macaddr must match ref byte for byte. */
+static void
+test_ofp_mac_to_link_local(void)
+{
+	uint8_t linklocal[16];
+	uint8_t ref[16] = { 0xfe, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+			    0xfd, 0xee, 0xdd, 0xff, 0xfe, 0xcc, 0xbb, 0xaa };
+
+	ofp_mac_to_link_local(macaddr, linklocal);
+
+	if (memcmp(linklocal, ref, 16))
+		CU_FAIL("memcmp failed")
+	else
+		CU_PASS("memcmp")
+}
+
+/* A prefix length of 61 must produce the 16-byte mask in ref. */
+static void
+test_ofp_ip6_masklen_to_mask(void)
+{
+	uint8_t mask[16];
+	uint8_t ref[16] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8,
+			    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+	ofp_ip6_masklen_to_mask(61, mask);
+
+	if (memcmp(mask, ref, 16))
+		CU_FAIL("memcmp failed")
+	else
+		CU_PASS("memcmp")
+}
+
+/* A 32-bit 0xFFFFFC00 mask must be reported as 22 bits long. */
+static void
+test_ofp_mask_length(void)
+{
+	int res;
+	unsigned long mask = 0xFFFFFC00; /* L.E.: 00 FC FF FF */
+
+	res = ofp_mask_length(32, (uint8_t *)&mask);
+
+	CU_ASSERT_EQUAL(res, 22);
+}
+
+/* "fp4.100" -> port 4 / vlan 100; "gre101" -> GRE_PORTS / vlan 101. */
+static void
+test_ofp_name_to_port_vlan(void)
+{
+	char devname[] = "fp4.100";
+	int port, vlan;
+
+	port = ofp_name_to_port_vlan(devname, &vlan);
+
+	CU_ASSERT_EQUAL(port, 4);
+	CU_ASSERT_EQUAL(vlan, 100);
+
+	strcpy(devname, "gre101");
+	port = ofp_name_to_port_vlan(devname, &vlan);
+
+	CU_ASSERT_EQUAL(port, GRE_PORTS);
+	CU_ASSERT_EQUAL(vlan, 101);
+
+}
+
+/*
+ * Port 4 / VLAN 100 must be named "fp4.100". The name is read through the
+ * pointer returned by the callee, so no local buffer is allocated.
+ */
+static void
+test_ofp_port_vlan_to_ifnet_name(void)
+{
+	int port = 4;
+	int vlan = 100;
+	char *res;
+
+	res = ofp_port_vlan_to_ifnet_name(port, vlan);
+
+	CU_ASSERT_STRING_EQUAL(res, "fp4.100");
+}
+
+/*
+ * ofp_sendf() must write "Hello ODP! 10" (13 bytes) to the descriptor;
+ * the text is read back from testFileName to check it.
+ */
+static void
+test_ofp_sendf(void)
+{
+#define BUFLEN 15
+	char res[BUFLEN];
+	int fd, l;
+	FILE *f;
+
+	memset(res, 0x0, BUFLEN);
+
+	fd = open(testFileName,
+		  O_WRONLY | O_CREAT | O_TRUNC,
+		  S_IWRITE | S_IREAD);
+	if (fd < 0) {
+		CU_FAIL("Cannot create output file.");
+		return;
+	}
+	l = ofp_sendf(fd, "%s %d", "Hello ODP!", 0xA);
+	close(fd);
+
+	CU_ASSERT_EQUAL(l, 13);
+
+	f = fopen(testFileName, "r");
+	if (f == NULL) {
+		CU_FAIL("Cannot open output file.");
+		return;
+	}
+	if (fgets(res, BUFLEN, f) != NULL)
+		CU_ASSERT_STRING_EQUAL(res, "Hello ODP! 10")
+	else
+		CU_FAIL("Cannot read output file.")
+
+	fclose(f);
+#undef BUFLEN
+}
+
+/* An all-zero MAC must report 0, the non-zero macaddr must report 1. */
+static void test_ofp_has_mac(void)
+{
+	int res;
+	uint8_t empty[6];
+
+	memset(empty, 0, sizeof(empty));
+	res = ofp_has_mac(empty);
+	CU_ASSERT_EQUAL(res, 0);
+
+	res = ofp_has_mac(macaddr);
+	CU_ASSERT_EQUAL(res, 1);
+}
+
+/*
+ * Main
+ */
+
+/*
+ * Register every test in the "ofp util" suite, run it, and return the number
+ * of failed suites if any failed, otherwise the number of failed tests.
+ */
+int
+main(void)
+{
+	CU_pSuite ptr_suite = NULL;
+	int nr_of_failed_tests = 0;
+	int nr_of_failed_suites = 0;
+
+	/* Initialize the CUnit test registry */
+	if (CUE_SUCCESS != CU_initialize_registry())
+		return CU_get_error();
+
+	/* add a suite to the registry */
+	ptr_suite = CU_add_suite("ofp util", init_suite, clean_suite);
+	if (NULL == ptr_suite) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite,
+				test_ofp_ofp_in_cksum__ip4_addr)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_in_cksum_odd_len_icmp)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test___ofp_cksum)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_cksum)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test___ofp_in4_cksum)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_in4_cksum)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_print_mac)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_print_ip_addr)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_print_ip6_addr)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_hex_to_num)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_mac_to_link_local)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_ip6_masklen_to_mask)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_mask_length)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_name_to_port_vlan)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite,
+				test_ofp_port_vlan_to_ifnet_name)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_sendf)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+	if (NULL == CU_ADD_TEST(ptr_suite, test_ofp_has_mac)) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+#if OFP_TESTMODE_AUTO
+	CU_set_output_filename("CUnit-Util");
+	CU_automated_run_tests();
+#else
+	/* Run all tests using the CUnit Basic interface */
+	CU_basic_set_mode(CU_BRM_VERBOSE);
+	CU_basic_run_tests();
+#endif
+
+	nr_of_failed_tests = CU_get_number_of_tests_failed();
+	nr_of_failed_suites = CU_get_number_of_suites_failed();
+	CU_cleanup_registry();
+
+	return (nr_of_failed_suites > 0 ?
+		nr_of_failed_suites : nr_of_failed_tests);
+}
diff --git a/test/cunit/test_raw_frames.h b/test/cunit/test_raw_frames.h
new file mode 100644
index 00000000..f4f7789e
--- /dev/null
+++ b/test/cunit/test_raw_frames.h
@@ -0,0 +1,85 @@
+/*-
+ * Copyright (c) 2014 ENEA Software AB
+ * Copyright (c) 2014 Nokia
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef __TEST_RAW_FRAMES_H__
+#define __TEST_RAW_FRAMES_H__
+
+#include <stdint.h> /* uint8_t; keeps this header self-contained */
+
+/* Ethernet + IPv4 + TCP frame (106 bytes) */
+static uint8_t tcp_frame[106] = {
+0x08, 0x00, 0x27, 0xae, 0x3e, 0xd3, 0x08, 0x00, /* ..'.>... */
+0x27, 0x00, 0xa8, 0x1e, 0x08, 0x00, 0x45, 0x00, /* '.....E. */
+0x00, 0x5c, 0x00, 0x93, 0x40, 0x00, 0x80, 0x06, /* .\..@... */
+0x07, 0xed, 0xc0, 0xa8, 0x38, 0x65, 0xc0, 0xa8, /* ....8e.. */
+0x38, 0x66, 0xd1, 0x9e, 0x00, 0x16, 0x3f, 0xc9, /* 8f....?. */
+0x7a, 0x8a, 0xee, 0x16, 0x51, 0xe9, 0x50, 0x18, /* z...Q.P. */
+0x3f, 0xff, 0xb5, 0xf0, 0x00, 0x00, 0xb3, 0x3a, /* ?......: */
+0x5c, 0xa0, 0x8e, 0x61, 0xff, 0x00, 0xd9, 0xbd, /* \..a.... */
+0x20, 0x52, 0x08, 0xd1, 0xf9, 0xcc, 0x5b, 0xc8, /*  R....[. */
+0x18, 0x1d, 0xee, 0x01, 0xd6, 0x34, 0x61, 0xf8, /* .....4a. */
+0xe2, 0x74, 0x5a, 0xd0, 0x16, 0x8f, 0x30, 0x63, /* .tZ...0c */
+0x34, 0x9a, 0xdd, 0x49, 0x5c, 0x16, 0x0f, 0x2c, /* 4..I\.., */
+0xab, 0xd6, 0x04, 0x79, 0xf4, 0xdb, 0xe4, 0xd7, /* ...y.... */
+0x3c, 0x22                                      /* <" */
+};
+
+/* Ethernet + IPv4 + ICMP echo request frame (74 bytes) */
+static uint8_t icmp_frame[74] = {
+0x08, 0x00, 0x27, 0xae, 0x3e, 0xd3, 0x08, 0x00, /* ..'.>... */
+0x27, 0x00, 0xa8, 0x1e, 0x08, 0x00, 0x45, 0x00, /* '.....E. */
+0x00, 0x3c, 0x00, 0x95, 0x00, 0x00, 0x80, 0x01, /* .<...... */
+0x48, 0x10, 0xc0, 0xa8, 0x38, 0x65, 0xc0, 0xa8, /* H...8e.. */
+0x38, 0x66, 0x08, 0x00, 0x4d, 0x1e, 0x00, 0x01, /* 8f..M... */
+0x00, 0x3d, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, /* .=abcdef */
+0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, /* ghijklmn */
+0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, /* opqrstuv */
+0x77, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* wabcdefg */
+0x68, 0x69                                      /* hi */
+};
+
+/* Ethernet ARP request frame (42 bytes) */
+static uint8_t arp_frame[42] = {
+0x08, 0x00, 0x27, 0xae, 0x3e, 0xd3, 0x08, 0x00, /* ..'.>... */
+0x27, 0x00, 0xa8, 0x1e, 0x08, 0x06, 0x00, 0x01, /* '....... */
+0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0x08, 0x00, /* ........ */
+0x27, 0x00, 0xa8, 0x1e, 0xc0, 0xa8, 0x38, 0x65, /* '.....8e */
+0x08, 0x00, 0x27, 0xae, 0x3e, 0xd3, 0xc0, 0xa8, /* ..'.>... */
+0x38, 0x66                                      /* 8f */
+};
+
+/* Ethernet + IPv6 + UDP frame (98 bytes) */
+static uint8_t ip6udp_frame[98] = {
+0x33, 0x33, 0x00, 0x00, 0x00, 0xfb, 0x00, 0x22, /* 33....." */
+0x68, 0x0f, 0xba, 0x87, 0x86, 0xdd, 0x60, 0x00, /* h.....`. */
+0x00, 0x00, 0x00, 0x2c, 0x11, 0xff, 0xfe, 0x80, /* ...,.... */
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x22, /* ......." */
+0x68, 0xff, 0xfe, 0x0f, 0xba, 0x87, 0xff, 0x02, /* h....... */
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ........ */
+0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x14, 0xe9, /* ........ */
+0x14, 0xe9, 0x00, 0x2c, 0x48, 0x45, 0x00, 0x00, /* ...,HE.. */
+0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, /* ........ */
+0x00, 0x00, 0x07, 0x5f, 0x70, 0x6c, 0x61, 0x73, /* ..._plas */
+0x6d, 0x61, 0x04, 0x5f, 0x74, 0x63, 0x70, 0x05, /* ma._tcp. */
+0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x00, 0x00, 0x0c, /* local... */
+0x00, 0x01                                      /* .. */
+};
+
+/* Ethernet + IPv6 + ICMPv6 neighbor solicitation frame (78 bytes) */
+static uint8_t icmp6_frame[78] = {
+0x33, 0x33, 0xff, 0x50, 0x54, 0xd7, 0xd0, 0x67, /* 33.PT..g */
+0xe5, 0x30, 0x06, 0xad, 0x86, 0xdd, 0x60, 0x00, /* .0....`. */
+0x00, 0x00, 0x00, 0x18, 0x3a, 0xff, 0x00, 0x00, /* ....:... */
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ........ */
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x02, /* ........ */
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ........ */
+0x00, 0x01, 0xff, 0x50, 0x54, 0xd7, 0x87, 0x00, /* ...PT... */
+0x54, 0x6c, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x80, /* Tl...... */
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x1b, /* ........ */
+0xdd, 0x4f, 0xdb, 0x50, 0x54, 0xd7              /* .O.PT. */
+};
+
+#endif /* __TEST_RAW_FRAMES_H__ */