-
Notifications
You must be signed in to change notification settings - Fork 690
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
redhat: add udev/systemd/etc infrastructure bits
Red Hat has been shipping an "rdma" package, as well as its own systemd unit files for some daemons for a while now, in both Fedora and Red Hat Enterprise Linux. Some of these are fairly RH-specific, but might be of use to others, so we'd like to move them into the upstream source tree. Most of these were authored by Doug Ledford, though I'm currently the one that maintains (most of) them in RHEL. CC: Doug Ledford <dledford@redhat.com> Signed-off-by: Jarod Wilson <jarod@redhat.com> Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
- Loading branch information
1 parent
8df5873
commit 39fa824
Showing
19 changed files
with
1,367 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[Unit] | ||
Description=Starts the InfiniBand Address Cache Manager daemon | ||
Documentation=man:ibacm | ||
Requires=rdma.service | ||
After=rdma.service opensm.service | ||
|
||
[Service] | ||
Type=forking | ||
ExecStart=/usr/sbin/ibacm | ||
|
||
[Install] | ||
WantedBy=network.target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Load IPoIB | ||
IPOIB_LOAD=yes | ||
# Load SRP (SCSI Remote Protocol initiator support) module | ||
SRP_LOAD=yes | ||
# Load SRPT (SCSI Remote Protocol target support) module | ||
SRPT_LOAD=yes | ||
# Load iSER (iSCSI over RDMA initiator support) module | ||
ISER_LOAD=yes | ||
# Load iSERT (iSCSI over RDMA target support) module | ||
ISERT_LOAD=yes | ||
# Load RDS (Reliable Datagram Service) network protocol | ||
RDS_LOAD=no | ||
# Load NFSoRDMA client transport module | ||
XPRTRDMA_LOAD=yes | ||
# Load NFSoRDMA server transport module | ||
SVCRDMA_LOAD=no | ||
# Load Tech Preview device driver modules | ||
TECH_PREVIEW_LOAD=no | ||
# Should we modify the system mtrr registers? We may need to do this if you | ||
# get messages from the ib_ipath driver saying that it couldn't enable | ||
# write combining for the PIO buffs on the card. | ||
# | ||
# Note: recent kernels should do this for us, but in case they don't, we'll | ||
# leave this option | ||
FIXUP_MTRR_REGS=no |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
install cxgb3 /sbin/modprobe --ignore-install cxgb3 $CMDLINE_OPTS && /sbin/modprobe iw_cxgb3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
install cxgb4 /sbin/modprobe --ignore-install cxgb4 $CMDLINE_OPTS && /sbin/modprobe iw_cxgb4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
# This is a simple script that checks the contents of /proc/mtrr to see if | ||
# the BIOS maker for the computer took the easy way out in terms of | ||
# specifying memory regions when there is a hole below 4GB for PCI access | ||
# and the machine has 4GB or more of RAM. When the contents of /proc/mtrr | ||
# show a 4GB mapping of write-back cached RAM, minus punch out hole(s) of | ||
# uncacheable regions (the area reserved for PCI access), then it becomes | ||
# impossible for the ib_ipath driver to set write_combining on its PIO | ||
# buffers. To correct the problem, remap the lower memory region in various | ||
# chunks up to the start of the punch out hole(s), then delete the punch out | ||
# hole(s) entirely as they aren't needed any more. That way, ib_ipath will | ||
# be able to set write_combining on its PIO memory access region. | ||
|
||
# regs counts the /proc/mtrr lines parsed so far and doubles as the next
# free index into the per-register arrays filled in by the main rule.
BEGIN { regs = 0 }
|
||
# Decide whether parsed register entry `mem` is acceptable as the base
# memory region.  The entry's base and size are logged to stderr first.
# Returns 1 only when the region is at least 512MB in size, is of type
# "write-back" and starts below 4GB; returns 0 otherwise.
function check_base(mem)
{
    printf "Base memory data: base=0x%08x, size=0x%x\n", base[mem], size[mem] > "/dev/stderr"
    if (size[mem] >= (512 * 1024 * 1024) &&
        type[mem] == "write-back" &&
        base[mem] < (4 * 1024 * 1024 * 1024))
        return 1
    return 0
}
|
||
# Decide whether parsed register entry `hole` is an acceptable punch-out
# hole.  The entry's base and size are logged to stderr first.
# Returns 1 only when the region is no larger than 1GB, is of type
# "uncachable" and ends at or below 4GB; returns 0 otherwise.
function check_hole(hole)
{
    printf "Hole data: base=0x%08x, size=0x%x\n", base[hole], size[hole] > "/dev/stderr"
    if (size[hole] <= (1 * 1024 * 1024 * 1024) &&
        type[hole] == "uncachable" &&
        (base[hole] + size[hole]) <= (4 * 1024 * 1024 * 1024))
        return 1
    return 0
}
|
||
# Cover the address range [start, end) with new mtrr entries and write
# each one to /proc/mtrr (and echo it to stderr).
#
#   start, end  - byte addresses delimiting the range to map
#   new_base, new_size, tmp_base - awk-style local variables; callers
#                 pass only start and end
#
# The type written for every entry is type[mem], i.e. the type of the
# base memory region selected by the END rule (mem is a global).
function build_entries(start, end, new_base, new_size, tmp_base)
{
    # An empty (or inverted) range needs no entries.  Without this guard
    # the do/while below would still run once and write a bogus entry
    # that extends past `end`.
    if (start >= end)
        return

    # mtrr registers require alignment of blocks, so a 256MB chunk must
    # be 256MB aligned.  Additionally, all blocks must be a power of 2
    # in size.  So, do the largest power of two size that we can and
    # still have start + block <= end, rinse and repeat.
    tmp_base = start
    do {
        new_base = tmp_base
        new_size = 4096
        # Grow the block while it still fits and stays aligned...
        while (((new_base + new_size) < end) &&
               ((new_base % new_size) == 0))
            new_size = lshift(new_size, 1)
        # ...then back off one step if the last doubling overshot the
        # end of the range or broke the alignment requirement.
        if (((new_base + new_size) > end) ||
            ((new_base % new_size) != 0))
            new_size = rshift(new_size, 1)
        printf "base=0x%x size=0x%x type=%s\n",
            new_base, new_size, type[mem] > "/dev/stderr"
        printf "base=0x%x size=0x%x type=%s\n",
            new_base, new_size, type[mem] > "/proc/mtrr"
        # Flush so each entry reaches /proc/mtrr as its own write.
        fflush("")
        tmp_base = new_base + new_size
    } while (tmp_base < end)
}
|
||
# Main rule: parse one line of /proc/mtrr into the per-register arrays
# (register, base, size, type, enabled, end), indexed by regs.
#
# Two line layouts are accepted:
#   older kernels: regNN: base=0x... ( nnnMB), size=nnnMB: type, count=N
#   newer kernels: regNN: base=0x... ( nnnMB), size=nnnMB, count=N: type
# The substitutions below reduce either layout to
#   "NN 0xBASE SIZE<unit> type"
# so that $1..$4 can be picked off by normal field splitting.
{
    gsub("^reg", "")
    gsub(": base=", " ")
    gsub(" [(].*), size=", " ")
    # Strip only the ", count=N" token itself, and do it before the
    # ": " separator is replaced.  Removing everything from ", count="
    # to the end of the line would also throw away the type when the
    # count precedes it (newer layout).
    gsub(", count=[0-9]+", "")
    gsub(": ", " ")
    register[regs] = strtonum($1)
    base[regs] = strtonum($2)
    # strtonum() takes the leading number and ignores the unit suffix.
    size[regs] = strtonum($3)
    # human_size/mult keep the size as printed (number and unit); they do
    # not appear to be read anywhere else in this script.
    human_size[regs] = size[regs]
    # Convert the size to bytes according to its unit suffix.
    if (match($3, "MB")) { size[regs] *= 1024*1024; mult[regs] = "MB" }
    else { size[regs] *= 1024; mult[regs] = "KB" }
    type[regs] = $4
    enabled[regs] = 1
    end[regs] = base[regs] + size[regs]
    regs++
}
|
||
# END rule: with every register parsed, replace the base write-back
# region and the uncachable holes punched into it by a set of plain
# write-back regions that cover the same memory minus the holes.
# Exits 1 when nothing was changed and 0 when the registers were rewritten.
END {
    # Locate the base memory region.  Only the register starting at
    # address 0 is considered: it is the only one that can reliably be
    # identified as the global memory region and checked for overlaps.
    # An overlap involving a region that does not start at 0 may well be
    # intentional, so such regions are left alone.
    for (mem = 0; mem < regs; mem++)
        if (base[mem] == 0)
            break
    # Give up if no register starts at 0 or the one found is unsuitable.
    if (mem == regs || !check_base(mem))
        exit 1

    # Collect every other register that starts inside the base region
    # and passes check_hole().
    cur_hole = 0
    for (i = 0; i < regs; i++) {
        if (i != mem && base[i] < end[mem] && check_hole(i))
            holes[cur_hole++] = i
    }
    if (cur_hole == 0) {
        print "Nothing to do" > "/dev/stderr"
        exit 1
    }
    printf "Found %d punch-out holes\n", cur_hole > "/dev/stderr"

    # Order the holes by ascending base address (stable insertion sort).
    for (j = 1; j < cur_hole; j++) {
        tmp = holes[j]
        for (i = j - 1; i >= 0 && base[holes[i]] > base[tmp]; i--)
            holes[i + 1] = holes[i]
        holes[i + 1] = tmp
    }

    # The common case is a BIOS that maps holes out of the 4GB memory
    # range, with consecutive holes that end where the memory region
    # ends.  Machines with 8GB of RAM or more can break those
    # assumptions.
    #
    # So the approach is: disable the base region and all of its holes
    # first, then build new base memory zones that add up to the
    # original zone minus the holes.  None of the holes listed here can
    # belong to a write-combining region, because write-combining cannot
    # be overlaid on write-back and the base region is known to be
    # write-back; a hole overlapping the base region therefore cannot
    # also overlap a write-combining region.
    printf "disable=%d\n", register[mem] > "/dev/stderr"
    printf "disable=%d\n", register[mem] > "/proc/mtrr"
    fflush("")
    enabled[mem] = 0
    for (i = 0; i < cur_hole; i++) {
        printf "disable=%d\n", register[holes[i]] > "/dev/stderr"
        printf "disable=%d\n", register[holes[i]] > "/proc/mtrr"
        fflush("")
        enabled[holes[i]] = 0
    }

    # Memory from the start of the base region up to the first hole.
    build_entries(base[mem], base[holes[0]])
    # Memory in any gap between two consecutive holes.
    for (i = 0; i < cur_hole - 1; i++)
        if (base[holes[i+1]] > end[holes[i]])
            build_entries(end[holes[i]], base[holes[i+1]])
    # Memory between the end of the last hole and the end of the region.
    last = holes[cur_hole - 1]
    if (end[mem] > end[last])
        build_entries(end[last], end[mem])

    # We changed up the mtrr regs, so signal to the rdma script to
    # reload modules that need the mtrr regs to be right.
    exit 0
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
#!/bin/bash | ||
# Network Interface Configuration System | ||
# Copyright (c) 1996-2013 Red Hat, Inc. all rights reserved. | ||
# | ||
# This program is free software; you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License, version 2, | ||
# as published by the Free Software Foundation. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program; if not, write to the Free Software | ||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
|
||
# Load the shared init-script and network helper functions.
. /etc/init.d/functions

cd /etc/sysconfig/network-scripts
. ./network-functions

if [ -f ../network ]; then
    . ../network
fi

# The interface configuration to act on is the first argument.
CONFIG=$1

# Helper from the sourced function library; presumably loads the settings
# for ${CONFIG} into the environment -- TODO confirm against network-functions.
source_config

# A PHYSDEV supplied by the user overrides detection of the physical
# device.  It is taken as-is; a bogus value is the user's problem.
if [ -n "${PHYSDEV}" ]; then
    REALDEVICE="${PHYSDEV}"
fi

. /etc/sysconfig/network

# Nothing to do if the device is already down.
if check_device_down ${DEVICE}; then
    exit 0
fi
|
||
# P_Key devices need their configuration validated and normalized before
# anything is torn down.
if [ "${PKEY}" = yes ]; then
    if [ -z "${PKEY_ID}" ]; then
        net_log $"InfiniBand IPoIB device: PKEY=yes requires a PKEY_ID"
        exit 1
    fi
    if [ -z "${PHYSDEV}" ]; then
        net_log $"InfiniBand IPoIB device: PKEY=yes requires a PHYSDEV"
        exit 1
    fi
    # Normalize PKEY_ID so that the high bit (0x8000) is always set, then
    # derive the 4-digit hex suffix used in the device name from it.
    NEW_PKEY_ID=$(printf "0x%04x" $(( 0x8000 | ${PKEY_ID} )) )
    NEW_PKEY_NAME=$(printf "%04x" ${NEW_PKEY_ID})
    # The configured DEVICE must equal <PHYSDEV>.<normalized pkey>.
    if [ "${DEVICE}" != "${PHYSDEV}.${NEW_PKEY_NAME}" ]; then
        net_log $"Configured DEVICE name does not match what new device name would be. This
is most likely because once the PKEY_ID was normalized, it no longer
resulted in the expected device naming, and so the DEVICE entry in the
config file needs to be updated to match. This can also be caused by
giving PKEY_ID as a hex number but without using the mandatory 0x prefix.
Configured DEVICE=$DEVICE
Configured PHYSDEV=$PHYSDEV
Configured PKEY_ID=$PKEY_ID
Calculated PKEY_ID=$NEW_PKEY_ID
Calculated name=${PHYSDEV}.${NEW_PKEY_NAME}"
        exit 1
    fi
    # The child interface does not exist, so there is nothing to bring down.
    if [ ! -d "/sys/class/net/${DEVICE}" ]; then
        exit 0
    fi
    # From here on REALDEVICE must name the P_Key device itself so that
    # the address flush further down acts on it.
    REALDEVICE="${DEVICE}"
fi
|
||
|
||
# Unless this is a bonding slave with a master, verify that the hardware
# address configured in HWADDR still belongs to this device.  Only the
# last 24 bytes of each address string are compared.
if [ "${SLAVE}" != "yes" ] || [ -z "${MASTER}" ]; then
    if [ -n "${HWADDR}" ] && [ -z "${MACADDR}" ]; then
        HWADDR=$(echo $HWADDR | tail -c 24)
        FOUNDMACADDR=$(get_hwaddr ${REALDEVICE} | tail -c 24)
        if [ -n "${FOUNDMACADDR}" ] && [ "${FOUNDMACADDR}" != "${HWADDR}" ]; then
            # The address differs: look for a configuration that matches
            # the address actually found on the device.
            NEWCONFIG=$(get_config_by_hwaddr ${FOUNDMACADDR})
            if [ -z "${NEWCONFIG}" ]; then
                net_log $"Device ${DEVICE} has MAC address ${FOUNDMACADDR}, instead of configured address ${HWADDR}. Ignoring."
                exit 1
            fi
            # Adopt the DEVICE= setting from the matching configuration.
            eval $(LANG=C grep -F "DEVICE=" $NEWCONFIG)
            # Hand over to ifdown for the matching configuration when it
            # is a different file that names this same device.
            if [ -n "${NEWCONFIG}" ] && [ "${NEWCONFIG##*/}" != "${CONFIG##*/}" ] && [ "${DEVICE}" = "${REALDEVICE}" ]; then
                exec /sbin/ifdown ${NEWCONFIG}
            else
                net_log $"Device ${DEVICE} has MAC address ${FOUNDMACADDR}, instead of configured address ${HWADDR}. Ignoring."
                exit 1
            fi
        fi
    fi
fi
|
||
# Bonding master teardown: bring down the slaves, then remove the
# arp_ip_target addresses that BONDING_OPTS configured.
if is_bonding_device ${DEVICE} ; then
    # Every ifcfg file that names this device as its MASTER is a slave.
    for device in $(LANG=C grep -l "^[[:space:]]*MASTER=\"\?${DEVICE}\"\?\([[:space:]#]\|$\)" /etc/sysconfig/network-scripts/ifcfg-*) ; do
        if is_ignored_file "$device"; then
            continue
        fi
        /sbin/ifdown ${device##*/}
    done
    for arg in $BONDING_OPTS ; do
        # Only the arp_ip_target=... option is of interest here.
        key=${arg%%=*}
        if [[ "${key}" != "arp_ip_target" ]]; then
            continue
        fi
        value=${arg##*=}
        if [ -n "${value:0:1}" ]; then
            # The value is a comma separated list; split on ',' and remove
            # each address that is currently listed for the bond.
            OLDIFS=$IFS
            IFS=','
            for arp_ip in $value; do
                if grep -q $arp_ip /sys/class/net/${DEVICE}/bonding/arp_ip_target; then
                    echo "-$arp_ip" > /sys/class/net/${DEVICE}/bonding/arp_ip_target
                fi
            done
            IFS=$OLDIFS
        else
            # NOTE(review): ${value:0:1} is empty only when value itself is
            # empty, so this branch runs with an empty value -- confirm
            # whether the test above was meant to check for a leading '+'.
            value=${value#+}
            if grep -q $value /sys/class/net/${DEVICE}/bonding/arp_ip_target; then
                echo "-$value" > /sys/class/net/${DEVICE}/bonding/arp_ip_target
            fi
        fi
    done
fi
|
||
/etc/sysconfig/network-scripts/ifdown-ipv6 ${CONFIG}

# retcode carries the overall result of this script; it is set from the
# dhclient shutdown below and used as the exit status at the end.
retcode=0
if [ -n "$(pidof -x dhclient)" ]; then
    # Look for both pid files: dhclient-<dev>.pid and dhclient6-<dev>.pid.
    for VER in "" 6 ; do
        [ -f "/var/run/dhclient$VER-${DEVICE}.pid" ] || continue
        dhcpid=$(cat /var/run/dhclient$VER-${DEVICE}.pid)
        generate_lease_file_name $VER
        case "$DHCPRELEASE" in
            [yY1]*)
                # Release the lease through dhclient itself.
                /sbin/dhclient -r -lf ${LEASEFILE} -pf /var/run/dhclient$VER-${DEVICE}.pid ${DEVICE} >/dev/null 2>&1
                retcode=$?
                ;;
            *)
                # Stop the client without releasing, then run the
                # dhclient-script hook with a STOP reason.
                kill $dhcpid >/dev/null 2>&1
                retcode=$?
                reason=STOP$VER interface=${DEVICE} /sbin/dhclient-script
                ;;
        esac
        # A pid file that is still present is removed and the recorded
        # pid is signalled once more.
        if [ -f "/var/run/dhclient$VER-${DEVICE}.pid" ]; then
            rm -f /var/run/dhclient$VER-${DEVICE}.pid
            kill $dhcpid >/dev/null 2>&1
        fi
    done
fi
# The configured address may have been edited in the config file since
# the device was brought up, so deleting just that address is not enough.
# Flush every global-scope address associated with this instance instead.
if [ -d "/sys/class/net/${REALDEVICE}" ]; then
    if [ "${REALDEVICE}" = "${DEVICE}" ]; then
        ip addr flush dev ${REALDEVICE} scope global 2>/dev/null
    else
        # DEVICE is a label on REALDEVICE; flush only that label.
        ip addr flush dev ${REALDEVICE} label ${DEVICE} scope global 2>/dev/null
    fi

    # Detach a bonding slave from its master.
    if [ "${SLAVE}" = "yes" ] && [ -n "${MASTER}" ]; then
        echo "-${DEVICE}" > /sys/class/net/${MASTER}/bonding/slaves 2>/dev/null
    fi

    # Only take the link down when DEVICE is the real device.
    if [ "${REALDEVICE}" = "${DEVICE}" ]; then
        ip link set dev ${DEVICE} down 2>/dev/null
    fi
fi
# NOTE(review): $? below is the exit status of the [ ... ] test on the same
# line (always 0 when the assignment runs), not that of the block above, so
# retcode is never changed here.  Left as-is to preserve behavior.
[ "$retcode" = "0" ] && retcode=$?
|
||
# Give the device time to actually come down: poll at most 50 times,
# sleeping 10000 microseconds between polls (about half a second in all).
waited=0
until check_device_down ${DEVICE} || [ "$waited" -ge 50 ] ; do
    usleep 10000
    waited=$((waited + 1))
done

# Run the post-down hook only when everything so far succeeded.  Its own
# exit status is deliberately ignored: ifdown should return whether or
# not the interface went down.
if [ "$retcode" = 0 ] ; then
    /etc/sysconfig/network-scripts/ifdown-post $CONFIG
fi
|
||
# Remove the P_Key child interface from its parent device.
# This must only happen for PKEY=yes: NEW_PKEY_ID is computed solely in
# that case, so testing for a merely non-empty PKEY (e.g. PKEY=no) would
# write an empty key to delete_child.
if [ "${PKEY}" = yes ]; then
    echo "$NEW_PKEY_ID" > /sys/class/net/${PHYSDEV}/delete_child
fi

# Report the result recorded in retcode, not the status of the cleanup above.
exit $retcode
Oops, something went wrong.