Skip to content

Commit

Permalink
prov/efa: Improve efa_rdm_pke definition
Browse files Browse the repository at this point in the history
This removes the hardcoded padding bytes from the struct definition,
which were ostensibly used as a mechanism to enforce alignment of
wiredata to the 128 byte boundary.

While the static_asserts somewhat protect against changes made to the
members which the struct is composed of, the manual calculation is a
maintenance headache which is prone to developer error, as well as
implementation-defined quirks.

More importantly, this change more accurately indicates the intent to
align to the 128-byte boundary.

This commit also makes the struct size uniform when debug mode is
enabled. This wasn't (currently) necessary, as the 32 bytes of padding
were already sufficient for the 16-byte dlist_entry struct.

This is the C99-compliant version, unionizing the struct members
preceding wiredata with a 128-byte array. This is still prone to
changes which cause the struct to exceed 128 bytes. In that scenario,
the array would be increased to EFA_RDM_PKE_ALIGNMENT * 2. Naturally,
this method is slightly less maintainable than the C11-compliant
_Alignas solution.

Signed-off-by: Darryl Abbate <drl@amazon.com>
  • Loading branch information
darrylabbate committed Oct 24, 2023
1 parent 9f604d6 commit b479b5c
Showing 1 changed file with 111 additions and 142 deletions.
253 changes: 111 additions & 142 deletions prov/efa/src/rdm/efa_rdm_pke.h
Original file line number Diff line number Diff line change
@@ -1,35 +1,5 @@
/*
* Copyright (c) Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Copyright Amazon.com, Inc. or its affiliates. All rights reserved. */
/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */

#ifndef _EFA_RDM_PKE_H
#define _EFA_RDM_PKE_H
Expand All @@ -40,10 +10,11 @@

#define EFA_RDM_PKE_IN_USE BIT_ULL(0) /**< this packet entry is being used */
#define EFA_RDM_PKE_RNR_RETRANSMIT BIT_ULL(1) /**< this packet entry encountered RNR and is being retransmitted*/
#define EFA_RDM_PKE_LOCAL_READ BIT_ULL(2) /**< this packet entry is used as context of a local read operation */
#define EFA_RDM_PKE_LOCAL_READ BIT_ULL(2) /**< this packet entry is used as context of a local read operation */
#define EFA_RDM_PKE_DC_LONGCTS_DATA BIT_ULL(3) /**< this DATA packet entry is used by a delivery complete LONGCTS send/write protocol*/
#define EFA_RDM_PKE_LOCAL_WRITE BIT_ULL(4) /**< this packet entry is used as context of an RDMA Write to self */
#define EFA_RDM_PKE_LOCAL_WRITE BIT_ULL(4) /**< this packet entry is used as context of an RDMA Write to self */

#define EFA_RDM_PKE_ALIGNMENT 128

/**
* @enum for packet entry allocation type
Expand Down Expand Up @@ -100,111 +71,112 @@ enum efa_rdm_pke_alloc_type {
* using endpoint's read_copy_pkt_pool, whose memory was registered.
*/
struct efa_rdm_pke {
/**
* entry to the linked list of outstanding/queued packet entries
*
* `entry` is used for sending only.
* It is either linked to `peer->outstanding_tx_pkts` (after a packet has been successfully sent, but it get a completion),
* or linked to `ope->queued_pkts` (after it encountered RNR error completion).
*/
struct dlist_entry entry;
union {
struct {
/**
* entry to the linked list of outstanding/queued packet entries
*
* `entry` is used for sending only.
* It is either linked to `peer->outstanding_tx_pkts` (after a packet has been successfully sent, but it get a completion),
* or linked to `ope->queued_pkts` (after it encountered RNR error completion).
*/
struct dlist_entry entry;
#if ENABLE_DEBUG
/** @brief entry to a linked list of posted buf list */
struct dlist_entry dbg_entry;
uint8_t pad[112];
/** @brief entry to a linked list of posted buf list */
struct dlist_entry dbg_entry;
#endif
/** @brief pointer to #efa_rdm_ep */
struct efa_rdm_ep *ep;

/** @brief pointer to #efa_rdm_ope */
struct efa_rdm_ope *ope;

/** @brief number of bytes sent/received over wire */
size_t pkt_size;

/**
* @brief memory registration
*
* @details
* If this packet is used by EFA device, `mr` the memory registration of wiredata over the EFA device.
* If this packet is used by SHM, `mr` is NULL because SHM does not require memory registration
*
* @todo
* Use type `struct ibv_mr` instead of `struct fid_mr` for this field
*/
struct fid_mr *mr;
/**
* @brief peer address
*
* @details
* When sending a packet, `addr` will be provided by application and it cannot be FI_ADDR_NOTAVAIL.
* However, after a packet is sent, application can remove a peer by calling fi_av_remove().
* When removing the peering, `addr` will be set to FI_ADDR_NOTAVAIL. Later, when device report
* completion for such a TX packet, the TX completion will be ignored.
*
* When receiving a packet, lower device will set `addr`. If the sender's address is not in
* address vector (AV), `lower device will set `addr` to FI_ADDR_NOTAVAIL. This can happen in
* two scenarios:
*
* 1. There has been no prior communication with the peer. In this case, the packet should have
* peer's raw address in the header, and progress engine will insert the raw address into
* address vector, and update `addr`.
*
* 2. This packet is from a peer whose address has been removed from AV. In this case, the
* recived packet will be ignored because all resources associated with peer has been released.
*/
fi_addr_t addr;

/** @brief indicate where the memory of this packet entry reside */
enum efa_rdm_pke_alloc_type alloc_type;

/**
* @brief flags indicating the status of the packet entry
*
* @details
* Possible flags include #EFA_RDM_PKE_IN_USE #EFA_RDM_PKE_RNR_RETRANSMIT,
* #EFA_RDM_PKE_LOCAL_READ, and #EFA_RDM_PKE_DC_LONGCTS_DATA
*/
uint32_t flags;

/**
* @brief link multiple MEDIUM/RUNTREAD RTM with same
* message ID together
*
* @details
* used on receiver side only
*/
struct efa_rdm_pke *next;

/**
* @brief a buffer that contains actual user data that is going over wire
*
* @details
* "payload" points to either a location inside user's buffer,
* (when user's buffer is registered with EFA device), or
* a location in "wiredata" (where user data has been copied to).
* The EFA provider tries its best to avoid copy, but copy is not
* always avoidable.
*/
char *payload;

/**
* @brief memory regstration for user buffer
*
* @details
* payload_mr is same as mr, when payload is pointing to
* a location inside wiredata.
*/
struct fid_mr *payload_mr;

/**
* @brief size of payload buffer
*
*/
size_t payload_size;

uint8_t pad2[32];

/** @brief pointer to #efa_rdm_ep */
struct efa_rdm_ep *ep;

/** @brief pointer to #efa_rdm_ope */
struct efa_rdm_ope *ope;

/** @brief number of bytes sent/received over wire */
size_t pkt_size;

/**
* @brief memory registration
*
* @details
* If this packet is used by EFA device, `mr` the memory registration of wiredata over the EFA device.
* If this packet is used by SHM, `mr` is NULL because SHM does not require memory registration
*
* @todo
* Use type `struct ibv_mr` instead of `struct fid_mr` for this field
*/
struct fid_mr *mr;
/**
* @brief peer address
*
* @details
* When sending a packet, `addr` will be provided by application and it cannot be FI_ADDR_NOTAVAIL.
* However, after a packet is sent, application can remove a peer by calling fi_av_remove().
* When removing the peering, `addr` will be set to FI_ADDR_NOTAVAIL. Later, when device report
* completion for such a TX packet, the TX completion will be ignored.
*
* When receiving a packet, lower device will set `addr`. If the sender's address is not in
* address vector (AV), `lower device will set `addr` to FI_ADDR_NOTAVAIL. This can happen in
* two scenarios:
*
* 1. There has been no prior communication with the peer. In this case, the packet should have
* peer's raw address in the header, and progress engine will insert the raw address into
* address vector, and update `addr`.
*
* 2. This packet is from a peer whose address has been removed from AV. In this case, the
* recived packet will be ignored because all resources associated with peer has been released.
*/
fi_addr_t addr;

/** @brief indicate where the memory of this packet entry reside */
enum efa_rdm_pke_alloc_type alloc_type;

/**
* @brief flags indicating the status of the packet entry
*
* @details
* Possible flags include #EFA_RDM_PKE_IN_USE #EFA_RDM_PKE_RNR_RETRANSMIT,
* #EFA_RDM_PKE_LOCAL_READ, and #EFA_RDM_PKE_DC_LONGCTS_DATA
*/
uint32_t flags;

/**
* @brief link multiple MEDIUM/RUNTREAD RTM with same
* message ID together
*
* @details
* used on receiver side only
*/
struct efa_rdm_pke *next;

/**
* @brief a buffer that contains actual user data that is going over wire
*
* @details
* "payload" points to either a location inside user's buffer,
* (when user's buffer is registered with EFA device), or
* a location in "wiredata" (where user data has been copied to).
* The EFA provider tries its best to avoid copy, but copy is not
* always avoidable.
*/
char *payload;

/**
* @brief memory regstration for user buffer
*
* @details
* payload_mr is same as mr, when payload is pointing to
* a location inside wiredata.
*/
struct fid_mr *payload_mr;

/**
* @brief size of payload buffer
*
*/
size_t payload_size;
};
uint8_t _aligned[EFA_RDM_PKE_ALIGNMENT];
};
/** @brief buffer that contains data that is going over wire
*
* @details
Expand All @@ -225,12 +197,9 @@ struct efa_rdm_pke {
char wiredata[0];
};


#if defined(static_assert) && defined(__x86_64__)
#if ENABLE_DEBUG
static_assert(sizeof(struct efa_rdm_pke) == 256, "efa_rdm_pke check");
#else
static_assert(sizeof(struct efa_rdm_pke) == 128, "efa_rdm_pke check");
#endif
static_assert(sizeof (struct efa_rdm_pke) % EFA_RDM_PKE_ALIGNMENT == 0, "efa_rdm_pke alignment check");
#endif

struct efa_rdm_ep;
Expand Down

0 comments on commit b479b5c

Please sign in to comment.