Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement migration v2 #746

Merged
merged 42 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from 41 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3d6d384
implement migration v2 in libvfio-user
w-henderson Jul 10, 2023
a70cbe9
fix: implementation fixes from comments
w-henderson Jul 12, 2023
2898d38
fix: more implementation changes for comments
w-henderson Jul 13, 2023
3aaaa70
fix: more fixes and responses to comments
w-henderson Jul 13, 2023
de676cf
fix: make migration data size 32-bit
w-henderson Jul 14, 2023
d7d7122
fix: further fixes in response to comments
w-henderson Jul 17, 2023
453f673
feat: begin implementing DMA dirty page logging (WIP)
w-henderson Jul 20, 2023
d317b9a
fix: bugs in dirty page logging
w-henderson Jul 20, 2023
1ce8931
fix: dirty page logging bug fixes
w-henderson Jul 21, 2023
29f91e6
fix: wrong size for dirty page bitmap
w-henderson Jul 21, 2023
e781a19
fix: prevent uninitialized return value
w-henderson Jul 24, 2023
3a8f13f
fix: don't try to log non-mapped regions
w-henderson Jul 31, 2023
1476fa2
fix: set argsz when getting dirty bitmap
w-henderson Jul 31, 2023
d5ecd25
fix: conflict with kernel definitions
w-henderson Aug 1, 2023
5d46895
fix: crash when not logging all DMA regions
w-henderson Aug 2, 2023
3e3f04b
fix: prevent uninitialised bytes being sent on error
w-henderson Aug 2, 2023
49701eb
fix: migration FSM transitions
w-henderson Aug 3, 2023
c2cc2e4
fix: error returns incorrectly not using ERROR_INT
w-henderson Aug 4, 2023
7b2a8cf
fix: make state transitions idempotent
w-henderson Aug 4, 2023
73765a7
fix: disallow STOP_COPY to PRE_COPY transition
w-henderson Aug 4, 2023
c40d16f
fix: out of bounds read
w-henderson Aug 7, 2023
cea84e7
refactor: respond to John's review
w-henderson Aug 9, 2023
96ef311
refactor: move transition definitions to migration.c
w-henderson Aug 10, 2023
f70dc75
fix: read semantics in comment
w-henderson Aug 10, 2023
baddc37
fix: allow custom page size in dma_controller_dirty_page_get
w-henderson Aug 10, 2023
ce6a6c2
fix: bug with return values of MIG_DATA_READ/WRITE
w-henderson Aug 16, 2023
6c8a772
refactor: device feature handling and minor changes
w-henderson Aug 17, 2023
4603183
fix: buffer overflow in DMA report generator
w-henderson Aug 17, 2023
9a917f6
revert dirty bitmap log message bugfix to put in new PR
w-henderson Aug 17, 2023
aa6cc13
fix: not checking bitmap size valid
w-henderson Aug 18, 2023
2750d9f
fix: argsz semantics in implementation
w-henderson Aug 24, 2023
26efa26
fix: clear uninitialized output on argsz error
w-henderson Aug 24, 2023
ab2e9fd
respond to John's review
w-henderson Aug 29, 2023
554c932
respond to Thanos's review
w-henderson Aug 29, 2023
977211f
changes from Thanos's review
w-henderson Aug 30, 2023
3c9dcab
improve variable naming and documentation
w-henderson Aug 30, 2023
1507e56
more fixes from Thanos's review
w-henderson Aug 30, 2023
f1927fa
refactor dirty page get
w-henderson Aug 30, 2023
2c98f1d
respond to more of Thanos's review
w-henderson Aug 30, 2023
9ce4988
minor changes from John's review
w-henderson Aug 31, 2023
fde85d6
introduce iov_free
w-henderson Aug 31, 2023
9ba1d7e
fix: migration quiesce and short write
w-henderson Sep 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 13 additions & 149 deletions include/libvfio-user.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,21 +583,8 @@ typedef enum {
VFU_MIGR_STATE_RESUME
} vfu_migr_state_t;

#define VFU_MIGR_CALLBACKS_VERS 1
#define VFU_MIGR_CALLBACKS_VERS 2

/*
* Callbacks during the pre-copy and stop-and-copy phases.
*
* The client executes the following steps to copy migration data:
*
* 1. get_pending_bytes: device must return amount of migration data
* 2. prepare_data: device must prepare migration data
* 3. read_data: device must provide migration data
*
* The client repeats the above steps until there is no more migration data to
* return (the device must return 0 from get_pending_bytes to indicate that
* there are no more migration data to be consumed in this iteration).
*/
typedef struct {

/*
Expand All @@ -615,152 +602,30 @@ typedef struct {
* FIXME maybe we should create a single callback and pass the state?
*/
int (*transition)(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state);

/* Callbacks for saving device state */

/*
* Function that is called to retrieve the amount of pending migration
* data. If migration data were previously made available (function
* prepare_data has been called) then calling this function signifies that
* they have been read (e.g. migration data can be discarded). If the
* function returns 0 then migration has finished and this function won't
* be called again.
*
* The amount of pending migration data returned by the device does not
* necessarily have to monotonically decrease over time and does not need
* to match the amount of migration data returned via the @size argument in
* prepare_data. It can completely fluctuate according to the needs of the
* device. These semantics are derived from the pending_bytes register in
* VFIO. Therefore the value returned by get_pending_bytes must be
* primarily regarded as boolean, either 0 or non-zero, as far as migration
* completion is concerned. More advanced vfio-user clients can make
* assumptions on how migration is progressing on devices that guarantee
* that the amount of pending migration data decreases over time.
*/
uint64_t (*get_pending_bytes)(vfu_ctx_t *vfu_ctx);

/*
* Function that is called to instruct the device to prepare migration data
* to be read when in pre-copy or stop-and-copy state, and to prepare for
* receiving migration data when in resuming state.
*
* When in pre-copy and stop-and-copy state, the function must return only
* after migration data are available at the specified offset. This
* callback is called once per iteration. The amount of data available
* pointed to by @size can be different that the amount of data returned by
* get_pending_bytes in the beginning of the iteration.
*
* In VFIO, the data_offset and data_size registers can be read multiple
* times during an iteration and are invariant, libvfio-user simplifies
* this by caching the values and returning them when read, guaranteeing
* that prepare_data() is called only once per migration iteration.
*
* When in resuming state, @offset must be set to where migration data must
* written. @size points to NULL.
*
* The callback should return -1 on error, setting errno.
*/
int (*prepare_data)(vfu_ctx_t *vfu_ctx, uint64_t *offset, uint64_t *size);


/*
* Function that is called to read migration data. offset and size can be
* any subrange on the offset and size previously returned by prepare_data.
* The function must return the amount of data read or -1 on error, setting
* errno.
* Function that is called to read `count` bytes of migration data into
* `buf`. The function must return the amount of data read or -1 on error,
* setting errno. The function may return less data than requested.
*
* This function can be called even if the migration data can be memory
* mapped.
* If the function returns zero, this is interpreted to mean that there is
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
* no more migration data to read.
*/
ssize_t (*read_data)(vfu_ctx_t *vfu_ctx, void *buf,
uint64_t count, uint64_t offset);

/* Callbacks for restoring device state */
ssize_t (*read_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count);
w-henderson marked this conversation as resolved.
Show resolved Hide resolved

/*
* Fuction that is called for writing previously stored device state. The
* Function that is called for writing previously stored device state. The
* function must return the amount of data written or -1 on error, setting
* errno.
*/
ssize_t (*write_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count,
uint64_t offset);

/*
* Function that is called when client has written some previously stored
* device state.
*
* The callback should return -1 on error, setting errno.
* errno. Partial writes are not supported, so any return value other than
* `count` is invalid.
*/
int (*data_written)(vfu_ctx_t *vfu_ctx, uint64_t count);
ssize_t (*write_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count);

} vfu_migration_callbacks_t;

/**
* The definition for VFIO_DEVICE_STATE_XXX differs with the version of vfio
* header file used. Some old systems wouldn't have these definitions. Some
* other newer systems would be using region based migration, and not
* have VFIO_DEVICE_STATE_V1_XXXX defined. The latest ones have
* VFIO_DEVICE_STATE_V1_XXXX defined. The following addresses all
* these scenarios.
*/
#if defined(VFIO_DEVICE_STATE_STOP)

_Static_assert(VFIO_DEVICE_STATE_STOP == 0,
"incompatible VFIO_DEVICE_STATE_STOP definition");

#define VFIO_DEVICE_STATE_V1_STOP VFIO_DEVICE_STATE_STOP
#define VFIO_DEVICE_STATE_V1_RUNNING VFIO_DEVICE_STATE_RUNNING
#define VFIO_DEVICE_STATE_V1_SAVING VFIO_DEVICE_STATE_SAVING
#define VFIO_DEVICE_STATE_V1_RESUMING VFIO_DEVICE_STATE_RESUMING

#elif !defined(VFIO_REGION_TYPE_MIGRATION_DEPRECATED) /* VFIO_DEVICE_STATE_STOP */

#define VFIO_DEVICE_STATE_V1_STOP (0)
#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0)
#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1)
#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2)
#define VFIO_DEVICE_STATE_MASK ((1 << 3) - 1)

#endif /* VFIO_REGION_TYPE_MIGRATION_DEPRECATED */

/*
* The currently defined migration registers; if using migration callbacks,
* these are handled internally by the library.
*
* This is analogous to struct vfio_device_migration_info.
*/
struct vfio_user_migration_info {
/* VFIO_DEVICE_STATE_* */
uint32_t device_state;
uint32_t reserved;
uint64_t pending_bytes;
uint64_t data_offset;
uint64_t data_size;
};

/*
* Returns the size of the area needed to hold the migration registers at the
* beginning of the migration region; guaranteed to be page aligned.
*/
size_t
vfu_get_migr_register_area_size(void);

/**
* vfu_setup_device_migration provides an abstraction over the migration
* protocol: the user specifies a set of callbacks which are called in response
* to client accesses of the migration region; the migration region read/write
* callbacks are not called after this function call. Offsets in callbacks are
* relative to @data_offset.
*
* @vfu_ctx: the libvfio-user context
* @callbacks: migration callbacks
* @data_offset: offset in the migration region where data begins.
*
* @returns 0 on success, -1 on error, sets errno.
*/
int
vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx,
const vfu_migration_callbacks_t *callbacks,
uint64_t data_offset);
const vfu_migration_callbacks_t *callbacks);

/**
* Triggers an interrupt.
Expand Down Expand Up @@ -903,7 +768,6 @@ enum {
VFU_PCI_DEV_ROM_REGION_IDX,
VFU_PCI_DEV_CFG_REGION_IDX,
VFU_PCI_DEV_VGA_REGION_IDX,
VFU_PCI_DEV_MIGR_REGION_IDX,
VFU_PCI_DEV_NUM_REGIONS,
};

Expand Down
105 changes: 87 additions & 18 deletions include/vfio-user.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ enum vfio_user_command {
VFIO_USER_DMA_READ = 11,
VFIO_USER_DMA_WRITE = 12,
VFIO_USER_DEVICE_RESET = 13,
VFIO_USER_DIRTY_PAGES = 14,
VFIO_USER_REGION_WRITE_MULTI = 15,
VFIO_USER_DEVICE_FEATURE = 16,
VFIO_USER_MIG_DATA_READ = 17,
VFIO_USER_MIG_DATA_WRITE = 18,
VFIO_USER_MAX,
};

Expand Down Expand Up @@ -202,31 +205,97 @@ typedef struct vfio_user_region_io_fds_reply {
} sub_regions[];
} __attribute__((packed)) vfio_user_region_io_fds_reply_t;

/* Analogous to struct vfio_device_feature_dma_logging_range */
struct vfio_user_device_feature_dma_logging_range {
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
uint64_t iova;
uint64_t length;
} __attribute__((packed));

/* Analogous to vfio_iommu_type1_dirty_bitmap. */
struct vfio_user_dirty_pages {
uint32_t argsz;
#ifndef VFIO_IOMMU_DIRTY_PAGES_FLAG_START
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2)
#endif
uint32_t flags;
/* Analogous to struct vfio_device_feature_dma_logging_control */
struct vfio_user_device_feature_dma_logging_control {
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
uint64_t page_size;
uint32_t num_ranges;
uint32_t reserved;
struct vfio_user_device_feature_dma_logging_range ranges[];
} __attribute__((packed));

/* Analogous to struct vfio_iommu_type1_dirty_bitmap_get. */
struct vfio_user_bitmap_range {
/* Analogous to struct vfio_device_feature_dma_logging_report */
struct vfio_user_device_feature_dma_logging_report {
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
uint64_t iova;
uint64_t size;
struct vfio_user_bitmap bitmap;
uint64_t length;
uint64_t page_size;
uint8_t bitmap[];
} __attribute__((packed));

#ifndef VFIO_DEVICE_FEATURE_DMA_LOGGING_START
#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
#endif

/* Analogous to struct vfio_device_feature */
struct vfio_user_device_feature {
uint32_t argsz;
uint32_t flags;
#ifndef VFIO_DEVICE_FEATURE_MASK
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */
#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */
#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */
#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */
#endif
uint8_t data[];
} __attribute__((packed));

/* Analogous to struct vfio_device_feature_migration */
struct vfio_user_device_feature_migration {
uint64_t flags;
#ifndef VFIO_REGION_TYPE_MIGRATION_DEPRECATED
#define VFIO_MIGRATION_STOP_COPY (1 << 0)
#define VFIO_MIGRATION_P2P (1 << 1)
#endif
/*
* PRE_COPY was added in a later kernel version, after
* VFIO_REGION_TYPE_MIGRATION_DEPRECATED had been introduced.
*/
#ifndef VFIO_MIGRATION_PRE_COPY
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
#define VFIO_MIGRATION_PRE_COPY (1 << 2)
#endif
} __attribute__((packed));
#ifndef VFIO_REGION_TYPE_MIGRATION_DEPRECATED
#define VFIO_DEVICE_FEATURE_MIGRATION 1
#endif
_Static_assert(sizeof(struct vfio_user_device_feature_migration) == 8,
"bad vfio_user_device_feature_migration size");

#ifndef VFIO_REGION_TYPE_MIGRATION
/*
 * Analogous to struct vfio_device_feature_mig_state.
 *
 * Payload carried in vfio_user_device_feature.data[] for the
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE feature. The struct is packed and
 * consists of two 32-bit fields, so it must be exactly 8 bytes.
 */
struct vfio_user_device_feature_mig_state {
    uint32_t device_state;
    uint32_t data_fd;
} __attribute__((packed));
/* Only define the feature index if the system VFIO header has not done so. */
#ifndef VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE
#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2
#endif
/*
 * Check the layout of the mig_state struct itself (not
 * vfio_user_device_feature_migration, which has its own assertion).
 */
_Static_assert(sizeof(struct vfio_user_device_feature_mig_state) == 8,
               "bad vfio_user_device_feature_mig_state size");

#define VFIO_REGION_TYPE_MIGRATION (3)
#define VFIO_REGION_SUBTYPE_MIGRATION (1)
/* Analogous to enum vfio_device_mig_state */
enum vfio_user_device_mig_state {
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
VFIO_USER_DEVICE_STATE_ERROR = 0,
VFIO_USER_DEVICE_STATE_STOP = 1,
VFIO_USER_DEVICE_STATE_RUNNING = 2,
VFIO_USER_DEVICE_STATE_STOP_COPY = 3,
VFIO_USER_DEVICE_STATE_RESUMING = 4,
VFIO_USER_DEVICE_STATE_RUNNING_P2P = 5,
VFIO_USER_DEVICE_STATE_PRE_COPY = 6,
VFIO_USER_DEVICE_STATE_PRE_COPY_P2P = 7,
VFIO_USER_DEVICE_NUM_STATES = 8,
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
};

#endif /* VFIO_REGION_TYPE_MIGRATION */
struct vfio_user_mig_data {
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
uint32_t argsz;
uint32_t size;
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
uint8_t data[];
} __attribute__((packed));

#ifdef __cplusplus
}
Expand Down
41 changes: 38 additions & 3 deletions lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

#include <limits.h>
#include <stdint.h>
#include <sys/uio.h>

#define UNUSED __attribute__((unused))
#define EXPORT __attribute__((visibility("default")))
Expand All @@ -60,6 +61,20 @@

typedef unsigned long long ull_t;

/*
 * Records `err` in errno and yields -1, so that a failing function can write
 * `return ERROR_INT(EINVAL);` as a single statement.
 */
static inline int
ERROR_INT(int err)
{
    const int failure = -1;

    errno = err;

    return failure;
}

/*
 * Pointer-returning counterpart of ERROR_INT(): records `err` in errno and
 * yields NULL.
 */
static inline void *
ERROR_PTR(int err)
{
    void *const no_result = NULL;

    errno = err;

    return no_result;
}

/* Saturating uint64_t addition. */
static inline uint64_t
satadd_u64(uint64_t a, uint64_t b)
Expand All @@ -71,14 +86,34 @@ satadd_u64(uint64_t a, uint64_t b)
/*
* The size, in bytes, of the bitmap that represents the given range with the
* given page size.
*
* Returns -1 and sets errno if the given page size is invalid for the given
* range.
*/
static inline size_t
_get_bitmap_size(size_t size, size_t pgsize)
static inline ssize_t
w-henderson marked this conversation as resolved.
Show resolved Hide resolved
get_bitmap_size(size_t region_size, size_t pgsize)
tmakatos marked this conversation as resolved.
Show resolved Hide resolved
{
size_t nr_pages = (size / pgsize) + (size % pgsize != 0);
if (pgsize == 0) {
return ERROR_INT(EINVAL);
}
if (region_size < pgsize) {
return ERROR_INT(EINVAL);
}

size_t nr_pages = (region_size / pgsize) + (region_size % pgsize != 0);
return ROUND_UP(nr_pages, sizeof(uint64_t) * CHAR_BIT) / CHAR_BIT;
}

/*
 * Releases the buffer referenced by `iov` and resets the iovec to the empty
 * state (NULL base, zero length). Calling it again on an already-reset iovec
 * is harmless.
 */
static inline void
iov_free(struct iovec *iov)
{
    /* free(NULL) is a no-op, so an empty iovec needs no special casing. */
    free(iov->iov_base);

    iov->iov_len = 0;
    iov->iov_base = NULL;
}

#ifdef UNIT_TEST

#define MOCK_DEFINE(f) \
Expand Down
Loading
Loading