Skip to content

Commit

Permalink
Merge tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org…
Browse files Browse the repository at this point in the history
…/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.16:

- Add a new uAPI (under the memory ioctl) to request that the driver
  export a DMA-BUF object that represents a memory region on
  the device's DRAM. This is needed to enable peer-to-peer over PCIe
  between a Habana device and an RDMA adapter (e.g. an mlx5 or EFA
  RDMA adapter).

- Add debugfs node to dynamically configure CS timeout. Up until now,
  it was only configurable through kernel module parameter.

- Fetch more comprehensive power information from the firmware.

- Always take timestamp when waiting for user interrupt, as the user
  needs that information to optimize the graph runtime compilation.

- Modify user interrupt to treat the user value used as a fence as a
  64-bit value, instead of 32-bit.

- Bypass reset in case of repeated h/w error event after device reset.
  This is to prevent endless loop of resets to the device.

- Fix several bugs in multi CS completion code.

- Fix race condition in fd close/open.

- Update to latest firmware headers

- Add select CRC32 in kconfig

- Small fixes, cosmetics

* tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (25 commits)
  habanalabs: refactor fence handling in hl_cs_poll_fences
  habanalabs: context cleanup cosmetics
  habanalabs: simplify wait for interrupt with timestamp flow
  habanalabs: initialize hpriv fields before adding new node
  habanalabs: Unify frequency set/get functionality
  habanalabs: select CRC32
  habanalabs: add support for dma-buf exporter
  habanalabs: define uAPI to export FD for DMA-BUF
  habanalabs: fix NULL pointer dereference
  habanalabs: fix race condition in multi CS completion
  habanalabs: use only u32
  habanalabs: update firmware files
  habanalabs: bypass reset for continuous h/w error event
  habanalabs: take timestamp on wait for interrupt
  habanalabs: prevent race between fd close/open
  habanalabs: refactor reset log message
  habanalabs: define soft-reset as inference op
  habanalabs: fix debugfs device memory MMU VA translation
  habanalabs: add support for a long interrupt target value
  habanalabs: remove redundant cs validity checks
  ...
  • Loading branch information
gregkh committed Oct 19, 2021
2 parents 2b74240 + b2faac3 commit be24dd4
Show file tree
Hide file tree
Showing 27 changed files with 1,309 additions and 305 deletions.
6 changes: 6 additions & 0 deletions Documentation/ABI/testing/debugfs-driver-habanalabs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.

What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date: Sep 2021
KernelVersion: 5.16
Contact: obitton@habana.ai
Description: Gets or sets the command submission timeout value in seconds.
Writing 0 sets the timeout to MAX_SCHEDULE_TIMEOUT.

What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
Expand Down
2 changes: 2 additions & 0 deletions drivers/misc/habanalabs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ config HABANA_AI
depends on PCI && HAS_IOMEM
select GENERIC_ALLOCATOR
select HWMON
select DMA_SHARED_BUFFER
select CRC32
help
Enables PCIe card driver for Habana's AI Processors (AIP) that are
designed to accelerate Deep Learning inference and training workloads.
Expand Down
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/common/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o \
common/state_dump.o
common/state_dump.o common/hwmgr.o
105 changes: 72 additions & 33 deletions drivers/misc/habanalabs/common/command_submission.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence)
fence->cs_sequence = sequence;
fence->error = 0;
fence->timestamp = ktime_set(0, 0);
fence->mcs_handling_done = false;
init_completion(&fence->completion);
}

Expand Down Expand Up @@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
/* Don't cancel TDR in case this CS was timedout because we might be
* running from the TDR context
*/
if (cs && (cs->timedout ||
hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
return;

if (cs && cs->tdr_active)
if (cs->tdr_active)
cancel_delayed_work_sync(&cs->work_tdr);

spin_lock(&hdev->cs_mirror_lock);
Expand Down Expand Up @@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
mcs_compl->timestamp =
ktime_to_ns(fence->timestamp);
complete_all(&mcs_compl->completion);

/*
* Setting mcs_handling_done inside the lock ensures that
* at least one fence has mcs_handling_done set to
* true before the wait for mcs finishes. This ensures at
* least one CS will be set as completed when polling
* mcs fences.
*/
fence->mcs_handling_done = true;
}

spin_unlock(&mcs_compl->lock);
}
/* In case CS completed without mcs completion initialized */
fence->mcs_handling_done = true;
}

static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
Expand Down Expand Up @@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
break;
}

mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;

if (status == CS_WAIT_STATUS_BUSY)
continue;

mcs_data->completion_bitmap |= BIT(i);

/*
* best effort to extract timestamp. few notes:
* - if even single fence is gone we cannot extract timestamp
* (as fence not exist anymore)
* - for all completed CSs we take the earliest timestamp.
* for this we have to validate that:
* 1. given timestamp was indeed set
* 2. the timestamp is earliest of all timestamps so far
*/
switch (status) {
case CS_WAIT_STATUS_BUSY:
/* CS did not finish, keep waiting on its QID */
mcs_data->stream_master_qid_map |=
fence->stream_master_qid_map;
break;
case CS_WAIT_STATUS_COMPLETED:
/*
* Use mcs_handling_done to avoid reporting the CS to the user
* as completed before it has finished all of its mcs
* handling; otherwise there could be a race the next time
* the user waits for mcs.
*/
if (!fence->mcs_handling_done)
break;

if (status == CS_WAIT_STATUS_GONE) {
mcs_data->completion_bitmap |= BIT(i);
/*
* For all completed CSs we take the earliest timestamp.
* For this we have to validate that the timestamp is
* earliest of all timestamps so far.
*/
if (mcs_data->update_ts &&
(ktime_compare(fence->timestamp, first_cs_time) < 0))
first_cs_time = fence->timestamp;
break;
case CS_WAIT_STATUS_GONE:
mcs_data->update_ts = false;
mcs_data->gone_cs = true;
} else if (mcs_data->update_ts &&
(ktime_compare(fence->timestamp,
ktime_set(0, 0)) > 0) &&
(ktime_compare(fence->timestamp, first_cs_time) < 0)) {
first_cs_time = fence->timestamp;
/*
* It is possible to get old sequence numbers from the user
* which relate to already completed CSs whose fences are
* already gone. In this case, the CS is set as completed but
* there is no need to consider its QID for mcs completion.
*/
mcs_data->completion_bitmap |= BIT(i);
break;
default:
dev_err(hdev->dev, "Invalid fence status\n");
return -EINVAL;
}

}

hl_fences_put(mcs_data->fence_arr, arr_len);
Expand Down Expand Up @@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)

static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
u32 timeout_us, u64 user_address,
u32 target_value, u16 interrupt_offset,
enum hl_cs_wait_status *status)
u64 target_value, u16 interrupt_offset,
enum hl_cs_wait_status *status,
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
struct hl_user_interrupt *interrupt;
unsigned long timeout, flags;
u32 completion_value;
u64 completion_value;
long completion_rc;
int rc = 0;

Expand Down Expand Up @@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
*/
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
goto remove_pending_user_interrupt;
}

if (completion_value >= target_value)
if (completion_value >= target_value) {
*status = CS_WAIT_STATUS_COMPLETED;
else
/* There was no interrupt, we assume the completion is now. */
pend->fence.timestamp = ktime_get();
} else
*status = CS_WAIT_STATUS_BUSY;

if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
Expand All @@ -2812,7 +2842,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
reinit_completion(&pend->fence.completion);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;

Expand All @@ -2839,6 +2869,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

*timestamp = ktime_to_ns(pend->fence.timestamp);

kfree(pend);
hl_ctx_put(ctx);

Expand All @@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
struct asic_fixed_properties *prop;
union hl_wait_cs_args *args = data;
enum hl_cs_wait_status status;
u64 timestamp;
int rc;

prop = &hdev->asic_prop;
Expand Down Expand Up @@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)

rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
args->in.target, interrupt_offset, &status);
args->in.target, interrupt_offset, &status,
&timestamp);

if (rc) {
if (rc != -EINTR)
Expand All @@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)

memset(args, 0, sizeof(*args));

if (timestamp) {
args->out.timestamp_nsec = timestamp;
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
}

switch (status) {
case CS_WAIT_STATUS_COMPLETED:
args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
Expand Down
8 changes: 1 addition & 7 deletions drivers/misc/habanalabs/common/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,6 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
return rc;
}

/*
 * hl_ctx_free() - drop a reference on a context.
 * @hdev: habanalabs device structure (not used here).
 * @ctx: context whose refcount is decremented.
 *
 * hl_ctx_do_release is passed to kref_put() as the release callback. The
 * return value of kref_put() is not needed: there is nothing further to do
 * whether or not this was the last reference.
 */
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{
	kref_put(&ctx->refcount, hl_ctx_do_release);
}

int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
int rc = 0;
Expand Down Expand Up @@ -392,7 +386,7 @@ void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
idp = &mgr->ctx_handles;

idr_for_each_entry(idp, ctx, id)
hl_ctx_free(hdev, ctx);
kref_put(&ctx->refcount, hl_ctx_do_release);

idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->ctx_lock);
Expand Down
51 changes: 51 additions & 0 deletions drivers/misc/habanalabs/common/debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
return count;
}

/*
 * hl_timeout_locked_read() - debugfs read handler for the CS timeout.
 * @f: debugfs file; the inode's i_private holds the hl_dbg_device_entry.
 * @buf: user buffer that receives the formatted value.
 * @count: size of the user buffer in bytes.
 * @ppos: file position; advanced by the number of bytes copied.
 *
 * Formats hdev->timeout_jiffies as whole seconds followed by a newline and
 * copies the text, including its terminating NUL, to user space.
 *
 * NOTE(review): the write handler stores MAX_SCHEDULE_TIMEOUT when 0 is
 * written; what is printed here for that value depends on
 * jiffies_to_msecs() - confirm the read-back is meaningful.
 *
 * Return: result of simple_read_from_buffer() - presumably the number of
 * bytes copied, 0 at end of data, or a negative error code.
 */
static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	struct hl_device *hdev = entry->hdev;
	char tmp_buf[200];
	int len;

	/* Bounded formatting; the value is unsigned, hence %u */
	len = snprintf(tmp_buf, sizeof(tmp_buf), "%u\n",
			jiffies_to_msecs(hdev->timeout_jiffies) / 1000);

	/*
	 * Hand the caller's count to simple_read_from_buffer() instead of the
	 * string length, so a short user buffer is never overrun. The helper
	 * also handles a non-zero *ppos, which lets a short read be resumed
	 * and returns 0 once the whole string was consumed.
	 */
	return simple_read_from_buffer(buf, count, ppos, tmp_buf, len + 1);
}

/*
 * hl_timeout_locked_write() - debugfs write handler for the CS timeout.
 * @f: debugfs file; the inode's i_private holds the hl_dbg_device_entry.
 * @buf: user buffer holding a decimal number of seconds.
 * @count: number of bytes in @buf.
 * @ppos: file position (unused).
 *
 * Parses an unsigned decimal value in seconds and stores it in
 * hdev->timeout_jiffies. A value of 0 stores MAX_SCHEDULE_TIMEOUT.
 *
 * Return: @count on success, the error from kstrtouint_from_user() if the
 * input cannot be parsed, or -EINVAL if the value is too large to be
 * converted to milliseconds in 32 bits.
 */
static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	struct hl_device *hdev = entry->hdev;
	u32 value;
	ssize_t rc;

	rc = kstrtouint_from_user(buf, count, 10, &value);
	if (rc)
		return rc;

	/*
	 * The seconds -> milliseconds conversion below is a 32-bit multiply.
	 * Reject values that would wrap around and silently configure a much
	 * shorter timeout than the one requested.
	 */
	if (value > UINT_MAX / 1000)
		return -EINVAL;

	if (value)
		hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
	else
		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;

	return count;
}

static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
Expand Down Expand Up @@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
.write = hl_state_dump_write
};

/* File operations of the "timeout_locked" debugfs node: read and write */
static const struct file_operations hl_timeout_locked_fops = {
	.owner	= THIS_MODULE,
	.write	= hl_timeout_locked_write,
	.read	= hl_timeout_locked_read,
};

static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
Expand Down Expand Up @@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_state_dump_fops);

debugfs_create_file("timeout_locked",
0644,
dev_entry->root,
dev_entry,
&hl_timeout_locked_fops);

for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
Expand Down
Loading

0 comments on commit be24dd4

Please sign in to comment.