diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c
index d4e2957f8b24..3dd4f4fdc108 100644
--- a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c
@@ -389,3 +389,107 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
 }
 
+static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
+{
+	char *irq_flag_name;
+	/* Previously the MMU-AS command was used for the L2 cache flush on
+	 * page-table update. The same max-loops count is reused for the GPU
+	 * command, because the L2 cache flush overhead is the same in both cases.
+	 */
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+
+	/* Wait for the GPU cache clean operation to complete */
+	while (--max_loops &&
+	       !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) {
+		;
+	}
+
+	/* Reset the GPU if a timeout occurred */
+	if (max_loops == 0) {
+		switch (irq_bit) {
+		case CLEAN_CACHES_COMPLETED:
+			irq_flag_name = "CLEAN_CACHES_COMPLETED";
+			break;
+		case FLUSH_PA_RANGE_COMPLETED:
+			irq_flag_name = "FLUSH_PA_RANGE_COMPLETED";
+			break;
+		default:
+			irq_flag_name = "UNKNOWN";
+			break;
+		}
+
+		dev_err(kbdev->dev,
+			"Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n",
+			irq_flag_name);
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+		return -EBUSY;
+	}
+
+	/* Clear the interrupt bit. */
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit);
+
+	return 0;
+}
+
+int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
+					u32 flush_op)
+{
+	int need_to_wake_up = 0;
+	int ret = 0;
+
+	/* hwaccess_lock must be held to avoid any sync issue with
+	 * kbase_gpu_start_cache_clean() / kbase_clean_caches_done()
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* 1. Check if kbdev->cache_clean_in_progress is set.
+	 * If it is set, it means there are threads waiting for the
+	 * CLEAN_CACHES_COMPLETED irq to be raised and that the
+	 * corresponding irq mask bit is set.
+	 * We'll clear the irq mask bit and busy-wait for the cache
+	 * clean operation to complete before submitting the cache
+	 * clean command required after the GPU page table update.
+	 * Pending flush commands will be merged into the requested command.
+	 */
+	if (kbdev->cache_clean_in_progress) {
+		/* Disable the irq first */
+		u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED);
+
+		/* Busy-wait for the irq raw status bit to be set */
+		ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+		if (ret)
+			return ret;
+
+		/* Merge the pending command, if there is any */
+		flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE(
+			kbdev->cache_clean_queued, flush_op);
+
+		/* Set the wake-up notify flag */
+		need_to_wake_up = 1;
+	} else {
+		/* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+				CLEAN_CACHES_COMPLETED);
+	}
+
+	/* 2. Issue the GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
+
+	/* 3. Busy-wait for the irq raw status bit to be set. */
+	ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+	if (ret)
+		return ret;
+
+	/* 4. 
Wake-up blocked threads when there is any. */ + if (need_to_wake_up) + kbase_gpu_cache_clean_wait_complete(kbdev); + + return ret; +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h index 7cda61ac6cdb..5d8ebe91580b 100644 --- a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h @@ -40,6 +40,19 @@ struct rb_entry { struct kbase_jd_atom *katom; }; +/* SLOT_RB_TAG_PURGED assumes a value that is different from + * NULL (SLOT_RB_NULL_TAG_VAL) and will not be the result of + * any valid pointer via macro translation: SLOT_RB_TAG_KCTX(x). + */ +#define SLOT_RB_TAG_PURGED ((u64)(1 << 1)) +#define SLOT_RB_NULL_TAG_VAL ((u64)0) + +/** + * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a + * u64 for serving as tagged value. + * @kctx: Pointer to kbase context. + */ +#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx)) /** * struct slot_rb - Slot ringbuffer * @entries: Ringbuffer entries @@ -51,7 +64,7 @@ struct rb_entry { struct slot_rb { struct rb_entry entries[SLOT_RB_SIZE]; - struct kbase_context *last_context; + u64 last_kctx_tagged; u8 read_idx; u8 write_idx; diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c index 2ce203e3a31b..1ae678be4492 100644 --- a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c @@ -289,6 +289,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); kbase_kinstr_jm_atom_hw_submit(katom); + + /* Update the slot's last katom submission kctx */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx); + #ifdef CONFIG_GPU_TRACEPOINTS if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ @@ -299,7 +303,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, sizeof(js_string)), ktime_to_ns(katom->start_timestamp), (u32)katom->kctx->id, 0, katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; } #endif diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c index 3212d2257d38..7c50ed8a1c8b 100644 --- a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c @@ -1246,8 +1246,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, ktime_to_ns(*end_timestamp), (u32)next_katom->kctx->id, 0, next_katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = - next_katom->kctx; } else { char js_string[16]; @@ -1256,7 +1254,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, sizeof(js_string)), ktime_to_ns(ktime_get()), 0, 0, 0); - kbdev->hwaccess.backend.slot_rb[js].last_context = 0; } } #endif @@ -1674,3 +1671,33 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + int js; + bool tracked = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; + + if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { + /* Marking the slot kctx tracking field 
is purged */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; + tracked = true; + } + } + + if (tracked) { + /* The context had run some jobs before the purge, other slots + * in SLOT_RB_NULL_TAG_VAL condition needs to be marked as + * purged as well. + */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == + SLOT_RB_NULL_TAG_VAL) + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + SLOT_RB_TAG_PURGED; + } + } +} diff --git a/drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c index 8bc05293a145..cdee21cb83f0 100644 --- a/drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c +++ b/drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c @@ -132,6 +132,7 @@ static const struct kbase_context_init context_init[] = { "Memory pool goup initialization failed"}, {kbase_mem_evictable_init, kbase_mem_evictable_deinit, "Memory evictable initialization failed"}, + { kbase_ctx_sched_init_ctx, NULL, NULL }, {kbasep_js_kctx_init, kbasep_js_kctx_term, "JS kctx initialization failed"}, {kbase_jd_init, kbase_jd_exit, diff --git a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c index 930fe89acc96..df9f41d2ef88 100644 --- a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c +++ b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,12 @@ /* * Base kernel context APIs */ +#include +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #include #include @@ -132,17 +138,50 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; + kctx->task = NULL; atomic_set(&kctx->nonmapped_pages, 0); atomic_set(&kctx->permanent_mapped_pages, 0); kctx->tgid = current->tgid; kctx->pid = current->pid; + /* Check if this is a Userspace created context */ + if (likely(kctx->filp)) { + struct pid *pid_struct; + + rcu_read_lock(); + pid_struct = find_get_pid(kctx->tgid); + if (likely(pid_struct)) { + struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID); + + if (likely(task)) { + /* Take a reference on the task to avoid slow lookup + * later on from the page allocation loop. 
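+				 * The reference is dropped again via
+				 * put_task_struct(), either in
+				 * kbase_context_common_term() or on the
+				 * error path of this function.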
+ */ + get_task_struct(task); + kctx->task = task; + } else { + dev_err(kctx->kbdev->dev, + "Failed to get task pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + + put_pid(pid_struct); + } else { + dev_err(kctx->kbdev->dev, + "Failed to get pid pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + rcu_read_unlock(); + + if (unlikely(err)) + return err; + kbase_mem_mmgrab(); + kctx->process_mm = current->mm; + } + atomic_set(&kctx->used_pages, 0); mutex_init(&kctx->reg_lock); @@ -168,13 +207,16 @@ int kbase_context_common_init(struct kbase_context *kctx) mutex_init(&kctx->legacy_hwcnt_lock); mutex_lock(&kctx->kbdev->kctx_list_lock); - err = kbase_insert_kctx_to_process(kctx); - if (err) - dev_err(kctx->kbdev->dev, - "(err:%d) failed to insert kctx to kbase_process\n", err); - mutex_unlock(&kctx->kbdev->kctx_list_lock); + if (err) { + dev_err(kctx->kbdev->dev, + "(err:%d) failed to insert kctx to kbase_process", err); + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } + } return err; } @@ -245,15 +287,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, @@ -265,7 +300,10 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); - + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, kctx->tgid); } diff --git a/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h index 16f1d7098688..347a3ba05904 100644 --- a/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h +++ b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h @@ -69,3 +69,19 @@ int kbase_device_init(struct kbase_device *kbdev); * */ void kbase_device_term(struct kbase_device *kbdev); +/** + * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait + * @kbdev: Kbase device + * @flush_op: Flush command register value to be sent to HW + * + * Issue a cache flush command to hardware, then busy wait an irq status. + * This function will clear CLEAN_CACHES_COMPLETED irq mask bit set by other + * threads through kbase_gpu_start_cache_clean(), and wake them up manually + * after the busy-wait is done. Any pended cache flush commands raised by + * other thread are handled in this function. + * hwaccess_lock must be held by the caller. + * + * Return: 0 if successful or a negative error code on failure. 
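+ *
+ * Illustrative usage only (a sketch, not a caller added by this patch):
+ * after a GPU page table update, a thread already holding hwaccess_lock
+ * could issue
+ *
+ *   err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ *                                             GPU_COMMAND_CACHE_CLN_INV_L2);
+ *
+ * using the GPU_COMMAND_CACHE_CLN_INV_L2 value defined in
+ * mali_kbase_gpu_regmap_jm.h. A non-zero return here means the busy-wait
+ * timed out and a GPU reset has already been initiated by this function.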
+ */ +int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, + u32 flush_op); diff --git a/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h b/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h index 258ff33348fe..54f2e68dd132 100644 --- a/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -259,4 +259,11 @@ #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ #define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ +#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES +#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES +/* Merge cache flush commands */ +#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \ + ((cmd1) > (cmd2) ? (cmd1) : (cmd2)) +#define FLUSH_PA_RANGE_COMPLETED \ + (1 << 20) /* Set when a physical range cache clean operation has completed. */ #endif /* _KBASE_GPU_REGMAP_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h index 759f30d21b8d..be87ac7c26d3 100644 --- a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h +++ b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h @@ -27,6 +27,17 @@ #include "mali_kbase_gpu_id.h" #include "backend/mali_kbase_gpu_regmap_jm.h" +/* GPU_U definition */ +#ifdef __ASSEMBLER__ +#define GPU_U(x) x +#define GPU_UL(x) x +#define GPU_ULL(x) x +#else +#define GPU_U(x) x##u +#define GPU_UL(x) x##ul +#define GPU_ULL(x) x##ull +#endif /* __ASSEMBLER__ */ + /* Begin Register Offsets */ /* GPU control registers */ @@ -344,6 +355,20 @@ #define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then flush all L2 caches then issue a flush region command to all MMUs */ +/* AS_LOCKADDR register */ +#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0) +#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \ + (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \ + AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) +#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \ + AS_LOCKADDR_LOCKADDR_BASE_SHIFT) + /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ #define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h index 49ab3827868e..c69bbf40dbe5 100644 --- a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h @@ -345,19 +345,6 @@ enum kbase_atom_exit_protected_state { KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, }; -/** - * struct kbase_ext_res - Contains the info for external resources referred - * by an atom, which have been mapped on GPU side. - * @gpu_address: Start address of the memory region allocated for - * the resource from GPU virtual address space. - * @alloc: pointer to physical pages tracking object, set on - * mapping the external resource on GPU side. 
- */ -struct kbase_ext_res { - u64 gpu_address; - struct kbase_mem_phy_alloc *alloc; -}; - /** * struct kbase_jd_atom - object representing the atom, containing the complete * state and attributes of an atom. @@ -391,7 +378,8 @@ struct kbase_ext_res { * each allocation is read in order to enforce an * overall physical memory usage limit. * @nr_extres: number of external resources referenced by the atom. - * @extres: pointer to the location containing info about + * @extres: Pointer to @nr_extres VA regions containing the external + * resource allocation and other information. * @nr_extres external resources referenced by the atom. * @device_nr: indicates the coregroup with which the atom is * associated, when @@ -501,7 +489,7 @@ struct kbase_jd_atom { #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ u16 nr_extres; - struct kbase_ext_res *extres; + struct kbase_va_region **extres; u32 device_nr; u64 jc; diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h index 6dc57d04426d..0f593a643016 100644 --- a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h +++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -109,10 +109,12 @@ * 11.26 * - Added kinstr_jm API * 11.27 - * - Backwards compatible extension to HWC ioctl. - */ + * Backwards compatible extension to HWC ioctl. + * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE + * before allocating GPU memory for the context. +*/ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 27 +#define BASE_UK_VERSION_MINOR 38 /** * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase.h b/drivers/gpu/arm/b_r26p0/mali_kbase.h index 34bc91cdfcad..4dd580b46f95 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase.h +++ b/drivers/gpu/arm/b_r26p0/mali_kbase.h @@ -365,16 +365,6 @@ static inline void kbase_free_user_buffer( } } -/** - * kbase_mem_copy_from_extres() - Copy from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @buf_data: Pointer to the information about external resources: - * pages pertaining to the external resource, number of - * pages to copy. - */ -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data); int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); void kbase_finish_soft_job(struct kbase_jd_atom *katom); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c b/drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c index 49837bcfaa02..0c3dd0319ef3 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c @@ -847,6 +847,11 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, u64 flags = alloc->in.flags; u64 gpu_va; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + rcu_read_lock(); /* Don't allow memory allocation until user space has set up the * tracking page (which sets kctx->process_mm). 
Also catches when we've @@ -876,7 +881,7 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, alloc->in.extent, - &flags, &gpu_va); + &flags, &gpu_va, mmu_sync_info); if (!reg) return -ENOMEM; diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c index cea91bcaf02e..7bf847b549e6 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c @@ -66,6 +66,13 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); + return 0; +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -184,9 +191,10 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(atomic_read(&kctx->refcount) != 0); @@ -198,6 +206,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h index 1affa719e6dc..4cc6f90d3fcd 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -60,6 +60,17 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); */ void kbase_ctx_sched_term(struct kbase_device *kbdev); +/** + * kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx + * + * Return: 0 + */ +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + /** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference @@ -114,9 +125,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. 
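+ *
+ * This function now takes kbase_device::mmu_hw_mutex and
+ * kbase_device::hwaccess_lock internally, so the caller must not hold
+ * these locks when calling it.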
*/ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h index 1c1453a6cf84..761c8a83d034 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -293,7 +293,7 @@ struct kbase_as { * it is NULL */ struct kbase_mmu_table { - u64 *mmu_teardown_pages; + u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL]; struct mutex mmu_lock; phys_addr_t pgd; u8 group_id; @@ -577,8 +577,11 @@ struct kbase_devfreq_opp { * @entry_set_ate: program the pte to be a valid address translation entry to * encode the physical address of the actual page being mapped. * @entry_set_pte: program the pte to be a valid entry to encode the physical - * address of the next lower level page table. - * @entry_invalidate: clear out or invalidate the pte. + * address of the next lower level page table and also update + * the number of valid entries. + * @entries_invalidate: clear out or invalidate a range of ptes. + * @get_num_valid_entries: returns the number of valid entries for a specific pgd. + * @set_num_valid_entries: sets the number of valid entries for a specific pgd * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. */ struct kbase_mmu_mode { @@ -594,7 +597,10 @@ struct kbase_mmu_mode { void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, unsigned long flags, int level); void (*entry_set_pte)(u64 *entry, phys_addr_t phy); - void (*entry_invalidate)(u64 *entry); + void (*entries_invalidate)(u64 *entry, u32 count); + unsigned int (*get_num_valid_entries)(u64 *pgd); + void (*set_num_valid_entries)(u64 *pgd, + unsigned int num_of_valid_entries); unsigned long flags; }; @@ -1464,11 +1470,13 @@ struct kbase_reg_zone { * Generally the reference count is incremented when the context * is scheduled in and an atom is pulled from the context's per * slot runnable tree. - * @mm_update_lock: lock used for handling of special tracking page. * @process_mm: Pointer to the memory descriptor of the process which * created the context. Used for accounting the physical * pages used for GPU allocations, done for the context, - * to the memory consumed by the process. + * to the memory consumed by the process. A reference is taken + * on this descriptor for the Userspace created contexts so that + * Kbase can safely access it to update the memory usage counters. + * The reference is dropped on context termination. * @gpu_va_end: End address of the GPU va space (in 4KB page units) * @jit_va: Indicates if a JIT_VA zone has been created. * @mem_profile_data: Buffer containing the profiling information provided by @@ -1597,6 +1605,10 @@ struct kbase_reg_zone { * @kinstr_jm: Kernel job manager instrumentation context handle * @tl_kctx_list_node: List item into the device timeline's list of * contexts, for timeline summarization. + * @task: Pointer to the task structure of the main thread of the process + * that created the Kbase context. It would be set only for the + * contexts created by the Userspace and not for the contexts + * created internally by the Kbase. * * A kernel base context is an entity among which the GPU is scheduled. 
* Each context has its own GPU address space. @@ -1691,8 +1703,7 @@ struct kbase_context { atomic_t refcount; - spinlock_t mm_update_lock; - struct mm_struct __rcu *process_mm; + struct mm_struct *process_mm; u64 gpu_va_end; bool jit_va; @@ -1750,6 +1761,7 @@ struct kbase_context { #endif struct kbase_kinstr_jm *kinstr_jm; struct list_head tl_kctx_list_node; + struct task_struct *task; }; #ifdef CONFIG_MALI_CINSTR_GWT @@ -1796,8 +1808,7 @@ struct kbasep_gwt_list_element { */ struct kbase_ctx_ext_res_meta { struct list_head ext_res_node; - struct kbase_mem_phy_alloc *alloc; - u64 gpu_addr; + struct kbase_va_region *reg; u32 ref; }; @@ -1827,6 +1838,24 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) return false; } +/** + * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock + * region, as a logarithm + * + * @gpu_props: GPU properties + * + * Return: the minimum size of the MMU lock region as dictated by the corresponding + * arch spec. + */ +static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props) +{ + if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >= + GPU_ID2_MODEL_MAKE(12, 0)) + return 12; /* 4 kB */ + + return 15; /* 32 kB */ +} + /* Conversion helpers for setting up high resolution timers */ #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c index 188e53bf1abe..5a55f0b9873d 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c @@ -282,6 +282,11 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) int err; struct kbase_context *kctx; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + if (!wa_blob_load_needed(kbdev)) return 0; @@ -375,7 +380,7 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, - 0, &flags, &gpu_va); + 0, &flags, &gpu_va, mmu_sync_info); if (!va_region) { dev_err(kbdev->dev, "Failed to allocate for blob\n"); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h index 3d5934e0e0a1..690fb1ac0f3d 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h @@ -299,4 +299,21 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, */ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); +/** + * kbase_backend_slot_kctx_purge_locked - Perform a purge on the slot_rb tracked + * kctx + * + * @kbdev: Device pointer + * @kctx: The kbase context that needs to be purged from slot_rb[] + * + * For JM GPUs, the L1 read only caches may need a start_flush invalidation, + * potentially on all slots (even if the kctx was only using a single slot), + * following a context termination or address-space ID recycle. This function + * performs a clean-up purge on the given kctx which if it has been tracked by + * slot_rb[] objects. + * + * Caller must hold kbase_device->hwaccess_lock. 
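+ *
+ * Illustrative summary (for reference only) of the slot_rb tagging that this
+ * function operates on, using the tag values from
+ * backend/gpu/mali_kbase_jm_defs.h:
+ *
+ *   last_kctx_tagged == SLOT_RB_TAG_KCTX(kctx)  -> set to SLOT_RB_TAG_PURGED
+ *   last_kctx_tagged == SLOT_RB_NULL_TAG_VAL    -> also set to SLOT_RB_TAG_PURGED,
+ *                                                  but only if @kctx was tracked
+ *                                                  on at least one slot
+ *   any other kctx tag                          -> left unchanged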
+ */ +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx); + #endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c index 02a42bfdea14..f8f8a0e49531 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c @@ -439,6 +439,11 @@ static int kbasep_hwcnt_backend_jm_dump_alloc( u64 flags; u64 nr_pages; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + WARN_ON(!info); WARN_ON(!kctx); WARN_ON(!gpu_dump_va); @@ -453,7 +458,7 @@ static int kbasep_hwcnt_backend_jm_dump_alloc( nr_pages = PFN_UP(info->dump_bytes); - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info); if (!reg) return -ENOMEM; diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_jd.c b/drivers/gpu/arm/b_r26p0/mali_kbase_jd.c index 0c580357357d..cb3d05feb2de 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_jd.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_jd.c @@ -188,13 +188,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - kbase_unmap_external_resource(katom->kctx, reg, alloc); + kbase_unmap_external_resource(katom->kctx, katom->extres[res_no]); } kfree(katom->extres); katom->extres = NULL; @@ -210,7 +204,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) { - int err_ret_val = -EINVAL; + int err = -EINVAL; u32 res_no; #ifdef CONFIG_MALI_DMA_FENCE struct kbase_dma_fence_resv_info info = { @@ -243,20 +237,10 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!katom->extres) return -ENOMEM; - /* copy user buffer to the end of our real buffer. 
- * Make sure the struct sizes haven't changed in a way - * we don't support */ - BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); - input_extres = (struct base_external_resource *) - (((unsigned char *)katom->extres) + - (sizeof(*katom->extres) - sizeof(*input_extres)) * - katom->nr_extres); - - if (copy_from_user(input_extres, - get_compat_pointer(katom->kctx, user_atom->extres_list), - sizeof(*input_extres) * katom->nr_extres) != 0) { - err_ret_val = -EINVAL; - goto early_err_out; + input_extres = kmalloc_array(katom->nr_extres, sizeof(*input_extres), GFP_KERNEL); + if (!input_extres) { + err = -ENOMEM; + goto failed_input_alloc; } #ifdef CONFIG_MALI_DMA_FENCE @@ -265,39 +249,44 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st sizeof(struct reservation_object *), GFP_KERNEL); if (!info.resv_objs) { - err_ret_val = -ENOMEM; - goto early_err_out; + err = -ENOMEM; + goto failed_input_copy; } info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), sizeof(unsigned long), GFP_KERNEL); if (!info.dma_fence_excl_bitmap) { - err_ret_val = -ENOMEM; - goto early_err_out; + err = -ENOMEM; + goto failed_input_copy; } } #endif /* CONFIG_MALI_DMA_FENCE */ + if (copy_from_user(input_extres, + get_compat_pointer(katom->kctx, user_atom->extres_list), + sizeof(*input_extres) * katom->nr_extres) != 0) { + err = -EINVAL; + goto failed_input_copy; + } + /* Take the processes mmap lock */ down_read(¤t->mm->mmap_sem); /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { - struct base_external_resource *res = &input_extres[res_no]; + struct base_external_resource *user_res = &input_extres[res_no]; struct kbase_va_region *reg; - struct kbase_mem_phy_alloc *alloc; #ifdef CONFIG_MALI_DMA_FENCE bool exclusive; - exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) + exclusive = (user_res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) ? true : false; #endif reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, - res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + katom->kctx, user_res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); /* did we find a matching region object? */ - if (kbase_is_region_invalid_or_free(reg)) { + if (unlikely(kbase_is_region_invalid_or_free(reg))) { /* roll back */ goto failed_loop; } @@ -307,12 +296,9 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; } - alloc = kbase_map_external_resource(katom->kctx, reg, - current->mm); - if (!alloc) { - err_ret_val = -EINVAL; + err = kbase_map_external_resource(katom->kctx, reg, current->mm); + if (err) goto failed_loop; - } #ifdef CONFIG_MALI_DMA_FENCE if (implicit_sync && @@ -326,14 +312,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st } #endif /* CONFIG_MALI_DMA_FENCE */ - /* finish with updating out array with the data we found */ - /* NOTE: It is important that this is the last thing we do (or - * at least not before the first write) as we overwrite elements - * as we loop and could be overwriting ourself, so no writes - * until the last read for an element. 
- * */ - katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = alloc; + katom->extres[res_no] = reg; } /* successfully parsed the extres array */ /* drop the vm lock now */ @@ -357,6 +336,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st } #endif /* CONFIG_MALI_DMA_FENCE */ + /* Free the buffer holding data from userspace */ + kfree(input_extres); /* all done OK */ return 0; @@ -371,19 +352,23 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kbase_gpu_vm_lock(katom->kctx); #endif - failed_loop: - /* undo the loop work */ +failed_loop: + /* undo the loop work. We are guaranteed to have access to the VA region + * as we hold a reference to it until it's unmapped + */ while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg = katom->extres[res_no]; - kbase_unmap_external_resource(katom->kctx, NULL, alloc); + kbase_unmap_external_resource(katom->kctx, reg); } kbase_gpu_vm_unlock(katom->kctx); /* Release the processes mmap lock */ up_read(¤t->mm->mmap_sem); - early_err_out: +failed_input_copy: + kfree(input_extres); +failed_input_alloc: kfree(katom->extres); katom->extres = NULL; #ifdef CONFIG_MALI_DMA_FENCE @@ -392,7 +377,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(info.dma_fence_excl_bitmap); } #endif - return err_ret_val; + return err; } static inline void jd_resolve_dep(struct list_head *out_list, diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_js.c b/drivers/gpu/arm/b_r26p0/mali_kbase_js.c index 1e7518cccaab..8b068e76b908 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_js.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_js.c @@ -656,6 +656,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /** diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c index c6175f6649c1..a70bcb9d06ad 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -362,6 +362,7 @@ int kbase_remove_va_region(struct kbase_va_region *reg) struct rb_node *rbnext; struct kbase_va_region *next = NULL; struct rb_root *reg_rbtree = NULL; + struct kbase_va_region *orig_reg = reg; int merged_front = 0; int merged_back = 0; @@ -422,6 +423,12 @@ int kbase_remove_va_region(struct kbase_va_region *reg) } rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); } + /* This operation is always safe because the function never frees + * the region. If the region has been merged to both front and back, + * then it's the previous region that is supposed to be freed. 
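+	 * A zeroed start_pfn also makes later teardown paths such as
+	 * kbase_gpu_munmap() treat the region as having no GPU mapping
+	 * (they return early when start_pfn == 0).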
+ */ + orig_reg->start_pfn = 0; + out: return err; @@ -701,6 +708,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1401,7 +1412,9 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) KBASE_EXPORT_TEST_API(kbase_free_alloced_region); -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align, + enum kbase_caller_mmu_sync_info mmu_sync_info) { int err; size_t i = 0; @@ -1445,9 +1458,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 alloc->imported.alias.aliased[i].length, reg->flags & gwt_mask, kctx->as_nr, - group_id); + group_id, mmu_sync_info); if (err) - goto bad_insert; + goto bad_aliased_insert; /* Note: mapping count is tracked at alias * creation time @@ -1458,10 +1471,11 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 kctx->aliasing_sink_page, alloc->imported.alias.aliased[i].length, (reg->flags & mask & gwt_mask) | attr, - group_id); + group_id, mmu_sync_info); + if (err) - goto bad_insert; + goto bad_aliased_insert; } } } else { @@ -1472,7 +1486,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, - group_id); + group_id, mmu_sync_info); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1498,17 +1512,23 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 reg->nr_pages - reg->gpu_alloc->nents, (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, - KBASE_MEM_GROUP_SINK); + KBASE_MEM_GROUP_SINK, mmu_sync_info); if (err) goto bad_insert; } return err; +bad_aliased_insert: + while (i-- > 0) { + + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + reg->nr_pages, kctx->as_nr); + + } + + bad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, reg->nr_pages, - kctx->as_nr); kbase_remove_va_region(reg); @@ -1517,12 +1537,13 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 KBASE_EXPORT_TEST_API(kbase_gpu_mmap); -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable); +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, + struct kbase_va_region *reg, bool writeable); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { int err = 0; + struct kbase_mem_phy_alloc *alloc; if (reg->start_pfn == 0) return 0; @@ -1530,17 +1551,40 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (!reg->gpu_alloc) return -EINVAL; + alloc = reg->gpu_alloc; /* Tear down down GPU page tables, depending on memory type. 
*/ switch (reg->gpu_alloc->type) { - case KBASE_MEM_TYPE_ALIAS: /* Fall-through */ + case KBASE_MEM_TYPE_ALIAS: /* Fall-through */{ + size_t i = 0; + + /* Due to the way the number of valid PTEs and ATEs are tracked + * currently, only the GPU virtual range that is backed & mapped + * should be passed to the kbase_mmu_teardown_pages() function, + * hence individual aliased regions needs to be unmapped + * separately. + */ + for (i = 0; i < alloc->imported.alias.nents; i++) { + if (alloc->imported.alias.aliased[i].alloc) { + int err_loop = kbase_mmu_teardown_pages( + kctx->kbdev, &kctx->mmu, + reg->start_pfn + (i * alloc->imported.alias.stride), + alloc->pages + (i * alloc->imported.alias.stride), + alloc->imported.alias.aliased[i].length, + kctx->as_nr); + if (WARN_ON_ONCE(err_loop)) + err = err_loop; + } + } + } + break; case KBASE_MEM_TYPE_IMPORTED_UMM: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, reg->nr_pages, kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, kctx->as_nr); break; default: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, kbase_reg_current_backed_size(reg), - kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr); break; } @@ -1561,8 +1605,9 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* The allocation could still have active mappings. */ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, - (reg->flags & KBASE_REG_GPU_WR)); + kbase_jd_user_buf_unmap(kctx, alloc, reg, + (reg->flags & + (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); } } } @@ -1712,6 +1757,7 @@ void kbase_sync_single(struct kbase_context *kctx, src = ((unsigned char *)kmap(gpu_page)) + offset; dst = ((unsigned char *)kmap(cpu_page)) + offset; } + memcpy(dst, src, size); kunmap(gpu_page); kunmap(cpu_page); @@ -2103,7 +2149,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, &kctx->mem_pools.large[alloc->group_id], nr_lp * (SZ_2M / SZ_4K), tp, - true); + true, kctx->task); if (res > 0) { nr_left -= res; @@ -2157,7 +2203,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, err = kbase_mem_pool_grow( &kctx->mem_pools.large[alloc->group_id], - 1); + 1, kctx->task); if (err) break; } while (1); @@ -2204,7 +2250,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, if (nr_left) { res = kbase_mem_pool_alloc_pages( &kctx->mem_pools.small[alloc->group_id], - nr_left, tp, false); + nr_left, tp, false, kctx->task); if (res <= 0) goto alloc_failed; } @@ -3552,7 +3598,8 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_alloc_info *info, struct kbase_va_region *reg, - struct kbase_sub_alloc **prealloc_sas) + struct kbase_sub_alloc **prealloc_sas, + enum kbase_caller_mmu_sync_info mmu_sync_info) { size_t delta; size_t pages_required; @@ -3614,7 +3661,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, spin_unlock(&kctx->mem_partials_lock); kbase_gpu_vm_unlock(kctx); - ret = kbase_mem_pool_grow(pool, pool_delta); + ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); kbase_gpu_vm_lock(kctx); if (ret) @@ -3649,7 +3696,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, spin_unlock(&kctx->mem_partials_lock); ret = kbase_mem_grow_gpu_mapping(kctx, reg, 
info->commit_pages, - old_size); + old_size, mmu_sync_info); /* * The grow failed so put the allocation back in the * pool and return failure. @@ -3858,6 +3905,11 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; + /* Calls to this function are inherently synchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; + lockdep_assert_held(&kctx->jctx.lock); if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) @@ -3946,7 +3998,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * so any state protected by that lock might need to be * re-evaluated if more code is added here in future. */ - ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, + mmu_sync_info); + #if MALI_JIT_PRESSURE_LIMIT_BASE if (!ignore_pressure_limit) @@ -4008,7 +4062,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_gpu_vm_unlock(kctx); reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, - info->extent, &flags, &gpu_addr); + info->extent, &flags, &gpu_addr, mmu_sync_info); if (!reg) { /* Most likely not enough GPU virtual space left for * the new JIT allocation. @@ -4300,6 +4354,8 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, struct mm_struct *mm = alloc->imported.user_buf.mm; long pinned_pages; long i; + int write; + lockdep_assert_held(&kctx->reg_lock); if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) return -EINVAL; @@ -4314,42 +4370,47 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) return -EINVAL; + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, #if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + write ? FOLL_WRITE : 0, pages, NULL); #else - reg->flags & KBASE_REG_GPU_WR, + write, 0, pages, NULL); #endif #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, + write, 0, pages, NULL); #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + write ? FOLL_WRITE : 0, pages, NULL); +#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE + pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, + write ? FOLL_WRITE : 0, pages, NULL, NULL); #else - pinned_pages = get_user_pages_remote(NULL, mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, - pages, NULL, NULL); + pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages, + write ? 
FOLL_WRITE : 0, pages, NULL, NULL); #endif if (pinned_pages <= 0) return pinned_pages; if (pinned_pages != alloc->imported.user_buf.nr_pages) { + /* Above code already ensures there will not have been a CPU + * mapping by ensuring alloc->nents is 0 + */ for (i = 0; i < pinned_pages; i++) put_page(pages[i]); return -ENOMEM; @@ -4363,46 +4424,68 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct kbase_va_region *reg) { - long pinned_pages; + int err; + long pinned_pages = 0; struct kbase_mem_phy_alloc *alloc; struct page **pages; struct tagged_addr *pa; long i; - unsigned long address; struct device *dev; - unsigned long offset; - unsigned long local_size; unsigned long gwt_mask = ~0; - int err = kbase_jd_user_buf_pin_pages(kctx, reg); + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + lockdep_assert_held(&kctx->reg_lock); + + err = kbase_jd_user_buf_pin_pages(kctx, reg); if (err) return err; alloc = reg->gpu_alloc; pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; + + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. + */ + + for (i = 0; i < pinned_pages; i++) { dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif + + err = dma_mapping_error(dev, dma_addr); + if (err) goto unwind; alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4413,18 +4496,40 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id); + alloc->group_id, mmu_sync_info); if (err == 0) return 0; /* fall down */ unwind: alloc->nents = 0; + + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. 
This is precautionary measure in case a GPU job has taken + * advantage of a partially GPU-mapped range to write and corrupt the + * content of memory, either inside or outside the imported region. + * + * Notice that this error recovery path doesn't try to be optimal and just + * flushes the entire page range. + */ + while (i--) { - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); - } + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif + } + + /* The user buffer could already have been previously pinned before + * entering this function, and hence there could potentially be CPU + * mappings of it + */ + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); while (++i < pinned_pages) { put_page(pages[i]); @@ -4438,28 +4543,119 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT * have a corresponding call to kbase_jd_user_buf_unpin_pages(). */ -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable) +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, + struct kbase_va_region *reg, bool writeable) { long i; struct page **pages; + unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; unsigned long size = alloc->imported.user_buf.size; + lockdep_assert_held(&kctx->reg_lock); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); pages = alloc->imported.user_buf.pages; + +#if !MALI_USE_CSF + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); +#else + CSTD_UNUSED(reg); +#endif + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; + unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page); + /* Notice: this is a temporary variable that is used for DMA sync + * operations, and that could be incremented by an offset if the + * current page contains both imported and non-imported memory + * sub-regions. + * + * It is valid to add an offset to this value, because the offset + * is always kept within the physically contiguous dma-mapped range + * and there's no need to translate to physical address to offset it. + * + * This variable is not going to be used for the actual DMA unmap + * operation, that shall always use the original DMA address of the + * whole memory page. + */ + + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. 
+ * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expect value of the variables + * used in this loop in the corner case of an imported region encloed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - offset_within_page + * | |/ + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. + */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } + + /* Notice: use the original DMA address to unmap the whole memory page. 
*/ + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + #endif + + + + if (writeable) set_page_dirty_lock(pages[i]); put_page(pages[i]); pages[i] = NULL; - size -= local_size; + size -= imported_size; } alloc->nents = 0; } @@ -4504,11 +4700,11 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, return 0; } -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm) +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm) { - int err; + int err = 0; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; lockdep_assert_held(&kctx->reg_lock); @@ -4517,14 +4713,14 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents)) - goto exit; + return -EINVAL; reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { err = kbase_jd_user_buf_map(kctx, reg); if (err) { reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto exit; + return err; } } } @@ -4532,21 +4728,29 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_UMM: { err = kbase_mem_umm_map(kctx, reg); if (err) - goto exit; + return err; break; } default: - goto exit; + WARN(1, "Invalid external resource GPU allocation type (%x) on mapping", + alloc->type); + return -EINVAL; } - return kbase_mem_phy_alloc_get(reg->gpu_alloc); -exit: - return NULL; + kbase_va_region_alloc_get(kctx, reg); + kbase_mem_phy_alloc_get(alloc); + return err; } -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) { + /* gpu_alloc was used in kbase_map_external_resources, so we need to use it for the + * unmapping operation. 
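The same version-gated unmap sequence recurs at every unmap site in this patch, because dma_unmap_page_attrs() taking a plain DMA_ATTR_SKIP_CPU_SYNC flag is only available from kernel 4.10 onwards, which is why older kernels fall back to dma_unmap_page(). As a sketch, the repeated branch could be factored into one helper (hypothetical name, not part of the patch):

#include <linux/version.h>
#include <linux/dma-mapping.h>

/* Unmap one user-buffer page without letting the DMA API touch CPU caches;
 * the callers above have already performed the cache maintenance manually.
 */
static void user_buf_dma_unmap_page(struct device *dev, dma_addr_t dma_addr)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
	/* No dma_unmap_page_attrs() before 4.10: plain unmap is the only option. */
	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
	dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
			     DMA_ATTR_SKIP_CPU_SYNC);
#endif
}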
+ */ + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + + lockdep_assert_held(&kctx->reg_lock); + switch (alloc->type) { case KBASE_MEM_TYPE_IMPORTED_UMM: { kbase_mem_umm_unmap(kctx, reg, alloc); @@ -4558,26 +4762,32 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, if (0 == alloc->imported.user_buf.current_mapping_usage_count) { bool writeable = true; - if (!kbase_is_region_invalid_or_free(reg) && - reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - kbase_reg_current_backed_size(reg), - kctx->as_nr); + if (!kbase_is_region_invalid_or_free(reg)) { + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, + kbase_reg_current_backed_size(reg), + kctx->as_nr); + } - if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) + if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) writeable = false; - kbase_jd_user_buf_unmap(kctx, alloc, writeable); + kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); } } break; default: - break; + WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", + alloc->type); + return; } kbase_mem_phy_alloc_put(alloc); + kbase_va_region_alloc_put(kctx, reg); +} + +static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) +{ + return reg->start_pfn << PAGE_SHIFT; } struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( @@ -4593,7 +4803,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * metadata which matches the region which is being acquired. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { - if (walker->gpu_addr == gpu_addr) { + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { meta = walker; meta->ref++; break; @@ -4605,8 +4815,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( struct kbase_va_region *reg; /* Find the region */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) goto failed; @@ -4619,13 +4828,15 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * Fill in the metadata object and acquire a reference * for the physical resource. */ - meta->alloc = kbase_map_external_resource(kctx, reg, NULL); - meta->ref = 1; + meta->reg = reg; - if (!meta->alloc) + /* Map the external resource to the GPU allocation of the region + * and acquire the reference to the VA region + */ + if (kbase_map_external_resource(kctx, meta->reg, NULL)) goto fail_map; - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + meta->ref = 1; list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); } @@ -4650,7 +4861,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) * metadata which matches the region which is being released. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) - if (walker->gpu_addr == gpu_addr) + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) return walker; return NULL; @@ -4659,14 +4870,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) static void release_sticky_resource_meta(struct kbase_context *kctx, struct kbase_ctx_ext_res_meta *meta) { - struct kbase_va_region *reg; - - /* Drop the physical memory reference and free the metadata. 
*/ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, - meta->gpu_addr); - - kbase_unmap_external_resource(kctx, reg, meta->alloc); + kbase_unmap_external_resource(kctx, meta->reg); list_del(&meta->ext_res_node); kfree(meta); } diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h index c228661ff59b..d17a94a4d780 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -805,7 +805,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * @pages: Pointer to array where the physical address of the allocated * pages will be stored. * @partial_allowed: If fewer pages allocated is allowed - * + * @page_owner: Pointer to the task that created the Kbase context for which + * the pages are being allocated. It can be NULL if the pages + * won't be associated with any Kbase context. * Like kbase_mem_pool_alloc() but optimized for allocating many pages. * * Return: @@ -821,7 +823,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. */ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed); + struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner); /** * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool @@ -933,13 +935,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); * kbase_mem_pool_grow - Grow the pool * @pool: Memory pool to grow * @nr_to_grow: Number of pages to add to the pool - * + * @page_owner: Pointer to the task that created the Kbase context for which + * the memory pool is being grown. It can be NULL if the pages + * to be allocated won't be associated with any Kbase context. * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to * become larger than the maximum size specified. * * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages */ -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,struct task_struct *page_owner); /** * kbase_mem_pool_trim - Grow or shrink the pool to a new size @@ -1096,7 +1100,9 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size * * Call kbase_add_va_region() and map the region on the GPU. */ -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align, + enum kbase_caller_mmu_sync_info mmu_sync_info); /** * @brief Remove the region from the GPU and unregister it. @@ -1148,6 +1154,7 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); +#if defined(CONFIG_MALI_VECTOR_DUMP) /** Dump the MMU tables to a buffer * * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. 
If the @@ -1164,7 +1171,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); * small) */ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); - +#endif /** * kbase_sync_now - Perform cache maintenance on a memory region * @@ -1707,25 +1714,28 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx); /** * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. - * @reg: The region to map. + * @reg: External resource to map. * @locked_mm: The mm_struct which has been locked for this operation. * - * Return: The physical allocation which backs the region on success or NULL - * on failure. + * On successful mapping, the VA region and the gpu_alloc refcounts will be + * increased, making it safe to use and store both values directly. + * + * Return: Zero on success, or negative error code. */ -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm); +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm); /** * kbase_unmap_external_resource - Unmap an external resource from the GPU. * @kctx: kbase context. - * @reg: The region to unmap or NULL if it has already been released. - * @alloc: The physical allocation being unmapped. + * @reg: VA region corresponding to external resource + * + * On successful unmapping, the VA region and the gpu_alloc refcounts will + * be decreased. If the refcount reaches zero, both @reg and the corresponding + * allocation may be freed, so using them after returning from this function + * requires the caller to explicitly check their state. */ -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); - +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg); /** * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer. @@ -1967,4 +1977,34 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; } +/* + * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process + */ +static inline void kbase_mem_mmgrab(void) +{ + /* This merely takes a reference on the memory descriptor structure + * i.e. mm_struct of current process and not on its address space and + * so won't block the freeing of address space on process exit. + */ +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + atomic_inc(¤t->mm->mm_count); +#else + mmgrab(current->mm); +#endif +} +/** + * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed + * @kctx: Pointer to kbase context + * + * Don't allow the allocation of GPU memory if the ioctl has been issued + * from the forked child process using the mali device file fd inherited from + * the parent process. + * + * Return: true if allocation is allowed. + */ +static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx) +{ + return (kctx->process_mm == current->mm); +} + #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c index f88348e9c538..ced2b068817a 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
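A sketch of the acquire/release pairing implied by the kernel-doc above for kbase_map_external_resource()/kbase_unmap_external_resource(). The caller shown here is hypothetical and not part of the patch, and it assumes kbase_gpu_vm_lock()/kbase_gpu_vm_unlock() take kctx->reg_lock as elsewhere in the driver.

static int use_external_resource(struct kbase_context *kctx, u64 gpu_addr)
{
	struct kbase_va_region *reg;
	int err;

	kbase_gpu_vm_lock(kctx);	/* assumed to take kctx->reg_lock */

	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
	if (kbase_is_region_invalid_or_free(reg)) {
		kbase_gpu_vm_unlock(kctx);
		return -EINVAL;
	}

	err = kbase_map_external_resource(kctx, reg, NULL);
	if (!err) {
		/* ... reg and reg->gpu_alloc hold extra references here ... */
		kbase_unmap_external_resource(kctx, reg);
		/* reg may have been freed at this point; do not touch it again. */
	}

	kbase_gpu_vm_unlock(kctx);
	return err;
}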
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -272,7 +273,8 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, - u64 *gpu_va) + u64 *gpu_va, + enum kbase_caller_mmu_sync_info mmu_sync_info) { int zone; struct kbase_va_region *reg; @@ -431,7 +433,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, *gpu_va = (u64) cookie; } else /* we control the VA */ { - if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) { + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1,mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); kbase_gpu_vm_unlock(kctx); goto no_mmap; @@ -796,6 +798,11 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) struct kbase_context *kctx = gpu_alloc->imported.native.kctx; int err = 0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kctx->reg_lock); mutex_lock(&kctx->jit_evict_lock); @@ -826,7 +833,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) if (!err) err = kbase_mem_grow_gpu_mapping(kctx, gpu_alloc->reg, - gpu_alloc->evicted, 0); + gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; } @@ -881,6 +888,15 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations + * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. + * This would usually include JIT allocations, Tiler heap related allocations + * & GPU queue ringbuffer and none of them needs to be explicitly marked + * as evictable by Userspace. + */ + if (reg->flags & KBASE_REG_NO_USER_FREE) + goto out_unlock; + /* Is the region being transitioning between not needed and needed? */ prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; @@ -1182,6 +1198,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc; unsigned long gwt_mask = ~0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. 
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kctx->reg_lock); alloc = reg->gpu_alloc; @@ -1215,7 +1236,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id); + alloc->group_id, mmu_sync_info); if (err) goto bad_insert; @@ -1234,7 +1255,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, reg->nr_pages - alloc->nents, (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, - KBASE_MEM_GROUP_SINK); + KBASE_MEM_GROUP_SINK, mmu_sync_info); if (err) goto bad_pad_insert; } @@ -1242,11 +1263,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, return 0; bad_pad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - alloc->nents, - kctx->as_nr); + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + alloc->nents, kctx->as_nr); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1274,11 +1292,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { int err; - err = kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - reg->nr_pages, - kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, kctx->as_nr); WARN_ON(err); } @@ -1480,6 +1495,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + struct tagged_addr *pa; + struct device *dev; int write; /* Flag supported only for dma-buf imported memory */ @@ -1621,31 +1638,48 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE reg->gpu_alloc->nents = 0; reg->extent = 0; + pa = kbase_get_gpu_phy_pages(reg); + dev = kctx->kbdev->dev; + if (pages) { - struct device *dev = kctx->kbdev->dev; - unsigned long local_size = user_buf->size; - unsigned long offset = user_buf->address & ~PAGE_MASK; - struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. 
+ */ + + for (i = 0; i < faulted_pages; i++) { dma_addr_t dma_addr; - unsigned long min; + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + #endif - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + } reg->gpu_alloc->nents = faulted_pages; @@ -1654,10 +1688,22 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE return reg; unwind_dma_map: + + /* Run the unmap loop in the same order as the map loop, and perform + * CPU cache synchronization again to re-write the content of dirty CPU caches + * to memory. This precautionary measure is kept here to keep this code + * aligned with kbase_jd_user_buf_map() to allow for a potential refactor + * in the future. + */ while (i--) { - dma_unmap_page(kctx->kbdev->dev, - user_buf->dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = user_buf->dma_addrs[i]; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif } fault_mismatch: if (pages) { @@ -1673,7 +1719,6 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE no_region: bad_size: return NULL; - } @@ -1686,6 +1731,11 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, size_t i; bool coherent; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(flags); KBASE_DEBUG_ASSERT(ai); @@ -1711,6 +1761,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, if (!nents) goto bad_nents; + if (stride > U64_MAX / nents) + goto bad_size; + if ((nents * stride) > (U64_MAX / PAGE_SIZE)) /* 64-bit address range is the max */ goto bad_size; @@ -1855,7 +1908,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, #else if (1) { #endif - if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { + if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1, mmu_sync_info) != 0) { dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); goto no_mmap; } @@ -1900,6 +1953,11 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, { struct kbase_va_region *reg; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. 
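The stride check added to kbase_mem_alias() above guards the later nents * stride multiplication against wrap-around. A minimal sketch of the reasoning, mirroring the two checks in the hunk (the helper name is illustrative only, not part of the patch):

static bool alias_layout_is_sane(u64 nents, u64 stride)
{
	if (!nents)
		return false;
	/* stride <= U64_MAX / nents guarantees nents * stride cannot wrap a u64 */
	if (stride > U64_MAX / nents)
		return false;
	/* the aliased range in bytes must also fit in 64 bits */
	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
		return false;
	return true;
}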
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(gpu_va); KBASE_DEBUG_ASSERT(va_pages); @@ -1932,7 +1990,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - + if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { + dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); + goto bad_flags; + } if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { dev_warn(kctx->kbdev->dev, "padding is only supported for UMM"); @@ -1999,7 +2060,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { /* we control the VA, mmap now to the GPU */ - if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0) + if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) != 0) goto no_gpu_va; /* return real GPU VA */ *gpu_va = reg->start_pfn << PAGE_SHIFT; @@ -2034,7 +2095,8 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) + u64 new_pages, u64 old_pages, + enum kbase_caller_mmu_sync_info mmu_sync_info) { struct tagged_addr *phy_pages; u64 delta = new_pages - old_pages; @@ -2046,7 +2108,8 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, phy_pages = kbase_get_gpu_phy_pages(reg); ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, phy_pages + old_pages, delta, - reg->flags, kctx->as_nr, reg->gpu_alloc->group_id); + reg->flags, kctx->as_nr, reg->gpu_alloc->group_id, + mmu_sync_info); return ret; } @@ -2084,10 +2147,11 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, u64 const new_pages, u64 const old_pages) { u64 delta = old_pages - new_pages; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; int ret = 0; - ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + new_pages, delta, kctx->as_nr); + ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, + alloc->pages + new_pages, delta, kctx->as_nr); return ret; } @@ -2100,6 +2164,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) struct kbase_va_region *reg; bool read_locked = false; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(gpu_addr != 0); @@ -2147,6 +2216,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (reg->flags & KBASE_REG_DONT_NEED) goto out_unlock; + if (reg->flags & KBASE_REG_NO_USER_FREE) + goto out_unlock; + #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED /* Reject resizing commit size */ if (reg->flags & KBASE_REG_PF_GROW) @@ -2189,7 +2261,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) /* Update GPU mapping. 
*/ res = kbase_mem_grow_gpu_mapping(kctx, reg, - new_pages, old_pages); + new_pages, old_pages, mmu_sync_info); /* On error free the new pages */ if (res) { @@ -2521,7 +2593,6 @@ static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) while (kbase_jit_evict(kctx)) ; } -#endif static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, @@ -2538,9 +2609,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; -#ifdef CONFIG_MALI_VECTOR_DUMP kbase_free_unused_jit_allocations(kctx); -#endif kaddr = kbase_mmu_dump(kctx, nr_pages); @@ -2588,7 +2657,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, out: return err; } - +#endif void kbase_os_mem_map_lock(struct kbase_context *kctx) { @@ -2614,6 +2683,10 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, struct kbase_va_region *reg; int err = 0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; *aligned_offset = 0; dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); @@ -2647,7 +2720,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, *nr_pages = kbase_reg_current_backed_size(reg); if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, - reg->nr_pages, 1) != 0) { + reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); /* Unable to map in GPU space. */ WARN_ON(1); @@ -2726,6 +2799,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, err = -EINVAL; goto out_unlock; case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): +#if defined(CONFIG_MALI_VECTOR_DUMP) /* MMU dump */ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); if (0 != err) @@ -2733,6 +2807,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, /* free the region on munmap */ free_on_close = 1; break; +#else + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... 
PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, @@ -2807,13 +2886,13 @@ int kbase_context_mmap(struct kbase_context *const kctx, err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); - +#if defined(CONFIG_MALI_VECTOR_DUMP) if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on * the pages, so we can now free the kernel mapping */ vfree(kaddr); } - +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: kbase_gpu_vm_unlock(kctx); out: @@ -2948,6 +3027,10 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto out_unlock; + + /* check access permissions can be satisfied * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ @@ -3029,73 +3112,23 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { - struct mm_struct *mm; - - rcu_read_lock(); - mm = rcu_dereference(kctx->process_mm); - if (mm) { - atomic_add(pages, &kctx->nonmapped_pages); + struct mm_struct *mm = kctx->process_mm; + if (unlikely(!mm)) + return; + atomic_add(pages, &kctx->nonmapped_pages); #ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); -#else - spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); - spin_unlock(&mm->page_table_lock); -#endif - } - rcu_read_unlock(); -} - -static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) -{ - int pages; - struct mm_struct *mm; - - spin_lock(&kctx->mm_update_lock); - mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); - if (!mm) { - spin_unlock(&kctx->mm_update_lock); - return; - } - - rcu_assign_pointer(kctx->process_mm, NULL); - spin_unlock(&kctx->mm_update_lock); - synchronize_rcu(); - - pages = atomic_xchg(&kctx->nonmapped_pages, 0); -#ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); #else spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); spin_unlock(&mm->page_table_lock); #endif } -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx; - - kctx = vma->vm_private_data; - kbasep_os_process_page_usage_drain(kctx); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .close = kbase_special_vm_close, -}; - static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { - /* check that this is the only tracking page */ - spin_lock(&kctx->mm_update_lock); - if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { - spin_unlock(&kctx->mm_update_lock); - return -EFAULT; - } - - rcu_assign_pointer(kctx->process_mm, current->mm); - - spin_unlock(&kctx->mm_update_lock); + if (vma_pages(vma) != 1) + return -EINVAL; /* no real access */ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); @@ -3104,9 +3137,7 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_ #else vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; #endif - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; - return 0; + return 0; } diff --git 
a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h index 0febd3059e7c..43ceb7db5c35 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h @@ -57,7 +57,7 @@ struct kbase_mem_phy_alloc; */ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, - u64 *gpu_va); + u64 *gpu_va, enum kbase_caller_mmu_sync_info mmu_sync_info); /** * kbase_mem_query - Query properties of a GPU memory region @@ -186,7 +186,8 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx); */ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); + u64 new_pages, u64 old_pages, + enum kbase_caller_mmu_sync_info mmu_sync_info); /** * kbase_mem_evictable_make - Make a physical allocation eligible for eviction diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h index 70116030f233..2e408b193842 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h @@ -53,6 +53,8 @@ struct tagged_addr { phys_addr_t tagged_addr; }; #define HUGE_HEAD (1u << 1) #define FROM_PARTIAL (1u << 2) +#define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M / SZ_4K) + /* * Note: if macro for converting physical address to page is not defined * in the kernel itself, it is defined hereby. This is to avoid build errors @@ -163,4 +165,20 @@ static inline bool is_partial(struct tagged_addr t) return t.tagged_addr & FROM_PARTIAL; } +/** + * index_in_large_page() - Get index of a 4KB page within a 2MB page which + * wasn't split to be used partially. + * + * @t: Tagged physical address of the physical 4KB page that lies within + * the large (or 2 MB) physical page. + * + * Return: Index of the 4KB page within a 2MB page + */ +static inline unsigned int index_in_large_page(struct tagged_addr t) +{ + WARN_ON(!is_huge(t)); + + return (PFN_DOWN(as_phys_addr_t(t)) & (NUM_4K_PAGES_IN_2MB_PAGE - 1)); +} + #endif /* _KBASE_LOWLEVEL_H */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c index 0723e32e2003..e6cb24e508ae 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,11 @@ #include #include #include - +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #define pool_dbg(pool, format, ...) \ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ (pool->next_pool) ? "kctx" : "kbdev", \ @@ -38,6 +42,46 @@ #define NOT_DIRTY false #define NOT_RECLAIMED false +/** +* can_alloc_page() - Check if the current thread can allocate a physical page +* +* @pool: Pointer to the memory pool. +* @page_owner: Pointer to the task/process that created the Kbase context +* for which a page needs to be allocated. It can be NULL if +* the page won't be associated with Kbase context. +* @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. 
+* +* This function checks if the current thread is a kernel thread and can make a +* request to kernel to allocate a physical page. If the kernel thread is allocating +* a page for the Kbase context and the process that created the context is exiting +* or is being killed, then there is no point in doing a page allocation. +* +* The check done by the function is particularly helpful when the system is running +* low on memory. When a page is allocated from the context of a kernel thread, OoM +* killer doesn't consider the kernel thread for killing and kernel keeps retrying +* to allocate the page as long as the OoM killer is able to kill processes. +* The check allows kernel thread to quickly exit the page allocation loop once OoM +* killer has initiated the killing of @page_owner, thereby unblocking the context +* termination for @page_owner and freeing of GPU memory allocated by it. This helps +* in preventing the kernel panic and also limits the number of innocent processes +* that get killed. +* +* Return: true if the page can be allocated otherwise false. +*/ +static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner, + const bool alloc_from_kthread) +{ + if (likely(!alloc_from_kthread || !page_owner)) + return true; + + if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) { + dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm, + task_pid_nr(page_owner)); + return false; + } + + return true; +} static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) { @@ -241,11 +285,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, } int kbase_mem_pool_grow(struct kbase_mem_pool *pool, - size_t nr_to_grow) + size_t nr_to_grow, struct task_struct *page_owner) { struct page *p; size_t i; - + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); kbase_mem_pool_lock(pool); pool->dont_reclaim = true; @@ -258,6 +302,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, return -ENOMEM; } kbase_mem_pool_unlock(pool); + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + return -ENOMEM; p = kbase_mem_alloc_page(pool); if (!p) { @@ -290,7 +336,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) if (new_size < cur_size) kbase_mem_pool_shrink(pool, cur_size - new_size); else if (new_size > cur_size) - err = kbase_mem_pool_grow(pool, new_size - cur_size); + err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL); if (err) { size_t grown_size = kbase_mem_pool_size(pool); @@ -553,13 +599,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, } int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed) + struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner) { struct page *p; size_t nr_from_pool; size_t i = 0; int err = -ENOMEM; size_t nr_pages_internal; + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); nr_pages_internal = nr_4k_pages / (1u << (pool->order)); @@ -591,7 +638,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, if (i != nr_4k_pages && pool->next_pool) { /* Allocate via next pool */ err = kbase_mem_pool_alloc_pages(pool->next_pool, - nr_4k_pages - i, pages + i, partial_allowed); + nr_4k_pages - i, pages + i, partial_allowed,page_owner); if (err < 0) goto err_rollback; @@ -600,6 +647,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, 
size_t nr_4k_pages, } else { /* Get any remaining pages from kernel */ while (i != nr_4k_pages) { + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + goto err_rollback; + p = kbase_mem_alloc_page(pool); if (!p) { if (partial_allowed) diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c index 127d8aeeeae8..e3286a328a4b 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c @@ -99,7 +99,8 @@ static int kbasep_read_soft_event_status( unsigned char *mapped_evt; struct kbase_vmap_struct map; - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), + KBASE_REG_CPU_RD, &map); if (!mapped_evt) return -EFAULT; @@ -120,7 +121,8 @@ static int kbasep_write_soft_event_status( (new_status != BASE_JD_SOFT_EVENT_RESET)) return -EINVAL; - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), + KBASE_REG_CPU_WR, &map); if (!mapped_evt) return -EFAULT; @@ -278,8 +280,6 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) struct device *dev = kctx->kbdev->dev; int i; - dev_warn(dev,"kbase_fence_debug_check_atom\n"); - for (i = 0; i < 2; i++) { struct kbase_jd_atom *dep; @@ -506,6 +506,7 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) kbase_js_sched_all(katom->kctx->kbdev); } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -762,8 +763,18 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, } #endif -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) { unsigned int i; unsigned int target_page_nr = 0; @@ -875,7 +886,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } - +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) int kbasep_jit_alloc_validate(struct kbase_context *kctx, @@ -972,6 +983,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) jit_info_copy_size_for_jit_version[kctx->jit_version]; WARN_ON(jit_info_user_copy_size > sizeof(*info)); + if (!kbase_mem_allow_alloc(kctx)) { + dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", + current->comm, current->pid, kctx->tgid, kctx->id); + ret = -EINVAL; + goto fail; + } + /* For backwards compatibility, and to prevent reading more than 1 jit * info struct on jit version 1 */ @@ -1207,8 +1225,8 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) * Write the address of the JIT allocation to the user provided * GPU allocation. 
*/ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); + ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), + KBASE_REG_CPU_WR, &mapping); if (!ptr) { /* * Leave the allocations "live" as the JIT free atom @@ -1506,10 +1524,11 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr)) goto failed_loop; - } else + } else { if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr)) failed = true; + } } /* @@ -1598,6 +1617,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_EVENT_RESET: kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: { int res = kbase_debug_copy(katom); @@ -1606,6 +1626,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) katom->event_code = BASE_JD_EVENT_JOB_INVALID; break; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: ret = kbase_jit_allocate_process(katom); break; @@ -1722,8 +1743,10 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) if (katom->jc == 0) return -EINVAL; break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: return kbase_debug_copy_prepare(katom); +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_EXT_RES_MAP: return kbase_ext_res_prepare(katom); case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: @@ -1755,9 +1778,11 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) kbase_sync_fence_in_remove(katom); break; #endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: kbase_debug_copy_finish(katom); break; +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: kbase_jit_allocate_finish(katom); break; diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c index 75940fb08a05..b8575dba4e96 100644 --- a/drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c @@ -263,12 +263,14 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); if (fd < 0) { sync_fence_put(fence); + katom->fence = NULL; goto out; } #else fd = get_unused_fd(); if (fd < 0) { sync_fence_put(fence); + katom->fence = NULL; goto out; } @@ -283,13 +285,18 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) spin_unlock(&files->file_lock); #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ + /* Take an extra reference count on the created fence file */ + get_file(fence->file); /* bind fence to the new fd */ sync_fence_install(fence, fd); - katom->fence = sync_fence_fdget(fd); - if (katom->fence == NULL) { - /* The only way the fence can be NULL is if userspace closed it - * for us, so we don't need to clear it up */ + /* Drop the extra reference count */ + fput(fence->file); + + if (katom->fence != fence) { + if (katom->fence) + sync_fence_put(katom->fence); + katom->fence = NULL; fd = -EINVAL; goto out; } diff --git a/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c index cc233c993b10..321dd6d08d22 100644 --- a/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c +++ 
b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c @@ -65,15 +65,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, u32 const exception_data = (status >> 8) & 0xFFFFFF; int const as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at VA 0x%016llX\n" + "GPU bus fault in AS%d at VA %pK\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "exception data 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, status, exception_type, kbase_gpu_exception_name(exception_type), exception_data, diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c index a919e0302a66..54102048c921 100644 --- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,44 +46,287 @@ #include #include -#define KBASE_MMU_PAGE_ENTRIES 512 +/* Threshold used to decide whether to flush full caches or just a physical range */ +#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20 +#define MGM_DEFAULT_PTE_GROUP (0) /* MALI_SEC_INTEGRATION */ #include +/* Macro to convert updated PDGs to flags indicating levels skip in flush */ +#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) + +/* Small wrapper function to factor out GPU-dependent context releasing */ +static void release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +#if MALI_USE_CSF + CSTD_UNUSED(kbdev); + kbase_ctx_sched_release_ctx_lock(kctx); +#else /* MALI_USE_CSF */ + kbasep_js_runpool_release_ctx(kbdev, kctx); +#endif /* MALI_USE_CSF */ +} + +static void mmu_hw_operation_begin(struct kbase_device *kbdev) +{ +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) +#if MALI_USE_CSF + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { + unsigned long flags; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON_ONCE(kbdev->mmu_hw_operation_in_progress); + kbdev->mmu_hw_operation_in_progress = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif /* MALI_USE_CSF */ +#endif /* !CONFIG_MALI_NO_MALI */ +} + +static void mmu_hw_operation_end(struct kbase_device *kbdev) +{ +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) +#if MALI_USE_CSF + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { + unsigned long flags; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON_ONCE(!kbdev->mmu_hw_operation_in_progress); + kbdev->mmu_hw_operation_in_progress = false; + /* Invoke the PM state machine, the L2 power off may have been + * skipped due to the MMU command. + */ + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif /* MALI_USE_CSF */ +#endif /* !CONFIG_MALI_NO_MALI */ +} + /** - * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. 
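A worked example for the pgd_level_to_skip_flush() macro defined above (illustrative function, not part of the patch): each bit in dirty_pgds records a page table level that was written, and the macro inverts that into the set of levels the flush may skip.

static u64 example_skip_mask(void)
{
	/* A page table update that wrote PGDs only at levels 2 and 3: */
	const u64 dirty_pgds = (1ULL << 2) | (1ULL << 3);	/* 0xC */

	/* ~0xC & 0xF == 0x3, i.e. the flush may skip levels 0 and 1. */
	return pgd_level_to_skip_flush(dirty_pgds);
}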
+ * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done + * through GPU_CONTROL interface + * @kbdev: kbase device to check GPU model ID on. * - * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. + * This function returns whether a cache flush for page table update should + * run through GPU_CONTROL interface or MMU_AS_CONTROL interface. + * + * Return: True if cache flush should be done on GPU command. + */ +static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) +{ + uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id & + GPU_ID2_ARCH_MAJOR) >> + GPU_ID2_ARCH_MAJOR_SHIFT; + + return arch_maj_cur > 11; +} + +/** + * mmu_flush_pa_range() - Flush physical address range * - * If sync is not set then transactions still in flight when the flush is issued - * may use the old page tables and the data they write will not be written out - * to memory, this function returns after the flush has been issued but - * before all accesses which might effect the flushed region have completed. + * @kbdev: kbase device to issue the MMU operation on. + * @phys: Starting address of the physical range to start the operation on. + * @nr_bytes: Number of bytes to work on. + * @op: Type of cache flush operation to perform. * - * If sync is set then accesses in the flushed region will be drained - * before data is flush and invalidated through L1, L2 and into memory, - * after which point this function will return. + * Issue a cache flush physical range command. */ -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync); /** - * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches. - * @kbdev: Device pointer. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. - * @as_nr: GPU address space number for which flush + invalidate is required. + * mmu_invalidate() - Perform an invalidate operation on MMU caches. + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. + * + * Perform an MMU invalidate operation on a particual address space + * by issuing a UNLOCK command. + */ +static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, + const struct kbase_mmu_hw_op_param *op_param) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + as_nr = kctx ? kctx->as_nr : as_nr; + err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidate after GPU page table update did not complete. 
Issuing GPU soft-reset to recover"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/* Perform a flush/invalidate on a particular address space + */ +static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int err; + bool gpu_powered; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + gpu_powered = kbdev->pm.backend.gpu_powered; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* GPU is off so there's no need to perform flush/invalidate. + * But even if GPU is not actually powered down, after gpu_powered flag + * was set to false, it is still safe to skip the flush/invalidate. + * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE + * which is sent when address spaces are restored after gpu_powered flag + * is set to true. Flushing of L2 cache is certainly not required as L2 + * cache is definitely off if gpu_powered is false. + */ + if (!gpu_powered) + return; + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* GPU has just been powered off due to system suspend. + * So again, no need to perform flush/invalidate. + */ + return; + } + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_flush(kbdev, as, op_param); + mmu_hw_operation_end(kbdev); + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover. + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); + + if (kbase_prepare_to_reset_gpu( + kbdev)) + kbase_reset_gpu(kbdev); + } + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + kbase_pm_context_idle(kbdev); +} + +/** + * mmu_flush_invalidate() - Perform a flush operation on GPU caches. + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which flush + invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. + * + * This function performs the cache flush operation described by @op_param. + * The function retains a reference to the given @kctx and releases it + * after performing the flush operation. + * + * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue + * a cache flush + invalidate to the L2 caches and invalidate the TLBs. + * + * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue + * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as + * invalidating the TLBs. + * + * If operation is set to KBASE_MMU_OP_UNLOCK then this function will only + * invalidate the MMU caches and TLBs. + */ +static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, + const struct kbase_mmu_hw_op_param *op_param) +{ + bool ctx_is_in_runpool; + + /* Early out if there is nothing to do */ + if (op_param->nr == 0) + return; + + /* If no context is provided then MMU operation is performed on address + * space which does not belong to user space context. Otherwise, retain + * refcount to context provided and release after flush operation. 
+ */ + if (!kctx) { + mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param); + } else { +#if !MALI_USE_CSF + mutex_lock(&kbdev->js_data.queue_mutex); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); + mutex_unlock(&kbdev->js_data.queue_mutex); +#else + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); +#endif /* !MALI_USE_CSF */ + + if (ctx_is_in_runpool) { + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param); + + release_ctx(kbdev, kctx); + } + } +} + + +/** + * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via + * the GPU_CONTROL interface + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which flush + invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. * - * This is used for MMU tables which do not belong to a user space context. + * Perform a flush/invalidate on a particular address space via the GPU_CONTROL + * interface. */ -static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, - u64 vpfn, size_t nr, bool sync, int as_nr); +static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, + int as_nr, const struct kbase_mmu_hw_op_param *op_param) +{ + int err = 0; + unsigned long flags; + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + as_nr = kctx ? kctx->as_nr : as_nr; + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], + op_param); + } + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover. + */ + dev_err(kbdev->dev, + "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); +} /** * kbase_mmu_sync_pgd - sync page directory to memory @@ -113,10 +356,88 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, * - ATE: Address Translation Entry. A 64bit value pointing to * a 4kB physical page. */ - static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id); + struct tagged_addr *phys, size_t nr, unsigned long flags, + int group_id, u64 *dirty_pgds); +/** + * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and + * free memory of the page directories + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @pgds: Physical addresses of page directories to be freed. + * @vpfn: The virtual page frame number. + * @level: The level of MMU page table. + * @flush_op: The type of MMU flush operation to perform. + * @dirty_pgds: Flags to track every level where a PGD has been updated. + * @free_pgds_list: Linked list of the page directory pages to free. + */ +static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t *pgds, + u64 vpfn, int level, + u64 *dirty_pgds, + struct list_head *free_pgds_list); +/** + * kbase_mmu_free_pgd() - Free memory of the page directory + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. 
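A sketch of the assumed caller pattern for the two flush paths defined above: the GPU_CONTROL path is taken when mmu_flush_cache_on_gpu_ctrl() reports support, otherwise the MMU_AS_CONTROL path is used. The wrapper below is hypothetical and not shown in this hunk.

static void flush_after_pgtable_update(struct kbase_device *kbdev,
				       struct kbase_context *kctx, int as_nr,
				       const struct kbase_mmu_hw_op_param *op_param)
{
	if (mmu_flush_cache_on_gpu_ctrl(kbdev))
		mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param);
	else
		mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
}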
+ * @pgd: Physical address of page directory to be freed. + * + * This function is supposed to be called with mmu_lock held and after + * ensuring that GPU won't be able to access the page. +*/ +static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t pgd) +{ + struct page *p; + + lockdep_assert_held(&mmut->mmu_lock); + + p = pfn_to_page(PFN_DOWN(pgd)); + + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. + */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +/** + * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @free_pgds_list: Linked list of the page directory pages to free. + * + * This function will call kbase_mmu_free_pgd() on each page directory page + * present in the @free_pgds_list. + * + * The function is supposed to be called after the GPU cache and MMU TLB has + * been invalidated post the teardown loop. + */ +static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct list_head *free_pgds_list) +{ + struct page *page, *next_page; + + mutex_lock(&mmut->mmu_lock); + + list_for_each_entry_safe(page, next_page, free_pgds_list, lru) { + list_del_init(&page->lru); + kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page)); + } + + mutex_unlock(&mmut->mmu_lock); +} /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to @@ -213,17 +534,37 @@ static void kbase_gpu_mmu_handle_write_faulting_as( KBASE_MMU_FAULT_TYPE_PAGE); } +static void set_gwt_element_page_addr_and_size( + struct kbasep_gwt_list_element *element, + u64 fault_page_addr, struct tagged_addr fault_phys) +{ + u64 fault_pfn = fault_page_addr >> PAGE_SHIFT; + unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1); + + /* If the fault address lies within a 2MB page, then consider + * the whole 2MB page for dumping to avoid incomplete dumps. + */ + if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) { + element->page_addr = fault_page_addr & ~(SZ_2M - 1); + element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE; + } else { + element->page_addr = fault_page_addr; + element->num_pages = 1; + } +} + static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, struct kbase_as *faulting_as) { struct kbasep_gwt_list_element *pos; struct kbase_va_region *region; struct kbase_device *kbdev; + struct tagged_addr *fault_phys_addr; struct kbase_fault *fault; u64 fault_pfn, pfn_offset; - u32 op; int ret; int as_no; + u64 dirty_pgds = 0; as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); @@ -251,15 +592,18 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, return; } + pfn_offset = fault_pfn - region->start_pfn; + fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset]; + /* Capture addresses of faulting write location * for job dumping if write tracking is enabled. */ if (kctx->gwt_enabled) { - u64 page_addr = fault->addr & PAGE_MASK; + u64 fault_page_addr = fault->addr & PAGE_MASK; bool found = false; /* Check if this write was already handled. 
*/ list_for_each_entry(pos, &kctx->gwt_current_list, link) { - if (page_addr == pos->page_addr) { + if (fault_page_addr == pos->page_addr) { found = true; break; } @@ -269,8 +613,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, pos = kmalloc(sizeof(*pos), GFP_KERNEL); if (pos) { pos->region = region; - pos->page_addr = page_addr; - pos->num_pages = 1; + set_gwt_element_page_addr_and_size(pos, + fault_page_addr, *fault_phys_addr); list_add(&pos->link, &kctx->gwt_current_list); } else { dev_warn(kbdev->dev, "kmalloc failure"); @@ -278,17 +622,12 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, } } - pfn_offset = fault_pfn - region->start_pfn; /* Now make this faulting page writable to GPU. */ - ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - 1, region->flags, region->gpu_alloc->group_id); + ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, + region->gpu_alloc->group_id, &dirty_pgds); - /* flush L2 and unlock the VA (resumes the MMU) */ - op = AS_COMMAND_FLUSH_PT; - - kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, - fault_pfn, 1, op); + kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, + kctx->id, dirty_pgds); kbase_gpu_vm_unlock(kctx); } @@ -523,13 +862,6 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, return true; } -/* Small wrapper function to factor out GPU-dependent context releasing */ -static void release_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - kbasep_js_runpool_release_ctx(kbdev, kctx); -} - void page_fault_worker(struct work_struct *data) { u64 fault_pfn; @@ -553,6 +885,11 @@ void page_fault_worker(struct work_struct *data) size_t pages_trimmed = 0; #endif + /* Calls to this function are inherently synchronous, with respect to + * MMU operations. 
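+ * The faulting address space stays stalled until the handler unlocks it,
+ * hence CALLER_MMU_SYNC is used for the MMU operations issued below.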
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; + faulting_as = container_of(data, struct kbase_as, work_pagefault); fault = &faulting_as->pf_data; fault_pfn = fault->addr >> PAGE_SHIFT; @@ -579,7 +916,9 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); #if MALI_JIT_PRESSURE_LIMIT_BASE +#if !MALI_USE_CSF mutex_lock(&kctx->jctx.lock); +#endif #endif if (unlikely(fault->protected_mode)) { @@ -625,21 +964,13 @@ void page_fault_worker(struct work_struct *data) goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Address size fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory attributes fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); goto fault_done; default: @@ -705,6 +1036,10 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } + if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == + AS_FAULTSTATUS_ACCESS_TYPE_READ) + dev_warn(kbdev->dev, "Grow on pagefault while reading"); + /* find the size we need to grow it by * we know the result fit in a size_t due to * kbase_region_tracker_find_region_enclosing_address @@ -715,6 +1050,8 @@ void page_fault_worker(struct work_struct *data) current_backed_size = kbase_reg_current_backed_size(region); if (fault_rel_pfn < current_backed_size) { + struct kbase_mmu_hw_op_param op_param; + dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", fault->addr, region->start_pfn, @@ -733,8 +1070,29 @@ void page_fault_worker(struct work_struct *data) * transaction (which should cause the other page fault to be * raised again). */ - kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, - AS_COMMAND_UNLOCK, 1); + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = kctx->id; + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } else { + /* Can safely skip the invalidate for all levels in case + * of duplicate page faults. 
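+ * (A flush_skip_levels value of 0xF marks all four page table levels as
+ * skippable for this unlock.)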
+ */ + op_param.flush_skip_levels = 0xF; + op_param.vpfn = fault_pfn; + op_param.nr = 1; + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); @@ -753,14 +1111,38 @@ void page_fault_worker(struct work_struct *data) new_pages); if (new_pages == 0) { + struct kbase_mmu_hw_op_param op_param; + mutex_lock(&kbdev->mmu_hw_mutex); /* Duplicate of a fault we've already handled, nothing to do */ kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); + /* See comment [1] about UNLOCK usage */ - kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, - AS_COMMAND_UNLOCK, 1); + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = kctx->id; + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } else { + /* Can safely skip the invalidate for all levels in case + * of duplicate page faults. + */ + op_param.flush_skip_levels = 0xF; + op_param.vpfn = fault_pfn; + op_param.nr = 1; + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); @@ -785,8 +1167,9 @@ void page_fault_worker(struct work_struct *data) spin_unlock(&kctx->mem_partials_lock); if (grown) { + u64 dirty_pgds = 0; u64 pfn_offset; - u32 op; + struct kbase_mmu_hw_op_param op_param; /* alloc success */ WARN_ON(kbase_reg_current_backed_size(region) > @@ -805,7 +1188,8 @@ void page_fault_worker(struct work_struct *data) err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset, &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags, region->gpu_alloc->group_id); + new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -849,9 +1233,6 @@ void page_fault_worker(struct work_struct *data) /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); - /* flush L2 and unlock the VA (resumes the MMU) */ - op = AS_COMMAND_FLUSH_PT; - /* clear MMU interrupt - this needs to be done after updating * the page tables but before issuing a FLUSH command. 
The * FLUSH cmd has a side effect that it restarts stalled memory @@ -863,9 +1244,30 @@ void page_fault_worker(struct work_struct *data) kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); - kbase_mmu_hw_do_operation(kbdev, faulting_as, - fault->addr >> PAGE_SHIFT, - new_pages, op, 1); + op_param.vpfn = region->start_pfn + pfn_offset; + op_param.nr = new_pages; + op_param.op = KBASE_MMU_OP_FLUSH_PT; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + /* Unlock to invalidate the TLB (and resume the MMU) */ + op_param.flush_skip_levels = + pgd_level_to_skip_flush(dirty_pgds); + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } else { + /* flush L2 and unlock the VA (resumes the MMU) */ + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_flush(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } + + if (err) { + dev_err(kbdev->dev, + "Flush for GPU page table update did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ @@ -903,6 +1305,7 @@ void page_fault_worker(struct work_struct *data) kbase_gpu_vm_unlock(kctx); } else { int ret = -ENOMEM; + const u8 group_id = region->gpu_alloc->group_id; kbase_gpu_vm_unlock(kctx); @@ -914,23 +1317,21 @@ void page_fault_worker(struct work_struct *data) if (grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = - &kctx->mem_pools.large[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.large[group_id]; pages_to_grow = (pages_to_grow + ((1 << lp_mem_pool->order) - 1)) >> lp_mem_pool->order; ret = kbase_mem_pool_grow(lp_mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); } else { #endif struct kbase_mem_pool *const mem_pool = - &kctx->mem_pools.small[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.small[group_id]; ret = kbase_mem_pool_grow(mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); #ifdef CONFIG_MALI_2MB_ALLOC } #endif @@ -972,8 +1373,9 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { u64 *page; - int i; + struct page *p; + phys_addr_t pgd; p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); if (!p) @@ -983,6 +1385,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, if (page == NULL) goto alloc_free; + pgd = page_to_phys(p); + /* If the MMU tables belong to a context then account the memory usage * to that context, otherwise the MMU tables are device wide and are * only accounted to the device. 
@@ -1003,13 +1407,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) - kbdev->mmu_mode->entry_invalidate(&page[i]); + kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); kunmap(p); - return page_to_phys(p); + return pgd; alloc_free: kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, @@ -1021,9 +1424,9 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, /* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the * new table from the pool if needed and possible */ -static int mmu_get_next_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - phys_addr_t *pgd, u64 vpfn, int level) +static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd, + u64 *dirty_pgds) { u64 *page; phys_addr_t target_pgd; @@ -1047,9 +1450,13 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, return -EINVAL; } - target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + target_pgd = kbdev->mmu_mode->pte_to_phy_addr( + page[vpfn]); if (!target_pgd) { + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; + unsigned int current_valid_entries; + u64 managed_pte; target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (!target_pgd) { dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", @@ -1058,10 +1465,31 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, return -ENOMEM; } - kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); + current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(page); + kbdev->mmu_mode->entry_set_pte(&managed_pte, target_pgd); + page[vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1); - kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); /* Rely on the caller to update the address space flags. */ + if (newly_created_pgd && !*newly_created_pgd) { + *newly_created_pgd = true; + /* If code reaches here we know parent PGD of target PGD was + * not newly created and should be flushed. + */ + flush_op = KBASE_MMU_OP_FLUSH_PT; + + if (dirty_pgds) + *dirty_pgds |= 1ULL << level; + } + + /* MMU cache flush strategy is FLUSH_PT because a new entry is added + * to an existing PGD which may be stored in GPU caches and needs a + * "clean" operation. An "invalidation" operation is not required here + * as this entry points to a new page and cannot be present in GPU + * caches. 
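+ * The parent level is also recorded in dirty_pgds above, so that callers
+ * using GPU_CONTROL flushes can restrict cache maintenance to the levels
+ * that were actually modified.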
+ */ + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); } kunmap(p); @@ -1073,11 +1501,9 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, /* * Returns the PGD for the specified level of translation */ -static int mmu_get_pgd_at_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - int level, - phys_addr_t *out_pgd) +static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + int level, phys_addr_t *out_pgd, bool *newly_created_pgd, + u64 *dirty_pgds) { phys_addr_t pgd; int l; @@ -1086,7 +1512,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, pgd = mmut->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { - int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + int err = + mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { dev_dbg(kbdev->dev, @@ -1101,20 +1528,18 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, return 0; } -static int mmu_get_bottom_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - phys_addr_t *out_pgd) +static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds) { - return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, - out_pgd); + return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd, + newly_created_pgd, dirty_pgds); } static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 from_vpfn, u64 to_vpfn) + struct kbase_mmu_table *mmut, u64 from_vpfn, + u64 to_vpfn, u64 *dirty_pgds, + struct list_head *free_pgds_list) { - phys_addr_t pgd; u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1127,28 +1552,33 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode = kbdev->mmu_mode; while (vpfn < to_vpfn) { - unsigned int i; + unsigned int idx = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; unsigned int pcount = 0; unsigned int left = to_vpfn - vpfn; int level; u64 *page; + phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + phys_addr_t pgd = mmut->pgd; + struct page *p = phys_to_page(pgd); + + register unsigned int num_of_valid_entries; if (count > left) count = left; /* need to check if this is a 2MB page or a 4kB */ - pgd = mmut->pgd; - for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(phys_to_page(pgd)); + pgds[level] = pgd; + page = kmap(p); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kunmap(phys_to_page(pgd)); + kunmap(p); pgd = mmu_mode->pte_to_phy_addr(page[idx]); + p = phys_to_page(pgd); } switch (level) { @@ -1166,26 +1596,82 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, goto next; } + if (dirty_pgds && pcount > 0) + *dirty_pgds |= 1ULL << level; + + num_of_valid_entries = mmu_mode->get_num_valid_entries(page); + if (WARN_ON_ONCE(num_of_valid_entries < pcount)) + num_of_valid_entries = 0; + else + num_of_valid_entries -= pcount; + /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[idx + i]); + mmu_mode->entries_invalidate(&page[idx], pcount); + if (!num_of_valid_entries) { + kunmap(p); + + list_add(&p->lru, free_pgds_list); + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, + 
dirty_pgds, + free_pgds_list); + vpfn += count; + continue; + } + + mmu_mode->set_num_valid_entries(page, num_of_valid_entries); kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, 8 * pcount); - kunmap(phys_to_page(pgd)); + kunmap(p); next: vpfn += count; } } +static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, const u64 vpfn, + size_t nr, u64 dirty_pgds, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + struct kbase_mmu_hw_op_param op_param; + int as_nr = 0; + + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op = KBASE_MMU_OP_FLUSH_PT; + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; + op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + +#if MALI_USE_CSF + as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR; +#else + WARN_ON(!mmut->kctx); +#endif + + /* MMU cache flush strategy depends on whether GPU control commands for + * flushing physical address ranges are supported. The new physical pages + * are not present in GPU caches therefore they don't need any cache + * maintenance, but PGDs in the page table may or may not be created anew. + * + * Operations that affect the whole GPU cache shall only be done if it's + * impossible to update physical ranges. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); + else + mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); +} + /* * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' */ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int const group_id) + struct tagged_addr phys, size_t nr, + unsigned long flags, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) { phys_addr_t pgd; u64 *pgd_page; @@ -1198,6 +1684,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, size_t remain = nr; int err; struct kbase_device *kbdev; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); if (WARN_ON(kctx == NULL)) return -EINVAL; @@ -1218,6 +1706,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; + register unsigned int num_of_valid_entries; + bool newly_created_pgd = false; if (count > remain) count = remain; @@ -1230,8 +1720,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, * 256 pages at once (on average). Do we really care? 
*/ do { - err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, - vpfn, &pgd); + err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd, + &dirty_pgds); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for @@ -1241,7 +1731,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, err = kbase_mem_pool_grow( &kbdev->mem_pools.small[ kctx->mmu.group_id], - MIDGARD_MMU_BOTTOMLEVEL); + MIDGARD_MMU_BOTTOMLEVEL,kctx->task); mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { @@ -1250,10 +1740,9 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - &kctx->mmu, - start_vpfn, - start_vpfn + recover_count); + mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, + start_vpfn + recover_count, + &dirty_pgds, &free_pgds_list); } goto fail_unlock; } @@ -1266,15 +1755,17 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - &kctx->mmu, - start_vpfn, - start_vpfn + recover_count); + mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, + start_vpfn + recover_count, + &dirty_pgds, &free_pgds_list); } err = -ENOMEM; goto fail_unlock; } + num_of_valid_entries = + kbdev->mmu_mode->get_num_valid_entries(pgd_page); + for (i = 0; i < count; i++) { unsigned int ofs = index + i; @@ -1285,9 +1776,23 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); } + kbdev->mmu_mode->set_num_valid_entries( + pgd_page, num_of_valid_entries + count); + vpfn += count; remain -= count; + if (count > 0 && !newly_created_pgd) + dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL; + + /* MMU cache flush operation here will depend on whether bottom level + * PGD is newly created or not. + * + * If bottom level PGD is newly created then no cache maintenance is + * required as the PGD will not exist in GPU cache. Otherwise GPU cache + * maintenance is required for existing PGD. 
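+ * In either case the updated PGD entries are synced to memory below, so
+ * that they are visible to the GPU before the MMU flush/invalidate is
+ * issued.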
+ */ + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64)); @@ -1301,12 +1806,16 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_count += count; } mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); return 0; fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); + kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); return err; } @@ -1349,7 +1858,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, - int const group_id) + int const group_id, + u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; @@ -1357,6 +1867,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, size_t remain = nr; int err; struct kbase_mmu_mode const *mmu_mode; + LIST_HEAD(free_pgds_list); /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ @@ -1376,6 +1887,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; struct page *p; int cur_level; + register unsigned int num_of_valid_entries; + bool newly_created_pgd = false; if (count > remain) count = remain; @@ -1393,8 +1906,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, * 256 pages at once (on average). Do we really care? */ do { - err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, - cur_level, &pgd); + err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd, + &newly_created_pgd, dirty_pgds); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for @@ -1403,7 +1916,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, mutex_unlock(&mmut->mmu_lock); err = kbase_mem_pool_grow( &kbdev->mem_pools.small[mmut->group_id], - cur_level); + cur_level,mmut->kctx ? 
mmut->kctx->task : NULL); mutex_lock(&mmut->mmu_lock); } while (!err); @@ -1414,8 +1927,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - mmut, start_vpfn, insert_vpfn); + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, + insert_vpfn, dirty_pgds, + &free_pgds_list); } goto fail_unlock; } @@ -1429,21 +1943,23 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - mmut, start_vpfn, insert_vpfn); + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, + insert_vpfn, dirty_pgds, + &free_pgds_list); } err = -ENOMEM; goto fail_unlock; } + num_of_valid_entries = + mmu_mode->get_num_valid_entries(pgd_page); + if (cur_level == MIDGARD_MMU_LEVEL(2)) { int level_index = (insert_vpfn >> 9) & 0x1FF; - u64 *target = &pgd_page[level_index]; + pgd_page[level_index] = + kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id); - if (mmu_mode->pte_is_valid(*target, cur_level)) - cleanup_empty_pte(kbdev, mmut, target); - *target = kbase_mmu_create_ate(kbdev, *phys, flags, - cur_level, group_id); + num_of_valid_entries++; } else { for (i = 0; i < count; i++) { unsigned int ofs = vindex + i; @@ -1461,8 +1977,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, *target = kbase_mmu_create_ate(kbdev, phys[i], flags, cur_level, group_id); } + num_of_valid_entries += count; } + mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + + if (dirty_pgds && count > 0 && !newly_created_pgd) + *dirty_pgds |= 1ULL << cur_level; + phys += count; insert_vpfn += count; remain -= count; @@ -1474,10 +1996,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, kunmap(p); } - err = 0; + mutex_unlock(&mmut->mmu_lock); + return 0; fail_unlock: mutex_unlock(&mmut->mmu_lock); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, + CALLER_MMU_ASYNC); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; } @@ -1488,147 +2014,83 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id) + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) { int err; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, - phys, nr, flags, group_id); + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds); + if (err) + return err; - if (mmut->kctx) - kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false); - else - kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, - as_nr); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info); - return err; + return 0; } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); /** - * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches + * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches * without retaining the kbase context. * @kctx: The KBase context. * @vpfn: The virtual page frame number to start the flush on. * @nr: The number of pages to flush. 
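+ * The flush is always issued as a KBASE_MMU_OP_FLUSH_MEM operation, so the
+ * former @sync argument is no longer needed.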
- * @sync: Set if the operation should be synchronous or not. * * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any * other locking. */ -static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) +static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr) { struct kbase_device *kbdev = kctx->kbdev; int err; - u32 op; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + struct kbase_mmu_hw_op_param op_param; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); /* Early out if there is nothing to do */ if (nr == 0) return; - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; + /* flush L2 and unlock the VA (resumes the MMU) */ + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op = KBASE_MMU_OP_FLUSH_MEM; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + /* Value used to prevent skipping of any levels when flushing */ + op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr], + &op_param); + } else { + err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], + &op_param); + } - err = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - vpfn, nr, op, 0); if (err) { /* Flush failed to complete, assume the * GPU has hung and perform a reset to recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu_locked(kbdev)) kbase_reset_gpu_locked(kbdev); } } -/* Perform a flush/invalidate on a particular address space - */ -static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, - struct kbase_as *as, - u64 vpfn, size_t nr, bool sync) -{ - int err; - u32 op; - - if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* GPU is off so there's no need to perform flush/invalidate */ - return; - } - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; - - err = kbase_mmu_hw_do_operation(kbdev, - as, vpfn, nr, op, 0); - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issueing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - } - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - - kbase_pm_context_idle(kbdev); -} - -static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, - u64 vpfn, size_t nr, bool sync, int as_nr) -{ - /* Skip if there is nothing to do */ - if (nr) { - kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn, - nr, sync); - } -} - -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) -{ - struct kbase_device *kbdev; - bool ctx_is_in_runpool; - - /* Early out if there is nothing to do */ - if (nr == 0) - return; - - /* MALI_SEC_INTEGRATION */ -#ifdef CONFIG_MALI_RT_PM - if (!gpu_is_power_on()) - return; -#endif - - kbdev = kctx->kbdev; - mutex_lock(&kbdev->js_data.queue_mutex); - ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); - mutex_unlock(&kbdev->js_data.queue_mutex); - - if (ctx_is_in_runpool) { - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], - vpfn, nr, sync); - - release_ctx(kbdev, kctx); - } -} void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, @@ -1660,6 +2122,7 @@ void kbase_mmu_disable(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); /* * The address space is being disabled, drain all knowledge of it out @@ -1668,12 +2131,103 @@ void kbase_mmu_disable(struct kbase_context *kctx) * The job scheduler code will already be holding the locks and context * so just do the flush. */ - kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); + kbase_mmu_flush_noretain(kctx, 0, ~0); kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +#if !MALI_USE_CSF + /* + * JM GPUs has some L1 read only caches that need to be invalidated + * with START_FLUSH configuration. Purge the MMU disabled kctx from + * the slot_rb tracking field so such invalidation is performed when + * a new katom is executed on the affected slots. 
+ */ + kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); +#endif } KBASE_EXPORT_TEST_API(kbase_mmu_disable); +static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t *pgds, + u64 vpfn, int level, + u64 *dirty_pgds, + struct list_head *free_pgds_list) +{ + int current_level; + + lockdep_assert_held(&mmut->mmu_lock); + + for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); + current_level--) { + phys_addr_t current_pgd = pgds[current_level]; + struct page *p = phys_to_page(current_pgd); + u64 *current_page = kmap(p); + unsigned int current_valid_entries = + kbdev->mmu_mode->get_num_valid_entries(current_page); + int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; + + /* We need to track every level that needs updating */ + if (dirty_pgds) + *dirty_pgds |= 1ULL << current_level; + + kbdev->mmu_mode->entries_invalidate(&current_page[index], 1); + + if (current_valid_entries == 1 && + current_level != MIDGARD_MMU_LEVEL(0)) { + kunmap(p); + + list_add(&p->lru, free_pgds_list); + } else { + current_valid_entries--; + + kbdev->mmu_mode->set_num_valid_entries( + current_page, current_valid_entries); + kunmap(p); + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + + 8 * index, sizeof(u64)); + break; + } + } +} + +/** + * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. + * + * @kbdev: Pointer to kbase device. + * @kctx: Pointer to kbase context. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @phys: Array of physical pages that were unmapped. + * @op_param: Non-NULL pointer to struct containing information about the flush + * operation to perform. + * + * This function will do one of two things: + * 1. Perform a full GPU cache flush through the MMU_CONTROL interface if GPU + * control flush commands are not supported, or + * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if the + * feature is supported and the requested operation is KBASE_MMU_OP_FLUSH_MEM. + * + * A partial flush of only the unmapped pages is not performed by this + * implementation, so @phys is currently unused. + */ +static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, + struct kbase_context *kctx, int as_nr, + struct tagged_addr *phys, + struct kbase_mmu_hw_op_param *op_param) +{ + + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); + return; + } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { + mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param); + return; + } + +} + /* * We actually only discard the ATE, and not the page table * pages. There is a potential DoS here, as we'll leak memory by @@ -1686,44 +2240,67 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more * information. 
*/ -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr) +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, int as_nr) { - phys_addr_t pgd; u64 start_vpfn = vpfn; size_t requested_nr = nr; + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; + struct kbase_mmu_hw_op_param op_param; int err = -EFAULT; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; if (nr == 0) { /* early out if nothing to do */ return 0; } + /* MMU cache flush strategy depends on the number of pages to unmap. In both cases + * the operation is invalidate but the granularity of cache maintenance may change + * according to the situation. + * + * If GPU control command operations are present and the number of pages is "small", + * then the optimal strategy is flushing on the physical address range of the pages + * which are affected by the operation. That implies both the PGDs which are modified + * or removed from the page table and the physical pages which are freed from memory. + * + * Otherwise, there's no alternative to invalidating the whole GPU cache. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) + flush_op = KBASE_MMU_OP_FLUSH_PT; + mutex_lock(&mmut->mmu_lock); mmu_mode = kbdev->mmu_mode; while (nr) { - unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; unsigned int pcount; int level; u64 *page; + phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + register unsigned int num_of_valid_entries; + phys_addr_t pgd = mmut->pgd; + struct page *p = phys_to_page(pgd); if (count > nr) count = nr; - /* need to check if this is a 2MB or a 4kB page */ - pgd = mmut->pgd; - + /* need to check if this is a 2MB page or a 4kB */ for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { phys_addr_t next_pgd; index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(phys_to_page(pgd)); + page = kmap(p); if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { @@ -1747,8 +2324,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, goto next; } next_pgd = mmu_mode->pte_to_phy_addr(page[index]); - kunmap(phys_to_page(pgd)); + kunmap(p); + pgds[level] = pgd; pgd = next_pgd; + p = phys_to_page(pgd); } switch (level) { @@ -1757,7 +2336,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, dev_warn(kbdev->dev, "%s: No support for ATEs at level %d\n", __func__, level); - kunmap(phys_to_page(pgd)); + kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): /* can only teardown if count >= 512 */ @@ -1783,29 +2362,56 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, continue; } + if (pcount > 0) + dirty_pgds |= 1ULL << level; + + num_of_valid_entries = mmu_mode->get_num_valid_entries(page); + if (WARN_ON_ONCE(num_of_valid_entries < pcount)) + num_of_valid_entries = 0; + else + num_of_valid_entries -= pcount; + /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[index + i]); + mmu_mode->entries_invalidate(&page[index], pcount); + + if (!num_of_valid_entries) { + kunmap(p); + list_add(&p->lru, &free_pgds_list); + + 
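+ /* No valid entries are left in this PGD: queue the page for freeing
+ * and clear its entry in the parent page directories.
+ */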
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, + &dirty_pgds, + &free_pgds_list); + + vpfn += count; + nr -= count; + continue; + } + + mmu_mode->set_num_valid_entries(page, num_of_valid_entries); kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(phys_to_page(pgd)) + - 8 * index, 8*pcount); + kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64)); next: - kunmap(phys_to_page(pgd)); + kunmap(p); vpfn += count; nr -= count; } err = 0; out: mutex_unlock(&mmut->mmu_lock); - - if (mmut->kctx) - kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr, - true); - else - kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr, - true, as_nr); + /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ + op_param = (struct kbase_mmu_hw_op_param){ + .vpfn = start_vpfn, + .nr = requested_nr, + .mmu_sync_info = mmu_sync_info, + .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, + .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT : + KBASE_MMU_OP_FLUSH_MEM, + .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), + }; + mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; } @@ -1831,8 +2437,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). */ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id) + struct tagged_addr *phys, size_t nr, unsigned long flags, + int const group_id, u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; @@ -1857,53 +2463,77 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, unsigned int index = vpfn & 0x1FF; size_t count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; + register unsigned int num_of_valid_entries; + int cur_level = MIDGARD_MMU_BOTTOMLEVEL; if (count > nr) count = nr; - do { - err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, - vpfn, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow( - &kbdev->mem_pools.small[ - kctx->mmu.group_id], - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu.mmu_lock); - } while (!err); - if (err) { - dev_warn(kbdev->dev, - "mmu_get_bottom_pgd failure\n"); + if (is_huge(*phys) && (index == index_in_large_page(*phys))) + cur_level = MIDGARD_MMU_LEVEL(2); + + err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL, + dirty_pgds); + if (WARN_ON(err)) goto fail_unlock; - } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "kmap failure\n"); + dev_warn(kbdev->dev, "kmap failure on update_pages"); err = -ENOMEM; goto fail_unlock; } - for (i = 0; i < count; i++) - pgd_page[index + i] = kbase_mmu_create_ate(kbdev, - phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, - group_id); + num_of_valid_entries = + kbdev->mmu_mode->get_num_valid_entries(pgd_page); + + if (cur_level == MIDGARD_MMU_LEVEL(2)) { + int level_index = (vpfn >> 9) & 0x1FF; + struct tagged_addr *target_phys = + phys - index_in_large_page(*phys); + +#ifdef CONFIG_MALI_DEBUG + WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( + pgd_page[level_index], MIDGARD_MMU_LEVEL(2))); +#endif + pgd_page[level_index] = kbase_mmu_create_ate(kbdev, + *target_phys, flags, MIDGARD_MMU_LEVEL(2), + group_id); + kbase_mmu_sync_pgd(kbdev, + 
kbase_dma_addr(p) + (level_index * sizeof(u64)), + sizeof(u64)); + } else { + for (i = 0; i < count; i++) { +#ifdef CONFIG_MALI_DEBUG + WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( + pgd_page[index + i], + MIDGARD_MMU_BOTTOMLEVEL)); +#endif + pgd_page[index + i] = kbase_mmu_create_ate(kbdev, + phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, + group_id); + } + + /* MMU cache flush strategy is NONE because GPU cache maintenance + * will be done by the caller. + */ + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); + } + + kbdev->mmu_mode->set_num_valid_entries(pgd_page, + num_of_valid_entries); + + if (dirty_pgds && count > 0) + *dirty_pgds |= 1ULL << cur_level; phys += count; vpfn += count; nr -= count; - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (index * sizeof(u64)), - count * sizeof(u64)); - - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kunmap(p); } mutex_unlock(&kctx->mmu.mmu_lock); @@ -1919,22 +2549,40 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, unsigned long flags, int const group_id) { int err; + struct kbase_mmu_hw_op_param op_param; + u64 dirty_pgds = 0; - err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, - group_id); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, true); + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds); + + op_param = (const struct kbase_mmu_hw_op_param){ + .vpfn = vpfn, + .nr = nr, + .op = KBASE_MMU_OP_FLUSH_MEM, + .kctx_id = kctx->id, + .mmu_sync_info = mmu_sync_info, + .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), + }; + + if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev)) + mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param); + else + mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param); return err; } static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd, - int level, u64 *pgd_page_buffer) + int level) { - phys_addr_t target_pgd; - struct page *p; u64 *pgd_page; int i; - struct kbase_mmu_mode const *mmu_mode; + struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; + u64 *pgd_page_buffer = NULL; lockdep_assert_held(&mmut->mmu_lock); @@ -1942,51 +2590,40 @@ static void mmu_teardown_level(struct kbase_device *kbdev, /* kmap_atomic should NEVER fail. 
*/ if (WARN_ON(pgd_page == NULL)) return; + if (level != MIDGARD_MMU_BOTTOMLEVEL) { /* Copy the page to our preallocated buffer so that we can minimize * kmap_atomic usage */ + pgd_page_buffer = mmut->mmu_teardown_pages[level]; memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + } + + /* Invalidate page after copying */ + mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; - mmu_mode = kbdev->mmu_mode; - - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { - target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); - - if (target_pgd) { + if (level != MIDGARD_MMU_BOTTOMLEVEL) { + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { if (mmu_mode->pte_is_valid(pgd_page[i], level)) { + phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr( + pgd_page[i]); mmu_teardown_level(kbdev, mmut, target_pgd, - level + 1, - pgd_page_buffer + - (PAGE_SIZE / sizeof(u64))); + level + 1); } } } - p = pfn_to_page(PFN_DOWN(pgd)); - - kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], - p, true); - - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. - */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, int const group_id) { + int level; + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || WARN_ON(group_id < 0)) return -EINVAL; @@ -1994,14 +2631,20 @@ int kbase_mmu_init(struct kbase_device *const kbdev, mmut->group_id = group_id; mutex_init(&mmut->mmu_lock); mmut->kctx = kctx; + mmut->pgd = 0; - /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ - mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */ + for (level = MIDGARD_MMU_TOPLEVEL; + level < MIDGARD_MMU_BOTTOMLEVEL; level++) { + mmut->mmu_teardown_pages[level] = + kmalloc(PAGE_SIZE, GFP_KERNEL); - if (mmut->mmu_teardown_pages == NULL) - return -ENOMEM; + if (!mmut->mmu_teardown_pages[level]) { + kbase_mmu_term(kbdev, mmut); + return -ENOMEM; + } + } - mmut->pgd = 0; /* We allocate pages into the kbdev memory pool, then * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. @@ -2011,7 +2654,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, err = kbase_mem_pool_grow( &kbdev->mem_pools.small[mmut->group_id], - MIDGARD_MMU_BOTTOMLEVEL); + MIDGARD_MMU_BOTTOMLEVEL,kctx ? 
kctx->task : NULL); if (err) { kbase_mmu_term(kbdev, mmut); return -ENOMEM; @@ -2027,17 +2670,29 @@ int kbase_mmu_init(struct kbase_device *const kbdev, void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { + int level; + + WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", + mmut->kctx->tgid, mmut->kctx->id); + + if (mmut->pgd) { mutex_lock(&mmut->mmu_lock); - mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL, - mmut->mmu_teardown_pages); + mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); mutex_unlock(&mmut->mmu_lock); if (mmut->kctx) KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); } - kfree(mmut->mmu_teardown_pages); + for (level = MIDGARD_MMU_TOPLEVEL; + level < MIDGARD_MMU_BOTTOMLEVEL; level++) { + if (!mmut->mmu_teardown_pages[level]) + break; + kfree(mmut->mmu_teardown_pages[level]); + } + mutex_destroy(&mmut->mmu_lock); } diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h index c9e27b1255c5..a92b25b7b3dc 100644 --- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h @@ -23,6 +23,31 @@ #ifndef _KBASE_MMU_H_ #define _KBASE_MMU_H_ + +#define KBASE_MMU_PAGE_ENTRIES 512 + +struct kbase_context; +struct kbase_mmu_table; + +/** + * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. + * A value of this type is passed down from the outer-most callers in the kbase + * module - where the information resides as to the synchronous / asynchronous + * nature of the call flow, with respect to MMU operations, i.e. does the call flow relate to + * existing GPU work or does it come from requests (like ioctl) from user-space, power management, + * etc. 
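+ * For example, resolving a GPU page fault is a synchronous flow
+ * (CALLER_MMU_SYNC), whereas unmapping pages with kbase_mmu_teardown_pages()
+ * is asynchronous (CALLER_MMU_ASYNC).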
+ * + * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice + * of a 'valid' value + * @CALLER_MMU_SYNC: Arbitrary value for 'synchronous that isn't easy to choose by accident + * @CALLER_MMU_ASYNC: Also hard to choose by accident + */ +enum kbase_caller_mmu_sync_info { + CALLER_MMU_UNSET_SYNCHRONICITY, + CALLER_MMU_SYNC = 0x02, + CALLER_MMU_ASYNC +}; + /** * kbase_mmu_init - Initialise an object representing GPU page tables * @@ -80,22 +105,21 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); u64 kbase_mmu_create_ate(struct kbase_device *kbdev, struct tagged_addr phy, unsigned long flags, int level, int group_id); -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - const u64 start_vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id); +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id, u64 *dirty_pgds); int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id); + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int group_id); + struct tagged_addr phys, size_t nr, + unsigned long flags, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - size_t nr, int as_nr); +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, int as_nr); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h index e6eef86d7ac0..833bdaa00dfa 100644 --- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h @@ -32,6 +32,8 @@ #ifndef _KBASE_MMU_HW_H_ #define _KBASE_MMU_HW_H_ +#include "mali_kbase_mmu.h" + /* Forward declarations */ struct kbase_device; struct kbase_as; @@ -48,6 +50,45 @@ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED }; +/** + * enum kbase_mmu_op_type - enum for MMU operations + * @KBASE_MMU_OP_NONE: To help catch uninitialized struct + * @KBASE_MMU_OP_FIRST: The lower boundary of enum + * @KBASE_MMU_OP_LOCK: Lock memory region + * @KBASE_MMU_OP_UNLOCK: Unlock memory region + * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) + * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) + * @KBASE_MMU_OP_COUNT: The upper boundary of enum + */ +enum kbase_mmu_op_type { + KBASE_MMU_OP_NONE = 0, /* Must be zero */ + KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ + KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, + KBASE_MMU_OP_UNLOCK, + KBASE_MMU_OP_FLUSH_PT, + KBASE_MMU_OP_FLUSH_MEM, + KBASE_MMU_OP_COUNT /* Must be the last in enum */ +}; + +/** + * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions + * @vpfn: MMU Virtual Page Frame Number to start the operation on. + * @nr: Number of pages to work on. 
+ * @op: Operation type (written to ASn_COMMAND). + * @kctx_id: Kernel context ID for MMU command tracepoint. + * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + * @flush_skip_levels: Page table levels to skip flushing. (Only + * applicable if GPU supports feature) + */ +struct kbase_mmu_hw_op_param { + u64 vpfn; + u32 nr; + enum kbase_mmu_op_type op; + u32 kctx_id; + enum kbase_caller_mmu_sync_info mmu_sync_info; + u64 flush_skip_levels; +}; + /** * kbase_mmu_hw_configure - Configure an address space for use. * @kbdev: kbase device to configure. @@ -59,6 +100,53 @@ enum kbase_mmu_fault_type { void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); +/** + * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without + * programming the LOCKADDR register and wait + * for it to complete before returning. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** + * kbase_mmu_hw_do_unlock - Issue UNLOCK command to the MMU and wait for it + * to complete before returning. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** + * kbase_mmu_hw_do_flush - Issue a flush operation to the MMU. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Issue a flush operation on the address space as per the information + * specified inside @op_param. This function should not be called for + * GPUs where MMU command to flush the cache(s) is deprecated. + * mmu_hw_mutex needs to be held when calling this function. + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_do_operation - Issue an operation to the MMU. * @kbdev: kbase device to issue the MMU operation on. @@ -78,6 +166,42 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, u64 vpfn, u32 nr, u32 type, unsigned int handling_irq); +/** + * kbase_mmu_hw_do_flush_locked - Issue a flush operation to the MMU. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Issue a flush operation on the address space as per the information + * specified inside @op_param. This function should not be called for + * GPUs where MMU command to flush the cache(s) is deprecated. + * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this + * function. + * + * Return: Zero if the operation was successful, non-zero otherwise. 
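+ * (kbase_mmu_flush_noretain() is one such caller: it holds both hwaccess_lock
+ * and mmu_hw_mutex when issuing the flush.)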
+ */ +int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** + * kbase_mmu_hw_do_flush_on_gpu_ctrl - Issue a flush operation to the MMU. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Issue a flush operation on the address space as per the information + * specified inside @op_param. GPU command is used to flush the cache(s) + * instead of the MMU command. + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by * the MMU. diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c index f22e73e07398..f66d82e38fe7 100644 --- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c @@ -27,61 +27,107 @@ #include #include #include +#include +#include +#include /** * lock_region() - Generate lockaddr to lock memory region in MMU - * @pfn: Starting page frame number of the region to lock - * @num_pages: Number of pages to lock. It must be greater than 0. - * @lockaddr: Address and size of memory region to lock + * + * @gpu_props: GPU properties for finding the MMU lock region size. + * @lockaddr: Address and size of memory region to lock. + * @op_param: Pointer to a struct containing the starting page frame number of + * the region to lock, the number of pages to lock and page table + * levels to skip when flushing (if supported). * * The lockaddr value is a combination of the starting address and * the size of the region that encompasses all the memory pages to lock. * - * The size is expressed as a logarithm: it is represented in a way - * that is compatible with the HW specification and it also determines - * how many of the lowest bits of the address are cleared. + * Bits 5:0 are used to represent the size, which must be a power of 2. + * The smallest amount of memory to be locked corresponds to 32 kB, + * i.e. 8 memory pages, because a MMU cache line is made of 64 bytes + * and every page table entry is 8 bytes. Therefore it is not possible + * to lock less than 8 memory pages at a time. + * + * The size is expressed as a logarithm minus one: + * - A value of 14 is thus interpreted as log(32 kB) = 15, where 32 kB + * is the smallest possible size. + * - Likewise, a value of 47 is interpreted as log(256 TB) = 48, where 256 TB + * is the largest possible size (implementation defined value according + * to the HW spec). + * + * Bits 11:6 are reserved. + * + * Bits 63:12 are used to represent the base address of the region to lock. + * Only the upper bits of the address are used; lowest bits are cleared + * to avoid confusion. + * + * The address is aligned to a multiple of the region size. This has profound + * implications on the region size itself: often the MMU will lock a region + * larger than the given number of pages, because the lock region cannot start + * from any arbitrary address. * * Return: 0 if success, or an error code on failure. 
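+ *
+ * As a purely illustrative example of the encoding: locking 8 pages (32 kB)
+ * starting at GPU address 0x40000000 gives a region size whose logarithm is
+ * 15, so the register value would be 0x40000000 | (15 - 1) = 0x4000000E,
+ * the base already being aligned to 32 kB.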
*/ -static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr) +static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, + const struct kbase_mmu_hw_op_param *op_param) { - const u64 lockaddr_base = pfn << PAGE_SHIFT; - u64 lockaddr_size_log2, region_frame_number_start, - region_frame_number_end; + const u64 lockaddr_base = op_param->vpfn << PAGE_SHIFT; + const u64 lockaddr_end = ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1; + u64 lockaddr_size_log2; - if (num_pages == 0) + if (op_param->nr == 0) return -EINVAL; - /* The size is expressed as a logarithm and should take into account - * the possibility that some pages might spill into the next region. + /* The MMU lock region is a self-aligned region whose size + * is a power of 2 and that contains both start and end + * of the address range determined by pfn and num_pages. + * The size of the MMU lock region can be defined as the + * largest divisor that yields the same result when both + * start and end addresses are divided by it. + * + * For instance: pfn=0x4F000 num_pages=2 describe the + * address range between 0x4F000 and 0x50FFF. It is only + * 2 memory pages. However there isn't a single lock region + * of 8 kB that encompasses both addresses because 0x4F000 + * would fall into the [0x4E000, 0x4FFFF] region while + * 0x50000 would fall into the [0x50000, 0x51FFF] region. + * The minimum lock region size that includes the entire + * address range is 128 kB, and the region would be + * [0x40000, 0x5FFFF]. + * + * The region size can be found by comparing the desired + * start and end addresses and finding the highest bit + * that differs. The smallest naturally aligned region + * must include this bit change, hence the desired region + * starts with this bit (and subsequent bits) set to 0 + * and ends with the bit (and subsequent bits) set to 1. + * + * In the example above: 0x4F000 ^ 0x50FFF = 0x1FFFF + * therefore the highest bit that differs is bit #16 + * and the region size (as a logarithm) is 16 + 1 = 17, i.e. 128 kB. */ - lockaddr_size_log2 = fls(num_pages) + PAGE_SHIFT - 1; - - /* Round up if the number of pages is not a power of 2. */ - if (num_pages != ((u32)1 << (lockaddr_size_log2 - PAGE_SHIFT))) - lockaddr_size_log2 += 1; - - /* Round up if some memory pages spill into the next region. */ - region_frame_number_start = pfn >> (lockaddr_size_log2 - PAGE_SHIFT); - region_frame_number_end = - (pfn + num_pages - 1) >> (lockaddr_size_log2 - PAGE_SHIFT); - - if (region_frame_number_start < region_frame_number_end) - lockaddr_size_log2 += 1; - - /* Represent the size according to the HW specification. */ - lockaddr_size_log2 = MAX(lockaddr_size_log2, - KBASE_LOCK_REGION_MIN_SIZE_LOG2); + lockaddr_size_log2 = fls(lockaddr_base ^ lockaddr_end); + /* Cap the size against minimum and maximum values allowed. */ if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) return -EINVAL; - /* The lowest bits are cleared and then set to size - 1 to represent - * the size in a way that is compatible with the HW specification. + lockaddr_size_log2 = + MAX(lockaddr_size_log2, kbase_get_lock_region_min_size_log2(gpu_props)); + + /* Represent the result in a way that is compatible with HW spec. + * + * Upper bits are used for the base address, whose lower bits + * are cleared to avoid confusion because they are going to be ignored + * by the MMU anyway, since lock regions shall be aligned with + * a multiple of their size and cannot start from any address. 
+ * + * Lower bits are used for the size, which is represented as + * logarithm minus one of the actual size. */ *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); *lockaddr |= lockaddr_size_log2 - 1; - return 0; } @@ -176,43 +222,244 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); } -int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, - u64 vpfn, u32 nr, u32 op, - unsigned int handling_irq) +/** + * mmu_command_instr - Record an MMU command for instrumentation purposes. + * + * @kbdev: Kbase device used to issue MMU operation on. + * @kctx_id: Kernel context ID for MMU command tracepoint. + * @cmd: Command issued to the MMU. + * @lock_addr: Address of memory region locked for the operation. + * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + */ +static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr); + u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr); + + bool is_mmu_synchronous = (mmu_sync_info == CALLER_MMU_SYNC); + + KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kctx_id, cmd, is_mmu_synchronous, lock_addr_base, + lock_addr_size); +} + +/* Helper function to program the LOCKADDR register before LOCK/UNLOCK command + * is issued. + */ +static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock_addr, + const struct kbase_mmu_hw_op_param *op_param) { int ret; - lockdep_assert_held(&kbdev->mmu_hw_mutex); + ret = lock_region(&kbdev->gpu_props, lock_addr, op_param); - if (op == AS_COMMAND_UNLOCK) { - /* Unlock doesn't require a lock first */ - ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); - } else { - u64 lock_addr; + if (!ret) { + /* Set the region that needs to be updated */ + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO), + *lock_addr & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI), + (*lock_addr >> 32) & 0xFFFFFFFFUL); + } + return ret; +} - ret = lock_region(vpfn, nr, &lock_addr); +/** + * mmu_hw_do_lock_no_wait - Issue LOCK command to the MMU and return without + * waiting for it's completion. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @lock_addr: Address of memory region locked for this operation. + * @op_param: Pointer to a struct containing information about the MMU operation. + * + * Return: 0 if issuing the command was successful, otherwise an error code. 
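+ *
+ * As the name suggests, this helper only programs LOCKADDR and issues the
+ * LOCK command; completion is waited for separately, e.g. via wait_ready()
+ * as done by mmu_hw_do_lock() below.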
+ */ +static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *as, u64 *lock_addr, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret; - if (!ret) { - /* Lock the region that needs to be updated */ - kbase_reg_write(kbdev, - MMU_AS_REG(as->number, AS_LOCKADDR_LO), - lock_addr & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, - MMU_AS_REG(as->number, AS_LOCKADDR_HI), - (lock_addr >> 32) & 0xFFFFFFFFUL); - write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param); - /* Run the MMU operation */ - write_cmd(kbdev, as->number, op); + if (!ret) + write_cmd(kbdev, as->number, AS_COMMAND_LOCK); - /* Wait for the flush to complete */ - ret = wait_ready(kbdev, as->number); - } + return ret; +} + +static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret; + u64 lock_addr = 0x0; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); + + if (!ret) + ret = wait_ready(kbdev, as->number); + + if (!ret) + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr, + op_param->mmu_sync_info); + + return ret; +} + +int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret = 0; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + + /* Wait for UNLOCK command to complete */ + if (!ret) + ret = wait_ready(kbdev, as->number); + + if (!ret) { + u64 lock_addr = 0x0; + /* read MMU_AS_CONTROL.LOCKADDR register */ + lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI)) + << 32; + lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO)); + + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, + lock_addr, op_param->mmu_sync_info); } return ret; } +int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret = 0; + u64 lock_addr = 0x0; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + ret = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param); + + if (!ret) + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, + op_param); + + return ret; +} +static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) +{ + int ret; + u64 lock_addr = 0x0; + u32 mmu_cmd = AS_COMMAND_FLUSH_MEM; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at + * this point would be unexpected. + */ + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && + op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + dev_err(kbdev->dev, "Unexpected flush operation received"); + return -EINVAL; + } + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op_param->op == KBASE_MMU_OP_FLUSH_PT) + mmu_cmd = AS_COMMAND_FLUSH_PT; + + /* Lock the region that needs to be updated */ + ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); + if (ret) + return ret; + +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) + /* WA for the BASE_HW_ISSUE_GPU2019_3901. 
No runtime check is used here + * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is + * supported, and this function doesn't gets called for the GPUs where + * FLUSH_MEM/PT command is deprecated. + */ + if (mmu_cmd == AS_COMMAND_FLUSH_MEM) { + ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, + as->number, hwaccess_locked); + if (ret) + return ret; + } +#endif + + write_cmd(kbdev, as->number, mmu_cmd); + + /* Wait for the command to complete */ + ret = wait_ready(kbdev, as->number); + + if (!ret) + mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr, + op_param->mmu_sync_info); + + return ret; +} + +int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_flush(kbdev, as, op_param, true); +} + +int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + return mmu_hw_do_flush(kbdev, as, op_param, false); +} + +int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret, ret2; + u32 gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2_LSC; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at + * this point would be unexpected. + */ + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && + op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + dev_err(kbdev->dev, "Unexpected flush operation received"); + return -EINVAL; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op_param->op == KBASE_MMU_OP_FLUSH_PT) + gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2; + + /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ + ret = mmu_hw_do_lock(kbdev, as, op_param); + if (ret) + return ret; + + /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */ + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, gpu_cmd); + + /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */ + ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); + + return ret ?: ret2; +} + void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, enum kbase_mmu_fault_type type) { diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c index 02493e9b2621..a83d25a4efec 100644 --- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c @@ -43,6 +43,9 @@ #define ENTRY_ACCESS_BIT (1ULL << 10) #define ENTRY_NX_BIT (1ULL << 54) +#define UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR (55) +#define VALID_ENTRY_MASK ((u64)0xF << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR) + /* Helper Function to perform assignment of page table entries, to * ensure the use of strd, which is required on LPAE systems. 
*/ @@ -104,6 +107,7 @@ static phys_addr_t pte_to_phy_addr(u64 entry) if (!(entry & 1)) return 0; + entry &= ~VALID_ENTRY_MASK; return entry & ~0xFFF; } @@ -170,29 +174,66 @@ static void entry_set_ate(u64 *entry, ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); } +static unsigned int get_num_valid_entries(u64 *pgd) +{ + register unsigned int num_of_valid_entries; + + num_of_valid_entries = + (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8)); + num_of_valid_entries |= + (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4)); + num_of_valid_entries |= + (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)); + + return num_of_valid_entries; +} + +static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries) +{ + WARN_ON_ONCE(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES); + + pgd[0] &= ~VALID_ENTRY_MASK; + pgd[0] |= ((u64)(num_of_valid_entries & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + + pgd[1] &= ~VALID_ENTRY_MASK; + pgd[1] |= ((u64)((num_of_valid_entries >> 4) & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + + pgd[2] &= ~VALID_ENTRY_MASK; + pgd[2] |= ((u64)((num_of_valid_entries >> 8) & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); +} + static void entry_set_pte(u64 *entry, phys_addr_t phy) { page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE); } -static void entry_invalidate(u64 *entry) +static void entries_invalidate(u64 *entry, u32 count) { - page_table_entry_set(entry, ENTRY_IS_INVAL); + u32 i; + + for (i = 0; i < count; i++) + page_table_entry_set(entry + i, ENTRY_IS_INVAL); } -static struct kbase_mmu_mode const aarch64_mode = { - .update = mmu_update, - .get_as_setup = kbase_mmu_get_as_setup, - .disable_as = mmu_disable_as, - .pte_to_phy_addr = pte_to_phy_addr, - .ate_is_valid = ate_is_valid, - .pte_is_valid = pte_is_valid, - .entry_set_ate = entry_set_ate, - .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, - .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE -}; +static const struct kbase_mmu_mode aarch64_mode = { .update = mmu_update, + .get_as_setup = kbase_mmu_get_as_setup, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + .ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entries_invalidate = entries_invalidate, + .get_num_valid_entries = get_num_valid_entries, + .set_num_valid_entries = set_num_valid_entries, + .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE }; struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) { diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c index 91a2d7ac4dcb..cc6aa16292c9 100644 --- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c @@ -37,6 +37,8 @@ #define ENTRY_ACCESS_BIT (1ULL << 10) #define ENTRY_NX_BIT (1ULL << 54) +#define UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR (55) +#define VALID_ENTRY_MASK ((u64)0xF << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR) #define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) @@ -186,14 +188,51 @@ static void entry_set_ate(u64 *entry, ENTRY_IS_ATE); } +static unsigned int get_num_valid_entries(u64 *pgd) +{ + register unsigned int num_of_valid_entries; + + num_of_valid_entries = + (unsigned int)((pgd[2] & VALID_ENTRY_MASK) 
>> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8)); + num_of_valid_entries |= + (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4)); + num_of_valid_entries |= + (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)); + + return num_of_valid_entries; +} + +static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries) +{ + WARN_ON_ONCE(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES); + + pgd[0] &= ~VALID_ENTRY_MASK; + pgd[0] |= ((u64)(num_of_valid_entries & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + + pgd[1] &= ~VALID_ENTRY_MASK; + pgd[1] |= ((u64)((num_of_valid_entries >> 4) & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + + pgd[2] &= ~VALID_ENTRY_MASK; + pgd[2] |= ((u64)((num_of_valid_entries >> 8) & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); +} + static void entry_set_pte(u64 *entry, phys_addr_t phy) { page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); } -static void entry_invalidate(u64 *entry) +static void entries_invalidate(u64 *entry, u32 count) { - page_table_entry_set(entry, ENTRY_IS_INVAL); + u32 i; + + for (i = 0; i < count; i++) + page_table_entry_set(entry + i, ENTRY_IS_INVAL); } static struct kbase_mmu_mode const lpae_mode = { @@ -205,7 +244,9 @@ static struct kbase_mmu_mode const lpae_mode = { .pte_is_valid = pte_is_valid, .entry_set_ate = entry_set_ate, .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, + .entries_invalidate = entries_invalidate, + .get_num_valid_entries = get_num_valid_entries, + .set_num_valid_entries = set_num_valid_entries, .flags = 0 }; diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c index 2a60ac8c655a..efe3a927d3d2 100644 --- a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c @@ -67,6 +67,9 @@ extern int set_hmp_boost(int enable); #define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) +#define UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR (55) +#define VALID_ENTRY_MASK ((u64)0xF << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR) + /* * peak_flops: 100/85 * sobel: 100/50 @@ -387,6 +390,7 @@ static phys_addr_t mmu_pte_to_phy_addr(u64 entry) if (!(entry & 1)) return 0; + entry &= ~VALID_ENTRY_MASK; return entry & ~0xFFF; } diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c index bda9f79e4db2..835a8fbf845c 100644 --- a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c @@ -186,6 +186,8 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { int rcode; + if (!timeline_is_permitted()) + return -EPERM; ret = anon_inode_getfd( "[mali_tlstream]", diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h index 71e39b694a8b..13fa6b89bbab 100644 --- a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h @@ -143,4 +143,6 @@ void kbase_timeline_test( void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); #endif /* MALI_UNIT_TEST */ +bool timeline_is_permitted(void); + #endif /* _KBASE_TIMELINE_H */ diff --git 
a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c index cdde928bbab9..b5da504f4ec2 100644 --- a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,59 @@ #include +#ifndef MALI_STRIP_KBASE_DEVELOPMENT +/* Development builds need to test instrumentation and enable unprivileged + * processes to acquire timeline streams, in order to avoid complications + * with configurations across multiple platforms and systems. + * + * Release builds, instead, shall deny access to unprivileged processes + * because there are no use cases where they are allowed to acquire timeline + * streams, unless they're given special permissions by a privileged process. + */ +static int kbase_unprivileged_global_profiling = 1; +#else +static int kbase_unprivileged_global_profiling; +#endif + +/** + * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes + * + * @val: String containing value to set. Only strings representing positive + * integers are accepted as valid; any non-positive integer (including 0) + * is rejected. + * @kp: Module parameter associated with this method. + * + * This method can only be used to enable permissions for unprivileged processes, + * if they are disabled: for this reason, the only values which are accepted are + * strings representing positive integers. Since it's impossible to disable + * permissions once they're set, any integer which is non-positive is rejected, + * including 0. + * + * Return: 0 if success, otherwise error code. + */ +static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp) +{ + int new_val; + int ret = kstrtoint(val, 0, &new_val); + + if (ret == 0) { + if (new_val < 1) + return -EINVAL; + + kbase_unprivileged_global_profiling = 1; + } + + return ret; +} + +static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = { + .get = param_get_int, + .set = kbase_unprivileged_global_profiling_set, +}; + +module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops, + &kbase_unprivileged_global_profiling, 0600); + /* The timeline stream file operations functions. 
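+ *
+ * Note: acquiring a timeline stream fd is gated by timeline_is_permitted().
+ * When MALI_STRIP_KBASE_DEVELOPMENT is defined, unprivileged access can
+ * still be enabled at runtime through the module parameter above, e.g.
+ * (path shown assuming the default "mali_kbase" module name):
+ *
+ *   echo 1 > /sys/module/mali_kbase/parameters/kbase_unprivileged_global_profiling
+ *
+ * Since the setter only accepts positive values, access cannot be revoked
+ * again by the same route.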
*/ static ssize_t kbasep_timeline_io_read( struct file *filp, @@ -43,6 +96,15 @@ const struct file_operations kbasep_tlstream_fops = { .poll = kbasep_timeline_io_poll, }; +bool timeline_is_permitted(void) +{ +#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE + return kbase_unprivileged_global_profiling || perfmon_capable(); +#else + return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN); +#endif +} + /** * kbasep_timeline_io_packet_pending - check timeline streams for pending packets * @timeline: Timeline instance diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c index e445a3a3d683..7fe3ee39c912 100644 --- a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c @@ -136,6 +136,7 @@ enum tl_msg_id_aux { KBASE_AUX_DEVFREQ_TARGET, KBASE_AUX_PROTECTED_ENTER_START, KBASE_AUX_PROTECTED_ENTER_END, + KBASE_AUX_MMU_COMMAND, KBASE_AUX_PROTECTED_LEAVE_START, KBASE_AUX_PROTECTED_LEAVE_END, KBASE_AUX_JIT_STATS, @@ -1751,6 +1752,45 @@ void __kbase_tlstream_aux_protected_enter_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_aux_mmu_command( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 mmu_cmd_id, + u32 mmu_synchronicity, + u64 mmu_lock_addr, + u32 mmu_lock_page_num +) +{ + const u32 msg_id = KBASE_AUX_MMU_COMMAND; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(mmu_cmd_id) + + sizeof(mmu_synchronicity) + + sizeof(mmu_lock_addr) + + sizeof(mmu_lock_page_num) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_cmd_id, sizeof(mmu_cmd_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_synchronicity, sizeof(mmu_synchronicity)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_lock_addr, sizeof(mmu_lock_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_lock_page_num, sizeof(mmu_lock_page_num)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_aux_protected_leave_start( struct kbase_tlstream *stream, const void *gpu) diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h index 7ea8ba8d0200..a8b844969c5d 100644 --- a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h @@ -263,6 +263,14 @@ void __kbase_tlstream_aux_protected_enter_start( void __kbase_tlstream_aux_protected_enter_end( struct kbase_tlstream *stream, const void *gpu); +void __kbase_tlstream_aux_mmu_command( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 mmu_cmd_id, + u32 mmu_synchronicity, + u64 mmu_lock_addr, + u32 mmu_lock_page_num +); void __kbase_tlstream_aux_protected_leave_start( struct kbase_tlstream *stream, const void *gpu); @@ -1432,6 +1440,37 @@ struct kbase_tlstream; gpu); \ } while (0) +/** + * KBASE_TLSTREAM_AUX_MMU_COMMAND - mmu commands with synchronicity info + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @mmu_cmd_id: MMU Command ID (e.g AS_COMMAND_UPDATE) + * @mmu_synchronicity: Indicates whether the command is related to current running job that 
needs to be resolved to make it progress (synchronous, e.g. grow on page fault, JIT) or not (asynchronous, e.g. IOCTL calls from user-space). This param will be 0 if it is an asynchronous operation. + * @mmu_lock_addr: start address of regions to be locked/unlocked/invalidated + * @mmu_lock_page_num: number of pages to be locked/unlocked/invalidated + */ +#define KBASE_TLSTREAM_AUX_MMU_COMMAND( \ + kbdev, \ + kernel_ctx_id, \ + mmu_cmd_id, \ + mmu_synchronicity, \ + mmu_lock_addr, \ + mmu_lock_page_num \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_mmu_command( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + kernel_ctx_id, \ + mmu_cmd_id, \ + mmu_synchronicity, \ + mmu_lock_addr, \ + mmu_lock_page_num \ + ); \ + } while (0) + /** * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - * leave protected mode start diff --git a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_cache_policy_backend.h index 278125acb814..35ee17aca4bb 100644 --- a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_cache_policy_backend.h @@ -23,7 +23,7 @@ #define _KBASE_CACHE_POLICY_BACKEND_H_ #include "mali_kbase.h" -#include +#include /** * kbase_cache_set_coherency_mode() - Sets the system coherency mode diff --git a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_defs.h index e29ace710ed7..919901c29565 100644 --- a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_defs.h @@ -38,10 +38,32 @@ struct rb_entry { struct kbase_jd_atom *katom; }; +/* SLOT_RB_TAG_PURGED assumes a value that is different from + * NULL (SLOT_RB_NULL_TAG_VAL) and will not be the result of + * any valid pointer via macro translation: SLOT_RB_TAG_KCTX(x). + */ +#define SLOT_RB_TAG_PURGED ((u64)(1 << 1)) +#define SLOT_RB_NULL_TAG_VAL ((u64)0) + +/** + * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a + * u64 for serving as tagged value. + * @kctx: Pointer to kbase context. + */ +#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx)) /** * struct slot_rb - Slot ringbuffer * @entries: Ringbuffer entries - * @last_context: The last context to submit a job on this slot + * @last_kctx_tagged: The last context that submitted a job to the slot's + * HEAD_NEXT register. The value is a tagged variant so + * must not be dereferenced. It is used in operation to + * track when shader core L1 caches might contain a + * previous context's data, and so must only be set to + * SLOT_RB_NULL_TAG_VAL after reset/powerdown of the + * cores. In slot job submission, if there is a kctx + * change, and the relevant katom is configured with + * BASE_JD_REQ_SKIP_CACHE_START, a L1 read only cache + * maintenace operation is enforced. 
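+ *                    For illustration only, a context change is detected in
+ *                    the submission path with a comparison of the form
+ *                    last_kctx_tagged != SLOT_RB_TAG_KCTX(kctx).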
* @read_idx: Current read index of buffer * @write_idx: Current write index of buffer * @job_chain_flag: Flag used to implement jobchain disambiguation @@ -49,7 +71,7 @@ struct rb_entry { struct slot_rb { struct rb_entry entries[SLOT_RB_SIZE]; - struct kbase_context *last_context; + u64 last_kctx_tagged; u8 read_idx; u8 write_idx; diff --git a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_hw.c index da24be210b45..0942317212ce 100644 --- a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_hw.c @@ -303,6 +303,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); kbase_kinstr_jm_atom_hw_submit(katom); + + /* Update the slot's last katom submission kctx */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx); + #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ @@ -313,7 +317,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, sizeof(js_string)), ktime_to_ns(katom->start_timestamp), (u32)katom->kctx->id, 0, katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; } #endif diff --git a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_rb.c index fd87b78b7307..79edfbca178a 100644 --- a/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bv_r32p1/backend/gpu/mali_kbase_jm_rb.c @@ -1262,8 +1262,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, ktime_to_ns(*end_timestamp), (u32)next_katom->kctx->id, 0, next_katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = - next_katom->kctx; } else { char js_string[16]; @@ -1272,7 +1270,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, sizeof(js_string)), ktime_to_ns(ktime_get()), 0, 0, 0); - kbdev->hwaccess.backend.slot_rb[js].last_context = 0; } } #endif @@ -1696,3 +1693,34 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + int js; + bool tracked = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; + + if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { + /* Marking the slot kctx tracking field is purged */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; + tracked = true; + } + } + + if (tracked) { + /* The context had run some jobs before the purge, other slots + * in SLOT_RB_NULL_TAG_VAL condition needs to be marked as + * purged as well. 
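+		 * A PURGED tag can never equal SLOT_RB_TAG_KCTX() for any
+		 * context, so the next submission on such a slot is handled
+		 * as a context change (see the @last_kctx_tagged description
+		 * in mali_kbase_jm_defs.h).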
+ */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == + SLOT_RB_NULL_TAG_VAL) + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + SLOT_RB_TAG_PURGED; + } + } +} diff --git a/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_csf.c index 1ce806f639d4..74492cc4e1ba 100644 --- a/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_csf.c @@ -31,6 +31,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_DEBUG_FS) #include @@ -86,6 +87,7 @@ static const struct kbase_context_init context_init[] = { "Memory pool group initialization failed" }, { kbase_mem_evictable_init, kbase_mem_evictable_deinit, "Memory evictable initialization failed" }, + { kbase_ctx_sched_init_ctx, NULL, NULL }, { kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" }, { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, diff --git a/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_jm.c index 8ce81e78a9e6..49ec7a37cba1 100644 --- a/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_jm.c +++ b/drivers/gpu/arm/bv_r32p1/context/backend/mali_kbase_context_jm.c @@ -127,6 +127,7 @@ static const struct kbase_context_init context_init[] = { "Memory pool group initialization failed" }, { kbase_mem_evictable_init, kbase_mem_evictable_deinit, "Memory evictable initialization failed" }, + { kbase_ctx_sched_init_ctx, NULL, NULL }, { kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" }, { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, diff --git a/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c b/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c index b2e7025dd334..304bbea79be2 100644 --- a/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,12 @@ /* * Base kernel context APIs */ +#include +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #include #include @@ -129,17 +135,50 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; + kctx->task = NULL; atomic_set(&kctx->nonmapped_pages, 0); atomic_set(&kctx->permanent_mapped_pages, 0); kctx->tgid = current->tgid; kctx->pid = current->pid; + /* Check if this is a Userspace created context */ + if (likely(kctx->filp)) { + struct pid *pid_struct; + + rcu_read_lock(); + pid_struct = find_get_pid(kctx->tgid); + if (likely(pid_struct)) { + struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID); + + if (likely(task)) { + /* Take a reference on the task to avoid slow lookup + * later on from the page allocation loop. 
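+				 * The reference is dropped again with
+				 * put_task_struct() when the context
+				 * is terminated.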
+ */ + get_task_struct(task); + kctx->task = task; + } else { + dev_err(kctx->kbdev->dev, + "Failed to get task pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + + put_pid(pid_struct); + } else { + dev_err(kctx->kbdev->dev, + "Failed to get pid pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + rcu_read_unlock(); + + if (unlikely(err)) + return err; + kbase_mem_mmgrab(); + kctx->process_mm = current->mm; + } + atomic_set(&kctx->used_pages, 0); mutex_init(&kctx->reg_lock); @@ -166,13 +205,16 @@ int kbase_context_common_init(struct kbase_context *kctx) mutex_init(&kctx->legacy_hwcnt_lock); mutex_lock(&kctx->kbdev->kctx_list_lock); - err = kbase_insert_kctx_to_process(kctx); - if (err) - dev_err(kctx->kbdev->dev, - "(err:%d) failed to insert kctx to kbase_process\n", err); - mutex_unlock(&kctx->kbdev->kctx_list_lock); + if (err) { + dev_err(kctx->kbdev->dev, + "(err:%d) failed to insert kctx to kbase_process", err); + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } + } return err; } @@ -243,15 +285,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, @@ -263,6 +298,11 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } + KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); } diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c index d3d2980febf8..28c8de946719 100644 --- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include "mali_kbase_csf_tiler_heap.h" #include #include "mali_kbase_csf_timeout.h" @@ -348,7 +348,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, ret = kbase_mem_pool_alloc_pages( &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, false); + num_pages, queue->phys, false,kctx->task); if (ret != num_pages) goto phys_alloc_failed; @@ -1072,7 +1072,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, /* Get physical page for a normal suspend buffer */ err = kbase_mem_pool_alloc_pages( &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - nr_pages, &s_buf->phy[0], false); + nr_pages, &s_buf->phy[0], false, kctx->task); if (err < 0) goto phy_pages_alloc_failed; @@ -2995,7 +2995,7 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - 1, &phys, false); + 1, &phys, false, NULL); if (ret <= 0) { fput(filp); @@ -3031,7 +3031,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, - false); + false, NULL); if (ret <= 0) return ret; diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c 
b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c index 25767de4f6b8..e6f7dfef8e03 100644 --- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -546,7 +546,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, } else { ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + num_pages, phys, false, NULL); if (ret < 0) goto out; } @@ -2248,7 +2248,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + num_pages, phys, false, NULL); if (ret <= 0) goto phys_mem_pool_alloc_error; diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.h index 60d70651fb83..1d027f94bd78 100644 --- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.h @@ -23,7 +23,7 @@ #define _KBASE_CSF_FIRMWARE_H_ #include "device/mali_kbase_device.h" -#include +#include /* * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c index edd64bec4f95..72ac6e2bb030 100644 --- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1289,7 +1289,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + num_pages, phys, false, NULL); if (ret <= 0) goto phys_mem_pool_alloc_error; diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c index 4e26a496305e..b8a83f9ce628 100644 --- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c @@ -335,6 +335,14 @@ static int kbase_kcpu_jit_allocate_prepare( lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + if (!kbase_mem_allow_alloc(kctx)) { + dev_dbg(kctx->kbdev->dev, + "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", + current->comm, current->pid, kctx->tgid, kctx->id); + ret = -EINVAL; + goto out; + } + if (!data || count > kcpu_queue->kctx->jit_max_allocations || count > ARRAY_SIZE(kctx->jit_alloc)) { ret = -EINVAL; diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_scheduler.c index 810957041804..c129f7f94d32 100644 --- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_scheduler.c @@ -28,8 +28,8 @@ #include #include #include -#include -#include +#include +#include /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -4770,7 +4770,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (!kctx->csf.sched.sync_update_wq) { dev_err(kctx->kbdev->dev, "Failed to initialize scheduler context workqueue"); - return -ENOMEM; + err = -ENOMEM; + goto alloc_wq_failed; } INIT_WORK(&kctx->csf.sched.sync_update_work, @@ -4781,10 +4782,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - destroy_workqueue(kctx->csf.sched.sync_update_wq); + goto event_wait_add_failed; } return err; + +event_wait_add_failed: + destroy_workqueue(kctx->csf.sched.sync_update_wq); +alloc_wq_failed: + kbase_ctx_sched_remove_ctx(kctx); + return err; } void kbase_csf_scheduler_context_term(struct kbase_context *kctx) @@ -4792,6 +4799,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); cancel_work_sync(&kctx->csf.sched.sync_update_work); destroy_workqueue(kctx->csf.sched.sync_update_wq); + + kbase_ctx_sched_remove_ctx(kctx); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device.h b/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device.h index 517c16b15fca..bb96d9b55e52 100644 --- a/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device.h +++ b/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device.h @@ -117,6 +117,23 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); */ bool kbase_is_gpu_removed(struct kbase_device *kbdev); +/** + * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait + * @kbdev: Kbase device + * @flush_op: Flush command register value to be sent to HW + * + * Issue a cache flush command to hardware, then busy wait an irq status. 
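+ * The wait is a bounded busy-wait of up to KBASE_AS_INACTIVE_MAX_LOOPS
+ * register reads; if the flush has not completed by then, a GPU reset is
+ * initiated and -EBUSY is returned.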
+ * This function will clear CLEAN_CACHES_COMPLETED irq mask bit set by other + * threads through kbase_gpu_start_cache_clean(), and wake them up manually + * after the busy-wait is done. Any pended cache flush commands raised by + * other thread are handled in this function. + * hwaccess_lock must be held by the caller. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, + u32 flush_op); + /** * kbase_gpu_start_cache_clean - Start a cache clean * @kbdev: Kbase device diff --git a/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device_hw.c index e80559a19b9e..d2f2e14fe9db 100644 --- a/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/bv_r32p1/device/mali_kbase_device_hw.c @@ -76,6 +76,109 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev) } #endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit) +{ + char *irq_flag_name; + /* Previously MMU-AS command was used for L2 cache flush on page-table update. + * And we're using the same max-loops count for GPU command, because amount of + * L2 cache flush overhead are same between them. + */ + unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + + /* Wait for the GPU cache clean operation to complete */ + while (--max_loops && + !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) { + ; + } + + /* reset gpu if time-out occurred */ + if (max_loops == 0) { + switch (irq_bit) { + case CLEAN_CACHES_COMPLETED: + irq_flag_name = "CLEAN_CACHES_COMPLETED"; + break; + case FLUSH_PA_RANGE_COMPLETED: + irq_flag_name = "FLUSH_PA_RANGE_COMPLETED"; + break; + default: + irq_flag_name = "UNKNOWN"; + break; + } + + dev_err(kbdev->dev, + "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n", + irq_flag_name); + + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + return -EBUSY; + } + + /* Clear the interrupt bit. */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit); + + return 0; +} + +int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, + u32 flush_op) +{ + int need_to_wake_up = 0; + int ret = 0; + + /* hwaccess_lock must be held to avoid any sync issue with + * kbase_gpu_start_cache_clean() / kbase_clean_caches_done() + */ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* 1. Check if kbdev->cache_clean_in_progress is set. + * If it is set, it means there are threads waiting for + * CLEAN_CACHES_COMPLETED irq to be raised and that the + * corresponding irq mask bit is set. + * We'll clear the irq mask bit and busy-wait for the cache + * clean operation to complete before submitting the cache + * clean command required after the GPU page table update. + * Pended flush commands will be merged to requested command. 
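+	 * Merging is done with GPU_COMMAND_FLUSH_CACHE_MERGE() so that a
+	 * single command covering both the pending and the requested flush
+	 * is issued.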
+ */ + if (kbdev->cache_clean_in_progress) { + /* disable irq first */ + u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); + + /* busy wait irq status to be enabled */ + ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); + if (ret) + return ret; + + /* merge pended command if there's any */ + flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE( + kbdev->cache_clean_queued, flush_op); + + /* enable wake up notify flag */ + need_to_wake_up = 1; + } else { + /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), + CLEAN_CACHES_COMPLETED); + } + + /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + + /* 3. Busy-wait irq status to be enabled. */ + ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); + if (ret) + return ret; + + /* 4. Wake-up blocked threads when there is any. */ + if (need_to_wake_up) + kbase_gpu_cache_clean_wait_complete(kbdev); + + return ret; +} void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) { u32 irq_mask; diff --git a/drivers/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_fault_csf.c b/drivers/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_fault_csf.c index f9d4c14c6936..6c4f3e67b48b 100644 --- a/drivers/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_fault_csf.c +++ b/drivers/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_fault_csf.c @@ -20,7 +20,7 @@ */ #include -#include +#include #include const char *kbase_gpu_exception_name(u32 const exception_code) diff --git a/drivers/gpu/arm/bv_r32p1/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/bv_r32p1/gpu/mali_kbase_gpu_regmap.h index 47e77810afb8..eda54e81b276 100644 --- a/drivers/gpu/arm/bv_r32p1/gpu/mali_kbase_gpu_regmap.h +++ b/drivers/gpu/arm/bv_r32p1/gpu/mali_kbase_gpu_regmap.h @@ -22,8 +22,49 @@ #ifndef _KBASE_GPU_REGMAP_H_ #define _KBASE_GPU_REGMAP_H_ -#include +#include +#if MALI_USE_CSF +#include +#else +#include +#endif +/* GPU_U definition */ +#ifdef __ASSEMBLER__ +#define GPU_U(x) x +#define GPU_UL(x) x +#define GPU_ULL(x) x +#else +#define GPU_U(x) x##u +#define GPU_UL(x) x##ul +#define GPU_ULL(x) x##ull +#endif /* __ASSEMBLER__ */ +/* AS_LOCKADDR register */ +#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0) +#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \ + (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \ + AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \ + (((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \ + (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \ + AS_LOCKADDR_LOCKADDR_SIZE_MASK)) +#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) +#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ + (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \ + AS_LOCKADDR_LOCKADDR_BASE_SHIFT) +#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \ + (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \ + (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \ + AS_LOCKADDR_LOCKADDR_BASE_MASK)) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT (6) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK ((0xF) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) +#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, 
value) \ + (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ + ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) /* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */ #ifdef CONFIG_MALI_DEBUG #undef GPU_IRQ_REG_ALL diff --git a/drivers/gpu/arm/bv_r32p1/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bv_r32p1/jm/mali_kbase_jm_defs.h index c490f1c77ed8..2a5827a2b4ee 100644 --- a/drivers/gpu/arm/bv_r32p1/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bv_r32p1/jm/mali_kbase_jm_defs.h @@ -342,19 +342,6 @@ enum kbase_atom_exit_protected_state { KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, }; -/** - * struct kbase_ext_res - Contains the info for external resources referred - * by an atom, which have been mapped on GPU side. - * @gpu_address: Start address of the memory region allocated for - * the resource from GPU virtual address space. - * @alloc: pointer to physical pages tracking object, set on - * mapping the external resource on GPU side. - */ -struct kbase_ext_res { - u64 gpu_address; - struct kbase_mem_phy_alloc *alloc; -}; - /** * struct kbase_jd_atom - object representing the atom, containing the complete * state and attributes of an atom. @@ -388,7 +375,8 @@ struct kbase_ext_res { * each allocation is read in order to enforce an * overall physical memory usage limit. * @nr_extres: number of external resources referenced by the atom. - * @extres: pointer to the location containing info about + * @extres: Pointer to @nr_extres VA regions containing the external + * resource allocation and other information. * @nr_extres external resources referenced by the atom. * @device_nr: indicates the coregroup with which the atom is * associated, when @@ -518,7 +506,7 @@ struct kbase_jd_atom { #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ u16 nr_extres; - struct kbase_ext_res *extres; + struct kbase_va_region **extres; u32 device_nr; u64 jc; diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase.h b/drivers/gpu/arm/bv_r32p1/mali_kbase.h index b4e50aef3c10..308b7867a5a9 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase.h @@ -45,7 +45,7 @@ #include #include -#include +#include #include /* @@ -64,7 +64,7 @@ #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" #include "mali_kbase_gpuprops.h" -#include +#include #if !MALI_USE_CSF #include "mali_kbase_debug_job_fault.h" #include "mali_kbase_jd_debugfs.h" @@ -374,16 +374,6 @@ static inline void kbase_free_user_buffer( } } -/** - * kbase_mem_copy_from_extres() - Copy from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @buf_data: Pointer to the information about external resources: - * pages pertaining to the external resource, number of - * pages to copy. 
- */ -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data); #if !MALI_USE_CSF int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_cache_policy.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_cache_policy.h index 7da33a6c07c5..cf6c6047d751 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_cache_policy.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_cache_policy.h @@ -27,7 +27,7 @@ #define _KBASE_CACHE_POLICY_H_ #include "mali_kbase.h" -#include +#include /** * kbase_cache_enabled - Choose the cache policy for a specific region diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_core_linux.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_core_linux.c index 9a1172b5b99b..48db1d6e6ae0 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_core_linux.c @@ -46,7 +46,7 @@ #include #endif #include -#include +#include #if !MALI_USE_CSF #include "mali_kbase_kinstr_jm.h" #endif @@ -808,16 +808,13 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, u64 flags = alloc->in.flags; u64 gpu_va; - rcu_read_lock(); - /* Don't allow memory allocation until user space has set up the - * tracking page (which sets kctx->process_mm). Also catches when we've - * forked. + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. */ - if (rcu_dereference(kctx->process_mm) != current->mm) { - rcu_read_unlock(); + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + if (!kbase_mem_allow_alloc(kctx)) return -EINVAL; - } - rcu_read_unlock(); if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) return -ENOMEM; @@ -848,8 +845,8 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, } #endif - reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, - alloc->in.extension, &flags, &gpu_va); + reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, alloc->in.extension, + &flags, &gpu_va, mmu_sync_info); if (!reg) return -ENOMEM; diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.c index d06380deca41..5854f8826e46 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.c @@ -64,6 +64,13 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); + return 0; +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -182,9 +189,10 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(atomic_read(&kctx->refcount) != 0); @@ -196,6 +204,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.h 
b/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.h index 334724f60837..a4a0a62d6e2c 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_ctx_sched.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,6 +59,17 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); */ void kbase_ctx_sched_term(struct kbase_device *kbdev); +/** + * kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx + * + * Return: 0 + */ +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + /** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference @@ -113,9 +124,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. */ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h index 8a65316b0615..639d9a5bcd3d 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -262,7 +262,7 @@ struct kbase_fault { * it is NULL */ struct kbase_mmu_table { - u64 *mmu_teardown_pages; + u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL]; struct mutex mmu_lock; phys_addr_t pgd; u8 group_id; @@ -544,8 +544,11 @@ struct kbase_devfreq_opp { * @entry_set_ate: program the pte to be a valid address translation entry to * encode the physical address of the actual page being mapped. * @entry_set_pte: program the pte to be a valid entry to encode the physical - * address of the next lower level page table. - * @entry_invalidate: clear out or invalidate the pte. + * address of the next lower level page table and also update + * the number of valid entries. + * @entries_invalidate: clear out or invalidate a range of ptes. + * @get_num_valid_entries: returns the number of valid entries for a specific pgd. + * @set_num_valid_entries: sets the number of valid entries for a specific pgd * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. 
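The batched entries_invalidate hook replaces the old one-entry-at-a-time entry_invalidate. A minimal sketch of what an implementation could look like (illustrative only, not the in-tree MMU mode; the all-zero "invalid" PTE encoding is an assumption):

	static void example_entries_invalidate(u64 *entry, u32 count)
	{
		u32 i;

		/* Clear 'count' consecutive PTEs in a single call. */
		for (i = 0; i < count; i++)
			entry[i] = 0;
	}

The get_num_valid_entries/set_num_valid_entries pair is then queried and updated alongside such bulk operations so each page table keeps an accurate count of live entries.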
*/ struct kbase_mmu_mode { @@ -561,7 +564,10 @@ struct kbase_mmu_mode { void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, unsigned long flags, int level); void (*entry_set_pte)(u64 *entry, phys_addr_t phy); - void (*entry_invalidate)(u64 *entry); + void (*entries_invalidate)(u64 *entry, u32 count); + unsigned int (*get_num_valid_entries)(u64 *pgd); + void (*set_num_valid_entries)(u64 *pgd, + unsigned int num_of_valid_entries); unsigned long flags; }; @@ -1536,11 +1542,13 @@ struct kbase_sub_alloc { * is scheduled in and an atom is pulled from the context's per * slot runnable tree in JM GPU or GPU command queue * group is programmed on CSG slot in CSF GPU. - * @mm_update_lock: lock used for handling of special tracking page. * @process_mm: Pointer to the memory descriptor of the process which * created the context. Used for accounting the physical * pages used for GPU allocations, done for the context, - * to the memory consumed by the process. + * to the memory consumed by the process. A reference is taken + * on this descriptor for the Userspace created contexts so that + * Kbase can safely access it to update the memory usage counters. + * The reference is dropped on context termination. * @gpu_va_end: End address of the GPU va space (in 4KB page units) * @jit_va: Indicates if a JIT_VA zone has been created. * @mem_profile_data: Buffer containing the profiling information provided by @@ -1672,7 +1680,10 @@ struct kbase_sub_alloc { * @limited_core_mask: The mask that is applied to the affinity in case of atoms * marked with BASE_JD_REQ_LIMITED_CORE_MASK. * @platform_data: Pointer to platform specific per-context data. - * + * @task: Pointer to the task structure of the main thread of the process + * that created the Kbase context. It would be set only for the + * contexts created by the Userspace and not for the contexts + * created internally by the Kbase.* * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. * Up to one context can be created for each client that opens the device file @@ -1762,8 +1773,7 @@ struct kbase_context { atomic_t refcount; - spinlock_t mm_update_lock; - struct mm_struct __rcu *process_mm; + struct mm_struct *process_mm; u64 gpu_va_end; bool jit_va; @@ -1825,6 +1835,8 @@ struct kbase_context { #if !MALI_USE_CSF void *platform_data; #endif + + struct task_struct *task; }; #ifdef CONFIG_MALI_CINSTR_GWT @@ -1853,17 +1865,15 @@ struct kbasep_gwt_list_element { * to a @kbase_context. * @ext_res_node: List head for adding the metadata to a * @kbase_context. - * @alloc: The physical memory allocation structure - * which is mapped. - * @gpu_addr: The GPU virtual address the resource is - * mapped to. + * @reg: External resource information, containing + * the corresponding VA region * @ref: Reference count. * * External resources can be mapped into multiple contexts as well as the same * context multiple times. - * As kbase_va_region itself isn't refcounted we can't attach our extra - * information to it as it could be removed under our feet leaving external - * resources pinned. + * As kbase_va_region is refcounted, we guarantee that it will be available + * for the duration of the external resource, meaning it is sufficient to use + * it to rederive any additional data, like the GPU address. 
* This metadata structure binds a single external resource to a single * context, ensuring that per context mapping is tracked separately so it can * be overridden when needed and abuses by the application (freeing the resource @@ -1871,8 +1881,7 @@ struct kbasep_gwt_list_element { */ struct kbase_ctx_ext_res_meta { struct list_head ext_res_node; - struct kbase_mem_phy_alloc *alloc; - u64 gpu_addr; + struct kbase_va_region *reg; u32 ref; }; @@ -1902,6 +1911,23 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) return false; } +/** + * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock + * region, as a logarithm + * + * @gpu_props: GPU properties + * + * Return: the minimum size of the MMU lock region as dictated by the corresponding + * arch spec. + */ +static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props) +{ + if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >= + GPU_ID2_MODEL_MAKE(12, 0)) + return 12; /* 4 kB */ + + return 15; /* 32 kB */ +} /* Conversion helpers for setting up high resolution timers */ #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_dummy_job_wa.c index 1e91ba0b2681..bdc5d6d5b316 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_dummy_job_wa.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_dummy_job_wa.c @@ -281,6 +281,11 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) int err; struct kbase_context *kctx; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kbdev->fw_load_lock); if (!wa_blob_load_needed(kbdev)) @@ -375,8 +380,8 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) nr_pages = PFN_UP(blob->size); flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; - va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, - 0, &flags, &gpu_va); + va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, + &gpu_va, mmu_sync_info); if (!va_region) { dev_err(kbdev->dev, "Failed to allocate for blob\n"); diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.c index d5ae261598e8..04c9c5c7885e 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.h index b20b99b654a7..668169b3b86d 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops.h @@ -82,6 +82,8 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev); * @kbdev: The kbase device * * Fills prop_buffer with the GPU properties for user space to read. + * + * Return: MALI_ERROR_NONE on success. Any other value indicates failure. 
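Returning to the kbase_ctx_ext_res_meta change above: because the metadata now holds a refcounted kbase_va_region, the GPU address no longer needs to be cached and can be rederived on demand, which is what the kbasep_get_va_gpu_addr() helper added later in mali_kbase_mem.c does. A minimal equivalent, for illustration only:

	static inline u64 example_ext_res_gpu_addr(struct kbase_ctx_ext_res_meta *meta)
	{
		return meta->reg->start_pfn << PAGE_SHIFT;
	}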
*/ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops_types.h index 02705a072c5e..1bb2be9bf08b 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_gpuprops_types.h @@ -26,7 +26,7 @@ #ifndef _KBASE_GPUPROPS_TYPES_H_ #define _KBASE_GPUPROPS_TYPES_H_ -#include +#include #define KBASE_GPU_SPEED_MHZ 123 #define KBASE_GPU_PC_SIZE_LOG2 24U diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_gwt.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_gwt.h index 30de43de06df..c6be5e176938 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_gwt.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_gwt.h @@ -23,7 +23,7 @@ #define _KBASE_GWT_H #include -#include +#include /** * kbase_gpu_gwt_start - Start the GPU write tracking diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwaccess_jm.h index 8689647aaa8b..d0207f74c969 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwaccess_jm.h @@ -299,4 +299,21 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, */ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); +/** + * kbase_backend_slot_kctx_purge_locked - Perform a purge on the slot_rb tracked + * kctx + * + * @kbdev: Device pointer + * @kctx: The kbase context that needs to be purged from slot_rb[] + * + * For JM GPUs, the L1 read only caches may need a start_flush invalidation, + * potentially on all slots (even if the kctx was only using a single slot), + * following a context termination or address-space ID recycle. This function + * performs a clean-up purge on the given kctx which if it has been tracked by + * slot_rb[] objects. + * + * Caller must hold kbase_device->hwaccess_lock. + */ +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx); + #endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_csf_if_fw.c index 78a8dc0f505b..cae515c7a4e9 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -28,7 +28,7 @@ #include #include "mali_kbase_hwcnt_gpu.h" #include "mali_kbase_hwcnt_types.h" -#include +#include #include "csf/mali_kbase_csf_firmware.h" #include "mali_kbase_hwcnt_backend_csf_if_fw.h" diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_jm.c index 64001b157abb..aba038ba15c9 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_backend_jm.c @@ -519,6 +519,11 @@ static int kbasep_hwcnt_backend_jm_dump_alloc( u64 flags; u64 nr_pages; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. 
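The CALLER_MMU_ASYNC pattern seen here recurs at every kbase_mem_alloc() call site in this patch. A sketch of the calling convention (hypothetical helper; only the kbase_mem_alloc() signature is taken from the patch):

	static struct kbase_va_region *example_alloc_async(struct kbase_context *kctx,
							   u64 nr_pages, u64 *flags,
							   u64 *gpu_va)
	{
		/* This caller cannot serialise against in-flight MMU operations. */
		const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

		return kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, flags, gpu_va,
				       mmu_sync_info);
	}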
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + WARN_ON(!info); WARN_ON(!kctx); WARN_ON(!gpu_dump_va); @@ -531,7 +536,8 @@ static int kbasep_hwcnt_backend_jm_dump_alloc( nr_pages = PFN_UP(info->dump_bytes); - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, + mmu_sync_info); if (!reg) return -ENOMEM; diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_legacy.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_legacy.c index 0687253c0c74..104b461bf4af 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_legacy.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_hwcnt_legacy.c @@ -23,7 +23,7 @@ #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" #include "mali_kbase_hwcnt_gpu.h" -#include +#include #include #include diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_jd.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_jd.c index 924475712e97..54bf9f0172a2 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_jd.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_jd.c @@ -187,13 +187,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - kbase_unmap_external_resource(katom->kctx, reg, alloc); + kbase_unmap_external_resource(katom->kctx, katom->extres[res_no]); } kfree(katom->extres); katom->extres = NULL; @@ -209,7 +203,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) { - int err_ret_val = -EINVAL; + int err = -EINVAL; u32 res_no; #ifdef CONFIG_MALI_DMA_FENCE struct kbase_dma_fence_resv_info info = { @@ -242,21 +236,10 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!katom->extres) return -ENOMEM; - /* copy user buffer to the end of our real buffer. 
- * Make sure the struct sizes haven't changed in a way - * we don't support - */ - BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); - input_extres = (struct base_external_resource *) - (((unsigned char *)katom->extres) + - (sizeof(*katom->extres) - sizeof(*input_extres)) * - katom->nr_extres); - - if (copy_from_user(input_extres, - get_compat_pointer(katom->kctx, user_atom->extres_list), - sizeof(*input_extres) * katom->nr_extres) != 0) { - err_ret_val = -EINVAL; - goto early_err_out; + input_extres = kmalloc_array(katom->nr_extres, sizeof(*input_extres), GFP_KERNEL); + if (!input_extres) { + err = -ENOMEM; + goto failed_input_alloc; } #ifdef CONFIG_MALI_DMA_FENCE @@ -270,39 +253,45 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #endif GFP_KERNEL); if (!info.resv_objs) { - err_ret_val = -ENOMEM; - goto early_err_out; + err = -ENOMEM; + goto failed_input_copy; } info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), sizeof(unsigned long), GFP_KERNEL); if (!info.dma_fence_excl_bitmap) { - err_ret_val = -ENOMEM; - goto early_err_out; + err = -ENOMEM; + goto failed_input_copy; } } #endif /* CONFIG_MALI_DMA_FENCE */ + if (copy_from_user(input_extres, + get_compat_pointer(katom->kctx, user_atom->extres_list), + sizeof(*input_extres) * katom->nr_extres) != 0) { + err = -EINVAL; + goto failed_input_copy; + } + /* Take the processes mmap lock */ down_read(kbase_mem_get_process_mmap_lock()); /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { - struct base_external_resource *res = &input_extres[res_no]; + struct base_external_resource *user_res = &input_extres[res_no]; struct kbase_va_region *reg; - struct kbase_mem_phy_alloc *alloc; #ifdef CONFIG_MALI_DMA_FENCE bool exclusive; - exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) + + exclusive = (user_res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) ? true : false; #endif reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, - res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + katom->kctx, user_res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); /* did we find a matching region object? */ - if (kbase_is_region_invalid_or_free(reg)) { + if (unlikely(kbase_is_region_invalid_or_free(reg))) { /* roll back */ goto failed_loop; } @@ -312,12 +301,9 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; } - alloc = kbase_map_external_resource(katom->kctx, reg, - current->mm); - if (!alloc) { - err_ret_val = -EINVAL; + err = kbase_map_external_resource(katom->kctx, reg, current->mm); + if (err) goto failed_loop; - } #ifdef CONFIG_MALI_DMA_FENCE if (implicit_sync && @@ -333,15 +319,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st exclusive); } #endif /* CONFIG_MALI_DMA_FENCE */ - - /* finish with updating out array with the data we found */ - /* NOTE: It is important that this is the last thing we do (or - * at least not before the first write) as we overwrite elements - * as we loop and could be overwriting ourself, so no writes - * until the last read for an element. 
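For context, a sketch of the new external-resource mapping contract as seen from a caller (hypothetical wrapper, not part of the patch; kctx->reg_lock is assumed to be held, as the function itself asserts):

	static int example_attach_resource(struct kbase_context *kctx,
					   struct kbase_va_region *reg,
					   struct kbase_va_region **slot)
	{
		/* Takes references on both the VA region and its gpu_alloc. */
		int err = kbase_map_external_resource(kctx, reg, current->mm);

		if (err)
			return err;

		/* The region pointer alone is now safe to store until the
		 * matching kbase_unmap_external_resource() drops the references.
		 */
		*slot = reg;
		return 0;
	}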
- */ - katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = alloc; + katom->extres[res_no] = reg; } /* successfully parsed the extres array */ /* drop the vm lock now */ @@ -364,12 +342,13 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(info.dma_fence_excl_bitmap); } #endif /* CONFIG_MALI_DMA_FENCE */ + /* Free the buffer holding data from userspace */ + kfree(input_extres); /* all done OK */ return 0; /* error handling section */ - #ifdef CONFIG_MALI_DMA_FENCE failed_dma_fence_setup: /* Lock the processes mmap lock */ @@ -379,19 +358,23 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kbase_gpu_vm_lock(katom->kctx); #endif - failed_loop: - /* undo the loop work */ +failed_loop: + /* undo the loop work. We are guaranteed to have access to the VA region + * as we hold a reference to it until it's unmapped + */ while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg = katom->extres[res_no]; - kbase_unmap_external_resource(katom->kctx, NULL, alloc); + kbase_unmap_external_resource(katom->kctx, reg); } kbase_gpu_vm_unlock(katom->kctx); /* Release the processes mmap lock */ up_read(kbase_mem_get_process_mmap_lock()); - early_err_out: +failed_input_copy: + kfree(input_extres); +failed_input_alloc: kfree(katom->extres); katom->extres = NULL; #ifdef CONFIG_MALI_DMA_FENCE @@ -400,7 +383,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(info.dma_fence_excl_bitmap); } #endif - return err_ret_val; + return err; } static inline void jd_resolve_dep(struct list_head *out_list, diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_jd_debugfs.c index 6378931cea71..854b9d72bf54 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_jd_debugfs.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_jd_debugfs.c @@ -28,7 +28,7 @@ #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) #include #endif -#include +#include struct kbase_jd_debugfs_depinfo { u8 id; diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_js.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_js.c index e59e22c676cb..1e7e88f07ef3 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_js.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_js.c @@ -661,6 +661,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /** @@ -3830,4 +3832,3 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr } return out_jd_priority; } - diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.c index 1b23b4179267..0dc251e43780 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.c @@ -25,7 +25,7 @@ */ #include "mali_kbase_kinstr_jm.h" -#include +#include #include "mali_kbase.h" #include "mali_kbase_linux.h" diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.h index 2c904e57ad22..09eac1a094bb 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_kinstr_jm.h @@ -63,7 +63,7 @@ #ifndef _KBASE_KINSTR_JM_H_ #define _KBASE_KINSTR_JM_H_ -#include +#include #ifdef __KERNEL__ #include diff --git 
a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c index 1a8e6d91b88f..e8a22bdaf04c 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -367,6 +367,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, struct rb_node *rbnext; struct kbase_va_region *next = NULL; struct rb_root *reg_rbtree = NULL; + struct kbase_va_region *orig_reg = reg; int merged_front = 0; int merged_back = 0; @@ -464,6 +465,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev, rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); } + /* This operation is always safe because the function never frees + * the region. If the region has been merged to both front and back, + * then it's the previous region that is supposed to be freed. + */ + orig_reg->start_pfn = 0; + out: return; } @@ -747,6 +754,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1499,7 +1510,9 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) KBASE_EXPORT_TEST_API(kbase_free_alloced_region); -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align, + enum kbase_caller_mmu_sync_info mmu_sync_info) { int err; size_t i = 0; @@ -1543,9 +1556,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 alloc->imported.alias.aliased[i].length, reg->flags & gwt_mask, kctx->as_nr, - group_id); + group_id, mmu_sync_info); if (err) - goto bad_insert; + goto bad_aliased_insert; /* Note: mapping count is tracked at alias * creation time @@ -1556,10 +1569,10 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 kctx->aliasing_sink_page, alloc->imported.alias.aliased[i].length, (reg->flags & mask & gwt_mask) | attr, - group_id); + group_id, mmu_sync_info); if (err) - goto bad_insert; + goto bad_aliased_insert; } } } else { @@ -1570,7 +1583,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, - group_id); + group_id, mmu_sync_info); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1596,17 +1609,23 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 reg->nr_pages - reg->gpu_alloc->nents, (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, - KBASE_MEM_GROUP_SINK); + KBASE_MEM_GROUP_SINK, mmu_sync_info); if (err) goto bad_insert; } return err; +bad_aliased_insert: + while (i-- > 0) { + + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + reg->nr_pages, kctx->as_nr); + + } + + 
bad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, reg->nr_pages, - kctx->as_nr); kbase_remove_va_region(kctx->kbdev, reg); @@ -1615,8 +1634,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 KBASE_EXPORT_TEST_API(kbase_gpu_mmap); -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable); +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, + struct kbase_va_region *reg, bool writeable); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -1632,13 +1651,13 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (reg->gpu_alloc->type) { case KBASE_MEM_TYPE_ALIAS: /* Fall-through */ case KBASE_MEM_TYPE_IMPORTED_UMM: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, reg->nr_pages, kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + reg->gpu_alloc->pages, reg->nr_pages, kctx->as_nr); break; default: - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, kbase_reg_current_backed_size(reg), - kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + reg->gpu_alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr); break; } @@ -1659,8 +1678,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* The allocation could still have active mappings. */ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, - (reg->flags & KBASE_REG_GPU_WR)); + kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, reg, + (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); } } } @@ -1810,6 +1829,7 @@ void kbase_sync_single(struct kbase_context *kctx, src = ((unsigned char *)kmap(gpu_page)) + offset; dst = ((unsigned char *)kmap(cpu_page)) + offset; } + memcpy(dst, src, size); kunmap(gpu_page); kunmap(cpu_page); @@ -2227,7 +2247,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, &kctx->mem_pools.large[alloc->group_id], nr_lp * (SZ_2M / SZ_4K), tp, - true); + true, kctx->task); if (res > 0) { nr_left -= res; @@ -2281,7 +2301,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, err = kbase_mem_pool_grow( &kctx->mem_pools.large[alloc->group_id], - 1); + 1, kctx->task); if (err) break; } while (1); @@ -2328,7 +2348,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, if (nr_left) { res = kbase_mem_pool_alloc_pages( &kctx->mem_pools.small[alloc->group_id], - nr_left, tp, false); + nr_left, tp, false, kctx->task); if (res <= 0) goto alloc_failed; } @@ -2820,6 +2840,13 @@ KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); /** * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. * @alloc: The allocation for the imported user buffer. + * + * This must only be called when terminating an alloc, when its refcount + * (number of users) has become 0. This also ensures it is only called once all + * CPU mappings have been closed. 
+ * + * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active + * allocations */ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); #endif @@ -3740,7 +3767,8 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_alloc_info *info, struct kbase_va_region *reg, - struct kbase_sub_alloc **prealloc_sas) + struct kbase_sub_alloc **prealloc_sas, + enum kbase_caller_mmu_sync_info mmu_sync_info) { size_t delta; size_t pages_required; @@ -3802,7 +3830,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, spin_unlock(&kctx->mem_partials_lock); kbase_gpu_vm_unlock(kctx); - ret = kbase_mem_pool_grow(pool, pool_delta); + ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); kbase_gpu_vm_lock(kctx); if (ret) @@ -3837,7 +3865,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, spin_unlock(&kctx->mem_partials_lock); ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, - old_size); + old_size, mmu_sync_info); /* * The grow failed so put the allocation back in the * pool and return failure. @@ -4052,6 +4080,11 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; + /* Calls to this function are inherently synchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; + #if MALI_USE_CSF lockdep_assert_held(&kctx->csf.kcpu_queues.lock); #else @@ -4144,7 +4177,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * so any state protected by that lock might need to be * re-evaluated if more code is added here in future. */ - ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, + mmu_sync_info); #if MALI_JIT_PRESSURE_LIMIT_BASE if (!ignore_pressure_limit) @@ -4207,8 +4241,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_unlock(kctx); - reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, - info->extension, &flags, &gpu_addr); + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, + &flags, &gpu_addr, mmu_sync_info); if (!reg) { /* Most likely not enough GPU virtual space left for * the new JIT allocation. @@ -4500,7 +4534,23 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, #if MALI_USE_CSF static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) { - if (alloc->nents) { + /* In CSF builds, we keep pages pinned until the last reference is + * released on the alloc. A refcount of 0 also means we can be sure + * that all CPU mappings have been closed on this alloc, and no more + * mappings of it will be created. + * + * Further, the WARN() below captures the restriction that this + * function will not handle anything other than the alloc termination + * path, because the caller of kbase_mem_phy_alloc_put() is not + * required to hold the kctx's reg_lock, and so we could not handle + * removing an existing CPU mapping here. + * + * Refer to this function's kernel-doc comments for alternatives for + * unpinning a User buffer. 
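A short sketch of the lifetime rule described above (illustrative only; it assumes the usual kbase_mem_phy_alloc_get()/kbase_mem_phy_alloc_put() refcount helpers with their standard semantics):

	struct kbase_mem_phy_alloc *ref = kbase_mem_phy_alloc_get(alloc);

	/* ... the imported pages stay pinned while the reference is held ... */

	/* Dropping the last reference runs the alloc termination path, which is
	 * the only place kbase_jd_user_buf_unpin_pages() executes on CSF.
	 */
	kbase_mem_phy_alloc_put(ref);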
+ */ + + if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, + "must only be called on terminating an allocation")) { struct page **pages = alloc->imported.user_buf.pages; long i; @@ -4508,6 +4558,8 @@ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) for (i = 0; i < alloc->nents; i++) put_page(pages[i]); + + alloc->nents = 0; } } #endif @@ -4521,6 +4573,10 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, struct mm_struct *mm = alloc->imported.user_buf.mm; long pinned_pages; long i; + int write; + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + + lockdep_assert_held(&kctx->reg_lock); if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) return -EINVAL; @@ -4541,35 +4597,35 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, alloc->imported.user_buf.nr_pages, #if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + write ? FOLL_WRITE : 0, pages, NULL); #else - reg->flags & KBASE_REG_GPU_WR, + write, 0, pages, NULL); #endif #elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, + write, 0, pages, NULL); #elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + write ? FOLL_WRITE : 0, pages, NULL); #elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + write ? FOLL_WRITE : 0, pages, NULL, NULL); #else - pinned_pages = get_user_pages_remote(mm, + pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + write ? FOLL_WRITE : 0, pages, NULL, NULL); #endif @@ -4577,6 +4633,9 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE return pinned_pages; if (pinned_pages != alloc->imported.user_buf.nr_pages) { + /* Above code already ensures there will not have been a CPU + * mapping by ensuring alloc->nents is 0 + */ for (i = 0; i < pinned_pages; i++) put_page(pages[i]); return -ENOMEM; @@ -4590,46 +4649,74 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct kbase_va_region *reg) { - long pinned_pages; + int err; + long pinned_pages = 0; struct kbase_mem_phy_alloc *alloc; struct page **pages; struct tagged_addr *pa; long i; - unsigned long address; struct device *dev; - unsigned long offset; - unsigned long local_size; unsigned long gwt_mask = ~0; - int err = kbase_jd_user_buf_pin_pages(kctx, reg); + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + lockdep_assert_held(&kctx->reg_lock); + + err = kbase_jd_user_buf_pin_pages(kctx, reg); if (err) return err; alloc = reg->gpu_alloc; pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; + + /* Manual CPU cache synchronization. 
+ * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. + */ + + + + /* The user buffer could already have been previously pinned before + * entering this function, and hence there could potentially be CPU + * mappings of it + */ + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; - unsigned long min; + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) goto unwind; alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4638,19 +4725,35 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, #endif err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - pa, kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id); + pa, kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info); if (err == 0) return 0; /* fall down */ unwind: alloc->nents = 0; + + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This is precautionary measure in case a GPU job has taken + * advantage of a partially GPU-mapped range to write and corrupt the + * content of memory, either inside or outside the imported region. + * + * Notice that this error recovery path doesn't try to be optimal and just + * flushes the entire page range. + */ + while (i--) { - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif } while (++i < pinned_pages) { @@ -4665,22 +4768,113 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT * have a corresponding call to kbase_jd_user_buf_unpin_pages(). 
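A condensed view of the ownership protocol followed by the map path above and the unmap path below (a sketch, not part of the patch; dev and dma_addr stand for the device and a page already mapped with DMA_ATTR_SKIP_CPU_SYNC):

	/* CPU hands the page to the GPU: commit dirty CPU cache lines. */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	/* ... GPU reads and writes the page ... */

	/* GPU hands the page back: invalidate so the CPU sees GPU writes. */
	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

The per-page logic in the functions simply narrows these calls to the imported and non-imported sub-ranges described in the comments.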
*/ -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable) +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, + struct kbase_va_region *reg, bool writeable) { long i; struct page **pages; + unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; unsigned long size = alloc->imported.user_buf.size; + lockdep_assert_held(&kctx->reg_lock); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); pages = alloc->imported.user_buf.pages; + +#if !MALI_USE_CSF + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); +#else + CSTD_UNUSED(reg); +#endif + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; + unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page); + /* Notice: this is a temporary variable that is used for DMA sync + * operations, and that could be incremented by an offset if the + * current page contains both imported and non-imported memory + * sub-regions. + * + * It is valid to add an offset to this value, because the offset + * is always kept within the physically contiguous dma-mapped range + * and there's no need to translate to physical address to offset it. + * + * This variable is not going to be used for the actual DMA unmap + * operation, that shall always use the original DMA address of the + * whole memory page. + */ + + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expect value of the variables + * used in this loop in the corner case of an imported region encloed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - offset_within_page + * | |/ + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. 
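+	 * For instance (hypothetical numbers, assuming 4 kB pages): an import of
+	 * 0x234 bytes at a page-aligned user address gives offset_within_page = 0
+	 * and imported_size = 0x234, so no head sync is needed, [0, 0x234) is
+	 * invalidated for the CPU above, and the remaining [0x234, 0x1000) tail
+	 * is cleaned for the device here.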
+ */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } + + /* Notice: use the original DMA address to unmap the whole memory page. */ + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + #endif + + + + if (writeable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF @@ -4688,7 +4882,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, pages[i] = NULL; #endif - size -= local_size; + size -= imported_size; } #if !MALI_USE_CSF alloc->nents = 0; @@ -4735,11 +4929,11 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, return 0; } -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm) +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm) { - int err; + int err = 0; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; lockdep_assert_held(&kctx->reg_lock); @@ -4748,7 +4942,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents)) - goto exit; + return -EINVAL; reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; if (reg->gpu_alloc->imported.user_buf @@ -4756,7 +4950,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( err = kbase_jd_user_buf_map(kctx, reg); if (err) { reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto exit; + return err; } } } @@ -4764,21 +4958,29 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_UMM: { err = kbase_mem_umm_map(kctx, reg); if (err) - goto exit; + return err; break; } default: - goto exit; + WARN(1, "Invalid external resource GPU allocation type (%x) on mapping", + alloc->type); + return -EINVAL; } - return kbase_mem_phy_alloc_get(reg->gpu_alloc); -exit: - return NULL; + kbase_va_region_alloc_get(kctx, reg); + kbase_mem_phy_alloc_get(alloc); + return err; } -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) { + /* gpu_alloc was used in kbase_map_external_resources, so we need to use it for the + * unmapping operation. 
+ */ + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + + lockdep_assert_held(&kctx->reg_lock); + switch (alloc->type) { case KBASE_MEM_TYPE_IMPORTED_UMM: { kbase_mem_umm_unmap(kctx, reg, alloc); @@ -4790,26 +4992,32 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, if (alloc->imported.user_buf.current_mapping_usage_count == 0) { bool writeable = true; - if (!kbase_is_region_invalid_or_free(reg) && - reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - kbase_reg_current_backed_size(reg), - kctx->as_nr); + if (!kbase_is_region_invalid_or_free(reg)) { + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, + kbase_reg_current_backed_size(reg), + kctx->as_nr); + } - if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) + if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) writeable = false; - kbase_jd_user_buf_unmap(kctx, alloc, writeable); + kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); } } break; default: - break; + WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", + alloc->type); + return; } kbase_mem_phy_alloc_put(alloc); + kbase_va_region_alloc_put(kctx, reg); +} + +static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) +{ + return reg->start_pfn << PAGE_SHIFT; } struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( @@ -4825,7 +5033,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * metadata which matches the region which is being acquired. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { - if (walker->gpu_addr == gpu_addr) { + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { meta = walker; meta->ref++; break; @@ -4837,8 +5045,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( struct kbase_va_region *reg; /* Find the region */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) goto failed; @@ -4846,18 +5053,18 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( meta = kzalloc(sizeof(*meta), GFP_KERNEL); if (!meta) goto failed; - /* * Fill in the metadata object and acquire a reference * for the physical resource. */ - meta->alloc = kbase_map_external_resource(kctx, reg, NULL); - meta->ref = 1; + meta->reg = reg; - if (!meta->alloc) + /* Map the external resource to the GPU allocation of the region + * and acquire the reference to the VA region + */ + if (kbase_map_external_resource(kctx, meta->reg, NULL)) goto fail_map; - - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + meta->ref = 1; list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); } @@ -4882,7 +5089,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) * metadata which matches the region which is being released. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) - if (walker->gpu_addr == gpu_addr) + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) return walker; return NULL; @@ -4891,14 +5098,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) static void release_sticky_resource_meta(struct kbase_context *kctx, struct kbase_ctx_ext_res_meta *meta) { - struct kbase_va_region *reg; - - /* Drop the physical memory reference and free the metadata. 
*/ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, - meta->gpu_addr); - - kbase_unmap_external_resource(kctx, reg, meta->alloc); + kbase_unmap_external_resource(kctx, meta->reg); list_del(&meta->ext_res_node); kfree(meta); } diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h index cec0e6286a10..a20855012a32 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,7 @@ #endif #include -#include +#include #include #include "mali_kbase_pm.h" #include "mali_kbase_defs.h" @@ -889,7 +889,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * @pages: Pointer to array where the physical address of the allocated * pages will be stored. * @partial_allowed: If fewer pages allocated is allowed - * + * @page_owner: Pointer to the task that created the Kbase context for which + * the pages are being allocated. It can be NULL if the pages + * won't be associated with any Kbase context. * Like kbase_mem_pool_alloc() but optimized for allocating many pages. * * Return: @@ -905,7 +907,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. */ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed); + struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner); /** * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool @@ -1017,13 +1019,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); * kbase_mem_pool_grow - Grow the pool * @pool: Memory pool to grow * @nr_to_grow: Number of pages to add to the pool - * + * @page_owner: Pointer to the task that created the Kbase context for which + * the memory pool is being grown. It can be NULL if the pages + * to be allocated won't be associated with any Kbase context. * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to * become larger than the maximum size specified. * * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages */ -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,struct task_struct *page_owner); /** * kbase_mem_pool_trim - Grow or shrink the pool to a new size @@ -1187,7 +1191,9 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size * * Call kbase_add_va_region() and map the region on the GPU. */ -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align, + enum kbase_caller_mmu_sync_info mmu_sync_info); /** * Remove the region from the GPU and unregister it. 
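The new page_owner argument appears on both the batched allocation and the grow path above; a hypothetical call site, for illustration only:

	static int example_grow_for_ctx(struct kbase_mem_pool *pool, size_t nr_to_grow,
					struct kbase_context *kctx)
	{
		/* Attribute the pages to the task that created the context, or
		 * pass NULL when the pool is not tied to any context.
		 */
		return kbase_mem_pool_grow(pool, nr_to_grow, kctx ? kctx->task : NULL);
	}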
@@ -1241,6 +1247,7 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); +#if defined(CONFIG_MALI_VECTOR_DUMP) /** * kbase_mmu_dump() - Dump the MMU tables to a buffer. * @@ -1260,6 +1267,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); * (including if the @c nr_pages is too small) */ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); +#endif /** * kbase_sync_now - Perform cache maintenance on a memory region @@ -1796,25 +1804,28 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx); /** * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. - * @reg: The region to map. + * @reg: External resource to map. * @locked_mm: The mm_struct which has been locked for this operation. * - * Return: The physical allocation which backs the region on success or NULL - * on failure. + * On successful mapping, the VA region and the gpu_alloc refcounts will be + * increased, making it safe to use and store both values directly. + * + * Return: Zero on success, or negative error code. */ -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm); +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm); /** * kbase_unmap_external_resource - Unmap an external resource from the GPU. * @kctx: kbase context. - * @reg: The region to unmap or NULL if it has already been released. - * @alloc: The physical allocation being unmapped. + * @reg: VA region corresponding to external resource + * + * On successful unmapping, the VA region and the gpu_alloc refcounts will + * be decreased. If the refcount reaches zero, both @reg and the corresponding + * allocation may be freed, so using them after returning from this function + * requires the caller to explicitly check their state. */ -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); - +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg); /** * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer. @@ -2113,4 +2124,33 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; } +/* + * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process + */ +static inline void kbase_mem_mmgrab(void) +{ + /* This merely takes a reference on the memory descriptor structure + * i.e. mm_struct of current process and not on its address space and + * so won't block the freeing of address space on process exit. + */ +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + atomic_inc(¤t->mm->mm_count); +#else + mmgrab(current->mm); +#endif +} +/** + * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed + * @kctx: Pointer to kbase context + * + * Don't allow the allocation of GPU memory if the ioctl has been issued + * from the forked child process using the mali device file fd inherited from + * the parent process. + * + * Return: true if allocation is allowed. 
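A sketch of the intended use in the ioctl paths, mirroring the kbase_api_mem_alloc() change earlier in this patch:

	/* Reject GPU memory allocation from a forked child that merely
	 * inherited the device fd from its parent.
	 */
	if (!kbase_mem_allow_alloc(kctx))
		return -EINVAL;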
+ */ +static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx) +{ + return (kctx->process_mm == current->mm); +} #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c index b41425ecc95b..d82bc016a119 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,10 +39,11 @@ #include #include +#include #include #include #include -#include +#include #include #include #include @@ -293,9 +294,9 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, */ } -struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, - u64 va_pages, u64 commit_pages, - u64 extension, u64 *flags, u64 *gpu_va) +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, + u64 extension, u64 *flags, u64 *gpu_va, + enum kbase_caller_mmu_sync_info mmu_sync_info) { int zone; struct kbase_va_region *reg; @@ -471,7 +472,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, *gpu_va = (u64) cookie; } else /* we control the VA */ { - if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) { + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1, + mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); kbase_gpu_vm_unlock(kctx); goto no_mmap; @@ -829,6 +831,11 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) struct kbase_context *kctx = gpu_alloc->imported.native.kctx; int err = 0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kctx->reg_lock); mutex_lock(&kctx->jit_evict_lock); @@ -858,9 +865,9 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) * pre-eviction size. */ if (!err) - err = kbase_mem_grow_gpu_mapping(kctx, - gpu_alloc->reg, - gpu_alloc->evicted, 0); + err = kbase_mem_grow_gpu_mapping( + kctx, gpu_alloc->reg, + gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; } @@ -915,6 +922,15 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations + * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. + * This would usually include JIT allocations, Tiler heap related allocations + * & GPU queue ringbuffer and none of them needs to be explicitly marked + * as evictable by Userspace. + */ + if (reg->flags & KBASE_REG_NO_USER_FREE) + goto out_unlock; + /* Is the region being transitioning between not needed and needed? */ prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; @@ -1220,6 +1236,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc; unsigned long gwt_mask = ~0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. 
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kctx->reg_lock); alloc = reg->gpu_alloc; @@ -1246,14 +1267,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, - kctx->as_nr, - alloc->group_id); + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info); if (err) goto bad_insert; @@ -1266,13 +1284,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, * Assume alloc->nents is the number of actual pages in the * dma-buf memory. */ - err = kbase_mmu_insert_single_page(kctx, - reg->start_pfn + alloc->nents, - kctx->aliasing_sink_page, - reg->nr_pages - alloc->nents, - (reg->flags | KBASE_REG_GPU_RD) & - ~KBASE_REG_GPU_WR, - KBASE_MEM_GROUP_SINK); + err = kbase_mmu_insert_single_page( + kctx, reg->start_pfn + alloc->nents, + kctx->aliasing_sink_page, reg->nr_pages - alloc->nents, + (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, + KBASE_MEM_GROUP_SINK, mmu_sync_info); if (err) goto bad_pad_insert; } @@ -1280,11 +1296,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, return 0; bad_pad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - alloc->nents, - kctx->as_nr); + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + alloc->nents, kctx->as_nr); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1312,11 +1325,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { int err; - err = kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - reg->nr_pages, - kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, kctx->as_nr); WARN_ON(err); } @@ -1518,6 +1528,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + struct tagged_addr *pa; + struct device *dev; int write; /* Flag supported only for dma-buf imported memory */ @@ -1658,31 +1670,48 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE reg->gpu_alloc->nents = 0; reg->extension = 0; + pa = kbase_get_gpu_phy_pages(reg); + dev = kctx->kbdev->dev; + if (pages) { - struct device *dev = kctx->kbdev->dev; - unsigned long local_size = user_buf->size; - unsigned long offset = user_buf->address & ~PAGE_MASK; - struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. 
Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. + */ + for (i = 0; i < faulted_pages; i++) { dma_addr_t dma_addr; - unsigned long min; + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + #endif - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + } reg->gpu_alloc->nents = faulted_pages; @@ -1691,13 +1720,29 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE return reg; unwind_dma_map: + + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This precautionary measure is kept here to keep this code + * aligned with kbase_jd_user_buf_map() to allow for a potential refactor + * in the future. + */ while (i--) { - dma_unmap_page(kctx->kbdev->dev, - user_buf->dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = user_buf->dma_addrs[i]; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif } fault_mismatch: if (pages) { + /* In this case, the region was not yet in the region tracker, + * and so there are no CPU mappings to remove before we unpin + * the page + */ for (i = 0; i < faulted_pages; i++) put_page(pages[i]); } @@ -1710,7 +1755,6 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE no_region: bad_size: return NULL; - } @@ -1722,6 +1766,10 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 gpu_va; size_t i; bool coherent; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. 
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(flags); @@ -1748,6 +1796,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, if (!nents) goto bad_nents; + if (stride > U64_MAX / nents) + goto bad_size; + if ((nents * stride) > (U64_MAX / PAGE_SIZE)) /* 64-bit address range is the max */ goto bad_size; @@ -1896,7 +1947,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, #else if (1) { #endif - if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { + if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1, + mmu_sync_info) != 0) { dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); goto no_mmap; } @@ -1941,6 +1993,11 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, { struct kbase_va_region *reg; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(gpu_va); KBASE_DEBUG_ASSERT(va_pages); @@ -1973,7 +2030,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - + if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { + dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); + goto bad_flags; + } if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { dev_warn(kctx->kbdev->dev, "padding is only supported for UMM"); @@ -2040,7 +2100,8 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { /* we control the VA, mmap now to the GPU */ - if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0) + if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) != + 0) goto no_gpu_va; /* return real GPU VA */ *gpu_va = reg->start_pfn << PAGE_SHIFT; @@ -2074,8 +2135,9 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, } int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) + struct kbase_va_region *reg, u64 new_pages, + u64 old_pages, + enum kbase_caller_mmu_sync_info mmu_sync_info) { struct tagged_addr *phy_pages; u64 delta = new_pages - old_pages; @@ -2086,8 +2148,10 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + old_pages, phy_pages + old_pages, delta, - reg->flags, kctx->as_nr, reg->gpu_alloc->group_id); + reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, + kctx->as_nr, reg->gpu_alloc->group_id, + mmu_sync_info); return ret; } @@ -2125,10 +2189,11 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, u64 const new_pages, u64 const old_pages) { u64 delta = old_pages - new_pages; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; int ret = 0; - ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + new_pages, delta, kctx->as_nr); + ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, + alloc->pages + new_pages, delta, kctx->as_nr); return ret; } @@ -2141,6 +2206,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) struct kbase_va_region 
*reg; bool read_locked = false; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(gpu_addr != 0); @@ -2191,6 +2261,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (reg->flags & KBASE_REG_DONT_NEED) goto out_unlock; + if (reg->flags & KBASE_REG_NO_USER_FREE) + goto out_unlock; + #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED /* Reject resizing commit size */ if (reg->flags & KBASE_REG_PF_GROW) @@ -2232,8 +2305,8 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) /* No update required for CPU mappings, that's done on fault. */ /* Update GPU mapping. */ - res = kbase_mem_grow_gpu_mapping(kctx, reg, - new_pages, old_pages); + res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, + old_pages, mmu_sync_info); /* On error free the new pages */ if (res) { @@ -2561,7 +2634,6 @@ static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) while (kbase_jit_evict(kctx)) ; } -#endif static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, @@ -2578,9 +2650,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; -#ifdef CONFIG_MALI_VECTOR_DUMP kbase_free_unused_jit_allocations(kctx); -#endif kaddr = kbase_mmu_dump(kctx, nr_pages); @@ -2628,7 +2698,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, out: return err; } - +#endif void kbase_os_mem_map_lock(struct kbase_context *kctx) { @@ -2652,6 +2722,11 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, struct kbase_va_region *reg; int err = 0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + *aligned_offset = 0; dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); @@ -2686,7 +2761,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, *nr_pages = kbase_reg_current_backed_size(reg); if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, - reg->nr_pages, 1) != 0) { + reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); /* Unable to map in GPU space. 
*/ WARN_ON(1); @@ -2752,17 +2827,10 @@ int kbase_context_mmap(struct kbase_context *const kctx, goto out_unlock; } - /* if not the MTP, verify that the MTP has been mapped */ - rcu_read_lock(); - /* catches both when the special page isn't present or - * when we've forked - */ - if (rcu_dereference(kctx->process_mm) != current->mm) { + if (!kbase_mem_allow_alloc(kctx)) { err = -EINVAL; - rcu_read_unlock(); goto out_unlock; } - rcu_read_unlock(); switch (vma->vm_pgoff) { case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): @@ -2771,6 +2839,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, err = -EINVAL; goto out_unlock; case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): +#if defined(CONFIG_MALI_VECTOR_DUMP) /* MMU dump */ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); if (err != 0) @@ -2778,6 +2847,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, /* free the region on munmap */ free_on_close = 1; break; +#else + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ #if MALI_USE_CSF case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): kbase_gpu_vm_unlock(kctx); @@ -2848,8 +2922,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, dev_warn(dev, "mmap aliased: invalid params!\n"); goto out_unlock; } - } - else if (reg->cpu_alloc->nents < + } else if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { /* limit what we map to the amount currently backed */ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) @@ -2866,7 +2939,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); - +#if defined(CONFIG_MALI_VECTOR_DUMP) if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on * the pages, so we can now free the kernel mapping @@ -2885,7 +2958,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, */ vma->vm_pgoff = PFN_DOWN(vma->vm_start); } - +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: kbase_gpu_vm_unlock(kctx); out: @@ -3021,6 +3094,10 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto out_unlock; + + /* check access permissions can be satisfied * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ @@ -3104,79 +3181,27 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { - struct mm_struct *mm; - - rcu_read_lock(); - mm = rcu_dereference(kctx->process_mm); - if (mm) { - atomic_add(pages, &kctx->nonmapped_pages); -#ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); -#else - spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); - spin_unlock(&mm->page_table_lock); -#endif - } - rcu_read_unlock(); -} - -static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) -{ - int pages; - struct mm_struct *mm; - - spin_lock(&kctx->mm_update_lock); - mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); - if (!mm) { - spin_unlock(&kctx->mm_update_lock); - return; - } - - rcu_assign_pointer(kctx->process_mm, NULL); - spin_unlock(&kctx->mm_update_lock); - synchronize_rcu(); - - pages = atomic_xchg(&kctx->nonmapped_pages, 0); + struct mm_struct *mm = kctx->process_mm; + if 
(unlikely(!mm)) + return; + atomic_add(pages, &kctx->nonmapped_pages); #ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); #else spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); spin_unlock(&mm->page_table_lock); #endif } -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx; - - kctx = vma->vm_private_data; - kbasep_os_process_page_usage_drain(kctx); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .close = kbase_special_vm_close, -}; - static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { - /* check that this is the only tracking page */ - spin_lock(&kctx->mm_update_lock); - if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { - spin_unlock(&kctx->mm_update_lock); - return -EFAULT; - } - - rcu_assign_pointer(kctx->process_mm, current->mm); - - spin_unlock(&kctx->mm_update_lock); + if (vma_pages(vma) != 1) + return -EINVAL; /* no real access */ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; return 0; } diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.h index 36159c1b71c1..4f4a1f3af0e0 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.h @@ -44,13 +44,15 @@ struct kbase_hwc_dma_mapping { * @flags: bitmask of BASE_MEM_* flags to convey special requirements & * properties for the new allocation. * @gpu_va: Start address of the memory region which was allocated from GPU - * virtual address space. + * virtual address space. If the BASE_MEM_FLAG_MAP_FIXED is set + * then this parameter shall be provided by the caller. + * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. * * Return: 0 on success or error code */ -struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, - u64 va_pages, u64 commit_pages, - u64 extension, u64 *flags, u64 *gpu_va); +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, + u64 extension, u64 *flags, u64 *gpu_va, + enum kbase_caller_mmu_sync_info mmu_sync_info); /** * kbase_mem_query - Query properties of a GPU memory region @@ -178,8 +180,9 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx); * Note: Caller must be holding the region lock. 
*/ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); + struct kbase_va_region *reg, u64 new_pages, + u64 old_pages, + enum kbase_caller_mmu_sync_info mmu_sync_info); /** * kbase_mem_evictable_make - Make a physical allocation eligible for eviction diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_lowlevel.h index 3f260bf42c4b..5648c09b195f 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_lowlevel.h +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_lowlevel.h @@ -48,6 +48,8 @@ struct tagged_addr { phys_addr_t tagged_addr; }; #define HUGE_HEAD (1u << 1) #define FROM_PARTIAL (1u << 2) +#define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M / SZ_4K) + /* * Note: if macro for converting physical address to page is not defined * in the kernel itself, it is defined hereby. This is to avoid build errors @@ -158,4 +160,20 @@ static inline bool is_partial(struct tagged_addr t) return t.tagged_addr & FROM_PARTIAL; } +/** + * index_in_large_page() - Get index of a 4KB page within a 2MB page which + * wasn't split to be used partially. + * + * @t: Tagged physical address of the physical 4KB page that lies within + * the large (or 2 MB) physical page. + * + * Return: Index of the 4KB page within a 2MB page + */ +static inline unsigned int index_in_large_page(struct tagged_addr t) +{ + WARN_ON(!is_huge(t)); + + return (PFN_DOWN(as_phys_addr_t(t)) & (NUM_4K_PAGES_IN_2MB_PAGE - 1)); +} + #endif /* _KBASE_LOWLEVEL_H */ diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c index a11da825e21c..1889e2049b1b 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,11 @@ #include #include #include - +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #define pool_dbg(pool, format, ...) \ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ (pool->next_pool) ? "kctx" : "kbdev", \ @@ -37,6 +41,46 @@ #define NOT_DIRTY false #define NOT_RECLAIMED false +/** +* can_alloc_page() - Check if the current thread can allocate a physical page +* +* @pool: Pointer to the memory pool. +* @page_owner: Pointer to the task/process that created the Kbase context +* for which a page needs to be allocated. It can be NULL if +* the page won't be associated with Kbase context. +* @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. +* +* This function checks if the current thread is a kernel thread and can make a +* request to kernel to allocate a physical page. If the kernel thread is allocating +* a page for the Kbase context and the process that created the context is exiting +* or is being killed, then there is no point in doing a page allocation. +* +* The check done by the function is particularly helpful when the system is running +* low on memory. When a page is allocated from the context of a kernel thread, OoM +* killer doesn't consider the kernel thread for killing and kernel keeps retrying +* to allocate the page as long as the OoM killer is able to kill processes. 
+* The check allows kernel thread to quickly exit the page allocation loop once OoM +* killer has initiated the killing of @page_owner, thereby unblocking the context +* termination for @page_owner and freeing of GPU memory allocated by it. This helps +* in preventing the kernel panic and also limits the number of innocent processes +* that get killed. +* +* Return: true if the page can be allocated otherwise false. +*/ +static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner, + const bool alloc_from_kthread) +{ + if (likely(!alloc_from_kthread || !page_owner)) + return true; + + if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) { + dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm, + task_pid_nr(page_owner)); + return false; + } + + return true; +} static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) { @@ -232,11 +276,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, } int kbase_mem_pool_grow(struct kbase_mem_pool *pool, - size_t nr_to_grow) + size_t nr_to_grow, struct task_struct *page_owner) { struct page *p; size_t i; - + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); kbase_mem_pool_lock(pool); pool->dont_reclaim = true; @@ -249,6 +293,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, return -ENOMEM; } kbase_mem_pool_unlock(pool); + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + return -ENOMEM; p = kbase_mem_alloc_page(pool); if (!p) { @@ -281,7 +327,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) if (new_size < cur_size) kbase_mem_pool_shrink(pool, cur_size - new_size); else if (new_size > cur_size) - err = kbase_mem_pool_grow(pool, new_size - cur_size); + err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL); if (err) { size_t grown_size = kbase_mem_pool_size(pool); @@ -527,13 +573,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, } int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed) + struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner) { struct page *p; size_t nr_from_pool; size_t i = 0; int err = -ENOMEM; size_t nr_pages_internal; + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); nr_pages_internal = nr_4k_pages / (1u << (pool->order)); @@ -565,7 +612,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, if (i != nr_4k_pages && pool->next_pool) { /* Allocate via next pool */ err = kbase_mem_pool_alloc_pages(pool->next_pool, - nr_4k_pages - i, pages + i, partial_allowed); + nr_4k_pages - i, pages + i, partial_allowed,page_owner); if (err < 0) goto err_rollback; @@ -574,6 +621,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, } else { /* Get any remaining pages from kernel */ while (i != nr_4k_pages) { + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + goto err_rollback; + p = kbase_mem_alloc_page(pool); if (!p) { if (partial_allowed) diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_native_mgm.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_native_mgm.c index 4554bee783e7..089b394137a7 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_native_mgm.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_native_mgm.c @@ -140,6 +140,30 @@ kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, return pte; } +/** + * 
kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in + * kbase_native_mgm_update_gpu_pte() + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @mmu_level: The level of the MMU page table where the page is getting mapped. + * @pte: The prepared page table entry. + * + * This function simply returns the @pte without modification. + * + * Return: A GPU page table entry to be stored in a page table. + */ +static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, + int group_id, int mmu_level, u64 pte) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + CSTD_UNUSED(mmu_level); + + return pte; +} + struct memory_group_manager_device kbase_native_mgm_dev = { .ops = { .mgm_alloc_page = kbase_native_mgm_alloc, @@ -147,6 +171,7 @@ struct memory_group_manager_device kbase_native_mgm_dev = { .mgm_get_import_memory_id = NULL, .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot, .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte, + .mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte, }, .data = NULL }; diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c index 72cb75999f4d..be23c8f7b81b 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c @@ -27,7 +27,7 @@ #include #endif #include -#include +#include #include #include #include @@ -96,7 +96,8 @@ static int kbasep_read_soft_event_status( unsigned char *mapped_evt; struct kbase_vmap_struct map; - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), + KBASE_REG_CPU_RD, &map); if (!mapped_evt) return -EFAULT; @@ -117,7 +118,8 @@ static int kbasep_write_soft_event_status( (new_status != BASE_JD_SOFT_EVENT_RESET)) return -EINVAL; - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), + KBASE_REG_CPU_WR, &map); if (!mapped_evt) return -EFAULT; @@ -501,6 +503,7 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) kbase_js_sched_all(katom->kctx->kbdev); } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -727,7 +730,6 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) return ret; } -#endif /* !MALI_USE_CSF */ #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, @@ -759,8 +761,18 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, } #endif -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + * + * Return: 0 on success, error code otherwise. 
+ */ +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) { unsigned int i; unsigned int target_page_nr = 0; @@ -855,7 +867,6 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, return ret; } -#if !MALI_USE_CSF static int kbase_debug_copy(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -873,6 +884,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ #endif /* !MALI_USE_CSF */ #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) @@ -969,6 +981,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) jit_info_copy_size_for_jit_version[kctx->jit_version]; WARN_ON(jit_info_user_copy_size > sizeof(*info)); + if (!kbase_mem_allow_alloc(kctx)) { + dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", + current->comm, current->pid, kctx->tgid, kctx->id); + ret = -EINVAL; + goto fail; + } + /* For backwards compatibility, and to prevent reading more than 1 jit * info struct on jit version 1 */ @@ -1204,8 +1223,8 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) * Write the address of the JIT allocation to the user provided * GPU allocation. */ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); + ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), + KBASE_REG_CPU_WR, &mapping); if (!ptr) { /* * Leave the allocations "live" as the JIT free atom @@ -1484,10 +1503,11 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr)) goto failed_loop; - } else + } else { if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr)) failed = true; + } } /* @@ -1576,6 +1596,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_EVENT_RESET: kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: { int res = kbase_debug_copy(katom); @@ -1584,6 +1605,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) katom->event_code = BASE_JD_EVENT_JOB_INVALID; break; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: ret = kbase_jit_allocate_process(katom); break; @@ -1695,8 +1717,10 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) if (katom->jc == 0) return -EINVAL; break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: return kbase_debug_copy_prepare(katom); +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_EXT_RES_MAP: return kbase_ext_res_prepare(katom); case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: @@ -1736,9 +1760,11 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) break; #endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: kbase_debug_copy_finish(katom); break; +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: kbase_jit_allocate_finish(katom); break; diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_sync_android.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_sync_android.c index 8af2584cdc18..ead42c800ba7 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_sync_android.c +++ 
b/drivers/gpu/arm/bv_r32p1/mali_kbase_sync_android.c @@ -249,17 +249,22 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); if (fd < 0) { sync_fence_put(fence); + katom->fence = NULL; goto out; } + /* Take an extra reference count on the created fence file */ + get_file(fence->file); /* bind fence to the new fd */ sync_fence_install(fence, fd); katom->fence = sync_fence_fdget(fd); - if (katom->fence == NULL) { - /* The only way the fence can be NULL is if userspace closed it - * for us, so we don't need to clear it up - */ + /* Drop the extra reference count */ + fput(fence->file); + if (katom->fence != fence) { + if (katom->fence) + sync_fence_put(katom->fence); + katom->fence = NULL; fd = -EINVAL; goto out; } diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_vinstr.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_vinstr.c index d00bc002ac47..b5c3af4b5f6e 100644 --- a/drivers/gpu/arm/bv_r32p1/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_vinstr.c @@ -22,9 +22,9 @@ #include "mali_kbase_vinstr.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" -#include +#include #include "mali_kbase_hwcnt_gpu.h" -#include +#include #include "mali_malisw.h" #include "mali_kbase_debug.h" diff --git a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c index 05253aeb9876..067b5dcebae0 100644 --- a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c @@ -148,17 +148,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, "true" : "false"; int as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at VA 0x%016llX\n" + "GPU bus fault in AS%d at VA %pK\n" "VA_VALID: %s\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, addr_valid, status, exception_type, kbase_gpu_exception_name(exception_type), diff --git a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c index 01ca419af60c..ebe35bc8fe89 100644 --- a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c @@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, u32 const exception_data = (status >> 8) & 0xFFFFFF; int const as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at VA 0x%016llX\n" + "GPU bus fault in AS%d at PA %pK\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "exception data 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, status, exception_type, kbase_gpu_exception_name(exception_type), exception_data, diff --git a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c index 52528ad5e231..68eda00b16a6 100644 --- a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,56 +43,301 @@ #include #include - +#if !MALI_USE_CSF +#include +#endif #include -#define KBASE_MMU_PAGE_ENTRIES 512 + +/* Threshold used to decide whether to flush full caches or just a physical range */ +#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20 +#define MGM_DEFAULT_PTE_GROUP (0) + +/* Macro to convert updated PDGs to flags indicating levels skip in flush */ +#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) + +/* Small wrapper function to factor out GPU-dependent context releasing */ +static void release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +#if MALI_USE_CSF + CSTD_UNUSED(kbdev); + kbase_ctx_sched_release_ctx_lock(kctx); +#else /* MALI_USE_CSF */ + kbasep_js_runpool_release_ctx(kbdev, kctx); +#endif /* MALI_USE_CSF */ +} + +static void mmu_hw_operation_begin(struct kbase_device *kbdev) +{ +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) +#if MALI_USE_CSF + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { + unsigned long flags; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON_ONCE(kbdev->mmu_hw_operation_in_progress); + kbdev->mmu_hw_operation_in_progress = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif /* MALI_USE_CSF */ +#endif /* !CONFIG_MALI_NO_MALI */ +} + +static void mmu_hw_operation_end(struct kbase_device *kbdev) +{ +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) +#if MALI_USE_CSF + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { + unsigned long flags; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON_ONCE(!kbdev->mmu_hw_operation_in_progress); + kbdev->mmu_hw_operation_in_progress = false; + /* Invoke the PM state machine, the L2 power off may have been + * skipped due to the MMU command. + */ + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif /* MALI_USE_CSF */ +#endif /* !CONFIG_MALI_NO_MALI */ +} /** - * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. + * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done + * through GPU_CONTROL interface + * @kbdev: kbase device to check GPU model ID on. + * + * This function returns whether a cache flush for page table update should + * run through GPU_CONTROL interface or MMU_AS_CONTROL interface. * - * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. + * Return: True if cache flush should be done on GPU command. + */ +static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) +{ + uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id & + GPU_ID2_ARCH_MAJOR) >> + GPU_ID2_ARCH_MAJOR_SHIFT; + + return arch_maj_cur > 11; +} + +/** + * mmu_flush_pa_range() - Flush physical address range * - * If sync is not set then transactions still in flight when the flush is issued - * may use the old page tables and the data they write will not be written out - * to memory, this function returns after the flush has been issued but - * before all accesses which might effect the flushed region have completed. 
+ * @kbdev: kbase device to issue the MMU operation on. + * @phys: Starting address of the physical range to start the operation on. + * @nr_bytes: Number of bytes to work on. + * @op: Type of cache flush operation to perform. * - * If sync is set then accesses in the flushed region will be drained - * before data is flush and invalidated through L1, L2 and into memory, - * after which point this function will return. + * Issue a cache flush physical range command. */ -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync); /** - * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches. - * @kbdev: Device pointer. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. - * @as_nr: GPU address space number for which flush + invalidate is required. + * mmu_invalidate() - Perform an invalidate operation on MMU caches. + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. + * + * Perform an MMU invalidate operation on a particual address space + * by issuing a UNLOCK command. + */ +static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, + const struct kbase_mmu_hw_op_param *op_param) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + as_nr = kctx ? kctx->as_nr : as_nr; + err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover"); + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/* Perform a flush/invalidate on a particular address space + */ +static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int err; + bool gpu_powered; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + gpu_powered = kbdev->pm.backend.gpu_powered; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* GPU is off so there's no need to perform flush/invalidate. + * But even if GPU is not actually powered down, after gpu_powered flag + * was set to false, it is still safe to skip the flush/invalidate. + * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE + * which is sent when address spaces are restored after gpu_powered flag + * is set to true. Flushing of L2 cache is certainly not required as L2 + * cache is definitely off if gpu_powered is false. + */ + if (!gpu_powered) + return; + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* GPU has just been powered off due to system suspend. + * So again, no need to perform flush/invalidate. + */ + return; + } + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_flush(kbdev, as, op_param); + mmu_hw_operation_end(kbdev); + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover. 
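+ * kbase_prepare_to_reset_gpu() returns true only for the caller that
+ * wins the right to perform the reset, so concurrent flush failures
+ * lead to at most one call to kbase_reset_gpu().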
+ */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); + + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + kbase_pm_context_idle(kbdev); +} + +/** + * mmu_flush_invalidate() - Perform a flush operation on GPU caches. + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which flush + invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. + * + * This function performs the cache flush operation described by @op_param. + * The function retains a reference to the given @kctx and releases it + * after performing the flush operation. + * + * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue + * a cache flush + invalidate to the L2 caches and invalidate the TLBs. + * + * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue + * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as + * invalidating the TLBs. + * + * If operation is set to KBASE_MMU_OP_UNLOCK then this function will only + * invalidate the MMU caches and TLBs. + */ +static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, + const struct kbase_mmu_hw_op_param *op_param) +{ + bool ctx_is_in_runpool; + + /* Early out if there is nothing to do */ + if (op_param->nr == 0) + return; + + /* If no context is provided then MMU operation is performed on address + * space which does not belong to user space context. Otherwise, retain + * refcount to context provided and release after flush operation. + */ + if (!kctx) { + mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param); + } else { +#if !MALI_USE_CSF + mutex_lock(&kbdev->js_data.queue_mutex); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); + mutex_unlock(&kbdev->js_data.queue_mutex); +#else + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); +#endif /* !MALI_USE_CSF */ + + if (ctx_is_in_runpool) { + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param); + + release_ctx(kbdev, kctx); + } + } +} + +/** + * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via + * the GPU_CONTROL interface + * @kbdev: The Kbase device. + * @kctx: The Kbase context. + * @as_nr: GPU address space number for which flush + invalidate is required. + * @op_param: Non-NULL pointer to struct containing information about the MMU + * operation to perform. * - * This is used for MMU tables which do not belong to a user space context. + * Perform a flush/invalidate on a particular address space via the GPU_CONTROL + * interface. */ -static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, - u64 vpfn, size_t nr, bool sync, int as_nr); +static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, + int as_nr, const struct kbase_mmu_hw_op_param *op_param) +{ + int err = 0; + unsigned long flags; + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { + as_nr = kctx ? 
kctx->as_nr : as_nr; + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], + op_param); + } + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover. + */ + dev_err(kbdev->dev, + "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); +} /** - * kbase_mmu_sync_pgd() - sync page directory to memory when needed. + * kbase_mmu_sync_pgd - sync page directory to memory * @kbdev: Device pointer. * @handle: Address of DMA region. * @size: Size of the region to sync. * * This should be called after each page directory update. */ + static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, dma_addr_t handle, size_t size) { - /* In non-coherent system, ensure the GPU can read + /* If page table is not coherent then ensure the gpu can read * the pages from memory */ #if IS_ENABLED(CONFIG_MALI_EXYNOS_LLC) @@ -112,15 +357,93 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, * - ATE: Address Translation Entry. A 64bit value pointing to * a 4kB physical page. */ - static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id); + struct tagged_addr *phys, size_t nr, unsigned long flags, + int group_id, u64 *dirty_pgds); +/** + * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and + * free memory of the page directories + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @pgds: Physical addresses of page directories to be freed. + * @vpfn: The virtual page frame number. + * @level: The level of MMU page table. + * @flush_op: The type of MMU flush operation to perform. + * @dirty_pgds: Flags to track every level where a PGD has been updated. + * @free_pgds_list: Linked list of the page directory pages to free. + */ +static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t *pgds, + u64 vpfn, int level, + u64 *dirty_pgds, + struct list_head *free_pgds_list); +/** + * kbase_mmu_free_pgd() - Free memory of the page directory + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @pgd: Physical address of page directory to be freed. + * + * This function is supposed to be called with mmu_lock held and after + * ensuring that GPU won't be able to access the page. +*/ +static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t pgd) +{ + struct page *p; + + lockdep_assert_held(&mmut->mmu_lock); + + p = pfn_to_page(PFN_DOWN(pgd)); + + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. + */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +/** + * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @free_pgds_list: Linked list of the page directory pages to free. 
+ * + * This function will call kbase_mmu_free_pgd() on each page directory page + * present in the @free_pgds_list. + * + * The function is supposed to be called after the GPU cache and MMU TLB has + * been invalidated post the teardown loop. + */ +static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct list_head *free_pgds_list) +{ + struct page *page, *next_page; + + mutex_lock(&mmut->mmu_lock); + + list_for_each_entry_safe(page, next_page, free_pgds_list, lru) { + list_del_init(&page->lru); + kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page)); + } + + mutex_unlock(&mmut->mmu_lock); +} /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault - * @kbdev: KBase device + * * @reg: The region that will be backed with more pages * @fault_rel_pfn: PFN of the fault relative to the start of the region * @@ -215,17 +538,37 @@ static void kbase_gpu_mmu_handle_write_faulting_as( KBASE_MMU_FAULT_TYPE_PAGE); } +static void set_gwt_element_page_addr_and_size( + struct kbasep_gwt_list_element *element, + u64 fault_page_addr, struct tagged_addr fault_phys) +{ + u64 fault_pfn = fault_page_addr >> PAGE_SHIFT; + unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1); + + /* If the fault address lies within a 2MB page, then consider + * the whole 2MB page for dumping to avoid incomplete dumps. + */ + if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) { + element->page_addr = fault_page_addr & ~(SZ_2M - 1); + element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE; + } else { + element->page_addr = fault_page_addr; + element->num_pages = 1; + } +} + static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, struct kbase_as *faulting_as) { struct kbasep_gwt_list_element *pos; struct kbase_va_region *region; struct kbase_device *kbdev; + struct tagged_addr *fault_phys_addr; struct kbase_fault *fault; u64 fault_pfn, pfn_offset; - u32 op; int ret; int as_no; + u64 dirty_pgds = 0; as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); @@ -253,15 +596,18 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, return; } + pfn_offset = fault_pfn - region->start_pfn; + fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset]; + /* Capture addresses of faulting write location * for job dumping if write tracking is enabled. */ if (kctx->gwt_enabled) { - u64 page_addr = fault->addr & PAGE_MASK; + u64 fault_page_addr = fault->addr & PAGE_MASK; bool found = false; /* Check if this write was already handled. */ list_for_each_entry(pos, &kctx->gwt_current_list, link) { - if (page_addr == pos->page_addr) { + if (fault_page_addr == pos->page_addr) { found = true; break; } @@ -271,8 +617,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, pos = kmalloc(sizeof(*pos), GFP_KERNEL); if (pos) { pos->region = region; - pos->page_addr = page_addr; - pos->num_pages = 1; + set_gwt_element_page_addr_and_size(pos, + fault_page_addr, *fault_phys_addr); list_add(&pos->link, &kctx->gwt_current_list); } else { dev_warn(kbdev->dev, "kmalloc failure"); @@ -280,17 +626,12 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, } } - pfn_offset = fault_pfn - region->start_pfn; /* Now make this faulting page writable to GPU. 
*/ - ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - 1, region->flags, region->gpu_alloc->group_id); - - /* flush L2 and unlock the VA (resumes the MMU) */ - op = AS_COMMAND_FLUSH_PT; + ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, + region->gpu_alloc->group_id, &dirty_pgds); - kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, - fault_pfn, 1, op); + kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, + kctx->id, dirty_pgds); kbase_gpu_vm_unlock(kctx); } @@ -525,18 +866,6 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, return true; } -/* Small wrapper function to factor out GPU-dependent context releasing */ -static void release_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ -#if MALI_USE_CSF - CSTD_UNUSED(kbdev); - kbase_ctx_sched_release_ctx_lock(kctx); -#else /* MALI_USE_CSF */ - kbasep_js_runpool_release_ctx(kbdev, kctx); -#endif /* MALI_USE_CSF */ -} - void kbase_mmu_page_fault_worker(struct work_struct *data) { u64 fault_pfn; @@ -560,6 +889,11 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) size_t pages_trimmed = 0; #endif + /* Calls to this function are inherently synchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; + faulting_as = container_of(data, struct kbase_as, work_pagefault); fault = &faulting_as->pf_data; fault_pfn = fault->addr >> PAGE_SHIFT; @@ -570,9 +904,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) "Entering %s %pK, fault_pfn %lld, as_no %d\n", __func__, (void *)data, fault_pfn, as_no); - /* Grab the context that was already refcounted in kbase_mmu_interrupt() - * Therefore, it cannot be scheduled out of this AS until we explicitly - * release it + /* Grab the context that was already refcounted in kbase_mmu_interrupt(). + * Therefore, it cannot be scheduled out of this AS until we explicitly release it */ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); if (!kctx) { @@ -596,7 +929,6 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) goto fault_done; } #endif - if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault); @@ -640,13 +972,13 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault", fault); goto fault_done; default: @@ -728,6 +1060,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) current_backed_size = kbase_reg_current_backed_size(region); if (fault_rel_pfn < current_backed_size) { + struct kbase_mmu_hw_op_param op_param; + dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", fault->addr, region->start_pfn, @@ -746,8 +1080,29 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) * transaction (which should cause the other page fault to be * raised again). 
*/ - kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, - AS_COMMAND_UNLOCK, 1); + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = kctx->id; + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } else { + /* Can safely skip the invalidate for all levels in case + * of duplicate page faults. + */ + op_param.flush_skip_levels = 0xF; + op_param.vpfn = fault_pfn; + op_param.nr = 1; + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); @@ -766,14 +1121,38 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) new_pages); if (new_pages == 0) { + struct kbase_mmu_hw_op_param op_param; + mutex_lock(&kbdev->mmu_hw_mutex); /* Duplicate of a fault we've already handled, nothing to do */ kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); + /* See comment [1] about UNLOCK usage */ - kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, - AS_COMMAND_UNLOCK, 1); + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = kctx->id; + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } else { + /* Can safely skip the invalidate for all levels in case + * of duplicate page faults. + */ + op_param.flush_skip_levels = 0xF; + op_param.vpfn = fault_pfn; + op_param.nr = 1; + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } + + if (err) { + dev_err(kbdev->dev, + "Invalidation for MMU did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); @@ -798,8 +1177,9 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) spin_unlock(&kctx->mem_partials_lock); if (grown) { + u64 dirty_pgds = 0; u64 pfn_offset; - u32 op; + struct kbase_mmu_hw_op_param op_param; /* alloc success */ WARN_ON(kbase_reg_current_backed_size(region) > @@ -818,7 +1198,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset, &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags, region->gpu_alloc->group_id); + new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -862,9 +1243,6 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); - /* flush L2 and unlock the VA (resumes the MMU) */ - op = AS_COMMAND_FLUSH_PT; - /* clear MMU interrupt - this needs to be done after updating * the page tables but before issuing a FLUSH command. 
The * FLUSH cmd has a side effect that it restarts stalled memory @@ -876,9 +1254,30 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); - kbase_mmu_hw_do_operation(kbdev, faulting_as, - fault->addr >> PAGE_SHIFT, - new_pages, op, 1); + op_param.vpfn = region->start_pfn + pfn_offset; + op_param.nr = new_pages; + op_param.op = KBASE_MMU_OP_FLUSH_PT; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + /* Unlock to invalidate the TLB (and resume the MMU) */ + op_param.flush_skip_levels = + pgd_level_to_skip_flush(dirty_pgds); + err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, + &op_param); + } else { + /* flush L2 and unlock the VA (resumes the MMU) */ + mmu_hw_operation_begin(kbdev); + err = kbase_mmu_hw_do_flush(kbdev, faulting_as, + &op_param); + mmu_hw_operation_end(kbdev); + } + + if (err) { + dev_err(kbdev->dev, + "Flush for GPU page table update did not complete on handling page fault @ 0x%llx", + fault->addr); + } mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ @@ -916,6 +1315,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) kbase_gpu_vm_unlock(kctx); } else { int ret = -ENOMEM; + const u8 group_id = region->gpu_alloc->group_id; kbase_gpu_vm_unlock(kctx); @@ -927,23 +1327,21 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) if (grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = - &kctx->mem_pools.large[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.large[group_id]; pages_to_grow = (pages_to_grow + ((1 << lp_mem_pool->order) - 1)) >> lp_mem_pool->order; ret = kbase_mem_pool_grow(lp_mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); } else { #endif struct kbase_mem_pool *const mem_pool = - &kctx->mem_pools.small[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.small[group_id]; ret = kbase_mem_pool_grow(mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); #ifdef CONFIG_MALI_2MB_ALLOC } #endif @@ -987,8 +1385,9 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { u64 *page; - int i; + struct page *p; + phys_addr_t pgd; #ifdef CONFIG_MALI_2MB_ALLOC p = kbase_mem_pool_alloc(&kbdev->mem_pools.large[mmut->group_id]); @@ -1002,6 +1401,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, if (page == NULL) goto alloc_free; + pgd = page_to_phys(p); + /* If the MMU tables belong to a context then account the memory usage * to that context, otherwise the MMU tables are device wide and are * only accounted to the device. 
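pgd_level_to_skip_flush() is used throughout this patch but defined elsewhere in the file. From the call sites visible here (a duplicate fault skips every level with 0xF, while kbase_mmu_flush_noretain later passes dirty_pgds = 0xF so that nothing is skipped) it appears to simply invert the dirty-level bitmask over the four page-table levels. A minimal model under that assumption:

#include <stdint.h>
#include <stdio.h>

/* Assumed model: four page-table levels (0..3), one bit per level.
 * A bit set in dirty_pgds means that level was modified during the
 * page-table update; a bit set in the returned mask means the flush
 * may skip that level.
 */
static uint64_t pgd_level_to_skip_flush_model(uint64_t dirty_pgds)
{
	return ~dirty_pgds & 0xF;
}

int main(void)
{
	/* Only the bottom level (bit 3) was written: levels 0..2 can be skipped. */
	printf("0x%llx\n",
	       (unsigned long long)pgd_level_to_skip_flush_model(1ULL << 3)); /* 0x7 */
	/* Every level reported dirty: nothing may be skipped. */
	printf("0x%llx\n",
	       (unsigned long long)pgd_level_to_skip_flush_model(0xF));       /* 0x0 */
	return 0;
}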
@@ -1022,13 +1423,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) - kbdev->mmu_mode->entry_invalidate(&page[i]); + kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); kunmap(p); - return page_to_phys(p); + return pgd; alloc_free: @@ -1044,9 +1444,9 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, /* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the * new table from the pool if needed and possible */ -static int mmu_get_next_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - phys_addr_t *pgd, u64 vpfn, int level) +static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd, + u64 *dirty_pgds) { u64 *page; phys_addr_t target_pgd; @@ -1070,9 +1470,13 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, return -EINVAL; } - target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + target_pgd = kbdev->mmu_mode->pte_to_phy_addr( + page[vpfn]); if (!target_pgd) { + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; + unsigned int current_valid_entries; + u64 managed_pte; target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (!target_pgd) { dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", @@ -1081,10 +1485,31 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, return -ENOMEM; } - kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); + current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(page); + kbdev->mmu_mode->entry_set_pte(&managed_pte, target_pgd); + page[vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1); - kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); /* Rely on the caller to update the address space flags. */ + if (newly_created_pgd && !*newly_created_pgd) { + *newly_created_pgd = true; + /* If code reaches here we know parent PGD of target PGD was + * not newly created and should be flushed. + */ + flush_op = KBASE_MMU_OP_FLUSH_PT; + + if (dirty_pgds) + *dirty_pgds |= 1ULL << level; + } + + /* MMU cache flush strategy is FLUSH_PT because a new entry is added + * to an existing PGD which may be stored in GPU caches and needs a + * "clean" operation. An "invalidation" operation is not required here + * as this entry points to a new page and cannot be present in GPU + * caches. 
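The patch keeps a per-PGD count of valid entries via get_num_valid_entries()/set_num_valid_entries(); their storage format is not shown in this hunk. The toy model below uses a plain counter to illustrate the bookkeeping and why it matters: once the count reaches zero the PGD page can be handed back to the pool.

#include <stdio.h>
#include <stdbool.h>

#define ENTRIES_PER_PGD 512

/* Toy PGD: the real driver packs the valid-entry count into the page itself
 * through the mmu_mode helpers; here it is just a plain field.
 */
struct toy_pgd {
	bool valid[ENTRIES_PER_PGD];
	unsigned int num_valid;
};

static void pgd_set_entries(struct toy_pgd *pgd, unsigned int idx, unsigned int count)
{
	for (unsigned int i = 0; i < count; i++)
		pgd->valid[idx + i] = true;
	pgd->num_valid += count;
}

/* Returns true when the PGD became empty and may be freed by the caller. */
static bool pgd_invalidate_entries(struct toy_pgd *pgd, unsigned int idx, unsigned int count)
{
	for (unsigned int i = 0; i < count; i++)
		pgd->valid[idx + i] = false;
	pgd->num_valid = (pgd->num_valid >= count) ? pgd->num_valid - count : 0;
	return pgd->num_valid == 0;
}

int main(void)
{
	struct toy_pgd pgd = { 0 };

	pgd_set_entries(&pgd, 16, 64);
	printf("valid after insert: %u\n", pgd.num_valid);        /* 64 */
	printf("free after teardown: %d\n",
	       pgd_invalidate_entries(&pgd, 16, 64));             /* 1 */
	return 0;
}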
+ */ + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); } kunmap(p); @@ -1096,11 +1521,9 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, /* * Returns the PGD for the specified level of translation */ -static int mmu_get_pgd_at_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - int level, - phys_addr_t *out_pgd) +static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + int level, phys_addr_t *out_pgd, bool *newly_created_pgd, + u64 *dirty_pgds) { phys_addr_t pgd; int l; @@ -1109,7 +1532,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, pgd = mmut->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { - int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + int err = + mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { dev_dbg(kbdev->dev, @@ -1124,20 +1548,18 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, return 0; } -static int mmu_get_bottom_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - phys_addr_t *out_pgd) +static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds) { - return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, - out_pgd); + return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd, + newly_created_pgd, dirty_pgds); } static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 from_vpfn, u64 to_vpfn) + struct kbase_mmu_table *mmut, u64 from_vpfn, + u64 to_vpfn, u64 *dirty_pgds, + struct list_head *free_pgds_list) { - phys_addr_t pgd; u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1150,28 +1572,33 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode = kbdev->mmu_mode; while (vpfn < to_vpfn) { - unsigned int i; + unsigned int idx = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; unsigned int pcount = 0; unsigned int left = to_vpfn - vpfn; int level; u64 *page; + phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + phys_addr_t pgd = mmut->pgd; + struct page *p = phys_to_page(pgd); + + register unsigned int num_of_valid_entries; if (count > left) count = left; /* need to check if this is a 2MB page or a 4kB */ - pgd = mmut->pgd; - for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(phys_to_page(pgd)); + pgds[level] = pgd; + page = kmap(p); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kunmap(phys_to_page(pgd)); + kunmap(p); pgd = mmu_mode->pte_to_phy_addr(page[idx]); + p = phys_to_page(pgd); } switch (level) { @@ -1189,26 +1616,82 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, goto next; } + if (dirty_pgds && pcount > 0) + *dirty_pgds |= 1ULL << level; + + num_of_valid_entries = mmu_mode->get_num_valid_entries(page); + if (WARN_ON_ONCE(num_of_valid_entries < pcount)) + num_of_valid_entries = 0; + else + num_of_valid_entries -= pcount; + /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[idx + i]); + mmu_mode->entries_invalidate(&page[idx], pcount); + + if (!num_of_valid_entries) { + kunmap(p); + list_add(&p->lru, free_pgds_list); + kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, + 
dirty_pgds, + free_pgds_list); + vpfn += count; + continue; + } + + mmu_mode->set_num_valid_entries(page, num_of_valid_entries); kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, 8 * pcount); - kunmap(phys_to_page(pgd)); + kunmap(p); next: vpfn += count; } } +static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, const u64 vpfn, + size_t nr, u64 dirty_pgds, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + struct kbase_mmu_hw_op_param op_param; + int as_nr = 0; + + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op = KBASE_MMU_OP_FLUSH_PT; + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; + op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + +#if MALI_USE_CSF + as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR; +#else + WARN_ON(!mmut->kctx); +#endif + + /* MMU cache flush strategy depends on whether GPU control commands for + * flushing physical address ranges are supported. The new physical pages + * are not present in GPU caches therefore they don't need any cache + * maintenance, but PGDs in the page table may or may not be created anew. + * + * Operations that affect the whole GPU cache shall only be done if it's + * impossible to update physical ranges. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); + else + mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); +} + /* * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' */ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int const group_id) + struct tagged_addr phys, size_t nr, + unsigned long flags, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) { phys_addr_t pgd; u64 *pgd_page; @@ -1221,6 +1704,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, size_t remain = nr; int err; struct kbase_device *kbdev; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); if (WARN_ON(kctx == NULL)) return -EINVAL; @@ -1241,6 +1726,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; + register unsigned int num_of_valid_entries; + bool newly_created_pgd = false; if (count > remain) count = remain; @@ -1253,8 +1740,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, * 256 pages at once (on average). Do we really care? 
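Both the recovery loop above and the teardown path later in the patch locate the entry to invalidate with the same expression, nine index bits per level counted down from the bottom level (3). A standalone check of that extraction:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* idx = (vpfn >> ((3 - level) * 9)) & 0x1FF, bottom level being 3. */
	uint64_t vpfn = 0x12345678ULL;

	for (int level = 0; level <= 3; level++)
		printf("level %d index: 0x%03llx\n", level,
		       (unsigned long long)((vpfn >> ((3 - level) * 9)) & 0x1FF));
	/* Prints 0x002, 0x08d, 0x02b, 0x078 for levels 0..3. */
	return 0;
}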
*/ do { - err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, - vpfn, &pgd); + err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd, + &dirty_pgds); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for @@ -1268,19 +1755,19 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, &kbdev->mem_pools.small[ #endif kctx->mmu.group_id], - MIDGARD_MMU_BOTTOMLEVEL); + MIDGARD_MMU_BOTTOMLEVEL,kctx->task); mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { - dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); + dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n", + __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - &kctx->mmu, - start_vpfn, - start_vpfn + recover_count); + mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, + start_vpfn + recover_count, + &dirty_pgds, &free_pgds_list); } goto fail_unlock; } @@ -1288,20 +1775,22 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); + dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - &kctx->mmu, - start_vpfn, - start_vpfn + recover_count); + mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, + start_vpfn + recover_count, + &dirty_pgds, &free_pgds_list); } err = -ENOMEM; goto fail_unlock; } + num_of_valid_entries = + kbdev->mmu_mode->get_num_valid_entries(pgd_page); + for (i = 0; i < count; i++) { unsigned int ofs = index + i; @@ -1312,9 +1801,23 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); } + kbdev->mmu_mode->set_num_valid_entries( + pgd_page, num_of_valid_entries + count); + vpfn += count; remain -= count; + if (count > 0 && !newly_created_pgd) + dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL; + + /* MMU cache flush operation here will depend on whether bottom level + * PGD is newly created or not. + * + * If bottom level PGD is newly created then no cache maintenance is + * required as the PGD will not exist in GPU cache. Otherwise GPU cache + * maintenance is required for existing PGD. 
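The insertion loops split a request into runs that never cross a bottom-level PGD: index is the position inside the current 512-entry page and count is clamped to both the space left in that page and the pages still remaining. A quick standalone check of the arithmetic:

#include <stdio.h>
#include <stdint.h>

#define KBASE_MMU_PAGE_ENTRIES 512

int main(void)
{
	uint64_t vpfn = 0x1FE;   /* two entries before a PGD boundary */
	size_t remain = 1000;

	while (remain) {
		unsigned int index = vpfn & 0x1FF;
		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;

		if (count > remain)
			count = remain;

		printf("vpfn 0x%llx: index %u, count %u\n",
		       (unsigned long long)vpfn, index, count);
		vpfn += count;
		remain -= count;
	}
	/* Prints runs of 2, 512 and 486 pages: only the first and last runs
	 * touch a partially filled PGD.
	 */
	return 0;
}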
+ */ + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64)); @@ -1328,12 +1831,16 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_count += count; } mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); return 0; fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); + kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); return err; } @@ -1380,7 +1887,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, - int const group_id) + int const group_id, + u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; @@ -1388,6 +1896,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, size_t remain = nr; int err; struct kbase_mmu_mode const *mmu_mode; + LIST_HEAD(free_pgds_list); /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ @@ -1407,6 +1916,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; struct page *p; int cur_level; + register unsigned int num_of_valid_entries; + bool newly_created_pgd = false; if (count > remain) count = remain; @@ -1424,8 +1935,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, * 256 pages at once (on average). Do we really care? */ do { - err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, - cur_level, &pgd); + err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd, + &newly_created_pgd, dirty_pgds); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for @@ -1438,19 +1949,19 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, #else &kbdev->mem_pools.small[mmut->group_id], #endif - cur_level); + cur_level,mmut->kctx ? 
mmut->kctx->task : NULL); mutex_lock(&mmut->mmu_lock); } while (!err); if (err) { - dev_warn(kbdev->dev, - "%s: mmu_get_bottom_pgd failure\n", __func__); + dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - mmut, start_vpfn, insert_vpfn); + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, + insert_vpfn, dirty_pgds, + &free_pgds_list); } goto fail_unlock; } @@ -1464,21 +1975,23 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kbdev, - mmut, start_vpfn, insert_vpfn); + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, + insert_vpfn, dirty_pgds, + &free_pgds_list); } err = -ENOMEM; goto fail_unlock; } + num_of_valid_entries = + mmu_mode->get_num_valid_entries(pgd_page); + if (cur_level == MIDGARD_MMU_LEVEL(2)) { int level_index = (insert_vpfn >> 9) & 0x1FF; - u64 *target = &pgd_page[level_index]; + pgd_page[level_index] = + kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id); - if (mmu_mode->pte_is_valid(*target, cur_level)) - cleanup_empty_pte(kbdev, mmut, target); - *target = kbase_mmu_create_ate(kbdev, *phys, flags, - cur_level, group_id); + num_of_valid_entries++; } else { for (i = 0; i < count; i++) { unsigned int ofs = vindex + i; @@ -1496,8 +2009,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, *target = kbase_mmu_create_ate(kbdev, phys[i], flags, cur_level, group_id); } + num_of_valid_entries += count; } + mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + + if (dirty_pgds && count > 0 && !newly_created_pgd) + *dirty_pgds |= 1ULL << cur_level; + phys += count; insert_vpfn += count; remain -= count; @@ -1509,10 +2028,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, kunmap(p); } - err = 0; + mutex_unlock(&mmut->mmu_lock); + return 0; fail_unlock: mutex_unlock(&mmut->mmu_lock); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, + CALLER_MMU_ASYNC); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; } @@ -1523,170 +2046,83 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id) + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) { int err; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, - phys, nr, flags, group_id); + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds); + if (err) + return err; - if (mmut->kctx) - kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false); - else - kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, - as_nr); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info); - return err; + return 0; } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); /** - * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches + * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches * without retaining the kbase context. * @kctx: The KBase context. 
* @vpfn: The virtual page frame number to start the flush on. * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. * * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any * other locking. */ -static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) +static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr) { struct kbase_device *kbdev = kctx->kbdev; int err; - u32 op; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + struct kbase_mmu_hw_op_param op_param; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); /* Early out if there is nothing to do */ if (nr == 0) return; - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; + /* flush L2 and unlock the VA (resumes the MMU) */ + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op = KBASE_MMU_OP_FLUSH_MEM; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + /* Value used to prevent skipping of any levels when flushing */ + op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr], + &op_param); + } else { + err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], + &op_param); + } - err = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - vpfn, nr, op, 0); if (err) { /* Flush failed to complete, assume the * GPU has hung and perform a reset to recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } -/* Perform a flush/invalidate on a particular address space - */ -static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, - struct kbase_as *as, - u64 vpfn, size_t nr, bool sync) -{ - int err; - u32 op; - bool gpu_powered; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - gpu_powered = kbdev->pm.backend.gpu_powered; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* GPU is off so there's no need to perform flush/invalidate. - * But even if GPU is not actually powered down, after gpu_powered flag - * was set to false, it is still safe to skip the flush/invalidate. - * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE - * which is sent when address spaces are restored after gpu_powered flag - * is set to true. Flushing of L2 cache is certainly not required as L2 - * cache is definitely off if gpu_powered is false. - */ - if (!gpu_powered) - return; - - if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* GPU has just been powered off due to system suspend. - * So again, no need to perform flush/invalidate. 
- */ - return; - } - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; - - err = kbase_mmu_hw_do_operation(kbdev, - as, vpfn, nr, op, 0); - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - } - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - - kbase_pm_context_idle(kbdev); -} - -static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, - u64 vpfn, size_t nr, bool sync, int as_nr) -{ - /* Skip if there is nothing to do */ - if (nr) { - kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn, - nr, sync); - } -} - -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) -{ - struct kbase_device *kbdev; - bool ctx_is_in_runpool; - - /* Early out if there is nothing to do */ - if (nr == 0) - return; - - /* MALI_SEC_INTEGRATION */ - if (!mali_exynos_get_gpu_power_state()) - return; - - kbdev = kctx->kbdev; -#if !MALI_USE_CSF - mutex_lock(&kbdev->js_data.queue_mutex); - ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); - mutex_unlock(&kbdev->js_data.queue_mutex); -#else - ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); -#endif /* !MALI_USE_CSF */ - - if (ctx_is_in_runpool) { - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], - vpfn, nr, sync); - - release_ctx(kbdev, kctx); - } -} - void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) @@ -1726,12 +2162,103 @@ void kbase_mmu_disable(struct kbase_context *kctx) * The job scheduler code will already be holding the locks and context * so just do the flush. */ - kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); + kbase_mmu_flush_noretain(kctx, 0, ~0); kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +#if !MALI_USE_CSF + /* + * JM GPUs has some L1 read only caches that need to be invalidated + * with START_FLUSH configuration. Purge the MMU disabled kctx from + * the slot_rb tracking field so such invalidation is performed when + * a new katom is executed on the affected slots. 
+ */ + kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); +#endif } KBASE_EXPORT_TEST_API(kbase_mmu_disable); +static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t *pgds, + u64 vpfn, int level, + u64 *dirty_pgds, + struct list_head *free_pgds_list) +{ + int current_level; + + lockdep_assert_held(&mmut->mmu_lock); + + for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); + current_level--) { + phys_addr_t current_pgd = pgds[current_level]; + struct page *p = phys_to_page(current_pgd); + u64 *current_page = kmap(p); + unsigned int current_valid_entries = + kbdev->mmu_mode->get_num_valid_entries(current_page); + int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; + + /* We need to track every level that needs updating */ + if (dirty_pgds) + *dirty_pgds |= 1ULL << current_level; + + kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); + + if (current_valid_entries == 1 && + current_level != MIDGARD_MMU_LEVEL(0)) { + kunmap(p); + + list_add(&p->lru, free_pgds_list); + } else { + current_valid_entries--; + + kbdev->mmu_mode->set_num_valid_entries( + current_page, current_valid_entries); + kunmap(p); + + //kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), + // kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), + // flush_op); + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + + 8 * index, sizeof(u64)); + break; + } + } +} + +/** + * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. + * + * @kbdev: Pointer to kbase device. + * @kctx: Pointer to kbase context. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @phys: Array of physical pages to flush. + * @op_param: Non-NULL pointer to struct containing information about the flush + * operation to perform. + * + * This function will do one of three things: + * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the + * individual pages that were unmapped if feature is supported on GPU. + * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is + * supported on GPU or, + * 3. Perform a full GPU cache flush through the MMU_CONTROL interface. + */ +static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, + struct kbase_context *kctx, int as_nr, + struct tagged_addr *phys, + struct kbase_mmu_hw_op_param *op_param) +{ + + if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); + return; + } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { + mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param); + return; + } + +} + /* * We actually only discard the ATE, and not the page table * pages. There is a potential DoS here, as we'll leak memory by @@ -1744,44 +2271,67 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more * information. 
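kbase_mmu_update_and_free_parent_pgds() walks back up the levels recorded in pgds[]: the freed child's entry is removed from its parent and, if that removal leaves the parent empty (and the parent is not the top level), the walk continues upward; otherwise the reduced count is written back and the walk stops. A compact model of that cascade, using plain counters in place of the driver's packed entry counts:

#include <stdio.h>

#define TOP_LEVEL     0
#define BOTTOM_LEVEL  3

int main(void)
{
	/* valid-entry count of the PGD at each level along one walk path */
	unsigned int valid[BOTTOM_LEVEL + 1] = { 4, 1, 1, 1 };
	int level = BOTTOM_LEVEL;

	/* The bottom-level PGD just became empty; unlink it from its parents. */
	for (int cur = level - 1; cur >= TOP_LEVEL; cur--) {
		if (valid[cur] == 1 && cur != TOP_LEVEL) {
			printf("level %d PGD is now empty, free it too\n", cur);
			continue;
		}
		valid[cur]--;
		printf("level %d keeps %u valid entries, stop\n", cur, valid[cur]);
		break;
	}
	return 0;
}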
*/ -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr) +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, int as_nr) { - phys_addr_t pgd; u64 start_vpfn = vpfn; size_t requested_nr = nr; + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; + struct kbase_mmu_hw_op_param op_param; int err = -EFAULT; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; if (nr == 0) { /* early out if nothing to do */ return 0; } + /* MMU cache flush strategy depends on the number of pages to unmap. In both cases + * the operation is invalidate but the granularity of cache maintenance may change + * according to the situation. + * + * If GPU control command operations are present and the number of pages is "small", + * then the optimal strategy is flushing on the physical address range of the pages + * which are affected by the operation. That implies both the PGDs which are modified + * or removed from the page table and the physical pages which are freed from memory. + * + * Otherwise, there's no alternative to invalidating the whole GPU cache. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) + flush_op = KBASE_MMU_OP_FLUSH_PT; + mutex_lock(&mmut->mmu_lock); mmu_mode = kbdev->mmu_mode; while (nr) { - unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; unsigned int pcount; int level; u64 *page; + phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + register unsigned int num_of_valid_entries; + phys_addr_t pgd = mmut->pgd; + struct page *p = phys_to_page(pgd); if (count > nr) count = nr; - /* need to check if this is a 2MB or a 4kB page */ - pgd = mmut->pgd; - + /* need to check if this is a 2MB page or a 4kB */ for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { phys_addr_t next_pgd; index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(phys_to_page(pgd)); + page = kmap(p); if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { @@ -1805,8 +2355,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, goto next; } next_pgd = mmu_mode->pte_to_phy_addr(page[index]); - kunmap(phys_to_page(pgd)); + kunmap(p); + pgds[level] = pgd; pgd = next_pgd; + p = phys_to_page(pgd); } switch (level) { @@ -1815,7 +2367,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, dev_warn(kbdev->dev, "%s: No support for ATEs at level %d\n", __func__, level); - kunmap(phys_to_page(pgd)); + kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): /* can only teardown if count >= 512 */ @@ -1841,29 +2393,56 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, continue; } + if (pcount > 0) + dirty_pgds |= 1ULL << level; + + num_of_valid_entries = mmu_mode->get_num_valid_entries(page); + if (WARN_ON_ONCE(num_of_valid_entries < pcount)) + num_of_valid_entries = 0; + else + num_of_valid_entries -= pcount; + /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[index + i]); + mmu_mode->entries_invalidate(&page[index], pcount); + + if (!num_of_valid_entries) { + kunmap(p); + list_add(&p->lru, &free_pgds_list); + + 
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, + &dirty_pgds, + &free_pgds_list); + + vpfn += count; + nr -= count; + continue; + } + + mmu_mode->set_num_valid_entries(page, num_of_valid_entries); kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(phys_to_page(pgd)) + - 8 * index, 8*pcount); + kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64)); next: - kunmap(phys_to_page(pgd)); + kunmap(p); vpfn += count; nr -= count; } err = 0; out: mutex_unlock(&mmut->mmu_lock); - - if (mmut->kctx) - kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr, - true); - else - kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr, - true, as_nr); + /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ + op_param = (struct kbase_mmu_hw_op_param){ + .vpfn = start_vpfn, + .nr = requested_nr, + .mmu_sync_info = mmu_sync_info, + .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, + .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT : + KBASE_MMU_OP_FLUSH_MEM, + .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), + }; + mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; } @@ -1889,8 +2468,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); * The caller is responsible for validating the memory attributes */ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id) + struct tagged_addr *phys, size_t nr, unsigned long flags, + int const group_id, u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; @@ -1915,57 +2494,77 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, unsigned int index = vpfn & 0x1FF; size_t count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; + register unsigned int num_of_valid_entries; + int cur_level = MIDGARD_MMU_BOTTOMLEVEL; if (count > nr) count = nr; - do { - err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, - vpfn, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow( -#ifdef CONFIG_MALI_2MB_ALLOC - &kbdev->mem_pools.large[ -#else - &kbdev->mem_pools.small[ -#endif - kctx->mmu.group_id], - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu.mmu_lock); - } while (!err); - if (err) { - dev_warn(kbdev->dev, - "mmu_get_bottom_pgd failure\n"); + if (is_huge(*phys) && (index == index_in_large_page(*phys))) + cur_level = MIDGARD_MMU_LEVEL(2); + + err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL, + dirty_pgds); + if (WARN_ON(err)) goto fail_unlock; - } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "kmap failure\n"); + dev_warn(kbdev->dev, "kmap failure on update_pages"); err = -ENOMEM; goto fail_unlock; } - for (i = 0; i < count; i++) - pgd_page[index + i] = kbase_mmu_create_ate(kbdev, - phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, - group_id); + num_of_valid_entries = + kbdev->mmu_mode->get_num_valid_entries(pgd_page); + + if (cur_level == MIDGARD_MMU_LEVEL(2)) { + int level_index = (vpfn >> 9) & 0x1FF; + struct tagged_addr *target_phys = + phys - index_in_large_page(*phys); + +#ifdef CONFIG_MALI_DEBUG + WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( + pgd_page[level_index], MIDGARD_MMU_LEVEL(2))); +#endif + pgd_page[level_index] = kbase_mmu_create_ate(kbdev, + *target_phys, 
flags, MIDGARD_MMU_LEVEL(2), + group_id); + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (level_index * sizeof(u64)), + sizeof(u64)); + } else { + for (i = 0; i < count; i++) { +#ifdef CONFIG_MALI_DEBUG + WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( + pgd_page[index + i], + MIDGARD_MMU_BOTTOMLEVEL)); +#endif + pgd_page[index + i] = kbase_mmu_create_ate(kbdev, + phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, + group_id); + } + + /* MMU cache flush strategy is NONE because GPU cache maintenance + * will be done by the caller. + */ + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); + } + + kbdev->mmu_mode->set_num_valid_entries(pgd_page, + num_of_valid_entries); + + if (dirty_pgds && count > 0) + *dirty_pgds |= 1ULL << cur_level; phys += count; vpfn += count; nr -= count; - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (index * sizeof(u64)), - count * sizeof(u64)); - - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kunmap(p); } mutex_unlock(&kctx->mmu.mmu_lock); @@ -1981,22 +2580,40 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, unsigned long flags, int const group_id) { int err; + struct kbase_mmu_hw_op_param op_param; + u64 dirty_pgds = 0; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds); + + op_param = (const struct kbase_mmu_hw_op_param){ + .vpfn = vpfn, + .nr = nr, + .op = KBASE_MMU_OP_FLUSH_MEM, + .kctx_id = kctx->id, + .mmu_sync_info = mmu_sync_info, + .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), + }; - err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, - group_id); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, true); + if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev)) + mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param); + else + mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param); return err; } static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd, - int level, u64 *pgd_page_buffer) + int level) { - phys_addr_t target_pgd; - struct page *p; u64 *pgd_page; int i; - struct kbase_mmu_mode const *mmu_mode; + struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; + u64 *pgd_page_buffer = NULL; lockdep_assert_held(&mmut->mmu_lock); @@ -2004,54 +2621,40 @@ static void mmu_teardown_level(struct kbase_device *kbdev, /* kmap_atomic should NEVER fail. 
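A sketch of the index arithmetic in the 2 MB branch of kbase_mmu_update_pages_no_flush() above. It assumes index_in_large_page() returns the 4 kB sub-page index of a tagged address within its 2 MB group (its definition is not part of this hunk); under that assumption the equality test is a check that the block is mapped 2 MB-aligned, so the level-2 ATE can be rewritten in one go.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assume a 2 MB physical group whose first 4 kB page has PFN 0x40200,
	 * mapped 2 MB-aligned at GPU virtual PFN 0x8200, and a call that
	 * starts five pages into the block.
	 */
	uint64_t vpfn = 0x8205;
	uint64_t phys_pfn = 0x40205;                   /* pfn of *phys */
	unsigned int index = vpfn & 0x1FF;             /* 5 */
	unsigned int phys_subpage = phys_pfn & 0x1FF;  /* 5: assumed index_in_large_page() */
	unsigned int level_index = (vpfn >> 9) & 0x1FF;

	printf("take the 2MB path: %d\n", index == phys_subpage);  /* 1 */
	printf("level-2 entry index: 0x%x\n", level_index);        /* 0x41 */
	printf("ATE targets pfn 0x%llx\n",                         /* 0x40200 */
	       (unsigned long long)(phys_pfn - phys_subpage));
	return 0;
}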
*/ if (WARN_ON(pgd_page == NULL)) return; + if (level != MIDGARD_MMU_BOTTOMLEVEL) { /* Copy the page to our preallocated buffer so that we can minimize * kmap_atomic usage */ + pgd_page_buffer = mmut->mmu_teardown_pages[level]; memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + } + + /* Invalidate page after copying */ + mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; - mmu_mode = kbdev->mmu_mode; - - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { - target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); - - if (target_pgd) { + if (level != MIDGARD_MMU_BOTTOMLEVEL) { + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { if (mmu_mode->pte_is_valid(pgd_page[i], level)) { + phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr( + pgd_page[i]); mmu_teardown_level(kbdev, mmut, target_pgd, - level + 1, - pgd_page_buffer + - (PAGE_SIZE / sizeof(u64))); + level + 1); } } } - p = pfn_to_page(PFN_DOWN(pgd)); -#ifdef CONFIG_MALI_2MB_ALLOC - kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], -#else - kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], -#endif - p, true); - - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. - */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, int const group_id) { + int level; + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || WARN_ON(group_id < 0)) return -EINVAL; @@ -2059,14 +2662,20 @@ int kbase_mmu_init(struct kbase_device *const kbdev, mmut->group_id = group_id; mutex_init(&mmut->mmu_lock); mmut->kctx = kctx; + mmut->pgd = 0; - /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ - mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */ + for (level = MIDGARD_MMU_TOPLEVEL; + level < MIDGARD_MMU_BOTTOMLEVEL; level++) { + mmut->mmu_teardown_pages[level] = + kmalloc(PAGE_SIZE, GFP_KERNEL); - if (mmut->mmu_teardown_pages == NULL) - return -ENOMEM; + if (!mmut->mmu_teardown_pages[level]) { + kbase_mmu_term(kbdev, mmut); + return -ENOMEM; + } + } - mmut->pgd = 0; /* We allocate pages into the kbdev memory pool, then * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. @@ -2080,7 +2689,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, #else &kbdev->mem_pools.small[mmut->group_id], #endif - MIDGARD_MMU_BOTTOMLEVEL); + MIDGARD_MMU_BOTTOMLEVEL,kctx ? 
kctx->task : NULL); if (err) { kbase_mmu_term(kbdev, mmut); return -ENOMEM; @@ -2096,17 +2705,28 @@ int kbase_mmu_init(struct kbase_device *const kbdev, void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { + int level; + + WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", + mmut->kctx->tgid, mmut->kctx->id); + if (mmut->pgd) { mutex_lock(&mmut->mmu_lock); - mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL, - mmut->mmu_teardown_pages); + mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); mutex_unlock(&mmut->mmu_lock); if (mmut->kctx) KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); } - kfree(mmut->mmu_teardown_pages); + for (level = MIDGARD_MMU_TOPLEVEL; + level < MIDGARD_MMU_BOTTOMLEVEL; level++) { + if (!mmut->mmu_teardown_pages[level]) + break; + kfree(mmut->mmu_teardown_pages[level]); + } + mutex_destroy(&mmut->mmu_lock); } @@ -2115,6 +2735,7 @@ void kbase_mmu_as_term(struct kbase_device *kbdev, int i) destroy_workqueue(kbdev->as[i].pf_wq); } +#if defined(CONFIG_MALI_VECTOR_DUMP) static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) { @@ -2254,6 +2875,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) return NULL; } KBASE_EXPORT_TEST_API(kbase_mmu_dump); +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ void kbase_mmu_bus_fault_worker(struct work_struct *data) { diff --git a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.h index a2d1a8ee8475..fde745536c46 100644 --- a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.h +++ b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.h @@ -22,15 +22,40 @@ #ifndef _KBASE_MMU_H_ #define _KBASE_MMU_H_ +#include + +#define KBASE_MMU_PAGE_ENTRIES 512 +#define KBASE_MMU_INVALID_PGD_ADDRESS (~(phys_addr_t)0) +struct kbase_context; +struct kbase_mmu_table; +/** + * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. + * A pointer to this type is passed down from the outer-most callers in the kbase + * module - where the information resides as to the synchronous / asynchronous + * nature of the call flow, with respect to MMU operations. ie - does the call flow relate to + * existing GPU work does it come from requests (like ioctl) from user-space, power management, + * etc. + * + * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice + * of a 'valid' value + * @CALLER_MMU_SYNC: Arbitrary value for 'synchronous that isn't easy to choose by accident + * @CALLER_MMU_ASYNC: Also hard to choose by accident + */ +enum kbase_caller_mmu_sync_info { + CALLER_MMU_UNSET_SYNCHRONICITY, + CALLER_MMU_SYNC = 0x02, + CALLER_MMU_ASYNC +}; + /** * kbase_mmu_as_init() - Initialising GPU address space object. * - * This is called from device probe to initialise an address space object - * of the device. - * * @kbdev: The kbase device structure for the device (must be a valid pointer). * @i: Array index of address space object. * + * This is called from device probe to initialise an address space object + * of the device. + * * Return: 0 on success and non-zero value on failure. 
*/ int kbase_mmu_as_init(struct kbase_device *kbdev, int i); @@ -103,22 +128,21 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); u64 kbase_mmu_create_ate(struct kbase_device *kbdev, struct tagged_addr phy, unsigned long flags, int level, int group_id); -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - const u64 start_vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id); +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id, u64 *dirty_pgds); int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id); + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int group_id); + struct tagged_addr phys, size_t nr, + unsigned long flags, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - size_t nr, int as_nr); +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, int as_nr); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); diff --git a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw.h index d1f1ff2befe5..2c6b7e76944a 100644 --- a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw.h @@ -31,6 +31,8 @@ #ifndef _KBASE_MMU_HW_H_ #define _KBASE_MMU_HW_H_ +#include "mali_kbase_mmu.h" + /* Forward declarations */ struct kbase_device; struct kbase_as; @@ -52,6 +54,45 @@ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED }; +/** + * enum kbase_mmu_op_type - enum for MMU operations + * @KBASE_MMU_OP_NONE: To help catch uninitialized struct + * @KBASE_MMU_OP_FIRST: The lower boundary of enum + * @KBASE_MMU_OP_LOCK: Lock memory region + * @KBASE_MMU_OP_UNLOCK: Unlock memory region + * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) + * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) + * @KBASE_MMU_OP_COUNT: The upper boundary of enum + */ +enum kbase_mmu_op_type { + KBASE_MMU_OP_NONE = 0, /* Must be zero */ + KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ + KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, + KBASE_MMU_OP_UNLOCK, + KBASE_MMU_OP_FLUSH_PT, + KBASE_MMU_OP_FLUSH_MEM, + KBASE_MMU_OP_COUNT /* Must be the last in enum */ +}; + +/** + * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions + * @vpfn: MMU Virtual Page Frame Number to start the operation on. + * @nr: Number of pages to work on. + * @op: Operation type (written to ASn_COMMAND). + * @kctx_id: Kernel context ID for MMU command tracepoint. + * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + * @flush_skip_levels: Page table levels to skip flushing. 
(Only + * applicable if GPU supports feature) + */ +struct kbase_mmu_hw_op_param { + u64 vpfn; + u32 nr; + enum kbase_mmu_op_type op; + u32 kctx_id; + enum kbase_caller_mmu_sync_info mmu_sync_info; + u64 flush_skip_levels; +}; + /** * kbase_mmu_hw_configure - Configure an address space for use. * @kbdev: kbase device to configure. @@ -63,6 +104,53 @@ enum kbase_mmu_fault_type { void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); +/** + * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without + * programming the LOCKADDR register and wait + * for it to complete before returning. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** + * kbase_mmu_hw_do_unlock - Issue UNLOCK command to the MMU and wait for it + * to complete before returning. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** + * kbase_mmu_hw_do_flush - Issue a flush operation to the MMU. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Issue a flush operation on the address space as per the information + * specified inside @op_param. This function should not be called for + * GPUs where MMU command to flush the cache(s) is deprecated. + * mmu_hw_mutex needs to be held when calling this function. + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_do_operation - Issue an operation to the MMU. * @kbdev: kbase device to issue the MMU operation on. @@ -82,6 +170,42 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, u64 vpfn, u32 nr, u32 type, unsigned int handling_irq); +/** + * kbase_mmu_hw_do_flush_locked - Issue a flush operation to the MMU. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Issue a flush operation on the address space as per the information + * specified inside @op_param. This function should not be called for + * GPUs where MMU command to flush the cache(s) is deprecated. + * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this + * function. + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** + * kbase_mmu_hw_do_flush_on_gpu_ctrl - Issue a flush operation to the MMU. 
+ * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Issue a flush operation on the address space as per the information + * specified inside @op_param. GPU command is used to flush the cache(s) + * instead of the MMU command. + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by * the MMU. diff --git a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw_direct.c index 261e28ed49b9..da3274058ee8 100644 --- a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_hw_direct.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include @@ -43,41 +44,62 @@ * * Return: 0 if success, or an error code on failure. */ -static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr) +static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, + const struct kbase_mmu_hw_op_param *op_param) { - const u64 lockaddr_base = pfn << PAGE_SHIFT; - u64 lockaddr_size_log2, region_frame_number_start, - region_frame_number_end; + const u64 lockaddr_base = op_param->vpfn << PAGE_SHIFT; + const u64 lockaddr_end = ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1; + u64 lockaddr_size_log2; - if (num_pages == 0) + if (op_param->nr == 0) return -EINVAL; - /* The size is expressed as a logarithm and should take into account - * the possibility that some pages might spill into the next region. + /* The MMU lock region is a self-aligned region whose size + * is a power of 2 and that contains both start and end + * of the address range determined by pfn and num_pages. + * The size of the MMU lock region can be defined as the + * largest divisor that yields the same result when both + * start and end addresses are divided by it. + * + * For instance: pfn=0x4F000 num_pages=2 describe the + * address range between 0x4F000 and 0x50FFF. It is only + * 2 memory pages. However there isn't a single lock region + * of 8 kB that encompasses both addresses because 0x4F000 + * would fall into the [0x4E000, 0x4FFFF] region while + * 0x50000 would fall into the [0x50000, 0x51FFF] region. + * The minimum lock region size that includes the entire + * address range is 128 kB, and the region would be + * [0x40000, 0x5FFFF]. + * + * The region size can be found by comparing the desired + * start and end addresses and finding the highest bit + * that differs. The smallest naturally aligned region + * must include this bit change, hence the desired region + * starts with this bit (and subsequent bits) set to 0 + * and ends with the bit (and subsequent bits) set to 1. + * + * In the example above: 0x4F000 ^ 0x50FFF = 0x1FFFF + * therefore the highest bit that differs is bit #16 + * and the region size (as a logarithm) is 16 + 1 = 17, i.e. 128 kB. */ - lockaddr_size_log2 = fls(num_pages) + PAGE_SHIFT - 1; - - /* Round up if the number of pages is not a power of 2. */ - if (num_pages != ((u32)1 << (lockaddr_size_log2 - PAGE_SHIFT))) - lockaddr_size_log2 += 1; - - /* Round up if some memory pages spill into the next region. 
*/ - region_frame_number_start = pfn >> (lockaddr_size_log2 - PAGE_SHIFT); - region_frame_number_end = - (pfn + num_pages - 1) >> (lockaddr_size_log2 - PAGE_SHIFT); - - if (region_frame_number_start < region_frame_number_end) - lockaddr_size_log2 += 1; - - /* Represent the size according to the HW specification. */ - lockaddr_size_log2 = MAX(lockaddr_size_log2, - KBASE_LOCK_REGION_MIN_SIZE_LOG2); + lockaddr_size_log2 = fls(lockaddr_base ^ lockaddr_end); + /* Cap the size against minimum and maximum values allowed. */ if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) return -EINVAL; - /* The lowest bits are cleared and then set to size - 1 to represent - * the size in a way that is compatible with the HW specification. + lockaddr_size_log2 = + MAX(lockaddr_size_log2, kbase_get_lock_region_min_size_log2(gpu_props)); + + /* Represent the result in a way that is compatible with HW spec. + * + * Upper bits are used for the base address, whose lower bits + * are cleared to avoid confusion because they are going to be ignored + * by the MMU anyway, since lock regions shall be aligned with + * a multiple of their size and cannot start from any address. + * + * Lower bits are used for the size, which is represented as + * logarithm minus one of the actual size. */ *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); *lockaddr |= lockaddr_size_log2 - 1; @@ -118,6 +140,9 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) status = wait_ready(kbdev, as_nr); if (status == 0) kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); + else{ + dev_err(kbdev->dev,"Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u",as_nr, cmd); + } return status; } @@ -127,6 +152,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) struct kbase_mmu_setup *current_setup = &as->current_setup; u64 transcfg = 0; + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + transcfg = current_setup->transcfg; /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK @@ -174,45 +202,251 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) transcfg); write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); +#if MALI_USE_CSF + /* Wait for UPDATE command to complete */ + wait_ready(kbdev, as->number); +#endif +} + +/** + * mmu_command_instr - Record an MMU command for instrumentation purposes. + * + * @kbdev: Kbase device used to issue MMU operation on. + * @kctx_id: Kernel context ID for MMU command tracepoint. + * @cmd: Command issued to the MMU. + * @lock_addr: Address of memory region locked for the operation. + * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + */ +static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr); + u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr); + + bool is_mmu_synchronous = (mmu_sync_info == CALLER_MMU_SYNC); + + KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kctx_id, cmd, is_mmu_synchronous, lock_addr_base, + lock_addr_size); +} + +/* Helper function to program the LOCKADDR register before LOCK/UNLOCK command + * is issued. 
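The XOR/fls arithmetic introduced above is easy to sanity-check in isolation. The stand-alone program below reproduces the worked example from the comment (pfn 0x4F, i.e. byte address 0x4F000, with 2 pages) and prints the resulting LOCKADDR encoding. fls64_compat() is a portable stand-in for the kernel's fls(), and the clamping against kbase_get_lock_region_min_size_log2() / KBASE_LOCK_REGION_MAX_SIZE_LOG2 is deliberately omitted.

/* Stand-alone check of the lock-region arithmetic (compiles with any C compiler). */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned int fls64_compat(uint64_t v)
{
        unsigned int r = 0;

        while (v) {
                r++;
                v >>= 1;
        }
        return r;
}

int main(void)
{
        const uint64_t vpfn = 0x4F;  /* byte address 0x4F000 */
        const uint64_t nr = 2;       /* two 4 kB pages */
        const uint64_t base = vpfn << PAGE_SHIFT;
        const uint64_t end = ((vpfn + nr) << PAGE_SHIFT) - 1;
        const unsigned int size_log2 = fls64_compat(base ^ end);
        uint64_t lockaddr = base & ~((1ULL << size_log2) - 1);

        lockaddr |= size_log2 - 1;

        /* Expected: size_log2=17 (128 kB region [0x40000, 0x5FFFF]),
         * lockaddr=0x40010 (base 0x40000, size field 17 - 1 = 16).
         */
        printf("size_log2=%u lockaddr=0x%llx\n", size_log2,
               (unsigned long long)lockaddr);
        return 0;
}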
+ */ +static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock_addr, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret; + + ret = lock_region(&kbdev->gpu_props, lock_addr, op_param); + + if (!ret) { + /* Set the region that needs to be updated */ + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO), + *lock_addr & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI), + (*lock_addr >> 32) & 0xFFFFFFFFUL); + } + return ret; +} + +/** + * mmu_hw_do_lock_no_wait - Issue LOCK command to the MMU and return without + * waiting for it's completion. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @lock_addr: Address of memory region locked for this operation. + * @op_param: Pointer to a struct containing information about the MMU operation. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *as, u64 *lock_addr, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret; + + ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param); + + if (!ret) + write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + + return ret; +} + +static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret; + u64 lock_addr = 0x0; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); + + if (!ret) + ret = wait_ready(kbdev, as->number); + + if (!ret) + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr, + op_param->mmu_sync_info); + + return ret; +} + +int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret = 0; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + + /* Wait for UNLOCK command to complete */ + if (!ret) + ret = wait_ready(kbdev, as->number); + + if (!ret) { + u64 lock_addr = 0x0; + /* read MMU_AS_CONTROL.LOCKADDR register */ + lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI)) + << 32; + lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO)); + + mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, + lock_addr, op_param->mmu_sync_info); + } + + return ret; } -int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, - u64 vpfn, u32 nr, u32 op, - unsigned int handling_irq) +int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret = 0; + u64 lock_addr = 0x0; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + ret = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param); + + if (!ret) + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, + op_param); + + return ret; +} + +static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) { int ret; + u64 lock_addr = 0x0; + u32 mmu_cmd = AS_COMMAND_FLUSH_MEM; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at + * this point would be unexpected. 
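A small sketch of when each of the two UNLOCK helpers defined above applies. Only the two exported functions are real; the wrapper and its flag parameter are hypothetical.

/* kbase_mmu_hw_do_unlock() programs AS_LOCKADDR from op_param before issuing
 * UNLOCK, so it suits callers that did not issue a matching LOCK themselves.
 * kbase_mmu_hw_do_unlock_no_addr() reuses whatever AS_LOCKADDR already holds,
 * e.g. right after mmu_hw_do_lock() in the GPU-control flush path.
 */
static int example_unlock(struct kbase_device *kbdev, struct kbase_as *as,
                          const struct kbase_mmu_hw_op_param *op_param,
                          bool lockaddr_already_programmed)
{
        if (lockaddr_already_programmed)
                return kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);

        return kbase_mmu_hw_do_unlock(kbdev, as, op_param);
}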
+ */ + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && + op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + dev_err(kbdev->dev, "Unexpected flush operation received"); + return -EINVAL; + } lockdep_assert_held(&kbdev->mmu_hw_mutex); - if (op == AS_COMMAND_UNLOCK) { - /* Unlock doesn't require a lock first */ - ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); - } else { - u64 lock_addr; - - ret = lock_region(vpfn, nr, &lock_addr); - - if (!ret) { - /* Lock the region that needs to be updated */ - kbase_reg_write(kbdev, - MMU_AS_REG(as->number, AS_LOCKADDR_LO), - lock_addr & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, - MMU_AS_REG(as->number, AS_LOCKADDR_HI), - (lock_addr >> 32) & 0xFFFFFFFFUL); - write_cmd(kbdev, as->number, AS_COMMAND_LOCK); - - /* Run the MMU operation */ - write_cmd(kbdev, as->number, op); - - /* Wait for the flush to complete */ - ret = wait_ready(kbdev, as->number); - } + if (op_param->op == KBASE_MMU_OP_FLUSH_PT) + mmu_cmd = AS_COMMAND_FLUSH_PT; + + /* Lock the region that needs to be updated */ + ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); + if (ret) + return ret; + +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) + /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here + * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is + * supported, and this function doesn't gets called for the GPUs where + * FLUSH_MEM/PT command is deprecated. + */ + if (mmu_cmd == AS_COMMAND_FLUSH_MEM) { + ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, + as->number, hwaccess_locked); + if (ret) + return ret; } +#endif + + write_cmd(kbdev, as->number, mmu_cmd); + + /* Wait for the command to complete */ + ret = wait_ready(kbdev, as->number); + + if (!ret) + mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr, + op_param->mmu_sync_info); return ret; } +int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_flush(kbdev, as, op_param, true); +} + +int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + return mmu_hw_do_flush(kbdev, as, op_param, false); +} + +int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + int ret, ret2; + u32 gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2_LSC; + + if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + + /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at + * this point would be unexpected. + */ + if (op_param->op != KBASE_MMU_OP_FLUSH_PT && + op_param->op != KBASE_MMU_OP_FLUSH_MEM) { + dev_err(kbdev->dev, "Unexpected flush operation received"); + return -EINVAL; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op_param->op == KBASE_MMU_OP_FLUSH_PT) + gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2; + + /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ + ret = mmu_hw_do_lock(kbdev, as, op_param); + if (ret) + return ret; + + /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */ + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, gpu_cmd); + + /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. 
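The two flush paths above select their commands from the same two-valued operation type. The sketch below condenses that mapping; judging by the command names, FLUSH_PT / CACHE_CLN_INV_L2 touch the L2 only, while FLUSH_MEM / CACHE_CLN_INV_L2_LSC also clean and invalidate the load/store caches. The helper names are hypothetical; the constants are the ones used in the hunks above.

static u32 example_mmu_flush_cmd(enum kbase_mmu_op_type op)
{
        return (op == KBASE_MMU_OP_FLUSH_PT) ? AS_COMMAND_FLUSH_PT :
                                               AS_COMMAND_FLUSH_MEM;
}

static u32 example_gpu_ctrl_flush_cmd(enum kbase_mmu_op_type op)
{
        return (op == KBASE_MMU_OP_FLUSH_PT) ? GPU_COMMAND_CACHE_CLN_INV_L2 :
                                               GPU_COMMAND_CACHE_CLN_INV_L2_LSC;
}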
*/ + ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); + + return ret ?: ret2; +} + void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, enum kbase_mmu_fault_type type) { diff --git a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_mode_aarch64.c index 16b928d42e25..b324fa64d87d 100644 --- a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu_mode_aarch64.c @@ -42,6 +42,9 @@ #define ENTRY_ACCESS_BIT (1ULL << 10) #define ENTRY_NX_BIT (1ULL << 54) +#define UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR (55) +#define VALID_ENTRY_MASK ((u64)0xF << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR) + /* Helper Function to perform assignment of page table entries, to * ensure the use of strd, which is required on LPAE systems. */ @@ -85,6 +88,7 @@ static phys_addr_t pte_to_phy_addr(u64 entry) if (!(entry & 1)) return 0; + entry &= ~VALID_ENTRY_MASK; return entry & ~0xFFF; } @@ -151,29 +155,66 @@ static void entry_set_ate(u64 *entry, ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); } +static unsigned int get_num_valid_entries(u64 *pgd) +{ + register unsigned int num_of_valid_entries; + + num_of_valid_entries = + (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8)); + num_of_valid_entries |= + (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4)); + num_of_valid_entries |= + (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> + (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)); + + return num_of_valid_entries; +} + +static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries) +{ + WARN_ON_ONCE(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES); + + pgd[0] &= ~VALID_ENTRY_MASK; + pgd[0] |= ((u64)(num_of_valid_entries & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + + pgd[1] &= ~VALID_ENTRY_MASK; + pgd[1] |= ((u64)((num_of_valid_entries >> 4) & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + + pgd[2] &= ~VALID_ENTRY_MASK; + pgd[2] |= ((u64)((num_of_valid_entries >> 8) & 0xF) + << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); +} + static void entry_set_pte(u64 *entry, phys_addr_t phy) { page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE); } -static void entry_invalidate(u64 *entry) +static void entries_invalidate(u64 *entry, u32 count) { - page_table_entry_set(entry, ENTRY_IS_INVAL); + u32 i; + + for (i = 0; i < count; i++) + page_table_entry_set(entry + i, ENTRY_IS_INVAL); } -static struct kbase_mmu_mode const aarch64_mode = { - .update = mmu_update, - .get_as_setup = kbase_mmu_get_as_setup, - .disable_as = mmu_disable_as, - .pte_to_phy_addr = pte_to_phy_addr, - .ate_is_valid = ate_is_valid, - .pte_is_valid = pte_is_valid, - .entry_set_ate = entry_set_ate, - .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, - .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE -}; +static const struct kbase_mmu_mode aarch64_mode = { .update = mmu_update, + .get_as_setup = kbase_mmu_get_as_setup, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + .ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entries_invalidate = entries_invalidate, + .get_num_valid_entries = get_num_valid_entries, + .set_num_valid_entries = set_num_valid_entries, + .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE }; struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) { diff --git 
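The valid-entry counter packing added for aarch64 above can be verified with a short stand-alone round trip: the 0..512 count is split into three nibbles stored at bits 55..58 of pgd[0], pgd[1] and pgd[2]. The constants are copied from the hunk; everything else is plain user-space C.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define UNUSED_BIT_POS   55
#define VALID_ENTRY_MASK ((uint64_t)0xF << UNUSED_BIT_POS)

static void set_count(uint64_t *pgd, unsigned int count)
{
        pgd[0] = (pgd[0] & ~VALID_ENTRY_MASK) |
                 ((uint64_t)(count & 0xF) << UNUSED_BIT_POS);
        pgd[1] = (pgd[1] & ~VALID_ENTRY_MASK) |
                 ((uint64_t)((count >> 4) & 0xF) << UNUSED_BIT_POS);
        pgd[2] = (pgd[2] & ~VALID_ENTRY_MASK) |
                 ((uint64_t)((count >> 8) & 0xF) << UNUSED_BIT_POS);
}

static unsigned int get_count(const uint64_t *pgd)
{
        return (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> UNUSED_BIT_POS) |
               (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> (UNUSED_BIT_POS - 4)) |
               (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >> (UNUSED_BIT_POS - 8));
}

int main(void)
{
        uint64_t pgd[3] = { 0x4F000000000003ULL, 0, 0 }; /* arbitrary PTE bits */
        unsigned int n;

        for (n = 0; n <= 512; n++) {
                set_count(pgd, n);
                assert(get_count(pgd) == n);
        }
        printf("round trip OK, e.g. 0x1A3 -> nibbles 0x3/0xA/0x1 in pgd[0..2]\n");
        return 0;
}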
a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.c b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.c index 09818a590da0..ed4128968c01 100644 --- a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.c +++ b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.c @@ -192,6 +192,8 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { int rcode; + if (!timeline_is_permitted()) + return -EPERM; #if MALI_USE_CSF if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { diff --git a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.h b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.h index 63926ebc63a5..36b2ebafe606 100644 --- a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.h +++ b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline.h @@ -117,4 +117,6 @@ void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); #endif /* MALI_UNIT_TEST */ +bool timeline_is_permitted(void); + #endif /* _KBASE_TIMELINE_H */ diff --git a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline_io.c index 23e42adc1264..6996faa27040 100644 --- a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_timeline_io.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,6 +27,59 @@ #include #include +#ifndef MALI_STRIP_KBASE_DEVELOPMENT +/* Development builds need to test instrumentation and enable unprivileged + * processes to acquire timeline streams, in order to avoid complications + * with configurations across multiple platforms and systems. + * + * Release builds, instead, shall deny access to unprivileged processes + * because there are no use cases where they are allowed to acquire timeline + * streams, unless they're given special permissions by a privileged process. + */ +static int kbase_unprivileged_global_profiling = 1; +#else +static int kbase_unprivileged_global_profiling; +#endif + +/** + * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes + * + * @val: String containing value to set. Only strings representing positive + * integers are accepted as valid; any non-positive integer (including 0) + * is rejected. + * @kp: Module parameter associated with this method. + * + * This method can only be used to enable permissions for unprivileged processes, + * if they are disabled: for this reason, the only values which are accepted are + * strings representing positive integers. Since it's impossible to disable + * permissions once they're set, any integer which is non-positive is rejected, + * including 0. + * + * Return: 0 if success, otherwise error code. 
+ */ +static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp) +{ + int new_val; + int ret = kstrtoint(val, 0, &new_val); + + if (ret == 0) { + if (new_val < 1) + return -EINVAL; + + kbase_unprivileged_global_profiling = 1; + } + + return ret; +} + +static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = { + .get = param_get_int, + .set = kbase_unprivileged_global_profiling_set, +}; + +module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops, + &kbase_unprivileged_global_profiling, 0600); + /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos); @@ -45,6 +98,15 @@ const struct file_operations kbasep_tlstream_fops = { .fsync = kbasep_timeline_io_fsync, }; +bool timeline_is_permitted(void) +{ +#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE + return kbase_unprivileged_global_profiling || perfmon_capable(); +#else + return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN); +#endif +} + /** * kbasep_timeline_io_packet_pending - check timeline streams for pending * packets diff --git a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.c index 2c0de0196f44..4b18b4556857 100644 --- a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.c @@ -124,21 +124,7 @@ enum tl_msg_id_obj { KBASE_OBJ_MSG_COUNT, }; -/* Message ids of trace events that are recorded in the auxiliary stream. */ -enum tl_msg_id_aux { - KBASE_AUX_PM_STATE, - KBASE_AUX_PAGEFAULT, - KBASE_AUX_PAGESALLOC, - KBASE_AUX_DEVFREQ_TARGET, - KBASE_AUX_PROTECTED_ENTER_START, - KBASE_AUX_PROTECTED_ENTER_END, - KBASE_AUX_PROTECTED_LEAVE_START, - KBASE_AUX_PROTECTED_LEAVE_END, - KBASE_AUX_JIT_STATS, - KBASE_AUX_TILER_HEAP_STATS, - KBASE_AUX_EVENT_JOB_SLOT, - KBASE_AUX_MSG_COUNT, -}; + #define OBJ_TP_LIST \ TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \ @@ -508,6 +494,22 @@ enum tl_msg_id_aux { const char *obj_desc_header = (const char *) &__obj_desc_header; const size_t obj_desc_header_size = sizeof(__obj_desc_header); +/* Message ids of trace events that are recorded in the aux stream. 
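The new module parameter is deliberately one-way: the setter only accepts strictly positive integers and always latches the flag to 1, so unprivileged timeline access can be granted at runtime but never revoked; on development builds (MALI_STRIP_KBASE_DEVELOPMENT not defined) the flag already defaults to 1. The parameter is registered with mode 0600, so only root can write it, typically through the module's parameters directory in sysfs. A condensed sketch of the setter's semantics, with a hypothetical helper name:

static int example_one_way_enable(const char *val, int *flag)
{
        int new_val;
        int ret = kstrtoint(val, 0, &new_val);

        if (ret)
                return ret;      /* not an integer at all */
        if (new_val < 1)
                return -EINVAL;  /* 0 and negative values are rejected */

        *flag = 1;               /* latch on; there is no way to clear it */
        return 0;
}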
*/ +enum tl_msg_id_aux { + KBASE_AUX_PM_STATE, + KBASE_AUX_PAGEFAULT, + KBASE_AUX_PAGESALLOC, + KBASE_AUX_DEVFREQ_TARGET, + KBASE_AUX_JIT_STATS, + KBASE_AUX_TILER_HEAP_STATS, + KBASE_AUX_EVENT_JOB_SLOT, + KBASE_AUX_PROTECTED_ENTER_START, + KBASE_AUX_PROTECTED_ENTER_END, + KBASE_AUX_MMU_COMMAND, + KBASE_AUX_PROTECTED_LEAVE_START, + KBASE_AUX_PROTECTED_LEAVE_END, + KBASE_AUX_MSG_COUNT, +}; #define AUX_TP_LIST \ TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \ @@ -3216,4 +3218,42 @@ void __kbase_tlstream_tl_kbase_csffw_reset( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_aux_mmu_command( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 mmu_cmd_id, + u32 mmu_synchronicity, + u64 mmu_lock_addr, + u32 mmu_lock_page_num +) +{ + const u32 msg_id = KBASE_AUX_MMU_COMMAND; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(mmu_cmd_id) + + sizeof(mmu_synchronicity) + + sizeof(mmu_lock_addr) + + sizeof(mmu_lock_page_num) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_cmd_id, sizeof(mmu_cmd_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_synchronicity, sizeof(mmu_synchronicity)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_lock_addr, sizeof(mmu_lock_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &mmu_lock_page_num, sizeof(mmu_lock_page_num)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} /* clang-format on */ diff --git a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.h index 887a1aa0f48f..d5fa6c2478ba 100644 --- a/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bv_r32p1/tl/mali_kbase_tracepoints.h @@ -492,6 +492,14 @@ void __kbase_tlstream_tl_kbase_csffw_reset( struct kbase_tlstream *stream, u64 csffw_cycle); +void __kbase_tlstream_aux_mmu_command( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 mmu_cmd_id, + u32 mmu_synchronicity, + u64 mmu_lock_addr, + u32 mmu_lock_page_num +); struct kbase_tlstream; /** @@ -3091,6 +3099,36 @@ struct kbase_tlstream; do { } while (0) #endif /* MALI_USE_CSF */ +/** + * KBASE_TLSTREAM_AUX_MMU_COMMAND - mmu commands with synchronicity info + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @mmu_cmd_id: MMU Command ID (e.g AS_COMMAND_UPDATE) + * @mmu_synchronicity: Indicates whether the command is related to current running job that needs to be resolved to make it progress (synchronous, e.g. grow on page fault, JIT) or not (asynchronous, e.g. IOCTL calls from user-space). This param will be 0 if it is an asynchronous operation. 
+ * @mmu_lock_addr: start address of regions to be locked/unlocked/invalidated + * @mmu_lock_page_num: number of pages to be locked/unlocked/invalidated + */ +#define KBASE_TLSTREAM_AUX_MMU_COMMAND( \ + kbdev, \ + kernel_ctx_id, \ + mmu_cmd_id, \ + mmu_synchronicity, \ + mmu_lock_addr, \ + mmu_lock_page_num \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_mmu_command( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + kernel_ctx_id, \ + mmu_cmd_id, \ + mmu_synchronicity, \ + mmu_lock_addr, \ + mmu_lock_page_num \ + ); \ + } while (0) /* Gator tracepoints are hooked into TLSTREAM interface. * When the following tracepoints are called, corresponding diff --git a/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_csf.c index 32bf82526aa3..1a040a9a3e54 100644 --- a/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_csf.c @@ -31,6 +31,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_DEBUG_FS) #include @@ -89,6 +90,7 @@ static const struct kbase_context_init context_init[] = { "Memory pool group initialization failed" }, { kbase_mem_evictable_init, kbase_mem_evictable_deinit, "Memory evictable initialization failed" }, + { kbase_ctx_sched_init_ctx, NULL, NULL }, { kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" }, { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, diff --git a/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_jm.c index 97cd46e0e5b5..348f36164259 100644 --- a/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_jm.c +++ b/drivers/gpu/arm/bv_r38p1/context/backend/mali_kbase_context_jm.c @@ -134,6 +134,7 @@ static const struct kbase_context_init context_init[] = { "Memory pool group initialization failed" }, { kbase_mem_evictable_init, kbase_mem_evictable_deinit, "Memory evictable initialization failed" }, + { kbase_ctx_sched_init_ctx, NULL, NULL }, { kbase_context_mmu_init, kbase_context_mmu_term, "MMU initialization failed" }, { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, diff --git a/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c b/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c index 95bd641187c6..f67dddd15e42 100644 --- a/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,12 @@ /* * Base kernel context APIs */ +#include +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #include #include @@ -176,17 +182,50 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; + kctx->task = NULL; atomic_set(&kctx->nonmapped_pages, 0); atomic_set(&kctx->permanent_mapped_pages, 0); kctx->tgid = current->tgid; kctx->pid = current->pid; + /* Check if this is a Userspace created context */ + if (likely(kctx->filp)) { + struct pid *pid_struct; + + rcu_read_lock(); + pid_struct = find_get_pid(kctx->tgid); + if (likely(pid_struct)) { + struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID); + + if (likely(task)) { + /* Take a reference on the task to avoid slow lookup + * later on from the page allocation loop. + */ + get_task_struct(task); + kctx->task = task; + } else { + dev_err(kctx->kbdev->dev, + "Failed to get task pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + + put_pid(pid_struct); + } else { + dev_err(kctx->kbdev->dev, + "Failed to get pid pointer for %s/%d", + current->comm, current->pid); + err = -ESRCH; + } + rcu_read_unlock(); + + if (unlikely(err)) + return err; + kbase_mem_mmgrab(); + kctx->process_mm = current->mm; + } + atomic_set(&kctx->used_pages, 0); mutex_init(&kctx->reg_lock); @@ -217,13 +256,16 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; mutex_lock(&kctx->kbdev->kctx_list_lock); - err = kbase_insert_kctx_to_process(kctx); - if (err) - dev_err(kctx->kbdev->dev, - "(err:%d) failed to insert kctx to kbase_process\n", err); - mutex_unlock(&kctx->kbdev->kctx_list_lock); + if (err) { + dev_err(kctx->kbdev->dev, + "(err:%d) failed to insert kctx to kbase_process", err); + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } + } return err; } @@ -298,15 +340,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, @@ -318,6 +353,11 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); + if (likely(kctx->filp)) { + mmdrop(kctx->process_mm); + put_task_struct(kctx->task); + } + KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); } diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c index 0a70ae7c7f4a..5f63cbf9efde 100644 --- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c @@ -345,7 +345,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, ret = kbase_mem_pool_alloc_pages( &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, 
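The context-creation hunk above pins both the creating task and its mm so that later page allocations can be accounted to the process and the mm safely dereferenced. The sketch below isolates that reference pattern; the function name is hypothetical, since kbase does this inline in kbase_context_common_init() and releases the references (put_task_struct() plus mmdrop()) in kbase_context_common_term(). Plain mmgrab() is shown for illustration and needs kernel 4.11 or later; the driver goes through its kbase_mem_mmgrab() wrapper.

#include <linux/errno.h>
#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>

static int example_pin_creator(pid_t tgid, struct task_struct **out_task,
                               struct mm_struct **out_mm)
{
        struct pid *pid_struct;
        struct task_struct *task = NULL;

        rcu_read_lock();
        pid_struct = find_get_pid(tgid);
        if (pid_struct) {
                task = pid_task(pid_struct, PIDTYPE_PID);
                if (task)
                        get_task_struct(task); /* dropped at context term */
                put_pid(pid_struct);
        }
        rcu_read_unlock();

        if (!task)
                return -ESRCH;

        mmgrab(current->mm);                   /* paired with mmdrop() */
        *out_task = task;
        *out_mm = current->mm;
        return 0;
}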
queue->phys, false); + num_pages, queue->phys, false, kctx->task); if (ret != num_pages) goto phys_alloc_failed; @@ -1126,7 +1126,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, /* Get physical page for a normal suspend buffer */ err = kbase_mem_pool_alloc_pages( &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - nr_pages, &s_buf->phy[0], false); + nr_pages, &s_buf->phy[0], false, kctx->task); if (err < 0) goto phy_pages_alloc_failed; @@ -3025,7 +3025,7 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - 1, &phys, false); + 1, &phys, false, NULL); if (ret <= 0) { fput(filp); @@ -3061,7 +3061,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, - false); + false, NULL); if (ret <= 0) return ret; diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c index 74df40c48d52..e840d3bc6cc2 100644 --- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -627,7 +627,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mem_pool_alloc_pages( kbase_mem_pool_group_select( kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), - num_pages_aligned, phys, false); + num_pages_aligned, phys, false, NULL); } } @@ -2653,7 +2653,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + num_pages, phys, false, NULL); if (ret <= 0) goto phys_mem_pool_alloc_error; diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c index c71612226d38..353339af02ca 100644 --- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
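Several call sites in this patch gain a trailing task argument for kbase_mem_pool_alloc_pages(). The convention, sketched below, is that per-context allocations pass kctx->task so the pages are charged to the creating process, while device-global allocations (firmware images, doorbell and dummy user-register pages) pass NULL. The two wrappers are hypothetical and the exact prototype of kbase_mem_pool_alloc_pages() lives elsewhere in the driver; only the final parameter is the point here.

static int example_alloc_for_context(struct kbase_context *kctx,
                                     struct kbase_mem_pool *pool,
                                     size_t nr_pages, struct tagged_addr *pages)
{
        /* Context-backed allocation: charge the creating task. */
        return kbase_mem_pool_alloc_pages(pool, nr_pages, pages, false,
                                          kctx->task);
}

static int example_alloc_for_device(struct kbase_mem_pool *pool,
                                    size_t nr_pages, struct tagged_addr *pages)
{
        /* Device-wide allocation: no process to charge. */
        return kbase_mem_pool_alloc_pages(pool, nr_pages, pages, false, NULL);
}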
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1485,7 +1485,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mem_pool_alloc_pages( &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + num_pages, phys, false, NULL); if (ret <= 0) goto phys_mem_pool_alloc_error; diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c index 2e0c26d1672d..9115a695cf4e 100644 --- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c @@ -356,6 +356,14 @@ static int kbase_kcpu_jit_allocate_prepare( lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + if (!kbase_mem_allow_alloc(kctx)) { + dev_dbg(kctx->kbdev->dev, + "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", + current->comm, current->pid, kctx->tgid, kctx->id); + ret = -EINVAL; + goto out; + } + if (!data || count > kcpu_queue->kctx->jit_max_allocations || count > ARRAY_SIZE(kctx->jit_alloc)) { ret = -EINVAL; diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_scheduler.c index b8892799ae5d..3fc4d741bc3e 100644 --- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_scheduler.c @@ -5871,7 +5871,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (!kctx->csf.sched.sync_update_wq) { dev_err(kctx->kbdev->dev, "Failed to initialize scheduler context workqueue"); - return -ENOMEM; + err = -ENOMEM; + goto alloc_wq_failed; } INIT_WORK(&kctx->csf.sched.sync_update_work, @@ -5882,10 +5883,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - destroy_workqueue(kctx->csf.sched.sync_update_wq); + goto event_wait_add_failed; } return err; + +event_wait_add_failed: + destroy_workqueue(kctx->csf.sched.sync_update_wq); +alloc_wq_failed: + kbase_ctx_sched_remove_ctx(kctx); + return err; } void kbase_csf_scheduler_context_term(struct kbase_context *kctx) @@ -5893,6 +5900,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); cancel_work_sync(&kctx->csf.sched.sync_update_work); destroy_workqueue(kctx->csf.sched.sync_update_wq); + + kbase_ctx_sched_remove_ctx(kctx); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bv_r38p1/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bv_r38p1/jm/mali_kbase_jm_defs.h index 6e9aa1108482..da81981170ed 100644 --- a/drivers/gpu/arm/bv_r38p1/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bv_r38p1/jm/mali_kbase_jm_defs.h @@ -344,19 +344,6 @@ enum kbase_atom_exit_protected_state { KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, }; -/** - * struct kbase_ext_res - Contains the info for external resources referred - * by an atom, which have been mapped on GPU side. - * @gpu_address: Start address of the memory region allocated for - * the resource from GPU virtual address space. - * @alloc: pointer to physical pages tracking object, set on - * mapping the external resource on GPU side. - */ -struct kbase_ext_res { - u64 gpu_address; - struct kbase_mem_phy_alloc *alloc; -}; - /** * struct kbase_jd_atom - object representing the atom, containing the complete * state and attributes of an atom. 
@@ -390,7 +377,8 @@ struct kbase_ext_res { * each allocation is read in order to enforce an * overall physical memory usage limit. * @nr_extres: number of external resources referenced by the atom. - * @extres: pointer to the location containing info about + * @extres: Pointer to @nr_extres VA regions containing the external + * resource allocation and other information. * @nr_extres external resources referenced by the atom. * @device_nr: indicates the coregroup with which the atom is * associated, when @@ -519,7 +507,7 @@ struct kbase_jd_atom { #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ u16 nr_extres; - struct kbase_ext_res *extres; + struct kbase_va_region **extres; u32 device_nr; u64 jc; diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase.h b/drivers/gpu/arm/bv_r38p1/mali_kbase.h index 9cb79e98aed7..45cf06860694 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase.h +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase.h @@ -439,19 +439,6 @@ static inline void kbase_free_user_buffer( } } -/** - * kbase_mem_copy_from_extres() - Copy from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @buf_data: Pointer to the information about external resources: - * pages pertaining to the external resource, number of - * pages to copy. - * - * Return: 0 on success, error code otherwise. - */ -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data); - #if !MALI_USE_CSF int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.c index f4a46c12ac92..f2ae61b2cd34 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.c @@ -69,6 +69,13 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); + return 0; +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -201,9 +208,10 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(atomic_read(&kctx->refcount) != 0); @@ -215,6 +223,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.h index f787cc34ba48..8c47c7986e64 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_ctx_sched.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. 
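kbase_ctx_sched_remove_ctx() now takes kbdev->mmu_hw_mutex and kbdev->hwaccess_lock itself, reversing the old contract where callers had to hold both. A caller-side sketch, assuming no other locking requirements:

static void example_ctx_teardown(struct kbase_context *kctx)
{
        /* Must NOT hold mmu_hw_mutex or hwaccess_lock here: the function
         * acquires both internally as of this patch. In this series the
         * call moves out of kbase_context_common_term() and into
         * kbasep_js_kctx_term() / kbase_csf_scheduler_context_term().
         */
        kbase_ctx_sched_remove_ctx(kctx);
}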
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,6 +59,17 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); */ void kbase_ctx_sched_term(struct kbase_device *kbdev); +/** + * kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx + * + * Return: 0 + */ +int kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + /** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference @@ -113,9 +124,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. */ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h b/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h index 3552496852b4..bc556d9444eb 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -555,7 +555,7 @@ struct kbase_devfreq_opp { * @entry_set_pte: program the pte to be a valid entry to encode the physical * address of the next lower level page table and also update * the number of valid entries. - * @entry_invalidate: clear out or invalidate the pte. + * @entries_invalidate: clear out or invalidate a range of ptes. * @get_num_valid_entries: returns the number of valid entries for a specific pgd. * @set_num_valid_entries: sets the number of valid entries for a specific pgd * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. @@ -573,7 +573,7 @@ struct kbase_mmu_mode { void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, unsigned long flags, int level); void (*entry_set_pte)(u64 *entry, phys_addr_t phy); - void (*entry_invalidate)(u64 *entry); + void (*entries_invalidate)(u64 *entry, u32 count); unsigned int (*get_num_valid_entries)(u64 *pgd); void (*set_num_valid_entries)(u64 *pgd, unsigned int num_of_valid_entries); @@ -1598,11 +1598,13 @@ struct kbase_sub_alloc { * is scheduled in and an atom is pulled from the context's per * slot runnable tree in JM GPU or GPU command queue * group is programmed on CSG slot in CSF GPU. - * @mm_update_lock: lock used for handling of special tracking page. * @process_mm: Pointer to the memory descriptor of the process which * created the context. Used for accounting the physical * pages used for GPU allocations, done for the context, - * to the memory consumed by the process. + * to the memory consumed by the process. A reference is taken + * on this descriptor for the Userspace created contexts so that + * Kbase can safely access it to update the memory usage counters. + * The reference is dropped on context termination. 
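The entry_invalidate() callback becomes entries_invalidate() with a count, so callers can clear a contiguous run of PTEs in one call. The caller code is not part of this hunk, so the sketch below is only illustrative; mmu_mode, pgd_page and the index arguments stand in for the driver's real variables.

static void example_invalidate_range(const struct kbase_mmu_mode *mmu_mode,
                                     u64 *pgd_page, unsigned int first,
                                     u32 count)
{
        /* Previously: a per-entry loop over mmu_mode->entry_invalidate(). */
        mmu_mode->entries_invalidate(&pgd_page[first], count);
}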
* @gpu_va_end: End address of the GPU va space (in 4KB page units) * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all * tiler heaps of the kbase context. @@ -1730,7 +1732,10 @@ struct kbase_sub_alloc { * @limited_core_mask: The mask that is applied to the affinity in case of atoms * marked with BASE_JD_REQ_LIMITED_CORE_MASK. * @platform_data: Pointer to platform specific per-context data. - * + * @task: Pointer to the task structure of the main thread of the process + * that created the Kbase context. It would be set only for the + * contexts created by the Userspace and not for the contexts + * created internally by the Kbase.* * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. * Up to one context can be created for each client that opens the device file @@ -1823,8 +1828,7 @@ struct kbase_context { atomic_t refcount; - spinlock_t mm_update_lock; - struct mm_struct __rcu *process_mm; + struct mm_struct *process_mm; u64 gpu_va_end; #if MALI_USE_CSF u32 running_total_tiler_heap_nr_chunks; @@ -1888,6 +1892,8 @@ struct kbase_context { #if !MALI_USE_CSF void *platform_data; #endif + + struct task_struct *task; }; #ifdef CONFIG_MALI_CINSTR_GWT @@ -1916,17 +1922,15 @@ struct kbasep_gwt_list_element { * to a @kbase_context. * @ext_res_node: List head for adding the metadata to a * @kbase_context. - * @alloc: The physical memory allocation structure - * which is mapped. - * @gpu_addr: The GPU virtual address the resource is - * mapped to. + * @reg: External resource information, containing + * the corresponding VA region * @ref: Reference count. * * External resources can be mapped into multiple contexts as well as the same * context multiple times. - * As kbase_va_region itself isn't refcounted we can't attach our extra - * information to it as it could be removed under our feet leaving external - * resources pinned. + * As kbase_va_region is refcounted, we guarantee that it will be available + * for the duration of the external resource, meaning it is sufficient to use + * it to rederive any additional data, like the GPU address. 
* This metadata structure binds a single external resource to a single * context, ensuring that per context mapping is tracked separately so it can * be overridden when needed and abuses by the application (freeing the resource @@ -1934,8 +1938,7 @@ struct kbasep_gwt_list_element { */ struct kbase_ctx_ext_res_meta { struct list_head ext_res_node; - struct kbase_mem_phy_alloc *alloc; - u64 gpu_addr; + struct kbase_va_region *reg; u32 ref; }; diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_jd.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_jd.c index 79442188f76a..c11e4f3d2b31 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_jd.c +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_jd.c @@ -192,13 +192,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - kbase_unmap_external_resource(katom->kctx, reg, alloc); + kbase_unmap_external_resource(katom->kctx, katom->extres[res_no]); } kfree(katom->extres); katom->extres = NULL; @@ -214,7 +208,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) { - int err_ret_val = -EINVAL; + int err = -EINVAL; u32 res_no; #ifdef CONFIG_MALI_DMA_FENCE struct kbase_dma_fence_resv_info info = { @@ -247,21 +241,10 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!katom->extres) return -ENOMEM; - /* copy user buffer to the end of our real buffer. - * Make sure the struct sizes haven't changed in a way - * we don't support - */ - BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); - input_extres = (struct base_external_resource *) - (((unsigned char *)katom->extres) + - (sizeof(*katom->extres) - sizeof(*input_extres)) * - katom->nr_extres); - - if (copy_from_user(input_extres, - get_compat_pointer(katom->kctx, user_atom->extres_list), - sizeof(*input_extres) * katom->nr_extres) != 0) { - err_ret_val = -EINVAL; - goto early_err_out; + input_extres = kmalloc_array(katom->nr_extres, sizeof(*input_extres), GFP_KERNEL); + if (!input_extres) { + err = -ENOMEM; + goto failed_input_alloc; } #ifdef CONFIG_MALI_DMA_FENCE @@ -275,40 +258,45 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #endif GFP_KERNEL); if (!info.resv_objs) { - err_ret_val = -ENOMEM; - goto early_err_out; + err = -ENOMEM; + goto failed_input_copy; } info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), sizeof(unsigned long), GFP_KERNEL); if (!info.dma_fence_excl_bitmap) { - err_ret_val = -ENOMEM; - goto early_err_out; + err = -ENOMEM; + goto failed_input_copy; } } #endif /* CONFIG_MALI_DMA_FENCE */ + if (copy_from_user(input_extres, + get_compat_pointer(katom->kctx, user_atom->extres_list), + sizeof(*input_extres) * katom->nr_extres) != 0) { + err = -EINVAL; + goto failed_input_copy; + } + /* Take the processes mmap lock */ down_read(kbase_mem_get_process_mmap_lock()); /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { - struct base_external_resource *res = &input_extres[res_no]; + struct base_external_resource *user_res = &input_extres[res_no]; struct kbase_va_region *reg; - struct kbase_mem_phy_alloc *alloc; 
#ifdef CONFIG_MALI_DMA_FENCE bool exclusive; - exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) + exclusive = (user_res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) ? true : false; #endif reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, - res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + katom->kctx, user_res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); /* did we find a matching region object? */ - if (kbase_is_region_invalid_or_free(reg)) { + if (unlikely(kbase_is_region_invalid_or_free(reg))) { /* roll back */ goto failed_loop; } @@ -318,12 +306,9 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; } - alloc = kbase_map_external_resource(katom->kctx, reg, - current->mm); - if (!alloc) { - err_ret_val = -EINVAL; + err = kbase_map_external_resource(katom->kctx, reg, current->mm); + if (err) goto failed_loop; - } #ifdef CONFIG_MALI_DMA_FENCE if (implicit_sync && @@ -340,14 +325,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st } #endif /* CONFIG_MALI_DMA_FENCE */ - /* finish with updating out array with the data we found */ - /* NOTE: It is important that this is the last thing we do (or - * at least not before the first write) as we overwrite elements - * as we loop and could be overwriting ourself, so no writes - * until the last read for an element. - */ - katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = alloc; + katom->extres[res_no] = reg; } /* successfully parsed the extres array */ /* drop the vm lock now */ @@ -370,12 +348,13 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(info.dma_fence_excl_bitmap); } #endif /* CONFIG_MALI_DMA_FENCE */ + /* Free the buffer holding data from userspace */ + kfree(input_extres); /* all done OK */ return 0; /* error handling section */ - #ifdef CONFIG_MALI_DMA_FENCE failed_dma_fence_setup: /* Lock the processes mmap lock */ @@ -385,19 +364,23 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kbase_gpu_vm_lock(katom->kctx); #endif - failed_loop: - /* undo the loop work */ +failed_loop: + /* undo the loop work. 
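With katom->extres now holding struct kbase_va_region pointers, the GPU address that used to be cached in kbase_ext_res::gpu_address can be rederived from the region whenever it is needed, which is exactly what the removed assignment in the loop above used to precompute. A one-line sketch:

static u64 example_extres_gpu_address(const struct kbase_va_region *reg)
{
        return (u64)reg->start_pfn << PAGE_SHIFT;
}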
We are guaranteed to have access to the VA region + * as we hold a reference to it until it's unmapped + */ while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg = katom->extres[res_no]; - kbase_unmap_external_resource(katom->kctx, NULL, alloc); + kbase_unmap_external_resource(katom->kctx, reg); } kbase_gpu_vm_unlock(katom->kctx); /* Release the processes mmap lock */ up_read(kbase_mem_get_process_mmap_lock()); - early_err_out: +failed_input_copy: + kfree(input_extres); +failed_input_alloc: kfree(katom->extres); katom->extres = NULL; #ifdef CONFIG_MALI_DMA_FENCE @@ -406,7 +389,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(info.dma_fence_excl_bitmap); } #endif - return err_ret_val; + return err; } static inline void jd_resolve_dep(struct list_head *out_list, diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_js.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_js.c index de956b8216c7..47d8047fa406 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_js.c +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_js.c @@ -724,6 +724,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /* @@ -4039,4 +4041,3 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr req_priority); return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); } - diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c index c1b5600168d8..2ed908ed0c56 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -380,6 +380,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, struct rb_node *rbnext; struct kbase_va_region *next = NULL; struct rb_root *reg_rbtree = NULL; + struct kbase_va_region *orig_reg = reg; int merged_front = 0; int merged_back = 0; @@ -477,6 +478,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev, rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); } + /* This operation is always safe because the function never frees + * the region. If the region has been merged to both front and back, + * then it's the previous region that is supposed to be freed. 
+ */ + orig_reg->start_pfn = 0; + out: return; } @@ -827,6 +834,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1747,7 +1758,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info); if (err) - goto bad_insert; + goto bad_aliased_insert; /* Note: mapping count is tracked at alias * creation time @@ -1761,7 +1772,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, group_id, mmu_sync_info); if (err) - goto bad_insert; + goto bad_aliased_insert; } } } else { @@ -1802,9 +1813,16 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, return err; +bad_aliased_insert: + while (i-- > 0) { + + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + reg->nr_pages, kctx->as_nr); + + } + + bad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - reg->nr_pages, kctx->as_nr); kbase_remove_va_region(kctx->kbdev, reg); @@ -1813,8 +1831,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, KBASE_EXPORT_TEST_API(kbase_gpu_mmap); -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable); +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, + struct kbase_va_region *reg, bool writeable); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -1880,7 +1898,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* The allocation could still have active mappings. */ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, alloc, + kbase_jd_user_buf_unmap(kctx, alloc, reg, (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); } @@ -2032,6 +2050,7 @@ void kbase_sync_single(struct kbase_context *kctx, src = ((unsigned char *)kmap(gpu_page)) + offset; dst = ((unsigned char *)kmap(cpu_page)) + offset; } + memcpy(dst, src, size); kunmap(gpu_page); kunmap(cpu_page); @@ -2469,7 +2488,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, &kctx->mem_pools.large[alloc->group_id], nr_lp * (SZ_2M / SZ_4K), tp, - true); + true, kctx->task); if (res > 0) { nr_left -= res; @@ -2523,7 +2542,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, err = kbase_mem_pool_grow( &kctx->mem_pools.large[alloc->group_id], - 1); + 1, kctx->task); if (err) break; } while (1); @@ -2570,7 +2589,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, if (nr_left) { res = kbase_mem_pool_alloc_pages( &kctx->mem_pools.small[alloc->group_id], - nr_left, tp, false); + nr_left, tp, false, kctx->task); if (res <= 0) goto alloc_failed; } @@ -3062,6 +3081,13 @@ KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); /** * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. * @alloc: The allocation for the imported user buffer. + * + * This must only be called when terminating an alloc, when its refcount + * (number of users) has become 0. 
This also ensures it is only called once all + * CPU mappings have been closed. + * + * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active + * allocations */ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); #endif @@ -4052,7 +4078,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, spin_unlock(&kctx->mem_partials_lock); kbase_gpu_vm_unlock(kctx); - ret = kbase_mem_pool_grow(pool, pool_delta); + ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); kbase_gpu_vm_lock(kctx); if (ret) @@ -4765,7 +4791,23 @@ void kbase_unpin_user_buf_page(struct page *page) #if MALI_USE_CSF static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) { - if (alloc->nents) { + /* In CSF builds, we keep pages pinned until the last reference is + * released on the alloc. A refcount of 0 also means we can be sure + * that all CPU mappings have been closed on this alloc, and no more + * mappings of it will be created. + * + * Further, the WARN() below captures the restriction that this + * function will not handle anything other than the alloc termination + * path, because the caller of kbase_mem_phy_alloc_put() is not + * required to hold the kctx's reg_lock, and so we could not handle + * removing an existing CPU mapping here. + * + * Refer to this function's kernel-doc comments for alternatives for + * unpinning a User buffer. + */ + + if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, + "must only be called on terminating an allocation")) { struct page **pages = alloc->imported.user_buf.pages; long i; @@ -4773,6 +4815,8 @@ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) for (i = 0; i < alloc->nents; i++) kbase_unpin_user_buf_page(pages[i]); + + alloc->nents = 0; } } #endif @@ -4788,6 +4832,8 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, long i; int write; + lockdep_assert_held(&kctx->reg_lock); + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) return -EINVAL; @@ -4818,6 +4864,9 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, return pinned_pages; if (pinned_pages != alloc->imported.user_buf.nr_pages) { + /* Above code already ensures there will not have been a CPU + * mapping by ensuring alloc->nents is 0 + */ for (i = 0; i < pinned_pages; i++) kbase_unpin_user_buf_page(pages[i]); return -ENOMEM; @@ -4831,43 +4880,60 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct kbase_va_region *reg) { - long pinned_pages; + int err; + long pinned_pages = 0; struct kbase_mem_phy_alloc *alloc; struct page **pages; struct tagged_addr *pa; long i; - unsigned long address; struct device *dev; - unsigned long offset; - unsigned long local_size; unsigned long gwt_mask = ~0; - int err = kbase_jd_user_buf_pin_pages(kctx, reg); /* Calls to this function are inherently asynchronous, with respect to * MMU operations. */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kctx->reg_lock); + + err = kbase_jd_user_buf_pin_pages(kctx, reg); + if (err) return err; alloc = reg->gpu_alloc; pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; + + /* Manual CPU cache synchronization. 
+ * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. + */ + for (i = 0; i < pinned_pages; i++) { dma_addr_t dma_addr; - unsigned long min; + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); err = dma_mapping_error(dev, dma_addr); if (err) goto unwind; @@ -4875,8 +4941,6 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4894,11 +4958,33 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, /* fall down */ unwind: alloc->nents = 0; + + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This is precautionary measure in case a GPU job has taken + * advantage of a partially GPU-mapped range to write and corrupt the + * content of memory, either inside or outside the imported region. + * + * Notice that this error recovery path doesn't try to be optimal and just + * flushes the entire page range. + */ + while (i--) { - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); - } + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + #endif + } + + /* The user buffer could already have been previously pinned before + * entering this function, and hence there could potentially be CPU + * mappings of it + */ + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); while (++i < pinned_pages) { kbase_unpin_user_buf_page(pages[i]); @@ -4912,22 +4998,113 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT * have a corresponding call to kbase_jd_user_buf_unpin_pages(). 
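 *
 * Illustrative pairing on JM GPUs (a summary of the code below, not a new
 * requirement introduced by this change):
 *
 *   err = kbase_jd_user_buf_map(kctx, reg);   /* pins pages via
 *                                              * kbase_jd_user_buf_pin_pages()
 *                                              * and DMA-maps them */
 *   ...
 *   kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); /* DMA-unmaps and
 *                                                          * unpins the pages */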
 */
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
-		struct kbase_mem_phy_alloc *alloc, bool writeable)
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
+				    struct kbase_va_region *reg, bool writeable)
 {
 	long i;
 	struct page **pages;
+	unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
 	unsigned long size = alloc->imported.user_buf.size;

+	lockdep_assert_held(&kctx->reg_lock);
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
 	pages = alloc->imported.user_buf.pages;
+
+#if !MALI_USE_CSF
+	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents);
+#else
+	CSTD_UNUSED(reg);
+#endif
+
 	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
-		unsigned long local_size;
+		unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page);
+		/* Notice: this is a temporary variable that is used for DMA sync
+		 * operations, and that could be incremented by an offset if the
+		 * current page contains both imported and non-imported memory
+		 * sub-regions.
+		 *
+		 * It is valid to add an offset to this value, because the offset
+		 * is always kept within the physically contiguous dma-mapped range
+		 * and there's no need to translate to physical address to offset it.
+		 *
+		 * This variable is not going to be used for the actual DMA unmap
+		 * operation, that shall always use the original DMA address of the
+		 * whole memory page.
+		 */
+
 		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];

-		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
-		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
-			       DMA_BIDIRECTIONAL);
+		/* Manual CPU cache synchronization.
+		 *
+		 * When the GPU returns ownership of the buffer to the CPU, the driver
+		 * needs to treat imported and non-imported memory differently.
+		 *
+		 * The first case to consider is non-imported sub-regions at the
+		 * beginning of the first page and at the end of the last page. For these
+		 * sub-regions: CPU cache shall be committed with a clean+invalidate,
+		 * in order to keep the last CPU write.
+		 *
+		 * The imported region requires the opposite treatment: this memory
+		 * has been legitimately mapped and used by the GPU, hence GPU writes
+		 * shall be committed to memory, while CPU cache shall be invalidated
+		 * to make sure that the CPU reads the correct memory content.
+		 *
+		 * The following diagram shows the expected value of the variables
+		 * used in this loop in the corner case of an imported region enclosed
+		 * by a single memory page:
+		 *
+		 * page boundary ->|-----------|<- dma_addr (initial value)
+		 *                 |           |
+		 *                 | - - - - - |<- offset_within_page
+		 *                 |XXXXXXXXXXX|\
+		 *                 |XXXXXXXXXXX| \
+		 *                 |XXXXXXXXXXX|  }- imported_size
+		 *                 |XXXXXXXXXXX| /
+		 *                 |XXXXXXXXXXX|/
+		 *                 | - - - - - |<- offset_within_page + imported_size
+		 *                 |           |\
+		 *                 |           | }- PAGE_SIZE - imported_size - offset_within_page
+		 *                 |           |/
+		 * page boundary ->|-----------|
+		 *
+		 * If the imported region is enclosed by more than one page, then
+		 * offset_within_page = 0 for any page after the first.
+		 */
+
+		/* Only for first page: handle non-imported range at the beginning. */
+		if (offset_within_page > 0) {
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+						   DMA_BIDIRECTIONAL);
+			dma_addr += offset_within_page;
+		}
+
+		/* For every page: handle imported range. */
+		if (imported_size > 0)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+						DMA_BIDIRECTIONAL);
+
+		/* Only for last page (that may coincide with first page):
+		 * handle non-imported range at the end.
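+		 *
+		 * Taken together, the per-page synchronization performed here
+		 * amounts to the following sketch (illustrative only; 'dev'
+		 * stands for kctx->kbdev->dev and dma_addr starts at the page's
+		 * original DMA address):
+		 *
+		 *   dma_sync_single_for_device(dev, dma_addr, offset_within_page,
+		 *                              DMA_BIDIRECTIONAL);
+		 *   dma_sync_single_for_cpu(dev, dma_addr + offset_within_page,
+		 *                           imported_size, DMA_BIDIRECTIONAL);
+		 *   dma_sync_single_for_device(dev, dma_addr + offset_within_page + imported_size,
+		 *                              PAGE_SIZE - imported_size - offset_within_page,
+		 *                              DMA_BIDIRECTIONAL);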
+ */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } + + /* Notice: use the original DMA address to unmap the whole memory page. */ + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); + #else + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + #endif + + + + if (writeable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF @@ -4935,7 +5112,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, pages[i] = NULL; #endif - size -= local_size; + size -= imported_size; } #if !MALI_USE_CSF alloc->nents = 0; @@ -4982,11 +5159,11 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, return 0; } -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm) +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm) { - int err; + int err = 0; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; lockdep_assert_held(&kctx->reg_lock); @@ -4995,7 +5172,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents)) - goto exit; + return -EINVAL; reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; if (reg->gpu_alloc->imported.user_buf @@ -5003,7 +5180,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( err = kbase_jd_user_buf_map(kctx, reg); if (err) { reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto exit; + return err; } } } @@ -5011,21 +5188,29 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_UMM: { err = kbase_mem_umm_map(kctx, reg); if (err) - goto exit; + return err; break; } default: - goto exit; + WARN(1, "Invalid external resource GPU allocation type (%x) on mapping", + alloc->type); + return -EINVAL; } - return kbase_mem_phy_alloc_get(reg->gpu_alloc); -exit: - return NULL; + kbase_va_region_alloc_get(kctx, reg); + kbase_mem_phy_alloc_get(alloc); + return err; } -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) { + /* gpu_alloc was used in kbase_map_external_resources, so we need to use it for the + * unmapping operation. 
+ */ + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + + lockdep_assert_held(&kctx->reg_lock); + switch (alloc->type) { case KBASE_MEM_TYPE_IMPORTED_UMM: { kbase_mem_umm_unmap(kctx, reg, alloc); @@ -5037,24 +5222,32 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, if (alloc->imported.user_buf.current_mapping_usage_count == 0) { bool writeable = true; - if (!kbase_is_region_invalid_or_free(reg) && - reg->gpu_alloc == alloc) + if (!kbase_is_region_invalid_or_free(reg)) { kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), kctx->as_nr); + } - if (reg && ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0)) + if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) writeable = false; - kbase_jd_user_buf_unmap(kctx, alloc, writeable); + kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); } } break; default: - break; + WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", + alloc->type); + return; } kbase_mem_phy_alloc_put(alloc); + kbase_va_region_alloc_put(kctx, reg); +} + +static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) +{ + return reg->start_pfn << PAGE_SHIFT; } struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( @@ -5070,7 +5263,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * metadata which matches the region which is being acquired. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { - if (walker->gpu_addr == gpu_addr) { + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { meta = walker; meta->ref++; break; @@ -5082,8 +5275,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( struct kbase_va_region *reg; /* Find the region */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) goto failed; @@ -5096,13 +5288,15 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * Fill in the metadata object and acquire a reference * for the physical resource. */ - meta->alloc = kbase_map_external_resource(kctx, reg, NULL); - meta->ref = 1; + meta->reg = reg; - if (!meta->alloc) + /* Map the external resource to the GPU allocation of the region + * and acquire the reference to the VA region + */ + if (kbase_map_external_resource(kctx, meta->reg, NULL)) goto fail_map; - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + meta->ref = 1; list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); } @@ -5127,7 +5321,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) * metadata which matches the region which is being released. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) - if (walker->gpu_addr == gpu_addr) + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) return walker; return NULL; @@ -5136,14 +5330,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) static void release_sticky_resource_meta(struct kbase_context *kctx, struct kbase_ctx_ext_res_meta *meta) { - struct kbase_va_region *reg; - - /* Drop the physical memory reference and free the metadata. 
*/ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, - meta->gpu_addr); - - kbase_unmap_external_resource(kctx, reg, meta->alloc); + kbase_unmap_external_resource(kctx, meta->reg); list_del(&meta->ext_res_node); kfree(meta); } diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h index 0a09c318f746..a9a0d7e86b4a 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -947,7 +947,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * @pages: Pointer to array where the physical address of the allocated * pages will be stored. * @partial_allowed: If fewer pages allocated is allowed - * + * @page_owner: Pointer to the task that created the Kbase context for which + * the pages are being allocated. It can be NULL if the pages + * won't be associated with any Kbase context. * Like kbase_mem_pool_alloc() but optimized for allocating many pages. * * Return: @@ -963,7 +965,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. */ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed); + struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner); /** * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool @@ -1075,13 +1077,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); * kbase_mem_pool_grow - Grow the pool * @pool: Memory pool to grow * @nr_to_grow: Number of pages to add to the pool - * + * @page_owner: Pointer to the task that created the Kbase context for which + * the memory pool is being grown. It can be NULL if the pages + * to be allocated won't be associated with any Kbase context. * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to * become larger than the maximum size specified. * * Return: 0 on success, -ENOMEM if unable to allocate sufficent pages */ -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,struct task_struct *page_owner); /** * kbase_mem_pool_trim - Grow or shrink the pool to a new size @@ -1311,6 +1315,7 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); +#if defined(CONFIG_MALI_VECTOR_DUMP) /** * kbase_mmu_dump() - Dump the MMU tables to a buffer. * @@ -1330,7 +1335,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); * (including if the @c nr_pages is too small) */ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); - +#endif /** * kbase_sync_now - Perform cache maintenance on a memory region * @@ -1868,28 +1873,36 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx); /** * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. - * @reg: The region to map. + * @reg: External resource to map. * @locked_mm: The mm_struct which has been locked for this operation. 
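+ *
+ * A minimal usage sketch (mirroring the sticky-resource code in
+ * mali_kbase_mem.c; error handling elided):
+ *
+ *   if (kbase_map_external_resource(kctx, reg, NULL))
+ *           goto fail_map;
+ *   ...
+ *   kbase_unmap_external_resource(kctx, reg);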
 *
- * Return: The physical allocation which backs the region on success or NULL
- * on failure.
+ * On successful mapping, the VA region and the gpu_alloc refcounts will be
+ * increased, making it safe to use and store both values directly.
+ *
+ * Return: Zero on success, or negative error code.
 */
-struct kbase_mem_phy_alloc *kbase_map_external_resource(
-		struct kbase_context *kctx, struct kbase_va_region *reg,
-		struct mm_struct *locked_mm);
+int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg,
+				struct mm_struct *locked_mm);

 /**
  * kbase_unmap_external_resource - Unmap an external resource from the GPU.
  * @kctx: kbase context.
- * @reg: The region to unmap or NULL if it has already been released.
- * @alloc: The physical allocation being unmapped.
+ * @reg: VA region corresponding to external resource
+ *
+ * On successful unmapping, the VA region and the gpu_alloc refcounts will
+ * be decreased. If the refcount reaches zero, both @reg and the corresponding
+ * allocation may be freed, so using them after returning from this function
+ * requires the caller to explicitly check their state.
  */
-void kbase_unmap_external_resource(struct kbase_context *kctx,
-		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg);

 /**
  * kbase_unpin_user_buf_page - Unpin a page of a user buffer.
  * @page: page to unpin
+ *
+ * The caller must have ensured that there are no CPU mappings for @page (as
+ * might be created from the struct kbase_mem_phy_alloc that tracks @page), and
+ * that userspace will not be able to recreate the CPU mappings again.
  */
 void kbase_unpin_user_buf_page(struct page *page);

@@ -2194,8 +2207,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
  * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
  * @kctx: Pointer to kbase context
  *
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
  * from the forked child process using the mali device file fd inherited from
  * the parent process.
  *
@@ -2203,13 +2215,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
 static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
 {
-	bool allow_alloc = true;
-
-	rcu_read_lock();
-	allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
-	rcu_read_unlock();
-
-	return allow_alloc;
+	return (kctx->process_mm == current->mm);
 }

 /**
@@ -2227,6 +2233,22 @@ static inline int kbase_mem_group_id_get(base_mem_alloc_flags flags)
 	return (int)BASE_MEM_GROUP_ID_GET(flags);
 }

+/**
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+	/* This merely takes a reference on the memory descriptor structure
+	 * i.e. mm_struct of current process and not on its address space and
+	 * so won't block the freeing of address space on process exit.
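+	 *
+	 * Illustrative pairing (an assumption about the caller, not mandated
+	 * here): code that caches current->mm, e.g. in kctx->process_mm, is
+	 * expected to grab the reference first and release it with mmdrop()
+	 * on context teardown:
+	 *
+	 *   kbase_mem_mmgrab();
+	 *   kctx->process_mm = current->mm;
+	 *   ...
+	 *   mmdrop(kctx->process_mm);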
+	 */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
+}
+
 /**
  * kbase_mem_group_id_set - Set group ID into base_mem_alloc_flags
  * @id: group ID(0~15) you want to encode
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
index 7d06aa58d139..f46e2b596fa7 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -966,6 +967,15 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 	if (kbase_is_region_invalid_or_free(reg))
 		goto out_unlock;

+	/* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations
+	 * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable.
+	 * This would usually include JIT allocations, Tiler heap related allocations
+	 * & GPU queue ringbuffer and none of them needs to be explicitly marked
+	 * as evictable by Userspace.
+	 */
+	if (reg->flags & KBASE_REG_NO_USER_FREE)
+		goto out_unlock;
+
 	/* Is the region being transitioning between not needed and needed? */
 	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
 	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
@@ -1547,6 +1557,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
 	struct kbase_alloc_import_user_buf *user_buf;
 	struct page **pages = NULL;
+	struct tagged_addr *pa;
+	struct device *dev;
 	int write;

 	/* Flag supported only for dma-buf imported memory */
@@ -1688,31 +1700,48 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	reg->gpu_alloc->nents = 0;
 	reg->extension = 0;

+	pa = kbase_get_gpu_phy_pages(reg);
+	dev = kctx->kbdev->dev;
+
 	if (pages) {
-		struct device *dev = kctx->kbdev->dev;
-		unsigned long local_size = user_buf->size;
-		unsigned long offset = user_buf->address & ~PAGE_MASK;
-		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);

 		/* Top bit signifies that this was pinned on import */
 		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;

+		/* Manual CPU cache synchronization.
+		 *
+		 * The driver disables automatic CPU cache synchronization because the
+		 * memory pages that enclose the imported region may also contain
+		 * sub-regions which are not imported and that are allocated and used
+		 * by the user process. This may be the case of memory at the beginning
+		 * of the first page and at the end of the last page. Automatic CPU cache
+		 * synchronization would force some operations on those memory allocations,
+		 * unbeknown to the user process: in particular, a CPU cache invalidate
+		 * upon unmapping would destroy the content of dirty CPU caches and cause
+		 * the user process to lose CPU writes to the non-imported sub-regions.
+		 *
+		 * When the GPU claims ownership of the imported memory buffer, it shall
+		 * commit CPU writes for the whole of all pages that enclose the imported
+		 * region, otherwise the initial content of memory would be wrong.
+		 */
+		for (i = 0; i < faulted_pages; i++) {
 			dma_addr_t dma_addr;
-			unsigned long min;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+			dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+			dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+						      DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
-			min = MIN(PAGE_SIZE - offset, local_size);
-			dma_addr = dma_map_page(dev, pages[i],
-					offset, min,
-					DMA_BIDIRECTIONAL);
 			if (dma_mapping_error(dev, dma_addr))
 				goto unwind_dma_map;

 			user_buf->dma_addrs[i] = dma_addr;
 			pa[i] = as_tagged(page_to_phys(pages[i]));

-			local_size -= min;
-			offset = 0;
+			dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+		}

 		reg->gpu_alloc->nents = faulted_pages;
@@ -1721,10 +1750,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	return reg;

 unwind_dma_map:
+
+	/* Run the unmap loop in the same order as map loop, and perform again
+	 * CPU cache synchronization to re-write the content of dirty CPU caches
+	 * to memory. This precautionary measure is kept here to keep this code
+	 * aligned with kbase_jd_user_buf_map() to allow for a potential refactor
+	 * in the future.
+	 */
 	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				user_buf->dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_addr_t dma_addr = user_buf->dma_addrs[i];
+
+		dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+		dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 	}
 fault_mismatch:
 	if (pages) {
@@ -1740,7 +1781,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 no_region:
 bad_size:
 	return NULL;
-
 }
@@ -2020,7 +2060,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
 		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
 	}
-
+	if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+		dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+		goto bad_flags;
+	}
 	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
 		dev_warn(kctx->kbdev->dev,
 			 "padding is only supported for UMM");
@@ -2248,6 +2291,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
 	if (reg->flags & KBASE_REG_DONT_NEED)
 		goto out_unlock;

+	if (reg->flags & KBASE_REG_NO_USER_FREE)
+		goto out_unlock;
+
 #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
 	/* Reject resizing commit size */
 	if (reg->flags & KBASE_REG_PF_GROW)
@@ -2630,7 +2676,6 @@ static void kbase_free_unused_jit_allocations(struct kbase_context *kctx)
 	while (kbase_jit_evict(kctx))
 		;
 }
-#endif

 static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
 		struct vm_area_struct *vma,
@@ -2647,9 +2692,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
 	size = (vma->vm_end - vma->vm_start);
 	nr_pages = size >> PAGE_SHIFT;

-#ifdef CONFIG_MALI_VECTOR_DUMP
 	kbase_free_unused_jit_allocations(kctx);
-#endif

 	kaddr = kbase_mmu_dump(kctx, nr_pages);

@@ -2697,7 +2740,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
 out:
 	return err;
 }
-
+#endif
 void kbase_os_mem_map_lock(struct kbase_context *kctx)
 {
@@ -2838,6 +2881,7 @@ int kbase_context_mmap(struct kbase_context *const kctx,
 		err = -EINVAL;
 		goto out_unlock;
 	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+#if defined(CONFIG_MALI_VECTOR_DUMP)
 		/* MMU dump */
 		err = kbase_mmu_dump_mmap(kctx, vma, &reg,
&kaddr); if (err != 0) @@ -2845,6 +2889,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, /* free the region on munmap */ free_on_close = 1; break; +#else + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ #if MALI_USE_CSF case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): kbase_gpu_vm_unlock(kctx); @@ -2932,7 +2981,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); - +#if defined(CONFIG_MALI_VECTOR_DUMP) if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on * the pages, so we can now free the kernel mapping @@ -2951,7 +3000,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, */ vma->vm_pgoff = PFN_DOWN(vma->vm_start); } - +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: kbase_gpu_vm_unlock(kctx); out: @@ -3087,6 +3136,10 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto out_unlock; + + /* check access permissions can be satisfied * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ @@ -3171,79 +3224,27 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { - struct mm_struct *mm; - - rcu_read_lock(); - mm = rcu_dereference(kctx->process_mm); - if (mm) { - atomic_add(pages, &kctx->nonmapped_pages); -#ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); -#else - spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); - spin_unlock(&mm->page_table_lock); -#endif - } - rcu_read_unlock(); -} - -static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) -{ - int pages; - struct mm_struct *mm; - - spin_lock(&kctx->mm_update_lock); - mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); - if (!mm) { - spin_unlock(&kctx->mm_update_lock); - return; - } - - rcu_assign_pointer(kctx->process_mm, NULL); - spin_unlock(&kctx->mm_update_lock); - synchronize_rcu(); - - pages = atomic_xchg(&kctx->nonmapped_pages, 0); + struct mm_struct *mm = kctx->process_mm; + if (unlikely(!mm)) + return; + atomic_add(pages, &kctx->nonmapped_pages); #ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); #else spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); spin_unlock(&mm->page_table_lock); #endif } -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx; - - kctx = vma->vm_private_data; - kbasep_os_process_page_usage_drain(kctx); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .close = kbase_special_vm_close, -}; - static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { - /* check that this is the only tracking page */ - spin_lock(&kctx->mm_update_lock); - if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { - spin_unlock(&kctx->mm_update_lock); - return -EFAULT; - } - - rcu_assign_pointer(kctx->process_mm, current->mm); - - spin_unlock(&kctx->mm_update_lock); + if (vma_pages(vma) != 1) + return -EINVAL; /* no 
real access */ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; return 0; } diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c index 4103bd1c93d0..1889e2049b1b 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,11 @@ #include #include #include - +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #define pool_dbg(pool, format, ...) \ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ (pool->next_pool) ? "kctx" : "kbdev", \ @@ -37,6 +41,46 @@ #define NOT_DIRTY false #define NOT_RECLAIMED false +/** +* can_alloc_page() - Check if the current thread can allocate a physical page +* +* @pool: Pointer to the memory pool. +* @page_owner: Pointer to the task/process that created the Kbase context +* for which a page needs to be allocated. It can be NULL if +* the page won't be associated with Kbase context. +* @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. +* +* This function checks if the current thread is a kernel thread and can make a +* request to kernel to allocate a physical page. If the kernel thread is allocating +* a page for the Kbase context and the process that created the context is exiting +* or is being killed, then there is no point in doing a page allocation. +* +* The check done by the function is particularly helpful when the system is running +* low on memory. When a page is allocated from the context of a kernel thread, OoM +* killer doesn't consider the kernel thread for killing and kernel keeps retrying +* to allocate the page as long as the OoM killer is able to kill processes. +* The check allows kernel thread to quickly exit the page allocation loop once OoM +* killer has initiated the killing of @page_owner, thereby unblocking the context +* termination for @page_owner and freeing of GPU memory allocated by it. This helps +* in preventing the kernel panic and also limits the number of innocent processes +* that get killed. +* +* Return: true if the page can be allocated otherwise false. 
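+*
+* Typical call site, as added to kbase_mem_pool_grow() and
+* kbase_mem_pool_alloc_pages() later in this file:
+*
+*   if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+*           return -ENOMEM;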
+*/ +static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner, + const bool alloc_from_kthread) +{ + if (likely(!alloc_from_kthread || !page_owner)) + return true; + + if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) { + dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm, + task_pid_nr(page_owner)); + return false; + } + + return true; +} static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) { @@ -126,7 +170,6 @@ static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, struct page *p) { struct device *dev = pool->kbdev->dev; - dma_sync_single_for_device(dev, kbase_dma_addr(p), (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); } @@ -233,11 +276,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, } int kbase_mem_pool_grow(struct kbase_mem_pool *pool, - size_t nr_to_grow) + size_t nr_to_grow, struct task_struct *page_owner) { struct page *p; size_t i; - + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); kbase_mem_pool_lock(pool); pool->dont_reclaim = true; @@ -250,6 +293,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, return -ENOMEM; } kbase_mem_pool_unlock(pool); + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + return -ENOMEM; p = kbase_mem_alloc_page(pool); if (!p) { @@ -282,7 +327,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) if (new_size < cur_size) kbase_mem_pool_shrink(pool, cur_size - new_size); else if (new_size > cur_size) - err = kbase_mem_pool_grow(pool, new_size - cur_size); + err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL); if (err) { size_t grown_size = kbase_mem_pool_size(pool); @@ -528,13 +573,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, } int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed) + struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner) { struct page *p; size_t nr_from_pool; size_t i = 0; int err = -ENOMEM; size_t nr_pages_internal; + const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); nr_pages_internal = nr_4k_pages / (1u << (pool->order)); @@ -549,7 +595,6 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); while (nr_from_pool--) { int j; - p = kbase_mem_pool_remove_locked(pool); if (pool->order) { pages[i++] = as_tagged_tag(page_to_phys(p), @@ -567,7 +612,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, if (i != nr_4k_pages && pool->next_pool) { /* Allocate via next pool */ err = kbase_mem_pool_alloc_pages(pool->next_pool, - nr_4k_pages - i, pages + i, partial_allowed); + nr_4k_pages - i, pages + i, partial_allowed,page_owner); if (err < 0) goto err_rollback; @@ -576,6 +621,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, } else { /* Get any remaining pages from kernel */ while (i != nr_4k_pages) { + if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) + goto err_rollback; + p = kbase_mem_alloc_page(pool); if (!p) { if (partial_allowed) diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c index d41d88aebfab..066a871fd8f3 100644 --- a/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c @@ -503,6 +503,7 @@ 
static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) kbase_js_sched_all(katom->kctx->kbdev); } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -729,7 +730,6 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) return ret; } -#endif /* !MALI_USE_CSF */ #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, @@ -761,8 +761,18 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, } #endif -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) { unsigned int i; unsigned int target_page_nr = 0; @@ -849,7 +859,6 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, return ret; } -#if !MALI_USE_CSF static int kbase_debug_copy(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -867,6 +876,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ #endif /* !MALI_USE_CSF */ #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) @@ -963,6 +973,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) jit_info_copy_size_for_jit_version[kctx->jit_version]; WARN_ON(jit_info_user_copy_size > sizeof(*info)); + if (!kbase_mem_allow_alloc(kctx)) { + dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", + current->comm, current->pid, kctx->tgid, kctx->id); + ret = -EINVAL; + goto fail; + } + /* For backwards compatibility, and to prevent reading more than 1 jit * info struct on jit version 1 */ @@ -1477,10 +1494,11 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr)) goto failed_loop; - } else + } else { if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr)) failed = true; + } } /* @@ -1569,6 +1587,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_EVENT_RESET: kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: { int res = kbase_debug_copy(katom); @@ -1577,6 +1596,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) katom->event_code = BASE_JD_EVENT_JOB_INVALID; break; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: ret = kbase_jit_allocate_process(katom); break; @@ -1693,8 +1713,10 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) if (katom->jc == 0) return -EINVAL; break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: return kbase_debug_copy_prepare(katom); +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_EXT_RES_MAP: return kbase_ext_res_prepare(katom); case 
BASE_JD_REQ_SOFT_EXT_RES_UNMAP: @@ -1734,9 +1756,11 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) break; #endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: kbase_debug_copy_finish(katom); break; +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: kbase_jit_allocate_finish(katom); break; diff --git a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c index 04f5cdf42b84..6c52f0c15f36 100644 --- a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c @@ -149,17 +149,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, "true" : "false"; int as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at PA 0x%016llX\n" + "GPU bus fault in AS%d at PA %pK\n" "PA_VALID: %s\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, addr_valid, status, exception_type, kbase_gpu_exception_name(exception_type), diff --git a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c index 3130b332dec2..244214947587 100644 --- a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c @@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, u32 const exception_data = (status >> 8) & 0xFFFFFF; int const as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at PA 0x%016llX\n" + "GPU bus fault in AS%d at PA %pK\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "exception data 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, status, exception_type, kbase_gpu_exception_name(exception_type), exception_data, diff --git a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c index 9d18037ac45d..f82f77de2c4c 100644 --- a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -387,22 +387,75 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, * @level: The level of MMU page table. * @flush_op: The type of MMU flush operation to perform. * @dirty_pgds: Flags to track every level where a PGD has been updated. + * @free_pgds_list: Linked list of the page directory pages to free. 
*/ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, + struct list_head *free_pgds_list); /** * kbase_mmu_free_pgd() - Free memory of the page directory * * @kbdev: Device pointer. * @mmut: GPU MMU page table. * @pgd: Physical address of page directory to be freed. - * @dirty: Flag to indicate whether the page may be dirty in the cache. + * + * This function is supposed to be called with mmu_lock held and after + * ensuring that GPU won't be able to access the page. +*/ +static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t pgd) +{ + struct page *p; + + lockdep_assert_held(&mmut->mmu_lock); + + p = pfn_to_page(PFN_DOWN(pgd)); + + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. + */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +/** + * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @free_pgds_list: Linked list of the page directory pages to free. + * + * This function will call kbase_mmu_free_pgd() on each page directory page + * present in the @free_pgds_list. + * + * The function is supposed to be called after the GPU cache and MMU TLB has + * been invalidated post the teardown loop. */ -static void kbase_mmu_free_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - bool dirty); +static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct list_head *free_pgds_list) +{ + struct page *page, *next_page; + + mutex_lock(&mmut->mmu_lock); + + list_for_each_entry_safe(page, next_page, free_pgds_list, lru) { + list_del_init(&page->lru); + kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page)); + } + + mutex_unlock(&mmut->mmu_lock); +} + /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault @@ -1305,6 +1358,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) kbase_gpu_vm_unlock(kctx); } else { int ret = -ENOMEM; + const u8 group_id = region->gpu_alloc->group_id; kbase_gpu_vm_unlock(kctx); @@ -1316,23 +1370,21 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) if (grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = - &kctx->mem_pools.large[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.large[group_id]; pages_to_grow = (pages_to_grow + ((1 << lp_mem_pool->order) - 1)) >> lp_mem_pool->order; ret = kbase_mem_pool_grow(lp_mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); } else { #endif struct kbase_mem_pool *const mem_pool = - &kctx->mem_pools.small[ - region->gpu_alloc->group_id]; + &kctx->mem_pools.small[group_id]; ret = kbase_mem_pool_grow(mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); #ifdef CONFIG_MALI_2MB_ALLOC } #endif @@ -1376,7 +1428,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { u64 *page; - int i; + struct page *p; phys_addr_t pgd; @@ -1410,8 
+1462,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) - kbdev->mmu_mode->entry_invalidate(&page[i]); + kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); /* MMU cache flush strategy is NONE because this page is newly created, therefore * there is no content to clean or invalidate in the GPU caches. @@ -1544,9 +1595,9 @@ static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, - u64 to_vpfn, u64 *dirty_pgds) + u64 to_vpfn, u64 *dirty_pgds, + struct list_head *free_pgds_list) { - phys_addr_t pgd; u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1559,7 +1610,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode = kbdev->mmu_mode; while (vpfn < to_vpfn) { - unsigned int i; + unsigned int idx = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; unsigned int pcount = 0; @@ -1567,6 +1618,8 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, int level; u64 *page; phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + phys_addr_t pgd = mmut->pgd; + struct page *p = phys_to_page(pgd); register unsigned int num_of_valid_entries; @@ -1574,18 +1627,17 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, count = left; /* need to check if this is a 2MB page or a 4kB */ - pgd = mmut->pgd; - for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; pgds[level] = pgd; - page = kmap(phys_to_page(pgd)); + page = kmap(p); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kunmap(phys_to_page(pgd)); + kunmap(p); pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); + p = phys_to_page(pgd); } switch (level) { @@ -1612,35 +1664,69 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, else num_of_valid_entries -= pcount; - if (!num_of_valid_entries) { - kunmap(phys_to_page(pgd)); + /* Invalidate the entries we added */ + mmu_mode->entries_invalidate(&page[idx], pcount); - kbase_mmu_free_pgd(kbdev, mmut, pgd, true); + if (!num_of_valid_entries) { + kunmap(p); + list_add(&p->lru, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - KBASE_MMU_OP_NONE, dirty_pgds); + KBASE_MMU_OP_NONE, dirty_pgds, + free_pgds_list); vpfn += count; continue; } - /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[idx + i]); - mmu_mode->set_num_valid_entries(page, num_of_valid_entries); /* MMU cache flush strategy is NONE because GPU cache maintenance is * going to be done by the caller */ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), - kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, 8 * pcount, + kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, KBASE_MMU_OP_NONE); - kunmap(phys_to_page(pgd)); + kunmap(p); next: vpfn += count; } } +static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, const u64 vpfn, + size_t nr, u64 dirty_pgds, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + struct kbase_mmu_hw_op_param op_param; + int as_nr = 0; + + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op 
= KBASE_MMU_OP_FLUSH_PT; + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; + op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + +#if MALI_USE_CSF + as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR; +#else + WARN_ON(!mmut->kctx); +#endif + + /* MMU cache flush strategy depends on whether GPU control commands for + * flushing physical address ranges are supported. The new physical pages + * are not present in GPU caches therefore they don't need any cache + * maintenance, but PGDs in the page table may or may not be created anew. + * + * Operations that affect the whole GPU cache shall only be done if it's + * impossible to update physical ranges. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); + else + mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); +} + /* * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' */ @@ -1661,8 +1747,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, int err; struct kbase_device *kbdev; enum kbase_mmu_op_type flush_op; - struct kbase_mmu_hw_op_param op_param; u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); if (WARN_ON(kctx == NULL)) return -EINVAL; @@ -1676,15 +1762,6 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; - /* Set up MMU flush operation parameters. */ - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = vpfn, - .nr = nr, - .op = KBASE_MMU_OP_FLUSH_PT, - .kctx_id = kctx->id, - .mmu_sync_info = mmu_sync_info, - }; - mutex_lock(&kctx->mmu.mmu_lock); while (remain) { @@ -1717,7 +1794,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, err = kbase_mem_pool_grow( &kbdev->mem_pools.small[ kctx->mmu.group_id], - MIDGARD_MMU_BOTTOMLEVEL); + MIDGARD_MMU_BOTTOMLEVEL,kctx->task); mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { @@ -1729,7 +1806,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds); + &dirty_pgds, &free_pgds_list); } goto fail_unlock; } @@ -1744,7 +1821,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds); + &dirty_pgds, &free_pgds_list); } err = -ENOMEM; goto fail_unlock; @@ -1795,53 +1872,18 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, } mutex_unlock(&kctx->mmu.mmu_lock); - op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); - /* If FLUSH_PA_RANGE is supported then existing PGDs will have been flushed - * and all that remains is TLB (or MMU cache) invalidation which is done via - * MMU UNLOCK command. 
- */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) - mmu_invalidate(kbdev, kctx, kctx->as_nr, &op_param); - else - mmu_flush_invalidate(kbdev, kctx, kctx->as_nr, &op_param); + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); return 0; fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); - op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) - mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, kctx->as_nr, &op_param); - else - mmu_flush_invalidate(kbdev, kctx, kctx->as_nr, &op_param); + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); + kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); return err; } -static void kbase_mmu_free_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - bool dirty) -{ - struct page *p; - - lockdep_assert_held(&mmut->mmu_lock); - - p = pfn_to_page(PFN_DOWN(pgd)); - - kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], - p, dirty); - - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. - */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); -} - u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, unsigned long const flags, int const level, int const group_id) @@ -1863,6 +1905,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu size_t remain = nr; int err; struct kbase_mmu_mode const *mmu_mode; + LIST_HEAD(free_pgds_list); /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ @@ -1912,7 +1955,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu mutex_unlock(&mmut->mmu_lock); err = kbase_mem_pool_grow( &kbdev->mem_pools.small[mmut->group_id], - cur_level); + cur_level,mmut->kctx ? 
mmut->kctx->task : NULL); mutex_lock(&mmut->mmu_lock); } while (!err); @@ -1923,7 +1966,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds); + insert_vpfn, dirty_pgds, + &free_pgds_list); } goto fail_unlock; } @@ -1938,7 +1982,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds); + insert_vpfn, dirty_pgds, + &free_pgds_list); } err = -ENOMEM; goto fail_unlock; @@ -1949,20 +1994,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu if (cur_level == MIDGARD_MMU_LEVEL(2)) { int level_index = (insert_vpfn >> 9) & 0x1FF; - u64 *target = &pgd_page[level_index]; - - if (mmu_mode->pte_is_valid(*target, cur_level)) { - kbase_mmu_free_pgd( - kbdev, mmut, - kbdev->mmu_mode->pte_to_phy_addr( - kbdev->mgm_dev->ops.mgm_pte_to_original_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, - cur_level, *target)), - false); - num_of_valid_entries--; - } - *target = kbase_mmu_create_ate(kbdev, *phys, flags, - cur_level, group_id); + pgd_page[level_index] = + kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id); num_of_valid_entries++; } else { @@ -2013,10 +2046,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu kunmap(p); } - err = 0; + mutex_unlock(&mmut->mmu_lock); + return 0; fail_unlock: mutex_unlock(&mmut->mmu_lock); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, + CALLER_MMU_ASYNC); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; } @@ -2031,8 +2068,8 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, enum kbase_caller_mmu_sync_info mmu_sync_info) { int err; - struct kbase_mmu_hw_op_param op_param = { 0 }; u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); /* Early out if there is nothing to do */ if (nr == 0) @@ -2041,27 +2078,12 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds); - op_param.vpfn = vpfn; - op_param.nr = nr; - op_param.op = KBASE_MMU_OP_FLUSH_PT; - op_param.mmu_sync_info = mmu_sync_info; - op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; - op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + if (err) + return err; - /* MMU cache flush strategy depends on whether GPU control commands for - * flushing physical address ranges are supported. The new physical pages - * are not present in GPU caches there for they don't need any cache - * maintenance, but PGDs in the page table may or may not be created anew. - * - * Operations that affect the whole GPU cache shall only be done if it's - * impossible to update physical ranges. 
-	 */
-	if (mmu_flush_cache_on_gpu_ctrl(kbdev))
-		mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
-	else
-		mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
+	mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info);
-	return err;
+	return 0;
 }
 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
@@ -2177,7 +2199,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable);
 static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
 						  struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level,
-						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
+						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
+						  struct list_head *free_pgds_list)
 {
 	int current_level;
@@ -2185,36 +2208,34 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
 	for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0);
 	     current_level--) {
-		u64 *current_page = kmap(phys_to_page(pgds[current_level]));
+		phys_addr_t current_pgd = pgds[current_level];
+		struct page *p = phys_to_page(current_pgd);
+		u64 *current_page = kmap(p);
 		unsigned int current_valid_entries =
 			kbdev->mmu_mode->get_num_valid_entries(current_page);
+		int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
 		/* We need to track every level that needs updating */
 		if (dirty_pgds)
 			*dirty_pgds |= 1ULL << current_level;
+		kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
+
 		if (current_valid_entries == 1 &&
 		    current_level != MIDGARD_MMU_LEVEL(0)) {
-			kunmap(phys_to_page(pgds[current_level]));
+			kunmap(p);
-			kbase_mmu_free_pgd(kbdev, mmut, pgds[current_level],
-					   true);
+			list_add(&p->lru, free_pgds_list);
 		} else {
-			int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
-
-			kbdev->mmu_mode->entry_invalidate(&current_page[index]);
-
 			current_valid_entries--;
 			kbdev->mmu_mode->set_num_valid_entries(
 				current_page, current_valid_entries);
+			kunmap(p);
-			kbase_mmu_sync_pgd(
-				kbdev, mmut->kctx, pgds[current_level] + (index * sizeof(u64)),
-				kbase_dma_addr(phys_to_page(pgds[current_level])) + 8 * index,
-				8 * 1, flush_op);
-
-			kunmap(phys_to_page(pgds[current_level]));
+			kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
+					   kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64),
+					   flush_op);
 			break;
 		}
 	}
@@ -2286,15 +2307,15 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
 int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
 			     struct tagged_addr *phys, size_t nr, int as_nr)
 {
-	phys_addr_t pgd;
 	u64 start_vpfn = vpfn;
 	size_t requested_nr = nr;
 	enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
 	struct kbase_mmu_mode const *mmu_mode;
 	struct kbase_mmu_hw_op_param op_param;
-	unsigned int i;
+	int err = -EFAULT;
 	u64 dirty_pgds = 0;
+	LIST_HEAD(free_pgds_list);
 	/* Calls to this function are inherently asynchronous, with respect to
 	 * MMU operations.
@@ -2332,19 +2353,19 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
 		u64 *page;
 		phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
 		register unsigned int num_of_valid_entries;
+		phys_addr_t pgd = mmut->pgd;
+		struct page *p = phys_to_page(pgd);
 		if (count > nr)
 			count = nr;
-		/* need to check if this is a 2MB or a 4kB page */
-		pgd = mmut->pgd;
-
+		/* need to check if this is a 2MB page or a 4kB */
 		for (level = MIDGARD_MMU_TOPLEVEL;
 				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
 			phys_addr_t next_pgd;
 			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
-			page = kmap(phys_to_page(pgd));
+			page = kmap(p);
 			if (mmu_mode->ate_is_valid(page[index], level))
 				break; /* keep the mapping */
 			else if (!mmu_mode->pte_is_valid(page[index], level)) {
@@ -2370,9 +2391,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
 			next_pgd = mmu_mode->pte_to_phy_addr(
 				kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
 					kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index]));
+			kunmap(p);
 			pgds[level] = pgd;
-			kunmap(phys_to_page(pgd));
 			pgd = next_pgd;
+			p = phys_to_page(pgd);
 		}
 		switch (level) {
@@ -2381,7 +2403,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
 			dev_warn(kbdev->dev,
 				 "%s: No support for ATEs at level %d\n",
 				 __func__, level);
-			kunmap(phys_to_page(pgd));
+			kunmap(p);
 			goto out;
 		case MIDGARD_MMU_LEVEL(2):
 			/* can only teardown if count >= 512 */
@@ -2416,30 +2438,29 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
 		else
 			num_of_valid_entries -= pcount;
-		if (!num_of_valid_entries) {
-			kunmap(phys_to_page(pgd));
+		/* Invalidate the entries we added */
+		mmu_mode->entries_invalidate(&page[index], pcount);
-			kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+		if (!num_of_valid_entries) {
+			kunmap(p);
+			list_add(&p->lru, &free_pgds_list);
 			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
-							      flush_op, &dirty_pgds);
+							      flush_op, &dirty_pgds,
+							      &free_pgds_list);
 			vpfn += count;
 			nr -= count;
 			continue;
 		}
-		/* Invalidate the entries we added */
-		for (i = 0; i < pcount; i++)
-			mmu_mode->entry_invalidate(&page[index + i]);
-
 		mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
-				   kbase_dma_addr(phys_to_page(pgd)) + 8 * index, 8 * pcount,
+				   kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64),
 				   flush_op);
 next:
-		kunmap(phys_to_page(pgd));
+		kunmap(p);
 		vpfn += count;
 		nr -= count;
 	}
@@ -2457,6 +2478,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
 		.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
 	};
 	mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param);
+	kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
 	return err;
 }
@@ -2631,41 +2653,39 @@ static void mmu_teardown_level(struct kbase_device *kbdev,
 				struct kbase_mmu_table *mmut, phys_addr_t pgd,
 				int level)
 {
-	phys_addr_t target_pgd;
 	u64 *pgd_page;
 	int i;
-	struct kbase_mmu_mode const *mmu_mode;
-	u64 *pgd_page_buffer;
+	struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
+	struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
+	u64 *pgd_page_buffer = NULL;
 	lockdep_assert_held(&mmut->mmu_lock);
-	/* Early-out. No need to kmap to check entries for L3 PGD. */
-	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
-		kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
-		return;
-	}
-
 	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
 	/* kmap_atomic should NEVER fail. */
 	if (WARN_ON(pgd_page == NULL))
 		return;
-	/* Copy the page to our preallocated buffer so that we can minimize
-	 * kmap_atomic usage
-	 */
-	pgd_page_buffer = mmut->mmu_teardown_pages[level];
-	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
-	kunmap_atomic(pgd_page);
-	pgd_page = pgd_page_buffer;
-	mmu_mode = kbdev->mmu_mode;
+	if (level != MIDGARD_MMU_BOTTOMLEVEL) {
+		/* Copy the page to our preallocated buffer so that we can minimize
+		 * kmap_atomic usage
+		 */
+		pgd_page_buffer = mmut->mmu_teardown_pages[level];
+		memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	}
-	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
-		target_pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
-			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP,
-			level, pgd_page[i]));
+	/* Invalidate page after copying */
+	mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
-		if (target_pgd) {
+	if (level != MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
 			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr(
+					mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev,
+									     MGM_DEFAULT_PTE_GROUP,
+									     level, pgd_page[i]));
 				mmu_teardown_level(kbdev, mmut, target_pgd, level + 1);
@@ -2673,7 +2693,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev,
 		}
 	}
-	kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+	kbase_mmu_free_pgd(kbdev, mmut, pgd);
 }
 int kbase_mmu_init(struct kbase_device *const kbdev,
@@ -2712,7 +2732,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
 		err = kbase_mem_pool_grow(
 			&kbdev->mem_pools.small[mmut->group_id],
-			MIDGARD_MMU_BOTTOMLEVEL);
+			MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
 		if (err) {
 			kbase_mmu_term(kbdev, mmut);
 			return -ENOMEM;
 		}
@@ -2730,6 +2750,10 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
 {
 	int level;
+	WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
+	     "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
+	     mmut->kctx->tgid, mmut->kctx->id);
+
 	if (mmut->pgd) {
 		mutex_lock(&mmut->mmu_lock);
 		mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL);
@@ -2754,6 +2778,7 @@ void kbase_mmu_as_term(struct kbase_device *kbdev, int i)
 	destroy_workqueue(kbdev->as[i].pf_wq);
 }
+#if defined(CONFIG_MALI_VECTOR_DUMP)
 static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
 				    int level, char ** const buffer, size_t *size_left)
 {
@@ -2895,6 +2920,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 	return NULL;
 }
 KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */
 void kbase_mmu_bus_fault_worker(struct work_struct *data)
 {
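The hunks above consistently replace immediate kbase_mmu_free_pgd() calls on the teardown and error paths with list_add() onto a local free_pgds_list, and only hand that list to kbase_mmu_free_pgds_list() after the MMU flush/invalidate has been issued. Below is a minimal standalone sketch of that deferral pattern; defer_pgd_free() and free_deferred_pgds() are illustrative names, and only the list primitives and the reuse of struct page::lru mirror the patch.

#include <linux/list.h>
#include <linux/mm.h>

/* Queue a page-table page for freeing later, instead of freeing it while the
 * GPU may still be walking it. struct page::lru is unused for these pages, so
 * it can carry the page on a local list until the flush completes.
 */
static void defer_pgd_free(struct page *pgd_page, struct list_head *free_pgds_list)
{
	list_add(&pgd_page->lru, free_pgds_list);
}

/* Runs only after the MMU flush/invalidate for the affected range has been
 * issued, mirroring the ordering the patch enforces via
 * kbase_mmu_free_pgds_list(). Here the pages simply go back to the page
 * allocator; the driver returns them to its memory pool instead.
 */
static void free_deferred_pgds(struct list_head *free_pgds_list)
{
	struct page *p, *tmp;

	list_for_each_entry_safe(p, tmp, free_pgds_list, lru) {
		list_del_init(&p->lru);
		__free_page(p);
	}
}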
diff --git a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu_mode_aarch64.c
index dfbdee17782b..fcbccae40ffe 100644
--- a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -194,25 +194,26 @@ static void entry_set_pte(u64 *entry, phys_addr_t phy)
 	page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
 }
-static void entry_invalidate(u64 *entry)
+static void entries_invalidate(u64 *entry, u32 count)
 {
-	page_table_entry_set(entry, ENTRY_IS_INVAL);
+	u32 i;
+
+	for (i = 0; i < count; i++)
+		page_table_entry_set(entry + i, ENTRY_IS_INVAL);
 }
-static const struct kbase_mmu_mode aarch64_mode = {
-	.update = mmu_update,
-	.get_as_setup = kbase_mmu_get_as_setup,
-	.disable_as = mmu_disable_as,
-	.pte_to_phy_addr = pte_to_phy_addr,
-	.ate_is_valid = ate_is_valid,
-	.pte_is_valid = pte_is_valid,
-	.entry_set_ate = entry_set_ate,
-	.entry_set_pte = entry_set_pte,
-	.entry_invalidate = entry_invalidate,
-	.get_num_valid_entries = get_num_valid_entries,
-	.set_num_valid_entries = set_num_valid_entries,
-	.flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE
-};
+static const struct kbase_mmu_mode aarch64_mode = { .update = mmu_update,
+						    .get_as_setup = kbase_mmu_get_as_setup,
+						    .disable_as = mmu_disable_as,
+						    .pte_to_phy_addr = pte_to_phy_addr,
+						    .ate_is_valid = ate_is_valid,
+						    .pte_is_valid = pte_is_valid,
+						    .entry_set_ate = entry_set_ate,
+						    .entry_set_pte = entry_set_pte,
+						    .entries_invalidate = entries_invalidate,
+						    .get_num_valid_entries = get_num_valid_entries,
+						    .set_num_valid_entries = set_num_valid_entries,
+						    .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE };
 struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
 {
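With entries_invalidate() in the mmu_mode backend, callers no longer open-code per-entry loops around entry_invalidate(). A condensed, hypothetical caller is sketched below; clear_pte_run() is not part of the driver and assumes the kbase_mmu_mode ops visible in the hunk above.

/* Invalidate a run of @pcount entries starting at @index in a mapped PGD page
 * and keep the valid-entry bookkeeping consistent, roughly what the teardown
 * path now does in a single call instead of a loop.
 */
static void clear_pte_run(const struct kbase_mmu_mode *mmu_mode, u64 *page,
			  unsigned int index, unsigned int pcount)
{
	unsigned int valid = mmu_mode->get_num_valid_entries(page);

	mmu_mode->entries_invalidate(&page[index], pcount);
	mmu_mode->set_num_valid_entries(page, valid - pcount);
}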
diff --git a/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.c b/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.c
index 9fd463eb6962..f11a6e8a5dcd 100644
--- a/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.c
+++ b/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.c
@@ -192,6 +192,8 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
 	if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) {
 		int rcode;
 
+		if (!timeline_is_permitted())
+			return -EPERM;
 #if MALI_USE_CSF
 		if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) {
diff --git a/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.h b/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.h
index 96a4b181a285..10f286a45a75 100644
--- a/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.h
+++ b/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline.h
@@ -117,4 +117,6 @@ void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx);
 void kbase_timeline_stats(struct kbase_timeline *timeline,
 			  u32 *bytes_collected, u32 *bytes_generated);
 #endif /* MALI_UNIT_TEST */
+bool timeline_is_permitted(void);
+
 #endif /* _KBASE_TIMELINE_H */
diff --git a/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline_io.c
index af8b3d8c8c35..33e4f4fab7a6 100644
--- a/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline_io.c
+++ b/drivers/gpu/arm/bv_r38p1/tl/mali_kbase_timeline_io.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,59 @@
 #include
 #include
+#ifndef MALI_STRIP_KBASE_DEVELOPMENT
+/* Development builds need to test instrumentation and enable unprivileged
+ * processes to acquire timeline streams, in order to avoid complications
+ * with configurations across multiple platforms and systems.
+ *
+ * Release builds, instead, shall deny access to unprivileged processes
+ * because there are no use cases where they are allowed to acquire timeline
+ * streams, unless they're given special permissions by a privileged process.
+ */
+static int kbase_unprivileged_global_profiling = 1;
+#else
+static int kbase_unprivileged_global_profiling;
+#endif
+
+/**
+ * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes
+ *
+ * @val: String containing value to set. Only strings representing positive
+ *       integers are accepted as valid; any non-positive integer (including 0)
+ *       is rejected.
+ * @kp: Module parameter associated with this method.
+ *
+ * This method can only be used to enable permissions for unprivileged processes,
+ * if they are disabled: for this reason, the only values which are accepted are
+ * strings representing positive integers. Since it's impossible to disable
+ * permissions once they're set, any integer which is non-positive is rejected,
+ * including 0.
+ *
+ * Return: 0 if success, otherwise error code.
+ */
+static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp)
+{
+	int new_val;
+	int ret = kstrtoint(val, 0, &new_val);
+
+	if (ret == 0) {
+		if (new_val < 1)
+			return -EINVAL;
+
+		kbase_unprivileged_global_profiling = 1;
+	}
+
+	return ret;
+}
+
+static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = {
+	.get = param_get_int,
+	.set = kbase_unprivileged_global_profiling_set,
+};
+
+module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops,
+		&kbase_unprivileged_global_profiling, 0600);
+
 /* The timeline stream file operations functions. */
 static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
 				       size_t size, loff_t *f_pos);
@@ -45,6 +98,15 @@ const struct file_operations kbasep_tlstream_fops = {
 	.fsync = kbasep_timeline_io_fsync,
 };
+bool timeline_is_permitted(void)
+{
+#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE
+	return kbase_unprivileged_global_profiling || perfmon_capable();
+#else
+	return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN);
+#endif
+}
+
 /**
  * kbasep_timeline_io_packet_pending - check timeline streams for pending
  * packets
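The timeline_io.c hunk above gates stream acquisition behind timeline_is_permitted() and adds an enable-only module parameter that a privileged user can flip at runtime (for example by writing 1 to /sys/module/<module>/parameters/kbase_unprivileged_global_profiling). The same one-way parameter pattern in isolation, with illustrative names (example_profiling_enabled, example_set), might look like this:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static int example_profiling_enabled;

/* Accept only positive integers: the gate can be opened at runtime but never
 * closed again, matching the behaviour of the setter added in the patch.
 */
static int example_set(const char *val, const struct kernel_param *kp)
{
	int new_val;
	int ret = kstrtoint(val, 0, &new_val);

	if (ret)
		return ret;
	if (new_val < 1)
		return -EINVAL;

	example_profiling_enabled = 1;
	return 0;
}

static const struct kernel_param_ops example_ops = {
	.get = param_get_int,
	.set = example_set,
};

module_param_cb(example_profiling_enabled, &example_ops, &example_profiling_enabled, 0600);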
diff --git a/include/uapi/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_regmap_jm.h b/include/uapi/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_regmap_jm.h
index 1be3541c930f..a5d8303f5a10 100644
--- a/include/uapi/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/include/uapi/gpu/arm/bv_r32p1/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -261,6 +261,12 @@
 #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
 #define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
+#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
+/* Merge cache flush commands */
+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
+	((cmd1) > (cmd2) ? (cmd1) : (cmd2))
+
 /* IRQ flags */
 #define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
 #define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
@@ -269,6 +275,8 @@
 #define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
 #define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
 #define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+#define FLUSH_PA_RANGE_COMPLETED \
+	(1 << 20) /* Set when a physical range cache clean operation has completed. */
 /*
  * In Debug build,
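GPU_COMMAND_FLUSH_CACHE_MERGE() simply keeps the numerically larger of two flush commands, which works because the command encodings are ordered by flush scope; on this Job Manager GPU both CACHE_CLN_INV_L2 and CACHE_CLN_INV_L2_LSC alias GPU_COMMAND_CLEAN_INV_CACHES, so the merge degenerates to that single command. A small illustrative helper (not part of the patch) that merges a previously queued flush command with a newly requested one before it is written to GPU_COMMAND:

/* Assumes the regmap header above is included; the helper name is
 * illustrative only.
 */
static u32 merge_flush_commands(u32 queued_cmd, u32 requested_cmd)
{
	/* Keep the stronger (numerically larger) of the two commands. */
	return GPU_COMMAND_FLUSH_CACHE_MERGE(queued_cmd, requested_cmd);
}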
diff --git a/include/uapi/gpu/arm/bv_r32p1/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bv_r32p1/jm/mali_kbase_jm_ioctl.h
index 72d75cb34ec5..2fed03f0a119 100644
--- a/include/uapi/gpu/arm/bv_r32p1/jm/mali_kbase_jm_ioctl.h
+++ b/include/uapi/gpu/arm/bv_r32p1/jm/mali_kbase_jm_ioctl.h
@@ -119,6 +119,8 @@
  * 11.31:
  * - Added BASE_JD_REQ_LIMITED_CORE_MASK.
  * - Added ioctl 55: set_limited_core_count.
+ * 11.38:
+ * - CPU mappings of USER_BUFFER imported memory handles must be cached.
  */
 #define BASE_UK_VERSION_MAJOR 11
 #define BASE_UK_VERSION_MINOR 31
diff --git a/include/uapi/gpu/arm/bv_r38p1/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bv_r38p1/jm/mali_kbase_jm_ioctl.h
index 20d931adc9b8..454249d6226e 100644
--- a/include/uapi/gpu/arm/bv_r38p1/jm/mali_kbase_jm_ioctl.h
+++ b/include/uapi/gpu/arm/bv_r38p1/jm/mali_kbase_jm_ioctl.h
@@ -127,6 +127,8 @@
  * - First release of new HW performance counters interface.
  * 11.35:
  * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 11.38:
+ * - CPU mappings of USER_BUFFER imported memory handles must be cached.
  */
 #define BASE_UK_VERSION_MAJOR 11
 #define BASE_UK_VERSION_MINOR 35