Skip to content

Commit 3f001ea

Browse files
authored
Merge pull request #22 from cybergaszcz/nvdla
Added nvdla driver
2 parents e865a75 + 164b938 commit 3f001ea

33 files changed

+32611
-0
lines changed

drivers/Kconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,4 +235,6 @@ source "drivers/interconnect/Kconfig"
235235
source "drivers/counter/Kconfig"
236236

237237
source "drivers/most/Kconfig"
238+
239+
source "drivers/nvdla/Kconfig"
238240
endmenu

drivers/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,3 +189,4 @@ obj-$(CONFIG_GNSS) += gnss/
189189
obj-$(CONFIG_INTERCONNECT) += interconnect/
190190
obj-$(CONFIG_COUNTER) += counter/
191191
obj-$(CONFIG_MOST) += most/
192+
obj-$(CONFIG_NVDLA) += nvdla/

drivers/nvdla/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
config NVDLA
	bool "The NVIDIA Deep Learning Accelerator"
	default y
	depends on DRM
	select DRM_GEM_CMA_HELPER
	help
	  Driver for the NVIDIA Deep Learning Accelerator (NVDLA), a
	  fixed-function hardware block for convolutional neural network
	  inference. The driver exposes the device through the DRM
	  subsystem and uses the DRM GEM CMA helpers for contiguous
	  buffer allocation.

	  If unsure, say N.

drivers/nvdla/Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2+
# Kbuild rules for the NVDLA driver.
#
# Include paths must be $(srctree)-relative: a bare -Idrivers/nvdla is
# resolved against the build directory and breaks separate-output-tree
# (make O=...) builds.
ccflags-$(CONFIG_NVDLA) += -I$(srctree)/drivers/nvdla
ccflags-$(CONFIG_NVDLA) += -I$(srctree)/drivers/nvdla/include

obj-$(CONFIG_NVDLA) += scheduler.o
obj-$(CONFIG_NVDLA) += engine.o
obj-$(CONFIG_NVDLA) += bdma.o
obj-$(CONFIG_NVDLA) += conv.o
obj-$(CONFIG_NVDLA) += sdp.o
obj-$(CONFIG_NVDLA) += cdp.o
obj-$(CONFIG_NVDLA) += pdp.o
obj-$(CONFIG_NVDLA) += rubik.o
obj-$(CONFIG_NVDLA) += cache.o
obj-$(CONFIG_NVDLA) += common.o
obj-$(CONFIG_NVDLA) += engine_data.o
obj-$(CONFIG_NVDLA) += engine_isr.o
obj-$(CONFIG_NVDLA) += engine_debug.o
obj-$(CONFIG_NVDLA) += nvdla_core_callbacks.o
obj-$(CONFIG_NVDLA) += nvdla_gem.o

drivers/nvdla/bdma.c

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
/*
2+
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions
6+
* are met:
7+
* * Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* * Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in the
11+
* documentation and/or other materials provided with the distribution.
12+
* * Neither the name of NVIDIA CORPORATION nor the names of its
13+
* contributors may be used to endorse or promote products derived
14+
* from this software without specific prior written permission.
15+
*
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
*/
28+
29+
#include <opendla.h>
30+
#include <dla_debug.h>
31+
#include <dla_err.h>
32+
#include <dla_interface.h>
33+
34+
#include "dla_engine_internal.h"
35+
#include "engine_debug.h"
36+
37+
/*
 * Maps the driver's memory-type index to the hardware encoding of the
 * BDMA SRC_RAM_TYPE / DST_RAM_TYPE register fields:
 * index 0 -> MC (external DRAM), index 1 -> CVSRAM (on-chip SRAM).
 */
static const uint8_t map_mem[] = {
	FIELD_ENUM(BDMA_CFG_CMD_0, SRC_RAM_TYPE, MC),
	FIELD_ENUM(BDMA_CFG_CMD_0, SRC_RAM_TYPE, CVSRAM),
};
41+
42+
#if STAT_ENABLE
43+
void
44+
dla_bdma_stat_data(struct dla_processor *processor,
45+
struct dla_processor_group *group)
46+
{
47+
uint64_t end_time = 0;
48+
struct dla_bdma_stat_desc *bdma_stat;
49+
50+
bdma_stat = &processor->stat_data_desc->bdma_stat;
51+
52+
end_time = dla_get_time_us();
53+
54+
if (group->id == (uint32_t)0) {
55+
bdma_stat->read_stall = bdma_reg_read(STATUS_GRP0_READ_STALL);
56+
bdma_stat->write_stall = bdma_reg_read(STATUS_GRP0_WRITE_STALL);
57+
} else {
58+
bdma_stat->read_stall = bdma_reg_read(STATUS_GRP1_READ_STALL);
59+
bdma_stat->write_stall = bdma_reg_read(STATUS_GRP1_WRITE_STALL);
60+
}
61+
bdma_stat->runtime = (uint32_t)(end_time - group->start_time);
62+
}
63+
64+
void
65+
dla_bdma_dump_stat(struct dla_processor *processor)
66+
{
67+
struct dla_bdma_stat_desc *bdma_stat;
68+
69+
bdma_stat = &processor->stat_data_desc->bdma_stat;
70+
71+
dla_debug_bdma_stats(bdma_stat);
72+
}
73+
#endif /* STAT_ENABLE */
74+
75+
/**
 * Intentionally a no-op: BDMA has no producer bit. Which outstanding
 * request a BDMA operation uses is selected by the interrupt pointer,
 * so there is nothing to program here.
 */
void
dla_bdma_set_producer(int32_t group_id, int32_t rdma_group_id)
{
}
84+
85+
int
86+
dla_bdma_enable(struct dla_processor_group *group)
87+
{
88+
struct dla_engine *engine = dla_get_engine();
89+
90+
dla_debug("Enter: %s\n", __func__);
91+
92+
if (group->surface_desc->bdma_surface.num_transfers == (uint16_t)0) {
93+
group->events |= ((uint8_t)1 << DLA_EVENT_OP_COMPLETED);
94+
goto exit;
95+
}
96+
97+
if (engine->stat_enable == (uint32_t)1) {
98+
bdma_reg_write(CFG_STATUS, FIELD_ENUM(BDMA_CFG_STATUS_0,
99+
STALL_COUNT_EN, YES));
100+
group->start_time = dla_get_time_us();
101+
}
102+
103+
/**
104+
* Launch BDMA transfer
105+
*/
106+
if (group->id == 0)
107+
bdma_reg_write(CFG_LAUNCH0, FIELD_ENUM(BDMA_CFG_LAUNCH0_0,
108+
GRP0_LAUNCH, YES));
109+
else
110+
bdma_reg_write(CFG_LAUNCH1, FIELD_ENUM(BDMA_CFG_LAUNCH1_0,
111+
GRP1_LAUNCH, YES));
112+
113+
exit:
114+
dla_debug("Exit: %s\n", __func__);
115+
return 0;
116+
}
117+
118+
void
119+
dla_bdma_rdma_check(struct dla_processor_group *group)
120+
{
121+
group->is_rdma_needed = 0;
122+
}
123+
124+
/**
 * Program one BDMA hardware slot with a single transfer.
 *
 * Busy-polls the BDMA status register until at least one slot is free,
 * resolves the task-relative buffer handles to DMA addresses, validates
 * the transfer geometry, and writes the transfer parameters into the
 * BDMA configuration registers.
 *
 * Returns 0 on success, or ERR(INVALID_INPUT) via ASSERT_GOTO when a
 * geometry check fails.
 */
static int32_t
processor_bdma_program_slot(struct dla_bdma_surface_desc *bdma_surface,
				struct dla_bdma_transfer_desc *transfer)
{
	int32_t ret = 0;
	uint64_t source_addr = 0;
	uint64_t destination_addr = 0;
	uint32_t high, low, reg;
	/*
	 * NOTE(review): initialized to 0 and never set elsewhere before the
	 * check below, so the "if (bdma_free_slots <= 0)" guard is always
	 * true and the poll loop always runs.
	 */
	uint8_t bdma_free_slots = 0;
	uint64_t bdma_len = 0;	/* only used under CONFIG_NVDLA_NEED_FLUSH */
	struct dla_engine *engine = dla_get_engine();

	dla_debug("Enter: %s\n", __func__);

	/* make sure there're enough free slots */
	if (bdma_free_slots <= 0) {
		/* spin until FREE_SLOT in the status register is non-zero */
		do {
			reg = bdma_reg_read(STATUS);
			reg = (reg & MASK(BDMA_STATUS_0, FREE_SLOT)) >>
						SHIFT(BDMA_STATUS_0, FREE_SLOT);
		} while (reg == 0);
		bdma_free_slots = (uint8_t)reg;
	}

	/* resolve source/destination handles into DMA-visible addresses */
	dla_get_dma_address(engine->driver_context, engine->task->task_data,
						transfer->source_address,
						(void *)&source_addr,
						DESTINATION_DMA);
	dla_get_dma_address(engine->driver_context, engine->task->task_data,
						transfer->destination_address,
						(void *)&destination_addr,
						DESTINATION_DMA);

	/* validate transfer geometry against hardware limits */
	ASSERT_GOTO((transfer->line_repeat <= 8192),
				ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO((transfer->surface_repeat <= 8192),
				ret, ERR(INVALID_INPUT), exit);
	/* line size is programmed in 32-byte units (see CFG_LINE below) */
	ASSERT_GOTO((transfer->line_size % 32) == 0,
				ret, ERR(INVALID_INPUT), exit);
	/* strides must be at least as large as the data they step over */
	ASSERT_GOTO(transfer->source_line >= transfer->line_size,
				ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO(transfer->destination_line >= transfer->line_size,
				ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO(transfer->source_surface >=
			(transfer->source_line * transfer->line_repeat),
				ret, ERR(INVALID_INPUT), exit);
	ASSERT_GOTO(transfer->destination_surface >=
			(transfer->destination_line * transfer->line_repeat),
				ret, ERR(INVALID_INPUT), exit);

	/* config registers */
	high = HIGH32BITS(source_addr);
	low = LOW32BITS(source_addr);
	bdma_reg_write(CFG_SRC_ADDR_LOW, low);
	bdma_reg_write(CFG_SRC_ADDR_HIGH, high);
	high = HIGH32BITS(destination_addr);
	low = LOW32BITS(destination_addr);
	bdma_reg_write(CFG_DST_ADDR_LOW, low);
	bdma_reg_write(CFG_DST_ADDR_HIGH, high);
	/* CFG_LINE holds (line_size / 32) - 1 */
	bdma_reg_write(CFG_LINE, (transfer->line_size >> 5) - 1);
	reg = (map_mem[bdma_surface->source_type] <<
			SHIFT(BDMA_CFG_CMD_0, SRC_RAM_TYPE)) |
		(map_mem[bdma_surface->destination_type] <<
			SHIFT(BDMA_CFG_CMD_0, DST_RAM_TYPE));
	bdma_reg_write(CFG_CMD, reg);
	bdma_reg_write(CFG_LINE_REPEAT, transfer->line_repeat - 1);
	bdma_reg_write(CFG_SRC_LINE, transfer->source_line);
	bdma_reg_write(CFG_DST_LINE, transfer->destination_line);
	bdma_reg_write(CFG_SURF_REPEAT, transfer->surface_repeat - 1);
	bdma_reg_write(CFG_SRC_SURF, transfer->source_surface);
	bdma_reg_write(CFG_DST_SURF, transfer->destination_surface);
	bdma_reg_write(CFG_OP, FIELD_ENUM(BDMA_CFG_OP_0, EN, ENABLE));

#ifdef CONFIG_NVDLA_NEED_FLUSH
	/*
	 * NOTE(review): the flush length assumes the source surface stride
	 * covers the whole transfer — the original author flagged this
	 * with "???". Confirm it is correct for strided destinations.
	 */
	bdma_len = (transfer->surface_repeat) * (transfer->source_surface); //???
	nvdla_flush_dcache((unsigned long)source_addr,bdma_len);
	nvdla_flush_dcache((unsigned long)destination_addr,bdma_len);
	dla_info("%s():surface_repeat:%#x,source_surface:%#x bdmalen:%#llx\n",__func__,transfer->surface_repeat,transfer->source_surface,bdma_len);
	dla_info("%s():dma_src_addr:%#llx,dma_dst_addr:%#llx \n",__func__,source_addr,destination_addr);
#endif

	dla_debug("Exit: %s\n", __func__);

exit:
	RETURN(ret);
}
213+
214+
int
215+
dla_bdma_is_ready(struct dla_processor *processor,
216+
struct dla_processor_group *group)
217+
{
218+
struct dla_processor_group *next_group;
219+
220+
next_group = &processor->groups[!group->id];
221+
222+
/**
223+
* If another group is already programmed but not active then
224+
* do not program this operation as BDMA does not really
225+
* have shadow copies for groups. It will end programming
226+
* same group. Wait for another group to get enabled.
227+
*/
228+
if ((processor->group_status & (1 << next_group->id)) &&
229+
!next_group->active)
230+
return 0;
231+
232+
return 1;
233+
}
234+
235+
void
236+
dla_bdma_dump_config(struct dla_processor_group *group)
237+
{
238+
struct dla_bdma_op_desc *bdma_op;
239+
struct dla_bdma_surface_desc *bdma_surface;
240+
241+
bdma_surface = &group->surface_desc->bdma_surface;
242+
bdma_op = &group->operation_desc->bdma_op;
243+
244+
dla_debug_bdma_surface_desc(bdma_surface, group->roi_index);
245+
dla_debug_bdma_op_desc(bdma_op, group->roi_index);
246+
}
247+
248+
int
249+
dla_bdma_program(struct dla_processor_group *group)
250+
{
251+
int32_t i;
252+
int32_t ret = 0;
253+
struct dla_bdma_surface_desc *bdma_surface;
254+
struct dla_engine *engine = dla_get_engine();
255+
256+
dla_debug("Enter: %s\n", __func__);
257+
258+
if (!engine->config_data->bdma_enable) {
259+
dla_error("BDMA is not supported for this configuration\n");
260+
ret = ERR(INVALID_INPUT);
261+
goto exit;
262+
}
263+
264+
bdma_surface = &group->surface_desc->bdma_surface;
265+
266+
dla_debug("Num of transfers %u\n", bdma_surface->num_transfers);
267+
if (bdma_surface->num_transfers == (uint16_t)0)
268+
goto exit;
269+
270+
if (bdma_surface->num_transfers > NUM_MAX_BDMA_OPS) {
271+
dla_error("Invalid number of transfers\n");
272+
ret = ERR(INVALID_INPUT);
273+
goto exit;
274+
}
275+
276+
for (i = 0; i < bdma_surface->num_transfers; i++) {
277+
ret = processor_bdma_program_slot(bdma_surface,
278+
&bdma_surface->transfers[i]);
279+
if (ret)
280+
goto exit;
281+
}
282+
283+
dla_enable_intr(MASK(GLB_S_INTR_MASK_0, BDMA_DONE_MASK1) |
284+
MASK(GLB_S_INTR_MASK_0, BDMA_DONE_MASK0));
285+
286+
exit:
287+
dla_debug("Exit: %s\n", __func__);
288+
RETURN(ret);
289+
}

0 commit comments

Comments
 (0)