@@ -585,6 +585,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 
 int hl_mmu_if_set_funcs(struct hl_device *hdev)
 {
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
 	if (hdev->mmu_disable)
 		return 0;
 
@@ -597,8 +599,9 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 	case ASIC_GAUDI2:
 	case ASIC_GAUDI2B:
 	case ASIC_GAUDI2C:
-		/* MMUs in Gaudi2 are always host resident */
-		hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
+		hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
+		if (prop->pmmu.host_resident)
+			hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
@@ -1209,3 +1212,219 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
 	return 0;
 }
 
+struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = NULL;
+
+	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
+				(unsigned long) hop_addr)
+		if (hop_addr == pgt_info->shadow_addr)
+			break;
+
+	return pgt_info;
+}
+
+void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
+
+	hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+}
+
+void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info)
+{
+	struct hl_device *hdev = ctx->hdev;
+
+	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
+			hdev->asic_prop.mmu_hop_table_size);
+	hash_del(&pgt_info->node);
+	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
+	kfree(pgt_info);
+}
+
+u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx)
+{
+	return ctx->hdev->asic_prop.mmu_pgt_addr +
+			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx)
+{
+	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
+			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
+{
+	u64 page_mask = ctx->hdev->asic_prop.mmu_hop_table_size - 1;
+	u64 shadow_hop_addr = shadow_addr & (~page_mask);
+	u64 pte_offset = shadow_addr & page_mask;
+	u64 phys_hop_addr;
+
+	if (shadow_hop_addr != hl_mmu_dr_get_hop0_addr(ctx))
+		phys_hop_addr = hl_mmu_dr_get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
+	else
+		phys_hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
+
+	return phys_hop_addr + pte_offset;
+}
+
+void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+	u64 phys_val = hl_mmu_dr_get_phys_addr(ctx, val);
+
+	ctx->hdev->asic_funcs->write_pte(ctx->hdev, hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr),
+			phys_val);
+
+	*(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+			hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), val);
+	*(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr)
+{
+	hl_mmu_dr_write_final_pte(ctx, pte_addr, 0);
+}
+
+void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+	hl_mmu_dr_get_pgt_info(ctx, hop_addr)->num_of_ptes++;
+}
+
+int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
+	int num_of_ptes_left;
+
+	pgt_info->num_of_ptes--;
+
+	/*
+	 * Need to save the number of ptes left because hl_mmu_free_hop might free
+	 * the pgt_info
+	 */
+	num_of_ptes_left = pgt_info->num_of_ptes;
+	if (!num_of_ptes_left)
+		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+
+	return num_of_ptes_left;
+}
+
+u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct pgt_info *pgt_info;
+	u64 phys_addr, shadow_addr;
+
+	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
+	if (!pgt_info)
+		return ULLONG_MAX;
+
+	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
+					prop->mmu_hop_table_size);
+	if (!phys_addr) {
+		dev_err(hdev->dev, "failed to allocate page\n");
+		goto pool_add_err;
+	}
+
+	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
+						GFP_KERNEL);
+	if (!shadow_addr)
+		goto shadow_err;
+
+	pgt_info->phys_addr = phys_addr;
+	pgt_info->shadow_addr = shadow_addr;
+	pgt_info->ctx = ctx;
+	pgt_info->num_of_ptes = 0;
+	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
+
+	return shadow_addr;
+
+shadow_err:
+	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool,
+			phys_addr, prop->mmu_hop_table_size);
+pool_add_err:
+	kfree(pgt_info);
+
+	return ULLONG_MAX;
+}
+
+u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop)
+{
+	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+
+	if (hop_addr == ULLONG_MAX) {
+		hop_addr = hl_mmu_dr_alloc_hop(ctx);
+		*is_new_hop = (hop_addr != ULLONG_MAX);
+	}
+
+	return hop_addr;
+}
+
+void hl_mmu_dr_flush(struct hl_ctx *ctx)
+{
+	/* flush all writes from all cores to reach PCI */
+	mb();
+	ctx->hdev->asic_funcs->read_pte(ctx->hdev, hl_mmu_dr_get_phys_hop0_addr(ctx));
+}
+
+int hl_mmu_dr_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int rc;
+
+	hdev->mmu_priv.dr.mmu_pgt_pool =
+			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
+
+	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
+		dev_err(hdev->dev, "Failed to create page gen pool\n");
+		return -ENOMEM;
+	}
+
+	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
+			prop->mmu_hop0_tables_total_size,
+			prop->dmmu.pgt_size - prop->mmu_hop0_tables_total_size,
+			-1);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+		goto err_pool_add;
+	}
+
+	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid,
+						prop->mmu_hop_table_size, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
+		rc = -ENOMEM;
+		goto err_pool_add;
+	}
+
+	/* MMU H/W init will be done in device hw_init() */
+
+	return 0;
+
+err_pool_add:
+	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+
+	return rc;
+}
+
+void hl_mmu_dr_fini(struct hl_device *hdev)
+{
+	/* MMU H/W fini was already done in device hw_fini() */
+
+	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0))
+		return;
+
+	kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+
+	/* Make sure that if we arrive here again without init was
+	 * called we won't cause kernel panic. This can happen for
+	 * example if we fail during hard reset code at certain points
+	 */
+	hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+}