Skip to content

Commit be9f480

Browse files
committed
One can write such tests if one wants
Signed-off-by: Krzysztof Lecki <klecki@nvidia.com>
1 parent 540d6d5 commit be9f480

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed

dali/pipeline/executor/executor_test.cc

+104
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <chrono>
1818
#include <future>
1919

20+
#include "dali/core/tensor_shape.h"
21+
#include "dali/pipeline/data/backend.h"
2022
#include "dali/test/dali_test_decoder.h"
2123
#include "dali/pipeline/executor/executor.h"
2224
#include "dali/pipeline/executor/pipelined_executor.h"
@@ -603,4 +605,106 @@ TYPED_TEST(ExecutorSyncTest, TestPrefetchedExecution) {
603605
test::CheckResults(ws, batch_size, 1, tl);
604606
}
605607

608+
609+
// Builds a graph mixing CPU, mixed and GPU operators, runs one iteration and checks which
// CPU outputs the executor reports as pinned.
//
// Graph layout (all edges start from the single ExternalSource output "data_0"):
//   data_0 -> Copy_0 -> PassThrough_0 (Reshape) -> MakeContiguous_0 (mixed, GPU output)
//   data_0 -> Copy_1 -> MakeContiguous_1 (mixed, CPU output)
//   data_0 -> Copy_2 -> PassThrough_1 (Reshape) -> CoinFlip (gpu, as argument input)
TYPED_TEST(ExecutorTest, TestPinning) {
  auto exe = this->GetExecutor(this->batch_size_, this->num_threads_, 0, 1);
  exe->Init();

  OpGraph graph;

  // Single CPU source feeding all three branches.
  graph.AddOp(this->PrepareSpec(
          OpSpec("ExternalSource")
          .AddArg("device", "cpu")
          .AddArg("device_id", 0)
          .AddOutput("data_0", "cpu")), "ExternalSource_0");

  // First set: Copy_0 followed by a pass-through Reshape, plus an independent Copy_1.
  graph.AddOp(this->PrepareSpec(
          OpSpec("Copy")
          .AddArg("device", "cpu")
          .AddInput("data_0", "cpu")
          .AddOutput("copy_0", "cpu")), "Copy_0");

  graph.AddOp(this->PrepareSpec(
          OpSpec("Copy")
          .AddArg("device", "cpu")
          .AddInput("data_0", "cpu")
          .AddOutput("copy_1", "cpu")), "Copy_1");

  graph.AddOp(this->PrepareSpec(
          OpSpec("Reshape")
          .AddArg("device", "cpu")
          .AddArg("layout", "")
          .AddInput("copy_0", "cpu")
          .AddOutput("pass_through_0", "cpu")), "PassThrough_0");

  // The first set is moved CPU -> GPU by a mixed operator; the test expects this to trigger
  // pinning of copy_0 and pass_through_0.
  graph.AddOp(this->PrepareSpec(
          OpSpec("MakeContiguous")
          .AddArg("device", "mixed")
          .AddInput("pass_through_0", "cpu")
          .AddOutput("out_0", "gpu")), "MakeContiguous_0");

  // Copy_1 goes through a mixed operator that produces CPU output; it serves as the
  // reference that is expected to stay non-pinned.
  graph.AddOp(this->PrepareSpec(
          OpSpec("MakeContiguous")
          .AddArg("device", "mixed")
          .AddInput("copy_1", "cpu")
          .AddOutput("out_1", "cpu")), "MakeContiguous_1");

  // Second set: Copy_2 followed by a pass-through Reshape.
  graph.AddOp(this->PrepareSpec(
          OpSpec("Copy")
          .AddArg("device", "cpu")
          .AddInput("data_0", "cpu")
          .AddOutput("copy_2", "cpu")), "Copy_2");

  graph.AddOp(this->PrepareSpec(
          OpSpec("Reshape")
          .AddArg("device", "cpu")
          .AddArg("layout", "")
          .AddInput("copy_2", "cpu")
          .AddOutput("pass_through_1", "cpu")), "PassThrough_1");

  // The second set is consumed as an argument input by an operator in the GPU stage.
  graph.AddOp(this->PrepareSpec(
          OpSpec("random__CoinFlip")
          .AddArg("device", "gpu")
          .AddArgumentInput("probability", "pass_through_1")
          .AddOutput("out_2", "gpu")), "CoinFlip");

  // The order of this list determines the output indices checked at the end of the test.
  vector<string> outputs = {"copy_0_cpu", "copy_1_cpu", "pass_through_0_cpu", "copy_2_cpu",
                            "pass_through_1_cpu", "out_0_gpu", "out_1_cpu", "out_2_gpu"};

  exe->Build(&graph, outputs);

  // Set the data for the external source.
  auto *src_op = dynamic_cast<ExternalSource<CPUBackend> *>(graph.Node(OpType::CPU, 0).op.get());
  // Fail the test instead of dereferencing a null pointer if the first CPU node
  // is not the expected ExternalSource.
  ASSERT_NE(src_op, nullptr);
  TensorList<CPUBackend> tl;
  tl.Resize(uniform_list_shape(this->batch_size_, TensorShape<>{}), DALI_FLOAT);
  src_op->SetDataSource(tl);

  exe->RunCPU();
  exe->RunMixed();
  exe->RunGPU();

  DeviceWorkspace ws;
  exe->Outputs(&ws);

  // Utilize the fact that the outputs are shared from the executor, so we can check whether
  // they are pinned in the way we expect.
  // Currently we expect pinning of anything that is a CPU argument input to a GPU operator, or
  // that takes part in a CPU -> GPU copy (not via a decoder), i.e. a CPU input to a Mixed
  // operator that returns GPU data. The whole pass-through group should be pinned as well.
  EXPECT_TRUE(ws.Output<CPUBackend>(0).is_pinned());   // copy_0_cpu
  EXPECT_FALSE(ws.Output<CPUBackend>(1).is_pinned());  // copy_1_cpu
  EXPECT_TRUE(ws.Output<CPUBackend>(2).is_pinned());   // pass_through_0_cpu
  EXPECT_TRUE(ws.Output<CPUBackend>(3).is_pinned());   // copy_2_cpu
  EXPECT_TRUE(ws.Output<CPUBackend>(4).is_pinned());   // pass_through_1_cpu
}
709+
606710
} // namespace dali

0 commit comments

Comments
 (0)