Skip to content

Commit

Permalink
Merge branch 'main' into main2
Browse files Browse the repository at this point in the history
  • Loading branch information
sraikund16 authored Oct 10, 2024
2 parents 366ec1f + 00a00e0 commit f8c6e6f
Show file tree
Hide file tree
Showing 107 changed files with 3,080 additions and 2,369 deletions.
7 changes: 5 additions & 2 deletions libkineto/include/AbstractConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,11 @@ class AbstractConfig {
// multiple options.
// Throw std::invalid_argument if automatic correction can not be made.
//
// @param fallbackProfileStartTime Specify a fallback profile start timestamp in case it was never specified by the client
virtual void validate(const std::chrono::time_point<std::chrono::system_clock>& fallbackProfileStartTime) = 0;
// @param fallbackProfileStartTime Specify a fallback profile start timestamp
// in case it was never specified by the client
virtual void validate(
const std::chrono::time_point<std::chrono::system_clock>&
fallbackProfileStartTime) = 0;

// TODO: Separate out each profiler type into features?
virtual void printActivityProfilerConfig(std::ostream& s) const;
Expand Down
23 changes: 11 additions & 12 deletions libkineto/include/ActivityProfilerInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#include <thread>
#include <vector>

#include "ActivityType.h"
#include "ActivityTraceInterface.h"
#include "ActivityType.h"
#include "IActivityProfiler.h"

namespace libkineto {
Expand All @@ -24,15 +24,14 @@ struct CpuTraceBuffer;
class Config;

class ActivityProfilerInterface {

public:
virtual ~ActivityProfilerInterface() {}

virtual void init() {}
virtual bool isInitialized() {
return false;
}
virtual bool isActive(){
virtual bool isActive() {
return false;
}

Expand All @@ -56,8 +55,7 @@ class ActivityProfilerInterface {
const std::string& configStr = "") {}

// Toggle GPU tracing as a trace is running to omit certain parts of a graph
virtual void toggleCollectionDynamic(
const bool enable) {}
virtual void toggleCollectionDynamic(const bool enable) {}

// Start recording, potentially reusing any buffers allocated since
// prepareTrace was called.
Expand All @@ -75,22 +73,23 @@ class ActivityProfilerInterface {

// *** TraceActivity API ***
// FIXME: Pass activityProfiler interface into clientInterface?
virtual void pushCorrelationId(uint64_t id){}
virtual void popCorrelationId(){}
virtual void transferCpuTrace(
std::unique_ptr<CpuTraceBuffer> traceBuffer){}
virtual void pushCorrelationId(uint64_t id) {}
virtual void popCorrelationId() {}
virtual void transferCpuTrace(std::unique_ptr<CpuTraceBuffer> traceBuffer) {}

// Correlation ids for user defined spans
virtual void pushUserCorrelationId(uint64_t){}
virtual void popUserCorrelationId(){}
virtual void pushUserCorrelationId(uint64_t) {}
virtual void popUserCorrelationId() {}

// Saves information for the current thread to be used in profiler output
// Client must record any new kernel thread where the activity has occurred.
virtual void recordThreadInfo() {}

// Record trace metadata, currently supporting only string key and values,
// values with the same key are overwritten
virtual void addMetadata(const std::string& key, const std::string& value) = 0;
virtual void addMetadata(
const std::string& key,
const std::string& value) = 0;

// Add a child activity profiler, this enables frameworks in the application
// to enable custom framework events.
Expand Down
70 changes: 36 additions & 34 deletions libkineto/include/ActivityType.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,54 +9,56 @@
#pragma once

#include <array>
#include <string>
#include <set>
#include <string>

namespace libkineto {

// Note: Not all activity types are enabled by default. Please add new types
// at the correct position in the enum.
enum class ActivityType {
// Activity types enabled by default
CPU_OP = 0, // cpu side ops
USER_ANNOTATION,
GPU_USER_ANNOTATION,
GPU_MEMCPY,
GPU_MEMSET,
CONCURRENT_KERNEL, // on-device kernels
EXTERNAL_CORRELATION,
CUDA_RUNTIME, // host side cuda runtime events
CUDA_DRIVER, // host side cuda driver events
CPU_INSTANT_EVENT, // host side point-like events
PYTHON_FUNCTION,
OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
MTIA_RUNTIME, // host side MTIA runtime events
MTIA_CCP_EVENTS, // MTIA ondevice CCP events
CUDA_SYNC, // synchronization events between runtime and kernels

// Optional Activity types
GLOW_RUNTIME, // host side glow runtime events
CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
HPU_OP, // HPU host side runtime event
XPU_RUNTIME, // host side xpu runtime events
COLLECTIVE_COMM, // collective communication
MTIA_WORKLOADD, // MTIA workloadd events

// PRIVATEUSE1 Activity types are used for custom backends.
// The corresponding device type is `DeviceType::PrivateUse1` in PyTorch.
PRIVATEUSE1_RUNTIME, // host side privateUse1 runtime events
PRIVATEUSE1_DRIVER, // host side privateUse1 driver events

ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME,
// Activity types enabled by default
CPU_OP = 0, // cpu side ops
USER_ANNOTATION,
GPU_USER_ANNOTATION,
GPU_MEMCPY,
GPU_MEMSET,
CONCURRENT_KERNEL, // on-device kernels
EXTERNAL_CORRELATION,
CUDA_RUNTIME, // host side cuda runtime events
CUDA_DRIVER, // host side cuda driver events
CPU_INSTANT_EVENT, // host side point-like events
PYTHON_FUNCTION,
OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
MTIA_RUNTIME, // host side MTIA runtime events
MTIA_CCP_EVENTS, // MTIA ondevice CCP events
CUDA_SYNC, // synchronization events between runtime and kernels

// Optional Activity types
GLOW_RUNTIME, // host side glow runtime events
CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
HPU_OP, // HPU host side runtime event
XPU_RUNTIME, // host side xpu runtime events
COLLECTIVE_COMM, // collective communication
MTIA_WORKLOADD, // MTIA workloadd events

// PRIVATEUSE1 Activity types are used for custom backends.
// The corresponding device type is `DeviceType::PrivateUse1` in PyTorch.
PRIVATEUSE1_RUNTIME, // host side privateUse1 runtime events
PRIVATEUSE1_DRIVER, // host side privateUse1 driver events

ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add
// your new type before it.
OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME,
};

const char* toString(ActivityType t);
ActivityType toActivityType(const std::string& str);

// Return an array of all activity types except ENUM_COUNT
constexpr int activityTypeCount = (int)ActivityType::ENUM_COUNT;
constexpr int defaultActivityTypeCount = (int)ActivityType::OPTIONAL_ACTIVITY_TYPE_START;
constexpr int defaultActivityTypeCount =
(int)ActivityType::OPTIONAL_ACTIVITY_TYPE_START;
const std::array<ActivityType, activityTypeCount> activityTypes();
const std::array<ActivityType, defaultActivityTypeCount> defaultActivityTypes();

Expand Down
13 changes: 9 additions & 4 deletions libkineto/include/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Config : public AbstractConfig {

bool activityProfilerEnabled() const {
return activityProfilerEnabled_ ||
activitiesOnDemandTimestamp_.time_since_epoch().count() > 0;
activitiesOnDemandTimestamp_.time_since_epoch().count() > 0;
}

// Log activity trace to this file
Expand Down Expand Up @@ -353,7 +353,7 @@ class Config : public AbstractConfig {

void printActivityProfilerConfig(std::ostream& s) const override;
void setActivityDependentConfig() override;

void validate(const std::chrono::time_point<std::chrono::system_clock>&
fallbackProfileStartTime) override;

Expand All @@ -369,7 +369,7 @@ class Config : public AbstractConfig {
// correct destruction order can be ensured.
static std::shared_ptr<void> getStaticObjectsLifetimeHandle();

bool getTSCTimestampFlag() const{
bool getTSCTimestampFlag() const {
return useTSCTimestamp_;
}

Expand Down Expand Up @@ -447,7 +447,8 @@ class Config : public AbstractConfig {
bool activitiesCudaSyncWaitEvents_;

// Enable Profiler Config Options
// Temporarily disable shape collection until we re-roll out the feature for on-demand cases
// Temporarily disable shape collection until we re-roll out the feature for
// on-demand cases
bool enableReportInputShapes_{false};
bool enableProfileMemory_{false};
bool enableWithStack_{false};
Expand Down Expand Up @@ -502,4 +503,8 @@ class Config : public AbstractConfig {

constexpr char kUseDaemonEnvVar[] = "KINETO_USE_DAEMON";

#if __linux__
bool isDaemonEnvVarSet();
#endif

} // namespace libkineto
11 changes: 7 additions & 4 deletions libkineto/include/GenericTraceActivity.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#pragma once

#include <fmt/format.h>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
#include <sstream>

#include "ITraceActivity.h"
#include "ThreadUtil.h"
Expand All @@ -25,15 +25,18 @@ namespace libkineto {
constexpr unsigned int kLinkFwdBwd = 1;
constexpr unsigned int kLinkAsyncCpuGpu = 2;

// @lint-ignore-every CLANGTIDY cppcoreguidelines-non-private-member-variables-in-classes
// @lint-ignore-every CLANGTIDY
// cppcoreguidelines-non-private-member-variables-in-classes
// @lint-ignore-every CLANGTIDY cppcoreguidelines-pro-type-member-init
class GenericTraceActivity : public ITraceActivity {
public:
GenericTraceActivity()
: activityType(ActivityType::ENUM_COUNT), traceSpan_(nullptr) {}

GenericTraceActivity(
const TraceSpan& trace, ActivityType type, const std::string& name)
const TraceSpan& trace,
ActivityType type,
const std::string& name)
: activityType(type), activityName(name), traceSpan_(&trace) {}

int64_t deviceId() const override {
Expand Down Expand Up @@ -132,7 +135,7 @@ class GenericTraceActivity : public ITraceActivity {
ActivityType activityType;
std::string activityName;
struct Flow {
Flow(): id(0), type(0), start(0) {}
Flow() : id(0), type(0), start(0) {}
// Ids must be unique within each type
uint32_t id : 27;
// Type will be used to connect flows between profilers, as
Expand Down
30 changes: 14 additions & 16 deletions libkineto/include/IActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ struct DeviceInfo {
const std::string& name,
const std::string& label)
: id(id), sortIndex(sortIndex), name(name), label(label) {}
int64_t id; // process id
int64_t sortIndex; // position in trace view
const std::string name; // process name
const std::string label; // device label
int64_t id; // process id
int64_t sortIndex; // position in trace view
const std::string name; // process name
const std::string label; // device label
};

/* ResourceInfo:
Expand All @@ -67,21 +67,20 @@ struct ResourceInfo {
int64_t sortIndex,
const std::string& name)
: id(id), sortIndex(sortIndex), deviceId(deviceId), name(name) {}
int64_t id; // resource id
int64_t sortIndex; // position in trace view
int64_t deviceId; // id of device which owns this resource (specified in DeviceInfo.id)
int64_t id; // resource id
int64_t sortIndex; // position in trace view
int64_t deviceId; // id of device which owns this resource (specified in
// DeviceInfo.id)
const std::string name; // resource name
};

using getLinkedActivityCallback =
std::function<const ITraceActivity*(int32_t)>;
using getLinkedActivityCallback = std::function<const ITraceActivity*(int32_t)>;

/* IActivityProfilerSession:
* an opaque object that can be used by a high level profiler to
* start/stop and return trace events.
*/
class IActivityProfilerSession {

public:
virtual ~IActivityProfilerSession() {}

Expand All @@ -101,9 +100,11 @@ class IActivityProfilerSession {
// processes trace activities using logger
virtual void processTrace(ActivityLogger& logger) = 0;

virtual void processTrace(ActivityLogger& logger,
getLinkedActivityCallback /*getLinkedActivity*/,
int64_t /*startTime*/, int64_t /*endTime*/) {
virtual void processTrace(
ActivityLogger& logger,
getLinkedActivityCallback /*getLinkedActivity*/,
int64_t /*startTime*/,
int64_t /*endTime*/) {
processTrace(logger);
}

Expand All @@ -129,17 +130,14 @@ class IActivityProfilerSession {
TraceStatus status_ = TraceStatus::READY;
};


/* Activity Profiler Plugins:
* These allow other frameworks to integrate into Kineto's primary
* activity profiler. While the primary activity profiler handles
* timing the trace collections and correlating events the plugins
* can become source of new trace activity types.
*/
class IActivityProfiler {

public:

virtual ~IActivityProfiler() {}

// name of profiler
Expand Down
10 changes: 6 additions & 4 deletions libkineto/include/ILoggerObserver.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,14 @@ enum LoggerOutputType {
const char* toString(LoggerOutputType t);
LoggerOutputType toLoggerOutputType(const std::string& str);

constexpr int LoggerTypeCount = (int) LoggerOutputType::ENUM_COUNT;
constexpr int LoggerTypeCount = (int)LoggerOutputType::ENUM_COUNT;

class ILoggerObserver {
public:
virtual ~ILoggerObserver() = default;
virtual void write(const std::string& message, LoggerOutputType ot) = 0;
virtual const std::map<LoggerOutputType, std::vector<std::string>> extractCollectorMetadata() = 0;
virtual const std::map<LoggerOutputType, std::vector<std::string>>
extractCollectorMetadata() = 0;
virtual void reset() = 0;
virtual void addDevice(const int64_t device) = 0;
virtual void setTraceDurationMS(const int64_t duration) = 0;
Expand All @@ -51,8 +52,9 @@ class ILoggerObserver {
virtual void setGroupTraceID(const std::string&) {}
virtual void addDestination(const std::string& dest) = 0;
virtual void setTriggerOnDemand() {}
virtual void addMetadata(const std::string& key, const std::string& value) = 0;

virtual void addMetadata(
const std::string& key,
const std::string& value) = 0;
};

} // namespace libkineto
Expand Down
4 changes: 2 additions & 2 deletions libkineto/include/LoggingAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
#pragma once

namespace libkineto {
int getLogSeverityLevel();
void setLogSeverityLevel(int level);
int getLogSeverityLevel();
void setLogSeverityLevel(int level);
} // namespace libkineto
8 changes: 6 additions & 2 deletions libkineto/include/ThreadUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,20 @@

namespace libkineto {

int32_t systemThreadId();
int32_t systemThreadId(bool cache = true);
int32_t threadId();
bool setThreadName(const std::string& name);
std::string getThreadName();

int32_t processId();
int32_t processId(bool cache = true);
std::string processName(int32_t pid);

// Return a list of pids and process names for the current process
// and its parents.
std::vector<std::pair<int32_t, std::string>> pidCommandPairsOfAncestors();

// Resets all cached thread-local state. This must be done on
// forks to prevent stale values from being retained.
void resetTLS();

} // namespace libkineto
Loading

0 comments on commit f8c6e6f

Please sign in to comment.