|
22 | 22 |
|
23 | 23 | #include "src/common/base/base.h" |
24 | 24 | #include "src/shared/upid/upid.h" |
25 | | -#include "src/stirling/core/pub_sub_manager.h" |
26 | | -#include "src/stirling/core/source_registry.h" |
| 25 | +#include "src/stirling/core/unit_connector.h" |
27 | 26 | #include "src/stirling/source_connectors/perf_profiler/perf_profile_connector.h" |
28 | 27 | #include "src/stirling/source_connectors/perf_profiler/stack_traces_table.h" |
29 | | -#include "src/stirling/stirling.h" |
30 | | - |
31 | | -using ::px::ProcessStatsMonitor; |
32 | 28 |
|
33 | 29 | using ::px::Status; |
34 | | -using ::px::StatusOr; |
35 | 30 |
|
36 | | -using ::px::stirling::IndexPublication; |
37 | | -using ::px::stirling::PerfProfileConnector; |
38 | | -using ::px::stirling::SourceRegistry; |
39 | | -using ::px::stirling::Stirling; |
40 | | -using ::px::stirling::stirlingpb::InfoClass; |
41 | | -using ::px::stirling::stirlingpb::Publish; |
| 31 | +DEFINE_uint32(time, 30, "Number of seconds to run the profiler."); |
42 | 32 |
|
43 | | -using ::px::md::UPID; |
44 | | -using ::px::types::ColumnWrapperRecordBatch; |
45 | | -using ::px::types::TabletID; |
| 33 | +namespace px { |
| 34 | +namespace stirling { |
| 35 | + |
| 36 | +class Profiler : public UnitConnector<PerfProfileConnector> { |
| 37 | + public: |
| 38 | + Status PrintData() { |
| 39 | + // Build the stack traces histogram. |
| 40 | + PX_RETURN_IF_ERROR(BuildHistogram()); |
| 41 | + |
| 42 | + // Print the stack traces histogram. |
| 43 | + // TODO(jps): replace this with a pprof proto file writer. |
| 44 | + // 15x: libc.so;main;foo;bar |
| 45 | + // 12x: libc.so;main;foo;qux |
| 46 | + for (const auto& [str, count] : histo_) { |
| 47 | + LOG(INFO) << count << "x: " << str; |
| 48 | + } |
| 49 | + return Status::OK(); |
| 50 | + } |
46 | 51 |
|
47 | | -DEFINE_uint32(time, 30, "Number of seconds to run the profiler."); |
48 | | -DEFINE_uint32(pid, 0, "PID to profile. Leave unspecified to profile everything."); |
49 | | - |
50 | | -// Put this in global space, so we can kill it in the signal handler. |
51 | | -Stirling* g_stirling = nullptr; |
52 | | -ProcessStatsMonitor* g_process_stats_monitor = nullptr; |
53 | | -std::atomic<bool> g_data_received = false; |
54 | | - |
55 | | -Status StirlingWrapperCallback(uint64_t /* table_id */, TabletID /* tablet_id */, |
56 | | - std::unique_ptr<ColumnWrapperRecordBatch> record_batch) { |
57 | | - auto& upid_col = (*record_batch)[px::stirling::kStackTraceUPIDIdx]; |
58 | | - auto& stack_trace_str_col = (*record_batch)[px::stirling::kStackTraceStackTraceStrIdx]; |
59 | | - auto& count_col = (*record_batch)[px::stirling::kStackTraceCountIdx]; |
60 | | - |
61 | | - std::string out; |
62 | | - for (size_t i = 0; i < stack_trace_str_col->Size(); ++i) { |
63 | | - UPID upid(upid_col->Get<px::types::UInt128Value>(i).val); |
64 | | - |
65 | | - if (FLAGS_pid == upid.pid() || FLAGS_pid == 0) { |
66 | | - std::cout << stack_trace_str_col->Get<px::types::StringValue>(i); |
67 | | - std::cout << " "; |
68 | | - std::cout << count_col->Get<px::types::Int64Value>(i).val; |
69 | | - std::cout << "\n"; |
| 52 | + private: |
| 53 | + Status BuildHistogram() { |
| 54 | + PX_ASSIGN_OR_RETURN(const auto& records, ConsumeRecords(0)); |
| 55 | + |
| 56 | + const uint64_t num_rows = records[kStackTraceStackTraceStrIdx]->Size(); |
| 57 | + const auto traces_column = records[kStackTraceStackTraceStrIdx]; |
| 58 | + const auto counts_column = records[kStackTraceCountIdx]; |
| 59 | + |
| 60 | + // Build the stack traces histogram. |
| 61 | + for (uint64_t row_idx = 0; row_idx < num_rows; ++row_idx) { |
| 62 | + const std::string& stack_trace_str = traces_column->Get<types::StringValue>(row_idx); |
| 63 | + const int64_t count = counts_column->Get<types::Int64Value>(row_idx).val; |
| 64 | + histo_[stack_trace_str] += count; |
70 | 65 | } |
| 66 | + return Status::OK(); |
71 | 67 | } |
72 | 68 |
|
73 | | - g_data_received = true; |
| 69 | + // A local stack trace histo (for convenience, to be populated after all samples are collected). |
| 70 | + absl::flat_hash_map<std::string, uint64_t> histo_; |
| 71 | +}; |
74 | 72 |
|
75 | | - return Status::OK(); |
76 | | -} |
| 73 | +} // namespace stirling |
| 74 | +} // namespace px |
| 75 | + |
| 76 | +std::unique_ptr<px::stirling::Profiler> g_profiler; |
77 | 77 |
|
78 | 78 | void SignalHandler(int signum) { |
79 | 79 | std::cerr << "\n\nStopping, might take a few seconds ..." << std::endl; |
80 | | - // Important to call Stop(), because it releases BPF resources, |
| 80 | + |
| 81 | + // Important to call Stop(), because it releases eBPF resources, |
81 | 82 | // which would otherwise leak. |
82 | | - if (g_stirling != nullptr) { |
83 | | - g_stirling->Stop(); |
84 | | - } |
85 | | - if (g_process_stats_monitor != nullptr) { |
86 | | - g_process_stats_monitor->PrintCPUTime(); |
| 83 | + if (g_profiler != nullptr) { |
| 84 | + PX_UNUSED(g_profiler->Stop()); |
| 85 | + g_profiler = nullptr; |
87 | 86 | } |
| 87 | + |
88 | 88 | exit(signum); |
89 | 89 | } |
90 | 90 |
|
| 91 | +Status RunProfiler() { |
| 92 | + // Bring up eBPF. |
| 93 | + PX_RETURN_IF_ERROR(g_profiler->Init()); |
| 94 | + |
| 95 | + // Separate thread to periodically wake up and read the eBPF perf buffer & maps. |
| 96 | + PX_RETURN_IF_ERROR(g_profiler->Start()); |
| 97 | + |
| 98 | + // Collect data for the user specified amount of time. |
| 99 | + sleep(FLAGS_time); |
| 100 | + |
| 101 | + // Stop collecting data and do a final read out of eBPF perf buffer & maps. |
| 102 | + PX_RETURN_IF_ERROR(g_profiler->Stop()); |
| 103 | + |
| 104 | + // Print the info. We will replace this with a pprof proto file write out. |
| 105 | + PX_RETURN_IF_ERROR(g_profiler->PrintData()); |
| 106 | + |
| 107 | + // Phew. We are outta here. |
| 108 | + return Status::OK(); |
| 109 | +} |
| 110 | + |
91 | 111 | int main(int argc, char** argv) { |
92 | 112 | // Register signal handlers to clean-up on exit. |
| 113 | + signal(SIGHUP, SignalHandler); |
93 | 114 | signal(SIGINT, SignalHandler); |
94 | 115 | signal(SIGQUIT, SignalHandler); |
95 | 116 | signal(SIGTERM, SignalHandler); |
96 | | - signal(SIGHUP, SignalHandler); |
97 | 117 |
|
98 | 118 | px::EnvironmentGuard env_guard(&argc, argv); |
99 | 119 |
|
100 | | - // Make Stirling. |
101 | | - auto registry = std::make_unique<SourceRegistry>(); |
102 | | - registry->RegisterOrDie<PerfProfileConnector>(); |
103 | | - std::unique_ptr<Stirling> stirling = Stirling::Create(std::move(registry)); |
104 | | - g_stirling = stirling.get(); |
105 | | - stirling->RegisterDataPushCallback(StirlingWrapperCallback); |
106 | | - |
107 | | - // Enable use of USR1/USR2 for controlling debug. |
108 | | - stirling->RegisterUserDebugSignalHandlers(); |
| 120 | + // Need to do this after env setup. |
| 121 | + g_profiler = std::make_unique<px::stirling::Profiler>(); |
109 | 122 |
|
110 | | - // Start measuring process stats after init. |
111 | | - ProcessStatsMonitor process_stats_monitor; |
112 | | - g_process_stats_monitor = &process_stats_monitor; |
| 123 | + // Run the profiler (in more detail: setup, collect data, and tear down). |
| 124 | + const auto status = RunProfiler(); |
113 | 125 |
|
114 | | - // Run Stirling. |
115 | | - std::thread run_thread = std::thread(&Stirling::Run, stirling.get()); |
116 | | - |
117 | | - // Run for the specified amount of time. |
118 | | - std::this_thread::sleep_for(std::chrono::seconds(FLAGS_time)); |
119 | | - |
120 | | - // This is not likely because a table push is triggered immediately. But, just in case, |
121 | | - // provide some help if no data was received. |
122 | | - LOG_IF(WARNING, !g_data_received) << "No data received from profiler. Try increasing -time or " |
123 | | - "reducing -stirling_profiler_table_update_period_seconds."; |
| 126 | + // Something happened, log that. |
| 127 | + LOG_IF(WARNING, !status.ok()) << status.msg(); |
124 | 128 |
|
125 | 129 | // Cleanup. |
126 | | - stirling->Stop(); |
127 | | - |
128 | | - // Wait for the thread to return. |
129 | | - run_thread.join(); |
| 130 | + g_profiler = nullptr; |
130 | 131 |
|
131 | | - return 0; |
| 132 | + return status.ok() ? 0 : -1; |
132 | 133 | } |
0 commit comments