@@ -137,6 +137,9 @@ def _http_events(start_time, end_time, include_health_checks, include_ready_chec
137137def inbound_http_summary(start_time, end_time):
138138 ''' Gets a summary of requests inbound to `pod`.
139139
140+ Equivalent to a single window of `pxviews.inbound_http_latency_timeseries`
141+ if you aggregate the results of this function by pod.
142+
140143 Args:
141144 @start_time Starting time of the data to examine.
142145 @end_time Ending time of the data to examine.
@@ -154,6 +157,7 @@ def inbound_http_summary(start_time, end_time):
154157
155158 df = df.groupby(['pod_id', 'remote_addr']).agg(
156159 latency_quantiles=('latency', px.quantiles),
160+ latency_sum=('latency', px.sum),
157161 num_requests=('time_', px.count),
158162 stop_time=('time_', px.max),
159163 num_errors=('failure', px.sum),
@@ -167,9 +171,14 @@ def inbound_http_summary(start_time, end_time):
167171 df.requestor_pod = px.pod_id_to_pod_name(df.requestor_pod_id)
168172 df.requestor_service = px.pod_id_to_service_name(df.requestor_pod_id)
169173 df.time_ = df.stop_time
174+ df.pod = df.ctx['pod']
175+ df.service = df.ctx['service']
176+ df.namespace = px.pod_name_to_namespace(df.pod)
177+ df.node = df.ctx['node']
170178
171- return df[['time_', 'pod_id', 'requestor_ip', 'requestor_pod', 'requestor_service', 'latency_quantiles',
172- 'num_errors', 'num_requests', 'req_bytes', 'resp_bytes']]
179+ return df[['time_', 'pod_id', 'pod', 'service', 'namespace', 'node',
180+ 'requestor_ip', 'requestor_pod', 'requestor_service',
181+ 'latency_quantiles', 'latency_sum', 'num_requests', 'num_errors', 'req_bytes', 'resp_bytes']]
173182
174183
175184def http_graph(start_time, end_time):
@@ -182,7 +191,6 @@ def http_graph(start_time, end_time):
182191 @start_time Starting time of the data to examine.
183192 @end_time Ending time of the data to examine.
184193 '''
185- # TODO(philkuz) fix pxviews can't use False because it's not in scope.
186194 df = _http_events(start_time,
187195 end_time,
188196 include_health_checks=False,
@@ -191,6 +199,7 @@ def http_graph(start_time, end_time):
191199 df.pod_id = df.ctx['pod_id']
192200 df = df.groupby(['pod_id', 'remote_addr', 'trace_role']).agg(
193201 latency_quantiles=('latency', px.quantiles),
202+ latency_sum=('latency', px.sum),
194203 total_request_count=('latency', px.count)
195204 num_requests=('time_', px.count),
196205 stop_time=('time_', px.max),
@@ -201,6 +210,7 @@ def http_graph(start_time, end_time):
201210
202211 df.traced_pod = df.ctx['pod']
203212 df.traced_ip = px.pod_name_to_pod_ip(df.traced_pod)
213+ df.traced_pod_id = df.pod_id
204214 df.traced_service = df.ctx['service']
205215
206216 # Get the traced and remote pod/service/IP information.
@@ -214,6 +224,10 @@ def http_graph(start_time, end_time):
214224 df.traced_service,
215225 px.service_id_to_service_name(px.ip_to_service_id(df.remote_ip)))
216226
227+ df.remote_pod_id = px.select(df.is_remote_addr_localhost,
228+ df.traced_pod_id,
229+ px.ip_to_pod_id(df.remote_ip))
230+
217231 # Assign requestor and responder based on the trace_role.
218232 df.is_server_side_tracing = df.trace_role == 2
219233 df.responder_ip = px.select(df.is_server_side_tracing, df.traced_ip, df.remote_ip)
@@ -222,6 +236,9 @@ def http_graph(start_time, end_time):
222236 df.responder_pod = px.select(df.is_server_side_tracing, df.traced_pod, df.remote_pod)
223237 df.requestor_pod = px.select(df.is_server_side_tracing, df.remote_pod, df.traced_pod)
224238
239+ df.responder_pod_id = px.select(df.is_server_side_tracing, df.traced_pod_id, df.remote_pod_id)
240+ df.requestor_pod_id = px.select(df.is_server_side_tracing, df.remote_pod_id, df.traced_pod_id)
241+
225242 df.responder_service = px.select(df.is_server_side_tracing, df.traced_service, df.remote_service)
226243 df.requestor_service = px.select(df.is_server_side_tracing, df.remote_service, df.traced_service)
227244
@@ -230,6 +247,8 @@ def http_graph(start_time, end_time):
230247 df.latency_p99 = px.DurationNanos(px.floor(px.pluck_float64(df.latency_quantiles, 'p99')))
231248
232249 df = df.groupby([
250+ 'responder_pod_id',
251+ 'requestor_pod_id',
233252 'responder_pod',
234253 'requestor_pod',
235254 'responder_ip',
@@ -240,6 +259,7 @@ def http_graph(start_time, end_time):
240259 # TODO(philkuz) build a combine_quantiles udf that does this properly.
241260 # latency_quantiles=('latency_quantiles', px.combine_quantiles),
242261 latency_quantiles=('latency_quantiles', px.any),
262+ latency_sum=('latency_sum', px.sum),
243263 latency_p50=('latency_p50', px.mean),
244264 latency_p90=('latency_p90', px.mean),
245265 latency_p99=('latency_p99', px.mean),
@@ -250,11 +270,21 @@ def http_graph(start_time, end_time):
250270 stop_time=('stop_time', px.max),
251271 )
252272
273+
253274 df.time_ = df.stop_time
254- return df[['time_', 'responder_pod', 'requestor_pod',
275+ df.responder_node = px.pod_id_to_node_name(df.responder_pod_id)
276+ df.requestor_node = px.pod_id_to_node_name(df.requestor_pod_id)
277+
278+ df.responder_namespace = px.pod_name_to_namespace(df.responder_pod)
279+ df.requestor_namespace = px.pod_name_to_namespace(df.requestor_pod)
280+ df.window_start_time = start_time
281+ df.window_end_time = end_time
282+ return df[['time_', 'responder_pod', 'requestor_pod', 'responder_namespace', 'requestor_namespace',
283+ 'responder_node', 'requestor_node',
255284 'responder_ip', 'requestor_ip','responder_service', 'requestor_service',
256- 'latency_p50', 'latency_p90', 'latency_p99', 'latency_quantiles', 'num_requests',
257- 'num_errors', 'req_bytes', 'resp_bytes']]
285+ 'latency_p50', 'latency_p90', 'latency_p99', 'latency_quantiles', 'latency_sum', 'num_requests',
286+ 'num_errors', 'req_bytes', 'resp_bytes',
287+ 'window_start_time', 'window_end_time']]
258288
259289
260290def container_process_timeseries(start_time, end_time, window_ns):
@@ -273,7 +303,7 @@ def container_process_timeseries(start_time, end_time, window_ns):
273303
274304 # First calculate CPU usage by process (UPID) in each k8s_object
275305 # over all windows.
276- df = df.groupby(['upid', 'container','pod_id', 'timestamp']).agg(
306+ df = df.groupby(['upid', 'container', 'pod_id', 'timestamp']).agg(
277307 rss=('rss_bytes', px.mean),
278308 vsize=('vsize_bytes', px.mean),
279309 # The fields below are counters, so we take the min and the max to subtract them.
@@ -315,9 +345,15 @@ def container_process_timeseries(start_time, end_time, window_ns):
315345 # Finally, calculate total (kernel + user time) percentage used over window.
316346 df.cpu_usage = px.Percent((df.cpu_ktime_ns + df.cpu_utime_ns) / window_ns)
317347 df.time_ = df.timestamp
318- return df[['time_', 'pod_id', 'container', 'cpu_usage', 'actual_disk_read_throughput',
319- 'actual_disk_write_throughput', 'total_disk_read_throughput',
320- 'total_disk_write_throughput', 'rss', 'vsize']]
348+ df.pod = df.ctx['pod']
349+ df.service = df.ctx['service']
350+ df.namespace = px.pod_name_to_namespace(df.pod)
351+ df.node = df.ctx['node']
352+
353+ return df[['time_', 'pod_id', 'container', 'pod', 'service', 'namespace', 'node',
354+ 'cpu_usage', 'actual_disk_read_throughput',
355+ 'actual_disk_write_throughput', 'total_disk_read_throughput',
356+ 'total_disk_write_throughput', 'rss', 'vsize']]
321357
322358
323359def pod_network_timeseries(start_time, end_time, window_ns):
@@ -362,7 +398,12 @@ def pod_network_timeseries(start_time, end_time, window_ns):
362398 df.tx_errors_per_ns = (df.tx_errors_end - df.tx_errors_start) / window_ns
363399
364400 df.time_ = df.timestamp
365- return df[['time_', 'pod_id', 'rx_bytes_per_ns', 'tx_bytes_per_ns',
401+ df.pod = df.ctx['pod']
402+ df.service = df.ctx['service']
403+ df.namespace = df.ctx['namespace']
404+ df.node = df.ctx['node']
405+ return df[['time_', 'pod_id', 'pod', 'service', 'namespace', 'node',
406+ 'rx_bytes_per_ns', 'tx_bytes_per_ns',
366407 'rx_drops_per_ns', 'tx_drops_per_ns', 'rx_errors_per_ns', 'tx_errors_per_ns']]
367408
368409
@@ -395,14 +436,22 @@ def inbound_http_throughput_timeseries(start_time, end_time, window_ns):
395436 num_errors=('failure', px.sum),
396437 )
397438 df.time_ = df.timestamp
439+ df.pod = df.ctx['pod']
440+ df.service = df.ctx['service']
441+ df.namespace = df.ctx['namespace']
442+ df.node = df.ctx['node']
398443
399- return df[['time_', 'pod_id', 'container', 'num_errors', 'num_requests']]
444+ return df[['time_', 'pod_id', 'pod', 'service', 'namespace', 'node',
445+ 'container', 'num_requests', 'num_errors']]
400446
401447
402448def inbound_http_latency_timeseries(start_time, end_time, window_ns):
403449 ''' Compute the inbound HTTP request latency timeseries for each pod
404450 in the cluster.
405451
452+ Equivalent to `pxviews.inbound_http_summary()` extended over multiple
453+ time windows.
454+
406455 Args:
407456 @start_time Starting time of the data to examine.
408457 @end_time Ending time of the data to examine.
@@ -428,8 +477,13 @@ def inbound_http_latency_timeseries(start_time, end_time, window_ns):
428477 resp_bytes=('resp_body_size', px.sum),
429478 )
430479 df.time_ = df.timestamp
480+ df.pod = df.ctx['pod']
481+ df.service = df.ctx['service']
482+ df.namespace = df.ctx['namespace']
483+ df.node = df.ctx['node']
431484
432- return df[['time_', 'pod_id', 'latency_quantiles', 'num_requests', 'num_errors', 'latency_sum', 'req_bytes', 'resp_bytes']]
485+ return df[['time_', 'pod_id', 'pod', 'service', 'namespace', 'node',
486+ 'latency_quantiles', 'num_requests', 'num_errors', 'latency_sum', 'req_bytes', 'resp_bytes']]
433487
434488
435489def stacktraces(start_time, end_time):
@@ -533,10 +587,16 @@ def connection_throughput_stats(start_time, end_time):
533587 df.pod_id = px.select(df.pod_id != '', df.pod_id, df.pod_id_x)
534588 df.remote_addr = px.select(df.remote_addr != '', df.remote_addr, df.remote_addr_x)
535589
590+ df.pod = df.ctx['pod']
591+ df.namespace = df.ctx['namespace']
592+ df.service = df.ctx['service']
593+ df.node = df.ctx['node']
594+
536595 df.inbound_conn_throughput = df.rx_server + df.tx_server
537596 df.outbound_conn_throughput = df.tx_client + df.rx_client
538597
539- return df[['time_', 'remote_addr', 'pod_id', 'inbound_conn_throughput', 'outbound_conn_throughput']]
598+ return df[['time_', 'remote_addr', 'node', 'pod_id', 'pod', 'namespace', 'service',
599+ 'inbound_conn_throughput', 'outbound_conn_throughput']]
540600
541601
542602def processes(start_time, end_time):
0 commit comments