Skip to content

Commit a52b009

Browse files
authored
Add socket size bpftrace tool (#1287)
Summary: Add socket size bpftrace tool for socket-level network workload characterization Detail: For distributed workloads that don't use http (e.g. ML model training) it's desired to be able to do network workload characterization at socket level. This tool (adapted from "BPF Performance Tools" book by Brendan Gregg) enables socket-level profiling which provides socket requests sizes, counts, and throughput. Type of change: /kind feature Test Plan: Tested on an airgapped pixie deployment. Signed-off-by: Ata FatahiBaarzi <afatahibaarzi@linkedin.com>
1 parent 093d66e commit a52b009

3 files changed

Lines changed: 330 additions & 0 deletions

File tree

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
# Copyright 2018- The Pixie Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# SPDX-License-Identifier: Apache-2.0
16+
17+
# WARNING: Depending on the size of your cluster and how network-intensive your Pods and services are, this tool
18+
# can generate a high volume of profiling data because it traces every single socket send/receive call.
19+
# Therefore, run it judiciously and for an appropriate amount of time based on your needs.
20+
21+
import pxtrace
22+
import px
23+
24+
# bpftrace program for the send path. The kprobe on sock_sendmsg stashes the
# socket's addresses/ports in per-thread maps (keyed by tid); the kretprobe
# prints one record per call with the fields
#   time_, tid, ssock, src_ip, lport, dst_ip, dst_port, size
# where `size` is the raw return value of sock_sendmsg, and then clears the
# per-thread map entries.
sendmsg_program = """
// Adapted From "BPF Performance Tools" book by Brendan Gregg; Chapter 10: Networking
// Available at: https://github.com/brendangregg/bpf-perf-tools-book/blob/master/originals/Ch10_Networking/socksize.bt

/*
 * socket_sendmsg tool shows the socket stats (count, size, and throughput) for write socket I/O requests.
 *
 * See BPF Performance Tools, Chapter 10, for an explanation of this tool.
 *
 * Copyright (c) 2019 Brendan Gregg.
 * Licensed under the Apache License, Version 2.0 (the "License").
 * This was originally created for the BPF Performance Tools book
 * published by Addison Wesley. ISBN-13: 9780136554820
 * When copying or porting, include this comment.
 *
 * 12-Apr-2019 Brendan Gregg Created this.
 * 5-May-2023 Ata Fatahi modified to extract more info such as source and destination IP:Port pairs.
 */

#include <linux/socket.h>
#include <net/sock.h>


kprobe:sock_sendmsg
{
  $AF_INET = (uint16) 2;

  $sock = (struct socket *)arg0;
  @ssocket[tid] = $sock;
  $s = (struct sock *) $sock->sk;
  $inet_family = $s->__sk_common.skc_family;

  // NOTE(review): every family other than AF_INET is decoded as IPv6 here,
  // which presumably also covers non-IP sockets (e.g. AF_UNIX) -- confirm
  // whether those should be skipped instead.
  if ($inet_family == $AF_INET) {
    $daddr = ntop($s->__sk_common.skc_daddr);
    $saddr = ntop($s->__sk_common.skc_rcv_saddr);
  } else {
    $daddr = ntop($s->__sk_common.skc_v6_daddr.in6_u.u6_addr8);
    $saddr = ntop($s->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr8);
  }

  $lport = $s->__sk_common.skc_num;
  $dport = $s->__sk_common.skc_dport;

  // The destination port is kept in network byte order (big endian), unlike
  // the local port, so its two bytes must be flipped before it is reported.
  // This assumes a little-endian host.
  $dport = ($dport >> 8) | (($dport << 8) & 0x00FF00);

  @ssock[tid] = $s;
  @sdad[tid] = $daddr;
  @sdp[tid] = $dport;
  @ssad[tid] = $saddr;
  @slp[tid] = $lport;
}

kretprobe:sock_sendmsg
{
  printf("time_:%llu tid:%u ssock:%d src_ip:%s lport:%d dst_ip:%s dst_port:%d size:%d",
         nsecs,
         tid,
         @ssock[tid],
         @ssad[tid],
         @slp[tid],
         @sdad[tid],
         @sdp[tid],
         retval);

  delete(@ssocket[tid]);
  delete(@ssock[tid]);
  delete(@sdad[tid]);
  delete(@sdp[tid]);
  delete(@ssad[tid]);
  delete(@slp[tid]);
}

"""
95+
96+
# bpftrace program for the receive path. The kprobe on sock_recvmsg stashes
# the socket's addresses/ports in per-thread maps (keyed by tid); the
# kretprobe prints one record per call with the fields
#   time_, tid, rsock, src_ip, lport, dst_ip, dst_port, size
# where `size` is the raw return value of sock_recvmsg, and then clears the
# per-thread map entries.
recvmsg_program = """
// Adapted From "BPF Performance Tools" book by Brendan Gregg; Chapter 10: Networking
// Available at: https://github.com/brendangregg/bpf-perf-tools-book/blob/master/originals/Ch10_Networking/socksize.bt

/*
 * socket_recvmsg tool shows the socket stats (count, size, and throughput) for read socket I/O requests.
 *
 * See BPF Performance Tools, Chapter 10, for an explanation of this tool.
 *
 * Copyright (c) 2019 Brendan Gregg.
 * Licensed under the Apache License, Version 2.0 (the "License").
 * This was originally created for the BPF Performance Tools book
 * published by Addison Wesley. ISBN-13: 9780136554820
 * When copying or porting, include this comment.
 *
 * 12-Apr-2019 Brendan Gregg Created this.
 * 5-May-2023 Ata Fatahi modified to extract more info such as source and destination IP:Port pairs.
 */

#include <linux/socket.h>
#include <net/sock.h>


kprobe:sock_recvmsg
{
  $AF_INET = (uint16) 2;

  $sock = (struct socket *)arg0;
  @rsocket[tid] = $sock;
  $s = (struct sock *) $sock->sk;
  $inet_family = $s->__sk_common.skc_family;

  // NOTE(review): every family other than AF_INET is decoded as IPv6 here,
  // which presumably also covers non-IP sockets (e.g. AF_UNIX) -- confirm
  // whether those should be skipped instead.
  if ($inet_family == $AF_INET) {
    $daddr = ntop($s->__sk_common.skc_daddr);
    $saddr = ntop($s->__sk_common.skc_rcv_saddr);
  } else {
    $daddr = ntop($s->__sk_common.skc_v6_daddr.in6_u.u6_addr8);
    $saddr = ntop($s->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr8);
  }

  $lport = $s->__sk_common.skc_num;
  $dport = $s->__sk_common.skc_dport;

  // The destination port is kept in network byte order (big endian), unlike
  // the local port, so its two bytes must be flipped before it is reported.
  // This assumes a little-endian host.
  $dport = ($dport >> 8) | (($dport << 8) & 0x00FF00);

  @rsock[tid] = $s;
  @rdad[tid] = $daddr;
  @rdp[tid] = $dport;
  @rsad[tid] = $saddr;
  @rlp[tid] = $lport;
}

kretprobe:sock_recvmsg
{
  printf("time_:%llu tid:%u rsock:%d src_ip:%s lport:%d dst_ip:%s dst_port:%d size:%d",
         nsecs,
         tid,
         @rsock[tid],
         @rsad[tid],
         @rlp[tid],
         @rdad[tid],
         @rdp[tid],
         retval);

  delete(@rsocket[tid]);
  delete(@rsock[tid]);
  delete(@rdad[tid]);
  delete(@rdp[tid]);
  delete(@rsad[tid]);
  delete(@rlp[tid]);
}

"""
167+
168+
169+
def sock_sendmsg_func():
    """Deploy the sock_sendmsg tracer and summarise send traffic per (src, dst).

    Upserts the bpftrace program in `sendmsg_program` as the tracepoint
    'sock_sendmsg_tracer', reads the rows it writes to 'sock_sendmsg_table',
    and aggregates them by source and destination.

    Returns:
        A DataFrame with one row per (src, dst) pair and the columns `count`
        (successful calls), `total_size`, `avg_size` and `throughput`.
    """
    duration = "1m"
    table_name = 'sock_sendmsg_table'
    pxtrace.UpsertTracepoint('sock_sendmsg_tracer',
                             table_name,
                             sendmsg_program,
                             pxtrace.kprobe(),
                             duration)

    df = px.DataFrame(table=table_name)

    # `size` is the raw return value of sock_sendmsg, so a failed call shows
    # up as a negative errno. Drop those rows; otherwise failures are counted
    # as requests and their errno is subtracted from the byte totals.
    df = df[df['size'] >= 0]

    # Replace each IP with the name of the pod that owns it, keeping the raw
    # IP when it does not resolve to a pod.
    df.src = px.pod_id_to_pod_name(px.ip_to_pod_id(df.src_ip))
    df.src = px.select(df.src == '', df.src_ip, df.src)
    df.dst = px.pod_id_to_pod_name(px.ip_to_pod_id(df.dst_ip))
    df.dst = px.select(df.dst == '', df.dst_ip, df.dst)

    # Cap the number of traced calls that feed the aggregation. Rows beyond
    # this limit are not included in the totals below.
    df = df.head(100000)

    df = df.groupby(['src', 'dst']).agg(count=('size', px.count), total_size=('size', px.sum))
    df['avg_size'] = df['total_size'] / df['count']
    # Total size divided by the tracing window in seconds. The window is
    # derived by stripping the last character of `duration` and multiplying by
    # 60, so this is only right while `duration` is expressed in minutes.
    df['throughput'] = df['total_size'] / (px.atoi(px.substring(duration, 0, px.length(duration) - 1), 1) * 60)

    # Filter for a particular service/pod, if desired (the empty substring
    # matches every row).
    df = df[px.contains(df['src'], '')]

    return df
197+
198+
199+
def sock_recvmsg_func():
    """Deploy the sock_recvmsg tracer and summarise receive traffic per (src, dst).

    Upserts the bpftrace program in `recvmsg_program` as the tracepoint
    'sock_recvmsg_tracer', reads the rows it writes to 'sock_recvmsg_table',
    and aggregates them by source and destination.

    Returns:
        A DataFrame with one row per (src, dst) pair and the columns `count`
        (successful calls), `total_size`, `avg_size` and `throughput`.
    """
    duration = "1m"
    table_name = 'sock_recvmsg_table'
    pxtrace.UpsertTracepoint('sock_recvmsg_tracer',
                             table_name,
                             recvmsg_program,
                             pxtrace.kprobe(),
                             duration)

    df = px.DataFrame(table=table_name)

    # `size` is the raw return value of sock_recvmsg, so a failed call (for
    # example a non-blocking read with nothing available) shows up as a
    # negative errno. Drop those rows; otherwise failures are counted as
    # requests and their errno is subtracted from the byte totals.
    df = df[df['size'] >= 0]

    # Replace each IP with the name of the pod that owns it, keeping the raw
    # IP when it does not resolve to a pod.
    df.src = px.pod_id_to_pod_name(px.ip_to_pod_id(df.src_ip))
    df.src = px.select(df.src == '', df.src_ip, df.src)
    df.dst = px.pod_id_to_pod_name(px.ip_to_pod_id(df.dst_ip))
    df.dst = px.select(df.dst == '', df.dst_ip, df.dst)

    # Cap the number of traced calls that feed the aggregation. Rows beyond
    # this limit are not included in the totals below.
    df = df.head(100000)

    df = df.groupby(['src', 'dst']).agg(count=('size', px.count), total_size=('size', px.sum))
    df['avg_size'] = df['total_size'] / df['count']
    # Total size divided by the tracing window in seconds. The window is
    # derived by stripping the last character of `duration` and multiplying by
    # 60, so this is only right while `duration` is expressed in minutes.
    df['throughput'] = df['total_size'] / (px.atoi(px.substring(duration, 0, px.length(duration) - 1), 1) * 60)

    # Filter for a particular service/pod, if desired (the empty substring
    # matches every row).
    df = df[px.contains(df['src'], '')]

    return df
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
2+
short: Socket Size
3+
long: Shows info and size stats for socket I/O requests.
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
{
2+
"variables": [],
3+
"globalFuncs": [
4+
{
5+
"outputName": "send_results",
6+
"func": {
7+
"name": "sock_sendmsg_func",
8+
"args": []
9+
}
10+
},
11+
{
12+
"outputName": "recv_results",
13+
"func": {
14+
"name": "sock_recvmsg_func",
15+
"args": []
16+
}
17+
}
18+
],
19+
"widgets": [
20+
{
21+
"name": "Socket Send Messages",
22+
"position": {
23+
"x": 0,
24+
"y": 0,
25+
"w": 12,
26+
"h": 5
27+
},
28+
"globalFuncOutputName": "send_results",
29+
"displaySpec": {
30+
"@type": "types.px.dev/px.vispb.Graph",
31+
"adjacencyList": {
32+
"fromColumn": "src",
33+
"toColumn": "dst"
34+
},
35+
"edgeWeightColumn": "total_size",
36+
"edgeColorColumn": "total_size",
37+
"edgeLength": 300,
38+
"edgeThresholds": {
39+
"mediumThreshold": 1000,
40+
"highThreshold": 100000
41+
},
42+
"edgeHoverInfo": [
43+
"total_size"
44+
]
45+
}
46+
},
47+
{
  "name": "Socket Receive Messages",
  "position": {
    "x": 0,
    "y": 9,
    "w": 12,
    "h": 5
  },
  "globalFuncOutputName": "recv_results",
  "displaySpec": {
    "@type": "types.px.dev/px.vispb.Graph",
    "adjacencyList": {
      "fromColumn": "src",
      "toColumn": "dst"
    },
    "edgeWeightColumn": "total_size",
    "edgeColorColumn": "total_size",
    "edgeLength": 300,
    "edgeThresholds": {
      "mediumThreshold": 1000,
      "highThreshold": 100000
    },
    "edgeHoverInfo": [
      "total_size"
    ]
  }
},
74+
{
75+
"name": "Send Message Table",
76+
"position": {
77+
"x": 0,
78+
"y": 5,
79+
"w": 12,
80+
"h": 4
81+
},
82+
"globalFuncOutputName": "send_results",
83+
"displaySpec": {
84+
"@type": "types.px.dev/px.vispb.Table"
85+
}
86+
},
87+
{
  "name": "Receive Message Table",
  "position": {
    "x": 0,
    "y": 14,
    "w": 12,
    "h": 4
  },
  "globalFuncOutputName": "recv_results",
  "displaySpec": {
    "@type": "types.px.dev/px.vispb.Table"
  }
}
100+
]
101+
}

0 commit comments

Comments
 (0)