bpf 系统调用
bpf 系统调用 介绍
bpf 系统调用 是 eBPF 用户态程序与内核交互的接口,可通过 man bpf
查看说明,系统调用的格式如下:
#include <linux/bpf.h>
int bpf(int cmd, union bpf_attr *attr, unsigned int size);
cmd
: 操作命令,比如BPF_PROG_LOAD
就是加载 eBPF 程序。attr
: 操作命令对应的属性。size
: 属性的大小。
系统调用源码
在内核源码 kernel/bpf/syscall.c
中定义了 bpf 系统调用:
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}
可以看到实际调用的是 __sys_bpf
函数:
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
int err;
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;
size = min_t(u32, size, sizeof(attr));
/* copy attributes from user space, may be less than sizeof(bpf_attr) */
memset(&attr, 0, sizeof(attr));
if (copy_from_bpfptr(&attr, uattr, size) != 0)
return -EFAULT;
err = security_bpf(cmd, &attr, size, uattr.is_kernel);
if (err < 0)
return err;
switch (cmd) {
case BPF_MAP_CREATE:
err = map_create(&attr, uattr.is_kernel);
break;
case BPF_MAP_LOOKUP_ELEM:
err = map_lookup_elem(&attr);
break;
case BPF_MAP_UPDATE_ELEM:
err = map_update_elem(&attr, uattr);
break;
case BPF_MAP_DELETE_ELEM:
err = map_delete_elem(&attr, uattr);
break;
case BPF_MAP_GET_NEXT_KEY:
err = map_get_next_key(&attr);
break;
case BPF_MAP_FREEZE:
err = map_freeze(&attr);
break;
case BPF_PROG_LOAD:
err = bpf_prog_load(&attr, uattr, size);
break;
case BPF_OBJ_PIN:
err = bpf_obj_pin(&attr);
break;
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
case BPF_PROG_ATTACH:
err = bpf_prog_attach(&attr);
break;
case BPF_PROG_DETACH:
err = bpf_prog_detach(&attr);
break;
case BPF_PROG_QUERY:
err = bpf_prog_query(&attr, uattr.user);
break;
case BPF_PROG_TEST_RUN:
err = bpf_prog_test_run(&attr, uattr.user);
break;
case BPF_PROG_GET_NEXT_ID:
err = bpf_obj_get_next_id(&attr, uattr.user,
&prog_idr, &prog_idr_lock);
break;
case BPF_MAP_GET_NEXT_ID:
err = bpf_obj_get_next_id(&attr, uattr.user,
&map_idr, &map_idr_lock);
break;
case BPF_BTF_GET_NEXT_ID:
err = bpf_obj_get_next_id(&attr, uattr.user,
&btf_idr, &btf_idr_lock);
break;
case BPF_PROG_GET_FD_BY_ID:
err = bpf_prog_get_fd_by_id(&attr);
break;
case BPF_MAP_GET_FD_BY_ID:
err = bpf_map_get_fd_by_id(&attr);
break;
case BPF_OBJ_GET_INFO_BY_FD:
err = bpf_obj_get_info_by_fd(&attr, uattr.user);
break;
case BPF_RAW_TRACEPOINT_OPEN:
err = bpf_raw_tracepoint_open(&attr);
break;
case BPF_BTF_LOAD:
err = bpf_btf_load(&attr, uattr, size);
break;
case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr);
break;
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr.user);
break;
case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
err = map_lookup_and_delete_elem(&attr);
break;
case BPF_MAP_LOOKUP_BATCH:
err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
break;
case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
err = bpf_map_do_batch(&attr, uattr.user,
BPF_MAP_LOOKUP_AND_DELETE_BATCH);
break;
case BPF_MAP_UPDATE_BATCH:
err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
break;
case BPF_MAP_DELETE_BATCH:
err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
break;
case BPF_LINK_CREATE:
err = link_create(&attr, uattr);
break;
case BPF_LINK_UPDATE:
err = link_update(&attr);
break;
case BPF_LINK_GET_FD_BY_ID:
err = bpf_link_get_fd_by_id(&attr);
break;
case BPF_LINK_GET_NEXT_ID:
err = bpf_obj_get_next_id(&attr, uattr.user,
&link_idr, &link_idr_lock);
break;
case BPF_ENABLE_STATS:
err = bpf_enable_stats(&attr);
break;
case BPF_ITER_CREATE:
err = bpf_iter_create(&attr);
break;
case BPF_LINK_DETACH:
err = link_detach(&attr);
break;
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
case BPF_TOKEN_CREATE:
err = token_create(&attr);
break;
default:
err = -EINVAL;
break;
}
return err;
}
bpf 操作指令
不同版本的内核所支持的 BPF 命令是不同的,在内核头文件 include/uapi/linux/bpf.h
中的 bpf_cmd
定义就是对应内核版本支持的 BPF 操作命令,在 v6.14
内核已支持 38 个指令:
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
BPF_MAP_UPDATE_ELEM,
BPF_MAP_DELETE_ELEM,
BPF_MAP_GET_NEXT_KEY,
BPF_PROG_LOAD,
BPF_OBJ_PIN,
BPF_OBJ_GET,
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
BPF_PROG_TEST_RUN,
BPF_PROG_RUN = BPF_PROG_TEST_RUN,
BPF_PROG_GET_NEXT_ID,
BPF_MAP_GET_NEXT_ID,
BPF_PROG_GET_FD_BY_ID,
BPF_MAP_GET_FD_BY_ID,
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
BPF_MAP_FREEZE,
BPF_BTF_GET_NEXT_ID,
BPF_MAP_LOOKUP_BATCH,
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
BPF_LINK_CREATE,
BPF_LINK_UPDATE,
BPF_LINK_GET_FD_BY_ID,
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
BPF_TOKEN_CREATE,
};