Extended Berkeley Packet Filter (eBPF) has revolutionized how we observe and secure Linux systems. By allowing safe, sandboxed programs to run in the kernel, eBPF enables unprecedented visibility into system behavior without the risks and overhead of traditional kernel modules. This post explores our journey deploying eBPF in production.
Understanding eBPF
eBPF programs run in the kernel but are verified for safety before execution. They can attach to various hook points:
- Tracepoints: Stable kernel instrumentation points
- Kprobes: Dynamic kernel function instrumentation
- Uprobes: User-space function instrumentation
- XDP: High-performance packet processing
- Sockets: Network traffic monitoring and filtering
// Simple eBPF program to count syscalls per process
#include "vmlinux.h" // kernel types, incl. trace_event_raw_sys_enter (generate with: bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h)
#include <bpf/bpf_helpers.h>

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1024);
    __type(key, u32);
    __type(value, u64);
} syscall_counts SEC(".maps");

SEC("tracepoint/raw_syscalls/sys_enter")
int count_syscalls(struct trace_event_raw_sys_enter *ctx) {
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    u64 *count = bpf_map_lookup_elem(&syscall_counts, &pid);
    if (count) {
        __sync_fetch_and_add(count, 1);
    } else {
        u64 init_val = 1;
        bpf_map_update_elem(&syscall_counts, &pid, &init_val, BPF_ANY);
    }
    return 0;
}

char LICENSE[] SEC("license") = "GPL";
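To see the counts, user space loads the object, attaches the tracepoint, and polls the map. A minimal aya-based loader sketch, assuming the object file is named syscall_counter.o (any name works) and aya 0.12-style APIs:

use aya::{maps::HashMap, programs::TracePoint, Bpf};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Load the compiled eBPF object (hypothetical file name).
    let mut bpf = Bpf::load_file("syscall_counter.o")?;

    // Attach the tracepoint program defined in the C source above.
    let program: &mut TracePoint = bpf
        .program_mut("count_syscalls")
        .ok_or("program not found")?
        .try_into()?;
    program.load()?;
    program.attach("raw_syscalls", "sys_enter")?;

    // Periodically dump per-PID syscall counts from the shared map.
    let counts: HashMap<_, u32, u64> =
        HashMap::try_from(bpf.map("syscall_counts").ok_or("map not found")?)?;
    loop {
        std::thread::sleep(std::time::Duration::from_secs(5));
        for entry in counts.iter() {
            let (pid, count) = entry?;
            println!("pid {pid}: {count} syscalls");
        }
    }
}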
Use Case 1: Network Performance Monitoring
Traditional network monitoring relies on packet capture (e.g., tcpdump), which carries significant overhead. eBPF with XDP can process packets at line rate:
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct flow_key {
    __u32 src_ip;
    __u32 dst_ip;
    __u16 src_port;
    __u16 dst_port;
    __u8 protocol;
};

struct flow_metrics {
    __u64 packets;
    __u64 bytes;
    __u64 last_seen;
};

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 100000);
    __type(key, struct flow_key);
    __type(value, struct flow_metrics);
} flow_stats SEC(".maps");

SEC("xdp")
int monitor_traffic(struct xdp_md *ctx) {
    void *data = (void *)(long)ctx->data;
    void *data_end = (void *)(long)ctx->data_end;

    // Parse Ethernet header
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_PASS;
    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    // Parse IP header
    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_PASS;
    if (ip->ihl < 5) // reject malformed header lengths
        return XDP_PASS;

    // Zero the key explicitly: struct padding would otherwise hold stack
    // garbage and silently break hash-map lookups.
    struct flow_key key;
    __builtin_memset(&key, 0, sizeof(key));
    key.src_ip = ip->saddr;
    key.dst_ip = ip->daddr;
    key.protocol = ip->protocol;

    // Parse TCP header for ports
    if (ip->protocol == IPPROTO_TCP) {
        struct tcphdr *tcp = (void *)ip + (ip->ihl * 4);
        if ((void *)(tcp + 1) > data_end)
            return XDP_PASS;
        key.src_port = bpf_ntohs(tcp->source);
        key.dst_port = bpf_ntohs(tcp->dest);
    }

    // Update flow statistics
    struct flow_metrics *metrics = bpf_map_lookup_elem(&flow_stats, &key);
    if (metrics) {
        __sync_fetch_and_add(&metrics->packets, 1);
        __sync_fetch_and_add(&metrics->bytes, data_end - data);
        metrics->last_seen = bpf_ktime_get_ns();
    } else {
        struct flow_metrics new_metrics = {
            .packets = 1,
            .bytes = data_end - data,
            .last_seen = bpf_ktime_get_ns(),
        };
        bpf_map_update_elem(&flow_stats, &key, &new_metrics, BPF_ANY);
    }

    return XDP_PASS; // Allow packet to continue
}

char LICENSE[] SEC("license") = "GPL";
User-Space Consumer
use aya::maps::HashMap;
use aya::programs::{Xdp, XdpFlags};
use aya::{Bpf, Pod};
use std::net::Ipv4Addr;
use std::time::Duration;

#[repr(C)]
#[derive(Clone, Copy)]
struct FlowKey {
    src_ip: u32,
    dst_ip: u32,
    src_port: u16,
    dst_port: u16,
    protocol: u8,
}

#[repr(C)]
#[derive(Clone, Copy)]
struct FlowMetrics {
    packets: u64,
    bytes: u64,
    last_seen: u64,
}

// aya requires map key/value types to be plain-old-data.
unsafe impl Pod for FlowKey {}
unsafe impl Pod for FlowMetrics {}

fn monitor_flows() -> Result<(), Box<dyn std::error::Error>> {
    // Load eBPF program
    let mut bpf = Bpf::load_file("network_monitor.o")?;

    // Attach XDP program to interface
    let program: &mut Xdp = bpf
        .program_mut("monitor_traffic")
        .ok_or("program not found")?
        .try_into()?;
    program.load()?;
    program.attach("eth0", XdpFlags::default())?;

    // Access flow statistics map
    let flow_stats: HashMap<_, FlowKey, FlowMetrics> =
        HashMap::try_from(bpf.map("flow_stats").ok_or("map not found")?)?;

    loop {
        std::thread::sleep(Duration::from_secs(10));
        println!("\n=== Top Flows (by packets) ===");

        let mut flows: Vec<_> = flow_stats.iter().filter_map(|r| r.ok()).collect();
        flows.sort_by_key(|(_, m)| std::cmp::Reverse(m.packets));

        for (key, metrics) in flows.iter().take(10) {
            let src = Ipv4Addr::from(u32::from_be(key.src_ip));
            let dst = Ipv4Addr::from(u32::from_be(key.dst_ip));
            println!(
                "{}:{} -> {}:{} | {} packets, {} bytes",
                src, key.src_port, dst, key.dst_port, metrics.packets, metrics.bytes
            );
        }
    }
}
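One thing this consumer does not handle: flow_stats is a plain BPF_MAP_TYPE_HASH, so idle flows accumulate until the map is full. A hedged sketch of a user-space sweeper using the last_seen timestamp (assuming aya 0.12-style map generics; switching the map to BPF_MAP_TYPE_LRU_HASH, covered later, is the simpler fix):

use std::borrow::BorrowMut;

use aya::maps::{HashMap, MapData};

// Sketch: evict flows idle for more than 60 seconds. Assumes the FlowKey and
// FlowMetrics types above, with last_seen in nanoseconds from bpf_ktime_get_ns().
fn evict_stale_flows<T: BorrowMut<MapData>>(
    flow_stats: &mut HashMap<T, FlowKey, FlowMetrics>,
    now_ns: u64, // CLOCK_MONOTONIC in ns, the same clock bpf_ktime_get_ns() uses
) -> Result<(), Box<dyn std::error::Error>> {
    const IDLE_NS: u64 = 60_000_000_000; // 60 seconds
    let stale: Vec<FlowKey> = flow_stats
        .iter()
        .filter_map(|r| r.ok())
        .filter(|(_, m)| now_ns.saturating_sub(m.last_seen) > IDLE_NS)
        .map(|(k, _)| k)
        .collect();
    for key in &stale {
        // Best effort: the flow may legitimately reappear right after removal.
        flow_stats.remove(key)?;
    }
    Ok(())
}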
Use Case 2: Security Monitoring
eBPF can detect suspicious behavior by monitoring syscalls and file access:
#include "vmlinux.h" // kernel types, incl. struct cred and pt_regs
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> // PT_REGS_PARM* (build with -D__TARGET_ARCH_x86 or your arch)

struct event {
    u32 pid;
    u32 uid;
    char comm[16];
    char filename[256];
};

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
} events SEC(".maps");

// Monitor suspicious file access patterns
SEC("kprobe/do_sys_openat2")
int trace_file_open(struct pt_regs *ctx) {
    struct event e = {};
    u64 pid_tgid = bpf_get_current_pid_tgid();
    e.pid = pid_tgid >> 32;
    e.uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;
    bpf_get_current_comm(&e.comm, sizeof(e.comm));

    // Get filename from the second syscall argument
    const char *filename = (const char *)PT_REGS_PARM2(ctx);
    bpf_probe_read_user_str(&e.filename, sizeof(e.filename), filename);

    // Check for suspicious patterns (bpf_strncmp requires kernel >= 5.17)
    char sensitive_path[] = "/etc/shadow";
    if (bpf_strncmp(e.filename, sizeof(sensitive_path), sensitive_path) == 0) {
        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &e, sizeof(e));
    }
    return 0;
}

// Monitor privilege escalation attempts
SEC("kprobe/commit_creds")
int trace_privilege_escalation(struct pt_regs *ctx) {
    u64 uid_gid = bpf_get_current_uid_gid();
    u32 old_uid = uid_gid & 0xFFFFFFFF;

    // New credentials are the first function argument
    struct cred *new_cred = (struct cred *)PT_REGS_PARM1(ctx);
    u32 new_uid;
    bpf_probe_read_kernel(&new_uid, sizeof(new_uid), &new_cred->uid);

    // Alert on privilege escalation to root
    if (old_uid != 0 && new_uid == 0) {
        struct event e = {};
        e.pid = bpf_get_current_pid_tgid() >> 32;
        e.uid = old_uid;
        bpf_get_current_comm(&e.comm, sizeof(e.comm));
        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &e, sizeof(e));
    }
    return 0;
}

char LICENSE[] SEC("license") = "GPL";
Event Processing in User-Space
use aya::maps::perf::AsyncPerfEventArray; // requires aya's async_tokio feature
use aya::programs::KProbe;
use aya::util::online_cpus;
use aya::Bpf;
use bytes::BytesMut;
use tokio::task;

#[repr(C)]
#[derive(Debug, Clone)]
struct Event {
    pid: u32,
    uid: u32,
    comm: [u8; 16],
    filename: [u8; 256],
}

async fn process_security_events() -> Result<(), Box<dyn std::error::Error>> {
    let mut bpf = Bpf::load_file("security_monitor.o")?;

    // Attach kprobes
    let program: &mut KProbe = bpf
        .program_mut("trace_file_open")
        .ok_or("program not found")?
        .try_into()?;
    program.load()?;
    program.attach("do_sys_openat2", 0)?;

    let program: &mut KProbe = bpf
        .program_mut("trace_privilege_escalation")
        .ok_or("program not found")?
        .try_into()?;
    program.load()?;
    program.attach("commit_creds", 0)?;

    // take_map gives an owned handle so the per-CPU ring buffers can move
    // into the spawned tasks.
    let mut perf_array =
        AsyncPerfEventArray::try_from(bpf.take_map("events").ok_or("map not found")?)?;
    for cpu_id in online_cpus()? {
        let mut buf = perf_array.open(cpu_id, None)?;
        task::spawn(async move {
            let mut buffers = (0..10)
                .map(|_| BytesMut::with_capacity(1024))
                .collect::<Vec<_>>();
            loop {
                let events = buf.read_events(&mut buffers).await.unwrap();
                for buf in buffers.iter_mut().take(events.read) {
                    let event = unsafe {
                        std::ptr::read_unaligned(buf.as_ptr() as *const Event)
                    };
                    handle_security_event(event);
                }
            }
        });
    }
    Ok(())
}
fn handle_security_event(event: Event) {
    let comm = String::from_utf8_lossy(&event.comm)
        .trim_end_matches('\0')
        .to_string();
    let filename = String::from_utf8_lossy(&event.filename)
        .trim_end_matches('\0')
        .to_string();

    println!(
        "SECURITY EVENT: pid={}, uid={}, comm={}, file={}",
        event.pid, event.uid, comm, filename
    );

    // Send alert to security monitoring system (see sketch below)
    send_alert(SecurityAlert {
        timestamp: chrono::Utc::now(),
        event_type: "suspicious_file_access".to_string(),
        pid: event.pid,
        uid: event.uid,
        process: comm,
        details: format!("Access to {}", filename),
    });
}
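send_alert and SecurityAlert above are environment-specific, not a fixed API. One hedged sketch of what they might look like, with plain logging standing in for a real transport:

use chrono::{DateTime, Utc};

// Hypothetical alert payload; field names mirror the call site above.
struct SecurityAlert {
    timestamp: DateTime<Utc>,
    event_type: String,
    pid: u32,
    uid: u32,
    process: String,
    details: String,
}

fn send_alert(alert: SecurityAlert) {
    // Placeholder transport: log locally. A real deployment would POST to a
    // SIEM endpoint or publish to a message queue instead.
    println!(
        "[ALERT {}] {} pid={} uid={} process={} details={}",
        alert.timestamp, alert.event_type, alert.pid, alert.uid, alert.process, alert.details
    );
}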
Use Case 3: Application Performance Profiling
eBPF can profile applications without modifying code or recompiling:
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h> // struct bpf_perf_event_data
#include <linux/perf_event.h>     // PERF_MAX_STACK_DEPTH
#include <bpf/bpf_helpers.h>

struct stack_trace_key {
    __u32 pid;
    int user_stack_id;
    int kernel_stack_id;
};

struct {
    __uint(type, BPF_MAP_TYPE_STACK_TRACE);
    __uint(max_entries, 10000);
    __uint(key_size, sizeof(__u32));
    __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(__u64));
} stack_traces SEC(".maps");

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 10000);
    __type(key, struct stack_trace_key);
    __type(value, __u64);
} stack_counts SEC(".maps");

SEC("perf_event")
int profile_cpu(struct bpf_perf_event_data *ctx) {
    __u64 pid_tgid = bpf_get_current_pid_tgid();
    __u32 pid = pid_tgid >> 32;

    // Filter for a specific process if needed
    // if (pid != TARGET_PID) return 0;

    struct stack_trace_key key = {
        .pid = pid,
        .user_stack_id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK),
        .kernel_stack_id = bpf_get_stackid(ctx, &stack_traces, 0),
    };

    __u64 *count = bpf_map_lookup_elem(&stack_counts, &key);
    if (count) {
        __sync_fetch_and_add(count, 1);
    } else {
        __u64 init_val = 1;
        bpf_map_update_elem(&stack_counts, &key, &init_val, BPF_ANY);
    }
    return 0;
}

char LICENSE[] SEC("license") = "GPL";
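A perf_event program is inert until user space opens a sampling perf event on each CPU and attaches the program to it, then drains stack_counts. A hedged sketch using aya (names per aya 0.12; PERF_COUNT_SW_CPU_CLOCK is the standard value 0 from linux/perf_event.h; symbolizing stack IDs into function names is omitted):

use aya::maps::HashMap;
use aya::programs::perf_event::{PerfEvent, PerfEventScope, PerfTypeId, SamplePolicy};
use aya::util::online_cpus;
use aya::{Bpf, Pod};

#[repr(C)]
#[derive(Clone, Copy)]
struct StackTraceKey {
    pid: u32,
    user_stack_id: i32,
    kernel_stack_id: i32,
}
unsafe impl Pod for StackTraceKey {}

// From linux/perf_event.h: PERF_COUNT_SW_CPU_CLOCK == 0.
const PERF_COUNT_SW_CPU_CLOCK: u64 = 0;

fn run_profiler() -> Result<(), Box<dyn std::error::Error>> {
    let mut bpf = Bpf::load_file("profiler.o")?; // hypothetical object name
    let program: &mut PerfEvent = bpf
        .program_mut("profile_cpu")
        .ok_or("program not found")?
        .try_into()?;
    program.load()?;

    // Sample every CPU at 99 Hz (an odd frequency avoids lockstep with timers).
    for cpu in online_cpus()? {
        program.attach(
            PerfTypeId::Software,
            PERF_COUNT_SW_CPU_CLOCK,
            PerfEventScope::AllProcessesOneCpu { cpu },
            SamplePolicy::Frequency(99),
        )?;
    }

    // Drain aggregated counts after a sampling window; resolving stack IDs to
    // symbols (via the stack_traces map plus /proc/<pid>/maps) is left out.
    std::thread::sleep(std::time::Duration::from_secs(30));
    let counts: HashMap<_, StackTraceKey, u64> =
        HashMap::try_from(bpf.map("stack_counts").ok_or("map not found")?)?;
    for entry in counts.iter() {
        let (key, hits) = entry?;
        println!(
            "pid={} user_stack={} kernel_stack={} samples={}",
            key.pid, key.user_stack_id, key.kernel_stack_id, hits
        );
    }
    Ok(())
}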
Performance Considerations
eBPF programs sit on hot kernel paths, so they must stay cheap:
1. Map Size Limits
// ❌ Bad: Oversized map with no eviction
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1000000);        // Too large
    __type(key, u64);
    __type(value, struct large_struct);  // ~1KB per entry x 1M entries = ~1GB pinned kernel memory
} huge_map SEC(".maps");

// ✅ Good: Bounded, with automatic eviction of cold entries
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 10000);          // Reasonable limit
    __type(key, u64);
    __type(value, struct metrics);
} bounded_map SEC(".maps");
2. Minimize Per-Packet Work
// ❌ Bad: Too much work per packet (the helpers here are placeholders for illustration)
SEC("xdp")
int slow_packet_processing(struct xdp_md *ctx) {
    // Multiple hash lookups
    for (int i = 0; i < 10; i++) {
        do_expensive_lookup(i);
    }
    // Complex string operations
    do_regex_matching(packet_data);
    return XDP_PASS;
}

// ✅ Good: Fast path for every packet, deep analysis on a 1% sample
SEC("xdp")
int fast_packet_processing(struct xdp_md *ctx) {
    // Sample 1 in 100 packets for deep analysis
    if (bpf_get_prandom_u32() % 100 == 0) {
        do_deep_analysis(ctx);
    }
    // Fast path for all packets
    update_counters(ctx);
    return XDP_PASS;
}
3. Use Per-CPU Maps for Lock-Free Updates
// Minimal stats struct for the per-CPU example
struct stats {
    u64 packets;
    u64 bytes;
};

struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 1);
    __type(key, u32);
    __type(value, struct stats);
} percpu_stats SEC(".maps");

SEC("xdp")
int lockfree_stats(struct xdp_md *ctx) {
    u32 key = 0;
    struct stats *s = bpf_map_lookup_elem(&percpu_stats, &key);
    if (s) {
        // No atomic operations needed - each CPU writes its own copy
        s->packets++;
        s->bytes += (ctx->data_end - ctx->data);
    }
    return XDP_PASS;
}
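The trade-off: user space now sees one value per CPU and must sum them itself. A sketch using aya's PerCpuArray, assuming the map above and a Rust Stats mirror of struct stats:

use aya::maps::PerCpuArray;
use aya::{Bpf, Pod};

#[repr(C)]
#[derive(Clone, Copy, Default)]
struct Stats {
    packets: u64,
    bytes: u64,
}
unsafe impl Pod for Stats {}

fn read_percpu_stats(bpf: &Bpf) -> Result<Stats, Box<dyn std::error::Error>> {
    let stats: PerCpuArray<_, Stats> =
        PerCpuArray::try_from(bpf.map("percpu_stats").ok_or("map not found")?)?;
    // Index 0 holds one Stats value per CPU; fold them into a single total.
    let per_cpu = stats.get(&0, 0)?;
    let mut total = Stats::default();
    for s in per_cpu.iter() {
        total.packets += s.packets;
        total.bytes += s.bytes;
    }
    Ok(total)
}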
Production Deployment Patterns
Gradual Rollout
use aya::Bpf;
use std::time::Duration;

struct EbpfDeployment {
    program_path: String,
    target_interfaces: Vec<String>,
}

impl EbpfDeployment {
    async fn deploy_gradually(&self) -> Result<(), Box<dyn std::error::Error>> {
        let bpf = Bpf::load_file(&self.program_path)?;

        // Deploy to canary interface first
        self.deploy_to_interface(&bpf, &self.target_interfaces[0]).await?;

        // Monitor for 5 minutes
        tokio::time::sleep(Duration::from_secs(300)).await;

        if self.check_health()? {
            // Deploy to remaining interfaces
            for iface in &self.target_interfaces[1..] {
                self.deploy_to_interface(&bpf, iface).await?;
                tokio::time::sleep(Duration::from_secs(60)).await;
            }
        } else {
            self.rollback(&self.target_interfaces[0]).await?;
            return Err("Health check failed on canary".into());
        }
        Ok(())
    }

    fn check_health(&self) -> Result<bool, Box<dyn std::error::Error>> {
        // Check metrics: packet drops, errors, CPU usage
        let metrics = self.get_metrics()?;
        Ok(metrics.error_rate < 0.001
            && metrics.cpu_usage < 20.0
            && metrics.packet_drop_rate < 0.0001)
    }

    // deploy_to_interface(), rollback(), and get_metrics() are
    // deployment-specific; a sketch of get_metrics() follows below.
}
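The health-check thresholds only work if get_metrics() returns comparable numbers. A hedged sketch of its shape (the struct and its field sources are hypothetical; real values might come from Prometheus or from bpftool prog show run-time statistics):

// Hypothetical health metrics consumed by check_health() above.
struct HealthMetrics {
    error_rate: f64,       // fraction of events that failed processing
    cpu_usage: f64,        // percent CPU consumed by the eBPF path
    packet_drop_rate: f64, // fraction of packets dropped unexpectedly
}

impl EbpfDeployment {
    fn get_metrics(&self) -> Result<HealthMetrics, Box<dyn std::error::Error>> {
        // Placeholder values; in practice, query your metrics pipeline, read
        // per-CPU counter maps, or parse `bpftool prog show` run_time stats
        // (with kernel.bpf_stats_enabled=1).
        Ok(HealthMetrics {
            error_rate: 0.0,
            cpu_usage: 0.0,
            packet_drop_rate: 0.0,
        })
    }
}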
Monitoring and Alerting
use prometheus::{Counter, Histogram, HistogramOpts, Registry};
use std::time::Duration;

struct EbpfMetrics {
    events_processed: Counter,
    processing_latency: Histogram,
    map_full_errors: Counter,
    verifier_errors: Counter,
}

impl EbpfMetrics {
    fn new(registry: &Registry) -> Self {
        let metrics = EbpfMetrics {
            events_processed: Counter::new("ebpf_events_total", "Total events processed").unwrap(),
            processing_latency: Histogram::with_opts(HistogramOpts::new(
                "ebpf_processing_seconds",
                "Event processing latency",
            ))
            .unwrap(),
            map_full_errors: Counter::new("ebpf_map_full_total", "Map full errors").unwrap(),
            verifier_errors: Counter::new("ebpf_verifier_errors_total", "Verifier errors during load")
                .unwrap(),
        };
        // Register everything so the scrape endpoint actually exposes it.
        registry.register(Box::new(metrics.events_processed.clone())).unwrap();
        registry.register(Box::new(metrics.processing_latency.clone())).unwrap();
        registry.register(Box::new(metrics.map_full_errors.clone())).unwrap();
        registry.register(Box::new(metrics.verifier_errors.clone())).unwrap();
        metrics
    }

    fn record_event(&self, duration: Duration) {
        self.events_processed.inc();
        self.processing_latency.observe(duration.as_secs_f64());
    }
}
Debugging eBPF Programs
# Check loaded programs
bpftool prog list
# Dump program bytecode
bpftool prog dump xlated id 123
# Inspect maps
bpftool map list
bpftool map dump id 456
# Trace program execution
bpftool prog tracelog
Using bpf_printk for debugging:
SEC("xdp")
int debug_program(struct xdp_md *ctx) {
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
// Debug output (expensive, use sparingly)
bpf_printk("Processing packet, size: %d\n", data_end - data);
// Check with: cat /sys/kernel/debug/tracing/trace_pipe
return XDP_PASS;
}
Lessons Learned
What Worked Well
- XDP for DDoS Mitigation: Dropped malicious traffic before it reached the kernel network stack, yielding roughly a 10x performance improvement
- Security Monitoring: Detected intrusions without agent overhead
- Performance Profiling: Found hotspots in production without debug builds
Challenges
- Kernel Version Compatibility: Different kernels have different BPF features
- Verifier Limitations: The verifier rejects some programs that are actually valid
- Debugging Difficulty: Limited debugging tools compared to user-space
- Map Size Management: Requires careful capacity planning
Best Practices
- CO-RE (Compile Once, Run Everywhere): Use BTF for portability
- Gradual Rollout: Test on canary systems first
- Comprehensive Monitoring: Track all metrics, errors, and edge cases
- Resource Limits: Set appropriate map sizes and event rates
- Fallback Mechanisms: Always have non-eBPF fallback
Conclusion
eBPF is a powerful tool for production systems, enabling:
- High-performance observability without application changes
- Security monitoring at the kernel level
- Network optimization with minimal overhead
- Dynamic tracing for debugging production issues
The key to successful eBPF adoption is understanding its constraints, testing thoroughly, and deploying gradually with comprehensive monitoring. When used correctly, eBPF provides capabilities that were previously impossible or impractical.