Extended Berkeley Packet Filter (eBPF) has revolutionized how we observe and secure Linux systems. By allowing safe, sandboxed programs to run in the kernel, eBPF enables unprecedented visibility into system behavior without the risks and overhead of traditional kernel modules. This post explores our journey deploying eBPF in production.

Understanding eBPF

eBPF programs run in the kernel but are verified for safety before execution. They can attach to various hook points:

  • Tracepoints: Stable kernel instrumentation points
  • Kprobes: Dynamic kernel function instrumentation
  • Uprobes: User-space function instrumentation
  • XDP: High-performance packet processing
  • Sockets: Network traffic monitoring and filtering
// Simple eBPF program to count syscalls per process
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

// Hash map of u64 counters keyed by a u32. count_syscalls keys it with the
// upper 32 bits of bpf_get_current_pid_tgid().
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1024);  // capacity: at most 1024 distinct keys
    __type(key, u32);
    __type(value, u64);
} syscall_counts SEC(".maps");

// Tracepoint handler: bumps a per-process syscall counter in syscall_counts
// on every syscall entry. Always returns 0.
SEC("tracepoint/raw_syscalls/sys_enter")
int count_syscalls(struct trace_event_raw_sys_enter *ctx) {
    // Upper 32 bits of pid_tgid identify the process (thread group).
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    u64 *count = bpf_map_lookup_elem(&syscall_counts, &pid);

    if (!count) {
        // First syscall seen for this process: create a zeroed slot.
        // BPF_NOEXIST makes the insert a no-op if another CPU created the
        // entry between our lookup and this update, so a live counter is
        // never overwritten (BPF_ANY would reset it).
        u64 zero = 0;
        bpf_map_update_elem(&syscall_counts, &pid, &zero, BPF_NOEXIST);

        count = bpf_map_lookup_elem(&syscall_counts, &pid);
        if (!count)
            return 0;  // insert failed (e.g. map full); drop this sample
    }

    // Atomic add: the same entry can be updated from several CPUs at once.
    __sync_fetch_and_add(count, 1);

    return 0;
}

// License string emitted into the object's "license" section.
char LICENSE[] SEC("license") = "GPL";

Use Case 1: Network Performance Monitoring

Traditional network monitoring relies on packet capture (e.g., tcpdump), which has significant overhead. eBPF with XDP can process packets at line rate:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

// Key of the flow_stats map: one entry per (src, dst, ports, protocol) tuple.
// The fields add up to 13 bytes; with 4-byte alignment the compiler appends
// 3 bytes of trailing padding, which is part of the bytes the map hashes.
struct flow_key {
    __u32 src_ip;    // copied from iphdr.saddr (network byte order)
    __u32 dst_ip;    // copied from iphdr.daddr (network byte order)
    __u16 src_port;  // host byte order (bpf_ntohs applied); 0 for non-TCP
    __u16 dst_port;  // host byte order (bpf_ntohs applied); 0 for non-TCP
    __u8 protocol;   // copied from iphdr.protocol
};

// Value of the flow_stats map: running totals for one flow.
struct flow_metrics {
    __u64 packets;    // number of packets seen
    __u64 bytes;      // sum of (data_end - data) over those packets
    __u64 last_seen;  // bpf_ktime_get_ns() at the most recent packet
};

// Per-flow statistics shared between monitor_traffic (writer) and user space
// (reader). A plain hash map with room for 100000 flows.
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 100000);
    __type(key, struct flow_key);
    __type(value, struct flow_metrics);
} flow_stats SEC(".maps");

// XDP hook: accounts every IPv4 packet to its flow in flow_stats.
//
// Non-IPv4 frames and packets too short to parse are passed through
// uncounted. The program never drops traffic; every path returns XDP_PASS.
SEC("xdp")
int monitor_traffic(struct xdp_md *ctx) {
    void *data = (void *)(long)ctx->data;
    void *data_end = (void *)(long)ctx->data_end;
    __u64 pkt_len = data_end - data;

    // Parse Ethernet header (bounds check is required by the verifier)
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_PASS;

    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    // Parse IP header
    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_PASS;

    // ihl counts 32-bit words; anything below 5 is a malformed header and
    // would place the TCP header inside the IP header.
    if (ip->ihl < 5)
        return XDP_PASS;

    // Zero the whole key first: struct flow_key has trailing padding, and a
    // designated initializer is not guaranteed to clear it. The map hashes
    // every byte of the key, so stray padding bytes would split one flow
    // across several entries.
    struct flow_key key;
    __builtin_memset(&key, 0, sizeof(key));
    key.src_ip = ip->saddr;
    key.dst_ip = ip->daddr;
    key.protocol = ip->protocol;

    // Parse TCP header for ports (ports stay 0 for other protocols)
    if (ip->protocol == IPPROTO_TCP) {
        struct tcphdr *tcp = (void *)ip + (ip->ihl * 4);
        if ((void *)(tcp + 1) > data_end)
            return XDP_PASS;

        key.src_port = bpf_ntohs(tcp->source);
        key.dst_port = bpf_ntohs(tcp->dest);
    }

    // Update flow statistics
    struct flow_metrics *metrics = bpf_map_lookup_elem(&flow_stats, &key);

    if (metrics) {
        // Atomic adds: the same flow can be hit on several CPUs at once.
        __sync_fetch_and_add(&metrics->packets, 1);
        __sync_fetch_and_add(&metrics->bytes, pkt_len);
        metrics->last_seen = bpf_ktime_get_ns();
    } else {
        struct flow_metrics new_metrics = {
            .packets = 1,
            .bytes = pkt_len,
            .last_seen = bpf_ktime_get_ns(),
        };
        // BPF_NOEXIST: if another CPU created the entry since our lookup,
        // keep its totals instead of resetting the flow to a single packet.
        bpf_map_update_elem(&flow_stats, &key, &new_metrics, BPF_NOEXIST);
    }

    return XDP_PASS;  // Allow packet to continue
}

// License string emitted into the object's "license" section.
char LICENSE[] SEC("license") = "GPL";

User-Space Consumer

use aya::{Bpf, maps::HashMap};
use std::net::Ipv4Addr;
use std::time::Duration;

/// User-space mirror of the eBPF `struct flow_key`, used as the key type when
/// reading the `flow_stats` map. `#[repr(C)]` gives it C field order and layout.
// NOTE(review): aya's typed map wrappers presumably need a `Pod` impl for
// this type — confirm against the aya version in use.
#[repr(C)]
#[derive(Clone, Copy)]
struct FlowKey {
    src_ip: u32,   // passed through `u32::from_be` before display
    dst_ip: u32,   // passed through `u32::from_be` before display
    src_port: u16, // printed as-is
    dst_port: u16, // printed as-is
    protocol: u8,
}

/// User-space mirror of the eBPF `struct flow_metrics`, used as the value type
/// when reading the `flow_stats` map.
#[repr(C)]
#[derive(Clone, Copy)]
struct FlowMetrics {
    packets: u64,   // sort key for the "top flows" report
    bytes: u64,
    last_seen: u64, // not read by `monitor_flows`
}

/// Loads `network_monitor.o`, attaches its `monitor_traffic` XDP program to
/// `eth0`, then prints the ten busiest flows (by packet count) every ten
/// seconds.
///
/// # Errors
/// Returns an error if loading, attaching, or opening the `flow_stats` map
/// fails. Once the reporting loop starts the function never returns.
fn monitor_flows() -> Result<(), Box<dyn std::error::Error>> {
    const REPORT_INTERVAL: Duration = Duration::from_secs(10);
    const TOP_N: usize = 10;

    let mut bpf = Bpf::load_file("network_monitor.o")?;

    let xdp: &mut Xdp = bpf.program_mut("monitor_traffic")?.try_into()?;
    xdp.load()?;
    xdp.attach("eth0", XdpFlags::default())?;

    let flow_stats: HashMap<_, FlowKey, FlowMetrics> =
        HashMap::try_from(bpf.map("flow_stats")?)?;

    loop {
        std::thread::sleep(REPORT_INTERVAL);

        println!("\n=== Top Flows (by packets) ===");

        // Entries that fail to read are skipped; the report is best-effort.
        let mut ranked: Vec<_> = flow_stats.iter().flatten().collect();
        ranked.sort_by_key(|(_, stats)| std::cmp::Reverse(stats.packets));

        for (flow, stats) in ranked.iter().take(TOP_N) {
            // Addresses are stored in network byte order.
            let src = Ipv4Addr::from(u32::from_be(flow.src_ip));
            let dst = Ipv4Addr::from(u32::from_be(flow.dst_ip));

            println!(
                "{}:{} -> {}:{} | {} packets, {} bytes",
                src, flow.src_port, dst, flow.dst_port, stats.packets, stats.bytes
            );
        }
    }
}

Use Case 2: Security Monitoring

eBPF can detect suspicious behavior by monitoring syscalls and file access:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

// Record sent to user space through the events perf buffer.
struct event {
    u32 pid;             // upper 32 bits of bpf_get_current_pid_tgid()
    u32 uid;             // lower 32 bits of bpf_get_current_uid_gid()
    char comm[16];       // filled by bpf_get_current_comm()
    char filename[256];  // set by trace_file_open; left zeroed by trace_privilege_escalation
};

// Perf event array used by both kprobes below to push struct event records
// to user space via bpf_perf_event_output().
struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
} events SEC(".maps");

// Monitor suspicious file access patterns.
//
// Kprobe on do_sys_openat2: emits a struct event whenever the path being
// opened is exactly "/etc/shadow". Always returns 0.
SEC("kprobe/do_sys_openat2")
int trace_file_open(struct pt_regs *ctx) {
    // bpf_strncmp() requires its second string to be read-only, so the
    // pattern must be a static const (placed in .rodata); a writable stack
    // array is rejected by the verifier.
    static const char sensitive_path[] = "/etc/shadow";

    struct event e = {};

    u64 pid_tgid = bpf_get_current_pid_tgid();
    e.pid = pid_tgid >> 32;
    e.uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;

    bpf_get_current_comm(&e.comm, sizeof(e.comm));

    // Get filename from syscall arguments (second parameter, a user pointer)
    const char *filename = (const char *)PT_REGS_PARM2(ctx);
    if (bpf_probe_read_user_str(&e.filename, sizeof(e.filename), filename) < 0)
        return 0;  // path could not be read; nothing to compare

    // Check for suspicious patterns. Comparing sizeof(sensitive_path) bytes
    // includes the terminating NUL, so this is an exact match, not a prefix.
    if (bpf_strncmp(e.filename, sizeof(sensitive_path), sensitive_path) == 0) {
        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
                            &e, sizeof(e));
    }

    return 0;
}

// Monitor privilege escalation attempts.
//
// Kprobe on commit_creds: emits a struct event (filename left empty) when a
// task whose current uid is non-zero installs credentials with uid 0.
// Always returns 0.
SEC("kprobe/commit_creds")
int trace_privilege_escalation(struct pt_regs *ctx) {
    u64 uid_gid = bpf_get_current_uid_gid();
    u32 old_uid = uid_gid & 0xFFFFFFFF;

    // Get new credentials from function argument
    struct cred *new_cred = (struct cred *)PT_REGS_PARM1(ctx);
    u32 new_uid;

    // A failed read leaves the destination zeroed, which would look exactly
    // like "new uid is root". Bail out instead of raising a false alert.
    if (bpf_probe_read_kernel(&new_uid, sizeof(new_uid), &new_cred->uid) < 0)
        return 0;

    // Only a transition from non-root to root is interesting.
    if (old_uid == 0 || new_uid != 0)
        return 0;

    struct event e = {};
    e.pid = bpf_get_current_pid_tgid() >> 32;
    e.uid = old_uid;
    bpf_get_current_comm(&e.comm, sizeof(e.comm));

    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
                        &e, sizeof(e));

    return 0;
}

// License string emitted into the object's "license" section.
char LICENSE[] SEC("license") = "GPL";

Event Processing in User-Space

use aya::{Bpf, programs::KProbe, util::online_cpus};
use aya::maps::perf::AsyncPerfEventArray;
use bytes::BytesMut;
use tokio::task;

/// User-space mirror of the eBPF `struct event`; `#[repr(C)]` keeps the C
/// layout so a raw perf-buffer record can be reinterpreted as this type.
#[repr(C)]
#[derive(Debug, Clone)]
struct Event {
    pid: u32,
    uid: u32,
    comm: [u8; 16],      // NUL-padded process name
    filename: [u8; 256], // NUL-padded path
}

async fn process_security_events() -> Result<(), Box<dyn std::error::Error>> {
    let mut bpf = Bpf::load_file("security_monitor.o")?;

    // Attach kprobes
    let program: &mut KProbe = bpf.program_mut("trace_file_open")?.try_into()?;
    program.load()?;
    program.attach("do_sys_openat2", 0)?;

    let program: &mut KProbe = bpf.program_mut("trace_privilege_escalation")?.try_into()?;
    program.load()?;
    program.attach("commit_creds", 0)?;

    // Process events
    let mut perf_array = AsyncPerfEventArray::try_from(bpf.map_mut("events")?)?;

    for cpu_id in online_cpus()? {
        let mut buf = perf_array.open(cpu_id, None)?;

        task::spawn(async move {
            let mut buffers = (0..10)
                .map(|_| BytesMut::with_capacity(1024))
                .collect::<Vec<_>>();

            loop {
                let events = buf.read_events(&mut buffers).await.unwrap();

                for buf in buffers.iter_mut().take(events.read) {
                    let event = unsafe {
                        std::ptr::read_unaligned(buf.as_ptr() as *const Event)
                    };

                    handle_security_event(event);
                }
            }
        });
    }

    Ok(())
}

/// Logs one kernel event and forwards it as a `SecurityAlert`.
///
/// The kernel side emits two kinds of record through the same buffer: file
/// access events carry the opened path in `filename`, while privilege
/// escalation events leave `filename` zeroed. An empty filename is therefore
/// reported as a privilege escalation rather than as a file access.
fn handle_security_event(event: Event) {
    /// Decodes a NUL-terminated C buffer, stopping at the first NUL so bytes
    /// after the terminator can never leak into the result.
    fn c_str_lossy(raw: &[u8]) -> String {
        let end = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());
        String::from_utf8_lossy(&raw[..end]).into_owned()
    }

    let comm = c_str_lossy(&event.comm);
    let filename = c_str_lossy(&event.filename);

    println!(
        "SECURITY EVENT: pid={}, uid={}, comm={}, file={}",
        event.pid, event.uid, comm, filename
    );

    let (event_type, details) = if filename.is_empty() {
        (
            "privilege_escalation",
            "Credentials changed to uid 0".to_string(),
        )
    } else {
        ("suspicious_file_access", format!("Access to {}", filename))
    };

    // Send alert to security monitoring system
    send_alert(SecurityAlert {
        timestamp: chrono::Utc::now(),
        event_type: event_type.to_string(),
        pid: event.pid,
        uid: event.uid,
        process: comm,
        details,
    });
}

Use Case 3: Application Performance Profiling

eBPF can profile applications without modifying code or recompiling:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

// Key of the stack_counts map: one counter per (process, user stack,
// kernel stack) combination.
struct stack_trace_key {
    u32 pid;              // upper 32 bits of bpf_get_current_pid_tgid()
    int user_stack_id;    // return value of bpf_get_stackid(..., BPF_F_USER_STACK)
    int kernel_stack_id;  // return value of bpf_get_stackid(..., 0)
};

// Stack trace storage filled by bpf_get_stackid(); each value holds up to
// PERF_MAX_STACK_DEPTH 64-bit entries.
struct {
    __uint(type, BPF_MAP_TYPE_STACK_TRACE);
    __uint(max_entries, 10000);
    __uint(key_size, sizeof(u32));
    __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
} stack_traces SEC(".maps");

// Sample counter per stack_trace_key, incremented by profile_cpu.
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 10000);
    __type(key, struct stack_trace_key);
    __type(value, u64);
} stack_counts SEC(".maps");

// perf_event handler: on every sample, records the current user and kernel
// stacks and bumps the counter for that (pid, stacks) combination in
// stack_counts. Always returns 0.
SEC("perf_event")
int profile_cpu(struct bpf_perf_event_data *ctx) {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;

    // Filter for specific process if needed
    // if (pid != TARGET_PID) return 0;

    // The stack ids are stored exactly as bpf_get_stackid() returns them.
    struct stack_trace_key key = {
        .pid = pid,
        .user_stack_id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK),
        .kernel_stack_id = bpf_get_stackid(ctx, &stack_traces, 0),
    };

    u64 *count = bpf_map_lookup_elem(&stack_counts, &key);
    if (!count) {
        // First sample for this key: create a zeroed slot. BPF_NOEXIST keeps
        // an entry that another CPU created since our lookup, where BPF_ANY
        // would overwrite its count.
        u64 zero = 0;
        bpf_map_update_elem(&stack_counts, &key, &zero, BPF_NOEXIST);

        count = bpf_map_lookup_elem(&stack_counts, &key);
        if (!count)
            return 0;  // insert failed (e.g. map full); drop this sample
    }

    // Atomic add: samples for the same key can arrive on several CPUs.
    __sync_fetch_and_add(count, 1);

    return 0;
}

// License string emitted into the object's "license" section.
char LICENSE[] SEC("license") = "GPL";

Performance Considerations

eBPF programs must be highly optimized:

1. Map Size Limits

// ❌ Bad: limit so high the map is effectively unbounded
// (struct large_struct is not defined in this snippet)
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1000000);  // Too large
    __type(key, u64);
    __type(value, struct large_struct);  // 1KB per entry = 1GB total
} huge_map SEC(".maps");

// ✅ Good: Bounded with eviction (LRU hash map type, small entry limit)
// (struct metrics is not defined in this snippet)
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 10000);  // Reasonable limit
    __type(key, u64);
    __type(value, struct metrics);
} bounded_map SEC(".maps");

2. Minimize Per-Packet Work

// ❌ Bad: Too much work per packet
// Illustrative only: do_expensive_lookup, do_regex_matching and packet_data
// are placeholders that are not defined in this snippet.
SEC("xdp")
int slow_packet_processing(struct xdp_md *ctx) {
    // Multiple hash lookups (ten per packet)
    for (int i = 0; i < 10; i++) {
        do_expensive_lookup(i);
    }

    // Complex string operations
    do_regex_matching(packet_data);

    return XDP_PASS;
}

// ✅ Good: Fast path with sampling
// Illustrative only: do_deep_analysis and update_counters are placeholders
// that are not defined in this snippet.
SEC("xdp")
int fast_packet_processing(struct xdp_md *ctx) {
    // Sample 1 in 100 packets for deep analysis
    // (a pseudo-random draw, so the ratio holds on average)
    if (bpf_get_prandom_u32() % 100 == 0) {
        do_deep_analysis(ctx);
    }

    // Fast path for all packets
    update_counters(ctx);

    return XDP_PASS;
}

3. Use Per-CPU Maps for Lock-Free Updates

// Single-slot per-CPU array holding one struct stats, looked up with key 0
// by lockfree_stats. (struct stats is not defined in this snippet.)
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 1);
    __type(key, u32);
    __type(value, struct stats);
} percpu_stats SEC(".maps");

// XDP hook: adds the current packet to the packet and byte totals stored in
// slot 0 of percpu_stats, then lets the packet through.
SEC("xdp")
int lockfree_stats(struct xdp_md *ctx) {
    u32 slot = 0;
    struct stats *cpu_stats = bpf_map_lookup_elem(&percpu_stats, &slot);

    if (!cpu_stats)
        return XDP_PASS;

    // Plain (non-atomic) updates: the map is a per-CPU array.
    cpu_stats->packets++;
    cpu_stats->bytes += (ctx->data_end - ctx->data);

    return XDP_PASS;
}

Production Deployment Patterns

Gradual Rollout

use aya::Bpf;

/// Describes one staged rollout of an eBPF object file.
struct EbpfDeployment {
    /// Path of the object file passed to `Bpf::load_file`.
    program_path: String,
    /// Interfaces to deploy to, in order; the first entry is the canary.
    target_interfaces: Vec<String>,
}

impl EbpfDeployment {
    /// Rolls the program out in stages: canary interface first, a five-minute
    /// soak, a health check, then the remaining interfaces one minute apart.
    ///
    /// # Errors
    /// Returns an error if no target interface is configured, if loading or
    /// deploying fails, or if the canary health check fails (the canary is
    /// rolled back before the error is returned).
    async fn deploy_gradually(&self) -> Result<(), Box<dyn std::error::Error>> {
        // An empty interface list is a configuration error, not a panic.
        let (canary, remaining) = match self.target_interfaces.split_first() {
            Some(split) => split,
            None => return Err("No target interfaces configured".into()),
        };

        let bpf = Bpf::load_file(&self.program_path)?;

        // Deploy to canary interface first
        self.deploy_to_interface(&bpf, canary).await?;

        // Monitor for 5 minutes
        tokio::time::sleep(std::time::Duration::from_secs(300)).await;

        if !self.check_health()? {
            self.rollback(canary).await?;
            return Err("Health check failed on canary".into());
        }

        // Deploy to remaining interfaces, pausing between each
        for iface in remaining {
            self.deploy_to_interface(&bpf, iface).await?;
            tokio::time::sleep(std::time::Duration::from_secs(60)).await;
        }

        Ok(())
    }

    /// Reports whether current metrics are inside the rollout thresholds:
    /// error rate below 0.001, CPU usage below 20.0 and packet drop rate
    /// below 0.0001.
    ///
    /// # Errors
    /// Propagates any error from `get_metrics`.
    fn check_health(&self) -> Result<bool, Box<dyn std::error::Error>> {
        // Check metrics: packet drops, errors, CPU usage
        let metrics = self.get_metrics()?;

        Ok(metrics.error_rate < 0.001
            && metrics.cpu_usage < 20.0
            && metrics.packet_drop_rate < 0.0001)
    }
}

Monitoring and Alerting

use prometheus::{Counter, Histogram, Registry};

/// Prometheus instruments for the eBPF pipeline.
struct EbpfMetrics {
    /// `ebpf_events_total`: incremented once per `record_event` call.
    events_processed: Counter,
    /// `ebpf_processing_seconds`: per-event latency in seconds.
    processing_latency: Histogram,
    /// `ebpf_map_full_total`.
    map_full_errors: Counter,
    /// `ebpf_verifier_errors_total`.
    verifier_errors: Counter,
}

impl EbpfMetrics {
    fn new(registry: &Registry) -> Self {
        EbpfMetrics {
            events_processed: Counter::new(
                "ebpf_events_total",
                "Total events processed"
            ).unwrap(),
            processing_latency: Histogram::new(
                "ebpf_processing_seconds",
                "Event processing latency"
            ).unwrap(),
            map_full_errors: Counter::new(
                "ebpf_map_full_total",
                "Map full errors"
            ).unwrap(),
            verifier_errors: Counter::new(
                "ebpf_verifier_errors_total",
                "Verifier errors during load"
            ).unwrap(),
        }
    }

    fn record_event(&self, duration: Duration) {
        self.events_processed.inc();
        self.processing_latency.observe(duration.as_secs_f64());
    }
}

Debugging eBPF Programs

# Check loaded programs
bpftool prog list

# Dump program bytecode
bpftool prog dump xlated id 123

# Inspect maps
bpftool map list
bpftool map dump id 456

# Trace program execution
bpftool prog tracelog

Using bpf_printk for debugging:

// XDP hook that logs the size of every packet to the kernel trace buffer and
// passes the packet on unchanged.
SEC("xdp")
int debug_program(struct xdp_md *ctx) {
    void *data = (void *)(long)ctx->data;
    void *data_end = (void *)(long)ctx->data_end;

    // Debug output (expensive, use sparingly).
    // A pointer difference is long-sized, so print it with %ld and an
    // explicit cast rather than %d.
    bpf_printk("Processing packet, size: %ld\n", (long)(data_end - data));

    // Check with: cat /sys/kernel/debug/tracing/trace_pipe

    return XDP_PASS;
}

Lessons Learned

What Worked Well

  1. XDP for DDoS Mitigation: Dropped malicious traffic before it reached the network stack, 10x performance improvement
  2. Security Monitoring: Detected intrusions without agent overhead
  3. Performance Profiling: Found hotspots in production without debug builds

Challenges

  1. Kernel Version Compatibility: Different kernels have different BPF features
  2. Verifier Limitations: Some valid programs are rejected by the verifier
  3. Debugging Difficulty: Limited debugging tools compared to user-space
  4. Map Size Management: Requires careful capacity planning

Best Practices

  1. CO-RE (Compile Once, Run Everywhere): Use BTF for portability
  2. Gradual Rollout: Test on canary systems first
  3. Comprehensive Monitoring: Track all metrics, errors, and edge cases
  4. Resource Limits: Set appropriate map sizes and event rates
  5. Fallback Mechanisms: Always have a non-eBPF fallback

Conclusion

eBPF is a powerful tool for production systems, enabling:

  • High-performance observability without application changes
  • Security monitoring at the kernel level
  • Network optimization with minimal overhead
  • Dynamic tracing for debugging production issues

The key to successful eBPF adoption is understanding its constraints, testing thoroughly, and deploying gradually with comprehensive monitoring. When used correctly, eBPF provides capabilities that were previously impossible or impractical.