|
| 1 | +//go:build linux |
| 2 | +// +build linux |
| 3 | + |
| 4 | +package auditd |
| 5 | + |
| 6 | +import ( |
| 7 | + "context" |
| 8 | + "os" |
| 9 | + "sync" |
| 10 | + "time" |
| 11 | + |
| 12 | + libaudit "github.com/elastic/go-libaudit/v2" |
| 13 | + "github.com/elastic/go-libaudit/v2/auparse" |
| 14 | + "github.com/threatwinds/go-sdk/plugins" |
| 15 | + "github.com/utmstack/UTMStack/agent/utils" |
| 16 | +) |
| 17 | + |
// AuditdCollector collects Linux Audit events via netlink multicast
type AuditdCollector struct {
	client      auditReceiver         // netlink audit client; auditReceiver is declared elsewhere in this package
	reassembler *libaudit.Reassembler // groups related audit records into complete events before emission
	cancel      context.CancelFunc    // cancels the per-run context created in runAuditClient; invoked by Stop
	mu          sync.Mutex            // guards client, reassembler, and cancel (struct must not be copied)
}
| 25 | + |
| 26 | +// New creates a new AuditdCollector |
| 27 | +func New() *AuditdCollector { |
| 28 | + return &AuditdCollector{} |
| 29 | +} |
| 30 | + |
| 31 | +// Name returns the collector name |
| 32 | +func (a *AuditdCollector) Name() string { |
| 33 | + return "auditd" |
| 34 | +} |
| 35 | + |
| 36 | +// Start begins collecting audit events and sending them to the queue |
| 37 | +func (a *AuditdCollector) Start(ctx context.Context, queue chan *plugins.Log) { |
| 38 | + // Preflight check for audit capability |
| 39 | + if err := checkAuditCapability(); err != nil { |
| 40 | + utils.Logger.ErrorF("auditd: preflight check failed: %v", err) |
| 41 | + return |
| 42 | + } |
| 43 | + |
| 44 | + host, err := os.Hostname() |
| 45 | + if err != nil { |
| 46 | + utils.Logger.ErrorF("auditd: error getting hostname: %v", err) |
| 47 | + host = "unknown" |
| 48 | + } |
| 49 | + |
| 50 | + restartDelay := auditdRestartDelay |
| 51 | + |
| 52 | + for { |
| 53 | + select { |
| 54 | + case <-ctx.Done(): |
| 55 | + utils.Logger.Info("auditd collector stopping due to context cancellation") |
| 56 | + return |
| 57 | + default: |
| 58 | + } |
| 59 | + |
| 60 | + exitCode := a.runAuditClient(ctx, host, queue) |
| 61 | + |
| 62 | + if exitCode == 0 { |
| 63 | + utils.Logger.Info("auditd client exited normally") |
| 64 | + } else { |
| 65 | + utils.Logger.ErrorF("auditd client exited with code %d, restarting in %v", exitCode, restartDelay) |
| 66 | + } |
| 67 | + |
| 68 | + select { |
| 69 | + case <-ctx.Done(): |
| 70 | + return |
| 71 | + case <-time.After(restartDelay): |
| 72 | + } |
| 73 | + |
| 74 | + // Exponential backoff |
| 75 | + restartDelay *= 2 |
| 76 | + if restartDelay > auditdMaxRestartDelay { |
| 77 | + restartDelay = auditdMaxRestartDelay |
| 78 | + } |
| 79 | + } |
| 80 | +} |
| 81 | + |
| 82 | +// runAuditClient creates the audit client and runs the receive loop |
| 83 | +func (a *AuditdCollector) runAuditClient(ctx context.Context, host string, queue chan *plugins.Log) int { |
| 84 | + a.mu.Lock() |
| 85 | + clientCtx, cancel := context.WithCancel(ctx) |
| 86 | + a.cancel = cancel |
| 87 | + |
| 88 | + // Attempt to set kernel backlog limit to prevent event loss under high load. |
| 89 | + // This requires CAP_AUDIT_CONTROL; log warning if it fails but continue. |
| 90 | + if err := setKernelBacklogLimit(kernelBacklogLimit); err != nil { |
| 91 | + utils.Logger.ErrorF("auditd: failed to set kernel backlog limit to %d: %v (continuing with default)", kernelBacklogLimit, err) |
| 92 | + } else { |
| 93 | + utils.Logger.Info("auditd: kernel backlog limit set to %d", kernelBacklogLimit) |
| 94 | + } |
| 95 | + |
| 96 | + // Set backlog wait time to 0 to prevent audited processes from blocking |
| 97 | + // when the audit backlog queue is full. The kernel will drop events instead. |
| 98 | + // This is the "kernel" backpressure mitigation strategy from Elastic Auditbeat. |
| 99 | + if err := setBacklogWaitTime(0); err != nil { |
| 100 | + utils.Logger.ErrorF("auditd: failed to set backlog wait time to 0: %v (continuing)", err) |
| 101 | + } else { |
| 102 | + utils.Logger.Info("auditd: backlog wait time set to 0 (non-blocking mode)") |
| 103 | + } |
| 104 | + |
| 105 | + // Create multicast audit client |
| 106 | + client, err := newAuditClient() |
| 107 | + if err != nil { |
| 108 | + a.mu.Unlock() |
| 109 | + utils.Logger.ErrorF("auditd: error creating audit client: %v", err) |
| 110 | + return -1 |
| 111 | + } |
| 112 | + a.client = client |
| 113 | + |
| 114 | + // Create event stream for reassembled events |
| 115 | + stream := newEventStream(queue, host) |
| 116 | + |
| 117 | + // Create reassembler |
| 118 | + reassembler, err := libaudit.NewReassembler(reassemblerMaxInFlight, reassemblerTimeout, stream) |
| 119 | + if err != nil { |
| 120 | + client.Close() |
| 121 | + a.mu.Unlock() |
| 122 | + utils.Logger.ErrorF("auditd: error creating reassembler: %v", err) |
| 123 | + return -1 |
| 124 | + } |
| 125 | + a.reassembler = reassembler |
| 126 | + a.mu.Unlock() |
| 127 | + |
| 128 | + utils.Logger.Info("auditd collector started (netlink multicast)") |
| 129 | + |
| 130 | + // Start maintenance goroutine for reassembler |
| 131 | + go a.runMaintenance(clientCtx) |
| 132 | + |
| 133 | + // Main receive loop |
| 134 | + for { |
| 135 | + select { |
| 136 | + case <-clientCtx.Done(): |
| 137 | + a.cleanup() |
| 138 | + return 0 |
| 139 | + default: |
| 140 | + } |
| 141 | + |
| 142 | + // Receive with non-blocking to allow checking context |
| 143 | + msg, err := client.Receive(false) |
| 144 | + if err != nil { |
| 145 | + utils.Logger.ErrorF("auditd: error receiving message: %v", err) |
| 146 | + a.cleanup() |
| 147 | + return -1 |
| 148 | + } |
| 149 | + |
| 150 | + if msg == nil { |
| 151 | + // No message available, brief sleep to avoid busy loop |
| 152 | + time.Sleep(10 * time.Millisecond) |
| 153 | + continue |
| 154 | + } |
| 155 | + |
| 156 | + // Parse message type from raw data |
| 157 | + msgType := auparse.AuditMessageType(msg.Type) |
| 158 | + |
| 159 | + // Push to reassembler for event grouping |
| 160 | + if err := reassembler.Push(msgType, msg.Data); err != nil { |
| 161 | + utils.Logger.ErrorF("auditd: error pushing to reassembler: %v", err) |
| 162 | + } |
| 163 | + } |
| 164 | +} |
| 165 | + |
| 166 | +// runMaintenance periodically calls Maintain() to flush stale events |
| 167 | +func (a *AuditdCollector) runMaintenance(ctx context.Context) { |
| 168 | + ticker := time.NewTicker(maintainInterval) |
| 169 | + defer ticker.Stop() |
| 170 | + |
| 171 | + for { |
| 172 | + select { |
| 173 | + case <-ctx.Done(): |
| 174 | + return |
| 175 | + case <-ticker.C: |
| 176 | + a.mu.Lock() |
| 177 | + if a.reassembler != nil { |
| 178 | + if err := a.reassembler.Maintain(); err != nil { |
| 179 | + utils.Logger.ErrorF("auditd: error in reassembler maintenance: %v", err) |
| 180 | + } |
| 181 | + } |
| 182 | + a.mu.Unlock() |
| 183 | + } |
| 184 | + } |
| 185 | +} |
| 186 | + |
| 187 | +// cleanup closes the client and reassembler |
| 188 | +func (a *AuditdCollector) cleanup() { |
| 189 | + a.mu.Lock() |
| 190 | + defer a.mu.Unlock() |
| 191 | + |
| 192 | + if a.reassembler != nil { |
| 193 | + a.reassembler.Close() |
| 194 | + a.reassembler = nil |
| 195 | + } |
| 196 | + if a.client != nil { |
| 197 | + a.client.Close() |
| 198 | + a.client = nil |
| 199 | + } |
| 200 | +} |
| 201 | + |
| 202 | +// Stop stops the collector |
| 203 | +func (a *AuditdCollector) Stop() { |
| 204 | + a.mu.Lock() |
| 205 | + defer a.mu.Unlock() |
| 206 | + |
| 207 | + if a.cancel != nil { |
| 208 | + a.cancel() |
| 209 | + } |
| 210 | +} |