Handle malformed events during JetStream replay with proper error reporting
All checks were successful
CI / build (pull_request) Successful in 17s

Add ReplayError and ReplayResult types to capture information about
malformed events encountered during replay. This allows callers to
inspect and handle corrupted data rather than having it silently skipped.

Key changes:
- Add ReplayError type with sequence number, raw data, and underlying error
- Add ReplayResult type containing both successfully parsed events and errors
- Add EventStoreWithErrors interface for stores that can report replay errors
- Implement GetEventsWithErrors on JetStreamEventStore
- Update GetEvents to maintain backward compatibility (still skips malformed)
- Add comprehensive unit tests for the new types

This addresses the issue of silent data loss during event-sourced replay
by giving callers visibility into data quality issues.

Closes #39

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-10 15:32:46 +01:00
parent 8df36cac7a
commit e77a3a9868
3 changed files with 218 additions and 13 deletions

View File

@@ -33,7 +33,8 @@ func DefaultJetStreamConfig() JetStreamConfig {
}
}
// JetStreamEventStore implements EventStore using NATS JetStream for persistence
// JetStreamEventStore implements EventStore using NATS JetStream for persistence.
// It also implements EventStoreWithErrors to report malformed events during replay.
type JetStreamEventStore struct {
js nats.JetStreamContext
streamName string
@@ -139,18 +140,18 @@ func (jes *JetStreamEventStore) getLatestVersionLocked(actorID string) (int64, e
return version, nil
}
// Fetch from JetStream
events, err := jes.getEventsInternal(actorID, 0)
// Fetch from JetStream - use internal method that returns result
result, err := jes.getEventsWithErrorsInternal(actorID, 0)
if err != nil {
return 0, err
}
if len(events) == 0 {
if len(result.Events) == 0 {
return 0, nil
}
latestVersion := int64(0)
for _, event := range events {
for _, event := range result.Events {
if event.Version > latestVersion {
latestVersion = event.Version
}
@@ -162,13 +163,27 @@ func (jes *JetStreamEventStore) getLatestVersionLocked(actorID string) (int64, e
return latestVersion, nil
}
// GetEvents retrieves all events for an actor since a version
// GetEvents retrieves all events for an actor since a version.
// Note: This method silently skips malformed events for backward compatibility.
// Use GetEventsWithErrors to receive information about malformed events.
func (jes *JetStreamEventStore) GetEvents(actorID string, fromVersion int64) ([]*aether.Event, error) {
return jes.getEventsInternal(actorID, fromVersion)
result, err := jes.getEventsWithErrorsInternal(actorID, fromVersion)
if err != nil {
return nil, err
}
return result.Events, nil
}
// getEventsInternal is the internal implementation of GetEvents
func (jes *JetStreamEventStore) getEventsInternal(actorID string, fromVersion int64) ([]*aether.Event, error) {
// GetEventsWithErrors retrieves events for an actor and reports any malformed
// events encountered. This method allows callers to decide how to handle
// corrupted data rather than silently skipping it.
func (jes *JetStreamEventStore) GetEventsWithErrors(actorID string, fromVersion int64) (*aether.ReplayResult, error) {
return jes.getEventsWithErrorsInternal(actorID, fromVersion)
}
// getEventsWithErrorsInternal is the internal implementation that tracks both
// successfully parsed events and errors for malformed events.
func (jes *JetStreamEventStore) getEventsWithErrorsInternal(actorID string, fromVersion int64) (*aether.ReplayResult, error) {
// Create subject filter for this actor
subject := fmt.Sprintf("%s.events.%s.%s",
jes.streamName,
@@ -182,7 +197,10 @@ func (jes *JetStreamEventStore) getEventsInternal(actorID string, fromVersion in
}
defer consumer.Unsubscribe()
var events []*aether.Event
result := &aether.ReplayResult{
Events: make([]*aether.Event, 0),
Errors: make([]aether.ReplayError, 0),
}
// Fetch messages in batches
for {
@@ -197,12 +215,24 @@ func (jes *JetStreamEventStore) getEventsInternal(actorID string, fromVersion in
for _, msg := range msgs {
var event aether.Event
if err := json.Unmarshal(msg.Data, &event); err != nil {
continue // Skip malformed events
// Record the error with context instead of silently skipping
metadata, _ := msg.Metadata()
seqNum := uint64(0)
if metadata != nil {
seqNum = metadata.Sequence.Stream
}
result.Errors = append(result.Errors, aether.ReplayError{
SequenceNumber: seqNum,
RawData: msg.Data,
Err: err,
})
msg.Ack() // Still ack to prevent redelivery
continue
}
// Filter by version
if event.Version > fromVersion {
events = append(events, &event)
result.Events = append(result.Events, &event)
}
msg.Ack()
@@ -213,7 +243,7 @@ func (jes *JetStreamEventStore) getEventsInternal(actorID string, fromVersion in
}
}
return events, nil
return result, nil
}
// GetLatestVersion returns the latest version for an actor
@@ -316,3 +346,6 @@ func sanitizeSubject(s string) string {
s = strings.ReplaceAll(s, ">", "_")
return s
}
// Compile-time check that JetStreamEventStore implements EventStoreWithErrors
var _ aether.EventStoreWithErrors = (*JetStreamEventStore)(nil)