Transport Security

In a setup with several locations, Eventuate uses Akka Remoting for communication between locations. Akka Remoting supports TLS as transport protocol. In combination with mutual authentication of client and server, this prevents untrusted nodes from connecting to a replication network. The following configuration snippet is an example of how to set this up. For more details see Configuring SSL/TLS for Akka Remoting.

akka {
  remote {
    enabled-transports = [akka.remote.netty.ssl]
    netty.ssl.security {
      key-store = "/path/to/keystore.jks"
      trust-store = "/path/to/keystore.jks"

      key-store-password = ...
      key-password = ...
      trust-store-password = ...

      protocol = "TLSv1.2"

      enabled-algorithms = ["TLS_DHE_RSA_WITH_AES_256_GCM_SHA384"]

      random-number-generator = "AES256CounterSecureRNG"
      require-mutual-authentication = on
    }
  }
}

This example uses a single file for key- and trust-store. Having a single self-signed certificate in this store suffices to establish mutually authenticated TLS connections between locations. However, a self-signed certificate makes replacing the certificate in a rolling upgrade tedious. The following script (derived from the scripts in the documentation of Lightbend’s SSL-Config library) creates a root certificate that is used to sign the certificate for client and server authentication. The root certificate is kept in the private keystore rootca.jks, which does not have to be deployed with the application. It is imported as trustedCertEntry into the keystore keystore.jks, which is the one referenced by the Akka configuration above.

#!/bin/bash -eu

export PW=<...>

CaAlias=rootca
CaKeystore=rootca.jks
CaExport=root.crt

CertAlias=serverandclient
CertKeystore=keystore.jks
CertSigningRequest=serverandclient.csr
CertSigned=serverandclient.crt

logStep() {
    echo
    echo '=========' "$@"
    echo
}

rm -f "$CaKeystore" "$CertKeystore" "$CaExport" "$CertSigningRequest" "$CertSigned"

logStep Create root CA in $CaKeystore
keytool -genkeypair \
  -alias "$CaAlias" \
  -dname "CN=CA" \
  -keystore "$CaKeystore" \
  -keypass:env PW \
  -storepass:env PW \
  -keyalg RSA \
  -keysize 4096 \
  -ext KeyUsage:critical="keyCertSign" \
  -ext BasicConstraints:critical="ca:true" \
  -validity 3700

logStep Export root CA to $CaExport for import into truststore
keytool -export \
  -alias "$CaAlias" \
  -file "$CaExport" \
  -keypass:env PW \
  -storepass:env PW \
  -keystore "$CaKeystore" \
  -rfc

logStep Make $CertKeystore trust CA as signer
keytool -import \
  -alias "$CaAlias" \
  -file "$CaExport" \
  -keystore "$CertKeystore" \
  -storepass:env PW << EOF
yes
EOF

logStep Create certificate for client and server auth in $CertKeystore
keytool -genkeypair \
  -alias "$CertAlias" \
  -dname "CN=Unknown" \
  -keystore "$CertKeystore" \
  -keypass:env PW \
  -storepass:env PW \
  -keyalg RSA \
  -keysize 2048 \
  -validity 385

logStep Create a certificate signing request: $CertSigningRequest
keytool -certreq \
  -alias "$CertAlias" \
  -keypass:env PW \
  -storepass:env PW \
  -keystore "$CertKeystore" \
  -file "$CertSigningRequest"

logStep Sign certificate with CA: $CertSigned
keytool -gencert \
  -alias "$CaAlias" \
  -keypass:env PW \
  -storepass:env PW \
  -keystore "$CaKeystore" \
  -infile "$CertSigningRequest" \
  -outfile "$CertSigned" \
  -ext KeyUsage:critical="digitalSignature,keyEncipherment" \
  -rfc

logStep Import signed certificate back into $CertKeystore
keytool -import \
  -alias "$CertAlias" \
  -file "$CertSigned" \
  -keystore "$CertKeystore" \
  -storepass:env PW

logStep List $CertKeystore
keytool -list \
  -keystore "$CertKeystore" \
  -storepass:env PW

If the certificate for client and server authentication needs to be replaced, the corresponding key-stores can be deployed location by location, without having to stop the entire replication network at once.

For debugging TLS connections, please read Debugging SSL/TLS Connections in the Java documentation; the JSSE debug output can be enabled, for example, by starting the JVM with -Djavax.net.debug=ssl.

Configuration

This is the reference configuration of Eventuate. It is processed by Typesafe’s config library and can be overridden by applications.
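
For example, an application can place overrides in its application.conf, which Typesafe config merges with the reference configuration at startup, or pass them programmatically when creating the actor system. The following is a minimal sketch that assumes only the Typesafe config and Akka APIs; the overridden keys are taken from the reference configuration below, while the concrete values, the directory path and the actor system name are purely illustrative:

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory

object CustomEventuateConfig extends App {
  // Settings given here take precedence over the reference configuration;
  // everything not overridden keeps its default value.
  val overrides = ConfigFactory.parseString(
    """
      |eventuate.log.write-batch-size = 128
      |eventuate.log.replication.retry-delay = 10s
      |eventuate.snapshot.filesystem.dir = /var/lib/myapp/snapshots
    """.stripMargin)

  // ConfigFactory.load() also picks up an application.conf from the classpath,
  // so the same overrides could be placed there instead.
  val config = overrides.withFallback(ConfigFactory.load())

  // Illustrative actor system name.
  val system = ActorSystem("location-1", config)
}

The reference configuration of the individual Eventuate modules follows: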

eventuate-core

akka {
  actor {
    serializers {
      eventuate-durable-event = "com.rbmhtechnology.eventuate.serializer.DelegatingDurableEventSerializer"
      eventuate-replication-filter = "com.rbmhtechnology.eventuate.serializer.ReplicationFilterSerializer"
      eventuate-replication-protocol = "com.rbmhtechnology.eventuate.serializer.ReplicationProtocolSerializer"
      eventuate-snapshot = "com.rbmhtechnology.eventuate.serializer.SnapshotSerializer"
    }

    serialization-bindings {
      "com.rbmhtechnology.eventuate.DurableEvent" = eventuate-durable-event
      "com.rbmhtechnology.eventuate.ReplicationFilter$Format" = eventuate-replication-filter
      "com.rbmhtechnology.eventuate.ReplicationProtocol$Format" = eventuate-replication-protocol
      "com.rbmhtechnology.eventuate.ConcurrentVersionsTree" = eventuate-snapshot
      "com.rbmhtechnology.eventuate.Snapshot" = eventuate-snapshot
      "com.rbmhtechnology.eventuate.log.EventLogClock" = eventuate-snapshot
    }
  }
}

eventuate {
  log {
    # Timeout for read operations from a local event log. These are batch
    # event reads during event replay made by event-sourced views, actors,
    # writers and processors, as well as target log reads made by processors.
    read-timeout = 10s

    # Timeout for write operations to a local event log. These are batch
    # event writes made by event-sourced processors and replicators. This
    # timeout does not apply to event-sourced actor writes.
    write-timeout = 10s

    # Target batch size for writing events. It is used by the batching layer
    # to limit the size of event batches to be written atomically to an event
    # log. It also limits the size of event batches replicated from remote
    # source logs and to a local target log. Please note that this is not
    # a strict batch size limit. Event-sourced actors can still emit batches
    # of larger size (although it is very uncommon to emit that many events
    # per command).
    write-batch-size = 64

    # Maximum number of events to be replayed to an event-sourced view, actor,
    # writer or processor before replay is suspended. A suspended replay is
    # resumed automatically after the replayed event batch has been handled
    # (= replay backpressure).
    replay-batch-size = 4096

    # Maximum number of replay attempts before finally stopping the actor itself
    replay-retry-max = 10

    # Delay between consecutive replay attempts
    replay-retry-delay = 10s
  }

  log.circuit-breaker {
    # Number of write retries after which the circuit breaker should be opened.
    open-after-retries = 2
  }

  log.replication {
    # Event replication retry delay. Event replication is delayed for the
    # given duration if a previous transfer batch was empty or failed.
    retry-delay = 5s

    # Timeout for reading events from the remote source log.
    remote-read-timeout = 10s

    # Maximum number of events to scan in a remote event log per replication
    # read request.
    remote-scan-limit = 65536

    # Minimum duration of failed communication with a remote replication
    # endpoint required to consider that endpoint as unavailable.
    failure-detection-limit = 60s

    # If turned on, notifications about updates in a source event log are sent
    # to remote replication endpoints which reduces event replication latency.
    # The impact of sending update notifications on average replication latency
    # decreases with increasing event write load. Applications with high event
    # write load may even experience increased event replication throughput if
    # update notifications are turned off.
    update-notifications = on
  }

  log.recovery {
    # Maximum number of retries of remote operations. These are operations
    # that read from remote replication endpoints.
    remote-operation-retry-max = 3

    # Delay before re-trying a remote operation after a previous failure.
    remote-operation-retry-delay = 10s

    # Timeout of remote operations.
    remote-operation-timeout = 10s

    # Timeout for deleting invalidated snapshots.
    snapshot-deletion-timeout = 30s
  }

  log.dispatchers {
    write-dispatcher {
      executor = "thread-pool-executor"
      type = PinnedDispatcher
    }

    read-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
      fork-join-executor {
        parallelism-min = 2
        parallelism-max = 16
      }
    }
  }

  snapshot {
    # Timeout for loading a snapshot.
    load-timeout = 10m

    # Timeout for saving a snapshot.
    save-timeout = 10m
  }

  snapshot.filesystem {
    # Root directory for storing the snapshot files of individual emitters.
    dir = target

    # Maximum number of stored snapshots per emitter. If this number is
    # exceeded during a snapshot write, older snapshots are automatically
    # deleted.
    snapshots-per-emitter-max = 3

    write-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
      fork-join-executor {
        parallelism-min = 2
        parallelism-max = 8
      }
    }

    read-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
      fork-join-executor {
        parallelism-min = 2
        parallelism-max = 32
      }
    }
  }
}

eventuate-crdt

akka {
  actor {
    serializers {
      eventuate-crdt = "com.rbmhtechnology.eventuate.crdt.CRDTSerializer"
    }

    serialization-bindings {
      "com.rbmhtechnology.eventuate.crdt.CRDTFormat" = eventuate-crdt
    }
  }
}

# Timeout for CRDT read and write operations (incl. saving snapshots).
eventuate.crdt.operation-timeout = 10s

eventuate-log-cassandra

eventuate {
  log.cassandra {
    # Comma-separated list of contact points in the cluster
    # (hostname or hostname:port entries).
    contact-points = ["127.0.0.1"]

    # Default port of contact points in the cluster. Ports defined in
    # contact-points override this setting.
    default-port = 9042

    # Default Cassandra username
    username = "cassandra"

    # Default Cassandra password
    password = "cassandra"

    # Name of the keyspace created/used by Eventuate.
    keyspace = "eventuate"

    # Whether or not to auto-create the keyspace.
    keyspace-autocreate = true

    # Prefix of all tables created/used by Eventuate.
    table-prefix = "log"

    # Replication factor to use when creating the keyspace.
    replication-factor = 1

    # Maximum number of times a failed write should be retried until the event
    # log actor gives up and stops itself. The delay between write retries can
    # be configured with eventuate.log.write-timeout.
    write-retry-max = 65536

    # Write consistency level
    write-consistency = "QUORUM"

    # Read consistency level
    read-consistency = "QUORUM"

    # Maximum number of events stored per event log partition. Must be greater
    # than eventuate.log.write-batch-size.
    partition-size = 131072

    # Minimum number of new events that must have been written before another
    # index update is triggered.
    index-update-limit = 64

    # Maximum number of event log initialization retries. Initialization includes
    # recovery of the current sequence number and version vector as well as
    # indexing of not yet indexed log entries.
    init-retry-max = 3

    # Delay between event log initialization retries. Initialization includes
    # recovery of the current sequence number and version vector as well as
    # indexing of not yet indexed log entries.
    init-retry-delay = 5s

    # Maximum number of initial connection retries to a Cassandra cluster.
    connect-retry-max = 3

    # Delay between initial connection retries to a Cassandra cluster.
    connect-retry-delay = 5s
  }

  log.dispatchers {
    write-dispatcher {
      executor = "thread-pool-executor"
      type = PinnedDispatcher
    }

    read-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
      fork-join-executor {
        parallelism-min = 2
        parallelism-max = 16
      }
    }
  }
}

eventuate-log-leveldb

eventuate {
  log.leveldb {
    # Root directory for storing the log directories of individual event logs.
    dir = target

    # Use fsync on write.
    fsync = on

    # Minimum number of new events that must have been written before another
    # snapshot of the log's internal state (sequence number and merged vector
    # time) is written.
    state-snapshot-limit = 128

    # Maximum number of events that are physically deleted in a single batch
    # operation.
    deletion-batch-size = 100

    # Delay between two tries to physically delete all requested events while
    # keeping those that are not yet replicated.
    deletion-retry-delay = 1m
  }
}