From 61d7578c895ddbba4d51858dcd1d2abeb6ad3c3c Mon Sep 17 00:00:00 2001 From: Samuel Laferriere Date: Fri, 8 Nov 2024 01:17:40 +0400 Subject: [PATCH] test(altda): add test for altda->ethda failover --- op-alt-da/damock.go | 17 +++++- op-e2e/e2eutils/geth/wait.go | 26 +++++++++ op-e2e/e2eutils/transactions/count.go | 18 ++++++- op-e2e/system/altda/concurrent_test.go | 2 +- op-e2e/system/altda/failover_test.go | 74 ++++++++++++++++++++++++++ op-e2e/system/da/multi_test.go | 2 +- op-e2e/system/e2esys/setup.go | 34 +++++++----- 7 files changed, 155 insertions(+), 18 deletions(-) create mode 100644 op-e2e/system/altda/failover_test.go diff --git a/op-alt-da/damock.go b/op-alt-da/damock.go index ad388d0b26535..8d2f918968bc5 100644 --- a/op-alt-da/damock.go +++ b/op-alt-da/damock.go @@ -105,12 +105,16 @@ func (d *AltDADisabled) AdvanceL1Origin(ctx context.Context, l1 L1Fetcher, block } // FakeDAServer is a fake DA server for e2e tests. -// It is a small wrapper around DAServer that allows for setting request latencies, -// to mimic a DA service with slow responses (eg. eigenDA with 10 min batching interval). +// It is a small wrapper around DAServer that allows for setting: +// - request latencies, to mimic a DA service with slow responses +// (eg. eigenDA with 10 min batching interval). +// - response status codes, to mimic a DA service that is down. type FakeDAServer struct { *DAServer putRequestLatency time.Duration getRequestLatency time.Duration + // next failoverCount Put requests will return 503 status code for failover testing + failoverCount uint64 } func NewFakeDAServer(host string, port int, log log.Logger) *FakeDAServer { @@ -130,6 +134,10 @@ func (s *FakeDAServer) HandleGet(w http.ResponseWriter, r *http.Request) { func (s *FakeDAServer) HandlePut(w http.ResponseWriter, r *http.Request) { time.Sleep(s.putRequestLatency) + if s.failoverCount > 0 { + w.WriteHeader(http.StatusServiceUnavailable) + s.failoverCount-- + } s.DAServer.HandlePut(w, r) } @@ -154,6 +162,11 @@ func (s *FakeDAServer) SetGetRequestLatency(latency time.Duration) { s.getRequestLatency = latency } +// SetResponseStatusForNRequests sets the next n Put requests to return 503 status code. +func (s *FakeDAServer) SetPutFailoverForNRequests(n uint64) { + s.failoverCount = uint64(n) +} + type MemStore struct { db map[string][]byte lock sync.RWMutex diff --git a/op-e2e/e2eutils/geth/wait.go b/op-e2e/e2eutils/geth/wait.go index 8356058afda75..17c6f16226ca7 100644 --- a/op-e2e/e2eutils/geth/wait.go +++ b/op-e2e/e2eutils/geth/wait.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/transactions" "github.com/ethereum-optimism/optimism/op-node/rollup" "github.com/ethereum-optimism/optimism/op-node/rollup/derive" "github.com/ethereum/go-ethereum" @@ -86,6 +87,31 @@ func WaitForTransaction(hash common.Hash, client *ethclient.Client, timeout time } } +// WaitForBlockWithTxFromSender waits for a block with a transaction from a specific sender address. +// It starts from the current block and checks the next nBlocks blocks. +func WaitForBlockWithTxFromSender(sender common.Address, client *ethclient.Client, nBlocks uint64) (*types.Block, error) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + blockNum, err := client.BlockNumber(ctx) + if err != nil { + return nil, err + } + for blockNum := blockNum; blockNum < blockNum+nBlocks; blockNum++ { + blockL1, err := WaitForBlock(big.NewInt(0).SetUint64(blockNum), client) + if err != nil { + return nil, err + } + batcherTxCount, err := transactions.TransactionsBySenderCount(blockL1, sender) + if err != nil { + return nil, err + } + if batcherTxCount > 0 { + return blockL1, nil + } + } + return nil, fmt.Errorf("no block with tx from sender %s found in the last %d blocks", sender.Hex(), nBlocks) +} + type waitForBlockOptions struct { noChangeTimeout time.Duration absoluteTimeout time.Duration diff --git a/op-e2e/e2eutils/transactions/count.go b/op-e2e/e2eutils/transactions/count.go index 0f4d41fe04786..7f9f05c2857f5 100644 --- a/op-e2e/e2eutils/transactions/count.go +++ b/op-e2e/e2eutils/transactions/count.go @@ -5,7 +5,8 @@ import ( "github.com/ethereum/go-ethereum/core/types" ) -func TransactionsBySender(block *types.Block, sender common.Address) (int64, error) { +// TransactionsBySenderCount returns the number of transactions in the block that were sent by the given sender. +func TransactionsBySenderCount(block *types.Block, sender common.Address) (int64, error) { txCount := int64(0) for _, tx := range block.Transactions() { signer := types.NewCancunSigner(tx.ChainId()) @@ -19,3 +20,18 @@ func TransactionsBySender(block *types.Block, sender common.Address) (int64, err } return txCount, nil } + +func TransactionsBySender(block *types.Block, sender common.Address) ([]*types.Transaction, error) { + txs := make([]*types.Transaction, 0) + for _, tx := range block.Transactions() { + signer := types.NewCancunSigner(tx.ChainId()) + txSender, err := types.Sender(signer, tx) + if err != nil { + return nil, err + } + if txSender == sender { + txs = append(txs, tx) + } + } + return txs, nil +} diff --git a/op-e2e/system/altda/concurrent_test.go b/op-e2e/system/altda/concurrent_test.go index 32506e5c4a102..cd6dcce911ede 100644 --- a/op-e2e/system/altda/concurrent_test.go +++ b/op-e2e/system/altda/concurrent_test.go @@ -63,7 +63,7 @@ func TestBatcherConcurrentAltDARequests(t *testing.T) { require.NoError(t, err, "Waiting for l1 blocks") // there are possibly other services (proposer/challenger) in the background sending txs // so we only count the batcher txs - batcherTxCount, err := transactions.TransactionsBySender(block, cfg.DeployConfig.BatchSenderAddress) + batcherTxCount, err := transactions.TransactionsBySenderCount(block, cfg.DeployConfig.BatchSenderAddress) require.NoError(t, err) if batcherTxCount > 1 { return diff --git a/op-e2e/system/altda/failover_test.go b/op-e2e/system/altda/failover_test.go new file mode 100644 index 0000000000000..9ca522c8345e6 --- /dev/null +++ b/op-e2e/system/altda/failover_test.go @@ -0,0 +1,74 @@ +package altda + +import ( + "math/big" + "testing" + + op_e2e "github.com/ethereum-optimism/optimism/op-e2e" + "github.com/ethereum-optimism/optimism/op-node/rollup/derive" + "github.com/ethereum/go-ethereum/log" + + "github.com/ethereum-optimism/optimism/op-batcher/flags" + "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/geth" + "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/transactions" + "github.com/ethereum-optimism/optimism/op-e2e/system/e2esys" + "github.com/stretchr/testify/require" +) + +// TestBatcher_FailoverToEthDA_FallbackToAltDA tests that the batcher will failover to ethDA +// if the da-server returns 503, and then fallback to altDA once altDA is available again +// (i.e. the da-server doesn't return 503 anymore). +func TestBatcher_FailoverToEthDA_FallbackToAltDA(t *testing.T) { + op_e2e.InitParallel(t) + + nChannelsFailover := uint64(2) + + cfg := e2esys.DefaultSystemConfig(t, e2esys.WithLogLevel(log.LevelCrit)) + cfg.DeployConfig.UseAltDA = true + // With these settings, the batcher will post a single commitment per L1 block, + // so it's easy to trigger failover and observe the commitment changing on the next L1 block. + cfg.BatcherMaxPendingTransactions = 1 // no limit on parallel txs + cfg.BatcherMaxConcurrentDARequest = 1 + cfg.BatcherBatchType = 0 + // We make channels as small as possible, such that they contain a single commitment. + // This is because failover to ethDA happens on a per-channel basis (each new channel is sent to altDA first). + // Hence, we can quickly observe the failover (to ethda) and fallback (to altda) behavior. + // cfg.BatcherMaxL1TxSizeBytes = 1200 + // currently altda commitments can only be sent as calldata + cfg.DataAvailabilityType = flags.CalldataType + + sys, err := cfg.Start(t) + require.NoError(t, err, "Error starting up system") + defer sys.Close() + l1Client := sys.NodeClient("l1") + + startBlockL1, err := geth.WaitForBlockWithTxFromSender(cfg.DeployConfig.BatchSenderAddress, l1Client, 10) + require.NoError(t, err) + + // Simulate altda server returning 503 + sys.FakeAltDAServer.SetPutFailoverForNRequests(nChannelsFailover) + + countEthDACommitment := uint64(0) + + // Most likely, sequence of blocks will be: altDA, ethDA, ethDA, altDA, altDA, altDA. + for blockNumL1 := startBlockL1.NumberU64(); blockNumL1 < startBlockL1.NumberU64()+6; blockNumL1++ { + blockL1, err := geth.WaitForBlock(big.NewInt(0).SetUint64(blockNumL1), l1Client) + require.NoError(t, err) + batcherTxs, err := transactions.TransactionsBySender(blockL1, cfg.DeployConfig.BatchSenderAddress) + require.NoError(t, err) + require.Equal(t, 1, len(batcherTxs)) // sanity check: ensure BatcherMaxPendingTransactions=1 is working + batcherTx := batcherTxs[0] + if batcherTx.Data()[0] == 1 { + t.Log("blockL1", blockNumL1, "batcherTxType", "altda") + } else if batcherTx.Data()[0] == 0 { + t.Log("blockL1", blockNumL1, "batcherTxType", "ethda") + } else { + t.Fatalf("unexpected batcherTxType: %v", batcherTx.Data()[0]) + } + if batcherTx.Data()[0] == byte(derive.DerivationVersion0) { + countEthDACommitment++ + } + } + require.Equal(t, nChannelsFailover, countEthDACommitment, "Expected %v ethDA commitments, got %v", nChannelsFailover, countEthDACommitment) + +} diff --git a/op-e2e/system/da/multi_test.go b/op-e2e/system/da/multi_test.go index 3d150010a0f38..da503f33b16b3 100644 --- a/op-e2e/system/da/multi_test.go +++ b/op-e2e/system/da/multi_test.go @@ -51,7 +51,7 @@ func TestBatcherMultiTx(t *testing.T) { require.NoError(t, err, "Waiting for l1 blocks") // there are possibly other services (proposer/challenger) in the background sending txs // so we only count the batcher txs - batcherTxCount, err := transactions.TransactionsBySender(block, cfg.DeployConfig.BatchSenderAddress) + batcherTxCount, err := transactions.TransactionsBySenderCount(block, cfg.DeployConfig.BatchSenderAddress) require.NoError(t, err) totalBatcherTxsCount += int64(batcherTxCount) diff --git a/op-e2e/system/e2esys/setup.go b/op-e2e/system/e2esys/setup.go index 5d046b3c649c3..1f368cb49b29f 100644 --- a/op-e2e/system/e2esys/setup.go +++ b/op-e2e/system/e2esys/setup.go @@ -6,6 +6,7 @@ import ( "crypto/rand" "errors" "fmt" + "log/slog" "math/big" "net" "os" @@ -85,6 +86,7 @@ var ( type SystemConfigOpts struct { AllocType config.AllocType + LogLevel slog.Level } type SystemConfigOpt func(s *SystemConfigOpts) @@ -95,9 +97,16 @@ func WithAllocType(allocType config.AllocType) SystemConfigOpt { } } +func WithLogLevel(level slog.Level) SystemConfigOpt { + return func(s *SystemConfigOpts) { + s.LogLevel = level + } +} + func DefaultSystemConfig(t testing.TB, opts ...SystemConfigOpt) SystemConfig { sco := &SystemConfigOpts{ AllocType: config.DefaultAllocType, + LogLevel: slog.LevelInfo, } for _, opt := range opts { opt(sco) @@ -108,7 +117,7 @@ func DefaultSystemConfig(t testing.TB, opts ...SystemConfigOpt) SystemConfig { deployConfig := config.DeployConfig(sco.AllocType) deployConfig.L1GenesisBlockTimestamp = hexutil.Uint64(time.Now().Unix()) e2eutils.ApplyDeployConfigForks(deployConfig) - require.NoError(t, deployConfig.Check(testlog.Logger(t, log.LevelInfo)), + require.NoError(t, deployConfig.Check(testlog.Logger(t, sco.LogLevel).New("role", "config-check")), "Deploy config is invalid, do you need to run make devnet-allocs?") l1Deployments := config.L1Deployments(sco.AllocType) require.NoError(t, l1Deployments.Check(deployConfig)) @@ -170,11 +179,12 @@ func DefaultSystemConfig(t testing.TB, opts ...SystemConfigOpt) SystemConfig { }, }, Loggers: map[string]log.Logger{ - RoleVerif: testlog.Logger(t, log.LevelInfo).New("role", RoleVerif), - RoleSeq: testlog.Logger(t, log.LevelInfo).New("role", RoleSeq), - "batcher": testlog.Logger(t, log.LevelInfo).New("role", "batcher"), - "proposer": testlog.Logger(t, log.LevelInfo).New("role", "proposer"), - "da-server": testlog.Logger(t, log.LevelInfo).New("role", "da-server"), + RoleVerif: testlog.Logger(t, sco.LogLevel).New("role", RoleVerif), + RoleSeq: testlog.Logger(t, sco.LogLevel).New("role", RoleSeq), + "batcher": testlog.Logger(t, sco.LogLevel).New("role", "batcher"), + "proposer": testlog.Logger(t, sco.LogLevel).New("role", "proposer"), + "da-server": testlog.Logger(t, sco.LogLevel).New("role", "da-server"), + "config-check": testlog.Logger(t, sco.LogLevel).New("role", "config-check"), }, GethOptions: map[string][]geth.GethOption{}, P2PTopology: nil, // no P2P connectivity by default @@ -265,12 +275,10 @@ type SystemConfig struct { // L1FinalizedDistance is the distance from the L1 head that L1 blocks will be artificially finalized on. L1FinalizedDistance uint64 - Premine map[common.Address]*big.Int - Nodes map[string]*rollupNode.Config // Per node config. Don't use populate rollup.Config - Loggers map[string]log.Logger - GethOptions map[string][]geth.GethOption - ProposerLogger log.Logger - BatcherLogger log.Logger + Premine map[common.Address]*big.Int + Nodes map[string]*rollupNode.Config // Per node config. Don't use populate rollup.Config + Loggers map[string]log.Logger + GethOptions map[string][]geth.GethOption ExternalL2Shim string @@ -519,7 +527,7 @@ func (cfg SystemConfig) Start(t *testing.T, startOpts ...StartOption) (*System, c = sys.TimeTravelClock } - if err := cfg.DeployConfig.Check(testlog.Logger(t, log.LevelInfo)); err != nil { + if err := cfg.DeployConfig.Check(cfg.Loggers["config-check"]); err != nil { return nil, err }