#include "buildboxcommonmetrics_gaugemetricvalue.h"
#include <gtest/gtest.h>

#include <google/devtools/remoteworkers/v1test2/bots_mock.grpc.pb.h>

#include <buildboxcommon_digestgenerator.h>
#include <buildboxcommon_grpctestserver.h>
#include <buildboxcommon_protos.h>
#include <buildboxcommonmetrics_durationmetrictimer.h>
#include <buildboxcommonmetrics_testingutils.h>
#include <buildboxworker_expiretime.h>
#include <buildboxworker_metricnames.h>
#include <buildboxworker_worker.h>

#include <algorithm>
#include <signal.h>
#include <system_error>
#include <thread>
#include <vector>

namespace proto {
using namespace google::bytestream;
using namespace google::devtools::remoteworkers::v1test2;
using namespace build::bazel::remote::execution::v2;
} // namespace proto

using buildboxcommon::buildboxcommonmetrics::collectedByName;
using buildboxcommon::buildboxcommonmetrics::collectedByNameWithValue;
using buildboxcommon::buildboxcommonmetrics::DurationMetricValue;
using buildboxcommon::buildboxcommonmetrics::GaugeMetricValue;
using buildboxworker::Worker;
using google::devtools::remoteworkers::v1test2::MockBotsStub;

using namespace testing;

namespace {
const auto digestFunctionInitializer = []() {
    buildboxcommon::DigestGenerator::init();
    return 0;
}();
} // namespace

struct ActionTestCase {
    proto::Action action;
    proto::Command command;
    proto::ActionResult result;
};

class WorkerTestFixture : public Worker, public ::testing::Test {
  public:
    WorkerTestFixture()
    {
        d_logLevel = "debug";
        d_maxConcurrentJobs = 1;
        d_stopAfterJobs = -1;
        d_maxWaitTime = std::chrono::minutes(10);

        // Set path to test runner
        d_runnerCommand = getenv("BUILDBOX_RUN");

        // Ignore SIGPIPE in case of using sockets + grpc without MSG_NOSIGNAL
        // support configured
        struct sigaction sa = {};
        sa.sa_handler = SIG_IGN;
        sigemptyset(&sa.sa_mask);
        sa.sa_flags = 0;
        if (sigaction(SIGPIPE, &sa, nullptr) < 0) {
            throw std::system_error(errno, std::system_category(),
                                    "Unable to ignore SIGPIPE");
        }
    }
    void addFakeJob(const std::string &jobString)
    {
        d_activeJobs.insert(jobString);
    }
    void upsertSingleLease(const proto::Lease &lease)
    {
        std::lock_guard<std::mutex> lock(d_sessionMutex);
        d_maxConcurrentJobs = 1;
        d_stopAfterJobs = 1;
        if (d_session.leases_size() == 0) {
            d_session.add_leases()->CopyFrom(lease);
        }
        else {
            *d_session.mutable_leases(0) = lease;
        }
    }
    void upsertMultipleLeases(const std::vector<proto::Lease> &leases)
    {
        std::lock_guard<std::mutex> lock(d_sessionMutex);
        d_session.mutable_leases()->Clear();

        for (const auto &lease : leases) {
            d_session.add_leases()->CopyFrom(lease);
        }
    }
    void runSingleLease(const proto::Lease &lease,
                        proto::Lease *completedLease, int processTimes = 0)
    {

        upsertSingleLease(lease);
        if (processTimes == 0) {
            initLocalExecutionClient();
            runWorkerWithoutBotSession();
        }
        else {
            bool skipPollDelay = true;
            for (int i = 0; i < processTimes; i++) {
                processLeases(&skipPollDelay);
            }
        }
        {
            std::lock_guard<std::mutex> lock(d_sessionMutex);
            EXPECT_EQ(d_session.leases_size(), 1);
            completedLease->CopyFrom(d_session.leases(0));
        }
    }
    void runMultipleLeases(const std::vector<proto::Lease> &leases,
                           std::vector<proto::Lease> &completedLeases,
                           int processTimes = 0)
    {
        upsertMultipleLeases(leases);
        if (processTimes == 0) {
            initLocalExecutionClient();
            runWorkerWithoutBotSession();
        }
        else {
            bool skipPollDelay = true;
            for (int i = 0; i < processTimes; i++) {
                processLeases(&skipPollDelay);
            }
        }
        {
            std::lock_guard<std::mutex> lock(d_sessionMutex);
            EXPECT_EQ(d_session.leases_size(), leases.size());

            const int num_leases = static_cast<int>(leases.size());
            for (int i = 0; i < num_leases; i++) {
                completedLeases[i].CopyFrom(d_session.leases(i));
            }
        }
    }
    void setBotStatus(const proto::BotStatus newStatus)
    {
        d_botStatus = newStatus;
    }

    google::rpc::Status runAction(proto::Action *action,
                                  const proto::Command &command,
                                  proto::ActionResult *actionResult,
                                  bool cancel = false)
    {
        const auto commandDigest =
            buildboxcommon::DigestGenerator::hash(command);
        action->mutable_command_digest()->CopyFrom(commandDigest);

        buildboxcommon::GrpcTestServer testServer;
        std::thread serverHandler([&]() {
            buildboxcommon::GrpcTestServerContext ctxCap(
                &testServer, "/build.bazel.remote.execution.v2.Capabilities/"
                             "GetCapabilities");
            ctxCap.finish(grpc::Status(grpc::UNIMPLEMENTED, "Unimplemented"));

            proto::ReadRequest expectedReadRequest;
            proto::ReadResponse readResponse;
            expectedReadRequest.set_resource_name(
                "blobs/" + commandDigest.hash() + "/" +
                std::to_string(commandDigest.size_bytes()));
            readResponse.set_data(command.SerializeAsString());
            buildboxcommon::GrpcTestServerContext ctx(
                &testServer, "/google.bytestream.ByteStream/Read");
            ctx.read(expectedReadRequest);
            ctx.writeAndFinish(readResponse);
        });

        this->d_casServer.setUrl(testServer.url());
        this->d_extraRunArgs.emplace_back("--no-logs-capture");

        this->initLocalExecutionClient();

        proto::Lease updatedLease;

        proto::Lease lease;
        lease.set_id("test-lease");
        lease.mutable_payload()->PackFrom(*action);
        lease.set_state(proto::LeaseState::PENDING);

        try {
            if (cancel) {
                // 2 passes of processing, first: pending -> waiting ack,
                // second: waiting ack -> scheduled
                this->runSingleLease(lease, &updatedLease, 2);
                lease.set_state(proto::LeaseState::CANCELLED);
                // Sleep until the subprocess is created
                // TODO: refactor the code so a real subprocess doesn't have to
                // be created when testing the worker class e.g. create a new
                // abstraction layer to manage the subprocesses instead of
                // using a plain detached thread and use the mock in unit tests
                std::this_thread::sleep_for(std::chrono::seconds(2));
                this->runSingleLease(lease, &updatedLease, 1);
                EXPECT_EQ(updatedLease.state(), proto::LeaseState::CANCELLED);
                EXPECT_EQ(d_activeJobsToOperations.size(), 0);
            }
            else {
                this->runSingleLease(lease, &updatedLease);
                EXPECT_EQ(updatedLease.state(), proto::LeaseState::COMPLETED);
                EXPECT_TRUE(updatedLease.result().UnpackTo(actionResult));
            }
        }
        catch (...) {
            serverHandler.join();
            throw;
        }
        serverHandler.join();

        // Wait for all worker threads to complete
        // (matching graceful shutdown of real worker)
        std::unique_lock<std::mutex> lock(this->d_sessionMutex);
        while (this->d_detachedThreadCount > 0) {
            this->d_sessionCondition.wait_for(lock, this->s_defaultWaitTime);
        }

        return updatedLease.status();
    }

    std::vector<google::rpc::Status>
    runMultipleActions(std::vector<ActionTestCase> &testCases,
                       bool cancel = false)
    {
        // Set the command digests for all actions
        for (auto &[action, command, _] : testCases) {
            const auto commandDigest =
                buildboxcommon::DigestGenerator::hash(command);
            action.mutable_command_digest()->CopyFrom(commandDigest);
        }

        // Setup mock server to serve commands
        buildboxcommon::GrpcGenericTestServer testServer;
        testServer.d_ctx
            ->registerMethod<proto::ReadRequest, proto::ReadResponse>(
                "/google.bytestream.ByteStream/Read");

        for (const auto &[action, command, _] : testCases) {
            proto::ReadRequest expectedReadRequest;
            proto::ReadResponse readResponse;
            expectedReadRequest.set_resource_name(
                "blobs/" + action.command_digest().hash() + "/" +
                std::to_string(action.command_digest().size_bytes()));
            readResponse.set_data(command.SerializeAsString());

            testServer.d_ctx
                ->addMockMethodCall<proto::ReadRequest, proto::ReadResponse>(
                    "/google.bytestream.ByteStream/Read", expectedReadRequest,
                    readResponse);
        }

        this->d_casServer.setUrl(testServer.url());
        this->d_extraRunArgs.emplace_back("--no-logs-capture");

        testServer.startAsyncServer();

        this->initLocalExecutionClient();

        std::vector<proto::Lease> leases;
        std::vector<proto::Lease> updatedLeases;
        int lease_num = 0;

        // Create leases
        for (const auto &[action, command, _] : testCases) {
            proto::Lease updatedLease;
            proto::Lease lease;

            lease.set_id("test-lease" + std::to_string(lease_num++));
            lease.mutable_payload()->PackFrom(action);
            lease.set_state(proto::LeaseState::PENDING);

            leases.push_back(std::move(lease));
            updatedLeases.push_back(std::move(updatedLease));
        }

        try {
            if (cancel) {
                this->runMultipleLeases(leases, updatedLeases, 2);
                for (size_t i = 0; i < leases.size(); ++i) {
                    leases[i].set_state(proto::LeaseState::CANCELLED);
                }

                std::this_thread::sleep_for(std::chrono::seconds(2));
                this->runMultipleLeases(leases, updatedLeases, 1);
                for (const auto &updatedLease : updatedLeases) {
                    EXPECT_EQ(updatedLease.state(),
                              proto::LeaseState::CANCELLED);
                }

                EXPECT_EQ(d_activeJobsToOperations.size(), 0);
            }
            else {
                this->runMultipleLeases(leases, updatedLeases);

                // Populate test cases with results
                for (size_t i = 0; i < updatedLeases.size(); ++i) {
                    if (updatedLeases[i].state() ==
                        proto::LeaseState::COMPLETED) {
                        EXPECT_TRUE(updatedLeases[i].result().UnpackTo(
                            &testCases[i].result));
                    }
                }
            }
        }
        catch (...) {
            testServer.stopServer();
            throw;
        }

        // Wait for all worker threads to complete
        // (matching graceful shutdown of real worker)
        std::unique_lock<std::mutex> lock(this->d_sessionMutex);
        while (this->d_detachedThreadCount > 0) {
            this->d_sessionCondition.wait_for(lock, this->s_defaultWaitTime);
        }

        testServer.stopServer();

        std::vector<google::rpc::Status> statuses;
        statuses.reserve(updatedLeases.size());
        for (const auto &updatedLease : updatedLeases) {
            statuses.push_back(updatedLease.status());
        }
        return statuses;
    }
};

// Test that even if a long deadline is set with no work given
// the worker returned a short wait time
TEST_F(WorkerTestFixture, WaitTimeNoJobs)
{
    const auto currentTime = std::chrono::system_clock::now();
    const auto excessive_wait_time =
        std::chrono::duration_cast<std::chrono::seconds>(2 *
                                                         this->d_maxWaitTime);

    const auto expectedWaitTime = currentTime + this->s_defaultWaitTime;

    this->d_session.mutable_expire_time()->set_seconds(
        std::chrono::system_clock::to_time_t(currentTime) +
        excessive_wait_time.count());

    ASSERT_TRUE(this->d_session.has_expire_time());
    ASSERT_EQ(this->calculateWaitTime(currentTime), expectedWaitTime);
}

// If the bot is in a non-ok status, wait time should be respected
// even if the bot has no work.
TEST_F(WorkerTestFixture, WaitTimeNoJobsUnhealthyBot)
{
    int64_t NANOS_PER_MICRO = 1000;

    // Very specifically calculate the expire time
    // to avoid rounding errors
    const auto currentTime =
        std::chrono::time_point_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now());
    auto deadline = currentTime + this->d_maxWaitTime;
    const auto deadlineSeconds =
        std::chrono::duration_cast<std::chrono::seconds>(
            deadline.time_since_epoch());
    deadline -= deadlineSeconds;
    const auto deadlineRemainder =
        std::chrono::duration_cast<std::chrono::microseconds>(
            deadline.time_since_epoch());

    // Set the expire time in the seconds/nanos format protobuf expects
    this->d_session.mutable_expire_time()->set_seconds(
        deadlineSeconds.count());
    this->d_session.mutable_expire_time()->set_nanos(
        static_cast<int32_t>(deadlineRemainder.count() * NANOS_PER_MICRO));

    // Calculate the expected reduction in duration
    const auto factor = static_cast<int64_t>(
        static_cast<float>(this->d_maxWaitTime.count()) *
        buildboxworker::ExpireTime::updateTimeoutPaddingFactor());

    const auto expectedExpireTime =
        currentTime +
        (this->d_maxWaitTime - std::chrono::microseconds(factor));

    // Set the bot as Unhealthy to make it respect the given expire_time
    // even when there's no assigned work
    setBotStatus(proto::BotStatus::UNHEALTHY);
    ASSERT_TRUE(this->d_session.has_expire_time());
    ASSERT_EQ(this->calculateWaitTime(currentTime), expectedExpireTime);
}

TEST_F(WorkerTestFixture, DefaultWaitTime)
{
    this->addFakeJob("testjob");
    const auto currentTime = std::chrono::system_clock::now();
    const auto expectedWaitTime = currentTime + this->s_defaultWaitTime;

    ASSERT_FALSE(this->d_session.has_expire_time());
    ASSERT_EQ(this->calculateWaitTime(currentTime), expectedWaitTime);
}

TEST_F(WorkerTestFixture, WaitTimeSaturates)
{
    this->addFakeJob("testjob");
    const auto currentTime = std::chrono::system_clock::now();
    const auto excessive_wait_time =
        std::chrono::duration_cast<std::chrono::seconds>(2 *
                                                         this->d_maxWaitTime);
    const auto expectedWaitTime = currentTime + this->d_maxWaitTime;

    this->d_session.mutable_expire_time()->set_seconds(
        std::chrono::system_clock::to_time_t(currentTime) +
        excessive_wait_time.count());

    ASSERT_TRUE(this->d_session.has_expire_time());
    ASSERT_EQ(this->calculateWaitTime(currentTime), expectedWaitTime);
}

TEST_F(WorkerTestFixture, WaitTimePercentage)
{
    int64_t NANOS_PER_MICRO = 1000;

    this->addFakeJob("testjob");
    // Use a starting time with microsecond precision to prevent rounding
    // errors in test as we use `microseconds` throughout
    const auto currentTime =
        std::chrono::time_point_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now());

    // Calculate the deadline in the protobuf format (seconds / nanosecond
    // remainder)
    auto deadline = currentTime + this->d_maxWaitTime;
    const auto deadlineSeconds =
        std::chrono::duration_cast<std::chrono::seconds>(
            deadline.time_since_epoch());
    deadline -= deadlineSeconds;
    const auto deadlineRemainder =
        std::chrono::duration_cast<std::chrono::microseconds>(
            deadline.time_since_epoch());

    this->d_session.mutable_expire_time()->set_seconds(
        deadlineSeconds.count());
    this->d_session.mutable_expire_time()->set_nanos(
        static_cast<int32_t>(deadlineRemainder.count() * NANOS_PER_MICRO));

    // Calculate the expected reduction in duration
    const auto factor = static_cast<int64_t>(
        static_cast<float>(this->d_maxWaitTime.count()) *
        buildboxworker::ExpireTime::updateTimeoutPaddingFactor());

    const auto expectedExpireTime =
        currentTime +
        (this->d_maxWaitTime - std::chrono::microseconds(factor));

    ASSERT_TRUE(this->d_session.has_expire_time());
    ASSERT_EQ(this->calculateWaitTime(currentTime), expectedExpireTime);
}

proto::Command createSleepCommand(int time)
{
    proto::Command command;
    command.add_arguments("/usr/bin/env");
    command.add_arguments("sleep");
    command.add_arguments(std::to_string(time));
    return command;
}

proto::Command createSuccessCommand()
{
    proto::Command command;
    command.add_arguments("/usr/bin/env");
    command.add_arguments("true");
    return command;
}

proto::Command createFailureCommand()
{
    proto::Command command;
    command.add_arguments("/usr/bin/env");
    command.add_arguments("false");
    return command;
}

TEST_F(WorkerTestFixture, RunMultipleLeases)
{
    ActionTestCase testCase1;
    testCase1.command = createSleepCommand(1);

    ActionTestCase testCase2;
    testCase2.command = createSleepCommand(1);

    ActionTestCase testCase3;
    testCase3.action.mutable_timeout()->CopyFrom(
        google::protobuf::util::TimeUtil::SecondsToDuration(1));
    testCase3.command = createSleepCommand(10);

    d_maxConcurrentJobs = 3;
    d_stopAfterJobs = 3;

    std::vector<ActionTestCase> testCases = {testCase1, testCase2, testCase3};
    auto statuses = runMultipleActions(testCases);

    EXPECT_EQ(statuses[0].code(), grpc::StatusCode::OK);
    EXPECT_EQ(statuses[1].code(), grpc::StatusCode::OK);
    EXPECT_EQ(statuses[2].code(), grpc::StatusCode::DEADLINE_EXCEEDED);

    EXPECT_EQ(testCases[0].result.exit_code(), 0);
    EXPECT_EQ(testCases[1].result.exit_code(), 0);

    ASSERT_TRUE(collectedByName<DurationMetricValue>(
        buildboxworker::MetricNames::TIMER_NAME_EXECUTE_ACTION));
}

TEST_F(WorkerTestFixture, RunMultipleLeasesThenCancel)
{
    ActionTestCase testCase1;
    testCase1.command = createSleepCommand(100);

    ActionTestCase testCase2;
    testCase2.command = createSleepCommand(101);

    d_maxConcurrentJobs = 2;
    d_stopAfterJobs = 2;

    std::vector<ActionTestCase> testCases = {testCase1, testCase2};
    auto statuses = runMultipleActions(testCases, true);

    EXPECT_EQ(statuses[0].code(), grpc::StatusCode::CANCELLED);
    EXPECT_EQ(statuses[1].code(), grpc::StatusCode::CANCELLED);

    EXPECT_EQ(testCases[0].result.exit_code(), 0);
    EXPECT_EQ(testCases[1].result.exit_code(), 0);
}

TEST_F(WorkerTestFixture, RunMultipleLeasesCollectRunningJobsMetric)
{
    ActionTestCase testCase1;
    testCase1.action.mutable_timeout()->CopyFrom(
        google::protobuf::util::TimeUtil::SecondsToDuration(10));
    testCase1.command = createSleepCommand(3);

    ActionTestCase testCase2;
    testCase2.action.mutable_timeout()->CopyFrom(
        google::protobuf::util::TimeUtil::SecondsToDuration(10));
    testCase2.command = createSleepCommand(3);

    d_maxConcurrentJobs = 2;
    d_stopAfterJobs = 2;

    std::jthread t([&]() {
        std::vector<ActionTestCase> testCases = {testCase1, testCase2};
        auto statuses = runMultipleActions(testCases);

        EXPECT_EQ(statuses[0].code(), grpc::StatusCode::OK);
        EXPECT_EQ(statuses[1].code(), grpc::StatusCode::OK);

        EXPECT_EQ(testCases[0].result.exit_code(), 0);
        EXPECT_EQ(testCases[1].result.exit_code(), 0);
    });

    // Sleep to allow jobs to start running
    std::this_thread::sleep_for(std::chrono::seconds(1));

    // Notes: since this is an aggregate metric, the metric collection
    // framework is not able to collect it in time before the test ends, unless
    // we run the actions in a separate thread and wait here for a bit.
    ASSERT_TRUE(collectedByNameWithValue<GaugeMetricValue>(
        buildboxworker::MetricNames::GAUGE_NUM_JOBS_RUNNING_ON_WORKER,
        GaugeMetricValue(2)));
}

TEST_F(WorkerTestFixture, ConsecutiveErrorCountingMultipleLeases)
{
    ActionTestCase testCase1;
    testCase1.command = createFailureCommand();

    ActionTestCase testCase2;
    testCase2.command = createFailureCommand();

    ActionTestCase testCase3;
    testCase3.command = createFailureCommand();

    EXPECT_FALSE(d_markedUnhealthyByErrors);

    d_maxConcurrentJobs = 3;
    d_stopAfterJobs = 3;
    d_consecutiveErrorsThreshold = 3;

    std::vector<ActionTestCase> testCases = {testCase1, testCase2, testCase3};
    auto statuses = runMultipleActions(testCases);

    EXPECT_TRUE(d_markedUnhealthyByErrors);
    EXPECT_EQ(d_consecutiveErrorCount, 3);

    for (const auto &status : statuses) {
        EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    }

    // All should have failed
    for (const auto &testCase : testCases) {
        EXPECT_EQ(testCase.result.exit_code(), 1);
    }
}

TEST_F(WorkerTestFixture, ConsecutiveErrorCountingMultipleLeasesSequential)
{
    ActionTestCase testCase1;
    testCase1.command = createFailureCommand();

    ActionTestCase testCase2;
    testCase2.command = createFailureCommand();

    ActionTestCase testCase3;
    testCase3.command = createFailureCommand();

    EXPECT_FALSE(d_markedUnhealthyByErrors);

    d_maxConcurrentJobs = 1; // Run sequentially
    d_stopAfterJobs = 3;
    d_consecutiveErrorsThreshold = 3;

    std::vector<ActionTestCase> testCases = {testCase1, testCase2, testCase3};
    auto statuses = runMultipleActions(testCases);

    EXPECT_TRUE(d_markedUnhealthyByErrors);
    EXPECT_EQ(d_consecutiveErrorCount, 3);

    // All should have failed
    for (const auto &status : statuses) {
        EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    }
    for (const auto &testCase : testCases) {
        EXPECT_EQ(testCase.result.exit_code(), 1);
    }
}

TEST_F(WorkerTestFixture, ConsecutiveErrorCountingMultipleLeasesResetOnSuccess)
{
    ActionTestCase testCase1;
    testCase1.command = createFailureCommand();

    ActionTestCase testCase2;
    testCase2.command = createSuccessCommand();

    ActionTestCase testCase3;
    testCase3.command = createFailureCommand();

    ActionTestCase testCase4;
    testCase4.command = createFailureCommand();

    EXPECT_FALSE(d_markedUnhealthyByErrors);

    d_maxConcurrentJobs = 1; // Run sequentially to ensure ordering
    d_stopAfterJobs = 4;
    d_consecutiveErrorsThreshold = 3;

    std::vector<ActionTestCase> testCases = {testCase1, testCase2, testCase3,
                                             testCase4};
    auto statuses = runMultipleActions(testCases);

    // Should not be marked unhealthy because success in middle resets counter
    EXPECT_FALSE(d_markedUnhealthyByErrors);
    EXPECT_EQ(d_consecutiveErrorCount,
              2); // Reset after testCase2, then 2 more errors

    for (const auto &status : statuses) {
        EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    }
    EXPECT_EQ(testCases[0].result.exit_code(), 1);
    EXPECT_EQ(testCases[1].result.exit_code(), 0);
    EXPECT_EQ(testCases[2].result.exit_code(), 1);
    EXPECT_EQ(testCases[3].result.exit_code(), 1);
}

TEST_F(WorkerTestFixture, ConsecutiveErrorCountingMultipleLeasesMixedResults)
{
    ActionTestCase testCase1;
    testCase1.command = createFailureCommand();

    ActionTestCase testCase2;
    testCase2.command = createSuccessCommand();

    ActionTestCase testCase3;
    testCase3.command = createFailureCommand();

    ActionTestCase testCase4;
    testCase4.command = createFailureCommand();

    EXPECT_FALSE(d_markedUnhealthyByErrors);

    d_maxConcurrentJobs = 1; // Run sequentially
    d_stopAfterJobs = 4;
    d_consecutiveErrorsThreshold = 2;

    std::vector<ActionTestCase> testCases = {testCase1, testCase2, testCase3,
                                             testCase4};
    auto statuses = runMultipleActions(testCases);

    // Should only be marked unhealthy after the last 2 consecutive failures
    // since testCase2 was a success, the error count resets
    EXPECT_TRUE(d_markedUnhealthyByErrors);
    EXPECT_EQ(d_consecutiveErrorCount, 2);

    for (const auto &status : statuses) {
        EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    }
    EXPECT_EQ(testCases[0].result.exit_code(), 1);
    EXPECT_EQ(testCases[1].result.exit_code(), 0);
    EXPECT_EQ(testCases[2].result.exit_code(), 1);
    EXPECT_EQ(testCases[3].result.exit_code(), 1);
}

TEST_F(WorkerTestFixture,
       ConsecutiveErrorCountingMultipleLeasesPartialThreshold)
{
    ActionTestCase testCase1;
    testCase1.command = createFailureCommand();

    ActionTestCase testCase2;
    testCase2.command = createFailureCommand();

    EXPECT_FALSE(d_markedUnhealthyByErrors);

    d_maxConcurrentJobs = 2;
    d_stopAfterJobs = 2;
    d_consecutiveErrorsThreshold = 5;

    std::vector<ActionTestCase> testCases = {testCase1, testCase2};
    auto statuses = runMultipleActions(testCases);

    // Should not be marked unhealthy
    EXPECT_FALSE(d_markedUnhealthyByErrors);
    EXPECT_LE(d_consecutiveErrorCount, 2);

    for (const auto &status : statuses) {
        EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    }
    for (const auto &testCase : testCases) {
        EXPECT_EQ(testCase.result.exit_code(), 1);
    }
}

TEST_F(WorkerTestFixture, RunSingleLease)
{
    proto::Action action;
    proto::Command command = createSleepCommand(1);

    proto::ActionResult actionResult;
    const auto status = runAction(&action, command, &actionResult);

    EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    EXPECT_EQ(actionResult.exit_code(), 0);

    ASSERT_TRUE(collectedByName<DurationMetricValue>(
        buildboxworker::MetricNames::TIMER_NAME_EXECUTE_ACTION));
}

TEST_F(WorkerTestFixture, RunSingleLeaseTimeout)
{
    proto::Action action;
    action.mutable_timeout()->CopyFrom(
        google::protobuf::util::TimeUtil::SecondsToDuration(1));
    proto::Command command = createSleepCommand(5);

    proto::ActionResult actionResult;
    const auto status = runAction(&action, command, &actionResult);

    EXPECT_EQ(status.code(), grpc::StatusCode::DEADLINE_EXCEEDED);
    EXPECT_NE(actionResult.exit_code(), 0);
}

TEST_F(WorkerTestFixture, RunLeaseThenCancel)
{
    proto::Action action;
    // long sleep, should be cancelled
    proto::Command command = createSleepCommand(100);

    proto::ActionResult actionResult;
    const auto status = runAction(&action, command, &actionResult, true);

    EXPECT_EQ(status.code(), grpc::StatusCode::CANCELLED);
    EXPECT_EQ(actionResult.exit_code(), 0);
}

// Consecutive Error Tracking Tests

TEST_F(WorkerTestFixture, ConsecutiveErrorsDisabledByDefault)
{
    // Default threshold should be 0 (disabled)
    EXPECT_EQ(d_consecutiveErrorsThreshold, 0);
    EXPECT_EQ(d_consecutiveErrorCount, 0);
    EXPECT_FALSE(d_markedUnhealthyByErrors);

    // Run failing command - should not affect status when disabled
    proto::Action action;
    proto::Command command = createFailureCommand();
    proto::ActionResult actionResult;

    const auto status = runAction(&action, command, &actionResult);
    EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    EXPECT_EQ(actionResult.exit_code(), 1); // exit code 1 from 'false'

    EXPECT_EQ(d_consecutiveErrorCount, 0); // No counting when disabled
    EXPECT_FALSE(d_markedUnhealthyByErrors);
}

TEST_F(WorkerTestFixture, ConsecutiveErrorCounting)
{
    d_consecutiveErrorsThreshold = 3;

    proto::Action action;
    proto::ActionResult actionResult;

    // First error
    auto command = createFailureCommand();
    auto status = runAction(&action, command, &actionResult);
    EXPECT_EQ(d_consecutiveErrorCount, 1);
    EXPECT_FALSE(d_markedUnhealthyByErrors);

    // Second error
    command = createFailureCommand();
    status = runAction(&action, command, &actionResult);
    EXPECT_EQ(d_consecutiveErrorCount, 2);
    EXPECT_FALSE(d_markedUnhealthyByErrors);

    // Third error - should trigger UNHEALTHY
    command = createFailureCommand();
    status = runAction(&action, command, &actionResult);
    EXPECT_EQ(d_consecutiveErrorCount, 3);
    EXPECT_TRUE(d_markedUnhealthyByErrors);
}

TEST_F(WorkerTestFixture, ConsecutiveErrorResetOnSuccess)
{
    d_consecutiveErrorsThreshold = 5;

    proto::Action action;
    proto::ActionResult actionResult;

    // Two errors
    auto command = createFailureCommand();
    auto status = runAction(&action, command, &actionResult);
    command = createFailureCommand();
    status = runAction(&action, command, &actionResult);
    EXPECT_EQ(d_consecutiveErrorCount, 2);
    EXPECT_FALSE(d_markedUnhealthyByErrors);

    // Success - should reset counter
    command = createSuccessCommand();
    status = runAction(&action, command, &actionResult);
    EXPECT_EQ(status.code(), grpc::StatusCode::OK);
    EXPECT_EQ(actionResult.exit_code(), 0);
    EXPECT_EQ(d_consecutiveErrorCount, 0); // Reset to 0
    EXPECT_FALSE(d_markedUnhealthyByErrors);

    // Start counting again
    command = createFailureCommand();
    status = runAction(&action, command, &actionResult);
    EXPECT_EQ(d_consecutiveErrorCount, 1);
    EXPECT_FALSE(d_markedUnhealthyByErrors);
}

TEST_F(WorkerTestFixture, ConsecutiveErrorPersistence)
{
    d_consecutiveErrorsThreshold = 1;

    proto::Action action;
    proto::ActionResult actionResult;

    // Single error to trigger UNHEALTHY
    auto command = createFailureCommand();
    auto status = runAction(&action, command, &actionResult);

    EXPECT_TRUE(d_markedUnhealthyByErrors);

    // Even after successful commands, the flag should remain true
    command = createSuccessCommand();
    status = runAction(&action, command, &actionResult);
    EXPECT_EQ(d_consecutiveErrorCount, 0);  // Counter resets
    EXPECT_TRUE(d_markedUnhealthyByErrors); // But flag persists
}
