Skip to content

Commit

Permalink
Merge pull request #335 from sourcebots/feature/improve-timeout-termi…
Browse files Browse the repository at this point in the history
…nation

Unclearly exit if terminating takes too long
  • Loading branch information
WillB97 authored Jul 28, 2024
2 parents 2203bae + 2731a57 commit b90f275
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 9 deletions.
32 changes: 29 additions & 3 deletions sbot/timeout.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Functions for killing the robot after a certain amount of time."""
import atexit
import logging
import os
import signal
Expand All @@ -9,6 +10,9 @@

logger = logging.getLogger(__name__)

TIMEOUT_MESSAGE = "Timeout expired: Game Over!"
EXIT_ATTEMPTS = 0


def timeout_handler(signal_type: int, stack_frame: Optional[FrameType]) -> None:
"""
Expand All @@ -17,13 +21,34 @@ def timeout_handler(signal_type: int, stack_frame: Optional[FrameType]) -> None:
This function is called when the timeout expires and will stop the robot's main process.
In order for this to work, any threads that are created must be daemons.
If the process doesn't terminate clearly (perhaps because the exception was caught),
exit less cleanly.
NOTE: This function is not called on Windows.
:param signal_type: The sginal that triggered this handler
:param stack_frame: The stack frame at the time of the signal
"""
logger.info("Timeout expired: Game Over!")
exit(0)
global EXIT_ATTEMPTS
EXIT_ATTEMPTS += 1

if sys.platform == "win32":
raise AssertionError("This function should not be called on Windows")

if EXIT_ATTEMPTS == 1:
# Allow 2 seconds for the process to exit cleanly before killing it
signal.alarm(2)
logger.info(TIMEOUT_MESSAGE)
# Exit cleanly
exit(0)
else:
# The process didn't exit cleanly, so first call the cleanup handlers
# and use an unhanded alarm to force python to exit with a core dump
signal.signal(signal.SIGALRM, signal.SIG_DFL)
signal.alarm(2) # Allow 2 seconds for cleanup

atexit._run_exitfuncs()
exit(0)


def win_timeout_handler() -> None:
Expand All @@ -35,7 +60,7 @@ def win_timeout_handler() -> None:
NOTE: This function is only called on Windows.
"""
logger.info("Timeout expired: Game Over!")
logger.info(TIMEOUT_MESSAGE)
os.kill(os.getpid(), signal.SIGTERM)


Expand All @@ -52,6 +77,7 @@ def kill_after_delay(timeout_seconds: int) -> None:
# Windows doesn't have SIGALRM,
# so we approximate its functionality using a delayed SIGTERM
timer = Timer(timeout_seconds, win_timeout_handler)
timer.daemon = True
timer.start()
else:
signal.signal(signal.SIGALRM, timeout_handler)
Expand Down
10 changes: 10 additions & 0 deletions tests/test_data/timeout_scripts/catch-base-exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from sbot.timeout import kill_after_delay
import time

kill_after_delay(2)

while True:
try:
time.sleep(10)
except BaseException:
pass
10 changes: 10 additions & 0 deletions tests/test_data/timeout_scripts/catch-exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from sbot.timeout import kill_after_delay
import time

kill_after_delay(2)

while True:
try:
time.sleep(10)
except Exception:
pass
7 changes: 7 additions & 0 deletions tests/test_data/timeout_scripts/hot-loop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from sbot.timeout import kill_after_delay

kill_after_delay(2)

# This is a bad idea
while True:
pass
6 changes: 6 additions & 0 deletions tests/test_data/timeout_scripts/sleep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from sbot.timeout import kill_after_delay
import time

kill_after_delay(2)

time.sleep(10)
11 changes: 11 additions & 0 deletions tests/test_data/timeout_scripts/try-finally.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from sbot.timeout import kill_after_delay
import time

kill_after_delay(2)

while True:
try:
time.sleep(10)
finally:
# Imagine we wanted to do something else here
pass
3 changes: 3 additions & 0 deletions tests/test_data/timeout_scripts_extra/early-exit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from sbot.timeout import kill_after_delay

kill_after_delay(2)
5 changes: 5 additions & 0 deletions tests/test_data/timeout_scripts_extra/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from sbot.timeout import kill_after_delay

kill_after_delay(2)

1 / 0
58 changes: 52 additions & 6 deletions tests/test_timeout.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from time import sleep, time

import os
from pathlib import Path
import signal
import subprocess
import sys
Expand All @@ -10,13 +11,19 @@

from sbot.timeout import kill_after_delay

TEST_FILES = list((Path(__file__).parent / 'test_data/timeout_scripts').iterdir())
EXTRA_TEST_FILES_DIR = Path(__file__).parent / 'test_data/timeout_scripts_extra'

@pytest.mark.skipif(sys.platform == "win32", reason="does not run on Windows")
def test_kill_after_delay() -> None:
"""Test that the process is killed within the time."""
with pytest.raises(SystemExit):
kill_after_delay(2)
sleep(3)

# Clear the set alarm
signal.alarm(0)


@pytest.mark.skipif(sys.platform != "win32", reason="only runs on Windows")
def test_kill_after_delay_windows(monkeypatch) -> None:
Expand All @@ -34,20 +41,59 @@ def test_kill_after_delay_windows(monkeypatch) -> None:
kill.assert_called_once_with(pid, signal.SIGTERM)


def test_kill_after_delay_e2e() -> None:
@pytest.mark.parametrize(
"test_file",
TEST_FILES,
ids=[f.stem for f in TEST_FILES]
)
def test_kill_after_delay_e2e(test_file: Path) -> None:
start_time = time()
child = subprocess.Popen([
sys.executable,
"-c",
'from sbot.timeout import kill_after_delay; import time; kill_after_delay(2); time.sleep(10)',
str(test_file),
], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

child.wait(timeout=5)
child.wait(timeout=8)
run_time = time() - start_time

assert time() - start_time == pytest.approx(2, rel=1)
assert 2 < run_time < 8

if sys.platform == "win32":
# Windows terminates uncleanly
assert child.returncode == signal.SIGTERM
else:
elif run_time < 4:
# If the process terminated quickly, it should be successful
assert child.returncode == 0
else:
# If the process took too long, it was killed ungracefully
assert child.returncode == -signal.SIGALRM

def test_early_exit() -> None:
start_time = time()
child = subprocess.Popen([
sys.executable,
str(EXTRA_TEST_FILES_DIR / "early-exit.py"),
], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

child.wait(timeout=6)

run_time = time() - start_time

assert run_time < 2

assert child.returncode == 0

def test_exception() -> None:
start_time = time()
child = subprocess.Popen([
sys.executable,
str(EXTRA_TEST_FILES_DIR / "exception.py"),
], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

child.wait(timeout=6)

run_time = time() - start_time

assert run_time < 2

assert child.returncode == 1

0 comments on commit b90f275

Please sign in to comment.