Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SlowFast as an optional dependency #80

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ dependencies = [
"ruamel.yaml",
"ultralytics",
"pandas",
"detectron2 @ git+https://github.com/facebookresearch/detectron2.git@2a420edb307c9bdf640f036d3b196bed474b8593",
"pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@1fadaef40dd393ca09680f55582399f4679fc9b7",
"slowfast @ git+https://github.com/Imageomics/SlowFast@797a6f3ae81c49019d006296f1e0f84f431dc356"
"pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@1fadaef40dd393ca09680f55582399f4679fc9b7"
]
keywords = [
"annotation",
Expand Down Expand Up @@ -68,3 +66,9 @@ miniscene2behavior = "kabr_tools.miniscene2behavior:main"

[tool.hatch.version]
path = "src/kabr_tools/__about__.py"

[project.optional-dependencies]
slowfast = [
"detectron2 @ git+https://github.com/facebookresearch/detectron2.git@2a420edb307c9bdf640f036d3b196bed474b8593",
"slowfast @ git+https://github.com/Imageomics/SlowFast@797a6f3ae81c49019d006296f1e0f84f431dc356"
]
126 changes: 78 additions & 48 deletions src/kabr_tools/miniscene2behavior.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,15 @@
import sys
import argparse
import random
import torch
from lxml import etree
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
import slowfast.utils.checkpoint as cu
from slowfast.models import build
from slowfast.utils import parser
from slowfast.datasets.utils import get_sequence
from slowfast.visualization.utils import process_cv2_inputs
from slowfast.datasets.cv2_transform import scale
from fvcore.common.config import CfgNode
from torch import Tensor


def get_input_clip(cap: cv2.VideoCapture, cfg: CfgNode, keyframe_idx: int) -> list[Tensor]:
    """Extract and preprocess a clip of frames centered on a keyframe.

    Mirrors the SlowFast demo pipeline:
    https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py

    Args:
        cap: open video capture to read frames from.
        cfg: model config providing DATA.NUM_FRAMES, DATA.SAMPLING_RATE
            and DATA.TEST_CROP_SIZE.
        keyframe_idx: index of the frame the clip is centered on.

    Returns:
        Model-ready inputs as produced by ``process_cv2_inputs``.

    Raises:
        ValueError: if the very first frame of the sequence cannot be read,
            so there is no previous frame to duplicate.
    """
    seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    seq = get_sequence(
        keyframe_idx,
        seq_length // 2,
        cfg.DATA.SAMPLING_RATE,
        total_frames,
    )
    clip = []
    for frame_idx in seq:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        was_read, frame = cap.read()
        if was_read:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = scale(cfg.DATA.TEST_CROP_SIZE, frame)
            clip.append(frame)
        elif clip:
            # Fall back to the last good frame so the clip keeps its length.
            print("Unable to read frame. Duplicating previous frame.")
            clip.append(clip[-1])
        else:
            # Previously this path indexed clip[-1] on an empty list and
            # crashed with an opaque IndexError; fail with a clear message.
            raise ValueError(
                f"Unable to read any frame around keyframe {keyframe_idx}.")

    clip = process_cv2_inputs(clip, cfg)
    return clip
from kabr_tools.utils.slowfast.utils import get_input_clip
from kabr_tools.utils.slowfast.cfg import load_config, CfgNode
from kabr_tools.utils.slowfast.x3d import build_model


def parse_args() -> argparse.Namespace:
Expand Down Expand Up @@ -79,32 +50,77 @@ def parse_args() -> argparse.Namespace:
help="filepath for output csv",
default="annotation_data.csv"
)
local_parser.add_argument(
"--slowfast",
action="store_true",
help="load slowfast model"
)

return local_parser.parse_args()


def create_model(config_path: str, checkpoint_path: str, gpu_num: int) -> tuple[CfgNode, torch.nn.Module]:
def set_seeds(seed):
    """Seed the torch, Python, and numpy RNGs for reproducible runs."""
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)


def create_slowfast(config_path: str, checkpoint_path: str, gpu_num: int) -> tuple[CfgNode, torch.nn.Module]:
    """Build a model with the upstream SlowFast package from a config and checkpoint.

    Args:
        config_path: path to a SlowFast YAML config. If the file does not
            exist, it is reconstructed from the "cfg" entry stored in the
            checkpoint and then loaded.
        checkpoint_path: path to the model checkpoint file.
        gpu_num: number of GPUs to configure (cfg.NUM_GPUS).

    Returns:
        A (cfg, model) tuple with the model in eval mode.
    """
    # Deferred imports: slowfast is an optional dependency, so the module
    # must stay importable when it is not installed.
    import slowfast.utils.checkpoint as cu
    from slowfast.models import build
    from slowfast.utils import parser

    # load model config
    # NOTE: parser.parse_args() reads sys.argv; main() clears argv first so
    # slowfast's own parser does not choke on this tool's CLI flags.
    try:
        cfg = parser.load_config(parser.parse_args(), config_path)
    except FileNotFoundError:
        # Config file missing — recover the YAML text embedded in the
        # checkpoint, write it out, and retry the load.
        checkpoint = torch.load(
            checkpoint_path, map_location=torch.device("cpu"))
        with open(config_path, "w", encoding="utf-8") as file:
            file.write(checkpoint["cfg"])
        cfg = parser.load_config(parser.parse_args(), config_path)
    cfg.NUM_GPUS = gpu_num
    cfg.OUTPUT_DIR = ""

    # set random seeds
    set_seeds(cfg.RNG_SEED)

    # load model checkpoint
    model = build.build_model(cfg)
    cu.load_checkpoint(checkpoint_path, model, data_parallel=False)

    # set model to eval mode
    model.eval()
    return cfg, model


def create_model(config_path: str, checkpoint_path: str, gpu_num: int) -> tuple[CfgNode, torch.nn.Module]:
    """Build the bundled X3D model from a config file and checkpoint.

    Args:
        config_path: path to the model YAML config. If the file does not
            exist, it is reconstructed from the "cfg" entry stored in the
            checkpoint and then loaded.
        checkpoint_path: path to the model checkpoint file.
        gpu_num: number of GPUs to configure (cfg.NUM_GPUS).

    Returns:
        A (cfg, model) tuple with the model in eval mode.
    """
    # Checkpoint holds both the weights and (as text) the config it was
    # trained with; weights_only=True keeps torch.load safe on untrusted files.
    ckpt = torch.load(checkpoint_path, weights_only=True,
                      map_location=torch.device("cpu"))

    # Load the config, regenerating it from the checkpoint if it is missing.
    try:
        cfg = load_config(config_path)
    except FileNotFoundError:
        with open(config_path, "w", encoding="utf-8") as cfg_file:
            cfg_file.write(ckpt["cfg"])
        cfg = load_config(config_path)
    cfg.NUM_GPUS = gpu_num
    cfg.OUTPUT_DIR = ""

    # Seed RNGs before building the model for reproducibility.
    set_seeds(cfg.RNG_SEED)

    # Instantiate the network and restore its weights.
    model = build_model(cfg)
    model.load_state_dict(ckpt["model_state"])

    # Inference only — switch off dropout/batch-norm updates.
    model.eval()
    return cfg, model


def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module,
miniscene_path: str, video: str,
output_path: str) -> None:
Expand All @@ -124,29 +140,34 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module,

# find all tracks
tracks = []
frames = {}
for track in root.iterfind("track"):
track_id = track.attrib["id"]
tracks.append(track_id)
frames[track_id] = []

# find all frames
# TODO: rewrite - some tracks may have different frames
assert len(tracks) > 0, "No tracks found in track file"
frames = []
for box in track.iterfind("box"):
frames.append(int(box.attrib["frame"]))
# find all frames
for box in track.iterfind("box"):
frames[track_id].append(int(box.attrib["frame"]))

# run model on miniscene
for track in tracks:
video_file = f"{miniscene_path}/{track}.mp4"
cap = cv2.VideoCapture(video_file)
for frame in tqdm(frames, desc=f"{track} frames"):
inputs = get_input_clip(cap, cfg, frame)
index = 0
for frame in tqdm(frames[track], desc=f'{track} frames'):
try:
inputs = get_input_clip(cap, cfg, index)
except AssertionError as e:
print(e)
break
index += 1

if cfg.NUM_GPUS:
# transfer the data to the current GPU device.
if isinstance(inputs, (list,)):
for i in range(len(inputs)):
inputs[i] = inputs[i].cuda(non_blocking=True)
for i, input_clip in enumerate(inputs):
inputs[i] = input_clip.cuda(non_blocking=True)
else:
inputs = inputs.cuda(non_blocking=True)

Expand All @@ -163,14 +184,23 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module,
if frame % 20 == 0:
pd.DataFrame(label_data).to_csv(
output_path, sep=" ", index=False)
cap.release()
pd.DataFrame(label_data).to_csv(output_path, sep=" ", index=False)


def main() -> None:
    """CLI entry point: build the selected model and annotate mini-scenes."""
    args = parse_args()
    # Clear argv so slowfast's internal argument parser sees no stray flags.
    sys.argv = [sys.argv[0]]

    # Choose the backend: upstream slowfast package or the bundled model.
    if args.slowfast:
        cfg, model = create_slowfast(
            args.config, args.checkpoint, args.gpu_num)
    else:
        cfg, model = create_model(args.config, args.checkpoint, args.gpu_num)

    # Run behavior annotation over the mini-scene videos.
    annotate_miniscene(cfg, model, args.miniscene,
                       args.video, args.output)

Expand Down
Loading
Loading