Integrate emotions recognition #51

Open · wants to merge 8 commits into base: main

Changes from 6 commits
17 changes: 16 additions & 1 deletion child_lab_framework/demo_sequential.py
@@ -5,7 +5,7 @@
 
 from .core.video import Format, Perspective, Reader, Writer
 from .logging import Logger
-from .task import depth, face, gaze, pose
+from .task import depth, face, gaze, pose, emotion
 from .task.camera import transformation
 from .task.visualization import Visualizer

@@ -62,6 +62,9 @@ def main() -> None:
         threshold=0.1,
     )
 
+    emotions_estimator_left = emotion.Estimator(executor)
+    emotions_estimator_right = emotion.Estimator(executor)
+
     window_left_gaze_estimator = gaze.Estimator(
         executor,
         input=window_left_reader.properties,
@@ -106,6 +109,8 @@ def main() -> None:
         output_format=Format.MP4,
     )
 
+    print('Starting sequential processing')
+
     while True:
         ceiling_frames = ceiling_reader.read_batch()
         if ceiling_frames is None:
@@ -205,25 +210,35 @@ def main() -> None:
             else None
         )
 
+        window_left_emotions = emotions_estimator_left.predict_batch(
+            window_left_frames, window_left_faces
+        )
+        window_right_emotions = emotions_estimator_right.predict_batch(
+            window_right_frames, window_right_faces
+        )
+
         ceiling_annotated_frames = visualizer.annotate_batch(
             ceiling_frames,
             ceiling_poses,
             None,
             ceiling_gazes,
+            None,
         )
 
         window_left_annotated_frames = visualizer.annotate_batch(
             window_left_frames,
             window_left_poses,
             window_left_faces,
             None,
+            window_left_emotions,
         )
 
         window_right_annotated_frames = visualizer.annotate_batch(
             window_right_frames,
             window_right_poses,
             window_right_faces,
             None,
+            window_right_emotions,
         )
 
         ceiling_writer.write_batch(ceiling_annotated_frames)
3 changes: 3 additions & 0 deletions child_lab_framework/task/emotion/__init__.py
@@ -0,0 +1,3 @@
from .emotion import Estimator, Result

__all__ = ['Estimator', 'Result']
106 changes: 106 additions & 0 deletions child_lab_framework/task/emotion/emotion.py
@@ -0,0 +1,106 @@
import asyncio
from concurrent.futures import ThreadPoolExecutor
from itertools import repeat, starmap

from deepface import DeepFace

from child_lab_framework.core.sequence import imputed_with_reference_inplace
from child_lab_framework.task import face

from ...core.video import Frame
from ...typing.array import FloatArray2
from ...typing.stream import Fiber

type Input = tuple[
    list[Frame | None] | None,
    list[face.Result | None] | None,
]


class Result:
    emotions: list[float]
    boxes: list[FloatArray2]
integraledelebesgue (Member) commented on lines +19 to +20 (Nov 6, 2024):
We could stack these results and store them as np.ndarrays instead of lists; we do so in the other Result classes in general.

integraledelebesgue (Member):
Also, there is no use duplicating face.Result as boxes here.

WojciechBarczynski (Member, Author):
There is: that way there is no coupling between emotion recognition and face detection in the visualization layer. Duplicating the boxes is almost free (the cost is negligible), and it makes them much easier to handle in the visualization layer.


    def __init__(self, emotions: list[float], boxes: list[FloatArray2]) -> None:
        self.emotions = emotions
        self.boxes = boxes
integraledelebesgue (Member) commented on lines +18 to +24:
This should be a frozen dataclass.
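
A sketch of both suggestions combined, stacked arrays inside a frozen dataclass (hypothetical, not part of this PR; from_lists is an illustrative helper):

from dataclasses import dataclass

import numpy as np


@dataclass(frozen=True)
class Result:
    emotions: np.ndarray  # one score per detected face, shape (n_faces,)
    boxes: np.ndarray  # stacked face boxes, shape (n_faces, 4)

    @classmethod
    def from_lists(cls, emotions: list[float], boxes: list[np.ndarray]) -> 'Result':
        return cls(np.asarray(emotions, dtype=np.float32), np.stack(boxes))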



class Estimator:
    executor: ThreadPoolExecutor

    def __init__(self, executor: ThreadPoolExecutor) -> None:
        self.executor = executor

    def predict(self, frame: Frame, faces: face.Result | None) -> Result:
        face_emotions = []
        boxes = []
        frame_height, frame_width, _ = frame.shape
        for face_box in faces.boxes:
integraledelebesgue (Member) commented:
The faces.boxes access is not safe, since faces can be None. This function should return Result | None and check faces first.

integraledelebesgue (Member) commented (Nov 7, 2024):
Btw, the predict method should take non-None arguments. You can rename this method to __predict_safe and use it in stream and predict_batch.

            x_min, y_min, x_max, y_max = face_box
            # Grow the detected box by a 50 px margin, clamped to the frame bounds
            x_min = max(x_min - 50, 0)
            x_max = min(x_max + 50, frame_width)
            y_min = max(y_min - 50, 0)
            y_max = min(y_max + 50, frame_height)
            cropped_frame = frame[y_min:y_max, x_min:x_max]
            analysis = DeepFace.analyze(
                cropped_frame, actions=['emotion'], enforce_detection=False
            )
            emotion = score_emotions(analysis[0])
            face_emotions.append(emotion)
            boxes.append(face_box)

        return Result(face_emotions, boxes)

    def predict_batch(
        self,
        frames: list[Frame],
        faces: list[face.Result | None],
    ) -> list[Result] | None:
        return imputed_with_reference_inplace(
            list(starmap(self.predict, zip(frames, faces)))
        )
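
A sketch of the refactor suggested above, where predict takes non-None arguments and a private wrapper absorbs missing detections (hypothetical, not part of this PR; the names follow the reviewer's suggestion):

    def predict(self, frame: Frame, faces: face.Result) -> Result:
        ...  # the loop above, with faces guaranteed to be present

    def __predict_safe(
        self, frame: Frame, faces: face.Result | None
    ) -> Result | None:
        # Yield None for frames without detected faces instead of
        # failing on faces.boxes
        return self.predict(frame, faces) if faces is not None else None

predict_batch and stream would then map __predict_safe in place of predict.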

    async def stream(
        self,
    ) -> Fiber[list[Frame | None] | None, list[Result | None] | None]:
        loop = asyncio.get_running_loop()
        executor = self.executor

        results: list[Result | None] | None = None

        while True:
            match (yield results):
                case (
                    list(frames),
                    faces,
                ):
                    results = await loop.run_in_executor(
                        executor,
                        lambda: list(
                            starmap(
                                self.predict,
                                zip(frames, faces or repeat(None)),
                            )
                        ),
                    )

                case _:
                    results = None
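
For context, the Fiber above is an async generator: it is primed once and then driven with asend, receiving (frames, faces) tuples and yielding results. A minimal driver might look like this (hypothetical usage, not part of this PR):

async def drive(estimator: Estimator, frames, faces):
    fiber = estimator.stream()
    await anext(fiber)  # run up to the first yield; returns None
    return await fiber.asend((frames, faces))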


def score_emotions(emotions: dict[str, dict[str, float]]) -> float:
    # In practice, 'angry' and 'fear' usually read close to 'neutral',
    # hence their small negative weights
    scores = {
        'angry': -0.05,
        'disgust': 0,
        'fear': -0.07,
        'happy': 1,
        'sad': -1,
        'surprise': 0,
        'neutral': 0,
    }
integraledelebesgue (Member) commented:
This dict shouldn't be defined here, since it is going to be re-created on each call 💀 Please make it a private constant.

integraledelebesgue (Member) commented (Nov 7, 2024):
With this in mind, how about injecting this dict into the Estimator via its constructor?

    val = 0
    for emotion, score in scores.items():
        val += emotions['emotion'][emotion] * score

    return val
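
A sketch of the constant-plus-injection approach suggested above (hypothetical, not part of this PR; names are illustrative):

from concurrent.futures import ThreadPoolExecutor

_DEFAULT_EMOTION_SCORES: dict[str, float] = {
    'angry': -0.05,
    'disgust': 0.0,
    'fear': -0.07,
    'happy': 1.0,
    'sad': -1.0,
    'surprise': 0.0,
    'neutral': 0.0,
}


class Estimator:
    def __init__(
        self,
        executor: ThreadPoolExecutor,
        scores: dict[str, float] | None = None,
    ) -> None:
        self.executor = executor
        self.scores = scores if scores is not None else _DEFAULT_EMOTION_SCORES

    def score_emotions(self, emotions: dict[str, dict[str, float]]) -> float:
        # Weighted sum over DeepFace's per-emotion percentages, e.g.
        # {'emotion': {'happy': 80.0, 'sad': 5.0, ...}} scores roughly
        # 80.0 * 1 + 5.0 * (-1) = 75
        return sum(
            probability * self.scores.get(name, 0.0)
            for name, probability in emotions['emotion'].items()
        )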
23 changes: 22 additions & 1 deletion child_lab_framework/task/visualization/visualization.py
@@ -3,12 +3,13 @@
 from itertools import repeat, starmap
 
 import cv2
+import cv2.text
 import numpy as np
 
 from ...core.video import Frame, Properties
 from ...typing.array import FloatArray1, FloatArray2, IntArray1
 from ...typing.stream import Fiber
-from .. import face, pose
+from .. import face, pose, emotion
 from ..gaze import ceiling_projection
 from ..pose.keypoint import YOLO_SKELETON

@@ -123,12 +124,27 @@ def __draw_face_box(self, frame: Frame, result: face.Result) -> Frame:
 
         return frame
 
+    def __draw_emotions_text(self, frame: Frame, result: emotion.Result) -> Frame:
+        color = self.FACE_BOUNDING_BOX_COLOR
+        for value, box in zip(result.emotions, result.boxes):
+            cv2.putText(
+                frame,
+                f'{value:.2f}',
+                (int(box[0]), int(box[3])),  # cv2 expects integer pixel coordinates
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.9,
+                color,
+                2,
+            )
+        return frame

     def __annotate_safe(
         self,
         frame: Frame,
         poses: pose.Result | None,
         faces: face.Result | None,
         gazes: ceiling_projection.Result | None,
+        emotions: emotion.Result | None,
     ) -> Frame:
         out = frame.copy()
         out.flags.writeable = True
@@ -143,6 +159,9 @@ def __annotate_safe(
         if gazes is not None:
             out = self.__draw_gaze_estimation(out, gazes)
 
+        if emotions is not None:
+            out = self.__draw_emotions_text(out, emotions)
+
         return out

@@ -151,6 +170,7 @@ def annotate_batch(
         poses: list[pose.Result] | None,
         faces: list[face.Result] | None,
         gazes: list[ceiling_projection.Result] | None,
+        emotions: list[emotion.Result] | None,
     ) -> list[Frame]:
         return list(
             starmap(
@@ -160,6 +180,7 @@ def annotate_batch(
                     poses or repeat(None),
                     faces or repeat(None),
                     gazes or repeat(None),
+                    emotions or repeat(None),
                 ),
             )
         )