Integrate emotions recognition #51

Open · wants to merge 8 commits into base: main

Changes from 6 commits
17 changes: 16 additions & 1 deletion child_lab_framework/demo_sequential.py
@@ -5,7 +5,7 @@
 
 from .core.video import Format, Perspective, Reader, Writer
 from .logging import Logger
-from .task import depth, face, gaze, pose
+from .task import depth, face, gaze, pose, emotion
 from .task.camera import transformation
 from .task.visualization import Visualizer

@@ -62,6 +62,9 @@ def main() -> None:
         threshold=0.1,
     )
 
+    emotions_estimator_left = emotion.Estimator(executor)
+    emotions_estimator_right = emotion.Estimator(executor)
+
     window_left_gaze_estimator = gaze.Estimator(
         executor,
         input=window_left_reader.properties,
@@ -106,6 +109,8 @@ def main() -> None:
         output_format=Format.MP4,
     )
 
+    print('Starting sequential processing')
+
     while True:
         ceiling_frames = ceiling_reader.read_batch()
         if ceiling_frames is None:
@@ -205,25 +210,35 @@ def main() -> None:
             else None
         )
 
+        window_left_emotions = emotions_estimator_left.predict_batch(
+            window_left_frames, window_left_faces
+        )
+        window_right_emotions = emotions_estimator_right.predict_batch(
+            window_right_frames, window_right_faces
+        )
+
         ceiling_annotated_frames = visualizer.annotate_batch(
             ceiling_frames,
             ceiling_poses,
             None,
             ceiling_gazes,
+            None,
         )
 
         window_left_annotated_frames = visualizer.annotate_batch(
             window_left_frames,
             window_left_poses,
             window_left_faces,
             None,
+            window_left_emotions,
         )
 
         window_right_annotated_frames = visualizer.annotate_batch(
             window_right_frames,
             window_right_poses,
             window_right_faces,
             None,
+            window_right_emotions,
         )
 
         ceiling_writer.write_batch(ceiling_annotated_frames)
3 changes: 3 additions & 0 deletions child_lab_framework/task/emotion/__init__.py
@@ -0,0 +1,3 @@
from .emotion import Estimator, Result

__all__ = ['Estimator', 'Result']
106 changes: 106 additions & 0 deletions child_lab_framework/task/emotion/emotion.py
@@ -0,0 +1,106 @@
import asyncio
from concurrent.futures import ThreadPoolExecutor
from itertools import repeat, starmap

from deepface import DeepFace

from child_lab_framework.core.sequence import imputed_with_reference_inplace
from child_lab_framework.task import face

from ...core.video import Frame
from ...typing.array import FloatArray2
from ...typing.stream import Fiber

type Input = tuple[
    list[Frame | None] | None,
    list[face.Result | None] | None,
]


class Result:
    emotions: list[float]
    boxes: list[FloatArray2]
integraledelebesgue (Member) commented on lines +19 to +20 (Nov 6, 2024):
We could stack these results and store them as np.ndarrays instead of lists; we do so in the other Result classes in general.

integraledelebesgue (Member):
Also, there is no use duplicating face.Result as boxes here.

WojciechBarczynski (Member, Author):
There is: that way there is no coupling between emotion recognition and face detection in the visualization layer. Duplicating the boxes is almost free (the cost is negligible), and it makes them much easier to handle in the visualization layer.


    def __init__(self, emotions: list[float], boxes: list[FloatArray2]) -> None:
        self.emotions = emotions
        self.boxes = boxes
integraledelebesgue (Member) commented on lines +18 to +24:
This should be a frozen dataclass.
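
A sketch of both suggestions combined, stacked arrays inside a frozen dataclass (hypothetical, not part of this PR; from_lists is an illustrative helper):

from dataclasses import dataclass

import numpy as np


@dataclass(frozen=True)
class Result:
    emotions: np.ndarray  # one score per detected face, shape (n_faces,)
    boxes: np.ndarray  # stacked face boxes, shape (n_faces, 4)

    @classmethod
    def from_lists(cls, emotions: list[float], boxes: list[np.ndarray]) -> 'Result':
        return cls(np.asarray(emotions, dtype=np.float32), np.stack(boxes))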



class Estimator:
    executor: ThreadPoolExecutor

    def __init__(self, executor: ThreadPoolExecutor) -> None:
        self.executor = executor

    def predict(self, frame: Frame, faces: face.Result | None) -> Result:
        face_emotions = []
        boxes = []
        frame_height, frame_width, _ = frame.shape
        for face_box in faces.boxes:
integraledelebesgue (Member) commented:
The faces.boxes access is not safe, since faces can be None. This function should return Result | None and check faces first.

integraledelebesgue (Member) commented (Nov 7, 2024):
Btw, the predict method should take non-None arguments. You can rename this method to __predict_safe and use it in stream and predict_batch.

            x_min, y_min, x_max, y_max = face_box
            # Grow the detected box by a 50 px margin, clamped to the frame bounds
            x_min = max(x_min - 50, 0)
            x_max = min(x_max + 50, frame_width)
            y_min = max(y_min - 50, 0)
            y_max = min(y_max + 50, frame_height)
            cropped_frame = frame[y_min:y_max, x_min:x_max]
            analysis = DeepFace.analyze(
                cropped_frame, actions=['emotion'], enforce_detection=False
            )
            emotion = score_emotions(analysis[0])
            face_emotions.append(emotion)
            boxes.append(face_box)

        return Result(face_emotions, boxes)

    def predict_batch(
        self,
        frames: list[Frame],
        faces: list[face.Result | None],
    ) -> list[Result] | None:
        return imputed_with_reference_inplace(
            list(starmap(self.predict, zip(frames, faces)))
        )
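
A sketch of the refactor suggested above, where predict takes non-None arguments and a private wrapper absorbs missing detections (hypothetical, not part of this PR; the names follow the reviewer's suggestion):

    def predict(self, frame: Frame, faces: face.Result) -> Result:
        ...  # the loop above, with faces guaranteed to be present

    def __predict_safe(
        self, frame: Frame, faces: face.Result | None
    ) -> Result | None:
        # Yield None for frames without detected faces instead of
        # failing on faces.boxes
        return self.predict(frame, faces) if faces is not None else None

predict_batch and stream would then map __predict_safe in place of predict.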

    async def stream(
        self,
    ) -> Fiber[list[Frame | None] | None, list[Result | None] | None]:
        loop = asyncio.get_running_loop()
        executor = self.executor

        results: list[Result | None] | None = None

        while True:
            match (yield results):
                case (
                    list(frames),
                    faces,
                ):
                    results = await loop.run_in_executor(
                        executor,
                        lambda: list(
                            starmap(
                                self.predict,
                                zip(frames, faces or repeat(None)),
                            )
                        ),
                    )

                case _:
                    results = None
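
For context, the Fiber above is an async generator: it is primed once and then driven with asend, receiving (frames, faces) tuples and yielding results. A minimal driver might look like this (hypothetical usage, not part of this PR):

async def drive(estimator: Estimator, frames, faces):
    fiber = estimator.stream()
    await anext(fiber)  # run up to the first yield; returns None
    return await fiber.asend((frames, faces))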


def score_emotions(emotions: dict[str, dict[str, float]]) -> float:
    # In practice, 'angry' and 'fear' usually read close to 'neutral',
    # hence their small negative weights
    scores = {
        'angry': -0.05,
        'disgust': 0,
        'fear': -0.07,
        'happy': 1,
        'sad': -1,
        'surprise': 0,
        'neutral': 0,
    }
integraledelebesgue (Member) commented:
This dict shouldn't be defined here, since it is going to be re-created on each call 💀 Please make it a private constant.

integraledelebesgue (Member) commented (Nov 7, 2024):
With this in mind, how about injecting this dict into the Estimator via its constructor?

    val = 0
    for emotion, score in scores.items():
        val += emotions['emotion'][emotion] * score

    return val
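
A sketch of the constant-plus-injection approach suggested above (hypothetical, not part of this PR; names are illustrative):

from concurrent.futures import ThreadPoolExecutor

_DEFAULT_EMOTION_SCORES: dict[str, float] = {
    'angry': -0.05,
    'disgust': 0.0,
    'fear': -0.07,
    'happy': 1.0,
    'sad': -1.0,
    'surprise': 0.0,
    'neutral': 0.0,
}


class Estimator:
    def __init__(
        self,
        executor: ThreadPoolExecutor,
        scores: dict[str, float] | None = None,
    ) -> None:
        self.executor = executor
        self.scores = scores if scores is not None else _DEFAULT_EMOTION_SCORES

    def score_emotions(self, emotions: dict[str, dict[str, float]]) -> float:
        # Weighted sum over DeepFace's per-emotion percentages, e.g.
        # {'emotion': {'happy': 80.0, 'sad': 5.0, ...}} scores roughly
        # 80.0 * 1 + 5.0 * (-1) = 75
        return sum(
            probability * self.scores.get(name, 0.0)
            for name, probability in emotions['emotion'].items()
        )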
23 changes: 22 additions & 1 deletion child_lab_framework/task/visualization/visualization.py
@@ -3,12 +3,13 @@
 from itertools import repeat, starmap
 
 import cv2
+import cv2.text
 import numpy as np
 
 from ...core.video import Frame, Properties
 from ...typing.array import FloatArray1, FloatArray2, IntArray1
 from ...typing.stream import Fiber
-from .. import face, pose
+from .. import face, pose, emotion
 from ..gaze import ceiling_projection
 from ..pose.keypoint import YOLO_SKELETON

@@ -123,12 +124,27 @@ def __draw_face_box(self, frame: Frame, result: face.Result) -> Frame:
 
         return frame
 
+    def __draw_emotions_text(self, frame: Frame, result: emotion.Result) -> Frame:
+        color = self.FACE_BOUNDING_BOX_COLOR
+        for value, box in zip(result.emotions, result.boxes):
+            cv2.putText(
+                frame,
+                f'{value:.2f}',
+                (int(box[0]), int(box[3])),  # cv2 expects integer pixel coordinates
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.9,
+                color,
+                2,
+            )
+        return frame

     def __annotate_safe(
         self,
         frame: Frame,
         poses: pose.Result | None,
         faces: face.Result | None,
         gazes: ceiling_projection.Result | None,
+        emotions: emotion.Result | None,
     ) -> Frame:
         out = frame.copy()
         out.flags.writeable = True
@@ -143,6 +159,9 @@ def __annotate_safe(
         if gazes is not None:
             out = self.__draw_gaze_estimation(out, gazes)
 
+        if emotions is not None:
+            out = self.__draw_emotions_text(out, emotions)
+
         return out

@@ -151,6 +170,7 @@ def annotate_batch(
         poses: list[pose.Result] | None,
         faces: list[face.Result] | None,
         gazes: list[ceiling_projection.Result] | None,
+        emotions: list[emotion.Result] | None,
     ) -> list[Frame]:
         return list(
             starmap(
@@ -160,6 +180,7 @@ def annotate_batch(
                     poses or repeat(None),
                     faces or repeat(None),
                     gazes or repeat(None),
+                    emotions or repeat(None),
                 ),
             )
         )