diff --git a/embedding-calculator/Dockerfile b/embedding-calculator/Dockerfile
index 026186edf8..9c3dc8ff1f 100644
--- a/embedding-calculator/Dockerfile
+++ b/embedding-calculator/Dockerfile
@@ -13,8 +13,6 @@ WORKDIR /app/ml
COPY requirements.txt .
RUN pip --no-cache-dir install -r requirements.txt
-ARG SCANNER=Facenet2018
-ENV SCANNER=$SCANNER
ARG BE_VERSION
ARG APP_VERSION_STRING
ENV BE_VERSION=$BE_VERSION
@@ -26,35 +24,25 @@ ENV PYTHONUNBUFFERED=0
ENV JOBLIB_MULTIPROCESSING=0
# download ML models
-ARG DETECTION_MODEL
-ARG CALCULATION_MODEL
-ENV DETECTION_MODEL=${DETECTION_MODEL:-retinaface_r50_v1}
-ENV CALCULATION_MODEL=${CALCULATION_MODEL:-arcface_r100_v1}
+ARG INTEL_OPTIMIZATION=false
+ARG GPU_IDX=-1
+ENV GPU_IDX=$GPU_IDX INTEL_OPTIMIZATION=$INTEL_OPTIMIZATION
+ARG FACE_DETECTION_PLUGIN="facenet.FaceDetector"
+ARG CALCULATION_PLUGIN="facenet.Calculator"
+ARG EXTRA_PLUGINS=""
+ENV FACE_DETECTION_PLUGIN=$FACE_DETECTION_PLUGIN CALCULATION_PLUGIN=$CALCULATION_PLUGIN \
+ EXTRA_PLUGINS=$EXTRA_PLUGINS
+COPY src src
COPY srcext srcext
-COPY pytest.ini .
-COPY *.sh ./
-RUN chmod +x *.sh
-RUN ./prepare_scanners.sh
-
-# install InsightFace packages
-ARG INTEL_OPTIMIZATION
-ARG GPU_IDX
-ENV GPU_IDX=$GPU_IDX
-ENV MXNET_MOD=${GPU_IDX:+cu101}${INTEL_OPTIMIZATION:+mkl}
-ENV MXNET_LIB=mxnet${MXNET_MOD:+-$MXNET_MOD}
-ENV MXNET_VER="<1.7"
-
-RUN if [[ "$SCANNER" == "InsightFace" ]]; then \
- pip --no-cache-dir install "$MXNET_LIB$MXNET_VER" -e srcext/insightface/python-package; \
- fi
+RUN python -m src.services.facescan.plugins.setup
# copy rest of the code
-COPY src src
COPY tools tools
COPY sample_images sample_images
# run tests
ARG SKIP_TESTS
+COPY pytest.ini .
RUN if [ -z $SKIP_TESTS ]; then pytest -m "not performance" /app/ml/src; fi
EXPOSE 3000
diff --git a/embedding-calculator/gpu.Dockerfile b/embedding-calculator/gpu.Dockerfile
index b9fb8bcdca..20e349532e 100644
--- a/embedding-calculator/gpu.Dockerfile
+++ b/embedding-calculator/gpu.Dockerfile
@@ -12,6 +12,7 @@ ARG CUDNN_MAJOR_VERSION=7
ARG LIB_DIR_PREFIX=x86_64
ARG LIBNVINFER=6.0.1-1
ARG LIBNVINFER_MAJOR_VERSION=6
+ENV CUDA=$CUDA
# Needed for string substitution
SHELL ["/bin/bash", "-c"]
@@ -66,7 +67,7 @@ RUN ln -s $(which $PYTHON) /usr/local/bin/python
# Variables for MXNET
-ENV MXNET=mxnet_cu101mkl MXNET_CPU_WORKER_NTHREADS=24
+ENV MXNET_CPU_WORKER_NTHREADS=24
ENV MXNET_ENGINE_TYPE=ThreadedEnginePerDevice MXNET_CUDNN_AUTOTUNE_DEFAULT=0
# No access to GPU devices in the build stage, so skip tests
diff --git a/embedding-calculator/prepare_scanners.sh b/embedding-calculator/prepare_scanners.sh
deleted file mode 100644
index 1ab30ec602..0000000000
--- a/embedding-calculator/prepare_scanners.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash -e
-
-if [[ "$SCANNER" == "InsightFace" ]]; then
- MODELS_PATH=~/.insightface/models
- mkdir -p $MODELS_PATH
- for MODEL in $DETECTION_MODEL $CALCULATION_MODEL
- do
- # trying to find a pre-downloaded model
- DIR=~/srcext/insightface/models/$MODEL
- [ -d "$DIR" ] && echo "Coping $MODEL from repository..." && cp -r $DIR $MODELS_PATH && continue
-
- # download a model
- URL=http://insightface.ai/files/models/$MODEL.zip
- echo "Downloading $URL..."
- mkdir -p $MODELS_PATH/$MODEL && cd "$_" && curl -L $URL -o m.zip \
- && unzip m.zip && rm m.zip
-
- # MXNET_CUDNN_AUTOTUNE_DEFAULT=0 doesn't work, need to make changes in a models
- # https://github.com/deepinsight/insightface/issues/764
- sed -i 's/limited_workspace/None/g' $MODELS_PATH/$MODEL/*.json
- done
-else
- echo " --ignore=src/services/facescan/scanner/insightface" >> pytest.ini
-fi
-
-if [[ "$SCANNER" == "Facenet2018" ]]; then
- pip install --no-cache-dir tensorflow~=1.15.4
-else
- echo " --ignore=src/services/facescan/scanner/facenet" >> pytest.ini
-fi
\ No newline at end of file
diff --git a/embedding-calculator/requirements.txt b/embedding-calculator/requirements.txt
index 074deae96d..553853de94 100644
--- a/embedding-calculator/requirements.txt
+++ b/embedding-calculator/requirements.txt
@@ -3,6 +3,7 @@ cached-property==1.5.2
colour==0.1.5
flasgger==0.9.5
Flask==1.1.2
+gdown~=3.12
Werkzeug==1.0.1
# tests
diff --git a/embedding-calculator/src/_endpoints.py b/embedding-calculator/src/_endpoints.py
index 3bdb3c5c44..939ec1d500 100644
--- a/embedding-calculator/src/_endpoints.py
+++ b/embedding-calculator/src/_endpoints.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
-from typing import List
+from typing import List, Optional
from flask import request
from flask.json import jsonify
@@ -19,40 +19,51 @@
from src.constants import ENV
from src.exceptions import NoFaceFoundError
+from src.services.facescan.plugins import managers
from src.services.facescan.scanner.facescanners import scanner
from src.services.flask_.constants import ARG
from src.services.flask_.needs_attached_file import needs_attached_file
from src.services.imgtools.read_img import read_img
+from src.services.utils.pyutils import Constants
def endpoints(app):
@app.route('/status')
def status_get():
+ available_plugins = {p.slug: str(p)
+ for p in managers.plugin_manager.plugins}
+ calculator = managers.plugin_manager.calculator
return jsonify(status='OK', build_version=ENV.BUILD_VERSION,
- calculator_version=ENV.SCANNER)
+ calculator_version=str(calculator),
+ available_plugins=available_plugins)
@app.route('/find_faces', methods=['POST'])
@needs_attached_file
def find_faces_post():
- faces = scanner.find_faces(
+ detector = managers.plugin_manager.detector
+ face_plugins = managers.plugin_manager.filter_face_plugins(
+ _get_face_plugin_names()
+ )
+ faces = detector(
img=read_img(request.files['file']),
- det_prob_threshold=_get_det_prob_threshold(request),
+ det_prob_threshold=_get_det_prob_threshold(),
+ face_plugins=face_plugins
)
- faces = _limit(faces, request.values.get(ARG.LIMIT))
- return jsonify(calculator_version=scanner.ID, result=faces)
+ plugins_versions = {p.slug: str(p) for p in [detector] + face_plugins}
+ return jsonify(results=faces, plugins_versions=plugins_versions)
@app.route('/scan_faces', methods=['POST'])
@needs_attached_file
def scan_faces_post():
faces = scanner.scan(
img=read_img(request.files['file']),
- det_prob_threshold=_get_det_prob_threshold(request)
+ det_prob_threshold=_get_det_prob_threshold()
)
faces = _limit(faces, request.values.get(ARG.LIMIT))
return jsonify(calculator_version=scanner.ID, result=faces)
-def _get_det_prob_threshold(request):
+def _get_det_prob_threshold():
det_prob_threshold_val = request.values.get(ARG.DET_PROB_THRESHOLD)
if det_prob_threshold_val is None:
return None
@@ -62,6 +73,14 @@ def _get_det_prob_threshold(request):
return det_prob_threshold
+def _get_face_plugin_names() -> Optional[List[str]]:
+ if ARG.FACE_PLUGINS not in request.values:
+ return
+ return [
+ name for name in Constants.split(request.values[ARG.FACE_PLUGINS])
+ ]
+
+
def _limit(faces: List, limit: str = None) -> List:
"""
>>> _limit([1, 2, 3], None)
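A hedged sketch of how a client might exercise the reworked `/find_faces` endpoint with the new `face_plugins` argument. The host, port, image path, and comma-separated plugin list are assumptions for illustration (`Constants.split` is not shown in this diff):

```python
import requests  # assumes the service is running locally on its default port

with open('sample_images/000_5.jpg', 'rb') as f:
    resp = requests.post(
        'http://localhost:3000/find_faces',
        files={'file': f},
        data={
            'face_plugins': 'calculator,gender_age',  # plugin slugs; assumes Constants.split handles commas
            'det_prob_threshold': '0.8',
        },
    )
resp.raise_for_status()
payload = resp.json()
print(payload['plugins_versions'])  # e.g. {'detector': 'facenet.FaceDetector', ...}
print(len(payload['results']))      # one dict per detected face
```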
diff --git a/embedding-calculator/src/constants.py b/embedding-calculator/src/constants.py
index 593bd626ee..492013ffc9 100644
--- a/embedding-calculator/src/constants.py
+++ b/embedding-calculator/src/constants.py
@@ -14,24 +14,25 @@
import logging
-from src.services.utils.pyutils import get_env, Constants
+from src.services.utils.pyutils import get_env, get_env_split, get_env_bool, Constants
_DEFAULT_SCANNER = 'Facenet2018'
class ENV(Constants):
ML_PORT = int(get_env('ML_PORT', '3000'))
- SCANNER = get_env('SCANNER', _DEFAULT_SCANNER)
- SCANNERS = [SCANNER]
IMG_LENGTH_LIMIT = int(get_env('IMG_LENGTH_LIMIT', '640'))
+ FACE_DETECTION_PLUGIN = get_env('FACE_DETECTION_PLUGIN', 'facenet.FaceDetector')
+ CALCULATION_PLUGIN = get_env('CALCULATION_PLUGIN', 'facenet.Calculator')
+ EXTRA_PLUGINS = get_env_split('EXTRA_PLUGINS', '')
+
LOGGING_LEVEL_NAME = get_env('LOGGING_LEVEL_NAME', 'debug').upper()
IS_DEV_ENV = get_env('FLASK_ENV', 'production') == 'development'
BUILD_VERSION = get_env('APP_VERSION_STRING', 'dev')
GPU_IDX = int(get_env('GPU_IDX', '-1'))
- DETECTION_MODEL = get_env('DETECTION_MODEL', 'retinaface_r50_v1')
- CALCULATION_MODEL = get_env('CALCULATION_MODEL', 'arcface_r100_v1')
+ INTEL_OPTIMIZATION = get_env_bool('INTEL_OPTIMIZATION')
LOGGING_LEVEL = logging._nameToLevel[ENV.LOGGING_LEVEL_NAME]
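`get_env_split` and `get_env_bool` are imported from `src.services.utils.pyutils` but not shown in this diff; a minimal sketch of plausible semantics (assumptions, not the real implementations):

```python
import os

def get_env(name: str, default: str = '') -> str:
    return os.environ.get(name, default)

def get_env_split(name: str, default: str = '') -> list:
    # 'a.B,c.D' or 'a.B c.D' -> ['a.B', 'c.D']; '' -> []
    return [v for v in get_env(name, default).replace(',', ' ').split() if v]

def get_env_bool(name: str, default: bool = False) -> bool:
    return get_env(name, str(default)).lower() in ('1', 'true', 'yes', 'on')
```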
diff --git a/embedding-calculator/src/docs/find_faces_post.yml b/embedding-calculator/src/docs/find_faces_post.yml
index 9a0d8de8ef..79b2bcd697 100644
--- a/embedding-calculator/src/docs/find_faces_post.yml
+++ b/embedding-calculator/src/docs/find_faces_post.yml
@@ -2,7 +2,7 @@ tags:
- Core
summary: 'Find faces in the given image and return their bounding boxes.'
description: 'Returns bounding boxes of detected faces on the image.'
-operationId: scanFacesPost
+operationId: findFacesPost
consumes:
- multipart/form-data
produces:
diff --git a/embedding-calculator/src/exceptions.py b/embedding-calculator/src/exceptions.py
index c955e47540..e55aafc765 100644
--- a/embedding-calculator/src/exceptions.py
+++ b/embedding-calculator/src/exceptions.py
@@ -37,20 +37,10 @@ class OneDimensionalImageIsGivenError(BadRequest):
description = "Given image has only one dimension"
-class MoreThanOneFaceFoundError(BadRequest):
- description = "Found more than one face in the given image"
-
-
class ClassifierIsAlreadyTrainingError(Locked):
description = "Classifier training is already in progress"
-class NoTrainedEmbeddingClassifierFoundError(BadRequest):
- description = "No classifier model is yet trained, please train a classifier first. If the problem persists, " \
- "check the amount of unique faces saved, and whether all face embeddings have been migrated to " \
- f"version '{ENV.SCANNER}'"
-
-
class NoFileFoundInDatabaseError(InternalServerError):
description = "File is not found in the database"
diff --git a/embedding-calculator/src/services/dto/plugin_result.py b/embedding-calculator/src/services/dto/plugin_result.py
new file mode 100644
index 0000000000..e7a95a72a5
--- /dev/null
+++ b/embedding-calculator/src/services/dto/plugin_result.py
@@ -0,0 +1,50 @@
+import attr
+from typing import Tuple, List, Optional, Dict
+
+from src.services.dto.bounding_box import BoundingBoxDTO
+from src.services.dto.json_encodable import JSONEncodable
+from src.services.imgtools.types import Array1D, Array3D
+
+
+class PluginResultDTO(JSONEncodable):
+ def to_json(self) -> dict:
+ """ Serialize only public properties """
+ return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}
+
+
+@attr.s(auto_attribs=True, frozen=True)
+class EmbeddingDTO(PluginResultDTO):
+ embedding: Array1D
+
+
+@attr.s(auto_attribs=True, frozen=True)
+class GenderDTO(PluginResultDTO):
+ gender: str
+    gender_probability: float = attr.ib(converter=attr.converters.optional(float), default=None)
+
+
+@attr.s(auto_attribs=True, frozen=True)
+class AgeDTO(PluginResultDTO):
+ age: Tuple[int, int]
+    age_probability: float = attr.ib(converter=attr.converters.optional(float), default=None)
+
+
+@attr.s(auto_attribs=True)
+class FaceDTO(PluginResultDTO):
+ box: BoundingBoxDTO
+ _img: Optional[Array3D]
+ _face_img: Optional[Array3D]
+ _plugins_dto: List[PluginResultDTO] = attr.Factory(list)
+ execution_time: Dict[str, float] = attr.Factory(dict)
+
+ def to_json(self):
+ data = super().to_json()
+ for plugin_dto in self._plugins_dto:
+ data.update(plugin_dto.to_json())
+ return data
+
+ @property
+ def embedding(self):
+ for dto in self._plugins_dto:
+ if isinstance(dto, EmbeddingDTO):
+ return dto.embedding
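To illustrate the serialization contract of the new DTOs (a sketch; attrs strips the leading underscore when generating `__init__`, and the `BoundingBoxDTO` keyword names are inferred from elsewhere in this diff):

```python
import numpy as np

from src.services.dto.bounding_box import BoundingBoxDTO
from src.services.dto.plugin_result import EmbeddingDTO, FaceDTO

face = FaceDTO(
    box=BoundingBoxDTO(x_min=0, y_min=0, x_max=10, y_max=10, probability=0.99),
    img=None, face_img=None,   # attrs maps these to the private _img/_face_img attributes
    plugins_dto=[EmbeddingDTO(embedding=np.zeros(512))],
    execution_time={'detector': 0.05},
)
data = face.to_json()
assert '_img' not in data and '_plugins_dto' not in data  # private attrs are dropped
assert 'embedding' in data                                # merged in from EmbeddingDTO
```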
diff --git a/embedding-calculator/src/services/dto/scanned_face.py b/embedding-calculator/src/services/dto/scanned_face.py
index b21cc9f63f..d44b9982e2 100644
--- a/embedding-calculator/src/services/dto/scanned_face.py
+++ b/embedding-calculator/src/services/dto/scanned_face.py
@@ -12,7 +12,7 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
-from typing import Union
+from typing import Union, Optional
import attr
@@ -22,23 +22,27 @@
from src.services.imgtools.types import Array1D, Array3D
+@attr.s(auto_attribs=True)
+class Face(JSONEncodable):
+ _img: Optional[Array3D]
+ _face_img: Optional[Array3D]
+ box: BoundingBoxDTO
+
+
@attr.s(auto_attribs=True, frozen=True)
class ScannedFaceDTO(JSONEncodable):
box: BoundingBoxDTO
embedding: Array1D
-class ScannedFace(JSONEncodable):
- def __init__(self, box: BoundingBoxDTO, embedding: Array1D, img: Union[Array3D, None], face_img: Array3D = None):
- self.box = box
- self.embedding = embedding
- self.img = img
- self._face_img = face_img
+@attr.s(auto_attribs=True)
+class ScannedFace(Face):
+ embedding: Array1D
@property
def face_img(self):
if not self._face_img:
- self._face_img = crop_img(self.img, self.box)
+ self._face_img = crop_img(self._img, self.box)
return self._face_img
@property
@@ -53,4 +57,5 @@ def from_request(cls, result):
y_min=box_result['y_min'],
y_max=box_result['y_max'],
probability=box_result['probability']),
- embedding=result['embedding'], img=None)
+ embedding=result['embedding'],
+ img=None, face_img=None)
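Since `ScannedFace` is now attrs-based, the private `_img`/`_face_img` attributes are exposed as underscore-less `__init__` arguments, which is why `from_request` above passes `img=None, face_img=None`. A small sketch of the lazy `face_img` property:

```python
import numpy as np

from src.services.dto.bounding_box import BoundingBoxDTO
from src.services.dto.scanned_face import ScannedFace

img = np.zeros((100, 100, 3), dtype=np.uint8)   # dummy image for illustration
face = ScannedFace(
    img=img, face_img=None,                     # attrs maps these to _img/_face_img
    box=BoundingBoxDTO(x_min=10, y_min=10, x_max=50, y_max=50, probability=1.0),
    embedding=np.zeros(512),
)
face.face_img  # lazily computed as crop_img(self._img, self.box) on first access
```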
diff --git a/embedding-calculator/srcext/facenet/__init__.py b/embedding-calculator/src/services/facescan/plugins/__init__.py
similarity index 100%
rename from embedding-calculator/srcext/facenet/__init__.py
rename to embedding-calculator/src/services/facescan/plugins/__init__.py
diff --git a/embedding-calculator/src/services/facescan/plugins/base.py b/embedding-calculator/src/services/facescan/plugins/base.py
new file mode 100644
index 0000000000..c8ff51f3df
--- /dev/null
+++ b/embedding-calculator/src/services/facescan/plugins/base.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2020 the original author or authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+import os
+import logging
+import tempfile
+from time import time
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import List, Tuple, Optional
+from zipfile import ZipFile
+
+import attr
+import gdown
+from src.services.dto.bounding_box import BoundingBoxDTO
+from src.services.dto import plugin_result
+from src.services.imgtools.types import Array3D
+from src.services.facescan.plugins import exceptions
+
+
+logger = logging.getLogger(__name__)
+MODELS_ROOT = os.path.expanduser(os.path.join('~', '.models'))
+
+
+@attr.s(auto_attribs=True)
+class MLModel:
+ plugin: 'BasePlugin'
+ name: str
+ is_default: bool = False
+
+ def __attrs_post_init__(self):
+ """ Set first model as default """
+ if not self.name:
+ self.name = self.plugin.ml_models[0][0]
+ self.is_default = True
+
+ def __str__(self):
+ return self.name
+
+ @property
+ def path(self):
+ return Path(MODELS_ROOT) / self.plugin.backend / self.plugin.slug / self.name
+
+ def exists(self):
+ return os.path.exists(self.path)
+
+ def download_if_not_exists(self):
+ """
+        Download a zipped model from its URL and extract it into the models directory.
+ """
+ if self.exists():
+            logger.debug(f'{self.plugin} model {self.name} already exists')
+ return
+ logger.debug(f'Getting {self.plugin} model {self.name}')
+ url = dict(self.plugin.ml_models)[self.name]
+ with tempfile.NamedTemporaryFile() as tmpfile:
+ self._download(url, tmpfile)
+ self._extract(tmpfile.name)
+
+ @classmethod
+ def _download(cls, url: str, output):
+ return gdown.download(cls._prepare_url(url), output)
+
+ @staticmethod
+ def _prepare_url(url) -> str:
+ """ Convert Google Drive fileId to url """
+ if not url.startswith('http') and len(url) < 40:
+ return f'https://drive.google.com/uc?id={url}'
+ return url
+
+ def _extract(self, filename: str):
+ os.makedirs(self.path, exist_ok=True)
+ with ZipFile(filename, 'r') as zf:
+ for info in zf.infolist():
+ if info.is_dir():
+ continue
+ file_path = Path(self.path) / Path(info.filename).name
+ file_path.write_bytes(zf.read(info))
+
+
+class BasePlugin(ABC):
+ # pairs of model name and Google Drive fileID or URL to file
+ ml_models: Tuple[Tuple[str, str], ...] = ()
+ ml_model: Optional[MLModel] = None
+
+ def __new__(cls, ml_model_name: str = None):
+ """
+        Plugins may cache pre-trained models and neural networks in properties,
+        so each plugin has to be a singleton.
+ """
+ if not hasattr(cls, 'instance'):
+ cls.instance = super(BasePlugin, cls).__new__(cls)
+ if cls.instance.ml_models:
+ cls.instance.ml_model = MLModel(cls.instance, ml_model_name)
+ return cls.instance
+
+ @property
+ @abstractmethod
+ def slug(self):
+ pass
+
+ @property
+ def backend(self) -> str:
+ return self.__class__.__module__.rsplit('.', 1)[-1]
+
+ @property
+ def name(self) -> str:
+ return f'{self.backend}.{self.__class__.__name__}'
+
+ def __str__(self):
+ if self.ml_model and not self.ml_model.is_default:
+ return f'{self.name}@{self.ml_model.name}'
+ else:
+ return self.name
+
+ @abstractmethod
+ def __call__(self, face_img: Array3D) -> plugin_result.PluginResultDTO:
+ raise NotImplementedError
+
+
+class BaseFaceDetector(BasePlugin):
+ slug = 'detector'
+ IMAGE_SIZE: int
+ face_plugins: List[BasePlugin] = []
+
+ def __call__(self, img: Array3D, det_prob_threshold: float = None,
+                 face_plugins: Tuple[BasePlugin, ...] = ()):
+ """ Returns cropped and normalized faces."""
+ faces = self._fetch_faces(img, det_prob_threshold)
+ for face in faces:
+ self._apply_face_plugins(face, face_plugins)
+ return faces
+
+ def _fetch_faces(self, img: Array3D, det_prob_threshold: float = None):
+ start = time()
+ boxes = self.find_faces(img, det_prob_threshold)
+ return [
+ plugin_result.FaceDTO(
+ img=img, face_img=self.crop_face(img, box), box=box,
+ execution_time={self.slug: (time() - start) / len(boxes)}
+ ) for box in boxes
+ ]
+
+ def _apply_face_plugins(self, face: plugin_result.FaceDTO,
+                            face_plugins: Tuple[BasePlugin, ...]):
+ for plugin in face_plugins:
+ start = time()
+ try:
+ result_dto = plugin(face._face_img)
+ face._plugins_dto.append(result_dto)
+ except Exception as e:
+ raise exceptions.PluginError(f'{plugin} error - {e}')
+ else:
+ face.execution_time[plugin.slug] = time() - start
+
+ @abstractmethod
+ def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[BoundingBoxDTO]:
+ """ Find face bounding boxes, without calculating embeddings"""
+ raise NotImplementedError
+
+ @abstractmethod
+ def crop_face(self, img: Array3D, box: BoundingBoxDTO) -> Array3D:
+ """ Crop face by bounding box and resize/squish it """
+ raise NotImplementedError
+
+
+class BaseCalculator(BasePlugin):
+ slug = 'calculator'
+
+ DIFFERENCE_THRESHOLD: float
+
+ def __call__(self, face_img: Array3D):
+ return plugin_result.EmbeddingDTO(
+ embedding=self.calc_embedding(face_img)
+ )
+
+ @abstractmethod
+ def calc_embedding(self, face_img: Array3D) -> Array3D:
+ """ Calculate embedding of a given face """
+ raise NotImplementedError
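As a sketch of the extension contract these base classes define, a hypothetical third-party calculator plugin (the model name and file ID below are placeholders, not real models):

```python
from src.services.facescan.plugins import base
from src.services.imgtools.types import Array3D


class Calculator(base.BaseCalculator):
    # (model name, Google Drive file ID or URL) pairs; the first entry
    # becomes the default model when none is pinned via '@'
    ml_models = (
        ('my_model_v1', '0000000000000000000000000000000'),  # placeholder ID
    )
    DIFFERENCE_THRESHOLD = 1.0  # embedding distance below which faces match

    def calc_embedding(self, face_img: Array3D) -> Array3D:
        raise NotImplementedError  # load weights from self.ml_model.path and run inference
```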
diff --git a/embedding-calculator/src/services/facescan/plugins/conftest.py b/embedding-calculator/src/services/facescan/plugins/conftest.py
new file mode 100644
index 0000000000..ce396b7346
--- /dev/null
+++ b/embedding-calculator/src/services/facescan/plugins/conftest.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2020 the original author or authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+import os
+from importlib.util import find_spec
+
+modules_by_lib = {
+ 'tensorflow': ('facenet', 'rude_carnie'),
+ 'mxnet': ('insightface',)
+}
+modules_to_skip = []
+for lib, modules in modules_by_lib.items():
+ if find_spec(lib) is None:
+ modules_to_skip.extend(modules)
+
+
+def pytest_ignore_collect(path):
+ _, tail = os.path.split(path)
+ for module in modules_to_skip:
+ if tail.startswith(module):
+ return True
diff --git a/embedding-calculator/src/services/facescan/scanner/insightface/__init__.py b/embedding-calculator/src/services/facescan/plugins/exceptions.py
similarity index 85%
rename from embedding-calculator/src/services/facescan/scanner/insightface/__init__.py
rename to embedding-calculator/src/services/facescan/plugins/exceptions.py
index 301293cd08..0522c27535 100644
--- a/embedding-calculator/src/services/facescan/scanner/insightface/__init__.py
+++ b/embedding-calculator/src/services/facescan/plugins/exceptions.py
@@ -1,13 +1,20 @@
# Copyright (c) 2020 the original author or authors
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# https://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
+
+class PluginError(RuntimeError):
+ pass
+
+
+class ModelImportException(PluginError):
+ pass
diff --git a/embedding-calculator/src/services/facescan/scanner/facenet/__init__.py b/embedding-calculator/src/services/facescan/plugins/facenet/__init__.py
similarity index 90%
rename from embedding-calculator/src/services/facescan/scanner/facenet/__init__.py
rename to embedding-calculator/src/services/facescan/plugins/facenet/__init__.py
index 301293cd08..b73206a064 100644
--- a/embedding-calculator/src/services/facescan/scanner/facenet/__init__.py
+++ b/embedding-calculator/src/services/facescan/plugins/facenet/__init__.py
@@ -1,13 +1,15 @@
# Copyright (c) 2020 the original author or authors
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# https://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
+
+requirements = ('tensorflow~=1.15.4', 'facenet~=1.0.5')
diff --git a/embedding-calculator/src/services/facescan/scanner/facenet/facenet.py b/embedding-calculator/src/services/facescan/plugins/facenet/facenet.py
similarity index 76%
rename from embedding-calculator/src/services/facescan/scanner/facenet/facenet.py
rename to embedding-calculator/src/services/facescan/plugins/facenet/facenet.py
index d9bbb80791..dce7321978 100644
--- a/embedding-calculator/src/services/facescan/scanner/facenet/facenet.py
+++ b/embedding-calculator/src/services/facescan/plugins/facenet/facenet.py
@@ -1,11 +1,11 @@
# Copyright (c) 2020 the original author or authors
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# https://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
@@ -19,34 +19,33 @@
import numpy as np
import tensorflow as tf
-from cached_property import cached_property
-from srcext.facenet.align import detect_face
from tensorflow.python.platform import gfile
+from cached_property import cached_property
+from facenet.src.align import detect_face
from src.constants import ENV
from src.services.dto.bounding_box import BoundingBoxDTO
-from src.services.dto.scanned_face import ScannedFace
from src.services.facescan.imgscaler.imgscaler import ImgScaler
-from src.services.facescan.scanner.facescanner import FaceScanner
from src.services.imgtools.proc_img import crop_img, squish_img
from src.services.imgtools.types import Array3D
from src.services.utils.pyutils import get_current_dir
+from src.services.facescan.plugins import base
+
CURRENT_DIR = get_current_dir(__file__)
+
logger = logging.getLogger(__name__)
_EmbeddingCalculator = namedtuple('_EmbeddingCalculator', 'graph sess')
_FaceDetectionNets = namedtuple('_FaceDetectionNets', 'pnet rnet onet')
-class Facenet2018(FaceScanner):
- ID = 'Facenet2018'
+class FaceDetector(base.BaseFaceDetector):
BATCH_SIZE = 25
FACE_MIN_SIZE = 20
SCALE_FACTOR = 0.709
BOX_MARGIN = 32
IMAGE_SIZE = 160
IMG_LENGTH_LIMIT = ENV.IMG_LENGTH_LIMIT
- EMBEDDING_MODEL_PATH = CURRENT_DIR / 'model' / 'embedding_calc_model_20180402.pb'
# detection settings
det_prob_threshold = 0.65
@@ -54,22 +53,15 @@ class Facenet2018(FaceScanner):
det_threshold_b = 0.7059968943
det_threshold_c = 0.5506904359
- @cached_property
- def _embedding_calculator(self):
- with tf.Graph().as_default() as graph:
- graph_def = tf.GraphDef()
- with gfile.FastGFile(str(self.EMBEDDING_MODEL_PATH), 'rb') as f:
- model = f.read()
- graph_def.ParseFromString(model)
- tf.import_graph_def(graph_def, name='')
- return _EmbeddingCalculator(graph=graph, sess=tf.Session(graph=graph))
-
@cached_property
def _face_detection_nets(self):
with tf.Graph().as_default():
sess = tf.Session()
return _FaceDetectionNets(*detect_face.create_mtcnn(sess, None))
+ def crop_face(self, img: Array3D, box: BoundingBoxDTO) -> Array3D:
+ return squish_img(crop_img(img, box), (self.IMAGE_SIZE, self.IMAGE_SIZE))
+
def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[BoundingBoxDTO]:
if det_prob_threshold is None:
det_prob_threshold = self.det_prob_threshold
@@ -105,11 +97,40 @@ def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[Bou
filtered_bounding_boxes.append(box)
return filtered_bounding_boxes
+
+class Calculator(base.BaseCalculator):
+ ml_models = (
+ # VGGFace2 training set, 0.9965 LFW accuracy
+ ('20180402-114759', '1im5Qq006ZEV_tViKh3cgia_Q4jJ13bRK'),
+ # CASIA-WebFace training set, 0.9905 LFW accuracy
+ ('20180408-102900', '100w4JIUz44Tkwte9F-wEH0DOFsY-bPaw'),
+ )
+ BATCH_SIZE = 25
+ DIFFERENCE_THRESHOLD = 0.2
+
+ @property
+ def ml_model_file(self):
+ return str(self.ml_model.path / f'{self.ml_model.name}.pb')
+
+ def calc_embedding(self, face_img: Array3D) -> Array3D:
+ return self._calculate_embeddings([face_img])[0]
+
+ @cached_property
+ def _embedding_calculator(self):
+ with tf.Graph().as_default() as graph:
+ graph_def = tf.GraphDef()
+ with gfile.FastGFile(self.ml_model_file, 'rb') as f:
+ model = f.read()
+ graph_def.ParseFromString(model)
+ tf.import_graph_def(graph_def, name='')
+ return _EmbeddingCalculator(graph=graph, sess=tf.Session(graph=graph))
+
def _calculate_embeddings(self, cropped_images):
"""Run forward pass to calculate embeddings"""
- graph_images_placeholder = self._embedding_calculator.graph.get_tensor_by_name("input:0")
- graph_embeddings = self._embedding_calculator.graph.get_tensor_by_name("embeddings:0")
- graph_phase_train_placeholder = self._embedding_calculator.graph.get_tensor_by_name("phase_train:0")
+ calc_model = self._embedding_calculator
+ graph_images_placeholder = calc_model.graph.get_tensor_by_name("input:0")
+ graph_embeddings = calc_model.graph.get_tensor_by_name("embeddings:0")
+ graph_phase_train_placeholder = calc_model.graph.get_tensor_by_name("phase_train:0")
embedding_size = graph_embeddings.get_shape()[1]
image_count = len(cropped_images)
batches_per_epoch = int(math.ceil(1.0 * image_count / self.BATCH_SIZE))
@@ -118,18 +139,6 @@ def _calculate_embeddings(self, cropped_images):
start_index = i * self.BATCH_SIZE
end_index = min((i + 1) * self.BATCH_SIZE, image_count)
feed_dict = {graph_images_placeholder: cropped_images, graph_phase_train_placeholder: False}
- embeddings[start_index:end_index, :] = self._embedding_calculator.sess.run(graph_embeddings,
- feed_dict=feed_dict)
+ embeddings[start_index:end_index, :] = calc_model.sess.run(
+ graph_embeddings, feed_dict=feed_dict)
return embeddings
-
- def scan(self, img: Array3D, det_prob_threshold: float = None) -> List[ScannedFace]:
- scanned_faces = []
- for box in self.find_faces(img, det_prob_threshold):
- face_img = squish_img(crop_img(img, box), (self.IMAGE_SIZE, self.IMAGE_SIZE))
- scanned_faces.append(
- ScannedFace(
- embedding=self._calculate_embeddings([face_img])[0],
- box=box, img=img, face_img=face_img
- )
- )
- return scanned_faces
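Taken together, the old `Facenet2018.scan()` flow now decomposes into a detector call with the calculator attached as a face plugin; a usage sketch (the image path is from this repo's sample images):

```python
from src.services.facescan.plugins.facenet.facenet import Calculator, FaceDetector
from src.services.imgtools.read_img import read_img

detector = FaceDetector()
calculator = Calculator()    # defaults to the first ml_models entry, 20180402-114759

img = read_img('sample_images/000_5.jpg')
faces = detector(img, det_prob_threshold=0.65, face_plugins=(calculator,))
for face in faces:
    print(face.box, face.execution_time)
    print(face.embedding[:5])    # filled in by the Calculator plugin
```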
diff --git a/embedding-calculator/src/services/facescan/plugins/insightface/__init__.py b/embedding-calculator/src/services/facescan/plugins/insightface/__init__.py
new file mode 100644
index 0000000000..dba69f354c
--- /dev/null
+++ b/embedding-calculator/src/services/facescan/plugins/insightface/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2020 the original author or authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+
+from src.constants import ENV
+from src.services.utils.pyutils import get_env
+
+
+def get_requirements():
+ cuda_version = get_env('CUDA', '').replace('.', '')
+
+ mxnet_lib = 'mxnet-'
+ if ENV.GPU_IDX > -1 and cuda_version:
+ mxnet_lib += f"cu{cuda_version}"
+ if ENV.INTEL_OPTIMIZATION:
+ mxnet_lib += 'mkl'
+ mxnet_lib = mxnet_lib.rstrip('-')
+ return (
+ f'{mxnet_lib}<1.7',
+ 'insightface==0.1.5',
+ )
+
+requirements = get_requirements()
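For reference, how `get_requirements()` resolves the MXNet package under a few configurations, worked out from the logic above (`CUDA` comes from the build arg surfaced in gpu.Dockerfile):

```python
# GPU_IDX=-1, CUDA unset                          -> 'mxnet<1.7'
# GPU_IDX=-1, INTEL_OPTIMIZATION=true             -> 'mxnet-mkl<1.7'
# GPU_IDX=0,  CUDA=10.1                           -> 'mxnet-cu101<1.7'
# GPU_IDX=0,  CUDA=10.1, INTEL_OPTIMIZATION=true  -> 'mxnet-cu101mkl<1.7'
```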
diff --git a/embedding-calculator/src/services/facescan/scanner/insightface/insightface.py b/embedding-calculator/src/services/facescan/plugins/insightface/insightface.py
similarity index 56%
rename from embedding-calculator/src/services/facescan/scanner/insightface/insightface.py
rename to embedding-calculator/src/services/facescan/plugins/insightface/insightface.py
index 23fcf14892..ff06b38534 100644
--- a/embedding-calculator/src/services/facescan/scanner/insightface/insightface.py
+++ b/embedding-calculator/src/services/facescan/plugins/insightface/insightface.py
@@ -1,11 +1,11 @@
# Copyright (c) 2020 the original author or authors
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# https://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
@@ -20,26 +20,19 @@
import numpy as np
from cached_property import cached_property
from insightface.app import FaceAnalysis
-from insightface.model_zoo import model_zoo, model_store, face_recognition, face_detection
+from insightface.model_zoo import (model_store, face_detection,
+ face_recognition, face_genderage)
from insightface.utils import face_align
from src.constants import ENV
from src.services.dto.bounding_box import BoundingBoxDTO
-from src.services.dto.scanned_face import ScannedFace
from src.services.facescan.imgscaler.imgscaler import ImgScaler
-from src.services.facescan.scanner.facescanner import FaceScanner
+from src.services.facescan.plugins import base, exceptions
+from src.services.dto import plugin_result
from src.services.imgtools.types import Array3D
-logger = logging.getLogger(__name__)
-
-def _get_model_file(name):
- """ Return location for the pretrained on local file system.
- InsightFace `get_model_file` works only with build in models.
- """
- root = os.path.expanduser(os.path.join('~', '.insightface', 'models'))
- dir_path = os.path.join(root, name)
- return model_store.find_params_file(dir_path)
+logger = logging.getLogger(__name__)
@attr.s(auto_attribs=True, frozen=True)
@@ -60,55 +53,43 @@ def scaled(self, coefficient: float) -> 'InsightFaceBoundingBox':
landmark=self.landmark * coefficient)
+class InsightFaceMixin:
+ _CTX_ID = ENV.GPU_IDX
+ _NMS = 0.4
+
+ def get_model_file(self, ml_model: base.MLModel):
+ if not ml_model.exists():
+ raise exceptions.ModelImportException(
+                f'Model {ml_model.name} does not exist')
+ return model_store.find_params_file(ml_model.path)
+
+
class DetectionOnlyFaceAnalysis(FaceAnalysis):
rec_model = None
ga_model = None
- def __init__(self, det_name):
- try:
- self.det_model = model_zoo.get_model(det_name)
- except ValueError:
- file = _get_model_file(det_name)
- self.det_model = face_detection.FaceDetector(file, 'net3')
+ def __init__(self, file):
+ self.det_model = face_detection.FaceDetector(file, 'net3')
-class InsightFace(FaceScanner):
- ID = 'InsightFace'
- DETECTION_MODEL_NAME = ENV.DETECTION_MODEL
- CALCULATION_MODEL_NAME = ENV.CALCULATION_MODEL
- IMG_LENGTH_LIMIT = ENV.IMG_LENGTH_LIMIT
+class FaceDetector(InsightFaceMixin, base.BaseFaceDetector):
+ ml_models = (
+ ('retinaface_r50_v1', '1uyiIvAYhVPeTjHa8Gm7TfNXIGM5jqrMQ'),
+ ('retinaface_mnet025_v1', '1h5rHDGE7qXC3jZwphObh9mW55YQYKY8Y'),
+ ('retinaface_mnet025_v2', '1lAnFcBXoMKqE-SkZKTmi6MsYAmzG0tFw'),
+ )
- _CTX_ID = ENV.GPU_IDX
- # detection settings
- _NMS = 0.4
+ IMG_LENGTH_LIMIT = ENV.IMG_LENGTH_LIMIT
+ IMAGE_SIZE = 112
det_prob_threshold = 0.8
@cached_property
def _detection_model(self):
- model = DetectionOnlyFaceAnalysis(self.DETECTION_MODEL_NAME)
+ model_file = self.get_model_file(self.ml_model)
+ model = DetectionOnlyFaceAnalysis(model_file)
model.prepare(ctx_id=self._CTX_ID, nms=self._NMS)
return model
- @cached_property
- def _calculation_model(self):
- name = self.CALCULATION_MODEL_NAME
- try:
- model = model_zoo.get_model(name)
- except ValueError:
- file = _get_model_file(name)
- model = face_recognition.FaceRecognition(name, True, file)
- model.prepare(ctx_id=self._CTX_ID)
- return model
-
- def scan(self, img: Array3D, det_prob_threshold: float = None) -> List[ScannedFace]:
- scanned_faces = []
- for box in self.find_faces(img, det_prob_threshold):
- face_img = face_align.norm_crop(img, landmark=box.landmark)
- embedding = self._calculation_model.get_embedding(face_img).flatten()
- scanned_faces.append(ScannedFace(box=box, embedding=embedding,
- img=img, face_img=face_img))
- return scanned_faces
-
def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[InsightFaceBoundingBox]:
if det_prob_threshold is None:
det_prob_threshold = self.det_prob_threshold
@@ -132,3 +113,58 @@ def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[Ins
logger.debug(f"Found: {box.dto}")
boxes.append(box)
return boxes
+
+ def crop_face(self, img: Array3D, box: InsightFaceBoundingBox) -> Array3D:
+ return face_align.norm_crop(img, landmark=box.landmark,
+ image_size=self.IMAGE_SIZE)
+
+
+class Calculator(InsightFaceMixin, base.BaseCalculator):
+ ml_models = (
+ ('arcface_r100_v1', '11xFaEHIQLNze3-2RUV1cQfT-q6PKKfYp'),
+ ('arcface_resnet34', '1J9hqSWqZz6YvMMNrDrmrzEW9anhvdKuC'),
+ ('arcface_resnet50', '1gNuvRNHCNgvFtz7SjhW82v2-znlAYaRO'),
+ ('arcface_mobilefacenet', '17TpxpyHuUc1ZTm3RIbfvhnBcZqhyKszV'),
+ ('arcface-r50-msfdrop75', '1ECp5XrLgfEAnwyTYFEhJgIsOAw6KaHa7'),
+ ('arcface-r100-msfdrop75', '1EYTMxgcNdlvoL1fSC8N1zkaWrX75ZoNL'),
+ )
+
+ DIFFERENCE_THRESHOLD = 400
+
+ def calc_embedding(self, face_img: Array3D) -> Array3D:
+ return self._calculation_model.get_embedding(face_img).flatten()
+
+ @cached_property
+ def _calculation_model(self):
+ model_file = self.get_model_file(self.ml_model)
+ model = face_recognition.FaceRecognition(
+ self.ml_model.name, True, model_file)
+ model.prepare(ctx_id=self._CTX_ID)
+ return model
+
+
+@attr.s(auto_attribs=True, frozen=True)
+class GenderAgeDTO(plugin_result.PluginResultDTO):
+ gender: str
+ age: Tuple[int, int]
+
+
+class GenderAgeDetector(InsightFaceMixin, base.BasePlugin):
+ slug = 'gender_age'
+ ml_models = (
+ ('genderage_v1', '1ggNFFqpe0abWz6V1A82rnxD6fyxB8W2c'),
+ )
+
+ GENDERS = ('female', 'male')
+
+ def __call__(self, face_img: Array3D):
+ gender, age = self._genderage_model.get(face_img)
+ return GenderAgeDTO(gender=self.GENDERS[int(gender)], age=(age, age))
+
+ @cached_property
+ def _genderage_model(self):
+ model_file = self.get_model_file(self.ml_model)
+ model = face_genderage.FaceGenderage(
+ self.ml_model.name, True, model_file)
+ model.prepare(ctx_id=self._CTX_ID)
+ return model
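A sketch of running the new gender/age plugin alongside detection, assuming the `genderage_v1` and detector models were downloaded at build time:

```python
from src.services.facescan.plugins.insightface.insightface import (
    FaceDetector, GenderAgeDetector)
from src.services.imgtools.read_img import read_img

detector = FaceDetector()
genderage = GenderAgeDetector()

img = read_img('sample_images/000_5.jpg')
for face in detector(img, face_plugins=(genderage,)):
    print(face.to_json())    # merged dict includes 'gender' and 'age'
```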
diff --git a/embedding-calculator/src/services/facescan/plugins/managers.py b/embedding-calculator/src/services/facescan/plugins/managers.py
new file mode 100644
index 0000000000..f863c81764
--- /dev/null
+++ b/embedding-calculator/src/services/facescan/plugins/managers.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2020 the original author or authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+from collections import defaultdict
+from importlib import import_module
+from typing import List, Type, Dict, Tuple, Optional
+from types import ModuleType
+from cached_property import cached_property
+
+from src import constants
+from src.services.facescan.plugins import base
+
+
+ML_MODEL_SEPARATOR = '@'
+
+
+def import_classes(class_path: str):
+ module, class_name = class_path.rsplit('.', 1)
+ return getattr(import_module(module, __package__), class_name)
+
+
+class PluginManager:
+ plugins_modules: Dict[ModuleType, List[str]]
+
+ def __init__(self):
+ self.plugins_modules = defaultdict(list)
+ for plugin_name in self.get_plugins_names():
+ module = import_module(f'{__package__}.{plugin_name.split(".")[0]}')
+ self.plugins_modules[module].append(plugin_name)
+
+ @property
+ def requirements(self):
+ requirements = set()
+ for module in self.plugins_modules:
+ requirements |= set(module.requirements)
+ return requirements
+
+ def get_plugins_names(self):
+ return list(filter(None, [
+ constants.ENV.FACE_DETECTION_PLUGIN,
+ constants.ENV.CALCULATION_PLUGIN,
+ *constants.ENV.EXTRA_PLUGINS
+ ]))
+
+ @cached_property
+ def plugins(self):
+ plugins = []
+ for module, plugins_names in self.plugins_modules.items():
+ for pl_name in plugins_names:
+ mlmodel_name = None
+ if ML_MODEL_SEPARATOR in pl_name:
+ pl_name, mlmodel_name = pl_name.split(ML_MODEL_SEPARATOR)
+ pl_path = f'{module.__package__}.{pl_name}'
+ pl_class = import_classes(pl_path)
+ plugin = pl_class(ml_model_name=mlmodel_name)
+ plugins.append(plugin)
+ return plugins
+
+ @cached_property
+ def detector(self) -> base.BaseFaceDetector:
+ return [pl for pl in self.plugins
+ if isinstance(pl, base.BaseFaceDetector)][0]
+
+ @cached_property
+ def calculator(self) -> base.BaseCalculator:
+ return [pl for pl in self.plugins
+ if isinstance(pl, base.BaseCalculator)][0]
+
+ @cached_property
+ def face_plugins(self) -> List[base.BasePlugin]:
+ return [pl for pl in self.plugins
+ if not isinstance(pl, base.BaseFaceDetector)]
+
+    def filter_face_plugins(self, slugs: Optional[List[str]]) -> List[base.BasePlugin]:
+ return [pl for pl in self.face_plugins
+ if slugs is None or pl.slug in slugs]
+
+
+plugin_manager = PluginManager()
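Plugin names are resolved against the environment, and a specific model can be pinned with the `@` separator; an illustration (the environment values are examples):

```python
# Example environment:
#   FACE_DETECTION_PLUGIN=insightface.FaceDetector@retinaface_mnet025_v2
#   CALCULATION_PLUGIN=insightface.Calculator
#   EXTRA_PLUGINS=insightface.GenderAgeDetector
from src.services.facescan.plugins.managers import plugin_manager

print(plugin_manager.requirements)    # union of each plugin module's requirements
print(plugin_manager.detector)        # insightface.FaceDetector@retinaface_mnet025_v2
print([str(p) for p in plugin_manager.face_plugins])
```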
diff --git a/embedding-calculator/src/services/facescan/plugins/setup.py b/embedding-calculator/src/services/facescan/plugins/setup.py
new file mode 100644
index 0000000000..39591fa559
--- /dev/null
+++ b/embedding-calculator/src/services/facescan/plugins/setup.py
@@ -0,0 +1,22 @@
+import subprocess
+import sys
+
+from src.services.facescan.plugins.managers import plugin_manager
+
+
+def install_requirements(requirements: set):
+    print(f'Installing dependencies: {requirements}')
+ cmd = f"{sys.executable} -m pip install --no-cache-dir {' '.join(requirements)}"
+ try:
+ subprocess.run(cmd.split(), check=True)
+ except subprocess.CalledProcessError:
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+ install_requirements(plugin_manager.requirements)
+
+ for plugin in plugin_manager.plugins:
+ if plugin.ml_model:
+ print(f'Checking models for {plugin}...')
+ plugin.ml_model.download_if_not_exists()
diff --git a/embedding-calculator/src/services/facescan/scanner/facenet/model/embedding_calc_model_20180402.pb b/embedding-calculator/src/services/facescan/scanner/facenet/model/embedding_calc_model_20180402.pb
deleted file mode 100644
index 39b4ed7630..0000000000
Binary files a/embedding-calculator/src/services/facescan/scanner/facenet/model/embedding_calc_model_20180402.pb and /dev/null differ
diff --git a/embedding-calculator/src/services/facescan/scanner/facescanner.py b/embedding-calculator/src/services/facescan/scanner/facescanner.py
index 4ed8f49b89..2e24ec90f4 100644
--- a/embedding-calculator/src/services/facescan/scanner/facescanner.py
+++ b/embedding-calculator/src/services/facescan/scanner/facescanner.py
@@ -17,10 +17,10 @@
import numpy as np
-from src.exceptions import MoreThanOneFaceFoundError, NoFaceFoundError
from src.services.dto.bounding_box import BoundingBoxDTO
from src.services.dto.scanned_face import ScannedFace
from src.services.imgtools.types import Array3D
+from src.services.facescan.plugins.managers import plugin_manager
class FaceScanner(ABC):
@@ -44,14 +44,30 @@ def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[Bou
""" Find face bounding boxes, without calculating embeddings"""
raise NotImplementedError
- def scan_one(self, img: Array3D,
- det_prob_threshold: float = None) -> ScannedFace:
- results = self.scan(img=img, det_prob_threshold=det_prob_threshold)
- if len(results) > 1:
- raise MoreThanOneFaceFoundError
- if len(results) == 0:
- raise NoFaceFoundError
- return results[0]
+ @property
+ @abstractmethod
+ def difference_threshold(self) -> float:
+ """ Difference threshold between two embeddings"""
+ raise NotImplementedError
+
+
+class ScannerWithPlugins(FaceScanner):
+    """
+    Scanner kept for backward compatibility: it only performs
+    face detection and embedding calculation.
+    """
+ ID = "ScannerWithPlugins"
+
+ def scan(self, img: Array3D, det_prob_threshold: float = None):
+ return plugin_manager.detector(img, det_prob_threshold,
+ [plugin_manager.calculator])
+
+ def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[BoundingBoxDTO]:
+ return plugin_manager.detector.find_faces(img, det_prob_threshold)
+
+ @property
+ def difference_threshold(self):
+ return plugin_manager.calculator.DIFFERENCE_THRESHOLD
class MockScanner(FaceScanner):
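A usage sketch of the backward-compatible scanner (image path from the repo's samples):

```python
from src.services.facescan.scanner.facescanner import ScannerWithPlugins
from src.services.imgtools.read_img import read_img

scanner = ScannerWithPlugins()
faces = scanner.scan(read_img('sample_images/000_5.jpg'))
print(faces[0].embedding[:5])          # calculator plugin ran during scan()
print(scanner.difference_threshold)    # delegated to the calculator plugin
```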
diff --git a/embedding-calculator/src/services/facescan/scanner/facescanners.py b/embedding-calculator/src/services/facescan/scanner/facescanners.py
index 0fcf661661..b916b8320a 100644
--- a/embedding-calculator/src/services/facescan/scanner/facescanners.py
+++ b/embedding-calculator/src/services/facescan/scanner/facescanners.py
@@ -12,19 +12,11 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
-from src.constants import ENV_MAIN
-from src.services.facescan.scanner.facescanner import MockScanner
+from src.services.facescan.scanner import facescanner
-_ALL_SCANNERS = [MockScanner]
-
-if ENV_MAIN.SCANNER == 'InsightFace':
- from src.services.facescan.scanner.insightface.insightface import InsightFace
- _ALL_SCANNERS.append(InsightFace)
-
-if ENV_MAIN.SCANNER == 'Facenet2018':
- from src.services.facescan.scanner.facenet.facenet import Facenet2018
- _ALL_SCANNERS.append(Facenet2018)
+_ALL_SCANNERS = [facescanner.MockScanner, facescanner.ScannerWithPlugins]
id_2_face_scanner_cls = {backend.ID: backend for backend in _ALL_SCANNERS}
-TESTED_SCANNERS = [id_2_face_scanner_cls[k] for k in ENV_MAIN.SCANNERS]
-scanner = id_2_face_scanner_cls[ENV_MAIN.SCANNER]()
+TESTED_SCANNERS = [facescanner.ScannerWithPlugins]
+
+scanner = facescanner.ScannerWithPlugins()
diff --git a/embedding-calculator/src/services/facescan/scanner/test/_scanner_cache.py b/embedding-calculator/src/services/facescan/scanner/test/_cache.py
similarity index 66%
rename from embedding-calculator/src/services/facescan/scanner/test/_scanner_cache.py
rename to embedding-calculator/src/services/facescan/scanner/test/_cache.py
index 2740a5835f..750839f98e 100644
--- a/embedding-calculator/src/services/facescan/scanner/test/_scanner_cache.py
+++ b/embedding-calculator/src/services/facescan/scanner/test/_cache.py
@@ -13,18 +13,9 @@
# permissions and limitations under the License.
from functools import lru_cache
-from src.services.imgtools.read_img import read_img
+from src.services.imgtools.read_img import read_img as org_read_img
@lru_cache(maxsize=None)
-def get_scanner(scanner_cls):
- scanner = scanner_cls()
-
- @lru_cache(maxsize=None)
- def scan(img_path, *args, **kwargs):
- img = read_img(img_path)
- return scanner.scan_(img, *args, **kwargs)
-
- scanner.scan_ = scanner.scan
- scanner.scan = scan
- return scanner
+def read_img(img_path):
+ return org_read_img(img_path)
diff --git a/embedding-calculator/src/services/facescan/scanner/test/test_detector.py b/embedding-calculator/src/services/facescan/scanner/test/test_detector.py
index 80e2b02a65..030611e821 100644
--- a/embedding-calculator/src/services/facescan/scanner/test/test_detector.py
+++ b/embedding-calculator/src/services/facescan/scanner/test/test_detector.py
@@ -13,13 +13,14 @@
# permissions and limitations under the License.
import pytest
+from typing import Type, Union
from sample_images import IMG_DIR
from sample_images.annotations import SAMPLE_IMAGES
from src.services.dto.bounding_box import BoundingBoxDTO
from src.services.facescan.scanner.facescanner import FaceScanner
from src.services.facescan.scanner.facescanners import TESTED_SCANNERS
-from src.services.facescan.scanner.test._scanner_cache import get_scanner
+from src.services.facescan.scanner.test._cache import read_img
from src.services.facescan.scanner.test.calculate_errors import calculate_errors
from src.services.utils.pytestutils import is_sorted
@@ -27,8 +28,8 @@
@pytest.mark.integration
@pytest.mark.parametrize('scanner_cls', TESTED_SCANNERS)
def test__given_no_faces_img__when_scanned__then_returns_no_faces(scanner_cls):
- scanner: FaceScanner = get_scanner(scanner_cls)
- img = IMG_DIR / '017_0.jpg'
+ scanner: FaceScanner = scanner_cls()
+ img = read_img(IMG_DIR / '017_0.jpg')
result = scanner.scan(img)
@@ -43,8 +44,8 @@ def test__given_5face_img__when_scanned__then_returns_5_correct_bounding_boxes_s
BoundingBoxDTO(161, 36, 266, 160, 1),
BoundingBoxDTO(342, 160, 437, 268, 1),
BoundingBoxDTO(243, 174, 352, 309, 1)]
- scanner: FaceScanner = get_scanner(scanner_cls)
- img = IMG_DIR / '000_5.jpg'
+ scanner: FaceScanner = scanner_cls()
+ img = read_img(IMG_DIR / '000_5.jpg')
faces = scanner.scan(img)
@@ -56,8 +57,8 @@ def test__given_5face_img__when_scanned__then_returns_5_correct_bounding_boxes_s
@pytest.mark.integration
@pytest.mark.parametrize('scanner_cls', TESTED_SCANNERS)
def test__given_threshold_set_to_1__when_scanned__then_returns_no_faces(scanner_cls):
- scanner: FaceScanner = get_scanner(scanner_cls)
- img = IMG_DIR / '000_5.jpg'
+ scanner: FaceScanner = scanner_cls()
+ img = read_img(IMG_DIR / '000_5.jpg')
result = scanner.scan(img, det_prob_threshold=1)
@@ -68,8 +69,8 @@ def test__given_threshold_set_to_1__when_scanned__then_returns_no_faces(scanner_
@pytest.mark.parametrize('scanner_cls', TESTED_SCANNERS)
@pytest.mark.parametrize('row', (k for k in SAMPLE_IMAGES if k.include_to_tests))
def test__given_img__when_scanned__then_1_to_1_relationship_between_all_returned_boxes_and_faces(scanner_cls, row):
- scanner: FaceScanner = get_scanner(scanner_cls)
- img = IMG_DIR / row.img_name
+ scanner: FaceScanner = scanner_cls()
+ img = read_img(IMG_DIR / row.img_name)
scanned_faces = scanner.scan(img)
diff --git a/embedding-calculator/src/services/facescan/scanner/test/test_embedder.py b/embedding-calculator/src/services/facescan/scanner/test/test_embedder.py
index e22573d6da..c88788f20b 100644
--- a/embedding-calculator/src/services/facescan/scanner/test/test_embedder.py
+++ b/embedding-calculator/src/services/facescan/scanner/test/test_embedder.py
@@ -17,13 +17,9 @@
from sample_images import IMG_DIR, PERSON_B, PERSON_C
from src.services.facescan.scanner.facescanner import FaceScanner
from src.services.facescan.scanner.facescanners import TESTED_SCANNERS
-from src.services.facescan.scanner.test._scanner_cache import get_scanner
+from src.services.facescan.scanner.test._cache import read_img
from src.services.utils.pyutils import first_and_only
-DIFFERENCE_THRESHOLD = {
- 'InsightFace': 400,
- 'Facenet2018': 0.2
-}
def embeddings_are_equal(embedding1, embedding2, difference_threshold):
@@ -35,31 +31,32 @@ def embeddings_are_equal(embedding1, embedding2, difference_threshold):
@pytest.mark.integration
@pytest.mark.parametrize('scanner_cls', TESTED_SCANNERS)
def test__given_same_face_images__when_scanned__then_returns_same_embeddings(scanner_cls):
- scanner: FaceScanner = get_scanner(scanner_cls)
- img1 = IMG_DIR / PERSON_B[0]
- img2 = IMG_DIR / PERSON_B[1]
+ scanner: FaceScanner = scanner_cls()
+ img1 = read_img(IMG_DIR / PERSON_B[0])
+ img2 = read_img(IMG_DIR / PERSON_B[1])
emb1 = first_and_only(scanner.scan(img1)).embedding
emb2 = first_and_only(scanner.scan(img2)).embedding
- assert embeddings_are_equal(emb1, emb2, DIFFERENCE_THRESHOLD[scanner_cls.ID])
+ assert embeddings_are_equal(emb1, emb2, scanner.difference_threshold)
@pytest.mark.integration
@pytest.mark.parametrize('scanner_cls', TESTED_SCANNERS)
def test__given_diff_face_images__when_scanned__then_returns_diff_embeddings(scanner_cls):
- scanner: FaceScanner = get_scanner(scanner_cls)
- img1 = IMG_DIR / PERSON_B[0]
- img2 = IMG_DIR / PERSON_C[0]
+ scanner: FaceScanner = scanner_cls()
+ img1 = read_img(IMG_DIR / PERSON_B[0])
+ img2 = read_img(IMG_DIR / PERSON_C[0])
emb1 = first_and_only(scanner.scan(img1)).embedding
emb2 = first_and_only(scanner.scan(img2)).embedding
- assert not embeddings_are_equal(emb1, emb2, DIFFERENCE_THRESHOLD[scanner_cls.ID])
+ assert not embeddings_are_equal(emb1, emb2, scanner.difference_threshold)
@pytest.mark.integration
@pytest.mark.parametrize('scanner_cls', TESTED_SCANNERS)
def test__size_of_embeddings(scanner_cls):
- scanner: FaceScanner = get_scanner(scanner_cls)
- emb = first_and_only(scanner.scan(IMG_DIR / '007_B.jpg')).embedding
+ scanner: FaceScanner = scanner_cls()
+ img = read_img(IMG_DIR / PERSON_B[0])
+ emb = first_and_only(scanner.scan(img)).embedding
assert len(emb) == 512
diff --git a/embedding-calculator/src/services/facescan/scanner/test/test_gender_age.py.disabled b/embedding-calculator/src/services/facescan/scanner/test/test_gender_age.py.disabled
new file mode 100644
index 0000000000..741049c5eb
--- /dev/null
+++ b/embedding-calculator/src/services/facescan/scanner/test/test_gender_age.py.disabled
@@ -0,0 +1,38 @@
+# Copyright (c) 2020 the original author or authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+import pytest
+from typing import Type, Union
+
+from sample_images import IMG_DIR, annotations
+from src.services.facescan.scanner.facescanner import FaceScanner
+from src.services.facescan.scanner.facescanners import TESTED_SCANNERS
+from src.services.facescan.scanner.test._cache import read_img
+
+
+GENDER_AGE_SCANNERS = [s for s in TESTED_SCANNERS if issubclass(s, GenderAgeMixin)]
+GENDER_AGE_SAMPLE_IMAGES = [row for row in annotations.SAMPLE_IMAGES
+ if row.is_male is not None and row.age is not None]
+
+
+@pytest.mark.performance
+@pytest.mark.parametrize('scanner_cls', GENDER_AGE_SCANNERS)
+@pytest.mark.parametrize('row', GENDER_AGE_SAMPLE_IMAGES)
+def test__getting_gender_and_age(scanner_cls: Type[FaceScanner], row: annotations.Row):
+ scanner: Union[FaceScanner, GenderAgeMixin] = scanner_cls()
+
+ img = read_img(IMG_DIR / row.img_name)
+ face = scanner.genderage(img)[0]
+ assert face.is_male == row.is_male, f'{row.img_name}: Wrong gender - {face.is_male}'
+ assert abs(face.age - row.age) < 5, f'{row.img_name}: Age mismatched: {face.age} ~= {row.age}'
diff --git a/embedding-calculator/src/services/flask_/constants.py b/embedding-calculator/src/services/flask_/constants.py
index 2c217d6f98..81958f48c8 100644
--- a/embedding-calculator/src/services/flask_/constants.py
+++ b/embedding-calculator/src/services/flask_/constants.py
@@ -18,3 +18,4 @@
class ARG:
LIMIT = 'limit'
DET_PROB_THRESHOLD = 'det_prob_threshold'
+ FACE_PLUGINS = 'face_plugins'
diff --git a/embedding-calculator/srcext/facenet/LICENSE.md b/embedding-calculator/srcext/facenet/LICENSE.md
deleted file mode 100644
index 9804f4b06c..0000000000
--- a/embedding-calculator/srcext/facenet/LICENSE.md
+++ /dev/null
@@ -1,24 +0,0 @@
-Version: 2018-04-10
-Link: https://github.com/davidsandberg/facenet/commit/096ed770f163957c1e56efa7feeb194773920f6e
-
-MIT License
-
-Copyright (c) 2016 David Sandberg
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
diff --git a/embedding-calculator/srcext/facenet/README.md b/embedding-calculator/srcext/facenet/README.md
deleted file mode 100644
index 838134a964..0000000000
--- a/embedding-calculator/srcext/facenet/README.md
+++ /dev/null
@@ -1,58 +0,0 @@
-Version: 2018-04-10
-Link: https://github.com/davidsandberg/facenet/commit/096ed770f163957c1e56efa7feeb194773920f6e
-
-# Face Recognition using Tensorflow [![Build Status][travis-image]][travis]
-
-[travis-image]: http://travis-ci.org/davidsandberg/facenet.svg?branch=master
-[travis]: http://travis-ci.org/davidsandberg/facenet
-
-This is a TensorFlow implementation of the face recognizer described in the paper
-["FaceNet: A Unified Embedding for Face Recognition and Clustering"](http://arxiv.org/abs/1503.03832). The project also uses ideas from the paper ["Deep Face Recognition"](http://www.robots.ox.ac.uk/~vgg/publications/2015/Parkhi15/parkhi15.pdf) from the [Visual Geometry Group](http://www.robots.ox.ac.uk/~vgg/) at Oxford.
-
-## Compatibility
-The code is tested using Tensorflow r1.7 under Ubuntu 14.04 with Python 2.7 and Python 3.5. The test cases can be found [here](https://github.com/davidsandberg/facenet/tree/master/test) and the results can be found [here](http://travis-ci.org/davidsandberg/facenet).
-
-## News
-| Date | Update |
-|----------|--------|
-| 2018-04-10 | Added new models trained on Casia-WebFace and VGGFace2 (see below). Note that the models use fixed image standardization (see [wiki](https://github.com/davidsandberg/facenet/wiki/Training-using-the-VGGFace2-dataset)). |
-| 2018-03-31 | Added a new, more flexible input pipeline as well as a bunch of minor updates. |
-| 2017-05-13 | Removed a bunch of older non-slim models. Moved the last bottleneck layer into the respective models. Corrected normalization of Center Loss. |
-| 2017-05-06 | Added code to [train a classifier on your own images](https://github.com/davidsandberg/facenet/wiki/Train-a-classifier-on-own-images). Renamed facenet_train.py to train_tripletloss.py and facenet_train_classifier.py to train_softmax.py. |
-| 2017-03-02 | Added pretrained models that generate 128-dimensional embeddings.|
-| 2017-02-22 | Updated to Tensorflow r1.0. Added Continuous Integration using Travis-CI.|
-| 2017-02-03 | Added models where only trainable variables have been stored in the checkpoint. These are therefore significantly smaller. |
-| 2017-01-27 | Added a model trained on a subset of the MS-Celeb-1M dataset. The LFW accuracy of this model is around 0.994. |
-| 2017-01-02 | Updated to run with Tensorflow r0.12. Not sure if it runs with older versions of Tensorflow though. |
-
-## Pre-trained models
-| Model name | LFW accuracy | Training dataset | Architecture |
-|-----------------|--------------|------------------|-------------|
-| [20180408-102900](https://drive.google.com/open?id=1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz) | 0.9905 | CASIA-WebFace | [Inception ResNet v1](https://github.com/davidsandberg/facenet/blob/master/src/models/inception_resnet_v1.py) |
-| [20180402-114759](https://drive.google.com/open?id=1EXPBSXwTaqrSC0OhUdXNmKSh9qJUQ55-) | 0.9965 | VGGFace2 | [Inception ResNet v1](https://github.com/davidsandberg/facenet/blob/master/src/models/inception_resnet_v1.py) |
-
-NOTE: If you use any of the models, please do not forget to give proper credit to those providing the training dataset as well.
-
-## Inspiration
-The code is heavily inspired by the [OpenFace](https://github.com/cmusatyalab/openface) implementation.
-
-## Training data
-The [CASIA-WebFace](http://www.cbsr.ia.ac.cn/english/CASIA-WebFace-Database.html) dataset has been used for training. This training set consists of a total of 453,453 images over 10,575 identities after face detection. Some performance improvement has been seen if the dataset has been filtered before training. Some more information about how this was done will come later.
-The best performing model has been trained on the [VGGFace2](https://www.robots.ox.ac.uk/~vgg/data/vgg_face2/) dataset consisting of ~3.3M faces and ~9000 classes.
-
-## Pre-processing
-
-### Face alignment using MTCNN
-One problem with the above approach seems to be that the Dlib face detector misses some of the hard examples (partial occlusion, silhouettes, etc). This makes the training set too "easy" which causes the model to perform worse on other benchmarks.
-To solve this, other face landmark detectors have been tested. One face landmark detector that has proven to work very well in this setting is the
-[Multi-task CNN](https://kpzhang93.github.io/MTCNN_face_detection_alignment/index.html). A Matlab/Caffe implementation can be found [here](https://github.com/kpzhang93/MTCNN_face_detection_alignment) and this has been used for face alignment with very good results. A Python/Tensorflow implementation of MTCNN can be found [here](https://github.com/davidsandberg/facenet/tree/master/src/align). This implementation does not give identical results to the Matlab/Caffe implementation but the performance is very similar.
-
-## Running training
-Currently, the best results are achieved by training the model using softmax loss. Details on how to train a model using softmax loss on the CASIA-WebFace dataset can be found on the page [Classifier training of Inception-ResNet-v1](https://github.com/davidsandberg/facenet/wiki/Classifier-training-of-inception-resnet-v1).
-
-## Pre-trained models
-### Inception-ResNet-v1 model
-A couple of pretrained models are provided. They are trained using softmax loss with the Inception-Resnet-v1 model. The datasets have been aligned using [MTCNN](https://github.com/davidsandberg/facenet/tree/master/src/align).
-
-## Performance
-The accuracy on LFW for the model [20180402-114759](https://drive.google.com/open?id=1EXPBSXwTaqrSC0OhUdXNmKSh9qJUQ55-) is 0.99650+-0.00252. A description of how to run the test can be found on the page [Validate on LFW](https://github.com/davidsandberg/facenet/wiki/Validate-on-lfw). Note that the input images to the model need to be standardized using fixed image standardization (use the option `--use_fixed_image_standardization` when running e.g. `validate_on_lfw.py`).
\ No newline at end of file
diff --git a/embedding-calculator/srcext/facenet/align/__init__.py b/embedding-calculator/srcext/facenet/align/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/embedding-calculator/srcext/facenet/align/det1.npy b/embedding-calculator/srcext/facenet/align/det1.npy
deleted file mode 100644
index 7c05a2c562..0000000000
Binary files a/embedding-calculator/srcext/facenet/align/det1.npy and /dev/null differ
diff --git a/embedding-calculator/srcext/facenet/align/det2.npy b/embedding-calculator/srcext/facenet/align/det2.npy
deleted file mode 100644
index 85d5bf09c9..0000000000
Binary files a/embedding-calculator/srcext/facenet/align/det2.npy and /dev/null differ
diff --git a/embedding-calculator/srcext/facenet/align/det3.npy b/embedding-calculator/srcext/facenet/align/det3.npy
deleted file mode 100644
index 90d5ba9754..0000000000
Binary files a/embedding-calculator/srcext/facenet/align/det3.npy and /dev/null differ
diff --git a/embedding-calculator/srcext/facenet/align/detect_face.py b/embedding-calculator/srcext/facenet/align/detect_face.py
deleted file mode 100644
index e557dcaeba..0000000000
--- a/embedding-calculator/srcext/facenet/align/detect_face.py
+++ /dev/null
@@ -1,798 +0,0 @@
-""" Tensorflow implementation of the face detection / alignment algorithm found at
-https://github.com/kpzhang93/MTCNN_face_detection_alignment
-"""
-# Version: 2018-04-10
-# Link: https://github.com/davidsandberg/facenet/commit/096ed770f163957c1e56efa7feeb194773920f6e
-#
-#
-# MIT License
-#
-# Copyright (c) 2016 David Sandberg
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-# from math import floor
-import cv2
-import numpy as np
-import tensorflow as tf
-from six import string_types, iteritems
-
-
-def layer(op):
- """Decorator for composable network layers."""
-
- def layer_decorated(self, *args, **kwargs):
- # Automatically set a name if not provided.
- name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
- # Figure out the layer inputs.
- if len(self.terminals) == 0:
- raise RuntimeError('No input variables found for layer %s.' % name)
- elif len(self.terminals) == 1:
- layer_input = self.terminals[0]
- else:
- layer_input = list(self.terminals)
-        # Perform the operation and get the output.
- layer_output = op(self, layer_input, *args, **kwargs)
- # Add to layer LUT.
- self.layers[name] = layer_output
- # This output is now the input for the next layer.
- self.feed(layer_output)
- # Return self for chained calls.
- return self
-
- return layer_decorated
-
-
-class Network(object):
-
- def __init__(self, inputs, trainable=True):
- # The input nodes for this network
- self.inputs = inputs
- # The current list of terminal nodes
- self.terminals = []
- # Mapping from layer names to layers
- self.layers = dict(inputs)
- # If true, the resulting variables are set as trainable
- self.trainable = trainable
-
- self.setup()
-
- def setup(self):
- """Construct the network. """
- raise NotImplementedError('Must be implemented by the subclass.')
-
- def load(self, data_path, session, ignore_missing=False):
- """Load network weights.
- data_path: The path to the numpy-serialized network weights
- session: The current TensorFlow session
- ignore_missing: If true, serialized weights for missing layers are ignored.
- """
- data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item() # pylint: disable=no-member
-
- for op_name in data_dict:
- with tf.variable_scope(op_name, reuse=True):
- for param_name, data in iteritems(data_dict[op_name]):
- try:
- var = tf.get_variable(param_name)
- session.run(var.assign(data))
- except ValueError:
- if not ignore_missing:
- raise
-
- def feed(self, *args):
- """Set the input(s) for the next operation by replacing the terminal nodes.
- The arguments can be either layer names or the actual layers.
- """
- assert len(args) != 0
- self.terminals = []
- for fed_layer in args:
- if isinstance(fed_layer, string_types):
- try:
- fed_layer = self.layers[fed_layer]
- except KeyError:
- raise KeyError('Unknown layer name fed: %s' % fed_layer)
- self.terminals.append(fed_layer)
- return self
-
- def get_output(self):
- """Returns the current network output."""
- return self.terminals[-1]
-
- def get_unique_name(self, prefix):
- """Returns an index-suffixed unique name for the given prefix.
- This is used for auto-generating layer names based on the type-prefix.
- """
- ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
- return '%s_%d' % (prefix, ident)
-
- def make_var(self, name, shape):
- """Creates a new TensorFlow variable."""
- return tf.get_variable(name, shape, trainable=self.trainable)
-
- def validate_padding(self, padding):
- """Verifies that the padding is one of the supported ones."""
- assert padding in ('SAME', 'VALID')
-
- @layer
- def conv(self,
- inp,
- k_h,
- k_w,
- c_o,
- s_h,
- s_w,
- name,
- relu=True,
- padding='SAME',
- group=1,
- biased=True):
- # Verify that the padding is acceptable
- self.validate_padding(padding)
- # Get the number of channels in the input
- c_i = int(inp.get_shape()[-1])
- # Verify that the grouping parameter is valid
- assert c_i % group == 0
- assert c_o % group == 0
- # Convolution for a given input and kernel
- convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
- with tf.variable_scope(name) as scope:
- kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
- # This is the common-case. Convolve the input without any further complications.
- output = convolve(inp, kernel)
- # Add the biases
- if biased:
- biases = self.make_var('biases', [c_o])
- output = tf.nn.bias_add(output, biases)
- if relu:
- # ReLU non-linearity
- output = tf.nn.relu(output, name=scope.name)
- return output
-
- @layer
- def prelu(self, inp, name):
- with tf.variable_scope(name):
- i = int(inp.get_shape()[-1])
- alpha = self.make_var('alpha', shape=(i,))
- output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
- return output
-
- @layer
- def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
- self.validate_padding(padding)
- return tf.nn.max_pool(inp,
- ksize=[1, k_h, k_w, 1],
- strides=[1, s_h, s_w, 1],
- padding=padding,
- name=name)
-
- @layer
- def fc(self, inp, num_out, name, relu=True):
- with tf.variable_scope(name):
- input_shape = inp.get_shape()
- if input_shape.ndims == 4:
- # The input is spatial. Vectorize it first.
- dim = 1
- for d in input_shape[1:].as_list():
- dim *= int(d)
- feed_in = tf.reshape(inp, [-1, dim])
- else:
- feed_in, dim = (inp, input_shape[-1].value)
- weights = self.make_var('weights', shape=[dim, num_out])
- biases = self.make_var('biases', [num_out])
- op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
- fc = op(feed_in, weights, biases, name=name)
- return fc
-
- """
- Multi dimensional softmax,
- refer to https://github.com/tensorflow/tensorflow/issues/210
- compute softmax along the dimension of target
- the native softmax only supports batch_size x dimension
- """
-
- @layer
- def softmax(self, target, axis, name=None):
- max_axis = tf.reduce_max(target, axis, keep_dims=True)
- target_exp = tf.exp(target - max_axis)
- normalize = tf.reduce_sum(target_exp, axis, keep_dims=True)
- softmax = tf.div(target_exp, normalize, name)
- return softmax
-
-
-class PNet(Network):
- def setup(self):
- (self.feed('data') # pylint: disable=no-value-for-parameter, no-member
- .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
- .prelu(name='PReLU1')
- .max_pool(2, 2, 2, 2, name='pool1')
- .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
- .prelu(name='PReLU2')
- .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
- .prelu(name='PReLU3')
- .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
- .softmax(3, name='prob1'))
-
- (self.feed('PReLU3') # pylint: disable=no-value-for-parameter
- .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))
-
-
-class RNet(Network):
- def setup(self):
- (self.feed('data') # pylint: disable=no-value-for-parameter, no-member
- .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
- .prelu(name='prelu1')
- .max_pool(3, 3, 2, 2, name='pool1')
- .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
- .prelu(name='prelu2')
- .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
- .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
- .prelu(name='prelu3')
- .fc(128, relu=False, name='conv4')
- .prelu(name='prelu4')
- .fc(2, relu=False, name='conv5-1')
- .softmax(1, name='prob1'))
-
- (self.feed('prelu4') # pylint: disable=no-value-for-parameter
- .fc(4, relu=False, name='conv5-2'))
-
-
-class ONet(Network):
- def setup(self):
- (self.feed('data') # pylint: disable=no-value-for-parameter, no-member
- .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
- .prelu(name='prelu1')
- .max_pool(3, 3, 2, 2, name='pool1')
- .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
- .prelu(name='prelu2')
- .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
- .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
- .prelu(name='prelu3')
- .max_pool(2, 2, 2, 2, name='pool3')
- .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
- .prelu(name='prelu4')
- .fc(256, relu=False, name='conv5')
- .prelu(name='prelu5')
- .fc(2, relu=False, name='conv6-1')
- .softmax(1, name='prob1'))
-
- (self.feed('prelu5') # pylint: disable=no-value-for-parameter
- .fc(4, relu=False, name='conv6-2'))
-
- (self.feed('prelu5') # pylint: disable=no-value-for-parameter
- .fc(10, relu=False, name='conv6-3'))
-
-
-def create_mtcnn(sess, model_path):
- if not model_path:
- model_path, _ = os.path.split(os.path.realpath(__file__))
-
- with tf.variable_scope('pnet'):
- data = tf.placeholder(tf.float32, (None, None, None, 3), 'input')
- pnet = PNet({'data': data})
- pnet.load(os.path.join(model_path, 'det1.npy'), sess)
- with tf.variable_scope('rnet'):
- data = tf.placeholder(tf.float32, (None, 24, 24, 3), 'input')
- rnet = RNet({'data': data})
- rnet.load(os.path.join(model_path, 'det2.npy'), sess)
- with tf.variable_scope('onet'):
- data = tf.placeholder(tf.float32, (None, 48, 48, 3), 'input')
- onet = ONet({'data': data})
- onet.load(os.path.join(model_path, 'det3.npy'), sess)
-
- pnet_fun = lambda img: sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0': img})
- rnet_fun = lambda img: sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0': img})
- onet_fun = lambda img: sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
- feed_dict={'onet/input:0': img})
- return pnet_fun, rnet_fun, onet_fun
-
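For orientation, the canonical upstream usage of these two functions looked like the sketch below; the minsize, per-stage thresholds, and pyramid scale factor are the common facenet defaults, not values mandated by this module:

    import numpy as np
    import tensorflow as tf

    img = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for a real RGB image
    with tf.Session() as sess:
        pnet, rnet, onet = create_mtcnn(sess, None)  # loads det1/det2/det3.npy next to this module
        boxes, points = detect_face(img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)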
-
-def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
- """Detects faces in an image, and returns bounding boxes and points for them.
- img: input image
- minsize: minimum faces' size
- pnet, rnet, onet: caffemodel
-    threshold: threshold=[th1, th2, th3], where th1-th3 are the thresholds for the three cascade stages
- factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
- """
- factor_count = 0
- total_boxes = np.empty((0, 9))
- points = np.empty(0)
- h = img.shape[0]
- w = img.shape[1]
- minl = np.amin([h, w])
- m = 12.0 / minsize
- minl = minl * m
- # create scale pyramid
- scales = []
- while minl >= 12:
- scales += [m * np.power(factor, factor_count)]
- minl = minl * factor
- factor_count += 1
-
- # first stage
- for scale in scales:
- hs = int(np.ceil(h * scale))
- ws = int(np.ceil(w * scale))
- im_data = imresample(img, (hs, ws))
- im_data = (im_data - 127.5) * 0.0078125
- img_x = np.expand_dims(im_data, 0)
- img_y = np.transpose(img_x, (0, 2, 1, 3))
- out = pnet(img_y)
- out0 = np.transpose(out[0], (0, 2, 1, 3))
- out1 = np.transpose(out[1], (0, 2, 1, 3))
-
- boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0])
-
- # inter-scale nms
- pick = nms(boxes.copy(), 0.5, 'Union')
- if boxes.size > 0 and pick.size > 0:
- boxes = boxes[pick, :]
- total_boxes = np.append(total_boxes, boxes, axis=0)
-
- numbox = total_boxes.shape[0]
- if numbox > 0:
- pick = nms(total_boxes.copy(), 0.7, 'Union')
- total_boxes = total_boxes[pick, :]
- regw = total_boxes[:, 2] - total_boxes[:, 0]
- regh = total_boxes[:, 3] - total_boxes[:, 1]
- qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
- qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
- qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
- qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
- total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
- total_boxes = rerec(total_boxes.copy())
- total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
- dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
-
- numbox = total_boxes.shape[0]
- if numbox > 0:
- # second stage
- tempimg = np.zeros((24, 24, 3, numbox))
- for k in range(0, numbox):
- tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
- tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = img[y[k] - 1:ey[k], x[k] - 1:ex[k], :]
- if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
- tempimg[:, :, :, k] = imresample(tmp, (24, 24))
- else:
-                return np.empty(0)
- tempimg = (tempimg - 127.5) * 0.0078125
- tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
- out = rnet(tempimg1)
- out0 = np.transpose(out[0])
- out1 = np.transpose(out[1])
- score = out1[1, :]
- ipass = np.where(score > threshold[1])
- total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
- mv = out0[:, ipass[0]]
- if total_boxes.shape[0] > 0:
- pick = nms(total_boxes, 0.7, 'Union')
- total_boxes = total_boxes[pick, :]
- total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
- total_boxes = rerec(total_boxes.copy())
-
- numbox = total_boxes.shape[0]
- if numbox > 0:
- # third stage
- total_boxes = np.fix(total_boxes).astype(np.int32)
- dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
- tempimg = np.zeros((48, 48, 3, numbox))
- for k in range(0, numbox):
- tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
- tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = img[y[k] - 1:ey[k], x[k] - 1:ex[k], :]
- if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
- tempimg[:, :, :, k] = imresample(tmp, (48, 48))
- else:
-                return np.empty(0)
- tempimg = (tempimg - 127.5) * 0.0078125
- tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
- out = onet(tempimg1)
- out0 = np.transpose(out[0])
- out1 = np.transpose(out[1])
- out2 = np.transpose(out[2])
- score = out2[1, :]
- points = out1
- ipass = np.where(score > threshold[2])
- points = points[:, ipass[0]]
- total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
- mv = out0[:, ipass[0]]
-
- w = total_boxes[:, 2] - total_boxes[:, 0] + 1
- h = total_boxes[:, 3] - total_boxes[:, 1] + 1
- points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
- points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
- if total_boxes.shape[0] > 0:
- total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
- pick = nms(total_boxes.copy(), 0.7, 'Min')
- total_boxes = total_boxes[pick, :]
- points = points[:, pick]
-
- return total_boxes, points
-
-
-def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
- """Detects faces in a list of images
- images: list containing input images
- detection_window_size_ratio: ratio of minimum face size to smallest image dimension
- pnet, rnet, onet: caffemodel
-    threshold: threshold=[th1, th2, th3], where th1-th3 are the thresholds for the three cascade stages, each in [0, 1]
- factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
- """
- all_scales = [None] * len(images)
- images_with_boxes = [None] * len(images)
-
- for i in range(len(images)):
- images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
-
- # create scale pyramid
- for index, img in enumerate(images):
- all_scales[index] = []
- h = img.shape[0]
- w = img.shape[1]
- minsize = int(detection_window_size_ratio * np.minimum(w, h))
- factor_count = 0
- minl = np.amin([h, w])
- if minsize <= 12:
- minsize = 12
-
- m = 12.0 / minsize
- minl = minl * m
- while minl >= 12:
- all_scales[index].append(m * np.power(factor, factor_count))
- minl = minl * factor
- factor_count += 1
-
- # # # # # # # # # # # # #
- # first stage - fast proposal network (pnet) to obtain face candidates
- # # # # # # # # # # # # #
-
- images_obj_per_resolution = {}
-
-    # TODO: use some type of rounding to a multiple of 8 to increase the probability that pyramid images will have the same resolution across input images
-
- for index, scales in enumerate(all_scales):
- h = images[index].shape[0]
- w = images[index].shape[1]
-
- for scale in scales:
- hs = int(np.ceil(h * scale))
- ws = int(np.ceil(w * scale))
-
- if (ws, hs) not in images_obj_per_resolution:
- images_obj_per_resolution[(ws, hs)] = []
-
- im_data = imresample(images[index], (hs, ws))
- im_data = (im_data - 127.5) * 0.0078125
- img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering
- images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
-
- for resolution in images_obj_per_resolution:
- images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
- outs = pnet(images_per_resolution)
-
- for index in range(len(outs[0])):
- scale = images_obj_per_resolution[resolution][index]['scale']
- image_index = images_obj_per_resolution[resolution][index]['index']
- out0 = np.transpose(outs[0][index], (1, 0, 2))
- out1 = np.transpose(outs[1][index], (1, 0, 2))
-
- boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
-
- # inter-scale nms
- pick = nms(boxes.copy(), 0.5, 'Union')
- if boxes.size > 0 and pick.size > 0:
- boxes = boxes[pick, :]
- images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
- boxes,
- axis=0)
-
- for index, image_obj in enumerate(images_with_boxes):
- numbox = image_obj['total_boxes'].shape[0]
- if numbox > 0:
- h = images[index].shape[0]
- w = images[index].shape[1]
- pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
- image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
- regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
- regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
- qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
- qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
- qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
- qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
- image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
- image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
- image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
- dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
-
- numbox = image_obj['total_boxes'].shape[0]
- tempimg = np.zeros((24, 24, 3, numbox))
-
- if numbox > 0:
- for k in range(0, numbox):
- tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
- tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
- if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
- tempimg[:, :, :, k] = imresample(tmp, (24, 24))
- else:
-                        return np.empty(0)
-
- tempimg = (tempimg - 127.5) * 0.0078125
- image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
-
- # # # # # # # # # # # # #
- # second stage - refinement of face candidates with rnet
- # # # # # # # # # # # # #
-
- bulk_rnet_input = np.empty((0, 24, 24, 3))
- for index, image_obj in enumerate(images_with_boxes):
- if 'rnet_input' in image_obj:
- bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
-
- out = rnet(bulk_rnet_input)
- out0 = np.transpose(out[0])
- out1 = np.transpose(out[1])
- score = out1[1, :]
-
- i = 0
- for index, image_obj in enumerate(images_with_boxes):
- if 'rnet_input' not in image_obj:
- continue
-
- rnet_input_count = image_obj['rnet_input'].shape[0]
- score_per_image = score[i:i + rnet_input_count]
- out0_per_image = out0[:, i:i + rnet_input_count]
-
- ipass = np.where(score_per_image > threshold[1])
- image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
- np.expand_dims(score_per_image[ipass].copy(), 1)])
-
- mv = out0_per_image[:, ipass[0]]
-
- if image_obj['total_boxes'].shape[0] > 0:
- h = images[index].shape[0]
- w = images[index].shape[1]
- pick = nms(image_obj['total_boxes'], 0.7, 'Union')
- image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
- image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
- image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
-
- numbox = image_obj['total_boxes'].shape[0]
-
- if numbox > 0:
- tempimg = np.zeros((48, 48, 3, numbox))
- image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
- dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
-
- for k in range(0, numbox):
- tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
- tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
- if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
- tempimg[:, :, :, k] = imresample(tmp, (48, 48))
- else:
-                    return np.empty(0)
- tempimg = (tempimg - 127.5) * 0.0078125
- image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
-
- i += rnet_input_count
-
- # # # # # # # # # # # # #
- # third stage - further refinement and facial landmarks positions with onet
- # # # # # # # # # # # # #
-
- bulk_onet_input = np.empty((0, 48, 48, 3))
- for index, image_obj in enumerate(images_with_boxes):
- if 'onet_input' in image_obj:
- bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
-
- out = onet(bulk_onet_input)
-
- out0 = np.transpose(out[0])
- out1 = np.transpose(out[1])
- out2 = np.transpose(out[2])
- score = out2[1, :]
- points = out1
-
- i = 0
- ret = []
- for index, image_obj in enumerate(images_with_boxes):
- if 'onet_input' not in image_obj:
- ret.append(None)
- continue
-
- onet_input_count = image_obj['onet_input'].shape[0]
-
- out0_per_image = out0[:, i:i + onet_input_count]
- score_per_image = score[i:i + onet_input_count]
- points_per_image = points[:, i:i + onet_input_count]
-
- ipass = np.where(score_per_image > threshold[2])
- points_per_image = points_per_image[:, ipass[0]]
-
- image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
- np.expand_dims(score_per_image[ipass].copy(), 1)])
- mv = out0_per_image[:, ipass[0]]
-
- w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
- h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
- points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
- image_obj['total_boxes'][:, 0], (5, 1)) - 1
- points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
- image_obj['total_boxes'][:, 1], (5, 1)) - 1
-
- if image_obj['total_boxes'].shape[0] > 0:
- image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
- pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
- image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
- points_per_image = points_per_image[:, pick]
-
- ret.append((image_obj['total_boxes'], points_per_image))
- else:
- ret.append(None)
-
- i += onet_input_count
-
- return ret
-
-
-# function [boundingbox] = bbreg(boundingbox,reg)
-def bbreg(boundingbox, reg):
- """Calibrate bounding boxes"""
- if reg.shape[1] == 1:
- reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
-
- w = boundingbox[:, 2] - boundingbox[:, 0] + 1
- h = boundingbox[:, 3] - boundingbox[:, 1] + 1
- b1 = boundingbox[:, 0] + reg[:, 0] * w
- b2 = boundingbox[:, 1] + reg[:, 1] * h
- b3 = boundingbox[:, 2] + reg[:, 2] * w
- b4 = boundingbox[:, 3] + reg[:, 3] * h
- boundingbox[:, 0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
- return boundingbox
-
-
-def generateBoundingBox(imap, reg, scale, t):
- """Use heatmap to generate bounding boxes"""
- stride = 2
- cellsize = 12
-
- imap = np.transpose(imap)
- dx1 = np.transpose(reg[:, :, 0])
- dy1 = np.transpose(reg[:, :, 1])
- dx2 = np.transpose(reg[:, :, 2])
- dy2 = np.transpose(reg[:, :, 3])
- y, x = np.where(imap >= t)
- if y.shape[0] == 1:
- dx1 = np.flipud(dx1)
- dy1 = np.flipud(dy1)
- dx2 = np.flipud(dx2)
- dy2 = np.flipud(dy2)
- score = imap[(y, x)]
- reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))
- if reg.size == 0:
- reg = np.empty((0, 3))
- bb = np.transpose(np.vstack([y, x]))
- q1 = np.fix((stride * bb + 1) / scale)
- q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)
- boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
- return boundingbox, reg
-
-
-# function pick = nms(boxes,threshold,type)
-def nms(boxes, threshold, method):
- if boxes.size == 0:
- return np.empty((0, 3))
- x1 = boxes[:, 0]
- y1 = boxes[:, 1]
- x2 = boxes[:, 2]
- y2 = boxes[:, 3]
- s = boxes[:, 4]
- area = (x2 - x1 + 1) * (y2 - y1 + 1)
- I = np.argsort(s)
- pick = np.zeros_like(s, dtype=np.int16)
- counter = 0
- while I.size > 0:
- i = I[-1]
- pick[counter] = i
- counter += 1
- idx = I[0:-1]
- xx1 = np.maximum(x1[i], x1[idx])
- yy1 = np.maximum(y1[i], y1[idx])
- xx2 = np.minimum(x2[i], x2[idx])
- yy2 = np.minimum(y2[i], y2[idx])
- w = np.maximum(0.0, xx2 - xx1 + 1)
- h = np.maximum(0.0, yy2 - yy1 + 1)
- inter = w * h
-        if method == 'Min':
- o = inter / np.minimum(area[i], area[idx])
- else:
- o = inter / (area[i] + area[idx] - inter)
- I = I[np.where(o <= threshold)]
- pick = pick[0:counter]
- return pick
-
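The overlap score computed above is, for the two supported methods,

    o_{\mathrm{Union}} = \frac{|A \cap B|}{|A| + |B| - |A \cap B|}, \qquad
    o_{\mathrm{Min}} = \frac{|A \cap B|}{\min(|A|, |B|)}

and boxes whose overlap with the current highest-scoring box exceeds the threshold are suppressed.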
-
-# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
-def pad(total_boxes, w, h):
- """Compute the padding coordinates (pad the bounding boxes to square)"""
- tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
- tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
- numbox = total_boxes.shape[0]
-
- dx = np.ones((numbox), dtype=np.int32)
- dy = np.ones((numbox), dtype=np.int32)
- edx = tmpw.copy().astype(np.int32)
- edy = tmph.copy().astype(np.int32)
-
- x = total_boxes[:, 0].copy().astype(np.int32)
- y = total_boxes[:, 1].copy().astype(np.int32)
- ex = total_boxes[:, 2].copy().astype(np.int32)
- ey = total_boxes[:, 3].copy().astype(np.int32)
-
- tmp = np.where(ex > w)
- edx.flat[tmp] = np.expand_dims(-ex[tmp] + w + tmpw[tmp], 1)
- ex[tmp] = w
-
- tmp = np.where(ey > h)
- edy.flat[tmp] = np.expand_dims(-ey[tmp] + h + tmph[tmp], 1)
- ey[tmp] = h
-
- tmp = np.where(x < 1)
- dx.flat[tmp] = np.expand_dims(2 - x[tmp], 1)
- x[tmp] = 1
-
- tmp = np.where(y < 1)
- dy.flat[tmp] = np.expand_dims(2 - y[tmp], 1)
- y[tmp] = 1
-
- return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
-
-
-# function [bboxA] = rerec(bboxA)
-def rerec(bboxA):
- """Convert bboxA to square."""
- h = bboxA[:, 3] - bboxA[:, 1]
- w = bboxA[:, 2] - bboxA[:, 0]
- l = np.maximum(w, h)
- bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - l * 0.5
- bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - l * 0.5
- bboxA[:, 2:4] = bboxA[:, 0:2] + np.transpose(np.tile(l, (2, 1)))
- return bboxA
-
-
-def imresample(img, sz):
- im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) # @UndefinedVariable
- return im_data
-
-    # The commented-out resampling below is kept for debugging purposes
-# h=img.shape[0]
-# w=img.shape[1]
-# hs, ws = sz
-# dx = float(w) / ws
-# dy = float(h) / hs
-# im_data = np.zeros((hs,ws,3))
-# for a1 in range(0,hs):
-# for a2 in range(0,ws):
-# for a3 in range(0,3):
-# im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
-# return im_data
diff --git a/embedding-calculator/srcext/facenet/facenet.py b/embedding-calculator/srcext/facenet/facenet.py
deleted file mode 100644
index 375e534787..0000000000
--- a/embedding-calculator/srcext/facenet/facenet.py
+++ /dev/null
@@ -1,597 +0,0 @@
-"""Functions for building the face recognition network.
-"""
-# Version: 2018-04-10
-# Link: https://github.com/davidsandberg/facenet/commit/096ed770f163957c1e56efa7feeb194773920f6e
-#
-#
-# MIT License
-#
-# Copyright (c) 2016 David Sandberg
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-# pylint: disable=missing-docstring
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import random
-import re
-from subprocess import Popen, PIPE
-
-import numpy as np
-import tensorflow as tf
-from scipy import interpolate
-from scipy import misc
-from six import iteritems
-from sklearn.model_selection import KFold
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.training import training
-
-
-def triplet_loss(anchor, positive, negative, alpha):
- """Calculate the triplet loss according to the FaceNet paper
-
- Args:
- anchor: the embeddings for the anchor images.
- positive: the embeddings for the positive images.
-      negative: the embeddings for the negative images.
-      alpha: the margin that is enforced between positive and negative pairs.
-
- Returns:
- the triplet loss according to the FaceNet paper as a float tensor.
- """
- with tf.variable_scope('triplet_loss'):
- pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
- neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)
-
- basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
- loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)
-
- return loss
-
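In formula form, the quantity implemented above is the FaceNet triplet loss averaged over the batch:

    L = \frac{1}{N} \sum_{i=1}^{N} \max\left(0,\; \lVert f(x_i^a) - f(x_i^p) \rVert_2^2 - \lVert f(x_i^a) - f(x_i^n) \rVert_2^2 + \alpha\right)

where f(\cdot) is the embedding network and \alpha the margin.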
-
-def decov_loss(xs):
- """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
- 'Reducing Overfitting In Deep Networks by Decorrelating Representation'
- """
- x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
- m = tf.reduce_mean(x, 0, True)
- z = tf.expand_dims(x - m, 2)
- corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
- corr_frob_sqr = tf.reduce_sum(tf.square(corr))
- corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
- loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
- return loss
-
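Written out, with C the covariance matrix of the (flattened) activations over the batch, the penalty computed above is

    \mathcal{L}_{\mathrm{DeCov}} = \tfrac{1}{2}\left(\lVert C \rVert_F^2 - \lVert \operatorname{diag}(C) \rVert_2^2\right)

i.e. only the off-diagonal, cross-feature covariances are penalized.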
-
-def center_loss(features, label, alfa, nrof_classes):
- """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
- (http://ydwen.github.io/papers/WenECCV16.pdf)
- """
- nrof_features = features.get_shape()[1]
- centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
- initializer=tf.constant_initializer(0), trainable=False)
- label = tf.reshape(label, [-1])
- centers_batch = tf.gather(centers, label)
- diff = (1 - alfa) * (centers_batch - features)
- centers = tf.scatter_sub(centers, label, diff)
- loss = tf.reduce_mean(tf.square(features - centers_batch))
- return loss, centers
-
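The two returned quantities follow the cited paper: the loss is the mean squared distance of each feature to its class center, and the centers are dragged toward the batch features at rate (1 - \alpha):

    \mathcal{L}_C = \frac{1}{N} \sum_i \lVert x_i - c_{y_i} \rVert_2^2, \qquad
    c_{y_i} \leftarrow c_{y_i} - (1 - \alpha)\,(c_{y_i} - x_i)

(tf.reduce_mean above averages over both batch and feature dimensions, which only changes the constant in front.)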
-
-def get_image_paths_and_labels(dataset):
- image_paths_flat = []
- labels_flat = []
- for i in range(len(dataset)):
- image_paths_flat += dataset[i].image_paths
- labels_flat += [i] * len(dataset[i].image_paths)
- return image_paths_flat, labels_flat
-
-
-def shuffle_examples(image_paths, labels):
- shuffle_list = list(zip(image_paths, labels))
- random.shuffle(shuffle_list)
- image_paths_shuff, labels_shuff = zip(*shuffle_list)
- return image_paths_shuff, labels_shuff
-
-
-def read_images_from_disk(input_queue):
-    """Consumes a (filename, label) pair produced by an input queue.
-    Args:
-      input_queue: a two-element slice holding a filename string tensor and a label tensor.
- Returns:
- Two tensors: the decoded image, and the string label.
- """
- label = input_queue[1]
- file_contents = tf.read_file(input_queue[0])
- example = tf.image.decode_image(file_contents, channels=3)
- return example, label
-
-
-def random_rotate_image(image):
- angle = np.random.uniform(low=-10.0, high=10.0)
- return misc.imrotate(image, angle, 'bicubic')
-
-
-def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs,
- random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
- images = ops.convert_to_tensor(image_list, dtype=tf.string)
- labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
-
- # Makes an input queue
- input_queue = tf.train.slice_input_producer([images, labels],
- num_epochs=max_nrof_epochs, shuffle=shuffle)
-
- images_and_labels = []
- for _ in range(nrof_preprocess_threads):
- image, label = read_images_from_disk(input_queue)
- if random_rotate:
- image = tf.py_func(random_rotate_image, [image], tf.uint8)
- if random_crop:
- image = tf.random_crop(image, [image_size, image_size, 3])
- else:
- image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
- if random_flip:
- image = tf.image.random_flip_left_right(image)
- # pylint: disable=no-member
- image.set_shape((image_size, image_size, 3))
- image = tf.image.per_image_standardization(image)
- images_and_labels.append([image, label])
-
- image_batch, label_batch = tf.train.batch_join(
- images_and_labels, batch_size=batch_size,
- capacity=4 * nrof_preprocess_threads * batch_size,
- allow_smaller_final_batch=True)
-
- return image_batch, label_batch
-
-
-def _add_loss_summaries(total_loss):
- """Add summaries for losses.
-
- Generates moving average for all losses and associated summaries for
- visualizing the performance of the network.
-
- Args:
- total_loss: Total loss from loss().
- Returns:
- loss_averages_op: op for generating moving averages of losses.
- """
- # Compute the moving average of all individual losses and the total loss.
- loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
- losses = tf.get_collection('losses')
- loss_averages_op = loss_averages.apply(losses + [total_loss])
-
-    # Attach a scalar summary to all individual losses and the total loss; do the
-    # same for the averaged version of the losses.
- for l in losses + [total_loss]:
- # Name each loss as '(raw)' and name the moving average version of the loss
- # as the original loss name.
- tf.summary.scalar(l.op.name + ' (raw)', l)
- tf.summary.scalar(l.op.name, loss_averages.average(l))
-
- return loss_averages_op
-
-
-def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars,
- log_histograms=True):
- # Generate moving averages of all losses and associated summaries.
- loss_averages_op = _add_loss_summaries(total_loss)
-
- # Compute gradients.
- with tf.control_dependencies([loss_averages_op]):
- if optimizer == 'ADAGRAD':
- opt = tf.train.AdagradOptimizer(learning_rate)
- elif optimizer == 'ADADELTA':
- opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
- elif optimizer == 'ADAM':
- opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
- elif optimizer == 'RMSPROP':
- opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
- elif optimizer == 'MOM':
- opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
- else:
- raise ValueError('Invalid optimization algorithm')
-
- grads = opt.compute_gradients(total_loss, update_gradient_vars)
-
- # Apply gradients.
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- # Add histograms for trainable variables.
- if log_histograms:
- for var in tf.trainable_variables():
- tf.summary.histogram(var.op.name, var)
-
- # Add histograms for gradients.
- if log_histograms:
- for grad, var in grads:
- if grad is not None:
- tf.summary.histogram(var.op.name + '/gradients', grad)
-
- # Track the moving averages of all trainable variables.
- variable_averages = tf.train.ExponentialMovingAverage(
- moving_average_decay, global_step)
- variables_averages_op = variable_averages.apply(tf.trainable_variables())
-
- with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
- train_op = tf.no_op(name='train')
-
- return train_op
-
-
-def prewhiten(x):
- mean = np.mean(x)
- std = np.std(x)
- std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
- y = np.multiply(np.subtract(x, mean), 1 / std_adj)
- return y
-
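prewhiten standardizes the whole image, with a floor on the standard deviation so near-constant images do not explode:

    y = \frac{x - \bar{x}}{\max\left(\sigma(x),\; 1/\sqrt{n}\right)}

where n is the total number of pixel values in x.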
-
-def crop(image, random_crop, image_size):
- if image.shape[1] > image_size:
- sz1 = int(image.shape[1] // 2)
- sz2 = int(image_size // 2)
- if random_crop:
- diff = sz1 - sz2
- (h, v) = (np.random.randint(-diff, diff + 1), np.random.randint(-diff, diff + 1))
- else:
- (h, v) = (0, 0)
- image = image[(sz1 - sz2 + v):(sz1 + sz2 + v), (sz1 - sz2 + h):(sz1 + sz2 + h), :]
- return image
-
-
-def flip(image, random_flip):
- if random_flip and np.random.choice([True, False]):
- image = np.fliplr(image)
- return image
-
-
-def to_rgb(img):
- w, h = img.shape
- ret = np.empty((w, h, 3), dtype=np.uint8)
- ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
- return ret
-
-
-def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True):
- nrof_samples = len(image_paths)
- images = np.zeros((nrof_samples, image_size, image_size, 3))
- for i in range(nrof_samples):
- img = misc.imread(image_paths[i])
- if img.ndim == 2:
- img = to_rgb(img)
- if do_prewhiten:
- img = prewhiten(img)
- img = crop(img, do_random_crop, image_size)
- img = flip(img, do_random_flip)
- images[i, :, :, :] = img
- return images
-
-
-def get_label_batch(label_data, batch_size, batch_index):
- nrof_examples = np.size(label_data, 0)
- j = batch_index * batch_size % nrof_examples
- if j + batch_size <= nrof_examples:
- batch = label_data[j:j + batch_size]
- else:
- x1 = label_data[j:nrof_examples]
- x2 = label_data[0:nrof_examples - j]
- batch = np.vstack([x1, x2])
- batch_int = batch.astype(np.int64)
- return batch_int
-
-
-def get_batch(image_data, batch_size, batch_index):
- nrof_examples = np.size(image_data, 0)
- j = batch_index * batch_size % nrof_examples
- if j + batch_size <= nrof_examples:
- batch = image_data[j:j + batch_size, :, :, :]
- else:
- x1 = image_data[j:nrof_examples, :, :, :]
- x2 = image_data[0:nrof_examples - j, :, :, :]
- batch = np.vstack([x1, x2])
- batch_float = batch.astype(np.float32)
- return batch_float
-
-
-def get_triplet_batch(triplets, batch_index, batch_size):
- ax, px, nx = triplets
- a = get_batch(ax, int(batch_size / 3), batch_index)
- p = get_batch(px, int(batch_size / 3), batch_index)
- n = get_batch(nx, int(batch_size / 3), batch_index)
- batch = np.vstack([a, p, n])
- return batch
-
-
-def get_learning_rate_from_file(filename, epoch):
- with open(filename, 'r') as f:
- for line in f.readlines():
- line = line.split('#', 1)[0]
- if line:
- par = line.strip().split(':')
- e = int(par[0])
- lr = float(par[1])
- if e <= epoch:
- learning_rate = lr
- else:
- return learning_rate
-
-
-class ImageClass():
- """Stores the paths to images for a given class"""
-
- def __init__(self, name, image_paths):
- self.name = name
- self.image_paths = image_paths
-
- def __str__(self):
- return self.name + ', ' + str(len(self.image_paths)) + ' images'
-
- def __len__(self):
- return len(self.image_paths)
-
-
-def get_dataset(path, has_class_directories=True):
- dataset = []
- path_exp = os.path.expanduser(path)
- classes = [path for path in os.listdir(path_exp) \
- if os.path.isdir(os.path.join(path_exp, path))]
- classes.sort()
- nrof_classes = len(classes)
- for i in range(nrof_classes):
- class_name = classes[i]
- facedir = os.path.join(path_exp, class_name)
- image_paths = get_image_paths(facedir)
- dataset.append(ImageClass(class_name, image_paths))
-
- return dataset
-
-
-def get_image_paths(facedir):
- image_paths = []
- if os.path.isdir(facedir):
- images = os.listdir(facedir)
- image_paths = [os.path.join(facedir, img) for img in images]
- return image_paths
-
-
-def split_dataset(dataset, split_ratio, mode):
- if mode == 'SPLIT_CLASSES':
- nrof_classes = len(dataset)
- class_indices = np.arange(nrof_classes)
- np.random.shuffle(class_indices)
- split = int(round(nrof_classes * split_ratio))
- train_set = [dataset[i] for i in class_indices[0:split]]
- test_set = [dataset[i] for i in class_indices[split:-1]]
- elif mode == 'SPLIT_IMAGES':
- train_set = []
- test_set = []
- min_nrof_images = 2
- for cls in dataset:
- paths = cls.image_paths
- np.random.shuffle(paths)
- split = int(round(len(paths) * split_ratio))
- if split < min_nrof_images:
-                continue  # Not enough images for test set. Skip class...
- train_set.append(ImageClass(cls.name, paths[0:split]))
- test_set.append(ImageClass(cls.name, paths[split:-1]))
- else:
-        raise ValueError('Invalid train/test split mode "%s"' % mode)
- return train_set, test_set
-
-
-def load_model(model):
- # Check if the model is a model directory (containing a metagraph and a checkpoint file)
- # or if it is a protobuf file with a frozen graph
- model_exp = os.path.expanduser(model)
- if (os.path.isfile(model_exp)):
- print('Model filename: %s' % model_exp)
- with gfile.FastGFile(model_exp, 'rb') as f:
- graph_def = tf.GraphDef()
- graph_def.ParseFromString(f.read())
- tf.import_graph_def(graph_def, name='')
- else:
- print('Model directory: %s' % model_exp)
- meta_file, ckpt_file = get_model_filenames(model_exp)
-
- print('Metagraph file: %s' % meta_file)
- print('Checkpoint file: %s' % ckpt_file)
-
- saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
- saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
-
-
-def get_model_filenames(model_dir):
- files = os.listdir(model_dir)
- meta_files = [s for s in files if s.endswith('.meta')]
- if len(meta_files) == 0:
- raise ValueError('No meta file found in the model directory (%s)' % model_dir)
- elif len(meta_files) > 1:
- raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
- meta_file = meta_files[0]
- ckpt = tf.train.get_checkpoint_state(model_dir)
- if ckpt and ckpt.model_checkpoint_path:
- ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
- return meta_file, ckpt_file
-
- meta_files = [s for s in files if '.ckpt' in s]
- max_step = -1
- for f in files:
- step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
- if step_str is not None and len(step_str.groups()) >= 2:
- step = int(step_str.groups()[1])
- if step > max_step:
- max_step = step
- ckpt_file = step_str.groups()[0]
- return meta_file, ckpt_file
-
-
-def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
- assert (embeddings1.shape[0] == embeddings2.shape[0])
- assert (embeddings1.shape[1] == embeddings2.shape[1])
- nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
- nrof_thresholds = len(thresholds)
- k_fold = KFold(n_splits=nrof_folds, shuffle=False)
-
- tprs = np.zeros((nrof_folds, nrof_thresholds))
- fprs = np.zeros((nrof_folds, nrof_thresholds))
- accuracy = np.zeros((nrof_folds))
-
- diff = np.subtract(embeddings1, embeddings2)
- dist = np.sum(np.square(diff), 1)
- indices = np.arange(nrof_pairs)
-
- for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
-
- # Find the best threshold for the fold
- acc_train = np.zeros((nrof_thresholds))
- for threshold_idx, threshold in enumerate(thresholds):
- _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
- best_threshold_index = np.argmax(acc_train)
- for threshold_idx, threshold in enumerate(thresholds):
- tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold,
- dist[test_set],
- actual_issame[
- test_set])
- _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set],
- actual_issame[test_set])
-
- tpr = np.mean(tprs, 0)
- fpr = np.mean(fprs, 0)
- return tpr, fpr, accuracy
-
-
-def calculate_accuracy(threshold, dist, actual_issame):
- predict_issame = np.less(dist, threshold)
- tp = np.sum(np.logical_and(predict_issame, actual_issame))
- fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
- tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
- fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
-
- tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
- fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
- acc = float(tp + tn) / dist.size
- return tpr, fpr, acc
-
-
-def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
- assert (embeddings1.shape[0] == embeddings2.shape[0])
- assert (embeddings1.shape[1] == embeddings2.shape[1])
- nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
- nrof_thresholds = len(thresholds)
- k_fold = KFold(n_splits=nrof_folds, shuffle=False)
-
- val = np.zeros(nrof_folds)
- far = np.zeros(nrof_folds)
-
- diff = np.subtract(embeddings1, embeddings2)
- dist = np.sum(np.square(diff), 1)
- indices = np.arange(nrof_pairs)
-
- for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
-
- # Find the threshold that gives FAR = far_target
- far_train = np.zeros(nrof_thresholds)
- for threshold_idx, threshold in enumerate(thresholds):
- _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
- if np.max(far_train) >= far_target:
- f = interpolate.interp1d(far_train, thresholds, kind='slinear')
- threshold = f(far_target)
- else:
- threshold = 0.0
-
- val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
-
- val_mean = np.mean(val)
- far_mean = np.mean(far)
- val_std = np.std(val)
- return val_mean, val_std, far_mean
-
-
-def calculate_val_far(threshold, dist, actual_issame):
- predict_issame = np.less(dist, threshold)
- true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
- false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
- n_same = np.sum(actual_issame)
- n_diff = np.sum(np.logical_not(actual_issame))
- val = float(true_accept) / float(n_same)
- far = float(false_accept) / float(n_diff)
- return val, far
-
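These are the validation and false-accept rates over the labelled pairs,

    \mathrm{VAL} = \frac{\text{true accepts}}{\#\,\text{same-identity pairs}}, \qquad
    \mathrm{FAR} = \frac{\text{false accepts}}{\#\,\text{different-identity pairs}}

and calculate_val above reports the mean and std of VAL over the folds at the threshold where the training FAR reaches far_target.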
-
-def store_revision_info(src_path, output_dir, arg_string):
- try:
- # Get git hash
- cmd = ['git', 'rev-parse', 'HEAD']
- gitproc = Popen(cmd, stdout=PIPE, cwd=src_path)
- (stdout, _) = gitproc.communicate()
- git_hash = stdout.strip()
- except OSError as e:
- git_hash = ' '.join(cmd) + ': ' + e.strerror
-
- try:
- # Get local changes
- cmd = ['git', 'diff', 'HEAD']
- gitproc = Popen(cmd, stdout=PIPE, cwd=src_path)
- (stdout, _) = gitproc.communicate()
- git_diff = stdout.strip()
- except OSError as e:
- git_diff = ' '.join(cmd) + ': ' + e.strerror
-
- # Store a text file in the log directory
- rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
- with open(rev_info_filename, "w") as text_file:
- text_file.write('arguments: %s\n--------------------\n' % arg_string)
- text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__) # @UndefinedVariable
- text_file.write('git hash: %s\n--------------------\n' % git_hash)
- text_file.write('%s' % git_diff)
-
-
-def list_variables(filename):
- reader = training.NewCheckpointReader(filename)
- variable_map = reader.get_variable_to_shape_map()
- names = sorted(variable_map.keys())
- return names
-
-
-def put_images_on_grid(images, shape=(16, 8)):
- nrof_images = images.shape[0]
- img_size = images.shape[1]
- bw = 3
- img = np.zeros((shape[1] * (img_size + bw) + bw, shape[0] * (img_size + bw) + bw, 3), np.float32)
- for i in range(shape[1]):
- x_start = i * (img_size + bw) + bw
- for j in range(shape[0]):
- img_index = i * shape[0] + j
- if img_index >= nrof_images:
- break
- y_start = j * (img_size + bw) + bw
- img[x_start:x_start + img_size, y_start:y_start + img_size, :] = images[img_index, :, :, :]
- if img_index >= nrof_images:
- break
- return img
-
-
-def write_arguments_to_file(args, filename):
- with open(filename, 'w') as f:
- for key, value in iteritems(vars(args)):
- f.write('%s: %s\n' % (key, str(value)))
diff --git a/embedding-calculator/srcext/insightface/.gitignore b/embedding-calculator/srcext/insightface/.gitignore
deleted file mode 100644
index 7bbc71c092..0000000000
--- a/embedding-calculator/srcext/insightface/.gitignore
+++ /dev/null
@@ -1,101 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-env/
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# dotenv
-.env
-
-# virtualenv
-.venv
-venv/
-ENV/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
diff --git a/embedding-calculator/srcext/insightface/.gitmodules b/embedding-calculator/srcext/insightface/.gitmodules
deleted file mode 100644
index 6c4c7f9803..0000000000
--- a/embedding-calculator/srcext/insightface/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "alignment/SDUNet"]
- path = alignment/SDUNet
- url = https://github.com/deepinsight/SDUNet
diff --git a/embedding-calculator/srcext/insightface/3rdparty/operator/amsoftmax-inl.h b/embedding-calculator/srcext/insightface/3rdparty/operator/amsoftmax-inl.h
deleted file mode 100644
index d899bbbfa1..0000000000
--- a/embedding-calculator/srcext/insightface/3rdparty/operator/amsoftmax-inl.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/*!
- * Copyright (c) 2018 by Contributors
- * \file amsoftmax-inl.h
- * \brief AmSoftmax from <Additive Margin Softmax for Face Verification>
- * \author Jia Guo
- */
-#ifndef MXNET_OPERATOR_AMSOFTMAX_INL_H_
-#define MXNET_OPERATOR_AMSOFTMAX_INL_H_
-
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <mxnet/operator.h>
-#include <map>
-#include <vector>