Skip to content

Commit

Permalink
Provide err msg empty file location (#120)
Browse files Browse the repository at this point in the history
* added exception for empty file list

* added batching, randomness and multiple file extensions to find_files

* added more default filetypes to find_files

* changed summary.py logic (#121)

* changed summary.py logic

* fixing test_summary

* added macos for testing

* fixed_display_test

* fixed docs and exceptions

* added dropdown menu for summary

* added new SummaryDetector to AnalysisExplorer

* bug fixing

* code improving

* fixed test_display

* fixed code smells

* reduce tests for macos

* added some tests and exceptions for summary init

* changed CI, runs pytest independently

* exclude test_analysisExplorer from macos in CI

* moved some tests from test_init_summary to test_advanced_init_summary and mark them as long

---------

Co-authored-by: Inga Ulusoy <[email protected]>

* fixing coverage report (#126)

* fixed coverage report

---------

Co-authored-by: Petr Andriushchenko <[email protected]>
Co-authored-by: Inga Ulusoy <[email protected]>
  • Loading branch information
3 people authored Jun 28, 2023
1 parent 187d380 commit 2a87bc5
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 14 deletions.
69 changes: 67 additions & 2 deletions ammico/test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,76 @@
import json
import pandas as pd
import ammico.utils as ut
import pytest
import os


def test_find_files(get_path):
    """Check find_files for extension matching, limits, batching, shuffling and errors."""
    # Patterns are file extensions, so "*.png" is searched as "*.*.png";
    # no such file is expected below the working directory.
    with pytest.raises(FileNotFoundError):
        ut.find_files(path=".", pattern="*.png")

    # a single extension may be given with or without the leading dot
    result_jpg = ut.find_files(path=get_path, pattern=".jpg", recursive=True, limit=10)
    assert 0 < len(result_jpg) <= 10

    result_png = ut.find_files(path=get_path, pattern=".png", recursive=True, limit=10)
    assert 0 < len(result_png) <= 10

    # several extensions at once, with different integer limits
    result_png_jpg = ut.find_files(
        path=get_path, pattern=["png", "jpg"], recursive=True, limit=10
    )
    assert 0 < len(result_png_jpg) <= 10

    result_png_jpg = ut.find_files(
        path=get_path, pattern=["png", "jpg"], recursive=True, limit=4
    )
    assert 0 < len(result_png_jpg) <= 4

    # a two-element limit selects the batch results[2:4]
    result_png_jpg = ut.find_files(
        path=get_path, pattern=["png", "jpg"], recursive=True, limit=[2, 4]
    )
    assert 0 < len(result_png_jpg) <= 2

    # the parent directory is expected to hold images only in subdirectories
    one_dir_up_path = os.path.join(get_path, "..")
    with pytest.raises(FileNotFoundError):
        ut.find_files(
            path=one_dir_up_path, pattern=["png", "jpg"], recursive=False, limit=[2, 4]
        )

    result_png_jpg = ut.find_files(
        path=one_dir_up_path, pattern=["png", "jpg"], recursive=True, limit=[2, 4]
    )
    assert 0 < len(result_png_jpg) <= 2

    # None and -1 both disable the limit
    result_png_jpg = ut.find_files(
        path=get_path, pattern=["png", "jpg"], recursive=True, limit=None
    )
    assert 0 < len(result_png_jpg)
    result_png_jpg = ut.find_files(
        path=get_path, pattern=["png", "jpg"], recursive=True, limit=-1
    )
    assert 0 < len(result_png_jpg)

    # different seeds give different selections of the same size
    result_png_jpg_rdm1 = ut.find_files(
        path=get_path, pattern=["png", "jpg"], recursive=True, limit=10, random_seed=1
    )
    result_png_jpg_rdm2 = ut.find_files(
        path=get_path, pattern=["png", "jpg"], recursive=True, limit=10, random_seed=2
    )
    assert result_png_jpg_rdm1 != result_png_jpg_rdm2
    assert len(result_png_jpg_rdm1) == len(result_png_jpg_rdm2)

    # invalid limits are rejected with ValueError
    with pytest.raises(ValueError):
        ut.find_files(path=get_path, pattern=["png", "jpg"], recursive=True, limit=-2)
    with pytest.raises(ValueError):
        ut.find_files(
            path=get_path, pattern=["png", "jpg"], recursive=True, limit=[2, 4, 5]
        )
    with pytest.raises(ValueError):
        ut.find_files(path=get_path, pattern=["png", "jpg"], recursive=True, limit=[2])
    with pytest.raises(ValueError):
        ut.find_files(
            path=get_path, pattern=["png", "jpg"], recursive=True, limit="limit"
        )


def test_initialize_dict(get_path):
Expand Down
89 changes: 77 additions & 12 deletions ammico/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@
from pandas import DataFrame, read_csv
import pooch
import importlib_resources
import collections
import random


# Handle to the resources of the "ammico" package, as returned by importlib_resources.files.
pkg = importlib_resources.files("ammico")


def iterable(arg):
    """Tell whether ``arg`` is an iterable other than a string.

    Args:
        arg: Any object.

    Returns:
        bool: False for ``str`` instances and for objects that are not
            ``collections.abc.Iterable``; True for every other iterable.
    """
    # strings are iterable, but callers want them handled as single values
    if isinstance(arg, str):
        return False
    return isinstance(arg, collections.abc.Iterable)


class DownloadResource:
"""A remote resource that needs on demand downloading.
Expand Down Expand Up @@ -48,31 +54,90 @@ def analyse_image(self):
raise NotImplementedError()


def _match_pattern(path, pattern, recursive):
    """Find all files below ``path`` that have the file extension ``pattern``.

    Helper function for find_files; handles a single extension.

    Args:
        path: Base directory of the search.
        pattern (str): File extension, given either as ".ext" or as "ext".
        recursive (bool): If True, subdirectories are searched as well.

    Returns:
        list: The matching file names including the path. The order is the
            one produced by glob and is not sorted.
    """
    # accept both ".ext" and "ext"
    extension = pattern[1:] if pattern.startswith(".") else pattern

    # The base directory is a literal location, not a pattern: escape glob
    # metacharacters so that directories such as "run[1]" are searched
    # instead of being interpreted as a character class.
    base = glob.escape(str(path))

    if recursive:
        search_path = f"{base}/**/*.{extension}"
    else:
        search_path = f"{base}/*.{extension}"
    return glob.glob(search_path, recursive=recursive)


def _limit_results(results, limit):
    """Cut a list of results down to a maximum number or to a batch.

    Helper function for find_files.

    Args:
        results (list): The complete list of results.
        limit: One of
            * None or -1: return all results,
            * a non-negative int n: return the first n results (0 gives an
              empty list),
            * an iterable with exactly two elements (start, stop): return
              the batch ``results[start:stop]``.

    Returns:
        list: A new list with the selected results.

    Raises:
        ValueError: If limit is an integer below -1, an iterable that does
            not have exactly two elements, a string, or any other type.
    """
    # use -1 or None to return all results
    if limit is None or limit == -1:
        limit = len(results)

    if isinstance(limit, int):
        if limit < -1:
            raise ValueError("limit must be an integer greater than 0 or equal to -1")
        return results[:limit]

    # Materialise the bounds once, so that iterables without len() or
    # indexing (generators, sets) are handled instead of failing with a
    # TypeError. Strings are iterable but never a valid limit.
    bounds = None
    if not isinstance(limit, str):
        try:
            bounds = tuple(limit)
        except TypeError:
            bounds = None  # not iterable at all

    if bounds is None or len(bounds) != 2:
        raise ValueError(
            f"limit must be an integer or a tuple of length 2, but is {limit}"
        )

    start, stop = bounds
    return results[start:stop]


def find_files(
    path: str = None,
    pattern=("png", "jpg", "jpeg", "gif", "webp", "avif", "tiff"),
    recursive: bool = True,
    limit=20,
    random_seed: int = None,
) -> list:
    """Find image files on the file system.

    Args:
        path (str, optional): The base directory where we are looking for the images. Defaults
            to None, which uses the XDG data directory if set or the current
            working directory otherwise.
        pattern (str|list, optional): The file extension(s) that the filenames should have.
            Use either '.ext' or just 'ext'; a single string or a list of strings.
            Defaults to ["png", "jpg", "jpeg", "gif", "webp", "avif", "tiff"].
        recursive (bool, optional): Whether to recurse into subdirectories. Default is set to True.
        limit (int/list, optional): The maximum number of images to be found.
            Provide a list or tuple of length 2 to batch the images.
            Defaults to 20. To return all images, set to None or -1.
        random_seed (int, optional): The random seed to use for shuffling the images.
            If None is provided the data will not be shuffled. Defaults to None.

    Returns:
        list: A list with all filenames including the path.

    Raises:
        FileNotFoundError: If no file with one of the extensions is found.
        ValueError: If limit is not valid (raised by _limit_results).
    """

    if path is None:
        path = os.environ.get("XDG_DATA_HOME", ".")

    # normalise to a list so that a single extension and several are handled alike
    patterns = [pattern] if isinstance(pattern, str) else list(pattern)

    results = []
    for extension in patterns:
        results.extend(_match_pattern(path, extension, recursive=recursive))

    if not results:
        raise FileNotFoundError(f"No files found in {path} with pattern '{patterns}'")

    if random_seed is not None:
        # glob order depends on the file system; sort first so that the same
        # seed selects the same files on every machine
        results.sort()
        # a private generator keeps the caller's global random state untouched
        random.Random(random_seed).shuffle(results)

    return _limit_results(results, limit)


def initialize_dict(filelist: list) -> dict:
Expand Down

0 comments on commit 2a87bc5

Please sign in to comment.