changed summmary.py logic #121

Merged
merged 32 commits into main from summary_fixing
Jun 27, 2023

Changes shown below are from 20 of the 32 commits.

Commits (32)
d21160d
changed summmary.py logic
piterand Jun 21, 2023
a3b799e
fixing test_summary
piterand Jun 21, 2023
aa6ed41
added macos for testing
piterand Jun 21, 2023
f2fee74
fixed_display_test
piterand Jun 21, 2023
8ab9139
fixed docs and exceptions
piterand Jun 22, 2023
5bdf325
fixed code smells
piterand Jun 22, 2023
0beb9c5
main merge
piterand Jun 22, 2023
19fee20
main merge 2
piterand Jun 22, 2023
6435f82
Merge branch 'main' into summary_fixing
piterand Jun 23, 2023
d642c8e
fixed code smells
piterand Jun 23, 2023
e4aa158
adding dropout menu for summary
piterand Jun 23, 2023
14e8147
added new SummaryDetector to AnalysisExplorer
piterand Jun 23, 2023
0100b67
bug fixing
piterand Jun 23, 2023
978c836
code improving
piterand Jun 23, 2023
05725d9
fixed test_display
piterand Jun 23, 2023
9559a7b
fixed code smells
piterand Jun 23, 2023
c84be2a
reduce tests for macos
iulusoy Jun 26, 2023
59fb515
try different tqdm version
iulusoy Jun 26, 2023
fd985a6
disabled MacOS CI testing
piterand Jun 26, 2023
884d2db
enable tests again; don't run on macos
iulusoy Jun 26, 2023
0dcc0a6
more tests and exceptions for init
iulusoy Jun 26, 2023
9ff20b5
reduce number of tests
iulusoy Jun 26, 2023
07f2a2c
put some tests in again
iulusoy Jun 26, 2023
23af428
changed CI, runs pytest independently
piterand Jun 26, 2023
d094809
added not gcv flag for pytest test/test_text.py command in CI
piterand Jun 26, 2023
0e911e0
added not long flag to CI tests
piterand Jun 26, 2023
332b344
added macos to CI and disable long test in multimodal_search
piterand Jun 26, 2023
e91a462
exclude test_analysisExplorer from macos in CI
piterand Jun 26, 2023
17a1804
Exclude another test from macos
piterand Jun 26, 2023
a4ab433
Exclude another 3 tests from macos
piterand Jun 26, 2023
f29a01c
moved some tests from test_init_summary to test_advanced_init_summary…
piterand Jun 26, 2023
54d86f9
Merge remote-tracking branch 'origin' into summary_fixing
piterand Jun 26, 2023
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
-        os: [ubuntu-22.04,windows-latest,macos-latest]
+        os: [ubuntu-22.04,windows-latest]
python-version: [3.9]
steps:
- name: Checkout repository
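The `not gcv` and `not long` flags mentioned in the commit messages above are pytest marker expressions. As a rough sketch of how that deselection works — assuming `gcv` and `long` are registered as markers in the project's pytest configuration (the test names below are illustrative, not ammico's actual tests):

```python
import pytest


@pytest.mark.gcv
def test_text_google_cloud_vision():
    """Needs Google Cloud Vision credentials; deselected in CI via `pytest -m "not gcv"`."""


@pytest.mark.long
def test_multimodal_search_full_run():
    """Long-running model download/inference test; deselected via `pytest -m "not long"`."""
```

Running `pytest -m "not gcv and not long"` deselects both groups while leaving the rest of the suite untouched.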
83 changes: 79 additions & 4 deletions ammico/display.py
@@ -24,6 +24,8 @@
"CAM16-UCS",
"DIN99",
]
SUMMARY_ANALYSIS_TYPE = ["summary_and_questions", "summary", "questions"]
SUMMARY_MODEL = ["base", "large"]


class AnalysisExplorer:
@@ -111,13 +113,17 @@ def __init__(self, mydict: dict) -> None:
State("setting_Emotion_emotion_threshold", "value"),
State("setting_Emotion_race_threshold", "value"),
State("setting_Color_delta_e_method", "value"),
State("setting_Summary_analysis_type", "value"),
State("setting_Summary_model", "value"),
State("setting_Summary_list_of_questions", "value"),
prevent_initial_call=True,
)(self._right_output_analysis)

self.app.callback(
Output("settings_TextDetector", "style"),
Output("settings_EmotionDetector", "style"),
Output("settings_ColorDetector", "style"),
Output("settings_Summary_Detector", "style"),
Input("Dropdown_select_Detector", "value"),
)(self._update_detector_setting)

@@ -240,6 +246,60 @@ def _create_setting_layout(self):
)
],
),
html.Div(
id="settings_Summary_Detector",
style={"display": "none"},
children=[
html.Div(
[
dcc.Dropdown(
options=SUMMARY_ANALYSIS_TYPE,
value="summary_and_questions",
id="setting_Summary_analysis_type",
)
],
style={
"width": "33%",
"display": "inline-block",
},
),
html.Div(
[
dcc.Dropdown(
options=SUMMARY_MODEL,
value="base",
id="setting_Summary_model",
)
],
style={
"width": "33%",
"display": "inline-block",
"margin-top": "10px",
},
),
html.Div(
[
html.Div(
"Please enter a question",
style={
"height": "50px",
"margin-top": "5px",
},
),
dcc.Input(
type="text",
id="setting_Summary_list_of_questions",
style={"height": "auto", "margin-bottom": "auto"},
),
],
style={
"width": "33%",
"display": "inline-block",
"margin-top": "10px",
},
),
],
),
],
)
return settings_layout
@@ -334,16 +394,19 @@ def _update_detector_setting(self, setting_input):
}

if setting_input == "TextDetector":
-            return display_flex, display_none, display_none
+            return display_flex, display_none, display_none, display_none

if setting_input == "EmotionDetector":
-            return display_none, display_flex, display_none
+            return display_none, display_flex, display_none, display_none

if setting_input == "ColorDetector":
-            return display_none, display_none, display_flex
+            return display_none, display_none, display_flex, display_none

if setting_input == "SummaryDetector":
return display_none, display_none, display_none, display_flex

else:
-            return display_none, display_none, display_none
+            return display_none, display_none, display_none, display_none

def _right_output_analysis(
self,
@@ -355,6 +418,9 @@
setting_emotion_emotion_threshold: int,
setting_emotion_race_threshold: int,
setting_color_delta_e_method: str,
setting_summary_analysis_type: str,
setting_summary_model: str,
setting_summary_list_of_questions: str,
) -> dict:
"""Callback function to perform analysis on the selected image and return the output.

@@ -396,6 +462,15 @@
image_copy,
delta_e_method=setting_color_delta_e_method,
)
elif detector_value == "SummaryDetector":
detector_class = identify_function(
image_copy,
analysis_type=setting_summary_analysis_type,
summary_model_type=setting_summary_model,
list_of_questions=[setting_summary_list_of_questions]
if (setting_summary_list_of_questions is not None)
else None,
)
else:
detector_class = identify_function(image_copy)
return detector_class.analyse_image()
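For readers following the callback changes: outside Dash, the new `SummaryDetector` branch boils down to a direct call along these lines. A minimal sketch — the import path and example filename are assumptions; the constructor arguments are the ones added in this PR:

```python
from ammico.summary import SummaryDetector  # assumed import path

# ammico keys each image by a sub-dictionary holding at least "filename".
subdict = {"filename": "data/example.jpg"}

detector = SummaryDetector(
    subdict,
    analysis_type="summary_and_questions",  # "summary", "questions", or both
    summary_model_type="base",              # blip_caption "base" or "large"
    list_of_questions=["Are there people in the image?"],
)
subdict = detector.analyse_image()
print(subdict["const_image_summary"])
print(subdict["3_non-deterministic summary"])
```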
163 changes: 140 additions & 23 deletions ammico/summary.py
@@ -5,9 +5,82 @@


class SummaryDetector(AnalysisMethod):
-    def __init__(self, subdict: dict) -> None:
+    def __init__(
self,
subdict: dict = {},
summary_model_type: str = "base",
analysis_type: str = "summary_and_questions",
list_of_questions: str = None,
summary_model=None,
summary_vis_processors=None,
summary_vqa_model=None,
summary_vqa_vis_processors=None,
summary_vqa_txt_processors=None,
) -> None:
"""
SummaryDetector class for analysing images using the blip_caption model.

Args:
subdict (dict, optional): Dictionary containing the image to be analysed. Defaults to {}.
summary_model_type (str, optional): Type of blip_caption model to use. Can be "base" or "large". Defaults to "base".
analysis_type (str, optional): Type of analysis to perform. Can be "summary", "questions" or "summary_and_questions". Defaults to "summary_and_questions".
list_of_questions (list, optional): List of questions to answer. Defaults to ["Are there people in the image?", "What is this picture about?"].
summary_model ([type], optional): blip_caption model. Defaults to None.
summary_vis_processors ([type], optional): Preprocessors for visual inputs. Defaults to None.
summary_vqa_model ([type], optional): blip_vqa model. Defaults to None.
summary_vqa_vis_processors ([type], optional): Preprocessors for vqa visual inputs. Defaults to None.
summary_vqa_txt_processors ([type], optional): Preprocessors for vqa text inputs. Defaults to None.

Raises:
ValueError: If analysis_type is not one of "summary", "questions" or "summary_and_questions".

Returns:
None.
"""

super().__init__(subdict)
if analysis_type not in ["summary", "questions", "summary_and_questions"]:
raise ValueError(
"analysis_type must be one of 'summary', 'questions' or 'summary_and_questions'"
)
self.summary_device = "cuda" if cuda.is_available() else "cpu"
self.summary_model_type = summary_model_type
self.analysis_type = analysis_type
if list_of_questions is None:
self.list_of_questions = [
"Are there people in the image?",
"What is this picture about?",
]
elif (not isinstance(list_of_questions, list)) or (None in list_of_questions):
raise ValueError("list_of_questions must be a list of string (questions)")
else:
self.list_of_questions = list_of_questions
if (
(summary_model is None)
and (summary_vis_processors is None)
and (analysis_type != "questions")
):
self.summary_model, self.summary_vis_processors = self.load_model(
model_type=summary_model_type
)
else:
self.summary_model = summary_model
self.summary_vis_processors = summary_vis_processors
if (
(summary_vqa_model is None)
and (summary_vqa_vis_processors is None)
and (summary_vqa_txt_processors is None)
and (analysis_type != "summary")
):
(
self.summary_vqa_model,
self.summary_vqa_vis_processors,
self.summary_vqa_txt_processors,
) = self.load_vqa_model()
else:
self.summary_vqa_model = summary_vqa_model
self.summary_vqa_vis_processors = summary_vqa_vis_processors
self.summary_vqa_txt_processors = summary_vqa_txt_processors

def load_model_base(self):
"""
@@ -63,32 +136,71 @@ def load_model(self, model_type: str):
summary_model, summary_vis_processors = select_model[model_type](self)
return summary_model, summary_vis_processors

-    def analyse_image(self, summary_model=None, summary_vis_processors=None):
def load_vqa_model(self):
"""
Load blip_vqa model and preprocessors for visual and text inputs from lavis.models.

Args:

Returns:
model (torch.nn.Module): model.
vis_processors (dict): preprocessors for visual inputs.
txt_processors (dict): preprocessors for text inputs.

"""
(
summary_vqa_model,
summary_vqa_vis_processors,
summary_vqa_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa",
model_type="vqav2",
is_eval=True,
device=self.summary_device,
)
return summary_vqa_model, summary_vqa_vis_processors, summary_vqa_txt_processors

def analyse_image(self):
"""
Analyse image with blip_caption model.

Args:

Returns:
self.subdict (dict): dictionary with analysis results.
"""
if self.analysis_type == "summary_and_questions":
self.analyse_summary()
self.analyse_questions(self.list_of_questions)
elif self.analysis_type == "summary":
self.analyse_summary()
elif self.analysis_type == "questions":
self.analyse_questions(self.list_of_questions)

return self.subdict

def analyse_summary(self):
"""
Create 1 constant and 3 non deterministic captions for image.

Args:
-            summary_model (str): model.
-            summary_vis_processors (str): preprocessors for visual inputs.

Returns:
-            self.subdict (dict): dictionary with constant image summary and 3 non deterministic summary.
+            self.subdict (dict): dictionary with analysis results.
"""
-        if summary_model is None and summary_vis_processors is None:
-            summary_model, summary_vis_processors = self.load_model_base()

path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
image = (
summary_vis_processors["eval"](raw_image)
self.summary_vis_processors["eval"](raw_image)
.unsqueeze(0)
.to(self.summary_device)
)
with no_grad():
self.subdict["const_image_summary"] = summary_model.generate(
self.subdict["const_image_summary"] = self.summary_model.generate(
{"image": image}
)[0]
self.subdict["3_non-deterministic summary"] = summary_model.generate(
self.subdict["3_non-deterministic summary"] = self.summary_model.generate(
{"image": image}, use_nucleus_sampling=True, num_captions=3
)
return self.subdict
@@ -103,32 +215,37 @@ def analyse_questions(self, list_of_questions: list[str]) -> dict:
Returns:
self.subdict (dict): dictionary with answers to questions.
"""
-        (
-            summary_vqa_model,
-            summary_vqa_vis_processors,
-            summary_vqa_txt_processors,
-        ) = load_model_and_preprocess(
-            name="blip_vqa",
-            model_type="vqav2",
-            is_eval=True,
-            device=self.summary_device,
-        )
if (
(self.summary_vqa_model is None)
and (self.summary_vqa_vis_processors is None)
and (self.summary_vqa_txt_processors is None)
):
(
self.summary_vqa_model,
self.summary_vqa_vis_processors,
self.summary_vqa_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa",
model_type="vqav2",
is_eval=True,
device=self.summary_device,
)
if len(list_of_questions) > 0:
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
image = (
summary_vqa_vis_processors["eval"](raw_image)
self.summary_vqa_vis_processors["eval"](raw_image)
.unsqueeze(0)
.to(self.summary_device)
)
question_batch = []
for quest in list_of_questions:
-                question_batch.append(summary_vqa_txt_processors["eval"](quest))
+                question_batch.append(self.summary_vqa_txt_processors["eval"](quest))
batch_size = len(list_of_questions)
image_batch = image.repeat(batch_size, 1, 1, 1)

with no_grad():
-                answers_batch = summary_vqa_model.predict_answers(
+                answers_batch = self.summary_vqa_model.predict_answers(
samples={"image": image_batch, "text_input": question_batch},
inference_method="generate",
)
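The practical upshot of moving the models onto `self` is one-time loading: previously, `analyse_questions` called `load_model_and_preprocess` on every invocation. A minimal batching sketch under the new constructor — the model attribute names come from this diff, while the import path and file names are illustrative:

```python
from ammico.summary import SummaryDetector  # assumed import path

images = {"img1": {"filename": "img1.jpg"}, "img2": {"filename": "img2.jpg"}}

# With default arguments the constructor loads blip_caption and blip_vqa once.
loader = SummaryDetector()

# Reuse the loaded models for every image instead of reloading per call.
for key, subdict in images.items():
    images[key] = SummaryDetector(
        subdict,
        analysis_type="summary_and_questions",
        summary_model=loader.summary_model,
        summary_vis_processors=loader.summary_vis_processors,
        summary_vqa_model=loader.summary_vqa_model,
        summary_vqa_vis_processors=loader.summary_vqa_vis_processors,
        summary_vqa_txt_processors=loader.summary_vqa_txt_processors,
    ).analyse_image()
```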