From 2c676e885b8e3ed2ad2cbc4fab1ae0f11e0ad537 Mon Sep 17 00:00:00 2001 From: Dariiiii Date: Thu, 4 Jul 2024 13:53:20 +0300 Subject: [PATCH 1/5] v1.0 --- .../slide_text_volume_check.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 app/main/checks/presentation_checks/slide_text_volume_check.py diff --git a/app/main/checks/presentation_checks/slide_text_volume_check.py b/app/main/checks/presentation_checks/slide_text_volume_check.py new file mode 100644 index 00000000..049e029f --- /dev/null +++ b/app/main/checks/presentation_checks/slide_text_volume_check.py @@ -0,0 +1,28 @@ +from ..base_check import BasePresCriterion, answer + + +class SlideTextVolumeCheck(BasePresCriterion): + label = 'Заголовки слайдов не дублируются' + description = 'Проверка на дублируемость заголовков слайдов' + id = 'slide_text_volume_check' + + def __init__(self, file_info, min_count_words_on_slide=40, + min_count_paragraphs=2, min_count_words_in_paragraph=20, + max_count_words_on_slide=100, max_count_paragraphs=5, + max_count_words_in_paragraph=50, + slides_with_required_list=["Цели и задачи", "Заключение"]): + super().__init__(file_info) + self.min_count_words_on_slide = min_count_words_on_slide + self.min_count_paragraphs = min_count_paragraphs + self.min_count_words_in_paragraph = min_count_words_in_paragraph + self.max_count_words_on_slide = max_count_words_on_slide + self.max_count_paragraphs = max_count_paragraphs + self.max_count_words_in_paragraph = max_count_words_in_paragraph + self.slides_with_required_list = slides_with_required_list + + def check(self): + result_str = '' + if not result_str: + return answer(True, 'Пройдена!') + else: + return answer(False, result_str) \ No newline at end of file From 50be9631eee73e40bd6502888d73934c60aad351 Mon Sep 17 00:00:00 2001 From: Dariiiii Date: Thu, 4 Jul 2024 18:23:40 +0300 Subject: [PATCH 2/5] v1.1 --- app/main/check_packs/pack_config.py | 1 + .../checks/presentation_checks/__init__.py | 1 + .../slide_text_volume_check.py | 69 +++++++++++++++++-- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 598c3cc2..0e3c3bde 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -18,6 +18,7 @@ ['pres_empty_slide'], ['theme_in_pres_check'], ['verify_git_link'], + ['slide_text_volume_check'], ] BASE_REPORT_CRITERION = [ ["simple_check"], diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py index d605c1d3..f26e20b9 100644 --- a/app/main/checks/presentation_checks/__init__.py +++ b/app/main/checks/presentation_checks/__init__.py @@ -13,3 +13,4 @@ from .find_theme_in_pres import FindThemeInPres from .verify_git_link import PresVerifyGitLinkCheck from .empty_slide_check import PresEmptySlideCheck +from .slide_text_volume_check import SlideTextVolumeCheck \ No newline at end of file diff --git a/app/main/checks/presentation_checks/slide_text_volume_check.py b/app/main/checks/presentation_checks/slide_text_volume_check.py index 049e029f..450b60b3 100644 --- a/app/main/checks/presentation_checks/slide_text_volume_check.py +++ b/app/main/checks/presentation_checks/slide_text_volume_check.py @@ -2,15 +2,15 @@ class SlideTextVolumeCheck(BasePresCriterion): - label = 'Заголовки слайдов не дублируются' - description = 'Проверка на дублируемость заголовков слайдов' + label = 'Проверка объема текста на каждом слайде' + description = 'Объем текста на каждом слайде (за исключением титульного и запасных) должен соответсвовать критериям.' id = 'slide_text_volume_check' - def __init__(self, file_info, min_count_words_on_slide=40, - min_count_paragraphs=2, min_count_words_in_paragraph=20, + def __init__(self, file_info, min_count_words_on_slide=30, + min_count_paragraphs=2, min_count_words_in_paragraph=10, max_count_words_on_slide=100, max_count_paragraphs=5, max_count_words_in_paragraph=50, - slides_with_required_list=["Цели и задачи", "Заключение"]): + slides_with_required_list=["Цель и задачи", "Заключение"]): super().__init__(file_info) self.min_count_words_on_slide = min_count_words_on_slide self.min_count_paragraphs = min_count_paragraphs @@ -22,7 +22,64 @@ def __init__(self, file_info, min_count_words_on_slide=40, def check(self): result_str = '' + text_from_slides = self.file.get_text_from_slides() + titles = self.file.get_titles() + slides_info = [] + if len(titles) == 0 or len(text_from_slides) == 0: + return answer(False, 'Презентация пуста или заголовки не найдены.') + for i in range(len(titles)): + if "Санкт-Петербургский государственный" in titles[i]: + continue + if "Запасные слайды" in titles[i]: + break + required_list = False + if titles[i] in self.slides_with_required_list: + required_list = True + slides_info.append(self.slide_text_analysis(i + 1, text_from_slides[i], required_list)) + for slide_info in slides_info: + res = '' + link = self.format_page_link([slide_info['page']]) + if slide_info['count_words_on_slide'] < self.min_count_words_on_slide: + res += f'Количество слов на слайде: {slide_info["count_words_on_slide"]};
' + elif slide_info['count_words_on_slide'] > self.max_count_words_on_slide: + res += f'Количество слов на слайде: {slide_info["count_words_on_slide"]};
' + if slide_info['count_paragraphs'] < self.min_count_paragraphs: + res += f'Количество абзацев на слайде: {slide_info["count_paragraphs"]};
' + if slide_info['count_paragraphs'] > self.max_count_paragraphs: + res += f'Количество абзацев на слайде: {slide_info["count_paragraphs"]};
' + paragraphs = slide_info['paragraphs'] + for i in range(len(paragraphs)): + if paragraphs[i] < self.min_count_words_in_paragraph: + res += f'Количество слов в абзаце № {i + 1}: {paragraphs[i]};
' + if paragraphs[i] > self.max_count_words_in_paragraph: + res += f'Количество слов в абзаце № {i + 1}: {paragraphs[i]};
' + if slide_info['required_list'] and not slide_info['has_list']: + res += f'На данном слайде наличие списка является обязательным;' + if res: + result_str = result_str + f'Слайд {link}:
' + res + if not result_str: return answer(True, 'Пройдена!') else: - return answer(False, result_str) \ No newline at end of file + result_str += f'Количество слов на слайде должно быть больше {self.min_count_words_on_slide} и меньше {self.max_count_words_on_slide};
' \ + f'Количество абзацев на слайде должно быть больше {self.min_count_paragraphs} и меньше {self.max_count_paragraphs};
' \ + f'Количество слов в абзаце должно быть больше {self.min_count_words_in_paragraph} и меньше {self.max_count_words_in_paragraph};
' + return answer(False, result_str) + + def slide_text_analysis(self, page, text, required_list): + if text is None: + text = '' + paragraphs = [p for p in text.split('\n') if p.strip()] + slide_info = { + 'page': page, + 'required_list': required_list, + 'paragraphs': [], + 'count_paragraphs': len(paragraphs), + 'count_words_on_slide': 0, + 'has_list': False + } + for paragraph in paragraphs: + slide_info['paragraphs'].append(len(paragraph.split())) + # has_list??? + slide_info['count_words_on_slide'] = sum(slide_info['paragraphs']) + return slide_info \ No newline at end of file From d53b4ca00edaffcc0b822211b69416c0fb8d5fb9 Mon Sep 17 00:00:00 2001 From: Dariiiii Date: Fri, 5 Jul 2024 00:22:33 +0300 Subject: [PATCH 3/5] v1.2 --- .../presentation_checks/slide_text_volume_check.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/app/main/checks/presentation_checks/slide_text_volume_check.py b/app/main/checks/presentation_checks/slide_text_volume_check.py index 450b60b3..9723b779 100644 --- a/app/main/checks/presentation_checks/slide_text_volume_check.py +++ b/app/main/checks/presentation_checks/slide_text_volume_check.py @@ -27,9 +27,7 @@ def check(self): slides_info = [] if len(titles) == 0 or len(text_from_slides) == 0: return answer(False, 'Презентация пуста или заголовки не найдены.') - for i in range(len(titles)): - if "Санкт-Петербургский государственный" in titles[i]: - continue + for i in range(1, len(titles)): if "Запасные слайды" in titles[i]: break required_list = False @@ -54,14 +52,14 @@ def check(self): if paragraphs[i] > self.max_count_words_in_paragraph: res += f'Количество слов в абзаце № {i + 1}: {paragraphs[i]};
' if slide_info['required_list'] and not slide_info['has_list']: - res += f'На данном слайде наличие списка является обязательным;' + res += f'На данном слайде наличие списка является обязательным;
' if res: - result_str = result_str + f'Слайд {link}:
' + res + result_str = result_str + f'
Слайд {link}:
' + res if not result_str: return answer(True, 'Пройдена!') else: - result_str += f'Количество слов на слайде должно быть больше {self.min_count_words_on_slide} и меньше {self.max_count_words_on_slide};
' \ + result_str += f'
Количество слов на слайде должно быть больше {self.min_count_words_on_slide} и меньше {self.max_count_words_on_slide};
' \ f'Количество абзацев на слайде должно быть больше {self.min_count_paragraphs} и меньше {self.max_count_paragraphs};
' \ f'Количество слов в абзаце должно быть больше {self.min_count_words_in_paragraph} и меньше {self.max_count_words_in_paragraph};
' return answer(False, result_str) @@ -70,6 +68,7 @@ def slide_text_analysis(self, page, text, required_list): if text is None: text = '' paragraphs = [p for p in text.split('\n') if p.strip()] + paragraphs = paragraphs[1:-1] slide_info = { 'page': page, 'required_list': required_list, From ac7513b2504ac3f7375b7b2a3ec813d7e46fd2c0 Mon Sep 17 00:00:00 2001 From: Dariiiii Date: Sat, 6 Jul 2024 14:26:42 +0300 Subject: [PATCH 4/5] allow_only_image_or_table --- .../slide_text_volume_check.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/app/main/checks/presentation_checks/slide_text_volume_check.py b/app/main/checks/presentation_checks/slide_text_volume_check.py index 9723b779..48fa5376 100644 --- a/app/main/checks/presentation_checks/slide_text_volume_check.py +++ b/app/main/checks/presentation_checks/slide_text_volume_check.py @@ -10,7 +10,8 @@ def __init__(self, file_info, min_count_words_on_slide=30, min_count_paragraphs=2, min_count_words_in_paragraph=10, max_count_words_on_slide=100, max_count_paragraphs=5, max_count_words_in_paragraph=50, - slides_with_required_list=["Цель и задачи", "Заключение"]): + slides_with_required_list=["Цель и задачи", "Заключение"], + allow_only_image_or_table=True): super().__init__(file_info) self.min_count_words_on_slide = min_count_words_on_slide self.min_count_paragraphs = min_count_paragraphs @@ -19,24 +20,31 @@ def __init__(self, file_info, min_count_words_on_slide=30, self.max_count_paragraphs = max_count_paragraphs self.max_count_words_in_paragraph = max_count_words_in_paragraph self.slides_with_required_list = slides_with_required_list + self.allow_only_image_or_table = allow_only_image_or_table def check(self): result_str = '' + slides = self.file.slides text_from_slides = self.file.get_text_from_slides() titles = self.file.get_titles() slides_info = [] if len(titles) == 0 or len(text_from_slides) == 0: return answer(False, 'Презентация пуста или заголовки не найдены.') for i in range(1, len(titles)): + page_with_images_or_tables = False + required_list = False if "Запасные слайды" in titles[i]: break - required_list = False if titles[i] in self.slides_with_required_list: required_list = True - slides_info.append(self.slide_text_analysis(i + 1, text_from_slides[i], required_list)) + if len(slides[i].get_images()) > 0 or len(slides[i].get_table()) > 0: + page_with_images_or_tables = True + slides_info.append(self.slide_text_analysis(i + 1, text_from_slides[i], required_list, page_with_images_or_tables, titles[i])) for slide_info in slides_info: res = '' link = self.format_page_link([slide_info['page']]) + if self.allow_only_image_or_table and slide_info['has_image_or_table']: + continue if slide_info['count_words_on_slide'] < self.min_count_words_on_slide: res += f'Количество слов на слайде: {slide_info["count_words_on_slide"]};
' elif slide_info['count_words_on_slide'] > self.max_count_words_on_slide: @@ -64,21 +72,22 @@ def check(self): f'Количество слов в абзаце должно быть больше {self.min_count_words_in_paragraph} и меньше {self.max_count_words_in_paragraph};
' return answer(False, result_str) - def slide_text_analysis(self, page, text, required_list): + def slide_text_analysis(self, page, text, required_list, page_with_images_or_tables, title): if text is None: text = '' - paragraphs = [p for p in text.split('\n') if p.strip()] - paragraphs = paragraphs[1:-1] + paragraphs = [p.strip() for p in text.split('\n') if p.strip() and not p.strip().isnumeric() and not p.strip() in title] slide_info = { 'page': page, 'required_list': required_list, 'paragraphs': [], 'count_paragraphs': len(paragraphs), 'count_words_on_slide': 0, - 'has_list': False + 'has_list': True, + 'has_image_or_table': page_with_images_or_tables, } + for paragraph in paragraphs: slide_info['paragraphs'].append(len(paragraph.split())) - # has_list??? + # The variable has_list is currently set to true; after creating the check, set it to false." slide_info['count_words_on_slide'] = sum(slide_info['paragraphs']) return slide_info \ No newline at end of file From 28dc5c88a40a4a0bc7895fb681dd19b8c286619e Mon Sep 17 00:00:00 2001 From: Dariiiii Date: Sat, 6 Jul 2024 16:54:36 +0300 Subject: [PATCH 5/5] 558_upgrade_slide_text_volume_check --- app/main/check_packs/pack_config.py | 1 + .../slide_text_volume_check.py | 66 ++++++++++++------- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 0e3c3bde..845d6f8e 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -19,6 +19,7 @@ ['theme_in_pres_check'], ['verify_git_link'], ['slide_text_volume_check'], + ['slide_text_volume_check', {'work_mode': 'Заключение'}], ] BASE_REPORT_CRITERION = [ ["simple_check"], diff --git a/app/main/checks/presentation_checks/slide_text_volume_check.py b/app/main/checks/presentation_checks/slide_text_volume_check.py index 48fa5376..d212ebcd 100644 --- a/app/main/checks/presentation_checks/slide_text_volume_check.py +++ b/app/main/checks/presentation_checks/slide_text_volume_check.py @@ -1,18 +1,23 @@ from ..base_check import BasePresCriterion, answer +from utils import get_text_from_slides +from nlp.stemming import Stemming +import re +WORK_MODE = ["all_slides", "Заключение"] class SlideTextVolumeCheck(BasePresCriterion): label = 'Проверка объема текста на каждом слайде' description = 'Объем текста на каждом слайде (за исключением титульного и запасных) должен соответсвовать критериям.' id = 'slide_text_volume_check' - def __init__(self, file_info, min_count_words_on_slide=30, + def __init__(self, file_info, work_mode="all_slides",min_count_words_on_slide=30, min_count_paragraphs=2, min_count_words_in_paragraph=10, max_count_words_on_slide=100, max_count_paragraphs=5, max_count_words_in_paragraph=50, slides_with_required_list=["Цель и задачи", "Заключение"], allow_only_image_or_table=True): super().__init__(file_info) + self.work_mode = work_mode self.min_count_words_on_slide = min_count_words_on_slide self.min_count_paragraphs = min_count_paragraphs self.min_count_words_in_paragraph = min_count_words_in_paragraph @@ -24,46 +29,41 @@ def __init__(self, file_info, min_count_words_on_slide=30, def check(self): result_str = '' - slides = self.file.slides - text_from_slides = self.file.get_text_from_slides() - titles = self.file.get_titles() + slides = self.install_work_mode() slides_info = [] - if len(titles) == 0 or len(text_from_slides) == 0: - return answer(False, 'Презентация пуста или заголовки не найдены.') - for i in range(1, len(titles)): + if len(slides) == 0: + return answer(False, 'Презентация пуста или слайды не найдены.') + for slide in slides: + title = slide.get_title() + text = slide.get_text() + page = slide.get_page_number()[0] page_with_images_or_tables = False required_list = False - if "Запасные слайды" in titles[i]: + if "Запасные слайды" in title: break - if titles[i] in self.slides_with_required_list: + if title in self.slides_with_required_list: required_list = True - if len(slides[i].get_images()) > 0 or len(slides[i].get_table()) > 0: + if len(slide.get_images()) > 0 or len(slide.get_table()) > 0: page_with_images_or_tables = True - slides_info.append(self.slide_text_analysis(i + 1, text_from_slides[i], required_list, page_with_images_or_tables, titles[i])) + slides_info.append(self.slide_text_analysis(title, page, text, required_list, page_with_images_or_tables,)) for slide_info in slides_info: res = '' link = self.format_page_link([slide_info['page']]) if self.allow_only_image_or_table and slide_info['has_image_or_table']: continue - if slide_info['count_words_on_slide'] < self.min_count_words_on_slide: + if slide_info['count_words_on_slide'] <= self.min_count_words_on_slide or slide_info['count_words_on_slide'] >= self.max_count_words_on_slide: res += f'Количество слов на слайде: {slide_info["count_words_on_slide"]};
' - elif slide_info['count_words_on_slide'] > self.max_count_words_on_slide: - res += f'Количество слов на слайде: {slide_info["count_words_on_slide"]};
' - if slide_info['count_paragraphs'] < self.min_count_paragraphs: - res += f'Количество абзацев на слайде: {slide_info["count_paragraphs"]};
' - if slide_info['count_paragraphs'] > self.max_count_paragraphs: + if slide_info['count_paragraphs'] <= self.min_count_paragraphs or slide_info['count_paragraphs'] >= self.max_count_paragraphs: res += f'Количество абзацев на слайде: {slide_info["count_paragraphs"]};
' paragraphs = slide_info['paragraphs'] for i in range(len(paragraphs)): - if paragraphs[i] < self.min_count_words_in_paragraph: - res += f'Количество слов в абзаце № {i + 1}: {paragraphs[i]};
' - if paragraphs[i] > self.max_count_words_in_paragraph: + if paragraphs[i] <= self.min_count_words_in_paragraph or paragraphs[i] >= self.max_count_words_in_paragraph: res += f'Количество слов в абзаце № {i + 1}: {paragraphs[i]};
' if slide_info['required_list'] and not slide_info['has_list']: res += f'На данном слайде наличие списка является обязательным;
' if res: result_str = result_str + f'
Слайд {link}:
' + res - + if not result_str: return answer(True, 'Пройдена!') else: @@ -72,7 +72,7 @@ def check(self): f'Количество слов в абзаце должно быть больше {self.min_count_words_in_paragraph} и меньше {self.max_count_words_in_paragraph};
' return answer(False, result_str) - def slide_text_analysis(self, page, text, required_list, page_with_images_or_tables, title): + def slide_text_analysis(self, title, page, text, required_list, page_with_images_or_tables): if text is None: text = '' paragraphs = [p.strip() for p in text.split('\n') if p.strip() and not p.strip().isnumeric() and not p.strip() in title] @@ -90,4 +90,24 @@ def slide_text_analysis(self, page, text, required_list, page_with_images_or_tab slide_info['paragraphs'].append(len(paragraph.split())) # The variable has_list is currently set to true; after creating the check, set it to false." slide_info['count_words_on_slide'] = sum(slide_info['paragraphs']) - return slide_info \ No newline at end of file + return slide_info + + def install_work_mode(self): + if self.work_mode == WORK_MODE[0]: + slides = self.file.slides[1:] + elif self.work_mode == WORK_MODE[1]: + for slide in self.file.slides: + if self.work_mode in slide.get_title(): + slides = [slide] + break + stemming = Stemming() + goal_and_tasks = get_text_from_slides(self.file,"Цель и задачи") + tasks = stemming.get_sentences(goal_and_tasks, True) + ignore = re.compile('[0-9][.]?|Задачи:|‹#›') + cleaned_tasks = [task for task in tasks if not re.fullmatch(ignore, task)] + task_count = len(cleaned_tasks) + self.min_count_paragraphs = task_count - 1 + self.max_count_paragraphs = task_count + 3 + else: + slides = [] + return slides \ No newline at end of file