diff --git a/Dockerfile b/Dockerfile index 46e01450..372c8d12 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,10 @@ - # Copyright (c) Microsoft Corporation. All rights reserved. -# Dockerfile # https://eng.ms/docs/more/containers-secure-supply-chain/approved-images FROM mcr.microsoft.com/oryx/python:3.10 RUN curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg RUN apt-get update -y -ENV IS_DOCKER True +RUN apt-get install wkhtmltopdf -y # Install dependencies WORKDIR ./ diff --git a/app/Home.py b/app/Home.py index f17a4c60..6ba8561c 100644 --- a/app/Home.py +++ b/app/Home.py @@ -1,36 +1,11 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import streamlit as st +from components.app_loader import load_multipage_app import util.mermaid as mermaid -from streamlit_javascript import st_javascript -import util.session_variables - -def get_user(sv): - if sv.mode.value != 'cloud': - return - css=''' - [data-testid="stSidebarNavItems"] { - max-height: 100vh - } - ''' - st.markdown(f'', unsafe_allow_html=True) - js_code = """await fetch("/.auth/me") - .then(function(response) {return response.json();}) - """ - return_value = st_javascript(js_code) - - username = None - if return_value == 0: - pass # this is the result before the actual value is returned - elif isinstance(return_value, list) and len(return_value) > 0: - username = return_value[0]["user_id"] - sv.username.value = username - st.sidebar.write(f"Logged in as {username}") - else: - st.warning(f"Could not directly read username from azure active directory: {return_value}.") def main(): st.set_page_config(layout="wide", initial_sidebar_state="expanded", page_title='Intelligence Toolkit | Home') - sv = util.session_variables.SessionVariables('home') - get_user(sv) + load_multipage_app() transparency_faq = open('./app/TransparencyFAQ.md', 'r').read() st.markdown(transparency_faq + '\n\n' + f"""\ diff --git a/app/workflows/group_narratives/classes.py b/app/components/__init__.py similarity index 100% rename from app/workflows/group_narratives/classes.py rename to app/components/__init__.py diff --git a/app/components/app_loader.py b/app/components/app_loader.py new file mode 100644 index 00000000..54192886 --- /dev/null +++ b/app/components/app_loader.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. +from javascript.styles import add_styles +import components.app_user as au +import streamlit as st + +def load_multipage_app(): + #Load user if logged in + user = au.app_user() + user.view_get_info() + + #load css + add_styles() + diff --git a/app/components/app_user.py b/app/components/app_user.py new file mode 100644 index 00000000..60f29b22 --- /dev/null +++ b/app/components/app_user.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. +import streamlit as st +from javascript.scripts import get_auth_user +from util.session_variables import SessionVariables + +class app_user: + + sv = None + + def __init__(self, sv = None): + if sv is not None: + self.sv = sv + else: + self.sv = SessionVariables('home') + self.login() + + def _get_info(self): + return self.sv.username.value + + def _set_user(self, username): + self.sv.username.value = username + + def view_get_info(self): + if self.sv.username.value: + st.sidebar.write(f"Logged in as {self.sv.username.value}") + + def _view_error_info(self, return_value): + st.warning(f"Could not directly read username from azure active directory: {return_value}.") + + def login(self): + if self.sv.mode.value != 'cloud': + return + return_value = get_auth_user() + username = None + if return_value == 0: + pass # this is the result before the actual value is returned + elif isinstance(return_value, list) and len(return_value) > 0: + username = return_value[0]["user_id"] + self._set_user(username) + else: + self._view_error_info(return_value) \ No newline at end of file diff --git a/app/workflows/group_narratives/functions.py b/app/javascript/__init__.py similarity index 100% rename from app/workflows/group_narratives/functions.py rename to app/javascript/__init__.py diff --git a/app/javascript/scripts.py b/app/javascript/scripts.py new file mode 100644 index 00000000..85a9fecf --- /dev/null +++ b/app/javascript/scripts.py @@ -0,0 +1,9 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. +from streamlit_javascript import st_javascript +import streamlit as st + +def get_auth_user(): + js_code = """await fetch("/.auth/me") + .then(function(response) {return response.json();}) + """ + return st_javascript(js_code) \ No newline at end of file diff --git a/app/javascript/styles.py b/app/javascript/styles.py new file mode 100644 index 00000000..459a6822 --- /dev/null +++ b/app/javascript/styles.py @@ -0,0 +1,25 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. +import streamlit as st + +style_sidebar = ''' + [data-testid="stSidebarNavItems"] { + max-height: 100vh + } +''' + +style_pdf = '''body { + font-family: 'helvetica'; +} +''' + +style_iframes = ''' + iframe { + display: none; + } +''' + +def add_styles(): + st.markdown(f'''''', unsafe_allow_html=True) diff --git a/app/pages/Attribute_Patterns.py b/app/pages/Attribute_Patterns.py index e66b86c3..ed3be0ec 100644 --- a/app/pages/Attribute_Patterns.py +++ b/app/pages/Attribute_Patterns.py @@ -1,6 +1,11 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import workflows.attribute_patterns.workflow +from components.app_loader import load_multipage_app +import streamlit as st def main(): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Attribute Patterns') + load_multipage_app() workflows.attribute_patterns.workflow.create() if __name__ == '__main__': diff --git a/app/pages/Data_Synthesis.py b/app/pages/Data_Synthesis.py index a03dc469..8b34d465 100644 --- a/app/pages/Data_Synthesis.py +++ b/app/pages/Data_Synthesis.py @@ -1,6 +1,11 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import workflows.data_synthesis.workflow +from components.app_loader import load_multipage_app +import streamlit as st def main(): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Data Synthesis') + load_multipage_app() workflows.data_synthesis.workflow.create() if __name__ == '__main__': diff --git a/app/pages/Group_Narratives.py b/app/pages/Group_Narratives.py index fb6685e8..8e7b41b9 100644 --- a/app/pages/Group_Narratives.py +++ b/app/pages/Group_Narratives.py @@ -1,6 +1,11 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import workflows.group_narratives.workflow +import streamlit as st +from components.app_loader import load_multipage_app def main(): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Group Narratives') + load_multipage_app() workflows.group_narratives.workflow.create() if __name__ == '__main__': diff --git a/app/pages/Question_Answering.py b/app/pages/Question_Answering.py index 5693255c..fac3dd33 100644 --- a/app/pages/Question_Answering.py +++ b/app/pages/Question_Answering.py @@ -1,6 +1,12 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import workflows.question_answering.workflow +from components.app_loader import load_multipage_app +import streamlit as st + def main(): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Question Answering') + load_multipage_app() workflows.question_answering.workflow.create() if __name__ == '__main__': diff --git a/app/pages/Record_Matching.py b/app/pages/Record_Matching.py index c576a645..c3b77acc 100644 --- a/app/pages/Record_Matching.py +++ b/app/pages/Record_Matching.py @@ -1,6 +1,11 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import workflows.record_matching.workflow +from components.app_loader import load_multipage_app +import streamlit as st def main(): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Record Matching') + load_multipage_app() workflows.record_matching.workflow.create() if __name__ == '__main__': diff --git a/app/pages/Risk_Networks.py b/app/pages/Risk_Networks.py index 158cc468..432ddb44 100644 --- a/app/pages/Risk_Networks.py +++ b/app/pages/Risk_Networks.py @@ -1,6 +1,11 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import workflows.risk_networks.workflow +from components.app_loader import load_multipage_app +import streamlit as st def main(): + st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Risk Networks') + load_multipage_app() workflows.risk_networks.workflow.create() if __name__ == '__main__': diff --git a/app/util/download_pdf.py b/app/util/download_pdf.py index 40edee1e..2493a914 100644 --- a/app/util/download_pdf.py +++ b/app/util/download_pdf.py @@ -3,20 +3,17 @@ import pdfkit from util.wkhtmltopdf import config_pdfkit, pdfkit_options import streamlit as st +from javascript.styles import style_pdf -css = '''body { - font-family: 'helvetica'; -} -''' #itk-label text_label = 'Report generated using Intelligence Toolkit (https://aka.ms/itk)' def add_download_pdf(name, text, button_text='Download PDF', is_markdown=True, disabled=False): if not name.endswith('.pdf'): name += '.pdf' - text = f'{text}
{text_label}' # Convert text to HTML if it's in Markdown format text = markdown2.markdown(text) if is_markdown else text + text = f' \n\n {text}
{text_label}' # Generate PDF from HTML string config_pdf = config_pdfkit() diff --git a/app/util/session_variable.py b/app/util/session_variable.py index a9e644d1..69c72a1e 100644 --- a/app/util/session_variable.py +++ b/app/util/session_variable.py @@ -1,3 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. from typing import Any import streamlit as st import traceback diff --git a/app/workflows/attribute_patterns/classes.py b/app/workflows/attribute_patterns/classes.py index 0ed1f2bc..368e4178 100644 --- a/app/workflows/attribute_patterns/classes.py +++ b/app/workflows/attribute_patterns/classes.py @@ -1,6 +1,4 @@ -import streamlit as st -import pandas as pd -import networkx as nx +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import numpy as np from collections import defaultdict diff --git a/app/workflows/attribute_patterns/workflow.py b/app/workflows/attribute_patterns/workflow.py index e84c229b..4310f50e 100644 --- a/app/workflows/attribute_patterns/workflow.py +++ b/app/workflows/attribute_patterns/workflow.py @@ -1,3 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import streamlit as st import pandas as pd import numpy as np @@ -21,7 +22,6 @@ def create(): workflow = 'attribute_patterns' - st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Attribute Patterns') sv = vars.SessionVariables('attribute_patterns') intro_tab, uploader_tab, detect_tab, explain_tab = st.tabs(['Attribute patterns workflow:', 'Create graph model', 'Detect patterns', 'Generate AI pattern reports']) df = None @@ -178,4 +178,4 @@ def create(): is_download_disabled = report_data == '' name = 'attribute_pattern_report' add_download_pdf(f'{name}.pdf', report_data, 'Download PDF report', disabled=is_download_disabled) - st.download_button('Download markdown report', data=report_data, file_name=f'{name}.md', mime='text/markdown', disabled=is_download_disabled) \ No newline at end of file + st.download_button('Download MD report', data=report_data, file_name=f'{name}.md', mime='text/markdown', disabled=is_download_disabled) \ No newline at end of file diff --git a/app/workflows/data_synthesis/workflow.py b/app/workflows/data_synthesis/workflow.py index 356f788a..410523c9 100644 --- a/app/workflows/data_synthesis/workflow.py +++ b/app/workflows/data_synthesis/workflow.py @@ -1,4 +1,4 @@ - +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import streamlit as st import pandas as pd import plotly.io as pio @@ -21,7 +21,6 @@ def create(): workflow = 'data_synthesis' - st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Data Synthesis') sv = vars.SessionVariables('data_synthesis') intro_tab, prepare_tab, generate_tab, queries_tab = st.tabs(['Data synthesis workflow:', 'Upload deidentified sensitive data', 'Generate anonymous synthetic data', 'Query and visualize data']) diff --git a/app/workflows/group_narratives/workflow.py b/app/workflows/group_narratives/workflow.py index d63708b6..bcdd35de 100644 --- a/app/workflows/group_narratives/workflow.py +++ b/app/workflows/group_narratives/workflow.py @@ -1,3 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import streamlit as st import pandas as pd @@ -10,7 +11,6 @@ def create(): workflow = 'group_narratives' - st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Group Narratives') sv = vars.SessionVariables('group_narratives') intro_tab, prepare_tab, summarize_tab, generate_tab = st.tabs(['Group narratives workflow:', 'Upload data to narrate', 'Prepare data summary', 'Generate AI group reports',]) @@ -205,4 +205,4 @@ def create(): is_download_disabled = report_data == '' reports_name = 'narrative_report' add_download_pdf(f'{reports_name}.pdf', report_data, button_text='Download PDF report', disabled=is_download_disabled) - st.download_button('Download markdown report', data=report_data, file_name=f'{reports_name}.md', mime='text/markdown', disabled=is_download_disabled) \ No newline at end of file + st.download_button('Download MD report', data=report_data, file_name=f'{reports_name}.md', mime='text/markdown', disabled=is_download_disabled) \ No newline at end of file diff --git a/app/workflows/question_answering/classes.py b/app/workflows/question_answering/classes.py index e617c773..0b4638de 100644 --- a/app/workflows/question_answering/classes.py +++ b/app/workflows/question_answering/classes.py @@ -1,3 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import numpy as np import tiktoken import scipy.spatial.distance diff --git a/app/workflows/question_answering/workflow.py b/app/workflows/question_answering/workflow.py index a6ce95a8..357b9fa8 100644 --- a/app/workflows/question_answering/workflow.py +++ b/app/workflows/question_answering/workflow.py @@ -1,3 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import numpy as np import streamlit as st from collections import Counter @@ -17,7 +18,6 @@ embedder = util.Embedder.create_embedder(config.cache_dir) def create(): - st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Question Answering') sv = vars.SessionVariables('question_answering') intro_tab, uploader_tab, mining_tab, report_tab = st.tabs(['Question answering workflow:', 'Upload data', 'Mine & match questions', 'Generate AI answer reports']) @@ -224,4 +224,4 @@ def create(): full_text = sv.answering_lazy_answer_text.value + '\n\n## Supporting FAQ\n\n' + re.sub(r' Q[\d]+: ', ' ', '\n\n'.join(sv.answering_matches.value.split('\n\n')[2:]), re.MULTILINE).replace('###### ', '### ') add_download_pdf(f'{name}.pdf', full_text, 'Download PDF report', disabled=is_download_disabled) - st.download_button('Download markdown report', data=full_text, file_name=f'{name}.md', mime='text/markdown', disabled=sv.answering_lazy_answer_text.value == '', key='lazy_download_button') + st.download_button('Download MD report', data=full_text, file_name=f'{name}.md', mime='text/markdown', disabled=sv.answering_lazy_answer_text.value == '', key='lazy_download_button') diff --git a/app/workflows/record_matching/workflow.py b/app/workflows/record_matching/workflow.py index d17a92dc..65513b6b 100644 --- a/app/workflows/record_matching/workflow.py +++ b/app/workflows/record_matching/workflow.py @@ -1,3 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import streamlit as st import polars as pl import pandas as pd @@ -18,7 +19,6 @@ embedder = util.Embedder.create_embedder(config.cache_dir) def create(): - st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Record Matching') sv = vars.SessionVariables('record_matching') if not os.path.exists(config.outputs_dir): diff --git a/app/workflows/risk_networks/workflow.py b/app/workflows/risk_networks/workflow.py index 8856fc5b..0046eaa6 100644 --- a/app/workflows/risk_networks/workflow.py +++ b/app/workflows/risk_networks/workflow.py @@ -1,3 +1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import streamlit as st import pandas as pd import networkx as nx @@ -28,7 +29,6 @@ embedder = util.Embedder.create_embedder(config.cache_dir) def create(): - st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_title='Intelligence Toolkit | Risk Networks') sv = vars.SessionVariables('risk_networks') if not os.path.exists(config.outputs_dir):