VGOS/app.py at main · hubblehox/VGOS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import io

import pandas as pd
import streamlit as st

# Title of the app
st.title("Teacher Data Processing App")

try:
    # Upload files
    teacher_data_file = st.file_uploader("Upload the teacher data Excel file", type="xlsx")
    criteria_file = st.file_uploader("Upload the subject mapping Excel file", type="xlsx")

    if teacher_data_file and criteria_file:
        try:
            # Read files
            teacher_df = pd.read_excel(teacher_data_file)
            criteria_df = pd.read_excel(criteria_file)

            # Extract criteria information
            try:
                criteria_subjects = criteria_df[['subject', 'subsubjects', 'grades']].values.tolist()
                criteria_grades = criteria_df['grades'].tolist()
            except KeyError as e:
                st.error(f"Error: The column '{e.args[0]}' is missing from the subject mapping file. Please check the file and try again.")
                st.stop()

            teacher_df['Teacher Name'] = teacher_df['firstName']
            teacher_df['Employee Code'] = teacher_df['employeeCode']
            teacher_df['Highest Grade'] = 'NA'
            teacher_df['Subject'] = 'NA'

            def refined_subject_matches(subject, criteria_subjects, highest_grade):
                """Return the main subjects that ``subject`` maps to at ``highest_grade``.

                A criteria row matches when its grade equals ``highest_grade`` and
                ``subject`` equals either the row's main subject or its sub-subject.
                Comparison ignores case and surrounding whitespace.

                Args:
                    subject: Subject value taken from a teacher row.
                    criteria_subjects: Iterable of (main subject, sub-subject, grade)
                        triples.
                    highest_grade: Grade a criteria row must carry to be considered.

                Returns:
                    Set of normalised (lower-cased, stripped) main subject names;
                    empty when nothing matches or ``subject`` is missing.
                """
                def normalise(value):
                    # Missing cells (NaN/None) have no .lower(); map them to None so
                    # a blank sub-subject no longer aborts processing. Non-string
                    # values are coerced with str() for the same reason.
                    if pd.isna(value):
                        return None
                    return str(value).strip().lower()

                subject = normalise(subject)
                if subject is None:
                    return set()

                matched_subjects = set()
                for main_subject, sub_subject, grade in criteria_subjects:
                    if grade != highest_grade:
                        continue
                    main_subject = normalise(main_subject)
                    if main_subject is None:
                        # A row without a main subject has nothing to report.
                        continue
                    if subject in (main_subject, normalise(sub_subject)):
                        matched_subjects.add(main_subject)
                return matched_subjects

            def find_highest_grade_subject_refined(row):
                """Find the highest mapped grade in a teacher row and its subjects.

                Scans the ``grade1``..``grade22`` / ``subject1``..``subject22`` column
                pairs of ``row``. A pair counts only when both cells are present, the
                grade converts to an int that appears in the enclosing
                ``criteria_grades``, and ``refined_subject_matches`` finds at least
                one main subject for it in the enclosing ``criteria_subjects``.

                Args:
                    row: One teacher record supporting ``.get(name)`` and
                        ``['firstName']`` lookups (a DataFrame row in this script).

                Returns:
                    ``(highest_grade, subjects)`` where ``subjects`` is the sorted,
                    comma-joined set of main subjects matched at that grade, or
                    ``(0, 'NA')`` when no pair qualifies.
                """
                highest_grade = 0
                main_subjects = set()

                for grade_col in range(1, 23):
                    grade_col_name = f'grade{grade_col}'
                    subject_col_name = f'subject{grade_col}'
                    grade = row.get(grade_col_name)
                    subject = row.get(subject_col_name)

                    # Absent columns (None) and blank cells (NaN) are both skipped.
                    if pd.isna(grade) or pd.isna(subject):
                        continue

                    try:
                        grade = int(grade)
                    except (ValueError, TypeError):
                        # TypeError covers cells that are not number-like at all
                        # (e.g. date-formatted cells); without it a single bad cell
                        # aborted processing for every teacher.
                        st.warning(f"Warning: Non-integer grade found in '{grade_col_name}' for teacher {row['firstName']}. Skipping this entry.")
                        continue

                    if grade not in criteria_grades:
                        continue

                    matched_subject = refined_subject_matches(subject, criteria_subjects, grade)
                    if not matched_subject:
                        continue

                    if grade > highest_grade:
                        # A new maximum discards subjects gathered at lower grades.
                        highest_grade = grade
                        main_subjects = matched_subject
                    elif grade == highest_grade:
                        main_subjects.update(matched_subject)

                if highest_grade > 0:
                    # Sort so the joined string does not depend on set ordering.
                    return highest_grade, ','.join(sorted(main_subjects))

                return highest_grade, 'NA'

            # Apply the function to the DataFrame
            for index, row in teacher_df.iterrows():
                highest_grade, main_subject = find_highest_grade_subject_refined(row)
                teacher_df.at[index, 'Highest Grade'] = highest_grade
                teacher_df.at[index, 'Subject'] = main_subject

            teacher_df['Subject'] = teacher_df['Subject'].apply(lambda x: ','.join(sorted(set(x.split(',')))))

            # Save the output file
            output_file_refined_v3 = 'VGOS_Teacher_Data.xlsx'
            teacher_df.to_excel(output_file_refined_v3, index=False)

            # Provide download link for the output file
            with open(output_file_refined_v3, "rb") as file:
                st.download_button(
                    label="Download updated teacher data",
                    data=file,
                    file_name=output_file_refined_v3,
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )

            st.success("Data processing complete. You can download the file now.")

        except Exception as e:
            st.error(f"An unexpected error occurred while processing the files: {str(e)}")
            st.stop()

    else:
        st.info("Please upload both the teacher data file and criteria file to proceed.")

except Exception as e:
    st.error(f"An error occurred: {str(e)}")