Merged

25 commits
28f6990
Test Scripts for Backend Algorithms
ShwethaSureshKumar Apr 15, 2025
2ad0a8a
Merge pull request #27 from TCS-2021/main
ShwethaSureshKumar Apr 15, 2025
b2e66dc
Delete tests/PrescriptiveAnalysis1 directory
ShwethaSureshKumar Apr 15, 2025
5fa650a
test for apriori graph
ShwethaSureshKumar Apr 15, 2025
55ca1b5
test for FP Growth
ShwethaSureshKumar Apr 15, 2025
3fefb46
Test for GSP Algorithm
ShwethaSureshKumar Apr 15, 2025
45ebd10
Test for GSpan algo
ShwethaSureshKumar Apr 15, 2025
011210b
Test for Apriori algo
ShwethaSureshKumar Apr 15, 2025
12707b7
with ui for SPADE algo
ShwethaSureshKumar Apr 15, 2025
f7c30b6
Implemented SPADE algorithm
ShwethaSureshKumar Apr 15, 2025
c932ae3
Added dataset for SPADE
ShwethaSureshKumar Apr 15, 2025
568f2d9
Update readme.txt
Santa-k27 Apr 15, 2025
96922ac
Tests for SPADE
ShwethaSureshKumar Apr 15, 2025
e9ec265
updated spade
ShwethaSureshKumar Apr 15, 2025
08b597d
fixed : spade implementation
ShwethaSureshKumar Apr 15, 2025
b3e9870
updated SPADE test
ShwethaSureshKumar Apr 15, 2025
94b8c12
Delete src/PrescriptiveAnalysis1/Backend/spade.py
ShwethaSureshKumar Apr 15, 2025
6ab56a6
updated SPADE
ShwethaSureshKumar Apr 15, 2025
5f8fa90
Update readme.txt
ShwethaSureshKumar Apr 15, 2025
ed81fd4
spade update
ShwethaSureshKumar Apr 16, 2025
eeb6336
dataset update
ShwethaSureshKumar Apr 16, 2025
451fc77
test update
ShwethaSureshKumar Apr 16, 2025
5733535
Merge branch 'main' into PrescriptiveAnalysis1
nitishkrish16 Apr 16, 2025
bf36d0a
resolved merge conflicts in the import - Prescriptive analysis 1
nitishkrish16 Apr 16, 2025
227d2bf
Delete tests/PrescriptiveAnalysis1/test_gsp_algorithm.py
ShwethaSureshKumar Apr 16, 2025
11 changes: 11 additions & 0 deletions Datasets/PrescriptiveAnalysis1/SPADE/example2.csv
@@ -0,0 +1,11 @@
NAME,INVOICEDATE,PRODUCTNAME
1,1/1/2025,"C,D"
1,1/3/2025,"A,B,C"
1,1/4/2025,"A,B,F"
1,1/4/2025,"A,C,D,F"
2,1/1/2025,"A,B,F"
2,1/1/2025,E
3,1/1/2025,"A,B,F"
4,1/2/2025,"D,H,G"
4,1/2/2025,B
4,1/3/2025,"A,G,H"
31 changes: 31 additions & 0 deletions Datasets/PrescriptiveAnalysis1/SPADE/groceries_own .csv
@@ -0,0 +1,31 @@
NAME,INVOICEDATE,PRODUCTNAME
1,01-01-2025,Milk
1,01-01-2025,Bread
1,01-02-2025,Eggs
1,01-02-2025,Apples
1,01-03-2025,Bananas
1,01-03-2025,Orange Juice
1,01-04-2025,Cereal
2,01-01-2025,Butter
2,01-01-2025,Cheese
2,01-02-2025,Yogurt
2,01-02-2025,Chicken
2,01-03-2025,Beef
2,01-03-2025,Pasta
3,01-01-2025,Tomato Sauce
3,01-01-2025,Olive Oil
3,01-02-2025,Rice
3,01-02-2025,Potatoes
3,01-03-2025,Carrots
3,01-03-2025,Broccoli
4,01-01-2025,Toothpaste
4,01-01-2025,Milk
4,01-02-2025,Bread
4,01-02-2025,Eggs
4,01-03-2025,Apples
4,01-03-2025,Bananas
5,01-01-2025,Orange Juice
5,01-01-2025,Cereal
5,01-02-2025,Butter
5,01-02-2025,Cheese
5,01-03-2025,Yogurt
18 changes: 18 additions & 0 deletions src/PrescriptiveAnalysis1/Backend/readme.txt
@@ -117,3 +117,21 @@ File: "groceries_own.csv"
Random Dataset
Min Support = 0.2 / 0.3 / 0.4
----------------------------------------------------------------------------------------------------



-SPADE
----------------------------------------------------------------------------------------------------
File: "example2.csv"
Example question given in the instructor's PPT
Min Support = 0.5
(Answer cross-checked)

File: "groceries_own.csv"
Random Dataset (same as used for GSP)
Min Support = 0.3
(Answer cross-checked)

The file must have columns named:
"NAME", "INVOICEDATE", "PRODUCTNAME"
----------------------------------------------------------------------------------------------------
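
For orientation, here is a minimal sketch of how a SPADE input file in the format above can be turned into the algorithm's vertical representation, where each item maps to the (sequence id, timestamp) pairs it occurs in and the support of a 1-sequence is the fraction of distinct sequences containing the item. This is an illustration only, assuming pandas and the column names used in the SPADE datasets (NAME, INVOICEDATE, PRODUCTNAME); the PR's own preprocess_data_vertical in spade.py may be implemented differently.

from collections import defaultdict
import pandas as pd

def to_vertical(df: pd.DataFrame) -> dict:
    """Map each item to the set of (sequence id, timestamp) pairs it occurs in."""
    id_lists = defaultdict(set)
    for _, row in df.iterrows():
        # PRODUCTNAME may hold a comma-separated itemset, e.g. "A,B,F"
        for item in str(row["PRODUCTNAME"]).split(","):
            id_lists[item.strip()].add((row["NAME"], row["INVOICEDATE"]))
    return id_lists

df = pd.read_csv("Datasets/PrescriptiveAnalysis1/SPADE/example2.csv")
id_lists = to_vertical(df)
n_sequences = df["NAME"].nunique()  # 4 customers in example2.csv

# A 1-sequence is frequent if enough distinct sequences contain the item.
min_support = 0.5
for item, occurrences in sorted(id_lists.items()):
    support = len({sid for sid, _ in occurrences}) / n_sequences
    if support >= min_support:
        print(item, round(support, 2))  # prints A 1.0, B 1.0, D 0.5, F 0.75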
446 changes: 446 additions & 0 deletions src/PrescriptiveAnalysis1/Backend/spade.py

Large diffs are not rendered by default.
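
Since the 446-line spade.py diff is not rendered, the sketch below illustrates the temporal id-list join at the heart of SPADE: a candidate "a then b" sequence is supported wherever b's event falls strictly after a's within the same input sequence. Names and data shapes here are hypothetical and are not taken from the PR's implementation.

def temporal_join(idlist_a, idlist_b):
    """Id-list for the pattern 'a then b': keep b's occurrences that come
    strictly after some occurrence of a in the same sequence."""
    joined = set()
    for sid_a, t_a in idlist_a:
        for sid_b, t_b in idlist_b:
            if sid_a == sid_b and t_b > t_a:
                joined.add((sid_b, t_b))
    return joined

def support(idlist, n_sequences):
    # Support counts distinct sequences, not individual occurrences.
    return len({sid for sid, _ in idlist}) / n_sequences

# Toy example over 2 sequences: A occurs at times 1 and 3 in sequence 1 and
# at time 1 in sequence 2; B occurs at time 2 in sequence 1 and at time 1 in
# sequence 2. Only sequence 1 has A strictly before B.
A = {(1, 1), (1, 3), (2, 1)}
B = {(1, 2), (2, 1)}
AB = temporal_join(A, B)  # {(1, 2)}
print(support(AB, 2))     # 0.5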

88 changes: 79 additions & 9 deletions src/PrescriptiveAnalysis1/Frontend/main.py
@@ -10,6 +10,7 @@
from ..Backend.gsp import preprocess_sequences_ordered, gsp_algorithm
from ..Backend.apriori import run_apriori_analysis
from ..Backend.fp_growth import run_fp_growth_analysis
from ..Backend.spade import preprocess_data_vertical, get_transaction_table, run_spade_analysis, format_pattern, get_pattern_length

def apriori_graph_mining_app():
st.title("Apriori-Based Graph Mining")
@@ -51,14 +52,12 @@ def gsp_algorithm_app():
)
if st.button("Run GSP Algorithm"):
with st.spinner("Processing..."):
start_time = time.time()
customer_sequences = preprocess_sequences_ordered(df)
sequences = customer_sequences['SEQUENCE'].tolist()
with st.expander("View Processed Sequences"):
st.write(sequences)
results = gsp_algorithm(sequences, min_support)
end_time = time.time()
st.success(f"Processing completed in {end_time - start_time:.2f} seconds!")
st.success("Processing completed!")
st.header("GSP Algorithm Results")
st.subheader("Frequent 1-Item Sequences")
frequent_1 = results['1_item']['frequent']
@@ -88,7 +87,7 @@ def gsp_algorithm_app():
st.error(f"An error occurred: {str(e)}")

def gspan_algorithm_app():
st.title("GSPan Algorithm Implementation")
st.title("gSpan Algorithm Implementation")
uploaded_file = st.file_uploader("Upload your JSON graph dataset file", type=['json'], key="gspan_file")
if uploaded_file is not None:
temp_file_path = "temp_graphs.json"
@@ -102,7 +101,7 @@ def gspan_algorithm_app():

if graphs_dict is not None:
min_support = st.slider("Minimum Support", 1, len(graphs_dict), 2, key="gspan_min_support")
if st.button("Run GSPan Algorithm"):
if st.button("Run gSpan Algorithm"):
with st.spinner("Processing..."):
st.header("DFS Codes for Each Graph")
all_dfs_codes = {}
@@ -163,7 +162,7 @@ def apriori_algorithm_app():
if error:
st.error(f"Error: {error}")
else:
st.success(f"Processing completed in {execution_time:.2f} seconds!")
st.success("Processing completed!")
if not itemsets_df.empty:
st.header("Frequent Itemsets")
for level in sorted(itemsets_df["Level"].unique()):
@@ -214,7 +213,7 @@ def fp_growth_algorithm_app():
if error:
st.error(f"Error: {error}")
else:
st.success(f"Processing completed in {execution_time:.2f} seconds!")
st.success("Processing completed!")
if not itemsets_df.empty:
st.header("Frequent Itemsets")
for level in sorted(itemsets_df["Level"].unique()):
@@ -233,18 +232,89 @@ def fp_growth_algorithm_app():
except Exception as e:
st.error(f"An error occurred: {str(e)}")

def spade_algorithm_app():
st.title("SPADE Algorithm Implementation")
st.write("This app performs sequential pattern mining using the SPADE algorithm.")

uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"], key="spade_file")
if uploaded_file is not None:
try:
df = pd.read_csv(uploaded_file)
st.success("File successfully uploaded and read!")
with st.expander("View Uploaded Data"):
st.dataframe(df)

min_support = st.slider(
"Select minimum support threshold (0-1)",
min_value=0.01,
max_value=1.0,
value=0.5,
step=0.01,
key="spade_min_support"
)

if st.button("Run SPADE Algorithm"):
with st.spinner("Processing..."):
transactions_df, detailed_results, all_frequent_df, error = run_spade_analysis(df, min_support)
if error:
st.error(f"Error: {error}")
else:
st.success("Processing completed!")

# Display vertical format sample
if "vertical_format_sample" in detailed_results:
st.header("Vertical Format Sample")
st.dataframe(detailed_results["vertical_format_sample"])

# Display transaction table
if transactions_df is not None and not transactions_df.empty:
st.header("Transaction Table")
st.dataframe(transactions_df)
st.write(f"Total unique sequences (customers): {detailed_results['total_sequences']}")
st.write(f"Minimum support threshold: {detailed_results['min_support']}")

# Display Frequent 1-Sequences
if "frequent_1" in detailed_results:
st.header("SPADE Algorithm Results")
st.subheader("Frequent 1-Sequences")
st.dataframe(detailed_results["frequent_1"])

# Display each level of candidate and frequent sequences
for k, candidates_df in detailed_results.get("candidates", []):
st.subheader(f"Generating {k}-Sequences")
st.write(f"Candidate {k}-Sequences:")
st.dataframe(candidates_df)

# Find the corresponding frequent sequences for this k
frequent_df = next((df for level, df in detailed_results.get("frequent", []) if level == k), None)
if frequent_df is not None:
st.write(f"Frequent {k}-Sequences:")
st.dataframe(frequent_df)

# Display all frequent sequences
if not all_frequent_df.empty:
st.subheader("All Frequent Sequences (Ordered by Length)")
st.dataframe(all_frequent_df)
else:
st.write("No frequent sequences found.")

except Exception as e:
st.error(f"An error occurred: {str(e)}")

def main():
st.sidebar.title("Algorithm Selection")
algorithm = st.sidebar.selectbox("Choose an algorithm", ["Apriori Algorithm", "FP-Growth Algorithm", "Apriori Graph Mining", "GSP Algorithm", "GSPan Algorithm"])
algorithm = st.sidebar.selectbox("Choose an algorithm", ["Apriori Algorithm", "FP-Growth Algorithm", "SPADE Algorithm", "Apriori Graph Mining", "GSP Algorithm", "gSpan Algorithm"])
if algorithm == "Apriori Algorithm":
apriori_algorithm_app()
elif algorithm == "FP-Growth Algorithm":
fp_growth_algorithm_app()
elif algorithm == "SPADE Algorithm":
spade_algorithm_app()
elif algorithm == "Apriori Graph Mining":
apriori_graph_mining_app()
elif algorithm == "GSP Algorithm":
gsp_algorithm_app()
elif algorithm == "GSPan Algorithm":
elif algorithm == "gSpan Algorithm":
gspan_algorithm_app()

if __name__ == "__main__":
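
Since spade.py is not shown, the structures consumed above can only be inferred from the accesses in spade_algorithm_app. The following sketch records those assumed shapes; all field types are assumptions, not confirmed by the backend code.

import pandas as pd

# Assumed return values of run_spade_analysis(df, min_support), inferred
# solely from how spade_algorithm_app uses them (spade.py is not rendered):
transactions_df = pd.DataFrame()       # one row per customer sequence
detailed_results = {
    "vertical_format_sample": pd.DataFrame(),  # preview of the vertical format
    "total_sequences": 4,                      # distinct customers in the data
    "min_support": 0.5,                        # the threshold that was applied
    "frequent_1": pd.DataFrame(),              # frequent 1-sequences with supports
    "candidates": [(2, pd.DataFrame())],       # (k, candidate k-sequences) per level
    "frequent": [(2, pd.DataFrame())],         # (k, frequent k-sequences) per level
}
all_frequent_df = pd.DataFrame()       # every frequent sequence, ordered by length
error = None                           # error message string, or None on success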
141 changes: 141 additions & 0 deletions tests/PrescriptiveAnalysis1/test_apriori.py
@@ -0,0 +1,141 @@
import unittest
import pandas as pd
import sys
import os
# Make the repository root importable so the "src" package resolves
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
sys.path.insert(0, project_root)
from src.PrescriptiveAnalysis1.Backend.apriori import AprioriAlgorithm, BusinessRuleGenerator, run_apriori_analysis

class TestApriori(unittest.TestCase):
def setUp(self):
# Sample transactional data
self.transactions = [
{'A', 'B', 'C'},
{'A', 'B'},
{'B', 'C'},
{'A', 'C'},
{'A', 'B', 'D'}
]
self.min_support = 0.4 # 40% (2 out of 5 transactions)
self.min_confidence = 0.5
# Sample DataFrame for run_apriori_analysis
data = {
'INVOICENO': [1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5],
'PRODUCTNAME': ['A', 'B', 'C', 'A', 'B', 'B', 'C', 'A', 'C', 'A', 'B', 'D']
}
self.df = pd.DataFrame(data)

def test_apriori_algorithm_initialization(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
self.assertEqual(apriori.transactions, self.transactions)
self.assertEqual(apriori.min_support, self.min_support)
self.assertEqual(apriori.frequent_patterns, {})

def test_count_item_frequencies(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
candidates = [frozenset({'A'}), frozenset({'B'}), frozenset({'C'}), frozenset({'D'})]
frequent_items = apriori.count_item_frequencies(candidates)
expected = [
(frozenset({'A'}), 4/5),
(frozenset({'B'}), 4/5),
(frozenset({'C'}), 3/5),
]
self.assertEqual(len(frequent_items), 3) # D has support 1/5 < 0.4
for item, support in frequent_items:
self.assertTrue((item, support) in expected)

def test_create_new_combinations(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
prev_frequent = [frozenset({'A'}), frozenset({'B'}), frozenset({'C'})]
new_combinations = apriori.create_new_combinations(prev_frequent, 2)
expected = {frozenset({'A', 'B'}), frozenset({'A', 'C'}), frozenset({'B', 'C'})}
self.assertEqual(new_combinations, expected)

def test_find_frequent_itemsets(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
frequent_patterns = apriori.find_frequent_itemsets()
self.assertIn(1, frequent_patterns)
self.assertIn(2, frequent_patterns)
# Level 1: A, B, C
level_1 = frequent_patterns[1]
self.assertEqual(len(level_1), 3)
expected_1 = {frozenset({'A'}), frozenset({'B'}), frozenset({'C'})}
self.assertTrue(all(item in [x[0] for x in level_1] for item in expected_1))
# Level 2: A,B; A,C; B,C
level_2 = frequent_patterns[2]
self.assertEqual(len(level_2), 3)
expected_2 = {frozenset({'A', 'B'}), frozenset({'A', 'C'}), frozenset({'B', 'C'})}
self.assertTrue(all(item in [x[0] for x in level_2] for item in expected_2))

def test_execute(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
patterns, execution_time = apriori.execute()
self.assertEqual(patterns, apriori.frequent_patterns)
self.assertGreaterEqual(execution_time, 0)
self.assertIn(1, patterns)
self.assertIn(2, patterns)
self.assertEqual(len(patterns[1]), 3) # A, B, C
self.assertEqual(len(patterns[2]), 3) # A,B; A,C; B,C

def test_business_rule_generator(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
frequent_patterns = apriori.find_frequent_itemsets()
rule_generator = BusinessRuleGenerator(frequent_patterns, self.transactions, self.min_confidence)
rules = rule_generator.derive_rules()
self.assertTrue(rules)
# Check a sample rule: A => B
for antecedent, consequent, support, confidence in rules:
if antecedent == 'A' and consequent == 'B':
self.assertAlmostEqual(support, 3/5) # A,B appears in 3 transactions
self.assertAlmostEqual(confidence, (3/5) / (4/5)) # Support(A,B) / Support(A)
self.assertGreaterEqual(confidence, self.min_confidence)

def test_compute_confidence(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
frequent_patterns = apriori.find_frequent_itemsets()
rule_generator = BusinessRuleGenerator(frequent_patterns, self.transactions, self.min_confidence)
confidence = rule_generator.compute_confidence(frozenset({'A'}), frozenset({'B'}))
self.assertAlmostEqual(confidence, (3/5) / (4/5)) # Support(A,B) / Support(A)
confidence = rule_generator.compute_confidence(frozenset({'D'}), frozenset({'A'}))
self.assertEqual(confidence, 0) # D not frequent

def test_fetch_support(self):
apriori = AprioriAlgorithm(self.transactions, self.min_support)
frequent_patterns = apriori.find_frequent_itemsets()
rule_generator = BusinessRuleGenerator(frequent_patterns, self.transactions, self.min_confidence)
support = rule_generator.fetch_support(frozenset({'A', 'B'}))
self.assertAlmostEqual(support, 3/5)
support = rule_generator.fetch_support(frozenset({'A', 'D'}))
self.assertEqual(support, 0) # A,D not frequent

def test_run_apriori_analysis(self):
itemsets_df, rules_df, execution_time, error = run_apriori_analysis(self.df, self.min_support, self.min_confidence)
self.assertIsNone(error)
self.assertIsNotNone(itemsets_df)
self.assertIsNotNone(rules_df)
self.assertGreaterEqual(execution_time, 0)
# Check DataFrame columns
self.assertEqual(list(itemsets_df.columns), ['Level', 'Frequent Itemset', 'Support'])
self.assertEqual(list(rules_df.columns), ['Antecedent', 'Consequent', 'Support', 'Confidence'])
# Verify some frequent itemsets
self.assertTrue(any('A, B' in itemset for itemset in itemsets_df['Frequent Itemset']))
# Verify a rule
self.assertTrue(any((row['Antecedent'] == 'A') & (row['Consequent'] == 'B')
for _, row in rules_df.iterrows()))

def test_run_apriori_analysis_empty(self):
empty_df = pd.DataFrame({'INVOICENO': [], 'PRODUCTNAME': []})
itemsets_df, rules_df, execution_time, error = run_apriori_analysis(empty_df, self.min_support, self.min_confidence)
self.assertEqual(error, "No valid transactions found.")
self.assertIsNone(itemsets_df)
self.assertIsNone(rules_df)
self.assertIsNone(execution_time)

def test_run_apriori_analysis_high_support(self):
apriori = AprioriAlgorithm(self.transactions, 0.9)
patterns = apriori.find_frequent_itemsets()
self.assertEqual(patterns, {}) # No itemsets with support >= 0.9

if __name__ == '__main__':
unittest.main()
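
Assuming the repository root as the working directory, the suite can be run with the standard library runner, e.g. python -m unittest tests/PrescriptiveAnalysis1/test_apriori.py; the path setup at the top of the file makes the src package importable without installing it.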