Skip to content

Commit 42a3850

Browse files
authored
Merge pull request #24 from TCS-2021/PredictiveAnalysis2
Predictive Analytics 2 - Implemented NER-LSTM for Claim Appeals and LSTM for Stock Prediction
2 parents 1d6e589 + 4d6464a commit 42a3850

14 files changed

Lines changed: 62720 additions & 64 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,4 @@ cython_debug/
172172

173173
# PyPI configuration file
174174
.pypirc
175+
.DS_Store
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Dear Sir/Madam,
2+
I am writing to appeal the denial of claim CLM654321. The reason provided was "Pre-authorization required". The treatment was provided by Dr. Alan Moore under Cigna.
3+
Please reconsider this decision.
4+
Sincerely,
5+
Jane Doe
Binary file not shown.
Binary file not shown.

Datasets/predictive-analytics2/stocks-datasets/nifty50_5y_full_data.csv

Lines changed: 61851 additions & 0 deletions
Large diffs are not rendered by default.

requirements.txt

Lines changed: 43 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,43 @@
1-
altair==5.5.0
2-
astroid==3.3.9
3-
attrs==25.3.0
4-
blinker==1.9.0
5-
cachetools==5.5.2
6-
certifi==2025.1.31
7-
charset-normalizer==3.4.1
8-
click==8.1.8
9-
colorama==0.4.6
10-
dill==0.3.9
11-
et_xmlfile==2.0.0
12-
exceptiongroup==1.2.2
13-
gitdb==4.0.12
14-
GitPython==3.1.44
15-
idna==3.10
16-
iniconfig==2.1.0
17-
isort==6.0.1
18-
Jinja2==3.1.6
19-
joblib==1.4.2
20-
jsonschema==4.23.0
21-
jsonschema-specifications==2024.10.1
22-
markdown-it-py==3.0.0
23-
MarkupSafe==3.0.2
24-
mccabe==0.7.0
25-
mdurl==0.1.2
26-
narwhals==1.32.0
27-
numpy==2.2.4
28-
openpyxl==3.1.5
29-
packaging==24.2
30-
pandas==2.2.3
31-
patsy==1.0.1
32-
pillow==11.1.0
33-
platformdirs==4.3.7
34-
plotly==6.0.1
35-
pluggy==1.5.0
36-
prettytable==3.16.0
37-
protobuf==5.29.4
38-
pyarrow==19.0.1
39-
pydeck==0.9.1
40-
Pygments==2.19.1
41-
pylint==3.3.6
42-
pytest==8.3.5
43-
python-dateutil==2.9.0.post0
44-
pytz==2025.2
45-
referencing==0.36.2
46-
requests==2.32.3
47-
rich==13.9.4
48-
rpds-py==0.24.0
49-
scikit-learn==1.6.1
50-
scipy==1.15.2
51-
six==1.17.0
52-
smmap==5.0.2
53-
statsmodels==0.14.4
54-
streamlit==1.44.0
55-
tenacity==9.0.0
56-
threadpoolctl==3.6.0
57-
toml==0.10.2
58-
tomli==2.2.1
59-
tomlkit==0.13.2
60-
tornado==6.4.2
61-
typing_extensions==4.13.0
62-
tzdata==2025.2
63-
urllib3==2.3.0
64-
watchdog==6.0.0
1+
absl-py==2.0.0
2+
astunparse==1.6.3
3+
cachetools==5.3.0
4+
certifi==2022.12.7
5+
chardet==4.0.0
6+
charset-normalizer==3.1.0
7+
Faker==25.9.1
8+
flatbuffers==25.2.10
9+
gast==0.4.0
10+
google-pasta==0.2.0
11+
grpcio==1.71.0
12+
h5py==3.13.0
13+
idna==2.10
14+
ipykernel==6.25.1
15+
ipython==8.14.0
16+
Jinja2==3.1.2
17+
keras==3.9.2
18+
MarkupSafe==2.1.3
19+
ml-dtypes==0.4.1
20+
numpy==1.26.4
21+
opt-einsum==3.4.0
22+
pandas==2.0.2
23+
plotly==5.16.1
24+
protobuf==3.20.3
25+
pylint==3.3.6
26+
pytest==8.3.5
27+
PyPDF2==3.0.1
28+
python-docx==1.1.2
29+
requests==2.31.0
30+
scikit-learn==1.3.0
31+
six==1.16.0
32+
statsmodels==0.14.4
33+
streamlit==1.41.0
34+
tensorboard==2.18.0
35+
tensorboard-data-server==0.7.2
36+
tensorboard-plugin-wit==1.8.1
37+
tensorflow==2.18.0
38+
tensorflow-io-gcs-filesystem==0.31.0
39+
typing_extensions==4.12.2
40+
urllib3==1.26.15
41+
Werkzeug==3.0.5
42+
wrapt==1.15.0
43+
yfinance==0.2.55
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
data_utils.py file for Streamlit application
3+
"""
4+
5+
import pandas as pd
6+
import yfinance as yf
7+
import streamlit as st
8+
9+
def load_full_data(file):
    """Load and preprocess historical stock data from a CSV file.

    Parameters:
        file: path or file-like object readable by ``pandas.read_csv``.
            Expected columns: Date (dd-mm-YYYY), Stock Name, Open, High,
            Low, Close, Volume.

    Returns:
        DataFrame indexed by the parsed ``Date`` (sorted ascending) with
        columns ['Stock Name', 'Open', 'High', 'Low', 'Close', 'Volume'].
        Rows whose date or numeric fields fail to parse are dropped.
    """
    # Read raw, then parse the date column exactly once with an explicit
    # format. The previous code parsed dates twice with two different
    # strategies (read_csv parse_dates/dayfirst inference, then a strict
    # to_datetime format that is a no-op on already-parsed values), which
    # could silently disagree on ambiguous dates.
    data_frame = pd.read_csv(file)
    data_frame.columns = data_frame.columns.str.strip()
    data_frame['Date'] = pd.to_datetime(
        data_frame['Date'], format='%d-%m-%Y', errors='coerce')
    data_frame = data_frame.dropna(subset=['Date']).sort_values('Date')
    data_frame.set_index('Date', inplace=True)

    numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    # Coerce price/volume columns to numbers; bad cells become NaN and the
    # whole row is discarded below.
    data_frame[numeric_cols] = data_frame[numeric_cols].apply(
        pd.to_numeric, errors='coerce')
    data_frame = data_frame.dropna(subset=numeric_cols)

    return data_frame[['Stock Name'] + numeric_cols]
22+
23+
@st.cache_data(ttl=300)
def get_realtime_data():
    """Return up to five years of weekly NIFTY50 (^NSEI) price history."""
    # Cached for 5 minutes so repeated Streamlit reruns don't re-hit Yahoo.
    return yf.Ticker("^NSEI").history(period="5y", interval="1wk")
28+
29+
30+
@st.cache_data(ttl=300)
def get_realtime_daily_data():
    """Return up to two years of daily NIFTY50 (^NSEI) price history."""
    # Cached for 5 minutes so repeated Streamlit reruns don't re-hit Yahoo.
    return yf.Ticker("^NSEI").history(period="2y", interval="1d")
35+
36+
@st.cache_data
def load_and_cache_file(uploaded_file):
    """Parse an uploaded CSV with load_full_data, memoized by file content."""
    parsed = load_full_data(uploaded_file)
    return parsed
120 Bytes
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
�K(.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
model_utils.py file for Streamlit application
3+
"""
4+
5+
import numpy as np
6+
from tensorflow import keras
7+
8+
def create_sequences(data, sequence_length, feature_index):
    """Slice a time series into overlapping LSTM training samples.

    Each sample pairs a window of ``sequence_length`` consecutive rows with
    the value at ``feature_index`` of the row that immediately follows it.
    Samples containing any NaN (in the window or the target) are skipped.

    Returns:
        Tuple ``(x, y)`` of numpy arrays: inputs and one-step-ahead targets.
    """
    inputs = []
    targets = []
    for start in range(len(data) - sequence_length):
        end = start + sequence_length
        window = data[start:end]
        label = data[end, feature_index]
        # Reject samples polluted by missing values.
        if np.isnan(window).any() or np.isnan(label):
            continue
        inputs.append(window)
        targets.append(label)
    return np.array(inputs), np.array(targets)
18+
19+
20+
def build_lstm_model(input_shape):
    """Build and compile a stacked two-layer LSTM regression model.

    Parameters:
        input_shape: tuple ``(timesteps, features)`` of one input sequence.

    Returns:
        A compiled ``keras.Sequential`` model that emits a single value,
        optimized with Adam (lr=0.001) on mean squared error.
    """
    # Discard graph state left over from previously built models.
    keras.backend.clear_session()
    model = keras.Sequential([
        # Declare the input explicitly; passing `input_shape` to the first
        # layer is the deprecated Keras 1/2 idiom and warns under Keras 3.
        keras.Input(shape=input_shape),
        keras.layers.LSTM(50, return_sequences=True),
        keras.layers.Dropout(0.2),
        keras.layers.LSTM(50, return_sequences=False),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(25),
        keras.layers.Dense(1)
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
                  loss='mean_squared_error')
    return model
33+
34+
35+
def build_bidirectional_lstm(sequence_length):
    """Build and compile a two-layer bidirectional LSTM regression model.

    Parameters:
        sequence_length: number of timesteps per single-feature input window.

    Returns:
        A compiled ``keras.Sequential`` model that emits a single value,
        optimized with Adam (lr=0.001) on mean squared error.
    """
    model = keras.Sequential([
        # Declare the input explicitly; routing `input_shape` through the
        # Bidirectional wrapper's kwargs is the deprecated Keras 1/2 idiom
        # and warns under Keras 3.
        keras.Input(shape=(sequence_length, 1)),
        keras.layers.Bidirectional(
            keras.layers.LSTM(128, return_sequences=True)),
        keras.layers.Dropout(0.3),
        keras.layers.Bidirectional(keras.layers.LSTM(64)),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(1)
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
                  loss='mean_squared_error')
    return model
49+
50+
51+
def rolling_forecast(model, normalized_data, sequence_length):
52+
"""Perform rolling forecast using trained model"""
53+
predictions = []
54+
if len(normalized_data) < sequence_length:
55+
return predictions
56+
57+
last_sequence = normalized_data[:sequence_length].reshape(1, sequence_length, -1)
58+
59+
for i in range(sequence_length, len(normalized_data)):
60+
prediction = model.predict(last_sequence, verbose=0)[0][0]
61+
predictions.append(prediction)
62+
next_input = normalized_data[i].reshape(1, 1, -1)
63+
last_sequence = np.append(last_sequence[:, 1:, :], next_input, axis=1)
64+
65+
return predictions

0 commit comments

Comments
 (0)