This is a reply to closed issue #279 @AMALSHINY0000.
I have a similar idea and my approach is as follows:
I'm downloading chart data for a stock (in my example NVDA from 2020 until present 1D Timeframe) from TradingView as a .csv file.
time,open,high,low,close,Volume 2020-01-02,5.96875,5.99775,5.918,5.99775,237678760 2020-01-03,5.8775,5.94575,5.8525,5.90175,205772320 2020-01-06,5.808,5.93175,5.78175,5.9265,262912480 2020-01-07,5.955,6.04425,5.90975,5.99825,319205760 2020-01-08,5.994,6.051,5.953725,6.0095,277240680 2020-01-09,6.09625,6.1482425,6.021375,6.0755,255442120 2020-01-10,6.18325,6.21375,6.09375,6.108,316462400 2020-01-13,6.1915,6.32471,6.16875,6.2995,320338040
Then a Python script creates an .md file containing overlapping time windows of OHLCV data with precomputed features (trend, volatility, momentum, return, volume trend, tags).
Example output of the script
`## NVDA | Period: 2020-01-02 → 2020-01-30
Market Structure
Trend: bullish
Volatility: medium
Momentum: increasing
Quantitative Features
return: 0.0296
volatility: 0.0171
range_avg: 0.0216
volume_trend: increasing
Tags
['bullish_trend', 'volume_increase']
Raw Data
2020-01-02 | o:5.97 h:6.00 l:5.92 c:6.00 v:237678760
2020-01-03 | o:5.88 h:5.95 l:5.85 c:5.90 v:205772320
2020-01-06 | o:5.81 h:5.93 l:5.78 c:5.93 v:262912480
2020-01-07 | o:5.96 h:6.04 l:5.91 c:6.00 v:319205760
2020-01-08 | o:5.99 h:6.05 l:5.95 c:6.01 v:277240680
2020-01-09 | o:6.10 h:6.15 l:6.02 c:6.08 v:255442120
2020-01-10 | o:6.18 h:6.21 l:6.09 c:6.11 v:316462400
2020-01-13 | o:6.19 h:6.32 l:6.17 c:6.30 v:320338040
2020-01-14 | o:6.26 h:6.28 l:6.17 c:6.18 v:359579320
2020-01-15 | o:6.19 h:6.22 l:6.11 c:6.14 v:263105440
2020-01-16 | o:6.19 h:6.23 l:6.17 c:6.22 v:284191640
2020-01-17 | o:6.24 h:6.25 l:6.18 c:6.23 v:253790560
2020-01-21 | o:6.20 h:6.23 l:6.16 c:6.20 v:217917000
2020-01-22 | o:6.24 h:6.34 l:6.22 c:6.25 v:239238680
2020-01-23 | o:6.29 h:6.33 l:6.20 c:6.32 v:244514440
2020-01-24 | o:6.44 h:6.49 l:6.21 c:6.26 v:373513560
2020-01-27 | o:5.96 h:6.06 l:5.81 c:6.00 v:470534160
2020-01-28 | o:6.07 h:6.23 l:6.02 c:6.20 v:310976040
2020-01-29 | o:6.18 h:6.22 l:6.10 c:6.14 v:259451160
2020-01-30 | o:6.04 h:6.16 l:5.99 c:6.15 v:290503600
---`
Here is my python code:
import pandas as pd
import numpy as np
# =========================
# CONFIG
# =========================

# Rows per window, and how many rows consecutive windows share.
WINDOW_SIZE = 20
OVERLAP = 5
INPUT_FILE = "nvda.csv"
OUTPUT_FILE = "nvda_rag_ready.md"
# =========================
# LOAD CSV (ROBUST)
# =========================

def load_csv(file_path):
    """Load an OHLCV CSV and normalize it to canonical columns.

    Accepts common header aliases (case-insensitive) and returns a
    DataFrame with columns time/open/high/low/close/volume, where the
    price/volume columns are floats and time is kept as a string.

    Raises ValueError if a required column cannot be found.
    """
    df = pd.read_csv(file_path)
    # Normalize column names so the alias mapping below is case-insensitive.
    df.columns = [c.strip().lower() for c in df.columns]
    # Flexible mapping: canonical name -> accepted aliases.
    col_map = {
        "time": ["time", "timestamp", "date"],
        "open": ["open"],
        "high": ["high"],
        "low": ["low"],
        "close": ["close"],
        "volume": ["volume", "vol"],
    }
    mapped = {}
    for key, options in col_map.items():
        for opt in options:
            if opt in df.columns:
                mapped[key] = opt
                break
    df = df.rename(columns={v: k for k, v in mapped.items()})
    # Ensure required columns exist after renaming.
    required = ["time", "open", "high", "low", "close", "volume"]
    for r in required:
        if r not in df.columns:
            raise ValueError(f"Missing column: {r}")
    # Convert types. The original had a no-op assignment for "time";
    # keep dates as plain strings (TradingView exports ISO dates), but
    # make that choice explicit.
    df["time"] = df["time"].astype(str)
    for col in ["open", "high", "low", "close", "volume"]:
        df[col] = df[col].astype(float)
    return df.reset_index(drop=True)
# =========================
# FEATURE ENGINEERING
# =========================

def compute_features(chunk):
    """Compute window-level features for one OHLCV chunk.

    Returns a dict with:
      return       -- last close vs. first open, relative
      volatility   -- std dev of daily close-to-close returns
      avg_range    -- mean of (high - low) / close
      slope        -- least-squares slope of the close series over bar index
      volume_trend -- "increasing"/"decreasing" (last bar volume vs. first)
    """
    # Renamed from `open` to avoid shadowing the builtin open().
    open_prices = chunk["open"]
    close = chunk["close"]
    high = chunk["high"]
    low = chunk["low"]
    volume = chunk["volume"]
    returns = (close.iloc[-1] - open_prices.iloc[0]) / open_prices.iloc[0]
    volatility = close.pct_change().std()
    avg_range = ((high - low) / close).mean()
    # Degree-1 polynomial fit; [0] is the slope coefficient.
    x = np.arange(len(close))
    slope = np.polyfit(x, close, 1)[0]
    volume_trend = "increasing" if volume.iloc[-1] > volume.iloc[0] else "decreasing"
    return {
        "return": returns,
        "volatility": volatility,
        "avg_range": avg_range,
        "slope": slope,
        "volume_trend": volume_trend,
    }
# =========================
# SEMANTICS
# =========================

def describe_market(features):
    """Map quantitative features to coarse labels.

    Returns a (trend, volatility, momentum) tuple of strings.
    """
    r = features["return"]
    slope = features["slope"]
    # NOTE: deliberately asymmetric per the original logic — "bullish"
    # requires both return > 2% and a positive slope, while "bearish"
    # only requires return < -2%.
    if r > 0.02 and slope > 0:
        trend = "bullish"
    elif r < -0.02:
        trend = "bearish"
    else:
        trend = "sideways"
    if features["volatility"] > 0.02:
        vol = "high"
    elif features["volatility"] > 0.01:
        vol = "medium"
    else:
        vol = "low"
    momentum = "increasing" if slope > 0 else "decreasing"
    return trend, vol, momentum
def generate_tags(features, trend):
    """Build the list of string tags describing one window."""
    tags = []
    # Directional tag via lookup instead of an if/elif chain;
    # "sideways" maps to no tag, exactly as before.
    directional = {"bullish": "bullish_trend", "bearish": "bearish_trend"}.get(trend)
    if directional is not None:
        tags.append(directional)
    if features["volatility"] > 0.02:
        tags.append("high_volatility")
    if features["volume_trend"] == "increasing":
        tags.append("volume_increase")
    if abs(features["return"]) > 0.05:
        tags.append("strong_move")
    return tags
# =========================
# FORMAT
# =========================

def format_chunk(chunk, features, trend, vol, momentum, tags, symbol="NVDA"):
    """Render one window as a Markdown section.

    `symbol` generalizes the originally hard-coded "NVDA" ticker; the
    default keeps existing call sites unchanged.
    """
    start = chunk.iloc[0]["time"]
    end = chunk.iloc[-1]["time"]
    text = []
    text.append(f"## {symbol} | Period: {start} → {end}\n")
    text.append("### Market Structure")
    text.append(f"Trend: {trend}")
    text.append(f"Volatility: {vol}")
    text.append(f"Momentum: {momentum}\n")
    text.append("### Quantitative Features")
    text.append(f"return: {features['return']:.4f}")
    text.append(f"volatility: {features['volatility']:.4f}")
    text.append(f"range_avg: {features['avg_range']:.4f}")
    text.append(f"volume_trend: {features['volume_trend']}\n")
    text.append("### Tags")
    text.append(f"{tags}\n")
    text.append("### Raw Data")
    for _, row in chunk.iterrows():
        line = (
            f"{row['time']} | o:{row['open']:.2f} h:{row['high']:.2f} "
            f"l:{row['low']:.2f} c:{row['close']:.2f} v:{int(row['volume'])}"
        )
        text.append(line)
    # Section separator for the concatenated Markdown file.
    text.append("\n---\n")
    return "\n".join(text)
# =========================
# PIPELINE
# =========================

def build_rag_file(df):
    """Slide an overlapping window over df and join the formatted chunks.

    Consecutive windows start WINDOW_SIZE - OVERLAP rows apart, so each
    window shares OVERLAP rows with the previous one.
    """
    chunks = []
    step = WINDOW_SIZE - OVERLAP
    for i in range(0, len(df), step):
        chunk = df.iloc[i:i + WINDOW_SIZE]
        # The tail of the data can yield a 1-row window; pct_change().std()
        # and polyfit need at least 2 points, so skip degenerate windows.
        if len(chunk) < 2:
            continue
        features = compute_features(chunk)
        trend, vol, momentum = describe_market(features)
        tags = generate_tags(features, trend)
        formatted = format_chunk(chunk, features, trend, vol, momentum, tags)
        chunks.append(formatted)
    return "\n".join(chunks)
# =========================
# RUN
# =========================

# Markdown rendering of the original post ate the dunder underscores;
# the guard must be __name__ == "__main__".
if __name__ == "__main__":
    df = load_csv(INPUT_FILE)
    rag_text = build_rag_file(df)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(rag_text)
    print("RAG-ready Markdown created:", OUTPUT_FILE)
Maybe this approach will help you.
Note that you would also have to adapt the entire ontology generator, as well as the OASIS profile generator.
Best Regards
This is a reply to closed issue #279 @AMALSHINY0000.
I have a similar idea and my approach is as follows:
I'm downloading chart data for a stock (in my example NVDA from 2020 until present 1D Timeframe) from TradingView as a .csv file.
time,open,high,low,close,Volume 2020-01-02,5.96875,5.99775,5.918,5.99775,237678760 2020-01-03,5.8775,5.94575,5.8525,5.90175,205772320 2020-01-06,5.808,5.93175,5.78175,5.9265,262912480 2020-01-07,5.955,6.04425,5.90975,5.99825,319205760 2020-01-08,5.994,6.051,5.953725,6.0095,277240680 2020-01-09,6.09625,6.1482425,6.021375,6.0755,255442120 2020-01-10,6.18325,6.21375,6.09375,6.108,316462400 2020-01-13,6.1915,6.32471,6.16875,6.2995,320338040
Then a Python script creates an .md file containing overlapping time windows of OHLCV data with precomputed features (trend, volatility, momentum, return, volume trend, tags).
Example output of the script
`## NVDA | Period: 2020-01-02 → 2020-01-30
Market Structure
Trend: bullish
Volatility: medium
Momentum: increasing
Quantitative Features
return: 0.0296
volatility: 0.0171
range_avg: 0.0216
volume_trend: increasing
Tags
['bullish_trend', 'volume_increase']
Raw Data
2020-01-02 | o:5.97 h:6.00 l:5.92 c:6.00 v:237678760
2020-01-03 | o:5.88 h:5.95 l:5.85 c:5.90 v:205772320
2020-01-06 | o:5.81 h:5.93 l:5.78 c:5.93 v:262912480
2020-01-07 | o:5.96 h:6.04 l:5.91 c:6.00 v:319205760
2020-01-08 | o:5.99 h:6.05 l:5.95 c:6.01 v:277240680
2020-01-09 | o:6.10 h:6.15 l:6.02 c:6.08 v:255442120
2020-01-10 | o:6.18 h:6.21 l:6.09 c:6.11 v:316462400
2020-01-13 | o:6.19 h:6.32 l:6.17 c:6.30 v:320338040
2020-01-14 | o:6.26 h:6.28 l:6.17 c:6.18 v:359579320
2020-01-15 | o:6.19 h:6.22 l:6.11 c:6.14 v:263105440
2020-01-16 | o:6.19 h:6.23 l:6.17 c:6.22 v:284191640
2020-01-17 | o:6.24 h:6.25 l:6.18 c:6.23 v:253790560
2020-01-21 | o:6.20 h:6.23 l:6.16 c:6.20 v:217917000
2020-01-22 | o:6.24 h:6.34 l:6.22 c:6.25 v:239238680
2020-01-23 | o:6.29 h:6.33 l:6.20 c:6.32 v:244514440
2020-01-24 | o:6.44 h:6.49 l:6.21 c:6.26 v:373513560
2020-01-27 | o:5.96 h:6.06 l:5.81 c:6.00 v:470534160
2020-01-28 | o:6.07 h:6.23 l:6.02 c:6.20 v:310976040
2020-01-29 | o:6.18 h:6.22 l:6.10 c:6.14 v:259451160
2020-01-30 | o:6.04 h:6.16 l:5.99 c:6.15 v:290503600
---`
Here is my python code:
import pandas as pd
import numpy as np
# =========================
# CONFIG
# =========================

# Rows per window, and how many rows consecutive windows share.
WINDOW_SIZE = 20
OVERLAP = 5
INPUT_FILE = "nvda.csv"
OUTPUT_FILE = "nvda_rag_ready.md"
=========================
LOAD CSV (ROBUST)
=========================
def load_csv(file_path):
df = pd.read_csv(file_path)
=========================
FEATURE ENGINEERING
=========================
def compute_features(chunk):
open = chunk["open"]
close = chunk["close"]
high = chunk["high"]
low = chunk["low"]
volume = chunk["volume"]
=========================
SEMANTICS
=========================
def describe_market(features):
r = features["return"]
slope = features["slope"]
def generate_tags(features, trend):
tags = []
=========================
FORMAT
=========================
def format_chunk(chunk, features, trend, vol, momentum, tags):
start = chunk.iloc[0]["time"]
end = chunk.iloc[-1]["time"]
=========================
PIPELINE
=========================
def build_rag_file(df):
chunks = []
step = WINDOW_SIZE - OVERLAP
for i in range(0, len(df), step):
chunk = df.iloc[i:i + WINDOW_SIZE]
features = compute_features(chunk)
trend, vol, momentum = describe_market(features)
tags = generate_tags(features, trend)
# =========================
# RUN
# =========================

# Markdown rendering of the original post ate the dunder underscores;
# the guard must be __name__ == "__main__".
if __name__ == "__main__":
    df = load_csv(INPUT_FILE)
    rag_text = build_rag_file(df)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(rag_text)
Maybe this approach will help you.
Note that you would also have to adapt the entire ontology generator, as well as the OASIS profile generator.
Best Regards