Skip to content

Stock & Crypto Market Prediction Capability Reply #279 #535

@thomasfischer089

Description

@thomasfischer089

This is a reply to closed issue #279 @AMALSHINY0000.

I have a similar idea and my approach is as follows:
I'm downloading chart data for a stock (in my example NVDA from 2020 until present 1D Timeframe) from TradingView as a .csv file.
time,open,high,low,close,Volume 2020-01-02,5.96875,5.99775,5.918,5.99775,237678760 2020-01-03,5.8775,5.94575,5.8525,5.90175,205772320 2020-01-06,5.808,5.93175,5.78175,5.9265,262912480 2020-01-07,5.955,6.04425,5.90975,5.99825,319205760 2020-01-08,5.994,6.051,5.953725,6.0095,277240680 2020-01-09,6.09625,6.1482425,6.021375,6.0755,255442120 2020-01-10,6.18325,6.21375,6.09375,6.108,316462400 2020-01-13,6.1915,6.32471,6.16875,6.2995,320338040

Then a Python script creates a .md file containing overlapping time windows of OHLCV data with precomputed features (trend, volatility, momentum, return, volume trend, tags).

Example output of the script
`## NVDA | Period: 2020-01-02 → 2020-01-30

Market Structure

Trend: bullish
Volatility: medium
Momentum: increasing

Quantitative Features

return: 0.0296
volatility: 0.0171
range_avg: 0.0216
volume_trend: increasing

Tags

['bullish_trend', 'volume_increase']

Raw Data

2020-01-02 | o:5.97 h:6.00 l:5.92 c:6.00 v:237678760
2020-01-03 | o:5.88 h:5.95 l:5.85 c:5.90 v:205772320
2020-01-06 | o:5.81 h:5.93 l:5.78 c:5.93 v:262912480
2020-01-07 | o:5.96 h:6.04 l:5.91 c:6.00 v:319205760
2020-01-08 | o:5.99 h:6.05 l:5.95 c:6.01 v:277240680
2020-01-09 | o:6.10 h:6.15 l:6.02 c:6.08 v:255442120
2020-01-10 | o:6.18 h:6.21 l:6.09 c:6.11 v:316462400
2020-01-13 | o:6.19 h:6.32 l:6.17 c:6.30 v:320338040
2020-01-14 | o:6.26 h:6.28 l:6.17 c:6.18 v:359579320
2020-01-15 | o:6.19 h:6.22 l:6.11 c:6.14 v:263105440
2020-01-16 | o:6.19 h:6.23 l:6.17 c:6.22 v:284191640
2020-01-17 | o:6.24 h:6.25 l:6.18 c:6.23 v:253790560
2020-01-21 | o:6.20 h:6.23 l:6.16 c:6.20 v:217917000
2020-01-22 | o:6.24 h:6.34 l:6.22 c:6.25 v:239238680
2020-01-23 | o:6.29 h:6.33 l:6.20 c:6.32 v:244514440
2020-01-24 | o:6.44 h:6.49 l:6.21 c:6.26 v:373513560
2020-01-27 | o:5.96 h:6.06 l:5.81 c:6.00 v:470534160
2020-01-28 | o:6.07 h:6.23 l:6.02 c:6.20 v:310976040
2020-01-29 | o:6.18 h:6.22 l:6.10 c:6.14 v:259451160
2020-01-30 | o:6.04 h:6.16 l:5.99 c:6.15 v:290503600

---`

Here is my python code:
import pandas as pd
import numpy as np

=========================

CONFIG

=========================

# Sliding-window parameters: each window holds WINDOW_SIZE rows and
# consecutive windows share OVERLAP rows (step = WINDOW_SIZE - OVERLAP).
WINDOW_SIZE = 20
OVERLAP = 5

INPUT_FILE = "nvda.csv"  # TradingView OHLCV export (1D candles)
OUTPUT_FILE = "nvda_rag_ready.md"  # Markdown output, one section per window

=========================

LOAD CSV (ROBUST)

=========================

def load_csv(file_path):
    """Load an OHLCV CSV and normalize it to a canonical schema.

    Column names are lower-cased and mapped onto the canonical names
    time/open/high/low/close/volume, accepting common aliases
    ("timestamp"/"date" for time, "vol" for volume).

    Args:
        file_path: path or file-like object accepted by ``pd.read_csv``.

    Returns:
        DataFrame with string ``time`` and float OHLCV columns,
        index reset to 0..n-1.

    Raises:
        ValueError: if a required column cannot be found.
    """
    df = pd.read_csv(file_path)

    # normalize column names
    df.columns = [c.strip().lower() for c in df.columns]

    # flexible mapping: canonical name -> accepted aliases (first match wins)
    col_map = {
        "time": ["time", "timestamp", "date"],
        "open": ["open"],
        "high": ["high"],
        "low": ["low"],
        "close": ["close"],
        "volume": ["volume", "vol"],
    }

    mapped = {}
    for key, options in col_map.items():
        for opt in options:
            if opt in df.columns:
                mapped[key] = opt
                break

    df = df.rename(columns={v: k for k, v in mapped.items()})

    # ensure required columns exist
    required = ["time", "open", "high", "low", "close", "volume"]
    for r in required:
        if r not in df.columns:
            raise ValueError(f"Missing column: {r}")

    # convert types: keep time as a plain string (it is only echoed into the
    # output; the original `df["time"] = df["time"]` line was a no-op),
    # coerce every numeric column to float
    df["time"] = df["time"].astype(str)
    for col in ["open", "high", "low", "close", "volume"]:
        df[col] = df[col].astype(float)

    return df.reset_index(drop=True)

=========================

FEATURE ENGINEERING

=========================

def compute_features(chunk):
    """Compute quantitative features for one OHLCV window.

    Args:
        chunk: DataFrame with float columns open/high/low/close/volume
            (at least 2 rows so returns and the slope fit are defined).

    Returns:
        dict with keys: return (window return vs. first open),
        volatility (std of daily close-to-close returns),
        avg_range (mean intraday range relative to close),
        slope (least-squares trend of the closes, price units per bar),
        volume_trend ("increasing"/"decreasing").
    """
    # NOTE: the original bound the column to a local named ``open``,
    # shadowing the builtin; renamed to ``open_``.
    open_ = chunk["open"]
    close = chunk["close"]
    high = chunk["high"]
    low = chunk["low"]
    volume = chunk["volume"]

    # total return over the window, relative to the first open
    returns = (close.iloc[-1] - open_.iloc[0]) / open_.iloc[0]
    # dispersion of daily percentage moves
    volatility = close.pct_change().std()
    # average candle range normalized by the close
    avg_range = ((high - low) / close).mean()

    # slope of a degree-1 least-squares fit through the closes
    x = np.arange(len(close))
    slope = np.polyfit(x, close, 1)[0]

    # crude proxy: compare last bar's volume against the first bar's
    volume_trend = "increasing" if volume.iloc[-1] > volume.iloc[0] else "decreasing"

    return {
        "return": returns,
        "volatility": volatility,
        "avg_range": avg_range,
        "slope": slope,
        "volume_trend": volume_trend,
    }

=========================

SEMANTICS

=========================

def describe_market(features):
    """Translate numeric features into (trend, volatility, momentum) labels.

    Args:
        features: dict produced by ``compute_features``.

    Returns:
        Tuple of three strings: trend ("bullish"/"bearish"/"sideways"),
        volatility bucket ("high"/"medium"/"low"),
        momentum ("increasing"/"decreasing").
    """
    ret = features["return"]
    slope = features["slope"]
    vol_value = features["volatility"]

    # bullish needs both a >2% gain and an upward slope;
    # bearish only needs a <-2% return
    if ret > 0.02 and slope > 0:
        trend = "bullish"
    elif ret < -0.02:
        trend = "bearish"
    else:
        trend = "sideways"

    # volatility buckets at 1% and 2%
    vol = "high" if vol_value > 0.02 else ("medium" if vol_value > 0.01 else "low")

    momentum = "increasing" if slope > 0 else "decreasing"

    return trend, vol, momentum

def generate_tags(features, trend):
    """Build the list of categorical tags for one window.

    Args:
        features: dict produced by ``compute_features``.
        trend: trend label from ``describe_market``.

    Returns:
        List of tag strings (possibly empty), in a fixed order:
        trend tag, volatility, volume, magnitude.
    """
    tags = []

    # map the trend label to its tag; "sideways" contributes nothing
    trend_tag = {"bullish": "bullish_trend", "bearish": "bearish_trend"}.get(trend)
    if trend_tag is not None:
        tags.append(trend_tag)

    if features["volatility"] > 0.02:
        tags.append("high_volatility")

    if features["volume_trend"] == "increasing":
        tags.append("volume_increase")

    # any move larger than 5% in either direction counts as strong
    if abs(features["return"]) > 0.05:
        tags.append("strong_move")

    return tags

=========================

FORMAT

=========================

def format_chunk(chunk, features, trend, vol, momentum, tags):
    """Render one window as a Markdown section.

    Emits a header with the date period, the market-structure labels, the
    quantitative features, the tag list, the raw OHLCV rows, and a trailing
    ``---`` separator.

    Returns:
        The section as a single newline-joined string.
    """
    period_start = chunk.iloc[0]["time"]
    period_end = chunk.iloc[-1]["time"]

    lines = [
        f"## NVDA | Period: {period_start} → {period_end}\n",
        "### Market Structure",
        f"Trend: {trend}",
        f"Volatility: {vol}",
        f"Momentum: {momentum}\n",
        "### Quantitative Features",
        f"return: {features['return']:.4f}",
        f"volatility: {features['volatility']:.4f}",
        f"range_avg: {features['avg_range']:.4f}",
        f"volume_trend: {features['volume_trend']}\n",
        "### Tags",
        f"{tags}\n",
        "### Raw Data",
    ]

    # one row per candle, prices at 2 decimals, volume as an integer
    for _, row in chunk.iterrows():
        lines.append(
            f"{row['time']} | o:{row['open']:.2f} h:{row['high']:.2f} "
            f"l:{row['low']:.2f} c:{row['close']:.2f} v:{int(row['volume'])}"
        )

    lines.append("\n---\n")
    return "\n".join(lines)

=========================

PIPELINE

=========================

def build_rag_file(df):
    """Slice ``df`` into overlapping windows and render them all as Markdown.

    Windows advance by WINDOW_SIZE - OVERLAP rows, so consecutive windows
    share OVERLAP rows of data.

    Args:
        df: normalized OHLCV DataFrame from ``load_csv``.

    Returns:
        All formatted sections joined with newlines.

    Raises:
        ValueError: if OVERLAP >= WINDOW_SIZE (the window would never advance).
    """
    step = WINDOW_SIZE - OVERLAP
    if step <= 0:
        # range() raises on step == 0 and a negative step loops nothing;
        # fail loudly with a clear message instead
        raise ValueError("OVERLAP must be smaller than WINDOW_SIZE")

    chunks = []
    for i in range(0, len(df), step):
        chunk = df.iloc[i:i + WINDOW_SIZE]
        # a trailing window of a single row has no returns to measure and
        # makes the degree-1 polyfit degenerate — skip it
        if len(chunk) < 2:
            continue
        features = compute_features(chunk)
        trend, vol, momentum = describe_market(features)
        tags = generate_tags(features, trend)

        chunks.append(format_chunk(chunk, features, trend, vol, momentum, tags))

    return "\n".join(chunks)

=========================

RUN

=========================

# Script entry point. The pasted original read ``if name == "main":`` —
# the dunder underscores were eaten by markdown italics; without them the
# guard raises NameError at import time.
if __name__ == "__main__":
    df = load_csv(INPUT_FILE)
    rag_text = build_rag_file(df)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(rag_text)

    print("RAG-ready Markdown created:", OUTPUT_FILE)

Maybe this approach will help you.
But you have to edit the entire ontology generator, also the oasis profile generator

Best Regards

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions