107 changes: 64 additions & 43 deletions data/tiingo_data_fetcher.py
@@ -1,8 +1,10 @@
import os
import time

import pandas as pd
import requests
from dotenv import load_dotenv
from tqdm import tqdm

# Load the .env.local file if it exists, otherwise load .env
if os.path.exists(".env.local"):
@@ -76,56 +78,75 @@ def fetch_tiingo_stock_data(self, symbol, start_date, end_date, frequency="daily
return df

def fetch_tiingo_crypto_data(self, symbol, start_date, end_date, frequency="5min"):
"""Fetch historical cryptocurrency data from Tiingo."""
"""Fetch historical cryptocurrency data from Tiingo in 3-day batches."""

filename = self._generate_filename(symbol, start_date, end_date, frequency)

# Check if the CSV file already exists
if os.path.exists(filename):
print(f"Loading stock data from {filename}...")
print(f"Loading crypto data from {filename}...")
return pd.read_csv(filename)

# Define the URL, headers, and parameters for the request
url = f"{BASE_APIURL}/tiingo/crypto/prices"
headers = {
"Content-Type": "application/json",
"Authorization": f"Token {TIINGO_API_KEY}",
}
params = {
"tickers": symbol,
"startDate": start_date,
"endDate": end_date,
# The minimum value is "1min". Units in minutes (min), hours (hour), and days (day) are accepted.
# Format is # + (min/hour/day); e.g. "15min", "4hour" or "1day".
# If no value is provided, defaults to 5min.
"resampleFreq": frequency,
}

# Send request to Tiingo API
try:
response = requests.get(url, headers=headers, params=params, timeout=10)
response.raise_for_status() # Raise an exception for HTTP errors
except requests.exceptions.RequestException as e:
print(f"Request error: {e}")
return pd.DataFrame()

# Parse JSON response
try:
data = response.json()
if not data or "priceData" not in data[0]:
print(f"No crypto data found for {symbol}")
return pd.DataFrame()
except (ValueError, KeyError) as e:
print(f"Error parsing response data: {e}")
return pd.DataFrame()

df = self._normalize_tiingo_data(data[0]["priceData"], symbol)

# Save the fetched data to CSV
print(f"Saving crypto data to {filename}...")
df.to_csv(filename, index=False)

return df
# Convert dates to pandas datetime for easier manipulation
start = pd.to_datetime(start_date)
end = pd.to_datetime(end_date)

# Calculate total number of 3-day periods
total_days = (end - start).days
total_batches = (total_days + 2) // 3 # Round up to nearest batch
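# e.g. a 10-day range gives (10 + 2) // 3 = 4 batches, i.e. ceiling division by 3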

# Initialize empty DataFrame for all results
all_data = pd.DataFrame()

# Process in 3-day chunks with progress bar
with tqdm(total=total_batches, desc=f"Fetching {symbol} data") as pbar:
current_start = start
while current_start < end:
current_end = min(current_start + pd.Timedelta(days=3), end)
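# e.g. starting 2024-01-01, the first batch spans 2024-01-01 to 2024-01-04; the boundary
# timestamp may appear in the next batch as well and is removed by drop_duplicates below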

# Define the URL, headers, and parameters for the request
url = f"{BASE_APIURL}/tiingo/crypto/prices"
headers = {
"Content-Type": "application/json",
"Authorization": f"Token {TIINGO_API_KEY}",
}
params = {
"tickers": symbol,
"startDate": current_start.strftime("%Y-%m-%d"),
"endDate": current_end.strftime("%Y-%m-%d"),
"resampleFreq": frequency,
}

# Send request to Tiingo API
try:
response = requests.get(url, headers=headers, params=params, timeout=10)
response.raise_for_status()

data = response.json()
if data and "priceData" in data[0]:
batch_df = self._normalize_tiingo_data(data[0]["priceData"], symbol)
all_data = pd.concat([all_data, batch_df], ignore_index=True)

# Add a small delay to avoid hitting rate limits
time.sleep(0.5)

except requests.exceptions.RequestException as e:
print(f"\nRequest error for period {current_start.date()} to {current_end.date()}: {e}")
except (ValueError, KeyError) as e:
print(f"\nError parsing response data: {e}")

current_start = current_end
pbar.update(1)

# Remove any duplicate rows that might occur at batch boundaries
all_data = all_data.drop_duplicates()

if not all_data.empty:
# Save the fetched data to CSV
print(f"Saving crypto data to {filename}...")
all_data.to_csv(filename, index=False)

return all_data

def _normalize_tiingo_data(self, data, asset_name):
"""Normalize Tiingo stock data to match the required schema."""
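For context, the batched crypto fetch above can be exercised with a small driver. The following is a minimal usage sketch, not part of the PR: it assumes the class in data/tiingo_data_fetcher.py is named TiingoDataFetcher (the actual class name is not visible in this diff) and that TIINGO_API_KEY is provided via .env or .env.local.

from data.tiingo_data_fetcher import TiingoDataFetcher  # hypothetical class name, not confirmed by this diff

fetcher = TiingoDataFetcher()

# Fetches 5-minute BTC/USD bars in 3-day batches; the result is cached to CSV,
# so a repeat call with the same arguments loads from disk instead of hitting the API.
df = fetcher.fetch_tiingo_crypto_data(
    symbol="btcusd",
    start_date="2024-01-01",
    end_date="2024-01-31",
    frequency="5min",
)
print(df.head())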
27 changes: 27 additions & 0 deletions train.py
@@ -2,6 +2,7 @@
# conda activate modelmaker
# pip install setuptools==72.1.0 Cython==3.0.11 numpy==1.24.3
# pip install -r requirements.txt
import os
import sys
from datetime import datetime, timedelta

@@ -81,6 +82,32 @@ def select_data(fetcher, default_selection=None, file_path=None):

if selection == "3":
print("You selected to load data from a CSV file.")

# Add numbered directory listing functionality
sets_dir = os.path.join(os.path.dirname(__file__), "data", "sets")
if os.path.exists(sets_dir):
print("\nAvailable files in data/sets:")
csv_files = [f for f in os.listdir(sets_dir) if f.endswith('.csv')]

if not csv_files:
print("No CSV files found in data/sets directory.")
else:
for idx, file in enumerate(csv_files, 1):
print(f"{idx}. {file}")
print()

if file_path is None:
file_selection = input("Enter the number of the file or provide a custom path: ").strip()
try:
file_idx = int(file_selection)
if 1 <= file_idx <= len(csv_files):
file_path = os.path.join(sets_dir, csv_files[file_idx - 1])
else:
print_colored("Invalid file number. Using provided path as-is.", "warning")
except ValueError:
# If input is not a number, treat it as a custom path
file_path = file_selection

if file_path is None:
file_path = input("Enter the CSV file path: ").strip()
return CSVLoader.load_csv(file_path)
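The numbered file selection added to select_data combines directory listing, input parsing, and fallback prompting in one block. A standalone sketch of the same idea, using a hypothetical helper name (pick_csv) that is not part of the PR:

import os

def pick_csv(sets_dir, user_input):
    # Resolve a user entry against data/sets: a number selects from the CSV
    # listing, anything else is treated as a custom path.
    csv_files = [f for f in os.listdir(sets_dir) if f.endswith(".csv")]
    try:
        idx = int(user_input)
    except ValueError:
        return user_input  # not a number: treat it as a custom path
    if 1 <= idx <= len(csv_files):
        return os.path.join(sets_dir, csv_files[idx - 1])
    return None  # out of range: caller should prompt again

# Example: pick_csv("data/sets", "2") returns the second CSV in the listing,
# while pick_csv("data/sets", "my/own/file.csv") returns the path unchanged.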