Extracting Ingested Data to CSV


#1

Hello,
Is there a built-in method to extract the ingested data to CSV? Currently, the only approach I can think of is outputting the data to CSV as part of a backtest run, but my implementation of that is very clumsy.
Does anyone have a good way to do that?

Thanks!


#2

I haven’t seen a built-in method, but you can get a DataFrame from data.history() and then write that DataFrame to a CSV file.
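For example, a minimal sketch along those lines (the trading pair, bar count, and output file name are just placeholders for whatever you ingested):

from catalyst.api import symbol

def handle_data(context, data):
    # Pull the last 365 daily OHLCV bars for the asset from the ingested
    # bundle; with several fields requested, data.history() returns a
    # pandas DataFrame with one column per field.
    bars = data.history(
        symbol('btc_usd'),
        fields=['open', 'high', 'low', 'close', 'volume'],
        bar_count=365,
        frequency='1D',
    )
    # pandas handles the CSV formatting.
    bars.to_csv('btc_usd_daily.csv')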


#3

Hi @dumberchild,

There is no built-in method for that, but you can refer to this section in the docs for ideas on how to save market data to a CSV.
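One pattern in that spirit (a sketch, not the exact docs snippet; the pair and file name are placeholders) is to record() the fields you care about on every bar and write them out from analyze(), since the performance DataFrame passed to analyze() contains a column for every recorded variable:

from catalyst.api import record, symbol

def handle_data(context, data):
    # Each keyword passed to record() becomes a column in the
    # performance DataFrame handed to analyze() after the run.
    record(
        close=data.current(symbol('btc_usd'), 'close'),
        volume=data.current(symbol('btc_usd'), 'volume'),
    )

def analyze(context, perf):
    # perf is indexed by bar timestamp; keep only the recorded columns
    # and dump them once the backtest finishes.
    perf[['close', 'volume']].to_csv('btc_usd_recorded.csv')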

Lena


#4

I’m having some trouble with this as well. I am following the Catalyst docs and I am able to save the data into a CSV, but my algorithm file does not seem to be reading from it; I believe it is still using the ingested values, since I have not seen any speed improvement: a one-year backtest on a 1D timeframe still takes about three hours.

# Simple RSI script.  When the asset becomes oversold, we're going to buy; when
# it becomes overbought, we're going to sell.
import os
import tempfile
import time

import numpy as np
import pandas as pd
import talib
from logbook import Logger
from datetime import timedelta

from catalyst import run_algorithm
from catalyst.api import symbol, record, order_target_percent
from catalyst.exchange.utils.stats_utils import extract_transactions
# Catalyst will create the `.catalyst/data/live_algos`
# directory. If we stop and start the algorithm, Catalyst will resume its
# state using the files included in the folder.
from catalyst.utils.paths import ensure_directory

NAMESPACE = 'RSI'
log = Logger(NAMESPACE)


# To run an algorithm in Catalyst, you need two functions: initialize and
# handle_data.

def initialize(context):
    # This initialize function sets any data or variables that you'll use in
    # your algorithm.  For instance, you'll want to define the trading pair (or
    # trading pairs) you want to backtest.  You'll also want to define any
    # parameters or values you're going to use.
    
    context.data=pd.read_csv("grabdata.csv")
    data = context.data
    data.head()
    #print (data)
    
    # In our example, we're looking at BTC in USD
    context.market = symbol('btc_usd')
    context.asset = symbol('btc_usd')
    context.base_price = None
    context.current_day = None

    context.RSI_OVERSOLD = 30 #Very oversold conditions (buy)
    context.RSI_OVERBOUGHT = 70 #Very overbought conditions (sell)
    context.CANDLE_SIZE = '1D' #15T = 15min

    context.start_time = time.time()

    # Fees and slippage
    context.set_commission(maker=0.001, taker=0.002)
    context.set_slippage(slippage=0.001)
    
    context.i = 0

### ****ALGORITHMS****
def handle_data(context, data):
    # This handle_data function is where the real work is done.  Our data is
    # minute-level tick data, and each minute is called a frame.  This function
    # runs on each frame of the data.
    
    open = data.current(context.asset, 'open')
    high = data.current(context.asset, 'high')
    low = data.current(context.asset, 'low')
    close = data.current(context.asset, 'close')
    volume = data.current(context.asset, 'volume')

    # We flag the first period of each day.
    # Since cryptocurrencies trade 24/7 the `before_trading_starts` handle
    # would only execute once. This method works with minute and daily
    # frequencies.
    today = data.current_dt.floor('1D')
    if today != context.current_day:
        context.traded_today = False
        context.current_day = today

    # We pull a history of closing prices for the asset defined above in the
    # context.market variable.  For this example, we request 50 bars at the
    # candle size configured in initialize (1D).

    # The frequency attribute determines the bar size. We use this convention
    # for the frequency alias:
    # http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    prices = data.history(
        context.market,
        fields='close',
        bar_count=50,
        frequency=context.CANDLE_SIZE
    )

    # Ta-lib calculates various technical indicators based on price and
    # volume arrays.
    
    # define the windows for the moving averages
    short_window = 50
    long_window = 200
    
    # Skip as many bars as long_window to properly compute the average
    context.i += 1
    if context.i < long_window:
        return
    
    # Compute moving averages by calling data.history() for each
    # moving average with the appropriate parameters. We use
    # minute bars for these calls -> frequency="1T".
    # Each call returns a pandas Series.
    short_data = data.history(context.asset,
                              'price',
                              bar_count=short_window,
                              frequency="1T",
                              )
    short_mavg = short_data.mean()
    long_data = data.history(context.asset,
                             'price',
                             bar_count=long_window,
                             frequency="1T",
                             )
    long_mavg = long_data.mean()

    # Simple RSI indicator
    rsi = talib.RSI(prices.values, timeperiod=14)

    # We need a variable for the current price of the asset to compare to
    # the average. Since we are requesting several fields for a single asset,
    # data.current() returns a pandas Series indexed by field name.
    current = data.current(context.market, fields=['open','high','low','close', 'volume'])
    close_price = current['close']
    current_price = data.current(context.asset, 'price')

    # If base_price is not set, we use the current value. This is the
    # price at the first bar which we reference to calculate price_change.
    if context.base_price is None:
        context.base_price = close_price

    price_change = (close_price - context.base_price) / context.base_price
    cash = context.portfolio.cash

    # Now that we've collected all current data for this frame, we use
    # the record() method to save it. This data will be available as
    # a parameter of the analyze() function for further analysis.

    record(
        volume=current['volume'],
        open=open,
        high=high,
        low=low,
        close_price=close,
        current_price=current_price,
        price_change=price_change,
        rsi=rsi[-1],
        cash=cash
    )
    
    # We are trying to avoid over-trading by limiting our trades to
    # one per day.
    if context.traded_today:
        return

    # TODO: retest with open orders
    # Since we are using limit orders, some orders may not execute immediately,
    # so we wait until all open orders fill before considering more trades.
    orders = context.blotter.open_orders
    if len(orders) > 0:
        log.info('exiting because orders are open: {}'.format(orders))
        return

    # Exit if we cannot trade
    if not data.can_trade(context.market):
        return

    # Another powerful built-in feature of the Catalyst backtester is the
    # portfolio object.  The portfolio object tracks your positions, cash,
    # cost basis of specific holdings, and more.  In this line, we calculate
    # how long or short our position is on this bar.
    pos_amount = context.portfolio.positions[context.market].amount

    # Trading Logic. Buy if OVERSOLD, Sell if OVERBOUGHT
    if rsi[-1] <= context.RSI_OVERSOLD and pos_amount == 0: #BUY
        log.info(
            '{}: \nBuying Price: {}, \nRSI: {}'.format(
                data.current_dt, close_price, rsi[-1]
            )
        )
        # Set a limit price slightly above the close for the buy order.
        limit_price = close_price * 1.005
        order_target_percent(
            context.market, 1, limit_price=limit_price
        )
        context.traded_today = True

    elif rsi[-1] >= context.RSI_OVERBOUGHT and pos_amount > 0: #SELL
        log.info(
            '{}: \nSelling Price: {}, \nRSI: {}'.format(
                data.current_dt, close_price, rsi[-1]
            )
        )
        limit_price = close_price * 0.995
        order_target_percent(
            context.market, 0, limit_price=limit_price
        )
        context.traded_today = True

#5

See here: sam31415/catalyst/blob/develop/research/data_accessor.py