Simple machine learning strategy


#1

This is a slightly modified version (changed just enough to make it run properly) of a machine learning strategy I found in the Quantopian forums. As you’ll see from backtesting, it doesn’t work particularly well, but I figured it would be of interest, and possibly a good starting point for setting up a decent machine learning strategy.

import math as _math
import numbers as _numbers
import os
import tempfile
import time
from decimal import *

import numpy as np
import pandas as pd
from catalyst import run_algorithm
from catalyst.api import (
    cancel_order,
    date_rules,
    get_open_orders,
    get_order,
    order_target,
    order_target_percent,
    order_target_value,
    record,
    schedule_function,
    symbol,
    time_rules,
)
from catalyst.exchange.utils.stats_utils import extract_transactions
from catalyst.utils.paths import ensure_directory
from logbook import Logger
from sklearn.ensemble import RandomForestRegressor

# Namespace string: used as the logger's name here and passed to
# run_algorithm as ``algo_namespace`` in the __main__ section.
algo_namespace = 'BTCUSD'
# Module-level logbook logger.
log = Logger(algo_namespace)

def initialize(context):
    """Populate ``context`` with the strategy settings and schedule trading.

    Stores the traded market, the regression model, the window sizes and
    order targets read by ``rebalance``, sets commissions, remembers the
    wall-clock start time (read by ``analyze``) and registers ``rebalance``
    to run every day at market close.

    Args:
        context: the algorithm state object; attributes are attached to it.
    """
    # Precision of the ``decimal`` module's context.
    getcontext().prec = 8

    # What is traded and the model that forecasts it.
    context.market = symbol('btc_usd')
    context.model = RandomForestRegressor()

    # History request: field name, candle size and number of bars.
    context.price = 'price'
    context.candle_size = '1D'
    context.price_history = 180

    # Number of consecutive price changes in one feature window.
    context.lookback = 7
    # Extra bar subtracted/added by ``rebalance`` when sizing its windows
    # (presumably because differencing yields one fewer value than prices).
    context.current = 1

    # Threshold the prediction and position size are compared against.
    context.zero = 0
    # Portfolio fractions handed to order_target_percent on buy / sell.
    context.buy_pct = 1
    context.sell_pct = 0

    context.set_commission(maker=0.001, taker=0.002)
    context.start_time = time.time()

    schedule_function(
        rebalance,
        date_rules.every_day(),
        time_rules.market_close(),
    )

def rebalance(context, data):
    """Refit the model on recent price changes and trade on its forecast.

    Fetches ``context.price_history`` bars of ``context.price`` for
    ``context.market``, turns them into bar-to-bar price changes, and trains
    ``context.model`` to predict the next change from the preceding
    ``context.lookback`` changes. The most recent ``lookback`` changes are
    then fed to the model:

    * forecast above ``context.zero`` with no position held -> target
      ``context.buy_pct`` of the portfolio;
    * forecast below ``context.zero`` with a position held -> target
      ``context.sell_pct`` of the portfolio.

    Cash, the current price and the forecast price are recorded each call.
    If the history is too short to build a single training sample, a
    warning is logged and the call returns without trading or recording.

    Args:
        context: algorithm state populated by ``initialize``.
        data: bar-data object providing ``history`` and ``current``.
    """
    price = data.history(
        context.market,
        context.price,
        context.price_history,
        context.candle_size,
    )
    current_price = data.current(context.market, context.price)
    pos_amount = context.portfolio.positions[context.market].amount
    cash = context.portfolio.cash

    price_change = np.diff(price.values).tolist()
    lookback = context.lookback

    # Size the training set from the data actually returned, so a shorter
    # than requested history cannot index past the end of price_change.
    n_samples = len(price_change) - lookback
    if n_samples <= 0:
        log.warning(
            'not enough history to train: {} price changes, lookback {}'.format(
                len(price_change), lookback
            )
        )
        return

    # Each sample: `lookback` consecutive changes -> the change after them.
    X = [price_change[i:i + lookback] for i in range(n_samples)]
    Y = price_change[lookback:]
    context.model.fit(X, Y)

    # The latest `lookback` changes are already in the fetched window, so
    # no second history request is needed.
    latest_window = price_change[-lookback:]
    # predict() returns one value per input row; take the single element
    # explicitly instead of converting a 1-element array with float().
    prediction = float(context.model.predict([latest_window])[0])
    prediction_price = prediction + current_price

    if prediction > context.zero and pos_amount == context.zero:
        order_target_percent(context.market, context.buy_pct)
    elif prediction < context.zero and pos_amount > context.zero:
        order_target_percent(context.market, context.sell_pct)

    record(cash=cash, price=current_price, prediction_price=prediction_price)
    
def analyze(context=None, perf=None):
    """Log how long the run took and plot the backtest results.

    Draws two stacked axes sharing the x axis: the portfolio value, and the
    recorded price together with the recorded prediction price. Buy and sell
    transactions extracted from ``perf`` are overlaid on the second axis as
    green up-triangles and red down-triangles at the recorded price.

    Args:
        context: algorithm state; ``start_time``, ``exchanges`` and
            ``market`` are read from it.
        perf: performance DataFrame with ``portfolio_value``, ``price`` and
            ``prediction_price`` columns.
    """
    finished_at = time.time()
    log.info('elapsed time: {}'.format(finished_at - context.start_time))

    import matplotlib.pyplot as plt

    first_exchange = list(context.exchanges.values())[0]
    quote_currency = first_exchange.quote_currency.upper()

    # Top panel: portfolio value over time.
    value_axis = plt.subplot(611)
    perf.loc[:, 'portfolio_value'].plot(ax=value_axis)
    value_axis.set_ylabel('Portfolio\nValue\n({})'.format(quote_currency))

    # Second panel: actual price against the model's predicted price.
    price_axis = plt.subplot(612, sharex=value_axis)
    perf.loc[:, ['price', 'prediction_price']].plot(ax=price_axis, label='Price')
    price_axis.set_ylabel('{asset}\n({base})'.format(
        asset=context.market.symbol, base=quote_currency
    ))

    trades = extract_transactions(perf)
    if not trades.empty:
        # Positive amounts are buys, negative amounts are sells.
        marked_trades = (
            (trades[trades['amount'] > 0], '^', 'green'),
            (trades[trades['amount'] < 0], 'v', 'red'),
        )
        for fills, marker, colour in marked_trades:
            price_axis.scatter(
                fills.index.to_pydatetime(),
                perf.loc[fills.index.floor('1 min'), 'price'],
                marker=marker,
                s=100,
                c=colour,
                label=''
            )

    plt.gcf().set_size_inches(18, 18)
    plt.show()

if __name__ == '__main__':
    # Backtest: daily bars on Bitfinex, quoted in USD, starting with 10000
    # of capital, from 2018-01-01 to 2018-07-22 (UTC).
    backtest_settings = dict(
        capital_base=10000,
        data_frequency='daily',
        initialize=initialize,
        analyze=analyze,
        exchange_name='bitfinex',
        algo_namespace=algo_namespace,
        quote_currency='usd',
        start=pd.to_datetime('2018-1-1', utc=True),
        end=pd.to_datetime('2018-7-22', utc=True),
    )
    run_algorithm(**backtest_settings)


#2

Brilliant work, thanks for posting this. I’m struggling to integrate ML into my stat-arb strategy so this is a great help.


#3

Thanks, let me know if you need any help. What I posted is basically garbage compared to what I have learned in the months since I started playing with ML.