# ucb.py
# Multi-armed-bandit (UCB / Thompson sampling) coin selection, backtested with backtrader.
import math
import os
from datetime import datetime, timedelta

import backtrader as bt
import ccxt
import numpy as np
import pandas as pd
from scipy.stats import beta
  9. # Top 15 coins (symbols against USDT)
  10. coins = ['BTC/USDT', 'ETH/USDT', 'BNB/USDT', 'SOL/USDT', 'XRP/USDT', 'ADA/USDT', 'DOGE/USDT',
  11. 'AVAX/USDT', 'SHIB/USDT', 'DOT/USDT', 'LINK/USDT', 'TRX/USDT', 'UNI/USDT', 'LTC/USDT']
  12. exchange = ccxt.binance({'enableRateLimit': True})
  13. def fetch_ohlcv(symbol, timeframe='1h', days=90):
  14. # Create a filename based on symbol and timeframe
  15. safe_symbol = symbol.replace('/', '_')
  16. filename = f"ohlcv_{safe_symbol}_{timeframe}_{days}d.csv"
  17. if os.path.exists(filename):
  18. df = pd.read_csv(filename, parse_dates=['timestamp'], index_col='timestamp')
  19. return df
  20. since = int((datetime.now() - timedelta(days=days)).timestamp() * 1000)
  21. ohlcv = exchange.fetch_ohlcv(symbol, timeframe, since=since)
  22. df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
  23. df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
  24. df.set_index('timestamp', inplace=True)
  25. df.to_csv(filename)
  26. return df
  27. def compute_atr(df, period=14):
  28. high_low = df['high'] - df['low']
  29. high_close = np.abs(df['high'] - df['close'].shift())
  30. low_close = np.abs(df['low'] - df['close'].shift())
  31. tr = np.maximum(high_low, high_close, low_close)
  32. atr = tr.rolling(period).mean()
  33. return atr
  34. def compute_ema(df, short=12, long=26):
  35. df['ema_short'] = df['close'].ewm(span=short, adjust=False).mean()
  36. df['ema_long'] = df['close'].ewm(span=long, adjust=False).mean()
  37. df['trend'] = np.where(df['ema_short'] > df['ema_long'], 1, -1) # 1 = uptrend
  38. def compute_rewards(df):
  39. df['return'] = (df['close'] - df['open']) / df['open']
  40. df['atr'] = compute_atr(df)
  41. compute_ema(df)
  42. df['reward'] = df['return'] * df['trend'] / df['atr'].replace(0, np.nan) # Adjust by vol and trend
  43. if len(df) > 0:
  44. print('%d %f' % (len(df), df['reward'].iloc[len(df)-1]))
  45. else:
  46. print('DataFrame is empty, no rewards computed.')
  47. return df.dropna()
  48. class ThompsonSampling:
  49. def __init__(self, num_arms):
  50. self.alpha = np.ones(num_arms) # Successes +1
  51. self.beta = np.ones(num_arms) # Failures +1
  52. def select_arm(self):
  53. samples = [beta.rvs(a, b) for a, b in zip(self.alpha, self.beta)]
  54. return np.argmax(samples)
  55. def update(self, arm, reward): # Assume reward >0 is success
  56. self.alpha[arm] += 1 if reward > 0 else 0
  57. self.beta[arm] += 0 if reward > 0 else 1
  58. class UCB:
  59. def __init__(self, num_arms, c=2.0): # c is exploration constant
  60. self.num_arms = num_arms
  61. self.counts = np.zeros(num_arms) # Trades per coin
  62. self.mean_rewards = np.zeros(num_arms)
  63. self.total_pulls = 0
  64. self.c = c
  65. def select_arm(self):
  66. ucb_scores = np.zeros(self.num_arms)
  67. for i in range(self.num_arms):
  68. if self.counts[i] == 0:
  69. return i # Explore unpulled arms first
  70. ucb_scores[i] = self.mean_rewards[i] + self.c * math.sqrt(math.log(self.total_pulls) / self.counts[i])
  71. return np.argmax(ucb_scores)
  72. def update(self, arm, reward):
  73. self.counts[arm] += 1
  74. self.total_pulls += 1
  75. self.mean_rewards[arm] = (self.mean_rewards[arm] * (self.counts[arm] - 1) + reward) / self.counts[arm]
  76. class BanditStrategy(bt.Strategy):
  77. def __init__(self):
  78. self.ucb = UCB(len(coins)) # One arm per coin
  79. self.coin_map = {i: coin for i, coin in enumerate(coins)} # Map arm index to coin symbol
  80. # self.datas[0] is first coin, etc.
  81. def next(self):
  82. # Get current rewards for all coins (from custom 'reward' column)
  83. current_rewards = [self.datas[i].reward[0] for i in range(len(self.datas))] # [0] is current bar
  84. # Select arm (coin) using UCB
  85. arm = self.ucb.select_arm()
  86. # Get the data for the selected coin
  87. selected_data = self.datas[arm]
  88. # ucb_score = self.ucb.mean_rewards[arm] + self.ucb.c * math.sqrt(math.log(self.ucb.total_pulls ) / (self.ucb.counts[arm] + 1e-6))
  89. if self.ucb.total_pulls > 0:
  90. ucb_score = self.ucb.mean_rewards[arm] + self.ucb.c * math.sqrt(
  91. math.log(self.ucb.total_pulls) / (self.ucb.counts[arm] + 1e-6)
  92. )
  93. else:
  94. ucb_score = self.ucb.mean_rewards[arm]
  95. # Signal logic: Enter if score > threshold (e.g., 0.05)
  96. threshold = 0.05
  97. if ucb_score > threshold and not self.position: # Not already in position
  98. self.buy(data=selected_data, size=1) # Buy 1 unit (adjust for portfolio size)
  99. print(f"Entering trade on {self.coin_map[arm]} at {selected_data.close[0]}")
  100. # Exit logic: If in position, check for vol-adjusted profit target (e.g., 5-10%)
  101. if self.position:
  102. entry_price = self.position.price
  103. current_price = selected_data.close[0]
  104. atr = selected_data.atr[0] # Use custom ATR column
  105. profit_target = 0.05 + 0.05 * atr # Vol-adjusted (e.g., higher for volatile coins)
  106. if (current_price - entry_price) / entry_price > profit_target:
  107. self.sell(data=selected_data, size=self.position.size)
  108. print(f"Exiting trade on {self.coin_map[arm]} at {current_price}")
  109. # Update UCB with the realized reward for the selected arm
  110. realized_reward = current_rewards[arm] # Or calculate from trade if executed
  111. self.ucb.update(arm, realized_reward)
  112. class PandasCustom(bt.feeds.PandasData):
  113. lines = ('reward', 'atr',) # Add more if needed
  114. params = (
  115. ('reward', -1), # -1 means auto-detect column
  116. ('atr', -1),
  117. )
  118. def main():
  119. # Fetch data for all coins
  120. data = {coin: fetch_ohlcv(coin, '1h', 30) for coin in coins}
  121. # Apply to all data
  122. for coin in data:
  123. print(f"Computing rewards for {coin}...")
  124. data[coin] = compute_rewards(data[coin])
  125. # Usage example (simulate over time steps)
  126. ucb = UCB(len(coins))
  127. # for t in range(len(data[coins[0]])): # Assume aligned timestamps
  128. # arm = ucb.select_arm()
  129. # coin = coins[arm]
  130. # reward = data[coin].iloc[t]['reward'] # Or simulate trade reward
  131. # ucb.update(arm, reward)
  132. # min_len = min(len(df) for df in data.values()) # Ensure no out-of-bounds
  133. # print (min_len)
  134. # for t in range(min_len):
  135. # arm = ucb.select_arm()
  136. # coin = coins[arm]
  137. # reward = data[coin].iloc[t]['reward']
  138. # ucb.update(arm, reward)
  139. max_len = max(len(df) for df in data.values()) # Use the max length
  140. # for t in range(max_len):
  141. # arm = ucb.select_arm()
  142. # coin = coins[arm]
  143. # df = data[coin]
  144. # if t < len(df):
  145. # reward = df.iloc[t]['reward']
  146. # ucb.update(arm, reward)
  147. # # else: skip this step for this coin
  148. for t in range(max_len):
  149. arm = ucb.select_arm()
  150. coin = coins[arm]
  151. df = data[coin]
  152. # Check if DataFrame is non-empty and has 'reward' column
  153. if len(df) > 0 and 'reward' in df.columns and t < len(df):
  154. reward = df.iloc[t]['reward']
  155. ucb.update(arm, reward)
  156. # else: skip this step for this coin
  157. # Setup Cerebro and add data feeds
  158. cerebro = bt.Cerebro()
  159. cerebro.addstrategy(BanditStrategy)
  160. cerebro.broker.setcash(100000.0) # Starting capital
  161. cerebro.broker.setcommission(0.001) # 0.1% fees
  162. # Add each coin's data as a PandasData feed
  163. for i, (coin, df) in enumerate(data.items()):
  164. # Ensure DF is sorted and has no duplicates
  165. df = df.sort_index().drop_duplicates()
  166. #data_feed = bt.feeds.PandasData(
  167. data_feed = PandasCustom(
  168. dataname=df,
  169. datetime=None, # Uses index
  170. open='open',
  171. high='high',
  172. low='low',
  173. close='close',
  174. volume='volume',
  175. # Add custom lines for features (accessible as self.data.reward, etc.)
  176. #reward=-1, # -1 means auto-detect column
  177. #atr=-1,
  178. # Add more if needed (e.g., ema_short=-1)
  179. )
  180. cerebro.adddata(data_feed, name=coin) # Name for reference
  181. # Resample if needed (e.g., to daily), but hourly is fine
  182. # cerebro.resampledata(data_feed, timeframe=bt.TimeFrame.Days) # Optional
  183. # Run backtest
  184. print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
  185. cerebro.run()
  186. print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
  187. # Optional: Plot results
  188. # cerebro.plot()
  189. if __name__ == "__main__":
  190. main()