Quellcode durchsuchen

add supporting files

larry vor 10 Jahren
Ursprung
Commit
b1e6577333
100 geänderte Dateien mit 356 neuen und 41 gelöschten Zeilen
  1. 87 41
      cep/ib_mds.py
  2. 181 0
      cep/md_std.py
  3. 88 0
      cep/t1.py
  4. BIN
      checkpoint/.checkpoint-1443881014000.crc
  5. BIN
      checkpoint/.checkpoint-1443881016000.crc
  6. BIN
      checkpoint/.checkpoint-1443881018000.crc
  7. BIN
      checkpoint/.checkpoint-1443881020000.crc
  8. BIN
      checkpoint/.checkpoint-1443881022000.crc
  9. BIN
      checkpoint/.checkpoint-1443881024000.crc
  10. BIN
      checkpoint/.checkpoint-1443881026000.crc
  11. BIN
      checkpoint/.checkpoint-1443881028000.crc
  12. BIN
      checkpoint/.checkpoint-1443881030000.crc
  13. BIN
      checkpoint/.checkpoint-1443881032000.crc
  14. BIN
      checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-146/.part-00000.crc
  15. BIN
      checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-146/part-00000
  16. BIN
      checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-183/.part-00000.crc
  17. BIN
      checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-183/part-00000
  18. BIN
      checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-220/.part-00000.crc
  19. BIN
      checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-220/part-00000
  20. BIN
      checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-121/.part-00000.crc
  21. BIN
      checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-121/part-00000
  22. BIN
      checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-164/.part-00000.crc
  23. BIN
      checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-164/part-00000
  24. BIN
      checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-35/.part-00000.crc
  25. BIN
      checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-35/part-00000
  26. BIN
      checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-72/.part-00000.crc
  27. BIN
      checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-72/part-00000
  28. BIN
      checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-35/.part-00000.crc
  29. BIN
      checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-35/part-00000
  30. BIN
      checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-72/.part-00000.crc
  31. BIN
      checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-72/part-00000
  32. BIN
      checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-35/.part-00000.crc
  33. BIN
      checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-35/part-00000
  34. BIN
      checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-73/.part-00000.crc
  35. BIN
      checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-73/part-00000
  36. BIN
      checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-35/.part-00000.crc
  37. BIN
      checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-35/part-00000
  38. BIN
      checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-72/.part-00000.crc
  39. BIN
      checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-72/part-00000
  40. BIN
      checkpoint/3b0f6098-0da8-4ceb-b5ac-3a1b80fbaadb/rdd-72/.part-00000.crc
  41. BIN
      checkpoint/3b0f6098-0da8-4ceb-b5ac-3a1b80fbaadb/rdd-72/part-00000
  42. BIN
      checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-35/.part-00000.crc
  43. BIN
      checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-35/part-00000
  44. BIN
      checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-74/.part-00000.crc
  45. BIN
      checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-74/part-00000
  46. BIN
      checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1023/.part-00000.crc
  47. BIN
      checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1023/part-00000
  48. BIN
      checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1061/.part-00000.crc
  49. BIN
      checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1061/part-00000
  50. BIN
      checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3038/.part-00000.crc
  51. BIN
      checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3038/part-00000
  52. BIN
      checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3077/.part-00000.crc
  53. BIN
      checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3077/part-00000
  54. BIN
      checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-35/.part-00000.crc
  55. BIN
      checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-35/part-00000
  56. BIN
      checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-77/.part-00000.crc
  57. BIN
      checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-77/part-00000
  58. BIN
      checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-119/.part-00000.crc
  59. BIN
      checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-119/part-00000
  60. BIN
      checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-77/.part-00000.crc
  61. BIN
      checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-77/part-00000
  62. BIN
      checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-947/.part-00000.crc
  63. BIN
      checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-947/part-00000
  64. BIN
      checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-985/.part-00000.crc
  65. BIN
      checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-985/part-00000
  66. BIN
      checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54387/.part-00000.crc
  67. BIN
      checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54387/part-00000
  68. BIN
      checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54424/.part-00000.crc
  69. BIN
      checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54424/part-00000
  70. BIN
      checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-111/.part-00000.crc
  71. BIN
      checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-111/part-00000
  72. BIN
      checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-149/.part-00000.crc
  73. BIN
      checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-149/part-00000
  74. BIN
      checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-73/.part-00000.crc
  75. BIN
      checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-73/part-00000
  76. BIN
      checkpoint/b988699d-ee81-4bcd-952b-224086d63c95/rdd-35/.part-00000.crc
  77. BIN
      checkpoint/b988699d-ee81-4bcd-952b-224086d63c95/rdd-35/part-00000
  78. BIN
      checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-331/.part-00000.crc
  79. BIN
      checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-331/part-00000
  80. BIN
      checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-368/.part-00000.crc
  81. BIN
      checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-368/part-00000
  82. BIN
      checkpoint/checkpoint-1443881014000
  83. BIN
      checkpoint/checkpoint-1443881016000
  84. BIN
      checkpoint/checkpoint-1443881018000
  85. BIN
      checkpoint/checkpoint-1443881020000
  86. BIN
      checkpoint/checkpoint-1443881022000
  87. BIN
      checkpoint/checkpoint-1443881024000
  88. BIN
      checkpoint/checkpoint-1443881026000
  89. BIN
      checkpoint/checkpoint-1443881028000
  90. BIN
      checkpoint/checkpoint-1443881030000
  91. BIN
      checkpoint/checkpoint-1443881032000
  92. BIN
      checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-146/.part-00000.crc
  93. BIN
      checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-146/part-00000
  94. BIN
      checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-183/.part-00000.crc
  95. BIN
      checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-183/part-00000
  96. BIN
      checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-553/.part-00000.crc
  97. BIN
      checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-553/part-00000
  98. BIN
      checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-590/.part-00000.crc
  99. BIN
      checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-590/part-00000
  100. BIN
      checkpoint/efe4f6e3-67ee-4ec8-9575-a2dc4360a290/rdd-51409/.part-00000.crc

+ 87 - 41
cep/ib_mds.py

@@ -13,7 +13,7 @@ from os.path import isfile, join
 from threading import Lock
 from comms.ib_heartbeat import IbHeartBeat
 import threading, urllib2
-
+from optparse import OptionParser
 #from options_data import ContractHelper
 
 import finopt.options_data as options_data
@@ -45,13 +45,13 @@ class IbKafkaProducer():
     id2contract = {'conId': 1, 'id2contracts': {} }
     
     
-    def __init__(self, config):
+    def __init__(self, config, replay = False):
         
         self.config = config
         self.tlock = Lock()
-        host = self.config.get("ib_mds", "ib_mds.gateway").strip('"').strip("'")
-        port = int(self.config.get("ib_mds", "ib_mds.ib_port"))
-        appid = int(self.config.get("ib_mds", "ib_mds.appid.id"))  
+#         host = self.config.get("ib_mds", "ib_mds.gateway").strip('"').strip("'")
+#         port = int(self.config.get("ib_mds", "ib_mds.ib_port"))
+#         appid = int(self.config.get("ib_mds", "ib_mds.appid.id"))  
         kafka_host = self.config.get("cep", "kafka.host").strip('"').strip("'")
         
         self.persist['is_persist'] = self.config.get("ib_mds", "ib_mds.is_persist")
@@ -62,25 +62,32 @@ class IbKafkaProducer():
         
         IbKafkaProducer.IB_TICK_PRICE = self.config.get("cep", "kafka.ib.topic.tick_price").strip('"').strip("'")
         IbKafkaProducer.IB_TICK_SIZE = self.config.get("cep", "kafka.ib.topic.tick_size").strip('"').strip("'")
-        self.con = ibConnection(host, port, appid)
-        self.con.registerAll(self.on_ib_message)
-        rc = self.con.connect()
-        if rc:
-            self.ib_conn_status = 'OK'
+#         self.con = ibConnection(host, port, appid)
+#         self.con.registerAll(self.on_ib_message)
+#         rc = self.con.connect()
+#         if rc:
+#             self.ib_conn_status = 'OK'
+        
+#        logging.info('IbKafkaProducer: connection status to IB is %d' % rc)
+
         logging.info('******* Starting IbKafkaProducer')
-        logging.info('IbKafkaProducer: connection status to IB is %d' % rc)
         logging.info('IbKafkaProducer: connecting to kafka host: %s...' % kafka_host)
         logging.info('IbKafkaProducer: message mode is async')
         
         client = KafkaClient(kafka_host)
         self.producer = SimpleProducer(client, async=False)
         
-        # start heart beat monitor
-        self.ibh = IbHeartBeat(config)
-        self.ibh.register_listener([self.on_ib_conn_broken])
-        self.ibh.run()        
+        if not replay:
+            self.start_ib_connection()
+        
+#         # start heart beat monitor
+#         self.ibh = IbHeartBeat(config)
+#         self.ibh.register_listener([self.on_ib_conn_broken])
+#         #self.ibh.run()        
     
     def pub_cn_index(self, sec):
+        #http://blog.csdn.net/moneyice/article/details/7877030
+        # hkHSI, hkHSCEI, hkHSCCI
         
         qs = '0000001,1399001,1399300'
         url = 'http://api.money.126.net/data/feed/%s?callback=ne3587367b7387dc' % qs
@@ -118,10 +125,10 @@ class IbKafkaProducer():
     def on_ib_conn_broken(self, msg):
         logging.error('IbKafkaProducer: connection is broken!')
         self.ib_conn_status = 'ERROR'
-        self.tlock.acquire()
-        try:
-            if self.ib_conn_status == 'OK':
-                return
+        self.tlock.acquire() # this function may get called multiple times
+        try:                 # block until another party finishes executing
+            if self.ib_conn_status == 'OK': # check status
+                return                      # if already fixed up while waiting, return 
             
             self.con.eDisconnect()
     
@@ -138,6 +145,7 @@ class IbKafkaProducer():
                 logging.info('IbKafkaProducer: connection status to IB is %d (0-broken 1-good)' % rc)
                 sleep(2)
             
+            # we arrived here because the connection has been restored
             if not self.quit:
                 # resubscribe tickers again!
                 self.load_tickers()                
@@ -204,13 +212,31 @@ class IbKafkaProducer():
             self.persist['fp'].close()
             self.persist['file_exist'] = False
 
+
+    def start_ib_connection(self):
+        host = self.config.get("ib_mds", "ib_mds.gateway").strip('"').strip("'")
+        port = int(self.config.get("ib_mds", "ib_mds.ib_port"))
+        appid = int(self.config.get("ib_mds", "ib_mds.appid.id"))          
+        self.con = ibConnection(host, port, appid)
+        self.con.registerAll(self.on_ib_message)
+        rc = self.con.connect()
+        if rc:
+            self.ib_conn_status = 'OK'
+        logging.info('start_ib_connection: connection status to IB is %d' % rc)
+        
+        # start heart beat monitor
+        self.ibh = IbHeartBeat(config)
+        self.ibh.register_listener([self.on_ib_conn_broken])
+        self.ibh.run()        
+        
+
     def load_tickers(self, path=None):
         
         self.id2contract = {'conId': 1, 'id2contracts': {} }
         
         
         if path is None:
-            path = self.config.get("cep", "ib.subscription.fileloc").strip('"').strip("'")
+            path = self.config.get("ib_mds", "ib_mds.subscription.fileloc").strip('"').strip("'")
         logging.info('load_tickers: attempt to open file %s' % path)
         fr = open(path)
         for l in fr.readlines():
@@ -245,15 +271,27 @@ class IbKafkaProducer():
 
     def replay(self, dir_loc):
         
+         
+        
         def process_msg(fn):
             fp = open(fn)
-            last_ts = None
+            logging.info('replay file %s' % fn)
+            last_record_ts = None
             for line in fp:
-                msg = line.split('|')[1]
-                msg_ts = json.loads(msg)['ts']
-#         
-#         files = [f if f.size() > 0 else None for f in dir_loc]
-        files = sorted([ f for f in listdir(dir_loc) if isfile(join(dir_loc,f)) ])            
+                
+                s_msg = line.split('|')[1]
+                msg = json.loads(s_msg)
+                msg_ts = datetime.datetime.fromtimestamp(msg['ts'])
+                interval = (msg_ts - (last_record_ts if last_record_ts <> None else msg_ts)).microseconds / 1000000.0
+                
+                print '%s %s %s' % (msg_ts.strftime('%Y-%m-%d %H:%M:%S.%f'), s_msg, fn)
+                self.producer.send_messages(IbKafkaProducer.IB_TICK_PRICE if msg['typeName'] == 'tickPrice' else IbKafkaProducer.IB_TICK_SIZE, s_msg)
+                 
+                last_record_ts = msg_ts
+                sleep(interval)
+
+        files = sorted([ join(dir_loc,f) for f in listdir(dir_loc) if isfile(join(dir_loc,f)) ])   
+                 
         for f in files:
             process_msg(f)
         
@@ -284,23 +322,24 @@ class IbKafkaProducer():
 
 if __name__ == '__main__':
            
-#     logging.basicConfig(#filename = "log/port.log", filemode = 'w', 
-#                         level=logging.INFO,
-#                         format='%(asctime)s %(levelname)-8s %(message)s')      
-#     
-#     config = ConfigParser.ConfigParser()
-#     config.read("../config/app.cfg")
-#     ik = IbKafkaProducer(config)
-#     ik.load_tickers()    
-#     ik.run_forever()
     
+    parser = OptionParser()
+
+    parser.add_option("-r", "--replay",
+                      dest="replay_dir",
+                      help="replay recorded mds files stored in the specified directory")
+                      
+
     
+    options, arguments = parser.parse_args()
+
+    #print options, arguments
     
-    if len(sys.argv) != 2:
-        print("Usage: %s <config file>" % sys.argv[0])
+    if len(sys.argv) < 2:
+        print("Usage: %s [options] <config file>" % sys.argv[0])
         exit(-1)    
 
-    cfg_path= sys.argv[1:]
+    cfg_path= arguments[0]
     config = ConfigParser.SafeConfigParser()
     if len(config.read(cfg_path)) == 0:      
         raise ValueError, "Failed to open config file" 
@@ -309,6 +348,13 @@ if __name__ == '__main__':
     logconfig = eval(config.get("ib_mds", "ib_mds.logconfig").strip('"').strip("'"))
     logconfig['format'] = '%(asctime)s %(levelname)-8s %(message)s'    
     logging.basicConfig(**logconfig)    
-    ik = IbKafkaProducer(config)
-    ik.load_tickers()    
-    ik.run_forever()
+    replay = True if options.replay_dir <> None else False 
+    ik = IbKafkaProducer(config, replay)
+    
+    if not replay:
+        ik.load_tickers()    
+    else:
+        ik.replay(options.replay_dir)
+        
+    ik.run_forever()
+    

+ 181 - 0
cep/md_std.py

@@ -0,0 +1,181 @@
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.streaming.kafka import KafkaUtils
+from numpy import *
+import pylab
+from scipy import stats
+import time, datetime
+import threading
+import time
+import os
+from finopt import ystockquote
+##
+##
+##
+## This example demonstrates the use of accumulators and broadcast 
+## and how to terminate spark running jobs
+## 
+## it also demonstrates how to send alerts via xmpp
+## (requires prosody server running and redisQueue)
+##
+##
+
+##
+##
+## insert the path so spark-submit knows where
+## to look for a file located in a given directory
+##
+## the other method is to export PYTHONPATH before 
+## calling spark-submit
+##
+# import sys
+# sys.path.insert(0, '/home/larry-13.04/workspace/finopt/cep')
+print sys.path
+
+
+#import optcal
+import json
+import numpy
+#from finopt.cep.redisQueue import RedisQueue
+from comms.redisQueue import RedisQueue
+from comms.alert_bot import AlertHelper
+
+
+def f1(time, rdd):
+    lt =  rdd.collect()
+    print '**** f1'
+    print lt
+    print '**** end f1'
+    f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/std.txt', 'a')
+    f.write(''.join('%s,%s,%s\n'%(s[0].strftime('%Y-%m-%d %H:%M:%S.%f'),s[1],s[2]) for s in lt))
+    d = Q.value
+    if float(lt[0][1]) > 8.0:
+        msg = 'Stock SD alert triggered: '.join('%s,%s,%s\n'%(s[0].strftime('%Y-%m-%d %H:%M:%S.%f'),s[1],s[2]) for s in lt)
+        print msg
+        q = RedisQueue(d['alert_bot_q'][1], d['alert_bot_q'][0], d['host'], d['port'], d['db'])
+        q.put(msg)
+
+def f2(time, rdd):
+    lt =  rdd.collect()
+    if lt:
+        change = lt[0][0]
+        d = Q.value
+        print '********** f2'
+        print lt[0][0], Threshold.value, lt[0][1]
+        print '********** end f2'
+
+        
+        if change > Threshold.value:
+            msg = 'Stock alert triggered: %0.6f, mean: %0.2f' % (change, lt[0][1])
+            print msg
+#             q = RedisQueue(d['alert_bot_q'][1], d['alert_bot_q'][0], d['host'], d['port'], d['db'])
+#             q.put(msg)
+    
+
+
+ 
+
+
+       
+        
+       
+
+
+
+    
+
+# to run from command prompt
+# 0. start kafka broker
+# 1. edit subscription.txt and prepare 2 stocks
+# 2. run ib_mds.py 
+# 3. spark-submit  --jars spark-streaming-kafka-assembly_2.10-1.4.1.jar ./alerts/pairs_corr.py vsu-01:2181 
+
+# http://stackoverflow.com/questions/3425439/why-does-corrcoef-return-a-matrix
+# 
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print("Usage: %s <broker_list ex: vsu-01:2181>  <rdd_name> <tick id> <fn name>" % sys.argv[0])
+        print("Usage: to gracefully shutdown type echo 1 > /tmp/flag at the terminal")
+        exit(-1)
+
+    app_name = "std_deviation_analysis"
+    sc = SparkContext(appName= app_name) #, pyFiles = ['./cep/redisQueue.py'])
+    ssc = StreamingContext(sc, 2)
+    ssc.checkpoint('../checkpoint')
+
+    
+
+    brokers, qname, id, fn  = sys.argv[1:]
+    id = int(id)
+    
+    #
+    # demonstrate how to use broadcast variable
+    #
+    NumProcessed = sc.accumulator(0)
+    
+    cls = float(ystockquote.get_historical_prices('^HSI', '20150930', '20150930')[1][4])
+    
+    print 'closing price of HSI %f' % cls
+    
+    Q = sc.broadcast({'cls': cls, \
+                      'rname': 'rname', 'qname': qname, 'namespace': 'mdq', 'host': 'localhost', 'port':6379, 'db': 3, 'alert_bot_q': ('alert_bot', 'chatq')})
+    Threshold = sc.broadcast(0.020)
+    #kvs = KafkaUtils.createDirectStream(ssc, ['ib_tick_price', 'ib_tick_size'], {"metadata.broker.list": brokers})
+    kvs = KafkaUtils.createStream(ssc, brokers, app_name, {'ib_tick_price':1, 'ib_tick_size':1})
+
+    lns = kvs.map(lambda x: x[1])
+
+    mdl = lns.map(lambda x: json.loads(x))\
+            .filter(lambda x: (x['typeName'] == 'tickPrice' and x['contract'] == "HSI-20151029-0--FUT-HKD-102"))\
+            .map(lambda x: (x['contract'], (x['ts'], x['price']) ))\
+            .groupByKeyAndWindow(12, 10, 1)
+            
+    s1 = mdl.map(lambda x: (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ), numpy.std([a[1] for a in x[1]]),\
+                 numpy.mean([a[1] for a in x[1]])\
+                 )) 
+
+    s2 = s1.map(lambda x: (abs(x[2] - Q.value['cls']) / Q.value['cls'], x[2]))   
+
+#     s1 = lines.map(lambda line: json.loads(line)).filter(lambda x: (x['tickerId'] in [1,2] and x['typeName']== 'tickPrice'))\
+#                 .filter(lambda x: (x['field'] == 4))\
+#                 .map(lambda x: (x['tickerId'], x['price'])).reduceByKey(lambda x,y: (x+y)/2).groupByKeyAndWindow(30, 20, 1)    
+    
+    
+    s1.foreachRDD(f1)
+    s2.foreachRDD(f2)
+
+    #trades.pprint()
+    #trades.foreachRDD(eval(fn))
+    
+        
+    def do_work():
+
+        while 1:
+            # program will stop after processing 40 rdds
+#             if NumProcessed.value == 70:
+#                 break            
+            # program will stop on detecting a 1 in the flag file
+            try:
+                f = open('/tmp/flag')
+                l = f.readlines()
+                print 'reading %s' % l[0]
+                if '1' in l[0]:
+                    os.remove('/tmp/flag') 
+                    print 'terminating..........'        
+                    ssc.stop(True, False) 
+                    sys.exit()          
+                f.close()
+                time.sleep(2)
+            except IOError:
+                continue
+            
+            
+        
+    t = threading.Thread(target = do_work, args=())
+    t.start()
+    ssc.start()
+    ssc.awaitTermination()
+

+ 88 - 0
cep/t1.py

@@ -0,0 +1,88 @@
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.streaming.kafka import KafkaUtils
+from numpy import *
+import pylab
+from scipy import stats
+import json
+import numpy
+import time, datetime
+from os import listdir
+from os.path import isfile, join
+
+path = '/home/larry/l1304/workspace/finopt/data/mds_files/large_up_1002/ibkdump-20151002105314.txt'
+path = '/home/larry/l1304/workspace/finopt/data/mds_files/large_up_1002/ibkdump-20151002110412.txt'
+
+
+
+#
+# demo how to compute standard deviation in a RDD
+
+ 
+
+
+
+def process_msg(file):
+
+    try:
+        md = sc.textFile(file)    
+        mdl = md.map(lambda lines: (lines.split('|'))).map(lambda x: json.loads(x[1]))\
+                .filter(lambda x: (x['typeName'] == 'tickPrice' and x['contract'] == "HSI-20151029-0--FUT-HKD-102"))\
+                .map(lambda x: x['price'])
+        sd = numpy.std(mdl.collect())
+        #print file[len(file)-20:], sd, mdl.count()
+        return (sd, mdl.count())
+    except:
+        return None
+    
+def process_msg_by_key(file):
+
+    
+
+
+    try:
+        md = sc.textFile(file)    
+        print file
+        mdl = md.map(lambda lines: (lines.split('|'))).map(lambda x: json.loads(x[1]))
+        
+                
+                
+        #mdp = mdl.filter(lambda x: (x['typeName'] == 'tickPrice')) and x['contract'] in ["HSI-20151029-0--FUT-HKD-102"]))\
+        mdp = mdl.filter(lambda x: (x['typeName'] == 'tickPrice'))\
+                .map(lambda x: (x['contract'], (x['ts'], x['price']) )).groupByKey()
+                
+        #mds = mdl.filter(lambda x: (x['typeName'] == 'tickSize'  and x['contract'] in ["HSI-20151029-0--FUT-HKD-102"]))\
+        mds = mdl.filter(lambda x: (x['typeName'] == 'tickSize'))\
+                .map(lambda x: (x['contract'], (x['ts'], x['size']) )).groupByKey()                
+        
+        sdp = mdp.map(lambda x: (x[0],\
+                                 (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ),\
+                                  numpy.std([a[1] for a in x[1]]),\
+                                  numpy.mean([a[1] for a in x[1]]))\
+                                ))
+        
+        sds = mds.map(lambda x: (x[0],\
+                                 (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ),\
+                                  numpy.std([a[1] for a in x[1]]),\
+                                  numpy.mean([a[1] for a in x[1]]))\
+                                )) 
+         
+        #print sds.take(2)
+        sdsp = sdp.cogroup(sds)
+        elems = sdsp.collect()
+        for e in elems:            
+            print '%s %s %s' % (e[0], ''.join('[%s %0.2f %0.2f]'%(p[0],p[1],p[2]) for p in e[1][0]), ''.join('[%s %0.2f %0.2f]'%(p[0],p[1],p[2]) for p in e[1][1]))
+        return sdsp 
+    except:
+        return 
+    
+
+if __name__ == '__main__':
+    sc = SparkContext(appName= 't1')    
+    #dir_loc = '/home/larry/l1304/workspace/finopt/data/mds_files/large_up_1002'
+    dir_loc = '/home/larry/l1304/workspace/finopt/data/mds_files'
+    files = sorted([ join(dir_loc,f) for f in listdir(dir_loc) if isfile(join(dir_loc,f)) ])
+
+    a = [(process_msg_by_key(f)) for f in files]
+    #print a
+    #print ''.join('%s,%s,%s\n' % (aa[0], aa[1], aa[2]) if aa <> None else '' for aa in a )

BIN
checkpoint/.checkpoint-1443881014000.crc


BIN
checkpoint/.checkpoint-1443881016000.crc


BIN
checkpoint/.checkpoint-1443881018000.crc


BIN
checkpoint/.checkpoint-1443881020000.crc


BIN
checkpoint/.checkpoint-1443881022000.crc


BIN
checkpoint/.checkpoint-1443881024000.crc


BIN
checkpoint/.checkpoint-1443881026000.crc


BIN
checkpoint/.checkpoint-1443881028000.crc


BIN
checkpoint/.checkpoint-1443881030000.crc


BIN
checkpoint/.checkpoint-1443881032000.crc


BIN
checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-146/.part-00000.crc


BIN
checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-146/part-00000


BIN
checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-183/.part-00000.crc


BIN
checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-183/part-00000


BIN
checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-220/.part-00000.crc


BIN
checkpoint/07b37ff1-6282-44c6-b03e-2d0f8f1b42c8/rdd-220/part-00000


BIN
checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-121/.part-00000.crc


BIN
checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-121/part-00000


BIN
checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-164/.part-00000.crc


BIN
checkpoint/0f3ca102-41c5-4255-9fab-26747d706f23/rdd-164/part-00000


BIN
checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-35/.part-00000.crc


BIN
checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-35/part-00000


BIN
checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-72/.part-00000.crc


BIN
checkpoint/0ff90949-f8a9-46e1-8606-67636921b96d/rdd-72/part-00000


BIN
checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-35/.part-00000.crc


BIN
checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-35/part-00000


BIN
checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-72/.part-00000.crc


BIN
checkpoint/2a3f5844-ae0d-4861-b6aa-a2ae0e4c2cc3/rdd-72/part-00000


BIN
checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-35/.part-00000.crc


BIN
checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-35/part-00000


BIN
checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-73/.part-00000.crc


BIN
checkpoint/2e7e3f6e-f5eb-491d-ab63-979be936c305/rdd-73/part-00000


BIN
checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-35/.part-00000.crc


BIN
checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-35/part-00000


BIN
checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-72/.part-00000.crc


BIN
checkpoint/34e57529-905e-4506-978c-8104709cd108/rdd-72/part-00000


BIN
checkpoint/3b0f6098-0da8-4ceb-b5ac-3a1b80fbaadb/rdd-72/.part-00000.crc


BIN
checkpoint/3b0f6098-0da8-4ceb-b5ac-3a1b80fbaadb/rdd-72/part-00000


BIN
checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-35/.part-00000.crc


BIN
checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-35/part-00000


BIN
checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-74/.part-00000.crc


BIN
checkpoint/4629c96b-c226-459b-9d18-434b812ba58a/rdd-74/part-00000


BIN
checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1023/.part-00000.crc


BIN
checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1023/part-00000


BIN
checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1061/.part-00000.crc


BIN
checkpoint/51f0d060-9a80-4f6e-9720-32806b07d3ba/rdd-1061/part-00000


BIN
checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3038/.part-00000.crc


BIN
checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3038/part-00000


BIN
checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3077/.part-00000.crc


BIN
checkpoint/55610612-e770-4445-8053-6641d4e6c7be/rdd-3077/part-00000


BIN
checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-35/.part-00000.crc


BIN
checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-35/part-00000


BIN
checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-77/.part-00000.crc


BIN
checkpoint/5b8852fd-9ae3-4173-8496-92aa59eae4db/rdd-77/part-00000


BIN
checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-119/.part-00000.crc


BIN
checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-119/part-00000


BIN
checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-77/.part-00000.crc


BIN
checkpoint/720c50ae-6d7c-49ba-8341-acefdd0b8497/rdd-77/part-00000


BIN
checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-947/.part-00000.crc


BIN
checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-947/part-00000


BIN
checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-985/.part-00000.crc


BIN
checkpoint/9b46677e-2099-4e28-b2e6-61d367c13cee/rdd-985/part-00000


BIN
checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54387/.part-00000.crc


BIN
checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54387/part-00000


BIN
checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54424/.part-00000.crc


BIN
checkpoint/9d00edc2-b521-4c95-af16-317063b8d9f4/rdd-54424/part-00000


BIN
checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-111/.part-00000.crc


BIN
checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-111/part-00000


BIN
checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-149/.part-00000.crc


BIN
checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-149/part-00000


BIN
checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-73/.part-00000.crc


BIN
checkpoint/a60aaf54-41b3-4bf3-a36f-1d4e2bfd6811/rdd-73/part-00000


BIN
checkpoint/b988699d-ee81-4bcd-952b-224086d63c95/rdd-35/.part-00000.crc


BIN
checkpoint/b988699d-ee81-4bcd-952b-224086d63c95/rdd-35/part-00000


BIN
checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-331/.part-00000.crc


BIN
checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-331/part-00000


BIN
checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-368/.part-00000.crc


BIN
checkpoint/cbdbb1dc-1ae8-44b5-b718-7251b138f579/rdd-368/part-00000


BIN
checkpoint/checkpoint-1443881014000


BIN
checkpoint/checkpoint-1443881016000


BIN
checkpoint/checkpoint-1443881018000


BIN
checkpoint/checkpoint-1443881020000


BIN
checkpoint/checkpoint-1443881022000


BIN
checkpoint/checkpoint-1443881024000


BIN
checkpoint/checkpoint-1443881026000


BIN
checkpoint/checkpoint-1443881028000


BIN
checkpoint/checkpoint-1443881030000


BIN
checkpoint/checkpoint-1443881032000


BIN
checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-146/.part-00000.crc


BIN
checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-146/part-00000


BIN
checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-183/.part-00000.crc


BIN
checkpoint/d3dfaa31-7df6-4f72-86df-ff58fb72c825/rdd-183/part-00000


BIN
checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-553/.part-00000.crc


BIN
checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-553/part-00000


BIN
checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-590/.part-00000.crc


BIN
checkpoint/e8071275-9e96-47a5-83e6-5d6783e5b71a/rdd-590/part-00000


BIN
checkpoint/efe4f6e3-67ee-4ec8-9575-a2dc4360a290/rdd-51409/.part-00000.crc


Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.