
kafka EPC

larry 10 years ago
parent
commit
0697588a48
25 changed files with 668 additions and 46 deletions
  1. cep/ib_mds.py  (+1 -0)
  2. cep/md_std.py  (+12 -17)
  3. cep/md_std2.py  (+196 -0)
  4. cep/port_stream.py  (+177 -0)
  5. cep/t1.py  (+25 -10)
  6. comms/alert_bot.py  (+10 -1)
  7. comms/alert_bot.pyc  (BIN)
  8. comms/epc.py  (+81 -4)
  9. comms/epc.pyc  (BIN)
  10. config/app.cfg  (+4 -1)
  11. config/mds.cfg  (+2 -2)
  12. finopt/finopt.pyc  (BIN)
  13. finopt/portfolio.py  (+19 -3)
  14. finopt/portfolio.pyc  (BIN)
  15. finopt/test2.py  (+72 -2)
  16. html/opt-pos-chart-tmpl.html  (+2 -2)
  17. html/wpc-debug.html  (+16 -0)
  18. html/wpc-debug.html~  (+16 -0)
  19. sh/md_std.sh  (+2 -1)
  20. sh/md_std2.sh  (+13 -0)
  21. sh/port_stream.sh  (+13 -0)
  22. sh/run_mds.sh  (+2 -2)
  23. sh/start-alert.sh  (+0 -0)
  24. sh/stop-alert.sh  (+4 -0)
  25. sh/t1.sh  (+1 -1)

+ 1 - 0
cep/ib_mds.py

@@ -247,6 +247,7 @@ class IbKafkaProducer():
 
     def do_work(self):
         while not self.quit:
+            sleep(1)
             pass
         
     def run_forever(self):

+ 12 - 17
cep/md_std.py

@@ -48,11 +48,11 @@ def f1(time, rdd):
     print '**** f1'
     print lt
     print '**** end f1'
-    f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/std.txt', 'a')
+    f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/std-%s.txt' % datetime.datetime.now().strftime('%Y%m%d%H%M'), 'a')
     f.write(''.join('%s,%s,%s\n'%(s[0].strftime('%Y-%m-%d %H:%M:%S.%f'),s[1],s[2]) for s in lt))
     d = Q.value
-    if float(lt[0][1]) > 8.0:
-        msg = 'Stock SD alert triggered: '.join('%s,%s,%s\n'%(s[0].strftime('%Y-%m-%d %H:%M:%S.%f'),s[1],s[2]) for s in lt)
+    if float(lt[0][1]) > 4.6:
+        msg = 'Stock SD alert triggered: '.join('%s,%0.2f,%02.f\n'%(s[0].strftime('%Y-%m-%d %H:%M:%S.%f'),s[1],s[2]) for s in lt)
         print msg
         q = RedisQueue(d['alert_bot_q'][1], d['alert_bot_q'][0], d['host'], d['port'], d['db'])
         q.put(msg)
@@ -75,15 +75,6 @@ def f2(time, rdd):
     
 
 
- 
-
-
-       
-        
-       
-
-
-
     
 
 # to run from command prompt
@@ -104,7 +95,7 @@ if __name__ == "__main__":
     app_name = "std_deviation_analysis"
     sc = SparkContext(appName= app_name) #, pyFiles = ['./cep/redisQueue.py'])
     ssc = StreamingContext(sc, 2)
-    ssc.checkpoint('../checkpoint')
+    ssc.checkpoint('/home/larry-13.04/workspace/finopt/log/checkpoint')
 
     
 
@@ -132,6 +123,13 @@ if __name__ == "__main__":
             .filter(lambda x: (x['typeName'] == 'tickPrice' and x['contract'] == "HSI-20151029-0--FUT-HKD-102"))\
             .map(lambda x: (x['contract'], (x['ts'], x['price']) ))\
             .groupByKeyAndWindow(12, 10, 1)
+
+#     mdl = lns.map(lambda x: json.loads(x))\
+#             .filter(lambda x: (x['typeName'] == 'tickSize' and x['contract'] == "HSI-20151029-0--FUT-HKD-102"))\
+#             .map(lambda x: (x['contract'], (x['ts'], x['size']) ))\
+#             .groupByKeyAndWindow(12, 10, 1)
+
+
             
     s1 = mdl.map(lambda x: (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ), numpy.std([a[1] for a in x[1]]),\
                  numpy.mean([a[1] for a in x[1]])\
@@ -139,10 +137,7 @@ if __name__ == "__main__":
 
     s2 = s1.map(lambda x: (abs(x[2] - Q.value['cls']) / Q.value['cls'], x[2]))   
 
-#     s1 = lines.map(lambda line: json.loads(line)).filter(lambda x: (x['tickerId'] in [1,2] and x['typeName']== 'tickPrice'))\
-#                 .filter(lambda x: (x['field'] == 4))\
-#                 .map(lambda x: (x['tickerId'], x['price'])).reduceByKey(lambda x,y: (x+y)/2).groupByKeyAndWindow(30, 20, 1)    
-    
+
     
     s1.foreachRDD(f1)
     s2.foreachRDD(f2)

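In the rewritten f1 above, each windowed batch arrives as (timestamp, std, mean) tuples built by the s1 map; the handler appends them to a per-minute std-*.txt file and, when the standard deviation exceeds the new 4.6 cutoff (down from 8.0), pushes an alert message onto the alert_bot RedisQueue. A minimal standalone sketch of that check, numpy only; the sample ticks are illustrative and no file or queue side effects are reproduced:

import datetime
import numpy

SD_THRESHOLD = 4.6  # same cutoff the commit lowers the alert to

def window_stats(ticks):
    # ticks: list of (epoch_ts, price) pairs for one sliding window
    ts = datetime.datetime.fromtimestamp(ticks[0][0])
    prices = [p for _, p in ticks]
    return ts, numpy.std(prices), numpy.mean(prices)

def check_alert(ticks):
    ts, sd, mean = window_stats(ticks)
    if sd > SD_THRESHOLD:
        return 'Stock SD alert triggered: %s,%0.2f,%0.2f' % (
            ts.strftime('%Y-%m-%d %H:%M:%S.%f'), sd, mean)
    return None

# illustrative window of (epoch seconds, price) ticks
sample = [(1444200000.0 + i, 22000.0 + 5.0 * i) for i in range(12)]
print(check_alert(sample))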
+ 196 - 0
cep/md_std2.py

@@ -0,0 +1,196 @@
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.streaming.kafka import KafkaUtils
+from numpy import *
+import pylab
+from scipy import stats
+import time, datetime
+import threading
+import time
+import os
+from finopt import ystockquote
+##
+##
+##
+## This example demonstrates the use of accumulators and broadcast 
+## and how to terminate spark running jobs
+## 
+## it also demonstrates how to send alerts via xmpp
+## (requires prosody server running and redisQueue)
+##
+##
+
+##
+##
+## insert the path so spark-submit knows where
+## to look for a file located in a given directory
+##
+## the other method is to export PYTHONPATH before 
+## calling spark-submit
+##
+# import sys
+# sys.path.insert(0, '/home/larry-13.04/workspace/finopt/cep')
+print sys.path
+
+
+#import optcal
+import json
+import numpy
+#from finopt.cep.redisQueue import RedisQueue
+from comms.redisQueue import RedisQueue
+from comms.alert_bot import AlertHelper
+
+
+def f1(time, rdd):
+    lt =  rdd.collect()
+    if not lt:
+        return
+    print '**** f1'
+    print lt
+    print '**** end f1'
+    f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/std-20151008.txt', 'a') # % datetime.datetime.now().strftime('%Y%m%d%H%M'), 'a')
+    msg = ''.join('%s,%s,%s,%s,%s\n'%(s[0], s[1][0][0].strftime('%Y-%m-%d %H:%M:%S.%f'),s[1][0][1],s[1][0][2], s[1][1]) for s in lt)
+    f.write(msg)
+    d = Q.value
+    
+    # return rdd tuple (-,((-,-),-)): name = 0--, time 100, sd 101, mean 102, vol 11-
+    
+    for s in lt:
+        if s[0].find('HSI-20151029-0') > 0 and (s[1][0][1] > 4.5 or s[1][1] > 100000):      
+            msg  = 'Unusual trading activity: %s (SD=%0.2f, mean px=%d, vol=%d) at %s\n'\
+                 % (s[0], \
+                    s[1][0][1], s[1][0][2],\
+                    s[1][1],\
+                    s[1][0][0].strftime('%m-%d %H:%M:%S'))   
+            q = RedisQueue(d['alert_bot_q'][1], d['alert_bot_q'][0], d['host'], d['port'], d['db'])
+            q.put(msg)
+            
+
+def f2(time, rdd):
+    lt =  rdd.collect()
+    if lt:
+        change = lt[0][0]
+        d = Q.value
+        print '********** f2'
+        print lt[0][0], Threshold.value, lt[0][1]
+        print '********** end f2'
+
+        
+        if change > Threshold.value:
+            msg = 'Stock alert triggered: %0.6f, mean: %0.2f' % (change, lt[0][1])
+            print msg
+#             q = RedisQueue(d['alert_bot_q'][1], d['alert_bot_q'][0], d['host'], d['port'], d['db'])
+#             q.put(msg)
+    
+
+
+    
+
+# to run from command prompt
+# 0. start kafka broker
+# 1. edit subscription.txt and prepare 2 stocks
+# 2. run ib_mds.py 
+# 3. spark-submit  --jars spark-streaming-kafka-assembly_2.10-1.4.1.jar ./alerts/pairs_corr.py vsu-01:2181 
+
+# http://stackoverflow.com/questions/3425439/why-does-corrcoef-return-a-matrix
+# 
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print("Usage: %s <broker_list ex: vsu-01:2181>  <rdd_name> <tick id> <fn name>" % sys.argv[0])
+        print("Usage: to gracefully shutdown type echo 1 > /tmp/flag at the terminal")
+        exit(-1)
+
+
+
+
+
+
+    p_rdd_window = 10
+    p_rdd_slide_window = 12
+    p_sc_window = 2
+    
+    p_price_deviate_percent = 0.02
+    
+
+
+
+    app_name = "std_deviation_analysis"
+    sc = SparkContext(appName= app_name) #, pyFiles = ['./cep/redisQueue.py'])
+    ssc = StreamingContext(sc, 2)
+    ssc.checkpoint('/home/larry-13.04/workspace/finopt/log/checkpoint')
+
+
+
+    brokers, qname, id, fn  = sys.argv[1:]
+    id = int(id)
+    
+    #
+    # demonstrate how to use broadcast variable
+    #
+    NumProcessed = sc.accumulator(0)
+    
+    cls = float(ystockquote.get_historical_prices('^HSI', '20151005', '20151005')[1][4])
+    
+    print 'closing price of HSI %f' % cls
+    
+    Q = sc.broadcast({'cls': cls, \
+                      'rname': 'rname', 'qname': qname, 'namespace': 'mdq', 'host': 'localhost', 'port':6379, 'db': 3, 'alert_bot_q': ('alert_bot', 'chatq')})
+    Threshold = sc.broadcast(0.25)
+    #kvs = KafkaUtils.createDirectStream(ssc, ['ib_tick_price', 'ib_tick_size'], {"metadata.broker.list": brokers})
+    kvs = KafkaUtils.createStream(ssc, brokers, app_name, {'ib_tick_price':1, 'ib_tick_size':1})
+
+    lns = kvs.map(lambda x: x[1])
+
+    mdp = lns.map(lambda x: json.loads(x))\
+            .filter(lambda x: (x['typeName'] == 'tickPrice'))\
+            .map(lambda x: (x['contract'], (x['ts'], x['price']) ))\
+            .groupByKeyAndWindow(12, 10, 1)
+
+    mds = lns.map(lambda x: json.loads(x))\
+            .filter(lambda x: (x['typeName'] == 'tickSize'))\
+            .map(lambda x: (x['contract'], x['size'] ))\
+            .reduceByKeyAndWindow(lambda x, y: (x + y), None, 12, 10, 1)
+    s1 = mdp.map(lambda x: (x[0], (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ), numpy.std([a[1] for a in x[1]]),\
+                 numpy.mean([a[1] for a in x[1]]))\
+                 )) 
+    
+    mds.pprint()            
+    sps = s1.join(mds)
+    sps.foreachRDD(f1)
+
+    sps.pprint()
+    #trades.foreachRDD(eval(fn))
+    
+        
+    def do_work():
+
+        while 1:
+            # program will stop after processing 40 rdds
+#             if NumProcessed.value == 70:
+#                 break            
+            # program will stop on detecting a 1 in the flag file
+            try:
+                f = open('/tmp/flag')
+                l = f.readlines()
+                print 'reading %s' % l[0]
+                if '1' in l[0]:
+                    os.remove('/tmp/flag') 
+                    print 'terminating..........'        
+                    ssc.stop(True, False) 
+                    sys.exit(0)          
+                f.close()
+                time.sleep(2)
+            except IOError:
+                continue
+            
+            
+        
+    t = threading.Thread(target = do_work, args=())
+    t.start()
+    ssc.start()
+    ssc.awaitTermination()
+    
+

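The usage banner in md_std2.py points at echo 1 > /tmp/flag for a graceful shutdown: the do_work thread polls that file and calls ssc.stop(True, False) when it reads a 1. A condensed sketch of the same watcher outside Spark; demo_stop is a hypothetical stand-in for the ssc.stop call, and unlike the job above this sketch also sleeps when the flag file is missing:

import os
import threading
import time

FLAG_FILE = '/tmp/flag'  # same path the streaming job polls

def watch_flag(stop_streaming, poll_secs=2):
    # poll FLAG_FILE; when its first line contains '1', remove it and stop
    while True:
        try:
            with open(FLAG_FILE) as f:
                lines = f.readlines()
            if lines and '1' in lines[0]:
                os.remove(FLAG_FILE)
                print('terminating..........')
                stop_streaming()
                return
        except IOError:
            pass  # flag file not there yet
        time.sleep(poll_secs)

def demo_stop():
    # in the Spark job this would be: ssc.stop(True, False)
    print('streaming context stopped')

t = threading.Thread(target=watch_flag, args=(demo_stop,))
t.daemon = True
t.start()
t.join(10)  # give the demo a bounded lifetime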
+ 177 - 0
cep/port_stream.py

@@ -0,0 +1,177 @@
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.streaming.kafka import KafkaUtils
+from numpy import *
+import pylab
+from scipy import stats
+import time, datetime
+import threading
+import time
+import os
+#from finopt import ystockquote
+from comms.epc import ExternalProcessComm
+
+ 
+##
+##
+##
+## This example demonstrates the use of accumulators and broadcast 
+## and how to terminate spark running jobs
+## 
+## it also demonstrates how to send alerts via xmpp
+## (requires prosody server running and redisQueue)
+##
+##
+
+##
+##
+## insert the path so spark-submit knows where
+## to look for a file located in a given directory
+##
+## the other method is to export PYTHONPATH before 
+## calling spark-submit
+##
+# import sys
+# sys.path.insert(0, '/home/larry-13.04/workspace/finopt/cep')
+print sys.path
+
+
+#import optcal
+import json
+import numpy
+#from finopt.cep.redisQueue import RedisQueue
+from comms.redisQueue import RedisQueue
+from comms.alert_bot import AlertHelper
+
+
+def f1(time, rdd):
+    lt =  rdd.collect()
+    if not lt:
+        return
+    print '**** f1'
+    print lt
+    print '**** end f1'
+    f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/std-20151007.txt', 'a') # % datetime.datetime.now().strftime('%Y%m%d%H%M'), 'a')
+    msg = ''.join('%s,%s,%s,%s,%s\n'%(s[0], s[1][0][0].strftime('%Y-%m-%d %H:%M:%S.%f'),s[1][0][1],s[1][0][2], s[1][1]) for s in lt)
+    f.write(msg)
+    d = Q.value
+    
+    # return rdd tuple (-,((-,-),-)): name = 0--, time 100, sd 101, mean 102, vol 11-
+    
+    for s in lt:
+        if s[0].find('HSI-20151029-0') > 0 and (s[1][0][1] > 4.5 or s[1][1] > 100000):      
+            msg  = 'Unusual trading activity: %s (SD=%0.2f, mean px=%d, vol=%d) at %s\n'\
+                 % (s[0], \
+                    s[1][0][1], s[1][0][2],\
+                    s[1][1],\
+                    s[1][0][0].strftime('%m-%d %H:%M:%S'))   
+            q = RedisQueue(d['alert_bot_q'][1], d['alert_bot_q'][0], d['host'], d['port'], d['db'])
+            q.put(msg)
+            
+
+def f2(time, rdd):
+    lt =  rdd.collect()
+    if lt:
+        change = lt[0][0]
+        d = Q.value
+        print '********** f2'
+        print lt[0][0], Threshold.value, lt[0][1]
+        print '********** end f2'
+
+        
+        if change > Threshold.value:
+            msg = 'Stock alert triggered: %0.6f, mean: %0.2f' % (change, lt[0][1])
+            print msg
+#             q = RedisQueue(d['alert_bot_q'][1], d['alert_bot_q'][0], d['host'], d['port'], d['db'])
+#             q.put(msg)
+    
+
+
+    
+
+# to run from command prompt
+# 0. start kafka broker
+# 1. edit subscription.txt and prepare 2 stocks
+# 2. run ib_mds.py 
+# 3. spark-submit  --jars spark-streaming-kafka-assembly_2.10-1.4.1.jar ./alerts/pairs_corr.py vsu-01:2181 
+
+# http://stackoverflow.com/questions/3425439/why-does-corrcoef-return-a-matrix
+# 
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print("Usage: %s <broker_list ex: vsu-01:2181>  " % sys.argv[0])
+        print("Usage: to gracefully shutdown type echo 1 > /tmp/flag at the terminal")
+        exit(-1)
+
+
+    app_name = "portfolio.stream"
+    sc = SparkContext(appName= app_name) #, pyFiles = ['./cep/redisQueue.py'])
+    ssc = StreamingContext(sc, 5)
+    ssc.checkpoint('/home/larry-13.04/workspace/finopt/log/checkpoint')
+
+
+
+    brokers, qname, id, fn  = sys.argv[1:]
+    id = int(id)
+    
+
+    NumProcessed = sc.accumulator(0)
+    
+    
+
+    
+    Q = sc.broadcast({ \
+                      'rname': 'rname', 'qname': qname, 'namespace': 'mdq', 'host': 'localhost', 'port':6379, 'db': 3, 'alert_bot_q': ('alert_bot', 'chatq')})
+    
+    
+#     s = {v:1 for k,v in ExternalProcessComm.EPC_TOPICS.iteritems()}
+#     print s
+    kvs = KafkaUtils.createStream(ssc, brokers, app_name, \
+                                  {v:1 for k,v in ExternalProcessComm.EPC_TOPICS.iteritems()}) #{'ib_tick_price':1, 'ib_tick_size':1})
+
+    lns = kvs.map(lambda x: x[1]).map(lambda x: json.loads(x))
+    ps = lns.filter(lambda x: x)
+    lns.pprint()
+#     ps = lns.map(lambda x: json.loads(x))\
+#             .filter(lambda x: (x['typeName'] == 'tickPrice'))\
+#             .map(lambda x: (x['contract'], (x['ts'], x['price']) ))\
+#             .groupByKeyAndWindow(12, 10, 1)
+# 
+#     sps.foreachRDD(f1)
+# 
+#     sps.pprint()
+    #trades.foreachRDD(eval(fn))
+    
+        
+    def do_work():
+
+        while 1:
+            # program will stop after processing 40 rdds
+#             if NumProcessed.value == 70:
+#                 break            
+            # program will stop on detecting a 1 in the flag file
+            try:
+                f = open('/tmp/flag')
+                l = f.readlines()
+                print 'reading %s' % l[0]
+                if '1' in l[0]:
+                    os.remove('/tmp/flag') 
+                    print 'terminating..........'        
+                    ssc.stop(True, False) 
+                    sys.exit(0)          
+                f.close()
+                time.sleep(2)
+            except IOError:
+                continue
+            
+            
+        
+    t = threading.Thread(target = do_work, args=())
+    t.start()
+    ssc.start()
+    ssc.awaitTermination()
+    
+

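port_stream.py subscribes to every topic that comms/epc.py publishes by turning the EPC_TOPICS constant into the {topic: partition_count} map that KafkaUtils.createStream expects. A small sketch of what that comprehension produces; the topic names mirror the EPC_TOPICS dict added to comms/epc.py further down in this commit:

# mirrors comms.epc.ExternalProcessComm.EPC_TOPICS from this commit
EPC_TOPICS = {'EPC_PORT_SUMMARY_TOPIC': 'port_summary',
              'EPC_PORT_ITEM_TOPIC': 'port_item'}

# KafkaUtils.createStream(ssc, zkQuorum, groupId, topics) takes a
# {topic_name: number of partitions to consume} dict; each partition
# is read in its own receiver thread
topics = {v: 1 for v in EPC_TOPICS.values()}
print(topics)  # {'port_summary': 1, 'port_item': 1}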
+ 25 - 10
cep/t1.py

@@ -54,29 +54,44 @@ def process_msg_by_key(file):
                 
         #mds = mdl.filter(lambda x: (x['typeName'] == 'tickSize'  and x['contract'] in ["HSI-20151029-0--FUT-HKD-102"]))\
         mds = mdl.filter(lambda x: (x['typeName'] == 'tickSize'))\
-                .map(lambda x: (x['contract'], (x['ts'], x['size']) )).groupByKey()                
+                .map(lambda x: (x['contract'], x['size'] ) )\
+                .reduceByKey(lambda x, y: (x + y))              
         
         sdp = mdp.map(lambda x: (x[0],\
-                                 (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ),\
+                                 (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]   ),\
                                   numpy.std([a[1] for a in x[1]]),\
                                   numpy.mean([a[1] for a in x[1]]))\
                                 ))
         
-        sds = mds.map(lambda x: (x[0],\
-                                 (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ),\
-                                  numpy.std([a[1] for a in x[1]]),\
-                                  numpy.mean([a[1] for a in x[1]]))\
-                                )) 
+#         sds = mds.map(lambda x: (x[0],\
+#                                  (datetime.datetime.fromtimestamp( [a[0] for a in x[1]][0]  ),\
+#                                   numpy.std([a[1] for a in x[1]]),\
+#                                   numpy.mean([a[1] for a in x[1]]))\
+#                                 )) 
          
-        #print sds.take(2)
-        sdsp = sdp.cogroup(sds)
+        #sdsp = sdp.cogroup(sds)
+        sdsp = mds.join(sdp)
+        print sdsp.take(2)
         elems = sdsp.collect()
+        f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/sd-rdd.txt', 'a')
         for e in elems:            
-            print '%s %s %s' % (e[0], ''.join('[%s %0.2f %0.2f]'%(p[0],p[1],p[2]) for p in e[1][0]), ''.join('[%s %0.2f %0.2f]'%(p[0],p[1],p[2]) for p in e[1][1]))
+            s =  '%s,%s,%s' % (e[0], ''.join('[%s %0.2f %0.2f]'%(p[0],p[1],p[2]) for p in e[1][0]), ''.join('[%s %0.2f %0.2f]'%(p[0],p[1],p[2]) for p in e[1][1]))
+            print s
+            f.write(s + '\n')
         return sdsp 
     except:
         return 
     
+    
+def process_port(file):    
+    try:
+        md = sc.textFile(file)    
+        print file
+        mdl = md.map(lambda x: json.loads(x)).filter()
+            
+    except:
+        return
+    
 
 if __name__ == '__main__':
     

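The t1.py change above drops the groupByKey/std pass over tick sizes in favour of a plain reduceByKey sum, and swaps cogroup for join, so each contract row pairs its summed size with the (time, sd, mean) price stats. A local sketch of the same aggregation without Spark; the sample ticks and stats values are made up:

from collections import defaultdict

# illustrative tickSize messages: (contract, size)
ticks = [('HSI-20151029-0--FUT-HKD-102', 3),
         ('HSI-20151029-0--FUT-HKD-102', 5),
         ('HSI-20151029-22000-C-OPT-HKD-102', 2)]

# reduceByKey(lambda x, y: x + y) is a per-key sum
sizes = defaultdict(int)
for contract, size in ticks:
    sizes[contract] += size

# join with per-contract price stats (time, sd, mean)
stats = {'HSI-20151029-0--FUT-HKD-102': ('2015-10-08 10:00:00', 4.7, 22110.0)}
joined = {c: (sizes[c], stats[c]) for c in sizes if c in stats}
print(joined)  # {'HSI-20151029-0--FUT-HKD-102': (8, ('2015-10-08 10:00:00', 4.7, 22110.0))}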
+ 10 - 1
comms/alert_bot.py

@@ -140,6 +140,13 @@ class AlertHelper():
     def post_msg(self, msg):
         self.q.put(msg)      
         
+    def flush_all(self):
+        i = 0
+        while not self.q.empty():
+            self.q.get()
+            i+=1
+        return i
+        
 
 if __name__ == '__main__':
     if len(sys.argv) != 2:
@@ -163,6 +170,8 @@ if __name__ == '__main__':
         xmpp.process(block=False)
         logging.info('Complete initialization...Bot will now run forever')
         a = AlertHelper(config)
-        a.post_msg('from AlertHelper: testing 123')
+        i = a.flush_all()
+        a.post_msg('from AlertHelper: flushed %d old messages.' % i)
+        
     else:
         print("Unable to connect.")

BIN
comms/alert_bot.pyc


+ 81 - 4
comms/epc.py

@@ -10,17 +10,54 @@ import time, datetime
 import sleekxmpp
 from threading import Lock
 from kafka.client import KafkaClient
-from kafka.consumer import SimpleConsumer
+from kafka import KafkaConsumer
 from kafka.producer import SimpleProducer
+from kafka.common import LeaderNotAvailableError
 import threading
 
+class EPCPub():
+    
+    producer = None
+
+    EPC_TOPICS= {'EPC_PORT_SUMMARY_TOPIC': 'port_summary', 
+                 'EPC_PORT_ITEM_TOPIC': 'port_item'}
+    
+    def __init__(self, config):
+
+        
+        host = config.get("epc", "kafka.host").strip('"').strip("'")
+        port = config.get("epc", "kafka.port")
+
+        client = KafkaClient('%s:%s' % (host, port))
+        self.producer = SimpleProducer(client, async=False)
+        
+                
+    def post_msg(self, topic, msg):
+        self.producer.send_messages(topic, msg)
+
 
+    def post_portfolio_summary(self, dict):
         
+        msg= (time.time(), ExternalProcessComm.EPC_TOPICS['EPC_PORT_SUMMARY_TOPIC'], dict)
+        
+        self.post_msg(ExternalProcessComm.EPC_TOPICS['EPC_PORT_SUMMARY_TOPIC'], json.dumps(msg))
+
+
+    def post_portfolio_items(self, ldict):
+        msg= (time.time(), ExternalProcessComm.EPC_TOPICS['EPC_PORT_ITEM_TOPIC'], ldict)
+        self.post_msg(ExternalProcessComm.EPC_TOPICS['EPC_PORT_ITEM_TOPIC'], json.dumps(msg))
+                             
+
+
+
         
 class ExternalProcessComm(threading.Thread):
     
     producer = None
     consumer = None
+    EPC_TOPICS= {'EPC_PORT_SUMMARY_TOPIC': 'port_summary', 
+                 'EPC_PORT_ITEM_TOPIC': 'port_item'}
+    
     def __init__(self, config):
 
         super(ExternalProcessComm, self).__init__()
@@ -29,17 +66,56 @@ class ExternalProcessComm(threading.Thread):
 
         client = KafkaClient('%s:%s' % (host, port))
         self.producer = SimpleProducer(client, async=False)
-        self.consumer = SimpleConsumer(client, "epc.group", "epc.topic")
+        #sleep(1)
+        
+        
+        print 'create EPC'
+        
+        
+        # the kafkaConsumer will fail with a no topic error if the topic is not found in the broker
+        # the next line uses the producer to produce the required topic which will create one 
+        # if it has not been created already
+        
+        [self.post_msg(v, 'init msg') for k,v in ExternalProcessComm.EPC_TOPICS.iteritems()] 
+        self.consumer = KafkaConsumer( *[(v,0) for k,v in ExternalProcessComm.EPC_TOPICS.iteritems()], \
+                                       metadata_broker_list=['%s:%s' % (host, port)],\
+                                       group_id = 'epc.group',\
+                                       auto_commit_enable=True,\
+                                       auto_commit_interval_ms=30 * 1000,\
+                                       auto_offset_reset='largest') # discard old ones
+#         https://kafka.apache.org/08/configuration.html
+#         What to do when there is no initial offset in Zookeeper or if an offset is out of range:
+#         * smallest : automatically reset the offset to the smallest offset
+#         * largest : automatically reset the offset to the largest offset
+#         * anything else: throw exception to the consumer. If this is set to largest, 
+#         the consumer may lose some messages when the number of partitions, for the topics 
+#         it subscribes to, changes on the broker. To prevent data loss during partition addition, set auto.offset.reset to smallest
                 
     def post_msg(self, topic, msg):
         self.producer.send_messages(topic, msg)
 
 
+    def post_portfolio_summary(self, dict):
+        msg= (time.time(), dict)
+        
+        self.post_msg(ExternalProcessComm.EPC_TOPICS['EPC_PORT_SUMMARY_TOPIC'], json.dumps(msg))
+
+
+    def post_portfolio_items(self, ldict):
+        msg= (time.time(), ldict)
+        self.post_msg(ExternalProcessComm.EPC_TOPICS['EPC_PORT_ITEM_TOPIC'], json.dumps(msg))
+                             
     def run(self):
+        
         for message in self.consumer:
             
-            logging.info(message)
+            logging.info("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
+                                         message.offset, message.key,
+                                         message.value))
 
+            print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
+                                         message.offset, message.key,
+                                         message.value))
 
 
 
@@ -58,4 +134,5 @@ if __name__ == '__main__':
 
     e = ExternalProcessComm(config)
     e.start()
-    e.post_msg('epc.topic', 'test msg')
+    
+    e.post_msg(ExternalProcessComm.EPC_TOPICS['EPC_PORT_SUMMARY_TOPIC'], 'test msg')

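The rewritten comms/epc.py first posts an 'init msg' to every EPC topic through the producer, so a fresh broker auto-creates the topics before the KafkaConsumer attaches, and then consumes with auto_offset_reset='largest' so stale messages are discarded. A hedged sketch of the same flow using the newer kafka-python names (KafkaProducer and bootstrap_servers rather than the SimpleProducer/metadata_broker_list used in the diff; 'largest' is spelled 'latest' in current releases, and the broker address here is illustrative):

from kafka import KafkaConsumer, KafkaProducer

BROKER = 'vsu-01:9092'  # illustrative; epc.py reads kafka.host/kafka.port from app.cfg
TOPICS = ['port_summary', 'port_item']

# producing to each topic first lets a broker with auto topic creation
# enabled create them, so the consumer does not fail with an unknown-topic error
producer = KafkaProducer(bootstrap_servers=BROKER)
for t in TOPICS:
    producer.send(t, b'init msg')
producer.flush()

consumer = KafkaConsumer(*TOPICS,
                         bootstrap_servers=BROKER,
                         group_id='epc.group',
                         enable_auto_commit=True,
                         auto_commit_interval_ms=30 * 1000,
                         auto_offset_reset='latest')  # the old API called this 'largest'

for message in consumer:
    print('%s:%d:%d: key=%s value=%s' % (message.topic, message.partition,
                                         message.offset, message.key, message.value))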
BIN
comms/epc.pyc


+ 4 - 1
config/app.cfg

@@ -36,9 +36,12 @@ options_data.logconfig: "{'filename': '/home/larry-13.04/workspace/finopt/log/op
 
 [portfolio]
 portfolio.logconfig: "{'filename': '/home/larry-13.04/workspace/finopt/log/port.log', 'filemode': 'w','level': logging.INFO}"
+portfolio.epc: "{'stream_to_Kafka': True}"   
+#, 'msg_port_summary':{'topic': 'port_summary'}, 'msg_port_line':{'topic': 'port_line'}}"
 
 [opt_serve]
-opt_serve.logconfig: "{'filename': '/home/larry-13.04/workspace/finopt/log/opt.log', 'filemode': 'w','level': logging.INFO}"
+opt_serve.logconfig: "{'filename': '/home/larry-13.04/workspace/finopt/log/serve.log', 'filemode': 'w','level': logging.INFO}"
+
 
 [cep]
 kafka.host: 'vsu-01'

+ 2 - 2
config/mds.cfg

@@ -25,7 +25,7 @@ msg_bot.logconfig: "{'level': logging.INFO}"
 
 [ib_mds]
 ib_mds.logconfig: "{'filename': '/home/larry-13.04/workspace/finopt/log/ib_mds.log', 'filemode': 'w','level': logging.INFO}"
-ib_mds.ib_port: 8496
+ib_mds.ib_port: 7496
 #ib_mds.ib_port: 4001
 ib_mds.appid.id: 9800
 ib_mds.gateway: 'localhost'
@@ -38,7 +38,7 @@ ib_mds.spill_over_limit: 10000
 [ib_heartbeat]
 ib_heartbeat.logconfig: "{'filename': '/home/larry-13.04/workspace/finopt/log/ib_mds.log', 'filemode': 'w','level': logging.INFO}"
 #ib_heartbeat.ib_port: 4001
-ib_heartbeat.ib_port: 8496
+ib_heartbeat.ib_port: 7496
 ib_heartbeat.appid.id: 9911
 ib_heartbeat.gateway: 'localhost'
 #ib_heartbeat.gateway: '192.168.1.118'

BIN
finopt/finopt.pyc


+ 19 - 3
finopt/portfolio.py

@@ -14,6 +14,7 @@ import optcal
 import opt_serve
 import cherrypy
 import redis
+from comms.epc import EPCPub
 
 
 
@@ -55,6 +56,7 @@ class PortfolioManager():
     rs_port_keys = {}
     ib_port_msg = []
     tlock = None
+    epc = None
     
     def __init__(self, config):
         self.config = config
@@ -66,7 +68,10 @@ class PortfolioManager():
         self.rs_port_keys['port_conid_set'] = config.get("redis", "redis.datastore.key.port_conid_set").strip('"').strip("'")
         self.rs_port_keys['port_prefix'] = config.get("redis", "redis.datastore.key.port_prefix").strip('"').strip("'")        
         self.rs_port_keys['port_summary'] = config.get("redis", "redis.datastore.key.port_summary").strip('"').strip("'")
-        
+        self.epc = eval(config.get("portfolio", "portfolio.epc").strip('"').strip("'"))
+        # instantiate a epc object if the config says so
+        if self.epc['stream_to_Kafka']:
+            self.epc['epc'] = EPCPub(config) 
         
         r_host = config.get("redis", "redis.server").strip('"').strip("'")
         r_port = config.get("redis", "redis.port")
@@ -434,9 +439,19 @@ class PortfolioManager():
         pos_summary['entries_skipped'] = l_skipped_pos
         pos_summary['status'] = 'OK' if len(l_skipped_pos) == 0 else 'NOT_OK'
         #self.r_set(self.rs_port_keys['port_summary'], json.dumps(pos_summary) )
-        self.r_conn.set(self.rs_port_keys['port_summary'], json.dumps(pos_summary) )
+        t_pos_summary = json.dumps(pos_summary)
+        self.r_conn.set(self.rs_port_keys['port_summary'], t_pos_summary )
+  
+        #print pos_summary
+        #print l_gmap      
+        # broadcast 
+        if self.epc['epc']:
+            
+            self.epc['epc'].post_portfolio_summary(pos_summary)
+            self.epc['epc'].post_portfolio_items(l_gmap)
+        
 
-        logging.info(pos_summary)
+        #logging.info(pos_summary)
         
         logging.warn('-------------- Entries for which the greeks are not computed!! %s' %\
                         ','.join(' %s' % k for k in l_skipped_pos))
@@ -555,3 +570,4 @@ if __name__ == '__main__':
     
          
     
+

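portfolio.py now reads the new portfolio.epc setting, evals it into a dict, and instantiates EPCPub only when stream_to_Kafka is true, so the portfolio summary and line items are broadcast to Kafka only when configured. A stripped-down sketch of that wiring; FakePub is a hypothetical stand-in for comms.epc.EPCPub, and the raw value mirrors the config/app.cfg entry in this commit:

import json
import time

# value of portfolio.epc exactly as it appears in config/app.cfg
raw = "\"{'stream_to_Kafka': True}\""

class FakePub(object):  # hypothetical stand-in for comms.epc.EPCPub
    def post_portfolio_summary(self, d):
        print('summary -> kafka: %s' % json.dumps((time.time(), d)))

# same strip/eval dance portfolio.py applies to config.get("portfolio", "portfolio.epc")
epc = eval(raw.strip('"').strip("'"))
epc['epc'] = FakePub() if epc['stream_to_Kafka'] else None

pos_summary = {'status': 'OK', 'entries_skipped': []}
if epc['epc']:
    epc['epc'].post_portfolio_summary(pos_summary)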
BIN
finopt/portfolio.pyc


+ 72 - 2
finopt/test2.py

@@ -1,10 +1,11 @@
 import redis, json
-from finopt.cep.redisQueue import RedisQueue
+from comms import redisQueue
 from numpy import *
 import pylab
 import ystockquote
 from datetime import datetime
 from scipy import stats
+from os.path import isfile, join
 
 def f1():
     pall = set(rs.keys(pattern='PT_*'))
@@ -173,8 +174,75 @@ def extrapolate2(ric):
     return (ric, score)
 
 
+def stdan(path, tday):
+    #f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/std20151005.txt')
+    pylab.switch_backend('agg') # switch to agg backend that support writing in non-main threads    
+    f = open(join(path, '%s.txt' % tday))
+    l = f.readlines()
+    m = map(lambda x: (x.split(',')), l)
+    q = filter(lambda y: y[0] in ['HSI-20151029-0--FUT-HKD-102'] and y[1] > '2015-10-06 08:55:34' , m)
+    n = filter(lambda y: float(y[3]) > 21500.0, q)
+    
+    p = map(lambda y: float(y[2]) if float(y[2]) < 15.0 else 0.0, n)
+    
+    yy = map(lambda y: float(y[3]), n)
+    xx = map(lambda x: datetime.strptime(x[1], '%Y-%m-%d %H:%M:%S.%f'), n)
+    
+    print len(p), len(yy)
+    pylab.figure(figsize=(20,10))
+    pylab.figure(1)
+    pylab.subplot(211)
+    pylab.plot(xx,yy, 'g-' )
+    pylab.subplot(212)
+    pylab.plot(xx,p, 'ro')
+    #pylab.axis(['20150930', '20151001', 20000, 22000])
+    
+    pylab.show()
+    pylab.savefig('%s/std-%s.png' % (path, tday))
+    pylab.close()
+    
+    
+def stdan2(path, tday):
+    #f = open('/home/larry/l1304/workspace/finopt/data/mds_files/std/std20151005.txt')
+    #pylab.switch_backend('agg') # switch to agg backend that support writing in non-main threads    
+    f = open(join(path, '%s.txt' % tday))
+    l = f.readlines()
+    m = map(lambda x: (x.split(',')), l)
 
+    pylab.figure(figsize=(20,10))
+    l_legend = []
+    for strike in range(22000, 23000, 200):
+        right = 'C'    
+        q = filter(lambda y: y[0] in ['HSI-20151029-%s-%s-OPT-HKD-102' % (strike, right)] and y[1] > '2015-10-07 08:55:34' , m)
+        #n = filter(lambda y: float(y[3]) > 21500.0, q)
+    
+        p = map(lambda y: float(y[2]) if float(y[2]) < 20.0 else 0.0, q) #n)
+        
+        yy = map(lambda y: float(y[3]), q) #n)
+        xx = map(lambda x: datetime.strptime(x[1], '%Y-%m-%d %H:%M:%S.%f'), q) #n)
+        
+        print len(p), len(yy)
         
+        pylab.figure(1)
+        pylab.subplot(211)
+        p1, = pylab.plot(xx,yy, label = '%s%s' % (strike, right)) #, 'g-' )
+        
+        pylab.subplot(212)
+        p2, = pylab.plot(xx,p, 'o', label = '%s%s' % (strike, right)) #, 'ro')
+        
+        
+        l_legend.append(p1)
+        l_legend.append(p2)
+        #pylab.axis(['20150930', '20151001', 20000, 22000])
+        
+        
+        #pylab.savefig('%s/std-%s.png' % (path, tday))
+        #pylab.close()
+    print l_legend
+    pylab.legend(handles = l_legend )            
+    pylab.show()
+    
+    
 def mark6():
     
     
@@ -262,7 +330,9 @@ if __name__ == '__main__':
 #    mark6()
 
 #    analyze()
-    
+
+
+    stdan2('/home/larry/l1304/workspace/finopt/data/mds_files/std/', 'std-20151007')
     
     
 

+ 2 - 2
html/opt-pos-chart-tmpl.html

@@ -3,8 +3,8 @@
   <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
     <script type="text/javascript" src="https://www.google.com/jsapi"></script>
     <script type="text/javascript">
-      google.load("visualization", "1.1", {packages:["bar"]});
-      google.load('visualization', '1', {'packages':['table']});
+      google.load("visualization", "1.1", {packages:["bar", 'table']});
+//      google.load('visualization', '1', {'packages':['table']});
 //      google.load("visualization", "1.1", {packages:["corechart"]});
       google.setOnLoadCallback(drawChart);
       function drawChart() {

The diff is not shown because the file is too large.
+ 16 - 0
html/wpc-debug.html


The diff is not shown because the file is too large.
+ 16 - 0
html/wpc-debug.html~


+ 2 - 1
sh/md_std.sh

@@ -3,10 +3,11 @@ ROOT=/home/larry-13.04/workspace/finopt
 SRC=$ROOT/src
 KAFKA_ASSEMBLY_JAR=$ROOT/src/jar/spark-streaming-kafka-assembly_2.10-1.4.1.jar
 export PYTHONPATH=$SRC:$PYTHONPATH
+OPTIONS="--driver-memory 2g"
 
 #spark-submit  --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 cal_trend 
 #spark-submit --master spark://192.168.1.118:7077   --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 simple 
 #spark-submit --total-executor-cores 2 --master spark://192.168.1.118:7077   --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 cal_trend 
-spark-submit   --jars  $KAFKA_ASSEMBLY_JAR $SRC/cep/md_std.py vsu-01:2181 hsi 1 cal_trend 
+spark-submit   --driver-memory 2g --jars  $KAFKA_ASSEMBLY_JAR $SRC/cep/md_std.py vsu-01:2181 hsi 1 cal_trend 
 #spark-submit  --jars  $KAFKA_ASSEMBLY_JAR $SRC/cep/t1.py
 

+ 13 - 0
sh/md_std2.sh

@@ -0,0 +1,13 @@
+#!/bin/bash
+ROOT=/home/larry-13.04/workspace/finopt
+SRC=$ROOT/src
+KAFKA_ASSEMBLY_JAR=$ROOT/src/jar/spark-streaming-kafka-assembly_2.10-1.4.1.jar
+export PYTHONPATH=$SRC:$PYTHONPATH
+OPTIONS="--driver-memory 2g"
+
+#spark-submit  --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 cal_trend 
+#spark-submit --master spark://192.168.1.118:7077   --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 simple 
+#spark-submit --total-executor-cores 2 --master spark://192.168.1.118:7077   --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 cal_trend 
+spark-submit   $OPTIONS --jars  $KAFKA_ASSEMBLY_JAR $SRC/cep/md_std2.py vsu-01:2181 hsi 1 cal_trend 
+#spark-submit  --jars  $KAFKA_ASSEMBLY_JAR $SRC/cep/t1.py
+

+ 13 - 0
sh/port_stream.sh

@@ -0,0 +1,13 @@
+#!/bin/bash
+ROOT=/home/larry-13.04/workspace/finopt
+SRC=$ROOT/src
+KAFKA_ASSEMBLY_JAR=$ROOT/src/jar/spark-streaming-kafka-assembly_2.10-1.4.1.jar
+export PYTHONPATH=$SRC:$PYTHONPATH
+OPTIONS="--driver-memory 2g"
+
+#spark-submit  --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 cal_trend 
+#spark-submit --master spark://192.168.1.118:7077   --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 simple 
+#spark-submit --total-executor-cores 2 --master spark://192.168.1.118:7077   --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 cal_trend 
+spark-submit   $OPTIONS --jars  $KAFKA_ASSEMBLY_JAR $SRC/cep/port_stream.py vsu-01:2181 hsi 1 cal_trend 
+#spark-submit  --jars  $KAFKA_ASSEMBLY_JAR $SRC/cep/t1.py
+

+ 2 - 2
sh/run_mds.sh

@@ -2,7 +2,7 @@
 ROOT=$FINOPT_HOME
 export PYTHONPATH=$FINOPT_HOME:$PYTHONPATH
 # real time mode
-#python $FINOPT_HOME/cep/ib_mds.py $FINOPT_HOME/config/mds.cfg
+python $FINOPT_HOME/cep/ib_mds.py $FINOPT_HOME/config/mds.cfg
 # replay mode
-python $FINOPT_HOME/cep/ib_mds.py -r $FINOPT_HOME/../data/mds_files/large_up_1002 $FINOPT_HOME/config/mds.cfg
+#python $FINOPT_HOME/cep/ib_mds.py -r $FINOPT_HOME/../data/mds_files/20151006 $FINOPT_HOME/config/mds.cfg
 

+ 0 - 0
sh/alert.sh → sh/start-alert.sh


+ 4 - 0
sh/stop-alert.sh

@@ -0,0 +1,4 @@
+#!/bin/bash
+ps ax | grep -i 'alert' | grep python | grep -v grep | awk '{print $1}' | xargs kill -SIGTERM
+
+

+ 1 - 1
sh/t1.sh

@@ -3,7 +3,7 @@
 ROOT=$FINOPT_HOME
 FINDATA=$ROOT/../data 
 SRC=$ROOT
-KAFKA_ASSEMBLY_JAR=$ROOT/src/jar/spark-streaming-kafka-assembly_2.10-1.4.1.jar
+KAFKA_ASSEMBLY_JAR=$ROOT/jar/spark-streaming-kafka-assembly_2.10-1.4.1.jar
 export PYTHONPATH=$SRC:$PYTHONPATH
 
 #spark-submit  --jars  $KAFKA_ASSEMBLY_JAR /home/larry-13.04/workspace/finopt/cep/momentum.py vsu-01:2181 hsi 1 cal_trend 

Some files were not shown because too many files changed in this diff.