@@ -0,0 +1,315 @@
+from sklearn.naive_bayes import BernoulliNB
+import numpy as np
+import finopt.ystockquote as yq
+import datetime
+from dateutil import rrule
+import itertools
+
+def weather_play():
+
+    # implementing the example in the blog link below
+    # http://www.analyticsvidhya.com/blog/2015/09/naive-bayes-explained/
+    # each vector in x represents a predictor of type 'weather' with
+    # attributes = ['sunny', 'overcast', 'rainy']
+    # the labels / classes in y are ['NO', 'YES'], i.e. 0 or 1
+
+    # using BernoulliNB because the feature vectors are binary
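+    # each row of x is one observation, one-hot encoded over the three weather
+    # attributes (assumption: the columns follow the attribute list above, i.e.
+    # [sunny, overcast, rainy]); y[i] = 1 corresponds to 'YES'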
+
+    x = np.array([[1,0,0],[1,0,0],[1,0,0],[1,0,0],
+                  [0,1,0],[0,1,0],[0,1,0],[0,1,0],[0,1,0],
+                  [0,0,1],[0,0,1],[0,0,1],[0,0,1],[0,0,1]])
+
+    y = np.array([1,1,1,1,0,0,0,1,1,0,0,1,1,1])
+
+    model = BernoulliNB()
+    model.fit(x, y)
+    predicted = model.predict([[0,0,1],[1,0,0]])
+    print predicted
+    print model.predict_proba([[0,0,1],[1,0,0],[0,1,0]])
+    print model.feature_count_
+
+
+
+def str2datetime(yyyymmdd):
+    #print '%d%d%d'% (int(yyyymmdd[6:8]), int(yyyymmdd[4:6])-1 , int(yyyymmdd[0:4]))
+    return datetime.datetime(int(yyyymmdd[0:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:8]))
+
+
+def ystr2datetime(yyyymmdd):
+    #print '%d%d%d'% (int(yyyymmdd[6:8]), int(yyyymmdd[4:6])-1 , int(yyyymmdd[0:4]))
+    return datetime.datetime(int(yyyymmdd[0:4]), int(yyyymmdd[5:7]), int(yyyymmdd[8:10]))
+
+def datetime2ystr(dt):
+    return '{:%Y-%m-%d}'.format(dt)
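+# note: str2datetime parses 'YYYYMMDD' strings, ystr2datetime parses the
+# 'YYYY-MM-DD' date strings used in the price series, and datetime2ystr formats
+# a datetime back to 'YYYY-MM-DD', e.g. datetime2ystr(str2datetime('20160324')) == '2016-03-24'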
+
+def ewh_hsi(rs):
+
+    def daily_change(code, frdate, todate, base, numerator):
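+        # builds a binary series from the raw price rows: for each pair of
+        # adjacent rows it emits (date, 1 if the price change is positive else 0,
+        # numerator price, base price)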
+        e0 = yq.get_historical_prices(code, frdate, todate)
+        print e0
+        e1 = e0[1:]
+        e2 = e0[2:]
+
+        e3 = map(lambda i: (e2[i][0],
+                            1 if (float(e2[i][numerator]) - float(e1[i][base])) / float(e1[i][base]) > 0 else 0,
+                            e2[i][numerator], e1[i][base]
+                            ),
+                 [i for i in range(len(e2))])
+        return e3
+
+    idx = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
+    EWH = daily_change('^DJI', '20150901', '20160330', idx.index('Adj Close'), idx.index('Adj Close'))
+    #EWH = EWH[:20]
+    # 1 if it opens higher and 0 otherwise
+    HSI = daily_change('^HSI', '20150901', '20160330', idx.index('Open'), idx.index('Adj Close'))
+    #HSI = HSI[:20]
+    print len(EWH), ''.join('%s,' % x[0] for x in EWH)
+    print len(HSI), ''.join('%s,' % x[0] for x in HSI)
+    HSI_dates = map(lambda x: x[0], HSI)
+    # keep only the EWH entries that have a corresponding next-trading-day record in HSI
+    # example: for the EWH trade date 2016-02-29 the corresponding HSI record is 2016-03-01
+    EWH_filtered = filter(lambda x: datetime2ystr(rs.after(ystr2datetime(x[0]))) in HSI_dates, EWH)
+    print len(EWH_filtered), EWH_filtered
+    hsi_ewh = map(lambda x: (HSI[HSI_dates.index(
+                                 datetime2ystr(rs.after(ystr2datetime(x[0]))))
+                             ][1], x[1]), EWH_filtered)
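+    # xx pairs each EWH up/down flag with a constant 0 second column (a placeholder
+    # feature); yy holds the matched HSI up/down labels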
+
+    xx = np.array(map(lambda x: [x[1], 0], hsi_ewh))
+    yy = np.array(map(lambda x: x[0], hsi_ewh))
+
+    model = BernoulliNB()
+    model.fit(xx, yy)
+    predicted = model.predict([[0,0], [1,0]])
+    print predicted
+    print model.predict_proba([[0,0], [1,0]])
+    print model.feature_count_
+
+
+def cartesian_product(a, b):
+    return [[a0, b0] for a0 in a for b0 in b]
+
+def permutations(size):
+    # http://thomas-cokelaer.info/blog/2012/11/how-do-use-itertools-in-python-to-build-permutation-or-combination/
+    return list(itertools.product([0,1], repeat=size))
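+# e.g. permutations(2) -> [(0, 0), (0, 1), (1, 0), (1, 1)]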
+
+def predict(rs):
+
+    def daily_change(code, frdate, todate, base, numerator):
+        # compute the next-day price change % and return a new binary series where
+        # 1 - means UP
+        # 0 - means DOWN
+        # normally this is calculated as (price of today - price of yesterday) / price of yesterday
+        # the price type can be specified using the 'base' and 'numerator' parameters
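+        # each element of the returned list is a tuple:
+        # (date, up/down flag, numerator price, base price, percentage change)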
+
+        e0 = yq.get_historical_prices(code, frdate, todate)
+        print e0
+        e1 = e0[1:]
+        e2 = e0[2:]
+
+        e3 = map(lambda i: (e2[i][0],
+                            1 if (float(e2[i][numerator]) - float(e1[i][base])) / float(e1[i][base]) > 0 else 0,
+                            e2[i][numerator], e1[i][base],
+                            (float(e2[i][numerator]) - float(e1[i][base])) / float(e1[i][base])
+                            ),
+                 [i for i in range(len(e2))])
+        return e3
+
+    def save_lf_series(name, series):
+        now = datetime.datetime.now().strftime('%Y%m%d%H%M')
+        f = open('%s/%s-%s' % ('../dat', name, now), 'w')
+        f.write(''.join('%s %s,' % (x[0], x[1]) for x in series))
+        f.close()
+
+    def lbl_predictor_parse(c_stock, f_stock, frdate, todate):
+
+        idx = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
+        feature = daily_change(f_stock, frdate, todate, idx.index('Adj Close'), idx.index('Adj Close'))
+
+
+        label = daily_change(c_stock, frdate, todate, idx.index('Open'), idx.index('Adj Close'))
+        #HSI = HSI[:20]
+        print 'F: [%s] Num elements: %d ' % (f_stock, len(feature)), ''.join('(%s,%d,%0.4f), ' % (x[0], x[1], x[4]) for x in feature)
+        print 'L: [%s] Num elements: %d ' % (c_stock, len(label)), ''.join('(%s,%d,%0.4f), ' % (x[0], x[1], x[4]) for x in label)
+
+        # extract all the label dates
+        label_trade_dates = map(lambda x: x[0], label)
+        # filter the feature series -
+        # example: for a record with trade date (T) 2016-02-29, expect to find a label record with date = T+1
+        # if a match in the label series can't be found, drop the feature record
+        #
+        # logic:
+        # for each record in feature
+        #    determine the next business date of "label" given the feature record's date
+        #    if found, retain the record, else drop it
+        feature_filtered = filter(lambda x: datetime2ystr(rs.after(ystr2datetime(x[0]))) in label_trade_dates, feature)
+        print 'Filtered F:[%s] Num elements: %d ' % (f_stock, len(feature_filtered)), feature_filtered
+        #
+        # generate a labeledPoint (label, feature)
+        label_feature = map(lambda x: (label[label_trade_dates.index(
+                                           datetime2ystr(rs.after(ystr2datetime(x[0]))))
+                                       ][1], x[1]), feature_filtered)
+        print 'Matched Series [%s:%s] %s' % (c_stock, f_stock, ''.join('(%s,%s),' % (x[0], x[1]) for x in label_feature))
+
+        save_lf_series('%s_%s' % (c_stock, f_stock), label_feature)
+
+        return label_feature
+
+
+
+
+    #features_config = {'cstock': '^HSI', 'fstocks': ['^DJI', '^FCHI', '^FVX', '^FTSE','VNQ','QQQ','GOOG','BAC'], 'date_range': ['20150901', '20160330']}
+    features_config = {'cstock': '^HSI', 'fstocks': ['^DJI', 'EUR=X', 'JPY=X'], 'date_range': ['20150901', '20160330']}
+    lf = []
+    for fs in features_config['fstocks']:
+        lf.append(lbl_predictor_parse(features_config['cstock'], fs, features_config['date_range'][0], features_config['date_range'][1]))
+
+# lf1 = lbl_predictor_parse('^HSI', '^DJI', '20150901', '20160325')
+# lf2 = lbl_predictor_parse('^HSI', '^FTSE', '20150901', '20160325')
+# lf3 = lbl_predictor_parse('^HSI', '^HSCE', '20150901', '20160325')
+# xx1 = np.array(map(lambda x: [x[1], 0, 0], lf1))
+# xx2 = np.array(map(lambda x: [0 , x[1] ,0], lf2))
+# xx3 = np.array(map(lambda x: [0 , 0, x[1]], lf3))
+# xx = np.concatenate((xx1, xx2, xx3))
+# #print xx
+# # yy = np.array(map(lambda x: x[0], lf1+lf2+lf3))
+# model = BernoulliNB()
+# model.fit(xx,yy)
+# scenarios = [[0,0,0], [1,1,1],[0,0,1],[0,1,1],[1,0,0],[1,1,0]]
+# predicted = model.predict(scenarios)
+# print predicted
+# print model.predict_proba(scenarios)
+# print model.feature_count_
+
+    # build vector
+    # [DJI, FTSE, HSCE]
+    points_sp = []
+    points_sk = []
+    for i in range(len(lf)):
+
+        def spark_friendly(v):
+            # init a bunch of zeros [0,0,...]
+            point = [0] * len(lf)
+            # set the value at column i of the vector
+            point[i] = v[1]
+            #print 'spark label:%s feature#:%d' % (v[0], i), point
+            # return a tuple of (label, feature)
+            return (v[0], point)
+
+        def sklearn_friendly(v):
+            point = [0] * len(lf)
+            point[i] = v[1]
+            #print 'sklearn label:%s feature#:%d' % (v[0], i), point
+            return point
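+        # e.g. with 3 feature stocks and i == 0, a record (label, 1) for the first
+        # stock becomes the labelled point (label, [1, 0, 0]) for spark and the
+        # row [1, 0, 0] for sklearn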
+        #print 'len: ' , len(lf[i])
+        points_sp.append(map(spark_friendly, lf[i]))
+        points_sk.append(np.array(map(sklearn_friendly, lf[i])))
+
+    #
+    # format [[(1, [1, 0, 0]), (1, [1, 0, 0])], [(0, [0, 0, 0]),...]]
+    def save_labelled_points(name, pt):
+        now = datetime.datetime.now().strftime('%Y%m%d%H%M')
+        now = ''
+        f = open('%s/%s-%s' % ('../dat', name, now), 'w')
+
+        for i in range(len(points_sp)):
+            for j in range(len(points_sp[i])):
+                print '%s,%s' % (points_sp[i][j][0], ' '.join('%d' % s for s in points_sp[i][j][1]))
+                f.write('%s,%s\n' % (points_sp[i][j][0], ' '.join('%d' % s for s in points_sp[i][j][1])))
+
+        f.close()
+
+    print "For pyspark LabeledPoint format: ", points_sp
+    save_labelled_points('%s-%s' % (features_config['cstock'], '_'.join(s for s in features_config['fstocks'])), points_sp)
+
+    points_sk = np.concatenate(points_sk)
+    print "For sklearn numpy format:\n ", points_sk
+    #print np.concatenate((points))
+
+    #print len(lf[0]+lf[1]+lf[2]), len(reduce(lambda x,y:x+y, lf)) , len(points_sp)
+    yy = np.array(map(lambda x: x[0], reduce(lambda x, y: x+y, lf)))
+    model = BernoulliNB()
+    model.fit(points_sk, yy)
+    #scenarios = [[0,0,0], [1,1,1],[0,0,1],[0,1,1],[1,0,0],[1,1,0]]
+    num_features = len(points_sk[0])
+    scenarios = permutations(num_features)
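+    # scenarios enumerates every 0/1 combination of the feature stocks (one column
+    # per entry in features_config['fstocks']), so the model is queried for each
+    # possible previous-day up/down pattern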
+
+    predicted = model.predict(scenarios)
+    print predicted, scenarios
+    predicted_proba = model.predict_proba(scenarios)
+    print predicted_proba
+    print model.feature_count_
+
+    print '************** SUMMARY REPORT **************'
+    print 'Likelihood (%s) GIVEN (%s)' % (features_config['cstock'], ', '.join(s for s in features_config['fstocks']))
+    print 'Predicted\t\tScenario\t\tProbabilities'
+
+    for i in range(len(predicted)):
+        print '%s:\t\t %s\t\t%s' % ('UP' if predicted[i] == 1 else 'DOWN', scenarios[i], predicted_proba[i])
+
+def test():
+    # [DJI, FTSE, HSCE]
+    points = []
+    for i in range(3):
+
+        def f1(v):
+
+            point = [0] * len(range(3))
+            point[i] = v
+            print i, point
+            return point
+
+        points.append(np.array(map(f1, [7,8,9])))
+
+
+
+    print points
+    print np.concatenate(points)
+
+
+def set_biz_calendar():
+    # hk holidays
+    holidays = [str2datetime('20150903'),
+                str2datetime('20150928'),
+                str2datetime('20151225'),
+                str2datetime('20151226'),
+                str2datetime('20150701'),
+                str2datetime('20160101'),
+                str2datetime('20160208'),
+                str2datetime('20160209'),
+                str2datetime('20160210'),
+                str2datetime('20160325'),
+                str2datetime('20160326'),
+                str2datetime('20160328'),
+                str2datetime('20160404'),
+                str2datetime('20160502')]
+
+    r = rrule.rrule(rrule.DAILY,
+                    byweekday=[rrule.MO, rrule.TU, rrule.WE, rrule.TH, rrule.FR],
+                    dtstart=str2datetime('20151201'))
+    rs = rrule.rruleset()
+    rs.rrule(r)
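+    # excluding the holidays below makes rs.after(d) return the next business day,
+    # which is what the filters above rely on to line up feature and label dates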
+    for exdate in holidays:
+        rs.exdate(exdate)
+
+
+    return rs
+
+    #print np.array(s1)
+if __name__ == '__main__':
+    #weather_play()
+    #test()
+
+    #
+    #
+    # Model:
+    #
+    # What is the likelihood that the HSI opens higher, given that
+    # the Dow Jones or some other indices closed higher on
+    # the previous trading day?
+    #
+    rs = set_biz_calendar()
+    print ''.join('%s,\n' % rs[i] for i in range(5)), rs.after(str2datetime('20160324')),\
+        datetime2ystr(rs.after(str2datetime('20160324')))
+
+    #ewh_hsi(rs)
+    predict(rs)