# daily_download.py (5.1 KB)
  1. # -*- coding: utf-8 -*-
  2. import traceback
  3. import logging
  4. from misc2.helpers import ConfigMap
  5. from optparse import OptionParser
  6. import time, datetime
  7. import os, wget
  8. import ntpath
  9. class HkexStatDownloader():
  10. def __init__(self, kwargs):
  11. #print '\n'.join("x:{##, '%s'}" % (k) for k,v in sorted(kwargs.iteritems()))
  12. self.kwargs = kwargs
  13. self.data_list = eval(kwargs['data_list'])
  14. if 'auto' in kwargs.keys():
  15. self.download_auto()
  16. else:
  17. self.download_stat(kwargs['download_types'], kwargs['day'], kwargs['output_path'], 'compress')
  18. # day parameter must be in YYMMDD format
  19. def is_valid_day(self, day):
  20. the_day = datetime.datetime.strptime(day,'%y%m%d')
  21. print (the_day)
  22. holidays = eval(self.kwargs['hk_holiday'])
  23. # format as YYYY
  24. year = the_day.strftime('%Y')
  25. weekday = the_day.weekday()
  26. if the_day.strftime('%m%d') not in holidays[year] and weekday not in [5,6]:
  27. return True
  28. else:
  29. return False
  30. def download_auto(self):
  31. download_type = 'abcdefghijklm'
  32. today = datetime.datetime.now().strftime('%y%m%d')
  33. if self.is_valid_day(today):
  34. self.download_stat(download_type, today , kwargs['output_path'])
  35. else:
  36. logging.info('%s not a business day, no download' % today)
  37. # day_str in yymmdd
  38. def download_stat(self, download_type, day_str, output_path, compress=False):
  39. if not self.is_valid_day(day_str):
  40. logging.info('%s not a business day, no download' % day_str)
  41. return
  42. dir = '%s%s' % (output_path, day_str)
  43. if not os.path.exists(dir):
  44. os.makedirs(dir)
  45. for ch in download_type:
  46. link = self.kwargs[self.data_list[ch][1]]
  47. # special check for 'k' type dayily short sell
  48. # if the date supplied is not today's date, the download is not available
  49. if day_str != datetime.datetime.now().strftime('%y%m%d') and ch == 'k':
  50. continue
  51. # there is no need to concat day str for daily download html page
  52. # for other downloads, insert the day str into the url
  53. link = link % day_str if ch != 'k' else link
  54. try:
  55. path = '%s/%s' % (dir, ntpath.basename(link))
  56. logging.info('HkexStatDownloader:[%c] url:%s path %s to download' % (ch, link, path))
  57. wget.download(link, path)
  58. except:
  59. print ('exception: check log for additional error messages.')
  60. logging.error(traceback.format_exc())
  61. '''
  62. Download daily stat files from HKEX website
  63. The script requires a config file to run
  64. check finopt/config/daily_download.cfg for
  65. details
  66. '''
  67. if __name__ == '__main__':
  68. # kwargs = {
  69. # 'logconfig': {'level': logging.INFO, 'filemode': 'w', 'filename': '/tmp/daily_download.log'},
  70. #
  71. # }
  72. usage = """usage: %prog [options]
  73. a:{'HSI options after market HTML', 'dha_url'}
  74. b:{'HSI options after market ZIP', 'dza_url'},
  75. c:{'HSI options normal hours ZIP', 'dzn_url'},
  76. d:{'HSI futures normal hours HTML', 'fhn_url'},
  77. e:{'HSI futures normal hours ZIP', 'fzn_url'},
  78. f:{'HHI CN futures normal hours HTML', 'hhn_url'},
  79. g:{'HHI CN futures normal hours ZIP', 'hzn_url'},
  80. h:{'HHI CN options normal hours HTML', 'ohn_url'},
  81. i:{'HHI CN options normal hours ZIP', 'ozn_url'},
  82. j:{'Cash market daily quotes HTML ', 'shd_url'},
  83. k:{'Cash market short sell HTML', 'shs_url'},
  84. l:{'HSI volatility HTML', 'vh_url'},
  85. m:{'HSI volatility ZIP', 'vz_url'}
  86. """
  87. parser = OptionParser(usage=usage)
  88. parser.add_option("-d", "--download_types",
  89. action="store", dest='download_types')
  90. parser.add_option("-a", "--auto",
  91. action="store_true", dest='auto')
  92. parser.add_option("-s", "--day",
  93. action="store", dest='day')
  94. parser.add_option("-f", "--config_file",
  95. action="store", dest="config_file",
  96. help="path to the config file")
  97. (options, args) = parser.parse_args()
  98. try:
  99. print (options)
  100. kwargs = ConfigMap().kwargs_from_file(options.config_file)
  101. for option, value in options.__dict__.iteritems():
  102. if value != None:
  103. kwargs[option] = value
  104. logconfig = kwargs['logconfig']
  105. logconfig['format'] = '%(asctime)s %(levelname)-8s %(message)s'
  106. logging.basicConfig(**logconfig)
  107. logging.info('config settings: %s' % kwargs)
  108. hkex = HkexStatDownloader(kwargs)
  109. except:
  110. print ('exception: check log for additional error messages.')
  111. logging.error(traceback.format_exc())