#
# https://programmer.ink/think/5d872dc23f36a.html
#
import csv
import os

import requests
from lxml import etree

# Browser-like User-Agent sent with the search request.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/49.0.2623.112 Safari/537.36"
)

# Column order of the generated CSV file.
_CSV_HEAD = [
    "date",
    "Participant number",
    "Central System Participant Name",
    "address",
    "Shareholding",
    "Proportion",
]


def _cell_text(row, column):
    """Return the second ``div`` text node of cell *column* in *row*, or ''.

    The value is read from index 1 of ``./td[column]/div/text()``; index 0 is
    presumably the cell's label -- confirm against the page markup. A cell
    with fewer than two text nodes yields an empty string instead of raising.
    """
    texts = row.xpath("./td[%d]/div/text()" % column)
    return texts[1] if len(texts) > 1 else ''


def get_shareholdings(stkcode, tdate, lang='c', timeout=30):
    """Download the shareholding table for one stock and save it as CSV.

    Posts the search form of ``https://www.hkexnews.hk/sdw/search/searchsdw*.aspx``,
    reads the rows found under the ``pnlResultNormal`` element of the response
    and writes them to ``shshold-<stkcode>-<tdate>[_c].csv`` in the current
    working directory (UTF-8 encoded, header row always written).

    Args:
        stkcode: Stock code sent as ``txtStockCode``, e.g. ``'01810'``.
        tdate: Date string such as ``'20210204'``; characters 0-4, 4-6 and
            6-8 are joined with ``/`` to build ``txtShareholdingDate``.
        lang: ``'c'`` selects the ``searchsdw_c.aspx`` page; any other value
            selects ``searchsdw.aspx``.
        timeout: Seconds passed to ``requests.post`` so that a stalled
            connection cannot block forever.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        IndexError: If the response has no ``txtShareholdingDate`` input.
    """
    # NOTE(review): the URL suffix is chosen with ``lang == 'c'`` while the
    # row XPath and the file name use ``lang != 'en'``. The two agree for
    # 'c' and 'en' only; the original conditions are preserved as they were.
    url = "https://www.hkexnews.hk/sdw/search/searchsdw%s.aspx" % ('_c' if lang == 'c' else '')
    print(url)

    shs_hold_date = '%s/%s/%s' % (tdate[0:4], tdate[4:6], tdate[6:8])
    form = {
        '__EVENTTARGET': 'btnSearch',
        '__EVENTARGUMENT': '',
        # NOTE(review): hard-coded ASP.NET form state; presumably copied from
        # a live page -- verify it is still accepted by the server.
        '__VIEWSTATE': '/wEPDwULLTIwNTMyMzMwMThkZHNjXATvSlyVIlPSDhuziMEZMG94',
        '__VIEWSTATEGENERATOR': '3B50BBBD',
        'today': tdate,
        'sortBy': 'shareholding',
        'sortDirection': 'desc',
        'alertMsg': '',
        'txtShareholdingDate': shs_hold_date,
        'txtStockCode': stkcode,
        'txtStockName': '',
        'txtParticipantID': '',
        'txtParticipantName': '',
    }

    response = requests.post(
        url,
        headers={"User-Agent": _USER_AGENT},
        data=form,
        timeout=timeout,
    )
    # Fail on an HTTP error status instead of parsing an error page.
    response.raise_for_status()

    page = etree.HTML(response.text)
    rows = page.xpath(
        "//div[@id='pnlResultNormal']//tbody%s" % ('/tr' if lang != 'en' else '')
    )
    print("--" * 2)

    valuetime = page.xpath("//input[@id='txtShareholdingDate']/@value")
    if not valuetime:
        # IndexError is kept so existing callers catching it still work.
        raise IndexError(
            "txtShareholdingDate not found in the response for stock %s on %s"
            % (stkcode, shs_hold_date)
        )
    shown_date = valuetime[0]
    print(shown_date + "Data available")

    filename = 'shshold-%s-%s%s.csv' % (stkcode, tdate, ('_c' if lang != 'en' else ''))

    data = []
    for row in rows:
        # Every column tolerates an empty cell, not only the participant id.
        participant_name = _cell_text(row, 2)
        print(participant_name)
        data.append({
            "date": shown_date,
            "Participant number": _cell_text(row, 1),
            "Central System Participant Name": participant_name,
            "address": _cell_text(row, 3),
            "Shareholding": _cell_text(row, 4),
            "Proportion": _cell_text(row, 5),
        })

    # Explicit UTF-8 so non-ASCII names do not depend on the platform default
    # encoding; newline='' is what the csv module expects.
    with open(filename, 'w', newline='', encoding='utf-8') as fp:
        writer = csv.DictWriter(fp, _CSV_HEAD)
        writer.writeheader()
        writer.writerows(data)


if __name__ == '__main__':
    # Create the output directory first so chdir cannot fail on a fresh run.
    os.makedirs("./holding", exist_ok=True)
    os.chdir("./holding")
    get_shareholdings('01810', '20210204')