| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
"""Query the HKEX shareholding search page for one stock and save the rows as CSV.

The script posts the search form at ``searchsdw.aspx``, reads the participant
rows from the ``pnlResultNormal`` table in the returned HTML, prints each row,
and writes all rows to a CSV file inside the ``./holding`` directory.
"""
import csv
import os

import requests
from lxml import etree

# Browser-like User-Agent header sent with the request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36"}
url = "https://www.hkexnews.hk/sdw/search/searchsdw.aspx"

# Form fields posted to the search page (despite the name, this is the whole
# form payload, not just a date).  The date and stock code are hard-coded.
datatime = {
    '__EVENTTARGET': 'btnSearch',
    '__EVENTARGUMENT': '',
    '__VIEWSTATE': '/wEPDwULLTIwNTMyMzMwMThkZHNjXATvSlyVIlPSDhuziMEZMG94',
    '__VIEWSTATEGENERATOR': '3B50BBBD',
    'today': '20210205',
    'sortBy': 'shareholding',
    'sortDirection': 'desc',
    'alertMsg': '',
    'txtShareholdingDate': '2021/02/05',
    'txtStockCode': '03800',
    'txtStockName': '',
    'txtParticipantID': '',
    'txtParticipantName': '',
}


def _cell_text(row, column, default=None):
    """Return the second text node under ``td[column]/div`` of *row*.

    The script reads index 1 of the text nodes for every cell; when fewer
    than two text nodes exist, *default* is returned instead of raising.
    """
    texts = row.xpath("./td[%d]/div/text()" % column)
    return texts[1] if len(texts) > 1 else default


# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response = requests.post(url, headers=headers, data=datatime, timeout=30)
response.raise_for_status()
html1 = etree.HTML(response.text)

rows = html1.xpath("//div[@id='pnlResultNormal']//tbody/tr")
print("--" * 2)

# The page echoes the shareholding date back in the txtShareholdingDate input.
valuetime = html1.xpath("//input[@id='txtShareholdingDate']/@value")
if not valuetime:
    raise SystemExit("txtShareholdingDate was not found in the response; nothing to save")
shareholding_date = valuetime[0]
print(shareholding_date + "Data available")

data = []
for row in rows:
    participant_id = _cell_text(row, 1)
    if participant_id is None:
        # No participant id in this row: skip it.
        continue
    # The remaining cells fall back to "" so that one empty cell does not
    # abort the whole run.
    datadic = {
        "date": shareholding_date,
        "Participant number": participant_id,
        "Central System Participant Name": _cell_text(row, 2, ""),
        "address": _cell_text(row, 3, ""),
        "Shareholding": _cell_text(row, 4, ""),
        "Proportion": _cell_text(row, 5, ""),
    }
    print(datadic)
    data.append(datadic)

csvhead = ["date", "Participant number", "Central System Participant Name", "address", "Shareholding", "Proportion"]

# NOTE(review): the output name is fixed to 20190822.csv although the request
# above asks for 2021/02/05 -- confirm whether it should follow the date.
filename = '20190822' + ".csv"

# Create the output directory on first run; the chdir is kept so the working
# directory ends up where it did before.
os.makedirs("./holding", exist_ok=True)
os.chdir("./holding")
with open(filename, 'w', newline='') as fp:
    writer = csv.DictWriter(fp, csvhead)
    writer.writeheader()
    writer.writerows(data)
|