handle missing sections in recipe

larry, 4 years ago
commit 546a15ac3e
2 files changed, 25 insertions and 51 deletions
  1. calibre/recipe/oriental_daily_pure.recipe: +25, -46
  2. calibre/recipe/upkindle.sh: +0, -5

calibre/recipe/oriental_daily_pure.recipe: +25, -46

@@ -49,47 +49,23 @@ class OrientalDailyPure(BasicNewsRecipe):
             return str(tag.contents[0]).replace('<em>', '').replace('</em>', '')
  
 
-
-        def old_scrap_feed(feed):
-            f_url = '%s%s' % (urlRoot, feed[0])
-            print (f_url)
-            #f_html = urlopen(f_url).read()
-            sf = self.index_to_soup(f_url)
-            # extract all h2 headlines
-            l_h2 = map(lambda x: extract_text(x), sf.findAll('h2'))
-            l_h2 = list(l_h2)[:len(list(l_h2))-2]
-            
-            # for each headline, look for the feed title and feed url
-             
-#	    print '--------------------'
-            print ('LEVEL H2: %s' % l_h2)
-#            for hl in l_h2:
-#                print 'h1: ' + hl
-#                print sf.findAll('ul', {'title': hl })[0].findAll('li')
-            l_feed = map(lambda x: sf.findAll('ul', {'title': x })[0].findAll('li'), l_h2)
-            print ('-----------l_feed')
-            l_feed = [item  for sublist in l_feed for item in sublist]
-            print (l_feed[0])
-            print ('------END -----l_feed')
-#            l_feed = map(lambda x: {'url': '%s%s' % (urlRoot, x[0].a['href']), 'title': extract_text(x[0].a),
-#                                    'date': strftime('%a, %d %b'), 
-#                                    'description': '%s' % (extract_text(x[0].a)),
-#                                    'content': ''}, l_feed)
-            l_feed = map(lambda x: {'url': '%s%s' % (urlRoot, x.a['href']), 'title': extract_text(x.a),
-                                    'date': strftime('%a, %d %b'), 
-                                    'description': '%s' % (extract_text(x.a)),
-                                    'content': ''}, l_feed)
-            print ('****************************')
-            
-            l_feed = list(l_feed)
-            print (list(l_feed))
-            return l_feed
         
         def scrap_feed(feed):
             f_url = '%s%s' % (urlRoot, feed[0])
-            print (f_url)
+            print ('feed url: %s' % f_url)
             soup = self.index_to_soup(f_url)
-            articles = soup.findAll('div', 'sectionList')[0].findAll('li')
+            # Verify the section is available for download on the day this script runs,
+            # and skip it if not. For instance, the finance section is unavailable on
+            # Sunday, and so is "lifestyle".
+            try:
+                articles = soup.findAll('div', 'sectionList')[0].findAll('li')
+            except IndexError:
+                # no 'sectionList' block on today's page: signal parse_index to skip it
+                print ('--- this section [%s] is not available today ---' % feed[1])
+                raise Exception('--- this section [%s] is not available today ---' % feed[1])
+
             articles = map(lambda x:{'url': '%s%s' % (urlRoot, x.a['href']), 
                             'title': x.findAll('div', attrs={'class' : 'text'})[0].text, 
                             'date': strftime('%a, %d %b'),
@@ -102,15 +78,13 @@ class OrientalDailyPure(BasicNewsRecipe):
                
 
         urlRoot = 'https://orientaldaily.on.cc'
-        #url = '%s/cnt/news/%s/index.html' % (urlRoot, time.strftime('%Y%m%d'))
         url = urlRoot 
-        #url = '%s/cnt/news/%s/index.html' % (urlRoot, '20201127')
         soup = self.index_to_soup(url)
-        #lookups = ['要聞港聞','兩岸國際','財經','娛樂','副刊','男極圈','體育','馬經','波經','社論專欄','慈善基金','昔日東方']
-        lookups = ['news', 'china_world', 'finance', 'lifestyle', 'sport']
+        #lookups = ['news', 'china_world', 'finance', 'lifestyle', 'sport']
+        lookups = ['news', 'china_world', 'finance', 'entertainment', 'lifestyle', 'adult', 'sport']
         # no financial news on Sunday
-        if time.strftime('%w') == '0':
-           lookups.remove('finance') 
+        #if time.strftime('%w') == '0':
+        #   lookups.remove('finance') 
 
         feeds = soup.findAll('ul', 'menuList clear')[0].findAll('li', attrs={'section':lookups})
         feeds = map(lambda x: (x.a['href'], x.text), feeds)
@@ -119,8 +93,13 @@ class OrientalDailyPure(BasicNewsRecipe):
         print ('----------------------- The feeds are: %s' % feeds)
         ans = []
         for e in feeds:
-            print ('e[1] is: %s | %s\n' % (e[1], e[0]))
-            ans.append((e[1], scrap_feed(e)))
+            try:
+                print ('feed: %s | %s\n' % (e[1], e[0]))
+                ans.append((e[1], scrap_feed(e)))
+            except Exception as err:
+                # the section raised above because it is unavailable today; skip it
+                print ('while processing feed %s: %s' % (e[1], err))
+                continue
+
         print ('############')
         print (ans)
         return ans
@@ -141,7 +120,7 @@ class OrientalDailyPure(BasicNewsRecipe):
             #print (pic)
             if pic != None:
                html += '<a href="%s"><img src="%s"></img></a>' % (str(pic.a['href']), str(pic.img['src'])) 
-            print('>>>>>>>>>>>>>>> %s' % html)
+            #print('>>>>>>>>>>>>>>> %s' % html)
             return BeautifulSoup(html) 
         except Exception as e:
             print (e)
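
Taken together, the change makes a missing section non-fatal: scrap_feed raises when the day's page has no sectionList block for that section, and the feed loop in parse_index catches the exception and moves on. Below is a minimal standalone sketch of the same pattern; the names fetch_section, build_feeds and the sample SECTIONS data are illustrative, not part of the recipe.

    # Minimal sketch of the skip-on-missing-section pattern used in the diff above.
    # fetch_section / build_feeds / SECTIONS are illustrative names, not recipe code.
    from datetime import date


    class SectionUnavailable(Exception):
        """Raised when a section has no article list on the current day."""


    def fetch_section(section_name, article_lists):
        # Mirrors scrap_feed: take the first article list if one exists,
        # otherwise signal the caller that this section should be skipped today.
        if not article_lists:
            raise SectionUnavailable(
                '--- this section [%s] is not available today ---' % section_name)
        return article_lists[0]


    def build_feeds(sections):
        # Mirrors the loop in parse_index: try every section, keep the ones
        # that work, and continue past the ones that raise.
        ans = []
        for name, article_lists in sections:
            try:
                ans.append((name, fetch_section(name, article_lists)))
            except SectionUnavailable as err:
                print('while processing feed %s: %s' % (name, err))
                continue
        return ans


    if __name__ == '__main__':
        # On a Sunday-like day, 'finance' has no articles and is skipped.
        SECTIONS = [
            ('news', [['article 1', 'article 2']]),
            ('finance', []),   # unavailable today
            ('sport', [['article 3']]),
        ]
        print(date.today(), build_feeds(SECTIONS))

Raising and catching, rather than returning an empty article list, keeps an unavailable section out of the returned feed list entirely, so the generated e-book never contains an empty section.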

calibre/recipe/upkindle.sh: +0, -5

@@ -5,11 +5,6 @@ RECIPEPATH=$ROOTPATH/recipe
 MOBIPATH=$ROOTPATH/daily_news/oriental
 OPTIONS="--output-profile kindle_pw"
 echo $RECIPEPATH
-#
-# download oriental daily
-#
-#ebook-convert "$RECIPEPATH/oriental_daily_pure.recipe" $MOBIPATH/$TODAY-orient.mobi $OPTIONS
-
 #
 #  download news and save output to epub
 #