@@ -49,47 +49,23 @@ class OrientalDailyPure(BasicNewsRecipe):
             return str(tag.contents[0]).replace('<em>', '').replace('</em>', '')
 
-
-        def old_scrap_feed(feed):
-            f_url = '%s%s' % (urlRoot, feed[0])
-            print (f_url)
-            #f_html = urlopen(f_url).read()
-            sf = self.index_to_soup(f_url)
-            # extract all h2 headlines
-            l_h2 = map(lambda x: extract_text(x), sf.findAll('h2'))
-            l_h2 = list(l_h2)[:len(list(l_h2))-2]
-
-            # for each headline, look for the feed title and feed url
-
-#            print '--------------------'
-            print ('LEVEL H2: %s' % l_h2)
-#            for hl in l_h2:
-#                print 'h1: ' + hl
-#                print sf.findAll('ul', {'title': hl })[0].findAll('li')
-            l_feed = map(lambda x: sf.findAll('ul', {'title': x })[0].findAll('li'), l_h2)
-            print ('-----------l_feed')
-            l_feed = [item for sublist in l_feed for item in sublist]
-            print (l_feed[0])
-            print ('------END -----l_feed')
-#            l_feed = map(lambda x: {'url': '%s%s' % (urlRoot, x[0].a['href']), 'title': extract_text(x[0].a),
-#                'date': strftime('%a, %d %b'),
-#                'description': '%s' % (extract_text(x[0].a)),
-#                'content': ''}, l_feed)
-            l_feed = map(lambda x: {'url': '%s%s' % (urlRoot, x.a['href']), 'title': extract_text(x.a),
-                'date': strftime('%a, %d %b'),
-                'description': '%s' % (extract_text(x.a)),
-                'content': ''}, l_feed)
-            print ('****************************')
-
-            l_feed = list(l_feed)
-            print (list(l_feed))
-            return l_feed
 
 
         def scrap_feed(feed):
             f_url = '%s%s' % (urlRoot, feed[0])
-            print (f_url)
+            print ('feed url %s ' % f_url)
             soup = self.index_to_soup(f_url)
-            articles = soup.findAll('div', 'sectionList')[0].findAll('li')
+            # verify a section is available for download on the day this script is run.
+            # skip a section if unavailable
+            # for instance, finance section is unavailable on Sunday, so is "lifestyle"
+            try:
+                articles = soup.findAll('div', 'sectionList')[0].findAll('li')
+            except:
+                print ('--- this section [%s] is not available today ---' % feed[1])
+                raise Exception ('--- this section [%s] is not available today ---' % feed[1])
+
+
+
+
             articles = map(lambda x:{'url': '%s%s' % (urlRoot, x.a['href']),
                 'title': x.findAll('div', attrs={'class' : 'text'})[0].text,
                 'date': strftime('%a, %d %b'),
@@ -102,15 +78,13 @@ class OrientalDailyPure(BasicNewsRecipe):
 
 
         urlRoot = 'https://orientaldaily.on.cc'
-        #url = '%s/cnt/news/%s/index.html' % (urlRoot, time.strftime('%Y%m%d'))
         url = urlRoot
-        #url = '%s/cnt/news/%s/index.html' % (urlRoot, '20201127')
         soup = self.index_to_soup(url)
-        #lookups = ['要聞港聞','兩岸國際','財經','娛樂','副刊','男極圈','體育','馬經','波經','社論專欄','慈善基金','昔日東方']
-        lookups = ['news', 'china_world', 'finance', 'lifestyle', 'sport']
+        #lookups = ['news', 'china_world', 'finance', 'lifestyle', 'sport']
+        lookups = ['news', 'china_world', 'finance', 'entertainment', 'lifestyle', 'adult', 'sport']
         # no finanical news on Sunday
-        if time.strftime('%w') == '0':
-            lookups.remove('finance')
+        #if time.strftime('%w') == '0':
+        #    lookups.remove('finance')
 
         feeds = soup.findAll('ul', 'menuList clear')[0].findAll('li', attrs={'section':lookups})
         feeds = map(lambda x: (x.a['href'], x.text), feeds)
@@ -119,8 +93,13 @@ class OrientalDailyPure(BasicNewsRecipe):
         print ('----------------------- The feeds are: %s' % feeds)
         ans = []
         for e in feeds:
-            print ('e[1] is: %s | %s\n' % (e[1], e[0]))
-            ans.append((e[1], scrap_feed(e)))
+            try:
+                print ('e[1] is: %s | %s\n' % (e[1], e[0]))
+                ans.append((e[1], scrap_feed(e)))
+            except Exception as e:
+                print('while processing feed: %s' % e)
+                continue
+
         print ('############')
         print (ans)
         return ans
@@ -141,7 +120,7 @@ class OrientalDailyPure(BasicNewsRecipe):
             #print (pic)
             if pic != None:
                 html += '<a href="%s"><img src="%s"></img></a>' % (str(pic.a['href']), str(pic.img['src']))
-            print('>>>>>>>>>>>>>>> %s' % html)
+            #print('>>>>>>>>>>>>>>> %s' % html)
             return BeautifulSoup(html)
         except Exception as e:
             print (e)