larry1chan@qq.com 11 months ago
parent
revision
cb4dccadad

+ 4 - 0
calibre-cron/.env

@@ -0,0 +1,4 @@
+CALIBRE_NAME=calibre-cron
+CALIBRE_RECIPE=~/projects/gog/Dockers_real/calibre-cron/recipe
+CALIBRE_CRON=~/projects/gog/Dockers_real/calibre-cron/crond
+NEWS_PATH=~/projects/gog/Dockers_real/calibre-cron/news

+ 29 - 0
calibre-cron/build/Dockerfile

@@ -0,0 +1,29 @@
+# Use the official Ubuntu as the base image
+FROM ubuntu:latest
+
+# Install dependencies
+RUN apt-get update && apt-get install -y \
+    cron \
+    calibre \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a directory for Calibre library
+RUN mkdir -p /calibre/library
+
+# Copy your cron job file into the container
+COPY crontab /etc/cron.d/calibre-cron
+
+# Set the required (non-executable) permissions on the cron job file
+RUN chmod 0644 /etc/cron.d/calibre-cron
+
+# Apply the cron job
+RUN crontab /etc/cron.d/calibre-cron
+
+# Create the log file (optional, for debugging)
+RUN touch /var/log/calibre-cron.log
+
+# Set the working directory
+WORKDIR /calibre
+
+# Start cron in the foreground
+CMD ["cron", "-f"]

+ 2 - 0
calibre-cron/build/crontab

@@ -0,0 +1,2 @@
+# Run a script every minute
+* * * * * root echo "Hello, World!" >> /var/log/cron.log 2>&1

+ 4 - 0
calibre-cron/crond/calibre-cron

@@ -0,0 +1,4 @@
+# Print a test message every 5 minutes
+*/5 * * * *  echo "Hello, World Wide !" >> /var/log/cron.log 2>&1
+# Fetch and send the daily news at 05:00 (the commented-out 3-minute schedule is for testing)
+#*/3 * * * *  /calibre/recipe/upkindle.sh >> /var/log/upkindle.log 2>&1
+0 5  * * *  /calibre/recipe/upkindle.sh >> /var/log/upkindle.log 2>&1
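Changes to this bind-mounted file are not picked up automatically: the crontab installed at image build time stays in effect until the file is re-installed inside the running container (docker_refreshcron.sh further down does exactly that). A quick way to re-install and verify, assuming the container name from .env:

    # install the mounted file as the active crontab (same command as docker_refreshcron.sh)
    docker exec calibre-cron crontab /etc/cron.d/calibre-cron

    # list the crontab that cron is actually running
    docker exec calibre-cron crontab -l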

+ 20 - 0
calibre-cron/docker-compose.yml

@@ -0,0 +1,20 @@
+---
+services:
+
+  calibre-cron:
+    image: calibre-cron:latest
+    container_name: ${CALIBRE_NAME}
+    environment:
+      - PUID=1000
+      - PGID=1000
+      - TZ=Asia/Hong_Kong
+    volumes:
+      - ${CALIBRE_CRON}:/etc/cron.d
+      - ${CALIBRE_RECIPE}:/calibre/recipe 
+      - ${NEWS_PATH}:/news
+    ports:
+      - 8780:8080
+      - 8781:8081
+    restart: "no"
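All container and volume settings come from the .env file at the top of this commit; one way to confirm that ${CALIBRE_NAME}, ${CALIBRE_RECIPE}, ${CALIBRE_CRON} and ${NEWS_PATH} resolve as intended, assuming the command is run next to docker-compose.yml:

    # render the compose file with .env interpolated and inspect the resulting mounts
    docker compose config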
+
+

BIN
calibre-cron/news/20250207-orient.epub


+ 5 - 0
calibre-cron/recipe/docker_refreshcron.sh

@@ -0,0 +1,5 @@
+#!/bin/sh
+#
+# Run this script every time you change the cron file
+# to make the changes take effect.
+docker exec -it calibre-cron crontab /etc/cron.d/calibre-cron

+ 135 - 0
calibre-cron/recipe/oriental_daily_pure.recipe

@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+import re
+import time
+from html.parser import HTMLParser  # Python 3 home of the old HTMLParser module
+from calibre import strftime
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class OrientalDailyPure(BasicNewsRecipe):
+
+    title       = 'Oriental Daily - '  + time.strftime('%d %b %Y')
+    __author__  = 'Larry Chan'
+    description = 'Oriental Daily, Hong Kong'
+    publication_type = 'newspaper'
+    language    = 'zh'
+    timefmt = ' [%a, %d %b, %Y]'
+    masthead_url = 'http://orientaldaily.on.cc/img/v2/logo_odn.png'
+    #cover_url = 'http://orientaldaily.on.cc/cnt/news/' + time.strftime('%Y%m%d') + '/photo/' + time.strftime('%m%d') + '-00174-001k1.jpg'
+    cover_url = 'https://orientaldaily.on.cc/asset/main/%s/photo/337_sectMain.jpg' % time.strftime('%Y%m%d')
+    #print ("cover %s" % cover_url)
+    delay = 0
+
+    no_stylesheets = True
+    extra_css = 'h1 {font: large sans-serif;}\n.byline {font: monospace;}'
+
+#    keep_only_tags    = [
+#                       dict(name='h1'),
+#                       dict(name='a'),                                  
+#                       dict(name='img'),                                  
+#                       dict(name='div'),                                  
+#                       dict(attrs={'div': 'content'})                                  
+#                        ]
+
+    #dict(name='p', attrs={'class':['photoCaption','paragraph']})
+    #remove_tags = [dict(name=['script', 'input'])]
+    HTMLParser.attrfind = re.compile(
+                        r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+                        r'(\'[^\']*\'|"[^"]*"|[^\s>^\[\]{}\|\'\"]*))?') 
+
+    def parse_index(self):
+
+        def extract_text(tag):
+            return str(tag.contents[0]).replace('<em>', '').replace('</em>', '')
+
+        def scrap_feed(feed):
+            f_url = '%s%s' % (urlRoot, feed[0])
+            print ('feed url %s ' % f_url)
+            soup = self.index_to_soup(f_url)
+            # verify a section is available for download on the day this script is run.
+            # skip a section if unavailable   
+            # for instance, finance section is unavailable on Sunday, so is "lifestyle"
+            try:
+                articles = soup.findAll('div', 'sectionList')[0].findAll('li')
+            except (IndexError, AttributeError):
+                print('--- this section [%s] is not available today ---' % feed[1])
+                raise Exception('--- this section [%s] is not available today ---' % feed[1])
+
+            articles = map(lambda x:{'url': '%s%s' % (urlRoot, x.a['href']), 
+                            'title': x.findAll('div', attrs={'class' : 'text'})[0].text, 
+                            'date': strftime('%a, %d %b'),
+                            'description': x.findAll('div', attrs={'class' : 'text'})[0].text,
+                            'content': ''}, articles)
+            # materialise the map object into a list of article dicts
+            return list(articles)
+
+
+        urlRoot = 'https://orientaldaily.on.cc'
+        url = urlRoot 
+        soup = self.index_to_soup(url)
+        #lookups = ['news', 'china_world', 'finance', 'lifestyle', 'sport']
+        lookups = ['news', 'china_world', 'finance', 'entertainment', 'lifestyle', 'adult', 'sport']
+        # no finanical news on Sunday
+        #if time.strftime('%w') == '0':
+        #   lookups.remove('finance') 
+
+        feeds = soup.findAll('ul', 'menuList clear')[0].findAll('li', attrs={'section':lookups})
+        feeds = map(lambda x: (x.a['href'], x.text), feeds)
+        feeds = list(feeds)
+
+        print ('----------------------- The feeds are: %s' % feeds)
+        ans = []
+        for feed in feeds:
+            try:
+                print('feed: %s | %s\n' % (feed[1], feed[0]))
+                ans.append((feed[1], scrap_feed(feed)))
+            except Exception as exc:
+                print('while processing feed %s: %s' % (feed[1], exc))
+                continue
+
+        print ('############')
+        print (ans)
+        return ans
+
+    def preprocess_html(self, soup):
+         
+        print('((((( begin article ))))')
+        try:
+            #print(soup)
+            html = str(soup.find('h1')) + ''.join(str(t) for t in soup.findAll('div', 'content'))
+            # download photo
+            pic = soup.find('div', 'paragraph photoParagraph')
+            #print (pic)
+            if pic is not None:
+               html += '<a href="%s"><img src="%s"></a>' % (str(pic.a['href']), str(pic.img['src']))
+            #print('>>>>>>>>>>>>>>> %s' % html)
+            return BeautifulSoup(html) 
+        except Exception as e:
+            print(e)
+            print('article layout not recognised, keeping original page')
+        print('((((( end article ))))')
+        return soup
+
+
+    def get_browser(self, *args, **kwargs):
+        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
+        br.set_header('User-Agent', value='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36')
+        return br
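The recipe can be exercised outside cron before wiring it into upkindle.sh; a sketch assuming calibre's command-line tools are installed locally (--test limits the download to a couple of articles per feed, which keeps iteration fast):

    # quick dry run while tweaking the recipe
    ebook-convert oriental_daily_pure.recipe test-orient.epub --test

    # full run with the same output profile upkindle.sh uses
    ebook-convert oriental_daily_pure.recipe orient.epub --output-profile kindle_pw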

+ 29 - 0
calibre-cron/recipe/upkindle.sh

@@ -0,0 +1,29 @@
+#!/bin/sh
+TODAY=`date +"%Y%m%d"`
+ROOTPATH=/calibre
+RECIPEPATH=$ROOTPATH/recipe
+MOBIPATH=/news
+OPTIONS="--output-profile kindle_pw"
+echo $RECIPEPATH
+#
+#  download today's news via the recipe and build a mobi
+#
+ebook-convert "$RECIPEPATH/oriental_daily_pure.recipe" "$MOBIPATH/$TODAY-orient.mobi" $OPTIONS
+#
+#  convert the mobi to epub
+#
+ebook-convert "$MOBIPATH/$TODAY-orient.mobi" "$MOBIPATH/$TODAY-orient.epub"
+#
+#
+#  send book to kindle
+#
+calibre-smtp  --subject "oriental news $TODAY" --attachment $MOBIPATH/$TODAY-orient.epub --relay hwsmtp.exmail.qq.com --port 465 --username vortify-lc@algometic.com --password "adverS@1e" --encryption-method SSL vortify-lc@algometic.com larry1chan11@kindle.com ""
+#calibre-smtp  --subject "oriental news $TODAY" --attachment $MOBIPATH/$TODAY-orient.epub --relay hwsmtp.exmail.qq.com --port 465 --username vortify-lc@algometic.com --password "ins@neS00n" --encryption-method SSL vortify-lc@algometic.com larry1chan11@kindle.com ""
+
+#
+#  rm mobi file
+#
+rm $MOBIPATH/$TODAY-orient.mobi
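Before trusting the 05:00 cron entry, the whole pipeline can be run once by hand inside the container; a sketch using the container name from .env and the recipe path mounted in docker-compose.yml:

    # one-off manual run; output goes to the terminal rather than the cron log
    docker exec -it calibre-cron sh /calibre/recipe/upkindle.sh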

+ 24 - 0
calibre-cron/recipe/upkindle.sh.bak

@@ -0,0 +1,24 @@
+#!/bin/sh
+TODAY=`date +"%Y%m%d"`
+ROOTPATH=/config
+RECIPEPATH=$ROOTPATH/recipe
+MOBIPATH=$ROOTPATH/daily_news/oriental
+OPTIONS="--output-profile kindle_pw"
+echo $RECIPEPATH
+#
+#  download news and save output to epub
+#
+#ebook-convert "$RECIPEPATH/oriental_daily_pure.recipe" $MOBIPATH/$TODAY-orient.epub 
+#
+#  convert epub to mobi 
+#
+#ebook-convert $MOBIPATH/$TODAY-orient.epub $MOBIPATH/$TODAY-orient.mobi $OPTIONS
+#
+#  send book to kindle
+#
+#calibre-smtp  --attachment $MOBIPATH/$TODAY-orient.mobi --relay smtp.gmail.com --port 587 --username cigarbar@gmail.com --password "on2next1" --encryption-method TLS cigarbar@gmail.com larry1chan@kindle.cn ""
+calibre-smtp  --attachment $MOBIPATH/$TODAY-orient.epub   --relay hwsmtp.exmail.qq.com --port 465 --username sales@algometic.com --password "y6t8Fk^" --encryption-method SSL sales@algometic.com larry1chan@kindle.cn ""
+#
+#  rm mobi file
+#
+#rm $MOBIPATH/$TODAY-orient.mobi

+ 0 - 0
calibre-cron/scripts/app-orig.cb


BIN
calibre-cron/scripts/app-orig.db


BIN
calibre-cron/scripts/app.db


BIN
calibre-cron/scripts/app.db.NAS


BIN
calibre-cron/scripts/app.db.clean


+ 138 - 0
calibre-cron/scripts/migratedb.py

@@ -0,0 +1,138 @@
+import sqlite3
+import json
+from collections import namedtuple
+
+Column = namedtuple('Column', ['name', 'type', 'notnull', 'dflt_value'])
+ErrorReport = {
+    'schema_errors': [],
+    'data_errors': []
+}
+
+def get_tables(conn):
+    cursor = conn.cursor()
+    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
+    return [row[0] for row in cursor.fetchall()]
+
+def get_columns(conn, table_name):
+    cursor = conn.execute(f"PRAGMA table_info('{table_name}')")
+    columns = []
+    for row in cursor:
+        columns.append(Column(row[1], row[2], bool(row[3]), row[4]))
+    return columns
+
+def compare_and_alter_schema(source_conn, target_conn):
+    source_tables = get_tables(source_conn)
+    target_tables = get_tables(target_conn)
+
+    for table in source_tables:
+        if table not in target_tables:
+            try:
+                create_stmt = source_conn.execute(
+                    "SELECT sql FROM sqlite_master WHERE type='table' AND name=?;",
+                    (table,)
+                ).fetchone()[0]
+                target_conn.execute(create_stmt)
+                target_conn.commit()
+            except Exception as e:
+                ErrorReport['schema_errors'].append(f"Failed to create table {table}: {str(e)}")
+            continue
+
+        source_cols = get_columns(source_conn, table)
+        target_cols = get_columns(target_conn, table)
+        target_col_names = [col.name for col in target_cols]
+
+        for col in source_cols:
+            if col.name not in target_col_names:
+                try:
+                    alter_sql = f'ALTER TABLE "{table}" ADD COLUMN "{col.name}" {col.type}'
+                    if col.notnull:
+                        alter_sql += ' NOT NULL'
+                    if col.dflt_value is not None:
+                        alter_sql += f' DEFAULT {col.dflt_value}'
+                    target_conn.execute(alter_sql)
+                    target_conn.commit()
+                except Exception as e:
+                    ErrorReport['schema_errors'].append(
+                        f"Table {table}: Failed to add column {col.name}: {str(e)}"
+                    )
+
+        for tcol in target_cols:
+            if tcol.name not in [col.name for col in source_cols]:
+                if tcol.notnull and tcol.dflt_value is None:
+                    ErrorReport['schema_errors'].append(
+                        f"Table {table}: Target column '{tcol.name}' is NOT NULL with no default but missing in source"
+                    )
+
+def generate_data_migration_script(source_conn, filename='migration_script.sql'):
+    with open(filename, 'w') as f:
+        tables = get_tables(source_conn)
+        for table in tables:
+            cols = get_columns(source_conn, table)
+            col_names = [f'"{col.name}"' for col in cols]
+            cols_str = ', '.join(col_names)
+            f.write(f'INSERT INTO "{table}" ({cols_str})\n')
+            f.write(f'SELECT {cols_str} FROM source_db."{table}";\n\n')
+
+def migrate_data(source_conn, target_conn):
+    source_tables = get_tables(source_conn)
+    
+    for table in source_tables:
+        try:
+            source_cols = get_columns(source_conn, table)
+            col_names = [col.name for col in source_cols]
+            placeholders = ', '.join(['?'] * len(col_names))
+            insert_sql = f'INSERT INTO "{table}" ({", ".join(col_names)}) VALUES ({placeholders})'
+            
+            source_cur = source_conn.cursor()
+            source_cur.execute(f'SELECT * FROM "{table}"')
+            
+            target_cur = target_conn.cursor()
+            
+            while True:
+                rows = source_cur.fetchmany(100)
+                if not rows:
+                    break
+                
+                for row in rows:
+                    try:
+                        target_cur.execute(insert_sql, row)
+                        target_conn.commit()
+                    except sqlite3.Error as e:
+                        ErrorReport['data_errors'].append({
+                            'table': table,
+                            'row_data': row,
+                            'error': str(e)
+                        })
+                        target_conn.rollback()
+        except sqlite3.Error as e:
+            ErrorReport['data_errors'].append({
+                'table': table,
+                'error': f"General migration error: {str(e)}"
+            })
+
+def save_error_report(filename='migration_errors.json'):
+    with open(filename, 'w') as f:
+        json.dump(ErrorReport, f, indent=2)
+
+def main(source_db, target_db):
+    source_conn = sqlite3.connect(source_db)
+    target_conn = sqlite3.connect(target_db)
+    
+    # Attach the source DB so the generated migration_script.sql, which references
+    # source_db."<table>", can later be run against this target connection
+    target_conn.execute(f"ATTACH DATABASE '{source_db}' AS source_db")
+    
+    compare_and_alter_schema(source_conn, target_conn)
+    generate_data_migration_script(source_conn)
+    migrate_data(source_conn, target_conn)
+    save_error_report()
+    
+    source_conn.close()
+    target_conn.close()
+
+if __name__ == '__main__':
+    import sys
+    if len(sys.argv) != 3:
+        print("Usage: python migrate_db.py <source.db> <target.db>")
+        sys.exit(1)
+    
+    main(sys.argv[1], sys.argv[2])
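A usage sketch against the database files committed under scripts/ (treating app-orig.db as the source and app.db as the target is an assumption; the script itself only enforces the two-argument form). The run writes migration_script.sql and migration_errors.json next to the script, both of which also appear in this commit:

    # copy schema differences and rows from the old Calibre-Web database into the new one
    python3 migratedb.py app-orig.db app.db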

+ 85 - 0
calibre-cron/scripts/migration_errors.json

@@ -0,0 +1,85 @@
+{
+  "schema_errors": [],
+  "data_errors": [
+    {
+      "table": "settings",
+      "row_data": [
+        1,
+        "smtp.exmail.qq.com",
+        465,
+        2,
+        "vortify-lc@algometic.com",
+        "adverS@1e",
+        "vortify-lc@algometic.com",
+        "/books/my_books",
+        8083,
+        "",
+        "",
+        "Calibre-Web",
+        60,
+        4,
+        0,
+        "^(A|The|An|Der|Die|Das|Den|Ein|Eine|Einen|Dem|Des|Einem|Eines)\\s+",
+        20,
+        1,
+        1,
+        0,
+        0,
+        4607,
+        "",
+        0,
+        null,
+        "{}",
+        0,
+        0,
+        "",
+        "",
+        "",
+        "",
+        0,
+        "",
+        "",
+        "/usr/bin/unrar",
+        0,
+        1,
+        0,
+        "",
+        0,
+        "",
+        "",
+        0,
+        "",
+        "",
+        0,
+        0,
+        "localhost",
+        389,
+        "ldap",
+        "cn=admin,dc=example,dc=org",
+        "",
+        0,
+        0,
+        0,
+        "",
+        "dc=example,dc=org",
+        "uid=%s",
+        0,
+        0,
+        "",
+        0,
+        26214400,
+        0,
+        "{}",
+        8083,
+        0,
+        0,
+        0,
+        "",
+        "",
+        "",
+        "(&(objectclass=posixGroup)(cn=%s))",
+        "memberUid",
+        "calibreweb",
+        "",
+        "m4b,doc,cbt,mobi,pdf,rtf,m4a,fb2,cbz,ogg,flac,lit,epub,cbr,wav,html,opus,txt,azw,docx,mp3,kepub,mp4,djvu,odt,azw3,prc",
+        

The file diffs are limited because there are too many changes.
+ 0 - 0
calibre-cron/scripts/migration_script.sql


Some files were not shown because too many files were changed.