Prefetch Manager

Progress:0/0
Success
0
Failed
0
Pending
0
Time
0s
Success URLs(0)
Failed URLs(0)
Pending URLs(0)

Auto Post Blogger to Channel Telegram V3

Kode di bawah ini adalah kode Python; unggah ke https://www.pythonanywhere.com/user/hanhaoyu/files/home/hanhaoyu/auto_post_blogger_by_hanhaoyu_v3.py?edit.
import feedparser
import requests
import time
import json
import os
import re
from datetime import datetime, timezone
from bs4 import BeautifulSoup
import logging
from urllib.parse import urljoin, urlparse

# ===========================================
# FULL CONFIGURATION
# ===========================================

# RSS website configuration.
# The Show* flags are strings: a feature is enabled when the value,
# lower-cased, equals "true".
LinkWebsiteRSSbyHHC = "https://www.hanhaoyu.com/feeds/posts/default?alt=rss"
ShowJudulRSSbyHHC = "true"
ShowLinkJudulRSSbyHHC = "true"
ShowDatePublicationRSSbyHHC = "true"
ShowAuthorRSSbyHHC = "true"
ShowLabelRSSbyHHC = "true"
ShowSummaryRSSbyHHC = "true"
ShowImageRSSbyHHC = "true"

# Telegram bot configuration.
# NOTE(review): the bot token is stored here in plain text. Anyone who can read
# this file can control the bot; consider loading it from the environment and
# rotating the token.
TokenBotTelegrambyHHC = "7163346072:AAEP_G38vgoR5Bq2jtsVEq2B6qOIKRK5oMo"
ChannelIDTelegrambyHHC = "-1002417488695"  # or e.g. -1001234567890
AdminIDTelegrambyHHC = "1090870321"  # chat that receives error notifications

# Message format configuration.
CustomPrefixMessagebyHHC = "🚀 POST TERBARU!\n\n"
CustomSuffixMessagebyHHC = "\n\n#BlogPost #Update"
MaxLengthSummarybyHHC = 200
MaxLengthTitlebyHHC = 100

# Timing and interval configuration.
IntervalCheckRSSbyHHC = 300  # seconds (5 minutes)
TimezonebyHHC = "Asia/Jakarta"
FormatDatebyHHC = "%d/%m/%Y %H:%M WIB"

# File and database configuration.
DatabaseFilebyHHC = "posted_articles.json"
LogFilebyHHC = "rss_uploader.log"
MaxLogSizebyHHC = 5242880  # 5MB

# Error-handling configuration.
MaxRetriesbyHHC = 3
RetryDelaybyHHC = 5  # seconds
EnableErrorNotificationbyHHC = "true"

# Image configuration.
DefaultImagebyHHC = "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgcVcG9bmlJ5phU07kzvObOSyQSOUYrrtv9aIthIBrGzFTZ1vXYhFkMEZ1es_9hya8qk-_CC9-JRLLjsq9xVcE2iciPUeiMO22uzEsD_QvtKgv8UZawWGMSOdhCQ78QiArL1-hReTCSKbbuVHeFLnO1D099KonBvTfPpVB1JyttGiZOIg_oLZ4jVoUBdAE/s1080/thumnail%20x%20hanhaoyu.png"
CompressImagebyHHC = "true"
MaxImageSizebyHHC = 1048576  # 1MB

# Filtering configuration.
ExcludeKeywordsbyHHC = ["", ""]  # keywords that cause a post to be filtered out
MinWordCountbyHHC = 2  # minimum number of words required in the summary

# ===========================================
# SETUP LOGGING
# ===========================================

# Send every record of level INFO and above both to the log file named by
# LogFilebyHHC and to the console.
logging.basicConfig(
    handlers=[
        logging.FileHandler(LogFilebyHHC, encoding='utf-8'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)

# ===========================================
# FUNGSI UTILITY
# ===========================================

def load_posted_articles():
    """Read the JSON database of already-posted articles.

    Returns:
        The decoded contents of ``DatabaseFilebyHHC``, or an empty list when
        the file does not exist or cannot be read/decoded (the failure is
        logged).
    """
    try:
        if not os.path.exists(DatabaseFilebyHHC):
            return []
        with open(DatabaseFilebyHHC, 'r', encoding='utf-8') as db_file:
            stored = json.load(db_file)
        return stored
    except Exception as exc:
        logger.error(f"Error loading posted articles: {exc}")
        return []

def save_posted_articles(articles):
    """Persist the list of already-posted articles to ``DatabaseFilebyHHC``.

    The data is first written to a temporary file next to the database and
    then moved into place with ``os.replace``. An interrupted or failed write
    therefore never leaves a truncated database behind (a truncated file would
    be read back as "nothing posted yet" and cause every article to be
    reposted).

    Args:
        articles: JSON-serialisable list of article records.

    Errors are logged, not raised.
    """
    tmp_path = f"{DatabaseFilebyHHC}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
        # Swap the finished file in; the previous database stays intact until
        # this call succeeds.
        os.replace(tmp_path, DatabaseFilebyHHC)
        logger.info(f"Database updated with {len(articles)} articles")
    except Exception as e:
        logger.error(f"Error saving posted articles: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # No temporary file to clean up (or it cannot be removed); the
            # original failure has already been logged above.
            pass

def clean_html(html_content):
    """Strip the markup from *html_content* and return its plain text.

    Returns an empty string for empty/None input. Runs of whitespace in the
    extracted text are collapsed to single spaces.
    """
    if not html_content:
        return ""

    parsed = BeautifulSoup(html_content, 'html.parser')

    # <script> and <style> bodies are not readable content, so drop them
    # before extracting the text.
    for unwanted in parsed.find_all(["script", "style"]):
        unwanted.decompose()

    # str.split() with no argument discards every run of whitespace.
    words = parsed.get_text().split()
    return ' '.join(words)

def extract_first_image(content):
    """Return the ``src`` of the first ``<img>`` tag found in *content*.

    Protocol-relative URLs (``//host/...``) are prefixed with ``https:`` and
    root-relative URLs (``/path``) are resolved against the host of
    ``LinkWebsiteRSSbyHHC``. Any other value is returned unchanged.

    Returns:
        The image URL, or None when *content* is empty, has no ``<img>`` tag,
        or the first ``<img>`` has no ``src``.
    """
    if not content:
        return None

    first_img = BeautifulSoup(content, 'html.parser').find('img')
    src = first_img.get('src') if first_img else None
    if not src:
        return None

    if src.startswith('//'):
        return 'https:' + src
    if src.startswith('/'):
        host = urlparse(LinkWebsiteRSSbyHHC).netloc
        return f'https://{host}{src}'
    return src

def truncate_text(text, max_length):
    """Shorten *text* so that the result is at most *max_length* characters.

    Text that already fits is returned unchanged. Longer text is cut and
    terminated with ``"..."`` (the ellipsis counts towards the limit). When
    the limit is too small to hold any text plus the ellipsis, the text is
    simply cut to the limit.

    Args:
        text: The string to shorten; ``None`` or empty yields ``""``.
        max_length: Maximum length of the returned string.

    Returns:
        The possibly shortened string.
    """
    if not text:
        return ""
    if len(text) <= max_length:
        return text
    if max_length <= 0:
        return ""
    if max_length < 3:
        # A negative slice bound here would count from the end of the string
        # and produce a result longer than the limit.
        return text[:max_length]
    return text[:max_length - 3] + "..."

def format_date(date_string):
    """Format a feed publication date according to ``FormatDatebyHHC``.

    RFC 822 style dates (``"Mon, 01 Jan 2024 10:00:00 +0000"``, with any
    numeric offset or a zone name such as GMT/UTC) and ISO 8601 dates are
    accepted. A date without zone information is treated as UTC. The result
    is converted to the zone named by ``TimezonebyHHC`` before formatting, so
    that a zone label hard-coded in the format string matches the time shown.

    Args:
        date_string: The publication date as it appears in the feed.

    Returns:
        The formatted date, or *date_string* unchanged when it cannot be
        parsed (a warning is logged).
    """
    from email.utils import parsedate_to_datetime

    try:
        try:
            dt = parsedate_to_datetime(date_string)
        except (TypeError, ValueError, IndexError):
            # Not an RFC 822 date; Atom feeds publish ISO 8601 timestamps.
            dt = datetime.fromisoformat(date_string)

        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)

        try:
            from zoneinfo import ZoneInfo

            dt = dt.astimezone(ZoneInfo(TimezonebyHHC))
        except (ImportError, KeyError, ValueError, OSError) as tz_error:
            # zoneinfo or its time-zone data is unavailable, or the configured
            # name is unknown: keep the time in the feed's own zone.
            logger.warning(f"Timezone conversion to {TimezonebyHHC} skipped: {tz_error}")

        return dt.strftime(FormatDatebyHHC)
    except Exception as e:
        logger.warning(f"Date parsing error: {e}, using original: {date_string}")
        return date_string

def should_filter_post(title, summary):
    """Decide whether a post must be skipped.

    A post is filtered when its title or summary contains one of the
    ``ExcludeKeywordsbyHHC`` keywords (case-insensitive), or when the summary
    has fewer than ``MinWordCountbyHHC`` words.

    Blank keywords are ignored: an empty string is a substring of every text,
    so a placeholder entry such as ``""`` would otherwise filter out every
    post.

    Args:
        title: Post title (may be empty or None).
        summary: Plain-text summary (may be empty or None).

    Returns:
        True if the post should be skipped, False otherwise.
    """
    text_to_check = f"{title or ''} {summary or ''}".lower()

    keywords = (
        keyword.strip().lower()
        for keyword in ExcludeKeywordsbyHHC
        if isinstance(keyword, str)
    )
    if any(keyword and keyword in text_to_check for keyword in keywords):
        return True

    word_count = len(summary.split()) if summary else 0
    return word_count < MinWordCountbyHHC

def send_telegram_message(message, image_url=None):
    """Send *message* to the configured channel, retrying on failure.

    The message is sent as a photo caption when *image_url* is given, images
    are enabled (``ShowImageRSSbyHHC``) and the message is short enough for a
    caption; otherwise it is sent as a plain text message. Up to
    ``MaxRetriesbyHHC`` attempts are made, waiting ``RetryDelaybyHHC`` seconds
    between attempts.

    Args:
        message: HTML-formatted message text.
        image_url: Optional URL of the picture to attach.

    Returns:
        The decoded Telegram API response on success, or None when every
        attempt failed (the admin is notified if error notifications are
        enabled).
    """
    api_base = f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}"

    # Telegram rejects photo captions longer than 1024 characters, so longer
    # messages are sent as text instead of failing on every attempt. The raw
    # length is used as a conservative estimate of the parsed length.
    as_photo = (
        bool(image_url)
        and ShowImageRSSbyHHC.lower() == "true"
        and len(message) <= 1024
    )
    if as_photo:
        kind = "photo"
        endpoint = f"{api_base}/sendPhoto"
        data = {
            'chat_id': ChannelIDTelegrambyHHC,
            'photo': image_url,
            'caption': message,
            'parse_mode': 'HTML'
        }
    else:
        kind = "text"
        endpoint = f"{api_base}/sendMessage"
        data = {
            'chat_id': ChannelIDTelegrambyHHC,
            'text': message,
            'parse_mode': 'HTML',
            'disable_web_page_preview': False
        }

    def redact(error):
        # Exception text from requests contains the request URL, and the URL
        # contains the bot token; keep the token out of the log file.
        return str(error).replace(TokenBotTelegrambyHHC, "<token>")

    for attempt in range(1, MaxRetriesbyHHC + 1):
        try:
            logger.info(f"Sending {kind} message (attempt {attempt})")
            response = requests.post(endpoint, data=data, timeout=30)
            logger.info(f"Telegram API response: {response.status_code}")

            if response.status_code == 200:
                result = response.json()
                if result.get('ok'):
                    logger.info("Message sent successfully!")
                    return result
                logger.error(f"Telegram API error: {result}")
            else:
                logger.error(f"HTTP error: {response.status_code}, {response.text}")
        except requests.exceptions.RequestException as e:
            logger.error(f"Request error (attempt {attempt}): {redact(e)}")
        except Exception as e:
            logger.error(f"Unexpected error sending message (attempt {attempt}): {redact(e)}")

        # Every kind of failure waits before the next attempt, including an
        # HTTP 200 reply that carries "ok": false.
        if attempt < MaxRetriesbyHHC:
            time.sleep(RetryDelaybyHHC)

    # All attempts failed.
    logger.error("All retry attempts failed")
    if EnableErrorNotificationbyHHC.lower() == "true":
        send_error_notification(f"Failed to send message after {MaxRetriesbyHHC} attempts")
    return None

def send_error_notification(error_message):
    """Send an error report to the admin chat (best effort).

    The text is HTML-escaped because the message is sent with
    ``parse_mode=HTML``; unescaped ``<`` or ``&`` characters in an error
    message can make Telegram reject the request. The bot token is removed
    from the text and the text is capped so it fits in a single message.

    Args:
        error_message: Description of the error; converted with ``str()``.

    Failures are logged, never raised.
    """
    import html

    try:
        text = str(error_message).replace(TokenBotTelegrambyHHC, "<token>")
        # Telegram text messages are limited to 4096 characters; leave room
        # for the header line.
        text = html.escape(text[:4000], quote=False)

        url = f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}/sendMessage"
        data = {
            'chat_id': AdminIDTelegrambyHHC,
            'text': f"🚨 RSS Uploader Error:\n{text}",
            'parse_mode': 'HTML'
        }
        response = requests.post(url, data=data, timeout=10)
        if response.status_code == 200:
            logger.info(f"Error notification sent: {response.status_code}")
        else:
            logger.error(f"Error notification rejected: {response.status_code}, {response.text}")
    except Exception as e:
        safe_error = str(e).replace(TokenBotTelegrambyHHC, "<token>")
        logger.error(f"Failed to send error notification: {safe_error}")

def create_telegram_message(entry):
    """Build the HTML-formatted Telegram message for one article.

    Which parts appear is controlled by the ``Show*`` configuration flags.
    The article link is attached to the title when ``ShowLinkJudulRSSbyHHC``
    is enabled, otherwise it is attached to a separate "Baca selengkapnya"
    line. Text taken from the feed is HTML-escaped because the message is
    sent with ``parse_mode=HTML``. The custom prefix and suffix are inserted
    as configured, without escaping.

    Args:
        entry: Mapping with the keys ``title``, ``link``, ``published``,
            ``author``, ``summary`` and ``tags`` (all optional).

    Returns:
        The message text; parts are separated by blank lines.
    """
    import html

    def esc(value):
        # Escape &, < and > so feed text cannot break Telegram's HTML parser.
        return html.escape(str(value), quote=False)

    message_parts = []

    # Custom prefix
    if CustomPrefixMessagebyHHC:
        message_parts.append(CustomPrefixMessagebyHHC.strip())

    link = entry.get('link') or ''
    href = html.escape(link, quote=True)  # quotes must be escaped inside href="..."
    link_on_title = ShowLinkJudulRSSbyHHC.lower() == "true"

    # Title (truncated before escaping so an entity is never cut in half)
    if ShowJudulRSSbyHHC.lower() == "true":
        title = esc(truncate_text(entry.get('title') or '', MaxLengthTitlebyHHC))
        if link_on_title and link:
            message_parts.append(f'📝 <a href="{href}">{title}</a>')
        else:
            message_parts.append(f"📝 {title}")

    # "Read more" line, used when the title does not carry the link
    if not link_on_title:
        if link:
            message_parts.append(f'🔗 <a href="{href}">Baca selengkapnya</a>')
        else:
            message_parts.append("🔗 Baca selengkapnya")

    # Publication date
    if ShowDatePublicationRSSbyHHC.lower() == "true" and entry.get('published'):
        formatted_date = format_date(entry.get('published'))
        message_parts.append(f"🕒 Dipublikasikan: {esc(formatted_date)}")

    # Author
    if ShowAuthorRSSbyHHC.lower() == "true" and entry.get('author'):
        message_parts.append(f"✍️ Penulis: {esc(entry.get('author'))}")

    # Labels / categories
    raw_tags = entry.get('tags')
    if ShowLabelRSSbyHHC.lower() == "true" and isinstance(raw_tags, list) and raw_tags:
        terms = [
            (tag.get('term') or '') if isinstance(tag, dict) else str(tag)
            for tag in raw_tags
        ]
        tags = ', '.join(esc(term) for term in terms if term)
        if tags:
            message_parts.append(f"📌 Label: {tags}")

    # Summary
    if ShowSummaryRSSbyHHC.lower() == "true" and entry.get('summary'):
        summary = esc(truncate_text(entry.get('summary'), MaxLengthSummarybyHHC))
        message_parts.append(f"📝 Ringkasan: {summary}")

    # Custom suffix
    if CustomSuffixMessagebyHHC:
        message_parts.append(CustomSuffixMessagebyHHC.strip())

    return '\n\n'.join(message_parts)

def _fetch_feed_response():
    """Download the feed, trying each candidate URL until one answers HTTP 200.

    Returns:
        The first ``requests.Response`` whose status code is 200.

    Raises:
        Exception: if no candidate URL answered with HTTP 200.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'application/rss+xml, application/xml, text/xml',
        'Accept-Language': 'en-US,en;q=0.9',
        # "br" is deliberately not advertised: requests can only decode Brotli
        # when an optional brotli package is installed, and an undecoded body
        # cannot be parsed as a feed.
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }

    rss_urls = [
        LinkWebsiteRSSbyHHC,
        LinkWebsiteRSSbyHHC.replace('https://', 'http://'),  # try plain HTTP
        "https://www.hanhaoyu.com/feeds/posts/default",  # alternative feed URL
        "http://www.hanhaoyu.com/feeds/posts/default"
    ]

    with requests.Session() as session:
        session.trust_env = False  # ignore proxy settings from the environment

        for url in rss_urls:
            try:
                logger.info(f"Trying RSS URL: {url}")
                # NOTE(review): verify=False disables TLS certificate checks,
                # so the feed content is not authenticated. Kept as in the
                # original; enable verification if the host's certificates
                # validate from this machine.
                response = session.get(url, headers=headers, timeout=30, verify=False)
            except Exception as e:
                logger.warning(f"Failed to connect to {url}: {e}")
                continue

            if response.status_code == 200:
                logger.info(f"Successfully connected to: {url}")
                return response

    raise Exception("Unable to connect to any RSS URL")


def _extract_entry_data(entry):
    """Collect the fields of one feed entry that are needed to build a post.

    Returns:
        A dict with the keys ``title``, ``link``, ``published``, ``author``,
        ``summary`` (plain text), ``tags`` and ``image_url``.
    """
    # Summary: first available of summary, description, first content item.
    summary = ''
    if hasattr(entry, 'summary'):
        summary = clean_html(entry.summary)
    elif hasattr(entry, 'description'):
        summary = clean_html(entry.description)
    elif hasattr(entry, 'content'):
        for content in entry.content:
            summary = clean_html(content.value)
            break

    # Image: first image in the content, then in the summary, then the default.
    image_url = None
    if hasattr(entry, 'content'):
        for content in entry.content:
            image_url = extract_first_image(content.value)
            if image_url:
                break
    if not image_url and hasattr(entry, 'summary'):
        image_url = extract_first_image(entry.summary)
    if not image_url and DefaultImagebyHHC:
        image_url = DefaultImagebyHHC

    return {
        'title': entry.get('title', 'No Title'),
        'link': entry.get('link', ''),
        'published': entry.get('published', ''),
        'author': entry.get('author', 'Unknown'),
        'summary': summary,
        'tags': entry.tags if hasattr(entry, 'tags') else [],
        'image_url': image_url
    }


def process_rss_feed():
    """Fetch the feed and post every entry that has not been posted yet.

    For each new entry that passes ``should_filter_post`` a message is built
    and sent to Telegram. The database of posted articles is saved after each
    successful send, so an interruption in the middle of a run cannot cause
    already-sent articles to be posted again.

    Errors are logged (and reported to the admin when error notifications are
    enabled); nothing is raised to the caller.
    """
    try:
        logger.info("=== Memulai proses RSS feed ===")
        logger.info(f"RSS URL: {LinkWebsiteRSSbyHHC}")

        response = _fetch_feed_response()
        feed = feedparser.parse(response.content)

        logger.info(f"RSS parsed successfully. Found {len(feed.entries)} entries")

        if feed.bozo:
            logger.warning(f"RSS feed parsing warning: {feed.bozo_exception}")

        if not feed.entries:
            logger.warning("No entries found in RSS feed")
            return

        # Load the articles that were already posted.
        posted_articles = load_posted_articles()
        posted_links = {
            article.get('link')
            for article in posted_articles
            if isinstance(article, dict)
        }
        logger.info(f"Database loaded: {len(posted_articles)} previously posted articles")

        new_count = 0
        total = len(feed.entries)

        # Process the entries in feed order.
        for i, entry in enumerate(feed.entries, start=1):
            try:
                logger.info(f"Processing entry {i}/{total}: {entry.get('title', 'No Title')}")

                link = entry.get('link', '')
                if not link:
                    # Without a link the entry cannot be tracked in the database.
                    logger.warning("  -> Entry has no link, skipping")
                    continue

                if link in posted_links:
                    logger.info(f"  -> Already posted, skipping")
                    continue

                entry_data = _extract_entry_data(entry)
                title = entry_data['title']
                image_url = entry_data['image_url']

                logger.info(f"  -> Title: {title}")
                logger.info(f"  -> Link: {link}")
                logger.info(f"  -> Summary length: {len(entry_data['summary'])} chars")
                logger.info(f"  -> Image URL: {image_url}")

                if should_filter_post(title, entry_data['summary']):
                    logger.info(f"  -> Filtered out due to filtering rules")
                    continue

                message = create_telegram_message(entry_data)
                logger.info(f"  -> Message created, length: {len(message)} chars")

                logger.info(f"  -> Sending to Telegram...")
                result = send_telegram_message(message, image_url)

                if not result:
                    logger.error(f"  -> ❌ Gagal mengirim: {title}")
                    continue

                logger.info(f"  -> ✅ Berhasil mengirim: {title}")

                # Record the article immediately so it is never sent twice,
                # even if the run is interrupted or the feed lists it again.
                posted_articles.append({
                    'link': link,
                    'title': title,
                    'posted_at': datetime.now().isoformat()
                })
                posted_links.add(link)
                save_posted_articles(posted_articles)
                new_count += 1

                # Pause between posts.
                time.sleep(2)

            except Exception as e:
                logger.error(f"Error processing entry {i}: {e}")
                continue

        if new_count:
            logger.info(f"🎉 Berhasil memproses {new_count} artikel baru")
        else:
            logger.info("ℹ️ Tidak ada artikel baru")

    except Exception as e:
        logger.error(f"Error in process_rss_feed: {e}")
        if EnableErrorNotificationbyHHC.lower() == "true":
            send_error_notification(f"RSS Feed Error: {e}")

def main():
    """Run the uploader loop: process the feed, wait, repeat.

    The loop ends when the user interrupts the program. Any other exception
    is logged (and reported to the admin when error notifications are
    enabled) and the loop resumes after one minute.
    """
    startup_lines = (
        "🚀 Starting RSS to Telegram Uploader",
        f"RSS Feed: {LinkWebsiteRSSbyHHC}",
        f"Telegram Channel: {ChannelIDTelegrambyHHC}",
        f"Check Interval: {IntervalCheckRSSbyHHC} seconds",
    )
    for line in startup_lines:
        logger.info(line)

    keep_running = True
    while keep_running:
        try:
            process_rss_feed()
            logger.info(f"⏰ Menunggu {IntervalCheckRSSbyHHC} detik untuk check berikutnya...")
            time.sleep(IntervalCheckRSSbyHHC)
        except KeyboardInterrupt:
            logger.info("Program dihentikan oleh user")
            keep_running = False
        except Exception as err:
            logger.error(f"Unexpected error: {err}")
            if EnableErrorNotificationbyHHC.lower() == "true":
                send_error_notification(f"Unexpected Error: {err}")
            # Back off for one minute before trying again.
            time.sleep(60)

def test_connection():
    """Check that the feed and the Telegram bot/channel are reachable.

    The outcome of each check is printed. The channel check posts a real test
    message to the configured channel.

    Returns:
        True if the feed could be downloaded by any of the methods tried,
        False otherwise. The Telegram checks do not affect the return value.
    """
    print("🧪 Testing connections...")

    # --- Feed check: try every candidate URL with requests ---
    print("\n--- Testing RSS Feed ---")
    candidate_urls = [
        LinkWebsiteRSSbyHHC,
        LinkWebsiteRSSbyHHC.replace('https://', 'http://'),
        "https://www.hanhaoyu.com/feeds/posts/default",
        "http://www.hanhaoyu.com/feeds/posts/default"
    ]
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'application/rss+xml, application/xml, text/xml'
    }

    http = requests.Session()
    http.trust_env = False  # ignore proxy settings from the environment

    rss_success = False
    for url in candidate_urls:
        try:
            print(f"Testing: {url}")
            response = http.get(url, headers=request_headers, timeout=30, verify=False)
            if response.status_code != 200:
                print(f"   Status: {response.status_code}")
                continue

            feed = feedparser.parse(response.content)
            print(f"✅ RSS Feed OK - {len(feed.entries)} entries found")
            if feed.entries:
                latest = feed.entries[0]
                print(f"   Latest: {latest.get('title', 'No title')}")
                print(f"   Link: {latest.get('link', 'No link')}")
                print(f"   Published: {latest.get('published', 'No date')}")
            rss_success = True
            break
        except Exception as err:
            print(f"   Error: {err}")
    else:
        # Reached only when no URL succeeded: fall back to urllib.
        print("❌ All RSS URLs failed - trying alternative methods...")
        try:
            import urllib.request
            print("Trying with urllib...")
            req = urllib.request.Request(LinkWebsiteRSSbyHHC)
            req.add_header('User-Agent', 'Mozilla/5.0 (compatible; RSS Reader)')
            with urllib.request.urlopen(req, timeout=30) as raw:
                feed = feedparser.parse(raw.read())
                print(f"✅ RSS Feed OK (urllib) - {len(feed.entries)} entries found")
                rss_success = True
        except Exception as err:
            print(f"   urllib error: {err}")

    api_root = f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}"

    # --- Bot check: getMe confirms that the token is accepted ---
    print("\n--- Testing Telegram Bot ---")
    try:
        response = requests.get(f"{api_root}/getMe", timeout=10)
        if response.status_code != 200:
            print(f"❌ Telegram Bot Error: {response.status_code}")
        else:
            bot_info = response.json()
            if bot_info.get('ok'):
                print(f"✅ Telegram Bot OK - @{bot_info['result']['username']}")
            else:
                print(f"❌ Telegram Bot Error: {bot_info}")
    except Exception as err:
        print(f"❌ Telegram Bot Error: {err}")

    # --- Channel check: post a test message to the channel ---
    print("\n--- Testing Telegram Channel ---")
    try:
        payload = {
            'chat_id': ChannelIDTelegrambyHHC,
            'text': '🧪 Test message from RSS Uploader',
            'parse_mode': 'HTML'
        }
        response = requests.post(f"{api_root}/sendMessage", data=payload, timeout=10)
        if response.status_code != 200:
            print(f"❌ Telegram Channel Error: {response.status_code} - {response.text}")
        else:
            result = response.json()
            if result.get('ok'):
                print("✅ Telegram Channel Access OK")
            else:
                print(f"❌ Telegram Channel Error: {result}")
    except Exception as err:
        print(f"❌ Telegram Channel Error: {err}")

    return rss_success

if __name__ == "__main__":
    # Script entry point: run the connection checks, ask for confirmation,
    # then start the endless upload loop.
    #
    # input() raises EOFError when stdin is closed or not attached. In that
    # case the prompts are skipped instead of crashing before main() starts.

    # Test the connections first.
    rss_success = test_connection()

    if not rss_success:
        print("\n❌ RSS Feed tidak bisa diakses!")
        print("Kemungkinan penyebab:")
        print("1. Proxy/Firewall memblokir akses")
        print("2. Website hanhaoyu.com down")
        print("3. RSS feed URL berubah")
        print("\nSolusi:")
        print("1. Coba jalankan dari server/VPS lain")
        print("2. Gunakan VPN")
        print("3. Cek manual: https://www.hanhaoyu.com/feeds/posts/default?alt=rss")

        try:
            choice = input("\nLanjutkan program? (y/n): ")
        except EOFError:
            # Nobody can answer; continue, since main() retries the feed on
            # every cycle anyway.
            choice = 'y'
        if choice.strip().lower() != 'y':
            # SystemExit is what exit() raises, without relying on the
            # site-provided helper being available.
            raise SystemExit(0)

    # Wait for the user before starting, when there is a user to wait for.
    try:
        input("\nPress Enter to start the main program...")
    except EOFError:
        pass

    # Run the main program.
    main()

Posting Komentar

Terima kasih atas kunjungan Anda di website kami. Kami sangat menghargai setiap komentar dan masukan yang diberikan oleh pembaca kami. Sebelum mengirimkan komentar, harap diingat untuk tetap mengikuti etika dan sopan santun dalam berkomunikasi. Kami tidak akan mentoleransi komentar yang mengandung pelecehan, intimidasi, diskriminasi, atau konten yang tidak pantas.

Pastikan komentar yang Anda sampaikan relevan dengan topik postingan dan tidak melanggar hak cipta atau privasi orang lain. Kami berhak untuk meninjau, mengedit, atau menghapus komentar yang tidak sesuai dengan kebijakan kami tanpa pemberitahuan sebelumnya.

Kami mengundang Anda untuk memberikan masukan yang konstruktif dan berguna bagi pembaca kami. Silakan berikan pendapat, saran, atau pengalaman Anda yang dapat meningkatkan kualitas konten kami dan memberikan manfaat bagi pembaca lainnya. Terima kasih atas partisipasi Anda dalam komunitas kami. Silakan berkomentar dengan bijak dan santun.

Gabung dalam percakapan

Gabung dalam percakapan