Prefetch Manager

Progress:0/0
Success
0
Failed
0
Pending
0
Time
0s
Success URLs(0)
Failed URLs(0)
Pending URLs(0)

Auto Post Blogger to Channel Telegram V4

"""Auto-post new Blogger RSS/Atom entries to a Telegram channel.

The script polls the configured feed every ``IntervalCheckRSSbyHHC`` seconds,
builds an HTML-formatted message for every entry whose link is not yet in the
JSON database (``DatabaseFilebyHHC``), sends it to the channel, and records
the link so it is never posted twice.

Third-party dependencies: ``feedparser``, ``requests``, ``beautifulsoup4``.
"""

import html
import json
import logging
import logging.handlers
import os
import re
import ssl
import time
import urllib.request
from datetime import datetime, timedelta, timezone
from email.utils import parsedate_to_datetime
from urllib.error import URLError, HTTPError
from urllib.parse import urljoin, urlparse

import feedparser
import requests
from bs4 import BeautifulSoup

# ===========================================
# FULL CONFIGURATION
# ===========================================
# Boolean switches are kept as the strings "true"/"false" for compatibility
# with existing deployments; see _enabled().

# RSS website configuration
LinkWebsiteRSSbyHHC = "https://www.hanhaoyu.com/feeds/posts/default?alt=rss"
ShowJudulRSSbyHHC = "true"
ShowLinkJudulRSSbyHHC = "true"
ShowDatePublicationRSSbyHHC = "true"
ShowAuthorRSSbyHHC = "true"
ShowLabelRSSbyHHC = "true"
ShowSummaryRSSbyHHC = "true"
ShowImageRSSbyHHC = "true"

# Telegram bot configuration
# SECURITY: the fallback token below has been published in source and should
# be revoked via @BotFather. Supply the replacement through the
# TELEGRAM_BOT_TOKEN environment variable instead of editing this file.
TokenBotTelegrambyHHC = os.environ.get(
    "TELEGRAM_BOT_TOKEN",
    "7163346072:AAEP_G38vgoR5Bq2jtsVEq2B6qOIKRK5oMo",
)
ChannelIDTelegrambyHHC = "-1002417488695"
AdminIDTelegrambyHHC = "1090870321"

# Message format configuration
CustomPrefixMessagebyHHC = "🚀 POST TERBARU!\n\n"
CustomSuffixMessagebyHHC = "\n\n#BlogPost #Update"
MaxLengthSummarybyHHC = 200
MaxLengthTitlebyHHC = 100

# Time and interval configuration
IntervalCheckRSSbyHHC = 300  # seconds (5 minutes)
TimezonebyHHC = "Asia/Jakarta"
FormatDatebyHHC = "%d/%m/%Y %H:%M WIB"

# File and database configuration
DatabaseFilebyHHC = "posted_articles.json"
LogFilebyHHC = "rss_uploader.log"
MaxLogSizebyHHC = 5242880  # 5MB

# Error handling configuration
MaxRetriesbyHHC = 3
RetryDelaybyHHC = 5
EnableErrorNotificationbyHHC = "true"

# Image configuration
DefaultImagebyHHC = "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgcVcG9bmlJ5phU07kzvObOSyQSOUYrrtv9aIthIBrGzFTZ1vXYhFkMEZ1es_9hya8qk-_CC9-JRLLjsq9xVcE2iciPUeiMO22uzEsD_QvtKgv8UZawWGMSOdhCQ78QiArL1-hReTCSKbbuVHeFLnO1D099KonBvTfPpVB1JyttGiZOIg_oLZ4jVoUBdAE/s1080/thumnail%20x%20hanhaoyu.png"
CompressImagebyHHC = "true"  # currently not read by any code in this script
MaxImageSizebyHHC = 1048576  # currently not read by any code in this script

# Filtering configuration
ExcludeKeywordsbyHHC = ["", ""]
MinWordCountbyHHC = 2

# Network security: set to "false" only if the feed host has a broken
# certificate chain and you accept the man-in-the-middle risk.
VerifySSLbyHHC = "true"

# Debug mode (set False for production)
DEBUG_MODE = False

# Telegram rejects photo captions longer than this many characters.
TELEGRAM_CAPTION_LIMIT = 1024
# Telegram rejects text messages longer than this many characters.
TELEGRAM_TEXT_LIMIT = 4096

# ===========================================
# LOGGING SETUP
# ===========================================

# Create the logs folder if it does not exist yet
os.makedirs('logs', exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Rotate so the log honours MaxLogSizebyHHC instead of growing forever.
        logging.handlers.RotatingFileHandler(
            f'logs/{LogFilebyHHC}',
            maxBytes=MaxLogSizebyHHC,
            backupCount=3,
            encoding='utf-8',
        ),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)


# ===========================================
# UTILITY FUNCTIONS
# ===========================================

def _enabled(flag):
    """Return True when a config switch holds "true" (any case) or True."""
    return str(flag).lower() == "true"


def _redact(value):
    """Return str(value) with the bot token masked so it never reaches logs."""
    text = str(value)
    if TokenBotTelegrambyHHC:
        text = text.replace(TokenBotTelegrambyHHC, "<token>")
    return text


def _resolve_timezone():
    """Return the tzinfo for TimezonebyHHC, falling back to fixed UTC+7."""
    try:
        from zoneinfo import ZoneInfo  # Python 3.9+
        return ZoneInfo(TimezonebyHHC)
    except (ImportError, KeyError, ValueError, OSError) as e:
        # The fallback is only correct for WIB (Asia/Jakarta), which has no DST.
        logger.warning(f"Timezone {TimezonebyHHC} unavailable ({e}); using UTC+7")
        return timezone(timedelta(hours=7), "WIB")


_LOCAL_TZ = _resolve_timezone()


def load_posted_articles():
    """Load the list of already-posted articles from the JSON database.

    Returns:
        A list of dicts (``link``, ``title``, ``posted_at``); an empty list
        when the file is missing, unreadable, or does not contain a list.
    """
    try:
        if os.path.exists(DatabaseFilebyHHC):
            with open(DatabaseFilebyHHC, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if isinstance(data, list):
                return data
            logger.error("Error loading posted articles: database is not a list")
        return []
    except (OSError, ValueError) as e:
        logger.error(f"Error loading posted articles: {e}")
        return []


def save_posted_articles(articles):
    """Persist the list of already-posted articles.

    The data is written to a temporary file and then moved into place so an
    interrupted write cannot leave a truncated database behind.
    """
    tmp_path = f"{DatabaseFilebyHHC}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, DatabaseFilebyHHC)
        logger.info(f"Database updated with {len(articles)} articles")
    except (OSError, TypeError, ValueError) as e:
        logger.error(f"Error saving posted articles: {e}")


def clean_html(html_content):
    """Strip markup from *html_content* and return whitespace-normalised text."""
    if not html_content:
        return ""

    soup = BeautifulSoup(html_content, 'html.parser')
    for node in soup(["script", "style"]):
        node.decompose()

    return ' '.join(soup.get_text().split())


def extract_first_image(content):
    """Return the URL of the first ``<img>`` in *content*, or None.

    Protocol-relative (``//host/x``) and root-relative (``/x``) sources are
    made absolute using https and the feed's host.
    """
    if not content:
        return None

    img_tag = BeautifulSoup(content, 'html.parser').find('img')
    if not img_tag or not img_tag.get('src'):
        return None

    img_url = img_tag.get('src')
    if img_url.startswith('//'):
        img_url = 'https:' + img_url
    elif img_url.startswith('/'):
        base_url = urlparse(LinkWebsiteRSSbyHHC).netloc
        img_url = f'https://{base_url}{img_url}'
    return img_url


def truncate_text(text, max_length):
    """Return *text* cut to at most *max_length* characters, ending in "..."."""
    if not text:
        return ""
    if len(text) <= max_length:
        return text
    if max_length <= 3:
        # No room for an ellipsis; a negative slice would return garbage.
        return text[:max(max_length, 0)]
    return text[:max_length - 3] + "..."


def _parse_feed_date(date_string):
    """Parse an RFC 822 (RSS) or ISO 8601 (Atom) timestamp to aware datetime."""
    try:
        parsed = parsedate_to_datetime(date_string)
    except (TypeError, ValueError, IndexError):
        iso_text = date_string.strip()
        if iso_text.endswith('Z'):
            iso_text = iso_text[:-1] + '+00:00'
        parsed = datetime.fromisoformat(iso_text)
    if parsed.tzinfo is None:
        # Feeds that omit the zone are treated as UTC, as before.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def format_date(date_string):
    """Format a feed timestamp with FormatDatebyHHC in the configured timezone.

    Returns the original string unchanged when it cannot be parsed.
    """
    try:
        local_dt = _parse_feed_date(date_string).astimezone(_LOCAL_TZ)
        return local_dt.strftime(FormatDatebyHHC)
    except (TypeError, ValueError, AttributeError, OverflowError) as e:
        logger.warning(f"Date parsing error: {e}, using original: {date_string}")
        return date_string


def should_filter_post(title, summary):
    """Return True when a post must be skipped.

    A post is skipped when the title or summary contains a non-empty entry of
    ExcludeKeywordsbyHHC, or the summary has fewer than MinWordCountbyHHC words.
    """
    text_to_check = f"{title} {summary}".lower()
    if any(kw and kw.lower() in text_to_check for kw in ExcludeKeywordsbyHHC):
        return True

    word_count = len(summary.split()) if summary else 0
    return word_count < MinWordCountbyHHC


def send_telegram_message(message, image_url=None):
    """Send *message* to the channel, retrying up to MaxRetriesbyHHC times.

    Args:
        message: HTML-formatted text (must already be escaped).
        image_url: Optional photo URL; when given and images are enabled the
            message is sent as the photo caption.

    Returns:
        The decoded Telegram response dict on success, otherwise None.
    """
    api_base = f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}"

    use_photo = bool(image_url) and _enabled(ShowImageRSSbyHHC)
    if use_photo and len(message) > TELEGRAM_CAPTION_LIMIT:
        # A caption over the limit is always rejected; send plain text instead.
        logger.warning("Message exceeds caption limit, sending as text")
        use_photo = False

    for attempt in range(MaxRetriesbyHHC):
        wait_seconds = RetryDelaybyHHC
        try:
            if use_photo:
                endpoint = f"{api_base}/sendPhoto"
                data = {
                    'chat_id': ChannelIDTelegrambyHHC,
                    'photo': image_url,
                    'caption': message,
                    'parse_mode': 'HTML'
                }
                logger.info(f"Sending photo message (attempt {attempt+1})")
            else:
                endpoint = f"{api_base}/sendMessage"
                data = {
                    'chat_id': ChannelIDTelegrambyHHC,
                    'text': message,
                    'parse_mode': 'HTML',
                    'disable_web_page_preview': False
                }
                logger.info(f"Sending text message (attempt {attempt+1})")

            response = requests.post(endpoint, data=data, timeout=30)
            logger.info(f"Telegram API response: {response.status_code}")

            try:
                result = response.json()
            except ValueError:
                result = {}
            if not isinstance(result, dict):
                result = {}

            if response.status_code == 200 and result.get('ok'):
                logger.info("Message sent successfully!")
                return result

            if response.status_code == 200:
                logger.error(f"Telegram API error: {result}")
            else:
                logger.error(f"HTTP error: {response.status_code}, {response.text}")

            if response.status_code == 429:
                # Flood control: Telegram tells us how long to back off.
                retry_after = (result.get('parameters') or {}).get('retry_after')
                if isinstance(retry_after, (int, float)) and retry_after > 0:
                    wait_seconds = max(wait_seconds, retry_after)
            elif response.status_code == 400 and use_photo:
                # Usually an image Telegram cannot fetch; the text still matters.
                logger.warning("Photo rejected, retrying as text message")
                use_photo = False

        except requests.exceptions.RequestException as e:
            # The exception text embeds the request URL, which holds the token.
            logger.error(f"Request error (attempt {attempt+1}): {_redact(e)}")
        except Exception as e:
            logger.error(
                f"Unexpected error sending message (attempt {attempt+1}): {_redact(e)}"
            )

        if attempt < MaxRetriesbyHHC - 1:
            time.sleep(wait_seconds)

    logger.error("All retry attempts failed")
    return None


def send_error_notification(error_message):
    """Send an error report to the admin chat (best effort, never raises)."""
    try:
        # Escape because parse_mode is HTML and exception text often has "<".
        safe_text = html.escape(
            truncate_text(_redact(error_message), TELEGRAM_TEXT_LIMIT - 500),
            quote=False,
        )
        url = f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}/sendMessage"
        data = {
            'chat_id': AdminIDTelegrambyHHC,
            'text': f"🚨 RSS Uploader Error:\n{safe_text}",
            'parse_mode': 'HTML'
        }
        response = requests.post(url, data=data, timeout=10)
        logger.info(f"Error notification sent: {response.status_code}")
    except Exception as e:
        logger.error(f"Failed to send error notification: {_redact(e)}")


def create_telegram_message(entry):
    """Build the HTML message for one article.

    Args:
        entry: dict with optional keys ``title``, ``link``, ``published``,
            ``author``, ``tags`` (list of dicts with ``term``, or strings)
            and ``summary``.

    Returns:
        The message text; every dynamic value is HTML-escaped because the
        message is sent with ``parse_mode='HTML'``.
    """
    message_parts = []
    link = entry.get('link') or ''
    safe_link = html.escape(link, quote=True)
    link_in_title = _enabled(ShowLinkJudulRSSbyHHC)

    if CustomPrefixMessagebyHHC:
        message_parts.append(CustomPrefixMessagebyHHC.strip())

    if _enabled(ShowJudulRSSbyHHC):
        # Truncate before escaping so an entity is never cut in half.
        title = html.escape(
            truncate_text(entry.get('title', ''), MaxLengthTitlebyHHC),
            quote=False,
        )
        if link_in_title and link:
            message_parts.append(f'📝 <a href="{safe_link}">{title}</a>')
        else:
            message_parts.append(f"📝 {title}")

    if not link_in_title:
        if link:
            message_parts.append(f'🔗 <a href="{safe_link}">Baca selengkapnya</a>')
        else:
            message_parts.append("🔗 Baca selengkapnya")

    if _enabled(ShowDatePublicationRSSbyHHC) and entry.get('published'):
        formatted_date = html.escape(format_date(entry.get('published')), quote=False)
        message_parts.append(f"🕒 Dipublikasikan: {formatted_date}")

    if _enabled(ShowAuthorRSSbyHHC) and entry.get('author'):
        author = html.escape(str(entry.get('author')), quote=False)
        message_parts.append(f"✍️ Penulis: {author}")

    raw_tags = entry.get('tags')
    if _enabled(ShowLabelRSSbyHHC) and isinstance(raw_tags, list) and raw_tags:
        tags = ', '.join(
            tag.get('term', '') if isinstance(tag, dict) else str(tag)
            for tag in raw_tags
        )
        if tags:
            message_parts.append(f"📌 Label: {html.escape(tags, quote=False)}")

    if _enabled(ShowSummaryRSSbyHHC) and entry.get('summary'):
        summary = html.escape(
            truncate_text(entry.get('summary'), MaxLengthSummarybyHHC),
            quote=False,
        )
        message_parts.append(f"📝 Ringkasan: {summary}")

    if CustomSuffixMessagebyHHC:
        message_parts.append(CustomSuffixMessagebyHHC.strip())

    return '\n\n'.join(message_parts)


def _candidate_feed_urls():
    """Return the feed URLs to try, in order, derived from the configured URL.

    Order: configured URL, the same without its query string, then the
    plain-http variants of both. Duplicates are dropped.
    """
    parsed = urlparse(LinkWebsiteRSSbyHHC)
    bare = parsed._replace(query="", fragment="")
    candidates = [
        LinkWebsiteRSSbyHHC,
        bare.geturl(),
        parsed._replace(scheme="http").geturl(),
        bare._replace(scheme="http").geturl(),
    ]
    unique = []
    for url in candidates:
        if url not in unique:
            unique.append(url)
    return unique


def fetch_rss_feed():
    """Download the feed, trying several transports in turn.

    Returns:
        The raw feed bytes.

    Raises:
        RuntimeError: when every method fails.
    """
    # No explicit Accept-Encoding: requests negotiates what it can decode, and
    # urllib (which never decompresses) then receives an uncompressed body.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'application/rss+xml, application/xml, text/xml, */*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache'
    }
    verify_tls = _enabled(VerifySSLbyHHC)

    # Method 1: requests session that ignores proxy environment variables
    with requests.Session() as session:
        session.trust_env = False
        for url in _candidate_feed_urls():
            try:
                logger.info(f"Trying RSS URL: {url}")
                response = session.get(
                    url, headers=headers, timeout=30, verify=verify_tls
                )
                if response.status_code == 200:
                    logger.info(f"Successfully fetched RSS from: {url}")
                    return response.content
                logger.warning(f"Failed to fetch from {url}: HTTP {response.status_code}")
            except requests.exceptions.RequestException as e:
                logger.warning(f"Failed to fetch from {url}: {e}")

    # Method 2: urllib with an explicit SSL context
    try:
        logger.info("Trying with urllib...")
        ssl_context = ssl.create_default_context()
        if not verify_tls:
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE

        req = urllib.request.Request(LinkWebsiteRSSbyHHC, headers=headers)
        with urllib.request.urlopen(req, timeout=30, context=ssl_context) as response:
            content = response.read()
        logger.info("Successfully fetched RSS with urllib")
        return content
    except (URLError, OSError, ValueError) as e:
        logger.error(f"urllib method failed: {e}")

    # Method 3: requests again with a different user agent
    try:
        logger.info("Trying with different user agent...")
        alt_headers = {
            'User-Agent': 'RSS Reader Bot/1.0 (+https://example.com/bot)',
            'Accept': 'application/rss+xml, application/xml, text/xml'
        }
        response = requests.get(
            LinkWebsiteRSSbyHHC, headers=alt_headers, timeout=30, verify=verify_tls
        )
        if response.status_code == 200:
            logger.info("Successfully fetched RSS with alternative user agent")
            return response.content
    except requests.exceptions.RequestException as e:
        logger.error(f"Alternative user agent method failed: {e}")

    raise RuntimeError("All RSS fetch methods failed")


def _extract_summary(entry):
    """Return the plain-text summary of a feedparser entry ('' if none)."""
    if hasattr(entry, 'summary'):
        return clean_html(entry.summary)
    if hasattr(entry, 'description'):
        return clean_html(entry.description)
    if hasattr(entry, 'content'):
        for content in entry.content:
            return clean_html(content.value)
    return ''


def _extract_image_url(entry):
    """Return the best image URL for a feedparser entry.

    Search order: first <img> in the content, first <img> in the summary,
    media_content, enclosures, then DefaultImagebyHHC (may yield None).
    """
    if hasattr(entry, 'content'):
        for content in entry.content:
            img = extract_first_image(content.value)
            if img:
                return img

    if hasattr(entry, 'summary'):
        img = extract_first_image(entry.summary)
        if img:
            return img

    if hasattr(entry, 'media_content'):
        for media in entry.media_content:
            if media.get('type', '').startswith('image/') and media.get('url'):
                return media.get('url')

    if hasattr(entry, 'enclosures'):
        for enclosure in entry.enclosures:
            if enclosure.get('type', '').startswith('image/') and enclosure.get('href'):
                return enclosure.get('href')

    return DefaultImagebyHHC or None


def process_rss_feed():
    """Fetch the feed and post every not-yet-posted entry to Telegram.

    Errors are logged (and optionally reported to the admin) rather than
    raised, so the polling loop keeps running.
    """
    try:
        logger.info("=== Starting RSS feed processing ===")
        logger.info(f"RSS URL: {LinkWebsiteRSSbyHHC}")

        # Fetch RSS content
        rss_content = fetch_rss_feed()

        # Parse with feedparser
        feed = feedparser.parse(rss_content)
        logger.info(f"RSS parsed successfully. Found {len(feed.entries)} entries")

        if feed.bozo:
            logger.warning(f"RSS feed parsing warning: {feed.bozo_exception}")

        if not feed.entries:
            logger.warning("No entries found in RSS feed")
            return

        # Load the articles that were already posted
        posted_articles = load_posted_articles()
        posted_links = {
            article.get('link')
            for article in posted_articles
            if isinstance(article, dict) and article.get('link')
        }
        logger.info(f"Database loaded: {len(posted_articles)} previously posted articles")

        new_articles = []
        total = len(feed.entries)

        # Process each entry in feed order (newest first)
        for i, entry in enumerate(feed.entries):
            try:
                title = entry.get('title', 'No Title')
                logger.info(f"Processing entry {i+1}/{total}: {title}")

                link = entry.get('link', '')
                if not link:
                    # Without a link the entry cannot be de-duplicated.
                    logger.warning(f"  -> Entry has no link, skipping")
                    continue

                # Check whether it was already posted
                if link in posted_links:
                    logger.info(f"  -> Already posted, skipping")
                    continue

                # Extract data
                published = entry.get('published', '')
                author = entry.get('author', 'Unknown')
                summary = _extract_summary(entry)

                logger.info(f"  -> Title: {title}")
                logger.info(f"  -> Link: {link}")
                logger.info(f"  -> Summary length: {len(summary)} chars")

                image_url = _extract_image_url(entry)
                logger.info(f"  -> Image URL: {image_url}")

                # Extract tags/categories
                tags = entry.tags if hasattr(entry, 'tags') else []

                # Filter the post if needed
                if should_filter_post(title, summary):
                    logger.info(f"  -> Filtered out due to filtering rules")
                    continue

                entry_data = {
                    'title': title,
                    'link': link,
                    'published': published,
                    'author': author,
                    'summary': summary,
                    'tags': tags,
                    'image_url': image_url
                }

                # Build the Telegram message
                message = create_telegram_message(entry_data)
                logger.info(f"  -> Message created, length: {len(message)} chars")

                # Send to Telegram
                logger.info(f"  -> Sending to Telegram...")
                result = send_telegram_message(message, image_url)

                if result:
                    logger.info(f"  -> ✅ Successfully sent: {title}")

                    posted_articles.append({
                        'link': link,
                        'title': title,
                        'posted_at': datetime.now(timezone.utc).isoformat()
                    })
                    posted_links.add(link)
                    new_articles.append(entry_data)
                    # Persist immediately so a crash cannot cause re-posting.
                    save_posted_articles(posted_articles)

                    # Delay between posts
                    time.sleep(2)
                else:
                    logger.error(f"  -> ❌ Failed to send: {title}")

            except Exception as e:
                logger.error(f"Error processing entry {i+1}: {_redact(e)}")
                continue

        if new_articles:
            logger.info(f"🎉 Successfully processed {len(new_articles)} new articles")
        else:
            logger.info("ℹ️ No new articles found")

    except Exception as e:
        logger.error(f"Error in process_rss_feed: {_redact(e)}")
        if _enabled(EnableErrorNotificationbyHHC):
            send_error_notification(f"RSS Feed Error: {e}")


def test_connection():
    """Check that the feed can be fetched and parsed (debug mode only).

    Returns:
        True when DEBUG_MODE is off or the feed was fetched; False on error.
    """
    if not DEBUG_MODE:
        return True

    print("🧪 Testing connections...")

    # Test RSS feed
    print("\n--- Testing RSS Feed ---")
    try:
        rss_content = fetch_rss_feed()
        feed = feedparser.parse(rss_content)
        print(f"✅ RSS Feed OK - {len(feed.entries)} entries found")
        if feed.entries:
            latest = feed.entries[0]
            print(f"   Latest: {latest.get('title', 'No title')}")
            print(f"   Link: {latest.get('link', 'No link')}")
            print(f"   Published: {latest.get('published', 'No date')}")
        return True
    except Exception as e:
        print(f"❌ RSS Feed Error: {e}")
        return False


def main():
    """Run one immediate check, then poll the feed until interrupted."""
    logger.info("🚀 Starting RSS to Telegram Uploader")
    logger.info(f"RSS Feed: {LinkWebsiteRSSbyHHC}")
    logger.info(f"Telegram Channel: {ChannelIDTelegrambyHHC}")
    logger.info(f"Check Interval: {IntervalCheckRSSbyHHC} seconds")
    logger.info(f"Debug Mode: {DEBUG_MODE}")

    # Test the connection only in debug mode
    if DEBUG_MODE:
        rss_success = test_connection()
        if not rss_success:
            logger.error("RSS connection test failed in debug mode")
            return

    try:
        # Run once right away to pick up new articles
        logger.info("Running initial RSS check...")
        process_rss_feed()

        # Main loop
        while True:
            try:
                logger.info(f"⏰ Waiting {IntervalCheckRSSbyHHC} seconds for next check...")
                time.sleep(IntervalCheckRSSbyHHC)
                process_rss_feed()
            except Exception as e:
                logger.error(f"Unexpected error: {_redact(e)}")
                if _enabled(EnableErrorNotificationbyHHC):
                    send_error_notification(f"Unexpected Error: {e}")
                time.sleep(60)
    except KeyboardInterrupt:
        # Covers Ctrl+C during the initial check as well as inside the loop.
        logger.info("Program stopped by user")


if __name__ == "__main__":
    main()

Posting Komentar

Terima kasih atas kunjungan Anda di website kami. Kami sangat menghargai setiap komentar dan masukan yang diberikan oleh pembaca kami. Sebelum mengirimkan komentar, harap diingat untuk tetap mengikuti etika dan sopan santun dalam berkomunikasi. Kami tidak akan mentoleransi komentar yang mengandung pelecehan, intimidasi, diskriminasi, atau konten yang tidak pantas.

Pastikan komentar yang Anda sampaikan relevan dengan topik postingan dan tidak melanggar hak cipta atau privasi orang lain. Kami berhak untuk meninjau, mengedit, atau menghapus komentar yang tidak sesuai dengan kebijakan kami tanpa pemberitahuan sebelumnya.

Kami mengundang Anda untuk memberikan masukan yang konstruktif dan berguna bagi pembaca kami. Silakan berikan pendapat, saran, atau pengalaman Anda yang dapat meningkatkan kualitas konten kami dan memberikan manfaat bagi pembaca lainnya. Terima kasih atas partisipasi Anda dalam komunitas kami. Silakan berkomentar dengan bijak dan santun.

Gabung dalam percakapan

Gabung dalam percakapan