import feedparser
import requests
import time
import json
import os
import re
from datetime import datetime, timezone
from bs4 import BeautifulSoup
import logging
from urllib.parse import urljoin, urlparse
# ===========================================
# FULL CONFIGURATION
# ===========================================
# RSS source configuration.
# The Show* switches are strings, not booleans: the code enables a feature
# only when the value, lower-cased, equals "true".
LinkWebsiteRSSbyHHC = "https://www.hanhaoyu.com/feeds/posts/default?alt=rss"
ShowJudulRSSbyHHC = "true"
ShowLinkJudulRSSbyHHC = "true"
ShowDatePublicationRSSbyHHC = "true"
ShowAuthorRSSbyHHC = "true"
ShowLabelRSSbyHHC = "true"
ShowSummaryRSSbyHHC = "true"
ShowImageRSSbyHHC = "true"
# Telegram bot configuration.
# SECURITY: a bot token stored in source code should be treated as exposed.
# Prefer supplying it through the TELEGRAM_BOT_TOKEN environment variable and
# rotate the embedded token via @BotFather. The literal below is kept only as
# a fallback so existing deployments keep working unchanged.
TokenBotTelegrambyHHC = (
    os.environ.get("TELEGRAM_BOT_TOKEN")
    or "7163346072:AAEP_G38vgoR5Bq2jtsVEq2B6qOIKRK5oMo"
)
ChannelIDTelegrambyHHC = "-1002417488695"  # or e.g. -1001234567890
AdminIDTelegrambyHHC = "1090870321"  # chat that receives error notifications
# Message format configuration.
# Prefix and suffix are stripped and then joined to the other message parts
# with blank lines by create_telegram_message().
CustomPrefixMessagebyHHC = "🚀 POST TERBARU!\n\n"
CustomSuffixMessagebyHHC = "\n\n#BlogPost #Update"
# Maximum lengths (in characters) passed to truncate_text().
MaxLengthSummarybyHHC = 200
MaxLengthTitlebyHHC = 100
# Time and interval configuration.
IntervalCheckRSSbyHHC = 300 # seconds (5 minutes) between feed checks
TimezonebyHHC = "Asia/Jakarta"
# strftime pattern used by format_date(); "WIB" is literal text in the pattern.
FormatDatebyHHC = "%d/%m/%Y %H:%M WIB"
# File and database configuration.
DatabaseFilebyHHC = "posted_articles.json"
LogFilebyHHC = "rss_uploader.log"
MaxLogSizebyHHC = 5242880 # 5 MB
# Error handling configuration.
MaxRetriesbyHHC = 3
RetryDelaybyHHC = 5 # seconds
EnableErrorNotificationbyHHC = "true"
# Image configuration.
# DefaultImagebyHHC is the fallback used when a post contains no image.
DefaultImagebyHHC = "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgcVcG9bmlJ5phU07kzvObOSyQSOUYrrtv9aIthIBrGzFTZ1vXYhFkMEZ1es_9hya8qk-_CC9-JRLLjsq9xVcE2iciPUeiMO22uzEsD_QvtKgv8UZawWGMSOdhCQ78QiArL1-hReTCSKbbuVHeFLnO1D099KonBvTfPpVB1JyttGiZOIg_oLZ4jVoUBdAE/s1080/thumnail%20x%20hanhaoyu.png"
CompressImagebyHHC = "true"
MaxImageSizebyHHC = 1048576 # 1 MB
# Filtering configuration.
# NOTE(review): the two empty strings look like placeholders. An empty string
# is a substring of every text, so a plain substring check against these
# values matches every post -- replace them with real keywords or use [].
ExcludeKeywordsbyHHC = ["", ""] # keywords that cause a post to be skipped
MinWordCountbyHHC = 2 # minimum number of words required in the summary
# ===========================================
# LOGGING SETUP
# ===========================================
# Imported here so this section carries everything it needs.
from logging.handlers import RotatingFileHandler

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Honour MaxLogSizebyHHC: rotate the log once it reaches the
        # configured size instead of letting a long-running process grow it
        # without bound. backupCount must be non-zero, otherwise
        # RotatingFileHandler never rolls over.
        RotatingFileHandler(
            LogFilebyHHC,
            maxBytes=MaxLogSizebyHHC,
            backupCount=3,
            encoding='utf-8',
        ),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
# ===========================================
# FUNGSI UTILITY
# ===========================================
def load_posted_articles():
    """Return the stored list of already-posted articles.

    Returns an empty list when the database file does not exist or when it
    cannot be read or parsed (the failure is logged).
    """
    try:
        if not os.path.exists(DatabaseFilebyHHC):
            return []
        with open(DatabaseFilebyHHC, 'r', encoding='utf-8') as db_file:
            stored = json.load(db_file)
    except Exception as err:
        logger.error(f"Error loading posted articles: {err}")
        return []
    return stored
def save_posted_articles(articles):
    """Persist the list of already-posted articles to the database file.

    The data is written to a temporary file next to the database and then
    moved into place with os.replace(), so a failure while serialising or
    writing never leaves a truncated or corrupt database behind. A corrupt
    database would be read back as empty and every article posted again.

    Args:
        articles: JSON-serialisable list of article records.

    Errors are logged, not raised.
    """
    tmp_path = f"{DatabaseFilebyHHC}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(articles, f, ensure_ascii=False, indent=2)
        # Replace the old database only after the new content is complete.
        os.replace(tmp_path, DatabaseFilebyHHC)
        logger.info(f"Database updated with {len(articles)} articles")
    except Exception as e:
        logger.error(f"Error saving posted articles: {e}")
        # Best-effort cleanup of the partial temporary file; the previous
        # database (if any) is left untouched.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def clean_html(html_content):
    """Strip markup from an HTML fragment and return its plain text.

    <script> and <style> elements are dropped entirely, and every run of
    whitespace in the remaining text is collapsed to a single space.
    Falsy input yields an empty string.
    """
    if not html_content:
        return ""
    document = BeautifulSoup(html_content, 'html.parser')
    # Remove elements whose content is code rather than readable text.
    for node in document.find_all(["script", "style"]):
        node.decompose()
    words = document.get_text().split()
    return ' '.join(words)
def extract_first_image(content):
    """Return the URL of the first <img> in an HTML fragment, or None.

    Protocol-relative sources ("//host/...") are given an "https:" scheme and
    root-relative sources ("/path") are resolved against the host of the
    configured RSS URL. Any other source value is returned unchanged.
    """
    if not content:
        return None
    first_img = BeautifulSoup(content, 'html.parser').find('img')
    src = first_img.get('src') if first_img else None
    if not src:
        return None
    if src.startswith('//'):
        return 'https:' + src
    if src.startswith('/'):
        host = urlparse(LinkWebsiteRSSbyHHC).netloc
        return f'https://{host}{src}'
    return src
def truncate_text(text, max_length):
    """Shorten text to at most max_length characters.

    Text that already fits is returned unchanged. Longer text is cut and
    terminated with "..." so that the result is exactly max_length characters.

    Args:
        text: The string to shorten; None or empty yields "".
        max_length: Maximum length of the result. Values <= 0 yield "".

    Returns:
        The (possibly shortened) string, never longer than max_length.
    """
    if not text or max_length <= 0:
        return ""
    if len(text) <= max_length:
        return text
    if max_length <= 3:
        # No room for an ellipsis. A plain cut keeps the length guarantee,
        # whereas a negative slice bound would return an over-long string.
        return text[:max_length]
    return text[:max_length - 3] + "..."
def _parse_feed_date(date_string):
    """Parse an RFC 822 (RSS) or ISO 8601 (Atom) date into an aware datetime."""
    from email.utils import parsedate_to_datetime

    try:
        # Handles "GMT"/"UT", and both positive and negative numeric offsets.
        dt = parsedate_to_datetime(date_string)
    except (TypeError, ValueError):
        # Atom feeds publish ISO 8601 timestamps; older Pythons do not accept
        # the "Z" suffix in fromisoformat(), so normalise it first.
        dt = datetime.fromisoformat(date_string.strip().replace('Z', '+00:00'))
    if dt.tzinfo is None:
        # Dates without zone information are treated as UTC.
        dt = dt.replace(tzinfo=timezone.utc)
    return dt


def _configured_timezone():
    """Return the tzinfo for TimezonebyHHC, or None if it cannot be resolved."""
    from datetime import timedelta

    try:
        from zoneinfo import ZoneInfo
        return ZoneInfo(TimezonebyHHC)
    except (ImportError, KeyError, TypeError, ValueError, OSError):
        # zoneinfo needs Python 3.9+ and a time zone database on the host.
        pass
    if TimezonebyHHC == "Asia/Jakarta":
        # WIB has a fixed UTC+7 offset with no DST, so a fixed offset is exact.
        return timezone(timedelta(hours=7))
    return None


def format_date(date_string):
    """Format a feed publication date according to FormatDatebyHHC.

    The date is parsed, converted to the time zone named by TimezonebyHHC
    and rendered with FormatDatebyHHC, so a zone label in the pattern (for
    example "WIB") matches the time actually shown.

    Args:
        date_string: Publication date as provided by the feed.

    Returns:
        The formatted date, or date_string unchanged if it cannot be parsed
        (a warning is logged in that case).
    """
    try:
        dt = _parse_feed_date(date_string)
        target_tz = _configured_timezone()
        if target_tz is not None:
            dt = dt.astimezone(target_tz)
        return dt.strftime(FormatDatebyHHC)
    except Exception as e:
        # Best effort: an unparseable date must never block posting.
        logger.warning(f"Date parsing error: {e}, using original: {date_string}")
        return date_string
def should_filter_post(title, summary):
    """Decide whether a post must be skipped.

    A post is filtered when its title or summary contains any keyword from
    ExcludeKeywordsbyHHC (case-insensitive substring match), or when the
    summary has fewer than MinWordCountbyHHC words.

    Args:
        title: Post title.
        summary: Plain-text summary (may be empty or None).

    Returns:
        True if the post should be skipped, False otherwise.
    """
    text_to_check = f"{title} {summary}".lower()
    for keyword in ExcludeKeywordsbyHHC:
        # Blank placeholders are ignored: an empty string is a substring of
        # every text and would otherwise filter out every single post.
        if not isinstance(keyword, str) or not keyword.strip():
            continue
        if keyword.lower() in text_to_check:
            return True
    word_count = len(summary.split()) if summary else 0
    return word_count < MinWordCountbyHHC
def send_telegram_message(message, image_url=None):
    """Send a message to the configured Telegram channel, with retries.

    The message is sent as a photo caption when image_url is given and
    ShowImageRSSbyHHC is "true"; otherwise it is sent as a text message.
    Captions longer than Telegram's 1024-character limit fall back to a text
    message, since the API would reject them on every attempt.

    Args:
        message: HTML-formatted message text.
        image_url: Optional URL of the image to attach.

    Returns:
        The decoded Telegram API response on success, or None after
        MaxRetriesbyHHC failed attempts (the admin is notified when
        EnableErrorNotificationbyHHC is "true").
    """
    base_url = f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}"
    use_photo = bool(image_url) and ShowImageRSSbyHHC.lower() == "true"
    # len() only approximates Telegram's own count, but it catches the common
    # over-long case before any request is wasted.
    if use_photo and len(message) > 1024:
        logger.warning("Caption exceeds 1024 characters, sending as text message instead")
        use_photo = False

    if use_photo:
        kind = "photo"
        endpoint = f"{base_url}/sendPhoto"
        data = {
            'chat_id': ChannelIDTelegrambyHHC,
            'photo': image_url,
            'caption': message,
            'parse_mode': 'HTML'
        }
    else:
        kind = "text"
        endpoint = f"{base_url}/sendMessage"
        data = {
            'chat_id': ChannelIDTelegrambyHHC,
            'text': message,
            'parse_mode': 'HTML',
            'disable_web_page_preview': False
        }

    for attempt in range(MaxRetriesbyHHC):
        try:
            logger.info(f"Sending {kind} message (attempt {attempt+1})")
            response = requests.post(endpoint, data=data, timeout=30)
            logger.info(f"Telegram API response: {response.status_code}")
            if response.status_code == 200:
                result = response.json()
                if result.get('ok'):
                    logger.info("Message sent successfully!")
                    return result
                logger.error(f"Telegram API error: {result}")
            else:
                logger.error(f"HTTP error: {response.status_code}, {response.text}")
        except requests.exceptions.RequestException as e:
            # Exception text from requests embeds the request URL, which
            # contains the bot token; never write it to the log verbatim.
            safe_error = str(e).replace(TokenBotTelegrambyHHC, "***")
            logger.error(f"Request error (attempt {attempt+1}): {safe_error}")
        except Exception as e:
            safe_error = str(e).replace(TokenBotTelegrambyHHC, "***")
            logger.error(f"Unexpected error sending message (attempt {attempt+1}): {safe_error}")
        # One shared back-off for every failure path, skipped after the
        # final attempt.
        if attempt < MaxRetriesbyHHC - 1:
            time.sleep(RetryDelaybyHHC)

    # All retries failed.
    logger.error("All retry attempts failed")
    if EnableErrorNotificationbyHHC.lower() == "true":
        send_error_notification(f"Failed to send message after {MaxRetriesbyHHC} attempts")
    return None
def send_error_notification(error_message):
    """Send an error notification to the admin chat (best effort).

    The text is sent without a parse mode: error messages routinely contain
    characters such as "<" and "&", which Telegram rejects as malformed
    markup when HTML parsing is requested, and the notification would be
    lost. The bot token is masked and the text capped below Telegram's
    message size limit.

    Args:
        error_message: Description of the error; converted with str().

    Failures are logged, never raised.
    """
    try:
        url = f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}/sendMessage"
        details = str(error_message).replace(TokenBotTelegrambyHHC, "***")
        text = f"🚨 RSS Uploader Error:\n{details}"
        data = {
            'chat_id': AdminIDTelegrambyHHC,
            # Telegram limits a message to 4096 characters.
            'text': text[:4000],
        }
        response = requests.post(url, data=data, timeout=10)
        logger.info(f"Error notification sent: {response.status_code}")
        if response.status_code != 200:
            logger.error(f"Error notification rejected: {response.text}")
    except Exception as e:
        # The exception text may embed the request URL (and thus the token).
        safe_error = str(e).replace(TokenBotTelegrambyHHC, "***")
        logger.error(f"Failed to send error notification: {safe_error}")
def create_telegram_message(entry):
    """Build the HTML-formatted Telegram message for one feed entry.

    Which parts appear is controlled by the Show*RSSbyHHC switches. The
    message is sent with parse_mode=HTML, so all text taken from the feed is
    HTML-escaped; the configured prefix and suffix are inserted verbatim.

    Args:
        entry: Mapping with optional keys 'title', 'link', 'published',
            'author', 'tags' and 'summary'.

    Returns:
        The message parts joined by blank lines.
    """
    import html

    def _esc(value):
        # Escape &, < and > so feed text cannot break Telegram's HTML parser.
        return html.escape(str(value), quote=False)

    message_parts = []
    link = entry.get('link') or ''
    href = html.escape(link, quote=True)
    link_in_title = ShowLinkJudulRSSbyHHC.lower() == "true"

    # Custom prefix
    if CustomPrefixMessagebyHHC:
        message_parts.append(CustomPrefixMessagebyHHC.strip())

    # Title, hyperlinked to the post when configured and a link exists
    if ShowJudulRSSbyHHC.lower() == "true":
        title = _esc(truncate_text(entry.get('title', ''), MaxLengthTitlebyHHC))
        if link_in_title and link:
            message_parts.append(f'📝 <a href="{href}">{title}</a>')
        else:
            message_parts.append(f"📝 {title}")

    # Separate "read more" link when the title itself is not linked
    if not link_in_title:
        if link:
            message_parts.append(f'🔗 <a href="{href}">Baca selengkapnya</a>')
        else:
            message_parts.append("🔗 Baca selengkapnya")

    # Publication date
    if ShowDatePublicationRSSbyHHC.lower() == "true" and entry.get('published'):
        formatted_date = format_date(entry.get('published'))
        message_parts.append(f"🕒 Dipublikasikan: {_esc(formatted_date)}")

    # Author
    if ShowAuthorRSSbyHHC.lower() == "true" and entry.get('author'):
        message_parts.append(f"✍️ Penulis: {_esc(entry.get('author'))}")

    # Labels / categories
    raw_tags = entry.get('tags')
    if ShowLabelRSSbyHHC.lower() == "true" and isinstance(raw_tags, list) and raw_tags:
        terms = [
            (tag.get('term') or '') if isinstance(tag, dict) else str(tag)
            for tag in raw_tags
        ]
        # Drop empty terms so the label list has no dangling separators.
        tags = ', '.join(term for term in terms if term)
        if tags:
            message_parts.append(f"📌 Label: {_esc(tags)}")

    # Summary (truncate first, then escape, so no entity is cut in half)
    if ShowSummaryRSSbyHHC.lower() == "true" and entry.get('summary'):
        summary = truncate_text(entry.get('summary'), MaxLengthSummarybyHHC)
        message_parts.append(f"📝 Ringkasan: {_esc(summary)}")

    # Custom suffix
    if CustomSuffixMessagebyHHC:
        message_parts.append(CustomSuffixMessagebyHHC.strip())

    return '\n\n'.join(message_parts)
def _fetch_rss_response():
    """Return the first HTTP 200 response among the candidate feed URLs.

    Raises:
        RuntimeError: if none of the URLs could be fetched successfully.
    """
    # No explicit Accept-Encoding: requests advertises only the encodings it
    # can actually decode. Hard-coding "br" can yield an undecodable Brotli
    # body when no Brotli library is installed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'application/rss+xml, application/xml, text/xml',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    rss_urls = [
        LinkWebsiteRSSbyHHC,
        LinkWebsiteRSSbyHHC.replace('https://', 'http://'),  # Try HTTP
        "https://www.hanhaoyu.com/feeds/posts/default",  # Alternative RSS URL
        "http://www.hanhaoyu.com/feeds/posts/default"
    ]
    # The context manager closes the session (and its connection pool) on
    # every call instead of leaking one per polling cycle.
    with requests.Session() as session:
        session.trust_env = False  # Ignore proxy settings from the environment
        for url in rss_urls:
            try:
                logger.info(f"Trying RSS URL: {url}")
                # NOTE(security): verify=False disables TLS certificate
                # checks and the list also falls back to plain HTTP; both are
                # kept as in the original workaround but allow feed content
                # to be tampered with in transit.
                response = session.get(url, headers=headers, timeout=30, verify=False)
            except Exception as e:
                # Any failure simply moves on to the next candidate URL.
                logger.warning(f"Failed to connect to {url}: {e}")
                continue
            if response.status_code == 200:
                logger.info(f"Successfully connected to: {url}")
                return response
            logger.warning(f"RSS URL {url} returned HTTP {response.status_code}")
    raise RuntimeError("Unable to connect to any RSS URL")


def _entry_summary(entry):
    """Return the plain-text summary of a feed entry ('' if it has none)."""
    if hasattr(entry, 'summary'):
        return clean_html(entry.summary)
    if hasattr(entry, 'description'):
        return clean_html(entry.description)
    if hasattr(entry, 'content'):
        # Only the first content item is used.
        for content in entry.content:
            return clean_html(content.value)
    return ''


def _entry_image_url(entry):
    """Return the first image of a feed entry, else the default image or None."""
    if hasattr(entry, 'content'):
        for content in entry.content:
            img = extract_first_image(content.value)
            if img:
                return img
    if hasattr(entry, 'summary'):
        img = extract_first_image(entry.summary)
        if img:
            return img
    return DefaultImagebyHHC or None


def process_rss_feed():
    """Fetch the RSS feed and post every not-yet-posted entry to Telegram.

    Entries are processed in feed order. An entry is skipped when it has no
    link, when its link is already in the database, or when it is rejected by
    should_filter_post(). The database is saved right after each successful
    post, so an interruption cannot cause already-sent entries to be posted
    again. Any error is logged and, if enabled, reported to the admin; nothing
    is raised to the caller.
    """
    try:
        logger.info("=== Memulai proses RSS feed ===")
        logger.info(f"RSS URL: {LinkWebsiteRSSbyHHC}")

        response = _fetch_rss_response()

        # Parse with feedparser
        feed = feedparser.parse(response.content)
        logger.info(f"RSS parsed successfully. Found {len(feed.entries)} entries")
        if feed.bozo:
            logger.warning(f"RSS feed parsing warning: {feed.bozo_exception}")
        if not feed.entries:
            logger.warning("No entries found in RSS feed")
            return

        # Load the articles that were already posted
        posted_articles = load_posted_articles()
        if not isinstance(posted_articles, list):
            # A database with an unexpected shape is treated as empty.
            posted_articles = []
        # A set gives O(1) membership tests; malformed records are ignored.
        posted_links = {
            article.get('link')
            for article in posted_articles
            if isinstance(article, dict)
        }
        logger.info(f"Database loaded: {len(posted_articles)} previously posted articles")

        new_count = 0
        total = len(feed.entries)
        for i, entry in enumerate(feed.entries):
            try:
                title = entry.get('title', 'No Title')
                link = entry.get('link', '')
                logger.info(f"Processing entry {i+1}/{total}: {title}")

                # Without a link the entry cannot be de-duplicated.
                if not link:
                    logger.warning(f" -> Entry has no link, skipping")
                    continue
                if link in posted_links:
                    logger.info(f" -> Already posted, skipping")
                    continue

                published = entry.get('published', '')
                author = entry.get('author', 'Unknown')
                summary = _entry_summary(entry)
                logger.info(f" -> Title: {title}")
                logger.info(f" -> Link: {link}")
                logger.info(f" -> Summary length: {len(summary)} chars")

                image_url = _entry_image_url(entry)
                logger.info(f" -> Image URL: {image_url}")

                tags = entry.tags if hasattr(entry, 'tags') else []

                if should_filter_post(title, summary):
                    logger.info(f" -> Filtered out due to filtering rules")
                    continue

                entry_data = {
                    'title': title,
                    'link': link,
                    'published': published,
                    'author': author,
                    'summary': summary,
                    'tags': tags,
                    'image_url': image_url
                }

                message = create_telegram_message(entry_data)
                logger.info(f" -> Message created, length: {len(message)} chars")

                logger.info(f" -> Sending to Telegram...")
                result = send_telegram_message(message, image_url)
                if not result:
                    logger.error(f" -> ❌ Gagal mengirim: {title}")
                    continue

                logger.info(f" -> ✅ Berhasil mengirim: {title}")
                posted_articles.append({
                    'link': link,
                    'title': title,
                    'posted_at': datetime.now().isoformat()
                })
                posted_links.add(link)
                # Persist immediately so a crash or Ctrl+C later in the run
                # cannot cause this entry to be posted twice.
                save_posted_articles(posted_articles)
                new_count += 1
                # Delay between posts
                time.sleep(2)
            except Exception as e:
                logger.error(f"Error processing entry {i+1}: {e}")
                continue

        if new_count:
            logger.info(f"🎉 Berhasil memproses {new_count} artikel baru")
        else:
            logger.info("ℹ️ Tidak ada artikel baru")
    except Exception as e:
        logger.error(f"Error in process_rss_feed: {e}")
        if EnableErrorNotificationbyHHC.lower() == "true":
            send_error_notification(f"RSS Feed Error: {e}")
def main():
    """Run the uploader: poll the feed forever until interrupted.

    Each cycle processes the feed and then sleeps for IntervalCheckRSSbyHHC
    seconds. Ctrl+C ends the loop. Any other error is logged, optionally
    reported to the admin, and followed by a 60-second pause before the next
    attempt.
    """
    startup_lines = (
        "🚀 Starting RSS to Telegram Uploader",
        f"RSS Feed: {LinkWebsiteRSSbyHHC}",
        f"Telegram Channel: {ChannelIDTelegrambyHHC}",
        f"Check Interval: {IntervalCheckRSSbyHHC} seconds",
    )
    for line in startup_lines:
        logger.info(line)

    keep_running = True
    while keep_running:
        try:
            process_rss_feed()
            logger.info(f"⏰ Menunggu {IntervalCheckRSSbyHHC} detik untuk check berikutnya...")
            time.sleep(IntervalCheckRSSbyHHC)
        except KeyboardInterrupt:
            logger.info("Program dihentikan oleh user")
            keep_running = False
        except Exception as err:
            logger.error(f"Unexpected error: {err}")
            if EnableErrorNotificationbyHHC.lower() == "true":
                send_error_notification(f"Unexpected Error: {err}")
            # Wait one minute before trying again.
            time.sleep(60)
def test_connection():
    """Check the RSS feed, the Telegram bot and channel access.

    Results are printed to stdout. The channel check sends a real test
    message to the configured channel.

    Returns:
        True if the RSS feed could be fetched and parsed by any method,
        False otherwise. The Telegram checks do not affect the return value.
    """
    print("🧪 Testing connections...")

    # --- RSS feed: try every candidate URL with requests first ---
    print("\n--- Testing RSS Feed ---")
    candidate_urls = [
        LinkWebsiteRSSbyHHC,
        LinkWebsiteRSSbyHHC.replace('https://', 'http://'),
        "https://www.hanhaoyu.com/feeds/posts/default",
        "http://www.hanhaoyu.com/feeds/posts/default"
    ]
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'application/rss+xml, application/xml, text/xml'
    }
    http = requests.Session()
    http.trust_env = False  # Ignore proxy settings from the environment
    rss_success = False
    for candidate in candidate_urls:
        try:
            print(f"Testing: {candidate}")
            reply = http.get(candidate, headers=request_headers, timeout=30, verify=False)
            if reply.status_code != 200:
                print(f" Status: {reply.status_code}")
                continue
            parsed = feedparser.parse(reply.content)
            print(f"✅ RSS Feed OK - {len(parsed.entries)} entries found")
            if parsed.entries:
                latest = parsed.entries[0]
                print(f" Latest: {latest.get('title', 'No title')}")
                print(f" Link: {latest.get('link', 'No link')}")
                print(f" Published: {latest.get('published', 'No date')}")
            rss_success = True
            break
        except Exception as err:
            print(f" Error: {err}")

    # --- RSS feed: last resort, plain urllib against the primary URL ---
    if not rss_success:
        print("❌ All RSS URLs failed - trying alternative methods...")
        try:
            import urllib.request
            print("Trying with urllib...")
            fallback_request = urllib.request.Request(LinkWebsiteRSSbyHHC)
            fallback_request.add_header('User-Agent', 'Mozilla/5.0 (compatible; RSS Reader)')
            with urllib.request.urlopen(fallback_request, timeout=30) as stream:
                raw_feed = stream.read()
            parsed = feedparser.parse(raw_feed)
            print(f"✅ RSS Feed OK (urllib) - {len(parsed.entries)} entries found")
            rss_success = True
        except Exception as err:
            print(f" urllib error: {err}")

    # --- Telegram bot: getMe verifies the token ---
    print("\n--- Testing Telegram Bot ---")
    try:
        me_reply = requests.get(
            f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}/getMe",
            timeout=10,
        )
        if me_reply.status_code != 200:
            print(f"❌ Telegram Bot Error: {me_reply.status_code}")
        else:
            bot_info = me_reply.json()
            if bot_info.get('ok'):
                print(f"✅ Telegram Bot OK - @{bot_info['result']['username']}")
            else:
                print(f"❌ Telegram Bot Error: {bot_info}")
    except Exception as err:
        print(f"❌ Telegram Bot Error: {err}")

    # --- Telegram channel: post a test message ---
    print("\n--- Testing Telegram Channel ---")
    try:
        payload = {
            'chat_id': ChannelIDTelegrambyHHC,
            'text': '🧪 Test message from RSS Uploader',
            'parse_mode': 'HTML'
        }
        send_reply = requests.post(
            f"https://api.telegram.org/bot{TokenBotTelegrambyHHC}/sendMessage",
            data=payload,
            timeout=10,
        )
        if send_reply.status_code != 200:
            print(f"❌ Telegram Channel Error: {send_reply.status_code} - {send_reply.text}")
        else:
            outcome = send_reply.json()
            if outcome.get('ok'):
                print(f"✅ Telegram Channel Access OK")
            else:
                print(f"❌ Telegram Channel Error: {outcome}")
    except Exception as err:
        print(f"❌ Telegram Channel Error: {err}")

    return rss_success
if __name__ == "__main__":
    import sys

    def _ask(prompt, default=""):
        """Prompt the user; return default when stdin is closed.

        Without a terminal (scheduled task, service, cron) input() raises
        EOFError, which would stop the script before the main loop starts.
        """
        try:
            return input(prompt)
        except EOFError:
            print()
            return default

    # Test the connections first
    rss_success = test_connection()
    if not rss_success:
        print("\n❌ RSS Feed tidak bisa diakses!")
        print("Kemungkinan penyebab:")
        print("1. Proxy/Firewall memblokir akses")
        print("2. Website hanhaoyu.com down")
        print("3. RSS feed URL berubah")
        print("\nSolusi:")
        print("1. Coba jalankan dari server/VPS lain")
        print("2. Gunakan VPN")
        print("3. Cek manual: https://www.hanhaoyu.com/feeds/posts/default?alt=rss")
        # Unattended runs carry on: the main loop retries the feed anyway.
        choice = _ask("\nLanjutkan program? (y/n): ", default="y")
        if choice.strip().lower() != 'y':
            # sys.exit() instead of the site-provided exit() helper, which is
            # not guaranteed to exist (e.g. when Python runs with -S).
            sys.exit()
    # Let an interactive user decide when to start
    _ask("\nPress Enter to start the main program...")
    # Run the main program
    main()
# Auto Post Blogger to Channel Telegram V3
# Kode ini adalah Python, diunggah ke https://www.pythonanywhere.com/user/hanhaoyu/files/home/hanhaoyu/auto_post_blogger_by_hanhaoyu_v3.py?edit.