update config and podcast bot

This commit is contained in:
Pavel 2026-02-25 11:32:29 +03:00
parent fb912c0190
commit 80d5821e21
12 changed files with 343 additions and 0 deletions

View File

@ -0,0 +1,3 @@
TELEGRAM_TOKEN=CHANGE_ME
CHAT_ID=CHANGE_ME
VK_PLAYLIST_URL=https://vkvideo.ru/playlist/...

1
services/vk-podcast-bot/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.env

View File

@ -0,0 +1,18 @@
# Image for the podcast bot: Python 3.11 (slim variant) plus ffmpeg.
FROM python:3.11-slim
# Install ffmpeg (the bot's Python code invokes both `ffmpeg` and `ffprobe`) and
# remove the apt package lists in the same layer.
RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# requirements.txt is copied and installed before the rest of the source.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Create the directory the bot downloads audio into.
# NOTE(review): the compose file bind-mounts a host directory over /data, which
# would hide this image-level directory at runtime — confirm it is still needed.
RUN mkdir -p /data/audio
# Disable Python's output buffering so print() output is not held back.
ENV PYTHONUNBUFFERED=1
CMD ["python", "bot.py"]

View File

@ -0,0 +1,53 @@
import subprocess
from pathlib import Path
import math
# Largest file size (in bytes) that is sent as a single piece; anything bigger is split.
# NOTE(review): presumably chosen to stay under a 50 MB upload limit — confirm.
MAX_SIZE = 49 * 1024 * 1024 # 49 MiB safety margin
def get_duration_seconds(file_path: Path) -> float:
    """Return the duration of *file_path* in seconds as reported by ffprobe.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        ValueError: ffprobe printed something that is not a number.
    """
    probe_args = ["ffprobe"]
    # Only report errors on stderr.
    probe_args += ["-v", "error"]
    # Print just the container-level duration, without key names or wrappers.
    probe_args += ["-show_entries", "format=duration"]
    probe_args += ["-of", "default=noprint_wrappers=1:nokey=1"]
    probe_args.append(str(file_path))

    raw_output = subprocess.check_output(probe_args)
    return float(raw_output.decode().strip())
def split_audio(file_path: Path):
    """Split an audio file into parts small enough to upload individually.

    A file no larger than ``MAX_SIZE`` is returned unchanged as a one-element
    list. Otherwise the file is cut into ``ceil(size / MAX_SIZE)`` parts of
    equal duration named ``<stem>_part<N>.mp3`` next to the original, and the
    parts are returned in playback order.

    If a complete set of parts from an earlier run already exists it is reused
    instead of running ffmpeg again.

    Args:
        file_path: Path of the audio file to split.

    Returns:
        list[Path]: ``[file_path]`` or the ordered part files.

    Raises:
        subprocess.CalledProcessError: ffprobe or ffmpeg exited with an error.
        OSError: ffprobe/ffmpeg could not be started.
    """
    # Local imports: these modules are only needed by this function.
    import glob
    import re

    size = file_path.stat().st_size
    if size <= MAX_SIZE:
        return [file_path]

    parts_count = math.ceil(size / MAX_SIZE)

    # The stem comes from a video title, so it may contain glob
    # metacharacters such as "[" — escape it before building the pattern.
    pattern = f"{glob.escape(file_path.stem)}_part*.mp3"
    part_name = re.compile(re.escape(file_path.stem) + r"_part(\d+)\.mp3")
    numbered = []
    for candidate in file_path.parent.glob(pattern):
        found = part_name.fullmatch(candidate.name)
        if found:
            numbered.append((int(found.group(1)), candidate))

    # Reuse earlier parts only when the set is complete; sort by part number
    # so that "_part10" comes after "_part2" rather than before it.
    if len(numbered) == parts_count:
        return [path for _, path in sorted(numbered)]

    duration = get_duration_seconds(file_path)
    part_duration = duration / parts_count
    output_files = []
    try:
        for i in range(parts_count):
            start = i * part_duration
            output = file_path.with_name(f"{file_path.stem}_part{i+1}.mp3")
            cmd = [
                "ffmpeg",
                "-y",                       # overwrite stale output from earlier runs
                "-i", str(file_path),
                "-ss", str(start),          # start offset of this part
                "-t", str(part_duration),   # length of this part
                "-acodec", "copy",          # copy the audio stream, no re-encode
                str(output),
            ]
            # Record the output first so a half-written file is cleaned up too.
            output_files.append(output)
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )
    except (subprocess.CalledProcessError, OSError):
        # Never leave an incomplete set behind: a later run must not mistake
        # it for a finished split.
        for leftover in output_files:
            leftover.unlink(missing_ok=True)
        raise
    return output_files

View File

@ -0,0 +1,66 @@
import time
from state import load_state, save_state
from vk_playlist import get_playlist_videos
from downloader import download_audio, get_video_title
from telegram_sender import send_audio, send_message
# Pause between two playlist checks in main_loop().
CHECK_INTERVAL = 300 # seconds
def process_new_video(video_id, video_url, known_ids):
    """Announce, download and upload one newly discovered video.

    The video is marked as known (added to *known_ids* and persisted) only
    after the upload succeeded, so a failed upload is attempted again on a
    later check. Any exception is reported on stdout and swallowed so that one
    bad video does not stop the others.

    Args:
        video_id: Playlist ID of the video.
        video_url: URL the audio is downloaded from.
        known_ids: Mutable set of already processed IDs; updated in place.
    """
    try:
        title = get_video_title(video_url)
        print("New podcast detected:", title)
        send_message(f"New podcast: {title}")

        audio_path, _ = download_audio(video_url)
        print("Sending audio:", title)

        if not send_audio(audio_path, title):
            print("Upload failed — will retry later")
            return

        print("Upload completed:", title)
        known_ids.add(video_id)
        save_state(known_ids)
    except Exception as e:
        print("Failed processing:", e)
def run_once():
    """Check the playlist once and process every video not seen before.

    On the very first run (no state file yet) the current playlist content is
    only recorded, nothing is downloaded. On later runs each unknown video is
    handed to ``process_new_video`` in the order the playlist lists it.
    """
    known_ids, has_state = load_state()
    videos = get_playlist_videos()

    # first launch — remember everything
    if not has_state:
        print("First run: indexing playlist only")
        save_state(set(videos))
        return

    # Walk the dict instead of a set difference: a dict keeps the playlist
    # order, whereas iterating a set of strings yields an arbitrary order.
    # The list is built up front because process_new_video mutates known_ids.
    pending = [(vid, url) for vid, url in videos.items() if vid not in known_ids]
    if pending:
        print(f"Found {len(pending)} new videos")
    for vid, url in pending:
        process_new_video(vid, url, known_ids)
def _poll_playlist():
    """Run a single playlist check, reporting (not raising) any failure."""
    try:
        print("Checking playlist...")
        run_once()
    except Exception as exc:
        print("Loop error:", exc)


def main_loop():
    """Check the playlist forever, sleeping CHECK_INTERVAL seconds between checks."""
    while True:
        _poll_playlist()
        time.sleep(CHECK_INTERVAL)


if __name__ == "__main__":
    main_loop()

View File

@ -0,0 +1,9 @@
# Compose definition for the single vk-podcast-bot container.
services:
vk-podcast-bot:
# Build the image from the Dockerfile in this directory.
build: .
container_name: vk-podcast-bot
# Restart the container automatically unless it was stopped explicitly.
restart: unless-stopped
# Environment variables are read from the local .env file.
env_file:
- .env
# Host directory /srv/vk-podcast-bot/data is mounted at /data in the container.
volumes:
- /srv/vk-podcast-bot/data:/data

View File

@ -0,0 +1,36 @@
import yt_dlp
from pathlib import Path
# Directory the downloaded audio files are written to.
DOWNLOAD_DIR = Path("/data/audio")
# Runs at import time. Only the last path component is created (no parents=True),
# so /data must already exist.
DOWNLOAD_DIR.mkdir(exist_ok=True)
def get_video_title(video_url):
    """Return the title of the video at *video_url* without downloading it.

    Falls back to "New podcast" when the extracted metadata has no title key.
    """
    options = {"quiet": True, "skip_download": True}
    with yt_dlp.YoutubeDL(options) as ydl:
        metadata = ydl.extract_info(video_url, download=False)
        return metadata.get("title", "New podcast")
def download_audio(video_url):
    """Download the audio track of *video_url* and convert it to MP3.

    Returns:
        tuple: ``(mp3_path, title)`` where ``mp3_path`` is the ``Path`` of the
        converted file under /data/audio and ``title`` is the video title
        ("audio" when the metadata has none).
    """
    # Post-processing step that extracts the audio as 192 kbit/s MP3.
    to_mp3 = {
        "key": "FFmpegExtractAudio",
        "preferredcodec": "mp3",
        "preferredquality": "192",
    }
    options = {
        "format": "bestaudio/best",
        "outtmpl": "/data/audio/%(title)s.%(ext)s",
        "postprocessors": [to_mp3],
        "noplaylist": True,
        "quiet": True,
    }
    with yt_dlp.YoutubeDL(options) as ydl:
        info = ydl.extract_info(video_url, download=True)
        downloaded_name = ydl.prepare_filename(info)
        # prepare_filename() reports the pre-conversion name; the
        # post-processor leaves an .mp3 with the same stem.
        converted = Path(downloaded_name).with_suffix(".mp3")
        return converted, info.get("title", "audio")

View File

@ -0,0 +1,3 @@
yt-dlp
requests
requests-toolbelt

View File

@ -0,0 +1,15 @@
import json
from pathlib import Path
# JSON file that stores the IDs of the videos already known to the bot.
STATE_FILE = Path("/data/state.json")
def load_state():
    """Load the set of known video IDs from STATE_FILE.

    Returns:
        tuple: ``(known_ids, has_state)``. ``has_state`` is False (and the set
        empty) when the state file does not exist yet.
    """
    if STATE_FILE.exists():
        payload = json.loads(STATE_FILE.read_text())
        return set(payload.get("known_ids", [])), True
    return set(), False
def save_state(ids):
    """Persist the known video IDs to STATE_FILE as sorted, indented JSON.

    The data is written to a temporary sibling file first and then moved over
    the real file, so an interruption mid-write cannot leave a truncated state
    file behind.

    Args:
        ids: Iterable of video IDs to store under the "known_ids" key.
    """
    import os  # local import: only needed for the atomic replace

    payload = json.dumps({
        "known_ids": sorted(ids)
    }, indent=2)
    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp_file.write_text(payload)
    # Same directory, hence same filesystem: the replace swaps the file in one step.
    os.replace(tmp_file, STATE_FILE)

View File

@ -0,0 +1,103 @@
import requests
import time
import os
from pathlib import Path
from audio_splitter import split_audio
from requests_toolbelt.multipart.encoder import MultipartEncoder, MultipartEncoderMonitor
from requests.exceptions import ConnectionError, ReadTimeout, ChunkedEncodingError
# Both variables are mandatory: a missing one raises KeyError when this module is imported.
BOT_TOKEN = os.environ["TELEGRAM_TOKEN"]
CHAT_ID = os.environ["CHAT_ID"]
# Maximum number of attempts upload_file() makes for one file.
MAX_RETRIES = 5
def progress_callback(monitor):
    """Print the upload progress of *monitor* as a percentage.

    The line starts with a carriage return and has no trailing newline, so
    successive calls overwrite each other on a terminal.

    Args:
        monitor: Object exposing ``bytes_read`` and ``len``.
    """
    fraction_done = monitor.bytes_read / monitor.len
    percent = fraction_done * 100
    print(f"\rUploading: {percent:5.1f}%", end="", flush=True)
def send_message(text):
    """Send *text* to the configured chat via the Bot API ``sendMessage`` method.

    A non-OK HTTP response is only reported on stdout; exceptions raised by
    ``requests.post`` propagate to the caller.
    """
    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage"
    payload = {
        "chat_id": CHAT_ID,
        "text": text,
    }
    response = requests.post(url, data=payload, timeout=30)
    if response.ok:
        return
    print("Telegram message error:", response.text)
def upload_file(file_path, caption):
    """Upload one audio file to the configured chat via ``sendAudio``.

    The file is streamed as a multipart body with progress reporting. Up to
    ``MAX_RETRIES`` attempts are made; between attempts the function waits
    ``5 * attempt`` seconds. No wait happens after the last attempt.

    Args:
        file_path: ``Path`` of the file to upload (``.name`` is used as the
            uploaded file name).
        caption: Text sent in the request's ``title`` field.

    Returns:
        bool: True as soon as the server answers with an OK status, False when
        every attempt failed.
    """
    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendAudio"
    for attempt in range(1, MAX_RETRIES + 1):
        print(f"Connecting to Telegram (attempt {attempt}/{MAX_RETRIES})")
        try:
            # Re-open the file on every attempt so the stream starts at byte 0.
            with open(file_path, "rb") as f:
                encoder = MultipartEncoder(
                    fields={
                        "chat_id": str(CHAT_ID),
                        "title": caption,
                        "audio": (file_path.name, f, "audio/mpeg"),
                    }
                )
                monitor = MultipartEncoderMonitor(encoder, progress_callback)
                print("Connection established, starting upload...")
                r = requests.post(
                    url,
                    data=monitor,
                    headers={"Content-Type": monitor.content_type},
                    timeout=(20, 3600)  # (connect_timeout, read_timeout)
                )
                print("\nServer responded:", r.status_code)
                if r.ok:
                    print("Upload success")
                    return True
                print("Telegram API error:", r.text)
        except ConnectionError:
            print("\nConnection dropped by Telegram")
        except ReadTimeout:
            print("\nUpload timed out")
        except ChunkedEncodingError:
            print("\nChunked encoding error (connection reset)")
        except Exception as e:
            print("\nUnexpected error:", repr(e))
        # Back off only when another attempt will actually follow.
        if attempt < MAX_RETRIES:
            wait = 5 * attempt
            print(f"Retrying in {wait} sec...\n")
            time.sleep(wait)
    return False
def send_audio(file_path, title):
    """Upload an audio file, split into parts when necessary, then clean up.

    Each part is uploaded in order; with more than one part the caption gets a
    "(part i/n)" suffix. On the first failed upload the function stops and
    keeps every file on disk. After all parts succeeded the parts are deleted,
    and so is the original file when it had been split.

    Returns:
        bool: True when every part was uploaded, False otherwise.
    """
    original = Path(file_path)
    parts = split_audio(original)
    was_split = len(parts) > 1

    for i, part in enumerate(parts, start=1):
        caption = f"{title} (part {i}/{len(parts)})" if was_split else title
        print("Uploading:", part.name)
        if not upload_file(part, caption):
            # upload failed -> KEEP files for retry
            return False

    # Reaching this point means ALL parts were uploaded -> cleanup.
    for part in parts:
        part.unlink(missing_ok=True)

    # The unsplit source is a separate file only when parts were produced.
    if was_split and original.exists():
        original.unlink(missing_ok=True)
    return True

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Stop the Compose service, rebuild its image from scratch, start it again
# and print the most recent log lines.
# Abort on the first failing command, on use of an unset variable, and when
# any command of a pipeline fails.
set -euo pipefail
echo "==> Stopping service"
docker compose down
echo "==> Rebuilding image with updated source"
# --no-cache: rebuild every layer instead of reusing cached ones.
docker compose build --no-cache
echo "==> Starting service"
# -d: start in the background (detached).
docker compose up -d
echo "==> Showing recent logs"
docker compose logs --tail=100
echo "==> Done. Use: docker compose logs -f"

View File

@ -0,0 +1,21 @@
import yt_dlp
import os
# URL of the playlist to watch. Mandatory: a missing variable raises KeyError at import time.
PLAYLIST_URL = os.environ["VK_PLAYLIST_URL"]
def get_playlist_videos():
    """Return the videos currently listed in the playlist at PLAYLIST_URL.

    The playlist is extracted in flat mode with downloading disabled.

    Returns:
        dict: Maps each entry's ``id`` to its ``url``, in the order the
        entries are reported. Falsy entries are skipped.
    """
    options = {
        "extract_flat": True,
        "quiet": True,
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(options) as ydl:
        info = ydl.extract_info(PLAYLIST_URL, download=False)
        return {
            entry["id"]: entry["url"]
            for entry in info["entries"]
            if entry
        }