From 80d5821e21dba5bfeeeab9deeeba6182c43f2877 Mon Sep 17 00:00:00 2001 From: Pavel Date: Wed, 25 Feb 2026 11:32:29 +0300 Subject: [PATCH] update config and podcast bot --- services/vk-podcast-bot/.env.template | 3 + services/vk-podcast-bot/.gitignore | 1 + services/vk-podcast-bot/Dockerfile | 18 ++++ services/vk-podcast-bot/audio_splitter.py | 53 +++++++++++ services/vk-podcast-bot/bot.py | 66 +++++++++++++ services/vk-podcast-bot/docker-compose.yml | 9 ++ services/vk-podcast-bot/downloader.py | 36 +++++++ services/vk-podcast-bot/requirements.txt | 3 + services/vk-podcast-bot/state.py | 15 +++ services/vk-podcast-bot/telegram_sender.py | 103 +++++++++++++++++++++ services/vk-podcast-bot/update.sh | 15 +++ services/vk-podcast-bot/vk_playlist.py | 21 +++++ 12 files changed, 343 insertions(+) create mode 100644 services/vk-podcast-bot/.env.template create mode 100644 services/vk-podcast-bot/.gitignore create mode 100644 services/vk-podcast-bot/Dockerfile create mode 100644 services/vk-podcast-bot/audio_splitter.py create mode 100644 services/vk-podcast-bot/bot.py create mode 100644 services/vk-podcast-bot/docker-compose.yml create mode 100644 services/vk-podcast-bot/downloader.py create mode 100644 services/vk-podcast-bot/requirements.txt create mode 100644 services/vk-podcast-bot/state.py create mode 100644 services/vk-podcast-bot/telegram_sender.py create mode 100755 services/vk-podcast-bot/update.sh create mode 100644 services/vk-podcast-bot/vk_playlist.py diff --git a/services/vk-podcast-bot/.env.template b/services/vk-podcast-bot/.env.template new file mode 100644 index 0000000..97bddb9 --- /dev/null +++ b/services/vk-podcast-bot/.env.template @@ -0,0 +1,3 @@ +TELEGRAM_TOKEN=CHANGE_ME +CHAT_ID=CHANGE_ME +VK_PLAYLIST_URL=https://vkvideo.ru/playlist/... 
import math
import subprocess
from pathlib import Path

MAX_SIZE = 49 * 1024 * 1024  # 49 MB safety margin


def get_duration_seconds(file_path: Path) -> float:
    """Return the duration of *file_path* in seconds, as reported by ffprobe.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        ValueError: ffprobe's output could not be parsed as a number.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(file_path),
    ]
    return float(subprocess.check_output(cmd).decode().strip())


def split_audio(file_path: Path, max_size: int = MAX_SIZE) -> list[Path]:
    """Split *file_path* into equal-duration MP3 parts when it is too large.

    A file of at most *max_size* bytes is returned unchanged as a
    single-element list. Otherwise the file is cut with ffmpeg (stream copy,
    no re-encoding) into ``ceil(size / max_size)`` parts named
    ``<stem>_part<N>.mp3`` next to the original.

    Parts left by an earlier call are reused only when the complete set is
    present and every part is non-empty. Each part is written under a
    temporary name and renamed once ffmpeg succeeds, so an interrupted run
    never leaves a truncated file that looks like a finished part.

    Args:
        file_path: Audio file to inspect and, if needed, split.
        max_size: Size threshold in bytes; defaults to ``MAX_SIZE``.

    Returns:
        The paths to upload, in playback order.

    Raises:
        ValueError: *max_size* is not positive, or ffprobe reported a
            non-positive duration.
        subprocess.CalledProcessError: ffprobe or ffmpeg failed.
    """
    if max_size <= 0:
        raise ValueError("max_size must be positive")

    size = file_path.stat().st_size
    if size <= max_size:
        return [file_path]

    parts_count = math.ceil(size / max_size)
    # Build the expected names directly instead of globbing: a stem such as
    # "Show [ep 1]" contains glob metacharacters, and sorting glob results
    # lexicographically would put part10 before part2.
    outputs = [
        file_path.with_name(f"{file_path.stem}_part{number}.mp3")
        for number in range(1, parts_count + 1)
    ]

    if all(part.is_file() and part.stat().st_size > 0 for part in outputs):
        return outputs

    duration = get_duration_seconds(file_path)
    if duration <= 0:
        raise ValueError(f"cannot split {file_path}: duration is {duration}")
    part_duration = duration / parts_count

    for index, output in enumerate(outputs):
        partial = output.with_name(f"{output.stem}.partial.mp3")
        cmd = [
            "ffmpeg",
            "-y",
            "-i", str(file_path),
            "-ss", str(index * part_duration),
            "-t", str(part_duration),
            "-acodec", "copy",
            str(partial),
        ]
        try:
            # check=True: a failed ffmpeg run must not be reported as a part.
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )
            partial.replace(output)
        finally:
            # No-op after a successful rename; removes debris after a failure.
            partial.unlink(missing_ok=True)

    return outputs
import json
from pathlib import Path

STATE_FILE = Path("/data/state.json")


def load_state(path=None):
    """Load the set of already-known video ids.

    Args:
        path: State file to read; defaults to ``STATE_FILE``.

    Returns:
        ``(known_ids, has_state)``. ``has_state`` is ``False`` (and the set
        empty) when the state file does not exist yet.

    Raises:
        json.JSONDecodeError: the file exists but does not contain valid JSON.
    """
    state_file = STATE_FILE if path is None else Path(path)
    try:
        # Read directly instead of exists()+read: no window for the file to
        # disappear between the check and the read.
        raw = state_file.read_text(encoding="utf-8")
    except FileNotFoundError:
        return set(), False
    data = json.loads(raw)
    return set(data.get("known_ids", [])), True


def save_state(ids, path=None):
    """Persist *ids* as a sorted ``known_ids`` list in the state file.

    The JSON is written to a sibling temporary file and then renamed over the
    target, so a crash mid-write cannot leave a truncated state file behind.

    Args:
        ids: Iterable of sortable, JSON-serialisable video ids.
        path: State file to write; defaults to ``STATE_FILE``.
    """
    state_file = STATE_FILE if path is None else Path(path)
    payload = json.dumps({
        "known_ids": sorted(ids)
    }, indent=2)

    state_file.parent.mkdir(parents=True, exist_ok=True)
    tmp_file = state_file.with_name(state_file.name + ".tmp")
    tmp_file.write_text(payload, encoding="utf-8")
    tmp_file.replace(state_file)
import os
import time
from pathlib import Path

import requests
from requests.exceptions import ChunkedEncodingError, ConnectionError, ReadTimeout
from requests_toolbelt.multipart.encoder import MultipartEncoder, MultipartEncoderMonitor

from audio_splitter import split_audio

BOT_TOKEN = os.environ["TELEGRAM_TOKEN"]
CHAT_ID = os.environ["CHAT_ID"]
MAX_RETRIES = 5


def progress_callback(monitor):
    """Print the upload progress of *monitor* as a percentage on one line."""
    total = monitor.len
    # Guard against an empty body so the callback can never divide by zero.
    percent = monitor.bytes_read / total * 100 if total else 100.0
    print(f"\rUploading: {percent:5.1f}%", end="", flush=True)


def send_message(text):
    """Send *text* to the configured chat via the ``sendMessage`` method.

    An HTTP error response is only logged. Network-level exceptions raised
    by ``requests`` are not caught here and propagate to the caller.
    """
    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage"
    r = requests.post(url, data={
        "chat_id": CHAT_ID,
        "text": text
    }, timeout=30)

    if not r.ok:
        print("Telegram message error:", r.text)


def _retry_after_seconds(response):
    """Return the ``retry_after`` delay from an error response, or 0 if absent."""
    try:
        return int(response.json()["parameters"]["retry_after"])
    except (ValueError, KeyError, TypeError):
        return 0


def upload_file(file_path, caption):
    """Upload one audio file via ``sendAudio``, retrying transient failures.

    Up to ``MAX_RETRIES`` attempts are made with a linearly growing pause
    between them. A 429 response extends the pause to the ``retry_after``
    value in the response, if larger. Any other 4xx response is treated as
    permanent and ends the attempts at once, since resending the same
    request cannot succeed.

    Args:
        file_path: ``Path`` of the file to upload.
        caption: Text sent as the audio's ``title`` field.

    Returns:
        ``True`` if Telegram accepted the file, ``False`` otherwise.
    """
    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendAudio"

    for attempt in range(1, MAX_RETRIES + 1):
        print(f"Connecting to Telegram (attempt {attempt}/{MAX_RETRIES})")
        wait = 5 * attempt

        try:
            with open(file_path, "rb") as f:

                encoder = MultipartEncoder(
                    fields={
                        "chat_id": str(CHAT_ID),
                        "title": caption,
                        "audio": (file_path.name, f, "audio/mpeg"),
                    }
                )

                monitor = MultipartEncoderMonitor(encoder, progress_callback)

                # Nothing has been connected yet at this point; the request
                # below both connects and streams the body.
                print("Starting upload...")

                r = requests.post(
                    url,
                    data=monitor,
                    headers={"Content-Type": monitor.content_type},
                    timeout=(20, 3600)  # (connect_timeout, read_timeout)
                )

            print("\nServer responded:", r.status_code)

            if r.ok:
                print("Upload success")
                return True

            print("Telegram API error:", r.text)

            if r.status_code == 429:
                wait = max(wait, _retry_after_seconds(r))
            elif 400 <= r.status_code < 500:
                print("Request rejected by Telegram, not retrying")
                return False

        except ConnectionError:
            print("\nConnection dropped by Telegram")

        except ReadTimeout:
            print("\nUpload timed out")

        except ChunkedEncodingError:
            print("\nChunked encoding error (connection reset)")

        except Exception as e:
            print("\nUnexpected error:", repr(e))

        # Sleeping after the final attempt would only delay the failure.
        if attempt < MAX_RETRIES:
            print(f"Retrying in {wait} sec...\n")
            time.sleep(wait)

    return False


def send_audio(file_path, title):
    """Upload *file_path*, split into parts if needed, then delete the files.

    Parts are uploaded in order. The first failed part aborts the run and
    leaves every file on disk so a later call can retry.
    NOTE(review): a retry starts again from part 1, so parts that were
    already delivered are sent a second time.

    Returns:
        ``True`` when every part was uploaded, ``False`` otherwise.
    """
    original = Path(file_path)
    parts = split_audio(original)
    total = len(parts)

    for i, part in enumerate(parts, start=1):
        caption = title
        if total > 1:
            caption = f"{title} (part {i}/{total})"

        print("Uploading:", part.name)

        if not upload_file(part, caption):
            return False  # upload failed -> KEEP files for retry

    # if we got here, ALL parts uploaded successfully -> cleanup
    for part in parts:
        part.unlink(missing_ok=True)
    # also delete the original mp3 if we were uploading split parts
    if total > 1:
        original.unlink(missing_ok=True)

    return True
import os

import yt_dlp

PLAYLIST_URL = os.environ["VK_PLAYLIST_URL"]


def get_playlist_videos():
    """Return ``{video_id: video_url}`` for the entries of ``PLAYLIST_URL``.

    The playlist is listed in flat mode, without downloading anything.
    Entries that are empty or lack an id or URL are skipped, so one
    malformed item does not abort the whole listing.

    Raises:
        RuntimeError: yt-dlp returned no playlist data at all. This is raised
            rather than returning an empty mapping so that a failed lookup is
            not mistaken for an empty playlist.
    """
    ydl_opts = {
        "extract_flat": True,
        "quiet": True,
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(PLAYLIST_URL, download=False)

    if not info or "entries" not in info:
        raise RuntimeError(f"No playlist entries returned for {PLAYLIST_URL}")

    videos = {}
    for entry in info["entries"]:
        if not entry:
            continue
        video_id = entry.get("id")
        # presumably flat entries carry "url"; "webpage_url" is a fallback
        video_url = entry.get("url") or entry.get("webpage_url")
        if video_id and video_url:
            videos[video_id] = video_url

    return videos