update config and podcast bot

2026-06-03 17:53:50 +00:00 · 2026-02-25 11:32:29 +03:00 · 2026-02-25 11:32:29 +03:00 · 80d5821e21
commit 80d5821e21
parent fb912c0190
12 changed files with 343 additions and 0 deletions
--- a/services/vk-podcast-bot/.env.template
+++ b/services/vk-podcast-bot/.env.template
@ -0,0 +1,3 @@
+TELEGRAM_TOKEN=CHANGE_ME
+CHAT_ID=CHANGE_ME
+VK_PLAYLIST_URL=https://vkvideo.ru/playlist/...
--- a/services/vk-podcast-bot/.gitignore
+++ b/services/vk-podcast-bot/.gitignore
@ -0,0 +1 @@
+.env
--- a/services/vk-podcast-bot/Dockerfile
+++ b/services/vk-podcast-bot/Dockerfile
@ -0,0 +1,18 @@
+FROM python:3.11-slim
+
+# ffmpeg (and the bundled ffprobe) are invoked by the bot's Python code.
+# --no-install-recommends keeps optional recommended packages out of the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Install dependencies before copying the sources so that this layer is
+# rebuilt only when requirements.txt changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+# create persistent folder
+# (docker-compose.yml bind-mounts a host directory over /data at runtime)
+RUN mkdir -p /data/audio
+
+# Unbuffered stdout so print() output reaches the container logs immediately.
+ENV PYTHONUNBUFFERED=1
+
+CMD ["python", "bot.py"]
--- a/services/vk-podcast-bot/audio_splitter.py
+++ b/services/vk-podcast-bot/audio_splitter.py
@ -0,0 +1,53 @@
+import subprocess
+from pathlib import Path
+import math
+
+# Largest size in bytes that split_audio returns unsplit; larger files are
+# cut into several parts.
+MAX_SIZE = 49 * 1024 * 1024  # 49 MB safety margin
+
+
+def get_duration_seconds(file_path: Path) -> float:
+    """Return the duration of a media file in seconds, as reported by ffprobe.
+
+    Raises:
+        subprocess.CalledProcessError: if ffprobe exits with a non-zero status.
+        ValueError: if ffprobe's output cannot be parsed as a float.
+    """
+    # Ask ffprobe for the container duration only, printed as a bare number.
+    probe_args = ["ffprobe", "-v", "error"]
+    probe_args += ["-show_entries", "format=duration"]
+    probe_args += ["-of", "default=noprint_wrappers=1:nokey=1"]
+    probe_args.append(str(file_path))
+
+    raw_output = subprocess.check_output(probe_args)
+    duration_text = raw_output.decode().strip()
+    return float(duration_text)
+
+
+def split_audio(file_path: Path):
+    """Split an oversized MP3 into several smaller files.
+
+    A file of at most MAX_SIZE bytes is returned unchanged as a one-element
+    list. A larger file is cut with ffmpeg (stream copy, no re-encoding) into
+    equal-duration parts named ``<stem>_part<N>.mp3`` next to the original.
+    The number of parts is derived from the file size; because the parts are
+    equal in duration rather than in bytes, their sizes are only approximate.
+    The original file is left in place.
+
+    Parts left over from an earlier call are reused only when every expected
+    part exists and is non-empty; otherwise the file is split again.
+
+    Args:
+        file_path: path of the MP3 file to split.
+
+    Returns:
+        List of paths in playback order (part 1 first).
+
+    Raises:
+        subprocess.CalledProcessError: if ffprobe or ffmpeg exits with a
+            non-zero status.
+    """
+    size = file_path.stat().st_size
+
+    if size <= MAX_SIZE:
+        return [file_path]
+
+    parts_count = math.ceil(size / MAX_SIZE)
+
+    # Build the expected names explicitly instead of globbing: this keeps the
+    # parts in numeric order (part2 before part10) and is not confused by
+    # glob metacharacters such as "[" or "*" in the title.
+    output_files = [
+        file_path.with_name(f"{file_path.stem}_part{number}.mp3")
+        for number in range(1, parts_count + 1)
+    ]
+
+    # Reuse an earlier split only if the whole set is present.
+    if all(part.is_file() and part.stat().st_size > 0 for part in output_files):
+        return output_files
+
+    duration = get_duration_seconds(file_path)
+    part_duration = duration / parts_count
+
+    for index, output in enumerate(output_files):
+        start = index * part_duration
+
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i", str(file_path),
+            "-ss", str(start),
+            "-t", str(part_duration),
+            "-acodec", "copy",
+            str(output)
+        ]
+
+        try:
+            # check=True: a failed ffmpeg run must not be reported as a part.
+            subprocess.run(
+                cmd,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                check=True,
+            )
+        except subprocess.CalledProcessError:
+            # Drop the broken output so a later call does not reuse it.
+            output.unlink(missing_ok=True)
+            raise
+
+    return output_files
--- a/services/vk-podcast-bot/bot.py
+++ b/services/vk-podcast-bot/bot.py
@ -0,0 +1,66 @@
+import time
+from state import load_state, save_state
+from vk_playlist import get_playlist_videos
+from downloader import download_audio, get_video_title
+from telegram_sender import send_audio, send_message
+
+# Pause between two playlist checks in main_loop.
+CHECK_INTERVAL = 300  # seconds
+
+
+def process_new_video(video_id, video_url, known_ids):
+    """Announce, download and upload one video, recording it on success.
+
+    The video ID is added to ``known_ids`` (mutated in place) and the state
+    is saved only when send_audio reports success, so a failed upload is
+    picked up again on a later check. Any exception is printed and
+    swallowed.
+    """
+    try:
+        title = get_video_title(video_url)
+
+        print("New podcast detected:", title)
+        send_message(f"New podcast: {title}")
+
+        # download_audio also returns a title; the one fetched above is used.
+        file_path, _unused_title = download_audio(video_url)
+
+        print("Sending audio:", title)
+        if not send_audio(file_path, title):
+            print("Upload failed — will retry later")
+            return
+
+        print("Upload completed:", title)
+        known_ids.add(video_id)
+        save_state(known_ids)
+
+    except Exception as exc:
+        print("Failed processing:", exc)
+
+
+def run_once():
+    """Run one playlist check and process videos that are not yet known.
+
+    When no saved state exists, the current playlist content is stored as
+    the baseline and nothing is downloaded.
+    """
+    known_ids, has_state = load_state()
+
+    videos = get_playlist_videos()
+    current_ids = set(videos)
+
+    if has_state:
+        new_ids = current_ids - known_ids
+
+        if new_ids:
+            print(f"Found {len(new_ids)} new videos")
+
+        for video_id in new_ids:
+            process_new_video(video_id, videos[video_id], known_ids)
+    else:
+        # first launch — remember everything
+        print("First run: indexing playlist only")
+        save_state(current_ids)
+
+
+def main_loop():
+    """Check the playlist forever, sleeping CHECK_INTERVAL seconds per cycle.
+
+    An exception raised during a cycle is printed and swallowed so that the
+    loop keeps running.
+    """
+    while True:
+        try:
+            print("Checking playlist...")
+            run_once()
+        except Exception as loop_error:
+            print("Loop error:", loop_error)
+
+        # Sleep after every cycle, whether it succeeded or failed.
+        time.sleep(CHECK_INTERVAL)
+
+
+if __name__ == "__main__":
+    main_loop()
--- a/services/vk-podcast-bot/docker-compose.yml
+++ b/services/vk-podcast-bot/docker-compose.yml
@ -0,0 +1,9 @@
+services:
+  vk-podcast-bot:
+    # Image is built from the Dockerfile in this directory.
+    build: .
+    container_name: vk-podcast-bot
+    restart: unless-stopped
+    # Supplies the variables listed in .env.template
+    # (TELEGRAM_TOKEN, CHAT_ID, VK_PLAYLIST_URL).
+    env_file:
+      - .env
+    # Host directory mounted at /data, where the bot keeps state.json and
+    # the downloaded audio files.
+    volumes:
+      - /srv/vk-podcast-bot/data:/data
--- a/services/vk-podcast-bot/downloader.py
+++ b/services/vk-podcast-bot/downloader.py
@ -0,0 +1,36 @@
+import yt_dlp
+from pathlib import Path
+
+# Directory where downloaded audio is stored. parents=True lets the import
+# succeed even when /data itself does not exist yet.
+DOWNLOAD_DIR = Path("/data/audio")
+DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
+
+def get_video_title(video_url):
+    """Return the title of a video without downloading it.
+
+    Falls back to "New podcast" when the extracted metadata has no
+    "title" key.
+    """
+    options = {"quiet": True, "skip_download": True}
+
+    with yt_dlp.YoutubeDL(options) as ydl:
+        metadata = ydl.extract_info(video_url, download=False)
+
+    return metadata.get("title", "New podcast")
+
+def download_audio(video_url):
+    """Download a video's audio track and convert it to a 192 kbps MP3.
+
+    The file is written to DOWNLOAD_DIR and named after the video title.
+
+    Args:
+        video_url: URL of a single video.
+
+    Returns:
+        Tuple ``(mp3_path, title)``; the title falls back to "audio" when
+        the extracted metadata has no "title" key.
+    """
+    ydl_opts = {
+        "format": "bestaudio/best",
+        # Built from DOWNLOAD_DIR so the target directory is defined once.
+        "outtmpl": str(DOWNLOAD_DIR / "%(title)s.%(ext)s"),
+        "postprocessors": [{
+            "key": "FFmpegExtractAudio",
+            "preferredcodec": "mp3",
+            "preferredquality": "192",
+        }],
+        "noplaylist": True,
+        "quiet": True,
+    }
+
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info = ydl.extract_info(video_url, download=True)
+        filename = ydl.prepare_filename(info)
+
+    # prepare_filename carries the source extension; the post-processor
+    # output is the same name with an .mp3 suffix.
+    mp3_file = Path(filename).with_suffix(".mp3")
+    return mp3_file, info.get("title", "audio")
--- a/services/vk-podcast-bot/requirements.txt
+++ b/services/vk-podcast-bot/requirements.txt
@ -0,0 +1,3 @@
+yt-dlp
+requests
+requests-toolbelt
--- a/services/vk-podcast-bot/state.py
+++ b/services/vk-podcast-bot/state.py
@ -0,0 +1,15 @@
+import json
+from pathlib import Path
+
+# JSON file that stores the known video IDs between checks.
+STATE_FILE = Path("/data/state.json")
+
+def load_state():
+    """Load the known video IDs from STATE_FILE.
+
+    Returns:
+        Tuple ``(known_ids, has_state)``: the set of IDs and a flag that is
+        False when the state file does not exist yet.
+    """
+    if STATE_FILE.exists():
+        payload = json.loads(STATE_FILE.read_text())
+        return set(payload.get("known_ids", [])), True
+
+    return set(), False
+
+def save_state(ids):
+    """Write the known video IDs to STATE_FILE as sorted JSON.
+
+    The data is written to a temporary file in the same directory and then
+    renamed over STATE_FILE, so an interrupted write cannot leave a
+    truncated state file behind.
+
+    Args:
+        ids: iterable of video IDs to store.
+    """
+    payload = json.dumps({
+        "known_ids": sorted(ids)
+    }, indent=2)
+
+    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
+    tmp_file.write_text(payload)
+    tmp_file.replace(STATE_FILE)
--- a/services/vk-podcast-bot/telegram_sender.py
+++ b/services/vk-podcast-bot/telegram_sender.py
@ -0,0 +1,103 @@
+import requests
+import time
+import os
+from pathlib import Path
+from audio_splitter import split_audio
+from requests_toolbelt.multipart.encoder import MultipartEncoder, MultipartEncoderMonitor
+from requests.exceptions import ConnectionError, ReadTimeout, ChunkedEncodingError
+
+# Bot token and destination chat, read from the environment at import time;
+# a missing variable raises KeyError.
+BOT_TOKEN = os.environ["TELEGRAM_TOKEN"]
+CHAT_ID = os.environ["CHAT_ID"]
+# Number of attempts upload_file makes for one file before giving up.
+MAX_RETRIES = 5
+
+def progress_callback(monitor):
+    """Print the upload progress of ``monitor`` as a percentage.
+
+    The leading carriage return makes each call overwrite the previous
+    progress output on the same line.
+    """
+    fraction_done = monitor.bytes_read / monitor.len
+    percent = fraction_done * 100
+    print(f"\rUploading: {percent:5.1f}%", end="", flush=True)
+
+def send_message(text):
+    """Send a text message to CHAT_ID through the bot's sendMessage endpoint.
+
+    A non-OK HTTP response is printed, not raised.
+    """
+    endpoint = f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage"
+    payload = {"chat_id": CHAT_ID, "text": text}
+
+    response = requests.post(endpoint, data=payload, timeout=30)
+    if response.ok:
+        return
+
+    print("Telegram message error:", response.text)
+
+def upload_file(file_path, caption):
+    """Upload one audio file through the bot's sendAudio endpoint.
+
+    The request is attempted up to MAX_RETRIES times. Between attempts the
+    function waits 5 * attempt seconds; no wait follows the final attempt.
+
+    Args:
+        file_path: Path of the file to upload.
+        caption: text sent in the "title" field of the request.
+
+    Returns:
+        True as soon as one attempt gets an OK response, False when every
+        attempt failed. Request errors are printed, not raised.
+    """
+    url = f"https://api.telegram.org/bot{BOT_TOKEN}/sendAudio"
+
+    for attempt in range(1, MAX_RETRIES + 1):
+        print(f"Connecting to Telegram (attempt {attempt}/{MAX_RETRIES})")
+
+        try:
+            with open(file_path, "rb") as f:
+
+                # Streaming multipart body: the file is read while it is
+                # being sent, and the monitor reports progress.
+                encoder = MultipartEncoder(
+                    fields={
+                        "chat_id": str(CHAT_ID),
+                        "title": caption,
+                        "audio": (file_path.name, f, "audio/mpeg"),
+                    }
+                )
+
+                monitor = MultipartEncoderMonitor(encoder, progress_callback)
+
+                print("Connection established, starting upload...")
+
+                r = requests.post(
+                    url,
+                    data=monitor,
+                    headers={"Content-Type": monitor.content_type},
+                    timeout=(20, 3600)  # (connect_timeout, read_timeout)
+                )
+
+            print("\nServer responded:", r.status_code)
+
+            if r.ok:
+                print("Upload success")
+                return True
+
+            print("Telegram API error:", r.text)
+
+        except ConnectionError:
+            print("\nConnection dropped by Telegram")
+
+        except ReadTimeout:
+            print("\nUpload timed out")
+
+        except ChunkedEncodingError:
+            print("\nChunked encoding error (connection reset)")
+
+        except Exception as e:
+            print("\nUnexpected error:", repr(e))
+
+        # Nothing left to retry: skip the announcement and the wait.
+        if attempt == MAX_RETRIES:
+            break
+
+        wait = 5 * attempt
+        print(f"Retrying in {wait} sec...\n")
+        time.sleep(wait)
+
+    return False
+
+def send_audio(file_path, title):
+    """Upload an audio file, split into parts when split_audio requires it.
+
+    Each part of a split file gets " (part i/n)" appended to its caption.
+    Files are deleted only after every part has been uploaded; on the first
+    failed upload the function returns False and leaves all files on disk.
+
+    Returns:
+        True when all parts were uploaded, False otherwise.
+    """
+    source = Path(file_path)
+    parts = split_audio(source)
+    total = len(parts)
+    is_split = total > 1
+
+    for number, part in enumerate(parts, start=1):
+        caption = f"{title} (part {number}/{total})" if is_split else title
+
+        print("Uploading:", part.name)
+
+        if not upload_file(part, caption):
+            # upload failed -> KEEP files for retry
+            return False
+
+    # Every part went through: remove the uploaded files.
+    for part in parts:
+        part.unlink(missing_ok=True)
+
+    # For a split upload the original mp3 is a separate file; remove it too.
+    if is_split and source.exists():
+        source.unlink(missing_ok=True)
+
+    return True
--- a/services/vk-podcast-bot/update.sh
+++ b/services/vk-podcast-bot/update.sh
@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+# Stop the service, rebuild its image without cache, start it again and
+# print the most recent log lines.
+set -euo pipefail
+
+# Print a progress banner in the "==> message" format.
+announce() {
+    echo "==> $1"
+}
+
+announce "Stopping service"
+docker compose down
+
+announce "Rebuilding image with updated source"
+docker compose build --no-cache
+
+announce "Starting service"
+docker compose up -d
+
+announce "Showing recent logs"
+docker compose logs --tail=100
+announce "Done. Use: docker compose logs -f"
--- a/services/vk-podcast-bot/vk_playlist.py
+++ b/services/vk-podcast-bot/vk_playlist.py
@ -0,0 +1,21 @@
+import yt_dlp
+import os
+
+# Playlist to watch, read from the environment at import time; a missing
+# variable raises KeyError.
+PLAYLIST_URL = os.environ["VK_PLAYLIST_URL"]
+
+def get_playlist_videos():
+    """Return the videos currently listed in PLAYLIST_URL.
+
+    The playlist is extracted in flat mode, without downloading anything.
+
+    Returns:
+        Dict mapping each entry's "id" to its "url"; falsy entries are
+        skipped.
+    """
+    options = {"extract_flat": True, "quiet": True, "skip_download": True}
+
+    with yt_dlp.YoutubeDL(options) as ydl:
+        playlist = ydl.extract_info(PLAYLIST_URL, download=False)
+
+    return {
+        entry["id"]: entry["url"]
+        for entry in playlist["entries"]
+        if entry
+    }