🎉 | Project released

2026-05-16 00:15:37 +03:00
parent 3a3d368c4d
commit 975801ffca
7 changed files with 819 additions and 0 deletions
--- a/docs/README.md
+++ b/docs/README.md
@@ -0,0 +1,91 @@
 # 🪄 FOBG — AI Background Remover
 Remove backgrounds from **images** and **videos** using AI, powered by [rembg](https://github.com/danielgatis/rembg).
 ## Features
 - **Image BG Removal** — Upload any image, get a transparent PNG
 - **Video BG Removal** — Frame-by-frame processing with alpha channel
 - **Multiple AI Models** — u2net, u2netp, u2net_human_seg, isnet-general-use, isnet-anime
 - **Gradio UI** — Clean, interactive web interface
 - **CLI Mode** — Script-friendly command-line interface
 - **Fine-tune Controls** — Alpha matting, thresholds, erosion mask
 ## Quick Start
 ### 1. Install Dependencies
 ```bash
 cd FOBG                       # project root (wherever you cloned it)
 python -m venv .venv          # first time only
 source .venv/bin/activate
 pip install -r requirements.txt
 ```
 ### 2. Launch the Web UI
 ```bash
 python main.py
 ```
 Opens at [http://localhost:7860](http://localhost:7860)
 ### 3. CLI Mode
 ```bash
 python main.py --cli input.jpg           # Image
 python main.py --cli video.mp4 --video    # Video
 ```
 ## Project Structure
 ```
 FOBG/
 ├── main.py               # Entry point (Gradio + CLI)
 ├── requirements.txt      # Dependencies
 ├── src/
 │   ├── core/
 │   │   ├── bg_remove.py        # Image BG removal (rembg)
 │   │   └── video_bg_remove.py  # Video BG removal (OpenCV + rembg)
 │   └── ui/
 │       └── app.py              # Gradio web interface
 ├── output/               # Processed results saved here
 ├── tests/                # Unit tests
 └── docs/
    └── README.md         # This file
 ```
 ## Available Models
 | Model | Description | Speed |
 |-------|-------------|-------|
 | `u2net` | Default, balanced quality/speed | ⚡ Fast |
 | `u2netp` | Lightweight, slightly lower quality | 🚀 Fastest |
 | `u2net_human_seg` | Optimized for human segmentation | ⚡ Fast |
 | `u2net_3b` | Heavy, higher accuracy | 🐢 Slower |
 | `isnet-general-use` | General purpose, high accuracy | 🐢 Slower |
 | `isnet-anime` | Optimized for anime illustrations | ⚡ Fast |
 ## Controls
 ### Image Tab
 - **Upload Image** — PNG, JPEG, or WebP
 - **Model** — Choose the AI model
 - **Alpha Matting** — Fine-tune edges (slower but cleaner)
 - **Foreground Threshold** — How much is considered foreground (1-255)
 - **Background Threshold** — How much is considered background (1-255)
 - **Erosion Mask Dilation** — Morphological operation on the mask (0-32)
 ### Video Tab
 - **Upload Video** — MP4, AVI, WebM, or MOV
 - **Model** — Same models as image tab
 - **Alpha Matting** — Fine edge detection per frame
 - **Erosion Mask Dilation** — Morphological operation on the mask
 ## Notes
 - Image processing: 1–5 seconds per image
 - Video processing: frame-by-frame, can take minutes for longer clips
 - All outputs saved to `output/` directory
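 - First run downloads the AI model (~170 MB for u2net)
 - Video encoding calls the `ffmpeg` executable, which must be installed and available on your `PATH`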
--- a/main.py
+++ b/main.py
@@ -0,0 +1,90 @@
 #!/usr/bin/env python3
 """FOBG — AI Background Remover — Entry Point.
 Usage:
    python main.py                          # Launch Gradio UI (default)
    python main.py --host 0.0.0.0 --port 7860
    python main.py --share                  # Create public Gradio share link
    python main.py --cli --help             # CLI mode (scripted use)
 """
 import argparse
 import sys
 import os
 # Ensure the project root is on sys.path
 _PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, _PROJECT_ROOT)
 from src.ui.app import launch_app
 def cli_demo():
    """Process a single image or video from the command line.

    Arguments are read from ``sys.argv``. The input is treated as a video
    when ``--video`` is given or when its extension is a known video
    extension. If ``--output`` is omitted, a name of the form
    ``output_<stem>_<random hex>`` is generated in the current directory.
    A default extension (``.png`` for images, ``.mp4`` for videos) is added
    only when the output path has no extension, so ``-o result.png`` and
    ``-o clip.mov`` are used exactly as given.

    Exits with status 1 if the input file does not exist.
    """
    import argparse
    parser = argparse.ArgumentParser(description="FOBG CLI — process images/videos")
    parser.add_argument("input", help="Input image or video file path")
    parser.add_argument("-o", "--output", help="Output file path (auto-generated if omitted)")
    parser.add_argument("--model", default="u2net", choices=["u2net", "u2netp", "u2net_human_seg", "isnet-general-use", "isnet-anime"], help="rembg model")
    # Both flags write to the same destination; alpha matting is on by default.
    parser.add_argument("--alpha-matting", dest="alpha_matting", action="store_true", default=True, help="Use alpha matting")
    parser.add_argument("--no-alpha-matting", dest="alpha_matting", action="store_false", help="Disable alpha matting")
    parser.add_argument("--video", action="store_true", help="Treat input as video (frame-by-frame processing)")
    args = parser.parse_args()
    input_path = args.input
    if not os.path.exists(input_path):
        print(f"Error: File not found: {input_path}", file=sys.stderr)
        sys.exit(1)
    is_video = args.video or input_path.lower().endswith((".mp4", ".avi", ".webm", ".mov", ".mkv"))
    default_ext = ".mp4" if is_video else ".png"
    if args.output:
        output = args.output
        # Only add an extension when the user did not supply one; appending
        # unconditionally would turn "out.png" into "out.png.png".
        if not os.path.splitext(output)[1]:
            output += default_ext
    else:
        stem = os.path.splitext(os.path.basename(input_path))[0]
        output = f"output_{stem}_{os.urandom(4).hex()}{default_ext}"
    if is_video:
        from src.core.video_bg_remove import process_video_frame_by_frame
        print(f"Processing video: {input_path}")
        result = process_video_frame_by_frame(
            input_path, output,
            model=args.model,
            alpha_matting=args.alpha_matting,
        )
        print(f"Video saved to: {result}")
    else:
        from PIL import Image
        from src.core.bg_remove import remove_background_image
        print(f"Processing image: {input_path}")
        # convert() returns a fully loaded copy, so the file can be closed right away.
        with Image.open(input_path) as source:
            input_img = source.convert("RGB")
        result = remove_background_image(input_img, model=args.model, alpha_matting=args.alpha_matting)
        result.save(output, format="PNG")
        print(f"Image saved to: {output}")
 def main():
    """Entry point: run the CLI when ``--cli`` is present, else launch the Gradio UI.

    CLI mode has its own argument parser (positional input, ``--model``, ...).
    It is therefore dispatched *before* the UI parser runs; otherwise the UI
    parser would reject the CLI-only arguments as unrecognized, and
    ``--cli --help`` would print the UI help instead of the CLI help.
    """
    raw_args = sys.argv[1:]
    if "--cli" in raw_args:
        # cli_demo() reads sys.argv, so hand it the arguments minus the mode switch.
        sys.argv = ["fobg"] + [arg for arg in raw_args if arg != "--cli"]
        cli_demo()
        return
    parser = argparse.ArgumentParser(description="FOBG — AI Background Remover")
    # Declared only so that it shows up in --help; it is handled above.
    parser.add_argument("--cli", action="store_true", help="Run in CLI mode for batch processing")
    parser.add_argument("--host", default="0.0.0.0", help="Gradio server host")
    parser.add_argument("--port", type=int, default=7860, help="Gradio server port")
    parser.add_argument("--share", action="store_true", help="Create a public share link")
    parser.add_argument("--no-browser", action="store_true", help="Do not open browser")
    args = parser.parse_args()
    launch_app(
        host=args.host,
        port=args.port,
        share=args.share,
        inbrowser=not args.no_browser,
    )
 if __name__ == "__main__":
    main()
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,14 @@
 # AI background removal
 rembg>=2.0.0
 # Video frame processing
 opencv-python-headless>=4.8.0
 # Image I/O and manipulation
 Pillow>=10.0.0
 # Numeric array operations (used by both rembg internals and video processing)
 numpy>=1.24.0
 # Gradio web UI
 gradio>=4.0.0
--- a/src/core/bg_remove.py
+++ b/src/core/bg_remove.py
@@ -0,0 +1,96 @@
 """Core image background removal using rembg."""
 import io
 from typing import Optional
 import numpy as np
 from PIL import Image
 import rembg
 # Sessions are cached per model name so each model is loaded only once per process.
 _DEFAULT_MODEL = "u2net"
 _DEFAULT_SESSION = None  # Lazy-loaded session for _DEFAULT_MODEL
 _SESSION_CACHE = {}  # model name -> rembg session
 def _get_session(model=None):
    """Return a cached rembg session for ``model`` (``_DEFAULT_MODEL`` when falsy)."""
    global _DEFAULT_SESSION
    name = model or _DEFAULT_MODEL
    cached = _SESSION_CACHE.get(name)
    if cached is None:
        cached = rembg.new_session(name)
        _SESSION_CACHE[name] = cached
        if name == _DEFAULT_MODEL:
            _DEFAULT_SESSION = cached
    return cached
 def remove_background_image(
    input_image: Image.Image,
    model: Optional[str] = None,
    session: "Optional[rembg.bg.BaseSession]" = None,
    force_return_bytes: bool = False,
    alpha_matting: bool = True,
    alpha_matting_foreground_threshold: int = 240,
    alpha_matting_background_threshold: int = 10,
    alpha_matting_erode_size: int = 10,
    **kwargs,
 ) -> Image.Image:
    """
    Remove background from a PIL Image.
    Parameters
    ----------
    input_image : PIL.Image.Image
        Input image; converted to RGBA before processing if it is in another mode.
    model : str, optional
        rembg model name (e.g. 'u2net', 'u2net_human_seg', 'isnet-general-use').
        Used to create (and cache) a session when ``session`` is not given.
        Defaults to ``_DEFAULT_MODEL``.
    session : rembg.bg.BaseSession, optional
        Re-use an existing session. When given, ``model`` is ignored.
    force_return_bytes : bool
        Forwarded to ``rembg.remove``. A bytes result is decoded again, so this
        function returns a PIL Image either way.
    alpha_matting : bool
        Use alpha matting for finer edge detection.
    alpha_matting_foreground_threshold : int
        Threshold for foreground mask.
    alpha_matting_background_threshold : int
        Threshold for background mask.
    alpha_matting_erode_size : int
        Erosion size for the mask.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``rembg.remove``.
    Returns
    -------
    PIL.Image.Image
        The image returned by ``rembg.remove`` (background removed).
    """
    # Ensure RGBA
    if input_image.mode != "RGBA":
        input_image = input_image.convert("RGBA")
    # Without an explicit session, honor the requested model instead of
    # silently falling back to rembg's default on every call.
    if session is None:
        session = _get_session(model)
    result = rembg.remove(
        input_image,
        session=session,
        alpha_matting=alpha_matting,
        alpha_matting_foreground_threshold=alpha_matting_foreground_threshold,
        alpha_matting_background_threshold=alpha_matting_background_threshold,
        alpha_matting_erode_size=alpha_matting_erode_size,
        force_return_bytes=force_return_bytes,
        **kwargs,
    )
    if isinstance(result, bytes):
        result = Image.open(io.BytesIO(result))
    return result
 def remove_background_bytes(
    input_bytes: bytes,
    model: Optional[str] = None,
    session: "Optional[rembg.bg.BaseSession]" = None,
    **kwargs,
 ) -> bytes:
    """
    Remove background from image bytes, return PNG bytes.
    Parameters
    ----------
    input_bytes : bytes
        Raw image bytes (JPEG, PNG, etc.).
    model, session : see remove_background_image
    **kwargs
        Extra keyword arguments forwarded to ``remove_background_image``.
    Returns
    -------
    bytes
        PNG-encoded result of ``remove_background_image``.
    """
    # The session annotation is a string on purpose: rembg exposes no
    # ``rembg.Session`` attribute, and an eagerly evaluated annotation would
    # raise AttributeError while this module is being imported.
    input_image = Image.open(io.BytesIO(input_bytes))
    result = remove_background_image(input_image, model=model, session=session, **kwargs)
    output = io.BytesIO()
    result.save(output, format="PNG")
    return output.getvalue()
--- a/src/core/video_bg_remove.py
+++ b/src/core/video_bg_remove.py
@@ -0,0 +1,262 @@
 """Video background removal frame-by-frame using rembg + ffmpeg."""
 import io
 import os
 import subprocess
 import tempfile
 from typing import Optional
 import cv2
 import numpy as np
 from PIL import Image
 import rembg
 def _encode_png_sequence(
    frames: list[np.ndarray],
    tmp_dir: str,
    fps: float,
    output_path: str,
    use_alpha: bool,
 ) -> str:
    """
    Write RGBA frames as PNG in tmp_dir, then encode to video via ffmpeg.
    For .mov output, uses ProRes 4444 with alpha channel.
    For .mp4 (or anything else), uses libx264 (no alpha in MP4 container).
    """
    frames_dir = os.path.join(tmp_dir, "frames")
    os.makedirs(frames_dir, exist_ok=True)
    # Write each frame as PNG (preserves RGBA/alpha)
    for i, frame in enumerate(frames):
        path = os.path.join(frames_dir, f"frame_{i:04d}.png")
        Image.fromarray(frame, "RGBA").save(path, "PNG")
    # Determine pattern for ffmpeg
    png_files = sorted(os.listdir(frames_dir))
    num_digits = len(png_files[0].replace("frame_", "").replace(".png", "")) if png_files else 4
    pattern = f"frame_%0{num_digits}d.png"
    if use_alpha:
        # .mov with ProRes 4444 alpha
        cmd = [
            "ffmpeg", "-y",
            "-framerate", str(fps),
            "-i", os.path.join(frames_dir, pattern),
            "-c:v", "prores",
            "-profile:v", "4",
            "-pix_fmt", "yuva444p10le",
            "-c:a", "none",
            output_path,
        ]
    else:
        # .mp4 with libx264 (solid black background where alpha is 0)
        cmd = [
            "ffmpeg", "-y",
            "-framerate", str(fps),
            "-i", os.path.join(frames_dir, pattern),
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-preset", "medium",
            "-crf", "18",
            output_path,
        ]
    subprocess.run(cmd, check=True)
    return output_path
 def _is_mov(output_path: str) -> bool:
    return os.path.splitext(output_path)[1].lower() == ".mov"
 def process_video_frame_by_frame(
    input_path: str,
    output_path: str,
    fps: Optional[float] = None,
    model: Optional[str] = None,
    session: Optional[rembg.bg.BaseSession] = None,
    alpha_matting: bool = True,
    alpha_matting_erode_size: int = 10,
    progress_callback=None,
    **kwargs,
 ) -> str:
    """
    Remove background from a video, frame-by-frame.
    Reads frames from ``input_path``, runs each through rembg,
    and writes the result to ``output_path``. For .mov output, a ProRes
    video with alpha is produced; otherwise an MP4 without alpha is produced.
    Frames are processed lazily and handed to the encoder one at a time, so
    memory use does not grow with the length of the video.
    Parameters
    ----------
    input_path : str
        Path to input video (MP4, AVI, WebM, etc.).
    output_path : str
        Path to write the output video. Use ``.mov`` for alpha, ``.mp4`` for solid.
    fps : float, optional
        Override the output FPS. Taken from the source (30.0 if unknown) if omitted.
    model : str, optional
        rembg model name used when ``session`` is not given (default ``"u2net"``).
    session : rembg.bg.BaseSession, optional
        Shared session. One is created once per call if omitted.
    alpha_matting : bool
        Use alpha matting for finer edges.
    alpha_matting_erode_size : int
        Erosion size for alpha matting mask.
    progress_callback : callable, optional
        Callback of type ``callback(frame_index, total_frames)`` for progress tracking.
        ``total_frames`` is whatever the container reports.
    **kwargs : dict
        Extra kwargs passed to rembg.remove.
    Returns
    -------
    str
        Path to the output video file.
    Raises
    ------
    ValueError
        If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise ValueError(f"Cannot open video file: {input_path}")
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        src_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        fps = fps if fps is not None else src_fps
        # Create the session once up front: with session=None rembg would
        # build a fresh session for every single frame.
        if session is None:
            session = rembg.new_session(model or "u2net")
        def _processed_frames():
            """Yield one RGBA array per source frame, reporting progress."""
            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = rembg.remove(
                    Image.fromarray(rgb_frame),
                    session=session,
                    alpha_matting=alpha_matting,
                    alpha_matting_erode_size=alpha_matting_erode_size,
                    **kwargs,
                )
                result_img = Image.open(io.BytesIO(result)) if isinstance(result, bytes) else result
                frame_idx += 1
                if progress_callback:
                    progress_callback(frame_idx, total_frames)
                yield np.array(result_img.convert("RGBA"))
        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            # The encoder iterates the frames exactly once, so a generator is enough.
            _encode_png_sequence(_processed_frames(), tmpdir, fps, output_path, _is_mov(output_path))
    finally:
        cap.release()
    return output_path
 def process_video_with_background(
    input_path: str,
    output_path: str,
    bg_image: Optional[str] = None,
    fps: Optional[float] = None,
    model: Optional[str] = None,
    session: "Optional[rembg.bg.BaseSession]" = None,
    progress_callback=None,
    **kwargs,
 ) -> str:
    """
    Remove background from video and composite onto a new background.
    Parameters
    ----------
    input_path : str
        Input video path.
    output_path : str
        Output video path.
    bg_image : str, optional
        Path to a background image; it is resized to the video's frame size.
        If None, the transparent frames are encoded as-is.
    fps : float, optional
        Output FPS. Taken from the source (30.0 if unknown) if omitted.
    model : str, optional
        rembg model name used when ``session`` is not given (default ``"u2net"``).
    session : rembg.bg.BaseSession, optional
        Shared session. One is created once per call if omitted.
    progress_callback : callable, optional
        Callback of type ``callback(frame_index, total_frames)``.
    **kwargs : dict
        Extra kwargs for rembg.
    Returns
    -------
    str
        Path to the output video.
    Raises
    ------
    ValueError
        If the input video or the background image cannot be read.
    """
    # NOTE: the session annotation is a string because rembg exposes no
    # ``rembg.Session`` attribute; evaluating it eagerly fails at import time.
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise ValueError(f"Cannot open video file: {input_path}")
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        src_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        fps = fps if fps is not None else src_fps
        bg_rgb = None
        if bg_image:
            bg_bgr = cv2.imread(bg_image)
            # cv2.imread signals failure by returning None instead of raising.
            if bg_bgr is None:
                raise ValueError(f"Cannot read background image: {bg_image}")
            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            bg_resized = cv2.resize(cv2.cvtColor(bg_bgr, cv2.COLOR_BGR2RGB), (out_w, out_h))
            # Converted once here rather than once per frame.
            bg_rgb = bg_resized.astype(np.float64)
        # Create the session once up front: with session=None rembg would
        # build a fresh session for every single frame.
        if session is None:
            session = rembg.new_session(model or "u2net")
        def _composited_frames():
            """Yield one RGBA array per source frame, reporting progress."""
            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = rembg.remove(Image.fromarray(rgb_frame), session=session, **kwargs)
                result_img = Image.open(io.BytesIO(result)) if isinstance(result, bytes) else result
                fg = np.array(result_img.convert("RGBA"))
                if bg_rgb is not None:
                    alpha = fg[:, :, 3:4].astype(np.float64) / 255.0
                    fg_rgb = fg[:, :, :3].astype(np.float64)
                    composited = (fg_rgb * alpha + bg_rgb * (1.0 - alpha)).astype(np.uint8)
                    # Keep four channels (fully opaque) so every frame handed
                    # to the encoder is RGBA.
                    opaque = np.full(composited.shape[:2], 255, dtype=np.uint8)
                    fg = np.dstack([composited, opaque])
                frame_idx += 1
                if progress_callback:
                    progress_callback(frame_idx, total_frames)
                yield fg
        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            # The encoder iterates the frames exactly once, so a generator is enough.
            _encode_png_sequence(_composited_frames(), tmpdir, fps, output_path, _is_mov(output_path))
    finally:
        cap.release()
    return output_path
--- a/src/ui/app.py
+++ b/src/ui/app.py
@@ -0,0 +1,266 @@
 """Gradio-based GUI for BG removal (images + video)."""
 import os
 from PIL import Image
 import gradio as gr
 from ..core.bg_remove import remove_background_image
 from ..core.video_bg_remove import process_video_frame_by_frame
 # ── Globals ──────────────────────────────────────────────────────────────────
 # Results are written to an "output" folder three directory levels above this
 # file; the folder is created as a side effect of importing this module.
 OUTPUT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "output")
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # (label shown in the dropdown, rembg model name) pairs.
 # Only names that rembg can load belong here: an unknown name makes
 # rembg.new_session() fail as soon as the model is selected.
 MODEL_OPTIONS = [
    ("u2net (default)", "u2net"),
    ("u2net human_seg", "u2net_human_seg"),
    ("u2netp (light)", "u2netp"),
    ("isnet-general-use", "isnet-general-use"),
    ("isnet-anime (anime)", "isnet-anime"),
 ]
 def _save_output_filename(input_path: str = "", suffix: str = ".png") -> str:
    """Build the path inside OUTPUT_DIR where a result should be written.

    With an input path the file is named ``<input stem>_bg<suffix>``;
    without one it is named ``output<suffix>``.
    """
    if not input_path:
        return os.path.join(OUTPUT_DIR, f"output{suffix}")
    base_name = os.path.basename(input_path)
    stem, _ = os.path.splitext(base_name)
    return os.path.join(OUTPUT_DIR, f"{stem}_bg{suffix}")
 def _remove_bg(
    image_input,
    model_name: str = "u2net",
    alpha_matting: bool = True,
    foreground_threshold: int = 240,
    background_threshold: int = 10,
    erosion_dilate: int = 0,
 ) -> Image.Image:
    """
    Gradio callback: remove background from an image.
    The result is also saved as PNG in the output directory.
    Parameters
    ----------
    image_input : str or PIL.Image.Image
        Path of the uploaded image, or an already loaded PIL image.
    model_name : str
        rembg model to use.
    alpha_matting : bool
        Enable fine edge detection.
    foreground_threshold, background_threshold : int
        Alpha matting thresholds.
    erosion_dilate : int
        Value of the erosion slider; passed on as ``alpha_matting_erode_size``.
    Returns
    -------
    PIL.Image.Image
        Result with transparent background.
    Raises
    ------
    gr.Error
        If no image was provided or its type is unsupported.
    """
    if image_input is None:
        raise gr.Error("No image provided. Please upload an image.")
    if isinstance(image_input, Image.Image):
        # gr.Image with type="pil" returns the image directly; there is no
        # source path to derive the output name from.
        input_img = image_input.convert("RGB")
        source_path = ""
    elif isinstance(image_input, str):
        # convert() returns a loaded copy, so the file can be closed right away.
        with Image.open(image_input) as source:
            input_img = source.convert("RGB")
        source_path = image_input
    else:
        raise gr.Error("Unsupported image input type.")
    result = remove_background_image(
        input_img,
        model=model_name,
        alpha_matting=alpha_matting,
        alpha_matting_foreground_threshold=foreground_threshold,
        alpha_matting_background_threshold=background_threshold,
        # Use the real parameter name (an unknown keyword would simply have
        # no effect) and an int, because the value is used as a kernel size.
        alpha_matting_erode_size=int(erosion_dilate),
    )
    output_path = _save_output_filename(source_path, ".png")
    result.save(output_path, format="PNG")
    return result
 def _set_download(path):
    """Return a ``gr.File`` whose value is ``path`` (``None`` clears it)."""
    file_value = None if path is None else path
    return gr.File(value=file_value)
 def _remove_bg_video(
    video_input,
    model_name: str = "u2net",
    alpha_matting: bool = True,
    foreground_threshold: int = 240,
    background_threshold: int = 10,
    erosion_dilate: int = 0,
    progress=gr.Progress(),
 ):
    """
    Gradio callback: remove background from a video.
    The result is written as ``.mov`` into the output directory.
    Parameters
    ----------
    video_input : str
        Uploaded video file path.
    model_name : str
        rembg model to use.
    alpha_matting : bool
        Enable fine edge detection per frame.
    foreground_threshold, background_threshold : int
        Alpha matting thresholds, forwarded to rembg for every frame.
    erosion_dilate : int
        Value of the erosion slider; passed on as ``alpha_matting_erode_size``.
    progress : gr.Progress
        Gradio progress tracker.
    Returns
    -------
    tuple[str, str]
        The output video path twice: once for the video player and once for
        the state that feeds the download link.
    Raises
    ------
    gr.Error
        If no video was provided.
    """
    if video_input is None:
        raise gr.Error("No video provided. Please upload a video.")
    output_path = _save_output_filename(video_input, ".mov")
    def _progress_callback(frame_idx, total_frames):
        # Skip updates when the frame count is unknown (reported as 0 or less).
        if total_frames > 0:
            progress(frame_idx / total_frames, desc=f"Processing frame {frame_idx}/{total_frames}")
    result_path = process_video_frame_by_frame(
        video_input,
        output_path,
        model=model_name,
        alpha_matting=alpha_matting,
        # Use the real parameter name (an unknown keyword would simply have
        # no effect) and an int, because the value is used as a kernel size.
        alpha_matting_erode_size=int(erosion_dilate),
        alpha_matting_foreground_threshold=foreground_threshold,
        alpha_matting_background_threshold=background_threshold,
        progress_callback=_progress_callback,
    )
    return result_path, result_path
 # ── Gradio Blocks ────────────────────────────────────────────────────────────
 def create_app() -> gr.Blocks:
    """Build and return the Gradio app.

    The UI has an Image tab and a Video tab with the same set of controls
    (model, alpha matting, foreground/background thresholds, erosion).
    Each tab's button is wired to ``_remove_bg`` / ``_remove_bg_video``.
    """
    with gr.Blocks(title="FOBG — AI Background Remover") as app:
        gr.Markdown(
            """
            # 🪄 FOBG — AI Background Remover
            Remove backgrounds from **images** or **videos** using AI.
            Powered by [rembg](https://github.com/danielgatis/rembg) with multiple model options.
            """
        )
        with gr.Tabs():
            # ── Image Tab ──────────────────────────────────────────
            with gr.TabItem("🖼️ Image"):
                with gr.Row():
                    with gr.Column():
                        # type="filepath": the callback receives a path, which
                        # it also uses to name the saved result.
                        image_input = gr.Image(
                            label="Upload Image",
                            sources=["upload", "clipboard"],
                            type="filepath",
                        )
                        model_select_img = gr.Dropdown(
                            choices=MODEL_OPTIONS,
                            value="u2net",
                            label="Model",
                        )
                        img_alpha = gr.Checkbox(label="Alpha Matting", value=True)
                        img_fg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=240, step=1, label="Foreground Threshold"
                        )
                        img_bg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=10, step=1, label="Background Threshold"
                        )
                        img_erosion = gr.Slider(
                            minimum=0, maximum=32, value=0, step=1, label="Erosion Mask Dilation"
                        )
                        img_btn = gr.Button("Remove Background", variant="primary")
                    with gr.Column():
                        image_output = gr.Image(label="Result")
                # Input order must match the parameter order of _remove_bg.
                img_btn.click(
                    fn=_remove_bg,
                    inputs=[image_input, model_select_img, img_alpha, img_fg_threshold, img_bg_threshold, img_erosion],
                    outputs=[image_output],
                )
            # ── Video Tab ──────────────────────────────────────────
            with gr.TabItem("🎥 Video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(
                            label="Upload Video",
                            sources=["upload", "webcam"],
                        )
                        model_select_vid = gr.Dropdown(
                            choices=MODEL_OPTIONS,
                            value="u2net",
                            label="Model",
                        )
                        vid_alpha = gr.Checkbox(label="Alpha Matting", value=True)
                        vid_fg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=240, step=1, label="Foreground Threshold"
                        )
                        vid_bg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=10, step=1, label="Background Threshold"
                        )
                        vid_erosion = gr.Slider(
                            minimum=0, maximum=32, value=0, step=1, label="Erosion Mask Dilation"
                        )
                        vid_btn = gr.Button("Remove Background", variant="primary")
                    with gr.Column():
                        video_output = gr.Video(label="Result")
                        video_download = gr.File(label="Download .MOV")
                        video_path_state = gr.State()
                # First click: process video (shows progress on result), save path to state
                # Then: populate download link from state (no progress shown)
                vid_btn.click(
                    fn=_remove_bg_video,
                    inputs=[video_input, model_select_vid, vid_alpha, vid_fg_threshold, vid_bg_threshold, vid_erosion],
                    outputs=[video_output, video_path_state],
                ).then(
                    fn=_set_download,
                    inputs=[video_path_state],
                    outputs=[video_download],
                )
        gr.Markdown(
            """
            ### Notes
            - **🖼️ Image**: Single images — fast, typically 1-5 seconds depending on resolution and model.
            - **🎥 Video**: Videos (MP4, WebM, etc.) — frame-by-frame processing. Can take minutes for long videos.
            - **Models**: Lighter models (u2netp) are faster; heavier models (u2net, isnet) are more accurate.
            - **Output**: Images are saved as PNG with transparency. Videos are saved as MOV with alpha channel.
            - Results are stored in the `output/` directory.
            """
        )
    return app
 def launch_app(
    host: str = "0.0.0.0",
    port: int = 7860,
    share: bool = False,
    inbrowser: bool = True,
 ) -> None:
    """Create the Gradio app and start its server.

    ``host`` and ``port`` select the bind address, ``share`` requests a
    public Gradio link, and ``inbrowser`` opens a browser tab on start.
    """
    demo = create_app()
    launch_options = {
        "server_name": host,
        "server_port": port,
        "share": share,
        "inbrowser": inbrowser,
        "theme": gr.themes.Soft(),
    }
    demo.launch(**launch_options)
--- a/tests/test_image.png
+++ b/tests/test_image.png