From 975801ffca7cac7e2128a75d3daaf9d2e18d708f Mon Sep 17 00:00:00 2001 From: NikkeDoy Date: Sat, 16 May 2026 00:15:37 +0300 Subject: [PATCH] :tada: | Project released --- docs/README.md | 91 ++++++++++++ main.py | 90 ++++++++++++ requirements.txt | 14 ++ src/core/bg_remove.py | 96 +++++++++++++ src/core/video_bg_remove.py | 262 +++++++++++++++++++++++++++++++++++ src/ui/app.py | 266 ++++++++++++++++++++++++++++++++++++ tests/test_image.png | Bin 0 -> 586 bytes 7 files changed, 819 insertions(+) create mode 100644 docs/README.md create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 src/core/bg_remove.py create mode 100644 src/core/video_bg_remove.py create mode 100644 src/ui/app.py create mode 100644 tests/test_image.png diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..8fd1c7a --- /dev/null +++ b/docs/README.md @@ -0,0 +1,91 @@ +# 🪄 FOBG — AI Background Remover + +Remove backgrounds from **images** and **videos** using AI, powered by [rembg](https://github.com/danielgatis/rembg). + +## Features + +- **Image BG Removal** — Upload any image, get a transparent PNG +- **Video BG Removal** — Frame-by-frame processing with alpha channel +- **Multiple AI Models** — u2net, u2netp, u2net_human_seg, isnet-general-use, isnet-anime +- **Gradio UI** — Clean, interactive web interface +- **CLI Mode** — Script-friendly command-line interface +- **Fine-tune Controls** — Alpha matting, thresholds, erosion mask + +## Quick Start + +### 1. Install Dependencies + +```bash +cd FOBG +source .venv/bin/activate +pip install -r requirements.txt +``` + +### 2. Launch the Web UI + +```bash +python main.py +``` + +Opens at [http://localhost:7860](http://localhost:7860) + +### 3. 
CLI Mode + +```bash +python main.py --cli input.jpg # Image +python main.py --cli video.mp4 --video # Video +``` + +## Project Structure + +``` +FOBG/ +├── main.py # Entry point (Gradio + CLI) +├── requirements.txt # Dependencies +├── src/ +│ ├── core/ +│ │ ├── bg_remove.py # Image BG removal (rembg) +│ │ └── video_bg_remove.py # Video BG removal (OpenCV + rembg) +│ └── ui/ +│ └── app.py # Gradio web interface +├── output/ # Processed results saved here +├── tests/ # Unit tests +└── docs/ + └── README.md # This file +``` + +## Available Models + +| Model | Description | Speed | +|-------|-------------|-------| +| `u2net` | Default, balanced quality/speed | ⚡ Fast | +| `u2netp` | Lightweight, slightly lower quality | 🚀 Fastest | +| `u2net_human_seg` | Optimized for human segmentation | ⚡ Fast | +| `u2net_3b` | Heavy, higher accuracy | 🐢 Slower | +| `isnet-general-use` | General purpose, high accuracy | 🐢 Slower | +| `isnet-anime` | Optimized for anime illustrations | ⚡ Fast | + +## Controls + +### Image Tab + +- **Upload Image** — PNG, JPEG, or WebP +- **Model** — Choose the AI model +- **Alpha Matting** — Fine-tune edges (slower but cleaner) +- **Foreground Threshold** — How much is considered foreground (1-255) +- **Background Threshold** — How much is considered background (1-255) +- **Erosion Mask Dilation** — Morphological operation on the mask (0-32) + +### Video Tab + +- **Upload Video** — MP4, AVI, WebM, or MOV +- **Model** — Same models as image tab +- **Alpha Matting** — Fine edge detection per frame +- **Erosion Mask Dilation** — Morphological operation on the mask + +## Notes + +- Image processing: 1–5 seconds per image +- Video processing: frame-by-frame, can take minutes for longer clips +- All outputs saved to `output/` directory +- First run downloads the AI model (~170 MB for u2net) diff --git a/main.py b/main.py new file mode 100644 index 0000000..4a53174 --- /dev/null +++ b/main.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +"""FOBG — AI Background 
Remover — Entry Point.
+
+Usage:
+    python main.py                          # Launch Gradio UI (default)
+    python main.py --host 0.0.0.0 --port 7860
+    python main.py --share                  # Create public Gradio share link
+    python main.py --cli --help             # CLI mode (scripted use)
+"""
+
+import argparse
+import sys
+import os
+
+# Ensure the project root is on sys.path
+_PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, _PROJECT_ROOT)
+
+from src.ui.app import launch_app
+
+
+def cli_demo():
+    """Process one image or video from the command line.
+
+    Arguments are read from ``sys.argv``; ``main`` removes ``--cli`` before
+    calling this function. Images are saved as PNG. Videos are handed to
+    ``process_video_frame_by_frame`` and written to an ``.mp4`` path unless
+    ``-o/--output`` names a different file.
+
+    Exits with status 1 if the input file does not exist.
+    """
+    parser = argparse.ArgumentParser(description="FOBG CLI — process images/videos")
+    parser.add_argument("input", help="Input image or video file path")
+    parser.add_argument("-o", "--output", help="Output file path (auto-generated if omitted)")
+    parser.add_argument(
+        "--model",
+        default="u2net",
+        choices=["u2net", "u2netp", "u2net_human_seg", "isnet-general-use", "isnet-anime"],
+        help="rembg model",
+    )
+    # --alpha-matting is already the default; the flag is accepted only for compatibility.
+    parser.add_argument("--alpha-matting", action="store_true", default=True, help="Use alpha matting")
+    parser.add_argument("--no-alpha-matting", action="store_true", help="Disable alpha matting")
+    parser.add_argument("--video", action="store_true", help="Treat input as video (frame-by-frame processing)")
+    args = parser.parse_args()
+
+    input_path = args.input
+    if not os.path.exists(input_path):
+        print(f"Error: File not found: {input_path}", file=sys.stderr)
+        sys.exit(1)
+
+    is_video = args.video or input_path.lower().endswith((".mp4", ".avi", ".webm", ".mov", ".mkv"))
+    default_ext = ".mp4" if is_video else ".png"
+    if args.output:
+        output = args.output
+        # Add an extension only when the user left it off; appending it
+        # unconditionally used to turn "out.png" into "out.png.png".
+        if not os.path.splitext(output)[1]:
+            output += default_ext
+    else:
+        stem = os.path.splitext(os.path.basename(input_path))[0]
+        output = f"output_{stem}_{os.urandom(4).hex()}{default_ext}"
+
+    use_alpha_matting = not args.no_alpha_matting
+
+    # The heavy processing modules are imported lazily, only for the branch that runs.
+    if is_video:
+        from src.core.video_bg_remove import process_video_frame_by_frame
+
+        print(f"Processing video: {input_path}")
+        result = process_video_frame_by_frame(
+            input_path, output,
+            model=args.model,
+            alpha_matting=use_alpha_matting,
+        )
+        print(f"Video saved to: {result}")
+    else:
+        from PIL import Image
+        from src.core.bg_remove import remove_background_image
+
+        print(f"Processing image: {input_path}")
+        # convert() returns a new, fully loaded image, so the file can be closed right away.
+        with Image.open(input_path) as source_img:
+            input_img = source_img.convert("RGB")
+        result = remove_background_image(input_img, model=args.model, alpha_matting=use_alpha_matting)
+        result.save(output, format="PNG")
+        print(f"Image saved to: {output}")
+
+
+def main():
+    """Dispatch to CLI mode when ``--cli`` is present, otherwise launch the Gradio UI."""
+    argv = sys.argv[1:]
+    if "--cli" in argv:
+        # Hand every other argument to the CLI parser untouched. Parsing them
+        # with the UI parser first rejected the positional input path and
+        # swallowed --help before cli_demo could see it.
+        sys.argv = ["fobg"] + [arg for arg in argv if arg != "--cli"]
+        cli_demo()
+        return
+
+    parser = argparse.ArgumentParser(description="FOBG — AI Background Remover")
+    # Registered only so that it shows up in --help; it is handled above.
+    parser.add_argument("--cli", action="store_true", help="Run in CLI mode for batch processing")
+    parser.add_argument("--host", default="0.0.0.0", help="Gradio server host")
+    parser.add_argument("--port", type=int, default=7860, help="Gradio server port")
+    parser.add_argument("--share", action="store_true", help="Create a public share link")
+    parser.add_argument("--no-browser", action="store_true", help="Do not open browser")
+
+    args = parser.parse_args()
+
+    launch_app(
+        host=args.host,
+        port=args.port,
+        share=args.share,
+        inbrowser=not args.no_browser,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5bae829
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+# AI background removal
+rembg>=2.0.0
+
+# Video frame processing
+opencv-python-headless>=4.8.0
+
+# Image I/O and manipulation
+Pillow>=10.0.0
+
+# Numeric array operations (used by both rembg internals and video processing)
+numpy>=1.24.0
+
+# Gradio web UI
+gradio>=4.0.0
diff --git a/src/core/bg_remove.py b/src/core/bg_remove.py
new file mode 100644
index 0000000..6416ad5
--- /dev/null
+++ b/src/core/bg_remove.py
@@ -0,0 +1,96 @@
+"""Core image background removal using rembg."""
+
+import io
+from typing import Optional
+
+import numpy as np
+from PIL import Image
+import rembg
+
+# Sessions are cached per model name so every model is loaded at most once per process.
+_DEFAULT_MODEL = "u2net"
+_DEFAULT_SESSION = None  # Unused; kept for backward compatibility (see _SESSION_CACHE)
+_SESSION_CACHE = {}  # model name -> rembg session
+
+
+def _get_session(model: Optional[str] = None):
+    """Return the cached rembg session for *model*, creating it on first use."""
+    name = model or _DEFAULT_MODEL
+    if name not in _SESSION_CACHE:
+        _SESSION_CACHE[name] = rembg.new_session(name)
+    return _SESSION_CACHE[name]
+
+
+def remove_background_image(
+    input_image: Image.Image,
+    model: Optional[str] = None,
+    session: Optional[rembg.bg.BaseSession] = None,
+    force_return_bytes: bool = False,
+    alpha_matting: bool = True,
+    alpha_matting_foreground_threshold: int = 240,
+    alpha_matting_background_threshold: int = 10,
+    alpha_matting_erode_size: int = 10,
+    **kwargs,
+) -> Image.Image:
+    """
+    Remove background from a PIL Image.
+
+    Parameters
+    ----------
+    input_image : PIL.Image.Image
+        Input image (converted to RGBA internally if it is not already).
+    model : str, optional
+        rembg model name (e.g. 'u2net', 'u2net_human_seg', 'isnet-general-use').
+        Used to pick a cached session when ``session`` is omitted; falls back
+        to ``_DEFAULT_MODEL``.
+    session : rembg.bg.BaseSession, optional
+        Re-use an existing session for faster processing. Takes precedence
+        over ``model``.
+    force_return_bytes : bool
+        Forwarded to ``rembg.remove``. A bytes result is decoded again, so
+        this function returns a PIL image either way.
+    alpha_matting : bool
+        Use alpha matting for finer edge detection.
+    alpha_matting_foreground_threshold : int
+        Threshold for foreground mask.
+    alpha_matting_background_threshold : int
+        Threshold for background mask.
+    alpha_matting_erode_size : int
+        Erosion size for the mask.
+    **kwargs : dict
+        Extra keyword arguments forwarded to ``rembg.remove``.
+
+    Returns
+    -------
+    PIL.Image.Image
+        Image with transparent background (RGBA).
+    """
+    # Ensure RGBA
+    if input_image.mode != "RGBA":
+        input_image = input_image.convert("RGBA")
+
+    # Honour the requested model. Previously ``model`` was ignored and
+    # rembg silently fell back to its own default for every call.
+    if session is None:
+        session = _get_session(model)
+
+    remove_kwargs = {
+        "alpha_matting": alpha_matting,
+        "alpha_matting_foreground_threshold": alpha_matting_foreground_threshold,
+        "alpha_matting_background_threshold": alpha_matting_background_threshold,
+        "alpha_matting_erode_size": alpha_matting_erode_size,
+        "force_return_bytes": force_return_bytes,
+        **kwargs,
+    }
+
+    result = rembg.remove(input_image, session=session, **remove_kwargs)
+    if isinstance(result, bytes):
+        result = Image.open(io.BytesIO(result))
+    return result
+
+
+def remove_background_bytes(
+    input_bytes: bytes,
+    model: Optional[str] = None,
+    session: Optional[rembg.bg.BaseSession] = None,
+    **kwargs,
+) -> bytes:
+    """
+    Remove background from image bytes, return RGBA PNG bytes.
+
+    Parameters
+    ----------
+    input_bytes : bytes
+        Raw image bytes (JPEG, PNG, etc.).
+    model, session : see remove_background_image
+    **kwargs : dict
+        Forwarded to remove_background_image.
+
+    Returns
+    -------
+    bytes
+        PNG bytes with transparent background.
+    """
+    input_image = Image.open(io.BytesIO(input_bytes))
+    # ``model`` is forwarded now; it used to be dropped here.
+    result = remove_background_image(input_image, model=model, session=session, **kwargs)
+    output = io.BytesIO()
+    result.save(output, format="PNG")
+    return output.getvalue()
diff --git a/src/core/video_bg_remove.py b/src/core/video_bg_remove.py
new file mode 100644
index 0000000..b2b9022
--- /dev/null
+++ b/src/core/video_bg_remove.py
@@ -0,0 +1,262 @@
+"""Video background removal frame-by-frame using rembg + ffmpeg."""
+
+import io
+import os
+import subprocess
+import tempfile
+from typing import Iterable, Iterator, Optional
+
+import cv2
+import numpy as np
+from PIL import Image
+import rembg
+
+
+def _encode_png_sequence(
+    frames: Iterable[np.ndarray],
+    tmp_dir: str,
+    fps: float,
+    output_path: str,
+    use_alpha: bool,
+) -> str:
+    """
+    Write frames as PNG in tmp_dir, then encode to video via ffmpeg.
+
+    For .mov output, uses ProRes 4444 with alpha channel.
+    For .mp4 (or anything else), uses libx264 (no alpha in MP4 container).
+
+    Parameters
+    ----------
+    frames : iterable of np.ndarray
+        uint8 frames, either RGB (H, W, 3) or RGBA (H, W, 4). The iterable is
+        consumed lazily, so a generator keeps memory use at one frame.
+    tmp_dir : str
+        Existing directory; a ``frames`` sub-directory is created inside it.
+    fps : float
+        Frame rate passed to ffmpeg.
+    output_path : str
+        Destination video file.
+    use_alpha : bool
+        Encode ProRes 4444 with alpha instead of libx264.
+
+    Returns
+    -------
+    str
+        ``output_path``.
+
+    Raises
+    ------
+    ValueError
+        If ``frames`` yields nothing.
+    subprocess.CalledProcessError
+        If ffmpeg exits with a non-zero status.
+    """
+    frames_dir = os.path.join(tmp_dir, "frames")
+    os.makedirs(frames_dir, exist_ok=True)
+
+    frame_count = 0
+    for frame in frames:
+        path = os.path.join(frames_dir, f"frame_{frame_count:04d}.png")
+        # Pillow infers RGB/RGBA from the array shape. Forcing "RGBA" was
+        # wrong for the 3-channel frames produced when compositing.
+        Image.fromarray(frame).save(path, "PNG")
+        frame_count += 1
+
+    if frame_count == 0:
+        raise ValueError("No frames to encode.")
+
+    # %04d is a minimum width, so indices above 9999 still match.
+    cmd = [
+        "ffmpeg", "-y",
+        "-framerate", str(fps),
+        "-i", os.path.join(frames_dir, "frame_%04d.png"),
+    ]
+    if use_alpha:
+        # .mov with ProRes 4444 alpha
+        cmd += [
+            "-c:v", "prores",
+            "-profile:v", "4",
+            "-pix_fmt", "yuva444p10le",
+            "-an",  # no audio stream; replaces the unconventional "-c:a none"
+        ]
+    else:
+        # .mp4 with libx264 (solid black background where alpha is 0)
+        cmd += [
+            # yuv420p needs even dimensions; pad odd sizes by one pixel.
+            "-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
+            "-c:v", "libx264",
+            "-pix_fmt", "yuv420p",
+            "-preset", "medium",
+            "-crf", "18",
+        ]
+    cmd.append(output_path)
+
+    subprocess.run(cmd, check=True)
+    return output_path
+
+
+def _is_mov(output_path: str) -> bool:
+    """Return True when *output_path* has a ``.mov`` extension (case-insensitive)."""
+    return os.path.splitext(output_path)[1].lower() == ".mov"
+
+
+def _iter_cutouts(
+    cap,
+    session,
+    total_frames: int,
+    progress_callback,
+    remove_kwargs: dict,
+    bg_rgb: Optional[np.ndarray] = None,
+) -> Iterator[np.ndarray]:
+    """Yield every frame of *cap* after background removal.
+
+    Frames are RGBA cut-outs, or RGB composites over *bg_rgb* (a float64
+    H x W x 3 array) when it is given. ``progress_callback(frame_index,
+    total_frames)`` is called after each frame has been consumed.
+    """
+    frame_idx = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        result = rembg.remove(pil_img, session=session, **remove_kwargs)
+        result_img = Image.open(io.BytesIO(result)) if isinstance(result, bytes) else result
+        cutout = np.array(result_img)  # RGBA
+
+        if bg_rgb is not None:
+            alpha = cutout[:, :, 3:4].astype(np.float64) / 255.0
+            fg_rgb = cutout[:, :, :3].astype(np.float64)
+            cutout = (fg_rgb * alpha + bg_rgb * (1.0 - alpha)).astype(np.uint8)
+
+        yield cutout
+
+        frame_idx += 1
+        if progress_callback:
+            progress_callback(frame_idx, total_frames)
+
+
+def process_video_frame_by_frame(
+    input_path: str,
+    output_path: str,
+    fps: Optional[float] = None,
+    model: Optional[str] = None,
+    session: Optional[rembg.bg.BaseSession] = None,
+    alpha_matting: bool = True,
+    alpha_matting_erode_size: int = 10,
+    progress_callback=None,
+    **kwargs,
+) -> str:
+    """
+    Remove background from a video, frame-by-frame.
+
+    Reads frames from ``input_path``, runs each through rembg,
+    and writes the result to ``output_path``. For .mov output, a ProRes
+    video with true alpha is produced; otherwise an MP4 with a black
+    background (where transparent pixels are black) is produced.
+
+    Parameters
+    ----------
+    input_path : str
+        Path to input video (MP4, AVI, WebM, etc.).
+    output_path : str
+        Path to write the output video. Use ``.mov`` for alpha, ``.mp4`` for solid.
+    fps : float, optional
+        Override the output FPS. Auto-detected if omitted.
+    model : str, optional
+        rembg model name. Defaults to ``"u2net"`` when no session is given.
+    session : rembg.bg.BaseSession, optional
+        Shared session for faster processing.
+    alpha_matting : bool
+        Use alpha matting for finer edges.
+    alpha_matting_erode_size : int
+        Erosion size for alpha matting mask.
+    progress_callback : callable, optional
+        Callback of type ``callback(frame_index, total_frames)`` for progress tracking.
+    **kwargs : dict
+        Extra kwargs passed to rembg.remove.
+
+    Returns
+    -------
+    str
+        Path to the output video file.
+
+    Raises
+    ------
+    ValueError
+        If the video cannot be opened or contains no frames.
+    """
+    cap = cv2.VideoCapture(input_path)
+    if not cap.isOpened():
+        cap.release()
+        raise ValueError(f"Cannot open video file: {input_path}")
+
+    try:
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        src_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+        fps = fps if fps is not None else src_fps
+
+        # Create the session once up front. With model=None the old code left
+        # it as None, so rembg built a new session for every single frame.
+        if session is None:
+            session = rembg.new_session(model or "u2net")
+
+        remove_kwargs = {
+            "alpha_matting": alpha_matting,
+            "alpha_matting_erode_size": alpha_matting_erode_size,
+            **kwargs,
+        }
+
+        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Frames are streamed straight to disk instead of being collected
+            # in a list, which exhausted memory on longer clips.
+            frames = _iter_cutouts(cap, session, total_frames, progress_callback, remove_kwargs)
+            _encode_png_sequence(frames, tmpdir, fps, output_path, _is_mov(output_path))
+    finally:
+        cap.release()
+
+    return output_path
+
+
+def process_video_with_background(
+    input_path: str,
+    output_path: str,
+    bg_image: Optional[str] = None,
+    fps: Optional[float] = None,
+    model: Optional[str] = None,
+    session: Optional[rembg.bg.BaseSession] = None,
+    progress_callback=None,
+    **kwargs,
+) -> str:
+    """
+    Remove background from video and composite onto a new background.
+
+    Parameters
+    ----------
+    input_path : str
+        Input video path.
+    output_path : str
+        Output video path.
+    bg_image : str, optional
+        Path to a background image. If None, keeps transparent background.
+    fps : float, optional
+        Output FPS.
+    model : str, optional
+        rembg model name. Defaults to ``"u2net"`` when no session is given.
+    session : rembg.bg.BaseSession, optional
+        Shared session.
+    progress_callback : callable, optional
+        Callback of type ``callback(frame_index, total_frames)``.
+    **kwargs : dict
+        Extra kwargs for rembg.
+
+    Returns
+    -------
+    str
+        Path to the output video.
+
+    Raises
+    ------
+    ValueError
+        If the video or the background image cannot be read, or the video
+        contains no frames.
+    """
+    cap = cv2.VideoCapture(input_path)
+    if not cap.isOpened():
+        cap.release()
+        raise ValueError(f"Cannot open video file: {input_path}")
+
+    try:
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        src_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+        fps = fps if fps is not None else src_fps
+
+        bg_rgb = None
+        if bg_image:
+            bg_bgr = cv2.imread(bg_image)
+            # imread signals failure by returning None rather than raising.
+            if bg_bgr is None:
+                raise ValueError(f"Cannot read background image: {bg_image}")
+            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            bg_rgb = cv2.resize(cv2.cvtColor(bg_bgr, cv2.COLOR_BGR2RGB), (out_w, out_h))
+            # Converted once here so the per-frame blend does not repeat it.
+            bg_rgb = bg_rgb.astype(np.float64)
+
+        if session is None:
+            session = rembg.new_session(model or "u2net")
+
+        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            frames = _iter_cutouts(
+                cap, session, total_frames, progress_callback, dict(kwargs), bg_rgb=bg_rgb
+            )
+            _encode_png_sequence(frames, tmpdir, fps, output_path, _is_mov(output_path))
+    finally:
+        cap.release()
+
+    return output_path
diff --git a/src/ui/app.py b/src/ui/app.py
new file mode 100644
index 0000000..99a3bcc
--- /dev/null
+++ b/src/ui/app.py
@@ -0,0 +1,266 @@
+"""Gradio-based GUI for BG removal (images + video)."""
+
+import os
+from PIL import Image
+import gradio as gr
+
+from ..core.bg_remove import remove_background_image
+from ..core.video_bg_remove import process_video_frame_by_frame
+
+# ── Globals ──────────────────────────────────────────────────────────────────
+OUTPUT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "output")
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# (label shown in the dropdown, rembg model name)
+# NOTE(review): "u2net_3b" is not among the model names rembg documents — confirm it exists.
+MODEL_OPTIONS = [
+    ("u2net (default)", "u2net"),
+    ("u2net human_seg", "u2net_human_seg"),
+    ("u2netp (light)", "u2netp"),
+    ("u2net 3b (heavy)", "u2net_3b"),
+    ("isnet-general-use", "isnet-general-use"),
+    ("isnet-anime (anime)", "isnet-anime"),
+]
+
+
+def _save_output_filename(input_path: str = "", suffix: str = ".png") -> str:
+    """Generate an output file path derived from the input file name.
+
+    Returns ``OUTPUT_DIR/<input stem>_bg<suffix>``, or
+    ``OUTPUT_DIR/output<suffix>`` when ``input_path`` is empty.
+    """
+    if input_path:
+        stem = os.path.splitext(os.path.basename(input_path))[0]
+        name = f"{stem}_bg{suffix}"
+    else:
+        name = f"output{suffix}"
+    return os.path.join(OUTPUT_DIR, name)
+
+
+def _remove_bg(
+    image_input,
+    model_name: str = "u2net",
+    alpha_matting: bool = True,
+    foreground_threshold: int = 240,
+    background_threshold: int = 10,
+    erosion_dilate: int =
0,
+) -> Image.Image:
+    """
+    Gradio callback: remove background from an image.
+
+    Parameters
+    ----------
+    image_input : str or PIL.Image.Image
+        Path of the uploaded image (``gr.Image(type="filepath")``) or an
+        already loaded PIL image.
+    model_name : str
+        rembg model to use.
+    alpha_matting : bool
+        Enable fine edge detection.
+    foreground_threshold, background_threshold : int
+        Alpha matting thresholds.
+    erosion_dilate : int
+        Alpha matting erode size. ``0`` keeps the default of
+        ``remove_background_image``.
+
+    Returns
+    -------
+    PIL.Image.Image
+        Result with transparent background.
+
+    Raises
+    ------
+    gr.Error
+        If no image was provided or its type is unsupported.
+    """
+    if image_input is None:
+        raise gr.Error("No image provided. Please upload an image.")
+
+    if isinstance(image_input, Image.Image):
+        # gr.Image with type="pil" returns the image directly
+        input_img = image_input.convert("RGB")
+        # There is no file name to derive the output name from. Passing the
+        # image itself to _save_output_filename raised a TypeError.
+        source_path = ""
+    elif isinstance(image_input, str):
+        with Image.open(image_input) as opened:
+            input_img = opened.convert("RGB")
+        source_path = image_input
+    else:
+        raise gr.Error("Unsupported image input type.")
+
+    # NOTE(review): assumes the "Erosion Mask Dilation" slider is meant to drive
+    # rembg's alpha_matting_erode_size — confirm. The old keyword
+    # (erosion_mask_dilate) is not a rembg parameter, so the slider did nothing.
+    # 0 leaves the previous default in place.
+    extra = {}
+    if erosion_dilate and erosion_dilate > 0:
+        extra["alpha_matting_erode_size"] = int(erosion_dilate)
+
+    result = remove_background_image(
+        input_img,
+        model=model_name,
+        alpha_matting=alpha_matting,
+        alpha_matting_foreground_threshold=int(foreground_threshold),
+        alpha_matting_background_threshold=int(background_threshold),
+        **extra,
+    )
+
+    output_path = _save_output_filename(source_path, ".png")
+    result.save(output_path, format="PNG")
+    return result
+
+
+def _set_download(path):
+    """Return a ``gr.File`` update pointing at *path* (``None`` clears it)."""
+    return gr.File(value=path)
+
+
+def _remove_bg_video(
+    video_input,
+    model_name: str = "u2net",
+    alpha_matting: bool = True,
+    foreground_threshold: int = 240,
+    background_threshold: int = 10,
+    erosion_dilate: int = 0,
+    progress=gr.Progress(),
+):
+    """
+    Gradio callback: remove background from a video.
+
+    Parameters
+    ----------
+    video_input : str
+        Uploaded video file path.
+    model_name : str
+        rembg model to use.
+    alpha_matting : bool
+        Enable fine edge detection per frame.
+    foreground_threshold, background_threshold : int
+        Alpha matting thresholds.
+    erosion_dilate : int
+        Alpha matting erode size. ``0`` keeps the default of
+        ``process_video_frame_by_frame``.
+    progress : gr.Progress
+        Gradio progress tracker.
+
+    Returns
+    -------
+    tuple[str, str]
+        (Path to the output video, state path).
+
+    Raises
+    ------
+    gr.Error
+        If no video was provided.
+    """
+    if video_input is None:
+        raise gr.Error("No video provided. Please upload a video.")
+
+    output_path = _save_output_filename(video_input, ".mov")
+
+    def _progress_callback(frame_idx, total_frames):
+        # Some containers report a frame count of 0; skip the bar rather than divide by zero.
+        if total_frames > 0:
+            progress(frame_idx / total_frames, desc=f"Processing frame {frame_idx}/{total_frames}")
+
+    # NOTE(review): same assumption as in _remove_bg — the slider is mapped to
+    # alpha_matting_erode_size and 0 keeps the default; confirm.
+    extra = {}
+    if erosion_dilate and erosion_dilate > 0:
+        extra["alpha_matting_erode_size"] = int(erosion_dilate)
+
+    result_path = process_video_frame_by_frame(
+        video_input,
+        output_path,
+        model=model_name,
+        alpha_matting=alpha_matting,
+        alpha_matting_foreground_threshold=int(foreground_threshold),
+        alpha_matting_background_threshold=int(background_threshold),
+        progress_callback=_progress_callback,
+        **extra,
+    )
+
+    return result_path, result_path
+
+
+# ── Gradio Blocks ────────────────────────────────────────────────────────────
+def create_app() -> gr.Blocks:
+    """Build and return the Gradio app (an Image tab and a Video tab)."""
+    with gr.Blocks(title="FOBG — AI Background Remover") as app:
+        gr.Markdown(
+            """
+            # 🪄 FOBG — AI Background Remover
+
+            Remove backgrounds from **images** or **videos** using AI.
+            Powered by [rembg](https://github.com/danielgatis/rembg) with multiple model options.
+            """
+        )
+
+        with gr.Tabs():
+            # ── Image Tab ──────────────────────────────────────────
+            with gr.TabItem("🖼️ Image"):
+                with gr.Row():
+                    with gr.Column():
+                        image_input = gr.Image(
+                            label="Upload Image",
+                            sources=["upload", "clipboard"],
+                            type="filepath",
+                        )
+                        model_select_img = gr.Dropdown(
+                            choices=MODEL_OPTIONS,
+                            value="u2net",
+                            label="Model",
+                        )
+                        img_alpha = gr.Checkbox(label="Alpha Matting", value=True)
+                        img_fg_threshold = gr.Slider(
+                            minimum=1, maximum=255, value=240, step=1, label="Foreground Threshold"
+                        )
+                        img_bg_threshold = gr.Slider(
+                            minimum=1, maximum=255, value=10, step=1, label="Background Threshold"
+                        )
+                        img_erosion = gr.Slider(
+                            minimum=0, maximum=32, value=0, step=1, label="Erosion Mask Dilation"
+                        )
+                        img_btn = gr.Button("Remove Background", variant="primary")
+
+                    with gr.Column():
+                        image_output = gr.Image(label="Result")
+
+                img_btn.click(
+                    fn=_remove_bg,
+                    inputs=[image_input, model_select_img, img_alpha, img_fg_threshold, img_bg_threshold, img_erosion],
+                    outputs=[image_output],
+                )
+
+            # ── Video Tab ──────────────────────────────────────────
+            with gr.TabItem("🎥 Video"):
+                with gr.Row():
+                    with gr.Column():
+                        video_input = gr.Video(
+                            label="Upload Video",
+                            sources=["upload", "webcam"],
+                        )
+                        model_select_vid = gr.Dropdown(
+                            choices=MODEL_OPTIONS,
+                            value="u2net",
+                            label="Model",
+                        )
+                        vid_alpha = gr.Checkbox(label="Alpha Matting", value=True)
+                        vid_fg_threshold = gr.Slider(
+                            minimum=1, maximum=255, value=240, step=1, label="Foreground Threshold"
+                        )
+                        vid_bg_threshold = gr.Slider(
+                            minimum=1, maximum=255, value=10, step=1, label="Background Threshold"
+                        )
+                        vid_erosion = gr.Slider(
+                            minimum=0, maximum=32, value=0, step=1, label="Erosion Mask Dilation"
+                        )
+                        vid_btn = gr.Button("Remove Background", variant="primary")
+
+                    with gr.Column():
+                        video_output = gr.Video(label="Result")
+                        video_download = gr.File(label="Download .MOV")
+                        video_path_state = gr.State()
+
+                # First click: process video (shows progress on result), save path to state
+                # Then: populate download link from state (no progress shown)
+                vid_btn.click(
+                    fn=_remove_bg_video,
+                    inputs=[video_input, model_select_vid, vid_alpha, vid_fg_threshold, vid_bg_threshold, vid_erosion],
+                    outputs=[video_output, video_path_state],
+                ).then(
+                    fn=_set_download,
+                    inputs=[video_path_state],
+                    outputs=[video_download],
+                )
+
+        gr.Markdown(
+            """
+            ### Notes
+
+            - **🖼️ Image**: Single images — fast, typically 1-5 seconds depending on resolution and model.
+            - **🎥 Video**: Videos (MP4, WebM, etc.) — frame-by-frame processing. Can take minutes for long videos.
+            - **Models**: Lighter models (u2netp) are faster; heavier models (u2net, isnet) are more accurate.
+            - **Output**: Images are saved as PNG with transparency. Videos are saved as MOV with alpha channel.
+            - Results are stored in the `output/` directory.
+            """
+        )
+
+    return app
+
+
+def launch_app(
+    host: str = "0.0.0.0",
+    port: int = 7860,
+    share: bool = False,
+    inbrowser: bool = True,
+) -> None:
+    """Build the Gradio app and start its server.
+
+    Parameters
+    ----------
+    host : str
+        Interface to bind (passed as ``server_name``).
+    port : int
+        Port to listen on (passed as ``server_port``).
+    share : bool
+        Create a public Gradio share link.
+    inbrowser : bool
+        Open the app in the default browser after start-up.
+    """
+    app = create_app()
+    # NOTE(review): launch(theme=...) appears to need a Gradio release that accepts
+    # the theme there; requirements.txt allows gradio>=4.0.0 — confirm the minimum.
+    app.launch(
+        server_name=host,
+        server_port=port,
+        share=share,
+        inbrowser=inbrowser,
+        theme=gr.themes.Soft(),
+    )
diff --git a/tests/test_image.png b/tests/test_image.png
new file mode 100644
index 0000000000000000000000000000000000000000..c91c37d3181a801f1b96ebb19a8503cff68a5d15
GIT binary patch
literal 586
zcmeAS@N?(olHy`uVBq!ia0vp^CqS5k2}mkgS)OEIU=s9naSW-L^Y)S>CxZgR0f(*i
z>ADXEp6DHvn_YdN_H`*ky^$Da@|Dz
Bm(l