🎉 | Project released

This commit is contained in:
2026-05-16 00:15:37 +03:00
parent 3a3d368c4d
commit 975801ffca
7 changed files with 819 additions and 0 deletions

91
docs/README.md Normal file
View File

@@ -0,0 +1,91 @@
# 🪄 FOBG — AI Background Remover
Remove backgrounds from **images** and **videos** using AI, powered by [rembg](https://github.com/danielgatis/rembg).
## Features
- **Image BG Removal** — Upload any image, get a transparent PNG
- **Video BG Removal** — Frame-by-frame processing with alpha channel
- **Multiple AI Models** — u2net, u2netp, u2net_human_seg, isnet-general-use, isnet-anime
- **Gradio UI** — Clean, interactive web interface
- **CLI Mode** — Script-friendly command-line interface
- **Fine-tune Controls** — Alpha matting, thresholds, erosion mask
## Quick Start
### 1. Install Dependencies
```bash
cd /mnt/games/Coding/Python/AI/FOBG
source .venv/bin/activate
pip install -r requirements.txt
```
### 2. Launch the Web UI
```bash
python main.py
```
Opens at [http://localhost:7860](http://localhost:7860)
### 3. CLI Mode
```bash
python main.py --cli input.jpg # Image
python main.py --cli video.mp4 --video # Video
```
## Project Structure
```
FOBG/
├── main.py # Entry point (Gradio + CLI)
├── requirements.txt # Dependencies
├── src/
│ ├── core/
│ │ ├── bg_remove.py # Image BG removal (rembg)
│ │ └── video_bg_remove.py # Video BG removal (OpenCV + rembg)
│ └── ui/
│ └── app.py # Gradio web interface
├── output/ # Processed results saved here
├── tests/ # Unit tests
└── docs/
└── README.md # This file
```
## Available Models
| Model | Description | Speed |
|-------|-------------|-------|
| `u2net` | Default, balanced quality/speed | ⚡ Fast |
| `u2netp` | Lightweight, slightly lower quality | 🚀 Fastest |
| `u2net_human_seg` | Optimized for human segmentation | ⚡ Fast |
| `u2net_3b` | Heavy, higher accuracy | 🐢 Slower |
| `isnet-general-use` | General purpose, high accuracy | 🐢 Slower |
| `isnet-anime` | Optimized for anime illustrations | ⚡ Fast |
## Controls
### Image Tab
- **Upload Image** — PNG, JPEG, or WebP
- **Model** — Choose the AI model
- **Alpha Matting** — Fine-tune edges (slower but cleaner)
- **Foreground Threshold** — How much is considered foreground (1-255)
- **Background Threshold** — How much is considered background (1-255)
- **Erosion Mask Dilation** — Morphological operation on the mask (0-32)
### Video Tab
- **Upload Video** — MP4, AVI, WebM, or MOV
- **Model** — Same models as image tab
- **Alpha Matting** — Fine edge detection per frame
- **Erosion Mask Dilation** — Morphological operation on the mask
## Notes
- Image processing: typically 1-5 seconds per image, depending on resolution and model
- Video processing: frame-by-frame, can take minutes for longer clips; requires the `ffmpeg` executable on your `PATH`
- All outputs saved to `output/` directory
- First run downloads the AI model (~170 MB for u2net)

90
main.py Normal file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""FOBG — AI Background Remover — Entry Point.
Usage:
python main.py # Launch Gradio UI (default)
python main.py --host 0.0.0.0 --port 7860
python main.py --share # Create public Gradio share link
python main.py --cli --help # CLI mode (scripted use)
"""
import argparse
import sys
import os
# Ensure the project root is on sys.path
_PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _PROJECT_ROOT)
from src.ui.app import launch_app
def cli_demo():
    """Process a single image or video from the command line.

    Arguments are read from ``sys.argv``: a required input path plus the
    optional ``-o/--output``, ``--model``, ``--no-alpha-matting`` and
    ``--video`` flags. The input is treated as a video when ``--video`` is
    given or its name ends in a known video extension; anything else is
    treated as an image.

    When ``--output`` is omitted, a randomised file name (no directory
    component) is generated. When it is given, it is used as-is; the default
    extension (``.mp4`` for video, ``.png`` for images) is appended only if
    the supplied path has no extension of its own, so ``-o clip.mov`` really
    produces ``clip.mov``.

    Exits with status 1 if the input file does not exist.
    """
    import argparse as ap2

    parser = ap2.ArgumentParser(description="FOBG CLI — process images/videos")
    parser.add_argument("input", help="Input image or video file path")
    parser.add_argument("-o", "--output", help="Output file path (auto-generated if omitted)")
    parser.add_argument(
        "--model",
        default="u2net",
        choices=["u2net", "u2netp", "u2net_human_seg", "isnet-general-use", "isnet-anime"],
        help="rembg model",
    )
    # Alpha matting is on by default; this flag is accepted for symmetry and
    # only --no-alpha-matting actually changes behaviour.
    parser.add_argument("--alpha-matting", action="store_true", default=True, help="Use alpha matting")
    parser.add_argument("--no-alpha-matting", action="store_true", help="Disable alpha matting")
    parser.add_argument("--video", action="store_true", help="Treat input as video (frame-by-frame processing)")
    args = parser.parse_args()

    input_path = args.input
    if not os.path.exists(input_path):
        print(f"Error: File not found: {input_path}")
        sys.exit(1)

    is_video = args.video or input_path.lower().endswith((".mp4", ".avi", ".webm", ".mov", ".mkv"))
    default_ext = ".mp4" if is_video else ".png"

    if args.output:
        output = args.output
        # Respect an extension the user already chose instead of producing
        # names like "result.png.png" or "clip.mov.mp4".
        if not os.path.splitext(output)[1]:
            output += default_ext
    else:
        stem = os.path.splitext(os.path.basename(input_path))[0]
        output = f"output_{stem}_{os.urandom(4).hex()}{default_ext}"

    use_alpha_matting = not args.no_alpha_matting

    if is_video:
        # Imported lazily so the image path does not need OpenCV.
        from src.core.video_bg_remove import process_video_frame_by_frame

        print(f"Processing video: {input_path}")
        result = process_video_frame_by_frame(
            input_path,
            output,
            model=args.model,
            alpha_matting=use_alpha_matting,
        )
        print(f"Video saved to: {result}")
    else:
        from PIL import Image

        from src.core.bg_remove import remove_background_image

        print(f"Processing image: {input_path}")
        input_img = Image.open(input_path).convert("RGB")
        # NOTE(review): "force" is forwarded verbatim to rembg.remove through
        # **kwargs — confirm the installed rembg version accepts it.
        result = remove_background_image(
            input_img,
            model=args.model,
            force=True,
            alpha_matting=use_alpha_matting,
        )
        result.save(output, format="PNG")
        print(f"Image saved to: {output}")
def main():
    """Entry point: dispatch to CLI mode or launch the Gradio UI.

    If ``--cli`` appears anywhere on the command line, it is stripped from
    ``sys.argv`` and every remaining argument is handed to ``cli_demo``
    unparsed. This must happen *before* the UI parser runs: that parser knows
    nothing about the CLI's positional input or its ``-o/--model/--video``
    options and would reject them as unrecognized arguments (and would answer
    ``--cli --help`` with the wrong help text).

    Otherwise the UI options (``--host``, ``--port``, ``--share``,
    ``--no-browser``) are parsed and passed to ``launch_app``.
    """
    raw_args = sys.argv[1:]
    if "--cli" in raw_args:
        sys.argv = ["fobg"] + [arg for arg in raw_args if arg != "--cli"]
        cli_demo()
        return

    parser = argparse.ArgumentParser(description="FOBG — AI Background Remover")
    # Declared so that it is listed in --help; it is handled above.
    parser.add_argument("--cli", action="store_true", help="Run in CLI mode for batch processing")
    parser.add_argument("--host", default="0.0.0.0", help="Gradio server host")
    parser.add_argument("--port", type=int, default=7860, help="Gradio server port")
    parser.add_argument("--share", action="store_true", help="Create a public share link")
    parser.add_argument("--no-browser", action="store_true", help="Do not open browser")
    args = parser.parse_args()

    launch_app(
        host=args.host,
        port=args.port,
        share=args.share,
        inbrowser=not args.no_browser,
    )


if __name__ == "__main__":
    main()

14
requirements.txt Normal file
View File

@@ -0,0 +1,14 @@
# AI background removal
rembg>=2.0.0
# Video frame processing
opencv-python-headless>=4.8.0
# Image I/O and manipulation
Pillow>=10.0.0
# Numeric array operations (used by both rembg internals and video processing)
numpy>=1.24.0
# Gradio web UI
gradio>=4.0.0

96
src/core/bg_remove.py Normal file
View File

@@ -0,0 +1,96 @@
"""Core image background removal using rembg."""
import io
from typing import Optional
import numpy as np
from PIL import Image
import rembg
# Pre-load the model once to avoid re-downloading on every call
_DEFAULT_MODEL = "u2net"
_DEFAULT_SESSION = None # Lazy-loaded
# rembg sessions keyed by model name, so each model is loaded at most once
# per process instead of once per call.
_SESSION_CACHE: dict = {}


def _get_session(model: Optional[str] = None):
    """Return the cached rembg session for ``model``, creating it on first use.

    Falls back to ``_DEFAULT_MODEL`` when ``model`` is None or empty.
    """
    name = model or _DEFAULT_MODEL
    session = _SESSION_CACHE.get(name)
    if session is None:
        session = rembg.new_session(name)
        _SESSION_CACHE[name] = session
    return session


def remove_background_image(
    input_image: Image.Image,
    model: Optional[str] = None,
    session: Optional[rembg.bg.BaseSession] = None,
    force_return_bytes: bool = False,
    alpha_matting: bool = True,
    alpha_matting_foreground_threshold: int = 240,
    alpha_matting_background_threshold: int = 10,
    alpha_matting_erode_size: int = 10,
    **kwargs,
) -> Image.Image:
    """
    Remove background from a PIL Image.

    Parameters
    ----------
    input_image : PIL.Image.Image
        Input image; converted to RGBA before processing if it is not already.
    model : str, optional
        rembg model name (e.g. 'u2net', 'u2net_human_seg', 'isnet-general-use').
        Used to pick the session when ``session`` is not given; defaults to
        ``_DEFAULT_MODEL``.
    session : rembg.bg.BaseSession, optional
        Re-use an existing session. When omitted, a per-model session is
        created on first use and cached for later calls.
    force_return_bytes : bool
        Forwarded to ``rembg.remove``. If rembg returns bytes, they are
        decoded again, so this function always returns a PIL Image.
    alpha_matting : bool
        Use alpha matting for finer edge detection.
    alpha_matting_foreground_threshold : int
        Threshold for foreground mask.
    alpha_matting_background_threshold : int
        Threshold for background mask.
    alpha_matting_erode_size : int
        Erosion size for the mask.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``rembg.remove``.

    Returns
    -------
    PIL.Image.Image
        The image returned by rembg, decoded from bytes when necessary.
    """
    if session is None:
        # Honour the requested model; without this the model argument would
        # be silently ignored and rembg would fall back to its own default.
        session = _get_session(model)

    if input_image.mode != "RGBA":
        input_image = input_image.convert("RGBA")

    remove_kwargs = {
        "alpha_matting": alpha_matting,
        "alpha_matting_foreground_threshold": alpha_matting_foreground_threshold,
        "alpha_matting_background_threshold": alpha_matting_background_threshold,
        "alpha_matting_erode_size": alpha_matting_erode_size,
        "force_return_bytes": force_return_bytes,
        **kwargs,
    }
    result = rembg.remove(input_image, session=session, **remove_kwargs)
    if isinstance(result, bytes):
        result = Image.open(io.BytesIO(result))
    return result
def remove_background_bytes(
    input_bytes: bytes,
    model: Optional[str] = None,
    session: Optional[rembg.bg.BaseSession] = None,
    **kwargs,
) -> bytes:
    """
    Remove background from encoded image bytes and return PNG bytes.

    Parameters
    ----------
    input_bytes : bytes
        Raw encoded image bytes (JPEG, PNG, etc.), decoded with PIL.
    model : str, optional
        rembg model name, forwarded to ``remove_background_image``.
    session : rembg.bg.BaseSession, optional
        Existing session, forwarded to ``remove_background_image``.
    **kwargs
        Extra keyword arguments forwarded to ``remove_background_image``.

    Returns
    -------
    bytes
        The processed image re-encoded as PNG.
    """
    input_image = Image.open(io.BytesIO(input_bytes))
    # Forward the model as well as the session so the caller's choice is not lost.
    result = remove_background_image(input_image, model=model, session=session, **kwargs)
    buffer = io.BytesIO()
    result.save(buffer, format="PNG")
    return buffer.getvalue()

262
src/core/video_bg_remove.py Normal file
View File

@@ -0,0 +1,262 @@
"""Video background removal frame-by-frame using rembg + ffmpeg."""
import io
import os
import subprocess
import tempfile
from typing import Optional
import cv2
import numpy as np
from PIL import Image
import rembg
def _encode_png_sequence(
    frames: list[np.ndarray],
    tmp_dir: str,
    fps: float,
    output_path: str,
    use_alpha: bool,
) -> str:
    """
    Write frames as numbered PNG files in ``tmp_dir``, then encode them to a
    video with ffmpeg.

    Parameters
    ----------
    frames : iterable of numpy.ndarray
        Frames in display order. They are consumed one at a time, so a
        generator works as well as a list. The PIL image mode is inferred from
        each array, so both 3-channel and 4-channel frames are accepted.
    tmp_dir : str
        Existing directory; a ``frames`` sub-directory is created inside it.
    fps : float
        Frame rate passed to ffmpeg's ``-framerate``.
    output_path : str
        Destination video file (overwritten if it exists, because of ``-y``).
    use_alpha : bool
        If True, encode with the ``prores`` codec, profile 4, pixel format
        ``yuva444p10le``; otherwise encode with ``libx264`` / ``yuv420p``.

    Returns
    -------
    str
        ``output_path``.

    Raises
    ------
    ValueError
        If ``frames`` yields no frames.
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    """
    frames_dir = os.path.join(tmp_dir, "frames")
    os.makedirs(frames_dir, exist_ok=True)

    frame_count = 0
    for index, frame in enumerate(frames):
        png_path = os.path.join(frames_dir, f"frame_{index:04d}.png")
        # Let PIL infer RGB vs RGBA from the array shape; forcing "RGBA"
        # breaks on 3-channel frames.
        Image.fromarray(frame).save(png_path, "PNG")
        frame_count = index + 1

    if frame_count == 0:
        raise ValueError("No frames to encode")

    # Matches the zero-padded names written above.
    input_pattern = os.path.join(frames_dir, "frame_%04d.png")
    cmd = ["ffmpeg", "-y", "-framerate", str(fps), "-i", input_pattern]

    if use_alpha:
        # ProRes profile 4 with an alpha-capable pixel format.
        cmd += [
            "-c:v", "prores",
            "-profile:v", "4",
            "-pix_fmt", "yuva444p10le",
            # "-an" is ffmpeg's flag for "no audio"; "none" is not a codec name.
            "-an",
        ]
    else:
        # yuv420p has no alpha channel, so transparency is discarded here.
        cmd += [
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-preset", "medium",
            "-crf", "18",
        ]
    cmd.append(output_path)

    subprocess.run(cmd, check=True)
    return output_path
def _is_mov(output_path: str) -> bool:
return os.path.splitext(output_path)[1].lower() == ".mov"
def process_video_frame_by_frame(
    input_path: str,
    output_path: str,
    fps: Optional[float] = None,
    model: Optional[str] = None,
    session: Optional[rembg.bg.BaseSession] = None,
    alpha_matting: bool = True,
    alpha_matting_erode_size: int = 10,
    progress_callback=None,
    **kwargs,
) -> str:
    """
    Remove background from a video, frame-by-frame.

    Reads frames from ``input_path``, runs each through ``rembg.remove`` and
    hands them to ``_encode_png_sequence``, which writes ``output_path``. The
    ProRes/alpha encoder branch is selected when ``output_path`` ends in
    ``.mov``; any other extension uses the libx264 branch (no alpha).

    Frames are produced lazily and written to a temporary directory one at a
    time, so memory use does not grow with the length of the video.

    Parameters
    ----------
    input_path : str
        Path to input video (MP4, AVI, WebM, etc.).
    output_path : str
        Path to write the output video. Use ``.mov`` for alpha, ``.mp4`` for solid.
    fps : float, optional
        Override the output FPS. When omitted, the source FPS is used, or
        30.0 if the source does not report one.
    model : str, optional
        rembg model name; ``"u2net"`` is used when neither ``model`` nor
        ``session`` is given.
    session : rembg.bg.BaseSession, optional
        Shared session. When omitted, one session is created up front and
        re-used for every frame.
    alpha_matting : bool
        Use alpha matting for finer edges.
    alpha_matting_erode_size : int
        Erosion size for alpha matting mask.
    progress_callback : callable, optional
        Called as ``callback(frame_index, total_frames)`` after each frame is
        processed; ``frame_index`` starts at 1.
    **kwargs : dict
        Extra kwargs passed to rembg.remove.

    Returns
    -------
    str
        Path to the output video file.

    Raises
    ------
    ValueError
        If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"Cannot open video file: {input_path}")

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if fps is None:
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

        # Build the session once; leaving it as None would make rembg create
        # a fresh session for every single frame.
        if session is None:
            session = rembg.new_session(model or "u2net")

        def _processed_frames():
            """Yield one background-removed frame array per source frame."""
            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV decodes to BGR; PIL/rembg expect RGB.
                pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                result = rembg.remove(
                    pil_img,
                    session=session,
                    alpha_matting=alpha_matting,
                    alpha_matting_erode_size=alpha_matting_erode_size,
                    **kwargs,
                )
                if isinstance(result, bytes):
                    result = Image.open(io.BytesIO(result))
                frame_idx += 1
                if progress_callback:
                    progress_callback(frame_idx, total_frames)
                yield np.array(result)

        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            # The encoder iterates the frames exactly once, so the generator
            # is consumed while the capture is still open.
            _encode_png_sequence(
                _processed_frames(), tmpdir, fps, output_path, _is_mov(output_path)
            )
    finally:
        cap.release()

    return output_path
def process_video_with_background(
    input_path: str,
    output_path: str,
    bg_image: Optional[str] = None,
    fps: Optional[float] = None,
    model: Optional[str] = None,
    session: Optional[rembg.bg.BaseSession] = None,
    progress_callback=None,
    **kwargs,
) -> str:
    """
    Remove background from video and composite onto a new background.

    Each frame is run through ``rembg.remove``. When ``bg_image`` is given,
    the result is alpha-blended over that image (resized to the video's frame
    size) and emitted as a fully opaque RGBA frame; otherwise the rembg
    output is emitted unchanged. Frames are streamed to
    ``_encode_png_sequence`` one at a time.

    Parameters
    ----------
    input_path : str
        Input video path.
    output_path : str
        Output video path.
    bg_image : str, optional
        Path to a background image. If None, frames are not composited.
    fps : float, optional
        Output FPS. When omitted, the source FPS is used, or 30.0 if the
        source does not report one.
    model : str, optional
        rembg model name; ``"u2net"`` is used when neither ``model`` nor
        ``session`` is given.
    session : rembg.bg.BaseSession, optional
        Shared session. When omitted, one session is created up front and
        re-used for every frame.
    progress_callback : callable, optional
        Called as ``callback(frame_index, total_frames)`` after each frame is
        processed; ``frame_index`` starts at 1.
    **kwargs : dict
        Extra kwargs for rembg.

    Returns
    -------
    str
        Path to the output video.

    Raises
    ------
    ValueError
        If the input video cannot be opened or the background image cannot
        be read.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"Cannot open video file: {input_path}")

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if fps is None:
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

        bg_rgb = None
        if bg_image:
            bg_bgr = cv2.imread(bg_image)
            if bg_bgr is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError(f"Cannot read background image: {bg_image}")
            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            bg_rgb = cv2.resize(cv2.cvtColor(bg_bgr, cv2.COLOR_BGR2RGB), (out_w, out_h))
            # Converted once here rather than once per frame.
            bg_rgb = bg_rgb.astype(np.float64)

        # Build the session once; leaving it as None would make rembg create
        # a fresh session for every single frame.
        if session is None:
            session = rembg.new_session(model or "u2net")

        def _processed_frames():
            """Yield one RGBA frame array per source frame."""
            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV decodes to BGR; PIL/rembg expect RGB.
                pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                result = rembg.remove(pil_img, session=session, **kwargs)
                if isinstance(result, bytes):
                    result = Image.open(io.BytesIO(result))
                fg = np.array(result)  # assumed RGBA, as in the compositing below
                if bg_rgb is not None:
                    alpha = fg[:, :, 3:4].astype(np.float64) / 255.0
                    fg_rgb = fg[:, :, :3].astype(np.float64)
                    blended = (fg_rgb * alpha + bg_rgb * (1.0 - alpha)).astype(np.uint8)
                    # Keep four channels so the encoder always receives RGBA
                    # frames; the composite itself is fully opaque.
                    opaque = np.full(blended.shape[:2] + (1,), 255, dtype=np.uint8)
                    fg = np.concatenate([blended, opaque], axis=2)
                frame_idx += 1
                if progress_callback:
                    progress_callback(frame_idx, total_frames)
                yield fg

        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            # The encoder iterates the frames exactly once, so the generator
            # is consumed while the capture is still open.
            _encode_png_sequence(
                _processed_frames(), tmpdir, fps, output_path, _is_mov(output_path)
            )
    finally:
        cap.release()

    return output_path

266
src/ui/app.py Normal file
View File

@@ -0,0 +1,266 @@
"""Gradio-based GUI for BG removal (images + video)."""
import os
from PIL import Image
import gradio as gr
from ..core.bg_remove import remove_background_image
from ..core.video_bg_remove import process_video_frame_by_frame
# ── Globals ──────────────────────────────────────────────────────────────────
# Results directory: "output" under the directory three dirname() steps up
# from this file (two levels above this file's own directory).
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "output")
# Created at import time, so importing this module has a filesystem side effect.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# (label, value) pairs offered by the model dropdowns; the value is the string
# handed to the processing functions as the model name.
# NOTE(review): "u2net_3b" is not among the model choices accepted by the CLI
# in main.py — confirm it is a model name rembg actually recognises.
MODEL_OPTIONS = [
("u2net (default)", "u2net"),
("u2net human_seg", "u2net_human_seg"),
("u2netp (light)", "u2netp"),
("u2net 3b (heavy)", "u2net_3b"),
("isnet-general-use", "isnet-general-use"),
("isnet-anime (anime)", "isnet-anime"),
]
def _save_output_filename(input_path: str = "", suffix: str = ".png") -> str:
    """Build a path inside ``OUTPUT_DIR`` named after the input file.

    With an input path the result is ``<input stem>_bg<suffix>``; without one
    it is ``output<suffix>``.
    """
    if not input_path:
        return os.path.join(OUTPUT_DIR, f"output{suffix}")
    base_name = os.path.basename(input_path)
    stem, _ = os.path.splitext(base_name)
    return os.path.join(OUTPUT_DIR, f"{stem}_bg{suffix}")
def _remove_bg(
    image_input,
    model_name: str = "u2net",
    alpha_matting: bool = True,
    foreground_threshold: int = 240,
    background_threshold: int = 10,
    erosion_dilate: int = 0,
) -> Image.Image:
    """
    Gradio callback: remove background from an image.

    The result is also saved as a PNG in the output directory, named after
    the uploaded file when a file path is available and ``output.png``
    otherwise.

    Parameters
    ----------
    image_input : str or PIL.Image.Image
        Path to the uploaded image, or an already-loaded PIL image.
    model_name : str
        rembg model to use.
    alpha_matting : bool
        Enable fine edge detection.
    foreground_threshold, background_threshold : int
        Forwarded as the alpha-matting foreground/background thresholds.
    erosion_dilate : int
        Forwarded as the ``erosion_mask_dilate`` keyword.

    Returns
    -------
    PIL.Image.Image
        Result with transparent background.

    Raises
    ------
    gr.Error
        If no image was provided or the input type is not supported.
    """
    if image_input is None:
        raise gr.Error("No image provided. Please upload an image.")

    if isinstance(image_input, Image.Image):
        # gr.Image with type="pil" returns the image directly; there is no
        # source file name to derive the output name from.
        input_img = image_input.convert("RGB")
        source_path = ""
    elif isinstance(image_input, str):
        # convert() returns an independent copy, so the file can be closed.
        with Image.open(image_input) as opened:
            input_img = opened.convert("RGB")
        source_path = image_input
    else:
        raise gr.Error("Unsupported image input type.")

    # NOTE(review): "force" and "erosion_mask_dilate" reach rembg.remove only
    # through **kwargs — confirm rembg acts on them; the erosion slider may
    # have been meant to drive alpha_matting_erode_size.
    result = remove_background_image(
        input_img,
        model=model_name,
        force=True,
        alpha_matting=alpha_matting,
        alpha_matting_foreground_threshold=foreground_threshold,
        alpha_matting_background_threshold=background_threshold,
        erosion_mask_dilate=erosion_dilate,
    )

    # Only a real path may be handed to the filename helper; passing the PIL
    # image itself would fail inside os.path.basename.
    output_path = _save_output_filename(source_path, ".png")
    result.save(output_path, format="PNG")
    return result
def _set_download(path):
    """Gradio callback: wrap ``path`` in a ``gr.File`` for the download slot.

    ``path`` may be None; it is forwarded as the component value either way.
    """
    return gr.File(value=path)
def _remove_bg_video(
    video_input,
    model_name: str = "u2net",
    alpha_matting: bool = True,
    foreground_threshold: int = 240,
    background_threshold: int = 10,
    erosion_dilate: int = 0,
    progress=gr.Progress(),
):
    """
    Gradio callback: remove background from a video.

    The output is written as a ``.mov`` file in the output directory, named
    after the uploaded video.

    Parameters
    ----------
    video_input : str
        Uploaded video file path.
    model_name : str
        rembg model to use.
    alpha_matting : bool
        Enable fine edge detection per frame.
    foreground_threshold, background_threshold : int
        Forwarded as the alpha-matting foreground/background thresholds.
    erosion_dilate : int
        Forwarded as the ``erosion_mask_dilate`` keyword.
    progress : gr.Progress
        Gradio progress tracker, updated after each processed frame.

    Returns
    -------
    tuple[str, str]
        The output video path twice: once for the video component and once
        for the state that feeds the download link.
    """
    if video_input is None:
        raise gr.Error("No video provided. Please upload a video.")

    def _report(done, total):
        # Skip the update when the total frame count is not positive.
        if not total > 0:
            return
        progress(done / total, desc=f"Processing frame {done}/{total}")

    destination = _save_output_filename(video_input, ".mov")
    removal_options = {
        "model": model_name,
        "alpha_matting": alpha_matting,
        "alpha_matting_foreground_threshold": foreground_threshold,
        "alpha_matting_background_threshold": background_threshold,
        "erosion_mask_dilate": erosion_dilate,
        "progress_callback": _report,
    }
    saved_path = process_video_frame_by_frame(video_input, destination, **removal_options)
    return saved_path, saved_path
# ── Gradio Blocks ────────────────────────────────────────────────────────────
def create_app() -> gr.Blocks:
    """Build and return the Gradio app.

    The layout has an image tab and a video tab, each with an upload
    component, a model dropdown, alpha-matting controls and a "Remove
    Background" button, followed by a notes section. The image button calls
    ``_remove_bg``; the video button calls ``_remove_bg_video`` and then
    ``_set_download`` to fill the download component.
    """
    with gr.Blocks(title="FOBG — AI Background Remover") as app:
        gr.Markdown(
            """
            # 🪄 FOBG — AI Background Remover
            Remove backgrounds from **images** or **videos** using AI.
            Powered by [rembg](https://github.com/danielgatis/rembg) with multiple model options.
            """
        )
        with gr.Tabs():
            # ── Image Tab ──────────────────────────────────────────
            with gr.TabItem("🖼️ Image"):
                with gr.Row():
                    with gr.Column():
                        # type="filepath" means the callback receives a path string.
                        image_input = gr.Image(
                            label="Upload Image",
                            sources=["upload", "clipboard"],
                            type="filepath",
                        )
                        model_select_img = gr.Dropdown(
                            choices=MODEL_OPTIONS,
                            value="u2net",
                            label="Model",
                        )
                        img_alpha = gr.Checkbox(label="Alpha Matting", value=True)
                        img_fg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=240, step=1, label="Foreground Threshold"
                        )
                        img_bg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=10, step=1, label="Background Threshold"
                        )
                        img_erosion = gr.Slider(
                            minimum=0, maximum=32, value=0, step=1, label="Erosion Mask Dilation"
                        )
                        img_btn = gr.Button("Remove Background", variant="primary")
                    with gr.Column():
                        image_output = gr.Image(label="Result")
                img_btn.click(
                    fn=_remove_bg,
                    inputs=[
                        image_input,
                        model_select_img,
                        img_alpha,
                        img_fg_threshold,
                        img_bg_threshold,
                        img_erosion,
                    ],
                    outputs=[image_output],
                )
            # ── Video Tab ──────────────────────────────────────────
            with gr.TabItem("🎥 Video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(
                            label="Upload Video",
                            sources=["upload", "webcam"],
                        )
                        model_select_vid = gr.Dropdown(
                            choices=MODEL_OPTIONS,
                            value="u2net",
                            label="Model",
                        )
                        vid_alpha = gr.Checkbox(label="Alpha Matting", value=True)
                        vid_fg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=240, step=1, label="Foreground Threshold"
                        )
                        vid_bg_threshold = gr.Slider(
                            minimum=1, maximum=255, value=10, step=1, label="Background Threshold"
                        )
                        vid_erosion = gr.Slider(
                            minimum=0, maximum=32, value=0, step=1, label="Erosion Mask Dilation"
                        )
                        vid_btn = gr.Button("Remove Background", variant="primary")
                    with gr.Column():
                        video_output = gr.Video(label="Result")
                        video_download = gr.File(label="Download .MOV")
                        video_path_state = gr.State()
                # First click: process video (shows progress on result), save path to state
                # Then: populate download link from state (no progress shown)
                vid_btn.click(
                    fn=_remove_bg_video,
                    inputs=[
                        video_input,
                        model_select_vid,
                        vid_alpha,
                        vid_fg_threshold,
                        vid_bg_threshold,
                        vid_erosion,
                    ],
                    outputs=[video_output, video_path_state],
                ).then(
                    fn=_set_download,
                    inputs=[video_path_state],
                    outputs=[video_download],
                )
        gr.Markdown(
            """
            ### Notes
            - **🖼️ Image**: Single images — fast, typically 1-5 seconds depending on resolution and model.
            - **🎥 Video**: Videos (MP4, WebM, etc.) — frame-by-frame processing. Can take minutes for long videos.
            - **Models**: Lighter models (u2netp) are faster; heavier models (u2net, isnet) are more accurate.
            - **Output**: Images are saved as PNG with transparency. Videos are saved as MOV with alpha channel.
            - Results are stored in the `output/` directory.
            """
        )
    return app
def launch_app(
    host: str = "0.0.0.0",
    port: int = 7860,
    share: bool = False,
    inbrowser: bool = True,
) -> None:
    """Build the Gradio app and start serving it.

    ``host`` and ``port`` are passed as the server name and port; ``share``
    and ``inbrowser`` are forwarded unchanged, along with the Soft theme.
    """
    demo = create_app()
    launch_options = {
        "server_name": host,
        "server_port": port,
        "share": share,
        "inbrowser": inbrowser,
        "theme": gr.themes.Soft(),
    }
    demo.launch(**launch_options)

BIN
tests/test_image.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 586 B