diff --git a/README.md b/README.md
index a71318d..6b382f0 100644
--- a/README.md
+++ b/README.md
@@ -146,13 +146,14 @@ You can now use the virtual camera output (uses pyvirtualcam) by turning on the
 Additional command line arguments are given below. To learn out what they do, check [this guide](https://github.com/s0md3v/roop/wiki/Advanced-Options).
 
+```
 options:
   -h, --help                                   show this help message and exit
   -s SOURCE_PATH, --source SOURCE_PATH         select an source image
   -t TARGET_PATH, --target TARGET_PATH         select an target image or video
   -o OUTPUT_PATH, --output OUTPUT_PATH         select output file or directory
-  --frame-processor FRAME_PROCESSOR [FRAME_PROCESSOR ...] frame processors (choices: face_swapper, face_enhancer, ...)
+  --frame-processor FRAME_PROCESSOR [FRAME_PROCESSOR ...] frame processors (choices: face_swapper, face_enhancer, super_resolution, ...)
   --keep-fps                                   keep original fps
   --keep-audio                                 keep original audio
   --keep-frames                                keep temporary frames
@@ -166,11 +167,22 @@ options:
   --execution-threads EXECUTION_THREADS        number of execution threads
   --headless                                   run in headless mode
   --enhancer-upscale-factor                    Sets the upscale factor for the enhancer. Only applies if `face_enhancer` is set as a frame-processor
+  --source-image-scaling-factor                Sets the upscale factor for source images. Only applies if `face_swapper` is set as a frame-processor
+  -r SCALE, --super-resolution-scale-factor SCALE Super resolution scale factor (choices: 2, 3, 4)
   -v, --version                                show program's version number and exit
 ```
 
 Looking for a CLI mode? Using the -s/--source argument will make the run program in cli mode.
 
+To improve the output quality, you can add the `super_resolution` frame processor after `face_swapper`. It upscales the video by 2x, 3x or 4x; set the factor with the `-r` or `--super-resolution-scale-factor` argument.
+Processing time increases with the upscale factor, but the underlying ESPCN model keeps it reasonably quick.
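+
+For example, assuming the usual `run.py` entry point and placeholder file names, a face swap followed by 4x super resolution could look like this:
+
+```
+python run.py -s source_face.jpg -t target.mp4 -o output.mp4 --frame-processor face_swapper super_resolution -r 4
+```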
+
 ## Credits
 - [henryruhs](https://github.com/henryruhs): for being an irreplaceable contributor to the project
 - [ffmpeg](https://ffmpeg.org/): for making video related operations easy
diff --git a/modules/core.py b/modules/core.py
index a0d45ca..d248f97 100644
--- a/modules/core.py
+++ b/modules/core.py
@@ -51,14 +51,19 @@ def parse_args() -> None:
     program.add_argument('-t', '--target', help='Select a target image or video', dest='target_path')
     program.add_argument('-o', '--output', help='Select output file or directory', dest='output_path')
     program.add_argument('--frame-processor', help='Pipeline of frame processors', dest='frame_processor',
-                         default=['face_swapper'], choices=['face_swapper', 'face_enhancer'], nargs='+')
+                         default=['face_swapper'], choices=['face_swapper', 'face_enhancer', 'super_resolution'],
+                         nargs='+')
     program.add_argument('--keep-fps', help='Keep original fps', dest='keep_fps', action='store_true', default=False)
-    program.add_argument('--keep-audio', help='Keep original audio', dest='keep_audio', action='store_true', default=True)
-    program.add_argument('--keep-frames', help='Keep temporary frames', dest='keep_frames', action='store_true', default=False)
-    program.add_argument('--many-faces', help='Process every face', dest='many_faces', action='store_true', default=False)
+    program.add_argument('--keep-audio', help='Keep original audio', dest='keep_audio', action='store_true',
+                         default=True)
+    program.add_argument('--keep-frames', help='Keep temporary frames', dest='keep_frames', action='store_true',
+                         default=False)
+    program.add_argument('--many-faces', help='Process every face', dest='many_faces', action='store_true',
+                         default=False)
     program.add_argument('--video-encoder', help='Adjust output video encoder', dest='video_encoder', default='libx264',
                          choices=['libx264', 'libx265', 'libvpx-vp9'])
-    program.add_argument('--video-quality', help='Adjust output video quality', dest='video_quality', type=int, default=18,
+    program.add_argument('--video-quality', help='Adjust output video quality', dest='video_quality', type=int,
+                         default=18,
                          choices=range(52), metavar='[0-51]')
     program.add_argument('--live-mirror', help='The live camera display as you see it in the front-facing camera frame',
                          dest='live_mirror', action='store_true', default=False)
@@ -74,6 +79,10 @@ def parse_args() -> None:
     program.add_argument('--enhancer-upscale-factor', help='Sets the upscale factor for the enhancer. Only applies if `face_enhancer` is set as a frame-processor', dest='enhancer_upscale_factor', type=int, default=1)
+    program.add_argument('--source-image-scaling-factor', help='Set the upscale factor for source images',
+                         dest='source_image_scaling_factor', default=2, type=int)
+    program.add_argument('-r', '--super-resolution-scale-factor', dest='super_resolution_scale_factor',
+                         help='Set the upscale factor for super resolution', default=4, choices=[2, 3, 4], type=int)
     program.add_argument('-v', '--version', action='version', version=f'{modules.metadata.name} {modules.metadata.version}')
@@ -104,6 +113,8 @@ def parse_args() -> None:
     modules.globals.execution_threads = args.execution_threads
     modules.globals.headless = args.headless
     modules.globals.enhancer_upscale_factor = args.enhancer_upscale_factor
+    modules.globals.source_image_scaling_factor = args.source_image_scaling_factor
+    modules.globals.sr_scale_factor = args.super_resolution_scale_factor
 
     # Handle face enhancer tumbler
     modules.globals.fp_ui['face_enhancer'] = 'face_enhancer' in args.frame_processor
@@ -188,12 +199,14 @@ def limit_resources() -> None:
         try:
             soft, hard = resource.getrlimit(resource.RLIMIT_DATA)
             if memory > hard:
-                print(f"Warning: Requested memory limit {memory / (1024 ** 3)} GB exceeds system's hard limit. Setting to maximum allowed {hard / (1024 ** 3)} GB.")
+                print(
+                    f"Warning: Requested memory limit {memory / (1024 ** 3)} GB exceeds system's hard limit. Setting to maximum allowed {hard / (1024 ** 3)} GB.")
                 memory = hard
             resource.setrlimit(resource.RLIMIT_DATA, (memory, memory))
         except ValueError as e:
             print(f"Warning: Could not set memory limit: {e}. Continuing with default limits.")
 
+
 def release_resources() -> None:
     if 'cuda' in modules.globals.execution_providers:
         torch.cuda.empty_cache()
@@ -247,7 +260,8 @@ def process_image_to_image() -> None:
     for frame_processor in get_frame_processors_modules(modules.globals.frame_processors):
         update_status('Processing...', frame_processor.NAME)
-        frame_processor.process_image(modules.globals.source_path, modules.globals.output_path, modules.globals.output_path)
+        frame_processor.process_image(modules.globals.source_path, modules.globals.output_path,
+                                      modules.globals.output_path)
         release_resources()
     if is_image(modules.globals.target_path):
diff --git a/modules/globals.py b/modules/globals.py
index a435e4d..a6c63c3 100644
--- a/modules/globals.py
+++ b/modules/globals.py
@@ -31,3 +31,5 @@ nsfw = None
 camera_input_combobox = None
 webcam_preview_running = False
 enhancer_upscale_factor = 1
+source_image_scaling_factor = 2
+sr_scale_factor = 4
\ No newline at end of file
diff --git a/modules/processors/frame/face_swapper.py b/modules/processors/frame/face_swapper.py
index 6a228c5..31d5fb4 100644
--- a/modules/processors/frame/face_swapper.py
+++ b/modules/processors/frame/face_swapper.py
@@ -10,6 +10,7 @@ from modules.core import update_status
 from modules.face_analyser import get_one_face, get_many_faces
 from modules.typing import Face, Frame
 from modules.utilities import conditional_download, resolve_relative_path, is_image, is_video
+import numpy as np
 
 FACE_SWAPPER = None
 THREAD_LOCK = threading.Lock()
@@ -43,6 +44,22 @@ def get_face_swapper() -> Any:
             FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=modules.globals.execution_providers)
     return FACE_SWAPPER
 
+def upscale_image(image: np.ndarray, scaling_factor: int = modules.globals.source_image_scaling_factor) -> np.ndarray:
+    """
+    Upscales the given image by the specified scaling factor.
+
+    Args:
+        image (np.ndarray): The input image to upscale.
+        scaling_factor (int): The factor by which to upscale the image.
+
+    Returns:
+        np.ndarray: The upscaled image.
+    """
+    height, width = image.shape[:2]
+    new_size = (width * scaling_factor, height * scaling_factor)
+    upscaled_image = cv2.resize(image, new_size, interpolation=cv2.INTER_CUBIC)
+    return upscaled_image
+
 def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
     return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True)
 
@@ -59,7 +76,14 @@ def process_frame(source_face: Face, temp_frame: Frame) -> Frame:
     return temp_frame
 
 def process_frames(source_path: str, temp_frame_paths: List[str], progress: Any = None) -> None:
-    source_face = get_one_face(cv2.imread(source_path))
+    source_image = cv2.imread(source_path)
+    if source_image is None:
+        print(f"Failed to load source image from {source_path}")
+        return
+    # Upscale the source image before face detection; the factor comes from --source-image-scaling-factor
+    source_image_upscaled = upscale_image(source_image, scaling_factor=modules.globals.source_image_scaling_factor)
+    source_face = get_one_face(source_image_upscaled)
+
     for temp_frame_path in temp_frame_paths:
         temp_frame = cv2.imread(temp_frame_path)
         try:
diff --git a/modules/processors/frame/super_resolution.py b/modules/processors/frame/super_resolution.py
new file mode 100644
index 0000000..13d9a5f
--- /dev/null
+++ b/modules/processors/frame/super_resolution.py
@@ -0,0 +1,197 @@
+import threading
+import traceback
+from typing import Any, List
+import cv2
+
+import os
+
+import modules.globals
+import modules.processors.frame.core
+from modules.core import update_status
+from modules.face_analyser import get_one_face
+from modules.utilities import conditional_download, resolve_relative_path, is_image, is_video
+import numpy as np
+
+NAME = 'DLC.SUPER-RESOLUTION'
+THREAD_SEMAPHORE = threading.Semaphore()
+
+
+# Singleton class for Super-Resolution
+class SuperResolutionModel:
+    _instance = None
+    _lock = threading.Lock()
+
+    def __init__(self, sr_model_path: str = f'ESPCN_x{modules.globals.sr_scale_factor}.pb'):
+        if SuperResolutionModel._instance is not None:
+            raise Exception("This class is a singleton!")
+        self.sr = cv2.dnn_superres.DnnSuperResImpl_create()
+        self.model_path = os.path.join(resolve_relative_path('../models'), sr_model_path)
+        if not os.path.exists(self.model_path):
+            raise FileNotFoundError(f"Super-resolution model not found at {self.model_path}")
+        try:
+            self.sr.readModel(self.model_path)
+            self.sr.setModel("espcn", modules.globals.sr_scale_factor)  # Using ESPCN with 2x, 3x or 4x upscaling
+        except Exception as e:
+            print(f"Error during super-resolution model initialization: {e}")
+            raise e
+
+    @classmethod
+    def get_instance(cls, sr_model_path: str = f'ESPCN_x{modules.globals.sr_scale_factor}.pb'):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    try:
+                        cls._instance = cls(sr_model_path)
+                    except Exception as e:
+                        raise RuntimeError(f"Failed to initialize SuperResolution: {str(e)}")
+        return cls._instance
+
+
+def pre_check() -> bool:
+    """
+    Checks and downloads the necessary models before running the super-resolution processor.
+ """ + download_directory_path = resolve_relative_path('../models') + # Download the super-resolution model as well + conditional_download(download_directory_path, [ + f'https://huggingface.co/spaces/PabloGabrielSch/AI_Resolution_Upscaler_And_Resizer/resolve/bcd13b766a9499196e8becbe453c4a848673b3b6/models/ESPCN_x{modules.globals.sr_scale_factor}.pb' + ]) + return True + +def pre_start() -> bool: + if not is_image(modules.globals.source_path): + update_status('Select an image for source path.', NAME) + return False + elif not get_one_face(cv2.imread(modules.globals.source_path)): + update_status('No face detected in the source path.', NAME) + return False + if not is_image(modules.globals.target_path) and not is_video(modules.globals.target_path): + update_status('Select an image or video for target path.', NAME) + return False + return True + + +def apply_super_resolution(image: np.ndarray) -> np.ndarray: + """ + Applies super-resolution to the given image using the provided super-resolver. + + Args: + image (np.ndarray): The input image to enhance. + sr_model_path (str): ESPCN model path for super-resolution. + + Returns: + np.ndarray: The super-resolved image. + """ + with THREAD_SEMAPHORE: + sr_model = SuperResolutionModel.get_instance() + + if sr_model is None: + print("Super-resolution model is not initialized.") + return image + try: + upscaled_image = sr_model.sr.upsample(image) + return upscaled_image + except Exception as e: + print(f"Error during super-resolution: {e}") + return image + + +def process_frame(frame: np.ndarray) -> np.ndarray: + """ + Processes a single frame by swapping the source face into detected target faces. + + Args: + + frame (np.ndarray): The target frame image. + + Returns: + np.ndarray: The processed frame with swapped faces. + """ + + # Apply super-resolution to the entire frame + frame = apply_super_resolution(frame) + + return frame + +def process_frames(source_path: str, temp_frame_paths: List[str], progress: Any = None) -> None: + """ + Processes multiple frames by swapping the source face into each target frame. + + Args: + source_path (str): Path to the source image. + temp_frame_paths (List[str]): List of paths to target frame images. + progress (Any, optional): Progress tracker. Defaults to None. + """ + for idx, temp_frame_path in enumerate(temp_frame_paths): + frame = cv2.imread(temp_frame_path) + if frame is None: + print(f"Failed to load frame from {temp_frame_path}") + continue + try: + result = process_frame(frame) + cv2.imwrite(temp_frame_path, result) + except Exception as exception: + traceback.print_exc() + print(f"Error processing frame {temp_frame_path}: {exception}") + if progress: + progress.update(1) + +def upscale_image(image: np.ndarray, scaling_factor: int = 2) -> np.ndarray: + """ + Upscales the given image by the specified scaling factor. + + Args: + image (np.ndarray): The input image to upscale. + scaling_factor (int): The factor by which to upscale the image. + + Returns: + np.ndarray: The upscaled image. + """ + height, width = image.shape[:2] + new_size = (width * scaling_factor, height * scaling_factor) + upscaled_image = cv2.resize(image, new_size, interpolation=cv2.INTER_CUBIC) + return upscaled_image + +def process_image(source_path: str, target_path: str, output_path: str) -> None: + """ + Processes a single image by swapping the source face into the target image. + + Args: + source_path (str): Path to the source image. + target_path (str): Path to the target image. 
+        output_path (str): Path to save the output image.
+    """
+    source_image = cv2.imread(source_path)
+    if source_image is None:
+        print(f"Failed to load source image from {source_path}")
+        return
+
+    # Upscale the source image for better quality before face detection
+    source_image_upscaled = upscale_image(source_image, scaling_factor=modules.globals.source_image_scaling_factor)
+
+    # Detect the source face from the upscaled image
+    source_face = get_one_face(source_image_upscaled)
+    if source_face is None:
+        print("No source face detected.")
+        return
+
+    target_frame = cv2.imread(target_path)
+    if target_frame is None:
+        print(f"Failed to load target image from {target_path}")
+        return
+
+    # Process the frame
+    result = process_frame(target_frame)
+
+    # Save the processed frame
+    cv2.imwrite(output_path, result)
+
+
+def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
+    """
+    Applies super-resolution to every frame of the video.
+
+    Args:
+        source_path (str): Path to the source image (unused by this processor).
+        temp_frame_paths (List[str]): List of paths to video frame images.
+    """
+    modules.processors.frame.core.process_video(None, temp_frame_paths, process_frames)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 96bb6b4..01e46a1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 
 numpy==1.23.5
-opencv-python==4.8.1.78
+opencv-contrib-python==4.10.0.84
 onnx==1.16.0
 insightface==0.7.3
 psutil==5.9.8
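For reference, the core path the new `super_resolution` processor relies on is OpenCV's `cv2.dnn_superres` module, which ships with `opencv-contrib-python` (hence the `requirements.txt` change). Below is a minimal standalone sketch of that path; the model and image paths are placeholders:

```python
import cv2

# Create the DNN super-resolution wrapper (provided by opencv-contrib, not plain opencv-python).
sr = cv2.dnn_superres.DnnSuperResImpl_create()

# Load pretrained ESPCN weights; the scale passed to setModel must match the weights file.
sr.readModel('models/ESPCN_x4.pb')
sr.setModel('espcn', 4)

frame = cv2.imread('frame.png')        # placeholder input frame
upscaled = sr.upsample(frame)          # returns the frame at 4x resolution
cv2.imwrite('frame_x4.png', upscaled)
```

This mirrors how `SuperResolutionModel` ties `modules.globals.sr_scale_factor` to the downloaded `ESPCN_x{N}.pb` file, so the scale given to `setModel` always matches the weights.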