From f1c158c9fae645003c39dcf5c177190b0b28c63d Mon Sep 17 00:00:00 2001 From: Jason Kneen Date: Mon, 19 Aug 2024 21:11:54 +0100 Subject: [PATCH] fixes for ffmpeg, coreml, metal and resolution changes --- .gitignore | 2 ++ modules/core.py | 6 +++--- modules/predicter.py | 4 ++-- modules/ui.py | 22 ++++++++++++---------- requirements.txt | 3 +++ 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index c84282e..daeb53a 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,5 @@ models/DMDNet.pth .venv/ tf_env/ .tf_env/ +.deepcamlive/ +deep-live-cam/ diff --git a/modules/core.py b/modules/core.py index c39e1da..fef03a7 100644 --- a/modules/core.py +++ b/modules/core.py @@ -75,13 +75,13 @@ def parse_args() -> None: program.add_argument('--keep-audio', help='keep original audio', dest='keep_audio', action='store_true', default=True) program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true', default=True) program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true', default=False) - program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libvpx-vp9', choices=['libx264', 'libx265', 'libvpx-vp9']) + program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libx265', choices=['libx264', 'libx265', 'libvpx-vp9']) program.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=1, choices=range(52), metavar='[0-51]') program.add_argument('--max-memory', help='maximum amount of RAM in GB', dest='max_memory', type=int, default=suggest_max_memory()) program.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default=['coreml'], choices=suggest_execution_providers(), nargs='+') program.add_argument('--execution-threads', help='number of execution threads', 
dest='execution_threads', type=int, default=suggest_execution_threads()) - program.add_argument('--video-processor', help='video processor to use', dest='video_processor', default='cv2', choices=['cv2', 'ffmpeg']) - program.add_argument('--model', help='model to use for face swapping', dest='model', default='inswapper_128.onnx') + program.add_argument('--video-processor', help='video processor to use', dest='video_processor', default='ffmpeg', choices=['cv2', 'ffmpeg']) + program.add_argument('--model', help='model to use for face swapping', dest='model', default='inswapper_128_fp16.onnx') program.add_argument('-v', '--version', action='version', version=f'{modules.metadata.name} {modules.metadata.version}') args = program.parse_args() diff --git a/modules/predicter.py b/modules/predicter.py index bb51b0a..33e06a4 100644 --- a/modules/predicter.py +++ b/modules/predicter.py @@ -4,7 +4,7 @@ from PIL import Image from modules.typing import Frame -MAX_PROBABILITY = 0.85 +MAX_PROBABILITY = 0.7 def predict_frame(target_frame: Frame) -> bool: @@ -21,5 +21,5 @@ def predict_image(target_path: str) -> bool: def predict_video(target_path: str) -> bool: - _, probabilities = opennsfw2.predict_video_frames(video_path=target_path, frame_interval=100) + _, probabilities = opennsfw2.predict_video_frames(video_path=target_path, frame_interval=20) return any(probability > MAX_PROBABILITY for probability in probabilities) diff --git a/modules/ui.py b/modules/ui.py index a3c4023..c35dbba 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -14,10 +14,12 @@ from modules.processors.frame.core import get_frame_processors_modules from modules.utilities import is_image, is_video, resolve_relative_path ROOT = None -ROOT_HEIGHT = 700 +# this is the main ui +ROOT_HEIGHT = 900 ROOT_WIDTH = 600 PREVIEW = None +# this is the preview ui PREVIEW_MAX_HEIGHT = 720 PREVIEW_MAX_WIDTH = 1280 @@ -88,9 +90,9 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C 
many_faces_switch = ctk.CTkSwitch(root, text='Many faces', variable=many_faces_value, cursor='hand2', command=lambda: setattr(modules.globals, 'many_faces', many_faces_value.get())) many_faces_switch.place(relx=0.6, rely=0.65) -# nsfw_value = ctk.BooleanVar(value=modules.globals.nsfw) -# nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get())) -# nsfw_switch.place(relx=0.6, rely=0.7) + nsfw_value = ctk.BooleanVar(value=modules.globals.nsfw) + nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get())) + nsfw_switch.place(relx=0.6, rely=0.7) video_processor_label = ctk.CTkLabel(root, text="Video Processor:") video_processor_label.place(relx=0.1, rely=0.75) @@ -296,9 +298,9 @@ def webcam_preview(): if not cap.isOpened(): update_status("Error: Unable to open webcam. Please check your camera connection.") return - cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) # Set the width of the resolution - cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) # Set the height of the resolution - cap.set(cv2.CAP_PROP_FPS, 30) # Set the frame rate of the webcam + cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320) # Set the width of the resolution + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240) # Set the height of the resolution + cap.set(cv2.CAP_PROP_FPS, 60) # Set the frame rate of the webcam else: import ffmpeg import subprocess @@ -307,7 +309,7 @@ def webcam_preview(): 'ffmpeg', '-f', 'avfoundation', '-framerate', '30', - '-video_size', '1280x720', + '-video_size', '320x240', '-i', '0:none', '-f', 'rawvideo', '-pix_fmt', 'rgb24', @@ -342,10 +344,10 @@ def webcam_preview(): break temp_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) else: - in_bytes = process.stdout.read(1280 * 720 * 3) + in_bytes = process.stdout.read(320 * 240 * 3) if not in_bytes: break - temp_frame = np.frombuffer(in_bytes, np.uint8).reshape([720, 1280, 3]) + temp_frame
= np.frombuffer(in_bytes, np.uint8).reshape([240, 320, 3]) for frame_processor in frame_processors: temp_frame = frame_processor.process_frame(source_face, temp_frame) diff --git a/requirements.txt b/requirements.txt index 5acf630..065f829 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ onnxruntime-silicon==1.16.3 pillow==9.5.0 insightface==0.7.3 torch==2.1.0 +torchvision==0.16.0 tensorflow-macos==2.16.2 tensorflow-metal==1.1.0 @@ -25,6 +26,8 @@ prettytable==3.11.0 opencv-python==4.8.1.78 # Optional: for cv2 video processing ffmpeg-python==0.2.0 # For ffmpeg video processing +customtkinter==5.2.2 + # Optional dependencies (comment out if not needed) # albumentations==1.4.13 # coloredlogs==15.0.1