From f1c158c9fae645003c39dcf5c177190b0b28c63d Mon Sep 17 00:00:00 2001
From: Jason Kneen <jason.kneen@bouncingfish.com>
Date: Mon, 19 Aug 2024 21:11:54 +0100
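Subject: [PATCH] fixes for ffmpeg, coreml, metal and resolution changes

* core: default to the libx265 encoder, the ffmpeg video processor and
  the inswapper_128_fp16.onnx model
* predicter: lower MAX_PROBABILITY from 0.85 to 0.7 and frame_interval
  from 100 to 20
* ui: raise ROOT_HEIGHT to 900, re-enable the NSFW switch and request a
  320x240 webcam capture (60 fps on the cv2 path)
* requirements: add torchvision and customtkinter
* gitignore: ignore .deepcamlive/ and deep-live-cam/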
---
 .gitignore           |  2 ++
 modules/core.py      |  6 +++---
 modules/predicter.py |  4 ++--
 modules/ui.py        | 22 ++++++++++++----------
 requirements.txt     |  3 +++
 5 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index c84282e..daeb53a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,5 @@ models/DMDNet.pth
 .venv/
 tf_env/
 .tf_env/
+.deepcamlive/
+deep-live-cam/
diff --git a/modules/core.py b/modules/core.py
index c39e1da..fef03a7 100644
--- a/modules/core.py
+++ b/modules/core.py
@@ -75,13 +75,13 @@ def parse_args() -> None:
     program.add_argument('--keep-audio', help='keep original audio', dest='keep_audio', action='store_true', default=True)
     program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true', default=True)
     program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true', default=False)
-    program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libvpx-vp9', choices=['libx264', 'libx265', 'libvpx-vp9'])
+    program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libx265', choices=['libx264', 'libx265', 'libvpx-vp9'])
     program.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=1, choices=range(52), metavar='[0-51]')
     program.add_argument('--max-memory', help='maximum amount of RAM in GB', dest='max_memory', type=int, default=suggest_max_memory())
     program.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default=['coreml'], choices=suggest_execution_providers(), nargs='+')
     program.add_argument('--execution-threads', help='number of execution threads', dest='execution_threads', type=int, default=suggest_execution_threads())
-    program.add_argument('--video-processor', help='video processor to use', dest='video_processor', default='cv2', choices=['cv2', 'ffmpeg'])
-    program.add_argument('--model', help='model to use for face swapping', dest='model', default='inswapper_128.onnx')
+    program.add_argument('--video-processor', help='video processor to use', dest='video_processor', default='ffmpeg', choices=['cv2', 'ffmpeg'])
+    program.add_argument('--model', help='model to use for face swapping', dest='model', default='inswapper_128_fp16.onnx')
     program.add_argument('-v', '--version', action='version', version=f'{modules.metadata.name} {modules.metadata.version}')
 
     args = program.parse_args()
diff --git a/modules/predicter.py b/modules/predicter.py
index bb51b0a..33e06a4 100644
--- a/modules/predicter.py
+++ b/modules/predicter.py
@@ -4,7 +4,7 @@ from PIL import Image
 
 from modules.typing import Frame
 
-MAX_PROBABILITY = 0.85
+MAX_PROBABILITY = 0.7
 
 
 def predict_frame(target_frame: Frame) -> bool:
@@ -21,5 +21,5 @@ def predict_image(target_path: str) -> bool:
 
 
 def predict_video(target_path: str) -> bool:
-    _, probabilities = opennsfw2.predict_video_frames(video_path=target_path, frame_interval=100)
+    _, probabilities = opennsfw2.predict_video_frames(video_path=target_path, frame_interval=20)
     return any(probability > MAX_PROBABILITY for probability in probabilities)
diff --git a/modules/ui.py b/modules/ui.py
index a3c4023..c35dbba 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -14,10 +14,12 @@ from modules.processors.frame.core import get_frame_processors_modules
 from modules.utilities import is_image, is_video, resolve_relative_path
 
 ROOT = None
-ROOT_HEIGHT = 700
+# Size of the main UI window
+ROOT_HEIGHT = 900
 ROOT_WIDTH = 600
 
 PREVIEW = None
+# Maximum size of the preview UI window
 PREVIEW_MAX_HEIGHT = 720
 PREVIEW_MAX_WIDTH = 1280
 
@@ -88,9 +90,9 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
     many_faces_switch = ctk.CTkSwitch(root, text='Many faces', variable=many_faces_value, cursor='hand2', command=lambda: setattr(modules.globals, 'many_faces', many_faces_value.get()))
     many_faces_switch.place(relx=0.6, rely=0.65)
 
-#    nsfw_value = ctk.BooleanVar(value=modules.globals.nsfw)
-#    nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get()))
-#    nsfw_switch.place(relx=0.6, rely=0.7)
+    nsfw_value = ctk.BooleanVar(value=modules.globals.nsfw)
+    nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get()))
+    nsfw_switch.place(relx=0.6, rely=0.7)
 
     video_processor_label = ctk.CTkLabel(root, text="Video Processor:")
     video_processor_label.place(relx=0.1, rely=0.75)
@@ -296,9 +298,9 @@ def webcam_preview():
         if not cap.isOpened():
             update_status("Error: Unable to open webcam. Please check your camera connection.")
             return
-        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)  # Set the width of the resolution
-        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)  # Set the height of the resolution
-        cap.set(cv2.CAP_PROP_FPS, 30)  # Set the frame rate of the webcam
+        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)  # Set the width of the resolution
+        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)  # Set the height of the resolution
+        cap.set(cv2.CAP_PROP_FPS, 60)  # Set the frame rate of the webcam
     else:
         import ffmpeg
         import subprocess
@@ -307,7 +309,7 @@ def webcam_preview():
             'ffmpeg',
             '-f', 'avfoundation',
             '-framerate', '30',
-            '-video_size', '1280x720',
+            '-video_size', '320x240',  # WxH; must match the 320 * 240 * 3 bytes read per frame
             '-i', '0:none',
             '-f', 'rawvideo',
             '-pix_fmt', 'rgb24',
@@ -342,10 +344,10 @@ def webcam_preview():
                 break
             temp_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         else:
-            in_bytes = process.stdout.read(1280 * 720 * 3)
+            in_bytes = process.stdout.read(320 * 240 * 3)
             if not in_bytes:
                 break
-            temp_frame = np.frombuffer(in_bytes, np.uint8).reshape([720, 1280, 3])
+            temp_frame = np.frombuffer(in_bytes, np.uint8).reshape([240, 320, 3])
 
         for frame_processor in frame_processors:
             temp_frame = frame_processor.process_frame(source_face, temp_frame)
diff --git a/requirements.txt b/requirements.txt
index 5acf630..065f829 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ onnxruntime-silicon==1.16.3
 pillow==9.5.0
 insightface==0.7.3
 torch==2.1.0
+torchvision==0.16.0  # release paired with torch 2.1.0
 tensorflow-macos==2.16.2
 tensorflow-metal==1.1.0
 
@@ -25,6 +26,8 @@ prettytable==3.11.0
 opencv-python==4.8.1.78  # Optional: for cv2 video processing
 ffmpeg-python==0.2.0  # For ffmpeg video processing
 
+customtkinter==5.2.2
+
 # Optional dependencies (comment out if not needed)
 # albumentations==1.4.13
 # coloredlogs==15.0.1