Commit a528bee9 authored by De Silva K.C.C.C

before merge

parent 03f7f066
-from typing import Optional
 from fastapi import FastAPI
-from pydantic import BaseModel
+from fastapi.responses import FileResponse
+import subprocess
 
 app = FastAPI()
 
-class Item(BaseModel):
-    name: str
-    price: float
-    is_offer: Optional[bool] = None
 
 @app.get("/")
-def read_root():
+def root():
     return {"Hello": "World"}
 
-@app.get("/items/{item_id}")
-def read_item(item_id: int, q: Optional[str] = None):
-    return {"item_id": item_id, "q": q}
-
-@app.put("/items/{item_id}")
-def update_item(item_id: int, item: Item):
-    return {"item_name": item.name, "item_id": item_id}
+@app.get('/summarized_video')
+def short_note():
+    #python silenceRemover.py --input_file "C:\Users\User\Desktop\video\IAS_Lec1_10min.mp4" --output_file "C:\Users\User\Desktop\video\IAS_Lec1_10min_ALTERED.mp4" --silent_threshold 0 --silent_speed 999999 --sounded_speed 1.5 --frame_margin 2 --frame_rate 60
+    command = 'python silenceRemover.py --input_file \"C:\\Users\\User\\Desktop\\video\\IAS_Lec1_10min.mp4\" --output_file \"C:\\Users\\User\\Desktop\\video\\IAS_Lec1_10min_ALTERED.mp4\" --silent_threshold 0 --silent_speed 999999 --sounded_speed 1 --frame_margin 2 --frame_rate 60'
+    subprocess.call(command, shell=True)
+    print(command)
+    return FileResponse(r"C:\Users\User\Desktop\video\IAS_Lec1_10min_ALTERED.mp4")
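For reference, a minimal sketch of the same endpoint with the silenceRemover.py call built as an argument list rather than a single shell string, so the Windows paths need no manual escaping and a failing run raises an error instead of returning a stale file. The paths and parameter values are placeholders standing in for the hard-coded ones above; this is a sketch, not the committed code.

# Sketch only: same endpoint, command built as an argument list (assumed paths).
from pathlib import Path
import subprocess

from fastapi import FastAPI
from fastapi.responses import FileResponse

app = FastAPI()

INPUT_VIDEO = Path(r"C:\Users\User\Desktop\video\IAS_Lec1_10min.mp4")    # placeholder path
OUTPUT_VIDEO = INPUT_VIDEO.with_name(INPUT_VIDEO.stem + "_ALTERED.mp4")  # same naming scheme as the script

@app.get("/summarized_video")
def short_note():
    # Passing a list avoids shell quoting/escaping of the backslashes entirely.
    command = [
        "python", "silenceRemover.py",
        "--input_file", str(INPUT_VIDEO),
        "--output_file", str(OUTPUT_VIDEO),
        "--silent_threshold", "0",
        "--silent_speed", "999999",
        "--sounded_speed", "1",
        "--frame_margin", "2",
        "--frame_rate", "60",
    ]
    subprocess.run(command, check=True)  # raises CalledProcessError if the script fails
    return FileResponse(str(OUTPUT_VIDEO), media_type="video/mp4")
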
from contextlib import closing
from PIL import Image
import subprocess
from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter
from scipy.io import wavfile
import numpy as np
import re
import math
from shutil import copyfile, rmtree
import os
import argparse
from pytube import YouTube
import time
# download the video from a YouTube URL and return the renamed local filename
def downloadFile(url):
    name = YouTube(url).streams.first().download()
    newname = name.replace(' ', '_')
    os.rename(name, newname)
    return newname
# get the peak absolute volume of an audio array
def getMaxVolume(s):
    maxv = float(np.max(s))
    minv = float(np.min(s))
    return max(maxv, -minv)
# copy an individual frame into the set of segmented images for the new video;
# this keeps the renumbered frames in step with the new audio timeline
def copyFrame(inputFrame, outputFrame):
    src = TEMP_FOLDER+"/frame{:06d}".format(inputFrame+1)+".jpg"
    dst = TEMP_FOLDER+"/newFrame{:06d}".format(outputFrame+1)+".jpg"
    if not os.path.isfile(src):
        return False
    copyfile(src, dst)
    if outputFrame % 20 == 19:
        print(str(outputFrame+1)+" time-altered frames saved.")
    return True
# Summarized video name
def inputToOutputFilename(filename):
    dotIndex = filename.rfind(".")
    return filename[:dotIndex]+"_ALTERED"+filename[dotIndex:]
# making a TEMP folder to do all the computational stuff in
def createPath(s):
    # assert (not os.path.exists(s)), "The filepath "+s+" already exists. Don't want to overwrite it. Aborting."
    try:
        os.mkdir(s)
    except OSError:
        assert False, "Creation of the directory %s failed. (The TEMP folder may already exist. Delete or rename it, and try again.)" % s
# deleting the TEMP folder as it is no longer required
def deletePath(s):  # Dangerous! Watch out!
    try:
        rmtree(s, ignore_errors=False)
    except OSError as e:
        print("Deletion of the directory %s failed" % s)
        print(e)
# input command features with help keywords
# default values already defined if not specified in the command
parser = argparse.ArgumentParser(description='Modifies a video file to play at different speeds when there is sound vs. silence.')
parser.add_argument('--input_file', type=str, help='the video file you want modified')
parser.add_argument('--input_folder', type=str, help='the folder of video files to modify')
parser.add_argument('--url', type=str, help='A youtube url to download and process')
parser.add_argument('--output_file', type=str, default="", help="the output file. (optional. if not included, it'll just modify the input file name). disregarded when the input is a folder")
parser.add_argument('--silent_threshold', type=float, default=0.03, help="the volume amount that a frame's audio needs to surpass to be considered \"sounded\". It ranges from 0 (silence) to 1 (max volume)")
parser.add_argument('--sounded_speed', type=float, default=1.00, help="the speed that sounded (spoken) frames should be played at. Typically 1.")
parser.add_argument('--silent_speed', type=float, default=5.00, help="the speed that silent frames should be played at. 999999 for jumpcutting.")
parser.add_argument('--frame_margin', type=float, default=1, help="how many silent frames adjacent to sounded frames should be included on either side of speech to provide context")
parser.add_argument('--sample_rate', type=float, default=44100, help="sample rate of the input and output videos")
parser.add_argument('--frame_rate', type=float, default=30, help="frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work.")
parser.add_argument('--frame_quality', type=int, default=3, help="quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default.")
args = parser.parse_args()
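# Example invocation (file names are placeholders, for illustration only):
#   python silenceRemover.py --input_file lecture.mp4 --output_file lecture_ALTERED.mp4 \
#       --silent_threshold 0.03 --silent_speed 999999 --sounded_speed 1.0 --frame_margin 2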
frameRate = args.frame_rate
SAMPLE_RATE = args.sample_rate
SILENT_THRESHOLD = args.silent_threshold
FRAME_SPREADAGE = args.frame_margin
NEW_SPEED = [args.silent_speed, args.sounded_speed]
if args.url is not None:
    INPUT_FILES = [downloadFile(args.url)]
elif args.input_folder is not None:
    INPUT_FILES = []
    for fn in os.listdir(args.input_folder):
        # this assumes every file in the folder is a video; ffmpeg will raise an error otherwise
        if "ALTERED" not in fn:
            INPUT_FILES.append(os.path.join(args.input_folder, fn))
else:
    INPUT_FILES = [args.input_file]
URL = args.url
FRAME_QUALITY = args.frame_quality
for INPUT_FILE in INPUT_FILES:
    start_time = time.time()  # per-file wall-clock timer
    assert INPUT_FILE is not None, "no input file was given (use --input_file, --input_folder or --url)"
    if len(args.output_file) >= 1 and len(INPUT_FILES) == 1:
        OUTPUT_FILE = args.output_file
    else:
        OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
    if os.path.exists(OUTPUT_FILE):
        print(f"{OUTPUT_FILE} already exists, skipping this file")
        continue
    TEMP_FOLDER = "TEMP"
    AUDIO_FADE_ENVELOPE_SIZE = 400  # smooth out each transition's audio by quickly fading in/out (arbitrary magic number)
    createPath(TEMP_FOLDER)
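    # Split the input with ffmpeg so the rest of the loop can work on it: every video frame
    # as a numbered JPEG (at FRAME_QUALITY) and the audio track as a stereo WAV resampled to
    # SAMPLE_RATE, all inside the TEMP folder; ffmpeg's info dump for the input is also
    # captured so the real frame rate can be parsed from it below.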
command = "ffmpeg -i "+INPUT_FILE+" -qscale:v "+str(FRAME_QUALITY)+" "+TEMP_FOLDER+"/frame%06d.jpg -hide_banner"
subprocess.call(command, shell=True)
command = "ffmpeg -i "+INPUT_FILE+" -ab 160k -ac 2 -ar "+str(SAMPLE_RATE)+" -vn "+TEMP_FOLDER+"/audio.wav"
subprocess.call(command, shell=True)
command = "ffmpeg -i "+TEMP_FOLDER+"/input.mp4 2>&1"
f = open(TEMP_FOLDER+"/params.txt", "w")
subprocess.call(command, shell=True, stdout=f)
    sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
    audioSampleCount = audioData.shape[0]
    maxAudioVolume = getMaxVolume(audioData)
    f = open(TEMP_FOLDER+"/params.txt", 'r+')
    pre_params = f.read()
    f.close()
    params = pre_params.split('\n')
    for line in params:
        m = re.search('Stream #.*Video.* ([0-9]*) fps', line)
        if m is not None:
            frameRate = float(m.group(1))
    samplesPerFrame = sampleRate/frameRate
    audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))
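    # Loudness detection: for every video-frame-sized slice of audio, compare its peak
    # (normalised against the whole clip's peak) with SILENT_THRESHOLD and mark the frame
    # as loud (1) or silent (0).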
    hasLoudAudio = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(i*samplesPerFrame)
        end = min(int((i+1)*samplesPerFrame), audioSampleCount)
        audiochunks = audioData[start:end]
        maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
        if maxchunksVolume >= SILENT_THRESHOLD:
            hasLoudAudio[i] = 1
    chunks = [[0, 0, 0]]
    shouldIncludeFrame = np.zeros((audioFrameCount))
    for i in range(audioFrameCount):
        start = int(max(0, i-FRAME_SPREADAGE))
        end = int(min(audioFrameCount, i+1+FRAME_SPREADAGE))
        shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
        if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]):  # did we flip?
            chunks.append([chunks[-1][1], i, shouldIncludeFrame[i-1]])
    chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i-1]])
    chunks = chunks[1:]
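    # Each chunk is [first frame, last frame, has loud audio?]. Below, its audio is
    # time-stretched with a phase vocoder at the matching NEW_SPEED, faded in/out over
    # AUDIO_FADE_ENVELOPE_SIZE samples to avoid clicks at the joins, and the video frames
    # are renumbered so they stay in sync with the new audio timeline.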
    outputAudioData = np.zeros((0, audioData.shape[1]))
    outputPointer = 0
    lastExistingFrame = None
    for chunk in chunks:
        audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]
        sFile = TEMP_FOLDER+"/tempStart.wav"
        eFile = TEMP_FOLDER+"/tempEnd.wav"
        wavfile.write(sFile, SAMPLE_RATE, audioChunk)
        with WavReader(sFile) as reader:
            with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
                tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
                tsm.run(reader, writer)
        _, alteredAudioData = wavfile.read(eFile)
        leng = alteredAudioData.shape[0]
        endPointer = outputPointer+leng
        outputAudioData = np.concatenate((outputAudioData, alteredAudioData/maxAudioVolume))
        # outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

        # smooth out each transition's audio by quickly fading in/out
        if leng < AUDIO_FADE_ENVELOPE_SIZE:
            outputAudioData[outputPointer:endPointer] = 0  # audio is less than 0.01 sec, just remove it
        else:
            premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE
            mask = np.repeat(premask[:, np.newaxis], 2, axis=1)  # make the fade-envelope mask stereo
            outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
            outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask

        startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
        endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
        for outputFrame in range(startOutputFrame, endOutputFrame):
            inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
            didItWork = copyFrame(inputFrame, outputFrame)
            if didItWork:
                lastExistingFrame = inputFrame
            else:
                copyFrame(lastExistingFrame, outputFrame)
        outputPointer = endPointer
    wavfile.write(TEMP_FOLDER+"/audioNew.wav", SAMPLE_RATE, outputAudioData)
    '''
    outputFrame = math.ceil(outputPointer/samplesPerFrame)
    for endGap in range(outputFrame, audioFrameCount):
        copyFrame(int(audioSampleCount/samplesPerFrame)-1, endGap)
    '''
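    # Reassemble the output: mux the renumbered frames with the time-scaled audio at frameRate.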
command = "ffmpeg -framerate "+str(frameRate)+" -i "+TEMP_FOLDER+"/newFrame%06d.jpg -i "+TEMP_FOLDER+"/audioNew.wav -strict -2 "+OUTPUT_FILE
subprocess.call(command, shell=True)
deletePath(TEMP_FOLDER)
end = time.time()
print(f"Completed {INPUT_FILE} in {end - start}s")
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from 'react';
 import Axios from 'axios';
 import { useDispatch, useSelector } from 'react-redux';
 import { List, Icon, Avatar } from 'antd';
-import avatar from './images/avatar.png';
+import avatar from './images/aiAvatar.png';
 import { saveMessage } from '../functions/actions/digitalHuman_actions';
 import Message from './sections/messageChat';
 import Card from "./sections/cardChat";
@@ -187,7 +187,7 @@ function DigitalHuman() {
     return (
         <div className='digitalHuamn'>
             <div className='avatar'>
-                <video height='520px' autoPlay>
+                <video width='100%' autoPlay>
                     <source src='/video/result_voice.mp4' type="video/mp4" />
                 </video>
             </div>
@@ -220,4 +220,4 @@ function DigitalHuman() {
     )
 }
 
-export default DigitalHuman;
\ No newline at end of file
+export default DigitalHuman;
 import React from 'react'
 import { List, Icon, Avatar } from 'antd';
 import '../styles/sectionStyles.css';
-import avatar from '../images/avatar.png';
+import avatar from '../images/aiAvatar.png';
 function Message(props) {
@@ -37,4 +37,4 @@ function Message(props) {
     )
 }
 
-export default Message
\ No newline at end of file
+export default Message