Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open innovation(1 small change made) #61

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions VOICE_INSTRUCTIONS.PY
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pyttsx3
import os

# --- one-time engine setup ---------------------------------------------------
# Build a single TTS engine and select the second installed voice
# (index 1 is usually the female SAPI5 voice on Windows — TODO confirm locally).
engine = pyttsx3.init()
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[1].id)
print("...")
# BUG FIX: the original called runAndWait() *before* queueing any text, so the
# "Welcome!!!" message was queued and never spoken. Queue first, then flush.
engine.say("Welcome!!!")
engine.runAndWait()

print("")
print("\n\t 1.MICROSOFT WORD \t 2.MICROSOFT POWERPOINT \n\t 3.MICROSOFT EXCEL \t 4.GOOGLE CHROME \n\t 5.VLC PLAYER \n\t 6.NOTEPAD \n\n\t\t 0. FOR EXIT")
pyttsx3.speak("Welcome to my tools")
print("")
pyttsx3.speak("chat with me with your requirements")

# --- main menu loop ----------------------------------------------------------
# Keyword-match the (upper-cased) request and launch the matching Windows app.
while True:
    print(" CHAT WITH ME WITH YOUR REQUIREMENTS : ", end='')
    p = input().upper()
    print(p)
    if ("DONT" in p) or ("DON'T" in p) or ("NOT" in p):
        # Negated requests are ambiguous under keyword matching — ask again.
        pyttsx3.speak("Type Again")
        continue
    elif ("GOOGLE" in p) or ("SEARCH" in p) or ("WEB BROWSER" in p) or ("CHROME" in p) or ("BROWSER" in p) or ("4" in p):
        pyttsx3.speak("Opening")
        pyttsx3.speak("GOOGLE CHROME")
        os.system("chrome")

    elif ("NOTE" in p) or ("EDITOR" in p) or ("6" in p):
        # BUG FIX: the menu advertises option 6 for Notepad, but the original
        # matched "9". Also dropped "NOTES"/"NOTEPAD", which "NOTE" already covers.
        pyttsx3.speak("Opening")
        pyttsx3.speak("NOTEPAD")
        os.system("Notepad")

    elif ("VLCPLAYER" in p) or ("PLAYER" in p) or ("VIDEO PLAYER" in p) or ("5" in p):
        pyttsx3.speak("Opening")
        pyttsx3.speak("VLC PLAYER")
        os.system("VLC")

    elif ("EXCEL" in p) or ("MSEXCEL" in p) or ("SHEET" in p) or ("WINEXCEL" in p) or ("3" in p):
        pyttsx3.speak("Opening")
        pyttsx3.speak("MICROSOFT EXCEL")
        os.system("excel")

    elif ("SLIDE" in p) or ("MSPOWERPOINT" in p) or ("PPT" in p) or ("POWERPNT" in p) or ("2" in p):
        pyttsx3.speak("Opening")
        pyttsx3.speak("MICROSOFT POWERPOINT")
        os.system("powerpnt")

    elif ("WORD" in p) or ("MSWORD" in p) or ("1" in p):
        pyttsx3.speak("Opening")
        pyttsx3.speak("MICROSOFT WORD")
        os.system("winword")
    elif ("EXIT" in p) or ("QUIT" in p) or ("CLOSE" in p) or ("0" in p):
        pyttsx3.speak("Exiting")
        break
    else:
        # Unrecognised request: read it back and prompt for another try.
        pyttsx3.speak(p)
        print("Is Invalid,Please Try Again")
        pyttsx3.speak("is Invalid,Please try again")
        print(".")
        print(".")
17 changes: 17 additions & 0 deletions cartoonify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import cv2 as cv
import numpy as np

# Load the sample photo and show it for side-by-side comparison.
# NOTE: the blur/threshold constants below were tuned for this specific image;
# other photos may need different parameter values.
src = cv.imread('emma3.jpg')
cv.imshow("original", src)

# Edge mask: grayscale -> median blur (kills salt-and-pepper noise)
# -> adaptive mean threshold to get bold black outlines.
gray = cv.medianBlur(cv.cvtColor(src, cv.COLOR_BGR2GRAY), 3)
outline = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 5, 5)
cv.imshow("edges", outline)

# Bilateral filter flattens colour regions while keeping edges sharp;
# masking with the outline keeps only the "inked" cartoon pixels.
smoothed = cv.bilateralFilter(src, 5, 250, 250)
toon = cv.bitwise_and(smoothed, smoothed, mask=outline)
cv.imshow("final result", toon)
cv.waitKey(0)
cv.destroyAllWindows()
98 changes: 98 additions & 0 deletions collect-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import cv2
import os

# Keyboard key -> (on-screen caption, overlay colour in BGR) for each gesture
# class. The key character doubles as the sub-directory name under data/train/.
CLASSES = {
    '0': ("ZERO", (189, 255, 211)),
    '1': ("ONE", (255, 211, 255)),
    '2': ("TWO", (167, 9, 255)),
    '3': ("THREE", (20, 134, 255)),
    '4': ("FOUR", (255, 25, 28)),
    '5': ("FIVE", (205, 123, 211)),
    '6': ("YOO", (205, 123, 0)),
    '7': ("CALL", (0, 123, 211)),
    '8': ("FIVE/oppside", (205, 0, 211)),
    '9': ("TICK", (127, 123, 211)),
}

mode = 'train'
directory = 'data/' + mode + '/'

# Create data/train/0 .. data/train/9.  BUG FIX: the original only created the
# subdirectories when "data" itself was missing, so a partially created tree
# crashed later listdir/imwrite calls; exist_ok makes this idempotent.
for digit in CLASSES:
    os.makedirs(directory + digit, exist_ok=True)

# To capture from a phone/IP camera instead of the default webcam:
# url = '<YOUR IP ADDRESS>/video'
# cap = cv2.VideoCapture(url)
cap = cv2.VideoCapture(0)

while True:
    ok, frame = cap.read()
    if not ok:
        # BUG FIX: the original ignored the read flag; a dead camera made
        # cv2.flip(None) raise. Stop cleanly instead.
        break
    frame = cv2.flip(frame, 1)  # mirror so the preview behaves like a mirror

    cv2.putText(frame, "hash-slash", (155, 250), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), 3)

    # Samples captured so far per class; also names the next saved file.
    count = {digit: len(os.listdir(directory + digit)) for digit in CLASSES}

    cv2.putText(frame, "MODE : " + mode, (10, 50), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (128, 128, 56), 1)
    cv2.putText(frame, "IMAGE COUNT", (10, 100), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (120, 110, 255), 1)
    # One overlay row per class, 20 px apart, starting at y=120.
    for row, (digit, (caption, colour)) in enumerate(CLASSES.items()):
        cv2.putText(frame, caption + " : " + str(count[digit]), (10, 120 + 20 * row),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, colour, 1)

    # Capture box: top-right square of the mirrored frame.
    x1 = int(0.5 * frame.shape[1])
    y1 = 10
    x2 = frame.shape[1] - 10
    y2 = int(0.5 * frame.shape[1])
    cv2.rectangle(frame, (x1 - 1, y1 - 1), (x2 + 1, y2 + 1), (255, 0, 0), 3)
    roi = frame[y1:y2, x1:x2]
    roi = cv2.resize(roi, (200, 200))
    cv2.putText(frame, "R.O.I", (440, 350), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 225, 0), 3)
    cv2.imshow("Frame", frame)

    # Save a binarised (inverted) grayscale version of the hand region.
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    _, roi = cv2.threshold(roi, 120, 255, cv2.THRESH_BINARY_INV)
    cv2.imshow("ROI", roi)

    key = cv2.waitKey(10) & 0xFF
    if key == 27:  # ESC quits
        break
    digit = chr(key) if key < 128 else ''
    if digit in CLASSES:
        # File name is the current count, so samples number 0.jpg, 1.jpg, ...
        cv2.imwrite(directory + digit + '/' + str(count[digit]) + '.jpg', roi)

cap.release()
cv2.destroyAllWindows()
Binary file added model-bw.h5
Binary file not shown.
1 change: 1 addition & 0 deletions model-bw.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 64, 64, 1], "dtype": "float32", "sparse": false, "ragged": false, "name": "conv2d_input"}}, {"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": [null, 64, 64, 1], "dtype": "float32", "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "dtype": "float32", "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_1", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, 
"dtype": "float32", "units": 128, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 10, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.6.0", "backend": "tensorflow"}
65 changes: 65 additions & 0 deletions prediction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from keras.models import model_from_json
import operator
import subprocess
import cv2

# --- load the trained CNN: architecture from JSON, weights from HDF5 --------
with open("model-bw.json", "r") as json_file:
    model_json = json_file.read()
loaded_model = model_from_json(model_json)
loaded_model.load_weights("model-bw.h5")
print("Loaded model from disk")

cap = cv2.VideoCapture(0)

# Class index -> human-readable gesture label (matches the training folders).
categories = {0: 'ZERO/FIST', 1: 'ONE', 2: 'TWO/PEACE', 3: 'THREE', 4: 'FOUR',
              5: 'FIVE/STOP', 6: 'YOO', 7: 'CALL', 8: 'FIVE/OPP SIDE', 9: 'TICK'}

while True:
    ok, frame = cap.read()
    if not ok:
        # Camera gone — avoid crashing on cv2.flip(None).
        break
    frame = cv2.flip(frame, 1)

    # Region of interest: top-right square of the mirrored frame.
    x1 = int(0.5 * frame.shape[1])
    y1 = 10
    x2 = frame.shape[1] - 10
    y2 = int(0.5 * frame.shape[1])

    cv2.putText(frame, "Expressando - TDOC 2021", (175, 450), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (225, 255, 0), 3)
    cv2.rectangle(frame, (x1 - 1, y1 - 1), (x2 + 1, y2 + 1), (255, 255, 255), 3)
    roi = frame[y1:y2, x1:x2]

    # Preprocess exactly like training capture: 64x64 grayscale, binarised.
    roi = cv2.resize(roi, (64, 64))
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    cv2.putText(frame, "R.O.I", (440, 350), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 225, 0), 3)

    _, test_image = cv2.threshold(roi, 120, 255, cv2.THRESH_BINARY)
    cv2.imshow("ROI", test_image)

    # NOTE(review): training rescaled pixels to [0, 1] (see train_model.py),
    # but this feeds raw 0/255 values to the network — confirm whether the
    # input should be divided by 255.0 here.
    result = loaded_model.predict(test_image.reshape(1, 64, 64, 1))
    prediction = {categories[i]: result[0][i] for i in range(10)}
    # Sort labels by descending probability; prediction[0] is the best guess.
    prediction = sorted(prediction.items(), key=operator.itemgetter(1), reverse=True)
    cv2.putText(frame, "PREDICTION:", (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    cv2.putText(frame, prediction[0][0], (80, 130), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # BUG FIX: the original compared the winning *label string* against a
    # probability *float* (prediction[0][0] == result[0][4]) — always False —
    # and duplicated the whole launch block. result[0][4] is class 'FOUR',
    # so compare against that label once.
    if prediction[0][0] == 'FOUR':
        print("chat")
        subprocess.Popen('C:\\Windows\\System32\\notepad.exe')

    cv2.imshow("Frame", frame)
    interrupt = cv2.waitKey(10)
    if interrupt & 0xFF == 27:  # ESC quits
        break


cap.release()
cv2.destroyAllWindows()
74 changes: 74 additions & 0 deletions prediction_updated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from keras.models import model_from_json
import operator
import subprocess
import webbrowser
import cv2

# --- load the trained CNN: architecture from JSON, weights from HDF5 --------
with open("model-bw.json", "r") as json_file:
    model_json = json_file.read()
loaded_model = model_from_json(model_json)
loaded_model.load_weights("model-bw.h5")
print("Loaded model from disk")

cap = cv2.VideoCapture(0)

# Class index -> human-readable gesture label (matches the training folders).
categories = {0: 'ZERO/FIST', 1: 'ONE', 2: 'TWO/PEACE', 3: 'THREE', 4: 'FOUR',
              5: 'FIVE/STOP', 6: 'YOO', 7: 'CALL', 8: 'FIVE/OPP SIDE', 9: 'TICK'}

while True:
    ok, frame = cap.read()
    if not ok:
        # Camera gone — avoid crashing on cv2.flip(None).
        break
    frame = cv2.flip(frame, 1)

    # Region of interest: top-right square of the mirrored frame.
    x1 = int(0.5 * frame.shape[1])
    y1 = 10
    x2 = frame.shape[1] - 10
    y2 = int(0.5 * frame.shape[1])

    cv2.putText(frame, "Expressando - TDOC 2021", (175, 450), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (225, 255, 0), 3)
    cv2.rectangle(frame, (x1 - 1, y1 - 1), (x2 + 1, y2 + 1), (255, 255, 255), 3)
    roi = frame[y1:y2, x1:x2]

    # Preprocess exactly like training capture: 64x64 grayscale, binarised.
    roi = cv2.resize(roi, (64, 64))
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    cv2.putText(frame, "R.O.I", (440, 350), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 225, 0), 3)

    _, test_image = cv2.threshold(roi, 120, 255, cv2.THRESH_BINARY)
    cv2.imshow("ROI", test_image)

    result = loaded_model.predict(test_image.reshape(1, 64, 64, 1))
    prediction = {categories[i]: result[0][i] for i in range(10)}
    # Sort labels by descending probability; prediction[0] is the best guess.
    prediction = sorted(prediction.items(), key=operator.itemgetter(1), reverse=True)
    cv2.putText(frame, "PREDICTION:", (30, 90), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2)
    cv2.putText(frame, prediction[0][0], (80, 130), cv2.FONT_HERSHEY_SCRIPT_COMPLEX, 1, (255, 255, 255), 2)


    ###### This part is to connect to applications #####
    if prediction[0][0] == 'CALL':
        print("chat")
        # BUG FIX: the original released the capture and destroyed the windows
        # here but kept looping, so the next cap.read() worked on a dead
        # capture. Launch Notepad, then leave the loop; cleanup happens below.
        subprocess.Popen('C:\\Windows\\System32\\notepad.exe')
        break
    ###############################################################
    ### WE CAN ALSO CONNECT VOICE MESSAGES AS IN FILE VOICE INSTRUCTIONS.PY INTO THIS (FUTURE SCOPE) ###
    if prediction[0][0] == 'FIVE/OPP SIDE':
        # BUG FIX: webbrowser.get(using='Mozilla-Firefox') raises
        # webbrowser.Error because that is not a registered browser type name;
        # open with the system default browser instead (new=2 -> new tab).
        webbrowser.open("https://pythonrepo.com/repo/cvzone-cvzone", new=2)

    cv2.imshow("Frame", frame)
    interrupt = cv2.waitKey(10)
    if interrupt & 0xFF == 27:  # ESC quits
        break


cap.release()
cv2.destroyAllWindows()
14 changes: 14 additions & 0 deletions projectideademo.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Project idea: To add gestures to existing ones and create a menu based system called "The Corner". A menu based system that works on 6 gestures. Also add voice control and Computer vision throughout the process.

How much have I been able to implement:
1. Added gestures and trained the model.
2. Wrote a file VOICE_INSTRUCTIONS.py to incorporate voice output at startup using the pyttsx3 package
3. Tried implementing action of opening applications using finger recognition using subprocess package

Where I failed:
1. My project was meant to be based purely on the mediapipe package to create an almost touch-like effect in thin air, all while doing a quiz or drawing; I couldn't access its solutions package, and hence couldn't define functions like "select" and "normal movement"
2. Irregular working of subprocess package due to my lack of knowledge.
3. Training almost 10 gestures reduces ability of model to differentiate especially between five and five opposite and 2 and call.

Future Scope:
Creating a multi-tier quiz, a Squid Game honeycomb replica, a drawing canvas, accessing smart-home equipment via its IP, and accessing computer applications through a finger-recognition-driven menu could all be implemented, creating an almost phone-screen-like effect in thin air.
52 changes: 52 additions & 0 deletions train_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from keras.models import Sequential
# Conv2D is the current name; Convolution2D was a legacy alias.
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
# Moved here from mid-file: PEP 8 wants all imports at the top.
from keras.preprocessing.image import ImageDataGenerator

# --- model: two conv/pool stages feeding a small dense classifier ------------
# Input is a 64x64 single-channel (grayscale, binarised) gesture image;
# output is a 10-way softmax over the gesture classes.
classifier = Sequential()

classifier.add(Conv2D(32, (3, 3), input_shape=(64, 64, 1), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))

classifier.add(Conv2D(32, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))

classifier.add(Flatten())

classifier.add(Dense(units=128, activation='relu'))
classifier.add(Dense(units=10, activation='softmax'))

classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# --- data pipelines -----------------------------------------------------------
# Light augmentation on the training set only; both pipelines rescale pixel
# values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory('data/train',
                                                 target_size=(64, 64),
                                                 batch_size=5,
                                                 color_mode='grayscale',
                                                 class_mode='categorical')

# NOTE(review): collect-data.py only creates data/train/* — confirm that a
# data/test directory with the same class layout exists before training.
test_set = test_datagen.flow_from_directory('data/test',
                                            target_size=(64, 64),
                                            batch_size=5,
                                            color_mode='grayscale',
                                            class_mode='categorical')

classifier.fit(
    training_set,
    epochs=10,
    validation_data=test_set)

# --- persist: architecture as JSON, weights as HDF5 ---------------------------
with open("model-bw.json", "w") as json_file:
    json_file.write(classifier.to_json())
classifier.save_weights('model-bw.h5')