Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCR implemented in new function "Content analysis" #21

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ This Artificially Assisted User Interface Testing framework is a pioneering tool
# Installation
```
# Add your Chat-GPT API Keys to the project:
add your API Key in /core/core_api.py -> line 3: client = OpenAI(api_key='insert_your_api_key_here')
add your API Key in /core/core_imaging.py -> line 12: api_key = 'insert_your_api_key_here'
Create a .env file in the project folder
and add your API key to it like this: OPENAI_API_KEY=sk-pr....


# Install requirements:
cd pywinassistant
Expand Down
30 changes: 26 additions & 4 deletions core/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
from PIL import Image, ImageTk
import time
import random
import win32gui
from queue import Queue
import speech_recognition as sr
import threading
from voice import speaker, set_volume, set_subtitles
from driver import assistant, act, fast_act, auto_role, perform_simulated_keypress, write_action
from window_focus import activate_windowt_title
from window_focus import get_previous_window
from ocr import ocr_screen

# Initialize the speech recognition and text to speech engines
assistant_voice_recognition_enabled = True # Disable if you don't want to use voice recognition
Expand All @@ -18,6 +21,7 @@
assistant_subtitles_enabled = True
recognizer = sr.Recognizer()
message_queue = Queue()
last_active_window = None # Variable to store the last active window handle
Ctk.set_appearance_mode("dark") # Modes: system (default), light, dark
Ctk.set_default_color_theme("dark-blue") # Themes: blue (default), dark-blue, green

Expand Down Expand Up @@ -238,7 +242,7 @@ def menu_command(command):
# Buttons with commands
Ctk.CTkButton(menu_frame, text="Call assistant", command=lambda: menu_command(generate_assistant_test_case(False))).pack(fill="x")
Ctk.CTkButton(menu_frame, text="Fast action", command=lambda: menu_command(generate_assistant_test_case(True))).pack(fill="x")
Ctk.CTkButton(menu_frame, text="Content analysis", command=lambda: menu_command(dummy_command)).pack(fill="x")
Ctk.CTkButton(menu_frame, text="Content analysis", command=lambda: menu_command(content_analysis)).pack(fill="x")

# Add separator or space between groups of options (This is an improvisation since Ctk doesn't have a separator widget)
Ctk.CTkLabel(menu_frame, text="", height=3).pack(fill="x")
Expand Down Expand Up @@ -290,11 +294,29 @@ def minimize_assistant():
def show_config(event):
    """Display the settings menu as a custom context menu at the cursor.

    Also remembers the window that had focus before the menu opened, so
    actions launched from the menu (e.g. content analysis) can restore
    focus to it via the module-level ``last_active_window`` handle.
    """
    # NOTE: this function was previously defined twice; the first
    # definition was dead code shadowed by this one, so they are merged.
    global last_active_window
    last_active_window = win32gui.GetForegroundWindow()  # Store the active window
    create_context_menu(event.x_root, event.y_root)


# Placeholder callback kept for menu entries without a real action yet.
def dummy_command():
    """Announce and log a click on a placeholder menu item."""
    notice = "Dummy item clicked"
    speaker(notice)
    print(notice)
def content_analysis():
    """Run OCR on the previously active window and read the result aloud.

    Restores focus to the window handle stored by ``show_config`` before
    capturing, then speaks and displays the recognized text, or a
    "no text detected" notice when OCR returns nothing.
    """
    global last_active_window
    if not last_active_window:
        # Previously this case silently did nothing; give the user feedback.
        speaker("No active window was recorded for analysis.")
        show_message(None, "No active window recorded.")
        return
    win32gui.SetForegroundWindow(last_active_window)  # Refocus the target window
    text = ocr_screen(focused=True)
    print(f"OCR Output: {text}")  # Debug trace of the raw OCR result
    if text:
        speaker(f"The text in the active window is: {text}")
        show_message(None, f"Text in window: {text}")
    else:
        speaker("No text was detected in the active window.")
        show_message(None, "No text detected.")


def generate_assistant_test_case(fast_act=False):
# Function to perform a fast action
Expand Down
270 changes: 270 additions & 0 deletions core/cases.py

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions core/core_api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import os
from dotenv import load_dotenv
from openai import OpenAI

# Load the OpenAI API key from the project's .env file (OPENAI_API_KEY=sk-...).
load_dotenv()

openai_api_key = os.getenv('OPENAI_API_KEY')

# Single shared client for all API calls in this module.
# (The old hard-coded placeholder-key client construction is removed.)
client = OpenAI(api_key=openai_api_key)
# Available models: "gpt-4-1106-preview", "gpt-3.5-turbo-1106", or "davinci-codex"
MODEL_NAME = "gpt-3.5-turbo-1106"


def api_call(messages, model_name=MODEL_NAME, temperature=0.5, max_tokens=150):
# if model_name == "gpt-4-1106-preview":
# model_name = "gpt-3.5-turbo-1106"
Expand Down
6 changes: 4 additions & 2 deletions core/core_imaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
import pygetwindow as gw
import base64
import requests
import os
import io
from PIL import Image
from dotenv import load_dotenv

# Assuming that the `activate_window_title` function is defined in another module correctly
from window_focus import activate_windowt_title

# OpenAI API Key — call load_dotenv() here too: this module reads the key
# directly with os.getenv, so it must not rely on another module having
# loaded the .env file first. Without this call getenv can return None.
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')


# Function to focus a window given its title
Expand Down Expand Up @@ -47,7 +49,7 @@ def analyze_image(base64_image, window_title, additional_context='What’s in th
# Your logic to call the OpenAI API
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
"Authorization": f"Bearer {openai_api_key}"
}

payload = {
Expand Down
Loading