-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
171 lines (143 loc) · 6.22 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import base64
import json
import os
import subprocess
import time
import uuid

import pyautogui
from openai import OpenAI
# Module-wide OpenAI client used by every API call below; the API key is read
# from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Function to encode the image
def encode_image(image_path):
    """
    Reads a file and returns its contents as base64 text.
    Args:
    - image_path: The path of the file to read; it is opened in binary mode.
    Returns:
    - The base64 encoding of the file's bytes, decoded to a UTF-8 string.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# Function to call the GPT-4 Vision API
def _image_reference(image, fallback_mime="image/jpeg"):
    """Return the URL string to send for an image given as a URL or base64 text."""
    # Base64 text never contains ':', so these prefixes can only be real URLs
    # (or an already-built data URL); pass them through untouched.
    if image.startswith(("data:", "http://", "https://")):
        return image
    # Base64 renderings of the PNG, JPEG, GIF and RIFF (WebP) file signatures.
    signatures = (
        ("iVBORw0KGgo", "image/png"),
        ("/9j/", "image/jpeg"),
        ("R0lGOD", "image/gif"),
        ("UklGR", "image/webp"),
    )
    mime = next(
        (kind for prefix, kind in signatures if image.startswith(prefix)),
        fallback_mime,
    )
    return f"data:{mime};base64,{image}"


def make_image_api_call(client, image_url, text, *,
                        model="gpt-4-vision-preview", max_tokens=300):
    """
    Sends one user message containing a text prompt and an image to a chat model.
    Args:
    - client: An object exposing chat.completions.create (the OpenAI client).
    - image_url: The image, either as base64-encoded file contents or as a
      ready-made http(s)/data URL. Base64 input is wrapped in a data URL whose
      MIME type is detected from the file signature (falling back to
      image/jpeg), so PNG screenshots are no longer labelled as JPEG.
    - text: The prompt that accompanies the image.
    - model: The model name to request.
    - max_tokens: The upper bound on tokens in the reply.
    Returns:
    - The response object returned by client.chat.completions.create, unchanged.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                    {
                        "type": "image_url",
                        "image_url": {"url": _image_reference(image_url)},
                    },
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    return response
# Function to make gpt 4 text API call
def make_text_api_call(client, text, format, *,
                       model="gpt-4-turbo-preview", max_tokens=300):
    """
    Sends one text-only user message to a chat model.
    Args:
    - client: An object exposing chat.completions.create (the OpenAI client).
    - text: The prompt to send.
    - format: The value placed in response_format["type"]; callers in this file
      pass "text" or "json_object". (The name shadows the builtin but is kept
      because it is part of the existing call signature.)
    - model: The model name to request.
    - max_tokens: The upper bound on tokens in the reply. A reply cut off at
      this limit may not be complete JSON, so raise it for long JSON answers.
    Returns:
    - The response object returned by client.chat.completions.create, unchanged.
    """
    # NOTE(review): OpenAI's JSON mode is documented as requiring the prompt
    # itself to mention JSON; confirm callers that pass "json_object" do so.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                ],
            }
        ],
        max_tokens=max_tokens,
        response_format={"type": format},
    )
    return response
def take_centered_screenshot(width=700, height=500, save_path='centered_screenshot.png',
                             x_offset=-280, y_offset=50):
    """
    Takes a screenshot of a specified width and height, positioned relative to
    the center of the screen, and saves it to disk.
    Args:
    - width: The width of the screenshot area.
    - height: The height of the screenshot area.
    - save_path: The file path where the screenshot will be saved.
    - x_offset: Horizontal shift, in pixels, applied to the centered area
      (negative moves it left). The default matches the previously hard-coded
      padding used to frame the game window.
    - y_offset: Vertical shift, in pixels, applied to the centered area
      (positive moves it down). The default matches the previous padding.
    """
    screen_width, screen_height = pyautogui.size()  # Get the size of the primary monitor.
    # Start from the top-left corner that would center the area, then apply the
    # offsets so the screenshot covers the game screen only.
    left = (screen_width - width) // 2 + x_offset
    top = (screen_height - height) // 2 + y_offset
    # Capture and save the screenshot
    screenshot = pyautogui.screenshot(region=(left, top, width, height))
    screenshot.save(save_path)
    print(f"Screenshot saved to {save_path}.")
def execute_move(move):
    """
    Presses the keyboard key bound to a named move by invoking ydotool.
    Args:
    - move: The move name ("right", "left", "forward", "backward"/"back",
      "jump" or "punch"). Matching ignores case and surrounding whitespace.
      Anything else, including non-string values, is reported and skipped.
    """
    move_key_map = {
        "right": "h",
        "left": "f",
        "forward": "t",
        "backward": "g",
        "back": "g",  # accepted as a shorthand for "backward"
        "jump": "x",
        "punch": "s",
    }
    # Moves come from model output, so tolerate stray case/whitespace and avoid
    # a TypeError when an item is not a string (e.g. a nested list or dict).
    normalized = move.strip().lower() if isinstance(move, str) else None
    move_key = move_key_map.get(normalized)
    if move_key is None:
        print(f"key not found for move {move}")
        return
    # TODO pyautogui was not working on wayland ubuntu
    # so installed and switched in ydotool
    # An argument list (no shell) keeps the key from being interpreted by a shell.
    command = ["ydotool", "key", "--delay", "50", "--repeat", "12", move_key]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as exc:
        # ydotool missing or not executable: report it instead of crashing.
        print(f"could not run ydotool for move {move}: {exc}")
        return
    if completed.returncode != 0:
        print(f"ydotool exited with status {completed.returncode} for move {move}")
# Pause after a failed round so a persistent error (bad API key, no network)
# does not turn the loop into a tight retry storm.
_RETRY_DELAY_SECONDS = 1

# Prompt for the vision model: describe the interactive elements on screen.
_OBSERVATION_TASK = """You are an expert Super Mario 64 player who has beat this game many times.
You are action-oriented and decisive.
List enemies, doors and in-game interactive elements that will be important to the game that are ON SCREEN.
When listing elements, mention the general direction of them with relation to the Mario character.
Do not list Mario, Counters/Meters or Controls or elements that are not ON SCREEN"""

# Prompt template for the text model: turn the observation into one short plan.
_PLANNING_TASK_TEMPLATE = """You are an expert Super Mario 64 player who has beat this game many times.
You are action-oriented and decisive.
Given these observations of a scene from the Super Mario 64 game think step by step and give what immediate action should be taken to get closer to the star (objective).
Be specific in terms of the general directions the character should be moved towards and what actions they should take
Example: Move closer to the door by moving forward
Be concise and give only one clear and simple plan
Observation: {observation}"""

# Prompt template for the text model: turn the plan into a JSON list of moves.
# The move names must match the keys execute_move understands, and the example
# is valid JSON built only from those names. Literal braces are doubled because
# the template is filled in with str.format.
_NEXT_MOVE_TASK_TEMPLATE = """You are an expert Super Mario 64 player who has beat this game many times.
You are action-oriented and decisive even if there is not much to go off of.
Given this short term plan give a short list of moves to implement it: {plan}
Here is a list of possible moves for you to take:
right
left
forward
backward
jump
punch
Give a list of moves to go down this path before you would need another observation. Be concise and return the result in JSON with the following format:
{{"moves": ["forward", "jump"]}}
All moves must come only from the list of possible moves above.
"""


def _parse_moves(raw_content):
    """Return the list under "moves" in the model's JSON reply, or [] if unusable."""
    try:
        moves = json.loads(raw_content)["moves"]
    except (TypeError, ValueError, KeyError) as exc:
        # Covers a missing reply, malformed/truncated JSON and a missing key.
        print(f"Could not read moves from the model reply: {exc!r}")
        return []
    if not isinstance(moves, list):
        print(f"Ignoring moves because it is not a list: {moves!r}")
        return []
    return moves


def _play_one_round():
    """Capture the screen, ask the models what to do, and execute the moves."""
    # NOTE(review): every round writes a new uniquely named PNG into the
    # working directory and nothing removes it; clean up if that matters.
    screenshot_path = f"{uuid.uuid4()}.png"
    print(f"Taking screenshot to {screenshot_path}")
    # Take a centered screenshot
    take_centered_screenshot(save_path=screenshot_path)
    image_url = encode_image(screenshot_path)
    print("Calling OpenAI API")

    # Observe the current scene with GPT 4 Vision
    observation_response = make_image_api_call(client, image_url, _OBSERVATION_TASK)
    observation = observation_response.choices[0].message.content
    print(f"Result for Observation Task': {observation}")

    # Turn the observation into a single short-term plan
    planning_task = _PLANNING_TASK_TEMPLATE.format(observation=observation)
    planning_response = make_text_api_call(client, planning_task, "text")
    plan = planning_response.choices[0].message.content
    print(f"Result for Planning Task': {plan}")

    # Get next series of moves
    next_move_task = _NEXT_MOVE_TASK_TEMPLATE.format(plan=plan)
    move_response = make_text_api_call(client, next_move_task, "json_object")
    raw_moves = move_response.choices[0].message.content
    print(f"Result for Moving Task': {raw_moves}")

    moves = _parse_moves(raw_moves)
    print(f"Starting the following moves: {moves}")
    for move in moves:
        print(f"Doing the move {move}")
        execute_move(move)


def main():
    """
    Runs the observe / plan / move loop until interrupted.
    Each round takes a screenshot, asks the vision model to describe it, asks
    the text model for a plan and then for a JSON list of moves, and executes
    those moves. A failing round is reported and the loop continues after a
    short pause. KeyboardInterrupt is not an Exception subclass, so Ctrl+C
    still stops the program.
    """
    print("Starting the program, taking a screenshot...")
    time.sleep(1)
    while True:
        try:
            _play_one_round()
        except Exception as e:
            # Top-level boundary: keep playing, but never hide the failure.
            print(f"Round failed, retrying: {e!r}")
            time.sleep(_RETRY_DELAY_SECONDS)
# Start the agent loop only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()