[WIP: June2023] Deep Q-Learning using TorchSharp #710
Replies: 3 comments 6 replies
-
using Torch;
using System;
namespace DeepQLearning
{
class Program
{
    /// <summary>
    /// Trains a minimal Deep Q-Network on the CartPole-v0 environment.
    /// The network maps a 4-dimensional observation to 2 action values;
    /// training uses a one-step TD target with MSE loss and Adam.
    /// </summary>
    /// <param name="args">Unused command-line arguments.</param>
    static void Main(string[] args)
    {
        // Hyperparameters for the training loop.
        const int EpisodeCount = 1000;
        const int StepsPerEpisode = 200;   // CartPole-v0 caps episodes at 200 steps
        const double LearningRate = 0.001;
        const double Gamma = 0.99;         // discount factor for future rewards

        // Q-network: observation (4 values) -> hidden (128, ReLU) -> Q-values (2 actions).
        var model = new Sequential();
        model.Add(new Linear(4, 128));
        model.Add(new ReLU());
        model.Add(new Linear(128, 2));

        var optimizer = new Adam(model.Parameters(), LearningRate);
        var loss = new MSELoss();

        // NOTE(review): Gym/Tensor/Sequential here are project-local wrappers, not
        // stock TorchSharp — the API semantics below are inferred from usage; verify.
        var env = Gym.Make("CartPole-v0");

        // Budget the run by total environment steps rather than episodes alone,
        // since early episodes terminate well before StepsPerEpisode.
        int maxSteps = EpisodeCount * StepsPerEpisode;
        int stepCount = 0;
        int episode = 0;
        while (stepCount < maxSteps)
        {
            env.Reset();
            for (int step = 0; step < StepsPerEpisode; step++)
            {
                // Current observation packed as a batch of one: shape [1, state.Length].
                var state = env.Observation;
                var tensor = new Tensor(state, new[] { 1, state.Length });

                // Greedy action = argmax over predicted Q-values.
                // NOTE(review): there is no epsilon-greedy exploration here, so the
                // agent may never discover better actions — consider adding it.
                var qValues = model.Forward(tensor);
                var action = qValues.Max().Item2;

                // Step the environment and observe the resulting transition.
                var result = env.Step(action);
                var nextState = result.Observation;
                var reward = result.Reward;
                var done = result.Done;

                // TD target: r for a terminal state, otherwise r + gamma * max_a' Q(s', a').
                // Only the taken action's entry differs from the prediction, so the MSE
                // gradient flows through that single Q-value.
                var target = qValues.Clone();
                if (done)
                {
                    target[0, action] = reward;
                }
                else
                {
                    var nextTensor = new Tensor(nextState, new[] { 1, nextState.Length });
                    var nextQValues = model.Forward(nextTensor);
                    var maxNextQ = nextQValues.Max().Item1;
                    target[0, action] = reward + Gamma * maxNextQ;
                }

                // BUG FIX: the original called tensor.Reshape(new[] { 1, 4 }) and
                // discarded the result; reshape APIs return a new tensor, and the
                // tensor was already constructed as [1, 4], so the call was a no-op
                // and has been removed.
                var output = model.Forward(tensor);
                optimizer.ZeroGrad();
                var l = loss.Forward(output, target);
                l.Backward();
                optimizer.Step();

                stepCount++;
                if (done)
                {
                    break;
                }
            }

            Console.WriteLine("Episode: " + episode);
            episode++;
        }

        env.Close();
    }
}
}
|
Beta Was this translation helpful? Give feedback.
-
I think I reproduced the solution from the lecture in my repo here. |
Beta Was this translation helpful? Give feedback.
-
This is cool. I still hope we can build out a gym in .NET and maybe some shareable components for Q-learning. I don't have the expertise or experience to do that, but it'd be very cool. |
Beta Was this translation helpful? Give feedback.
-
June 2023
#981 (comment)
Feb 2023
https://www.youtube.com/watch?v=217tCMsZu0I
Beta Was this translation helpful? Give feedback.
All reactions