#include "NeuralNetwork.h"

#include <cmath>
#include <random>
#include <stdexcept>

// Builds a fully connected feed-forward network.
//
// layerSizes - neuron count of each layer, input layer first. For every pair
//              of consecutive layers one weight matrix with layerSizes[i] rows
//              and layerSizes[i-1] columns, plus one bias vector with
//              layerSizes[i] entries, is created.
//
// Every weight and bias is drawn uniformly from [-2, 2) by a std::mt19937
// seeded from std::random_device, so each instance starts from different
// parameters.
NeuralNetwork::NeuralNetwork(const std::vector<int>& layerSizes)
    : layerSizes(layerSizes) {

    // The callable deliberately captures nothing. Capturing `this` would leave
    // a copied or moved network holding a pointer to the object it was copied
    // from, which dangles as soon as that source object is destroyed.
    activation = [](double x) { return 1.0 / (1.0 + std::exp(-x)); };

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<double> dist(-2.0, 2.0);  // Larger range for more variation

    // Inside this body `layerSizes` names the constructor parameter, which
    // holds the same values as the member initialised above.
    for (size_t i = 1; i < layerSizes.size(); i++) {
        // Create the layer directly inside `weights` and fill it in place,
        // avoiding a second copy of the whole matrix.
        weights.emplace_back(layerSizes[i],
            std::vector<double>(layerSizes[i-1]));
        for (auto& neuron : weights.back()) {
            for (auto& weight : neuron) {
                weight = dist(gen);  // Use full range
            }
        }

        // Random biases for more variation
        biases.emplace_back(layerSizes[i]);
        for (auto& bias : biases.back()) {
            bias = dist(gen);
        }
    }
}

// Logistic function: 1 / (1 + e^(-x)).
double NeuralNetwork::sigmoid(double x) {
    const double decay = std::exp(-x);
    const double denominator = 1.0 + decay;
    return 1.0 / denominator;
}

// Derivative of the logistic function evaluated at x, computed as
// sigmoid(x) * (1 - sigmoid(x)). Note that x is the pre-activation value,
// not an already-activated output.
double NeuralNetwork::sigmoidDerivative(double x) {
    const double value = sigmoid(x);
    const double complement = 1.0 - value;
    return value * complement;
}

// Runs the network on one sample and returns the output of the last layer.
//
// input - values for the input layer; must contain exactly layerSizes.front()
//         entries when the network has at least one weight layer.
//
// Returns the activations of the final layer. A network without weight layers
// returns `input` unchanged.
//
// Throws std::invalid_argument when the input size does not match the input
// layer. Without this check an oversized input would index past the end of
// each weight row, and an undersized one would silently skip weights.
std::vector<double> NeuralNetwork::forward(const std::vector<double>& input) {
    if (!weights.empty() &&
        input.size() != static_cast<size_t>(layerSizes.front())) {
        throw std::invalid_argument(
            "NeuralNetwork::forward: input size does not match input layer");
    }

    std::vector<double> current = input;

    for (size_t layer = 0; layer < weights.size(); layer++) {
        const auto& layerWeights = weights[layer];
        const auto& layerBiases = biases[layer];
        std::vector<double> next(layerWeights.size());

        for (size_t i = 0; i < next.size(); i++) {
            const auto& row = layerWeights[i];
            // Weighted sum of the previous layer's outputs plus this neuron's bias.
            double sum = layerBiases[i];
            for (size_t j = 0; j < current.size(); j++) {
                sum += current[j] * row[j];
            }
            next[i] = activation(sum);
        }

        // Hand the buffer over instead of copying it element by element.
        current.swap(next);
    }

    return current;
}

// Performs one gradient-descent step on a single (input, target) sample,
// reducing the squared error 1/2 * sum((output - target)^2).
//
// input        - sample fed to the input layer; must have layerSizes.front()
//                entries.
// target       - desired output; must have layerSizes.back() entries.
// learningRate - step size applied to every weight and bias gradient.
//
// Throws std::invalid_argument when input or target do not match the size of
// the input or output layer. Does nothing for a network without weight layers.
//
// The factor a * (1 - a) used below is the sigmoid derivative written in
// terms of the neuron output a, so this routine assumes `activation` is the
// sigmoid that the constructor installs.
void NeuralNetwork::backpropagate(const std::vector<double>& input,
                                  const std::vector<double>& target,
                                  double learningRate) {
    if (weights.empty()) {
        return;  // No trainable parameters.
    }
    if (input.size() != static_cast<size_t>(layerSizes.front())) {
        throw std::invalid_argument(
            "NeuralNetwork::backpropagate: input size does not match input layer");
    }
    if (target.size() != static_cast<size_t>(layerSizes.back())) {
        throw std::invalid_argument(
            "NeuralNetwork::backpropagate: target size does not match output layer");
    }

    // Forward pass with storage: activations[0] is the input and
    // activations[k + 1] is the output of weight layer k.
    std::vector<std::vector<double>> activations(weights.size() + 1);
    activations.front() = input;
    for (size_t layer = 0; layer < weights.size(); layer++) {
        const std::vector<double>& current = activations[layer];
        std::vector<double>& next = activations[layer + 1];
        next.resize(weights[layer].size());
        for (size_t i = 0; i < next.size(); i++) {
            double sum = biases[layer][i];
            for (size_t j = 0; j < current.size(); j++) {
                sum += current[j] * weights[layer][i][j];
            }
            next[i] = activation(sum);
        }
    }

    // Error signal of the output layer. The derivative is taken from the
    // activated output directly; feeding that output through
    // sigmoidDerivative() would apply the sigmoid a second time.
    const std::vector<double>& output = activations.back();
    std::vector<double> delta(output.size());
    for (size_t i = 0; i < output.size(); i++) {
        delta[i] = (output[i] - target[i]) * output[i] * (1.0 - output[i]);
    }

    // Walk the layers from last to first. `layer-- > 0` counts down safely
    // with an unsigned index.
    for (size_t layer = weights.size(); layer-- > 0;) {
        const std::vector<double>& previous = activations[layer];

        // Propagate the error to the preceding layer BEFORE touching this
        // layer's weights, so the gradient uses the weights that produced the
        // forward pass. The input layer (layer == 0) has no error signal.
        std::vector<double> previousDelta;
        if (layer > 0) {
            previousDelta.assign(previous.size(), 0.0);
            for (size_t i = 0; i < weights[layer].size(); i++) {
                for (size_t j = 0; j < previous.size(); j++) {
                    previousDelta[j] += weights[layer][i][j] * delta[i];
                }
            }
            for (size_t j = 0; j < previous.size(); j++) {
                previousDelta[j] *= previous[j] * (1.0 - previous[j]);
            }
        }

        // Gradient step for this layer's weights and biases.
        for (size_t i = 0; i < weights[layer].size(); i++) {
            for (size_t j = 0; j < weights[layer][i].size(); j++) {
                weights[layer][i][j] -= learningRate * delta[i] * previous[j];
            }
            biases[layer][i] -= learningRate * delta[i];
        }

        delta.swap(previousDelta);
    }
}

// Trains the network with per-sample gradient descent.
//
// inputs       - training samples, one vector per sample.
// targets      - desired outputs; targets[i] belongs to inputs[i].
// epochs       - number of full passes over the data set; values <= 0 perform
//                no training.
// learningRate - step size forwarded to backpropagate() for every sample.
//
// Throws std::invalid_argument when inputs and targets differ in length.
// Without this check a shorter `targets` would be indexed out of bounds.
void NeuralNetwork::train(const std::vector<std::vector<double>>& inputs,
                          const std::vector<std::vector<double>>& targets,
                          int epochs, double learningRate) {
    if (inputs.size() != targets.size()) {
        throw std::invalid_argument(
            "NeuralNetwork::train: inputs and targets must contain the same number of samples");
    }

    for (int epoch = 0; epoch < epochs; epoch++) {
        for (size_t sample = 0; sample < inputs.size(); sample++) {
            backpropagate(inputs[sample], targets[sample], learningRate);
        }
    }
}
