building a multilayer perceptron

closed account (E093605o)

Hello,

I am trying to build an MLP to learn C++. So far I have implemented a small linear algebra library, "Matrix.h" and "Matrix.cpp". Now I want to implement a feedforward function in the MLP.cpp file. However, I am stuck because I don't know how to write a function that applies another function to a Matrix parametrized with a type parameter. The function of interest is on line 188 of my code!

//Matrix.h

//NOTE: the part of interest is in the apply_function method all the way down!!
#pragma once
#include <vector>
#include <cmath>
#include <cassert>
#include <iostream>
#include <tuple>
#include <random>
#include <functional>

// Dense matrix of `Type`, stored as a vector of row vectors (data[row][col]).
//
// The arithmetic helpers declared below are defined out of class; each one
// builds and returns a new matrix.
template<typename Type>
class Matrix {

  size_t cols{};
  size_t rows{};

 public:
  std::vector<std::vector<Type>> data;  // data[r][c]
  std::tuple<size_t, size_t> shape;     // (rows, cols)
  size_t elementCount{};                // rows * cols

  /* constructors */

  // Builds a rowsArg x colsArg matrix whose elements are value-initialized.
  // The initializer list follows the member declaration order (cols, rows,
  // data, shape, elementCount), which is the order the members are really
  // initialized in, and only reads the constructor arguments.
  Matrix(size_t rowsArg, size_t colsArg)
      : cols(colsArg),
        rows(rowsArg),
        data(rowsArg, std::vector<Type>(colsArg)),
        shape(rowsArg, colsArg),
        elementCount(rowsArg * colsArg) {}

  // Empty matrix: zero rows, zero columns, no storage.
  Matrix() = default;

  //methods

  // Writes the matrix to std::cout, one row per line.
  void print();

  // Matrix product (*this) * m.
  Matrix<Type> matmul(Matrix<Type> &m);

  // Element-wise product with m.
  Matrix<Type> multiply_elementwise(Matrix<Type> &m);

  // Every element multiplied by scalar.
  Matrix<Type> multiply_scalar(Type scalar);

  // Every element multiplied by itself.
  Matrix<Type> square();

  // Element-wise sum with m.
  Matrix<Type> add(Matrix<Type> &m);

  // Element-wise difference (*this) - target.
  Matrix<Type> sub(Matrix &target);

  // Transpose.
  Matrix<Type> T();

  // func applied to every element. func is a plain function pointer, so a
  // non-static member function cannot be passed.
  Matrix<Type> apply_function(Type (*func)(Type));

  // Mutable access to element (row, col). Bounds are checked with assert
  // against the addressed row, so the check is compiled out under NDEBUG.
  Type& operator()(size_t row, size_t col) {
    assert(row < data.size() && col < data[row].size());
    return data[row][col];
  }

  // Element-wise sum; forwards to add().
  Matrix operator+(Matrix &target) {
    return add(target);
  }

  // Unary minus: returns a new matrix with every element negated.
  Matrix operator-() {
    Matrix output(rows, cols);
    for (size_t r = 0; r < rows; ++r) {
      for (size_t c = 0; c < cols; ++c) {
        output(r, c) = -(*this)(r, c);
      }
    }
    return output;
  }

  // Element-wise difference; forwards to sub().
  Matrix operator-(Matrix &target) {  // for cleaner usage
    return sub(target);
  }

};

// methods
// Writes the matrix to std::cout: one row per line, each element followed by
// a single space, and the stream flushed after every row.
template<typename Type>
void Matrix<Type>::print(){
  // Index with size_t to match rows/cols and avoid signed/unsigned comparisons.
  for (size_t i = 0; i < rows; ++i){
    for (size_t j = 0; j < cols; ++j){
      std::cout << data[i][j] << " ";
    }
    std::cout << std::endl;
  }
}

// Matrix product (*this) * target.
//
// Requires this->cols == target.rows (checked with assert). Returns a new
// rows x target.cols matrix.
template <typename Type>
Matrix<Type> Matrix<Type>::matmul(Matrix<Type> &target) {
    assert(cols == target.rows);

    const size_t inner = target.rows;  // dimension shared by both operands
    Matrix product(rows, target.cols);

    for (size_t i = 0; i < product.rows; ++i) {
      for (size_t j = 0; j < product.cols; ++j) {
        // The constructor value-initializes every element, so the cell can be
        // accumulated into directly.
        Type &cell = product(i, j);
        for (size_t k = 0; k < inner; ++k) {
          cell += (*this)(i, k) * target(k, j);
        }
      }
    }
    return product;
  }

// Factory helpers for Matrix<T>.
template <typename T>
struct mtx {
  // Returns a rows x cols matrix whose elements are drawn from a normal
  // distribution with mean 0 and standard deviation 1 / sqrt(rows * cols).
  // A new generator is seeded from std::random_device on every call.
  static Matrix<T> randn(size_t rows, size_t cols) {
    Matrix<T> M(rows, cols);

    // Nothing to fill, and it avoids computing 1 / sqrt(0) below.
    if (M.elementCount == 0) {
      return M;
    }

    std::random_device rd{};
    std::mt19937 gen{rd()};

    // init Gaussian distr. w/ N(mean=0, stdev=1/sqrt(numel))
    // std::sqrt selects the overload for T, so the result needs no narrowing
    // conversion when T is float.
    const T n = static_cast<T>(M.elementCount);
    const T stdev = T(1) / std::sqrt(n);
    std::normal_distribution<T> d{T(0), stdev};

    // fill each element w/ draw from distribution
    for (size_t r = 0; r < rows; ++r) {
      for (size_t c = 0; c < cols; ++c) {
        M(r, c) = d(gen);
      }
    }
    return M;
  }
};

// Element-wise product of *this and target.
// Both operands must have the same shape (checked with assert).
template <typename Type>
Matrix<Type> Matrix<Type>::multiply_elementwise(Matrix<Type> &target){
    assert(shape == target.shape);

    Matrix result(*this);  // the copy gives the result the right dimensions
    for (size_t i = 0; i < result.rows; ++i) {
      for (size_t j = 0; j < result.cols; ++j) {
        Type &dst = result(i, j);
        dst = target(i, j) * (*this)(i, j);
      }
    }
    return result;
  }

  // Returns a new matrix in which every element of *this is multiplied by
  // itself (an element-wise square, not the matrix product).
  template<typename Type>
  Matrix<Type> Matrix<Type>::square() {
    return multiply_elementwise(*this);
  }

  // Returns a new matrix in which every element of *this is multiplied by
  // scalar (scalar on the left-hand side of the product).
  template<typename Type>
  Matrix<Type> Matrix<Type>::multiply_scalar(Type scalar) {
    Matrix scaled(*this);
    for (size_t i = 0; i < scaled.rows; ++i) {
      for (size_t j = 0; j < scaled.cols; ++j) {
        // scaled starts as a copy of *this, so the cell still holds the
        // original value when it is read here.
        Type &cell = scaled(i, j);
        cell = scalar * cell;
      }
    }
    return scaled;
  }

  // Element-wise sum of *this and target.
  // Both operands must have the same shape (checked with assert).
  template<typename Type>
  Matrix<Type> Matrix<Type>::add(Matrix &target) {
    assert(shape == target.shape);

    const size_t out_cols = std::get<1>(target.shape);
    Matrix sum(rows, out_cols);

    for (size_t i = 0; i < sum.rows; ++i) {
      for (size_t j = 0; j < sum.cols; ++j) {
        sum(i, j) = (*this)(i, j) + target(i, j);
      }
    }
    return sum;
  }

  // Element-wise difference (*this) - target, computed as (*this) + (-target).
  template<typename Type>
  Matrix<Type> Matrix<Type>::sub(Matrix &target) {
    // add() takes a non-const lvalue, so the negated operand needs a name.
    Matrix negated(-target);
    return add(negated);
  }

  // Returns the transpose: a cols x rows matrix whose element (c, r) equals
  // element (r, c) of *this.
  template<typename Type>
   Matrix<Type> Matrix<Type>::T() {
    Matrix result(cols, rows);

    // Walk the source matrix and write each element to its mirrored position.
    for (size_t src_row = 0; src_row < rows; ++src_row) {
      for (size_t src_col = 0; src_col < cols; ++src_col) {
        result(src_col, src_row) = (*this)(src_row, src_col);
      }
    }
    return result;
  }

  // Returns a new matrix whose element (r, c) is func applied to element
  // (r, c) of *this.
  //
  // func is a plain function pointer: a free function, a static member
  // function or a captureless lambda can be passed, a non-static member
  // function cannot. func must not be null (checked with assert).
  template<typename Type>
  Matrix<Type> Matrix<Type>::apply_function(Type (*func)(Type)) {
    assert(func != nullptr);
    Matrix output((*this));
    for (size_t r = 0; r < rows; ++r) {
      for (size_t c = 0; c < cols; ++c) {
        // Call the parameter itself; `function` is not a declared name here.
        output(r, c) = func((*this)(r, c));
      }
    }
    return output;
  }

//MLP.cpp

#include "Matrix.h"
#include "MLP.h"
#include <iostream>


    // Prints, for every index i of weights, the weight matrix followed by the
    // bias matrix with the same index.
    // NOTE(review): assumes biases has at least weights.size() entries — confirm.
    void MLP::printParameters(){
        // size_t index to match weights.size() and avoid a signed/unsigned comparison.
        for(size_t i = 0; i < weights.size(); i++){
            std::cout << "weights in layer " << i << ":" << std::endl;
            weights[i].print();
            std::cout << "biases in layer " << i << ":" << std::endl;
            biases[i].print();
        }
    }
    // Logistic sigmoid: 1 / (1 + e^(-x)).
    inline double MLP::sigmoid(double x) {
        const double decay = std::exp(-x);
        return 1.0 / (1 + decay);
    }

/* Forward pass. For every layer i:
       z = weights[i] * input + biases[i]
       activations[i] = sigmoid applied to every element of z
   where the input is x for the first layer and the previous layer's
   activation for every later layer.

   Matrix::apply_function takes a plain "double (*)(double)", so MLP::sigmoid
   has to be declared static in the class. As a non-static member its type is
   "double (MLP::*)(double x)", which is incompatible with a parameter of type
   "double (*)(double)". */
    void MLP::feedforward(Matrix<double> x){
        // NOTE(review): the loop is bounded by layers.size() but indexes
        // weights, biases and activations — confirm they all have that length.
        for (size_t i = 0; i < layers.size(); i++){
            auto z = weights[i].matmul(x) + biases[i];
            activations[i] = z.apply_function(sigmoid);
            // Feed this layer's output into the next layer. x is a by-value
            // parameter, so the caller's matrix is not modified.
            x = activations[i];
        }
    }

Last edited on

seeplus (6597)

This compiles. The main issue is that sigmoid needs to be declared static. As you didn't provide compilable code, I've guessed at an MLP struct so that the example compiles:

#include <vector>
#include <cmath>
#include <cassert>
#include <iostream>
#include <tuple>
#include <random>
#include <functional>

// Dense matrix of `Type`, stored as a vector of row vectors (data[row][col]).
//
// The arithmetic helpers declared below are defined out of class; each one
// builds and returns a new matrix.
template<typename Type>
class Matrix {

	size_t cols {};
	size_t rows {};

public:
	std::vector<std::vector<Type>> data;  // data[r][c]
	std::tuple<size_t, size_t> shape;     // (rows, cols)
	size_t elementCount {};               // rows * cols

	/* constructors */

	// Builds a rowsArg x colsArg matrix whose elements are value-initialized.
	// The initializer list follows the member declaration order (cols, rows,
	// data, shape, elementCount), which is the order the members are really
	// initialized in, and only reads the constructor arguments.
	Matrix(size_t rowsArg, size_t colsArg)
		: cols(colsArg),
		  rows(rowsArg),
		  data(rowsArg, std::vector<Type>(colsArg)),
		  shape(rowsArg, colsArg),
		  elementCount(rowsArg * colsArg) {}

	// Empty matrix: zero rows, zero columns, no storage.
	Matrix() = default;

	//methods

	// Writes the matrix to std::cout, one row per line.
	void print();

	// Matrix product (*this) * m.
	Matrix<Type> matmul(Matrix<Type>& m);

	// Element-wise product with m.
	Matrix<Type> multiply_elementwise(Matrix<Type>& m);

	// Every element multiplied by scalar.
	Matrix<Type> multiply_scalar(Type scalar);

	// Every element multiplied by itself.
	Matrix<Type> square();

	// Element-wise sum with m.
	Matrix<Type> add(Matrix<Type>& m);

	// Element-wise difference (*this) - target.
	Matrix<Type> sub(Matrix& target);

	// Transpose.
	Matrix<Type> T();

	// func applied to every element. func is a plain function pointer, so a
	// non-static member function cannot be passed.
	Matrix<Type> apply_function(Type(*func)(Type));

	// Mutable access to element (row, col). Bounds are checked with assert
	// against the addressed row, so the check is compiled out under NDEBUG.
	Type& operator()(size_t row, size_t col) {
		assert(row < data.size() && col < data[row].size());
		return data[row][col];
	}

	// Element-wise sum; forwards to add().
	Matrix operator+(Matrix& target) {
		return add(target);
	}

	// Unary minus: returns a new matrix with every element negated.
	Matrix operator-() {
		Matrix output(rows, cols);
		for (size_t r = 0; r < rows; ++r) {
			for (size_t c = 0; c < cols; ++c) {
				output(r, c) = -(*this)(r, c);
			}
		}
		return output;
	}

	// Element-wise difference; forwards to sub().
	Matrix operator-(Matrix& target) {  // for cleaner usage
		return sub(target);
	}

};

// methods
// Writes the matrix to std::cout: one row per line, each element followed by
// a single space, and the stream flushed after every row.
template<typename Type>
void Matrix<Type>::print() {
	// Index with size_t to match rows/cols and avoid signed/unsigned comparisons.
	for (size_t i = 0; i < rows; ++i) {
		for (size_t j = 0; j < cols; ++j) {
			std::cout << data[i][j] << " ";
		}
		std::cout << std::endl;
	}
}

// Matrix product (*this) * target.
//
// Requires this->cols == target.rows (checked with assert). Returns a new
// rows x target.cols matrix.
template <typename Type>
Matrix<Type> Matrix<Type>::matmul(Matrix<Type>& target) {
	assert(cols == target.rows);

	const size_t inner = target.rows;  // dimension shared by both operands
	Matrix product(rows, target.cols);

	for (size_t i = 0; i < product.rows; ++i) {
		for (size_t j = 0; j < product.cols; ++j) {
			// The constructor value-initializes every element, so the cell
			// can be accumulated into directly.
			Type& cell = product(i, j);
			for (size_t k = 0; k < inner; ++k) {
				cell += (*this)(i, k) * target(k, j);
			}
		}
	}
	return product;
}

// Factory helpers for Matrix<T>.
template <typename T>
struct mtx {
	// Returns a rows x cols matrix whose elements are drawn from a normal
	// distribution with mean 0 and standard deviation 1 / sqrt(rows * cols).
	// A new generator is seeded from std::random_device on every call.
	static Matrix<T> randn(size_t rows, size_t cols) {
		Matrix<T> M(rows, cols);

		// Nothing to fill, and it avoids computing 1 / sqrt(0) below.
		if (M.elementCount == 0) {
			return M;
		}

		std::random_device rd {};
		std::mt19937 gen { rd() };

		// init Gaussian distr. w/ N(mean=0, stdev=1/sqrt(numel))
		// std::sqrt selects the overload for T, so the result needs no
		// narrowing conversion when T is float.
		const T n = static_cast<T>(M.elementCount);
		const T stdev = T(1) / std::sqrt(n);
		std::normal_distribution<T> d { T(0), stdev };

		// fill each element w/ draw from distribution
		for (size_t r = 0; r < rows; ++r) {
			for (size_t c = 0; c < cols; ++c) {
				M(r, c) = d(gen);
			}
		}
		return M;
	}
};

// Element-wise product of *this and target.
// Both operands must have the same shape (checked with assert).
template <typename Type>
Matrix<Type> Matrix<Type>::multiply_elementwise(Matrix<Type>& target) {
	assert(shape == target.shape);

	Matrix result(*this);  // the copy gives the result the right dimensions
	for (size_t i = 0; i < result.rows; ++i) {
		for (size_t j = 0; j < result.cols; ++j) {
			Type& dst = result(i, j);
			dst = target(i, j) * (*this)(i, j);
		}
	}
	return result;
}

// Returns a new matrix in which every element of *this is multiplied by
// itself (an element-wise square, not the matrix product).
template<typename Type>
Matrix<Type> Matrix<Type>::square() {
	return multiply_elementwise(*this);
}

// Returns a new matrix in which every element of *this is multiplied by
// scalar (scalar on the left-hand side of the product).
template<typename Type>
Matrix<Type> Matrix<Type>::multiply_scalar(Type scalar) {
	Matrix scaled(*this);
	for (size_t i = 0; i < scaled.rows; ++i) {
		for (size_t j = 0; j < scaled.cols; ++j) {
			// scaled starts as a copy of *this, so the cell still holds the
			// original value when it is read here.
			Type& cell = scaled(i, j);
			cell = scalar * cell;
		}
	}
	return scaled;
}

// Element-wise sum of *this and target.
// Both operands must have the same shape (checked with assert).
template<typename Type>
Matrix<Type> Matrix<Type>::add(Matrix& target) {
	assert(shape == target.shape);

	const size_t out_cols = std::get<1>(target.shape);
	Matrix sum(rows, out_cols);

	for (size_t i = 0; i < sum.rows; ++i) {
		for (size_t j = 0; j < sum.cols; ++j) {
			sum(i, j) = (*this)(i, j) + target(i, j);
		}
	}
	return sum;
}

// Element-wise difference (*this) - target, computed as (*this) + (-target).
template<typename Type>
Matrix<Type> Matrix<Type>::sub(Matrix& target) {
	// add() takes a non-const lvalue, so the negated operand needs a name.
	Matrix negated(-target);
	return add(negated);
}

// Returns the transpose: a cols x rows matrix whose element (c, r) equals
// element (r, c) of *this.
template<typename Type>
Matrix<Type> Matrix<Type>::T() {
	Matrix result(cols, rows);

	// Walk the source matrix and write each element to its mirrored position.
	for (size_t src_row = 0; src_row < rows; ++src_row) {
		for (size_t src_col = 0; src_col < cols; ++src_col) {
			result(src_col, src_row) = (*this)(src_row, src_col);
		}
	}
	return result;
}

// Returns a new matrix whose element (r, c) is func applied to element (r, c)
// of *this. func is a plain function pointer, so a non-static member function
// cannot be passed.
template<typename Type>
Matrix<Type> Matrix<Type>::apply_function(Type(*func)(Type)) {
	Matrix mapped(*this);  // the copy gives the result the right dimensions
	for (size_t i = 0; i < rows; ++i) {
		for (size_t j = 0; j < cols; ++j) {
			Type& dst = mapped(i, j);
			dst = func((*this)(i, j));
		}
	}
	return mapped;
}

// Multilayer perceptron state plus the operations used in this example.
struct MLP {
	// feedforward() loops layers.size() times and indexes weights, biases and
	// activations with the loop counter.
	std::vector<Matrix<double>> weights;
	std::vector<Matrix<double>> biases;
	std::vector<Matrix<double>> layers;
	std::vector<Matrix<double>> activations;

	// Prints every weight matrix and the bias matrix with the same index.
	void printParameters();

	// Computes activations[i] for every layer from the input x.
	void feedforward(Matrix<double> x);

	// Logistic sigmoid. Declared static so that it converts to the plain
	// double (*)(double) that Matrix::apply_function expects.
	static inline double sigmoid(double x);
};

// Prints, for every index i of weights, the weight matrix followed by the
// bias matrix with the same index.
// NOTE(review): assumes biases has at least weights.size() entries — confirm.
void MLP::printParameters() {
	// size_t index to match weights.size() and avoid a signed/unsigned comparison.
	for (size_t i = 0; i < weights.size(); i++) {
		std::cout << "weights in layer " << i << ":" << std::endl;
		weights[i].print();
		std::cout << "biases in layer " << i << ":" << std::endl;
		biases[i].print();
	}
}

// Logistic sigmoid: 1 / (1 + e^(-x)).
inline double MLP::sigmoid(double x) {
	const double decay = std::exp(-x);
	return 1.0 / (1 + decay);
}

/* Forward pass. For every layer i:
       z = weights[i] * input + biases[i]
       activations[i] = sigmoid applied to every element of z
   where the input is x for the first layer and the previous layer's
   activation for every later layer.

   Passing sigmoid works because MLP declares it static: it then converts to
   the "double (*)(double)" that Matrix::apply_function expects. A non-static
   member would have type "double (MLP::*)(double x)" and be rejected. */
void MLP::feedforward(Matrix<double> x) {
	// NOTE(review): the loop is bounded by layers.size() but indexes weights,
	// biases and activations — confirm they all have that length.
	for (size_t i = 0; i < layers.size(); i++) {
		auto z = weights[i].matmul(x) + biases[i];
		activations[i] = z.apply_function(sigmoid);
		// Feed this layer's output into the next layer. x is a by-value
		// parameter, so the caller's matrix is not modified.
		x = activations[i];
	}
}

// Empty entry point so the listing builds as a standalone program.
int main() {
	return 0;
}

closed account (E093605o)

thanks, it works now.

Topic archived. No new replies allowed.

building a multilayer perceptron

C++

Forum