Neural network not working properly

Dear Community,

I'm wondering what I'm doing wrong when it comes to building neural networks and I can't come up with a solution on my own.

I want to build a lyric lookup program that takes a string containing a phrase similar to one that occurs in a song's lyrics and uses a trained neural network to find the matching title. The following code is what I've done so far, but it always outputs the last element of the std::map 'songs', and I don't understand why, because in this state it should be 'See you again'.
My code used to work perfectly fine with only 2 songs in the map..


The actual code:
https://pastebin.com/G42ShRQP


Output:
1
2
3
test string: see you again
Result : 0.00674724 0.0170621 0.976191
the song 'see you again' fits best to Sky full of Stars

this is just wrong.

Can someone tell me what needs to be changed?

Thanks in advance

Luke
Last edited on
It's better to post the actual code - rather than an unknown web link.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#include <cmath>
#include <ctime>
#include <iostream>
#include <random>
#include <vector>
#include <algorithm>
#include <cctype>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <sstream>

// A minimal feed-forward classifier: one sigmoid hidden layer followed by a
// softmax output layer. It is trained one sample at a time by gradient descent
// on the cross-entropy loss. The network has no bias terms.
//
// Callers must pass vectors whose sizes match the sizes given to the
// constructor (input_size for inputs, output_size for targets); sizes are not
// checked at run time.
class NeuralNetwork {
public:
	// Builds a network with the given layer sizes and random initial weights
	// drawn uniformly from [-0.5, 0.5).
	NeuralNetwork(int input_size, int hidden_size, int output_size)
		: input_size(input_size), hidden_size(hidden_size), output_size(output_size) {
		initialize_weights();
	}

	// Runs the network on `input` and returns the softmax class probabilities.
	// The hidden activations are cached in `hidden_layer` for use by train().
	std::vector<double> forward(const std::vector<double>& input) {
		hidden_layer = apply_activation_function(multiply(input, input_to_hidden_weights));
		return apply_softmax(multiply(hidden_layer, hidden_to_output_weights));
	}

	// Performs one gradient-descent step towards `target` (a one-hot or
	// probability vector of length output_size) for the given `input`.
	void train(const std::vector<double>& input, const std::vector<double>& target, double learning_rate) {
		const std::vector<double> output = forward(input);

		// For softmax + cross-entropy the descent direction at the output
		// pre-activations is simply (target - output).
		const std::vector<double> output_delta = subtract(target, output);

		// Back-propagate through the hidden->output weights *before* they are
		// updated. The sigmoid derivative is evaluated from the cached
		// activations (a * (1 - a)), not by applying sigmoid a second time.
		const std::vector<double> hidden_error = multiply_transpose(output_delta, hidden_to_output_weights);
		const std::vector<double> hidden_delta =
			elementwise_multiply(hidden_error, apply_activation_function_derivative(hidden_layer));

		// The learning rate is applied exactly once per layer.
		update_weights(input_to_hidden_weights, input, elementwise_multiply(hidden_delta, learning_rate));
		update_weights(hidden_to_output_weights, hidden_layer, elementwise_multiply(output_delta, learning_rate));
	}

private:
	int input_size;
	int hidden_size;
	int output_size;
	std::vector<std::vector<double>> input_to_hidden_weights;   // input_size  x hidden_size
	std::vector<std::vector<double>> hidden_to_output_weights;  // hidden_size x output_size
	std::vector<double> hidden_layer;                           // activations of the last forward()

	// Allocates both weight matrices and fills them from ONE generator, so the
	// two matrices do not receive the same random sequence.
	void initialize_weights() {
		input_to_hidden_weights = create_matrix(input_size, hidden_size);
		hidden_to_output_weights = create_matrix(hidden_size, output_size);

		std::mt19937 generator(std::random_device{}());
		randomize_weights(input_to_hidden_weights, generator);
		randomize_weights(hidden_to_output_weights, generator);
	}

	// Applies the sigmoid to every element of x.
	std::vector<double> apply_activation_function(const std::vector<double>& x) {
		std::vector<double> result(x.size());
		for (size_t i = 0; i < x.size(); ++i) {
			result[i] = sigmoid(x[i]);
		}
		return result;
	}

	double sigmoid(double x) {
		return 1.0 / (1.0 + std::exp(-x));
	}

	// Sigmoid derivative expressed in terms of the sigmoid OUTPUT s = sigmoid(x).
	double sigmoid_derivative(double s) {
		return s * (1.0 - s);
	}

	// Element-wise sigmoid derivative; `activations` must already be sigmoid
	// outputs (e.g. the cached hidden layer), not pre-activations.
	std::vector<double> apply_activation_function_derivative(const std::vector<double>& activations) {
		std::vector<double> result(activations.size());
		for (size_t i = 0; i < activations.size(); ++i) {
			result[i] = sigmoid_derivative(activations[i]);
		}
		return result;
	}

	// Numerically stable softmax: subtracting the maximum before exponentiating
	// leaves the result unchanged mathematically but prevents exp() overflow.
	std::vector<double> apply_softmax(const std::vector<double>& x) {
		if (x.empty()) {
			return {};
		}

		const double max_value = *std::max_element(x.begin(), x.end());
		double sum = 0.0;
		std::vector<double> exp_x(x.size());
		for (size_t i = 0; i < x.size(); ++i) {
			exp_x[i] = std::exp(x[i] - max_value);
			sum += exp_x[i];
		}

		for (double& value : exp_x) {
			value /= sum;
		}

		return exp_x;
	}

	std::vector<std::vector<double>> create_matrix(int rows, int cols) {
		return std::vector<std::vector<double>>(rows, std::vector<double>(cols));
	}

	// Fills `weights` with uniform values in [-0.5, 0.5) drawn from `generator`.
	void randomize_weights(std::vector<std::vector<double>>& weights, std::mt19937& generator) {
		std::uniform_real_distribution<double> distribution(-0.5, 0.5);

		for (auto& row : weights) {
			for (double& value : row) {
				value = distribution(generator);
			}
		}
	}

	// Row vector times matrix: result[i] = sum_j a[j] * b[j][i].
	// Returns an empty vector for an empty matrix instead of indexing b[0].
	std::vector<double> multiply(const std::vector<double>& a, const std::vector<std::vector<double>>& b) {
		if (b.empty()) {
			return {};
		}

		std::vector<double> result(b[0].size(), 0.0);

		// Row-major traversal; the per-element summation order is unchanged.
		for (size_t j = 0; j < a.size(); ++j) {
			for (size_t i = 0; i < result.size(); ++i) {
				result[i] += a[j] * b[j][i];
			}
		}

		return result;
	}

	// Matrix times column vector: result[i] = sum_j b[i][j] * a[j].
	std::vector<double> multiply_transpose(const std::vector<double>& a, const std::vector<std::vector<double>>& b) {
		std::vector<double> result(b.size(), 0.0);

		for (size_t i = 0; i < b.size(); ++i) {
			for (size_t j = 0; j < b[i].size(); ++j) {
				result[i] += a[j] * b[i][j];
			}
		}

		return result;
	}

	// Element-wise a - b; b must be at least as long as a.
	std::vector<double> subtract(const std::vector<double>& a, const std::vector<double>& b) {
		std::vector<double> result(a.size());

		for (size_t i = 0; i < a.size(); ++i) {
			result[i] = a[i] - b[i];
		}

		return result;
	}

	// Scales every element of a by `scalar`.
	std::vector<double> elementwise_multiply(const std::vector<double>& a, double scalar) {
		std::vector<double> result(a.size());

		for (size_t i = 0; i < a.size(); ++i) {
			result[i] = a[i] * scalar;
		}

		return result;
	}

	// Element-wise (Hadamard) product; b must be at least as long as a.
	std::vector<double> elementwise_multiply(const std::vector<double>& a, const std::vector<double>& b) {
		std::vector<double> result(a.size());

		for (size_t i = 0; i < a.size(); ++i) {
			result[i] = a[i] * b[i];
		}

		return result;
	}

	// Adds the outer product inputs x gradients to `weights`. `gradients` is
	// expected to be already scaled by the learning rate.
	void update_weights(std::vector<std::vector<double>>& weights, const std::vector<double>& inputs, const std::vector<double>& gradients) {
		for (size_t i = 0; i < weights.size(); ++i) {
			for (size_t j = 0; j < weights[i].size(); ++j) {
				weights[i][j] += inputs[i] * gradients[j];
			}
		}
	}
};




// Returns a lower-cased copy of `text`.
//
// Each character is converted through `unsigned char` before calling
// std::tolower: passing a negative plain `char` (any byte >= 0x80 where char
// is signed, e.g. UTF-8 lyrics) to tolower is undefined behaviour.
std::string preprocess(const std::string& text) {
	std::string result = text;
	std::transform(result.begin(), result.end(), result.begin(),
		[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
	return result;
}

// Splits `text` on whitespace and returns the non-empty pieces in order.
// Punctuation stays attached to the word it touches.
std::vector<std::string> tokenize(const std::string& text) {
	std::istringstream words(text);
	std::vector<std::string> pieces;

	// Stream extraction skips any run of whitespace and stops at end of input.
	for (std::string word; words >> word;) {
		pieces.push_back(word);
	}

	return pieces;
}

// Collects the distinct tokens into an ordered set (duplicates collapse).
std::set<std::string> create_vocabulary(const std::vector<std::string>& tokens) {
	std::set<std::string> unique_words;
	unique_words.insert(tokens.begin(), tokens.end());
	return unique_words;
}

// Assigns each vocabulary word a consecutive index starting at 0, following
// the set's iteration order.
std::map<std::string, int> create_word_index(const std::set<std::string>& vocabulary) {
	std::map<std::string, int> positions;
	int next_position = 0;

	for (auto entry = vocabulary.begin(); entry != vocabulary.end(); ++entry) {
		positions[*entry] = next_position;
		++next_position;
	}

	return positions;
}

// Builds a vector with one slot per entry of `word_index`: the slot stored for
// `word` is set to 1.0 and every other slot is 0.0. A word that is not in the
// index yields an all-zero vector.
std::vector<double> one_hot_encode(const std::string& word, const std::map<std::string, int>& word_index) {
	std::vector<double> encoded(word_index.size(), 0.0);

	const auto match = word_index.find(word);
	if (match == word_index.end()) {
		return encoded;
	}

	encoded[match->second] = 1.0;
	return encoded;
}

Last edited on
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
int main() {

	std::map <std::string, std::string> songs = {
			{
					"Halo", "Remember those walls I built? "
							"Well, baby, they're tumbling down "
							"And they didn't even put up a fight "
							"They didn't even make a sound "
							"I found a way to let you in "
							"But I never really had a doubt "
							"Standin' in the light of your halo "
							"I got my angel now "
							"It's like I've been awakened "
							"Every rule I had you breakin' "
							"It's the risk that I'm takin' "
							"I ain't never gonna shut you out "
							"Everywhere I'm lookin' now "
							"I'm surrounded by your embrace "
							"Baby, I can see your halo "
							"You know you're my saving grace "
							"You're everything I need and more "
							"It's written all over your face "
							"Baby, I can feel your halo "
							"Pray it won't fade away "
							""
							"I can see your halo (halo), halo "
							"I can feel your halo (halo), halo "
							"I can see your halo (halo), halo"
			},

			{"See you again", "It's been a long day without you, my friend "
							  "And I'll tell you all about it when I see you again "
							  "We've come a long way from where we began "
							  "Oh, I'll tell you all about it when I see you again "
							  "When I see you again "
							  "Damn, who knew? "
							  "All the planes we flew, good things we been through "
							  "That I'd be standing right here talking to you "
							  "'Bout another path, I know we loved to hit the road and laugh "
							  "But something told me that it wouldn't last "
							  "Had to switch up, look at things different, see the bigger picture "
							  "Those were the days, hard work forever pays "
							  "Now I see you in a better place (see you in a better place) "
							  "Uh "
							  "How can we not talk about family when family's all that we got? "
							  "Everything I went through, you were standing there by my side "
							  "And now you gon' be with me for the last ride "
							  "It's been a long day without you, my friend "
							  "And I'll tell you all about it when I see you again (I'll see you again) "
							  "We've come a long way (yeah, we came a long way) "
							  "From where we began (you know we started) "
							  "Oh, I'll tell you all about it when I see you again (I'll tell you) "
							  "When I see you again"},
			{"Sky full of Stars", "'Cause you're a sky, 'cause you're a sky full of stars"
								  "I'm gonna give you my heart"
								  "'Cause you're a sky, 'cause you're a sky full of stars"
								  "'Cause you light up the path"
								  "I don't care, go on and tear me apart"
								  "I don't care if you do, ooh-ooh, ooh"
								  "'Cause in a sky, 'cause in a sky full of stars"
								  "I think I saw you"
								  "'Cause you're a sky, 'cause you're a sky full of stars"
								  "I wanna die in your arms, oh, oh-oh"
								  "'Cause you get lighter the more it gets dark"
								  "I'm gonna give you my heart, oh"
								  "I don't care, go on and tear me apart"
								  "I don't care if you do, ooh-ooh, ooh"
								  "'Cause in a sky, 'cause in a sky full of stars"
								  "I think I see you"
								  "I think I see you"
								  "'Cause you're a sky, you're a sky full of stars"
								  "Such a heavenly view"
								  "You're such a heavenly view"
								  "Yeah, yeah, yeah, ooh"}


	};




	std::set<std::string> vocabulary;

	for (const auto& song : songs) {
		std::vector <std::string> tokens = tokenize(preprocess(song.second));
		vocabulary.insert(tokens.begin(), tokens.end());
	}
	std::map<std::string, int> word_index = create_word_index(vocabulary);


	int input_size = vocabulary.size();
	int hidden_size = 9;
	int output_size = songs.size();
	NeuralNetwork nn(input_size, hidden_size, output_size);


	double learning_rate = 0.1;
	int epochs = 500;

	std::vector<std::pair<std::string, std::string>> song_list(songs.begin(), songs.end());
	std::mt19937 generator(static_cast<unsigned int>(std::time(nullptr)));

	for (int epoch = 0; epoch < epochs; ++epoch) {
		std::shuffle(song_list.begin(), song_list.end(), generator);

		int output_index = 0;
		for (const auto& song : song_list) {
			std::vector <std::string> tokens = tokenize(preprocess(song.second));

			for (const auto& token : tokens) {
				std::vector<double> input = one_hot_encode(token, word_index);
				std::vector<double> target(output_size, 0.0);
				target[output_index] = 1.0;

				nn.train(input, target, learning_rate);
			}

			++output_index;
		}
	}





	std::string test_word = "see you again";
	std::vector<double> test_input = one_hot_encode(preprocess(test_word), word_index);
	std::vector<double> prediction = nn.forward(test_input);

	std::cout << "test string: " << test_word << std::endl;
	std::cout << "result : ";
	for (double p : prediction) {
		std::cout << p << " ";
	}
	std::cout << std::endl;


	size_t best_match = std::distance(prediction.begin(), std::max_element(prediction.begin(), prediction.end()));
	size_t index = 0;
	std::string best_song_title;
	for (const auto& song : songs) {
		if (index == best_match) {
			best_song_title = song.first;
			break;
		}
		++index;
	}

	std::cout << "the song '" << test_word << "' matches best to " << best_song_title << std::endl;

	return 0;
}

What debugging has been done? The issue is with the vector prediction as the last value is always the largest.

Last edited on
Your implementation is missing a "Neuron" class, a "Layer" class, a "Synapse" class and a "Topology" class in addition to the neural network class, which is a high-level concept built around these individual classes; these are all essential parts of a neural network.

Also, nowhere do you declare a bias, which is another essential variable for a neural network.
Topic archived. No new replies allowed.