If you only care about how many each, then it's much easier.
1. Remove struct Position from my code above
2. Where you see Position in the rest of the code, replace with unsigned (Line 16)
3. Remove Lines 32, 33
4. Replace Line 34 with myMap[ word ] = 1; // you have one word initially
5. Replace Line 36 with myMap[ word ]++; // you have one more word
6. Replace Line 51 with unsigned count = it1->second; // it1->second holds how many times the word occurred
7. Remove Lines 52-54
8. At Line 52, print out the results!
That should do it - post if you have questions.
One other thing - when I say Remove, just comment the line out at first; otherwise the line numbers will shift and be hard to track.
Is there a function that returns the total count of all words? For "Car Car Mouse", myMap.size() returns 2 (the number of distinct words); how do I get 3?
I need to work with those numbers to calculate 2 / 3 and 1 / 3.
I finished the calculations for entropy. Now I would like to modify the input .txt
When the input contains "mouse, car. car" (i.e. the characters . , :), the output is not correct. I could handle it with a for loop, but that is not very nice. Is there a nicer way than that?
if (((character>'a')&&(character<'z'))||((character>'A')&&(character<'Z')))
What can I do to make the program ignore the characters [ . : , ]? This is just a cosmetic change and is not important: when the input does not contain [ . : , ], everything is OK.
The bigger problem is that I do not know how to calculate the total entropy.
output
word count entropy entropy*count
-------------------------------------------------
computer 3 1.58 4.75
mouse 4 1.16 4.67
Total words: 7
Total entropy: 9.42
#include <iostream>
#include <string>
#include <fstream>
#include <map>
#include <stdio.h>
#include <math.h>
using namespace std; // bring the standard library names used by this listing into scope
// Returns true for the ASCII letters 'a'..'z' and 'A'..'Z', endpoints included.
static bool isAsciiLetter( char c )
{
    return ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' );
}

// Reads `in` character by character, treating every maximal run of ASCII
// letters as one word and everything else (spaces, '.', ',', ':', digits,
// newlines, ...) as a separator.
// Each word's occurrence count is accumulated in `counts`.
// Returns the total number of words read.
static int countWords( std::istream& in, std::map< std::string, unsigned >& counts )
{
    int total = 0;
    std::string word;
    char character;

    // Loop on the read itself so a failed read is never processed as data.
    while ( in.get( character ) ) {
        if ( isAsciiLetter( character ) ) {
            word.append( 1, character );    // still inside the same word
        } else if ( !word.empty() ) {       // a separator ends the current word;
            ++counts[ word ];               // operator[] starts new entries at 0
            ++total;
            word.clear();
        }
        // Consecutive separators leave `word` empty and are simply skipped.
    }

    // The last word has no trailing separator when the file ends on a letter.
    if ( !word.empty() ) {
        ++counts[ word ];
        ++total;
    }
    return total;
}

// Asks for a text file name on standard input, counts how often each word
// occurs in that file, and prints one line per distinct word:
//   word  count  log2(totalWords / count)  count * log2(totalWords / count)
// followed by the sum of the last column and the total number of words.
// Returns 0 on success, 1 when the file cannot be opened.
int main()
{
    std::string fileName;
    std::cout << "Enter the name of an existing text file: ";
    std::getline( std::cin, fileName );

    std::ifstream ifs( fileName.c_str() );
    if ( !ifs.is_open() ) {
        std::cerr << "Cannot open file: " << fileName << std::endl;
        return 1;
    }

    // Key: the word; value: number of times it occurred.
    std::map< std::string, unsigned > myMap;
    const int totalWords = countWords( ifs, myMap );

    // output ---------------------------------------------------------------------
    std::cout << "Output file" << std::endl << std::endl;

    double totalEntropy = 0.0;
    std::map< std::string, unsigned >::const_iterator it1;
    for ( it1 = myMap.begin(); it1 != myMap.end(); ++it1 )
    {
        const std::string& key = it1->first;    // the word
        const unsigned count = it1->second;     // how many times it occurred

        // -log2(p) with p = count / totalWords, written as log2(1 / p) so that
        // a word with p == 1 prints as 0 rather than -0.
        const double wordEntropy =
            log10( static_cast<double>( totalWords ) / count ) / log10( 2.0 );
        const double weightedEntropy = wordEntropy * count;

        std::cout << key << " " << count << " " << wordEntropy << " " << weightedEntropy << std::endl;
        totalEntropy += weightedEntropy;
    }

    std::cout << " Total entropy is: " << totalEntropy;
    std::cout << std::endl;
    std::cout << "Total words in file: " << totalWords << std::endl;
    return 0;
}
#include <iostream>
#include <string>
#include <fstream>
#include <map>
#include <stdio.h>
#include <math.h>
using namespace std; // bring the standard library names used by this listing into scope
// Returns true for the ASCII letters 'a'..'z' and 'A'..'Z', endpoints included.
static bool isAsciiLetter( char c )
{
    return ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' );
}

// Adds one occurrence of `word` to `counts` and bumps `totalWords`.
// Does nothing for an empty word, so runs of separators are not counted.
static void recordWord( const std::string& word,
                        std::map< std::string, unsigned >& counts,
                        int& totalWords )
{
    if ( word.empty() ) {
        return;
    }
    ++counts[ word ];   // operator[] creates a missing entry with value 0
    ++totalWords;
}

// Asks for a text file name on standard input, splits the file into words
// (maximal runs of ASCII letters; every other character is a separator),
// and prints one line per distinct word:
//   word  count  log2(totalWords / count)  count * log2(totalWords / count)
// followed by the sum of the last column and the total number of words.
// Returns 0 on success, 1 when the file cannot be opened.
int main()
{
    std::string fileName;
    std::cout << "Enter the name of an existing text file: ";
    std::getline( std::cin, fileName );

    std::ifstream ifs( fileName.c_str() );
    if ( !ifs.is_open() ) {
        std::cerr << "Cannot open file: " << fileName << std::endl;
        return 1;
    }

    int totalWords = 0;                         // total number of words read
    std::map< std::string, unsigned > myMap;    // word -> number of occurrences
    std::string word;                           // word currently being assembled

    // Loop on the read itself so a failed read is never processed as data.
    char character;
    while ( ifs.get( character ) ) {
        if ( isAsciiLetter( character ) ) {
            word.append( 1, character );        // still inside the same word
        } else {
            recordWord( word, myMap, totalWords );
            word.clear();                       // start assembling the next word
        }
    }
    // The last word has no trailing separator when the file ends on a letter.
    recordWord( word, myMap, totalWords );

    // output ---------------------------------------------------------------------
    std::cout << "Output file" << std::endl;
    std::cout << "-----------" << std::endl;

    double totalEntropy = 0;
    std::map< std::string, unsigned >::const_iterator it1;
    for ( it1 = myMap.begin(); it1 != myMap.end(); ++it1 )
    {
        const std::string& key = it1->first;    // the word
        const unsigned count = it1->second;     // how many times it occurred

        // -log2(p) with p = count / totalWords, written as log2(1 / p) so that
        // a word with p == 1 prints as 0 rather than -0.
        const double wordEntropy =
            log10( static_cast<double>( totalWords ) / count ) / log10( 2.0 );
        const double weightedEntropy = wordEntropy * count;

        std::cout << key << " " << count << " " << wordEntropy << " " << weightedEntropy << std::endl;
        totalEntropy += weightedEntropy;
    }

    std::cout << " Total entropy is: " << totalEntropy;
    std::cout << std::endl;
    std::cout << "Total words in file: " << totalWords << std::endl;
    return 0;
}