hash table, help with transfer

Pages: 12
If you only care about how many each, then it's much easier.

1. Remove struct Position from my code above
2. Where you see Position in the rest of the code, replace with unsigned (Line 16)
3. Remove Lines 32, 33
4. Replace Line 34 with
myMap[ word ] = 1; // you have one word initially
5. Replace Line 36 with
myMap[ word ]++; // you have one more word
6. Replace Line 51 with
unsigned count = it1->second; // it2->second points to how many
7. Remove Lines 52-54
8. At Line 52, print out the results!

That should do it - post if you have questions.

One other thing - when I say Remove, you just want to comment out, initially, or the Line numbers will be hard to track.
Last edited on
I make adjustments. Do I a mistake in the cout?
1
2
3
4
5
  
50:  const string&   key   = it1->first;       
51:  unsigned count = it1->second;            
52:  cout << key << " " << cout <<endl;



car 1052ED48
mouse 1052ED48
Last edited on
hehe - you have a typo!

it's count not cout!!!

you must be tired
I'm very tired, but I still have a lot of work. You're just programming for fun?

Now its work. Now I can use myMap.size (). The total number of print?
Last edited on
myMap.size() will be 2 in your case - that's right

I program for fun and profit 8^)
Is it the function that returns a count of everything. "Car Car Mouse" myMap.size return 2, how to return 3?
I need to work with those numbers to calculate 2 / 3 and 1 / 3
you can't do it directly

instead, create a new int total=0; variable and then add the counts to total inside a loop over myMap
I finished the calculations for entropy. Now I would like to modify the input .txt
When the input contains "mouse, car. car" (character . , : ) The output is not correct. I can apply FOR cycle but it's not very nice. Is something nice than that?

if (((character>'a')&&(character<'z'))||((character>'A')&&(character<'Z')))


word         count  entropy   entropy*count
-------------------------------------------------
computer     3       1.58              4.75
mouse        4       1.16              4.67


How to calculate the total entropy? I think that, how do: 4.75 + 4.67
1
2
3
4
5
  double probability = (double) count / (double) totalWords;
  double log2 = 2;
  double result = log10(probability)/-(log10(log2));
  double entropy = (result * count);
   

I tried to create new variables, but it not work correctly
Last edited on
so I do not understand. I used this. But the output still isn"t correct. Add a word as punctuation

if (((character>'a')&&(character<'z'))||((character>'A')&&(character<'Z')))
ok - please post the following so I understand what is happening:

1. your new source file
2. your new input
3. what you expect the new output to look like

junk.txt
computer. computer, mouse mouse. mouse computer

What can I do to make the program ignore character [ . : , ] This is just a cosmetic change. It is not important. When not used [ . : , ] everything is OK.

The bigger problem is: I do not know how to calculate the total entropy

output

word         count  entropy   entropy*count
-------------------------------------------------
computer     3       1.58              4.75
mouse        4       1.16              4.67

Total words: 7
Total entropy: 9.42




1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include <iostream>
#include <string>
#include <fstream>
#include <map>
#include <stdio.h>
#include <math.h>
using namespace std;


int main()
{
  char str[256];
  int totalWords = 0; // pocita celkove slova

  map< string, unsigned > myMap;  // myMap v podstate hash, key je string(slovo) a value pocet opakovani
  string word;
  unsigned idx = 0;  // the line-number of junk.txt starting from 0
  ifstream ifs;

  cout << "Enter the name of an existing text file: ";
  cin.get (str,256);
  ifs.open( str, ifstream::in );

  if ( ifs.is_open() ) {  // jestli otevren
    while ( !ifs.eof() ) {  // vztvorim smycku dokud neni konec souboru
      char character = static_cast<char>( ifs.get() );  // z int zpet na char (static_cast) >> pretypovani >> <char> do char
			
      if ( ifs.good() ) {
        if (((character>'a')&&(character<'z'))||((character>'A')&&(character<'Z'))){ // if ( isalpha( character )) { // stale stejne slovo
          word.append( 1, character );  // pridam znak na konec slova
        } else { // jestli neni podminka splnena, mame nove slovo
          if ( myMap.find( word )==myMap.end() ) {  // nenalezeno (find, parametr key a vraci par <klic,hodnota>, klic odpovida klici zadanemu jako para.)
          
         
           myMap[ word ] = 1; // jestli nenajdu tak je slovo v textu jednou
          } else { // jestli ho najdu tak:
           myMap[ word ]++; // jinak mam toto slovo vickrat a zvysim pocet
          }
          word = "";  // reset because we're finished with word  
          idx++;
		  totalWords++;
        }
      }	 
    }
  }

  // output ---------------------------------------------------------------------
  map< string, unsigned >::const_iterator it1;  // declare an iterator for looping over map
                                                                          // vector< unsigned >::const_iterator      it2;  // declare an iterator for looping over vector
      cout << "Output file" << endl << endl;  
 
	

	  for ( it1=myMap.begin(); it1!=myMap.end(); ++it1 )   // iterate over key, value in myMap
    {  
            const string&   key   = it1->first;         // it1->first points to the key
            unsigned count = it1->second;  // it2->second points to how many  


               double probability = (double) count / (double) totalWords;   // compute the probability  vypocet pravdepodobnosti
               double log2 = 2;                                             // log. o zakladu 2
               double result = log10(probability)/-(log10(log2));           // etropie calculation for the number of words   vypocet etropie pro dany pocet slov
               const double entropy = (result * count);
               double totalEntropy ;
               cout << key << "   " << count <<"   " << result <<"   " << entropy << endl;

    }

  cout << " Total entropy is: " ;
  cout << endl;
  cout << "Total words in file: " << totalWords <<endl;   

  return 0;
}
Last edited on
did you try with = signs?

if (((character>='a')&&(character<='z'))||((character>='A')&&(character=<'Z')))

as for totalEntropy, set it to zero and put it on Line 35
on Line 64, you want:

totalEntropy += entropy;
Last edited on
Yes I did. It still uses the punctuation as word.

The total entropy works. I tried it before I wrote. But I did not create Global variables.
ok:

input:
computer. computer, mouse mouse. mouse computer 


output:
Output file
-----------
computer   3   1   3
mouse   3   1   3
 Total entropy is: 6
Total words in file: 6


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include <iostream>
#include <string>
#include <fstream>
#include <map>
#include <stdio.h>
#include <math.h>
using namespace std;


int main()
{
  char str[256];
  int totalWords = 0; // pocita celkove slova

  map< string, unsigned > myMap;  // myMap v podstate hash, key je string(slovo) a value pocet opakovani
  string word;
  unsigned idx = 0;  // the line-number of junk.txt starting from 0
  ifstream ifs;

  cout << "Enter the name of an existing text file: ";
  cin.get (str,256);
  ifs.open( str, ifstream::in );

  if ( ifs.is_open() ) {  // jestli otevren
    while ( !ifs.eof() ) {  // vztvorim smycku dokud neni konec souboru
      char character = static_cast<char>( ifs.get() );  // z int zpet na char (static_cast) >> pretypovani >> <char> do char
			
      if ( ifs.good() ) {
        if (((character>'a')&&(character<'z'))||((character>'A')&&(character<'Z'))){ // if ( isalpha( character )) { // stale stejne slovo
          word.append( 1, character );  // pridam znak na konec slova
        } else { // jestli neni podminka splnena, mame nove slovo
					if ( word.size()>0 ) {
            if ( myMap.find( word )==myMap.end() ) {  // nenalezeno (find, parametr key a vraci par <klic,hodnota>, klic odpovida klici zadanemu jako para.)
              myMap[ word ] = 1; // jestli nenajdu tak je slovo v textu jednou
// cerr << "initializaing '" << word << "' to 1" << endl;
            } else { // jestli ho najdu tak:
              myMap[ word ]++; // jinak mam toto slovo vickrat a zvysim pocet
// cerr << "setting '" << word << "' to " << myMap[word] << endl;
            }
            word = "";  // reset because we're finished with word  
            idx++;
		        totalWords++;
			    }
        }
      }	 
    }
  }

  // output ---------------------------------------------------------------------
  map< string, unsigned >::const_iterator it1;  // declare an iterator for looping over map
                                                                          // vector< unsigned >::const_iterator      it2;  // declare an iterator for looping over vector
      cout << "Output file" << endl;
			cout << "-----------" << endl;  
    double totalEntropy = 0 ;
	  for ( it1=myMap.begin(); it1!=myMap.end(); ++it1 )   // iterate over key, value in myMap
    {  
            const string&   key   = it1->first;         // it1->first points to the key
            unsigned count = it1->second;  // it2->second points to how many  


               double probability = (double) count / (double) totalWords;   // compute the probability  vypocet pravdepodobnosti
               double log2 = 2;                                             // log. o zakladu 2
               double result = log10(probability)/-(log10(log2));           // etropie calculation for the number of words   vypocet etropie pro dany pocet slov
               const double entropy = (result * count);
               cout << key << "   " << count <<"   " << result <<"   " << entropy << endl;
				totalEntropy += entropy;
    }

  cout << " Total entropy is: " << totalEntropy;
  cout << endl;
  cout << "Total words in file: " << totalWords <<endl;   

  return 0;
}
Topic archived. No new replies allowed.
Pages: 12