Number of word occurrences in a text file

Hello, i'm having some problems with a task I was given. I was asked to read a text file which contains words, one word a line, then print to another text file word - number of occurrences.

Here is what i got.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#include <fstream>
#include <iostream>
#include<string>
using namespace std;
int main()
{
    ifstream infile;
    ofstream outfile;
    char r = 0, c = 0, achar;
    char inputfile[ 250 ];
    int len = 0, pos1 = 0, pos2 = 0, count = 1, numberofwords = 0, i = 0, j = 0, a = 0, n = 0, uniquewords = 0;//int variable names
    string mystring, myarr[ 250000 ], temp, word;

    infile.open("text1.txt");
    if( !infile.is_open() )
    {
        cerr << "ERROR 001 OCCURRED WHEN ATTEMPTING TO OPEN FILE\n\n" << endl;//prints error
        exit( 001 );
    }

    outfile.open( "outputfile.txt" );
    if( !outfile.is_open() )
    {
        cerr << "ERROR 002 OCCURRED WHEN ATTEMPTING TO OPEN OUTPUTFILE.TXT\n\n" << endl;
        exit( 002 );
    }

    while( infile.get( achar ) )
    {
        if( achar != '\n' && achar != '`' && achar != '~' && achar != '!' && achar != '@' && achar != '#' && achar != '$' && achar != '$' && achar != '%' && achar != '^' && achar != '*' && achar != '(' && achar != ')' && achar != '_' && achar != '-' && achar != '+' && achar != '=' && achar != '[' && achar != '{' && achar != '}' && achar != ']' && achar != '|' && achar != ';' && achar != ':' && achar != ',' && achar != '<' && achar != '.' && achar != '>' && achar != '/' && achar != '?' )//if achar is not a symbol
        {
            if ( achar == 'A' || achar == 'B' || achar == 'C' || achar == 'D' || achar == 'E' || achar == 'F' || achar == 'G' || achar == 'H' || achar == 'I' || achar == 'J' || achar == 'K' || achar == 'L' || achar == 'M' || achar == 'N' || achar == 'O' || achar == 'P' || achar == 'Q' || achar == 'R' || achar == 'S' || achar == 'T' || achar == 'U' || achar == 'V' || achar == 'W' || achar == 'X' || achar == 'Y' || achar == 'Z' )//if achar is a capital letter
            {
                mystring += achar + 32;
            }
            else if ( achar == '—' )
            {
                infile.get( achar );
            }
            else
                mystring += achar;
        }
    }
  
    len = mystring.length( );
  
    for ( n = 0; n < len - 1; n++ )
    {
        n = mystring.find( " ", n + 1 );
        numberofwords++;
    }

    pos1 = 0;
    pos2 = mystring.find( " ", pos1 + 1 );
    word = mystring.substr( pos1, pos2 );
    myarr[0] = word;
  
    for ( n = 1; n <= numberofwords; n++ )
    {
        pos1 = pos2;
        pos2 = mystring.find( " ", pos1 + 1 );
        word = mystring.substr( pos1 + 1, pos2 - pos1 - 1 );
        if( ( pos2 - pos1 ) > 1 )
        {
            myarr[ n ] = word;
        }   
    }

    for ( j = 1; j <= numberofwords; j++ )
    {
        for ( a = 0; a < ( numberofwords - 1 );  a++ )
        {
            if ( myarr[ a + 1 ] < myarr[ a ] )
            {
                temp = myarr[ a ];
                myarr[ a ] = myarr[ a + 1  ];
                myarr[ a + 1 ] = temp;
            }
        }
    }



    for (n=0; n<numberofwords; n++ )
    {
        if ( myarr[ n ] == myarr[ n + 1 ] )
        {
            count++;
        }
        else
        {
            mystring = count;
            mystring = mystring.append( "  " );
            mystring = mystring.append( myarr[ n ] );
            myarr[ n ] = mystring;
            uniquewords++;
            cout << myarr[ n ] << endl;
            count = 1;
        }
    }
    
    for ( j = 1; j <= numberofwords; j++ )
    {
        for ( a = 0; a < ( numberofwords - 1 );  a++ )
        {
            if ( myarr[ a + 1 ] < myarr[ a ] )
            {
                temp = myarr[ a ];
                myarr[ a ] = myarr[ a + 1  ];
                myarr[ a + 1 ] = temp;
            }
        }
    }

    infile.close();
    outfile.close();
    
    system( "PAUSE" );
    return 0;
}


The code compiles without errors, but when i try to run it, windows gives me the message, "The program has stopped working" and it closes. I can't spot the error because the compiler doesn't show any so i'm really confused. Some help would be really appreciated.
Last edited on
closed account (zwA4jE8b)
set breakpoints and step through each loop. you probably have a string or index subscript error.

and remember arrays start at subscript 0 so arr[0] is the first element, that is also true for strings.

EDIT: If there is only one word per line in the input file then why are you searching for spaces?
perhaps there is a different character you should be using to break (c++ counts it as ws usually).
Last edited on
I'm not familiar with setting breakpoints, I'm still a beginner and not too familiar with C++ or other programming languages for that matter.

Yeah I modified it a bit so it would work also with files that are not one line - one word even though my homework doesn't ask that of me.
What character should i be using to break? Sorry, I have been working on this for some hours straight :S
closed account (zwA4jE8b)
if there is one word per line then after each word the 'enter' key needs to be pressed. Therefor there is a newline char(s) after each word.

Also, c++, if using the syntax infile >> 'variable name' , will automaticall skip ws (whitespace) so spaces, newline chars, and the such.

If you are using visual studio then just click to the left of the text editor to set one of those red dots. thats a breakpoint.

I don't know about the other IDE's.

1
2
3
4
 else if ( achar == '—' )
            {
                infile.get( achar );
            }


i don't know if you realize but if the current char is '—' then infile.get(achar) skips the following character. Is that what you are meaning to do?
Last edited on
closed account (zb0S216C)
My simple solution my be pushing your limitations a bit, but here's my solution if you want to learn from it:

1) Create a structure which has two members: a string instance, and a int instance.
2) Create a vector which holds instances of your structure.
3) Create an instance of ifstream.
4) Open the file and create a loop of your choice. The condition of the loop checks to see it the file is EOF.
5) Within the loop, create a temporary instance of the structure you created.
6) Using ifstream instance's extraction operator( >> ), extract the name with the string member within the temporary instance of the structure.
7) Set the int member to 1.
8) Push back the vector with your temporary instance.
9) Extract the next word.
10) Compare the newly extracted word to the elements within the vector. if none of them match, add the new word to the vector. if it does match, increase that element's int member by 1.

That's a rough solution. I won't give full code solutions to your problem, however.

Note: The rest of the code is fairly straight forward.
Last edited on
+1 to Frameworks, but instead of using a vector<>, use a map<>: http://www.cplusplus.com/reference/stl/map/. The Key class would be std::string, and the T type would be int: map<string, int> colWords;

The map class has a find() method:

1
2
3
4
5
6
7
map<string, int>::iterator foundItem;
if (!foundItem = colWords.find(theWord))
{
    //Create a new item.
    foundItem = colWords.insert(pair<string, int>(theWord, 0)).first;
}
(*foundItem).second++;


Disclaimer: I have never used map so the above if() might need correcting.

EDIT: Just read that the iterator returns a pair, not the value itself. Modified the code to account for this + made it simpler and accounts for all cases (word found and not found). The disclaimer still applies.
Last edited on
@CreativeMFS

Thanks for clarifying.
And that wasn't exactly what I intended with that piece of code.

@Framework

This is my first programming class and we haven't learned about vectors or maps. Even so I would have to write a completely new code from the scratch and i'm really tired already :S

Thanks for the algorithm anyways. Of course iI wouldn't ask you to give me a full code solution, after all its my homework not yours.




On another note, i changed the size of the array "myarr" at the beginning to a smaller size and the program doesn't stop working anymore and i even get an output txt file but the problem is now that the output text file is blank :S
closed account (zwA4jE8b)
I don't know if this will help or confuse you but it has some similar things to what you are trying to do.

I am not reading from a file though, just user input in this program.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//Michael Ervin - A program to count different parts of a string

#include <iostream>
#include <string>
#include <iomanip>

using namespace std;

void introduction();
bool vInput(string temp, char& tempPunc, int len);
void remCommas(string& temp);
int vCounter(string temp);
int cCounter(string temp);
int lWord(string temp);
int sWord(string temp);
void Output(int V, int C, int L, int S, char P);

int main()
{
	string input;
	int len, numVowels = 0, numCons = 0, longWord, shortWord;
	char pMark = '\0';
	bool gString = false;

	introduction();

	do
	{
		getline(cin, input);
		len = input.length()-1;
		gString = vInput(input, pMark, len);
	}
	while (gString == false);

	input = input.substr(0, len);
	remCommas(input);

	numVowels = vCounter(input);
	numCons = cCounter(input);
	longWord = lWord(input);
	shortWord = sWord(input);

	Output(numVowels, numCons, longWord, shortWord, pMark);

	cout << "Press enter to exit. ";
	cin.get();
	return 0;
}

void introduction()
{
	cout << "----------------------------------------------------------\n" <<
		    "Please enter a sentence that ends with a punctuation mark.\n" <<
			"No special characters please, letters and , . ! ?\n" <<
			"----------------------------------------------------------\n\n";
}

bool vInput(string temp, char& tempPunc, int len)
{
	if (temp[len] == '.' || temp[len] == '?' || temp[len] == '!')
	{
		tempPunc = temp.at(len);
		return true;
	}
	cout << "\nSorry, your sentence does not end with a punctuation mark\n" << "Please enter a different sentence" << endl;
	return false;
}

void remCommas(string& temp)
{
	int position;
	if (temp.find(',') != -1)
	{
		position = temp.find(',');
		while (position != -1)
		{
			temp.erase(position, 1);
			position = temp.find(',', position + 1);
		}
	}
}

int vCounter(string temp)
{
	unsigned int i;
	int vc = 0;

	for (i = 0; i <= temp.length(); i++)
	{
		if (toupper(temp[i]) == 'A' || toupper(temp[i]) == 'E' ||
			toupper(temp[i]) == 'I' || toupper(temp[i]) == 'O' ||
			toupper(temp[i]) == 'U')
		vc++;
	}
	return vc;
}

int cCounter(string temp)
{
	unsigned int i;
	int cc = 0;

	for (i = 0; i <= temp.length(); i++)
	{
		if ((temp[i] >= 66 && temp[i] <= 90) || (temp[i] >= 98 && temp[i] <= 122))
			if (temp[i] != 69 && temp[i] != 73 && temp[i] != 79 &&temp[i] != 85 &&
				temp[i] != 101 && temp[i] != 105 && temp[i] != 111 && temp[i] != 117)
					cc++;
	}
	return cc;
}

int lWord(string temp)
{
	int position, longest = 0;

	temp.append(1, ' ');
	
	while (temp.size() != 0)
	{
		position = temp.find(' ', 0);
		if (position >= longest)
			longest = position;
		temp = temp.substr(position + 1, temp.length() - 1);
	}
	return longest;
}

int sWord(string temp)
{
	int position, shortest = temp.length();

	temp.append(1, ' ');
	
	while (temp.size() != 0)
	{
		position = temp.find(' ', 0);
		if (position <= shortest)
			shortest = position;
		temp = temp.substr(position + 1, temp.length() - 1);
	}
	return shortest;
}

void Output(int V, int C, int L, int S, char P)
{
	cout << endl;
	cout << "Your sentence has " << V << " vowels." << endl;
	cout << "Your sentence has " << C << " consonants." << endl;
	cout << "Your longest word is " << L << " characters long." << endl;
	cout << "Your shortest word is " << S << " characters long." << endl;
	cout << "The punctuation mark you used was: " << P << endl;
	cout << endl;
}
closed account (zwA4jE8b)
oh and you never write to outfile. you open it then close it.

to write to a file is similar to cout.

outfile << 'stuff to output'
Thanks CreativeMFS that did help.

I took a different approach at this and the program works fine except for a small problem. When the words are printed into the text file, similar words need to be printed only once with the number of occurrences but instead it prints the word as many times as it appears in the input file.

I have no idea really how to make it so that similar words would be printed only once.

Here is the source code, any help would be greatly appreciated.



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#include <iostream>
#include <string>
#include <iomanip>
#include <fstream>
using namespace std;

struct countWord
{		
     string word;		
     int count;	
}; 
class wordOcurr
{
    public:
        //wordFrequency();
        int countFile(int counter);
        void openFile(ifstream &Input, ofstream &Output);
        
    private:
        string wordsM[];
        //struct wordFreq[];
};

int main()
{

    wordOcurr word;
    //Declare variables
    int counter = 0;
	int length;
    
    
    length = word.countFile(counter);
    string *wordsM = new string [length];
    struct countWord *wordFreq = new struct countWord[length]; //make array of structs
    
    //Declare stream variables
    ifstream Input;
    ofstream Output;
    
    
    word.openFile(Input, Output);
    
    
    
    while (!Input.eof()) //while not at end of file
    {
          
          for(int i=0; i < length; i++) 
          {
                    Input >> wordsM[i];  //read words from file into array
                   
          
          
          wordFreq[i].word = wordsM[i]; //place words from file into 2nd array
          wordFreq[i].count = 0;
          }
          

    }
    for (int i = 0; i < length;i++)
    {
        for (int j = 0; j < length; j++)
        {
            if (wordsM[i] == wordFreq[j].word) 
            {
               //compare the words in the 2 arrays, if there is a match 
               //increment count for that word
               wordFreq[i].count++;
            }
        
		}
		
	   
        Output << setw(15) << wordsM[i] << setw(4) << wordFreq[i].count <<endl;
		
    }
    
    Input.close();
    Output.close();
    system ("Pause");
    return 0;
}
int wordOcurr::countFile(int counter)
{
    ifstream Input;
    int counts = 0;
    string str;


    Input.open("text1.txt");
    while (!Input.eof())
    {
          Input >> str;
          counts++;
    }
    
     return counts;
}
void wordOcurr::openFile(ifstream &Input, ofstream &Output)
{
    //open input file
    Input.open("text1.txt");
    
    if (!Input)
    {
                cout << "Unable to open the file!" << endl;
    }
    
    //open output file
    Output.open("textoutput.txt"); 
    
    
    Output << left << setw(15) << "Word" << setw(4)
                    <<"Number\n\n"<< endl;
   
}
Topic archived. No new replies allowed.