Showing posts with label Big Data. Show all posts
Showing posts with label Big Data. Show all posts

Friday, December 19, 2014

wordCounter Program using HashMap

Look at previous post for the context:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;

/**
 * Counts word frequencies in a text file with the help of a {@link HashMap}.
 *
 * <p>A "word" is any maximal run of non-whitespace characters. Matching is case-sensitive and
 * punctuation stays attached to the token (for example {@code "one,"} differs from
 * {@code "one"}).
 */
class wordCounter2 {

    /**
     * Reads a whole text file into one string, replacing each line break with a single space.
     *
     * @param fileName path of the file to read
     * @return the file content, every line followed by one space
     * @throws IOException if the file cannot be opened or read
     */
    String readFile(String fileName) throws IOException {
        // try-with-resources closes the reader even when a read fails part-way.
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append(' ');
            }
            return sb.toString();
        }
    }

    /**
     * Prints every distinct word of the file with its count, in alphabetical order, followed by
     * the word(s) that occur most often.
     *
     * <p>For a file without any words only the {@code "After Sorting:"} header is printed.
     *
     * @param filename path of the file to analyse
     * @throws IOException if the file cannot be opened or read
     */
    void countAllWords(String filename) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        for (String word : tokenize(readFile(filename))) {
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }

        // A TreeMap copy orders the entries by key (natural String order).
        Map<String, Integer> sortedMap = new TreeMap<>(counts);
        System.out.println("After Sorting:");
        for (Entry<String, Integer> entry : sortedMap.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }

        if (sortedMap.isEmpty()) {
            // Collections.max throws NoSuchElementException on an empty collection.
            return;
        }

        int maxValueInMap = Collections.max(sortedMap.values());
        for (Entry<String, Integer> entry : sortedMap.entrySet()) {
            // Unboxes the Integer, so this is a numeric comparison; ties are all reported.
            if (entry.getValue() == maxValueInMap) {
                System.out.println("Max Frequency: " + entry.getKey() + " ==> " + entry.getValue());
            }
        }
    }

    /**
     * Prints how many times {@code word} occurs in the file (exact, case-sensitive match).
     *
     * @param filename path of the file to analyse
     * @param word     the word to look for
     * @throws IOException if the file cannot be opened or read
     */
    void countWords(String filename, String word) throws IOException {
        int counter = 0;
        for (String token : tokenize(readFile(filename))) {
            if (token.equals(word)) {
                counter++;
            }
        }
        System.out.println(counter);
    }

    /**
     * Splits text into words on runs of whitespace, never producing empty tokens.
     *
     * <p>Splitting on a single space would turn repeated or leading spaces into empty "words"
     * and would not separate words joined by tabs.
     */
    private static String[] tokenize(String text) {
        String trimmed = text.trim();
        if (trimmed.isEmpty()) {
            return new String[0];
        }
        return trimmed.split("\\s+");
    }
}

public class wordCount2{

public static void main (String args[]) throws IOException
{
wordCounter2 wordcounter = new wordCounter2();
wordcounter.countAllWords("hello.txt");
//wordcounter.countWords("hello.txt", "Socket");
}
}

wordcounter Program


  • A program that counts the occurrences of every word in a text file.
  • It can also find the frequency of a given word in the file.


import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Counts word frequencies in a text file by sorting the words and measuring runs of equal
 * neighbours.
 *
 * <p>A "word" is any maximal run of non-whitespace characters. Matching is case-sensitive and
 * punctuation stays attached to the token.
 */
class wordCounter {

    /**
     * Reads a whole text file into one string, replacing each line break with a single space.
     *
     * @param fileName path of the file to read
     * @return the file content, every line followed by one space
     * @throws IOException if the file cannot be opened or read
     */
    String readFile(String fileName) throws IOException {
        // try-with-resources closes the reader even when a read fails part-way.
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append(' ');
            }
            return sb.toString();
        }
    }

    /**
     * Sorts the array in place into ascending natural ({@link String#compareTo}) order.
     *
     * @param arr the strings to sort; modified in place
     * @return the same array instance, now sorted
     */
    String[] sort(String[] arr) {
        // Library sort replaces the former hand-written quadratic exchange sort.
        java.util.Arrays.sort(arr);
        return arr;
    }

    /**
     * Prints each distinct word of the file with its count as {@code "word : count"}, in
     * ascending order. Nothing is printed for a file without words.
     *
     * @param filename path of the file to analyse
     * @throws IOException if the file cannot be opened or read
     */
    void countAllWords(String filename) throws IOException {
        String[] sortedArray = sort(tokenize(readFile(filename)));

        // After sorting, equal words are adjacent, so each run's length is that word's count.
        int runStart = 0;
        while (runStart < sortedArray.length) {
            int runEnd = runStart + 1;
            while (runEnd < sortedArray.length && sortedArray[runEnd].equals(sortedArray[runStart])) {
                runEnd++;
            }
            System.out.println(sortedArray[runStart] + " : " + (runEnd - runStart));
            runStart = runEnd;
        }
    }

    /**
     * Prints how many times {@code word} occurs in the file (exact, case-sensitive match).
     *
     * @param filename path of the file to analyse
     * @param word     the word to look for
     * @throws IOException if the file cannot be opened or read
     */
    void countWords(String filename, String word) throws IOException {
        int counter = 0;
        for (String token : tokenize(readFile(filename))) {
            if (token.equals(word)) {
                counter++;
            }
        }
        System.out.println(counter);
    }

    /**
     * Splits text into words on runs of whitespace, never producing empty tokens.
     *
     * <p>Splitting on a single space would turn repeated or leading spaces into empty "words"
     * and would not separate words joined by tabs.
     */
    private static String[] tokenize(String text) {
        String trimmed = text.trim();
        if (trimmed.isEmpty()) {
            return new String[0];
        }
        return trimmed.split("\\s+");
    }
}

public class wordCount{

public static void main (String args[]) throws IOException
{
wordCounter wordcounter = new wordCounter();
wordcounter.countAllWords("input.txt");
//wordcounter.countWords("input.txt", "Socket");
}
}


Following is the content of my input.txt:
Socket Class Methods:
The java.net.Socket class represents the socket that both the client and server use to
communicate with each other. The client obtains a Socket object by instantiating one,
whereas the server obtains a Socket object from the return value of the accept()
method


Running the program will produce the following output:


Changing above program a little-bit to find out the string with highest frequency and the frequency value: void countAllWords(String filename) throws IOException{String fileContent = readFile(filename);String[] arr = fileContent.split(" ");String[] sortedArray = sort(arr);int counter = 0;int maxcounter = counter;String maxString = null;for (int i=0; i<sortedArray.length; i++){ counter = 0;for (int j=0; j<sortedArray.length; j++){if (sortedArray[i].equals(arr[j])){counter = counter + 1;}elsecontinue; if (counter > maxcounter){maxcounter = counter;maxString = sortedArray[i];}} /*if (i == 0)System.out.println(sortedArray[i] + " : " + counter);else{if (sortedArray[i].equals(sortedArray[i-1]))continue;elseSystem.out.println(sortedArray[i] + " : " + counter);}*/}System.out.println("Highest Frequency = " + maxcounter);System.out.println("Highest Frequency String = " + maxString);}








Basic Hadoop Operations


Create a directory in hadoop:


  • hadoop fs -mkdir hdfs://localhost:9000/user/hadoop/mytestdir2
  • hadoop fs -mkdir /user/hadoop/mytestdir3


LISTING:


  • hadoop fs -ls  hdfs://localhost:9000/user/hadoop/
  • hadoop fs -ls /user/hadoop/



Create a local file and upload to hadoop directory: touch helloworld.txt or nano helloworld.txt

  • hadoop fs -put helloworld.txt /shiraz/hadoop/ 
  •  hadoop fs -cat /shiraz/hadoop/helloworld.txt


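compiling:
  •  hadoop com.sun.tools.javac.Main WordCount.java
you may need to export the HADOOP_CLASSPATH variable first (there must be no space after the "=", otherwise the variable is set to an empty value):
export HADOOP_CLASSPATH=/usr/lib/jvm/java-7-openjdk-amd64/lib/tools.jar
then package the compiled classes into the jar used in the next step:
  •  jar cf wc.jar WordCount*.class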


running a hadoop job:
  • hadoop jar wc.jar WordCount /shiraz/hadoop/input /shiraz/hadoop/output/paracount

hadoop executable location:
  • /usr/local/hadoop/bin/hadoop

conf/*-site.xml
  • /usr/local/hadoop/etc
mapred-site.xml
yarn-site.xml
core-site.xml
hdfs-site.xml

hdfs location:
  • /usr/local/hadoop_store/hdfs

start-all.sh, start-dfs.sh,  start-yarn.sh
  • /usr/local/hadoop/sbin