Department of Information Technology
Software Laboratory-V

Assignment No: 1
Title of the Assignment: Implementation of Client Server Application Using Thread

//MyClient.java
import java.net.*;
import java.io.*;

public class MyClient {
    public static void main(String args[]) {
        int n, ch, ch1;
        String st, st1, st2, first, last;
        try {
            Socket s = new Socket(args[0], 2001);
            DataInputStream in = new DataInputStream(s.getInputStream());
            DataOutputStream out = new DataOutputStream(s.getOutputStream());
            BufferedReader object = new BufferedReader(new InputStreamReader(System.in));
            do {
                System.out.println("1.Factorial\n2.Addition of digits\n3.String operations\n4.Exit\nEnter your choice");
                ch = Integer.parseInt(object.readLine());
                out.writeUTF(Integer.toString(ch));
                switch (ch) {
                    case 1:
                        System.out.println("Enter a number");
                        n = Integer.parseInt(object.readLine());
                        out.writeUTF(Integer.toString(n));
                        int fact = Integer.parseInt(in.readUTF());
                        System.out.println("Factorial of " + n + " is " + fact);
                        break;
                    case 2:
                        System.out.println("Enter a number");
                        n = Integer.parseInt(object.readLine());
                        out.writeUTF(Integer.toString(n));
                        int sum = Integer.parseInt(in.readUTF());
                        System.out.println("Addition of digits of " + n + " is " + sum);
                        break;
                    case 3:
                        do {
                            System.out.println("1.Concatenation\n2.Substring\n3.Palindrome\n4.Exit\nEnter your choice");
                            ch1 = Integer.parseInt(object.readLine());
                            out.writeUTF(Integer.toString(ch1));
                            switch (ch1) {
                                case 1:
                                    System.out.println("Enter first string");
                                    st1 = object.readLine();
                                    out.writeUTF(st1);
                                    System.out.println("Enter second string");
                                    st2 = object.readLine();
                                    out.writeUTF(st2);
                                    st = in.readUTF();
                                    System.out.println("Concatenated string of " + st1 + " and " + st2 + " is :: " + st);
                                    break;
                                case 2:
                                    System.out.println("Enter the string");
                                    st1 = object.readLine();
                                    out.writeUTF(st1);
                                    System.out.println("Enter the start position of the substring");
                                    first = object.readLine();
                                    out.writeUTF(first);
                                    System.out.println("Enter the end position of the substring");
                                    last = object.readLine();
                                    out.writeUTF(last);
                                    st = in.readUTF();
                                    System.out.println("Substring of string " + st1 + " is :: " + st);
                                    break;
                                case 3:
                                    System.out.println("Enter the string");
                                    st = object.readLine();
                                    out.writeUTF(st);
                                    n = Integer.parseInt(in.readUTF());
                                    if (n == 0)
                                        System.out.println("String " + st + " is Palindrome");
                                    else
                                        System.out.println("String " + st + " is not Palindrome");
                                    break;
                            }
                        } while (ch1 > 0 && ch1 != 4);
                        break;
                    case 4:
                        System.exit(0);
                }
            } while (ch > 0);
        } catch (Exception e) {
            System.out.println("Exception:" + e.getMessage());
        }
    }
}

//MyServer.java
import java.net.*;
import java.io.*;

public class MyServer extends Thread {
    DataInputStream in;
    DataOutputStream out;
    Socket clientSocket;
    int n, ch, ch1, f, l;
    String st, st1, st2;

    public MyServer() {
        try {
            ServerSocket listenSocket = new ServerSocket(2001);
            System.out.println("\nServer is Running");
            clientSocket = listenSocket.accept();
            System.out.println("\nClient is Connected");
            in = new DataInputStream(clientSocket.getInputStream());
            out = new DataOutputStream(clientSocket.getOutputStream());
            this.start();
        } catch (IOException e) {
            System.out.println("Connection:" + e.getMessage());
        }
    }

    public void run() {
        try {
            while (true) {
                ch = Integer.parseInt(in.readUTF());
                switch (ch) {
                    case 1:
                        int fact = 1;
                        n = Integer.parseInt(in.readUTF());
                        for (int i = 1; i <= n; i++)
                            fact = fact * i;
                        out.writeUTF(Integer.toString(fact));
                        break;
                    case 2:
                        int sum = 0, rem;
                        n = Integer.parseInt(in.readUTF());
                        while (n != 0) {
                            rem = n % 10;
                            n = n / 10;
                            sum = sum + rem;
                        }
                        out.writeUTF(Integer.toString(sum));
                        break;
                    case 3:
                        boolean more = true;   // leave the inner loop when the client sends choice 4
                        while (more) {
                            ch1 = Integer.parseInt(in.readUTF());
                            switch (ch1) {
                                case 1:
                                    st1 = in.readUTF();
                                    st2 = in.readUTF();
                                    st = st1.concat(st2);
                                    out.writeUTF(st);
                                    break;
                                case 2:
                                    st1 = in.readUTF();
                                    f = Integer.parseInt(in.readUTF());
                                    l = Integer.parseInt(in.readUTF());
                                    st = st1.substring(f, l);
                                    out.writeUTF(st);
                                    st = null;
                                    break;
                                case 3:
                                    st = in.readUTF();
                                    st1 = new StringBuffer(st).reverse().toString();
                                    n = st.compareTo(st1);   // 0 if the string equals its reverse
                                    out.writeUTF(Integer.toString(n));
                                    break;
                                case 4:
                                    more = false;
                                    break;
                            }
                        }
                        break;
                }
            }
        } catch (Exception e) {
            System.out.println("Exception :" + e.getMessage());
        }
    }

    public static void main(String args[]) {
        try {
            MyServer s = new MyServer();
        } catch (Exception e) {
            System.out.println("Listen socket:" + e.getMessage());
            e.printStackTrace();
        }
    }
}
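The handout gives no compile-and-run commands for this assignment; a minimal sketch, assuming both files sit in one directory and client and server run on the same machine (hence the localhost argument):

javac MyServer.java MyClient.java
# terminal 1: start the server first (it listens on port 2001)
java MyServer
# terminal 2: start the client, passing the server's host name
java MyClient localhost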

SL-V  Class: BE IT

Exp 2:
Aim: Design a distributed application using RPC for remote computation where client submits an integer value to the server and server calculates factorial and returns the result to the client program.

Steps:
# Create the IDL
# Open terminal
sudo apt-get update
sudo apt-get install rpcbind
mkdir exp2
cd exp2
gedit fact.x
# add following code in it
struct intpair {
    int a;
};

program FACT_PROG {
    version FACT_VERS {
        int FACT(intpair) = 1;
    } = 1;
} = 0x ;   /* the program number is truncated in the original handout;
              user-defined ONC RPC program numbers come from the 0x20000000-0x3fffffff range */
# save and exit the file
rpcgen -a -C fact.x
gedit Makefile.fact
# find the following line in the file
CFLAGS += -g
# and change it to:
CFLAGS += -g -DRPC_SVC_FG
# find the following line in the same file
RPCGENFLAGS =
# and change it to:
RPCGENFLAGS = -C
# save and exit the file
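rpcgen -a also generates a server skeleton, fact_server.c, whose procedure body is left empty; the handout never shows it, so the factorial has to be filled in by hand. A minimal sketch, assuming the stub name fact_1_svc that rpcgen -C derives from the IDL above:

/* fact_server.c -- generated skeleton with the factorial filled in */
#include "fact.h"

int *
fact_1_svc(intpair *argp, struct svc_req *rqstp)
{
    static int result;   /* static: the stub returns a pointer to it */
    int i;

    result = 1;
    for (i = 1; i <= argp->a; i++)   /* compute argp->a! iteratively */
        result = result * i;

    return &result;
}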

gedit fact_client.c
# we will make some changes in this file (changes are highlighted)
#include <stdio.h>
#include <stdlib.h>
#include "fact.h"

void
fact_prog_1(char *host, int a)
{
    CLIENT *clnt;
    int *result_1;
    intpair fact_1_arg;

#ifndef DEBUG
    clnt = clnt_create(host, FACT_PROG, FACT_VERS, "udp");
    if (clnt == NULL) {
        clnt_pcreateerror(host);
        exit(1);
    }
#endif /* DEBUG */

    fact_1_arg.a = a;
    result_1 = fact_1(&fact_1_arg, clnt);
    if (result_1 == (int *) NULL) {
        clnt_perror(clnt, "call failed");
    }
    else {
        printf("Factorial = %d\n", *result_1);
    }

#ifndef DEBUG
    clnt_destroy(clnt);
#endif /* DEBUG */
}

int
main(int argc, char *argv[])
{
    char *host;
    int a;

    if (argc < 2) {
        printf("usage: %s server_host\n", argv[0]);
        exit(1);
    }
    host = argv[1];
    /* the listing in the handout ends here; the lines below complete it
       by reading the number and invoking the remote procedure */
    printf("Enter a number: ");
    scanf("%d", &a);
    fact_prog_1(host, a);
    return 0;
}
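With the skeleton completed, the build-and-run sequence is the usual one for rpcgen projects (a sketch; the binary names fact_client and fact_server come from the generated Makefile.fact):

make -f Makefile.fact
# terminal 1: start the server (it stays in the foreground because of -DRPC_SVC_FG)
./fact_server
# terminal 2: run the client against the local machine
./fact_client localhost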

Department of Information Technology
Software Laboratory-V

Assignment No:
Title of the Assignment: Design a distributed application using RMI for remote computation where client submits two numbers to the server and server returns the addition of the given numbers

//AddClient.java
import java.rmi.*;

public class AddClient {
    public static void main(String[] args) {
        try {
            // the lookup name must match the name bound by the server in Naming.rebind()
            String addServerURL = "rmi://localhost/AddServer";
            AddServerIntf addServerIntf = (AddServerIntf) Naming.lookup(addServerURL);
            System.out.println("The first number is: " + args[0]);
            int n1 = Integer.parseInt(args[0]);
            System.out.println("The second number is: " + args[1]);
            int n2 = Integer.parseInt(args[1]);
            System.out.println("The Addition is: " + addServerIntf.add(n1, n2));
        } catch (Exception e) {
            System.out.println("Exception: " + e);
        }
    }
}

//AddServer.java
import java.rmi.*;
import java.net.*;

public class AddServer {
    public static void main(String args[]) {
        try {
            AddServerImpl addServerImpl = new AddServerImpl();
            Naming.rebind("AddServer", addServerImpl);
        } catch (Exception e) {
            System.out.println("Exception:" + e);
        }
    }
}

//AddServerImpl.java
import java.rmi.*;
import java.rmi.server.*;

public class AddServerImpl extends UnicastRemoteObject implements AddServerIntf {
    public AddServerImpl() throws RemoteException {
    }

    public int add(int n1, int n2) throws RemoteException {
        int result;
        result = n1 + n2;
        return result;
    }
}

//AddServerIntf.java
import java.rmi.*;

public interface AddServerIntf extends Remote {
    int add(int n1, int n2) throws RemoteException;
}
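A sketch of the usual compile-and-run sequence for this assignment (not spelled out in the handout; assumes all four source files share one directory):

javac AddServerIntf.java AddServerImpl.java AddServer.java AddClient.java
# terminal 1: start the RMI registry on its default port 1099
rmiregistry
# terminal 2: start the server, which binds "AddServer" in the registry
java AddServer
# terminal 3: run the client with the two numbers as arguments
java AddClient 5 7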

SL-V

Exp 3:
Aim: Design a distributed application using Message Passing Interface (MPI) for remote computation where client submits a string to the server and server returns the reverse of it to the client.

Steps:
# open terminal
sudo apt-get update
sudo apt-get install libopenmpi-dev
mkdir exp3
cd exp3
gedit server.c
# add following code in it
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    MPI_Comm client;
    MPI_Status status;
    char port_name[MPI_MAX_PORT_NAME], str[50], ch, temp;
    int size, again, i, j;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 1) {
        fprintf(stderr, "Server too big");
        exit(EXIT_FAILURE);
    }
    MPI_Open_port(MPI_INFO_NULL, port_name);
    printf("Server available at port: %s\n", port_name);
    i = 0;
    while (1) {
        MPI_Comm_accept(port_name, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &client);
        again = 1;
        while (again) {
            MPI_Recv(&ch, 1, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, client, &status);
            switch (status.MPI_TAG) {
            case 0:
                MPI_Comm_free(&client);
                MPI_Close_port(port_name);
                MPI_Finalize();
                return 0;
            case 1:
                printf("\nReceived string: %s\n", str);
                /* reverse the string */
                i = 0;
                j = strlen(str) - 1;
                while (i < j) {
                    temp = str[i];
                    str[i] = str[j];
                    str[j] = temp;
                    i++;
                    j--;
                }
                printf("\nReversed string is : %s\n", str);
                /* send the reversed string to client (character by character) */
                for (i = 0; i < strlen(str); i++) {
                    ch = str[i];
                    MPI_Send(&ch, 1, MPI_CHAR, 0, 2, client);
                }
                /* send tag=1 to indicate end of string */
                MPI_Send(&ch, 1, MPI_CHAR, 0, 1, client);
                MPI_Comm_disconnect(&client);
                again = 0;
                strcpy(str, "");
                i = 0;
                break;
            case 2:
                printf("Received character: %c\n", ch);
                str[i] = ch;
                i++;
                /* add null character at the end of string */
                str[i] = '\0';
                break;
            default:
                /* unexpected message type */
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
    }
}
# save and exit the file
gedit client.c
# add following code in it
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    MPI_Comm server;
    MPI_Status status;
    char port_name[MPI_MAX_PORT_NAME], str[50], ch;
    int i, tag, again;

    if (argc < 2) {
        fprintf(stderr, "server port name required.\n");
        exit(EXIT_FAILURE);
    }
    MPI_Init(&argc, &argv);
    strcpy(port_name, argv[1]);
    MPI_Comm_connect(port_name, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &server);

    /* accept input string */
    printf("\nEnter the string :\n");
    scanf("%s", str);

    /* send string to server (character by character) */
    for (i = 0; i < strlen(str); i++) {
        if (str[i] != '\0') {
            ch = str[i];
            tag = 2;
            MPI_Send(&ch, 1, MPI_CHAR, 0, tag, server);
        }
    }
    /* done sending string to the server */
    MPI_Send(&i, 0, MPI_INT, 0, 1, server);

    /* receive the reversed string from server and display it */
    i = 0;
    again = 1;
    while (again) {
        MPI_Recv(&ch, 1, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, server, &status);
        switch (status.MPI_TAG) {
        case 2:
            str[i] = ch;
            i++;
            break;
        case 1:
            again = 0;
            printf("\nReversed string is : %s\n\n", str);
            break;
        }
    }
    MPI_Comm_disconnect(&server);
    MPI_Finalize();
    return 0;
}
# save and exit the file
# compile
mpicc server.c -o server
mpicc client.c -o client
# run server
mpirun -np 1 ./server
# it will display output similar to below (not necessarily the same)
Server available at port: <port-string, e.g. ...;tcp://<ip>:35820;tcp://<ip>:40208;...>
# copy the port-string from the terminal output
# we are going to supply this port-string as the first command line argument to the client
# open another terminal
mpirun -np 1 ./client '<port-string copied from the server terminal>'
# Don't forget to insert single quotes at the start & end of the port-string.
# Output: server terminal (screenshot in the original)
# Output: client terminal (screenshot in the original)

SL-V  BE IT

EXP 5 Part A [According to new revised assignments]
Aim: Design a distributed application using MapReduce under Hadoop for:
a) Character counting in a given text file.

Steps:
First install Hadoop (if not installed yet) by following
https://sl6it.wordpress.com/2015/12/04/1-study-and-configure-hadoop-for-big-data/
# Download sample.txt file (attached with this post)
# Paste sample.txt in your home folder
# Open terminal
whoami
# It will display your user name; we will use it later.
# Open eclipse->new java project->project name exp5a->new class-> CharMap
# Add following code in that class
package exp5a;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CharMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        char[] carr = line.toCharArray();
        for (char c : carr) {
            System.out.println(c);
            // emit each character with a count of 1
            context.write(new Text(String.valueOf(c)), new IntWritable(1));
        }
    }
}
# Save the file
# It will display some errors, so we are going to import three jar files in our project.
# Copy hadoop-mapreduce-client-core jar from ~/hadoop/share/hadoop/mapreduce directory
# In eclipse-> right click on exp5a project->paste
# Right click on pasted hadoop-mapreduce-client-core jar-> Build path-> add to build path
# Copy hadoop-common jar from ~/hadoop/share/hadoop/common directory
# In eclipse-> right click on exp5a project->paste
# Right click on pasted hadoop-common jar-> Build path-> add to build path
# Copy commons-cli-1.2.jar from ~/hadoop/share/hadoop/common/lib directory
# In eclipse-> right click on exp5a project->paste
# Right click on pasted commons-cli-1.2.jar-> Build path-> add to build path
# In eclipse->right click on project exp5a->new class-> CharReduce
# Add following code in that class
package exp5a;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class CharReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        IntWritable result = new IntWritable();
        // sum the counts emitted by the mapper for this character
        for (IntWritable val : values) {
            count += val.get();
        }
        result.set(count);
        context.write(key, result);
    }
}
# Save the file
# In eclipse->right click on project exp5a->new class-> CharCount
# Add following code in that class
package exp5a;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class CharCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "Charcount");
        job.setJarByClass(CharCount.class);
        job.setMapperClass(CharMap.class);
        job.setReducerClass(CharReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
# Save the file
# In eclipse->right click on project exp5a-> export->java->jar file->next-> select the export destination -> /home/your_user_name/exp5a.jar -> next -> next -> select main class ->browse -> CharCount -> finish
# exp5a.jar file will be created in your home folder
# Open terminal
# Now start the NameNode daemon and the DataNode daemon:
~/hadoop/sbin/start-dfs.sh
# Make the HDFS directories required to execute MapReduce jobs
~/hadoop/bin/hdfs dfs -mkdir /user
~/hadoop/bin/hdfs dfs -mkdir /user/your_user_name
# Put sample.txt file in hdfs
~/hadoop/bin/hdfs dfs -put ~/sample.txt input_data
# Perform MapReduce job
~/hadoop/bin/hadoop jar ~/exp5a.jar input_data output_data
# Output
~/hadoop/bin/hdfs dfs -cat output_data/*
# Our task is done, so delete the distributed files (input_data & output_data)
~/hadoop/bin/hdfs dfs -rm -r input_data output_data
# Stop hadoop
jps
~/hadoop/sbin/stop-dfs.sh

Reference: Hadoop: The Definitive Guide, O'Reilly Publications, by Tom White
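Note: as a quick sanity check, if sample.txt contained only the line aab (a hypothetical input; the attached file will differ), the cat command above would print each distinct character with its count, one per line:

a   2
b   1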

SL-V  BE IT

EXP 5 Part B (According to new revised assignments)
Aim: Design a distributed application using MapReduce under Hadoop for:
b) Counting no. of occurrences of every word in a given text file.

Steps:
First install Hadoop (if not installed yet) by following
https://sl6it.wordpress.com/2015/12/04/1-study-and-configure-hadoop-for-big-data/
# Download sample.txt file (attached with this post)
# Paste sample.txt in your home folder
# Open terminal
whoami
# It will display your user name; we will use it later.
# Open eclipse->new java project->project name exp5b->new class-> WordCount
# Add following code in that class
package exp5b;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: wordcount <in> [<in>...] <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
# Save the file
# It will display some errors, so we are going to import three jar files in our project.
# Copy hadoop-mapreduce-client-core jar from ~/hadoop/share/hadoop/mapreduce directory
# In eclipse-> right click on exp5b project->paste
# Right click on pasted hadoop-mapreduce-client-core jar-> Build path-> add to build path
# Copy hadoop-common jar from ~/hadoop/share/hadoop/common directory
# In eclipse-> right click on exp5b project->paste
# Right click on pasted hadoop-common jar-> Build path-> add to build path
# Copy commons-cli-1.2.jar from ~/hadoop/share/hadoop/common/lib directory
# In eclipse-> right click on exp5b project->paste
# Right click on pasted commons-cli-1.2.jar-> Build path-> add to build path
# In eclipse->right click on project exp5b-> export->java->jar file->next-> select the export destination -> /home/your_user_name/exp5b.jar -> next -> next -> select main class ->browse -> WordCount -> finish
# exp5b.jar file will be created in your home folder
# Open terminal
# Now start the NameNode daemon and the DataNode daemon:
~/hadoop/sbin/start-dfs.sh
# Make the HDFS directories required to execute MapReduce jobs
~/hadoop/bin/hdfs dfs -mkdir /user
~/hadoop/bin/hdfs dfs -mkdir /user/your_user_name
# Put sample.txt file in hdfs
~/hadoop/bin/hdfs dfs -put ~/sample.txt input_data
# Perform MapReduce job
~/hadoop/bin/hadoop jar ~/exp5b.jar input_data output_data
# Output
~/hadoop/bin/hdfs dfs -cat output_data/*
# Our task is done, so delete the distributed files (input_data & output_data)
~/hadoop/bin/hdfs dfs -rm -r input_data output_data
# Stop hadoop
jps
~/hadoop/sbin/stop-dfs.sh

Reference: Hadoop: The Definitive Guide, O'Reilly Publications, by Tom White
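Note: a quick sanity check with a hypothetical input (the attached sample.txt will differ): if the file contained the single line "hello world hello", the cat command above would print each distinct word with its count:

hello   2
world   1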

SL-V  BE IT

EXP 6 (According to new revised assignments)
Aim: Design a distributed application using MapReduce under Hadoop for finding maximum number in first and second columns in every line of a given text file.

Steps:
First install Hadoop (if not installed yet) by following
https://sl6it.wordpress.com/2015/12/04/1-study-and-configure-hadoop-for-big-data/
# Download sample6.txt file (attached with this post)
# Paste sample6.txt in your home folder
# Open terminal
whoami
# It will display your user name; we will use it later.
# Open eclipse->new java project->project name exp6->new class-> MaxMap
# Add following code in that class
package exp6;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    int values[] = new int[10000];
    int values1[] = new int[10000];
    String word[];
    int maxValue = 0, linenum = 0;

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String words = value.toString();
        System.out.println(words);
        // split the line into its two comma-separated columns
        word = words.split(",");
        for (int i = 0; i < 2; i++) {
            System.out.println(word[i]);
            values[i] = Integer.parseInt(word[i]);
            values1[i] = Integer.parseInt(word[i]);
        }
        // after this swap, values1[0] holds the larger of the two columns
        if (values1[0] < values1[1]) {
            int temp = values1[0];
            values1[0] = values1[1];
            values1[1] = temp;
        }
        maxValue = values1[0];
        String text = "" + (linenum + 1) + "\t" + values[0] + "\t" + values[1];
        if (linenum >= 0)
            context.write(new Text(text), new IntWritable(maxValue));
        linenum++;
    }
}
# Save the file
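The mapper above assumes every line of sample6.txt holds exactly two comma-separated integers. As a hypothetical illustration (the attached file may differ), an input file such as

12,45
7,3
88,21

would make the mapper emit, for each line, the key "line-number<TAB>first<TAB>second" with the per-line maximum (45, 7, 88) as the value; the driver below then reports 88 as the overall maximum.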

# It will display some errors, so we are going to import three jar files in our project.
# Copy hadoop-mapreduce-client-core jar from ~/hadoop/share/hadoop/mapreduce directory
# In eclipse-> right click on exp6 project->paste
# Right click on pasted hadoop-mapreduce-client-core jar-> Build path-> add to build path
# Copy hadoop-common jar from ~/hadoop/share/hadoop/common directory
# In eclipse-> right click on exp6 project->paste
# Right click on pasted hadoop-common jar-> Build path-> add to build path
# Copy commons-cli-1.2.jar from ~/hadoop/share/hadoop/common/lib directory
# In eclipse-> right click on exp6 project->paste
# Right click on pasted commons-cli-1.2.jar-> Build path-> add to build path
# In eclipse->right click on project exp6->new class-> MaxCount
# Add following code in that class (replace your_user_name by your own username)
package exp6;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MaxCount extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new MaxCount(), args);
        System.exit(res);
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf, "MaxCount");
        job.setJarByClass(MaxCount.class);
        job.setMapperClass(MaxMap.class);
        job.setNumReduceTasks(0);   // map-only job: mapper output is written directly
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        Path outputpath = new Path(args[1]);
        outputpath.getFileSystem(conf).delete(outputpath, true);
        job.waitForCompletion(true);

        FileSystem fs = FileSystem.get(conf);
        FileStatus[] status = fs.listStatus(new Path(args[1]));
        // copy hdfs output file to local file
        for (int i = 0; i < status.length; i++) {
            System.out.println(status[i].getPath());
            fs.copyToLocalFile(false, status[i].getPath(),
                    new Path("/home/your_user_name/" + args[1]));
        }
        System.out.println("\nLine\tFirst\tSecond\tMaximum");
        System.out.println("no \tcolumn\tcolumn\n");
        // display contents of local file
        BufferedReader br = new BufferedReader(new FileReader("/home/your_user_name/" + args[1]));
        String line = null;
        while ((line = br.readLine()) != null) {
            System.out.println(line);
        }
        br.close();
        // scan the per-line maxima (fourth token on every line) for the overall maximum
        Scanner s = new Scanner(new File("/home/your_user_name/" + args[1]));
        List<Integer> max_values = new ArrayList<Integer>();
        while (s.hasNext()) {
            s.next();
            s.next();
            s.next();
            max_values.add(Integer.parseInt(s.next()));
        }
        int maximum = 0;
        for (int max : max_values) {
            if (max > maximum)
                maximum = max;
        }
        System.out.println("\nOverall Maximum: " + maximum + "\n");
        s.close();
        return 0;
    }
}
# Save the file
# In eclipse->right click on project exp6-> export->java->jar file->next-> select the export destination -> /home/your_user_name/exp6.jar -> next -> next -> select main class ->browse -> MaxCount -> finish
# exp6.jar file will be created in your home folder
# Open terminal
# Now start the NameNode daemon and the DataNode daemon:
~/hadoop/sbin/start-dfs.sh
# Make the HDFS directories required to execute MapReduce jobs
~/hadoop/bin/hdfs dfs -mkdir /user
~/hadoop/bin/hdfs dfs -mkdir /user/your_user_name
# Put sample6.txt file in hdfs
~/hadoop/bin/hdfs dfs -put ~/sample6.txt input_data
# Perform MapReduce job
~/hadoop/bin/hadoop jar ~/exp6.jar input_data output_data
# Output (printed by the program itself)
# Our task is done, so delete the distributed files (input_data & output_data)
~/hadoop/bin/hdfs dfs -rm -r input_data output_data
# Also delete local output file
rm -r ~/output_data
# Stop hadoop
~/hadoop/sbin/stop-dfs.sh
jps

Reference: Hadoop: The Definitive Guide, O'Reilly Publications, by Tom White

SL-V  BE IT

EXP 7
Aim: Design and develop a distributed application to find the coolest/hottest year from the available weather data. Use weather data from the Internet and process it using MapReduce.

Steps:
First install Hadoop (if not installed yet) by following
https://sl6it.wordpress.com/2015/12/04/1-study-and-configure-hadoop-for-big-data/
# Download dataset.zip file (attached with this post)
# It contains NCDC weather data from year 1901 onward
# Copy and extract dataset.zip in your home folder
# Open terminal
whoami
# It will display your user name; we will use it later.
# Open eclipse->new java project->project name exp7->new class-> MaxTemperatureMapper
# Add following code in that class
package exp7;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // sentinel used by NCDC records for a missing temperature reading
    private static final int MISSING = 9999;

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String year = line.substring(15, 19);
        int airTemperature;
        if (line.charAt(87) == '+') {
            airTemperature = Integer.parseInt(line.substring(88, 92));
        } else {
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new Text(year), new IntWritable(airTemperature));
        }
    }
}
# Save the file
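The substring offsets used above follow the fixed-width NCDC record layout (see the reference at the end of this experiment): positions 15-19 (0-based, end-exclusive) hold the year, 87-92 the signed air temperature in tenths of a degree Celsius, and 92-93 the quality code. Below is a small standalone sketch of the same parsing logic applied to a fabricated record padded to those offsets; ParseDemo is a hypothetical helper, not part of the assignment:

//ParseDemo.java -- hypothetical helper that mirrors MaxTemperatureMapper's parsing
public class ParseDemo {
    public static void main(String[] args) {
        // build a 93-character dummy record: year "1950" at offsets 15-18,
        // temperature "-0022" at offsets 87-91, quality "1" at offset 92, zeros elsewhere
        char[] filler = new char[93];
        java.util.Arrays.fill(filler, '0');
        String pad = new String(filler);
        String line = pad.substring(0, 15) + "1950" + pad.substring(19, 87) + "-0022" + "1";

        String year = line.substring(15, 19);                          // "1950"
        int airTemperature = Integer.parseInt(line.substring(87, 92)); // -22, i.e. -2.2 degrees Celsius
        String quality = line.substring(92, 93);                       // "1"
        System.out.println(year + " " + airTemperature + " " + quality);
    }
}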

# It will display some errors, so we are going to import two jar files in our project.
# Copy hadoop-mapreduce-client-core jar from ~/hadoop/share/hadoop/mapreduce directory
# In eclipse-> right click on exp7 project->paste
# Right click on pasted hadoop-mapreduce-client-core jar-> Build path-> add to build path
# Copy hadoop-common jar from ~/hadoop/share/hadoop/common directory
# In eclipse-> right click on exp7 project->paste
# Right click on pasted hadoop-common jar-> Build path-> add to build path
# Right click on project exp7->new class-> MaxTemperatureReducer
# Add following code in that class
package exp7;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MaxTemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        // keep the highest temperature seen for this year
        for (IntWritable value : values) {
            maxValue = Math.max(maxValue, value.get());
        }
        context.write(key, new IntWritable(maxValue));
    }
}
# Save the file
# Right click on project exp7->new class-> MaxTemperature
# Add following code in that class (replace your_user_name by your own username)
# hdfs port number here is 1234, replace it with your port no (if different).
package exp7;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperature {
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: MaxTemperature <input path> <output path>");
            System.exit(-1);
        }
        Job job = new Job();
        job.setJarByClass(MaxTemperature.class);
        job.setJobName("Max temperature");
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:1234/user/your_user_name/");
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] status = fs.listStatus(new Path(args[1]));
        // copy hdfs output file to local folder
        for (int i = 0; i < status.length; i++) {
            System.out.println(status[i].getPath());
            fs.copyToLocalFile(false, status[i].getPath(),
                    new Path("/home/your_user_name/" + args[1]));
        }
        System.out.println("\nYear\tTemperature\n");
        // display contents of local file
        BufferedReader br = new BufferedReader(new FileReader("/home/your_user_name/" + args[1]));
        String line = null;
        while ((line = br.readLine()) != null) {
            System.out.println(line);
        }
        br.close();
        Scanner s = new Scanner(new File("/home/your_user_name/" + args[1]));
        List<Integer> temps = new ArrayList<Integer>();
        List<String> years = new ArrayList<String>();
        while (s.hasNext()) {
            years.add(s.next());
            temps.add(Integer.parseInt(s.next()));
        }
        int max_temp = 0, min_temp = 999, i = 0, j = 0;
        String hottest_year = "", coolest_year = "";
        for (int temp : temps) {
            if (temp > max_temp) {
                max_temp = temp;
                hottest_year = years.get(i);
            }
            i++;
        }
        float max_temp1 = max_temp;
        System.out.println("Hottest Year:" + hottest_year);
        System.out.println("\tTemperature:" + max_temp1 / 10 + " Degree Celsius");
        for (int temp : temps) {
            if (temp < min_temp) {
                min_temp = temp;
                coolest_year = years.get(j);
            }
            j++;
        }
        float min_temp1 = min_temp;
        System.out.println("Coolest Year:" + coolest_year);
        System.out.println("\tTemperature:" + min_temp1 / 10 + " Degree Celsius");
        s.close();
    }
}
# Save the file
# In eclipse->right click on project exp7-> export->java->jar file->next-> select the export destination -> /home/your_user_name/exp7.jar -> next -> next -> select main class ->browse -> MaxTemperature -> finish
# exp7.jar file will be created in your home folder
# Open terminal
# Now start the NameNode daemon and the DataNode daemon:
~/hadoop/sbin/start-dfs.sh
# Make the HDFS directories required to execute MapReduce jobs (if not already done)
~/hadoop/bin/hdfs dfs -mkdir /user
~/hadoop/bin/hdfs dfs -mkdir /user/your_user_name
# Put NCDC weather dataset in hdfs
~/hadoop/bin/hdfs dfs -put ~/dataset input_dataset
# Perform MapReduce job
~/hadoop/bin/hadoop jar ~/exp7.jar input_dataset output_dataset
# Output (printed by the program itself)
# Stop hadoop
~/hadoop/sbin/stop-dfs.sh
jps

Reference: Hadoop: The Definitive Guide, O'Reilly Publications, by Tom White


More information

mith College Computer Science CSC352 Week #7 Spring 2017 Introduction to MPI Dominique Thiébaut

mith College Computer Science CSC352 Week #7 Spring 2017 Introduction to MPI Dominique Thiébaut mith College CSC352 Week #7 Spring 2017 Introduction to MPI Dominique Thiébaut dthiebaut@smith.edu Introduction to MPI D. Thiebaut Inspiration Reference MPI by Blaise Barney, Lawrence Livermore National

More information

Transport layer protocols. Lecture 15: Operating Systems and Networks Behzad Bordbar

Transport layer protocols. Lecture 15: Operating Systems and Networks Behzad Bordbar Transport layer protocols Lecture 15: Operating Systems and Networks Behzad Bordbar 78 Interprocess communication Synchronous and asynchronous comm. Message destination Reliability Ordering Client Server

More information

Lecture 8 Chapter 4: Inter-process Communications

Lecture 8 Chapter 4: Inter-process Communications Organizational Communications and Distributed Object Technologies Lecture 8 Chapter 4: Inter-process Communications 1 Middleware layers Applications, services RMI and RPC This chapter request-reply protocol

More information

Chapter 4: Processes

Chapter 4: Processes Chapter 4: Processes Process Concept Process Scheduling Operations on Processes Cooperating Processes Interprocess Communication Communication in Client-Server Systems 4.1 Process Concept An operating

More information

Process Concept: views of a process Process Scheduling CSCI 6730/ 4730 Operating Systems

Process Concept: views of a process Process Scheduling CSCI 6730/ 4730 Operating Systems Chapter 3: Processes: Outline Process Concept: views of a process Process Scheduling CSCI 6730/ 4730 Operating Systems Operations on Processes Cooperating Processes Inter Process Communication (IPC) RPC:

More information

High Performance Computing Course Notes Message Passing Programming I

High Performance Computing Course Notes Message Passing Programming I High Performance Computing Course Notes 2008-2009 2009 Message Passing Programming I Message Passing Programming Message Passing is the most widely used parallel programming model Message passing works

More information

Map Reduce. MCSN - N. Tonellotto - Distributed Enabling Platforms

Map Reduce. MCSN - N. Tonellotto - Distributed Enabling Platforms Map Reduce 1 MapReduce inside Google Googlers' hammer for 80% of our data crunching Large-scale web search indexing Clustering problems for Google News Produce reports for popular queries, e.g. Google

More information

PCAP Assignment I. 1. A. Why is there a large performance gap between many-core GPUs and generalpurpose multicore CPUs. Discuss in detail.

PCAP Assignment I. 1. A. Why is there a large performance gap between many-core GPUs and generalpurpose multicore CPUs. Discuss in detail. PCAP Assignment I 1. A. Why is there a large performance gap between many-core GPUs and generalpurpose multicore CPUs. Discuss in detail. The multicore CPUs are designed to maximize the execution speed

More information

Networking Code CSCI 201 Principles of Software Development

Networking Code CSCI 201 Principles of Software Development Networking Code CSCI 201 Principles of Software Development Jeffrey Miller, Ph.D. jeffrey.miller@usc.edu Server Networking Client Networking Program Outline USC CSCI 201L Server Software A server application

More information

Lecture 4: Exceptions. I/O

Lecture 4: Exceptions. I/O Lecture 4: Exceptions. I/O Outline Access control. Class scope Exceptions I/O public class Malicious { public static void main(string[] args) { maliciousmethod(new CreditCard()); } static void maliciousmethod(creditcard

More information

Week 12. Streams and File I/O. Overview of Streams and File I/O Text File I/O

Week 12. Streams and File I/O. Overview of Streams and File I/O Text File I/O Week 12 Streams and File I/O Overview of Streams and File I/O Text File I/O 1 I/O Overview I/O = Input/Output In this context it is input to and output from programs Input can be from keyboard or a file

More information

QUERY OPTIMIZATION IN BIG DATA USING HADOOP, HIVE AND NEO4J

QUERY OPTIMIZATION IN BIG DATA USING HADOOP, HIVE AND NEO4J QUERY OPTIMIZATION IN BIG DATA USING HADOOP, HIVE AND NEO4J SUMMER INTERNSHIP PROJECT REPORT Submitted by M. ARUN(2016103010) S. BEN STEWART(2016103513) P. SANJAY(2016103580) COLLEGE OF ENGINEERING, GUINDY

More information

Implementing Algorithmic Skeletons over Hadoop

Implementing Algorithmic Skeletons over Hadoop Implementing Algorithmic Skeletons over Hadoop Dimitrios Mouzopoulos E H U N I V E R S I T Y T O H F R G E D I N B U Master of Science Computer Science School of Informatics University of Edinburgh 2011

More information

Internet and Intranet Applications and Protocols Examples of Bad SMTP Code Prof. Arthur P. Goldberg Spring, 2004

Internet and Intranet Applications and Protocols Examples of Bad SMTP Code Prof. Arthur P. Goldberg Spring, 2004 Internet and Intranet Applications and Protocols Examples of Bad SMTP Code Prof. Arthur P. Goldberg Spring, 00 Summary I show some examples of bad code and discuss how they fail to meet the Software Quality

More information

Using Big Data for the analysis of historic context information

Using Big Data for the analysis of historic context information 0 Using Big Data for the analysis of historic context information Francisco Romero Bueno Technological Specialist. FIWARE data engineer francisco.romerobueno@telefonica.com Big Data: What is it and how

More information

Shell Interface Assignment

Shell Interface Assignment Page 1 of 9 Shell Interface Assignment Creating a Shell Interface Using Java This assignment consists of modifying a Java program so that it serves as a shell interface that accepts user commands and then

More information

What is Hadoop? Hadoop is an ecosystem of tools for processing Big Data. Hadoop is an open source project.

What is Hadoop? Hadoop is an ecosystem of tools for processing Big Data. Hadoop is an open source project. Back to Hadoop 1 What is Hadoop? Hadoop is an ecosystem of tools for processing Big Data. Hadoop is an open source project. 2 A family of tools MapReduce HDFS HBase Hive Pig ZooKeeper Avro Sqoop Oozie

More information

Unit 1 Java Networking

Unit 1 Java Networking Q1. What is Server Socket? Discuss the difference between the Socket and ServerSocket class. The ServerSocket class (java.net) can be used to create a server socket. This object is used to establish communication

More information