int a[100]; #pragma xmp nodes p[*] #pragma xmp template t[100] #pragma xmp distribute t[block] onto p #pragma xmp align a[i] with t[i]


Transcription

Slides 1-4: (no extractable text)

Slide 5:

XMP/C:
  int a[100];
  #pragma xmp nodes p[*]
  #pragma xmp template t[100]
  #pragma xmp distribute t[block] onto p
  #pragma xmp align a[i] with t[i]

  #pragma xmp loop on t[i] reduction(+:res)
  for(int i=0;i<100;i++){
    a[i] = i;
    res += a[i];
  }

XMP/Fortran:
  integer :: a(100)
  !$xmp nodes p(*)
  !$xmp template t(100)
  !$xmp distribute t(block) onto p
  !$xmp align a(i) with t(i)

  !$xmp loop on t(i) reduction(+:res)
  do i = 1, 100
    a(i) = i
    res = res + a(i)
  end do

Slide 6: (no extractable text)

Slide 7: (figure) Nodes node1, node2, node3; communication is expressed by a directive or a coarray.

Slide 8: (no extractable text)

Slide 9:

XMP/C:
  int a[100], b[100];
  #pragma xmp nodes p[*]
  #pragma xmp template t[100]
  #pragma xmp distribute t[block] onto p
  #pragma xmp align a[i] with t[i]

XMP/Fortran:
  integer :: a(100), b(100)
  !$xmp nodes p(*)
  !$xmp template t(100)
  !$xmp distribute t(block) onto p
  !$xmp align a(i) with t(i)

Slides 10-12: (no extractable text)

Slide 13: (figure) Nodes p(1)-p(4); communication is expressed by a directive or a coarray.

Slide 14: (no extractable text)

Slide 15: (figure) Alignment and distribution.

Slides 16-24: (no extractable text)

Slide 25: Alignment and distribution of data are specified with the node, template, align, and distribute directives.

Slide 26: (same content as slide 25)
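The same four directives also describe multi-dimensional mappings. Below is a minimal XMP/C sketch of a two-dimensional block distribution; the array size, the 2x2 node shape, and all names are assumptions for illustration, not taken from the slides.

  int a[16][16];
  #pragma xmp nodes p[2][2]                       /* 2x2 logical node array */
  #pragma xmp template t[16][16]
  #pragma xmp distribute t[block][block] onto p   /* block distribution in both dimensions */
  #pragma xmp align a[i][j] with t[i][j]

  int main(void){
  #pragma xmp loop (i, j) on t[i][j]
    for (int i = 0; i < 16; i++)
      for (int j = 0; j < 16; j++)
        a[i][j] = i + j;                          /* each node writes only the block it owns */
    return 0;
  }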

Slides 27-28: (no extractable text)

Slide 29:

XMP/C:
  #pragma xmp nodes p[4]
  #pragma xmp template t[16]
  #pragma xmp distribute t[block] onto p
  #pragma xmp align a[i] with t[i]

  #pragma xmp loop on t[i]
  for(int i=0;i<16;i++){ ... }

Execute the loop in parallel with affinity to the template.
(figure: array a[16] distributed over nodes p[0]-p[3])

Slide 30:

XMP/Fortran:
  !$xmp nodes p(4)
  !$xmp template t(16)
  !$xmp distribute t(block) onto p
  !$xmp align a(i) with t(i)

  !$xmp loop on t(i)
  do i = 1, 16
    ...
  end do

Execute the do-loop in parallel with affinity to the template.
(figure: array a(16) distributed over nodes p(1)-p(4))

Slide 31:

XMP/C:
  #pragma xmp nodes p[4]
  #pragma xmp template t[16]
  #pragma xmp distribute t[block] onto p
  #pragma xmp align a[i] with t[i]

  #pragma xmp loop on t[i]
  for(int i=2;i<11;i++){ ... }

Execute the loop in parallel with affinity to the template; each node executes only the iterations it owns.
(figure: array a[16] distributed over nodes p[0]-p[3])

Slide 32:

XMP/Fortran:
  !$xmp nodes p(4)
  !$xmp template t(16)
  !$xmp distribute t(block) onto p
  !$xmp align a(i) with t(i)

  !$xmp loop on t(i)
  do i = 3, 11
    ...
  end do

Execute the do-loop in parallel with affinity to the template; each node executes only the iterations it owns.
(figure: array a(16) distributed over nodes p(1)-p(4))

Slide 33: (no extractable text)

Slide 34: The copies of the variable s on all nodes are summed, and every node's copy is updated with the result when the loop statement ends.
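The code of this slide was not extracted. As a hedged sketch of the behavior described above, the XMP/C loop below accumulates into s on every node and combines the partial sums with a reduction clause; the array, template, and node names are assumptions.

  int a[100];
  #pragma xmp nodes p[*]
  #pragma xmp template t[100]
  #pragma xmp distribute t[block] onto p
  #pragma xmp align a[i] with t[i]

  int main(void){
    int s = 0;
  #pragma xmp loop on t[i]
    for (int i = 0; i < 100; i++) a[i] = i;   /* fill the distributed array */

  #pragma xmp loop on t[i] reduction(+:s)
    for (int i = 0; i < 100; i++)
      s += a[i];   /* each node sums its own elements; s is summed over all nodes when the loop ends */
    return 0;
  }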

Slides 35-36: (no extractable text)

Slide 37: Node reference syntax: node-name[base:length] in XMP/C; node-name(base:end) in XMP/Fortran.
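As a hedged illustration of this node-reference syntax, the XMP/C sketch below uses a task directive to run a block on a subset of nodes; the node set size and the choice of the task directive are assumptions based on the XMP specification, not taken from the slide.

  #pragma xmp nodes p[8]

  int main(void){
  /* p[2:4] selects 4 nodes starting at p[2], i.e. p[2]..p[5] (base:length in XMP/C) */
  #pragma xmp task on p[2:4]
    {
      /* work executed only by the selected node subset */
    }
    return 0;
  }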

Slides 38-40: (no extractable text)

Slide 41:

XMP/C:
  #pragma xmp gmove
  a[2:4] = b[3:4];

Array section syntax: array-name[base:length].
(figure: arrays a[8] and b[8] distributed over nodes p[0]-p[3])

Slide 42:

XMP/Fortran:
  !$xmp gmove
  a(3:6) = b(4:7)

Array section syntax: array-name(base:end).
(figure: arrays a(8) and b(8) distributed over nodes p(1)-p(4))
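For context, a hedged, self-contained XMP/C version of the gmove example is sketched below; the element type, the identical alignment of a and b, and the initialization are assumptions, not taken from the slides.

  int a[8], b[8];
  #pragma xmp nodes p[4]
  #pragma xmp template t[8]
  #pragma xmp distribute t[block] onto p
  #pragma xmp align a[i] with t[i]
  #pragma xmp align b[i] with t[i]

  int main(void){
  #pragma xmp loop on t[i]
    for (int i = 0; i < 8; i++) b[i] = i;   /* fill the distributed source array */

  /* copy 4 elements of b starting at index 3 into a starting at index 2;
     the compiler generates any node-to-node communication that is needed */
  #pragma xmp gmove
    a[2:4] = b[3:4];
    return 0;
  }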

Slide 43:

XMP/C:
  #pragma xmp nodes p[3]
  #pragma xmp template t[9]
  #pragma xmp distribute t[block] onto p
  #pragma xmp align a[i] with t[i]
  #pragma xmp shadow a[1:1]
  ...
  #pragma xmp reflect (a)

XMP/Fortran:
  !$xmp nodes p(3)
  !$xmp template t(9)
  !$xmp distribute t(block) onto p
  !$xmp align a(i) with t(i)
  !$xmp shadow a(1:1)
  ...
  !$xmp reflect (a)

The shadow directive allocates a shadow area (the gray cells in the figure) at the lower and upper bounds of each node's local section of array a.

Slide 44: (same code as slide 43)

The reflect directive synchronizes the shadow areas; it generates communication between adjacent nodes to update them.

Slide 45:

XMP/C:
  #pragma xmp loop on t[i]
  for(int i=1;i<9;i++){
    b[i] = a[i-1] + a[i] + a[i+1];
  }

XMP/Fortran:
  !$xmp loop on t(i)
  do i = 2, 8
    b(i) = a(i-1) + a(i) + a(i+1)
  end do

Slide 46:

XMP/C:
  #pragma xmp shadow a[1:1]
  ...
  #pragma xmp reflect (a)
  #pragma xmp loop on t[i]
  for(int i=1;i<9;i++){
    b[i] = a[i-1] + a[i] + a[i+1];
  }

XMP/Fortran:
  !$xmp shadow a(1:1)
  ...
  !$xmp reflect (a)
  !$xmp loop on t(i)
  do i = 2, 8
    b(i) = a(i-1) + a(i) + a(i+1)
  end do

Slide 47:

XMP/C:
  #pragma xmp shadow a[1:1][1:1]
  #pragma xmp reflect (a)

XMP/Fortran:
  !$xmp shadow a(1:1,1:1)
  !$xmp reflect (a)
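A hedged XMP/C sketch that puts the two-dimensional shadow and reflect directives into a complete stencil context; the array size, node shape, and the 5-point stencil itself are assumptions, not taken from the slide.

  double u[16][16], v[16][16];
  #pragma xmp nodes p[2][2]
  #pragma xmp template t[16][16]
  #pragma xmp distribute t[block][block] onto p
  #pragma xmp align u[i][j] with t[i][j]
  #pragma xmp align v[i][j] with t[i][j]
  #pragma xmp shadow u[1:1][1:1]   /* one halo cell on every side, in both dimensions */

  int main(void){
  #pragma xmp loop (i, j) on t[i][j]
    for (int i = 0; i < 16; i++)
      for (int j = 0; j < 16; j++)
        u[i][j] = i + j;           /* initialize the distributed array */

  #pragma xmp reflect (u)          /* exchange halo cells with the neighboring nodes */

  #pragma xmp loop (i, j) on t[i][j]
    for (int i = 1; i < 15; i++)
      for (int j = 1; j < 15; j++)
        v[i][j] = 0.25 * (u[i-1][j] + u[i+1][j] + u[i][j-1] + u[i][j+1]);
    return 0;
  }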

Slide 48: (no extractable text)

Slide 49:

XMP/Fortran (coarray):
  real a(8)
  real b(8)[*]
  if (this_image() == 1) then
    b(6)[3] = b(2)
    a(4) = b(3)[2]
  end if
  sync all

Slide 50:

XMP/C (coarray):
  double a[8];
  double b[8]:[*];

  if(xmpc_this_image() == 1){
    b[6]:[3] = b[2];
    a[4] = b[3]:[2];
  }
  xmpc_sync_all(NULL);

Prototype: void xmp_sync_all(int *status);

(The XMP/Fortran version is the same as on slide 49.)

Slide 51:

Array section syntax in XMP/C: array-name[base:length:step].

  if(xmpc_this_image() == 1){
    a[10:5]:[3] = b[0:5];
    a[10:5:2]:[3] = b[0:5:2];
    a[:]:[3] = b[:];
  }

Slide 52:

Array section syntax in XMP/Fortran: array-name(base:last:step).

  if(this_image() == 1) then
    a(10:14)[3] = b(1:5)
    a(10:18:2)[3] = b(1:9:2)
    a(:)[3] = b(:)
  end if

Slide 53: (no extractable text)

Slide 54: Omni Compiler
- Supports XMP, OpenACC, and XcalableACC
- Developed by RIKEN AICS and the University of Tsukuba, Japan
- Open-source software, available on GitHub
- Source-to-source compiler
- The latest version is available at http://omni-compiler.org/

Slides 55-57: (no extractable text)

Slide 58:

XMP/C:
  $ emacs hello.c

  #include <stdio.h>
  #include <xmp.h>
  #pragma xmp nodes p[*]
  int main(){
    printf("Hello World on node %d\n", xmpc_node_num());
    return 0;
  }

  $ xmpcc hello.c -o hello
  $ mpirun -np 2 ./hello

XMP/Fortran:
  $ emacs hello.f90

  program hello
  !$xmp nodes p(*)
    write(*,*) "Hello World on node ", xmp_node_num()
  end program

  $ xmpf90 hello.f90 -o hello
  $ mpirun -np 2 ./hello
