openMP进行多线程编程
在C 中使用openmp进行多线程编程 - DWVictor - 博客园 (cnblogs.com)
openmp是由一系列#pragma指令组成,这些指令控制如何多线程地执行程序。另外,即使编译器不支持omp,程序也能够正常运行,只是程序不会多线程并行运行。
#pragma omp parallel创建线程
#include <omp.h>
#include <stdio.h>
/*
 * Minimal OpenMP example: request a team of four threads and let every
 * thread print its own id once.
 */
int main()
{
    /*
     * Ask for 4 threads in the parallel regions that follow.  The request
     * is not capped by the number of CPU cores; asking for more threads
     * than cores simply oversubscribes them.
     */
    omp_set_num_threads(4);

    /* The block below is executed once by each thread of the team. */
    #pragma omp parallel
    {
        printf("Hello World!, thread: %d\n", omp_get_thread_num());
    }
    return 0;
}
#include <stdio.h>
#include <omp.h>
/*
 * Print the team size as seen from the serial part of the program and
 * from inside a parallel region.
 */
int main()
{
    /* Outside any parallel region the team has exactly one thread. */
    printf("serial part, total number of threads: %d\n\n", omp_get_num_threads());

    /*
     * No thread count is requested, so the runtime chooses the team size
     * (12 threads on the original author's machine).
     */
    #pragma omp parallel
    {
        printf("Hello World, total number of threads: %d\n", omp_get_num_threads());
    }
    return 0;
}
#pragma omp parallel for循环
后面是for循环,表示接下来的for循环将被多线程执行,另外每次循环之间不能有关系,for循环里的内容必须满足可以并行执行,即每次循环互不相干,后一次循环不依赖于前面的循环。
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
/*
 * Return the current wall-clock time in seconds as a double, built from
 * the seconds and microseconds fields filled in by gettimeofday().
 */
double omp_get_time(void)
{
    struct timeval tv;

    /* The timezone argument is obsolete; pass a null pointer. */
    gettimeofday(&tv, NULL);

    /* Whole seconds plus the microsecond remainder scaled to seconds. */
    return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
}
/* M_PI is a POSIX extension rather than ISO C; supply it if <math.h> did not. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/*
 * Time a serial sum over one vector and an OpenMP-parallel element-wise
 * addition of two vectors.
 *
 * Usage: prog [len]
 *   len  optional vector length; values that are not positive are ignored
 *        and the default of 10000000 elements is used.
 *
 * Returns 0 on success, EXIT_FAILURE if the vectors cannot be allocated.
 */
int main(int argc, char **argv)
{
    int i, len = 10000000;
    double *x, *y, *z;
    double va_tm;
    double t = 0.;

    if (argc == 2) {
        int tlen = atoi(argv[1]);
        if (tlen > 0) len = tlen;
    }

    x = malloc(sizeof(*x) * (size_t)len);
    y = malloc(sizeof(*y) * (size_t)len);
    z = malloc(sizeof(*z) * (size_t)len);
    if (x == NULL || y == NULL || z == NULL) {
        fprintf(stderr, "failed to allocate %d elements per vector\n", len);
        free(x);
        free(y);
        free(z);
        return EXIT_FAILURE;
    }

    /* Initialise the inputs in parallel; iterations are independent. */
    #pragma omp parallel for
    for (i = 0; i < len; i++) {
        x[i] = i + 0.3;
        y[i] = i + M_PI;
    }

    /* Serial reference: accumulate every element of x. */
    va_tm = omp_get_time();
    for (i = 0; i < len; i++) t += x[i];
    va_tm = omp_get_time() - va_tm;
    printf("Serial result: %g, time: %g s\n", t, va_tm);

    /* Parallel part: element-wise z = x + y. */
    va_tm = omp_get_time();
    #pragma omp parallel for
    for (i = 0; i < len; i++) {
        z[i] = x[i] + y[i];
    }
    va_tm = omp_get_time() - va_tm;
    /* Print the last element so the parallel loop's result is observable. */
    printf("OMP result: %g, OMP time: %g s\n", z[len - 1], va_tm);

    free(x);
    free(y);
    free(z);
    return 0;
}
#pragma omp parallel num_threads 指定线程数
#include <stdio.h>
/*
 * Set the team size for a single parallel region with the num_threads
 * clause; each of the 4 requested threads prints one greeting.
 */
int main()
{
    #pragma omp parallel num_threads(4)
    {
        printf("Hello World!\n");
    }
    return 0;
}
#include <stdio.h>
#include <omp.h>
/*
 * Show that a num_threads clause takes precedence over an earlier call to
 * omp_set_num_threads().
 */
int main()
{
    omp_set_num_threads(3);

    /* The clause on the directive wins, so this region requests 6 threads. */
    #pragma omp parallel num_threads(6)
    {
        printf("Hello World!\n");
    }
    return 0;
}
#pragma omp sections指定块
section指令用于指定哪些程序块可以并行运行。一个section块内的代码必须串行运行,而section块之间是可以并行运行的。指定代码分块,每个分块开一个线程去执行
#include <stdio.h>
#include <omp.h>
/*
 * Demonstrate the sections construct: each section is executed exactly
 * once by some thread of the team, and different sections may run
 * concurrently, so the order of the output lines is not fixed.
 */
int main()
{
    #pragma omp parallel
    #pragma omp sections
    {
        #pragma omp section
        printf("Hello, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Hi, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Nihao, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Bonjour, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Kon'nichiwa, id: %d\n", omp_get_thread_num());
    }
    return 0;
}
/*
Kon'nichiwa, id: 7
Hello, id: 1
Hi, id: 8
Bonjour, id: 4
Nihao, id: 0
*/
#pragma omp single只执行一次
用于定义一个代码段,该代码段只应由团队中的任何一个线程执行一次。选择的线程可以是主线程以外的线程。
single指令相关的代码块只允许一个线程执行,但并不限定具体哪一个线程来执行,其它线程必须跳过这个代码块,并在代码块后wait,直到执行这段代码的线程完成。
#include <stdio.h>
#include <omp.h>
/*
 * Demonstrate the single construct: every thread prints the first line,
 * but only one (unspecified) thread of the team prints the second.
 */
int main()
{
    #pragma omp parallel
    {
        printf("Hello——, id: %d\n", omp_get_thread_num());

        /* Executed by exactly one thread; the others skip the block. */
        #pragma omp single
        {
            printf("Hello, id: %d\n", omp_get_thread_num());
        }
    }
    return 0;
}
/*
Hello——, id: 1
Hello, id: 1
Hello——, id: 7
Hello——, id: 2
Hello——, id: 10
Hello——, id: 11
Hello——, id: 0
Hello——, id: 3
Hello——, id: 9
Hello——, id: 6
Hello——, id: 4
Hello——, id: 8
Hello——, id: 5
*/
#include <omp.h>
#include <stdio.h>
/*
 * Report which thread created a task and which thread is running it.
 *
 * id  thread id of the task's creator, as passed in by the caller
 * v   value associated with the task (the loop index in the caller)
 */
void task_func(int id, int v)
{
    /* Id of the thread that is actually executing this call. */
    int eid = omp_get_thread_num();

    printf("generated by id: %d, executed by id: %d, i: %d\n", id, eid, v);
}
/*
 * Let a single thread of the team generate 200 untied tasks; the tasks
 * themselves may be executed by any thread of the team.
 */
int main()
{
    #pragma omp parallel
    {
        int id;
        int i;

        id = omp_get_thread_num();

        /* Only one thread runs the generating loop. */
        #pragma omp single
        {
            for (i = 0; i < 200; i++) {
                #pragma omp task untied
                task_func(id, i);
            }
        }
    }
    return 0;
}
#pragma omp xxx reduction组合
(125条消息) openmp 快速入门 常用技巧 parallel for sections reduction critical_billbliss的博客-CSDN博客_crtticalbliss
Reduction操作类似于我们将很多东西组合在一起。
reduction(operator : list)
其中operator指操作符,list表示操作符要作用的列表,通常是一个共享变量名,之所以称之为列表是因为线程组中的每个线程都有一份变量的拷贝,reduction即负责用给定的操作符将这些拷贝的局部变量的值进行聚合,并设置回共享变量。
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
/*
 * Absolute value as a macro.  Note that it replaces the abs() declared in
 * <stdlib.h> for the rest of this file and evaluates its argument twice,
 * so it must only be given side-effect-free expressions.
 */
#define abs(x) ((x) < 0 ? -(x) : (x))

/*
 * Find the element of largest magnitude in an array of signed random
 * numbers using a user-defined OpenMP reduction, and print it.
 */
int main()
{
    int i;
    int result = 0;
    int N = 20000;
    int data[20000];

    /* Fixed seed so that every run sees the same data. */
    srand(0);

    /* Even indices get -rand(), odd indices get +rand(). */
    for (i = 0; i < N; i++) data[i] = ((i % 2) * 2 - 1) * rand();

    /*
     * maxabs: combine two partial results by keeping the one with the
     * larger magnitude; each thread's private copy starts at 0.
     */
    #pragma omp declare reduction(maxabs : int : \
        omp_out = abs(omp_in) < abs(omp_out) ? omp_out : omp_in) \
        initializer (omp_priv=0)

    #pragma omp parallel for reduction(maxabs:result)
    for (i = 0; i < N; i++) {
        if (abs(data[i]) > abs(result)) {
            result = data[i];
        }
    }

    printf("result: %d\n", result);
    return 0;
}
#pragma omp master主线程执行
(125条消息) OpenMP编程(5)—同步结构(master、critical、barrier、atomic、flush、ordered)_常思大妹子的博客-CSDN博客_openmp 同步
学习openmp-master - 芒果的博客 - 芒果的个人博客 (mangoroom.cn)
master指令则指定其相关的代码块必须在主线程中执行,且其它线程不必在代码块后阻塞。
#include <omp.h>
#include <stdio.h>
/*
 * Print the id of the thread that generated a task next to the id of the
 * thread executing it.
 *
 * id  creator's thread id, supplied by the caller
 * v   payload value for the task (the caller's loop counter)
 */
void task_func(int id, int v)
{
    int eid = omp_get_thread_num();   /* executing thread */

    printf("generated by id: %d, executed by id: %d, i: %d\n", id, eid, v);
}
/*
 * Let the master thread generate 200 untied tasks while the remaining
 * threads of the team are free to execute them.
 */
int main()
{
    #pragma omp parallel
    {
        int id;
        int i;

        id = omp_get_thread_num();

        /* Only the master thread runs this loop; the others skip it. */
        #pragma omp master
        for (i = 0; i < 200; i++) {
            #pragma omp task untied
            task_func(id, i);
        }
    }
    return 0;
}
#pragma omp task任务划分
(125条消息) OpenMP3.0的新特性Task指令基础_Augusdi的博客-CSDN博客
task解决for和sections指令的”缺陷“:无法根据运行时的环境动态的进行任务划分,必须是预先能知道的任务划分的情况。
task是“动态”定义任务的,在运行过程中,只需要使用task就会定义一个任务,任务就会在一个线程上去执行,那么其它的任务就可以并行的执行。可能某一个任务执行了一半的时候,或者甚至要执行完的时候,程序可以去创建第二个任务,任务在一个线程上去执行,一个动态的过程,不像sections和for那样,在运行之前,已经可以判断出可以如何去分配任务。而且,task是可以进行嵌套定义的,可以用于递归的情况等等
#include <omp.h>
#include <stdio.h>
/*
 * Task body: show the generating thread and the executing thread.
 *
 * id  thread id of the thread that created the task
 * v   per-task value handed over by the creator
 */
void task_func(int id, int v)
{
    /* Queried here, so it identifies the thread running the task. */
    int eid = omp_get_thread_num();

    printf("generated by id: %d, executed by id: %d, i: %d\n", id, eid, v);
}
/*
 * Every thread of the team runs the loop and generates 20 tasks of its
 * own; there is no single/master construct restricting the generator.
 */
int main()
{
    #pragma omp parallel
    {
        int id;
        int i;

        id = omp_get_thread_num();

        for (i = 0; i < 20; i++) {
            #pragma omp task
            task_func(id, i);
        }
    }
    return 0;
}
#pragma omp parallel (默认shared)
#include <omp.h>
#include <stdio.h>
/*
 * Show that a variable declared before a parallel region is shared by
 * default: all threads read the same n, and unsynchronised writes to it
 * race with each other.
 */
int main()
{
    int n = 20;

    /* Read-only access to the shared variable is safe. */
    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();
        printf("This is thread: %d. I can see shared variable n: %d\n", id, n);
    }

    printf("\n\n\n");

    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();
        /*
         * Intentional error for demonstration: every thread writes the
         * shared n without synchronisation, which is a data race.
         */
        n = id;
        printf("This is thread: %d. I change n to my ID: %d\n", id, n);
    }

    /* Whichever write landed last determines the value printed here. */
    printf("\nData race happened and final n is: %d\n", n);
    return 0;
}
/*
This is thread: 6. I can see shared variable n: 20
This is thread: 1. I can see shared variable n: 20
This is thread: 5. I can see shared variable n: 20
This is thread: 11. I can see shared variable n: 20
This is thread: 9. I can see shared variable n: 20
This is thread: 0. I can see shared variable n: 20
This is thread: 3. I can see shared variable n: 20
This is thread: 8. I can see shared variable n: 20
This is thread: 10. I can see shared variable n: 20
This is thread: 2. I can see shared variable n: 20
This is thread: 7. I can see shared variable n: 20
This is thread: 4. I can see shared variable n: 20
This is thread: 7. I change n to my ID: 7
This is thread: 8. I change n to my ID: 8
This is thread: 5. I change n to my ID: 5
This is thread: 0. I change n to my ID: 0
This is thread: 10. I change n to my ID: 10
This is thread: 2. I change n to my ID: 2
This is thread: 11. I change n to my ID: 11
This is thread: 9. I change n to my ID: 9
This is thread: 6. I change n to my ID: 6
This is thread: 1. I change n to my ID: 1
This is thread: 3. I change n to my ID: 3
This is thread: 4. I change n to my ID: 4
Data race happened and final n is: 3
*/
#pragma omp parallel private(n)
用于控制变量在线程组中多个线程之间的共享方式。其中private,firstprivate,lastprivate表示变量的共享方式是私有的,即每个线程都有一份自己的拷贝;而shared表示线程组的线程访问的是同一个变量。
私有变量共享方式有三种指令,它们的区别在于:
private:每个线程都有一份自己的拷贝,但是这些变量并没有拷贝值,即如果变量是int,long,double等这些内置类型,那么这些变量在进入线程时是未初始化状态的;如果变量是类的实例对象,那么在线程中变量是通过默认构造得到的对象,假设类没有默认构造,则编译会报错,告诉你类没有可用的默认构造;
firstprivate:每个线程有一份自己的拷贝,并且每个线程的拷贝都会通过复制原变量的值来初始化。如果变量是int,long,double等内置类型则直接复制,如果为类的实例对象,则会调用实例对象的拷贝构造函数,这就意味着,假如类的拷贝构造不可访问,则变量不能够使用firstprivate方式共享;
lastprivate:变量在每个线程的共享方式与private一致,但不同的是,变量在最后一次迭代中的值会flush到主线程的变量中。最后一次迭代的意思是,如果是for循环,则主线程的变量的值是最后一次迭代中赋的值;如果是section,则主线程的变量最终的值是最后一个section中赋的值。要注意的是,最终主线程中变量的值并非通过拷贝构造赋值的,而是通过operator=操作符,所以如果类的赋值操作符不可访问,那么变量不能采用lastprivate方式共享。
#include <omp.h>
#include <stdio.h>
/*
 * Demonstrate the private clause: each thread gets its own uninitialised
 * copy of n, and the original n is untouched after the region.
 */
int main()
{
    int n = 20;

    #pragma omp parallel private(n)
    {
        int id;

        id = omp_get_thread_num();
        /*
         * n is private and NOT initialised here, so the value printed is
         * indeterminate.  The read is deliberate, to show that private
         * copies do not inherit the outer value; a compiler may warn.
         */
        printf("This is thread: %d. Initial value of n is: %d\n", id, n);
        n = -id;
        printf("This is thread: %d. Value of n is: %d\n", id, n);
    }

    printf("\n\n\n");
    /* The outer n was never written by the threads; the author's run printed 20. */
    printf("Final n is: %d\n", n);
    return 0;
}
/*
This is thread: 0. Initial value of n is: 0
This is thread: 0. Value of n is: 0
This is thread: 7. Initial value of n is: 0
This is thread: 7. Value of n is: -7
This is thread: 3. Initial value of n is: 0
This is thread: 3. Value of n is: -3
This is thread: 8. Initial value of n is: 0
This is thread: 8. Value of n is: -8
This is thread: 11. Initial value of n is: 0
This is thread: 11. Value of n is: -11
This is thread: 6. Initial value of n is: 0
This is thread: 6. Value of n is: -6
This is thread: 1. Initial value of n is: 0
This is thread: 1. Value of n is: -1
This is thread: 2. Initial value of n is: 0
This is thread: 2. Value of n is: -2
This is thread: 4. Initial value of n is: 0
This is thread: 4. Value of n is: -4
This is thread: 5. Initial value of n is: 0
This is thread: 5. Value of n is: -5
This is thread: 9. Initial value of n is: 0
This is thread: 9. Value of n is: -9
This is thread: 10. Initial value of n is: 0
This is thread: 10. Value of n is: -10
Final n is: 20
*/
#pragma omp parallel firstprivate(n)
firstprivate:每个线程有一份自己的拷贝,并且每个线程的拷贝都会通过复制原变量的值来初始化。如果变量是int,long,double等内置类型则直接复制,如果为类的实例对象,则会调用实例对象的拷贝构造函数,这就意味着,假如类的拷贝构造不可访问,则变量不能够使用firstprivate方式共享;
#include <omp.h>
#include <stdio.h>
/*
 * Demonstrate the firstprivate clause: each thread gets its own copy of
 * n initialised from the outer value (20); changes stay thread-local.
 */
int main()
{
    int n = 20;

    #pragma omp parallel firstprivate(n)
    {
        int id;

        id = omp_get_thread_num();
        /* The private copy starts out with the outer value of n. */
        printf("This is thread: %d. Initial value of n is: %d\n", id, n);
        n = -id;
        printf("This is thread: %d. Value of n is changed to: %d\n", id, n);
    }

    printf("\n\n\n");
    /* Writes went to the private copies, so the outer n is still 20. */
    printf("Final n is: %d\n", n);
    return 0;
}
/*
This is thread: 11. Initial value of n is: 20
This is thread: 11. Value of n is changed to: -11
This is thread: 10. Initial value of n is: 20
This is thread: 10. Value of n is changed to: -10
This is thread: 1. Initial value of n is: 20
This is thread: 1. Value of n is changed to: -1
This is thread: 2. Initial value of n is: 20
This is thread: 2. Value of n is changed to: -2
This is thread: 9. Initial value of n is: 20
This is thread: 9. Value of n is changed to: -9
This is thread: 6. Initial value of n is: 20
This is thread: 6. Value of n is changed to: -6
This is thread: 4. Initial value of n is: 20
This is thread: 4. Value of n is changed to: -4
This is thread: 8. Initial value of n is: 20
This is thread: 8. Value of n is changed to: -8
This is thread: 5. Initial value of n is: 20
This is thread: 5. Value of n is changed to: -5
This is thread: 0. Initial value of n is: 20
This is thread: 0. Value of n is changed to: 0
This is thread: 3. Initial value of n is: 20
This is thread: 3. Value of n is changed to: -3
This is thread: 7. Initial value of n is: 20
This is thread: 7. Value of n is changed to: -7
Final n is: 20
*/
#pragma omp parallel for lastprivate(n)
lastprivate:变量在每个线程的共享方式与private一致,但不同的是,变量在最后一次迭代中的值会flush到主线程的变量中。最后一次迭代的意思是,如果是for循环,则主线程的变量的值是最后一次迭代中赋的值;如果是section,则主线程的变量最终的值是最后一个section中赋的值。要注意的是,最终主线程中变量的值并非通过拷贝构造赋值的,而是通过operator=操作符,所以如果类的赋值操作符不可访问,那么变量不能采用lastprivate方式共享。
#include <omp.h>
#include <stdio.h>
/*
 * Demonstrate the lastprivate clause on a parallel loop: n is private
 * inside the loop, and after the loop the outer n receives the value
 * assigned in the sequentially last iteration (i == 19, so n == -19).
 */
int main()
{
    int n = 0;
    int i;

    #pragma omp parallel for lastprivate(n)
    for (i = 0; i < 20; i++) {
        int id;

        id = omp_get_thread_num();
        n = -i;
        printf("This is thread: %d. Value of n is changed to: %d\n", id, n);
    }

    printf("\n\n\n");
    printf("Final n is: %d\n", n);
    return 0;
}
/*
This is thread: 9. Value of n is changed to: -17
This is thread: 10. Value of n is changed to: -18
This is thread: 5. Value of n is changed to: -10
This is thread: 5. Value of n is changed to: -11
This is thread: 3. Value of n is changed to: -6
This is thread: 3. Value of n is changed to: -7
This is thread: 2. Value of n is changed to: -4
This is thread: 2. Value of n is changed to: -5
This is thread: 4. Value of n is changed to: -8
This is thread: 4. Value of n is changed to: -9
This is thread: 0. Value of n is changed to: 0
This is thread: 0. Value of n is changed to: -1
This is thread: 6. Value of n is changed to: -12
This is thread: 6. Value of n is changed to: -13
This is thread: 8. Value of n is changed to: -16
This is thread: 11. Value of n is changed to: -19
This is thread: 7. Value of n is changed to: -14
This is thread: 7. Value of n is changed to: -15
This is thread: 1. Value of n is changed to: -2
This is thread: 1. Value of n is changed to: -3
Final n is: -19
*/
#pragma omp threadprivate(n)
线程私有数据——threadprivate指令与copyin子句:使用threadprivate指令标明某一个变量是线程私有数据,在程序运行的过程中,不能够被其他线程访问到。使用copyin对线程私有的全局变量进行初始化。
#include <omp.h>
#include <stdio.h>
/* File-scope variable of which every thread keeps its own copy. */
int n = 20;
#pragma omp threadprivate(n)

/*
 * Demonstrate threadprivate and copyin.
 *
 * Region 1 lets every thread set its own n to id * 8.  Region 2 prints
 * each thread's n again (the author's run shows the values from region 1
 * still in place).  Region 3 uses copyin(n) so every thread's copy is
 * first overwritten with the master thread's value.
 */
int main()
{
    printf("n: %d\n", n);

    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();
        n = id * 8;
        printf("This is thread: %d. 11 Value of n is changed to: %d\n", id, n);
    }
    /* The serial part sees the master thread's copy (id 0, hence 0 * 8). */
    printf("11 Serial part n is: %d\n\n", n);

    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();
        printf("This is thread: %d. 22 Value of n is changed to: %d\n", id, n);
    }
    printf("22 Serial part n is: %d\n\n", n);

    printf("copyin\n");

    /* copyin broadcasts the master thread's n to every thread's copy. */
    #pragma omp parallel copyin(n)
    {
        int id;

        id = omp_get_thread_num();
        printf("This is thread: %d. 33 Value of n is changed to: %d\n", id, n);
    }
    printf("33 Serial part n is: %d\n\n", n);
    return 0;
}
/*
n: 20
This is thread: 0. 11 Value of n is changed to: 0
This is thread: 9. 11 Value of n is changed to: 72
This is thread: 4. 11 Value of n is changed to: 32
This is thread: 7. 11 Value of n is changed to: 56
This is thread: 8. 11 Value of n is changed to: 64
This is thread: 6. 11 Value of n is changed to: 48
This is thread: 2. 11 Value of n is changed to: 16
This is thread: 5. 11 Value of n is changed to: 40
This is thread: 10. 11 Value of n is changed to: 80
This is thread: 3. 11 Value of n is changed to: 24
This is thread: 11. 11 Value of n is changed to: 88
This is thread: 1. 11 Value of n is changed to: 8
11 Serial part n is: 0
This is thread: 0. 22 Value of n is changed to: 0
This is thread: 10. 22 Value of n is changed to: 80
This is thread: 4. 22 Value of n is changed to: 32
This is thread: 2. 22 Value of n is changed to: 16
This is thread: 5. 22 Value of n is changed to: 40
This is thread: 8. 22 Value of n is changed to: 64
This is thread: 7. 22 Value of n is changed to: 56
This is thread: 11. 22 Value of n is changed to: 88
This is thread: 6. 22 Value of n is changed to: 48
This is thread: 3. 22 Value of n is changed to: 24
This is thread: 1. 22 Value of n is changed to: 8
This is thread: 9. 22 Value of n is changed to: 72
22 Serial part n is: 0
copyin
This is thread: 0. 33 Value of n is changed to: 0
This is thread: 11. 33 Value of n is changed to: 0
This is thread: 4. 33 Value of n is changed to: 0
This is thread: 10. 33 Value of n is changed to: 0
This is thread: 1. 33 Value of n is changed to: 0
This is thread: 9. 33 Value of n is changed to: 0
This is thread: 8. 33 Value of n is changed to: 0
This is thread: 2. 33 Value of n is changed to: 0
This is thread: 5. 33 Value of n is changed to: 0
This is thread: 7. 33 Value of n is changed to: 0
This is thread: 6. 33 Value of n is changed to: 0
This is thread: 3. 33 Value of n is changed to: 0
33 Serial part n is: 0
*/