OpenMP并行编程入门指南

2023-02-27 17:10:31 浏览数 (1)

openMP进行多线程编程

在C++中使用openmp进行多线程编程 - DWVictor - 博客园 (cnblogs.com)

openmp是由一系列#pragma指令组成,这些指令控制如何多线程地执行程序。另外,即使编译器不支持omp,程序也能够正常运行,只是程序不会多线程并行运行。

#pragma omp parallel创建线程

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Prints a greeting, tagged with the thread number, from every thread of
 * a parallel region that was asked to run with 4 threads. Returns 0.
 */
int main(void)
{
    /*
     * Request 4 threads for the parallel block below.
     * NOTE: OpenMP does not require the thread count to stay within the
     * number of CPU cores; asking for more merely oversubscribes the cores.
     */
    omp_set_num_threads(4);
    #pragma omp parallel
    {
        printf("Hello World!, thread: %d\n", omp_get_thread_num());
    }

    return 0;
}

代码语言:javascript复制
#include <stdio.h>
#include <omp.h>

/*
 * Prints the value of omp_get_num_threads() once from the serial part and
 * once from every thread inside a parallel region. Returns 0.
 */
int main(void)
{
    /* Outside a parallel region the reported team size is 1. */
    printf("serial part, total number of threads: %d\n\n", omp_get_num_threads());

    /* Default team size (12 threads on the original author's machine). */
    #pragma omp parallel
    {
        printf("Hello World, total number of threads: %d\n", omp_get_num_threads());
    }

    return 0;
}

#pragma omp parallel for循环

后面是for循环,表示接下来的for循环将被多线程执行,另外每次循环之间不能有关系,for循环里的内容必须满足可以并行执行,即每次循环互不相干,后一次循环不依赖于前面的循环。

代码语言:javascript复制
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>

#include <sys/time.h>
#include <time.h>

/*
 * Returns the current time of day in seconds as a double, combining the
 * whole seconds and the microsecond part reported by gettimeofday().
 * Used by the caller to time code sections by subtraction.
 */
double omp_get_time(void)
{
    struct timeval tv;

    /* The timezone argument is unused; pass a null pointer. */
    gettimeofday(&tv, NULL);

    /* Whole seconds plus the microsecond remainder scaled to seconds. */
    return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
}

/* M_PI is not guaranteed by ISO C; provide it when <math.h> does not. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/*
 * Vector benchmark: fills x and y in parallel, times a serial sum of x,
 * then times the parallel element-wise addition z = x + y.
 *
 * argv[1] (optional): vector length; used only when it parses to a
 * positive integer, otherwise the default of 10000000 is kept.
 *
 * Returns 0 on success, EXIT_FAILURE when the vectors cannot be allocated.
 */
int main(int argc, char **argv)
{
    int i, len = 10000000;
    double *x, *y, *z;
    double va_tm;
    double t = 0.;

    if (argc == 2) {
        int tlen = atoi(argv[1]);

        if (tlen > 0) {
            len = tlen;
        }
    }

    x = malloc(sizeof(*x) * (size_t)len);
    y = malloc(sizeof(*y) * (size_t)len);
    z = malloc(sizeof(*z) * (size_t)len);

    if (x == NULL || y == NULL || z == NULL) {
        fprintf(stderr, "failed to allocate three vectors of %d doubles\n", len);
        free(x);
        free(y);
        free(z);
        return EXIT_FAILURE;
    }

    /* Iterations are independent, so the initialisation can be split across threads. */
    #pragma omp parallel for
    for (i = 0; i < len; i++) {
        x[i] = i + 0.3;
        y[i] = i + M_PI;
    }

    /* Serial baseline: sum of x. */
    va_tm = omp_get_time();
    for (i = 0; i < len; i++) {
        t += x[i];
    }
    va_tm = omp_get_time() - va_tm;
    printf("Serial result: %g, time: %g s\n", t, va_tm);

    /* Parallel section: element-wise vector addition. */
    va_tm = omp_get_time();

    #pragma omp parallel for
    for (i = 0; i < len; i++) {
        z[i] = x[i] + y[i];
    }

    va_tm = omp_get_time() - va_tm;
    printf("OMP result: %g, OMP time: %g s\n", z[len - 1], va_tm);

    free(x);
    free(y);
    free(z);

    return 0;
}

#pragma omp parallel num_threads 指定线程数

代码语言:javascript复制
#include <stdio.h>

/*
 * Prints "Hello World!" from each thread of a parallel region whose team
 * size is requested with the num_threads(4) clause. Returns 0.
 */
int main(void)
{
    #pragma omp parallel num_threads(4)
    {
        printf("Hello World!\n");
    }

    return 0;
}

代码语言:javascript复制
#include <stdio.h>
#include <omp.h>

/*
 * Shows that a num_threads clause on the directive takes precedence over
 * an earlier omp_set_num_threads() call. Returns 0.
 */
int main(void)
{
    omp_set_num_threads(3);

    /* The clause below wins: the region runs with 6 threads, not 3. */
    #pragma omp parallel num_threads(6)
    {
        printf("Hello World!\n");
    }

    return 0;
}

#pragma omp sections指定块

section指令用于指定哪些程序块可以并行运行。一个section块内的代码必须串行运行,而section块之间是可以并行运行的。指定代码分块,每个分块开一个线程去执行

代码语言:javascript复制
#include <stdio.h>
#include <omp.h>

/*
 * Runs five independent greetings as OpenMP sections; each section is
 * executed once and prints the number of the thread that ran it.
 * Returns 0.
 */
int main(void)
{
    #pragma omp parallel
    #pragma omp sections
    {
        #pragma omp section
        printf("Hello, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Hi, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Nihao, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Bonjour, id: %d\n", omp_get_thread_num());

        #pragma omp section
        printf("Kon'nichiwa, id: %d\n", omp_get_thread_num());
    }

    return 0;
}

/*
Kon'nichiwa, id: 7
Hello, id: 1
Hi, id: 8
Bonjour, id: 4
Nihao, id: 0
*/

#pragma omp single只执行一次

用于定义一个代码段,该代码段只应由团队中的任何一个线程执行一次。选择的线程可以是主线程以外的线程。

single指令相关的代码块只允许一个线程执行,但并不限定具体哪一个线程来执行,其它线程必须跳过这个代码块,并在代码块后wait,直到执行这段代码的线程完成。

代码语言:javascript复制
#include <stdio.h>
#include <omp.h>

/*
 * Every thread of the parallel region prints one line; the nested single
 * block is then executed by exactly one of them. Returns 0.
 */
int main(void)
{
    #pragma omp parallel
    {
        printf("Hello——, id: %d\n", omp_get_thread_num());

        /* Only one thread of the team runs this block. */
        #pragma omp single
        {
            printf("Hello, id: %d\n", omp_get_thread_num());
        }
    }

    return 0;
}

/*
Hello——, id: 1
Hello, id: 1
Hello——, id: 7
Hello——, id: 2
Hello——, id: 10
Hello——, id: 11
Hello——, id: 0
Hello——, id: 3
Hello——, id: 9
Hello——, id: 6
Hello——, id: 4
Hello——, id: 8
Hello——, id: 5

*/
代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Task body: reports which thread created the task and which one runs it.
 *
 * id: thread number of the task's creator, supplied by the caller.
 * v:  value carried by the task (the caller's loop index).
 */
void task_func(int id, int v)
{
    /* Thread number of whichever thread is executing this call. */
    int eid = omp_get_thread_num();

    printf("generated by id: %d, executed by id: %d, i: %d\n", id, eid, v);
}

/*
 * Inside a parallel region, a single thread generates 200 untied tasks;
 * each task calls task_func with the creator's thread number and the
 * loop index. Returns 0.
 */
int main(void)
{
    #pragma omp parallel
    {
        int id;
        int i;

        id = omp_get_thread_num();

        /* One thread creates all the tasks. */
        #pragma omp single
        {
            for (i = 0; i < 200; i++) {
                #pragma omp task untied
                task_func(id, i);
            }
        }
    }

    return 0;
}

#pragma omp xxx reduction组合

(125条消息) openmp 快速入门 常用技巧 parallel for sections reduction critical_billbliss的博客-CSDN博客_crtticalbliss

Reduction操作类似于我们将很多东西组合在一起。

reduction(operator : list)

其中operator指操作符,list表示操作符要作用的列表,通常是一个共享变量名,之所以称之为列表是因为线程组中的每个线程都有一份变量的拷贝,reduction即负责用给定的操作符将这些拷贝的局部变量的值进行聚合,并设置回共享变量。

代码语言:javascript复制
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>

/*
 * Finds the element of largest absolute value in an array of 20000
 * pseudo-random signed integers, using a user-declared OpenMP reduction
 * named maxabs, and prints it. Returns 0.
 *
 * The standard abs() from <stdlib.h> is used instead of a same-named
 * macro, which would shadow the library function.
 */
int main(void)
{
    int i;
    int result = 0;

    int N = 20000;
    int data[20000];

    /* Fixed seed so the generated sequence is reproducible. */
    srand(0);

    /* Even indices get -rand(), odd indices get +rand(). */
    for (i = 0; i < N; i++) {
        data[i] = ((i % 2) * 2 - 1) * rand();
    }

    /*
     * maxabs combines two partial results by keeping the one with the
     * larger absolute value; each private copy starts at 0. The trailing
     * backslashes keep the directive on one logical line.
     */
    #pragma omp declare reduction(maxabs : int :                        \
        omp_out = abs(omp_in) < abs(omp_out) ? omp_out : omp_in)        \
        initializer(omp_priv = 0)

    #pragma omp parallel for reduction(maxabs:result)
    for (i = 0; i < N; i++) {
        if (abs(data[i]) > abs(result)) {
            result = data[i];
        }
    }

    printf("result: %d\n", result);

    return 0;
}

#pragma omp master主线程执行

(125条消息) OpenMP编程(5)—同步结构(master、critical、barrier、atomic、flush、ordered)_常思大妹子的博客-CSDN博客_openmp 同步

学习openmp-master - 芒果的博客 - 芒果的个人博客 (mangoroom.cn)

master指令则指定其相关的代码块必须在主线程中执行,且其它线程不必在代码块后阻塞。

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Task body: reports which thread created the task and which one runs it.
 *
 * id: thread number of the task's creator, supplied by the caller.
 * v:  value carried by the task (the caller's loop index).
 */
void task_func(int id, int v)
{
    /* Thread number of whichever thread is executing this call. */
    int eid = omp_get_thread_num();

    printf("generated by id: %d, executed by id: %d, i: %d\n", id, eid, v);
}

/*
 * Inside a parallel region, the master thread generates 200 untied tasks;
 * each task calls task_func with the creator's thread number and the
 * loop index. Returns 0.
 */
int main(void)
{
    #pragma omp parallel
    {
        int id;
        int i;

        id = omp_get_thread_num();

        /* Only the master thread runs this loop and creates the tasks. */
        #pragma omp master
        for (i = 0; i < 200; i++) {
            #pragma omp task untied
            task_func(id, i);
        }
    }

    return 0;
}

#pragma omp task任务划分

(125条消息) OpenMP3.0的新特性Task指令基础_Augusdi的博客-CSDN博客

task解决for和sections指令的“缺陷”:无法根据运行时的环境动态地进行任务划分,必须是预先能知道的任务划分的情况。

task是“动态”定义任务的,在运行过程中,只需要使用task就会定义一个任务,任务就会在一个线程上去执行,那么其它的任务就可以并行的执行。可能某一个任务执行了一半的时候,或者甚至要执行完的时候,程序可以去创建第二个任务,任务在一个线程上去执行,一个动态的过程,不像sections和for那样,在运行之前,已经可以判断出可以如何去分配任务。而且,task是可以进行嵌套定义的,可以用于递归的情况等等

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Task body: reports which thread created the task and which one runs it.
 *
 * id: thread number of the task's creator, supplied by the caller.
 * v:  value carried by the task (the caller's loop index).
 */
void task_func(int id, int v)
{
    /* Thread number of whichever thread is executing this call. */
    int eid = omp_get_thread_num();

    printf("generated by id: %d, executed by id: %d, i: %d\n", id, eid, v);
}

/*
 * Every thread of the parallel region generates 20 tasks; each task calls
 * task_func with the creator's thread number and the loop index.
 * Returns 0.
 */
int main(void)
{
    #pragma omp parallel
    {
        int id;
        int i;

        id = omp_get_thread_num();

        /* No single/master here: the loop runs once per thread. */
        for (i = 0; i < 20; i++) {
            #pragma omp task
            task_func(id, i);
        }
    }

    return 0;
}

#pragma omp parallel (默认shared)

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Demonstrates that a variable declared before a parallel region is
 * shared by default: every thread first reads n, then every thread
 * writes its own thread number into n. Returns 0.
 */
int main(void)
{
    int n = 20;

    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();

        printf("This is thread: %d. I can see shared variable n: %d\n", id, n);
    }

    printf("\n\n\n");

    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();

        /*
         * Deliberate error for the demonstration: all threads write the
         * shared n without synchronisation, which is a data race.
         */
        n = id;
        printf("This is thread: %d. I change n to my ID: %d\n", id, n);
    }

    printf("\nData race happened and final n is: %d\n", n);

    return 0;
}

/*
This is thread: 6. I can see shared variable n: 20
This is thread: 1. I can see shared variable n: 20
This is thread: 5. I can see shared variable n: 20
This is thread: 11. I can see shared variable n: 20
This is thread: 9. I can see shared variable n: 20
This is thread: 0. I can see shared variable n: 20
This is thread: 3. I can see shared variable n: 20
This is thread: 8. I can see shared variable n: 20
This is thread: 10. I can see shared variable n: 20
This is thread: 2. I can see shared variable n: 20
This is thread: 7. I can see shared variable n: 20
This is thread: 4. I can see shared variable n: 20



This is thread: 7. I change n to my ID: 7
This is thread: 8. I change n to my ID: 8
This is thread: 5. I change n to my ID: 5
This is thread: 0. I change n to my ID: 0
This is thread: 10. I change n to my ID: 10
This is thread: 2. I change n to my ID: 2
This is thread: 11. I change n to my ID: 11
This is thread: 9. I change n to my ID: 9
This is thread: 6. I change n to my ID: 6
This is thread: 1. I change n to my ID: 1
This is thread: 3. I change n to my ID: 3
This is thread: 4. I change n to my ID: 4

Data race happened and final n is: 3



*/

#pragma omp parallel private(n)

用于控制变量在线程组中多个线程之间的共享方式。其中private,firstprivate,lastprivate表示变量的共享方式是私有的,即每个线程都有一份自己的拷贝;而shared表示线程组的线程访问的是同一个变量。

私有变量共享方式有三种指令,它们的区别在于:

private:每个线程都有一份自己的拷贝,但是这些变量并没有拷贝值,即如果变量是int,long,double等这些内置类型,那么这些变量在进入线程时是未初始化状态的;如果变量是类的实例对象,那么在线程中变量是通过默认构造得到的对象,假设类没有默认构造,则编译会报错,告诉你类没有可用的默认构造;

firstprivate:每个线程有一份自己的拷贝,每个线程的拷贝都会通过复制原变量的值来初始化。如果变量是int,long,double等内置类型则直接复制,如果为类的实例对象,则会调用实例对象的拷贝构造函数,这就意味着,假如类的拷贝构造不可访问,则变量不能够使用firstprivate方式共享;

lastprivate:变量在每个线程的共享方式与private一致,但不同的是,变量在最后一次迭代中的值会flush到主线程的变量中。最后一次迭代的意思是,如果是for循环,则主线程的变量的值是最后一个迭代值那次迭代中赋的值;如果是section,则主线程的变量最终的值是最后一个section中赋的值。要注意的是,最终主线程中变量的值并非通过拷贝构造赋值的,而是通过operator=操作符,所以如果类的赋值操作符不可访问,那么变量不能采用lastprivate方式共享。

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Demonstrates the private clause: each thread gets its own n, which does
 * not start with the outer value, and assignments to it do not affect the
 * n declared in main. Returns 0.
 */
int main(void)
{
    int n = 20;

    #pragma omp parallel private(n)
    {
        int id;

        id = omp_get_thread_num();

        /*
         * n is private and NOT initialized here; reading it is done on
         * purpose to show that, and a compiler may warn about it.
         */
        printf("This is thread: %d. Initial value of n is: %d\n", id, n);

        n = -id;
        printf("This is thread: %d. Value of n is: %d\n", id, n);
    }

    printf("\n\n\n");
    printf("Final n is: %d\n", n);

    return 0;
}

/*
This is thread: 0. Initial value of n is: 0
This is thread: 0. Value of n is: 0
This is thread: 7. Initial value of n is: 0
This is thread: 7. Value of n is: -7
This is thread: 3. Initial value of n is: 0
This is thread: 3. Value of n is: -3
This is thread: 8. Initial value of n is: 0
This is thread: 8. Value of n is: -8
This is thread: 11. Initial value of n is: 0
This is thread: 11. Value of n is: -11
This is thread: 6. Initial value of n is: 0
This is thread: 6. Value of n is: -6
This is thread: 1. Initial value of n is: 0
This is thread: 1. Value of n is: -1
This is thread: 2. Initial value of n is: 0
This is thread: 2. Value of n is: -2
This is thread: 4. Initial value of n is: 0
This is thread: 4. Value of n is: -4
This is thread: 5. Initial value of n is: 0
This is thread: 5. Value of n is: -5
This is thread: 9. Initial value of n is: 0
This is thread: 9. Value of n is: -9
This is thread: 10. Initial value of n is: 0
This is thread: 10. Value of n is: -10



Final n is: 20


*/

#pragma omp parallel firstprivate(n)

firstprivate:每个线程有一份自己的拷贝,每个线程的拷贝都会通过复制原变量的值来初始化。如果变量是int,long,double等内置类型则直接复制,如果为类的实例对象,则会调用实例对象的拷贝构造函数,这就意味着,假如类的拷贝构造不可访问,则变量不能够使用firstprivate方式共享;

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Demonstrates the firstprivate clause: each thread gets its own n that
 * starts with the outer value (20); assignments inside the region do not
 * affect the n declared in main. Returns 0.
 */
int main(void)
{
    int n = 20;

    #pragma omp parallel firstprivate(n)
    {
        int id;

        id = omp_get_thread_num();

        /* n is firstprivate: a per-thread copy initialised from the outer n. */
        printf("This is thread: %d. Initial value of n is: %d\n", id, n);

        n = -id;
        printf("This is thread: %d. Value of n is changed to: %d\n", id, n);
    }

    printf("\n\n\n");
    printf("Final n is: %d\n", n);

    return 0;
}

/*

This is thread: 11. Initial value of n is: 20
This is thread: 11. Value of n is changed to: -11
This is thread: 10. Initial value of n is: 20
This is thread: 10. Value of n is changed to: -10
This is thread: 1. Initial value of n is: 20
This is thread: 1. Value of n is changed to: -1
This is thread: 2. Initial value of n is: 20
This is thread: 2. Value of n is changed to: -2
This is thread: 9. Initial value of n is: 20
This is thread: 9. Value of n is changed to: -9
This is thread: 6. Initial value of n is: 20
This is thread: 6. Value of n is changed to: -6
This is thread: 4. Initial value of n is: 20
This is thread: 4. Value of n is changed to: -4
This is thread: 8. Initial value of n is: 20
This is thread: 8. Value of n is changed to: -8
This is thread: 5. Initial value of n is: 20
This is thread: 5. Value of n is changed to: -5
This is thread: 0. Initial value of n is: 20
This is thread: 0. Value of n is changed to: 0
This is thread: 3. Initial value of n is: 20
This is thread: 3. Value of n is changed to: -3
This is thread: 7. Initial value of n is: 20
This is thread: 7. Value of n is changed to: -7



Final n is: 20

*/

#pragma omp parallel for lastprivate(n)

lastprivate:变量在每个线程的共享方式与private一致,但不同的是,变量在最后一次迭代中的值会flush到主线程的变量中。最后一次迭代的意思是,如果是for循环,则主线程的变量的值是最后一个迭代值那次迭代中赋的值;如果是section,则主线程的变量最终的值是最后一个section中赋的值。要注意的是,最终主线程中变量的值并非通过拷贝构造赋值的,而是通过operator=操作符,所以如果类的赋值操作符不可访问,那么变量不能采用lastprivate方式共享。

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/*
 * Demonstrates the lastprivate clause on a parallel for: n is private in
 * each thread, and after the loop the outer n holds the value assigned in
 * the last iteration (i == 19). Returns 0.
 */
int main(void)
{
    int n = 0;
    int i;

    #pragma omp parallel for lastprivate(n)
    for (i = 0; i < 20; i++) {
        int id;

        id = omp_get_thread_num();

        n = -i;
        printf("This is thread: %d. Value of n is changed to: %d\n", id, n);
    }

    printf("\n\n\n");
    printf("Final n is: %d\n", n);

    return 0;
}

/*
This is thread: 9. Value of n is changed to: -17
This is thread: 10. Value of n is changed to: -18
This is thread: 5. Value of n is changed to: -10
This is thread: 5. Value of n is changed to: -11
This is thread: 3. Value of n is changed to: -6
This is thread: 3. Value of n is changed to: -7
This is thread: 2. Value of n is changed to: -4
This is thread: 2. Value of n is changed to: -5
This is thread: 4. Value of n is changed to: -8
This is thread: 4. Value of n is changed to: -9
This is thread: 0. Value of n is changed to: 0
This is thread: 0. Value of n is changed to: -1
This is thread: 6. Value of n is changed to: -12
This is thread: 6. Value of n is changed to: -13
This is thread: 8. Value of n is changed to: -16
This is thread: 11. Value of n is changed to: -19
This is thread: 7. Value of n is changed to: -14
This is thread: 7. Value of n is changed to: -15
This is thread: 1. Value of n is changed to: -2
This is thread: 1. Value of n is changed to: -3



Final n is: -19

*/

#pragma omp threadprivate(n)

线程私有数据——线程私有数据与threadprivate指令和copyin子句:使用threadprivate指令用来标明某一个变量是线程私有数据,在程序运行的过程中,不能够被其他线程访问到。使用copyin对线程私有的全局变量进行初始化。

代码语言:javascript复制
#include <omp.h>
#include <stdio.h>

/* File-scope counter; the directive below gives every thread its own copy. */
int n = 20;

#pragma omp threadprivate(n)

/*
 * Demonstrates threadprivate and copyin:
 *   1. each thread sets its own n to id * 8;
 *   2. a second parallel region prints each thread's n again;
 *   3. a third region uses copyin(n) so every thread's n is first set
 *      from the n seen by the serial part.
 * The serial part prints its own n after each region. Returns 0.
 */
int main(void)
{
    printf("n: %d\n", n);

    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();

        n = id * 8;

        printf("This is thread: %d. 11 Value of n is changed to: %d\n", id, n);
    }

    printf("11 Serial part n is: %d\n\n", n);

    /* No assignment here: each thread just prints its own copy of n. */
    #pragma omp parallel
    {
        int id;

        id = omp_get_thread_num();

        printf("This is thread: %d. 22 Value of n is changed to: %d\n", id, n);
    }

    printf("22 Serial part n is: %d\n\n", n);

    printf("copyin\n");

    /* copyin(n) initialises every thread's n on entry to the region. */
    #pragma omp parallel copyin(n)
    {
        int id;

        id = omp_get_thread_num();

        printf("This is thread: %d. 33 Value of n is changed to: %d\n", id, n);
    }

    printf("33 Serial part n is: %d\n\n", n);

    return 0;
}

/*

n: 20
This is thread: 0. 11 Value of n is changed to: 0
This is thread: 9. 11 Value of n is changed to: 72
This is thread: 4. 11 Value of n is changed to: 32
This is thread: 7. 11 Value of n is changed to: 56
This is thread: 8. 11 Value of n is changed to: 64
This is thread: 6. 11 Value of n is changed to: 48
This is thread: 2. 11 Value of n is changed to: 16
This is thread: 5. 11 Value of n is changed to: 40
This is thread: 10. 11 Value of n is changed to: 80
This is thread: 3. 11 Value of n is changed to: 24
This is thread: 11. 11 Value of n is changed to: 88
This is thread: 1. 11 Value of n is changed to: 8
11 Serial part n is: 0

This is thread: 0. 22 Value of n is changed to: 0
This is thread: 10. 22 Value of n is changed to: 80
This is thread: 4. 22 Value of n is changed to: 32
This is thread: 2. 22 Value of n is changed to: 16
This is thread: 5. 22 Value of n is changed to: 40
This is thread: 8. 22 Value of n is changed to: 64
This is thread: 7. 22 Value of n is changed to: 56
This is thread: 11. 22 Value of n is changed to: 88
This is thread: 6. 22 Value of n is changed to: 48
This is thread: 3. 22 Value of n is changed to: 24
This is thread: 1. 22 Value of n is changed to: 8
This is thread: 9. 22 Value of n is changed to: 72
22 Serial part n is: 0

copyin
This is thread: 0. 33 Value of n is changed to: 0
This is thread: 11. 33 Value of n is changed to: 0
This is thread: 4. 33 Value of n is changed to: 0
This is thread: 10. 33 Value of n is changed to: 0
This is thread: 1. 33 Value of n is changed to: 0
This is thread: 9. 33 Value of n is changed to: 0
This is thread: 8. 33 Value of n is changed to: 0
This is thread: 2. 33 Value of n is changed to: 0
This is thread: 5. 33 Value of n is changed to: 0
This is thread: 7. 33 Value of n is changed to: 0
This is thread: 6. 33 Value of n is changed to: 0
This is thread: 3. 33 Value of n is changed to: 0
33 Serial part n is: 0


*/

0 人点赞