Fundamentals of Parallelism on Intel Architecture笔记
#pragma omp simd
for循环前对for循环显式simd优化
#pragma omp declare simd
函数前使函数生成simd版本
#pragma ivdep
for循环前忽略vector dependence
#pragma vector nontemporal
使用non-temporal(streaming) store,跳过各级cache,直接stream写入内存
#include <omp.h>
int nt = omp_get_max_threads();
omp最多线程数
#pragma omp parallel private(A) shared(B)
{
int C;
omp_get_thread_num();
}
omp多线程运行
每个thread有独立的A变量,B变量在所有thread间share
每个thread有独立C
export OMP_NUM_THREADS=5
限制omp线程数
fork thread:
#include <pthread.h>
int pthread_create(pthread_t *tidp,const pthread_attr_t *attr,
void *(*start_rtn)(void*),void *arg);
fork出一个thread
-lpthread
fork process:
pid = fork();
child进程中pid = 0,parent进程中pid为child的进程号(>0),fork失败时返回-1