prim.c |
n2=n*n; /* initialize the table. */ for(i=0; i < n2; i++) tab[i] = 0; /* main loop */ for(i=2; i < n; i++) if(tab[i] == 0) for(k=i*i; k < n2; k+=i) tab[k] = 1; |
補足:
ここで示している例では,整数オーバフローの問題が生じる可能性があるが,
これを厳密には防いでいないので注意してほしい.脆弱性をもたないように
するには,例えば,加算(k+=j
)
や乗算(i*i
やn2*(myid+1)
など)で,
整数オーバフローが生じないようにする必要がある.
n2*(myid+1)については,
i0=n2*myid/P; i1=n2*(myid+1)/P;
のような分担の決め方ではなく,
乗算を先にしない,
i0=(n2/P)*myid, i1=(myid<P-1)?(n2/P)*(myid+1):n2
のようにする方法を使うことが多い.
ただし,最後(myid=P-1
)の範囲に「余り」が集中するので,
負荷の均一化が不十分となる可能性がある.
→ prim-p.c - 高速化 → prim-p2.c → prim2-p.c / / パイプライン型並列化(1) パイプライン型並列化(2) / / prim.c ------ 高速化(1)---------------→ prim2.c | | 独立部分を増やす変形 独立部分を増やす変形 | | V V prim-i.c -----高速化------→ prim-i2.c prim2-i.c | | | 分割型並列化 分割型並列化 分割型並列化 | | | V V V prim-ip-1.c prim-i2p.c prim2-ip.c (同期過多) prim-ip.c -高速化(2)→ prim-ip2.c
prim.c | prim2.c |
n2=n*n; /* initialize the table. */ for(i=0; i < n2; i++) tab[i] = 0; /* main loop */ for(i=2; i < n; i++) if(tab[i] == 0) for(k=i*i; k < n2; k+=i) tab[k] = 1; |
n2=n*n; tab[2] = tab[3] = 0; for(k0=4,k1=MIN(k0+BSIZE,n2); k0 < n2; k0=k1,k1=MIN(k0+BSIZE,n2)){ for(i=k0; i < k1; i++) tab[i] = 0; for(i=2; i < n; i++) if(tab[i] == 0) if(i*i > k1) break; else for(k=MAX(i*i,k0),k+=i-1,k-=k%i; k < k1; k+=i) tab[k] = 1; } |
prim.c | prim-i.c |
n2=n*n; /* initialize the table. */ for(i=0; i < n2; i++) tab[i] = 0; /* main loop */ for(i=2; i < n; i++) if(tab[i] == 0) for(k=i*i; k < n2; k+=i) tab[k] = 1; |
n2=n*n /* initialize the table. */ for(i=0; i < n2; i++) tab[i] = 0; /* main loop {i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */ for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n)) /* sieve (write) tab[i*i .. (n2-1)] by reading tab[i .. (i*i-1)]. */ for(j=i; j < i2; j++) if(tab[j] == 0) for(k=j*j; k < n2; k+=j) tab[k] = 1; |
prim-i.c | prim-ip.c |
n2=n*n; /* initialize the table. */ for(i=0; i < n2; i++) tab[i] = 0; /* main loop {i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */ for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n)) /* sieve (write) tab[i*i .. (n2-1)] by reading tab[i .. (i*i-1)]. */ for(j=i; j < i2; j++) if(tab[j] == 0) for(k=j*j; k < n2; k+=j) tab[k] = 1; |
n2=n*n; /* initialize the table. */ i0=n2*myid/P; i1=n2*(myid+1)/P; for(i=i0; i < i1; i++) tab[i] = 0; barrier_sync(P, myid); /* main loop {i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */ for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n)) /* sieve (write) tab[i*i .. (n2-1)] by reading tab[i .. (i*i-1)]. */ { k0 = i*i + (n2-i*i)*myid/P; k1 = i*i + (n2-i*i)*(myid+1)/P; for(j=i; j < i2; j++) if(tab[j] == 0) if(j*j > k1) break; else for(k=MAX(j*j,k0),k+=j-1,k-=k%j; k < k1; k+=j) tab[k] = 1; barrier_sync(P, myid); } |
prim-ip.c | prim-ip2.c |
n2=n*n; /* initialize the table. */ i0=n2*myid/P; i1=n2*(myid+1)/P; for(i=i0; i < i1; i++) tab[i] = 0; barrier_sync(P, myid); /* main loop {i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */ for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n)) /* sieve (write) tab[i*i .. (n2-1)] by reading tab[i .. (i*i-1)]. */ { k0 = i*i + (n2-i*i)*myid/P; k1 = i*i + (n2-i*i)*(myid+1)/P; for(j=i; j < i2; j++) if(tab[j] == 0) if(j*j > k1) break; else for(k=MAX(j*j,k0),k+=j-1,k-=k%j; k < k1; k+=j) tab[k] = 1; barrier_sync(P, myid); } |
n2=n*n; /* initialize the table. */ i0=n2*myid/P; i1=n2*(myid+1)/P; for(i=i0; i < i1; i++) tab[i] = 0; barrier_sync(P, myid); /* main loop {i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */ for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n)) /* sieve (write) tab[i*i .. (n2-1)] by reading tab[i .. (i*i-1)]. */ { k0 = i*i + (n2-i*i)*myid/P; k1 = i*i + (n2-i*i)*(myid+1)/P; for(k0a=k0,k1a=MIN(k0a+BSIZE,k1); k0a < k1; k0a=k1a,k1a=MIN(k0a+BSIZE,k1)) for(j=i; j < i2; j++) if(tab[j] == 0) if(j*j > k1a) break; else for(k=MAX(j*j,k0a),k+=j-1,k-=k%j; k < k1a; k+=j) tab[k] = 1; barrier_sync(P, myid); } |
prim.c | prim-p.c |
n2=n*n; /* initialize the table. */ for(i=0; i < n2; i++) tab[i] = 0; /* main loop */ for(i=2; i < n; i++) if(tab[i] == 0) for(k=i*i; k < n2; k+=i) tab[k] = 1; |
int n2=n*n; i0=n2*myid/P; i1=n2*(myid+1)/P; /* initialize the table. */ for(i=i0; i < i1; i++) tab[i] = 0; maxnewp = MIN(i1, n); prg = 4; /* main loop */ for(i=2; i < n; i++){ if(! (i < prg)) prg = wait_progress2(i); if(tab[i] == 0){ i2=i*i; newp = MIN(i2, maxnewp); if(newp > i0) prg = wait_and_signal_progress2(newp, i0); if(i2 >= i1) break; for(k=MAX(i2,i0),k+=i-1,k-=k%i; k < i1; k+=i) tab[k] = 1; } } if(maxnewp > i0) prg = wait_and_signal_progress2(maxnewp, i0); barrier_sync(P, myid); |
prim2.c | prim2-p.c |
n2=n*n; tab[2] = tab[3] = 0; for(k0=4,k1=MIN(k0+BSIZE,n2); k0 < n2; k0=k1,k1=MIN(k0+BSIZE,n2)){ for(i=k0; i < k1; i++) tab[i] = 0; for(i=2; i < n; i++) if(tab[i] == 0) if(i*i > k1) break; else for(k=MAX(i*i,k0),k+=i-1,k-=k%i; k < k1; k+=i) tab[k] = 1; } |
n2=n*n; tab[2] = tab[3] = 0; prg = 4; for(k0=4,k1=MIN(k0+BSIZE*P,n2); k0 < n2; k0=k1,k1=MIN(k0+BSIZE*P,n2)){ k0a = k0 + (k1-k0)*myid/P; k1a = k0 + (k1-k0)*(myid+1)/P; /* initialize the table. */ for(i=k0a; i < k1a; i++) tab[i] = 0; maxnewp = MIN(k1a, n); /* main loop */ for(i=2; i < n; i++){ if(! (i < prg)) prg = wait_progress2(i); if(tab[i] == 0){ i2 = i*i; newp = MIN(i2, maxnewp); if(newp > k0a && newp > prg) prg = wait_and_signal_progress2(newp, k0a); if(i2 > k1a) break; for(k=MAX(i2,k0a),k+=i-1,k-=k%i; k < k1a; k+=i) tab[k] = 1; } } if(maxnewp > k0a) prg = wait_and_signal_progress2(maxnewp, k0a); } barrier_sync(P, myid); |