| prim.c |
n2=n*n;
/* initialize the table. */
for(i=0; i < n2; i++) tab[i] = 0;
/* main loop */
for(i=2; i < n; i++)
if(tab[i] == 0)
for(k=i*i; k < n2; k+=i)
tab[k] = 1;
|
→ prim-p.c - 高速化 → prim-p2.c → prim2-p.c
/ /
パイプライン型並列化(1) パイプライン型並列化(2)
/ /
prim.c ------ 高速化(1)---------------→ prim2.c
| |
独立部分を増やす変形 独立部分を増やす変形
| |
V V
prim-i.c -----高速化------→ prim-i2.c prim2-i.c
| | |
分割型並列化 分割型並列化 分割型並列化
| | |
V V V
prim-ip-1.c prim-i2p.c prim2-ip.c (同期過多)
prim-ip.c -高速化(2)→ prim-ip2.c
| prim.c | prim2.c |
n2=n*n;
/* initialize the table. */
for(i=0; i < n2; i++) tab[i] = 0;
/* main loop */
for(i=2; i < n; i++)
if(tab[i] == 0)
for(k=i*i; k < n2; k+=i)
tab[k] = 1;
|
n2=n*n;
tab[2] = tab[3] = 0;
for(k0=4,k1=MIN(k0+BSIZE,n2); k0 < n2; k0=k1,k1=MIN(k0+BSIZE,n2)){
for(i=k0; i < k1; i++)
tab[i] = 0;
for(i=2; i < n; i++)
if(tab[i] == 0)
if(i*i > k1)
break;
else
for(k=MAX(i*i,k0),k+=i-1,k-=k%i; k < k1; k+=i)
tab[k] = 1;
}
|
| prim.c | prim-i.c |
n2=n*n;
/* initialize the table. */
for(i=0; i < n2; i++) tab[i] = 0;
/* main loop */
for(i=2; i < n; i++)
if(tab[i] == 0)
for(k=i*i; k < n2; k+=i)
tab[k] = 1;
|
n2=n*n
/* initialize the table. */
for(i=0; i < n2; i++) tab[i] = 0;
/* main loop
{i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */
for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n))
/* sieve (write) tab[i*i .. (n2-1)]
by reading tab[i .. (i*i-1)]. */
for(j=i; j < i2; j++)
if(tab[j] == 0)
for(k=j*j; k < n2; k+=j)
tab[k] = 1;
|
| prim-i.c | prim-ip.c |
n2=n*n;
/* initialize the table. */
for(i=0; i < n2; i++) tab[i] = 0;
/* main loop
{i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */
for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n))
/* sieve (write) tab[i*i .. (n2-1)]
by reading tab[i .. (i*i-1)]. */
for(j=i; j < i2; j++)
if(tab[j] == 0)
for(k=j*j; k < n2; k+=j)
tab[k] = 1;
|
n2=n*n;
/* initialize the table. */
i0=n2*myid/P; i1=n2*(myid+1)/P;
for(i=i0; i < i1; i++) tab[i] = 0;
barrier_sync(P, myid);
/* main loop
{i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */
for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n))
/* sieve (write) tab[i*i .. (n2-1)]
by reading tab[i .. (i*i-1)]. */
{
k0 = i*i + (n2-i*i)*myid/P;
k1 = i*i + (n2-i*i)*(myid+1)/P;
for(j=i; j < i2; j++)
if(tab[j] == 0)
if(j*j > k1)
break;
else
for(k=MAX(j*j,k0),k+=j-1,k-=k%j; k < k1; k+=j)
tab[k] = 1;
barrier_sync(P, myid);
}
|
| prim-ip.c | prim-ip2.c |
n2=n*n;
/* initialize the table. */
i0=n2*myid/P; i1=n2*(myid+1)/P;
for(i=i0; i < i1; i++) tab[i] = 0;
barrier_sync(P, myid);
/* main loop
{i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */
for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n))
/* sieve (write) tab[i*i .. (n2-1)]
by reading tab[i .. (i*i-1)]. */
{
k0 = i*i + (n2-i*i)*myid/P;
k1 = i*i + (n2-i*i)*(myid+1)/P;
for(j=i; j < i2; j++)
if(tab[j] == 0)
if(j*j > k1)
break;
else
for(k=MAX(j*j,k0),k+=j-1,k-=k%j; k < k1; k+=j)
tab[k] = 1;
barrier_sync(P, myid);
}
|
n2=n*n;
/* initialize the table. */
i0=n2*myid/P; i1=n2*(myid+1)/P;
for(i=i0; i < i1; i++) tab[i] = 0;
barrier_sync(P, myid);
/* main loop
{i=2,i2=4}→{i=4,i2=16}→{i=16,i2=256}→... */
for(i=2,i2=MIN(4,n); i < n; i=i2,i2=MIN(i*i,n))
/* sieve (write) tab[i*i .. (n2-1)]
by reading tab[i .. (i*i-1)]. */
{
k0 = i*i + (n2-i*i)*myid/P;
k1 = i*i + (n2-i*i)*(myid+1)/P;
for(k0a=k0,k1a=MIN(k0a+BSIZE,k1); k0a < k1;
k0a=k1a,k1a=MIN(k0a+BSIZE,k1))
for(j=i; j < i2; j++)
if(tab[j] == 0)
if(j*j > k1a)
break;
else
for(k=MAX(j*j,k0a),k+=j-1,k-=k%j; k < k1a; k+=j)
tab[k] = 1;
barrier_sync(P, myid);
}
|
| prim.c | prim-p.c |
n2=n*n;
/* initialize the table. */
for(i=0; i < n2; i++) tab[i] = 0;
/* main loop */
for(i=2; i < n; i++)
if(tab[i] == 0)
for(k=i*i; k < n2; k+=i)
tab[k] = 1;
|
int n2=n*n;
i0=n2*myid/P; i1=n2*(myid+1)/P;
/* initialize the table. */
for(i=i0; i < i1; i++) tab[i] = 0;
maxnewp = MIN(i1, n); prg = 4;
/* main loop */
for(i=2; i < n; i++){
if(! (i < prg))
prg = wait_progress2(i);
if(tab[i] == 0){
i2=i*i;
newp = MIN(i2, maxnewp);
if(newp > i0)
prg = wait_and_signal_progress2(newp, i0);
if(i2 >= i1) break;
for(k=MAX(i2,i0),k+=i-1,k-=k%i; k < i1; k+=i)
tab[k] = 1;
}
}
if(maxnewp > i0)
prg = wait_and_signal_progress2(maxnewp, i0);
barrier_sync(P, myid);
|
| prim2.c | prim2-p.c |
n2=n*n;
tab[2] = tab[3] = 0;
for(k0=4,k1=MIN(k0+BSIZE,n2); k0 < n2;
k0=k1,k1=MIN(k0+BSIZE,n2)){
for(i=k0; i < k1; i++)
tab[i] = 0;
for(i=2; i < n; i++)
if(tab[i] == 0)
if(i*i > k1)
break;
else
for(k=MAX(i*i,k0),k+=i-1,k-=k%i; k < k1; k+=i)
tab[k] = 1;
}
|
n2=n*n;
tab[2] = tab[3] = 0;
prg = 4;
for(k0=4,k1=MIN(k0+BSIZE*P,n2); k0 < n2;
k0=k1,k1=MIN(k0+BSIZE*P,n2)){
k0a = k0 + (k1-k0)*myid/P;
k1a = k0 + (k1-k0)*(myid+1)/P;
/* initialize the table. */
for(i=k0a; i < k1a; i++)
tab[i] = 0;
maxnewp = MIN(k1a, n);
/* main loop */
for(i=2; i < n; i++){
if(! (i < prg))
prg = wait_progress2(i);
if(tab[i] == 0){
i2 = i*i;
newp = MIN(i2, maxnewp);
if(newp > k0a && newp > prg)
prg = wait_and_signal_progress2(newp, k0a);
if(i2 > k1a) break;
for(k=MAX(i2,k0a),k+=i-1,k-=k%i; k < k1a; k+=i)
tab[k] = 1;
}
}
if(maxnewp > k0a)
prg = wait_and_signal_progress2(maxnewp, k0a);
}
barrier_sync(P, myid);
|