# Parallel PrefixSumExecutor: 0.016s
# Serialized prefix_sum kernel: 0.0011s
import taichi as ti
# Start the Taichi runtime, requesting a GPU backend.
ti.init(arch=ti.gpu)

# Number of elements in the benchmark field.
num = 120_000

# 1-D integer field that both prefix-sum implementations scan in place.
a = ti.field(dtype=int, shape=num)
@ti.kernel
def k():
    """Reset the module-level field ``a`` so that ``a[idx] == idx``."""
    for idx in range(num):
        a[idx] = idx
@ti.kernel
def prefix_sum(a: ti.template()):
    """Compute an in-place inclusive prefix sum over the 1-D field ``a``.

    After the call, ``a[i]`` holds the sum of the original ``a[0..i]``.

    Args:
        a: The field to scan, passed by reference via ``ti.template()``.
            Its own length (``a.shape[0]``) bounds the loop, so the kernel
            works for any 1-D field rather than only fields of length ``num``.
    """
    # Each iteration reads the value written by the previous one, so the
    # loop must not be parallelised; serialize=True forces sequential order.
    ti.loop_config(serialize=True)
    for i in range(1, a.shape[0]):
        a[i] = a[i] + a[i - 1]
import time

# Number of timed invocations per implementation.
REPEATS = 100


def _time_repeated(run, repeats=REPEATS):
    """Return the wall-clock seconds taken by ``repeats`` calls of ``run``.

    The field is reset with ``k()`` before timing. ``ti.sync()`` is called
    before the clock starts and again before it stops, so the measurement
    covers the actual device execution and not just the time needed to
    enqueue the kernel launches.
    """
    k()
    ti.sync()  # make sure the reset has finished before the clock starts
    start = time.perf_counter()
    for _ in range(repeats):
        run()
    ti.sync()  # wait for every launched kernel to complete
    return time.perf_counter() - start


# Warm-up: run each implementation once so that one-time costs
# (kernel compilation, executor set-up) stay out of the timed regions.
k()
pse = ti.algorithms.PrefixSumExecutor(num)
pse.run(a)
k()
prefix_sum(a)

# Parallel scan provided by Taichi.
print(_time_repeated(lambda: pse.run(a)))

# Hand-written serialized scan.
print(_time_repeated(lambda: prefix_sum(a)))