大家好!
- 问题1: 在ti.metal与ti.cpu两种条件下运行相同代码所得的计算结果不同
##参照同一个程序用ti.cpu与ti.cuda计算结果不同
##以及使用ti.gpu比ti.cpu运行速度更慢
及其中的链接手把手教你用 Taichi 做高性能并行运算(0) - 知乎
我在macbookpro(2017, intel)中使用上述知乎链接的代码分别在ti.metal与ti.cpu的条件下运行:
import time
import taichi as ti
ti.init(arch=ti.metal)
@ti.kernel
def calc_pi() -> ti.f32:
sum = 0.0
for i in range(10**6):
n = 2 * i + 1
sum += pow(-1.0, i) / n
return sum * 4
t0 = time.time()
print('PI =', calc_pi())
t1 = time.time()
print(f'{t1 - t0:.3} sec')
得到结果:
[Taichi] Starting on arch=metal
PI = 31.55793571472168
0.487 sec
将ti.metal改为ti.cpu后:
import time
import taichi as ti
ti.init(arch=ti.cpu)
@ti.kernel
def calc_pi() -> ti.f32:
sum = 0.0
for i in range(10**6):
n = 2 * i + 1
sum += pow(-1.0, i) / n
return sum * 4
t0 = time.time()
print('PI =', calc_pi())
t1 = time.time()
print(f'{t1 - t0:.3} sec')
得到结果:
[Taichi] Starting on arch=x64
PI = 3.141583204269409
0.0561 sec
可以发现,ti.metal条件下得到了明显异于ti.cpu条件下的不正确结果(圆周率pi = 3.1415…)。
- 问题2:自动并行似乎没有成功
在我自己使用Taichi编写的一个程序中,其@ti.kernel修饰的函数的伪代码如下:
伪代码:
#全局变量:
dt = 10^-8
Num_t = 10^4
d_nu = 0.1
HW = 6*10^3
pumpcenter : ti.f32
num_pumpFW = 6*10^3
N1, N2, N3, N4, dN1, dN2, dN3, dN4 : ti.field ( ti.f32, shape = 120000 )
pumparray : ti.field ( ti.f32, shape = num_pumpFW )
detune_array : ti.Vector.field( 4, ti.f32, shape = num_pumpFW )
choose_matrix : ti.Vector.field( 4, ti.i32, shape = 4 )
#主要计算模块使用 Taichi 实现:
@ti.kernel
for tn in Num_t : #变量按时间演化,该循环不能使用并行计算,ti.loop_config( serialize=True )
for i_transi0 in range(num_pumpFW) :
transi0 = pumparray[ i_transi0 ]
for j_choice in ti.static( range(4) ):
n1 = N1[ detune_array[ i_transi0 ][ j_choice ] ] # 类似得到n2, n3, n4
k1, k2, k3, k4 = k_L4RK( transi0, pumpcenter, n1, n2, n3, n4, j_choice )
dN1[ detune_array[ i_transi0 ][ j_choice ] ] += k1*dt # dN2, dN3, dN4类似操作
for i in range(120000) : # 一个dt计算结束更新N1, N2, N3, N4
N1[ i ] += dN1[ i ] # N2, N3, N4类似操作
dN1[ i ] = 0 # dN2, dN3, dN4类似操作
实际代码(全部代码在文末附录)长下面这样:
@ti.kernel
def Burning():
ti.loop_config(serialize=True)
for tn in ti.ndrange(Num_t):
for i_transi0 in ti.ndrange(num_pumpFW):
transi0 = pumparray_ti[i_transi0]
for j_choice in ti.static(range(4)):
n1 = N1_ti[detune_array_ti[i_transi0][j_choice]]
n2 = N2_ti[detune_array_ti[i_transi0][j_choice]]
n3 = N3_ti[detune_array_ti[i_transi0][j_choice]]
n4 = N4_ti[detune_array_ti[i_transi0][j_choice]]
k_N1, k_N2, k_N3, k_N4 = k_L4RK(transi0, pumpcenter_ti,
n1, n2, n3, n4, j_choice)
dN1_ti[detune_array_ti[i_transi0][j_choice]] += k_N1*dt
dN2_ti[detune_array_ti[i_transi0][j_choice]] += k_N2*dt
dN3_ti[detune_array_ti[i_transi0][j_choice]] += k_N3*dt
dN4_ti[detune_array_ti[i_transi0][j_choice]] += k_N4*dt
for i in ti.ndrange(Inh_len):
N1_ti[i] += dN1_ti[i]
N2_ti[i] += dN2_ti[i]
N3_ti[i] += dN3_ti[i]
N4_ti[i] += dN4_ti[i]
dN1_ti[i] = 0
dN2_ti[i] = 0
dN3_ti[i] = 0
dN4_ti[i] = 0
即使我声明了第一个最外层作用域的for循环1
ti.loop_config(serialize=True)
for tn in ti.ndrange(Num_t):
为串行,那么似乎更内层一点的for循环2:
for i_transi0 in ti.ndrange(num_pumpFW):
似乎应当自动并行 。但是循环2前加与不加ti.loop_config(serialize=True)
这两种情况在ti.cpu条件下得到的结果的运行时长几乎没有差异(问题1中的代码中也存在这样的情况)。且spyder中的右下角显示我的cpu利用率始终处于较低的水平,约为20-30%,没有出现对近乎一样的代码使用numba的@jit(parallel=True)
CPU并行时显示cpu利用率到达100%的情况。
进一步将上述代码在ti.metal条件下运行虽然能够得到结果,耗时明显缩短(由ti.cpu条件下的24s左右缩短为4s左右),但是得到的结果则完全与ti.cpu不一样(可以从结果中所绘制的图看出来)。
- 综上,我的主要疑问是:
- 尽管(表面上)程序一样,metal和cpu还是在计算上有所差别?(例如精度?即使都是f32的数据类型)因此会出现ti.metal与ti.cpu两种条件下计算结果不一样的情况;
- 请问我的程序中for循环2未能成功自动并行的原因是什么呢?万分感谢!呜呜呜
- 附录(代码,与Taichi有关代码是@ti.func,@ti.kernel修饰的函数部分,以及所设置的全局变量ti.field类型的数据等)
import taichi as ti
import numpy as np
import matplotlib.pyplot as plt
import scipy.constants as const
from scipy.special import wofz
from sympy.physics.quantum import TensorProduct
from numba import jit
import time
from scipy.linalg import eigh
import pandas as pd
ti.init(arch=ti.cpu)
def Voiigt(x, alpha, gamma):
sigma = alpha / np.sqrt(2 * np.log(2))
return np.real(wofz((x + 1j*gamma)/sigma/np.sqrt(2))) / sigma\
/np.sqrt(2*np.pi)
def J_z( eigenvector ):
J = eigenvector[0]
m = eigenvector[1]
eigenvalue = m
return eigenvalue, (J,m)
def J_plus( eigenvector ):
J = eigenvector[0]
m = eigenvector[1]
pseudo_eigenvalue = np.sqrt( (J-m)*(J+m+1) )
return pseudo_eigenvalue, (J,m+1)
def J_minus( eigenvector ):
J = eigenvector[0]
m = eigenvector[1]
pseudo_eigenvalue = np.sqrt( (J+m)*(J-m+1) )
return pseudo_eigenvalue, (J,m-1)
def kronecker_bra_ket( eigenvector1, eigenvector2 ):
if eigenvector1 == eigenvector2:
return 1
else:
return 0
def spin( azimuthal_quantum_number ):
aqm = azimuthal_quantum_number
# 1. create a basis
basis = []
order = np.linspace( +aqm, -aqm, round(2*aqm+1), endpoint =True )
for i in order:
basis.append( ( aqm, i ) )
# 2. create matrix of operator
J_z_matrix = np.zeros([round(2*aqm+1), round(2*aqm+1)], dtype = float)
J_plus_matrix = np.zeros([round(2*aqm+1), round(2*aqm+1)], dtype = float)
J_minus_matrix = np.zeros([round(2*aqm+1), round(2*aqm+1)], dtype = float)
for row_num in range(round(2*aqm+1)):
for column_num in range(round(2*aqm+1)):
bra = basis[row_num]
ket = J_z( basis[column_num] )[1]
eigenvalue = J_z( basis[column_num] )[0]
J_z_matrix[row_num, column_num] = kronecker_bra_ket( bra, ket)\
*eigenvalue
ket = J_plus( basis[column_num] )[1]
eigenvalue = J_plus( basis[column_num] )[0]
J_plus_matrix[row_num, column_num] = kronecker_bra_ket( bra, ket)\
*eigenvalue
ket = J_minus( basis[column_num] )[1]
eigenvalue = J_minus( basis[column_num] )[0]
J_minus_matrix[row_num, column_num] = kronecker_bra_ket( bra, ket)\
*eigenvalue
J_x_matrix = 0.5*( J_plus_matrix + J_minus_matrix )
J_y_matrix = -0.5*1j*( J_plus_matrix - J_minus_matrix )
J = np.array([[J_x_matrix, J_y_matrix, J_z_matrix]])
return J
def matrix_multiply( AB, BC ):
'''
numpy官网的各种矩阵操作函数太麻烦了,自己造一个
矩阵AB[A:row,B:colume](type=ndarry)*矩阵BC[B:row,C:colume]
(type=ndarry)的矩阵元素乘法器以便于嵌套矩阵作
为元素的矩阵乘法.Return AC
*e.g. g_site1_sub1_ground*Pauli_matrix
'''
A = AB.shape[0]
B = AB.shape[1] #B = BC.shape[0]
C = BC.shape[1]
AC_lst = []
if ( AB[0,0].shape == () ) and ( BC[0,0].shape == () ):
'''
element of AB is number, element of BC is also number
'''
for i in range(A):
for j in range(C):
AC_ij = 0
for k in range(B):
AC_ij = AC_ij + AB[i,k]*BC[k,j]
AC_lst.append(AC_ij)
return np.array(AC_lst).reshape( A, C )
else:
'''
element of AB or BC is number, the other is matrix
or
element of AB is matrix, element of BC is also matrix
'''
for i in range(A):
for j in range(C):
AC_ij = 0
for k in range(B):
AC_ij = AC_ij + np.dot( AB[i,k],BC[k,j] )
AC_lst.append(AC_ij)
ele_shape = AC_ij.shape
return np.array(AC_lst).reshape( A, C, ele_shape[0], ele_shape[1] )
def Trans( nm ):
'''
适用于以矩阵为元素的矩阵的转置:
[ ]n*m--->[ ]m*n, A_ij--->A_ji for square matrix
*elements of the matrix is also matrix
'''
n = nm.shape[0]
m = nm.shape[1]
mn_lst = []
for col_n in range(m):
for row_n in range(n):
mn_lst.append( nm[row_n][col_n] )
ele_shape = nm[row_n][col_n].shape
return np.array(mn_lst).reshape( m, n, ele_shape[0], ele_shape[1] )
def B( phi, theta, am_B ):
B_x = am_B*np.sin(theta)*np.cos(phi)
B_y = am_B*np.sin(theta)*np.sin(phi)
B_z = am_B*np.cos(theta)
B = np.array( [[B_x, B_y, B_z]] )
return B
def CenterFreq_non167(phi, theta, am_B, flag1, flag2):
B_vec = B( phi, theta, am_B)
S = spin(1/2)
# some constants
mu_B = const.e*const.hbar/(2*const.m_e)
if flag2 == 'Site 1':
if flag1 == 'II':
g_tensor_e = np.array([[1.63, -1.86, -2.98],
[-1.86, 3.66, 5.11],
[-2.98, 5.11, 8.29]])
g_tensor_g = np.array([[2.90, -2.95, -3.56],
[-2.95, 8.90, 5.57],
[-3.56, 5.57, 5.12]])
elif flag1 == 'I':
g_tensor_e = np.array([[1.63, -1.86, 2.98],
[-1.86, 3.66, -5.11],
[2.98, -5.11, 8.29]])
g_tensor_g = np.array([[2.90, -2.95, 3.56],
[-2.95, 8.90, -5.57],
[3.56, -5.57, 5.12]])
freq_original = const.c/(1536.4*10**-9*10**6)
elif flag2 == 'Site 2':
if flag1 == 'I':
g_tensor_e = np.array([[11.58, -0.69, 4.44],
[-0.69, 0.31, -0.26],
[4.44, -0.26, 2.13]])
g_tensor_g = np.array([[14.37, -1.77, 2.40],
[-1.77, 1.93, -0.43],
[2.40, -0.43, 1.44]])
elif flag1 == 'II':
g_tensor_e = np.array([[11.58, -0.69, -4.44],
[-0.69, 0.31, 0.26],
[-4.44, 0.26, 2.13]])
g_tensor_g = np.array([[14.37, -1.77, -2.40],
[-1.77, 1.93, 0.43],
[-2.40, 0.43, 1.44]])
freq_original = const.c/(1538.9*10**-9*10**6)
electron_Zeeman_g = mu_B*matrix_multiply( B_vec,
matrix_multiply( g_tensor_g,
Trans(S)
)
)
electron_Zeeman_e = mu_B*matrix_multiply( B_vec,
matrix_multiply( g_tensor_e,
Trans(S)
)
)
H_g = electron_Zeeman_g[0][0]
H_e = electron_Zeeman_e[0][0]
eigen_g = eigh( H_g, eigvals_only=False )
eigen_e = eigh( H_e, eigvals_only=False )
eigen_value_g = eigen_g[0]
eigen_value_e = eigen_e[0]
#eigen_vector_g = eigen_g[1]
eigen_value_MHz_g = eigen_value_g/(const.h*10**6)
eigen_value_MHz_e = eigen_value_e/(const.h*10**6)
MHz_low_g = eigen_value_MHz_g[0]
MHz_up_g = eigen_value_MHz_g[1]
MHz_low_e = eigen_value_MHz_e[0]
MHz_up_e = eigen_value_MHz_e[1]
low_to_low = freq_original - MHz_low_g + MHz_low_e
low_to_up = freq_original - MHz_low_g + MHz_up_e
up_to_low = freq_original - MHz_up_g + MHz_low_e
up_to_up = freq_original - MHz_up_g + MHz_up_e
return low_to_low, low_to_up, up_to_low, up_to_up
def df_1d_to_1darray( df_1d ):
"""
将[一维]的dataframe转化为一维的dtype=float64的ndarray
"""
ary_1d = np.array( [] )
dfary_1d = df_1d.values
for i in range( len(dfary_1d) ):
try:
#适用于大小为(n,1)的一维数据
ary_1d = np.append( ary_1d, dfary_1d[i][0] )
except IndexError:
#适用于大小为(n,)的一维数据
ary_1d = np.append( ary_1d, dfary_1d[i] )
return ary_1d
@ti.func
def Lorentzian(x: ti.f32, gamma: ti.f32) -> ti.f32:
'''
gamma: HWHM
'''
return gamma/(pi_ti*(x**2+gamma**2))
@ti.func
def Gaussian(x: ti.f32, sigma: ti.f32) -> ti.f32:
'''
sigma: HWHM/sqrt(2ln2)
'''
return e_ti**(-x**2/(2*sigma**2))/(sigma*(2*pi_ti)**0.5)
@ti.func
def B31_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B31*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)
@ti.func
def B32_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B32*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)
@ti.func
def B41_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B41*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)
@ti.func
def B42_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B42*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)
@ti.func
def derivative_N1(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (-1*(B31_prime(nu0,nu_p)*popu_matrix[0] +
B41_prime(nu0,nu_p)*popu_matrix[1] + w12)*N1_t +
w21*N2_t +
(A31 + B31_prime(nu0,nu_p)*popu_matrix[0])*N3_t +
(A41 + B41_prime(nu0,nu_p)*popu_matrix[1])*N4_t
)
@ti.func
def derivative_N2(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (w12*N1_t -
(B32_prime(nu0,nu_p)*popu_matrix[2] +
B42_prime(nu0,nu_p)*popu_matrix[3] + w21)*N2_t +
(A32 + B32_prime(nu0,nu_p)*popu_matrix[2])*N3_t +
(A42 + B42_prime(nu0,nu_p)*popu_matrix[3])*N4_t
)
@ti.func
def derivative_N3(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (B31_prime(nu0,nu_p)*popu_matrix[0]*N1_t +
B32_prime(nu0,nu_p)*popu_matrix[2]*N2_t -
(A31 + B31_prime(nu0,nu_p)*popu_matrix[0] +
A32 + B32_prime(nu0,nu_p)*popu_matrix[2] +
w34)*N3_t +
w43*N4_t
)
@ti.func
def derivative_N4(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (B41_prime(nu0,nu_p)*popu_matrix[1]*N1_t +
B42_prime(nu0,nu_p)*popu_matrix[3]*N2_t +
w34*N3_t -
(A41 + B41_prime(nu0,nu_p)*popu_matrix[1] +
A42 + B42_prime(nu0,nu_p)*popu_matrix[3] +
w43)*N4_t
)
@ti.func
def k_L4RK(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
choice: ti.i32) -> (ti.f32, ti.f32, ti.f32, ti.f32):
'''
四阶龙格库塔
'''
k1_N1 = derivative_N1(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])
k1_N2 = derivative_N2(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])
k1_N3 = derivative_N3(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])
k1_N4 = derivative_N4(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])
k2_N1 = derivative_N1(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])
k2_N2 = derivative_N2(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])
k2_N3 = derivative_N3(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])
k2_N4 = derivative_N4(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])
k3_N1 = derivative_N1(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])
k3_N2 = derivative_N2(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])
k3_N3 = derivative_N3(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])
k3_N4 = derivative_N4(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])
k4_N1 = derivative_N1(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])
k4_N2 = derivative_N2(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])
k4_N3 = derivative_N3(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])
k4_N4 = derivative_N4(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])
k_N1 = (k1_N1+2*k2_N1+2*k3_N1+k4_N1)/6
k_N2 = (k1_N2+2*k2_N2+2*k3_N2+k4_N2)/6
k_N3 = (k1_N3+2*k2_N3+2*k3_N3+k4_N3)/6
k_N4 = (k1_N4+2*k2_N4+2*k3_N4+k4_N4)/6
return k_N1, k_N2, k_N3, k_N4
@ti.kernel
def Burning():
ti.loop_config(serialize=True)
for tn in ti.ndrange(Num_t):
for i_transi0 in ti.ndrange(num_pumpFW):
transi0 = pumparray_ti[i_transi0]
for j_choice in ti.static(range(4)):
n1 = N1_ti[detune_array_ti[i_transi0][j_choice]]
n2 = N2_ti[detune_array_ti[i_transi0][j_choice]]
n3 = N3_ti[detune_array_ti[i_transi0][j_choice]]
n4 = N4_ti[detune_array_ti[i_transi0][j_choice]]
k_N1, k_N2, k_N3, k_N4 = k_L4RK(transi0, pumpcenter_ti,
n1, n2, n3, n4, j_choice)
dN1_ti[detune_array_ti[i_transi0][j_choice]] += k_N1*dt
dN2_ti[detune_array_ti[i_transi0][j_choice]] += k_N2*dt
dN3_ti[detune_array_ti[i_transi0][j_choice]] += k_N3*dt
dN4_ti[detune_array_ti[i_transi0][j_choice]] += k_N4*dt
for i in ti.ndrange(Inh_len):
N1_ti[i] += dN1_ti[i]
N2_ti[i] += dN2_ti[i]
N3_ti[i] += dN3_ti[i]
N4_ti[i] += dN4_ti[i]
dN1_ti[i] = 0
dN2_ti[i] = 0
dN3_ti[i] = 0
dN4_ti[i] = 0
if __name__ == "__main__":
#----------------------------------时间步长------------------------------
dt = 10**-8 # 峰值处连续条件(不出现nan值)max:5*10**-8 s
Num_t = 10**4
#-------------------------一些flag以及外部文件信息-----------------------
Site_flag = 'Site 1'
Subclass_flag = 'I'
B_amplitude = 0.012 #Tesla
B_phi = np.pi*3/4
B_theta = np.pi/2
T = 2.1 # Kelvin
#----------------------------系统中的物理量-----------------------------
Time_begin = time.perf_counter()
#生成跃迁线部分, >_<
nu_13,nu_14,nu_23,nu_24 = CenterFreq_non167( B_phi, B_theta, B_amplitude
, Subclass_flag, Site_flag)
Time_end1 = time.perf_counter()
print('\n\t[1] ****Construction of Transitions of Er**** finished'+
' (None):\n\t\t'
+str(Time_end1 - Time_end1)[:6]+'s')
T1 = 11*10**-3 #T1 = 11 ms
Tz = 129*10**-3 #Tz = 129 ms
A3 = 1/T1
beta = 0.9
#---float32--->>>
A31 = np.float32(beta*A3)
A32 = np.float32((1-beta)*A3)
A42 = np.float32(beta*A3)
A41 = np.float32((1-beta)*A3)
B31 = np.float32(const.c**3/(8*np.pi*const.h*(nu_13*10**6)**3)*A31)
B32 = np.float32(const.c**3/(8*np.pi*const.h*(nu_23*10**6)**3)*A32)
B41 = np.float32(const.c**3/(8*np.pi*const.h*(nu_14*10**6)**3)*A41)
B42 = np.float32(const.c**3/(8*np.pi*const.h*(nu_24*10**6)**3)*A42)
w12 = np.float32(0.5*1/Tz)
w21 = np.float32(0.5*1/Tz)
w34 = np.float32(0.5*1/Tz)
w43 = np.float32(0.5*1/Tz)
#---float32---<<<
#泵浦激光部分
pump_HWHM = 0.05 # HWHM in MHz
n = 1.8
P = 1*10**-9 #泵浦激光的(有效)功率 W
r = 20*10**-6 #通过晶体时的光束半径 m
S = np.pi*r**2
I = P/S
E0 = np.sqrt(2*I/(const.epsilon_0*const.c*n))
u = I/const.c*n
#------------------------------------------------------------------------
#-------------------------并生成{N_ti}和{dN_ti}--------------------------
HW = 6*10**3
d_nu = 10**-1
Inh_len = int(2*HW/d_nu)
N_Voigt = np.zeros( (Inh_len,) ) #2*HW的numpy数组
for i in range(Inh_len):
ni = Voiigt( (i-Inh_len/2-0.5)*d_nu, 100, 100)
N_Voigt[i] = ni
Population_ratio = np.array([1,
np.e**(-const.h*(nu_13-nu_23)*10**6/(const.k*T)),
np.e**(-const.h*nu_13*10**6/(const.k*T)),
np.e**(-const.h*nu_14*10**6/(const.k*T))])
Population_ratio = Population_ratio/Population_ratio.sum()
N1 = N_Voigt.copy()*Population_ratio[0]
N2 = N_Voigt.copy()*Population_ratio[1]
N3 = N_Voigt.copy()*Population_ratio[2]
N4 = N_Voigt.copy()*Population_ratio[3]
N1_ti = ti.field(ti.f32, shape = Inh_len)
N2_ti = ti.field(ti.f32, shape = Inh_len)
N3_ti = ti.field(ti.f32, shape = Inh_len)
N4_ti = ti.field(ti.f32, shape = Inh_len)
N1_ti.from_numpy(N1.astype(np.float32))
del N1
N2_ti.from_numpy(N2.astype(np.float32))
del N2
N3_ti.from_numpy(N3.astype(np.float32))
del N3
N4_ti.from_numpy(N4.astype(np.float32))
del N4
dN1_ti = ti.field(ti.f32, shape=Inh_len)
dN2_ti = ti.field(ti.f32, shape=Inh_len)
dN3_ti = ti.field(ti.f32, shape=Inh_len)
dN4_ti = ti.field(ti.f32, shape=Inh_len)
Time_end2 = time.perf_counter()
print('\n\t[2] ****Population Initialization**** finished (None)'+
': \n\t\t'
+str(Time_end2 - Time_end1)[:6]+'s')
#----------------------将常数转换为Taichi用的32位浮点数型---------------
pi_ti = np.float32(const.pi)
e_ti = np.float32(const.e)
pump_HWHM_ti = np.float32(pump_HWHM)
u_ti = np.float32(u)
#---频率值均作了相同大值截断---
nu_13_ti = np.float32(nu_13 - 1.951*10**8)
nu_14_ti = np.float32(nu_14 - 1.951*10**8)
nu_23_ti = np.float32(nu_23 - 1.951*10**8)
nu_24_ti = np.float32(nu_24 - 1.951*10**8)
HW_ti = np.float32(HW)
d_nu_ti = np.float32(d_nu)
#------------------------生成pumpuarray-----------------
#-----------------------生成detune_array------------------
#-----------由于使用的是float32,所以进行大值截断- 1.951*10**8----------
num_pumpHW = 6000
num_pumpFW = int(2*num_pumpHW*pump_HWHM/d_nu)
Series_pumpcenter = np.array([nu_13 - 1.951*10**8])
pumparray = np.array([])
for pumpcenter in Series_pumpcenter:
pumparray = np.append(pumparray,
np.linspace(pumpcenter-num_pumpHW*pump_HWHM,
pumpcenter+num_pumpHW*pump_HWHM,
num_pumpFW,
endpoint=False)
)
pumparray_ti = ti.field(ti.f32, num_pumpFW)
pumparray_ti.from_numpy(pumparray.astype(np.float32))
pumpcenter_ti = nu_13_ti
detune_N1_13_f64 = np.around((pumparray - nu_13_ti + HW_ti)/d_nu_ti)
detune_N1_14_f64 = np.around((pumparray - nu_14_ti + HW_ti)/d_nu_ti)
detune_N2_23_f64 = np.around((pumparray - nu_23_ti + HW_ti)/d_nu_ti)
detune_N2_24_f64 = np.around((pumparray - nu_24_ti + HW_ti)/d_nu_ti)
detune_array_ti = ti.Vector.field(4, ti.i32, shape = num_pumpFW)
detune_array = np.array( [detune_N1_13_f64,
detune_N1_14_f64,
detune_N2_23_f64,
detune_N2_24_f64] )
detune_array_T = np.transpose(detune_array)
detune_array_ti.from_numpy(detune_array_T.astype(np.int32))
#--------------------------构建choose_matrix----------------------------
choose_matrix_ti = ti.Vector.field(4, ti.i32, 4)
choose_matrix = np.array([[1,0,0,0],
[0,1,0,0],
[0,0,1,0],
[0,0,0,1]], dtype=np.int32)
choose_matrix_ti.from_numpy(choose_matrix)
Time_end3 = time.perf_counter()
print('\n\t[3] ****Burning-pumparray Initialization**** finished (@Taichi)'+
': \n\t\t'
+str(Time_end3 - Time_end2)[:6]+'s')
#-----------------------------计算模块---------------------------------
Burning()
Time_end4 = time.perf_counter()
print('\n\t[4] ****Population Computation**** finished (@Taichi): \n\t\t'
+str(Time_end4 - Time_end3)[:6]+'s')
plt.plot(np.linspace(0, N1_ti.shape[0], N1_ti.shape[0],
endpoint=False), N1_ti.to_numpy())
'''
plt.plot(np.linspace(0, N1_ti.shape[0], N1_ti.shape[0],
endpoint=False),
N1_ti.to_numpy()-N_Voigt.copy()*Population_ratio[0])
'''