Compare commits
5 Commits
blochsolve
...
master
Author | SHA1 | Date | |
---|---|---|---|
03fc9e6d70 | |||
47dd0df8bc | |||
66712efd49 | |||
a70687f5e3 | |||
85030448c3 |
@ -1,5 +1,11 @@
|
|||||||
# fdfd_tools
|
# fdfd_tools
|
||||||
|
|
||||||
|
** DEPRECATED **
|
||||||
|
|
||||||
|
The functionality in this module is now provided by [meanas](https://mpxd.net/code/jan/meanas).
|
||||||
|
|
||||||
|
-----------------------
|
||||||
|
|
||||||
**fdfd_tools** is a python package containing utilities for
|
**fdfd_tools** is a python package containing utilities for
|
||||||
creating and analyzing 2D and 3D finite-difference frequency-domain (FDFD)
|
creating and analyzing 2D and 3D finite-difference frequency-domain (FDFD)
|
||||||
electromagnetic simulations.
|
electromagnetic simulations.
|
||||||
|
@ -30,7 +30,7 @@ g2.shifts = numpy.zeros((6,3))
|
|||||||
g2.grids = [numpy.zeros(g.shape) for _ in range(6)]
|
g2.grids = [numpy.zeros(g.shape) for _ in range(6)]
|
||||||
|
|
||||||
epsilon = [g.grids[0],] * 3
|
epsilon = [g.grids[0],] * 3
|
||||||
reciprocal_lattice = numpy.diag(1000/numpy.array([x_period, y_period, z_period])) #cols are vectors
|
reciprocal_lattice = numpy.diag(1e6/numpy.array([x_period, y_period, z_period])) #cols are vectors
|
||||||
|
|
||||||
#print('Finding k at 1550nm')
|
#print('Finding k at 1550nm')
|
||||||
#k, f = bloch.find_k(frequency=1/1550,
|
#k, f = bloch.find_k(frequency=1/1550,
|
||||||
@ -47,7 +47,7 @@ for k0x in [.25]:
|
|||||||
k0 = numpy.array([k0x, 0, 0])
|
k0 = numpy.array([k0x, 0, 0])
|
||||||
|
|
||||||
kmag = norm(reciprocal_lattice @ k0)
|
kmag = norm(reciprocal_lattice @ k0)
|
||||||
tolerance = 1e-4 * (1/1550) # df = f * dn_eff / n
|
tolerance = (1e6/1550) * 1e-4/1.5 # df = f * dn_eff / n
|
||||||
logger.info('tolerance {}'.format(tolerance))
|
logger.info('tolerance {}'.format(tolerance))
|
||||||
|
|
||||||
n, v = bloch.eigsolve(4, k0, G_matrix=reciprocal_lattice, epsilon=epsilon, tolerance=tolerance)
|
n, v = bloch.eigsolve(4, k0, G_matrix=reciprocal_lattice, epsilon=epsilon, tolerance=tolerance)
|
||||||
|
@ -359,6 +359,8 @@ def eigsolve(num_modes: int,
|
|||||||
"""
|
"""
|
||||||
h_size = 2 * epsilon[0].size
|
h_size = 2 * epsilon[0].size
|
||||||
|
|
||||||
|
kmag = norm(G_matrix @ k0)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Generate the operators
|
Generate the operators
|
||||||
'''
|
'''
|
||||||
@ -390,7 +392,7 @@ def eigsolve(num_modes: int,
|
|||||||
onto the space orthonormal to Z. If approx_grad is True, the approximate
|
onto the space orthonormal to Z. If approx_grad is True, the approximate
|
||||||
inverse of the maxwell operator is used to precondition the gradient.
|
inverse of the maxwell operator is used to precondition the gradient.
|
||||||
"""
|
"""
|
||||||
z = Z.reshape(y_shape)
|
z = Z.view(dtype=complex).reshape(y_shape)
|
||||||
U = numpy.linalg.inv(z.conj().T @ z)
|
U = numpy.linalg.inv(z.conj().T @ z)
|
||||||
zU = z @ U
|
zU = z @ U
|
||||||
AzU = scipy_op @ zU
|
AzU = scipy_op @ zU
|
||||||
@ -400,27 +402,49 @@ def eigsolve(num_modes: int,
|
|||||||
df_dy = scipy_iop @ (AzU - zU @ zTAzU)
|
df_dy = scipy_iop @ (AzU - zU @ zTAzU)
|
||||||
else:
|
else:
|
||||||
df_dy = (AzU - zU @ zTAzU)
|
df_dy = (AzU - zU @ zTAzU)
|
||||||
return numpy.abs(f), numpy.sign(f) * numpy.real(df_dy).ravel()
|
|
||||||
|
df_dy_flat = df_dy.view(dtype=float).ravel()
|
||||||
|
return numpy.abs(f), numpy.sign(f) * df_dy_flat
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Use the conjugate gradient method and the approximate gradient calculation to
|
Use the conjugate gradient method and the approximate gradient calculation to
|
||||||
quickly find approximate eigenvectors.
|
quickly find approximate eigenvectors.
|
||||||
'''
|
'''
|
||||||
result = scipy.optimize.minimize(rayleigh_quotient,
|
result = scipy.optimize.minimize(rayleigh_quotient,
|
||||||
numpy.random.rand(*y_shape),
|
numpy.random.rand(*y_shape, 2),
|
||||||
jac=True,
|
jac=True,
|
||||||
method='CG',
|
method='L-BFGS-B',
|
||||||
tol=1e-5,
|
tol=1e-20,
|
||||||
options={'maxiter': 30, 'disp':True})
|
options={'maxiter': 2000, 'gtol':0, 'ftol':1e-20 , 'disp':True})#, 'maxls':80, 'm':30})
|
||||||
|
|
||||||
|
|
||||||
|
result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, True),
|
||||||
|
result.x,
|
||||||
|
jac=True,
|
||||||
|
method='L-BFGS-B',
|
||||||
|
tol=1e-20,
|
||||||
|
options={'maxiter': 2000, 'gtol':0, 'disp':True})
|
||||||
|
|
||||||
result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False),
|
result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False),
|
||||||
result.x,
|
result.x,
|
||||||
jac=True,
|
jac=True,
|
||||||
method='CG',
|
method='L-BFGS-B',
|
||||||
tol=1e-13,
|
tol=1e-20,
|
||||||
options={'maxiter': 100, 'disp':True})
|
options={'maxiter': 2000, 'gtol':0, 'disp':True})
|
||||||
|
|
||||||
z = result.x.reshape(y_shape)
|
for i in range(20):
|
||||||
|
result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False),
|
||||||
|
result.x,
|
||||||
|
jac=True,
|
||||||
|
method='L-BFGS-B',
|
||||||
|
tol=1e-20,
|
||||||
|
options={'maxiter': 70, 'gtol':0, 'disp':True})
|
||||||
|
if result.nit == 0:
|
||||||
|
# We took 0 steps, so re-running won't help
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
z = result.x.view(dtype=complex).reshape(y_shape)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Recover eigenvectors from Z
|
Recover eigenvectors from Z
|
||||||
@ -432,26 +456,15 @@ def eigsolve(num_modes: int,
|
|||||||
eigvals, w_eigvecs = numpy.linalg.eig(w)
|
eigvals, w_eigvecs = numpy.linalg.eig(w)
|
||||||
eigvecs = y @ w_eigvecs
|
eigvecs = y @ w_eigvecs
|
||||||
|
|
||||||
for i in range(len(eigvals)):
|
|
||||||
v = eigvecs[:, i]
|
|
||||||
n = eigvals[i]
|
|
||||||
v /= norm(v)
|
|
||||||
logger.info('eigness {}: {}'.format(i, norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )))
|
|
||||||
|
|
||||||
for i in range(len(eigvals)):
|
|
||||||
logger.info('Refining eigenvector {}'.format(i))
|
|
||||||
eigvals[i], eigvecs[:, i] = rayleigh_quotient_iteration(scipy_op,
|
|
||||||
guess_vector=eigvecs[:, i],
|
|
||||||
iterations=40,
|
|
||||||
tolerance=tolerance * numpy.real(numpy.sqrt(eigvals[i])) * 2,
|
|
||||||
solver = lambda A, b: spalg.bicgstab(A, b, maxiter=200)[0])
|
|
||||||
|
|
||||||
for i in range(len(eigvals)):
|
for i in range(len(eigvals)):
|
||||||
v = eigvecs[:, i]
|
v = eigvecs[:, i]
|
||||||
n = eigvals[i]
|
n = eigvals[i]
|
||||||
v /= norm(v)
|
v /= norm(v)
|
||||||
eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )
|
eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )
|
||||||
logger.info('eigness {}: {}'.format(i, eigness))
|
f = numpy.sqrt(-numpy.real(n))
|
||||||
|
df = numpy.sqrt(-numpy.real(n + eigness))
|
||||||
|
neff_err = kmag * (1/df - 1/f)
|
||||||
|
logger.info('eigness {}: {}\n neff_err: {}'.format(i, eigness, neff_err))
|
||||||
|
|
||||||
order = numpy.argsort(numpy.abs(eigvals))
|
order = numpy.argsort(numpy.abs(eigvals))
|
||||||
return eigvals[order], eigvecs.T[order]
|
return eigvals[order], eigvecs.T[order]
|
||||||
@ -498,245 +511,3 @@ def find_k(frequency: float,
|
|||||||
return res.x * direction, res.fun + frequency
|
return res.x * direction, res.fun + frequency
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#def eigsolve2(num_modes: int,
|
|
||||||
# k0: numpy.ndarray,
|
|
||||||
# G_matrix: numpy.ndarray,
|
|
||||||
# epsilon: field_t,
|
|
||||||
# mu: field_t = None,
|
|
||||||
# tolerance = 1e-8,
|
|
||||||
# ) -> Tuple[numpy.ndarray, numpy.ndarray]:
|
|
||||||
# """
|
|
||||||
# Find the first (lowest-frequency) num_modes eigenmodes with Bloch wavevector
|
|
||||||
# k0 of the specified structure.
|
|
||||||
#
|
|
||||||
# :param k0: Bloch wavevector, [k0x, k0y, k0z].
|
|
||||||
# :param G_matrix: 3x3 matrix, with reciprocal lattice vectors as columns.
|
|
||||||
# :param epsilon: Dielectric constant distribution for the simulation.
|
|
||||||
# All fields are sampled at cell centers (i.e., NOT Yee-gridded)
|
|
||||||
# :param mu: Magnetic permability distribution for the simulation.
|
|
||||||
# Default None (1 everywhere).
|
|
||||||
# :return: (eigenvalues, eigenvectors) where eigenvalues[i] corresponds to the
|
|
||||||
# vector eigenvectors[i, :]
|
|
||||||
# """
|
|
||||||
# h_size = 2 * epsilon[0].size
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# '''
|
|
||||||
# Generate the operators
|
|
||||||
# '''
|
|
||||||
# mop = maxwell_operator(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu)
|
|
||||||
# imop = inverse_maxwell_operator_approx(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu)
|
|
||||||
#
|
|
||||||
# scipy_op = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=mop)
|
|
||||||
# scipy_iop = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=imop)
|
|
||||||
#
|
|
||||||
# y_shape = (h_size, num_modes)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# prev_E = 0
|
|
||||||
# d_scale = 1
|
|
||||||
# prev_traceGtKG = 0
|
|
||||||
# prev_theta = 0.5
|
|
||||||
# D = numpy.zeros(shape=y_shape, dtype=complex)
|
|
||||||
#
|
|
||||||
# y0 = None
|
|
||||||
# if y0 is None:
|
|
||||||
# z = numpy.random.rand(*y_shape, dtype=complex)
|
|
||||||
# else:
|
|
||||||
# z = y0
|
|
||||||
#
|
|
||||||
# while True:
|
|
||||||
# z2 = z.conj().T @ z
|
|
||||||
# z_norm = numpy.sqrt(numpy.real(numpy.trace(z2))) / num_modes
|
|
||||||
# z /= z_norm
|
|
||||||
# z2 /= z_norm * z_norm
|
|
||||||
# try:
|
|
||||||
# U = numpy.linalg.inv(z2)
|
|
||||||
# except numpy.linalg.LinAlgError:
|
|
||||||
# z = numpy.random.rand(*y_shape, dtype=complex)
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# trace_U = numpy.real(numpy.trace(U))
|
|
||||||
# if trace_U > 1e8 * num_modes:
|
|
||||||
# z = z @ scipy.linalg.sqrtm(U).conj().T
|
|
||||||
# prev_trace GtX = 0
|
|
||||||
# continue
|
|
||||||
# break
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def rayleigh_quotient(Z: numpy.ndarray, approx_grad: bool = True):
|
|
||||||
# """
|
|
||||||
# Absolute value of the block Rayleigh quotient, and the associated gradient.
|
|
||||||
#
|
|
||||||
# See Johnson and Joannopoulos, Opt. Expr. 8, 3 (2001) for details (full
|
|
||||||
# citation in module docstring).
|
|
||||||
#
|
|
||||||
# ===
|
|
||||||
#
|
|
||||||
# Notes on my understanding of the procedure:
|
|
||||||
#
|
|
||||||
# Minimize f(Y) = |trace((Y.H @ A @ Y)|, making use of Y = Z @ inv(Z.H @ Z)^(1/2)
|
|
||||||
# (a polar orthogonalization of Y). This gives f(Z) = |trace(Z.H @ A @ Z @ U)|,
|
|
||||||
# where U = inv(Z.H @ Z). We minimize the absolute value to find the eigenvalues
|
|
||||||
# with smallest magnitude.
|
|
||||||
#
|
|
||||||
# The gradient is P @ (A @ Z @ U), where P = (1 - Z @ U @ Z.H) is a projection
|
|
||||||
# onto the space orthonormal to Z. If approx_grad is True, the approximate
|
|
||||||
# inverse of the maxwell operator is used to precondition the gradient.
|
|
||||||
# """
|
|
||||||
# z = Z.reshape(y_shape)
|
|
||||||
# U = numpy.linalg.inv(z.conj().T @ z)
|
|
||||||
# zU = z @ U
|
|
||||||
# AzU = scipy_op @ zU
|
|
||||||
# zTAzU = z.conj().T @ AzU
|
|
||||||
# f = numpy.real(numpy.trace(zTAzU))
|
|
||||||
# if approx_grad:
|
|
||||||
# df_dy = scipy_iop @ (AzU - zU @ zTAzU)
|
|
||||||
# else:
|
|
||||||
# df_dy = (AzU - zU @ zTAzU)
|
|
||||||
# logging.info('f={}'.format(f))
|
|
||||||
# return numpy.abs(f), numpy.sign(f) * numpy.real(df_dy).ravel()
|
|
||||||
#
|
|
||||||
# max_iters = 10000 #TODO
|
|
||||||
# for iter in range(max_iters):
|
|
||||||
# f, G = rayleigh_quotient(z, False)
|
|
||||||
#
|
|
||||||
# if iter > 0 and abs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7):
|
|
||||||
# break
|
|
||||||
#
|
|
||||||
# KG = scipy_iop @ G
|
|
||||||
# traceGtKG = numpy.real(numpy.trace(G.conj().T @ KG)) + g_lag * g_lag
|
|
||||||
# gamma_numerator = traceGtKG
|
|
||||||
#
|
|
||||||
# reset_iters = 1000 #TODO
|
|
||||||
# if prev_trace_GtKG == 0 or iter % reset_iters == 0:
|
|
||||||
# gamma = 0
|
|
||||||
# else:
|
|
||||||
# gamma = gamma_numerator / prev_trace_GtKG
|
|
||||||
#
|
|
||||||
# D = gamma * d_scale * D + KG
|
|
||||||
# d_lag = gamma * d_scale * d_lag + g_lag
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# d_scale = numpy.sqrt(numpy.real(numpy.sum(D.conj() * D))) / num_modes
|
|
||||||
# D /= d_scale
|
|
||||||
#
|
|
||||||
# AD = A @ D
|
|
||||||
# DtD = D.conj().T @ D
|
|
||||||
# DtAD = D.conj().T @ AD
|
|
||||||
#
|
|
||||||
# YtD = z.conj().T @ D
|
|
||||||
# YtAD = z.conj().T @ AD
|
|
||||||
# symYtD = (YtD + YtD.conj().T) * 0.5
|
|
||||||
# symYtAD = (YtD + YtAD.conj().T) * 0.5
|
|
||||||
#
|
|
||||||
# U_sYtD = U @ symYtD #.H -- shouldn't matter?
|
|
||||||
#
|
|
||||||
# dE = 2.0 * (real(trace(U.H @ symYtAD)) - real(trace(YtAYU.H @ U_sYtD)))
|
|
||||||
#
|
|
||||||
# S2 = DtD - 4 * symYtD @ U_sYtD
|
|
||||||
# d2E = 2.0 * (real(trace(U.conj().T @ DtAD)) -
|
|
||||||
# real(trace(YtAYU.conj().T @ U @ S2)) -
|
|
||||||
# 4.0 * real(trace(U.conj().T @ symYtAD @ U_sYtD)))
|
|
||||||
#
|
|
||||||
# d_lag = lag = trace_YtLY = trace_DtLD = trace_YtLD = 0
|
|
||||||
#
|
|
||||||
# # Newton-Raphson to find a root of the first derivative:
|
|
||||||
# theta = -dE/d2E
|
|
||||||
#
|
|
||||||
# if d2E < 0 or abs(theta) >= K_PI:
|
|
||||||
# theta = -abs(prev_theta) * numpy.sign(dE)
|
|
||||||
#
|
|
||||||
# # YtAYU * YtBY = YtAY for use in linmin.
|
|
||||||
# YtAY = YtAYU @ YtBY.conj().T
|
|
||||||
#
|
|
||||||
# theta, new_E, new_dE = linmin(theta, E, dE, 0.1, min(tolerance, 1e-6), 1e-14, 0, -numpy.sign(dE) * K_PI, trace_func, tfd)
|
|
||||||
#
|
|
||||||
# improvement = numpy.abs(E - new_E) * 2 / numpy.abs(E + new_E)
|
|
||||||
# logging.info('linmin improvement {}'.format(improvement))
|
|
||||||
# z *= numpy.cos(theta)
|
|
||||||
# z += D * numpy.sin(theta)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# prev_traceGtKG = trace_GtKG
|
|
||||||
# prev_theta = theta
|
|
||||||
# prev_E = E
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func):
|
|
||||||
# if df0 > 0:
|
|
||||||
# x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq))
|
|
||||||
# return -x0, f0, -df0
|
|
||||||
# elif df0 == 0:
|
|
||||||
# return 0, f0, df0
|
|
||||||
# else:
|
|
||||||
# x = x_guess
|
|
||||||
# fx = f0
|
|
||||||
# dfx = df0
|
|
||||||
#
|
|
||||||
# dsrch(xxx,,)
|
|
||||||
# for i in range(int(1e6)):
|
|
||||||
# if task != 'F':
|
|
||||||
# logging.info('search converged in {} iterations'.format(i))
|
|
||||||
# break
|
|
||||||
# fx = f(x, dfx)
|
|
||||||
# dsrch(...)
|
|
||||||
#
|
|
||||||
# return x, fx, dfx
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# '''
|
|
||||||
# Use the conjugate gradient method and the approximate gradient calculation to
|
|
||||||
# quickly find approximate eigenvectors.
|
|
||||||
# '''
|
|
||||||
# #result = scipy.optimize.minimize(rayleigh_quotient,
|
|
||||||
# # numpy.random.rand(*y_shape),
|
|
||||||
# # jac=True,
|
|
||||||
# # method='CG',
|
|
||||||
# # tol=1e-5,
|
|
||||||
# # options={'maxiter': 30, 'disp':True})
|
|
||||||
#
|
|
||||||
# #result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False),
|
|
||||||
# # result.x,
|
|
||||||
# # jac=True,
|
|
||||||
# # method='CG',
|
|
||||||
# # tol=1e-13,
|
|
||||||
# # options={'maxiter': 100, 'disp':True})
|
|
||||||
# #
|
|
||||||
# #z = result.x.reshape(y_shape)
|
|
||||||
#
|
|
||||||
# '''
|
|
||||||
# Recover eigenvectors from Z
|
|
||||||
# '''
|
|
||||||
# U = numpy.linalg.inv(z.conj().T @ z)
|
|
||||||
# y = z @ scipy.linalg.sqrtm(U)
|
|
||||||
# w = y.conj().T @ (scipy_op @ y)
|
|
||||||
#
|
|
||||||
# eigvals, w_eigvecs = numpy.linalg.eig(w)
|
|
||||||
# eigvecs = y @ w_eigvecs
|
|
||||||
#
|
|
||||||
# for i in range(len(eigvals)):
|
|
||||||
# v = eigvecs[:, i]
|
|
||||||
# n = eigvals[i]
|
|
||||||
# v /= norm(v)
|
|
||||||
# logger.info('eigness {}: {}'.format(i, norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )))
|
|
||||||
#
|
|
||||||
# for i in range(len(eigvals)):
|
|
||||||
# logger.info('Refining eigenvector {}'.format(i))
|
|
||||||
# eigvals[i], eigvecs[:, i] = rayleigh_quotient_iteration(scipy_op,
|
|
||||||
# guess_vector=eigvecs[:, i],
|
|
||||||
# iterations=40,
|
|
||||||
# tolerance=tolerance * numpy.real(numpy.sqrt(eigvals[i])) * 2,
|
|
||||||
# solver = lambda A, b: spalg.bicgstab(A, b, maxiter=200)[0])
|
|
||||||
#
|
|
||||||
# for i in range(len(eigvals)):
|
|
||||||
# v = eigvecs[:, i]
|
|
||||||
# n = eigvals[i]
|
|
||||||
# v /= norm(v)
|
|
||||||
# eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )
|
|
||||||
# logger.info('eigness {}: {}'.format(i, eigness))
|
|
||||||
#
|
|
||||||
# order = numpy.argsort(numpy.abs(eigvals))
|
|
||||||
# return eigvals[order], eigvecs.T[order]
|
|
||||||
|
@ -1,412 +0,0 @@
|
|||||||
/* Copyright (C) 1999-2014 Massachusetts Institute of Technology.
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include <mpiglue.h>
|
|
||||||
#include <mpi_utils.h>
|
|
||||||
#include <check.h>
|
|
||||||
#include <scalar.h>
|
|
||||||
#include <matrices.h>
|
|
||||||
#include <blasglue.h>
|
|
||||||
|
|
||||||
#include "eigensolver.h"
|
|
||||||
#include "linmin.h"
|
|
||||||
|
|
||||||
extern void eigensolver_get_eigenvals_aux(evectmatrix Y, real *eigenvals,
|
|
||||||
evectoperator A, void *Adata,
|
|
||||||
evectmatrix Work1, evectmatrix Work2,
|
|
||||||
sqmatrix U, sqmatrix Usqrt,
|
|
||||||
sqmatrix Uwork);
|
|
||||||
|
|
||||||
#define STRINGIZEx(x) #x /* a hack so that we can stringize macro values */
|
|
||||||
#define STRINGIZE(x) STRINGIZEx(x)
|
|
||||||
|
|
||||||
#define K_PI 3.141592653589793238462643383279502884197
|
|
||||||
#define MIN2(a,b) ((a) < (b) ? (a) : (b))
|
|
||||||
#define MAX2(a,b) ((a) > (b) ? (a) : (b))
|
|
||||||
|
|
||||||
#if defined(SCALAR_LONG_DOUBLE_PREC)
|
|
||||||
# define fabs fabsl
|
|
||||||
# define cos cosl
|
|
||||||
# define sin sinl
|
|
||||||
# define sqrt sqrtl
|
|
||||||
# define atan atanl
|
|
||||||
# define atan2 atan2l
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Evalutate op, and set t to the elapsed time (in seconds). */
|
|
||||||
#define TIME_OP(t, op) { \
|
|
||||||
mpiglue_clock_t xxx_time_op_start_time = MPIGLUE_CLOCK; \
|
|
||||||
{ \
|
|
||||||
op; \
|
|
||||||
} \
|
|
||||||
(t) = MPIGLUE_CLOCK_DIFF(MPIGLUE_CLOCK, xxx_time_op_start_time); \
|
|
||||||
}
|
|
||||||
|
|
||||||
/**************************************************************************/
|
|
||||||
|
|
||||||
#define EIGENSOLVER_MAX_ITERATIONS 100000
|
|
||||||
#define FEEDBACK_TIME 4.0 /* elapsed time before we print progress feedback */
|
|
||||||
|
|
||||||
/* Number of iterations after which to reset conjugate gradient
|
|
||||||
direction to steepest descent. (Picked after some experimentation.
|
|
||||||
Is there a better basis? Should this change with the problem
|
|
||||||
size?) */
|
|
||||||
#define CG_RESET_ITERS 70
|
|
||||||
|
|
||||||
/* Threshold for trace(1/YtBY) = trace(U) before we reorthogonalize: */
|
|
||||||
#define EIGS_TRACE_U_THRESHOLD 1e8
|
|
||||||
|
|
||||||
/**************************************************************************/
|
|
||||||
|
|
||||||
/* estimated times/iteration for different iteration schemes, based
|
|
||||||
on the measure times for various operations and the operation counts: */
|
|
||||||
|
|
||||||
#define EXACT_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ, t_linmin) \
|
|
||||||
((t_AZ)*2 + (t_KZ) + (t_ZtW)*4 + (t_ZS)*2 + (t_ZtZ)*2 + (t_linmin))
|
|
||||||
|
|
||||||
#define APPROX_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ) \
|
|
||||||
((t_AZ)*2 + (t_KZ) + (t_ZtW)*2 + (t_ZS)*2 + (t_ZtZ)*2)
|
|
||||||
|
|
||||||
/* Guess for the convergence slowdown factor due to the approximate
|
|
||||||
line minimization. It is probably best to be conservative, as the
|
|
||||||
exact line minimization is more reliable and we only want to
|
|
||||||
abandon it if there is a big speed gain. */
|
|
||||||
#define APPROX_LINMIN_SLOWDOWN_GUESS 2.0
|
|
||||||
|
|
||||||
/* We also don't want to use the approximate line minimization if
|
|
||||||
the exact line minimization makes a big difference in the value
|
|
||||||
of the trace that's achieved (i.e. if one step of Newton's method
|
|
||||||
on the trace derivative does not do a good job). The following
|
|
||||||
is the maximum improvement by the exact line minimization (over
|
|
||||||
one step of Newton) at which we'll allow the use of approximate line
|
|
||||||
minimization. */
|
|
||||||
#define APPROX_LINMIN_IMPROVEMENT_THRESHOLD 0.05
|
|
||||||
|
|
||||||
/**************************************************************************/
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
sqmatrix YtAY, DtAD, symYtAD, YtBY, DtBD, symYtBD, S1, S2, S3;
|
|
||||||
real lag, d_lag, trace_YtLY, trace_DtLD, trace_YtLD;
|
|
||||||
} trace_func_data;
|
|
||||||
|
|
||||||
static linmin_real trace_func(linmin_real theta, linmin_real *trace_deriv, void *data)
|
|
||||||
{
|
|
||||||
linmin_real trace;
|
|
||||||
trace_func_data *d = (trace_func_data *) data;
|
|
||||||
linmin_real c = cos(theta), s = sin(theta);
|
|
||||||
|
|
||||||
YDNi = c*c * YtY + s*s * DtD + 2*s*c * symYtD
|
|
||||||
YDNi.inv()
|
|
||||||
|
|
||||||
if not YDNi.inv():
|
|
||||||
/* if c or s is small, we sometimes run into numerical
|
|
||||||
difficulties, and it is better to use the Taylor expansion */
|
|
||||||
if c < 1e-4 * s and c != 0:
|
|
||||||
YDNi = DtD.inv()
|
|
||||||
S3 = (YDNi @ symYtD.H) @ YDNi.H
|
|
||||||
YDNi = 1/(s*s) * YDNi - 2*c/(s*s*s) * S3
|
|
||||||
elif s < 1e-4 * c and s != 0:
|
|
||||||
YDNi = YtY.inv()
|
|
||||||
S3 = (YDNi @ symYtD.H) @ YDNi.H
|
|
||||||
YDNi = 1/(c*c) * YDNi - 2*s/(c*c*c) * S3
|
|
||||||
else:
|
|
||||||
CHECK(0, "inexplicable singularity in linmin trace_func")
|
|
||||||
|
|
||||||
YADN = c*c * YtAY + s*s * DtAD + 2*s*c * smYtAD
|
|
||||||
trace = real(trace(YADN.H @ YDNi)) + (c*c * trace_YtLY + s*s * trace_DtLD + 2*s*c * trace_YtLD) * (c * lag + s * d_lag)
|
|
||||||
|
|
||||||
if (trace_deriv) {
|
|
||||||
c2 = cos(2*theta)
|
|
||||||
s2 = sin(2*theta);
|
|
||||||
|
|
||||||
S3 = -0.5 * s2 * (YtAY - DtAD) + c2 * symYtAD
|
|
||||||
|
|
||||||
*trace_deriv = real(trace(YDNi.H @ S3))
|
|
||||||
|
|
||||||
S2 = (YDNi @ YADN.H) @ YDNi.H
|
|
||||||
S3 = -0.5 * s2 * (YtY - DtD) + c2 * symYtD
|
|
||||||
|
|
||||||
*trace_deriv -= real(trace(S2.H @ S3))
|
|
||||||
*trace_deriv *= 2
|
|
||||||
|
|
||||||
*trace_deriv += (-s2 * trace_YtLY + s2 * trace_DtLD
|
|
||||||
+ 2*c2 * trace_YtLD) * (c * lag + s * d_lag);
|
|
||||||
*trace_deriv += (c*c * trace_YtLY + s*s * trace_DtLD
|
|
||||||
+ 2*s*c * trace_YtLD) * (-s * lag + c * d_lag);
|
|
||||||
}
|
|
||||||
|
|
||||||
return trace;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**************************************************************************/
|
|
||||||
|
|
||||||
#define EIG_HISTORY_SIZE 5
|
|
||||||
|
|
||||||
/* find generalized eigenvectors Y of (A,B) by minimizing Rayleigh quotient
|
|
||||||
|
|
||||||
tr [ Yt A Y / (Yt B Y) ] + lag * tr [ Yt L Y ]
|
|
||||||
|
|
||||||
where lag is a Lagrange multiplier and L is a Hermitian operator
|
|
||||||
implementing some constraint tr [ Yt L Y ] = 0 on the eigenvectors
|
|
||||||
(if L is not NULL).
|
|
||||||
|
|
||||||
Constraints that commute with A and B (and L) are specified via the
|
|
||||||
"constraint" argument, which gives the projection operator for
|
|
||||||
the constraint(s).
|
|
||||||
*/
|
|
||||||
|
|
||||||
void eigensolver_lagrange(evectmatrix Y, real *eigenvals,
|
|
||||||
evectoperator A, void *Adata,
|
|
||||||
evectoperator B, void *Bdata,
|
|
||||||
evectpreconditioner K, void *Kdata,
|
|
||||||
evectconstraint constraint, void *constraint_data,
|
|
||||||
evectoperator L, void *Ldata, real *lag,
|
|
||||||
evectmatrix Work[], int nWork,
|
|
||||||
real tolerance, int *num_iterations,
|
|
||||||
int flags)
|
|
||||||
{
|
|
||||||
real g_lag = 0, d_lag = 0, prev_g_lag = 0;
|
|
||||||
short usingConjugateGradient = 0, use_polak_ribiere = 0,
|
|
||||||
use_linmin = 1;
|
|
||||||
real E, prev_E = 0.0;
|
|
||||||
real d_scale = 1.0;
|
|
||||||
real traceGtX, prev_traceGtX = 0.0;
|
|
||||||
real theta, prev_theta = 0.5;
|
|
||||||
int i, iteration = 0, num_emergency_restarts = 0;
|
|
||||||
mpiglue_clock_t prev_feedback_time;
|
|
||||||
real linmin_improvement = 0;
|
|
||||||
|
|
||||||
G = Work[0];
|
|
||||||
X = Work[1];
|
|
||||||
|
|
||||||
BY = Y;
|
|
||||||
D = X;
|
|
||||||
BD = D; /* storage for B*D (re-use B*Y storage) */
|
|
||||||
prev_G = G;
|
|
||||||
|
|
||||||
restartY:
|
|
||||||
|
|
||||||
eigenvals *= 0.0
|
|
||||||
convergence_history = [10000.0] * n
|
|
||||||
constraint(Y, constraint_data)
|
|
||||||
|
|
||||||
do {
|
|
||||||
real y_norm, gamma_numerator = 0;
|
|
||||||
|
|
||||||
YtBY = Y.H @ Y
|
|
||||||
y_norm = sqrt(real(trace(YtBY)) / Y.p);
|
|
||||||
Y /= y_norm
|
|
||||||
YtBY /= y_norm*y_norm
|
|
||||||
U = YtBY
|
|
||||||
|
|
||||||
if (!sqmatrix_invert(U, 1, S2)) /* non-independent Y columns */
|
|
||||||
/* emergency restart with random Y */
|
|
||||||
...
|
|
||||||
|
|
||||||
/* If trace(1/YtBY) gets big, it means that the columns
|
|
||||||
of Y are becoming nearly parallel. This sometimes happens,
|
|
||||||
especially in the targeted eigensolver, because the
|
|
||||||
preconditioner pushes all the columns towards the ground
|
|
||||||
state. If it gets too big, it seems to be a good idea
|
|
||||||
to re-orthogonalize, resetting conjugate-gradient, as
|
|
||||||
otherwise we start to encounter numerical problems. */
|
|
||||||
if (flags & EIGS_REORTHOGONALIZE) {
|
|
||||||
traceU = real(trace(U))
|
|
||||||
if (traceU > EIGS_TRACE_U_THRESHOLD * U.p) {
|
|
||||||
Y = Y @ sqrtm(U).H /* orthonormalize Y */
|
|
||||||
prev_traceGtX = 0.0;
|
|
||||||
|
|
||||||
YtBY = Y.H @ Y
|
|
||||||
y_norm = sqrt(real(trace(YtBY)) / Y.p)
|
|
||||||
Y /= y_norm
|
|
||||||
YtBY /= y_norm * y_norm
|
|
||||||
U = YtBY
|
|
||||||
|
|
||||||
/* G = AYU; note that U is Hermitian: */
|
|
||||||
G = A @ Y @ U
|
|
||||||
YtAYU = Y.H @ G
|
|
||||||
E = real(trace(YtAYU))
|
|
||||||
|
|
||||||
convergence_history[iteration % EIG_HISTORY_SIZE] =
|
|
||||||
200.0 * fabs(E - prev_E) / (fabs(E) + fabs(prev_E));
|
|
||||||
|
|
||||||
if (iteration > 0 &&
|
|
||||||
fabs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7))
|
|
||||||
break; /* convergence! hooray! */
|
|
||||||
|
|
||||||
/* Compute gradient of functional: G = (1 - BY U Yt) A Y U */
|
|
||||||
G += -Y @ (U @ YtAYU)
|
|
||||||
|
|
||||||
/* set X = precondition(G): */
|
|
||||||
X = K @ G
|
|
||||||
//TIME_OP(time_KZ, K(G, X, Kdata, Y, NULL, YtBY));
|
|
||||||
/* We have to apply the constraint here, in case it doesn't
|
|
||||||
commute with the preconditioner. */
|
|
||||||
constraint(X, constraint_data);
|
|
||||||
|
|
||||||
d_scale = 1.0;
|
|
||||||
|
|
||||||
/* Minimize the trace along Y + lambda*D: */
|
|
||||||
|
|
||||||
if (!use_linmin) {
|
|
||||||
real dE, E2, d2E, t, d_norm;
|
|
||||||
|
|
||||||
/* Here, we do an approximate line minimization along D
|
|
||||||
by evaluating dE (the derivative) at the current point,
|
|
||||||
and the trace E2 at a second point, and then approximating
|
|
||||||
the second derivative d2E by finite differences. Then,
|
|
||||||
we use one step of Newton's method on the derivative.
|
|
||||||
This has the advantage of requiring two fewer O(np^2)
|
|
||||||
matrix multiplications compared to the exact linmin. */
|
|
||||||
|
|
||||||
d_norm = sqrt(real(trace(D.H @ D)) / Y.p);
|
|
||||||
|
|
||||||
/* dE = 2 * tr Gt D. (Use prev_G instead of G so that
|
|
||||||
it works even when we are using Polak-Ribiere.) */
|
|
||||||
dE = 2.0 * SCALAR_RE(evectmatrix_traceXtY(prev_G, D)) / d_norm;
|
|
||||||
|
|
||||||
/* shift Y by prev_theta along D, in the downhill direction: */
|
|
||||||
t = dE < 0 ? -fabs(prev_theta) : fabs(prev_theta);
|
|
||||||
Y += t/d_norm * D
|
|
||||||
|
|
||||||
U = inv(Y.H @ Y)
|
|
||||||
E2 = real(trace((Y.H @ A @ Y).H @ U))
|
|
||||||
|
|
||||||
/* Get finite-difference approximation for the 2nd derivative
|
|
||||||
of the trace. Equivalently, fit to a quadratic of the
|
|
||||||
form: E(theta) = E + dE theta + 1/2 d2E theta^2 */
|
|
||||||
d2E = (E2 - E - dE * t) / (0.5 * t * t);
|
|
||||||
|
|
||||||
theta = -dE/d2E;
|
|
||||||
|
|
||||||
/* If the 2nd derivative is negative, or a big shift
|
|
||||||
in the trace is predicted (compared to the previous
|
|
||||||
iteration), then this approximate line minimization is
|
|
||||||
probably not very good; switch back to the exact
|
|
||||||
line minimization. Hopefully, we won't have to
|
|
||||||
abort like this very often, as it wastes operations. */
|
|
||||||
if (d2E < 0 || -0.5*dE*theta > 20.0 * fabs(E-prev_E)) {
|
|
||||||
if (flags & EIGS_FORCE_APPROX_LINMIN) {
|
|
||||||
} else {
|
|
||||||
use_linmin = 1;
|
|
||||||
evectmatrix_aXpbY(1.0, Y, -t / d_norm, D);
|
|
||||||
prev_theta = atan(prev_theta); /* convert to angle */
|
|
||||||
/* don't do this again: */
|
|
||||||
flags |= EIGS_FORCE_EXACT_LINMIN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* Shift Y by theta, hopefully minimizing the trace: */
|
|
||||||
evectmatrix_aXpbY(1.0, Y, (theta - t) / d_norm, D);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (use_linmin) {
|
|
||||||
d_scale = sqrt(real(trace(D.H @ D)) / Y.p);
|
|
||||||
D /= d_scale
|
|
||||||
|
|
||||||
AD = A @ D
|
|
||||||
DtD = D.H @ D
|
|
||||||
DtAD = D.H @ AD
|
|
||||||
|
|
||||||
YtD = Y.H @ D
|
|
||||||
YtAD = Y.H @ AD
|
|
||||||
symYtD = (YtD + YtD.H) / 2
|
|
||||||
symYtAD = (YtAD + YtAD.H) / 2
|
|
||||||
|
|
||||||
U_sYtD = U @ symYtD.H
|
|
||||||
dE = 2.0 * (real(trace(U.H @ symYtAD)) - real(trace(YtAYU.H @ U_sYtD)))
|
|
||||||
|
|
||||||
S2 = DtD - 4 * symYtD @ U_sYtD
|
|
||||||
d2E = 2.0 * (real(trace(U.H @ DtAD)) -
|
|
||||||
real(trace(YtAYU.H @ U @ S2)) -
|
|
||||||
4.0 * real(trace(U.H @ symYtAD @ U_sYtD)))
|
|
||||||
|
|
||||||
d_lag = lag = trace_YtLY = trace_DtLD = trace_YtLD = 0
|
|
||||||
|
|
||||||
/* this is just Newton-Raphson to find a root of
|
|
||||||
the first derivative: */
|
|
||||||
theta = -dE/d2E;
|
|
||||||
|
|
||||||
if d2E < 0 or abs(theta) >= K_PI:
|
|
||||||
theta = -abs(prev_theta) * numpy.sign(dE)
|
|
||||||
|
|
||||||
/* Set S1 to YtAYU * YtBY = YtAY for use in linmin.
|
|
||||||
(tfd.YtAY == S1). */
|
|
||||||
YtAY = YtAYU @ YtBY.H
|
|
||||||
|
|
||||||
theta = linmin(&new_E, &new_dE, theta, E, dE,
|
|
||||||
0.1, min(tolerance, 1e-6), 1e-14,
|
|
||||||
0, -numpy.sign(dE) * K_PI,
|
|
||||||
trace_func, &tfd,
|
|
||||||
flags & EIGS_VERBOSE)
|
|
||||||
linmin_improvement = abs(E - new_E) * 2.0/abs(E + new_E);
|
|
||||||
|
|
||||||
/* Shift Y to new location minimizing the trace along D: */
|
|
||||||
Y *= cos(theta)
|
|
||||||
Y += D * sin(theta)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* In exact arithmetic, we don't need to do this, but in practice
|
|
||||||
it is probably a good idea to keep errors from adding up and
|
|
||||||
eventually violating the constraints. */
|
|
||||||
constraint(Y, constraint_data);
|
|
||||||
|
|
||||||
prev_traceGtX = traceGtX;
|
|
||||||
prev_theta = theta;
|
|
||||||
prev_E = E;
|
|
||||||
|
|
||||||
/* Finally, we use the times for the various operations to
|
|
||||||
help us pick an algorithm for the next iteration: */
|
|
||||||
{
|
|
||||||
real t_exact, t_approx;
|
|
||||||
t_exact = EXACT_LINMIN_TIME(time_AZ, time_KZ, time_ZtW,
|
|
||||||
time_ZS, time_ZtZ, time_linmin);
|
|
||||||
t_approx = APPROX_LINMIN_TIME(time_AZ, time_KZ, time_ZtW,
|
|
||||||
time_ZS, time_ZtZ);
|
|
||||||
|
|
||||||
/* Sum the times over the processors so that all the
|
|
||||||
processors compare the same, average times. */
|
|
||||||
mpi_allreduce_1(&t_exact,
|
|
||||||
real, SCALAR_MPI_TYPE, MPI_SUM, mpb_comm);
|
|
||||||
mpi_allreduce_1(&t_approx,
|
|
||||||
real, SCALAR_MPI_TYPE, MPI_SUM, mpb_comm);
|
|
||||||
|
|
||||||
if (!(flags & EIGS_FORCE_EXACT_LINMIN) &&
|
|
||||||
linmin_improvement > 0 &&
|
|
||||||
linmin_improvement <= APPROX_LINMIN_IMPROVEMENT_THRESHOLD &&
|
|
||||||
t_exact > t_approx * APPROX_LINMIN_SLOWDOWN_GUESS) {
|
|
||||||
use_linmin = 0;
|
|
||||||
}
|
|
||||||
else if (!(flags & EIGS_FORCE_APPROX_LINMIN)) {
|
|
||||||
use_linmin = 1;
|
|
||||||
prev_theta = atan(prev_theta); /* convert to angle */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (++iteration < EIGENSOLVER_MAX_ITERATIONS);
|
|
||||||
|
|
||||||
evectmatrix_XtX(U, Y, S2);
|
|
||||||
CHECK(sqmatrix_invert(U, 1, S2), "singular YtBY at end");
|
|
||||||
eigensolver_get_eigenvals_aux(Y, eigenvals, A, Adata,
|
|
||||||
X, G, U, S1, S2);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -53,7 +53,7 @@ def rayleigh_quotient_iteration(operator: sparse.spmatrix or spalg.LinearOperato
|
|||||||
:return: (eigenvalue, eigenvector)
|
:return: (eigenvalue, eigenvector)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
_test = operator - sparse.eye(operator.shape)
|
_test = operator - sparse.eye(operator.shape[0])
|
||||||
shift = lambda eigval: eigval * sparse.eye(operator.shape[0])
|
shift = lambda eigval: eigval * sparse.eye(operator.shape[0])
|
||||||
if solver is None:
|
if solver is None:
|
||||||
solver = spalg.spsolve
|
solver = spalg.spsolve
|
||||||
|
Loading…
Reference in New Issue
Block a user