diff --git a/README.md b/README.md index 5a3f49c..35a190c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # fdfd_tools +** DEPRECATED ** + +The functionality in this module is now provided by [meanas](https://mpxd.net/code/jan/meanas). + +----------------------- + **fdfd_tools** is a python package containing utilities for creating and analyzing 2D and 3D finite-difference frequency-domain (FDFD) electromagnetic simulations. diff --git a/examples/bloch.py b/examples/bloch.py index 6f34a3b..8bbd30e 100644 --- a/examples/bloch.py +++ b/examples/bloch.py @@ -30,7 +30,7 @@ g2.shifts = numpy.zeros((6,3)) g2.grids = [numpy.zeros(g.shape) for _ in range(6)] epsilon = [g.grids[0],] * 3 -reciprocal_lattice = numpy.diag(1000/numpy.array([x_period, y_period, z_period])) #cols are vectors +reciprocal_lattice = numpy.diag(1e6/numpy.array([x_period, y_period, z_period])) #cols are vectors #print('Finding k at 1550nm') #k, f = bloch.find_k(frequency=1/1550, @@ -47,7 +47,7 @@ for k0x in [.25]: k0 = numpy.array([k0x, 0, 0]) kmag = norm(reciprocal_lattice @ k0) - tolerance = 1e-4 * (1/1550) # df = f * dn_eff / n + tolerance = (1e6/1550) * 1e-4/1.5 # df = f * dn_eff / n logger.info('tolerance {}'.format(tolerance)) n, v = bloch.eigsolve(4, k0, G_matrix=reciprocal_lattice, epsilon=epsilon, tolerance=tolerance) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index 7e41e14..b172e21 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -359,6 +359,8 @@ def eigsolve(num_modes: int, """ h_size = 2 * epsilon[0].size + kmag = norm(G_matrix @ k0) + ''' Generate the operators ''' @@ -390,7 +392,7 @@ def eigsolve(num_modes: int, onto the space orthonormal to Z. If approx_grad is True, the approximate inverse of the maxwell operator is used to precondition the gradient. """ - z = Z.reshape(y_shape) + z = Z.view(dtype=complex).reshape(y_shape) U = numpy.linalg.inv(z.conj().T @ z) zU = z @ U AzU = scipy_op @ zU @@ -400,27 +402,49 @@ def eigsolve(num_modes: int, df_dy = scipy_iop @ (AzU - zU @ zTAzU) else: df_dy = (AzU - zU @ zTAzU) - return numpy.abs(f), numpy.sign(f) * numpy.real(df_dy).ravel() + + df_dy_flat = df_dy.view(dtype=float).ravel() + return numpy.abs(f), numpy.sign(f) * df_dy_flat ''' Use the conjugate gradient method and the approximate gradient calculation to quickly find approximate eigenvectors. ''' result = scipy.optimize.minimize(rayleigh_quotient, - numpy.random.rand(*y_shape), + numpy.random.rand(*y_shape, 2), jac=True, - method='CG', - tol=1e-5, - options={'maxiter': 30, 'disp':True}) + method='L-BFGS-B', + tol=1e-20, + options={'maxiter': 2000, 'gtol':0, 'ftol':1e-20 , 'disp':True})#, 'maxls':80, 'm':30}) + + + result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, True), + result.x, + jac=True, + method='L-BFGS-B', + tol=1e-20, + options={'maxiter': 2000, 'gtol':0, 'disp':True}) result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False), result.x, jac=True, - method='CG', - tol=1e-13, - options={'maxiter': 100, 'disp':True}) + method='L-BFGS-B', + tol=1e-20, + options={'maxiter': 2000, 'gtol':0, 'disp':True}) - z = result.x.reshape(y_shape) + for i in range(20): + result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False), + result.x, + jac=True, + method='L-BFGS-B', + tol=1e-20, + options={'maxiter': 70, 'gtol':0, 'disp':True}) + if result.nit == 0: + # We took 0 steps, so re-running won't help + break + + + z = result.x.view(dtype=complex).reshape(y_shape) ''' Recover eigenvectors from Z @@ -432,26 +456,15 @@ def eigsolve(num_modes: int, eigvals, w_eigvecs = numpy.linalg.eig(w) eigvecs = y @ w_eigvecs - for i in range(len(eigvals)): - v = eigvecs[:, i] - n = eigvals[i] - v /= norm(v) - logger.info('eigness {}: {}'.format(i, norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v ))) - - for i in range(len(eigvals)): - logger.info('Refining eigenvector {}'.format(i)) - eigvals[i], eigvecs[:, i] = rayleigh_quotient_iteration(scipy_op, - guess_vector=eigvecs[:, i], - iterations=40, - tolerance=tolerance * numpy.real(numpy.sqrt(eigvals[i])) * 2, - solver = lambda A, b: spalg.bicgstab(A, b, maxiter=200)[0]) - for i in range(len(eigvals)): v = eigvecs[:, i] n = eigvals[i] v /= norm(v) eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v ) - logger.info('eigness {}: {}'.format(i, eigness)) + f = numpy.sqrt(-numpy.real(n)) + df = numpy.sqrt(-numpy.real(n + eigness)) + neff_err = kmag * (1/df - 1/f) + logger.info('eigness {}: {}\n neff_err: {}'.format(i, eigness, neff_err)) order = numpy.argsort(numpy.abs(eigvals)) return eigvals[order], eigvecs.T[order] @@ -498,245 +511,3 @@ def find_k(frequency: float, return res.x * direction, res.fun + frequency - -#def eigsolve2(num_modes: int, -# k0: numpy.ndarray, -# G_matrix: numpy.ndarray, -# epsilon: field_t, -# mu: field_t = None, -# tolerance = 1e-8, -# ) -> Tuple[numpy.ndarray, numpy.ndarray]: -# """ -# Find the first (lowest-frequency) num_modes eigenmodes with Bloch wavevector -# k0 of the specified structure. -# -# :param k0: Bloch wavevector, [k0x, k0y, k0z]. -# :param G_matrix: 3x3 matrix, with reciprocal lattice vectors as columns. -# :param epsilon: Dielectric constant distribution for the simulation. -# All fields are sampled at cell centers (i.e., NOT Yee-gridded) -# :param mu: Magnetic permability distribution for the simulation. -# Default None (1 everywhere). -# :return: (eigenvalues, eigenvectors) where eigenvalues[i] corresponds to the -# vector eigenvectors[i, :] -# """ -# h_size = 2 * epsilon[0].size -# -# -# ''' -# Generate the operators -# ''' -# mop = maxwell_operator(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu) -# imop = inverse_maxwell_operator_approx(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu) -# -# scipy_op = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=mop) -# scipy_iop = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=imop) -# -# y_shape = (h_size, num_modes) -# -# -# prev_E = 0 -# d_scale = 1 -# prev_traceGtKG = 0 -# prev_theta = 0.5 -# D = numpy.zeros(shape=y_shape, dtype=complex) -# -# y0 = None -# if y0 is None: -# z = numpy.random.rand(*y_shape, dtype=complex) -# else: -# z = y0 -# -# while True: -# z2 = z.conj().T @ z -# z_norm = numpy.sqrt(numpy.real(numpy.trace(z2))) / num_modes -# z /= z_norm -# z2 /= z_norm * z_norm -# try: -# U = numpy.linalg.inv(z2) -# except numpy.linalg.LinAlgError: -# z = numpy.random.rand(*y_shape, dtype=complex) -# continue -# -# trace_U = numpy.real(numpy.trace(U)) -# if trace_U > 1e8 * num_modes: -# z = z @ scipy.linalg.sqrtm(U).conj().T -# prev_trace GtX = 0 -# continue -# break -# -# -# def rayleigh_quotient(Z: numpy.ndarray, approx_grad: bool = True): -# """ -# Absolute value of the block Rayleigh quotient, and the associated gradient. -# -# See Johnson and Joannopoulos, Opt. Expr. 8, 3 (2001) for details (full -# citation in module docstring). -# -# === -# -# Notes on my understanding of the procedure: -# -# Minimize f(Y) = |trace((Y.H @ A @ Y)|, making use of Y = Z @ inv(Z.H @ Z)^(1/2) -# (a polar orthogonalization of Y). This gives f(Z) = |trace(Z.H @ A @ Z @ U)|, -# where U = inv(Z.H @ Z). We minimize the absolute value to find the eigenvalues -# with smallest magnitude. -# -# The gradient is P @ (A @ Z @ U), where P = (1 - Z @ U @ Z.H) is a projection -# onto the space orthonormal to Z. If approx_grad is True, the approximate -# inverse of the maxwell operator is used to precondition the gradient. -# """ -# z = Z.reshape(y_shape) -# U = numpy.linalg.inv(z.conj().T @ z) -# zU = z @ U -# AzU = scipy_op @ zU -# zTAzU = z.conj().T @ AzU -# f = numpy.real(numpy.trace(zTAzU)) -# if approx_grad: -# df_dy = scipy_iop @ (AzU - zU @ zTAzU) -# else: -# df_dy = (AzU - zU @ zTAzU) -# logging.info('f={}'.format(f)) -# return numpy.abs(f), numpy.sign(f) * numpy.real(df_dy).ravel() -# -# max_iters = 10000 #TODO -# for iter in range(max_iters): -# f, G = rayleigh_quotient(z, False) -# -# if iter > 0 and abs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7): -# break -# -# KG = scipy_iop @ G -# traceGtKG = numpy.real(numpy.trace(G.conj().T @ KG)) + g_lag * g_lag -# gamma_numerator = traceGtKG -# -# reset_iters = 1000 #TODO -# if prev_trace_GtKG == 0 or iter % reset_iters == 0: -# gamma = 0 -# else: -# gamma = gamma_numerator / prev_trace_GtKG -# -# D = gamma * d_scale * D + KG -# d_lag = gamma * d_scale * d_lag + g_lag -# -# -# d_scale = numpy.sqrt(numpy.real(numpy.sum(D.conj() * D))) / num_modes -# D /= d_scale -# -# AD = A @ D -# DtD = D.conj().T @ D -# DtAD = D.conj().T @ AD -# -# YtD = z.conj().T @ D -# YtAD = z.conj().T @ AD -# symYtD = (YtD + YtD.conj().T) * 0.5 -# symYtAD = (YtD + YtAD.conj().T) * 0.5 -# -# U_sYtD = U @ symYtD #.H -- shouldn't matter? -# -# dE = 2.0 * (real(trace(U.H @ symYtAD)) - real(trace(YtAYU.H @ U_sYtD))) -# -# S2 = DtD - 4 * symYtD @ U_sYtD -# d2E = 2.0 * (real(trace(U.conj().T @ DtAD)) - -# real(trace(YtAYU.conj().T @ U @ S2)) - -# 4.0 * real(trace(U.conj().T @ symYtAD @ U_sYtD))) -# -# d_lag = lag = trace_YtLY = trace_DtLD = trace_YtLD = 0 -# -# # Newton-Raphson to find a root of the first derivative: -# theta = -dE/d2E -# -# if d2E < 0 or abs(theta) >= K_PI: -# theta = -abs(prev_theta) * numpy.sign(dE) -# -# # YtAYU * YtBY = YtAY for use in linmin. -# YtAY = YtAYU @ YtBY.conj().T -# -# theta, new_E, new_dE = linmin(theta, E, dE, 0.1, min(tolerance, 1e-6), 1e-14, 0, -numpy.sign(dE) * K_PI, trace_func, tfd) -# -# improvement = numpy.abs(E - new_E) * 2 / numpy.abs(E + new_E) -# logging.info('linmin improvement {}'.format(improvement)) -# z *= numpy.cos(theta) -# z += D * numpy.sin(theta) -# -# -# prev_traceGtKG = trace_GtKG -# prev_theta = theta -# prev_E = E -# -# -# def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func): -# if df0 > 0: -# x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq)) -# return -x0, f0, -df0 -# elif df0 == 0: -# return 0, f0, df0 -# else: -# x = x_guess -# fx = f0 -# dfx = df0 -# -# dsrch(xxx,,) -# for i in range(int(1e6)): -# if task != 'F': -# logging.info('search converged in {} iterations'.format(i)) -# break -# fx = f(x, dfx) -# dsrch(...) -# -# return x, fx, dfx -# -# -# -# ''' -# Use the conjugate gradient method and the approximate gradient calculation to -# quickly find approximate eigenvectors. -# ''' -# #result = scipy.optimize.minimize(rayleigh_quotient, -# # numpy.random.rand(*y_shape), -# # jac=True, -# # method='CG', -# # tol=1e-5, -# # options={'maxiter': 30, 'disp':True}) -# -# #result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False), -# # result.x, -# # jac=True, -# # method='CG', -# # tol=1e-13, -# # options={'maxiter': 100, 'disp':True}) -# # -# #z = result.x.reshape(y_shape) -# -# ''' -# Recover eigenvectors from Z -# ''' -# U = numpy.linalg.inv(z.conj().T @ z) -# y = z @ scipy.linalg.sqrtm(U) -# w = y.conj().T @ (scipy_op @ y) -# -# eigvals, w_eigvecs = numpy.linalg.eig(w) -# eigvecs = y @ w_eigvecs -# -# for i in range(len(eigvals)): -# v = eigvecs[:, i] -# n = eigvals[i] -# v /= norm(v) -# logger.info('eigness {}: {}'.format(i, norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v ))) -# -# for i in range(len(eigvals)): -# logger.info('Refining eigenvector {}'.format(i)) -# eigvals[i], eigvecs[:, i] = rayleigh_quotient_iteration(scipy_op, -# guess_vector=eigvecs[:, i], -# iterations=40, -# tolerance=tolerance * numpy.real(numpy.sqrt(eigvals[i])) * 2, -# solver = lambda A, b: spalg.bicgstab(A, b, maxiter=200)[0]) -# -# for i in range(len(eigvals)): -# v = eigvecs[:, i] -# n = eigvals[i] -# v /= norm(v) -# eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v ) -# logger.info('eigness {}: {}'.format(i, eigness)) -# -# order = numpy.argsort(numpy.abs(eigvals)) -# return eigvals[order], eigvecs.T[order] diff --git a/fdfd_tools/eigensolver.c b/fdfd_tools/eigensolver.c deleted file mode 100644 index 4109879..0000000 --- a/fdfd_tools/eigensolver.c +++ /dev/null @@ -1,412 +0,0 @@ -/* Copyright (C) 1999-2014 Massachusetts Institute of Technology. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include - -#include "config.h" -#include -#include -#include -#include -#include -#include - -#include "eigensolver.h" -#include "linmin.h" - -extern void eigensolver_get_eigenvals_aux(evectmatrix Y, real *eigenvals, - evectoperator A, void *Adata, - evectmatrix Work1, evectmatrix Work2, - sqmatrix U, sqmatrix Usqrt, - sqmatrix Uwork); - -#define STRINGIZEx(x) #x /* a hack so that we can stringize macro values */ -#define STRINGIZE(x) STRINGIZEx(x) - -#define K_PI 3.141592653589793238462643383279502884197 -#define MIN2(a,b) ((a) < (b) ? (a) : (b)) -#define MAX2(a,b) ((a) > (b) ? (a) : (b)) - -#if defined(SCALAR_LONG_DOUBLE_PREC) -# define fabs fabsl -# define cos cosl -# define sin sinl -# define sqrt sqrtl -# define atan atanl -# define atan2 atan2l -#endif - -/* Evalutate op, and set t to the elapsed time (in seconds). */ -#define TIME_OP(t, op) { \ - mpiglue_clock_t xxx_time_op_start_time = MPIGLUE_CLOCK; \ - { \ - op; \ - } \ - (t) = MPIGLUE_CLOCK_DIFF(MPIGLUE_CLOCK, xxx_time_op_start_time); \ -} - -/**************************************************************************/ - -#define EIGENSOLVER_MAX_ITERATIONS 100000 -#define FEEDBACK_TIME 4.0 /* elapsed time before we print progress feedback */ - -/* Number of iterations after which to reset conjugate gradient - direction to steepest descent. (Picked after some experimentation. - Is there a better basis? Should this change with the problem - size?) */ -#define CG_RESET_ITERS 70 - -/* Threshold for trace(1/YtBY) = trace(U) before we reorthogonalize: */ -#define EIGS_TRACE_U_THRESHOLD 1e8 - -/**************************************************************************/ - -/* estimated times/iteration for different iteration schemes, based - on the measure times for various operations and the operation counts: */ - -#define EXACT_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ, t_linmin) \ - ((t_AZ)*2 + (t_KZ) + (t_ZtW)*4 + (t_ZS)*2 + (t_ZtZ)*2 + (t_linmin)) - -#define APPROX_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ) \ - ((t_AZ)*2 + (t_KZ) + (t_ZtW)*2 + (t_ZS)*2 + (t_ZtZ)*2) - -/* Guess for the convergence slowdown factor due to the approximate - line minimization. It is probably best to be conservative, as the - exact line minimization is more reliable and we only want to - abandon it if there is a big speed gain. */ -#define APPROX_LINMIN_SLOWDOWN_GUESS 2.0 - -/* We also don't want to use the approximate line minimization if - the exact line minimization makes a big difference in the value - of the trace that's achieved (i.e. if one step of Newton's method - on the trace derivative does not do a good job). The following - is the maximum improvement by the exact line minimization (over - one step of Newton) at which we'll allow the use of approximate line - minimization. */ -#define APPROX_LINMIN_IMPROVEMENT_THRESHOLD 0.05 - -/**************************************************************************/ - -typedef struct { - sqmatrix YtAY, DtAD, symYtAD, YtBY, DtBD, symYtBD, S1, S2, S3; - real lag, d_lag, trace_YtLY, trace_DtLD, trace_YtLD; -} trace_func_data; - -static linmin_real trace_func(linmin_real theta, linmin_real *trace_deriv, void *data) -{ - linmin_real trace; - trace_func_data *d = (trace_func_data *) data; - linmin_real c = cos(theta), s = sin(theta); - - YDNi = c*c * YtY + s*s * DtD + 2*s*c * symYtD - YDNi.inv() - - if not YDNi.inv(): - /* if c or s is small, we sometimes run into numerical - difficulties, and it is better to use the Taylor expansion */ - if c < 1e-4 * s and c != 0: - YDNi = DtD.inv() - S3 = (YDNi @ symYtD.H) @ YDNi.H - YDNi = 1/(s*s) * YDNi - 2*c/(s*s*s) * S3 - elif s < 1e-4 * c and s != 0: - YDNi = YtY.inv() - S3 = (YDNi @ symYtD.H) @ YDNi.H - YDNi = 1/(c*c) * YDNi - 2*s/(c*c*c) * S3 - else: - CHECK(0, "inexplicable singularity in linmin trace_func") - - YADN = c*c * YtAY + s*s * DtAD + 2*s*c * smYtAD - trace = real(trace(YADN.H @ YDNi)) + (c*c * trace_YtLY + s*s * trace_DtLD + 2*s*c * trace_YtLD) * (c * lag + s * d_lag) - - if (trace_deriv) { - c2 = cos(2*theta) - s2 = sin(2*theta); - - S3 = -0.5 * s2 * (YtAY - DtAD) + c2 * symYtAD - - *trace_deriv = real(trace(YDNi.H @ S3)) - - S2 = (YDNi @ YADN.H) @ YDNi.H - S3 = -0.5 * s2 * (YtY - DtD) + c2 * symYtD - - *trace_deriv -= real(trace(S2.H @ S3)) - *trace_deriv *= 2 - - *trace_deriv += (-s2 * trace_YtLY + s2 * trace_DtLD - + 2*c2 * trace_YtLD) * (c * lag + s * d_lag); - *trace_deriv += (c*c * trace_YtLY + s*s * trace_DtLD - + 2*s*c * trace_YtLD) * (-s * lag + c * d_lag); - } - - return trace; -} - -/**************************************************************************/ - -#define EIG_HISTORY_SIZE 5 - -/* find generalized eigenvectors Y of (A,B) by minimizing Rayleigh quotient - - tr [ Yt A Y / (Yt B Y) ] + lag * tr [ Yt L Y ] - - where lag is a Lagrange multiplier and L is a Hermitian operator - implementing some constraint tr [ Yt L Y ] = 0 on the eigenvectors - (if L is not NULL). - - Constraints that commute with A and B (and L) are specified via the - "constraint" argument, which gives the projection operator for - the constraint(s). -*/ - -void eigensolver_lagrange(evectmatrix Y, real *eigenvals, - evectoperator A, void *Adata, - evectoperator B, void *Bdata, - evectpreconditioner K, void *Kdata, - evectconstraint constraint, void *constraint_data, - evectoperator L, void *Ldata, real *lag, - evectmatrix Work[], int nWork, - real tolerance, int *num_iterations, - int flags) -{ - real g_lag = 0, d_lag = 0, prev_g_lag = 0; - short usingConjugateGradient = 0, use_polak_ribiere = 0, - use_linmin = 1; - real E, prev_E = 0.0; - real d_scale = 1.0; - real traceGtX, prev_traceGtX = 0.0; - real theta, prev_theta = 0.5; - int i, iteration = 0, num_emergency_restarts = 0; - mpiglue_clock_t prev_feedback_time; - real linmin_improvement = 0; - - G = Work[0]; - X = Work[1]; - - BY = Y; - D = X; - BD = D; /* storage for B*D (re-use B*Y storage) */ - prev_G = G; - - restartY: - - eigenvals *= 0.0 - convergence_history = [10000.0] * n - constraint(Y, constraint_data) - - do { - real y_norm, gamma_numerator = 0; - - YtBY = Y.H @ Y - y_norm = sqrt(real(trace(YtBY)) / Y.p); - Y /= y_norm - YtBY /= y_norm*y_norm - U = YtBY - - if (!sqmatrix_invert(U, 1, S2)) /* non-independent Y columns */ - /* emergency restart with random Y */ - ... - - /* If trace(1/YtBY) gets big, it means that the columns - of Y are becoming nearly parallel. This sometimes happens, - especially in the targeted eigensolver, because the - preconditioner pushes all the columns towards the ground - state. If it gets too big, it seems to be a good idea - to re-orthogonalize, resetting conjugate-gradient, as - otherwise we start to encounter numerical problems. */ - if (flags & EIGS_REORTHOGONALIZE) { - traceU = real(trace(U)) - if (traceU > EIGS_TRACE_U_THRESHOLD * U.p) { - Y = Y @ sqrtm(U).H /* orthonormalize Y */ - prev_traceGtX = 0.0; - - YtBY = Y.H @ Y - y_norm = sqrt(real(trace(YtBY)) / Y.p) - Y /= y_norm - YtBY /= y_norm * y_norm - U = YtBY - - /* G = AYU; note that U is Hermitian: */ - G = A @ Y @ U - YtAYU = Y.H @ G - E = real(trace(YtAYU)) - - convergence_history[iteration % EIG_HISTORY_SIZE] = - 200.0 * fabs(E - prev_E) / (fabs(E) + fabs(prev_E)); - - if (iteration > 0 && - fabs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7)) - break; /* convergence! hooray! */ - - /* Compute gradient of functional: G = (1 - BY U Yt) A Y U */ - G += -Y @ (U @ YtAYU) - - /* set X = precondition(G): */ - X = K @ G - //TIME_OP(time_KZ, K(G, X, Kdata, Y, NULL, YtBY)); - /* We have to apply the constraint here, in case it doesn't - commute with the preconditioner. */ - constraint(X, constraint_data); - - d_scale = 1.0; - - /* Minimize the trace along Y + lambda*D: */ - - if (!use_linmin) { - real dE, E2, d2E, t, d_norm; - - /* Here, we do an approximate line minimization along D - by evaluating dE (the derivative) at the current point, - and the trace E2 at a second point, and then approximating - the second derivative d2E by finite differences. Then, - we use one step of Newton's method on the derivative. - This has the advantage of requiring two fewer O(np^2) - matrix multiplications compared to the exact linmin. */ - - d_norm = sqrt(real(trace(D.H @ D)) / Y.p); - - /* dE = 2 * tr Gt D. (Use prev_G instead of G so that - it works even when we are using Polak-Ribiere.) */ - dE = 2.0 * SCALAR_RE(evectmatrix_traceXtY(prev_G, D)) / d_norm; - - /* shift Y by prev_theta along D, in the downhill direction: */ - t = dE < 0 ? -fabs(prev_theta) : fabs(prev_theta); - Y += t/d_norm * D - - U = inv(Y.H @ Y) - E2 = real(trace((Y.H @ A @ Y).H @ U)) - - /* Get finite-difference approximation for the 2nd derivative - of the trace. Equivalently, fit to a quadratic of the - form: E(theta) = E + dE theta + 1/2 d2E theta^2 */ - d2E = (E2 - E - dE * t) / (0.5 * t * t); - - theta = -dE/d2E; - - /* If the 2nd derivative is negative, or a big shift - in the trace is predicted (compared to the previous - iteration), then this approximate line minimization is - probably not very good; switch back to the exact - line minimization. Hopefully, we won't have to - abort like this very often, as it wastes operations. */ - if (d2E < 0 || -0.5*dE*theta > 20.0 * fabs(E-prev_E)) { - if (flags & EIGS_FORCE_APPROX_LINMIN) { - } else { - use_linmin = 1; - evectmatrix_aXpbY(1.0, Y, -t / d_norm, D); - prev_theta = atan(prev_theta); /* convert to angle */ - /* don't do this again: */ - flags |= EIGS_FORCE_EXACT_LINMIN; - } - } - else { - /* Shift Y by theta, hopefully minimizing the trace: */ - evectmatrix_aXpbY(1.0, Y, (theta - t) / d_norm, D); - } - } - if (use_linmin) { - d_scale = sqrt(real(trace(D.H @ D)) / Y.p); - D /= d_scale - - AD = A @ D - DtD = D.H @ D - DtAD = D.H @ AD - - YtD = Y.H @ D - YtAD = Y.H @ AD - symYtD = (YtD + YtD.H) / 2 - symYtAD = (YtAD + YtAD.H) / 2 - - U_sYtD = U @ symYtD.H - dE = 2.0 * (real(trace(U.H @ symYtAD)) - real(trace(YtAYU.H @ U_sYtD))) - - S2 = DtD - 4 * symYtD @ U_sYtD - d2E = 2.0 * (real(trace(U.H @ DtAD)) - - real(trace(YtAYU.H @ U @ S2)) - - 4.0 * real(trace(U.H @ symYtAD @ U_sYtD))) - - d_lag = lag = trace_YtLY = trace_DtLD = trace_YtLD = 0 - - /* this is just Newton-Raphson to find a root of - the first derivative: */ - theta = -dE/d2E; - - if d2E < 0 or abs(theta) >= K_PI: - theta = -abs(prev_theta) * numpy.sign(dE) - - /* Set S1 to YtAYU * YtBY = YtAY for use in linmin. - (tfd.YtAY == S1). */ - YtAY = YtAYU @ YtBY.H - - theta = linmin(&new_E, &new_dE, theta, E, dE, - 0.1, min(tolerance, 1e-6), 1e-14, - 0, -numpy.sign(dE) * K_PI, - trace_func, &tfd, - flags & EIGS_VERBOSE) - linmin_improvement = abs(E - new_E) * 2.0/abs(E + new_E); - - /* Shift Y to new location minimizing the trace along D: */ - Y *= cos(theta) - Y += D * sin(theta) - } - - /* In exact arithmetic, we don't need to do this, but in practice - it is probably a good idea to keep errors from adding up and - eventually violating the constraints. */ - constraint(Y, constraint_data); - - prev_traceGtX = traceGtX; - prev_theta = theta; - prev_E = E; - - /* Finally, we use the times for the various operations to - help us pick an algorithm for the next iteration: */ - { - real t_exact, t_approx; - t_exact = EXACT_LINMIN_TIME(time_AZ, time_KZ, time_ZtW, - time_ZS, time_ZtZ, time_linmin); - t_approx = APPROX_LINMIN_TIME(time_AZ, time_KZ, time_ZtW, - time_ZS, time_ZtZ); - - /* Sum the times over the processors so that all the - processors compare the same, average times. */ - mpi_allreduce_1(&t_exact, - real, SCALAR_MPI_TYPE, MPI_SUM, mpb_comm); - mpi_allreduce_1(&t_approx, - real, SCALAR_MPI_TYPE, MPI_SUM, mpb_comm); - - if (!(flags & EIGS_FORCE_EXACT_LINMIN) && - linmin_improvement > 0 && - linmin_improvement <= APPROX_LINMIN_IMPROVEMENT_THRESHOLD && - t_exact > t_approx * APPROX_LINMIN_SLOWDOWN_GUESS) { - use_linmin = 0; - } - else if (!(flags & EIGS_FORCE_APPROX_LINMIN)) { - use_linmin = 1; - prev_theta = atan(prev_theta); /* convert to angle */ - } - } - } while (++iteration < EIGENSOLVER_MAX_ITERATIONS); - - evectmatrix_XtX(U, Y, S2); - CHECK(sqmatrix_invert(U, 1, S2), "singular YtBY at end"); - eigensolver_get_eigenvals_aux(Y, eigenvals, A, Adata, - X, G, U, S1, S2); - -} - diff --git a/fdfd_tools/eigensolvers.py b/fdfd_tools/eigensolvers.py index e348cd7..d0ee541 100644 --- a/fdfd_tools/eigensolvers.py +++ b/fdfd_tools/eigensolvers.py @@ -53,7 +53,7 @@ def rayleigh_quotient_iteration(operator: sparse.spmatrix or spalg.LinearOperato :return: (eigenvalue, eigenvector) """ try: - _test = operator - sparse.eye(operator.shape) + _test = operator - sparse.eye(operator.shape[0]) shift = lambda eigval: eigval * sparse.eye(operator.shape[0]) if solver is None: solver = spalg.spsolve