diff --git a/examples/bloch.py b/examples/bloch.py
new file mode 100644
index 0000000..6f34a3b
--- /dev/null
+++ b/examples/bloch.py
@@ -0,0 +1,68 @@
+import numpy, scipy, gridlock, fdfd_tools
+from fdfd_tools import bloch
+from numpy.linalg import norm
+import logging
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
+
+dx = 40
+x_period = 400
+y_period = z_period = 2000
+g = gridlock.Grid([numpy.arange(-x_period/2, x_period/2, dx),
+                   numpy.arange(-1000, 1000, dx),
+                   numpy.arange(-1000, 1000, dx)],
+                  shifts=numpy.array([[0,0,0]]),
+                  initial=1.445**2,
+                  periodic=True)
+
+g.draw_cuboid([0,0,0], [200e8, 220, 220], eps=3.47**2)   # x-extent >> x_period: waveguide is continuous along x
+
+#x_period = y_period = z_period = 13000
+#g = gridlock.Grid([numpy.arange(3), ]*3,
+#                  shifts=numpy.array([[0, 0, 0]]),
+#                  initial=2.0**2,
+#                  periodic=True)
+
+g2 = g.copy()
+g2.shifts = numpy.zeros((6,3))
+g2.grids = [numpy.zeros(g.shape) for _ in range(6)]
+
+epsilon = [g.grids[0],] * 3
+reciprocal_lattice = numpy.diag(1000/numpy.array([x_period, y_period, z_period]))  # columns are reciprocal lattice vectors
+
+#print('Finding k at 1550nm')
+#k, f = bloch.find_k(frequency=1/1550,
+#                    tolerance=(1/1550 - 1/1551),
+#                    direction=[1, 0, 0],
+#                    G_matrix=reciprocal_lattice,
+#                    epsilon=epsilon,
+#                    band=0)
+#
+#print("k={}, f={}, 1/f={}, k/f={}".format(k, f, 1/f, norm(reciprocal_lattice @ k) / f ))
+
+print('Finding f at [0.25, 0, 0]')
+for k0x in [.25]:
+    k0 = numpy.array([k0x, 0, 0])
+
+    kmag = norm(reciprocal_lattice @ k0)
+    tolerance = 1e-4 * (1/1550)     # df = f * dn_eff / n
+    logger.info('tolerance {}'.format(tolerance))
+
+    n, v = bloch.eigsolve(4, k0, G_matrix=reciprocal_lattice, epsilon=epsilon, tolerance=tolerance)
+    v2e = bloch.hmn_2_exyz(k0, G_matrix=reciprocal_lattice, epsilon=epsilon)
+    v2h = bloch.hmn_2_hxyz(k0, G_matrix=reciprocal_lattice, epsilon=epsilon)
+    ki = bloch.generate_kmn(k0, reciprocal_lattice, g.shape)
+
+    z = 0
+    e = v2e(v[0])
+    for i in range(3):
+        g2.grids[i] += numpy.real(e[i])
+        g2.grids[i+3] += numpy.imag(e[i])
+
+    f = numpy.sqrt(numpy.abs(n))    # TODO
+    print('k0x = {:.3g}\n eigval = {}\n f = {}\n'.format(k0x, n, f))
+    n_eff = norm(reciprocal_lattice @ k0) / f
+    print('kmag/f = n_eff = {}\n wl = {}\n'.format(n_eff, 1/f ))
+
diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py
index ad7c555..7e41e14 100644
--- a/fdfd_tools/bloch.py
+++ b/fdfd_tools/bloch.py
@@ -438,23 +438,22 @@ def eigsolve(num_modes: int,
         v /= norm(v)
         logger.info('eigness {}: {}'.format(i, norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )))
 
-    ev2 = eigvecs.copy()
     for i in range(len(eigvals)):
         logger.info('Refining eigenvector {}'.format(i))
-        eigvals[i], ev2[:, i] = rayleigh_quotient_iteration(scipy_op,
-                                                            guess_vector=eigvecs[:, i],
-                                                            iterations=40,
-                                                            tolerance=tolerance * numpy.real(numpy.sqrt(eigvals[i])) * 2,
-                                                            solver = lambda A, b: spalg.bicgstab(A, b, maxiter=200)[0])
-    eigvecs = ev2
-    order = numpy.argsort(numpy.abs(eigvals))
+        eigvals[i], eigvecs[:, i] = rayleigh_quotient_iteration(scipy_op,
+                                                                guess_vector=eigvecs[:, i],
+                                                                iterations=40,
+                                                                tolerance=tolerance * numpy.real(numpy.sqrt(eigvals[i])) * 2,
+                                                                solver = lambda A, b: spalg.bicgstab(A, b, maxiter=200)[0])
 
     for i in range(len(eigvals)):
         v = eigvecs[:, i]
         n = eigvals[i]
         v /= norm(v)
-        logger.info('eigness {}: {}'.format(i, norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )))
+        eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )
+        logger.info('eigness {}: {}'.format(i, eigness))
 
+    order = numpy.argsort(numpy.abs(eigvals))
     return eigvals[order], eigvecs.T[order]
 
@@ -499,3 +498,245 @@ def find_k(frequency: float,
     return res.x * direction, res.fun + frequency
+
+#def eigsolve2(num_modes: int,
+#              k0: numpy.ndarray,
+#              G_matrix: numpy.ndarray,
+#              epsilon: field_t,
+#              mu: field_t = None,
+#              tolerance = 1e-8,
+#              ) -> Tuple[numpy.ndarray, numpy.ndarray]:
+#    """
+#    Find the first (lowest-frequency) num_modes eigenmodes with Bloch wavevector
+#    k0 of the specified structure.
+#
+#    :param k0: Bloch wavevector, [k0x, k0y, k0z].
+#    :param G_matrix: 3x3 matrix, with reciprocal lattice vectors as columns.
+#    :param epsilon: Dielectric constant distribution for the simulation.
+#        All fields are sampled at cell centers (i.e., NOT Yee-gridded)
+#    :param mu: Magnetic permeability distribution for the simulation.
+#        Default None (1 everywhere).
+#    :return: (eigenvalues, eigenvectors) where eigenvalues[i] corresponds to the
+#        vector eigenvectors[i, :]
+#    """
+#    h_size = 2 * epsilon[0].size
+#
+#
+#    '''
+#    Generate the operators
+#    '''
+#    mop = maxwell_operator(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu)
+#    imop = inverse_maxwell_operator_approx(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu)
+#
+#    scipy_op = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=mop)
+#    scipy_iop = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=imop)
+#
+#    y_shape = (h_size, num_modes)
+#
+#
+#    prev_E = 0
+#    d_scale = 1
+#    g_lag = d_lag = 0
+#    prev_traceGtKG = 0
+#    prev_theta = 0.5
+#    D = numpy.zeros(shape=y_shape, dtype=complex)
+#
+#    y0 = None
+#    if y0 is None:
+#        z = numpy.random.rand(*y_shape) + 1j * numpy.random.rand(*y_shape)
+#    else:
+#        z = y0
+#
+#    while True:
+#        z2 = z.conj().T @ z
+#        z_norm = numpy.sqrt(numpy.real(numpy.trace(z2)) / num_modes)
+#        z /= z_norm
+#        z2 /= z_norm * z_norm
+#        try:
+#            U = numpy.linalg.inv(z2)
+#        except numpy.linalg.LinAlgError:
+#            z = numpy.random.rand(*y_shape) + 1j * numpy.random.rand(*y_shape)
+#            continue
+#
+#        trace_U = numpy.real(numpy.trace(U))
+#        if trace_U > 1e8 * num_modes:
+#            z = z @ scipy.linalg.sqrtm(U).conj().T
+#            prev_traceGtKG = 0
+#            continue
+#        break
+#
+#
+#    def rayleigh_quotient(Z: numpy.ndarray, approx_grad: bool = True):
+#        """
+#        Absolute value of the block Rayleigh quotient, and the associated gradient.
+#
+#        See Johnson and Joannopoulos, Opt. Expr. 8, 3 (2001) for details (full
+#        citation in module docstring).
+#
+#        ===
+#
+#        Notes on my understanding of the procedure:
+#
+#        Minimize f(Y) = |trace(Y.H @ A @ Y)|, making use of Y = Z @ inv(Z.H @ Z)^(1/2)
+#        (a polar orthogonalization of Y). This gives f(Z) = |trace(Z.H @ A @ Z @ U)|,
+#        where U = inv(Z.H @ Z). We minimize the absolute value to find the eigenvalues
+#        with smallest magnitude.
+#
+#        The gradient is P @ (A @ Z @ U), where P = (1 - Z @ U @ Z.H) is a projection
+#        onto the space orthogonal to Z. If approx_grad is True, the approximate
+#        inverse of the maxwell operator is used to precondition the gradient.
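+#
+#        As an illustrative aside (not used by the solver): with orthonormal
+#        Z, so that U = I, the projector P = 1 - Z @ Z.H satisfies P @ Z = 0
+#        and P @ P = P, which is easy to check numerically:
+#
+#            Z, _ = numpy.linalg.qr(numpy.random.rand(100, 4))
+#            P = numpy.eye(100) - Z @ Z.conj().T
+#            assert numpy.allclose(P @ Z, 0) and numpy.allclose(P @ P, P)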
+# """ +# z = Z.reshape(y_shape) +# U = numpy.linalg.inv(z.conj().T @ z) +# zU = z @ U +# AzU = scipy_op @ zU +# zTAzU = z.conj().T @ AzU +# f = numpy.real(numpy.trace(zTAzU)) +# if approx_grad: +# df_dy = scipy_iop @ (AzU - zU @ zTAzU) +# else: +# df_dy = (AzU - zU @ zTAzU) +# logging.info('f={}'.format(f)) +# return numpy.abs(f), numpy.sign(f) * numpy.real(df_dy).ravel() +# +# max_iters = 10000 #TODO +# for iter in range(max_iters): +# f, G = rayleigh_quotient(z, False) +# +# if iter > 0 and abs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7): +# break +# +# KG = scipy_iop @ G +# traceGtKG = numpy.real(numpy.trace(G.conj().T @ KG)) + g_lag * g_lag +# gamma_numerator = traceGtKG +# +# reset_iters = 1000 #TODO +# if prev_trace_GtKG == 0 or iter % reset_iters == 0: +# gamma = 0 +# else: +# gamma = gamma_numerator / prev_trace_GtKG +# +# D = gamma * d_scale * D + KG +# d_lag = gamma * d_scale * d_lag + g_lag +# +# +# d_scale = numpy.sqrt(numpy.real(numpy.sum(D.conj() * D))) / num_modes +# D /= d_scale +# +# AD = A @ D +# DtD = D.conj().T @ D +# DtAD = D.conj().T @ AD +# +# YtD = z.conj().T @ D +# YtAD = z.conj().T @ AD +# symYtD = (YtD + YtD.conj().T) * 0.5 +# symYtAD = (YtD + YtAD.conj().T) * 0.5 +# +# U_sYtD = U @ symYtD #.H -- shouldn't matter? +# +# dE = 2.0 * (real(trace(U.H @ symYtAD)) - real(trace(YtAYU.H @ U_sYtD))) +# +# S2 = DtD - 4 * symYtD @ U_sYtD +# d2E = 2.0 * (real(trace(U.conj().T @ DtAD)) - +# real(trace(YtAYU.conj().T @ U @ S2)) - +# 4.0 * real(trace(U.conj().T @ symYtAD @ U_sYtD))) +# +# d_lag = lag = trace_YtLY = trace_DtLD = trace_YtLD = 0 +# +# # Newton-Raphson to find a root of the first derivative: +# theta = -dE/d2E +# +# if d2E < 0 or abs(theta) >= K_PI: +# theta = -abs(prev_theta) * numpy.sign(dE) +# +# # YtAYU * YtBY = YtAY for use in linmin. +# YtAY = YtAYU @ YtBY.conj().T +# +# theta, new_E, new_dE = linmin(theta, E, dE, 0.1, min(tolerance, 1e-6), 1e-14, 0, -numpy.sign(dE) * K_PI, trace_func, tfd) +# +# improvement = numpy.abs(E - new_E) * 2 / numpy.abs(E + new_E) +# logging.info('linmin improvement {}'.format(improvement)) +# z *= numpy.cos(theta) +# z += D * numpy.sin(theta) +# +# +# prev_traceGtKG = trace_GtKG +# prev_theta = theta +# prev_E = E +# +# +# def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func): +# if df0 > 0: +# x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq)) +# return -x0, f0, -df0 +# elif df0 == 0: +# return 0, f0, df0 +# else: +# x = x_guess +# fx = f0 +# dfx = df0 +# +# dsrch(xxx,,) +# for i in range(int(1e6)): +# if task != 'F': +# logging.info('search converged in {} iterations'.format(i)) +# break +# fx = f(x, dfx) +# dsrch(...) +# +# return x, fx, dfx +# +# +# +# ''' +# Use the conjugate gradient method and the approximate gradient calculation to +# quickly find approximate eigenvectors. 
+#    '''
+#    #result = scipy.optimize.minimize(rayleigh_quotient,
+#    #                                 numpy.random.rand(*y_shape),
+#    #                                 jac=True,
+#    #                                 method='CG',
+#    #                                 tol=1e-5,
+#    #                                 options={'maxiter': 30, 'disp': True})
+#
+#    #result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False),
+#    #                                 result.x,
+#    #                                 jac=True,
+#    #                                 method='CG',
+#    #                                 tol=1e-13,
+#    #                                 options={'maxiter': 100, 'disp': True})
+#    #
+#    #z = result.x.reshape(y_shape)
+#
+#    '''
+#    Recover eigenvectors from Z
+#    '''
+#    U = numpy.linalg.inv(z.conj().T @ z)
+#    y = z @ scipy.linalg.sqrtm(U)
+#    w = y.conj().T @ (scipy_op @ y)
+#
+#    eigvals, w_eigvecs = numpy.linalg.eig(w)
+#    eigvecs = y @ w_eigvecs
+#
+#    for i in range(len(eigvals)):
+#        v = eigvecs[:, i]
+#        n = eigvals[i]
+#        v /= norm(v)
+#        logger.info('eigness {}: {}'.format(i, norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )))
+#
+#    for i in range(len(eigvals)):
+#        logger.info('Refining eigenvector {}'.format(i))
+#        eigvals[i], eigvecs[:, i] = rayleigh_quotient_iteration(scipy_op,
+#                                                                guess_vector=eigvecs[:, i],
+#                                                                iterations=40,
+#                                                                tolerance=tolerance * numpy.real(numpy.sqrt(eigvals[i])) * 2,
+#                                                                solver = lambda A, b: spalg.bicgstab(A, b, maxiter=200)[0])
+#
+#    for i in range(len(eigvals)):
+#        v = eigvecs[:, i]
+#        n = eigvals[i]
+#        v /= norm(v)
+#        eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v )
+#        logger.info('eigness {}: {}'.format(i, eigness))
+#
+#    order = numpy.argsort(numpy.abs(eigvals))
+#    return eigvals[order], eigvecs.T[order]
diff --git a/fdfd_tools/eigensolver.c b/fdfd_tools/eigensolver.c
new file mode 100644
index 0000000..4109879
--- /dev/null
+++ b/fdfd_tools/eigensolver.c
@@ -0,0 +1,412 @@
+/* Copyright (C) 1999-2014 Massachusetts Institute of Technology.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "config.h"
+#include <mpiglue.h>
+#include <mpi_utils.h>
+#include <check.h>
+#include <scalar.h>
+#include <matrices.h>
+#include <blasglue.h>
+
+#include "eigensolver.h"
+#include "linmin.h"
+
+extern void eigensolver_get_eigenvals_aux(evectmatrix Y, real *eigenvals,
+                                          evectoperator A, void *Adata,
+                                          evectmatrix Work1, evectmatrix Work2,
+                                          sqmatrix U, sqmatrix Usqrt,
+                                          sqmatrix Uwork);
+
+#define STRINGIZEx(x) #x /* a hack so that we can stringize macro values */
+#define STRINGIZE(x) STRINGIZEx(x)
+
+#define K_PI 3.141592653589793238462643383279502884197
+#define MIN2(a,b) ((a) < (b) ? (a) : (b))
+#define MAX2(a,b) ((a) > (b) ? (a) : (b))
+
+#if defined(SCALAR_LONG_DOUBLE_PREC)
+#  define fabs fabsl
+#  define cos cosl
+#  define sin sinl
+#  define sqrt sqrtl
+#  define atan atanl
+#  define atan2 atan2l
+#endif
+
+/* Evaluate op, and set t to the elapsed time (in seconds). */
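+/* Usage note: TIME_OP wraps a single operation, as in the (commented-out)
+   call inside eigensolver_lagrange below:
+       TIME_OP(time_KZ, K(G, X, Kdata, Y, NULL, YtBY));
+   after which time_KZ holds the elapsed seconds. */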
+#define TIME_OP(t, op) { \
+     mpiglue_clock_t xxx_time_op_start_time = MPIGLUE_CLOCK; \
+     { \
+          op; \
+     } \
+     (t) = MPIGLUE_CLOCK_DIFF(MPIGLUE_CLOCK, xxx_time_op_start_time); \
+}
+
+/**************************************************************************/
+
+#define EIGENSOLVER_MAX_ITERATIONS 100000
+#define FEEDBACK_TIME 4.0 /* elapsed time before we print progress feedback */
+
+/* Number of iterations after which to reset conjugate gradient
+   direction to steepest descent.  (Picked after some experimentation.
+   Is there a better basis?  Should this change with the problem
+   size?) */
+#define CG_RESET_ITERS 70
+
+/* Threshold for trace(1/YtBY) = trace(U) before we reorthogonalize: */
+#define EIGS_TRACE_U_THRESHOLD 1e8
+
+/**************************************************************************/
+
+/* estimated times/iteration for different iteration schemes, based
+   on the measured times for various operations and the operation counts: */
+
+#define EXACT_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ, t_linmin) \
+     ((t_AZ)*2 + (t_KZ) + (t_ZtW)*4 + (t_ZS)*2 + (t_ZtZ)*2 + (t_linmin))
+
+#define APPROX_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ) \
+     ((t_AZ)*2 + (t_KZ) + (t_ZtW)*2 + (t_ZS)*2 + (t_ZtZ)*2)
+
+/* Guess for the convergence slowdown factor due to the approximate
+   line minimization.  It is probably best to be conservative, as the
+   exact line minimization is more reliable and we only want to
+   abandon it if there is a big speed gain. */
+#define APPROX_LINMIN_SLOWDOWN_GUESS 2.0
+
+/* We also don't want to use the approximate line minimization if
+   the exact line minimization makes a big difference in the value
+   of the trace that's achieved (i.e. if one step of Newton's method
+   on the trace derivative does not do a good job).  The following
+   is the maximum improvement by the exact line minimization (over
+   one step of Newton) at which we'll allow the use of approximate line
+   minimization. */
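+/* (Concretely, with the two constants here: exact linmin is abandoned only
+   when it improves the trace by at most 5% per step while its estimated
+   per-iteration cost exceeds 2x that of the approximate scheme; see the
+   use_linmin decision at the end of eigensolver_lagrange.) */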
+#define APPROX_LINMIN_IMPROVEMENT_THRESHOLD 0.05
+
+/**************************************************************************/
+
+typedef struct {
+    sqmatrix YtAY, DtAD, symYtAD, YtBY, DtBD, symYtBD, S1, S2, S3;
+    real lag, d_lag, trace_YtLY, trace_DtLD, trace_YtLD;
+} trace_func_data;
+
+static linmin_real trace_func(linmin_real theta, linmin_real *trace_deriv, void *data)
+{
+    linmin_real trace;
+    trace_func_data *d = (trace_func_data *) data;
+    linmin_real c = cos(theta), s = sin(theta);
+
+    YDNi = c*c * YtY + s*s * DtD + 2*s*c * symYtD
+    if not YDNi.inv():    /* in-place inversion; false if singular */
+        /* if c or s is small, we sometimes run into numerical
+           difficulties, and it is better to use the Taylor expansion */
+        if c < 1e-4 * s and c != 0:
+            YDNi = DtD.inv()
+            S3 = (YDNi @ symYtD.H) @ YDNi.H
+            YDNi = 1/(s*s) * YDNi - 2*c/(s*s*s) * S3
+        elif s < 1e-4 * c and s != 0:
+            YDNi = YtY.inv()
+            S3 = (YDNi @ symYtD.H) @ YDNi.H
+            YDNi = 1/(c*c) * YDNi - 2*s/(c*c*c) * S3
+        else:
+            CHECK(0, "inexplicable singularity in linmin trace_func")
+
+    YADN = c*c * YtAY + s*s * DtAD + 2*s*c * symYtAD
+    trace = real(trace(YADN.H @ YDNi))
+            + (c*c * trace_YtLY + s*s * trace_DtLD + 2*s*c * trace_YtLD) * (c * lag + s * d_lag)
+
+    if (trace_deriv) {
+        c2 = cos(2*theta)
+        s2 = sin(2*theta)
+
+        S3 = -0.5 * s2 * (YtAY - DtAD) + c2 * symYtAD
+        *trace_deriv = real(trace(YDNi.H @ S3))
+
+        S2 = (YDNi @ YADN.H) @ YDNi.H
+        S3 = -0.5 * s2 * (YtY - DtD) + c2 * symYtD
+        *trace_deriv -= real(trace(S2.H @ S3))
+        *trace_deriv *= 2
+
+        *trace_deriv += (-s2 * trace_YtLY + s2 * trace_DtLD
+                         + 2*c2 * trace_YtLD) * (c * lag + s * d_lag);
+        *trace_deriv += (c*c * trace_YtLY + s*s * trace_DtLD
+                         + 2*s*c * trace_YtLD) * (-s * lag + c * d_lag);
+    }
+
+    return trace;
+}
+
+/**************************************************************************/
+
+#define EIG_HISTORY_SIZE 5
+
+/* find generalized eigenvectors Y of (A,B) by minimizing Rayleigh quotient
+
+        tr [ Yt A Y / (Yt B Y) ] + lag * tr [ Yt L Y ]
+
+   where lag is a Lagrange multiplier and L is a Hermitian operator
+   implementing some constraint tr [ Yt L Y ] = 0 on the eigenvectors
+   (if L is not NULL).
+
+   Constraints that commute with A and B (and L) are specified via the
+   "constraint" argument, which gives the projection operator for
+   the constraint(s).
+*/
+void eigensolver_lagrange(evectmatrix Y, real *eigenvals,
+                          evectoperator A, void *Adata,
+                          evectoperator B, void *Bdata,
+                          evectpreconditioner K, void *Kdata,
+                          evectconstraint constraint, void *constraint_data,
+                          evectoperator L, void *Ldata, real *lag,
+                          evectmatrix Work[], int nWork,
+                          real tolerance, int *num_iterations,
+                          int flags)
+{
+    real g_lag = 0, d_lag = 0, prev_g_lag = 0;
+    short usingConjugateGradient = 0, use_polak_ribiere = 0,
+          use_linmin = 1;
+    real E, prev_E = 0.0;
+    real d_scale = 1.0;
+    real traceGtX, prev_traceGtX = 0.0;
+    real theta, prev_theta = 0.5;
+    int i, iteration = 0, num_emergency_restarts = 0;
+    mpiglue_clock_t prev_feedback_time;
+    real linmin_improvement = 0;
+
+    G = Work[0];
+    X = Work[1];
+
+    BY = Y;
+    D = X;
+    BD = D;    /* storage for B*D (re-use B*Y storage) */
+    prev_G = G;
+
+ restartY:
+
+    eigenvals *= 0.0
+    convergence_history = [10000.0] * EIG_HISTORY_SIZE
+    constraint(Y, constraint_data)
+
+    do {
+        real y_norm, gamma_numerator = 0;
+
+        YtBY = Y.H @ Y
+        y_norm = sqrt(real(trace(YtBY)) / Y.p);
+        Y /= y_norm
+        YtBY /= y_norm*y_norm
+        U = YtBY
+
+        if (!sqmatrix_invert(U, 1, S2))    /* non-independent Y columns */
+            /* emergency restart with random Y */
+            ...
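+            /* (sketch of the elided restart path, using num_emergency_restarts
+               and the restartY label declared above -- assumed, not verbatim:
+                   num_emergency_restarts++;
+                   randomize Y;
+                   goto restartY;                                            ) */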
+
+        /* If trace(1/YtBY) gets big, it means that the columns
+           of Y are becoming nearly parallel.  This sometimes happens,
+           especially in the targeted eigensolver, because the
+           preconditioner pushes all the columns towards the ground
+           state.  If it gets too big, it seems to be a good idea
+           to re-orthogonalize, resetting conjugate-gradient, as
+           otherwise we start to encounter numerical problems. */
+        if (flags & EIGS_REORTHOGONALIZE) {
+            traceU = real(trace(U))
+            if (traceU > EIGS_TRACE_U_THRESHOLD * U.p) {
+                Y = Y @ sqrtm(U).H    /* orthonormalize Y */
+                prev_traceGtX = 0.0;
+
+                YtBY = Y.H @ Y
+                y_norm = sqrt(real(trace(YtBY)) / Y.p)
+                Y /= y_norm
+                YtBY /= y_norm * y_norm
+                U = YtBY
+            }
+        }
+
+        /* G = AYU; note that U is Hermitian: */
+        G = A @ Y @ U
+        YtAYU = Y.H @ G
+        E = real(trace(YtAYU))
+
+        convergence_history[iteration % EIG_HISTORY_SIZE] =
+            200.0 * fabs(E - prev_E) / (fabs(E) + fabs(prev_E));
+
+        if (iteration > 0 &&
+            fabs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7))
+            break;    /* convergence!  hooray! */
+
+        /* Compute gradient of functional: G = (1 - BY U Yt) A Y U */
+        G += -Y @ (U @ YtAYU)
+
+        /* set X = precondition(G): */
+        X = K @ G
+        //TIME_OP(time_KZ, K(G, X, Kdata, Y, NULL, YtBY));
+        /* We have to apply the constraint here, in case it doesn't
+           commute with the preconditioner. */
+        constraint(X, constraint_data);
+
+        /* conjugate-gradient direction update (cf. eigsolve2 in bloch.py): */
+        traceGtX = real(trace(G.H @ X)) + g_lag * g_lag
+        if prev_traceGtX == 0 or iteration % CG_RESET_ITERS == 0:
+            gamma = 0
+        else:
+            gamma = traceGtX / prev_traceGtX
+        D = gamma * d_scale * D + X
+
+        d_scale = 1.0;
+
+        /* Minimize the trace along Y + lambda*D: */
+
+        if (!use_linmin) {
+            real dE, E2, d2E, t, d_norm;
+
+            /* Here, we do an approximate line minimization along D
+               by evaluating dE (the derivative) at the current point,
+               and the trace E2 at a second point, and then approximating
+               the second derivative d2E by finite differences.  Then,
+               we use one step of Newton's method on the derivative.
+               This has the advantage of requiring two fewer O(np^2)
+               matrix multiplications compared to the exact linmin. */
+
+            d_norm = sqrt(real(trace(D.H @ D)) / Y.p);
+
+            /* dE = 2 * tr Gt D.  (Use prev_G instead of G so that
+               it works even when we are using Polak-Ribiere.) */
+            dE = 2.0 * SCALAR_RE(evectmatrix_traceXtY(prev_G, D)) / d_norm;
+
+            /* shift Y by prev_theta along D, in the downhill direction: */
+            t = dE < 0 ? -fabs(prev_theta) : fabs(prev_theta);
+            Y += t/d_norm * D
+
+            U = inv(Y.H @ Y)
+            E2 = real(trace((Y.H @ A @ Y).H @ U))
+
+            /* Get finite-difference approximation for the 2nd derivative
+               of the trace.  Equivalently, fit to a quadratic of the
+               form:  E(theta) = E + dE theta + 1/2 d2E theta^2 */
+            d2E = (E2 - E - dE * t) / (0.5 * t * t);
+
+            theta = -dE/d2E;
+
+            /* If the 2nd derivative is negative, or a big shift
+               in the trace is predicted (compared to the previous
+               iteration), then this approximate line minimization is
+               probably not very good; switch back to the exact
+               line minimization.  Hopefully, we won't have to
+               abort like this very often, as it wastes operations. */
+            if (d2E < 0 || -0.5*dE*theta > 20.0 * fabs(E-prev_E)) {
+                if (flags & EIGS_FORCE_APPROX_LINMIN) {
+                }
+                else {
+                    use_linmin = 1;
+                    evectmatrix_aXpbY(1.0, Y, -t / d_norm, D);
+                    prev_theta = atan(prev_theta);    /* convert to angle */
+                    /* don't do this again: */
+                    flags |= EIGS_FORCE_EXACT_LINMIN;
+                }
+            }
+            else {
+                /* Shift Y by theta, hopefully minimizing the trace: */
+                evectmatrix_aXpbY(1.0, Y, (theta - t) / d_norm, D);
+            }
+        }
+        if (use_linmin) {
+            d_scale = sqrt(real(trace(D.H @ D)) / Y.p);
+            D /= d_scale
+
+            AD = A @ D
+            DtD = D.H @ D
+            DtAD = D.H @ AD
+
+            YtD = Y.H @ D
+            YtAD = Y.H @ AD
+            symYtD = (YtD + YtD.H) / 2
+            symYtAD = (YtAD + YtAD.H) / 2
+
+            U_sYtD = U @ symYtD.H
+            dE = 2.0 * (real(trace(U.H @ symYtAD)) - real(trace(YtAYU.H @ U_sYtD)))
+
+            S2 = DtD - 4 * symYtD @ U_sYtD
+            d2E = 2.0 * (real(trace(U.H @ DtAD)) -
+                         real(trace(YtAYU.H @ U @ S2)) -
+                         4.0 * real(trace(U.H @ symYtAD @ U_sYtD)))
+
+            d_lag = lag = trace_YtLY = trace_DtLD = trace_YtLD = 0
+
+            /* this is just Newton-Raphson to find a root of
+               the first derivative: */
+            theta = -dE/d2E;
+
+            if d2E < 0 or abs(theta) >= K_PI:
+                theta = -abs(prev_theta) * numpy.sign(dE)
+
+            /* Set S1 to YtAYU * YtBY = YtAY for use in linmin.
+               (tfd.YtAY == S1). */
+            YtAY = YtAYU @ YtBY.H
+
+            theta = linmin(&new_E, &new_dE, theta, E, dE,
+                           0.1, min(tolerance, 1e-6), 1e-14,
+                           0, -numpy.sign(dE) * K_PI,
+                           trace_func, &tfd,
+                           flags & EIGS_VERBOSE)
+            linmin_improvement = abs(E - new_E) * 2.0/abs(E + new_E);
+
+            /* Shift Y to new location minimizing the trace along D: */
+            Y *= cos(theta)
+            Y += D * sin(theta)
+        }
+
+        /* In exact arithmetic, we don't need to do this, but in practice
+           it is probably a good idea to keep errors from adding up and
+           eventually violating the constraints. */
+        constraint(Y, constraint_data);
+
+        prev_traceGtX = traceGtX;
+        prev_theta = theta;
+        prev_E = E;
+
+        /* Finally, we use the times for the various operations to
+           help us pick an algorithm for the next iteration: */
+        {
+            real t_exact, t_approx;
+            t_exact = EXACT_LINMIN_TIME(time_AZ, time_KZ, time_ZtW,
+                                        time_ZS, time_ZtZ, time_linmin);
+            t_approx = APPROX_LINMIN_TIME(time_AZ, time_KZ, time_ZtW,
+                                          time_ZS, time_ZtZ);
+
+            /* Sum the times over the processors so that all the
+               processors compare the same, average times. */
+            mpi_allreduce_1(&t_exact,
+                            real, SCALAR_MPI_TYPE, MPI_SUM, mpb_comm);
+            mpi_allreduce_1(&t_approx,
+                            real, SCALAR_MPI_TYPE, MPI_SUM, mpb_comm);
+
+            if (!(flags & EIGS_FORCE_EXACT_LINMIN) &&
+                linmin_improvement > 0 &&
+                linmin_improvement <= APPROX_LINMIN_IMPROVEMENT_THRESHOLD &&
+                t_exact > t_approx * APPROX_LINMIN_SLOWDOWN_GUESS) {
+                use_linmin = 0;
+            }
+            else if (!(flags & EIGS_FORCE_APPROX_LINMIN)) {
+                use_linmin = 1;
+                prev_theta = atan(prev_theta);    /* convert to angle */
+            }
+        }
+    } while (++iteration < EIGENSOLVER_MAX_ITERATIONS);
+
+    evectmatrix_XtX(U, Y, S2);
+    CHECK(sqmatrix_invert(U, 1, S2), "singular YtBY at end");
+    eigensolver_get_eigenvals_aux(Y, eigenvals, A, Adata,
+                                  X, G, U, S1, S2);
+
+}
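
A quick numerical sanity check (illustrative sketch, not part of the patch):
the exact-linmin objective that trace_func computes, E(theta) =
Re tr[(Z.H A Z) inv(Z.H Z)] with Z = Y cos(theta) + D sin(theta), can be
probed in numpy using finite differences plus one Newton step
theta = -dE/d2E, mirroring the code above. A, Y, and D here are random
stand-ins; only the formulas come from the patch.

    import numpy

    rng = numpy.random.default_rng(0)
    n, p = 12, 3
    M = rng.standard_normal((n, n)) + 1j * rng.standard_normal((n, n))
    A = (M + M.conj().T) / 2     # Hermitian stand-in for the Maxwell operator
    Y = rng.standard_normal((n, p)) + 1j * rng.standard_normal((n, p))
    D = rng.standard_normal((n, p)) + 1j * rng.standard_normal((n, p))

    def E(theta):
        Z = Y * numpy.cos(theta) + D * numpy.sin(theta)
        U = numpy.linalg.inv(Z.conj().T @ Z)
        return numpy.real(numpy.trace((Z.conj().T @ (A @ Z)) @ U))

    h = 1e-5
    dE = (E(h) - E(-h)) / (2 * h)             # dE/dtheta at theta = 0
    d2E = (E(h) - 2 * E(0) + E(-h)) / h**2    # d2E/dtheta^2 at theta = 0
    theta = -dE / d2E                         # one Newton step on dE
    print('E(0) = {}, E(theta) = {}'.format(E(0), E(theta)))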