From 40677664784477735b7dc3852e2db9c89c58b487 Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 8 Jan 2018 16:16:26 -0800 Subject: [PATCH 1/9] use own CG implementation --- examples/bloch.py | 10 +- fdfd_tools/bloch.py | 378 ++++++++++++++++++++++++++++---------------- 2 files changed, 250 insertions(+), 138 deletions(-) diff --git a/examples/bloch.py b/examples/bloch.py index 8bbd30e..793bd89 100644 --- a/examples/bloch.py +++ b/examples/bloch.py @@ -30,11 +30,11 @@ g2.shifts = numpy.zeros((6,3)) g2.grids = [numpy.zeros(g.shape) for _ in range(6)] epsilon = [g.grids[0],] * 3 -reciprocal_lattice = numpy.diag(1e6/numpy.array([x_period, y_period, z_period])) #cols are vectors +reciprocal_lattice = numpy.diag(1000/numpy.array([x_period, y_period, z_period])) #cols are vectors #print('Finding k at 1550nm') -#k, f = bloch.find_k(frequency=1/1550, -# tolerance=(1/1550 - 1/1551), +#k, f = bloch.find_k(frequency=1000/1550, +# tolerance=(1000 * (1/1550 - 1/1551)), # direction=[1, 0, 0], # G_matrix=reciprocal_lattice, # epsilon=epsilon, @@ -47,10 +47,10 @@ for k0x in [.25]: k0 = numpy.array([k0x, 0, 0]) kmag = norm(reciprocal_lattice @ k0) - tolerance = (1e6/1550) * 1e-4/1.5 # df = f * dn_eff / n + tolerance = (1000/1550) * 1e-4/1.5 # df = f * dn_eff / n logger.info('tolerance {}'.format(tolerance)) - n, v = bloch.eigsolve(4, k0, G_matrix=reciprocal_lattice, epsilon=epsilon, tolerance=tolerance) + n, v = bloch.eigsolve(4, k0, G_matrix=reciprocal_lattice, epsilon=epsilon, tolerance=tolerance**2) v2e = bloch.hmn_2_exyz(k0, G_matrix=reciprocal_lattice, epsilon=epsilon) v2h = bloch.hmn_2_hxyz(k0, G_matrix=reciprocal_lattice, epsilon=epsilon) ki = bloch.generate_kmn(k0, reciprocal_lattice, g.shape) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index b172e21..31d364d 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -76,6 +76,7 @@ This module contains functions for generating and solving the from typing import List, Tuple, Callable, Dict import logging import numpy +from numpy import pi, real, trace from numpy.fft import fftn, ifftn, fftfreq import scipy import scipy.optimize @@ -337,139 +338,6 @@ def inverse_maxwell_operator_approx(k0: numpy.ndarray, return operator -def eigsolve(num_modes: int, - k0: numpy.ndarray, - G_matrix: numpy.ndarray, - epsilon: field_t, - mu: field_t = None, - tolerance = 1e-8, - ) -> Tuple[numpy.ndarray, numpy.ndarray]: - """ - Find the first (lowest-frequency) num_modes eigenmodes with Bloch wavevector - k0 of the specified structure. - - :param k0: Bloch wavevector, [k0x, k0y, k0z]. - :param G_matrix: 3x3 matrix, with reciprocal lattice vectors as columns. - :param epsilon: Dielectric constant distribution for the simulation. - All fields are sampled at cell centers (i.e., NOT Yee-gridded) - :param mu: Magnetic permability distribution for the simulation. - Default None (1 everywhere). - :return: (eigenvalues, eigenvectors) where eigenvalues[i] corresponds to the - vector eigenvectors[i, :] - """ - h_size = 2 * epsilon[0].size - - kmag = norm(G_matrix @ k0) - - ''' - Generate the operators - ''' - mop = maxwell_operator(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu) - imop = inverse_maxwell_operator_approx(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu) - - scipy_op = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=mop) - scipy_iop = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=imop) - - y_shape = (h_size, num_modes) - - def rayleigh_quotient(Z: numpy.ndarray, approx_grad: bool = True): - """ - Absolute value of the block Rayleigh quotient, and the associated gradient. - - See Johnson and Joannopoulos, Opt. Expr. 8, 3 (2001) for details (full - citation in module docstring). - - === - - Notes on my understanding of the procedure: - - Minimize f(Y) = |trace((Y.H @ A @ Y)|, making use of Y = Z @ inv(Z.H @ Z)^(1/2) - (a polar orthogonalization of Y). This gives f(Z) = |trace(Z.H @ A @ Z @ U)|, - where U = inv(Z.H @ Z). We minimize the absolute value to find the eigenvalues - with smallest magnitude. - - The gradient is P @ (A @ Z @ U), where P = (1 - Z @ U @ Z.H) is a projection - onto the space orthonormal to Z. If approx_grad is True, the approximate - inverse of the maxwell operator is used to precondition the gradient. - """ - z = Z.view(dtype=complex).reshape(y_shape) - U = numpy.linalg.inv(z.conj().T @ z) - zU = z @ U - AzU = scipy_op @ zU - zTAzU = z.conj().T @ AzU - f = numpy.real(numpy.trace(zTAzU)) - if approx_grad: - df_dy = scipy_iop @ (AzU - zU @ zTAzU) - else: - df_dy = (AzU - zU @ zTAzU) - - df_dy_flat = df_dy.view(dtype=float).ravel() - return numpy.abs(f), numpy.sign(f) * df_dy_flat - - ''' - Use the conjugate gradient method and the approximate gradient calculation to - quickly find approximate eigenvectors. - ''' - result = scipy.optimize.minimize(rayleigh_quotient, - numpy.random.rand(*y_shape, 2), - jac=True, - method='L-BFGS-B', - tol=1e-20, - options={'maxiter': 2000, 'gtol':0, 'ftol':1e-20 , 'disp':True})#, 'maxls':80, 'm':30}) - - - result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, True), - result.x, - jac=True, - method='L-BFGS-B', - tol=1e-20, - options={'maxiter': 2000, 'gtol':0, 'disp':True}) - - result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False), - result.x, - jac=True, - method='L-BFGS-B', - tol=1e-20, - options={'maxiter': 2000, 'gtol':0, 'disp':True}) - - for i in range(20): - result = scipy.optimize.minimize(lambda y: rayleigh_quotient(y, False), - result.x, - jac=True, - method='L-BFGS-B', - tol=1e-20, - options={'maxiter': 70, 'gtol':0, 'disp':True}) - if result.nit == 0: - # We took 0 steps, so re-running won't help - break - - - z = result.x.view(dtype=complex).reshape(y_shape) - - ''' - Recover eigenvectors from Z - ''' - U = numpy.linalg.inv(z.conj().T @ z) - y = z @ scipy.linalg.sqrtm(U) - w = y.conj().T @ (scipy_op @ y) - - eigvals, w_eigvecs = numpy.linalg.eig(w) - eigvecs = y @ w_eigvecs - - for i in range(len(eigvals)): - v = eigvecs[:, i] - n = eigvals[i] - v /= norm(v) - eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v ) - f = numpy.sqrt(-numpy.real(n)) - df = numpy.sqrt(-numpy.real(n + eigness)) - neff_err = kmag * (1/df - 1/f) - logger.info('eigness {}: {}\n neff_err: {}'.format(i, eigness, neff_err)) - - order = numpy.argsort(numpy.abs(eigvals)) - return eigvals[order], eigvecs.T[order] - - def find_k(frequency: float, tolerance: float, direction: numpy.ndarray, @@ -511,3 +379,247 @@ def find_k(frequency: float, return res.x * direction, res.fun + frequency + +def eigsolve(num_modes: int, + k0: numpy.ndarray, + G_matrix: numpy.ndarray, + epsilon: field_t, + mu: field_t = None, + tolerance = 1e-20, + ) -> Tuple[numpy.ndarray, numpy.ndarray]: + """ + Find the first (lowest-frequency) num_modes eigenmodes with Bloch wavevector + k0 of the specified structure. + + :param k0: Bloch wavevector, [k0x, k0y, k0z]. + :param G_matrix: 3x3 matrix, with reciprocal lattice vectors as columns. + :param epsilon: Dielectric constant distribution for the simulation. + All fields are sampled at cell centers (i.e., NOT Yee-gridded) + :param mu: Magnetic permability distribution for the simulation. + Default None (1 everywhere). + :param tolerance: Solver stops when fractional change in the objective + trace(Z.H @ A @ Z @ inv(Z Z.H)) is smaller than the tolerance + :return: (eigenvalues, eigenvectors) where eigenvalues[i] corresponds to the + vector eigenvectors[i, :] + """ + h_size = 2 * epsilon[0].size + + kmag = norm(G_matrix @ k0) + + ''' + Generate the operators + ''' + mop = maxwell_operator(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu) + imop = inverse_maxwell_operator_approx(k0=k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu) + + scipy_op = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=mop) + scipy_iop = spalg.LinearOperator(dtype=complex, shape=(h_size, h_size), matvec=imop) + + y_shape = (h_size, num_modes) + + prev_E = 0 + d_scale = 1 + prev_traceGtKG = 0 + prev_theta = 0.5 + D = numpy.zeros(shape=y_shape, dtype=complex) + + y0 = None + if y0 is None: + Z = numpy.random.rand(*y_shape).astype(complex) + else: + Z = y0 + + while True: + Z2 = Z.conj().T @ Z + Z_norm = numpy.sqrt(real(trace(Z2))) / num_modes + Z /= Z_norm + Z2 /= Z_norm * Z_norm + try: + U = numpy.linalg.inv(Z2) + except numpy.linalg.LinAlgError: + Z = numpy.random.rand(*y_shape).astype(complex) + continue + + trace_U = real(trace(U)) + if trace_U > 1e8 * num_modes: + Z = Z @ scipy.linalg.sqrtm(U).conj().T + prev_traceGtKG = 0 + continue + break + + def rtrace_AtB(A, B): + return real(numpy.sum(A.conj() * B)) + + def symmetrize(A): + return (A + A.conj().T) * 0.5 + + max_iters = 10000 + for iter in range(max_iters): + U = numpy.linalg.inv(Z.conj().T @ Z) + AZ = scipy_op @ Z + AZU = AZ @ U + ZtAZU = Z.conj().T @ AZU + E = real(trace(ZtAZU)) + sgn = numpy.sign(E) + E = numpy.abs(E) + G = (AZU - Z @ U @ ZtAZU) * sgn + + if iter > 0 and abs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7): + logging.info('Optimization succeded: {} - 5e-8 < {} * {} / 2'.format(abs(E - prev_E), tolerance, E + prev_E)) + break + + KG = scipy_iop @ G + traceGtKG = rtrace_AtB(G, KG) + gamma_numerator = traceGtKG + + reset_iters = 100 + if prev_traceGtKG == 0 or iter % reset_iters == 0: + print('RESET!') + gamma = 0 + else: + gamma = gamma_numerator / prev_traceGtKG + + D = gamma * d_scale * D + KG + d_scale = numpy.sqrt(rtrace_AtB(D, D)) / num_modes + D /= d_scale + + AD = scipy_op @ D + DtD = D.conj().T @ D + DtAD = D.conj().T @ AD + + ZtD = Z.conj().T @ D + ZtAD = Z.conj().T @ AD + symZtD = symmetrize(ZtD) + symZtAD = symmetrize(ZtAD) + + U_sZtD = U @ symZtD + + dE = 2.0 * (rtrace_AtB(U, symZtAD) - rtrace_AtB(ZtAZU, U_sZtD)) + + S2 = DtD - 4 * symZtD @ U_sZtD + d2E = 2 * (rtrace_AtB(U, DtAD) - + rtrace_AtB(ZtAZU, U @ S2) - + 4 * rtrace_AtB(U, symZtAD @ U_sZtD)) + + # Newton-Raphson to find a root of the first derivative: + theta = -dE/d2E + + if d2E < 0 or abs(theta) >= pi: + theta = -abs(prev_theta) * numpy.sign(dE) + + # ZtAZU * ZtZ = ZtAZ for use in line search + ZtZ = Z.conj().T @ Z + ZtAZ = ZtAZU @ ZtZ.conj().T + + def Qi_func(theta, memo=[None, None]): + if memo[0] == theta: + return memo[1] + + c = numpy.cos(theta) + s = numpy.sin(theta) + Q = c*c * ZtZ + s*s * DtD + 2*s*c * symZtD + try: + Qi = numpy.linalg.inv(Q) + except numpy.linalg.LinAlgError: + logger.info('taylor Qi') + # if c or s small, taylor expand + if c < 1e-4 * s and c != 0: + Qi = numpy.linalg.inv(DtD) + Qi = Qi / (s*s) - 2*c/(s*s*s) * (Qi @ symZtD.conj().T @ Qi.conj().T) + elif s < 1e-4 * c and s != 0: + Qi = numpy.linalg.inv(ZtZ) + Qi = Qi / (c*c) - 2*s/(c*c*c) * (Qi @ symZtD.conj().T @ Qi.conj().T) + else: + raise Exception('Inexplicable singularity in trace_func') + memo[0] = theta + memo[1] = Qi + return Qi + + def trace_func(theta): + c = numpy.cos(theta) + s = numpy.sin(theta) + Qi = Qi_func(theta) + R = c*c * ZtAZ + s*s * DtAD + 2*s*c * symZtAD + trace = rtrace_AtB(R, Qi) + return numpy.abs(trace) + + #def trace_deriv(theta): + # Qi = Qi_func(theta) + # c2 = numpy.cos(2 * theta) + # s2 = numpy.sin(2 * theta) + # F = -0.5*s2 * (ZtAZ - DtAD) + c2 * symZtAD + # trace_deriv = rtrace_AtB(Qi, F) + + # G = Qi @ F.conj().T @ Qi.conj().T + # H = -0.5*s2 * (ZtZ - DtD) + c2 * symZtD + # trace_deriv -= rtrace_AtB(G, H) + + # trace_deriv *= 2 + # return trace_deriv * sgn + + ''' + theta, new_E, new_dE = linmin(theta, E, dE, 0.1, min(tolerance, 1e-6), 1e-14, 0, -numpy.sign(dE) * K_PI, trace_func) + ''' + #theta, n, _, new_E, _, _new_dE = scipy.optimize.line_search(trace_func, trace_deriv, xk=theta, pk=numpy.ones((1,1)), gfk=dE, old_fval=E, c1=min(tolerance, 1e-6), c2=0.1, amax=pi) + result = scipy.optimize.minimize_scalar(trace_func, bounds=(0, pi), tol=tolerance) + new_E = result.fun + theta = result.x + + improvement = numpy.abs(E - new_E) * 2 / numpy.abs(E + new_E) + logger.info('linmin improvement {}'.format(improvement)) + Z *= numpy.cos(theta) + Z += D * numpy.sin(theta) + + prev_traceGtKG = traceGtKG + prev_theta = theta + prev_E = E + + ''' + Recover eigenvectors from Z + ''' + U = numpy.linalg.inv(Z.conj().T @ Z) + Y = Z @ scipy.linalg.sqrtm(U) + W = Y.conj().T @ (scipy_op @ Y) + + eigvals, W_eigvecs = numpy.linalg.eig(W) + eigvecs = Y @ W_eigvecs + + for i in range(len(eigvals)): + v = eigvecs[:, i] + n = eigvals[i] + v /= norm(v) + eigness = norm(scipy_op @ v - (v.conj() @ (scipy_op @ v)) * v ) + f = numpy.sqrt(-numpy.real(n)) + df = numpy.sqrt(-numpy.real(n + eigness)) + neff_err = kmag * (1/df - 1/f) + logger.info('eigness {}: {}\n neff_err: {}'.format(i, eigness, neff_err)) + + order = numpy.argsort(numpy.abs(eigvals)) + return eigvals[order], eigvecs.T[order] + + #def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func): + # if df0 > 0: + # x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq)) + # return -x0, f0, -df0 + # elif df0 == 0: + # return 0, f0, df0 + # else: + # x = x_guess + # fx = f0 + # dfx = df0 + + # isave = numpy.zeros((2,), numpy.intc) + # dsave = numpy.zeros((13,), float) + + # x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, + # x_min, x_max, isave, dsave) + # for i in range(int(1e6)): + # if task != 'F': + # logging.info('search converged in {} iterations'.format(i)) + # break + # fx = f(x, dfx) + # x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, + # x_min, x_max, isave, dsave) + + # return x, fx, dfx + From c4cbdff751c19716b788c582ac6d4df0af5d3576 Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 8 Jan 2018 23:28:57 -0800 Subject: [PATCH 2/9] cleanup --- fdfd_tools/bloch.py | 125 ++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 62 deletions(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index 31d364d..de070c0 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -447,15 +447,10 @@ def eigsolve(num_modes: int, continue break - def rtrace_AtB(A, B): - return real(numpy.sum(A.conj() * B)) - - def symmetrize(A): - return (A + A.conj().T) * 0.5 - max_iters = 10000 for iter in range(max_iters): - U = numpy.linalg.inv(Z.conj().T @ Z) + ZtZ = Z.conj().T @ Z + U = numpy.linalg.inv(ZtZ) AZ = scipy_op @ Z AZU = AZ @ U ZtAZU = Z.conj().T @ AZU @@ -469,47 +464,44 @@ def eigsolve(num_modes: int, break KG = scipy_iop @ G - traceGtKG = rtrace_AtB(G, KG) - gamma_numerator = traceGtKG + traceGtKG = _rtrace_AtB(G, KG) - reset_iters = 100 + reset_iters = 100 # TODO if prev_traceGtKG == 0 or iter % reset_iters == 0: - print('RESET!') + logger.inf('CG reset') gamma = 0 else: - gamma = gamma_numerator / prev_traceGtKG + gamma = traceGtKG / prev_traceGtKG D = gamma * d_scale * D + KG - d_scale = numpy.sqrt(rtrace_AtB(D, D)) / num_modes + d_scale = numpy.sqrt(_rtrace_AtB(D, D)) / num_modes D /= d_scale + ZtAZ = Z.conj().T @ AZ + AD = scipy_op @ D DtD = D.conj().T @ D DtAD = D.conj().T @ AD - ZtD = Z.conj().T @ D - ZtAD = Z.conj().T @ AD - symZtD = symmetrize(ZtD) - symZtAD = symmetrize(ZtAD) + symZtD = _symmetrize(Z.conj().T @ D) + symZtAD = _symmetrize(Z.conj().T @ AD) + ''' U_sZtD = U @ symZtD - dE = 2.0 * (rtrace_AtB(U, symZtAD) - rtrace_AtB(ZtAZU, U_sZtD)) + dE = 2.0 * (_rtrace_AtB(U, symZtAD) - + _rtrace_AtB(ZtAZU, U_sZtD)) - S2 = DtD - 4 * symZtD @ U_sZtD - d2E = 2 * (rtrace_AtB(U, DtAD) - - rtrace_AtB(ZtAZU, U @ S2) - - 4 * rtrace_AtB(U, symZtAD @ U_sZtD)) + d2E = 2 * (_rtrace_AtB(U, DtAD) - + _rtrace_AtB(ZtAZU, U @ (DtD - 4 * symZtD @ U_sZtD)) - + 4 * _rtrace_AtB(U, symZtAD @ U_sZtD)) # Newton-Raphson to find a root of the first derivative: theta = -dE/d2E if d2E < 0 or abs(theta) >= pi: theta = -abs(prev_theta) * numpy.sign(dE) - - # ZtAZU * ZtZ = ZtAZ for use in line search - ZtZ = Z.conj().T @ Z - ZtAZ = ZtAZU @ ZtZ.conj().T + ''' def Qi_func(theta, memo=[None, None]): if memo[0] == theta: @@ -525,10 +517,10 @@ def eigsolve(num_modes: int, # if c or s small, taylor expand if c < 1e-4 * s and c != 0: Qi = numpy.linalg.inv(DtD) - Qi = Qi / (s*s) - 2*c/(s*s*s) * (Qi @ symZtD.conj().T @ Qi.conj().T) + Qi = Qi / (s*s) - 2*c/(s*s*s) * (Qi @ (Qi @ symZtD).conj().T) elif s < 1e-4 * c and s != 0: Qi = numpy.linalg.inv(ZtZ) - Qi = Qi / (c*c) - 2*s/(c*c*c) * (Qi @ symZtD.conj().T @ Qi.conj().T) + Qi = Qi / (c*c) - 2*s/(c*c*c) * (Qi @ (Qi @ symZtD).conj().T) else: raise Exception('Inexplicable singularity in trace_func') memo[0] = theta @@ -540,22 +532,24 @@ def eigsolve(num_modes: int, s = numpy.sin(theta) Qi = Qi_func(theta) R = c*c * ZtAZ + s*s * DtAD + 2*s*c * symZtAD - trace = rtrace_AtB(R, Qi) + trace = _rtrace_AtB(R, Qi) return numpy.abs(trace) - #def trace_deriv(theta): - # Qi = Qi_func(theta) - # c2 = numpy.cos(2 * theta) - # s2 = numpy.sin(2 * theta) - # F = -0.5*s2 * (ZtAZ - DtAD) + c2 * symZtAD - # trace_deriv = rtrace_AtB(Qi, F) + ''' + def trace_deriv(theta): + Qi = Qi_func(theta) + c2 = numpy.cos(2 * theta) + s2 = numpy.sin(2 * theta) + F = -0.5*s2 * (ZtAZ - DtAD) + c2 * symZtAD + trace_deriv = _rtrace_AtB(Qi, F) - # G = Qi @ F.conj().T @ Qi.conj().T - # H = -0.5*s2 * (ZtZ - DtD) + c2 * symZtD - # trace_deriv -= rtrace_AtB(G, H) + G = Qi @ F.conj().T @ Qi.conj().T + H = -0.5*s2 * (ZtZ - DtD) + c2 * symZtD + trace_deriv -= _rtrace_AtB(G, H) - # trace_deriv *= 2 - # return trace_deriv * sgn + trace_deriv *= 2 + return trace_deriv * sgn + ''' ''' theta, new_E, new_dE = linmin(theta, E, dE, 0.1, min(tolerance, 1e-6), 1e-14, 0, -numpy.sign(dE) * K_PI, trace_func) @@ -597,29 +591,36 @@ def eigsolve(num_modes: int, order = numpy.argsort(numpy.abs(eigvals)) return eigvals[order], eigvecs.T[order] - #def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func): - # if df0 > 0: - # x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq)) - # return -x0, f0, -df0 - # elif df0 == 0: - # return 0, f0, df0 - # else: - # x = x_guess - # fx = f0 - # dfx = df0 +#def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func): +# if df0 > 0: +# x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq)) +# return -x0, f0, -df0 +# elif df0 == 0: +# return 0, f0, df0 +# else: +# x = x_guess +# fx = f0 +# dfx = df0 - # isave = numpy.zeros((2,), numpy.intc) - # dsave = numpy.zeros((13,), float) +# isave = numpy.zeros((2,), numpy.intc) +# dsave = numpy.zeros((13,), float) - # x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, - # x_min, x_max, isave, dsave) - # for i in range(int(1e6)): - # if task != 'F': - # logging.info('search converged in {} iterations'.format(i)) - # break - # fx = f(x, dfx) - # x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, - # x_min, x_max, isave, dsave) +# x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, +# x_min, x_max, isave, dsave) +# for i in range(int(1e6)): +# if task != 'F': +# logging.info('search converged in {} iterations'.format(i)) +# break +# fx = f(x, dfx) +# x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, +# x_min, x_max, isave, dsave) - # return x, fx, dfx +# return x, fx, dfx + + +def _rtrace_AtB(A, B): + return real(numpy.sum(A.conj() * B)) + +def _symmetrize(A): + return (A + A.conj().T) * 0.5 From e02040c7093de5894ad1857a7fae5569d1bfcaee Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 8 Jan 2018 23:33:22 -0800 Subject: [PATCH 3/9] fixes and clarification --- fdfd_tools/bloch.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index de070c0..ea9e760 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -385,7 +385,9 @@ def eigsolve(num_modes: int, G_matrix: numpy.ndarray, epsilon: field_t, mu: field_t = None, - tolerance = 1e-20, + tolerance: float = 1e-20, + max_iters: int = 10000, + reset_iters: int = 100, ) -> Tuple[numpy.ndarray, numpy.ndarray]: """ Find the first (lowest-frequency) num_modes eigenmodes with Bloch wavevector @@ -447,16 +449,15 @@ def eigsolve(num_modes: int, continue break - max_iters = 10000 for iter in range(max_iters): ZtZ = Z.conj().T @ Z U = numpy.linalg.inv(ZtZ) AZ = scipy_op @ Z AZU = AZ @ U ZtAZU = Z.conj().T @ AZU - E = real(trace(ZtAZU)) - sgn = numpy.sign(E) - E = numpy.abs(E) + E_signed = real(trace(ZtAZU)) + sgn = numpy.sign(E_signed) + E = numpy.abs(E_signed) G = (AZU - Z @ U @ ZtAZU) * sgn if iter > 0 and abs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7): @@ -466,9 +467,8 @@ def eigsolve(num_modes: int, KG = scipy_iop @ G traceGtKG = _rtrace_AtB(G, KG) - reset_iters = 100 # TODO if prev_traceGtKG == 0 or iter % reset_iters == 0: - logger.inf('CG reset') + logger.info('CG reset') gamma = 0 else: gamma = traceGtKG / prev_traceGtKG From 0e47fdd5fb97da65bc211b83ce1c3f8cabf3b985 Mon Sep 17 00:00:00 2001 From: jan Date: Tue, 9 Jan 2018 00:00:58 -0800 Subject: [PATCH 4/9] randomize imaginary part of starting vector --- fdfd_tools/bloch.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index ea9e760..168a349 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -422,24 +422,24 @@ def eigsolve(num_modes: int, prev_E = 0 d_scale = 1 prev_traceGtKG = 0 - prev_theta = 0.5 + #prev_theta = 0.5 D = numpy.zeros(shape=y_shape, dtype=complex) y0 = None if y0 is None: - Z = numpy.random.rand(*y_shape).astype(complex) + Z = numpy.random.rand(*y_shape) + 1j * numpy.random.rand(*y_shape) else: Z = y0 while True: - Z2 = Z.conj().T @ Z - Z_norm = numpy.sqrt(real(trace(Z2))) / num_modes + ZtZ = Z.conj().T @ Z + Z_norm = numpy.sqrt(real(trace(ZtZ))) / num_modes Z /= Z_norm - Z2 /= Z_norm * Z_norm + ZtZ /= Z_norm * Z_norm try: - U = numpy.linalg.inv(Z2) + U = numpy.linalg.inv(ZtZ) except numpy.linalg.LinAlgError: - Z = numpy.random.rand(*y_shape).astype(complex) + Z = numpy.random.rand(*y_shape) + 1j * numpy.random.rand(*y_shape) continue trace_U = real(trace(U)) @@ -565,7 +565,7 @@ def eigsolve(num_modes: int, Z += D * numpy.sin(theta) prev_traceGtKG = traceGtKG - prev_theta = theta + #prev_theta = theta prev_E = E ''' From e8f836c908996fa80f866174f3ff7a1dc7ebb8db Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 15 Jan 2018 22:43:33 -0800 Subject: [PATCH 5/9] Cleanup --- fdfd_tools/bloch.py | 106 +++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index 168a349..1816bdc 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -73,7 +73,7 @@ This module contains functions for generating and solving the ''' -from typing import List, Tuple, Callable, Dict +from typing import Tuple, Callable import logging import numpy from numpy import pi, real, trace @@ -83,7 +83,6 @@ import scipy.optimize from scipy.linalg import norm import scipy.sparse.linalg as spalg -from .eigensolvers import rayleigh_quotient_iteration from . import field_t logger = logging.getLogger(__name__) @@ -256,7 +255,7 @@ def hmn_2_hxyz(k0: numpy.ndarray, :return: Function for converting h_mn into H_xyz """ shape = epsilon[0].shape + (1,) - k_mag, m, n = generate_kmn(k0, G_matrix, shape) + _k_mag, m, n = generate_kmn(k0, G_matrix, shape) def operator(h: numpy.ndarray): hin_m, hin_n = [hi.reshape(shape) for hi in numpy.split(h, 2)] @@ -379,7 +378,6 @@ def find_k(frequency: float, return res.x * direction, res.fun + frequency - def eigsolve(num_modes: int, k0: numpy.ndarray, G_matrix: numpy.ndarray, @@ -432,10 +430,8 @@ def eigsolve(num_modes: int, Z = y0 while True: + Z *= num_modes / norm(Z) ZtZ = Z.conj().T @ Z - Z_norm = numpy.sqrt(real(trace(ZtZ))) / num_modes - Z /= Z_norm - ZtZ /= Z_norm * Z_norm try: U = numpy.linalg.inv(ZtZ) except numpy.linalg.LinAlgError: @@ -449,7 +445,7 @@ def eigsolve(num_modes: int, continue break - for iter in range(max_iters): + for i in range(max_iters): ZtZ = Z.conj().T @ Z U = numpy.linalg.inv(ZtZ) AZ = scipy_op @ Z @@ -460,22 +456,22 @@ def eigsolve(num_modes: int, E = numpy.abs(E_signed) G = (AZU - Z @ U @ ZtAZU) * sgn - if iter > 0 and abs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7): + if i > 0 and abs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7): logging.info('Optimization succeded: {} - 5e-8 < {} * {} / 2'.format(abs(E - prev_E), tolerance, E + prev_E)) break KG = scipy_iop @ G traceGtKG = _rtrace_AtB(G, KG) - if prev_traceGtKG == 0 or iter % reset_iters == 0: + if prev_traceGtKG == 0 or i % reset_iters == 0: logger.info('CG reset') gamma = 0 else: gamma = traceGtKG / prev_traceGtKG - D = gamma * d_scale * D + KG - d_scale = numpy.sqrt(_rtrace_AtB(D, D)) / num_modes - D /= d_scale + D = gamma / d_scale * D + KG + d_scale = num_modes / norm(D) + D *= d_scale ZtAZ = Z.conj().T @ AZ @@ -486,22 +482,6 @@ def eigsolve(num_modes: int, symZtD = _symmetrize(Z.conj().T @ D) symZtAD = _symmetrize(Z.conj().T @ AD) - ''' - U_sZtD = U @ symZtD - - dE = 2.0 * (_rtrace_AtB(U, symZtAD) - - _rtrace_AtB(ZtAZU, U_sZtD)) - - d2E = 2 * (_rtrace_AtB(U, DtAD) - - _rtrace_AtB(ZtAZU, U @ (DtD - 4 * symZtD @ U_sZtD)) - - 4 * _rtrace_AtB(U, symZtAD @ U_sZtD)) - - # Newton-Raphson to find a root of the first derivative: - theta = -dE/d2E - - if d2E < 0 or abs(theta) >= pi: - theta = -abs(prev_theta) * numpy.sign(dE) - ''' def Qi_func(theta, memo=[None, None]): if memo[0] == theta: @@ -549,12 +529,25 @@ def eigsolve(num_modes: int, trace_deriv *= 2 return trace_deriv * sgn - ''' + U_sZtD = U @ symZtD + + dE = 2.0 * (_rtrace_AtB(U, symZtAD) - + _rtrace_AtB(ZtAZU, U_sZtD)) + + d2E = 2 * (_rtrace_AtB(U, DtAD) - + _rtrace_AtB(ZtAZU, U @ (DtD - 4 * symZtD @ U_sZtD)) - + 4 * _rtrace_AtB(U, symZtAD @ U_sZtD)) + + # Newton-Raphson to find a root of the first derivative: + theta = -dE/d2E + + if d2E < 0 or abs(theta) >= pi: + theta = -abs(prev_theta) * numpy.sign(dE) + + # theta, new_E, new_dE = linmin(theta, E, dE, 0.1, min(tolerance, 1e-6), 1e-14, 0, -numpy.sign(dE) * K_PI, trace_func) + theta, n, _, new_E, _, _new_dE = scipy.optimize.line_search(trace_func, trace_deriv, xk=theta, pk=numpy.ones((1,1)), gfk=dE, old_fval=E, c1=min(tolerance, 1e-6), c2=0.1, amax=pi) ''' - theta, new_E, new_dE = linmin(theta, E, dE, 0.1, min(tolerance, 1e-6), 1e-14, 0, -numpy.sign(dE) * K_PI, trace_func) - ''' - #theta, n, _, new_E, _, _new_dE = scipy.optimize.line_search(trace_func, trace_deriv, xk=theta, pk=numpy.ones((1,1)), gfk=dE, old_fval=E, c1=min(tolerance, 1e-6), c2=0.1, amax=pi) result = scipy.optimize.minimize_scalar(trace_func, bounds=(0, pi), tol=tolerance) new_E = result.fun theta = result.x @@ -591,32 +584,33 @@ def eigsolve(num_modes: int, order = numpy.argsort(numpy.abs(eigvals)) return eigvals[order], eigvecs.T[order] -#def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func): -# if df0 > 0: -# x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq)) -# return -x0, f0, -df0 -# elif df0 == 0: -# return 0, f0, df0 -# else: -# x = x_guess -# fx = f0 -# dfx = df0 +''' +def linmin(x_guess, f0, df0, x_max, f_tol=0.1, df_tol=min(tolerance, 1e-6), x_tol=1e-14, x_min=0, linmin_func): + if df0 > 0: + x0, f0, df0 = linmin(-x_guess, f0, -df0, -x_max, f_tol, df_tol, x_tol, -x_min, lambda q, dq: -linmin_func(q, dq)) + return -x0, f0, -df0 + elif df0 == 0: + return 0, f0, df0 + else: + x = x_guess + fx = f0 + dfx = df0 -# isave = numpy.zeros((2,), numpy.intc) -# dsave = numpy.zeros((13,), float) + isave = numpy.zeros((2,), numpy.intc) + dsave = numpy.zeros((13,), float) -# x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, -# x_min, x_max, isave, dsave) -# for i in range(int(1e6)): -# if task != 'F': -# logging.info('search converged in {} iterations'.format(i)) -# break -# fx = f(x, dfx) -# x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, -# x_min, x_max, isave, dsave) - -# return x, fx, dfx + x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, + x_min, x_max, isave, dsave) + for i in range(int(1e6)): + if task != 'F': + logging.info('search converged in {} iterations'.format(i)) + break + fx = f(x, dfx) + x, fx, dfx, task = minpack2.dsrch(x, fx, dfx, f_tol, df_tol, x_tol, task, + x_min, x_max, isave, dsave) + return x, fx, dfx +''' def _rtrace_AtB(A, B): return real(numpy.sum(A.conj() * B)) From c1f65f61c1db4d45a5d2df0233b196aa028a23e1 Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 15 Jan 2018 22:43:59 -0800 Subject: [PATCH 6/9] Use pyfftw if available --- fdfd_tools/bloch.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index 1816bdc..fd5f758 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -77,7 +77,7 @@ from typing import Tuple, Callable import logging import numpy from numpy import pi, real, trace -from numpy.fft import fftn, ifftn, fftfreq +from numpy.fft import fftfreq import scipy import scipy.optimize from scipy.linalg import norm @@ -88,6 +88,29 @@ from . import field_t logger = logging.getLogger(__name__) +try: + import pyfftw.interfaces.numpy_fft + import pyfftw.interfaces + import multiprocessing + + pyfftw.interfaces.cache.enable() + pyfftw.interfaces.cache.set_keepalive_time(3600) + fftw_args = { + 'threads': multiprocessing.cpu_count(), + 'overwrite_input': True, + 'planner_effort': 'FFTW_EXHAUSTIVE', + } + + def fftn(*args, **kwargs): + return pyfftw.interfaces.numpy_fft.fftn(*args, **kwargs, **fftw_args) + + def ifftn(*args, **kwargs): + return pyfftw.interfaces.numpy_fft.ifftn(*args, **kwargs, **fftw_args) + +except ImportError: + from numpy.fft import fftn, ifftn + + def generate_kmn(k0: numpy.ndarray, G_matrix: numpy.ndarray, shape: numpy.ndarray From ee9abb77d9f42e79074357e95aee960e7588a566 Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 15 Jan 2018 22:44:14 -0800 Subject: [PATCH 7/9] Fix approx_inverse operator --- fdfd_tools/bloch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index fd5f758..aabc96c 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -352,8 +352,8 @@ def inverse_maxwell_operator_approx(k0: numpy.ndarray, d_xyz = fftn(ifftn(e_xyz, axes=range(3)) * epsilon, axes=range(3)) # cross product and transform into mn basis crossinv_t2c - h_m = numpy.sum(e_xyz * n, axis=3)[:, :, :, None] / +k_mag - h_n = numpy.sum(e_xyz * m, axis=3)[:, :, :, None] / -k_mag + h_m = numpy.sum(d_xyz * n, axis=3)[:, :, :, None] / +k_mag + h_n = numpy.sum(d_xyz * m, axis=3)[:, :, :, None] / -k_mag return numpy.hstack((h_m.ravel(), h_n.ravel())) From 323bcf88ad224f5a77df7f4e3d4c25e46055ffcf Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 15 Jan 2018 22:44:26 -0800 Subject: [PATCH 8/9] Propagate mu correctly --- fdfd_tools/bloch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index aabc96c..cbfdf1c 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -389,7 +389,7 @@ def find_k(frequency: float, def get_f(k0_mag: float, band: int = 0): k0 = direction * k0_mag - n, _v = eigsolve(band + 1, k0, G_matrix=G_matrix, epsilon=epsilon) + n, _v = eigsolve(band + 1, k0, G_matrix=G_matrix, epsilon=epsilon, mu=mu) f = numpy.sqrt(numpy.abs(numpy.real(n[band]))) return f From 1f9a9949c06ff33a836b05e2b18f18b1495e39fc Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 15 Jan 2018 22:44:59 -0800 Subject: [PATCH 9/9] Clarify memo and cleanup --- fdfd_tools/bloch.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fdfd_tools/bloch.py b/fdfd_tools/bloch.py index cbfdf1c..002234f 100644 --- a/fdfd_tools/bloch.py +++ b/fdfd_tools/bloch.py @@ -505,10 +505,11 @@ def eigsolve(num_modes: int, symZtD = _symmetrize(Z.conj().T @ D) symZtAD = _symmetrize(Z.conj().T @ AD) - - def Qi_func(theta, memo=[None, None]): - if memo[0] == theta: - return memo[1] + Qi_memo = [None, None] + def Qi_func(theta): + nonlocal Qi_memo + if Qi_memo[0] == theta: + return Qi_memo[1] c = numpy.cos(theta) s = numpy.sin(theta) @@ -519,15 +520,15 @@ def eigsolve(num_modes: int, logger.info('taylor Qi') # if c or s small, taylor expand if c < 1e-4 * s and c != 0: - Qi = numpy.linalg.inv(DtD) - Qi = Qi / (s*s) - 2*c/(s*s*s) * (Qi @ (Qi @ symZtD).conj().T) + DtDi = numpy.linalg.inv(DtD) + Qi = DtDi / (s*s) - 2*c/(s*s*s) * (DtDi @ (DtDi @ symZtD).conj().T) elif s < 1e-4 * c and s != 0: - Qi = numpy.linalg.inv(ZtZ) - Qi = Qi / (c*c) - 2*s/(c*c*c) * (Qi @ (Qi @ symZtD).conj().T) + ZtZi = numpy.linalg.inv(ZtZ) + Qi = ZtZi / (c*c) - 2*s/(c*c*c) * (ZtZi @ (ZtZi @ symZtD).conj().T) else: raise Exception('Inexplicable singularity in trace_func') - memo[0] = theta - memo[1] = Qi + Qi_memo[0] = theta + Qi_memo[1] = Qi return Qi def trace_func(theta):