@unpublished{li2025rkhsODE, title = {Derivative estimation by RKHS regularization for learning dynamics from time-series data}, author = {Guo, Hailong and Li, Haibo}, note = {arXiv:2504.01289}, eprint = {2504.01289}, archiveprefix = {arXiv}, year = {2025}, file = {2025-RKHS_ODE.pdf} }
Learning the governing equations from time-series data has gained increasing attention due to its potential to extract useful dynamics from real-world data. Despite significant progress, the task becomes challenging in the presence of noise, especially when derivatives must be estimated. To reduce the effect of noise, we propose a method that simultaneously fits both the derivative and the trajectory from noisy time-series data. Our approach formulates derivative estimation as an inverse problem involving integral operators within the forward model, and estimates the derivative function by solving a regularization problem in a vector-valued reproducing kernel Hilbert space (vRKHS). We derive an integral-form representer theorem, which enables the computation of the regularized solution by solving a finite-dimensional problem and facilitates efficient estimation of the optimal regularization parameter. By embedding the dynamics within a vRKHS and utilizing the fitted derivative and trajectory, we can recover the underlying dynamics from noisy data by solving a linear regularization problem. Several numerical experiments are conducted to validate the effectiveness and efficiency of our method.
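To make the idea concrete, here is a minimal sketch (an illustration, not the paper's vRKHS algorithm): the noisy trajectory is treated as data for an integral-operator equation \(y \approx x(0) + \int_0^{t} g(s)\,ds\), and the derivative \(g\) is estimated by kernel ridge regularization, whose solution has a finite-dimensional representer form. The time grid, Gaussian kernel, trapezoidal quadrature, and regularization parameter below are all illustrative assumptions.

```python
import numpy as np

n = 200
t = np.linspace(0.0, 2.0, n)
x_true = np.sin(3 * t)                                            # true trajectory
y = x_true + 0.05 * np.random.default_rng(0).standard_normal(n)   # noisy observations

# Discrete integral operator (trapezoidal rule): (A g)_i ~ integral of g over [0, t_i]
h = t[1] - t[0]
A = np.tril(np.ones((n, n))) * h
A[:, 0] -= h / 2
A -= np.diag(np.full(n, h / 2))
A[0, :] = 0.0

# Gram matrix of a Gaussian kernel on the time grid (illustrative kernel choice)
ell = 0.1
K = np.exp(-((t[:, None] - t[None, :]) ** 2) / (2 * ell**2))

# Representer-form solution of min ||A g - (y - y_0)||^2 + lam * ||g||_H^2:
# g = K A^T c with (A K A^T + lam I) c = y - y_0
lam = 1e-2
c = np.linalg.solve(A @ K @ A.T + lam * np.eye(n), y - y[0])
g_hat = K @ A.T @ c                                   # fitted derivative
x_hat = y[0] + A @ g_hat                              # fitted trajectory
print(np.max(np.abs(g_hat - 3 * np.cos(3 * t))))      # derivative error
```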
@unpublished{li2025lse, title = {Krylov iterative methods for linear least squares problems with linear equality constraints}, author = {Li, Haibo}, note = {arXiv:2501.01655}, eprint = {2501.01655}, archiveprefix = {arXiv}, year = {2025}, file = {2025-LSE.pdf} }
We consider the linear least squares problem with linear equality constraints (LSE problem), formulated as \(\min \|Ax-b\|_2\) s.t. \(Cx=d\). Although there are some classical methods available to solve this problem, most of them rely on matrix factorizations or require the null space of \(C\), which limits their applicability to large-scale problems. To address this challenge, we present a novel analysis of the LSE problem from the perspective of operator-type least squares (LS) problems, where the linear operators are induced by \(A\) and \(C\). We show that the solution of the LSE problem can be decomposed into two components, each corresponding to the solution of an operator-form LS problem. Building on this decomposed-form solution, we propose two Krylov subspace based iterative methods to approximate each component, thereby providing an approximate solution of the LSE problem. Several numerical examples are constructed to test the proposed iterative methods for solving LSE problems, demonstrating their effectiveness.
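For orientation, a minimal dense sketch of the LSE problem (an assumption-based illustration, not the paper's Krylov methods): solve the KKT system of the Lagrangian directly. This is exactly the kind of factorization-based approach that the operator-form iterative methods are designed to avoid for large-scale problems.

```python
import numpy as np

rng = np.random.default_rng(0)
m, n, p = 50, 20, 5
A, b = rng.standard_normal((m, n)), rng.standard_normal(m)
C, d = rng.standard_normal((p, n)), rng.standard_normal(p)

# KKT system of min 0.5*||Ax - b||^2 s.t. Cx = d:
#   [A^T A  C^T] [x  ]   [A^T b]
#   [C      0  ] [lam] = [d    ]
K = np.block([[A.T @ A, C.T], [C, np.zeros((p, p))]])
x = np.linalg.solve(K, np.concatenate([A.T @ b, d]))[:n]
print(np.linalg.norm(C @ x - d))   # ~0: constraint satisfied
```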
@unpublished{li2024scalable, title = {Scalable iterative data-adaptive RKHS regularization}, author = {Li, Haibo and Feng, Jinchao and Lu, Fei}, note = {arXiv:2401.00656}, eprint = {2401.00656}, archiveprefix = {arXiv}, year = {2024}, file = {2024-idarr.pdf} }
We present iDARR, a scalable iterative Data-Adaptive RKHS Regularization method for solving ill-posed linear inverse problems. The method searches for solutions in subspaces where the true solution can be identified, with the data-adaptive RKHS penalizing the spaces of small singular values. At the core of the method is a new generalized Golub-Kahan bidiagonalization procedure that recursively constructs orthonormal bases for a sequence of RKHS-restricted Krylov subspaces. The method is scalable with a complexity of \(O(kmn)\) for an \(m\)-by-\(n\) matrix, where \(k\) denotes the number of iterations. Numerical tests on the Fredholm integral equation and 2D image deblurring show that it outperforms the widely used \(L^2\) and \(\ell^2\) norms, producing stable, accurate solutions that consistently converge as the noise level decays.
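The generalized bidiagonalization at the core of iDARR extends the standard Golub-Kahan process; a minimal sketch of that standard process (without the paper's RKHS-weighted inner products, which are omitted here) is:

```python
import numpy as np

def gkb(A, b, k):
    """k-step Golub-Kahan bidiagonalization: A V_k = U_{k+1} B_k, U_{k+1} e_1 = b/||b||."""
    m, n = A.shape
    U, V = np.zeros((m, k + 1)), np.zeros((n, k))
    B = np.zeros((k + 1, k))
    U[:, 0] = b / np.linalg.norm(b)
    for i in range(k):
        r = A.T @ U[:, i] - (B[i, i - 1] * V[:, i - 1] if i > 0 else 0.0)
        B[i, i] = np.linalg.norm(r); V[:, i] = r / B[i, i]
        s = A @ V[:, i] - B[i, i] * U[:, i]
        B[i + 1, i] = np.linalg.norm(s); U[:, i + 1] = s / B[i + 1, i]
    return U, B, V

# usage: U, B, V = gkb(A, b, 20)
# each step costs one product with A and one with A^T, hence O(kmn) for k steps
```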
@unpublished{li2023alkpu, title = {ALKPU: An Active Learning Method for the DeePMD Model with Kalman Filtering}, author = {Li, Haibo and Wu, Xingxing and Liu, Liping and Wang, Long and Wang, Lin-Wang and Tan, Guangming and Jia, Weile}, note = {arXiv:2411.13850}, eprint = {2411.13850}, archiveprefix = {arXiv}, year = {2024}, file = {2024-ALKPU.pdf} }
Neural network force field models such as DeePMD have enabled highly efficient large-scale molecular dynamics simulations with ab initio accuracy. However, building such models covering complete configurations of the simulated system heavily depends on training data obtained by costly electronic structure calculations, so concurrently selecting and labelling the most representative configurations during model training is crucial for enhancing the model's extrapolation capability while improving training efficiency. To address this challenge, for the DeePMD model trained by an efficient Kalman-filtering-based optimizer, we propose the Kalman Prediction Uncertainty (KPU) to quantify the uncertainty of the model's predictions. With KPU we design the Active Learning by KPU (ALKPU) method, which can efficiently select representative configurations that should be labelled during model training. Using the Fisher information matrix and Shannon entropy, we prove that ALKPU locally leads to the fastest reduction of the model's uncertainty, which reveals its rationality as a general active learning method. We test the ALKPU method with various physical system simulations and demonstrate that it can efficiently cover the system's configuration space. Our work shows the advantages of ALKPU as a new active learning method in terms of training efficiency and computational resource overhead.
@unpublished{li2023generalizing, title = {Generalizing the SVD of a matrix under non-standard inner product and its applications to linear ill-posed problems}, author = {Li, Haibo}, note = {arXiv:2312.10403}, eprint = {2312.10403}, archiveprefix = {arXiv}, year = {2023}, file = {2023-Generalizing.pdf} }
The singular value decomposition (SVD) of a matrix is a powerful tool for many matrix computation problems. In this paper, we consider generalizing the standard SVD to analyze and compute the regularized solution of linear ill-posed problems that arise from discretizing Fredholm integral equations of the first kind. For the commonly used quadrature method for discretization, a regularizer of the form \(\|x\|_M^2 := x^T M x\) should be exploited, where \(M\) is symmetric positive definite. To handle this regularizer, we give the weighted SVD (WSVD) of a matrix under the \(M\)-inner product. Several important applications of WSVD, such as low-rank approximation and solving least squares problems with minimum \(\|\cdot\|_M\)-norm, are studied. We propose the weighted Golub-Kahan bidiagonalization (WGKB) to compute several dominant WSVD components and a corresponding weighted LSQR algorithm to iteratively solve the least squares problem. All the above tools and methods are used to analyze and solve linear ill-posed problems with the regularizer \(\|x\|_M^2\). A WGKB-based subspace projection regularization method is proposed to efficiently compute a good regularized solution, which can incorporate the prior information about \(x\) encoded by the regularizer \(\|x\|_M^2\). Several numerical experiments are performed to illustrate the effectiveness of our methods.
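One concrete way to realize a weighted SVD under an \(M\)-inner product (a dense Cholesky-based sketch, not the paper's WGKB iteration; the random test matrices are assumptions) is to compute the SVD of \(A R^{-1}\) where \(M = R^T R\); the right vectors \(V = R^{-1} W\) are then \(M\)-orthonormal:

```python
import numpy as np

rng = np.random.default_rng(1)
m, n = 30, 10
A = rng.standard_normal((m, n))
T = rng.standard_normal((n, n))
M = T @ T.T + n * np.eye(n)                    # symmetric positive definite weight

R = np.linalg.cholesky(M).T                    # M = R^T R
U, sig, WT = np.linalg.svd(A @ np.linalg.inv(R), full_matrices=False)
V = np.linalg.solve(R, WT.T)                   # right vectors, M-orthonormal

print(np.allclose(V.T @ M @ V, np.eye(n)))     # True: V^T M V = I
print(np.allclose(A @ V, U * sig))             # True: A V = U Sigma
```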
@unpublished{li2023subspace, title = {Subspace projection regularization for large-scale Bayesian linear inverse problems}, author = {Li, Haibo}, note = {arXiv:2310.18618}, eprint = {2310.18618}, archiveprefix = {arXiv}, year = {2023}, file = {2023-spr.pdf} }
The Bayesian statistical framework provides a systematic approach to enhance the regularization model by incorporating prior information about the desired solution. For Bayesian linear inverse problems with Gaussian noise and a Gaussian prior, we propose a new iterative regularization algorithm that belongs to the class of subspace projection regularization (SPR) methods. By treating the forward model matrix as a linear operator between two underlying finite-dimensional Hilbert spaces with newly introduced inner products, we first introduce an iterative process that generates a series of valid solution subspaces. The SPR method then projects the original problem onto these solution subspaces to obtain a series of low-dimensional linear least squares problems, and an efficient procedure is developed to update their solutions to approximate the desired solution of the original problem. With newly designed early stopping rules, this iterative algorithm can obtain a regularized solution with satisfactory accuracy. Several theoretical results are established to reveal the regularization properties of the algorithm. We use both small-scale and large-scale inverse problems to test the proposed algorithm and demonstrate its robustness and efficiency. The most computationally intensive operations in the proposed algorithm only involve matrix-vector products, making it highly efficient for large-scale problems.
@article{li2024projected, title = {Projected Newton method for large-scale Bayesian linear inverse problems}, author = {Li, Haibo}, journal = {SIAM Journal on Optimization (accepted)}, year = {2025}, file = {2024-ProjN.pdf} }
Computing the regularized solution of Bayesian linear inverse problems as well as the corresponding regularization parameter is highly desirable in many applications. This paper proposes a novel iterative method, termed the Projected Newton method (PNT), that can simultaneously update the regularization parameter and solution step by step without requiring any high-cost matrix inversions or decompositions. The Tikhonov regularization is reformulated as a constrained minimization problem, and a Newton-type method coupled with a Krylov subspace method, the generalized Golub-Kahan bidiagonalization, is applied to the unconstrained Lagrangian function. The resulting PNT algorithm only requires solving a small-scale linear system to obtain a descent direction of a merit function at each iteration, thus significantly reducing computational overhead. Rigorous convergence results are proved, showing that PNT always converges to the unique regularized solution and the corresponding Lagrange multiplier. Experimental results on both small and large-scale Bayesian inverse problems demonstrate its excellent convergence property, robustness and efficiency. Given that the most demanding computational tasks in PNT are primarily matrix-vector products, it is particularly well-suited for large-scale problems.
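As a rough illustration of simultaneous parameter/solution updating (a dense full-space sketch, not PNT itself, which works in projected Krylov subspaces), one can run Newton's method on the discrepancy equation \(\phi(\lambda) = \|Ax_\lambda - b\|_2^2 - \delta^2 = 0\), re-solving the Tikhonov problem at each step; the test problem, noise level, and starting parameter below are assumptions:

```python
import numpy as np

rng = np.random.default_rng(2)
m, n = 100, 80
A = rng.standard_normal((m, n)) / np.sqrt(m)
x_true = rng.standard_normal(n)
e = 0.01 * rng.standard_normal(m)
b, delta2 = A @ x_true + e, e @ e              # noisy data and squared noise level

lam = 1.0
for _ in range(20):
    H = A.T @ A + lam * np.eye(n)
    x = np.linalg.solve(H, A.T @ b)            # Tikhonov solution for current lam
    r = A @ x - b
    phi = r @ r - delta2                       # discrepancy function
    dx = np.linalg.solve(H, -x)                # dx/dlam
    lam = max(lam - phi / (2 * r @ (A @ dx)), 1e-12)  # Newton step on phi(lam) = 0

print(lam, r @ r, delta2)                      # ||r||^2 ~ delta^2 at the root
```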
@article{li2024new, title = {A new interpretation of the weighted pseudoinverse and its applications}, author = {Li, Haibo}, journal = {SIAM Journal on Matrix Analysis and Applications}, volume = {46}, number = {2}, pages = {934--956}, year = {2025}, doi = {10.1137/24M1686073}, file = {2024-new.pdf} }
Consider the generalized linear least squares (GLS) problem \(\min \|Lx\|_2\) s.t. \(\|M(Ax-b)\|_2 = \min\). The weighted pseudoinverse \(A_{ML}^{\dagger}\) is the matrix that maps \(b\) to the minimum 2-norm solution of this GLS problem. By introducing a linear operator induced by \(A\), \(M\) and \(L\) between two finite-dimensional Hilbert spaces, we show that the minimum 2-norm solution of the GLS problem is equivalent to the minimum norm solution of a linear least squares problem involving this linear operator, and \(A_{ML}^{\dagger}\) can be expressed as the composition of the Moore-Penrose pseudoinverse of this linear operator and an orthogonal projector. With this new interpretation, we establish the generalized Moore-Penrose equations that completely characterize the weighted pseudoinverse, give a closed-form expression of the weighted pseudoinverse using the generalized singular value decomposition (GSVD), and propose a generalized LSQR (gLSQR) algorithm for iteratively solving the GLS problem. We construct several numerical examples to test the proposed iterative algorithm for solving GLS problems. Our results highlight the close connections between GLS, the weighted pseudoinverse, GSVD and gLSQR, providing new tools for both analysis and computation.
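A minimal dense sketch of applying the weighted pseudoinverse (via two sequential least squares solves over the set of inner-problem minimizers, not the paper's gLSQR iteration; the random dimensions are assumptions and the final equality holds in the generic full-rank case):

```python
import numpy as np

rng = np.random.default_rng(3)
m, n, p = 15, 20, 20
A, M = rng.standard_normal((m, n)), rng.standard_normal((m, m))
L, b = rng.standard_normal((p, n)), rng.standard_normal(m)

B = M @ A
x0 = np.linalg.lstsq(B, M @ b, rcond=None)[0]        # one minimizer of ||M(Ax-b)||_2
_, s, Vt = np.linalg.svd(B)
N = Vt[np.sum(s > 1e-12 * s[0]):].T                  # basis of null(MA)
z = np.linalg.lstsq(L @ N, -L @ x0, rcond=None)[0]   # minimize ||L(x0 + N z)||_2
x = x0 + N @ z                                       # = A_ML^dagger b (generic case)

print(np.linalg.norm(B @ x - M @ b), np.linalg.norm(L @ x))
```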
@article{li2024characterizing, title = {Characterizing GSVD by singular value expansion of linear operators and its computation}, author = {Li, Haibo}, journal = {SIAM Journal on Matrix Analysis and Applications}, volume = {46}, number = {1}, pages = {439--465}, doi = {10.1137/24M1651150}, year = {2025}, publisher = {SIAM}, file = {2024-character.pdf} }
The generalized singular value decomposition (GSVD) of a matrix pair \({A, L}\) with \(A \in \mathbb{R}^{m \times n}\) and \(L \in \mathbb{R}^{p \times n}\) generalizes the singular value decomposition (SVD) of a single matrix. In this paper, we provide a new understanding of the GSVD from the viewpoint of the SVD, based on which we propose a new iterative method for computing nontrivial GSVD components of a large-scale matrix pair. By introducing two linear operators \(\mathcal{A}\) and \(\mathcal{L}\) induced by \({A, L}\) between two finite-dimensional Hilbert spaces and applying the theory of singular value expansion (SVE) for linear compact operators, we show that the GSVD of \({A, L}\) is nothing but the SVEs of \(\mathcal{A}\) and \(\mathcal{L}\). This result completely characterizes the structure of the GSVD for any matrix pair with the same number of columns. As a direct application, we generalize the standard Golub-Kahan bidiagonalization (GKB), a basic routine for large-scale SVD computation, such that the resulting generalized GKB (gGKB) process can be used to approximate nontrivial extreme GSVD components of \({A, L}\); the resulting method is named the gGKB_GSVD algorithm. We use the GSVD of \({A, L}\) to study several basic properties of gGKB and also provide preliminary results about the convergence and accuracy of gGKB_GSVD for GSVD computation. Numerical experiments are presented to demonstrate the effectiveness of this method.
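A quick dense way to check nontrivial generalized singular values (not the paper's gGKB iteration; the random pair is an assumption): when \(L^T L\) is positive definite, the generalized singular values \(\sigma_i\) of \({A, L}\) satisfy \(A^T A v = \sigma_i^2 L^T L v\), a symmetric-definite generalized eigenproblem.

```python
import numpy as np
from scipy.linalg import eigh

rng = np.random.default_rng(4)
m, p, n = 40, 30, 20
A, L = rng.standard_normal((m, n)), rng.standard_normal((p, n))

# Symmetric-definite pencil (A^T A, L^T L); its eigenvalues are sigma_i^2
lam = eigh(A.T @ A, L.T @ L, eigvals_only=True)
print(np.sqrt(lam)[-3:])          # three largest generalized singular values
```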
@article{li2022backward, title = {Backward error analysis of the Lanczos bidiagonalization with reorthogonalization}, author = {Li, Haibo and Tan, Guangming and Zhao, Tong}, journal = {Journal of Computational and Applied Mathematics}, volume = {460}, pages = {116414}, year = {2025}, doi = {10.1016/j.cam.2024.116414}, file = {2025-backward.pdf} }
The k-step Lanczos bidiagonalization reduces a matrix \(A \in \mathbb{R}^{m \times n}\) to a bidiagonal form \(B_k \in \mathbb{R}^{(k+1) \times k}\) while generating two orthonormal matrices \(U_{k+1} \in \mathbb{R}^{m \times (k+1)}\) and \(V_{k+1} \in \mathbb{R}^{n \times (k+1)}\). However, any practical implementation of the algorithm suffers from loss of orthogonality of \(U_{k+1}\) and \(V_{k+1}\) due to the presence of rounding errors, and several reorthogonalization strategies have been proposed to maintain some level of orthogonality. In this paper, by writing various reorthogonalization strategies in a general form, we make a backward error analysis of the Lanczos bidiagonalization with reorthogonalization (LBRO). Our results show that the \(B_k\) computed by the k-step LBRO of \(A\) with starting vector \(b\) is the exact one generated by the k-step Lanczos bidiagonalization of \(A+E\) with starting vector \(b+\delta b\) (denoted by LB(\(A+E, b+\delta b\))), where the 2-norms of the perturbations \(\delta b\) and \(E\) depend on the roundoff unit and the orthogonality levels of \(U_{k+1}\) and \(V_{k+1}\). The results also show that the 2-norms of \(U_{k+1}-\bar{U}_{k+1}\) and \(V_{k+1}-\bar{V}_{k+1}\) are controlled by the orthogonality levels of \(U_{k+1}\) and \(V_{k+1}\), respectively, where \(\bar{U}_{k+1}\) and \(\bar{V}_{k+1}\) are the two orthonormal matrices generated by the k-step LB(\(A+E, b+\delta b\)) in exact arithmetic. Thus the k-step LBRO is mixed forward-backward stable as long as the orthogonality levels of \(U_{k+1}\) and \(V_{k+1}\) are good enough. We use this result to investigate the backward stability of the LBRO-based SVD computation algorithm and the LSQR algorithm. Numerical experiments are made to confirm our results.
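The quantities the backward error bounds depend on are the orthogonality levels of the computed Lanczos vectors; a minimal sketch of measuring them, together with the classical Gram-Schmidt pass that full reorthogonalization repeats at every step, is below (the helper names are illustrative):

```python
import numpy as np

def orthogonality_level(Q):
    """||I - Q^T Q||_2, the quantity the backward error bounds depend on."""
    return np.linalg.norm(np.eye(Q.shape[1]) - Q.T @ Q, 2)

def reorthogonalize(w, Q):
    """One classical Gram-Schmidt pass of w against the columns of Q
    (full reorthogonalization applies this, typically twice, at each step)."""
    return w - Q @ (Q.T @ w)

# usage: with U, V from any k-step bidiagonalization routine,
# print(orthogonality_level(U), orthogonality_level(V))
```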
@article{li2024preconditioned, title = {A preconditioned Krylov subspace method for linear inverse problems with general-form Tikhonov regularization}, author = {Li, Haibo}, journal = {SIAM Journal on Scientific Computing}, volume = {46}, number = {4}, pages = {A2607--A2633}, doi = {10.1137/23M1593802}, year = {2024}, publisher = {SIAM}, file = {2024-pGKB.pdf} }
Tikhonov regularization is a widely used technique in solving inverse problems that can enforce prior properties on the desired solution. In this paper, we propose a Krylov subspace based iterative method for solving linear inverse problems with general-form Tikhonov regularization term \(x^TMx\), where \(M\) is a positive semidefinite matrix. An iterative process called the preconditioned Golub–Kahan bidiagonalization (pGKB) is designed, which implicitly utilizes a proper preconditioner to generate a series of solution subspaces with desirable properties encoded by the regularizer \(x^TMx\). Based on the pGKB process, we propose an iterative regularization algorithm via projecting the original problem onto small dimensional solution subspaces. We analyze the regularization properties of this algorithm, including the incorporation of prior properties of the desired solution into the solution subspace and the semiconvergence behavior of the regularized solution. To overcome instabilities caused by semiconvergence, we further propose two pGKB based hybrid regularization algorithms. All the proposed algorithms are tested on both small-scale and large-scale linear inverse problems. Numerical results demonstrate that these iterative algorithms exhibit excellent performance, outperforming other state-of-the-art algorithms in some cases.
@article{li2024JBDinner, author = {Li, Haibo}, title = {The Joint Bidiagonalization of a Matrix Pair with Inaccurate Inner Iterations}, journal = {SIAM Journal on Matrix Analysis and Applications}, volume = {45}, number = {1}, pages = {232--259}, year = {2024}, doi = {10.1137/22M1541083}, file = {2024-JBDinner.pdf} }
The joint bidiagonalization (JBD) process iteratively reduces a matrix pair \({A,L}\) to two bidiagonal forms simultaneously, which can be used for computing a partial generalized singular value decomposition (GSVD) of \({A,L}\). The process has a nested inner-outer iteration structure, where the inner iteration usually cannot be computed exactly. In this paper, we study the inaccurately computed inner iterations of JBD by first investigating the influence of computational error of the inner iteration on the outer iteration, and then proposing a reorthogonalized JBD (rJBD) process to keep orthogonality of a part of Lanczos vectors. An error analysis of the rJBD is carried out to build up connections with Lanczos bidiagonalizations. The results are then used to investigate convergence and accuracy of the rJBD based GSVD computation. It is shown that the accuracy of computed GSVD components depends on the computing accuracy of inner iterations and the condition number of \((A^T,L^T)^T\), while the convergence rate is not affected very much. For practical JBD based GSVD computations, our results can provide a guideline for choosing a proper computing accuracy of inner iterations in order to obtain approximate GSVD components with a desired accuracy. Numerical experiments are made to confirm our theoretical results.
@article{li2024double, title = {Double precision is not necessary for LSQR for solving discrete linear ill-posed problems}, author = {Li, Haibo}, journal = {Journal of Scientific Computing}, volume = {98}, number = {3}, pages = {55}, year = {2024}, doi = {10.1007/s10915-023-02447-4}, publisher = {Springer}, file = {2024-double.pdf} }
The growing availability and usage of low precision floating point formats has attracted much interest in developing lower or mixed precision algorithms for scientific computing problems. In this paper we investigate the possibility of exploiting mixed precision computing in LSQR for solving discrete linear ill-posed problems. Based on the commonly used regularization model for linear inverse problems, we analyze the choice of proper computing precision in the two main parts of LSQR: the construction of the Krylov subspace and the updating procedure for the iterative solution. We show that, under some mild conditions, the Lanczos vectors can be computed in single precision without any loss of accuracy in the final regularized solution as long as the noise level is not extremely small. We also show that the most time-consuming part of updating the iterative solution can be performed in single precision without sacrificing any accuracy. The results indicate that several highly time-consuming parts of the algorithm can be implemented in lower precision, and they provide a theoretical guideline for implementing a robust and efficient mixed precision variant of LSQR for solving discrete linear ill-posed problems. Numerical experiments are made to test two mixed precision variants of LSQR and confirm our results.
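A crude proxy for this analysis (running all of LSQR in single precision rather than a genuine mixed precision variant; the test problem and stopping index are assumptions) already illustrates the point that single precision need not degrade the early-stopped regularized solution:

```python
import numpy as np
from scipy.sparse.linalg import lsqr

n = 200
t = np.linspace(0, 1, n)
A = np.exp(-10 * np.abs(t[:, None] - t[None, :])) / n   # smoothing kernel, ill-conditioned
x_true = np.sin(2 * np.pi * t)
b = A @ x_true + 1e-3 * np.random.default_rng(5).standard_normal(n)

k = 30  # early stopping acts as the regularization
x64 = lsqr(A, b, iter_lim=k)[0]
x32 = lsqr(A.astype(np.float32), b.astype(np.float32), iter_lim=k)[0]

err = lambda x: np.linalg.norm(x - x_true) / np.linalg.norm(x_true)
print(err(x64), err(x32))   # typically very close when the noise level is not tiny
```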
@article{yan202410, title = {10-Million Atoms Simulation of First-Principle Package LS3DF}, author = {Yan, Yu-Jin and Li, Hai-Bo and Zhao, Tong and Wang, Lin-Wang and Shi, Lin and Liu, Tao and Tan, Guang-Ming and Jia, Wei-Le and Sun, Ning-Hui}, journal = {Journal of Computer Science and Technology}, volume = {39}, number = {1}, pages = {45--62}, year = {2024}, doi = {10.1007/s11390-023-3011-6}, publisher = {Springer}, file = {2024-ls3df.pdf} }
The growing demand for semiconductor device simulation poses a big challenge for large-scale electronic structure calculations. Among various methods, the linearly scaling three-dimensional fragment (LS3DF) method exhibits excellent scalability in large-scale simulations. Based on algorithmic and system-level optimizations, we propose a highly scalable and highly efficient implementation of LS3DF on a domestic heterogeneous supercomputer equipped with accelerators. In terms of algorithmic optimizations, the original all-band conjugate gradient algorithm is refined to achieve faster convergence, and mixed precision computing is adopted to increase overall efficiency. In terms of system-level optimizations, the original two-layer parallel structure is replaced by a coarse-grained parallel method. Optimization strategies such as multi-stream execution, kernel fusion, and redundant computation removal are proposed to further increase utilization of the computational power provided by the heterogeneous machines. As a result, our optimized LS3DF can scale to a 10-million-atom silicon system, attaining a peak performance of 34.8 PFLOPS (21.2% of the theoretical peak). All the improvements can be adapted to next-generation supercomputers for larger simulations.
@article{JiaLiJBD2023, author = {Jia, Zhongxiao and Li, Haibo}, title = {The Joint Bidiagonalization Method for Large GSVD Computations in Finite Precision}, journal = {SIAM Journal on Matrix Analysis and Applications}, volume = {44}, number = {1}, pages = {382--407}, year = {2023}, doi = {10.1137/22M1483608}, file = {jialisimax2023.pdf} }
The joint bidiagonalization (JBD) method has been used to compute some extreme generalized singular values and vectors of a large regular matrix pair \({A,L}\). We make a numerical analysis of the underlying JBD process and establish relationships between it and two mathematically equivalent Lanczos bidiagonalizations in finite precision. Based on the results of the numerical analysis, we investigate the convergence of the approximate generalized singular values and vectors of \({A,L}\). The results show that, under some mild conditions, the semiorthogonality of Lanczos-type vectors suffices to deliver approximate generalized singular values with the same accuracy as full orthogonality does, meaning that it is only necessary to seek efficient semiorthogonalization strategies for the JBD process. We also establish a sharp bound for the residual norm of an approximate generalized singular value and its corresponding approximate right generalized singular vectors, which can reliably estimate the residual norm without explicitly computing the approximate right generalized singular vectors before convergence occurs.
@article{jia2021joint, title = {The joint bidiagonalization process with partial reorthogonalization}, author = {Jia, Zhongxiao and Li, Haibo}, journal = {Numerical Algorithms}, volume = {88}, pages = {965--992}, year = {2021}, doi = {10.1007/s11075-020-01064-8}, publisher = {Springer}, file = {2021-JBDPRO.pdf} }
The joint bidiagonalization (JBD) process is a useful algorithm for computing the generalized singular value decomposition (GSVD) of a matrix pair. However, it always suffers from rounding errors, which cause the Lanczos vectors to lose their mutual orthogonality. In order to maintain some level of orthogonality, we present a semiorthogonalization strategy. Our rounding error analysis shows that the JBD process with the semiorthogonalization strategy can ensure that the convergence of the computed quantities is not affected by rounding errors and that the final accuracy is high enough. Based on the semiorthogonalization strategy, we develop the joint bidiagonalization process with partial reorthogonalization (JBDPRO). In the JBDPRO algorithm, reorthogonalizations occur only when necessary, which saves a significant amount of reorthogonalization work compared with the full reorthogonalization strategy. Numerical experiments illustrate our theory and algorithm.
@inproceedings{liu2024tddft, title = {Large Scale Finite-Temperature Real-time Time Dependent Density Functional Theory Calculation with Hybrid Functional on ARM and GPU Systems}, author = {Liu, Rongrong and Guo, Zhuoqiang and Sha, Qiuchen and Zhao, Tong and Li, Haibo and Hu, Wei and Liu, Lijun and Tan, Guangming and Jia, Weile}, booktitle = {39th IEEE International Parallel \& Distributed Processing Symposium (IPDPS 2025)}, year = {2025}, address = {Milano, Italy}, file = {2025-TDDFT.pdf} }
Ultra-fast electronic phenomena originating from finite temperature, such as nonlinear optical excitation, can be simulated with high fidelity via real-time time dependent density functional theory (rt-TDDFT) calculations with hybrid functionals. However, previous rt-TDDFT simulations of real materials using the optimal gauge, known as the parallel transport gauge, have been limited to low-temperature systems with band gaps. In this paper, we introduce the parallel transport-implicit midpoint (PT-IM) method, which significantly accelerates finite-temperature rt-TDDFT calculations of real materials with hybrid functionals. We first implement PT-IM with hybrid functionals in our plane wave code PWDFT and optimize it on both GPU and ARM platforms to build a solid baseline code. Next, we propose a diagonalization method to reduce computation and communication complexity, and we employ the adaptively compressed exchange (ACE) method to reduce the frequency of applying the most expensive Fock exchange operator. Finally, we adopt a ring-based method and a shared-memory mechanism to overlap computation with communication and to alleviate memory consumption, respectively. Numerical results show that our optimized code can perform rt-TDDFT simulations with hybrid functionals at finite temperature for systems of up to 3072 atoms on 192 computing nodes; the time-to-solution for one time step is 429.3 s, 41.4 times faster than the baseline.
Haibo Li
Research Fellow
University of Melbourne
G82, Peter Hall Building (160)
University of Melbourne
Melbourne VIC 3010, Australia