﻿<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "http://jats.nlm.nih.gov/publishing/1.0/JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="nlm-ta">Complex Eng. Syst.</journal-id>
      <journal-id journal-id-type="publisher-id">COMENGSYS</journal-id>
      <journal-title-group>
        <journal-title>Complex Engineering Systems</journal-title>
      </journal-title-group>
      <issn pub-type="epub">2770-6249</issn>
      <publisher>
        <publisher-name>OAE Publishing Inc.</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
	<article-id pub-id-type="doi">10.20517/ces.2026.13</article-id>
      <article-categories>
        <subj-group>
          <subject>Research Article</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Fault diagnosis of water injection pump via wavelet-enhanced attention guided Inception-LSTM networks</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <name>
            <surname>Wu</surname>
            <given-names>Xiao</given-names>
          </name>
          <xref ref-type="aff" rid="I1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Wu</surname>
            <given-names>Zelin</given-names>
          </name>
          <xref ref-type="aff" rid="I1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Luo</surname>
            <given-names>Feng</given-names>
          </name>
          <xref ref-type="aff" rid="I1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Wang</surname>
            <given-names>Jiawei</given-names>
          </name>
          <xref ref-type="aff" rid="I1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <name>
            <surname>Xia</surname>
            <given-names>Tangbin</given-names>
          </name>
          <xref ref-type="aff" rid="I1">
            <sup>1</sup>
          </xref>
          <xref ref-type="aff" rid="I2">
            <sup>2</sup>
          </xref>
          <xref ref-type="aff" rid="I3">
            <sup>3</sup>
          </xref>
          <xref ref-type="corresp" rid="cor1" />
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Xi</surname>
            <given-names>Lifeng</given-names>
          </name>
          <xref ref-type="aff" rid="I1">
            <sup>1</sup>
          </xref>
          <xref ref-type="aff" rid="I2">
            <sup>2</sup>
          </xref>
        </contrib>
      </contrib-group>
      <aff id="I1">
        <sup>1</sup>School of Mechanical Engineering, Shanghai Jiao Tong University, Shanghai 200240, China.</aff>
      <aff id="I2">
        <sup>2</sup>Special Environment Digital Manufacturing Equipment Technology Innovation Center, Mianyang 621900, Sichuan, China.</aff>
      <aff id="I3">
        <sup>3</sup>Shanghai Changxing Ocean Laboratory, Shanghai 201913, China.</aff>
      <author-notes>
        <corresp id="cor1">Correspondence to: Dr. Tangbin Xia, School of Mechanical Engineering, Shanghai Jiao Tong University, Shanghai 200240, China. E-mail: <email>xtbxtb@sjtu.edu.cn</email></corresp>
     
	 
	 <fn fn-type="other">
          <p>
            <bold>Received:</bold> 16 Mar 2026 | <bold>First Decision:</bold> 29 Apr 2026 | <bold>Revised:</bold> 12 May 2026 | <bold>Accepted:</bold> 11 Jun 2026 | <bold>Published:</bold> 22 Jun 2026</p>
        </fn>
        <fn fn-type="other">
          <p>
            <bold>Academic Editor:</bold> Zhiqiang Ge | <bold>Copy Editor:</bold> Fangling Lan | <bold>Production Editor:</bold> Fangling Lan</p>
        </fn>
      </author-notes>
      <pub-date pub-type="ppub">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>6</month>
        <year>2026</year>
      </pub-date>
      <volume>6</volume>
	  <issue>2</issue>
     <elocation-id>11</elocation-id>
	 
	 
      <permissions>
        <copyright-statement>© The Author(s) 2026.</copyright-statement>
        <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>© The Author(s) 2026. <bold>Open Access</bold> This article is licensed under a Creative Commons Attribution 4.0 International License (<uri xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</uri>), which permits unrestricted use, sharing, adaptation, distribution and reproduction in any medium or format, for any purpose, even commercially, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made.</license-p>
        </license>
      </permissions>
      <abstract>
        <p>Accurate fault diagnosis of water injection pumps is essential for ensuring operational safety and efficiency in oil and gas exploitation. However, traditional diagnostic methods often struggle with non-stationary vibration signals and severe category imbalance in complex industrial environments. To address these challenges, this paper proposes a multi-level Inception-long short-term memory (Inception-LSTM) network integrated with wavelet packet decomposition (WPD) and efficient channel attention (ECA), termed the multi-level Inception-LSTM network with WPD and ECA (MILN-WE). The proposed framework first employs WPD to decompose complex vibration signals into fine-grained frequency sub-bands, capturing subtle fault characteristics. Subsequently, a multi-scale Inception module is utilized to extract diverse spatial features, while an LSTM layer captures the long-term temporal dependencies of the signals. The integration of the ECA mechanism further enhances the model’s ability to focus on critical diagnostic information. The effectiveness of MILN-WE is validated using a private oilfield water injection pump dataset and a public rotating machinery dataset. Experimental results demonstrate that the proposed model achieves higher diagnostic accuracy and robustness compared to state-of-the-art methods, particularly under conditions of strong noise interference and data imbalance. Specifically, on the private oilfield water injection pump dataset, the model achieved an accuracy of 99.38%, improving upon traditional convolutional neural network (CNN) and class-balanced-CNN models by 6.05% and 3.24%, respectively. This study provides a high-precision and robust solution for the intelligent predictive maintenance of critical energy equipment, offering significant theoretical and practical value for industrial health monitoring systems.</p>
      </abstract>
      <kwd-group>
        <kwd>Water injection pump</kwd>
        <kwd>unbalanced data</kwd>
        <kwd>convolutional neural network</kwd>
        <kwd>long short-term memory network</kwd>
        <kwd>attention mechanism</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec1">
      <title>1. INTRODUCTION</title>
      <p>Hydraulic water injection pumps play a vital role in modern oil and gas exploitation, where they are responsible for injecting high-pressure fracturing fluids into underground formations to enhance hydrocarbon production. The reliability of water injection pump systems directly affects operational safety and production efficiency. Water injection pumps are critical flow-control elements that operate under severe working conditions, including high pressure, intense vibration, and rapidly changing loads. Long-term operation under such harsh environments may lead to valve wear, fatigue damage, and sealing failures, which can eventually cause equipment malfunction or even catastrophic accidents. Therefore, the development of reliable and accurate fault diagnosis techniques for water injection pumps has become an important research topic in industrial condition monitoring and predictive maintenance.</p>
      <p>Traditional fault diagnosis approaches for rotating machinery mainly rely on signal processing techniques combined with handcrafted feature extraction. Time-frequency analysis methods such as wavelet transform, empirical mode decomposition, and variational mode decomposition have been widely applied to extract representative features from vibration signals. These approaches aim to capture the non-stationary characteristics of mechanical vibration signals and improve fault identification performance. For instance, He <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B1">1</xref>]</sup> proposed a fault diagnosis framework integrating wavelet packet transform and convolutional neural networks (CNNs), where time-frequency representations were used to enhance feature discrimination. Guo <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B2">2</xref>]</sup> further improved the Morlet wavelet transform to enhance time-frequency resolution and combined it with a shallow residual neural network for bearing fault classification. Zhai <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B3">3</xref>]</sup> developed a diagnostic method based on synchrosqueezing wavelet transform and a transfer residual convolutional neural network to address feature extraction challenges caused by complex vibration signals.</p>
      <p>Although these signal processing methods can effectively reveal certain fault characteristics, their performance strongly depends on expert knowledge and manual feature engineering. In complex industrial environments, vibration signals are often contaminated by strong noise and nonlinear disturbances, which makes it difficult for handcrafted features to maintain stable diagnostic performance. Consequently, traditional diagnostic approaches often suffer from limited adaptability and generalization ability when applied to practical industrial scenarios.</p>
      <p>With the rapid development of artificial intelligence technologies, deep learning has emerged as a powerful tool for intelligent fault diagnosis. Compared with traditional methods, deep neural networks can automatically learn hierarchical feature representations directly from raw sensor signals, thereby significantly reducing the reliance on manual feature extraction. CNNs, in particular, have demonstrated strong capability in feature learning and pattern recognition and have been widely adopted in rotating machinery fault diagnosis.</p>
      <p>Recent studies have reported significant progress in CNN-based diagnostic frameworks. Tan <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B4">4</xref>]</sup> demonstrated the efficacy of coupling long short-term memory (LSTM) with CNNs for diagnosing mixed-flow pumps under complex cavitation conditions, highlighting the potential of recurrent architectures in capturing fluid-induced vibrations. Gao <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B5">5</xref>]</sup> introduced a diagnostic approach combining continuous wavelet transform and deep convolutional generative adversarial networks to address the issue of imbalanced datasets in machinery fault classification tasks. Deng <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B6">6</xref>]</sup> further developed an attention-based CNN that enhances feature representation capability by adaptively focusing on important signal components.</p>
      <p>In addition to convolutional architectures, more advanced deep learning models have also been explored to improve diagnostic performance. Transformer-based architectures have recently attracted considerable attention due to their ability to capture long-range dependencies and global contextual information in signals. Lai <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B7">7</xref>]</sup> proposed a residual attention vision transformer network for rolling bearing fault diagnosis, which integrates convolutional feature extraction with self-attention mechanisms to capture both local and global signal features. Liu <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B8">8</xref>]</sup> further proposed a Transformer transfer learning framework capable of improving fault diagnosis performance under cross-condition scenarios.</p>
      <p>Another important research direction focuses on improving the interpretability and physical consistency of deep learning models. Several studies have attempted to integrate signal processing knowledge into neural network structures. Hassannejad <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B9">9</xref>]</sup> proposed a physics-informed CNN in which wavelet-based feature extraction was embedded into the network architecture to improve both interpretability and diagnostic accuracy. Similarly, Deng <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B10">10</xref>]</sup> proposed a multi-sensor fusion framework for axial piston pumps, while Kim <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B11">11</xref>]</sup> validated the superiority of Transformer-based models in identifying pressure signal anomalies.</p>
      <p>Despite the promising results achieved by deep learning methods, most existing studies assume that training and testing data follow the same distribution. However, in real industrial applications, operating conditions such as load, rotational speed, and pressure frequently vary over time. These variations often lead to distribution discrepancies between training and testing data, which can notably degrade the performance of deep learning models. Therefore, improving the generalization capability of intelligent diagnostic models under varying operating conditions has become a critical challenge in machinery fault diagnosis.</p>
      <p>To address this issue, transfer learning has been widely investigated in recent years. Transfer learning aims to transfer knowledge learned from a source domain to a target domain with limited labeled data, thereby improving diagnostic performance under varying operating conditions. Zhao <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B12">12</xref>]</sup> proposed a wavelet convolution-based transfer learning framework for cross-machine fault diagnosis, demonstrating improved robustness under different working conditions. Yu <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B13">13</xref>]</sup> introduced a domain adaptation neural network based on maximum mean discrepancy to align feature distributions between source and target domains. Sun <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B14">14</xref>]</sup> further proposed an adversarial domain adaptation framework that employs a domain discriminator to reduce distribution discrepancies across domains.</p>
      <p>More recently, contrastive learning and self-supervised learning strategies have also been applied to improve feature representation and transferability. Li <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B15">15</xref>]</sup> proposed a contrastive learning-based diagnostic framework capable of learning more discriminative feature representations for rotating machinery faults. Zhu <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B16">16</xref>]</sup> further developed a supervised contrastive transfer learning method that combines domain adaptation with contrastive loss to enhance feature discrimination across different domains. Moreover, to handle the distribution discrepancies in decentralized data, Zhou <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B17">17</xref>]</sup> proposed a modular federated learning framework using dynamic routing to collaboratively optimize local models under multiple working conditions.</p>
      <p>In addition to domain adaptation strategies, several studies have explored other deep learning frameworks for machinery health monitoring and fault diagnosis. Shao <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B18">18</xref>]</sup> proposed a deep autoencoder-based feature learning method for rotating machinery diagnosis. Wang <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B19">19</xref>]</sup> developed a recurrent neural network-based health indicator for equipment health monitoring and remaining useful life prediction. Zhang <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B20">20</xref>]</sup> optimized CNN architectures to improve diagnostic accuracy under complex working conditions. Zhang <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B21">21</xref>]</sup> and Zhu <italic>et al</italic>.<sup>[<xref ref-type="bibr" rid="B22">22</xref>]</sup> further demonstrated the effectiveness of deep convolutional networks in machinery fault diagnosis tasks.</p>
      <p>Beyond pure fault identification, the ultimate goal of machinery health monitoring is to facilitate systemic and intelligent predictive maintenance. Recently, advanced prognosis and dynamic maintenance strategies have attracted substantial attention in the industrial engineering field. For example, researchers have developed prognosis-centered intelligent maintenance optimization frameworks that systematically account for uncertain failure thresholds<sup>[<xref ref-type="bibr" rid="B23">23</xref>]</sup>, as well as customized multi-agent reinforcement learning approaches for systemic condition-based maintenance under inspection uncertainties<sup>[<xref ref-type="bibr" rid="B24">24</xref>]</sup>. Furthermore, for large-scale industrial systems, adaptive health prediction has been integrated with global dynamic maintenance decision-making to optimize group machinery operations<sup>[<xref ref-type="bibr" rid="B25">25</xref>]</sup>. Since robust and dynamic maintenance policies heavily rely on accurate condition sensing, developing a highly reliable fault diagnosis model under complex industrial constraints becomes a critical prerequisite for successfully implementing these downstream predictive maintenance systems.</p>
      <p>Despite the promising results achieved by advanced deep learning methods, real-world industrial applications still face critical challenges. A systematic comparison reveals the distinct limitations of existing methods in handling non-stationary signals and severe data imbalance. First, traditional signal processing methods struggle with the highly non-stationary nature of vibration signals, as they rely heavily on static, handcrafted features that fail to dynamically capture transient fault shocks submerged in strong background noise. Second, while standard deep learning models can automatically extract features, they fundamentally assume a balanced data distribution. In practical scenarios, severe category imbalance is inevitable; critical faults force immediate equipment shutdown, resulting in a severe scarcity of fault samples compared to normal operating data. Consequently, traditional deep models tend to overfit the majority class while ignoring minority fault states. Although some recent studies have employed generative models or data resampling to supplement minority classes, these data-level approaches often introduce artificial noise and struggle to extract truly discriminative features under extreme imbalance. Therefore, a critical research gap exists: there is a lack of an integrated diagnostic framework capable of simultaneously isolating non-stationary fault features under strong noise and achieving precise minority-class identification without relying on potentially unreliable data resampling techniques. This distinct gap directly motivates the development of our proposed robust diagnostic architecture.</p>
      <p>To address the aforementioned challenges, this paper proposes a novel diagnostic framework termed the multi-level Inception-LSTM network with WPD and ECA (MILN-WE), which employs wavelet packet decomposition (WPD) to adaptively decompose non-stationary vibration signals into multiple frequency sub-bands, thereby isolating subtle fault-related transients from background noise. To capture the complex spatial-temporal patterns, a multi-scale Inception module is integrated to extract features across different receptive fields, while an LSTM layer is utilized to model the long-term temporal dependencies inherent in the water injection pump cycles. Importantly, instead of conventional feature concatenation, we uniquely embed the efficient channel attention (ECA) mechanism to perform adaptive weighted fusion of the multi-scale spatial-temporal features extracted independently from each WPD sub-band. By utilizing a non-dimensionality-reduction local cross-channel interaction strategy, the model autonomously assigns higher fusion weights to the specific frequency sub-bands that contain critical minority-class fault signatures, while actively suppressing noise-dominated sub-bands. This integrated architecture ensures high diagnostic precision and robust performance under complex industrial operating conditions. The main contributions of this paper are as follows:</p>
      <p>(1) A novel hybrid diagnostic framework, MILN-WE, is proposed, which effectively integrates WPD, multi-scale Inception-LSTM, and an ECA mechanism to isolate subtle fault-related transients from background noise;</p>
      <p>(2) The proposed model provides a robust solution for multi-scale feature extraction and precise minority-class identification in scenarios characterized by severe category imbalance, without relying on traditional data resampling techniques;</p>
      <p>(3) Extensive empirical validation on both a private oilfield water injection pump dataset and a public rotating machinery dataset demonstrates the model’s superior diagnostic accuracy, generalization capability, and robustness against strong noise interference compared to existing state-of-the-art methods.</p>
      <p>The remainder of this paper is organized as follows. Sections 2 and 3 introduce the details of the proposed framework and sub-modules. Section 4 presents the experimental study, and Section 5 concludes the paper with a summary of findings and suggestions for future work.</p>
    </sec>
    <sec id="sec2">
      <title>2. PRELIMINARY MATERIALS</title>
      <sec id="sec2-1">
        <title>2.1 Wavelet packet decomposition</title>
        <p>WPD, as an extension of the Discrete Wavelet Transform (DWT), provides a more refined time-frequency analysis capability. Through recursive filtering and down-sampling processes, WPD not only iteratively decomposes low-frequency parts but also further segments high-frequency parts, thereby achieving multi-level decomposition. For an <italic>n</italic>-level decomposition, WPD generates 2<sup><italic>n</italic></sup> sets of coefficients (nodes) at the final level, rather than just (<italic>n</italic> + 1) sets as in DWT. Although the total number of coefficients remains unchanged due to down-sampling, this provides greater flexibility in adapting to different signal characteristics. The basic process of WPD is shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>.</p>
        <fig id="fig1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Three-level wavelet packet decomposition process.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.1.jpg" />
        </fig>
        <p>The formula for WPD is as follows:</p>
       
	   <p><disp-formula> <label>(1)</label> <tex-math id="E1"> $$ \begin{equation}  \begin{aligned} P_{(t)}=  \textstyle\sum_{j=-\infty}^{\infty} \alpha_{j k} \varphi_{j k}(t)+ \textstyle\sum_{j=0}^{\infty}  \textstyle\sum_{k=-\infty}^{\infty} \beta_{j k} \varphi_{j k}(t) \end{aligned} \end{equation} $$ </tex-math>
</disp-formula></p>

	   
	   
	   
	   
      </sec>
      <sec id="sec2-2">
        <title>2.2 LSTM module</title>
        <p>LSTM is a variant of the Recurrent Neural Network that can process sequential data while addressing the problems of gradient vanishing and gradient exploding that occur when training on long sequences. It allows the model to retain information from previous data, thereby further enhancing the model’s capability to capture the features of each sample, as shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>.</p>
        <fig id="fig2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The structure of the LSTM module.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.2.jpg" />
        </fig>
        <p>An LSTM unit consists of an input gate, a forget gate, an output gate, and a cell state. The transition formulas at time <italic>t</italic> are as follows:</p>
       
	   
	   <p><disp-formula> <label>(2)</label> <tex-math id="E2"> $$ \begin{equation}  \begin{aligned}  \mathrm{Input~gate:}   \left\{\begin{array}{c}i_{t}=\sigma\left(W_{i} \cdot\left[\begin{array}{ll}h_{t-1} &amp; x_{t}\end{array}\right]+b_{i}\right) \\ \overline{C_{t}}=\tanh \left(W_{c} \cdot\left[\begin{array}{ll}h_{t-1} &amp; x_{t}\end{array}\right]+b_{c}\right)\end{array}\right.  \end{aligned} \end{equation} $$ </tex-math>
</disp-formula></p>

<p><disp-formula> <label>(3)</label> <tex-math id="E3"> $$ \begin{equation}  \begin{aligned}  \mathrm{Forget~gate:}  f_{t}=\sigma\left(W_{f} \cdot\left[\begin{array}{ll}h_{t-1} &amp; x_{t}\end{array}\right]+b_{f}\right)  \end{aligned} \end{equation} $$ </tex-math>
</disp-formula></p>

<p><disp-formula> <label>(4)</label> <tex-math id="E4"> $$ \begin{equation}  \begin{aligned} \mathrm{Output~gate:}  \left\{\begin{array}{c}o_{t}=\sigma\left(W_{o} \cdot\left[h_{t-1} \quad x_{t}\right]+b_{o}\right) \\ h_{t}=o_{t} * \tanh \left(C_{t}\right)\end{array}\right. \end{aligned} \end{equation} $$ </tex-math>
</disp-formula></p>

<p><disp-formula> <label>(5)</label> <tex-math id="E5"> $$ \begin{equation}  \begin{aligned} \mathrm{Cell~state}:  C_{t}=f_{t} * C_{t-1}+i_{t} * \overline{C_{t}} \end{aligned} \end{equation} $$ </tex-math>
</disp-formula></p>


	   
	   
	   
        <p>where <italic>x<sub>t</sub></italic> represents the sequence of the input unit, <italic>h<sub>t</sub></italic> denotes the hidden state of the unit, and <italic>C<sub>t</sub></italic> signifies the cell state. <italic>W<sub>i</sub></italic>, <italic>W<sub>c</sub></italic>, <italic>b<sub>i</sub></italic>, and <italic>b<sub>c</sub></italic> are the weight matrices and bias terms for the input gate; <italic>W<sub>f</sub></italic> and <italic>b<sub>f</sub></italic> are those for the forget gate; <italic>W<sub>o</sub></italic> and <italic>b<sub>o</sub></italic> are those for the output gate; and <italic>σ</italic> represents the sigmoid function.</p>
      </sec>
      <sec id="sec2-3">
        <title>2.3 ECA mechanism</title>
        <p>The ECA mechanism is a lightweight attention module specifically designed for deep CNNs. It achieves performance gains with minimal increases in complexity by utilizing an appropriate cross-channel interaction strategy. This strategy is implemented via one-dimensional convolutions, which notably reduce model complexity while maintaining performance, as shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>. The kernel size (<italic>k</italic>) for the convolution operation is adaptively selected based on the following formula to ensure adequate coverage of local cross-channel interactions:</p>
       
	   <p><disp-formula> <label>(6)</label> <tex-math id="E6"> $$ \begin{equation}  \begin{aligned}  k=\varphi(D)=\left|\frac{\log _{2} D}{\gamma}+\frac{b}{\gamma}\right|_{o d d}  \end{aligned} \end{equation} $$ </tex-math>
</disp-formula></p>

	   
	   
	   
       
        <p>where <italic>k</italic> represents the size of the convolutional kernel, <italic>D</italic> represents the dimension of the input sequence, and |<italic>n</italic>|<italic><sub>odd</sub></italic> indicates the nearest odd number to <italic>n</italic>. Additionally, the mapping parameters <italic>γ</italic> and <italic>b</italic> are fixed at 2.0 and 1.0, respectively<sup>[<xref ref-type="bibr" rid="B26">26</xref>]</sup>, which enables the network to adaptively determine the optimal kernel size <italic>k</italic> based on the channel dimension <italic>D</italic>, ensuring efficient cross-channel interaction without manual tuning.</p>
		 <fig id="fig3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Schematic diagram of the ECA mechanism.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.3.jpg" />
        </fig>
      </sec>
    </sec>
    <sec id="sec3">
      <title>3. THE METHODOLOGY</title>
      <sec id="sec3-1">
        <title>3.1 Overall framework of MILN-WE</title>
        <p>To achieve high-precision fault diagnosis of water injection pumps under complex industrial conditions, this paper proposes a multi-level Inception-LSTM network integrated with a wavelet-enhanced attention mechanism, termed MILN-WE. The overall architecture is designed to handle the non-stationary nature of vibration signals and the challenges of feature extraction from imbalanced data.</p>
        <p>The proposed MILN-WE framework consists of three main stages: signal decomposition, multi-scale feature extraction, and feature fusion classification. The systematic flowchart of the MILN-WE is illustrated in <xref ref-type="fig" rid="fig4">Figure 4</xref>.</p>
        <fig id="fig4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Overall architecture of the proposed MILN-WE framework.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.4.jpg" />
        </fig>
        <p>Specifically, the raw vibration signals collected from the water injection pump are first processed using a three-level WPD. A three-level decomposition was selected because it provides a sufficient frequency resolution to isolate subtle fault characteristics without introducing excessive computational complexity or over-segmenting the signal<sup>[<xref ref-type="bibr" rid="B27">27</xref>,<xref ref-type="bibr" rid="B28">28</xref>]</sup>. This process decomposes the original complex signal into eight distinct frequency sub-bands. By transforming the 1D time-series signal into multiple frequency components, the model can capture subtle fault characteristics that are often submerged in noise in the original domain.</p>
        <p>Subsequently, the features extracted from the eight frequency sub-bands are individually processed through the multi-scale Inception-LSTM branches. Within each branch, an ECA mechanism is integrated to adaptively recalibrate the importance of various feature channels. Unlike conventional hybrid models that typically rely on simple feature concatenation or late-stage attention pooling, this design leverages ECA to perform a non-dimensionality-reduction local cross-channel interaction specifically tailored for the independent WPD sub-bands. This advanced weighted fusion strategy allows the model to dynamically evaluate and effectively highlight fault-related transients while simultaneously suppressing background noise. To integrate information from different frequency domains, the enhanced latent features <italic>F<sub>i</sub></italic> from all branches are fused into a global representation <italic>F<sub>S</sub></italic> via a weighted summation strategy, expressed as <italic>F<sub>S</sub></italic> = ∑<italic><sub>i</sub></italic><italic>W<sub>Ci</sub>F<sub>i</sub></italic> (<italic>i</italic> = 1, 2, ..., 8), where <italic>W<sub>Ci</sub></italic> denotes the learned contribution weight of the <italic>i</italic>-th sub-band.</p>
        <p>Finally, this fused feature map is mapped into the label space through a fully connected layer. A Softmax activation function is then employed to calculate the probability distribution across various fault types, where the category with the highest probability determines the final diagnostic result of the water injection pump.</p>
      </sec>
      <sec id="sec3-2">
        <title>3.2 Inception-LSTM module</title>
        <p>In the fault diagnosis of water injection pumps, the complexity of vibration signals and the severe category imbalance in industrial environments present significant challenges. Traditional CNNs often employ a single-size convolutional kernel, which results in a fixed receptive field that may fail to capture diverse and subtle fault features, especially from minority class samples. To address this limitation, the proposed MILN-WE model utilizes a multi-scale Inception module instead of the standard CNN architecture. By incorporating parallel convolutional layers with different kernel sizes, the module can perceive local spatial features across multiple scales simultaneously. This design notably enhances the model’s ability to extract discriminative information from non-stationary signals and improves the diagnostic sensitivity for rare fault types, as shown in <xref ref-type="fig" rid="fig5">Figure 5</xref>. Specifically, to further enhance the discriminative power of the spatial features, local ECA modules are embedded within the Inception block following the Conv2-1 and Conv3-1 layers, prior to the feature concatenation.</p>
        <fig id="fig5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Hybrid structure of the Inception-LSTM network.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.5.jpg" />
        </fig>
        <p>To complement these spatial features, the architecture further integrates LSTM layers to model the temporal dependencies within the signal. The multi-scale feature maps generated by the Inception module are fed into the LSTM unit, which leverages its unique gating mechanisms—the input, forget, and output gates—to capture long-term correlations in the vibration sequences. By synergizing multi-scale spatial perception with temporal sequential modeling, the MILN-WE framework can derive highly robust and representative features from the sub-bands decomposed by WPD, providing a solid foundation for accurate fault classification under complex operational conditions.</p>
      </sec>
      <sec id="sec3-3">
        <title>3.3 Adaptive feature weighting via ECA</title>
        <p>Traditional feature concatenation or simple averaging fusion methods often overlook the varying contributions of different feature branches to the diagnosis of the current operating state, which can easily introduce redundant information and increase subsequent computational overhead. Therefore, during the feature enhancement and fusion stages, this paper introduces the ECA mechanism to perform adaptive weighted fusion of multi-scale features. It achieves cross-channel local interactions with an extremely low number of parameters, dynamically focusing on the most discriminative key features while avoiding information loss from feature dimensionality reduction. Specifically, complementing the local ECA modules inside the Inception blocks, an ECA-based global weighting mechanism is employed after the parallel Inception-LSTM branches and before the final classification layer to evaluate the eight frequency sub-bands. Through this design, the model captures cross-channel interactions via a non-dimension-reduction local cross-channel interaction strategy. This enables the network to adaptively assign higher weights to key feature channels that significantly contribute to fault diagnosis, while suppressing irrelevant background noise, thereby further enhancing the architecture’s overall performance under complex interference, as shown in <xref ref-type="fig" rid="fig6">Figure 6</xref>.</p>
        <fig id="fig6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Schematic diagram of adaptive feature weighting via ECA mechanism.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.6.jpg" />
        </fig>
        <p>The ECA-based feature fusion process comprises three strategic phases. First, multi-scale features are compressed into channel descriptors via Global Average Pooling (GAP) to capture a global receptive field. Subsequently, instead of using computationally expensive fully connected layers, the module employs a one-dimensional convolution (1D Conv) with an adaptive kernel size <italic>k</italic> = <italic>φ</italic>(<italic>D</italic>) to facilitate Local Cross-Channel Interaction. This approach effectively captures local dependencies and avoids information loss from dimensionality reduction. Finally, the generated attention weights <italic>W<sub>C</sub></italic><sub>1</sub> to <italic>W<sub>C</sub></italic><sub>8</sub> are applied to the original features through element-wise multiplication and summation. By amplifying fault-sensitive features and suppressing redundant noise, this adaptive strategy notably enhances the discriminative power of the fused feature, ensuring precise identification of the 16 complex operating states.</p>
      </sec>
    </sec>
    <sec id="sec4">
      <title>4. EXPERIMENTAL VERIFICATION</title>
      <sec id="sec4-1">
        <title>4.1 Dataset processing</title>
        <p>The private water injection pump dataset is derived from the field operation data of an oil field in China. The water injection pump model used in this study is 3H-8/450II, as shown in <xref ref-type="fig" rid="fig7">Figure 7</xref>. Data collection was performed using 15 vibration sensors installed at different locations, and the collected data were treated as independent single-channel input sequences. Additionally, all vibration signals were globally normalized prior to model training to eliminate amplitude variations caused by differences in distance and mounting positions between the sensors and the vibration sources, thereby ensuring that the diagnostic process is based on inherent time-frequency fault patterns. The specific installation positions of these sensors are listed in <xref ref-type="table" rid="t1">Table 1</xref>. The sampling frequency of the sensors is 8,192 Hz with a sampling duration of 1 s, meaning each sample contains 8,192 sampling points.</p>
        <fig id="fig7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Schematic diagram of the 3H-8/450II water injection pump structure and sensor layout (Photographed by the authors).</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.7.jpg" />
        </fig>
        <table-wrap id="t1">
          <label>Table 1</label>
          <caption>
            <p>Vibration sensor mounting areas</p>
          </caption>
          <table frame="hsides" rules="groups">
            <thead>
              <tr>
                <td style="border-bottom:1;">
                  <bold>Sensor layout</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Sensor layout</bold>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>Base Southeast</td>
                <td>West plunger stuffing box</td>
              </tr>
              <tr>
                <td>Base Northeast</td>
                <td>Center plunger stuffing box</td>
              </tr>
              <tr>
                <td>Base Northwest</td>
                <td>East plunger stuffing box</td>
              </tr>
              <tr>
                <td>Base Southwest</td>
                <td>Directly above pump head</td>
              </tr>
              <tr>
                <td>Crankshaft bearing</td>
                <td>Front of pump head</td>
              </tr>
              <tr>
                <td>Motor East</td>
                <td>Pump inlet pipeline</td>
              </tr>
              <tr>
                <td>Motor West</td>
                <td>Pump outlet pipeline</td>
              </tr>
              <tr>
                <td>Crankshaft East</td>
                <td>/</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>During the operation of the water injection pump, the motor at the power end drives the crankshaft to rotate, which in turn moves the plunger through the connecting rod, causing it to reciprocate within a high-sealing cylinder. The resulting ultra-high pressure pumps the sand-bearing fluid out of the hydraulic end. During this process, the pump head of the plunger pump is subjected to continuous high-intensity impacts. Under the constant erosion of the sand-bearing fluid, sealing components such as the plunger and valve seats are highly susceptible to wear and tear.</p>
        <p>The dataset contains a total of 5,190 samples across 16 different states (including the normal operating state), covering various common failures of plunger pumps, such as plunger wear, pump head spring wear, and bearing bush wear. <xref ref-type="fig" rid="fig8">Figures 8</xref>-<xref ref-type="fig" rid="fig10">10</xref> display the raw data sequence plots for some of these states. It can be observed that when the equipment is in a normal state, the cycles are clear, noise is relatively low, and the signal components are relatively simple. Conversely, when a fault occurs, the noise in the data increases notably, the data cycles change, and the underlying components become much more complex.</p>
        <fig id="fig8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Vibration data of normal state.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.8.jpg" />
        </fig>
        <fig id="fig9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Vibration data with worn pump head spring.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.9.jpg" />
        </fig>
        <fig id="fig10" position="float">
          <label>Figure 10</label>
          <caption>
            <p>Vibration data with worn plunger.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.10.jpg" />
        </fig>
        <p>The sample sizes collected for each state are shown in <xref ref-type="fig" rid="fig11">Figure 11</xref>. In actual operations, field engineers strive to keep the machinery in a normal state as much as possible. When certain severe faults occur—such as plunger looseness, bearing bracket damage, or motor bolt looseness—the machine may be shut down immediately, forcing data collection to stop. This further increases the difficulty of diagnosing such faults. Consequently, there is a severe imbalance in the amount of data across different operating states, with the maximum gap between sample sizes exceeding 20 times. Therefore, it is difficult to use data reconstruction methods like resampling to supplement the number of minority class samples, and the quality of such supplemented data is hard to guarantee.</p>
        <fig id="fig11" position="float">
          <label>Figure 11</label>
          <caption>
            <p>Sample size statistics by state.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.11.jpg" />
        </fig>
        <p>To strictly prevent data leakage between the training and testing phases, the continuous raw data sequences of each state are first chronologically divided into training, validation, and test sets. Specifically, based on the temporal order of data collection, the first 80% of the continuous time period for each state is designated as the training period. The subsequent 10% of the time period is allocated for validation, and the final 10% of the time period is strictly reserved for testing. Following this strict chronological division, a sliding window with overlap is independently employed to segment the data within each respective set to increase the data volume and ensure better training performance, as shown in <xref ref-type="fig" rid="fig12">Figure 12</xref>. Specifically, the width of the sliding window is 2,048 and the step size is 1,024. After independent segmentation, the total number of samples effectively generated reaches 36,330.</p>
        <fig id="fig12" position="float">
          <label>Figure 12</label>
          <caption>
            <p>Data splitting with sliding window.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.12.jpg" />
        </fig>
      </sec>
      <sec id="sec4-2">
        <title>4.2 Experimental setup</title>
        <p>In this experimental section, Accuracy, Precision, Recall, and F1-Score are employed as evaluation metrics to assess the diagnostic performance of the proposed models. Class-balanced CNN (CB-CNN)<sup>[<xref ref-type="bibr" rid="B29">29</xref>]</sup> and SMOTE-CNN<sup>[<xref ref-type="bibr" rid="B30">30</xref>]</sup> are selected as baseline methods, representing loss function-based improvement and oversampling-based approaches for handling class imbalance in water injection pump fault diagnosis, respectively. To further validate the contribution of individual modules in addressing sample imbalance, a comprehensive ablation study is conducted with traditional CNN as the reference model. Several variant architectures are constructed for comparison, including CNN-LSTM, Inception-LSTM, Inception-LSTM-ECA, CNN-LSTM-WE, Inception-LSTM-WPD, supervised contrastive learning<sup>[<xref ref-type="bibr" rid="B31">31</xref>]</sup> (SCL), the deep-stable CNN<sup>[<xref ref-type="bibr" rid="B32">32</xref>]</sup> (DSCNN), Inception-WE, and the proposed MILN-WE.</p>
        <p>Each variant systematically excludes specific components from the complete architecture: CNN-LSTM removes WPD, Inception, and ECA modules; Inception-LSTM excludes the WPD and ECA modules; Inception-LSTM-ECA removes the WPD module; CNN-LSTM-WE and Inception-WE remove the Inception and LSTM modules, respectively; and Inception-LSTM-WPD excludes the ECA module. This systematic comparison enables quantitative assessment of each module’s contribution to the overall diagnostic performance under imbalanced data conditions.</p>
        <p>Based on the PyTorch deep learning framework, the proposed Multi-level Inception-LSTM network was constructed, integrating the WPD and ECA mechanisms. The hardware configuration included an Intel i5-13500HX CPU, an NVIDIA 4060 GPU, and 16 GB of RAM. Based on the aforementioned independent data splitting strategy, the segmented samples from each state strictly maintain the 8:1:1 ratio for the training, validation, and test sets, respectively. This rigorous procedure ensures zero information crossover, resulting in 29,064 training samples, 3,633 validation samples and 3,633 testing samples.</p>
        <p>Cross-Entropy Loss was employed as the loss function, and the Adam optimizer was used for model training. The learning rate was set to 0.002. To ensure statistical reliability and mitigate the influence of random initialization, all models evaluated in this study were independently trained and tested 5 times under the same hardware and software configurations. Within the Inception module, the kernel sizes for each convolutional layer are specified in <xref ref-type="table" rid="t2">Table 2</xref>, with a Batch Normalization layer added after each convolution. The LSTM module was configured with 2 layers. Additionally, as depicted in <xref ref-type="fig" rid="fig5">Figure 5</xref>, local ECA modules were integrated specifically after the Conv2-1 and Conv3-1 layers to enhance the cross-channel interaction capability of the Inception module.</p>
        <table-wrap id="t2">
          <label>Table 2</label>
          <caption>
            <p>Network parameter settings</p>
          </caption>
          <table frame="hsides" rules="groups">
            <thead>
              <tr>
                <td style="border-bottom:1;">
                  <bold>Network layer</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Parameter settings</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Data dimensions</bold>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>Vibration signal</td>
                <td>/</td>
                <td>[256,1,2048]</td>
              </tr>
              <tr>
                <td>Wavelet decomposition</td>
                <td>/</td>
                <td>[256,1,256]</td>
              </tr>
              <tr>
                <td>Conv 1-1</td>
                <td>Kernel size: 5<break />Output channel: 16</td>
                <td>[256,16,252]</td>
              </tr>
              <tr>
                <td>Maximum pooling layer</td>
                <td>Pooling length: 3<break />Pooling stride: 2</td>
                <td>[256,16,125]</td>
              </tr>
              <tr>
                <td>Conv2-1 to 2-3</td>
                <td>Kernel size: 5/7/9<break />Output channel: 48</td>
                <td>[256,48,125]</td>
              </tr>
			   <tr>
                <td>ECA module</td>
                <td>Output channel: 48</td>
                <td>[256,48,125]</td>
              </tr>
              <tr>
                <td>Maximum pooling layer</td>
                <td>Pooling length: 3<break />Pooling stride: 2</td>
                <td>[256,48,62]</td>
              </tr>
              <tr>
                <td>Conv3-1 to 3-3</td>
                <td>Kernel size: 5/7/9<break />Output channel: 144</td>
                <td>[256,144,62]</td>
              </tr>
              <tr>
                <td>ECA module</td>
                <td>Output channel: 144</td>
                <td>[256,144,62]</td>
              </tr>
			  
              <tr>
                <td>Maximum pooling layer</td>
                <td>Pooling length: 3<break />Pooling stride: 2</td>
                <td>[256,144,30]</td>
              </tr>
			  
			  
			  
              <tr>
                <td>Dropout layer</td>
                <td>/</td>
                <td>[256,144,30]</td>
              </tr>
              <tr>
                <td>LSTM layer</td>
                <td>Output dimensions: 256<break />Number of layers: 2<break />Dropout probability: 0.2</td>
                <td>[256,256,30]</td>
              </tr>
              <tr>
                <td>Fully connected layer</td>
                <td>Output dimensions: 960</td>
                <td>[256,960]</td>
              </tr>
              <tr>
                <td>Fully connected layer</td>
                <td>Output dimensions: 16</td>
                <td>[256,16]</td>
              </tr>
            </tbody>
          </table>
		   <table-wrap-foot>
            <fn>
              <p>ECA: Efficient channel attention; LSTM: long short-term memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec id="sec4-3">
        <title>4.3 Experimental results</title>
        <p>The confusion matrix for the diagnostic results of the MILN-WE model is shown in <xref ref-type="fig" rid="fig13">Figure 13</xref>, where the horizontal axis represents the true labels and the vertical axis represents the predicted labels. The numbers within each colored block correspond to the number of samples classified into that category. As can be observed from the figure, except for fault samples of label 7, the number of misclassified samples for all other faults is below 10.</p>
        <fig id="fig13" position="float">
          <label>Figure 13</label>
          <caption>
            <p>Confusion matrix of diagnosis results.</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.13.jpg" />
        </fig>
        <p>To verify the effectiveness of the improved method and evaluate its performance against common fault diagnosis algorithms for imbalanced data, other baseline models were trained using the same parameters for verification. <xref ref-type="fig" rid="fig14">Figures 14</xref> and <xref ref-type="fig" rid="fig15">15</xref>, and <xref ref-type="table" rid="t3">Table 3</xref> illustrate the diagnostic performance comparison between the proposed model and eleven benchmark models: CNN, CB-CNN, SMOTE-CNN, CNN-LSTM, Inception-LSTM, Inception-LSTM-ECA, CNN-LSTM-WE, Inception-WE, Inception-LSTM-WPD, SCL, and DSCNN.</p>
        <fig id="fig14" position="float">
          <label>Figure 14</label>
          <caption>
            <p>Training loss function. (A) Overall training loss curve, (B) Local training loss curve (80-100).</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.14.jpg" />
        </fig>
        <fig id="fig15" position="float">
          <label>Figure 15</label>
          <caption>
            <p>Testing loss function. (A) Overall test loss curve, (B) Local test loss curve (80-100).</p>
          </caption>
          <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="ces6013.fig.15.jpg" />
        </fig>
        <table-wrap id="t3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of diagnostic results</p>
          </caption>
          <table frame="hsides" rules="groups">
            <thead>
              <tr>
                <td style="border-bottom:1;">
                  <bold>Model</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Accuracy</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Precision</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Recall</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>F1-score</bold>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>CNN</td>
                <td>93.33</td>
                <td>90.26</td>
                <td>93.50</td>
                <td>91.85</td>
              </tr>
              <tr>
                <td>CB-CNN</td>
                <td>96.14</td>
                <td>95.83</td>
                <td>96.68</td>
                <td>96.25</td>
              </tr>
              <tr>
                <td>SMOTE-CNN</td>
                <td>94.10</td>
                <td>92.37</td>
                <td>94.19</td>
                <td>93.27</td>
              </tr>
              <tr>
                <td>CNN-LSTM</td>
                <td>95.79</td>
                <td>94.01</td>
                <td>95.49</td>
                <td>94.74</td>
              </tr>
              <tr>
                <td>Inception-LSTM</td>
                <td>96.31</td>
                <td>95.14</td>
                <td>97.17</td>
                <td>96.14</td>
              </tr>
              <tr>
                <td>Inception-LSTM-ECA</td>
                <td>98.82</td>
                <td>98.49</td>
                <td>98.81</td>
                <td>98.65</td>
              </tr>
              <tr>
                <td>CNN-LSTM-WE</td>
                <td>98.39</td>
                <td>96.94</td>
                <td>99.03</td>
                <td>97.97</td>
              </tr>
              <tr>
                <td>Inception-WE</td>
                <td>97.84</td>
                <td>98.42</td>
                <td>96.35</td>
                <td>97.37</td>
              </tr>
              <tr>
                <td>SCL</td>
                <td>98.03</td>
                <td>97.86</td>
                <td>97.21</td>
                <td>97.53</td>
              </tr>
              <tr>
                <td>Inception-LSTM-WPD</td>
                <td>97.77</td>
                <td>97.25</td>
                <td>98.42</td>
                <td>97.83</td>
              </tr>
              <tr>
                <td>DSCNN</td>
                <td>97.71</td>
                <td>97.34</td>
                <td>98.02</td>
                <td>97.68</td>
              </tr>
              <tr>
                <td>MILN-WE</td>
                <td>
                  <bold>99.38</bold>
                </td>
                <td>
                  <bold>99.42</bold>
                </td>
                <td>
                  <bold>99.27</bold>
                </td>
                <td>
                  <bold>99.34</bold>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn>
              <p>*Bold values indicate the optimal results under different evaluation metrics. CNN: Convolutional neural network; CB: class-balanced; LSTM: long short-term memory; ECA: efficient channel attention; SCL: supervised contrastive learning; WPD: wavelet packet decomposition; DSCNN: the deep-stable CNN; MILN-WE: the multi-level Inception-LSTM network with WPD and ECA.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>During the model training process, the proposed model achieved faster convergence speed in terms of both training and testing losses compared to other models. Regarding the testing set, the SMOTE-CNN, which incorporates SMOTE resampling, showed a performance similar to that of the traditional CNN model. This indicates that when facing severe data imbalance, resampling methods struggle to generate appropriate new data and fail to effectively distinguish minority class faults.</p>
        <p>The diagnostic Accuracy, Precision, Recall and F1-Score of the proposed model on the test set reached 99.38%, 99.42%, 99.27%, and 99.34%, respectively, all of which showed improvements over the other benchmark models. Compared to the traditional CNN, the diagnostic accuracy improved by 6.05%. Compared with CB-CNN, a representative imbalance-aware fault diagnosis model based on improved loss function, the proposed MILN-WE model achieves a 3.24% improvement in diagnostic accuracy. Furthermore, when compared with SMOTE-CNN, which employs oversampling techniques for handling class imbalance, the diagnostic accuracy is enhanced by 5.28%. These results demonstrate that the proposed MILN-WE model exhibits better performance in addressing the imbalanced data diagnosis problem of plunger-type water injection pumps when compared with other advanced fault diagnosis methods.</p>
        <p>Besides, during the process of network optimization, the diagnostic performance improved progressively. By comparing the diagnostic results of the proposed model with the Inception-LSTM-ECA, CNN-LSTM-WE, Inception-WE, Inception-LSTM-WPD, SCL, and DSCNN models, it is evident that each module contributes to the improvement of the final diagnostic accuracy. In summary, these results demonstrate the effectiveness and superiority of the proposed model for fault diagnosis under class-imbalanced conditions.</p>
      </sec>
      <sec id="sec4-4">
        <title>4.4 Public dataset verification</title>
        <p>To verify the cross-platform robustness of the MILN-WE architecture, we extended our evaluation to a widely recognized public repository: the centrifugal multistage impeller blower dataset<sup>[<xref ref-type="bibr" rid="B33">33</xref>]</sup>.</p>
        <p>In this validation phase, five operational states are considered: normal baseline (C0), along with four localized defects—outer-race (C1), inner-race (C2), rolling-element (C3), and gear (C4) failures. To mirror the sparsity of fault samples in actual industrial production, we intentionally constructed a non-uniform distribution. As detailed in <xref ref-type="table" rid="t4">Table 4</xref>, for classes C0-C2, 600 training and 180 testing samples per class are used; for minority classes C3 and C4, 200 training and 60 testing samples per class are used. Additionally, 10% of the training samples from each class are partitioned as a validation set during the training process. By testing the model on this skewed dataset, we can more effectively demonstrate MILN-WE’s proficiency in identifying underrepresented fault signatures amidst dominant healthy signals.</p>
        <table-wrap id="t4">
          <label>Table 4</label>
          <caption>
            <p>Details of the public dataset</p>
          </caption>
          <table frame="hsides" rules="groups">
            <thead>
              <tr>
                <td style="border-bottom:1;">
                  <bold>Label</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Fault type</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Training samples</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Testing samples</bold>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>C0</td>
                <td>No fault</td>
                <td>600</td>
                <td>180</td>
              </tr>
              <tr>
                <td>C1</td>
                <td>Bearing outer race fault</td>
                <td>600</td>
                <td>180</td>
              </tr>
              <tr>
                <td>C2</td>
                <td>Bearing inner race fault</td>
                <td>600</td>
                <td>180</td>
              </tr>
              <tr>
                <td>C3</td>
                <td>Bearing rolling element fault</td>
                <td>200</td>
                <td>60</td>
              </tr>
              <tr>
                <td>C4</td>
                <td>Gear fault</td>
                <td>200</td>
                <td>60</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>In the training process, the Adam optimizer was deployed for 50 epochs with a starting learning rate of 0.001. To prevent the model from overfitting, a decay strategy was integrated: the learning rate was reduced by 50% whenever the loss metric failed to decrease over 15 successive epochs. Additionally, we introduced Gaussian white noise with a signal-to-noise ratio of 1 into the raw vibration data to replicate the complex noise interference typically encountered in actual industrial operations.</p>
        <p>As detailed in <xref ref-type="table" rid="t5">Table 5</xref>, our MILN-WE framework attained a 95.82% diagnostic accuracy. This performance shows higher diagnostic accuracy compared to the benchmark models, exhibiting improvements over CNN, SMOTE-CNN, CNN-LSTM, INCEPTION-LSTM-WPD, SCL, and DSCNN by margins of 14.58%, 13.72%, 10.17%, 2.17%, 2.74%, and 1.25%, respectively. Beyond accuracy, our approach consistently yielded competitive results across the remaining evaluation metrics. These outcomes illustrate the model's capability to maintain effective fault identification on the public dataset, even under severe background noise conditions.</p>
        <table-wrap id="t5">
          <label>Table 5</label>
          <caption>
            <p>Results of comparison experiments on the public dataset</p>
          </caption>
          <table frame="hsides" rules="groups">
            <thead>
              <tr>
                <td style="border-bottom:1;">
                  <bold>Model</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Accuracy</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Precision</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>Recall</bold>
                </td>
                <td style="border-bottom:1;">
                  <bold>F1-score</bold>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>CNN</td>
                <td>81.24</td>
                <td>80.15</td>
                <td>82.30</td>
                <td>81.21</td>
              </tr>
              <tr>
                <td>CB-CNN</td>
                <td>84.50</td>
                <td>83.92</td>
                <td>84.88</td>
                <td>84.40</td>
              </tr>
              <tr>
                <td>SMOTE-CNN</td>
                <td>82.10</td>
                <td>81.05</td>
                <td>82.95</td>
                <td>81.99</td>
              </tr>
              <tr>
                <td>CNN-LSTM</td>
                <td>85.65</td>
                <td>84.33</td>
                <td>86.12</td>
                <td>85.22</td>
              </tr>
              <tr>
                <td>INCEPTION-LSTM</td>
                <td>88.42</td>
                <td>87.90</td>
                <td>88.75</td>
                <td>88.32</td>
              </tr>
              <tr>
                <td>INCEPTION-LSTM-ECA</td>
                <td>91.15</td>
                <td>90.85</td>
                <td>91.50</td>
                <td>91.17</td>
              </tr>
              <tr>
                <td>CNN-LSTM-WE</td>
                <td>92.30</td>
                <td>91.75</td>
                <td>92.65</td>
                <td>92.20</td>
              </tr>
              <tr>
                <td>INCEPTION-WE</td>
                <td>92.85</td>
                <td>92.10</td>
                <td>93.40</td>
                <td>92.75</td>
              </tr>
              <tr>
                <td>SCL</td>
                <td>93.08</td>
                <td>92.76</td>
                <td>93.35</td>
                <td>93.05</td>
              </tr>
              <tr>
                <td>INCEPTION-LSTM-WPD</td>
                <td>93.65</td>
                <td>93.20</td>
                <td>94.05</td>
                <td>93.62</td>
              </tr>
              <tr>
                <td>DSCNN</td>
                <td>94.57</td>
                <td>94.21</td>
                <td>94.88</td>
                <td>94.54</td>
              </tr>
              <tr>
                <td>MILN-WE</td>
                <td>
                  <bold>95.82</bold>
                </td>
                <td>
                  <bold>95.60</bold>
                </td>
                <td>
                  <bold>96.15</bold>
                </td>
                <td>
                  <bold>95.87</bold>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn>
              <p>*Bold values indicate the optimal results under different evaluation metrics. CNN: Convolutional neural network; CB: class-balanced; LSTM: long short-term memory; ECA: efficient channel attention; SCL: supervised contrastive learning; WPD: wavelet packet decomposition; DSCNN: the deep-stable CNN; MILN-WE: the multi-level Inception-LSTM network with WPD and ECA.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec id="sec5">
      <title>5. DISCUSSION</title>
      <p>This study proposes the MILN-WE network for fault diagnosis of oilfield water injection pumps and additionally evaluates it on a public rotating machinery dataset. Experimental results demonstrate that MILN-WE is effective in processing complex, non-stationary vibration signals. Dual validation on both private and public datasets confirms its superior robustness and generalization across diverse mechanical structures and operating conditions. This research contributes a high-precision feature extraction and classification scheme to address industrial challenges such as class imbalance and strong noise, providing a theoretical foundation for intelligent predictive maintenance.</p>
    </sec>
    <sec id="sec6">
      <title>6. CONCLUSION</title>
      <p>This study addresses the critical challenges of non-stationary vibration signals and severe category imbalance in the fault diagnosis of water injection pumps operating in complex industrial environments. Looking forward, our future research will focus on exploring cross-condition transfer learning and few-shot learning to improve model adaptability in data-scarce environments, and on pursuing model lightweighting for real-time edge-side deployment.</p>
    </sec>
  </body>
  <back>
    <sec>
      <title>DECLARATIONS</title>
      <sec>
        <title>Authors’ contributions</title>
        <p>Software, Writing-Original draft preparation: Wu, X.</p>
        <p>Writing-Reviewing and Editing: Wu, Z.</p>
        <p>Data Curation: Luo, F.</p>
        <p>Software: Wang, J.</p>
        <p>Conceptualization, Methodology, Funding acquisition: Xia, T.</p>
        <p>Project administration: Xi, L.</p>
      </sec>
      <sec>
        <title>Availability of data and materials</title>
        <p>The private dataset used in the study is available from the corresponding author upon reasonable request. The public dataset used in the study is openly available in CFD_datasets at <uri xlink:href="https://github.com/THUFDD/CFD_datasets">https://github.com/THUFDD/CFD_datasets</uri>.</p>
      </sec>
      <sec>
        <title>AI and AI-assisted tools statement</title>
        <p>Not applicable.</p>
      </sec>
      <sec>
        <title>Financial support and sponsorship</title>
        <p>This research is supported by the National Natural Science Foundation of China (72571173), the Natural Science Foundation of Shanghai (25ZR1401196), and the National Key Research and Development Program of China (2022YFF0605700).</p>
      </sec>
      <sec>
        <title>Conflicts of interest</title>
        <p>Xia, T. is an Editorial Board Member of the journal <italic>Complex Engineering Systems</italic>. Xia, T. was not involved in any steps of editorial processing, notably including reviewers' selection, manuscript handling and decision making, while the other authors have declared that they have no conflicts of interest.</p>
      </sec>
      
      <sec>
        <title>Ethical approval and consent to participate</title>
        <p>Not applicable.</p>
      </sec>
      <sec>
        <title>Consent for publication</title>
        <p>Not applicable.</p>
      </sec>
      <sec>
        <title>Copyright</title>
        <p>© The Author(s) 2026.</p>
      </sec>
    </sec>
    <ref-list>
      <ref id="B1">
        <label>1</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>He</surname>
              <given-names>F</given-names>
            </name>
            <name>
              <surname>Ye</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>A bearing fault diagnosis method based on wavelet packet transform and convolutional neural network optimized by simulated annealing algorithm</article-title>
          <source>Sensors</source>
          <year>2022</year>
          <volume>22</volume>
          <fpage>1410</fpage>
          <pub-id pub-id-type="doi">10.3390/s22041410</pub-id>
          <pub-id pub-id-type="pmid">35214312</pub-id>
          <pub-id pub-id-type="pmcid">PMC8962982</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B2">
        <label>2</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Guo</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Han</surname>
              <given-names>B</given-names>
            </name>
            <name>
              <surname>Huang</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Bearing fault diagnosis based on improved Morlet wavelet transform and shallow residual neural network</article-title>
          <source>Appl Sci</source>
          <year>2024</year>
          <volume>14</volume>
          <fpage>4542</fpage>
          <pub-id pub-id-type="doi">10.3390/app14114542</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B3">
        <label>3</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Zhai</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Luo</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Rolling bearing fault diagnosis based on a synchrosqueezing wavelet transform and a transfer residual convolutional neural network</article-title>
          <source>Sensors</source>
          <year>2025</year>
          <volume>25</volume>
          <fpage>325</fpage>
          <pub-id pub-id-type="doi">10.3390/s25020325</pub-id>
          <pub-id pub-id-type="pmid">39860695</pub-id>
          <pub-id pub-id-type="pmcid">PMC11768241</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B4">
        <label>4</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Tan</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Wu</surname>
              <given-names>G</given-names>
            </name>
            <name>
              <surname>Qiu</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Fan</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Wan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Fault diagnosis of a mixed-flow pump under cavitation condition based on deep learning techniques</article-title>
          <source>Front Energy Res</source>
          <year>2023</year>
          <volume>10</volume>
          <fpage>1109214</fpage>
          <pub-id pub-id-type="doi">10.3389/fenrg.2022.1109214</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B5">
        <label>5</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Piltan</surname>
              <given-names>F</given-names>
            </name>
            <name>
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A novel image-based diagnosis method using improved DCGAN for rotating machinery</article-title>
          <source>Sensors</source>
          <year>2022</year>
          <volume>22</volume>
          <fpage>7534</fpage>
          <pub-id pub-id-type="doi">10.3390/s22197534</pub-id>
          <pub-id pub-id-type="pmid">36236633</pub-id>
          <pub-id pub-id-type="pmcid">PMC9570832</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B6">
        <label>6</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
            <name>
              <surname>Deng</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Lu</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Miao</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Fault diagnosis method for imbalanced data based on multi-signal fusion and improved deep convolution generative adversarial network</article-title>
          <source>Sensors</source>
          <year>2023</year>
          <volume>23</volume>
          <fpage>2542</fpage>
          <pub-id pub-id-type="doi">10.3390/s23052542</pub-id>
          <pub-id pub-id-type="pmid">36904745</pub-id>
          <pub-id pub-id-type="pmcid">PMC10007067</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B7">
        <label>7</label>
        <nlm-citation publication-type="confproc">
          <person-group person-group-type="author">
            <name>
              <surname>Lai</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Cheung</surname>
              <given-names>T</given-names>
            </name>
            <name>
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Xue</surname>
              <given-names>K</given-names>
            </name>
            <name>
              <surname>Fung</surname>
              <given-names>K</given-names>
            </name>
            <name>
              <surname>Lam</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <comment>Residual attention single-head vision transformer network for rolling bearing fault diagnosis in noisy environments. In Proceedings of the 2024 6th International Conference on Video, Signal and Image Processing; Ningbo Hainan, China. New York, NY, USA: ACM; 2024. pp. 136-50.</comment>
          <pub-id pub-id-type="doi">10.1145/3708568.3708591</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B8">
        <label>8</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name>
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name>
              <surname>Cheng</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Interpretable domain adaptation transformer: a transfer learning method for fault diagnosis of rotating machinery</article-title>
          <source>Struct Health Monit</source>
          <year>2024</year>
          <volume>24</volume>
          <fpage>1187</fpage>
          <lpage>200</lpage>
          <pub-id pub-id-type="doi">10.1177/14759217241249656</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B9">
        <label>9</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Hassannejad</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Ettefagh</surname>
              <given-names>MM</given-names>
            </name>
            <name>
              <surname>Bahrami Mossayebi</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Adaptive wavelet-based physics-informed CNN for bearing fault diagnosis</article-title>
          <source>Int J Progn Health Manag</source>
          <year>2025</year>
          <volume>16</volume>
          <fpage>4234</fpage>
          <pub-id pub-id-type="doi">10.36001/ijphm.2025.v16i1.4234</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B10">
        <label>10</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Deng</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name>
              <surname>Yao</surname>
              <given-names>C</given-names>
            </name>
            <name>
              <surname>Shao</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Hu</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A multi-scale sensor importance-aware attention fusion network and its applications in fault diagnosis of centrifugal pumps and axial piston pumps</article-title>
          <source>Measurement</source>
          <year>2026</year>
          <volume>258</volume>
          <fpage>119315</fpage>
          <pub-id pub-id-type="doi">10.1016/j.measurement.2025.119315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B11">
        <label>11</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Kim</surname>
              <given-names>AR</given-names>
            </name>
            <name>
              <surname>Kim</surname>
              <given-names>HS</given-names>
            </name>
            <name>
              <surname>Kim</surname>
              <given-names>SY</given-names>
            </name>
          </person-group>
          <article-title>Transformer-based fault detection using pressure signals for hydraulic pumps</article-title>
          <source>IEEE Access</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>145795</fpage>
          <lpage>808</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2024.3472750</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B12">
        <label>12</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Zheng</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Dai</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A novel multistep wavelet convolutional transfer diagnostic framework for cross-machine bearing fault diagnosis</article-title>
          <source>Sensors</source>
          <year>2025</year>
          <volume>25</volume>
          <fpage>3141</fpage>
          <pub-id pub-id-type="doi">10.3390/s25103141</pub-id>
          <pub-id pub-id-type="pmid">40431934</pub-id>
          <pub-id pub-id-type="pmcid">PMC12115531</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B13">
        <label>13</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Pang</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Xie</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>M-net: a novel unsupervised domain adaptation framework based on multi-kernel maximum mean discrepancy for fault diagnosis of rotating machinery</article-title>
          <source>Complex Intell Syst</source>
          <year>2024</year>
          <volume>10</volume>
          <fpage>3259</fpage>
          <lpage>72</lpage>
          <pub-id pub-id-type="doi">10.1007/s40747-023-01320-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B14">
        <label>14</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Sun</surname>
              <given-names>K</given-names>
            </name>
            <name>
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Lu</surname>
              <given-names>N</given-names>
            </name>
            <name>
              <surname>Xia</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Han</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Joint discriminative adversarial domain adaptation for cross-domain fault diagnosis</article-title>
          <source>IEEE Trans Instrum Meas</source>
          <year>2023</year>
          <volume>72</volume>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1109/tim.2023.3317472</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B15">
        <label>15</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Li</surname>
              <given-names>G</given-names>
            </name>
            <name>
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
            <name>
              <surname>Wei</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Self-supervised learning for intelligent fault diagnosis of rotating machinery with limited labeled data</article-title>
          <source>Appl Acoust</source>
          <year>2022</year>
          <volume>191</volume>
          <fpage>108663</fpage>
          <pub-id pub-id-type="doi">10.1016/j.apacoust.2022.108663</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B16">
        <label>16</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Zhu</surname>
              <given-names>P</given-names>
            </name>
            <name>
              <surname>Ma</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Han</surname>
              <given-names>Q</given-names>
            </name>
            <name>
              <surname>Chu</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Deep contrastive transfer learning for rotating machinery fault diagnosis</article-title>
          <source>IEEE Trans Instrum Meas</source>
          <year>2025</year>
          <volume>74</volume>
          <fpage>1</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.1109/tim.2024.3502723</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B17">
        <label>17</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name>
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Research on federated learning method for fault diagnosis in multiple working conditions</article-title>
          <source>Complex Eng Syst</source>
          <year>2021</year>
          <volume>1</volume>
          <fpage>7</fpage>
          <pub-id pub-id-type="doi">10.20517/ces.2021.08</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B18">
        <label>18</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Shao</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Xia</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Wan</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>De Silva</surname>
              <given-names>CW</given-names>
            </name>
          </person-group>
          <article-title>Modified stacked autoencoder using adaptive Morlet wavelet for intelligent fault diagnosis of rotating machinery</article-title>
          <source>IEEE/ASME Trans Mechatron</source>
          <year>2022</year>
          <volume>27</volume>
          <fpage>24</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1109/tmech.2021.3058061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B19">
        <label>19</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Chow</surname>
              <given-names>TWS</given-names>
            </name>
            <name>
              <surname>Gu</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A balanced adversarial domain adaptation method for partial transfer intelligent fault diagnosis</article-title>
          <source>IEEE Trans Instrum Meas</source>
          <year>2022</year>
          <volume>71</volume>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1109/tim.2022.3214490</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B20">
        <label>20</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Huang</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>A contrastive learning-based fault diagnosis method for rotating machinery with limited and imbalanced labels</article-title>
          <source>IEEE Sensors J</source>
          <year>2023</year>
          <volume>23</volume>
          <fpage>16402</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1109/jsen.2023.3284044</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B21">
        <label>21</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Ren</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Zhou</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Feng</surname>
              <given-names>K</given-names>
            </name>
            <name>
              <surname>Yu</surname>
              <given-names>K</given-names>
            </name>
            <name>
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Supervised contrastive learning-based domain adaptation network for intelligent unsupervised fault diagnosis of rolling bearing</article-title>
          <source>IEEE/ASME Trans Mechatron</source>
          <year>2022</year>
          <volume>27</volume>
          <fpage>5371</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1109/tmech.2022.3179289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B22">
        <label>22</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Chen</surname>
              <given-names>N</given-names>
            </name>
            <name>
              <surname>Shen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A new multiple source domain adaptation fault diagnosis method between different rotating machines</article-title>
          <source>IEEE Trans Ind Inf</source>
          <year>2021</year>
          <volume>17</volume>
          <fpage>4788</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1109/tii.2020.3021406</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B23">
        <label>23</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Qiu</surname>
              <given-names>Q</given-names>
            </name>
            <name>
              <surname>Peng</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A prognosis-centered intelligent maintenance optimization framework under uncertain failure threshold</article-title>
          <source>IEEE Trans Rel</source>
          <year>2024</year>
          <volume>73</volume>
          <fpage>115</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1109/tr.2023.3273082</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B24">
        <label>24</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Tan</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Wei</surname>
              <given-names>F</given-names>
            </name>
            <name>
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Peng</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Xiao</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Systemic condition-based maintenance optimization under inspection uncertainties: a customized multiagent reinforcement learning approach</article-title>
          <source>IEEE Trans Rel</source>
          <year>2025</year>
          <volume>74</volume>
          <fpage>5848</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1109/tr.2025.3583769</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B25">
        <label>25</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Zhou</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Jia</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Dai</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Group machinery intelligent maintenance: adaptive health prediction and global dynamic maintenance decision-making</article-title>
          <source>Reliab Eng Syst Saf</source>
          <year>2024</year>
          <volume>252</volume>
          <fpage>110426</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ress.2024.110426</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B26">
        <label>26</label>
        <nlm-citation publication-type="confproc">
          <person-group person-group-type="author">
            <name>
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name>
              <surname>Wu</surname>
              <given-names>B</given-names>
            </name>
            <name>
              <surname>Zhu</surname>
              <given-names>P</given-names>
            </name>
            <name>
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name>
              <surname>Zuo</surname>
              <given-names>W</given-names>
            </name>
            <name>
              <surname>Hu</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <comment>ECA-Net: efficient channel attention for deep convolutional neural networks. In Proceedings of the 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR); 2020 Jun 13-19; Seattle, WA, USA. IEEE; 2020. pp. 11531-9.</comment>
          <pub-id pub-id-type="doi">10.1109/cvpr42600.2020.01155</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B27">
        <label>27</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Aburakhia</surname>
              <given-names>SA</given-names>
            </name>
            <name>
              <surname>Myers</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Shami</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A hybrid method for condition monitoring and fault diagnosis of rolling bearings with low system delay</article-title>
          <source>IEEE Trans Instrum Meas</source>
          <year>2022</year>
          <volume>71</volume>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1109/tim.2022.3198477</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B28">
        <label>28</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Dubaish</surname>
              <given-names>AA</given-names>
            </name>
            <name>
              <surname>Jaber</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Comparative analysis of SVM and ANN for machine condition monitoring and fault diagnosis in gearboxes</article-title>
          <source>Math Model Eng Probl</source>
          <year>2024</year>
          <volume>11</volume>
          <fpage>976</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.18280/mmep.110414</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B29">
        <label>29</label>
        <nlm-citation publication-type="confproc">
          <person-group person-group-type="author">
            <name>
              <surname>Cui</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Jia</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Lin</surname>
              <given-names>T</given-names>
            </name>
            <name>
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Belongie</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <comment>Class-balanced loss based on effective number of samples. In Proceedings of the 2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR); 2019 Jun 15-20; Long Beach, CA, USA. IEEE; 2019. pp. 9260-9.</comment>
          <pub-id pub-id-type="doi">10.1109/cvpr.2019.00949</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B30">
        <label>30</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Joloudari</surname>
              <given-names>JH</given-names>
            </name>
            <name>
              <surname>Marefat</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Nematollahi</surname>
              <given-names>MA</given-names>
            </name>
            <name>
              <surname>Oyelere</surname>
              <given-names>SS</given-names>
            </name>
            <name>
              <surname>Hussain</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Effective class-imbalance learning based on SMOTE and convolutional neural networks</article-title>
          <source>Appl Sci</source>
          <year>2023</year>
          <volume>13</volume>
          <fpage>4006</fpage>
          <pub-id pub-id-type="doi">10.3390/app13064006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B31">
        <label>31</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Pan</surname>
              <given-names>C</given-names>
            </name>
            <name>
              <surname>Shang</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Tang</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Cheng</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Open-set domain adaptive fault diagnosis based on supervised contrastive learning and a complementary weighted dual adversarial network</article-title>
          <source>Mech Syst Signal Process</source>
          <year>2025</year>
          <volume>222</volume>
          <fpage>111780</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ymssp.2024.111780</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B32">
        <label>32</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Xu</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Lee</surname>
              <given-names>CKM</given-names>
            </name>
            <name>
              <surname>Wong</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A novel fault diagnosis method based on deep stable learning for bearings with imbalanced data samples</article-title>
          <source>Expert Syst Appl</source>
          <year>2025</year>
          <volume>281</volume>
          <fpage>127634</fpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2025.127634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="B33">
        <label>33</label>
        <nlm-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name>
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name>
              <surname>Sun</surname>
              <given-names>G</given-names>
            </name>
            <name>
              <surname>Zhou</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Fault diagnosis of rotating machinery with limited expert interaction: a multicriteria active learning approach based on broad learning system</article-title>
          <source>IEEE Trans Contr Syst Technol</source>
          <year>2023</year>
          <volume>31</volume>
          <fpage>953</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1109/tcst.2022.3200214</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>