diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index eabb3248ea0085409ad7dc7a18e852f967f5252a..c1ec92ffb15093e58782807671652ac8e4292b61 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -430,6 +430,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS]
 #define TARGET_FUSE_ALU_AND_BRANCH \
 	ix86_tune_features[X86_TUNE_FUSE_ALU_AND_BRANCH]
+#define TARGET_FUSE_MOV_AND_ALU \
+	ix86_tune_features[X86_TUNE_FUSE_MOV_AND_ALU]
 #define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
 #define TARGET_AVOID_LEA_FOR_ADDR \
 	ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index d77298b0e34dc403600078da62ed510d524fbb95..c6d5426ae8d383e693fa80745ad8b685d954c780 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -67,7 +67,6 @@ ix86_issue_rate (void)
     case PROCESSOR_ZNVER2:
     case PROCESSOR_ZNVER3:
     case PROCESSOR_ZNVER4:
-    case PROCESSOR_ZNVER5:
     case PROCESSOR_CORE2:
     case PROCESSOR_NEHALEM:
     case PROCESSOR_SANDYBRIDGE:
@@ -91,6 +90,13 @@ ix86_issue_rate (void)
       return 5;
 
     case PROCESSOR_SAPPHIRERAPIDS:
+    /* For znver5 the decoder can handle 4 or 8 instructions per cycle,
+       the op cache 12 instructions per cycle, dispatch 8 instructions,
+       integer rename 8 instructions and FP rename 6 instructions.
+
+       The scheduler, which does not understand the out-of-order nature
+       of the CPU, is unlikely to be able to fill all of these.  */
+    case PROCESSOR_ZNVER5:
       return 6;
 
     default:
@@ -434,6 +440,8 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
 	  enum attr_unit unit = get_attr_unit (insn);
 	  int loadcost;
 
+	  /* TODO: On znver5, loads with complex addressing modes have
+	     higher latency.  */
 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
 	    loadcost = 4;
 	  else
@@ -563,6 +571,60 @@ ix86_macro_fusion_p ()
   return TARGET_FUSE_CMP_AND_BRANCH;
 }
 
+/* Return true if the insn pair MOV, ALU is a candidate for mov/alu
+   macro fusion: MOV must be a reg-reg move and ALU an integer ALU
+   operation that uses the destination of MOV as one of its operands,
+   e.g. "mov %rsi, %rax" followed by "add %rdi, %rax".  */
+
+static bool
+ix86_fuse_mov_alu_p (rtx_insn *mov, rtx_insn *alu)
+{
+  /* Validate mov:
+      - It should be a reg-reg move with opcode 0x89 or 0x8B.  */
+  rtx set1 = PATTERN (mov);
+  if (GET_CODE (set1) != SET
+      || !GENERAL_REG_P (SET_SRC (set1))
+      || !GENERAL_REG_P (SET_DEST (set1)))
+    return false;
+  rtx reg = SET_DEST (set1);
+  /*  - It should be a 16, 32 or 64 bit integer move, i.e. have
+	opcode 0x89 or 0x8B.  */
+  if (!INTEGRAL_MODE_P (GET_MODE (reg))
+      || GET_MODE_SIZE (GET_MODE (reg)) < 2
+      || GET_MODE_SIZE (GET_MODE (reg)) > 8)
+    return false;
+  /* Validate ALU.  */
+  if (GET_CODE (PATTERN (alu)) != PARALLEL)
+    return false;
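+  /* Integer ALU operations are normally represented as a PARALLEL of
+     the arithmetic SET followed by a clobber of the flags register;
+     look at the SET.  */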
+  rtx set2 = XVECEXP (PATTERN (alu), 0, 0);
+  if (GET_CODE (set2) != SET)
+    return false;
+  /* Match one of:
+     ADD ADC AND XOR OR SUB SBB INC DEC NOT SAL SHL SHR SAR.
+     We may also want to add an insn attribute to handle the sporadic
+     cases where these are output with different RTX expressions.  */
+
+  if (GET_CODE (SET_SRC (set2)) != PLUS
+      && GET_CODE (SET_SRC (set2)) != MINUS
+      && GET_CODE (SET_SRC (set2)) != XOR
+      && GET_CODE (SET_SRC (set2)) != AND
+      && GET_CODE (SET_SRC (set2)) != IOR
+      && GET_CODE (SET_SRC (set2)) != NOT
+      && GET_CODE (SET_SRC (set2)) != ASHIFT
+      && GET_CODE (SET_SRC (set2)) != ASHIFTRT
+      && GET_CODE (SET_SRC (set2)) != LSHIFTRT)
+    return false;
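+  /* Extract the operands; NOT is unary and has no second operand.  */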
+  rtx op0 = XEXP (SET_SRC (set2), 0);
+  rtx op1 = GET_CODE (SET_SRC (set2)) != NOT ? XEXP (SET_SRC (set2), 1) : NULL;
+  /* One of the operands must be the register set by the move;
+     canonicalize it into op0.  */
+  if (op1 && (!REG_P (op0) || REGNO (op0) != REGNO (reg)))
+    std::swap (op0, op1);
+  if (!REG_P (op0) || REGNO (op0) != REGNO (reg))
+    return false;
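+  /* The other operand, if any, must be a register or an immediate
+     representable in the instruction encoding.  */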
+  if (op1
+      && !REG_P (op1)
+      && !x86_64_immediate_operand (op1, VOIDmode))
+    return false;
+  /* Only one of the two operands may be the move destination.  */
+  if (op1 && REG_P (op1) && REGNO (op1) == REGNO (reg))
+    return false;
+  return true;
+}
+
 /* Check whether current microarchitecture support macro fusion
    for insn pair "CONDGEN + CONDJMP". Refer to
    "Intel Architectures Optimization Reference Manual". */
@@ -570,6 +632,9 @@ ix86_macro_fusion_p ()
 bool
 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
 {
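+  /* On targets with X86_TUNE_FUSE_MOV_AND_ALU, a reg-reg move can also
+     be fused with the ALU instruction that consumes its destination.  */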
+  if (TARGET_FUSE_MOV_AND_ALU
+      && ix86_fuse_mov_alu_p (condgen, condjmp))
+    return true;
   rtx src, dest;
   enum rtx_code ccode;
   rtx compare_set = NULL_RTX, test_if, cond;
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index ed26136faee52c9ea0f54fdb7f039091a0c791f9..d7e2ad7fd250b71a494feb6e3c910b9910276b0e 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -143,10 +143,17 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
 
 /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
    jump instruction when the alu instruction produces the CCFLAG consumed by
-   the conditional jump instruction. */
+   the conditional jump instruction.
+
+   TODO: znver5 supports fusing with SUB, ADD, INC, DEC, OR, AND.
+   There are also limitations on the immediates and displacements
+   supported.  */
 DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
-		  m_SANDYBRIDGE | m_CORE_AVX2 | m_ZHAOXIN | m_GENERIC)
+	  m_SANDYBRIDGE | m_CORE_AVX2 | m_ZHAOXIN | m_GENERIC | m_ZNVER5)
 
+/* X86_TUNE_FUSE_MOV_AND_ALU: Fuse a mov with a subsequent alu instruction
+   when the mov is a reg-reg move and its destination is used by the alu.
+   The alu must be one of ADD, ADC, AND, XOR, OR, SUB, SBB, INC, DEC,
+   NOT, SAL, SHL, SHR, SAR.  */
+DEF_TUNE (X86_TUNE_FUSE_MOV_AND_ALU, "fuse_mov_and_alu", m_ZNVER5)
 
 /*****************************************************************************/
 /* Function prologue, epilogue and function calling sequences.               */