diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0670beb92271ef33f2765195b1cf2ab103dfcd4e..049fdb07fc365b14775e7213218ccc33ba1b0edd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2017-01-17  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	* config/rs6000/altivec.h (vec_rlmi): New #define.
+	(vec_vrlnm): Likewise.
+	(vec_rlnm): Likewise.
+	* config/rs6000/altivec.md (UNSPEC_VRLMI): New UNSPEC enum value.
+	(UNSPEC_VRLNM): Likewise.
+	(VIlong): New mode iterator.
+	(altivec_vrl<VI_char>mi): New define_insn.
+	(altivec_vrl<VI_char>nm): Likewise.
+	* config/rs6000/rs6000-builtin.def (VRLWNM): New monomorphic
+	function entry.
+	(VRLDNM): Likewise.
+	(RLNM): New polymorphic function entry.
+	(VRLWMI): New monomorphic function entry.
+	(VRLDMI): Likewise.
+	(RLMI): New polymorphic function entry.
+	* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add new
+	entries for P9V_BUILTIN_VEC_RLMI and P9V_BUILTIN_VEC_RLNM.
+	* doc/extend.texi: Add description of vec_rlmi, vec_rlnm, and
+	vec_vrlnm.
+
 2017-01-17  Jakub Jelinek  <jakub@redhat.com>
 
 	PR debug/78839
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 17bc33e9de3d71c6683a809bca69039bbb459431..790298dc41566a0ace6417be738eb424ad21f888 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -168,6 +168,9 @@
 #define vec_re __builtin_vec_re
 #define vec_round __builtin_vec_round
 #define vec_recipdiv __builtin_vec_recipdiv
+#define vec_rlmi __builtin_vec_rlmi
+#define vec_vrlnm __builtin_vec_rlnm
+#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b))) /* b=shift, c=mask */
 #define vec_rsqrt __builtin_vec_rsqrt
 #define vec_rsqrte __builtin_vec_rsqrte
 #define vec_vsubfp __builtin_vec_vsubfp
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2a26007c163b28bc5f0529bcdfcc742dd70e3cda..746cddeedcc548b8f2aa803a7bd2c27f869dfe33 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -156,6 +156,8 @@
    UNSPEC_CMPRB
    UNSPEC_CMPRB2
    UNSPEC_CMPEQB
+   UNSPEC_VRLMI
+   UNSPEC_VRLNM
 ])
 
 (define_c_enum "unspecv"
@@ -168,8 +170,10 @@
 
 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
-;; Short vec in modes
+;; Short vec int modes
 (define_mode_iterator VIshort [V8HI V16QI])
+;; Longer vec int modes for rotate/mask ops
+(define_mode_iterator VIlong [V2DI V4SI])
 ;; Vec float modes
 (define_mode_iterator VF [V4SF])
 ;; Vec modes, pity mode iterators are not composable
@@ -1627,6 +1631,25 @@
   "vrl<VI_char> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "altivec_vrl<VI_char>mi"  ;; rotate left, insert under mask
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+        (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
+	                (match_operand:VIlong 2 "register_operand" "0")
+		        (match_operand:VIlong 3 "register_operand" "v")]
+		       UNSPEC_VRLMI))]  ;; 1 rotated, 2 insert target, 3 mask
+  "TARGET_P9_VECTOR"
+  "vrl<VI_char>mi %0,%1,%3"  ;; operand 2 is tied to %0, so not printed
+  [(set_attr "type" "veclogical")])
+
+(define_insn "altivec_vrl<VI_char>nm"  ;; rotate left, then AND with mask
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+        (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
+		        (match_operand:VIlong 2 "register_operand" "v")]
+		       UNSPEC_VRLNM))]  ;; 1 value rotated, 2 mask/shift fields
+  "TARGET_P9_VECTOR"
+  "vrl<VI_char>nm %0,%1,%2"
+  [(set_attr "type" "veclogical")])
+
 (define_insn "altivec_vsl"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 1cdf9a87959f6b2fe36929eea6ad6dcc071ba6b6..f7085f87c85b21320d8a810d75fd72bb79e24b5a 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1927,12 +1927,22 @@ BU_P9V_OVERLOAD_2 (VSRV,	"vsrv")
 BU_P9V_AV_2 (VADUB,		"vadub",		CONST,  vaduv16qi3)
 BU_P9V_AV_2 (VADUH,		"vaduh",		CONST,  vaduv8hi3)
 BU_P9V_AV_2 (VADUW,		"vaduw",		CONST,  vaduv4si3)
+BU_P9V_AV_2 (VRLWNM,		"vrlwnm",		CONST,	altivec_vrlwnm)
+BU_P9V_AV_2 (VRLDNM,		"vrldnm",		CONST,	altivec_vrldnm)
 
 /* ISA 3.0 vector overloaded 2 argument functions. */
 BU_P9V_OVERLOAD_2 (VADU,	"vadu")
 BU_P9V_OVERLOAD_2 (VADUB,	"vadub")
 BU_P9V_OVERLOAD_2 (VADUH,	"vaduh")
 BU_P9V_OVERLOAD_2 (VADUW,	"vaduw")
+BU_P9V_OVERLOAD_2 (RLNM,	"rlnm")
+
+/* ISA 3.0 3-argument vector functions.  */
+BU_P9V_AV_3 (VRLWMI,		"vrlwmi",		CONST,	altivec_vrlwmi)
+BU_P9V_AV_3 (VRLDMI,		"vrldmi",		CONST,	altivec_vrldmi)
+
+/* ISA 3.0 vector overloaded 3-argument functions.  */
+BU_P9V_OVERLOAD_3 (RLMI,	"rlmi")
 
 /* 1 argument vsx scalar functions added in ISA 3.0 (power9).  */
 BU_P9V_64BIT_VSX_1 (VSEEDP,	"scalar_extract_exp",	CONST,	xsxexpdp)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 3bc1f738f5089df6e7dc19be39d1f28aa6f6a047..a1da94e95ad92b5a0743cc990b1f4eebc7ef18d8 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2214,6 +2214,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+  { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLWMI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+  { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+  { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index f8864eb502bb4577e352edc8ed9eb0107a8c28d3..7e98397b344237986a52705a78a67334077c9b01 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18187,6 +18187,43 @@ If any of the enabled test conditions is true, the corresponding entry
 in the result vector is -1.  Otherwise (all of the enabled test
 conditions are false), the corresponding entry of the result vector is 0.
 
+The following built-in functions are available for the PowerPC family
+of processors, starting with ISA 3.0 (@option{-mcpu=power9}):
+@smallexample
+vector unsigned int vec_rlmi (vector unsigned int, vector unsigned int,
+                              vector unsigned int);
+vector unsigned long long vec_rlmi (vector unsigned long long,
+                                    vector unsigned long long,
+                                    vector unsigned long long);
+vector unsigned int vec_rlnm (vector unsigned int, vector unsigned int,
+                              vector unsigned int);
+vector unsigned long long vec_rlnm (vector unsigned long long,
+                                    vector unsigned long long,
+                                    vector unsigned long long);
+vector unsigned int vec_vrlnm (vector unsigned int, vector unsigned int);
+vector unsigned long long vec_vrlnm (vector unsigned long long,
+                                     vector unsigned long long);
+@end smallexample
+
+The result of @code{vec_rlmi} is obtained by rotating each element of
+the first argument vector left and inserting it under mask into the
+second argument vector.  The third argument vector contains the mask
+beginning in bits 11:15, the mask end in bits 19:23, and the shift
+count in bits 27:31, of each element.
+
+The result of @code{vec_rlnm} is obtained by rotating each element of
+the first argument vector left and ANDing it with a mask specified by
+the second and third argument vectors.  The second argument vector
+contains the shift count for each element in the low-order byte.  The
+third argument vector contains the mask end for each element in the
+low-order byte, with the mask begin in the next higher byte.
+
+The result of @code{vec_vrlnm} is obtained by rotating each element
+of the first argument vector left and ANDing it with a mask.  The
+second argument vector contains the mask beginning in bits 11:15,
+the mask end in bits 19:23, and the shift count in bits 27:31,
+of each element.
+
 If the cryptographic instructions are enabled (@option{-mcrypto} or
 @option{-mcpu=power8}), the following builtins are enabled.
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8697c8a1f3808770aa421ac42242dbe6b88f8925..7cdf31c73d39303ded560ac5c4b1fb014c9f2a85 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2017-01-17  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	* gcc.target/powerpc/vec-rlmi-rlnm.c: New file.
+
 2017-01-17  Nathan Sidwell  <nathan@acm.org>
 
 	PR c++/61636
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c b/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c
new file mode 100644
index 0000000000000000000000000000000000000000..c4f791dbba6f81ab2a86a20aa1877b3209549167
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c
@@ -0,0 +1,69 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+#include <altivec.h>
+
+vector unsigned int
+rlmi_test_1 (vector unsigned int x, vector unsigned int y,
+	     vector unsigned int z)
+{
+  return vec_rlmi (x, y, z);  /* Word form; expect one vrlwmi.  */
+}
+
+vector unsigned long long
+rlmi_test_2 (vector unsigned long long x, vector unsigned long long y,
+	     vector unsigned long long z)
+{
+  return vec_rlmi (x, y, z);  /* Doubleword form; expect one vrldmi.  */
+}
+
+vector unsigned int
+vrlnm_test_1 (vector unsigned int x, vector unsigned int y)
+{
+  return vec_vrlnm (x, y);  /* Word form; expect vrlwnm.  */
+}
+
+vector unsigned long long
+vrlnm_test_2 (vector unsigned long long x, vector unsigned long long y)
+{
+  return vec_vrlnm (x, y);  /* Doubleword form; expect vrldnm.  */
+}
+
+vector unsigned int
+rlnm_test_1 (vector unsigned int x, vector unsigned int y,
+	     vector unsigned int z)
+{
+  return vec_rlnm (x, y, z);  /* y, z merged by shift and OR; vrlwnm.  */
+}
+
+vector unsigned long long
+rlnm_test_2 (vector unsigned long long x, vector unsigned long long y,
+	     vector unsigned long long z)
+{
+  return vec_rlnm (x, y, z);  /* y, z merged by shift and OR; vrldnm.  */
+}
+
+/* Expected code generation for rlmi_test_1 is vrlwmi.
+   Expected code generation for rlmi_test_2 is vrldmi.
+   Expected code generation for vrlnm_test_1 is vrlwnm.
+   Expected code generation for vrlnm_test_2 is vrldnm.
+   Expected code generation for the others is more complex, because
+   the second and third arguments are combined by a shift and OR,
+   and because there is no splat-immediate doubleword.
+    - For rlnm_test_1: vspltisw, vslw, xxlor, vrlwnm.
+    - For rlnm_test_2: xxspltib, vextsb2d, vsld, xxlor, vrldnm.
+   There is a choice of splat instructions in both cases, so we
+   just check for "splt".  */
+
+/* { dg-final { scan-assembler-times "vrlwmi" 1 } } */
+/* { dg-final { scan-assembler-times "vrldmi" 1 } } */
+/* { dg-final { scan-assembler-times "splt" 2 } } */
+/* { dg-final { scan-assembler-times "vextsb2d" 1 } } */
+/* { dg-final { scan-assembler-times "vslw" 1 } } */
+/* { dg-final { scan-assembler-times "vsld" 1 } } */
+/* { dg-final { scan-assembler-times "xxlor" 2 } } */
+/* { dg-final { scan-assembler-times "vrlwnm" 2 } } */
+/* { dg-final { scan-assembler-times "vrldnm" 2 } } */