From 5dace4bf90ee311424ac625f9c1e2a9693df1ba9 Mon Sep 17 00:00:00 2001
From: Thomas Koenig <tkoenig@gcc.gnu.org>
Date: Sun, 18 Jun 2017 18:04:19 +0000
Subject: [PATCH] re PR fortran/52473 (CSHIFT slow - inline it?)

2017-06-18  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/52473
	* m4/cshift0.m4:  For arrays that are contiguous up to
	shift, implement blocked algorighm for cshift.
	* generated/cshift0_c10.c:  Regenerated.
	* generated/cshift0_c16.c:  Regenerated.
	* generated/cshift0_c4.c:  Regenerated.
	* generated/cshift0_c8.c:  Regenerated.
	* generated/cshift0_i1.c:  Regenerated.
	* generated/cshift0_i16.c:  Regenerated.
	* generated/cshift0_i2.c:  Regenerated.
	* generated/cshift0_i4.c:  Regenerated.
	* generated/cshift0_i8.c:  Regenerated.
	* generated/cshift0_r10.c:  Regenerated.
	* generated/cshift0_r16.c:  Regenerated.
	* generated/cshift0_r4.c:  Regenerated.
	* generated/cshift0_r8.c:  Regenerated.

2017-06-18  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/52473
	* gfortran.dg/cshift_1.f90:  New test.

From-SVN: r249350
---
 gcc/testsuite/ChangeLog                |   5 ++
 gcc/testsuite/gfortran.dg/cshift_1.f90 | 108 +++++++++++++++++++++++
 libgfortran/ChangeLog                  |  19 ++++
 libgfortran/generated/cshift0_c10.c    | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_c16.c    | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_c4.c     | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_c8.c     | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_i1.c     | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_i16.c    | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_i2.c     | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_i4.c     | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_i8.c     | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_r10.c    | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_r16.c    | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_r4.c     | 117 ++++++++++++++++++++-----
 libgfortran/generated/cshift0_r8.c     | 117 ++++++++++++++++++++-----
 libgfortran/m4/cshift0.m4              | 117 ++++++++++++++++++++-----
 17 files changed, 1434 insertions(+), 336 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/cshift_1.f90

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ea83e353d8cd..9200ee89f86f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-06-18  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+	PR fortran/52473
+	* gfortran.dg/cshift_1.f90:  New test.
+
 2017-06-17  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	Remove dg-skip-if, dg-xfail-if, dg-xfail-run-if default args.
diff --git a/gcc/testsuite/gfortran.dg/cshift_1.f90 b/gcc/testsuite/gfortran.dg/cshift_1.f90
new file mode 100644
index 000000000000..e2024ea99ddd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cshift_1.f90
@@ -0,0 +1,108 @@
+! { dg-do  run }
+! Take cshift through its paces to make sure no boundary
+! cases are wrong.
+
+module kinds
+  integer, parameter :: sp = selected_real_kind(6) ! Single precision
+end module kinds
+
+module replacements
+  use kinds
+contains
+  subroutine cshift_sp_3_v1 (array, shift, dim, res)
+    integer, parameter :: wp = sp
+    real(kind=wp), dimension(:,:,:), intent(in) :: array
+    integer, intent(in) :: shift, dim
+    real(kind=wp), dimension(:,:,:), intent(out) :: res
+    integer :: i,j,k
+    integer :: sh, rsh
+    integer :: n
+    integer :: n2, n3
+    res = 0
+    n3 = size(array,3)
+    n2 = size(array,2)
+    n1 = size(array,1)
+    if (dim == 1) then
+       n = n1
+       sh = modulo(shift, n)
+       rsh = n - sh
+       do k=1, n3
+          do j=1, n2
+             do i=1, rsh
+                res(i,j,k) = array(i+sh,j,k)
+             end do
+             do i=rsh+1,n
+                res(i,j,k) = array(i-rsh,j,k)
+             end do
+          end do
+       end do
+    else if (dim == 2) then
+       n = n2
+       sh = modulo(shift,n)
+       rsh = n - sh
+       do k=1, n3
+          do j=1, rsh
+             do i=1, n1
+                res(i,j,k) = array(i,j+sh, k)
+             end do
+          end do
+          do j=rsh+1, n
+             do i=1, n1
+                res(i,j,k) = array(i,j-rsh, k)
+             end do
+          end do
+       end do
+    else if (dim == 3) then
+       n = n3
+       sh = modulo(shift, n)
+       rsh = n - sh
+       do k=1, rsh
+          do j=1, n2
+             do i=1, n1
+                res(i,j,k) = array(i, j, k+sh)
+             end do
+          end do
+       end do
+       do k=rsh+1, n
+          do j=1, n2
+             do i=1, n1
+                res(i,j, k) = array(i, j, k-rsh)
+             end do
+          end do          
+       end do
+    else
+       stop "Wrong argument to dim"
+    end if
+  end subroutine cshift_sp_3_v1
+end module replacements
+
+program testme
+  use kinds
+  use replacements
+  implicit none
+  integer, parameter :: wp = sp  ! Working precision
+  INTEGER, PARAMETER :: n = 7
+  real(kind=wp), dimension(:,:,:), allocatable :: a,b,c
+  integer i, j, k
+  real:: t1, t2
+  integer, parameter :: nrep = 20
+
+  allocate (a(n,n,n), b(n,n,n),c(n,n,n))
+  call random_number(a)
+  do k = 1,3
+   do i=-3,3,2
+     call cshift_sp_3_v1 (a, i, k, b)
+     c = cshift(a,i,k)
+     if (any (c /= b)) call abort
+   end do
+  end do
+  deallocate (b,c)
+  allocate (b(n-1,n-1,n-1),c(n-1,n-1,n-1))
+  do k=1,3
+     do i=-3,3,2
+        call cshift_sp_3_v1 (a(1:n-1,1:n-1,1:n-1), i, k, b)
+        c = cshift(a(1:n-1,1:n-1,1:n-1), i, k)
+        if (any (c /= b)) call abort
+     end do
+  end do
+end program testme
diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog
index 1f5a0264172c..4f7b79dc12a2 100644
--- a/libgfortran/ChangeLog
+++ b/libgfortran/ChangeLog
@@ -1,3 +1,22 @@
+2017-06-18  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+	PR fortran/52473
+	* m4/cshift0.m4:  For arrays that are contiguous up to
+	shift, implement blocked algorighm for cshift.
+	* generated/cshift0_c10.c:  Regenerated.
+	* generated/cshift0_c16.c:  Regenerated.
+	* generated/cshift0_c4.c:  Regenerated.
+	* generated/cshift0_c8.c:  Regenerated.
+	* generated/cshift0_i1.c:  Regenerated.
+	* generated/cshift0_i16.c:  Regenerated.
+	* generated/cshift0_i2.c:  Regenerated.
+	* generated/cshift0_i4.c:  Regenerated.
+	* generated/cshift0_i8.c:  Regenerated.
+	* generated/cshift0_r10.c:  Regenerated.
+	* generated/cshift0_r16.c:  Regenerated.
+	* generated/cshift0_r4.c:  Regenerated.
+	* generated/cshift0_r8.c:  Regenerated.
+
 2017-06-06  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
 	PR fortran/80975
diff --git a/libgfortran/generated/cshift0_c10.c b/libgfortran/generated/cshift0_c10.c
index c123d66d1aa5..120ea91bea76 100644
--- a/libgfortran/generated/cshift0_c10.c
+++ b/libgfortran/generated/cshift0_c10.c
@@ -51,6 +51,9 @@ cshift0_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_c16.c b/libgfortran/generated/cshift0_c16.c
index 7a80006b44ea..821cf29c2d62 100644
--- a/libgfortran/generated/cshift0_c16.c
+++ b/libgfortran/generated/cshift0_c16.c
@@ -51,6 +51,9 @@ cshift0_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_c4.c b/libgfortran/generated/cshift0_c4.c
index 92f7099b893d..83c751e27870 100644
--- a/libgfortran/generated/cshift0_c4.c
+++ b/libgfortran/generated/cshift0_c4.c
@@ -51,6 +51,9 @@ cshift0_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_c8.c b/libgfortran/generated/cshift0_c8.c
index 995ea90bd5c7..920f05735ad2 100644
--- a/libgfortran/generated/cshift0_c8.c
+++ b/libgfortran/generated/cshift0_c8.c
@@ -51,6 +51,9 @@ cshift0_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_i1.c b/libgfortran/generated/cshift0_i1.c
index afb91136cd1b..320134da2218 100644
--- a/libgfortran/generated/cshift0_i1.c
+++ b/libgfortran/generated/cshift0_i1.c
@@ -51,6 +51,9 @@ cshift0_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_i16.c b/libgfortran/generated/cshift0_i16.c
index aa0384268cf6..ca1fa3b82539 100644
--- a/libgfortran/generated/cshift0_i16.c
+++ b/libgfortran/generated/cshift0_i16.c
@@ -51,6 +51,9 @@ cshift0_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_i2.c b/libgfortran/generated/cshift0_i2.c
index c5c7d9e41d30..841ae38292b0 100644
--- a/libgfortran/generated/cshift0_i2.c
+++ b/libgfortran/generated/cshift0_i2.c
@@ -51,6 +51,9 @@ cshift0_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_i4.c b/libgfortran/generated/cshift0_i4.c
index 22bd33426f92..87713655d6a5 100644
--- a/libgfortran/generated/cshift0_i4.c
+++ b/libgfortran/generated/cshift0_i4.c
@@ -51,6 +51,9 @@ cshift0_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_i8.c b/libgfortran/generated/cshift0_i8.c
index 5d40cc708c42..7c55358fee8a 100644
--- a/libgfortran/generated/cshift0_i8.c
+++ b/libgfortran/generated/cshift0_i8.c
@@ -51,6 +51,9 @@ cshift0_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_r10.c b/libgfortran/generated/cshift0_r10.c
index 4f2feae68e26..c2f5ca687cac 100644
--- a/libgfortran/generated/cshift0_r10.c
+++ b/libgfortran/generated/cshift0_r10.c
@@ -51,6 +51,9 @@ cshift0_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_r16.c b/libgfortran/generated/cshift0_r16.c
index d18a207e430c..abbea68a4f85 100644
--- a/libgfortran/generated/cshift0_r16.c
+++ b/libgfortran/generated/cshift0_r16.c
@@ -51,6 +51,9 @@ cshift0_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_r4.c b/libgfortran/generated/cshift0_r4.c
index 5b9e3ae7e7d9..078856609644 100644
--- a/libgfortran/generated/cshift0_r4.c
+++ b/libgfortran/generated/cshift0_r4.c
@@ -51,6 +51,9 @@ cshift0_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/generated/cshift0_r8.c b/libgfortran/generated/cshift0_r8.c
index eacc8cfc3409..13e3a5c02d43 100644
--- a/libgfortran/generated/cshift0_r8.c
+++ b/libgfortran/generated/cshift0_r8.c
@@ -51,6 +51,9 @@ cshift0_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -63,33 +66,99 @@ cshift0_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
diff --git a/libgfortran/m4/cshift0.m4 b/libgfortran/m4/cshift0.m4
index 3a824409682a..04459d72195d 100644
--- a/libgfortran/m4/cshift0.m4
+++ b/libgfortran/m4/cshift0.m4
@@ -52,6 +52,9 @@ cshift0_'rtype_code` ('rtype` *ret, const 'rtype` *array, ptrdiff_t shift,
   index_type len;
   index_type n;
 
+  bool do_blocked;
+  index_type r_ex, a_ex;
+
   which = which - 1;
   sstride[0] = 0;
   rstride[0] = 0;
@@ -64,33 +67,99 @@ cshift0_'rtype_code` ('rtype` *ret, const 'rtype` *array, ptrdiff_t shift,
   soffset = 1;
   len = 0;
 
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+  r_ex = 1;
+  a_ex = 1;
+
+  if (which > 0)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          if (roffset == 0)
-            roffset = 1;
-          soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
-          if (soffset == 0)
-            soffset = 1;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
-          n++;
-        }
+      /* Test if both ret and array are contiguous.  */
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
+  n = 0;
+
+  if (do_blocked)
+    {
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = cshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = cshift(a,sh*n1*n2,1)
+
+	 we can used a more blocked algorithm for dim>1.  */
+      sstride[0] = 1;
+      rstride[0] = 1;
+      roffset = 1;
+      soffset = 1;
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);      
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	  n++;
+	}
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
+    }
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      if (roffset == 0)
+		roffset = 1;
+	      soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      if (soffset == 0)
+		soffset = 1;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+	      n++;
+	    }
+	}
+      if (sstride[0] == 0)
+	sstride[0] = 1;
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+
+      dim = GFC_DESCRIPTOR_RANK (array);
     }
-  if (sstride[0] == 0)
-    sstride[0] = 1;
-  if (rstride[0] == 0)
-    rstride[0] = 1;
 
-  dim = GFC_DESCRIPTOR_RANK (array);
   rstride0 = rstride[0];
   sstride0 = sstride[0];
   rptr = ret->base_addr;
-- 
GitLab