diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 771c337ecf89ada965c79e7a1151e95d9c6ff751..14d3d4d6ecb5c5aafce858f14d01ab6c10c60199 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,29 @@
+2012-01-10  Georg-Johann Lay  <avr@gjlay.de>
+
+	PR target/49868
+	Extend __pgmx semantics to linearize memory.
+	* config/avr/avr.md (mov<mode>): Use avr_xload_libgcc_p to
+	determine if code comes inline or from libgcc.
+	(MOVMEM_r_d:HI): Add "w" to constraint for better preference.
+	(movmem_qi, movmem_qi): Set constraint #2 to "n".
+	(movmem_qi_elpm, movmem_hi_elpm): Remove insns.
+	(movmemx_qi, movmemx_hi): New insns.
+	(xload_<mode>_libgcc): Rewrite to new insn condition.
+	(xload_<mode>): Remove insns.
+	* config/avr/avr.c (avr_out_xload): Rewrite: Only need to handle
+	cases that don't satisfy avr_xload_libgcc_p().
+	(avr_addr_space_convert): Allow converting in any direction.
+	(avr_addr_space_subset_p): Return always true.
+	(avr_xload_libgcc_p): Rewrite to fit new __pgmx semantics.
+	(avr_emit_movmemhi): Ditto.
+	(avr_out_lpm): No need to handle ADDR_SPACE_PGMX any more.
+	(avr_out_movmem): Ditto.
+	(AVR_SYMBOL_FLAG_PROGMEM): New macro.
+	(AVR_SYMBOL_SET_ADDR_SPACE): New macro.
+	(AVR_SYMBOL_GET_ADDR_SPACE): New macro.
+	(avr_encode_section_info): Encode 'progmem' in symbol flags.
+	(output_reload_in_const): Don't zero-extend any 24-bit symbols.
+
 2012-01-10  Richard Guenther  <rguenther@suse.de>
 
 	PR tree-optimization/50913
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index a62835c093bfbffda4ec2ce6496265c2344df0e2..8d0a57fd1df67b915baa273fe63e04155ce1a7d1 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -62,6 +62,22 @@
    This must be the rightmost field of machine dependent section flags.  */
 #define AVR_SECTION_PROGMEM (0xf * SECTION_MACH_DEP)
 
+/* Similar 4-bit region for SYMBOL_REF_FLAGS.  */
+#define AVR_SYMBOL_FLAG_PROGMEM (0xf * SYMBOL_FLAG_MACH_DEP)
+
+/* Similar 4-bit region in SYMBOL_REF_FLAGS:
+   Set address-space AS in SYMBOL_REF_FLAGS of SYM  */
+#define AVR_SYMBOL_SET_ADDR_SPACE(SYM,AS)                       \
+  do {                                                          \
+    SYMBOL_REF_FLAGS (sym) &= ~AVR_SYMBOL_FLAG_PROGMEM;         \
+    SYMBOL_REF_FLAGS (sym) |= (AS) * SYMBOL_FLAG_MACH_DEP;      \
+  } while (0)
+
+/* Read address-space from SYMBOL_REF_FLAGS of SYM  */
+#define AVR_SYMBOL_GET_ADDR_SPACE(SYM)                          \
+  ((SYMBOL_REF_FLAGS (sym) & AVR_SYMBOL_FLAG_PROGMEM)           \
+   / SYMBOL_FLAG_MACH_DEP)
+
 /* Known address spaces.  The order must be the same as in the respective
    enum from avr.h (or designated initialized must be used).  */
 const avr_addrspace_t avr_addrspace[] =
@@ -2427,8 +2443,7 @@ avr_xload_libgcc_p (enum machine_mode mode)
   int n_bytes = GET_MODE_SIZE (mode);
   
   return (n_bytes > 1
-          && avr_current_arch->n_segments > 1
-          && !AVR_HAVE_ELPMX);
+          || avr_current_arch->n_segments > 1);
 }
 
 
@@ -2597,8 +2612,7 @@ avr_out_lpm (rtx insn, rtx *op, int *plen)
   int regno_dest;
   int segment;
   RTX_CODE code;
-  
-  addr_space_t as;
+  addr_space_t as = MEM_ADDR_SPACE (src);
 
   if (plen)
     *plen = 0;
@@ -2611,24 +2625,11 @@ avr_out_lpm (rtx insn, rtx *op, int *plen)
       return "";
     }
 
-  as = MEM_ADDR_SPACE (src);
-
   addr = XEXP (src, 0);
   code = GET_CODE (addr);
 
   gcc_assert (REG_P (dest));
-  
-  if (as == ADDR_SPACE_PGMX)
-    {
-      /* We are called from avr_out_xload because someone wrote
-         __pgmx on a device with just one flash segment.  */
-
-      gcc_assert (LO_SUM == code);
-
-      addr = XEXP (addr, 1);
-    }
-  else
-    gcc_assert (REG == code || POST_INC == code);
+  gcc_assert (REG == code || POST_INC == code);
 
   xop[0] = dest;
   xop[1] = addr;
@@ -2766,76 +2767,28 @@ avr_out_lpm (rtx insn, rtx *op, int *plen)
 }
 
 
-/* Worker function for xload_<mode> and xload_8 insns.  */
+/* Worker function for xload_8 insn.  */
 
 const char*
-avr_out_xload (rtx insn, rtx *op, int *plen)
+avr_out_xload (rtx insn ATTRIBUTE_UNUSED, rtx *op, int *plen)
 {
-  rtx xop[5];
-  rtx reg = op[0];
-  int n_bytes = GET_MODE_SIZE (GET_MODE (reg));
-  unsigned int regno = REGNO (reg);
-
-  if (avr_current_arch->n_segments == 1)
-    return avr_out_lpm (insn, op, plen);
+  rtx xop[4];
 
-  xop[0] = reg;
+  xop[0] = op[0];
   xop[1] = op[1];
   xop[2] = lpm_addr_reg_rtx;
-  xop[3] = lpm_reg_rtx;
-  xop[4] = tmp_reg_rtx;
-  
-  avr_asm_len ("out __RAMPZ__,%1", xop, plen, -1);
-  
-  if (1 == n_bytes)
-    {
-      if (AVR_HAVE_ELPMX)
-        return avr_asm_len ("elpm %0,%a2", xop, plen, 1);
-      else
-        return avr_asm_len ("elpm" CR_TAB
-                            "mov %0,%3", xop, plen, 2);
-    }
-
-  gcc_assert (AVR_HAVE_ELPMX);
-  
-  if (!reg_overlap_mentioned_p (reg, lpm_addr_reg_rtx))
-    {
-      /* Insn clobbers the Z-register so we can use post-increment.  */
-      
-      avr_asm_len                    ("elpm %A0,%a2+", xop, plen, 1);
-      if (n_bytes >= 2)  avr_asm_len ("elpm %B0,%a2+", xop, plen, 1);
-      if (n_bytes >= 3)  avr_asm_len ("elpm %C0,%a2+", xop, plen, 1);
-      if (n_bytes >= 4)  avr_asm_len ("elpm %D0,%a2+", xop, plen, 1);
-
-      return "";
-    }
+  xop[3] = AVR_HAVE_LPMX ? op[0] : lpm_reg_rtx;
 
-  switch (n_bytes)
-    {
-    default:
-      gcc_unreachable();
-      
-    case 2:
-      gcc_assert (regno == REGNO (lpm_addr_reg_rtx));
+  if (plen)
+    *plen = 0;
 
-      return avr_asm_len ("elpm %4,%a2+" CR_TAB
-                          "elpm %B0,%a2" CR_TAB
-                          "mov %A0,%4", xop, plen, 3);
+  avr_asm_len ("ld %3,%a2" CR_TAB
+               "sbrs %1,7", xop, plen, 2);
 
-    case 3:
-    case 4:
-      gcc_assert (regno + 2 == REGNO (lpm_addr_reg_rtx));
-      
-      avr_asm_len ("elpm %A0,%a2+" CR_TAB
-                   "elpm %B0,%a2+", xop, plen, 2);
+  avr_asm_len (AVR_HAVE_LPMX ? "lpm %3,%a2" : "lpm", xop, plen, 1);
 
-      if (n_bytes == 3)
-        return avr_asm_len ("elpm %C0,%a2", xop, plen, 1);
-      else
-        return avr_asm_len ("elpm %4,%a2+" CR_TAB
-                            "elpm %D0,%a2" CR_TAB
-                            "mov %C0,%4", xop, plen, 3);
-    }
+  if (REGNO (xop[0]) != REGNO (xop[3]))
+    avr_asm_len ("mov %0,%3", xop, plen, 1);
   
   return "";
 }
@@ -6673,8 +6626,8 @@ avr_assemble_integer (rtx x, unsigned int size, int aligned_p)
       default_assemble_integer (avr_const_address_lo16 (x),
                                 GET_MODE_SIZE (HImode), aligned_p);
       
-      fputs ("\t.warning\t\"assembling 24-bit address needs binutils extension for hh8(",
-             asm_out_file);
+      fputs ("\t.warning\t\"assembling 24-bit address needs binutils"
+             " extension for hh8(", asm_out_file);
       output_addr_const (asm_out_file, x);
       fputs (")\"\n", asm_out_file);
       
@@ -7277,6 +7230,23 @@ avr_encode_section_info (tree decl, rtx rtl, int new_decl_p)
     }
 
   default_encode_section_info (decl, rtl, new_decl_p);
+
+  if (decl && DECL_P (decl)
+      && TREE_CODE (decl) != FUNCTION_DECL
+      && MEM_P (rtl)
+      && SYMBOL_REF == GET_CODE (XEXP (rtl, 0)))
+   {
+      rtx sym = XEXP (rtl, 0);
+      addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (decl));
+
+      /* PSTR strings are in generic space but located in flash:
+         patch address space.  */
+      
+      if (-1 == avr_progmem_p (decl, DECL_ATTRIBUTES (decl)))
+        as = ADDR_SPACE_PGM;
+
+      AVR_SYMBOL_SET_ADDR_SPACE (sym, as);
+    }
 }
 
 
@@ -9019,10 +8989,8 @@ output_reload_in_const (rtx *op, rtx clobber_reg, int *len, bool clear_p)
           xop[1] = src;
           xop[2] = clobber_reg;
 
-          if (n >= 2 + (avr_current_arch->n_segments > 1))
-            avr_asm_len ("mov %0,__zero_reg__", xop, len, 1);
-          else
-            avr_asm_len (asm_code[n][ldreg_p], xop, len, ldreg_p ? 1 : 2);
+          avr_asm_len (asm_code[n][ldreg_p], xop, len, ldreg_p ? 1 : 2);
+          
           continue;
         }
 
@@ -9596,51 +9564,57 @@ avr_addr_space_convert (rtx src, tree type_from, tree type_to)
     avr_edump ("\n%!: op = %r\nfrom = %t\nto = %t\n",
                src, type_from, type_to);
 
+  /* Up-casting from 16-bit to 24-bit pointer.  */
+  
   if (as_from != ADDR_SPACE_PGMX
       && as_to == ADDR_SPACE_PGMX)
     {
-      rtx new_src;
-      int n_segments = avr_current_arch->n_segments;
-      RTX_CODE code = GET_CODE (src);
+      int msb;
+      rtx sym = src;
+      rtx reg = gen_reg_rtx (PSImode);
+
+      while (CONST == GET_CODE (sym) || PLUS == GET_CODE (sym))
+        sym = XEXP (sym, 0);
+
+      /* Look at symbol flags:  avr_encode_section_info set the flags
+         also if attribute progmem was seen so that we get the right
+         promotion for, e.g. PSTR-like strings that reside in generic space
+         but are located in flash.  In that case we patch the incoming
+         address space.  */
 
-      if (CONST == code
-          && PLUS == GET_CODE (XEXP (src, 0))
-          && SYMBOL_REF == GET_CODE (XEXP (XEXP (src, 0), 0))
-          && CONST_INT_P (XEXP (XEXP (src, 0), 1)))
+      if (SYMBOL_REF == GET_CODE (sym)
+          && ADDR_SPACE_PGM == AVR_SYMBOL_GET_ADDR_SPACE (sym))
         {
-          HOST_WIDE_INT offset = INTVAL (XEXP (XEXP (src, 0), 1));
-          const char *name = XSTR (XEXP (XEXP (src, 0), 0), 0);
-          
-          new_src = gen_rtx_SYMBOL_REF (PSImode, ggc_strdup (name));
-          new_src = gen_rtx_CONST (PSImode,
-                                   plus_constant (new_src, offset));
-          return new_src;
+          as_from = ADDR_SPACE_PGM;
         }
 
-      if (SYMBOL_REF == code)
-          {
-            const char *name = XSTR (src, 0);
-            
-            return gen_rtx_SYMBOL_REF (PSImode, ggc_strdup (name));
-          }
-      
+      /* Linearize memory: RAM has bit 23 set.  */
+             
+      msb = ADDR_SPACE_GENERIC_P (as_from)
+        ? 0x80
+        : avr_addrspace[as_from].segment % avr_current_arch->n_segments;
+
       src = force_reg (Pmode, src);
       
-      if (ADDR_SPACE_GENERIC_P (as_from)
-          || as_from == ADDR_SPACE_PGM
-          || n_segments == 1)
-        {
-          return gen_rtx_ZERO_EXTEND (PSImode, src);
-        }
-      else
-        {
-          int segment = avr_addrspace[as_from].segment % n_segments;
+      emit_insn (msb == 0
+                 ? gen_zero_extendhipsi2 (reg, src)
+                 : gen_n_extendhipsi2 (reg, gen_int_mode (msb, QImode), src));
+          
+      return reg;
+    }
 
-          new_src = gen_reg_rtx (PSImode);
-          emit_insn (gen_n_extendhipsi2 (new_src, GEN_INT (segment), src));
+  /* Down-casting from 24-bit to 16-bit throws away the high byte.  */
 
-          return new_src;
-        }
+  if (as_from == ADDR_SPACE_PGMX
+      && as_to != ADDR_SPACE_PGMX)
+    {
+      rtx new_src = gen_reg_rtx (Pmode);
+
+      src = force_reg (PSImode, src);
+      
+      emit_move_insn (new_src,
+                      simplify_gen_subreg (Pmode, src, PSImode, 0));
+      return new_src;
     }
   
   return src;
@@ -9650,19 +9624,16 @@ avr_addr_space_convert (rtx src, tree type_from, tree type_to)
 /* Implement `TARGET_ADDR_SPACE_SUBSET_P'.  */
 
 static bool
-avr_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
+avr_addr_space_subset_p (addr_space_t subset ATTRIBUTE_UNUSED,
+                         addr_space_t superset ATTRIBUTE_UNUSED)
 {
-  if (subset == ADDR_SPACE_PGMX
-      && superset != ADDR_SPACE_PGMX)
-    {
-      return false;
-    }
+  /* Allow any kind of pointer mess.  */
   
   return true;
 }
 
 
-/* Worker function for movmemhi insn.
+/* Worker function for movmemhi expander.
    XOP[0]  Destination as MEM:BLK
    XOP[1]  Source      "     "
    XOP[2]  # Bytes to copy
@@ -9692,12 +9663,14 @@ avr_emit_movmemhi (rtx *xop)
   a_src  = XEXP (xop[1], 0);
   a_dest = XEXP (xop[0], 0);
 
-  /* See if constant fits in 8 bits.  */
-
-  loop_mode = (count <= 0x100) ? QImode : HImode;
-
   if (PSImode == GET_MODE (a_src))
     {
+      gcc_assert (as == ADDR_SPACE_PGMX);
+
+      loop_mode = (count < 0x100) ? QImode : HImode;
+      loop_reg = gen_rtx_REG (loop_mode, 24);
+      emit_move_insn (loop_reg, gen_int_mode (count, loop_mode));
+
       addr1 = simplify_gen_subreg (HImode, a_src, PSImode, 0);
       a_hi8 = simplify_gen_subreg (QImode, a_src, PSImode, 2);
     }
@@ -9705,41 +9678,35 @@ avr_emit_movmemhi (rtx *xop)
     {
       int segment = avr_addrspace[as].segment % avr_current_arch->n_segments;
       
+      if (segment
+          && avr_current_arch->n_segments > 1)
+        {
+          a_hi8 = GEN_INT (segment);
+          emit_move_insn (rampz_rtx, a_hi8 = copy_to_mode_reg (QImode, a_hi8));
+        }
+      else if (!ADDR_SPACE_GENERIC_P (as))
+        {
+          as = ADDR_SPACE_PGM;
+        }
+      
       addr1 = a_src;
 
-      if (segment)
-        a_hi8 = GEN_INT (segment);
-    }
-
-  if (a_hi8
-      && avr_current_arch->n_segments > 1)
-    {
-      emit_move_insn (rampz_rtx, a_hi8 = copy_to_mode_reg (QImode, a_hi8));
-    }
-  else if (!ADDR_SPACE_GENERIC_P (as))
-    {
-      as = ADDR_SPACE_PGM;
+      loop_mode = (count <= 0x100) ? QImode : HImode;
+      loop_reg = copy_to_mode_reg (loop_mode, gen_int_mode (count, loop_mode));
     }
 
   xas = GEN_INT (as);
 
-  /* Create loop counter register */
-
-  loop_reg = copy_to_mode_reg (loop_mode, gen_int_mode (count, loop_mode));
-
-  /* Copy pointers into new pseudos - they will be changed */
-
-  addr0 = copy_to_mode_reg (HImode, a_dest);
-  addr1 = copy_to_mode_reg (HImode, addr1);
-
   /* FIXME: Register allocator might come up with spill fails if it is left
-        on its own.  Thus, we allocate the pointer registers by hand.  */
+        on its own.  Thus, we allocate the pointer registers by hand:
+        Z = source address
+        X = destination address  */
 
   emit_move_insn (lpm_addr_reg_rtx, addr1);
   addr1 = lpm_addr_reg_rtx;
 
   reg_x = gen_rtx_REG (HImode, REG_X);
-  emit_move_insn (reg_x, addr0);
+  emit_move_insn (reg_x, a_dest);
   addr0 = reg_x;
 
   /* FIXME: Register allocator does a bad job and might spill address
@@ -9748,30 +9715,30 @@ avr_emit_movmemhi (rtx *xop)
         load and store as seperate insns.  Instead, we perform the copy
         by means of one monolithic insn.  */
 
-  if (ADDR_SPACE_GENERIC_P (as))
+  gcc_assert (TMP_REGNO == LPM_REGNO);
+
+  if (as != ADDR_SPACE_PGMX)
     {
+      /* Load instruction ([E]LPM or LD) is known at compile time:
+         Do the copy-loop inline.  */
+      
       rtx (*fun) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
         = QImode == loop_mode ? gen_movmem_qi : gen_movmem_hi;
 
       insn = fun (addr0, addr1, xas, loop_reg,
                   addr0, addr1, tmp_reg_rtx, loop_reg);
     }
-  else if (as == ADDR_SPACE_PGM)
-    {
-      rtx (*fun) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
-        = QImode == loop_mode ? gen_movmem_qi : gen_movmem_hi;
-
-      insn = fun (addr0, addr1, xas, loop_reg, addr0, addr1,
-                  AVR_HAVE_LPMX ? tmp_reg_rtx : lpm_reg_rtx, loop_reg);
-    }
   else
     {
+      rtx loop_reg16 = gen_rtx_REG (HImode, 24);
+      rtx r23 = gen_rtx_REG (QImode, 23);
       rtx (*fun) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
-        = QImode == loop_mode ? gen_movmem_qi_elpm : gen_movmem_hi_elpm;
+        = QImode == loop_mode ? gen_movmemx_qi : gen_movmemx_hi;
+
+      emit_move_insn (r23, a_hi8);
       
       insn = fun (addr0, addr1, xas, loop_reg, addr0, addr1,
-                  AVR_HAVE_ELPMX ? tmp_reg_rtx : lpm_reg_rtx, loop_reg,
-                  a_hi8, a_hi8, GEN_INT (RAMPZ_ADDR));
+                  lpm_reg_rtx, loop_reg16, r23, r23, GEN_INT (RAMPZ_ADDR));
     }
 
   set_mem_addr_space (SET_SRC (XVECEXP (insn, 0, 0)), as);
@@ -9838,21 +9805,12 @@ avr_out_movmem (rtx insn ATTRIBUTE_UNUSED, rtx *xop, int *plen)
     case ADDR_SPACE_PGM3:
     case ADDR_SPACE_PGM4:
     case ADDR_SPACE_PGM5:
-    case ADDR_SPACE_PGMX:
 
       if (AVR_HAVE_ELPMX)
         avr_asm_len ("elpm %6,%a1+", xop, plen, 1);
       else
         avr_asm_len ("elpm" CR_TAB
                      "adiw %1,1", xop, plen, 2);
-      
-      if (as == ADDR_SPACE_PGMX
-          && !AVR_HAVE_ELPMX)
-        {
-          avr_asm_len ("adc %8,__zero_reg__" CR_TAB
-                       "out __RAMPZ__,%8", xop, plen, 2);
-        }
-      
       break;
     }
 
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 9f3c3f107a0c4685f4e1894a61617fe4aeecde1a..7ce211d211fe305ea4fb8130673ad217383a214e 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -390,6 +390,7 @@
         (match_operand:QI 1 "memory_operand"    "m"))
    (clobber (reg:HI REG_Z))]
   "can_create_pseudo_p()
+   && !avr_xload_libgcc_p (QImode)
    && avr_mem_pgmx_p (operands[1])
    && REG_P (XEXP (operands[1], 0))"
   { gcc_unreachable(); }
@@ -414,8 +415,7 @@
         (match_operand:MOVMODE 1 "memory_operand"    "m"))
    (clobber (reg:QI 21))
    (clobber (reg:HI REG_Z))]
-  "QImode != <MODE>mode
-   && can_create_pseudo_p()
+  "can_create_pseudo_p()
    && avr_mem_pgmx_p (operands[1])
    && REG_P (XEXP (operands[1], 0))"
   { gcc_unreachable(); }
@@ -426,35 +426,19 @@
     rtx reg_z = gen_rtx_REG (HImode, REG_Z);
     rtx addr_hi8 = simplify_gen_subreg (QImode, addr, PSImode, 2);
     addr_space_t as = MEM_ADDR_SPACE (operands[1]);
-    rtx hi8, insn;
+    rtx insn;
 
+    /* Split the address to R21:Z */
     emit_move_insn (reg_z, simplify_gen_subreg (HImode, addr, PSImode, 0));
+    emit_move_insn (gen_rtx_REG (QImode, 21), addr_hi8);
 
-    if (avr_xload_libgcc_p (<MODE>mode))
-      {
-        emit_move_insn (gen_rtx_REG (QImode, 21), addr_hi8);
-        insn = emit_insn (gen_xload_<mode>_libgcc ());
-        emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, 22));
-      }
-    else if (avr_current_arch->n_segments == 1
-             && GET_MODE_SIZE (<MODE>mode) > 2
-             && !AVR_HAVE_LPMX)
-      {
-        rtx src = gen_rtx_MEM (<MODE>mode, reg_z);
-
-        as = ADDR_SPACE_PGM;
-        insn = emit_insn (gen_load_<mode>_libgcc (src));
-        emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, 22));
-      }
-    else
-      {
-        hi8 = gen_reg_rtx (QImode);
-        emit_move_insn (hi8, addr_hi8);
-        insn = emit_insn (gen_xload_<mode> (operands[0], hi8));
-      }
-
+    /* Load with code from libgcc */
+    insn = emit_insn (gen_xload_<mode>_libgcc ());
     set_mem_addr_space (SET_SRC (single_set (insn)), as);
 
+    /* Move to destination */
+    emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, 22));
+
     DONE;
   })
 
@@ -462,16 +446,19 @@
 ;; These insns must be prior to respective generic move insn.
 
 (define_insn "xload_8"
-  [(set (match_operand:QI 0 "register_operand"                    "=r")
-        (mem:QI (lo_sum:PSI (match_operand:QI 1 "register_operand" "r")
+  [(set (match_operand:QI 0 "register_operand"                   "=&r,r")
+        (mem:QI (lo_sum:PSI (match_operand:QI 1 "register_operand" "r,r")
                             (reg:HI REG_Z))))]
-  ""
+  "!avr_xload_libgcc_p (QImode)"
   {
     return avr_out_xload (insn, operands, NULL);
   }
-  [(set_attr "adjust_len" "xload")
-   (set_attr "cc" "clobber")])
+  [(set_attr "length" "3,4")
+   (set_attr "adjust_len" "*,xload")
+   (set_attr "isa" "lpmx,lpm")
+   (set_attr "cc" "none")])
 
+;; "xload_qi_libgcc"
 ;; "xload_hi_libgcc"
 ;; "xload_psi_libgcc"
 ;; "xload_si_libgcc"
@@ -482,35 +469,14 @@
                                  (reg:HI REG_Z))))
    (clobber (reg:QI 21))
    (clobber (reg:HI REG_Z))]
-  "<MODE>mode != QImode
-   && avr_xload_libgcc_p (<MODE>mode)"
+  "avr_xload_libgcc_p (<MODE>mode)"
   {
     rtx x_bytes = GEN_INT (GET_MODE_SIZE (<MODE>mode));
 
-    /* Devices with ELPM* also have CALL.  */
-
-    output_asm_insn ("call __xload_%0", &x_bytes);
+    output_asm_insn ("%~call __xload_%0", &x_bytes);
     return "";
   }
-  [(set_attr "length" "2")
-   (set_attr "cc" "clobber")])
-
-;; "xload_hi"
-;; "xload_psi"
-;; "xload_si"
-;; "xload_sf"
-(define_insn "xload_<mode>"
-  [(set (match_operand:MOVMODE 0 "register_operand"                    "=r")
-        (mem:MOVMODE (lo_sum:PSI (match_operand:QI 1 "register_operand" "r")
-                                 (reg:HI REG_Z))))
-   (clobber (scratch:HI))
-   (clobber (reg:HI REG_Z))]
-  "<MODE>mode != QImode
-   && !avr_xload_libgcc_p (<MODE>mode)"
-  {
-    return avr_out_xload (insn, operands, NULL);
-  }
-  [(set_attr "adjust_len" "xload")
+  [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
 
@@ -521,7 +487,6 @@
 ;; "movsi"
 ;; "movsf"
 ;; "movpsi"
-
 (define_expand "mov<mode>"
   [(set (match_operand:MOVMODE 0 "nonimmediate_operand" "")
         (match_operand:MOVMODE 1 "general_operand" ""))]
@@ -548,7 +513,7 @@
       if (!REG_P (addr))
         src = replace_equiv_address (src, copy_to_mode_reg (PSImode, addr));
 
-      if (QImode == <MODE>mode)
+      if (!avr_xload_libgcc_p (<MODE>mode))
         emit_insn (gen_xload8_A (dest, src));
       else
         emit_insn (gen_xload<mode>_A (dest, src));
@@ -869,10 +834,10 @@
   })
 
 (define_mode_attr MOVMEM_r_d [(QI "r")
-                              (HI "d")])
+                              (HI "wd")])
 
-;; $0, $4 : & dest
-;; $1, $5 : & src
+;; $0, $4 : & dest (REG_X)
+;; $1, $5 : & src  (REG_Z)
 ;; $2     : Address Space
 ;; $3, $7 : Loop register
 ;; $6     : Scratch register
@@ -882,7 +847,7 @@
 (define_insn "movmem_<mode>"
   [(set (mem:BLK (match_operand:HI 0 "register_operand" "x"))
         (mem:BLK (match_operand:HI 1 "register_operand" "z")))
-   (unspec [(match_operand:QI 2 "const_int_operand"     "LP")]
+   (unspec [(match_operand:QI 2 "const_int_operand"     "n")]
            UNSPEC_MOVMEM)
    (use (match_operand:QIHI 3 "register_operand"       "<MOVMEM_r_d>"))
    (clobber (match_operand:HI 4 "register_operand"     "=0"))
@@ -897,29 +862,28 @@
    (set_attr "cc" "clobber")])
 
 ;; Ditto and
-;; $8, $9 : hh8 (& src)
+;; $3, $7 : Loop register = R24
+;; $8, $9 : hh8 (& src)   = R23
 ;; $10    : RAMPZ_ADDR
 
-;; "movmem_qi_elpm"
-;; "movmem_hi_elpm"
-(define_insn "movmem_<mode>_elpm"
+;; "movmemx_qi"
+;; "movmemx_hi"
+(define_insn "movmemx_<mode>"
   [(set (mem:BLK (match_operand:HI 0 "register_operand"             "x"))
         (mem:BLK (lo_sum:PSI (match_operand:QI 8 "register_operand" "r")
                              (match_operand:HI 1 "register_operand" "z"))))
    (unspec [(match_operand:QI 2 "const_int_operand"                 "n")]
            UNSPEC_MOVMEM)
-   (use (match_operand:QIHI 3 "register_operand"                   "<MOVMEM_r_d>"))
+   (use (match_operand:QIHI 3 "register_operand"                   "w"))
    (clobber (match_operand:HI 4 "register_operand"                 "=0"))
    (clobber (match_operand:HI 5 "register_operand"                 "=1"))
    (clobber (match_operand:QI 6 "register_operand"                 "=&r"))
-   (clobber (match_operand:QIHI 7 "register_operand"               "=3"))
+   (clobber (match_operand:HI 7 "register_operand"                 "=3"))
    (clobber (match_operand:QI 9 "register_operand"                 "=8"))
    (clobber (mem:QI (match_operand:QI 10 "io_address_operand"       "n")))]
   ""
-  {
-    return avr_out_movmem (insn, operands, NULL);
-  }
-  [(set_attr "adjust_len" "movmem")
+  "%~call __movmemx_<mode>"
+  [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
 
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 778848ba5bc058c09efd174e2a73ef4085851b3f..c29925f93fa7a4c6ff00b09cc5950bfc7fb2c29a 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,13 @@
+2012-01-10  Georg-Johann Lay  <avr@gjlay.de>
+
+	PR target/49868
+	Extend __pgmx semantics to linearize memory.
+	* config/avr/t-avr (LIB1ASMFUNCS): Add _xload_1, _movmemx.
+	* config/avr/lib1funcs.S (__xload_1): New function.
+	(__movmemx_qi, __movmemx_hi): New functions.
+	(__xload_2, __xload_3, __xload_4): Rewrite to fit new __pgmx
+	semantics.
+	
 2012-01-09  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* config/sparc/sol2-unwind.h (sparc64_is_sighandler): Check that the
diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index b51999323db5eca8b33c97fa7ac147c65395e52e..93ce2281534085a525f9d783867cae945619c208 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -2061,19 +2061,14 @@ ENDF __load_4
 #endif /* L_load_3 || L_load_3 */
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Loading n bytes from Flash; n = 2,3,4
-;; R22... = Flash[R21:Z]
+;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
+;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
 ;; Clobbers: __tmp_reg__, R21, R30, R31
 
-#if (defined (L_xload_2)            \
+#if (defined (L_xload_1)            \
+     || defined (L_xload_2)         \
      || defined (L_xload_3)         \
-     || defined (L_xload_4))        \
-    && defined (__AVR_HAVE_ELPM__)  \
-    && !defined (__AVR_HAVE_ELPMX__)
-
-#if !defined (__AVR_HAVE_RAMPZ__)
-#error Need RAMPZ
-#endif /* have RAMPZ */
+     || defined (L_xload_4))
 
 ;; Destination
 #define D0  22
@@ -2086,6 +2081,9 @@ ENDF __load_4
 #define HHI8  21
 
 .macro  .xload dest, n
+#if defined (__AVR_HAVE_ELPMX__)
+    elpm    \dest, Z+
+#elif defined (__AVR_HAVE_ELPM__)
     elpm
     mov     \dest, r0
 .if \dest != D0+\n-1
@@ -2093,39 +2091,161 @@ ENDF __load_4
     adc     HHI8, __zero_reg__
     out     __RAMPZ__, HHI8
 .endif
-.endm
+#elif defined (__AVR_HAVE_LPMX__)
+    lpm     \dest, Z+
+#else
+    lpm
+    mov     \dest, r0
+.if \dest != D0+\n-1
+    adiw    r30, 1
+.endif
+#endif
+.endm ; .xload
+
+#if defined (L_xload_1)
+DEFUN __xload_1
+#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_RAMPZ__)
+    ld      D0, Z
+    sbrs    HHI8, 7
+    lpm     D0, Z
+    ret
+#else
+    sbrc    HHI8, 7
+    rjmp    1f
+#if defined (__AVR_HAVE_RAMPZ__)
+    out     __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_RAMPZ__ */
+    .xload  D0, 1
+    ret
+1:  ld      D0, Z
+    ret
+#endif /* LPMx && ! RAMPZ */
+ENDF __xload_1
+#endif /* L_xload_1 */
 
 #if defined (L_xload_2)
 DEFUN __xload_2
+    sbrc    HHI8, 7
+    rjmp    1f
+#if defined (__AVR_HAVE_RAMPZ__)
     out     __RAMPZ__, HHI8
-    .xload D0, 2
-    .xload D1, 2
+#endif /* __AVR_HAVE_RAMPZ__ */
+    .xload  D0, 2
+    .xload  D1, 2
+    ret
+1:  ld      D0, Z+
+    ld      D1, Z+
     ret
 ENDF __xload_2
 #endif /* L_xload_2 */
 
 #if defined (L_xload_3)
 DEFUN __xload_3
+    sbrc    HHI8, 7
+    rjmp    1f
+#if defined (__AVR_HAVE_RAMPZ__)
     out     __RAMPZ__, HHI8
-    .xload D0, 3
-    .xload D1, 3
-    .xload D2, 3
+#endif /* __AVR_HAVE_RAMPZ__ */
+    .xload  D0, 3
+    .xload  D1, 3
+    .xload  D2, 3
+    ret
+1:  ld      D0, Z+
+    ld      D1, Z+
+    ld      D2, Z+
     ret
 ENDF __xload_3
 #endif /* L_xload_3 */
 
 #if defined (L_xload_4)
 DEFUN __xload_4
+    sbrc    HHI8, 7
+    rjmp    1f
+#if defined (__AVR_HAVE_RAMPZ__)
     out     __RAMPZ__, HHI8
-    .xload D0, 4
-    .xload D1, 4
-    .xload D2, 4
-    .xload D3, 4
+#endif /* __AVR_HAVE_RAMPZ__ */
+    .xload  D0, 4
+    .xload  D1, 4
+    .xload  D2, 4
+    .xload  D3, 4
+    ret
+1:  ld      D0, Z+
+    ld      D1, Z+
+    ld      D2, Z+
+    ld      D3, Z+
     ret
 ENDF __xload_4
 #endif /* L_xload_4 */
 
-#endif /* L_xload_{2|3|4} && ELPM */
+#endif /* L_xload_{1|2|3|4} */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; memcopy from Address Space __pgmx to RAM
+;; R23:Z = Source Address
+;; X     = Destination Address
+;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
+
+#if defined (L_movmemx)
+
+#define HHI8  23
+#define LOOP  24
+
+DEFUN __movmemx_qi
+    ;; #Bytes to copy fity in 8 Bits (1..255)
+    ;; Zero-extend Loop Counter
+    clr     LOOP+1
+    ;; FALLTHRU
+ENDF __movmemx_qi
+
+DEFUN __movmemx_hi
+
+;; Read from where?
+    sbrc    HHI8, 7
+    rjmp    1f
+
+;; Read from Flash
+
+#if defined (__AVR_HAVE_RAMPZ__)
+    out     __RAMPZ__, HHI8
+#endif
+
+0:  ;; Load 1 Byte from Flash...
+
+#if defined (__AVR_HAVE_ELPMX__)
+    elpm    r0, Z+
+#elif defined (__AVR_HAVE_ELPM__)
+    elpm
+    adiw    r30, 1
+    adc     HHI8, __zero_reg__
+    out     __RAMPZ__, HHI8
+#elif defined (__AVR_HAVE_LPMX__)
+    lpm     r0, Z+
+#else
+    lpm
+    adiw    r30, 1
+#endif
+
+    ;; ...and store that Byte to RAM Destination
+    st      X+, r0
+    sbiw    LOOP, 1
+    brne    0b
+    ret
+
+;; Read from RAM
+
+1:  ;; Read 1 Byte from RAM...
+    ld      r0, Z+
+    ;; and store that Byte to RAM Destination
+    st      X+, r0
+    sbiw    LOOP, 1
+    brne    0b
+    ret
+ENDF __movmemx_hi
+
+#undef HHI8
+#undef LOOP
+
+#endif /* L_movmemx */
 
 
 .section .text.libgcc.builtins, "ax", @progbits
diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr
index 656816032c448b1530fa28235c2369d4fc59061c..43caa94ca2a272f4d25f1d0669588864a4b32df7 100644
--- a/libgcc/config/avr/t-avr
+++ b/libgcc/config/avr/t-avr
@@ -27,7 +27,8 @@ LIB1ASMFUNCS = \
 	_tablejump \
 	_tablejump_elpm \
 	_load_3 _load_4 \
-	_xload_2 _xload_3 _xload_4 \
+	_xload_1 _xload_2 _xload_3 _xload_4 \
+	_movmemx \
 	_copy_data \
 	_clear_bss \
 	_ctors \