diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 41545d2f364e7..24042be4a7137 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -337,12 +337,24 @@ end
 
 zero{T}(x::AbstractArray{T}) = fill!(similar(x), zero(T))
 
-## iteration support for arrays as ranges ##
+## iteration support for arrays by iterating over `eachindex` in the array ##
+# Allows fast iteration by default for both LinearFast and LinearSlow arrays
+
+# While the definitions for LinearFast are all simple enough to inline on their
+# own, LinearSlow's CartesianRange is more complicated and requires explicit
+# inlining. The real @inline macro is not available this early in the bootstrap,
+# so this internal macro splices the meta Expr directly into the function body.
+macro _inline_meta()
+    Expr(:meta, :inline)
+end
+start(A::AbstractArray) = (@_inline_meta(); itr = eachindex(A); (itr, start(itr)))
+next(A::AbstractArray,i) = (@_inline_meta(); (idx, s) = next(i[1], i[2]); (A[idx], (i[1], s)))
+done(A::AbstractArray,i) = done(i[1], i[2])
+
+# eachindex iterates over all indices. LinearSlow definitions are later.
+eachindex(A::AbstractArray) = (@_inline_meta; eachindex(linearindexing(A), A))
+eachindex(::LinearFast, A::AbstractArray) = 1:length(A)
 
-start(A::AbstractArray) = _start(A,linearindexing(A))
-_start(::AbstractArray,::LinearFast) = 1
-next(a::AbstractArray,i) = (a[i],i+1)
-done(a::AbstractArray,i) = (i > length(a))
 isempty(a::AbstractArray) = (length(a) == 0)
 
 ## Conversions ##
diff --git a/base/array.jl b/base/array.jl
index 98e147533466a..55f464964ab1b 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -297,6 +297,11 @@ end
 
 collect(itr) = collect(eltype(itr), itr)
 
+## Iteration ##
+start(A::Array) = 1
+next(a::Array,i) = (a[i],i+1)
+done(a::Array,i) = (i > length(a))
+
 ## Indexing: getindex ##
 
 getindex(a::Array) = arrayref(a,1)
diff --git a/base/dict.jl b/base/dict.jl
index 12cc943c720b6..77e4ed5b8cb48 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -713,7 +713,7 @@ function skip_deleted(h::Dict, i)
 end
 
 start(t::Dict) = skip_deleted(t, 1)
-done(t::Dict, i) = done(t.vals, i)
+done(t::Dict, i) = i > length(t.vals)
 next(t::Dict, i) = ((t.keys[i],t.vals[i]), skip_deleted(t,i+1))
 
 isempty(t::Dict) = (t.count == 0)
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 212d5ed9bc538..38b8ec3d231d1 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -1,11 +1,11 @@
 ### Multidimensional iterators
 module IteratorsMD
 
-import Base: eltype, length, start, _start, done, next, last, getindex, setindex!, linearindexing, min, max
+import Base: eltype, length, start, done, next, last, getindex, setindex!, linearindexing, min, max, eachindex
 import Base: simd_outer_range, simd_inner_length, simd_index
 import Base: @nref, @ncall, @nif, @nexprs, LinearFast, LinearSlow, to_index
 
-export CartesianIndex, CartesianRange, eachindex
+export CartesianIndex, CartesianRange
 
 # Traits for linear indexing
 linearindexing(::BitArray) = LinearFast()
@@ -110,60 +110,37 @@ stagedfunction CartesianRange{N}(I::CartesianIndex{N})
 end
 CartesianRange{N}(sz::NTuple{N,Int}) = CartesianRange(CartesianIndex(sz))
 
-stagedfunction eachindex{T,N}(A::AbstractArray{T,N})
+stagedfunction eachindex{T,N}(::LinearSlow, A::AbstractArray{T,N})
     startargs = fill(1, N)
     stopargs = [:(size(A,$i)) for i=1:N]
-    :(CartesianRange(CartesianIndex{$N}($(startargs...)), CartesianIndex{$N}($(stopargs...))))
+    meta = Expr(:meta, :inline)
+    :($meta; CartesianRange(CartesianIndex{$N}($(startargs...)), CartesianIndex{$N}($(stopargs...))))
 end
 
 eltype{I}(::Type{CartesianRange{I}}) = I
 eltype{I}(::CartesianRange{I}) = I
 
-stagedfunction start{I}(iter::CartesianRange{I})
-    N=length(I)
-    finishedex = Expr(:(||), [:(iter.stop[$i] < iter.start[$i]) for i = 1:N]...)
-    :(return $finishedex, iter.start)
-end
-
-stagedfunction _start{T,N}(A::AbstractArray{T,N}, ::LinearSlow)
-    args = fill(1, N)
-    :(return isempty(A), CartesianIndex{$N}($(args...)))
-end
-
-# Prevent an ambiguity warning
-next(R::StepRange, state::(Bool, CartesianIndex{1})) = (index=state[2]; return R[index], (index[1]==length(R), CartesianIndex{1}(index[1]+1)))
-next{T}(R::UnitRange{T}, state::(Bool, CartesianIndex{1})) = (index=state[2]; return R[index], (index[1]==length(R), CartesianIndex{1}(index[1]+1)))
-done(R::StepRange, state::(Bool, CartesianIndex{1})) = state[1]
-done(R::UnitRange, state::(Bool, CartesianIndex{1})) = state[1]
-
-stagedfunction next{T,N}(A::AbstractArray{T,N}, state::(Bool, CartesianIndex{N}))
-    I = state[2]
-    finishedex = (N==0 ? true : :(newindex[$N] > size(A, $N)))
+start(iter::CartesianRange) = iter.start
+stagedfunction next{I<:CartesianIndex}(iter::CartesianRange{I}, state)
+    N = length(I)
     meta = Expr(:meta, :inline)
     quote
         $meta
-        index=state[2]
-        @inbounds v = A[index]
-        newindex=@nif $N d->(index[d] < size(A, d)) d->@ncall($N, $I, k->(k>d ? index[k] : k==d ? index[k]+1 : 1))
-        finished=$finishedex
-        v, (finished,newindex)
+        index=state
+        @nif $N d->(index[d] < iter.stop[d]) d->(@nexprs($N, k->(ind_k = ifelse(k>=d, index[k] + (k==d), iter.start[k]))))
+        newindex = @ncall $N $I ind
+        index, newindex
     end
 end
-stagedfunction next{I<:CartesianIndex}(iter::CartesianRange{I}, state::(Bool, I))
+stagedfunction done{I<:CartesianIndex}(iter::CartesianRange{I}, state)
     N = length(I)
-    finishedex = (N==0 ? true : :(newindex[$N] > iter.stop[$N]))
-    meta = Expr(:meta, :inline)
-    quote
-        $meta
-        index=state[2]
-        newindex=@nif $N d->(index[d] < iter.stop[d]) d->@ncall($N, $I, k->(k>d ? index[k] : k==d ? index[k]+1 : iter.start[k]))
-        finished=$finishedex
-        index, (finished,newindex)
-    end
+    :(state[$N] > iter.stop[$N])
 end
 
-done{T,N}(A::AbstractArray{T,N}, state::(Bool, CartesianIndex{N})) = state[1]
-done{I<:CartesianIndex}(iter::CartesianRange{I}, state::(Bool, I)) = state[1]
+# 0-d cartesian ranges are special-cased to iterate once and only once
+start{I<:CartesianIndex{0}}(iter::CartesianRange{I}) = false
+next{I<:CartesianIndex{0}}(iter::CartesianRange{I}, state) = iter.start, true
+done{I<:CartesianIndex{0}}(iter::CartesianRange{I}, state) = state
 
 stagedfunction length{I<:CartesianIndex}(iter::CartesianRange{I})
     N = length(I)
@@ -502,7 +479,7 @@ function copy!{T,N}(dest::AbstractArray{T,N}, src::AbstractArray{T,N})
             break
         end
     end
-    if samesize
+    if samesize && linearindexing(dest) == linearindexing(src)
         for I in eachindex(dest)
             @inbounds dest[I] = src[I]
         end
diff --git a/base/reducedim.jl b/base/reducedim.jl
index c4c0cf31f3717..92177099c175c 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -209,7 +209,7 @@ function _mapreducedim!{T,N}(f, op, R::AbstractArray, A::AbstractArray{T,N})
         end
     else
         sizeR = CartesianIndex{N}(size(R))
-        @inbounds @simd for IA in eachindex(A)
+        @inbounds @simd for IA in CartesianRange(size(A))
             IR = min(IA, sizeR)
             R[IR] = op(R[IR], f(A[IA]))
         end
@@ -306,7 +306,7 @@ function findminmax!{T,N}(f, Rval, Rind, A::AbstractArray{T,N})
         end
     else
         sizeR = CartesianIndex(size(Rval))
-        @inbounds for IA in eachindex(A)
+        @inbounds for IA in CartesianRange(size(A))
             IR = min(sizeR, IA)
             k += 1
             tmpAv = A[IA]
diff --git a/doc/stdlib/arrays.rst b/doc/stdlib/arrays.rst
index 4817e662ebd28..5b645d3f581d2 100644
--- a/doc/stdlib/arrays.rst
+++ b/doc/stdlib/arrays.rst
@@ -33,29 +33,31 @@ Basic functions
 
 .. function:: eachindex(A)
 
-   Creates an iterable object for visiting each multi-dimensional index of the AbstractArray ``A``.  Example for a 2-d array::
+   Creates an iterable object for visiting each index of an AbstractArray ``A`` in an efficient manner. For array types that have opted into fast linear indexing (like ``Array``), this is simply the range ``1:length(A)``. For other array types, this returns a specialized Cartesian range to efficiently index into the array with indices specified for every dimension. Example for a sparse 2-d array::
 
-    julia> A = rand(2,3)
-    2x3 Array{Float64,2}:
-     0.960084  0.629326  0.625155
-     0.432588  0.955903  0.991614
+    julia> A = sprand(2, 3, 0.5)
+    2x3 sparse matrix with 4 Float64 entries:
+        [1, 1]  =  0.598888
+        [1, 2]  =  0.0230247
+        [1, 3]  =  0.486499
+        [2, 3]  =  0.809041
 
     julia> for iter in eachindex(A)
-	       @show iter.I_1, iter.I_2
-	       @show A[iter]
-	   end
+               @show iter.I_1, iter.I_2
+               @show A[iter]
+           end
     (iter.I_1,iter.I_2) = (1,1)
-    A[iter] = 0.9600836263003063
+    A[iter] = 0.5988881393454597
     (iter.I_1,iter.I_2) = (2,1)
-    A[iter] = 0.4325878255452178
+    A[iter] = 0.0
     (iter.I_1,iter.I_2) = (1,2)
-    A[iter] = 0.6293256402775211
+    A[iter] = 0.02302469881746183
     (iter.I_1,iter.I_2) = (2,2)
-    A[iter] = 0.9559027084099654
+    A[iter] = 0.0
     (iter.I_1,iter.I_2) = (1,3)
-    A[iter] = 0.6251548453735303
+    A[iter] = 0.4864987874354343
     (iter.I_1,iter.I_2) = (2,3)
-    A[iter] = 0.9916142534546522
+    A[iter] = 0.8090413606455655
 
 .. function:: countnz(A)
 
diff --git a/test/arrayops.jl b/test/arrayops.jl
index c48d136e09f8f..a2bb56a6c9aae 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -993,7 +993,7 @@ I2 = CartesianIndex((-1,5,2))
 
 @test length(I1) == 3
 
-a = zeros(2,3)
+a = spzeros(2,3)
 @test CartesianRange(size(a)) == eachindex(a)
 a[CartesianIndex{2}(2,3)] = 5
 @test a[2,3] == 5
@@ -1015,22 +1015,24 @@ indexes = collect(R)
 @test length(R) == 12
 
 r = 2:3
-state = start(eachindex(r))
-@test !done(r, state)
-_, state = next(r, state)
-@test !done(r, state)
-val, state = next(r, state)
-@test done(r, state)
-@test val == 3
-r = 2:3:8
-state = start(eachindex(r))
-@test !done(r, state)
-_, state = next(r, state)
-_, state = next(r, state)
-@test !done(r, state)
-val, state = next(r, state)
-@test val == 8
-@test done(r, state)
+itr = eachindex(r)
+state = start(itr)
+@test !done(itr, state)
+_, state = next(itr, state)
+@test !done(itr, state)
+val, state = next(itr, state)
+@test done(itr, state)
+@test r[val] == 3
+r = sparse(collect(2:3:8))
+itr = eachindex(r)
+state = start(itr)
+@test !done(itr, state)
+_, state = next(itr, state)
+_, state = next(itr, state)
+@test !done(itr, state)
+val, state = next(itr, state)
+@test r[val] == 8
+@test done(itr, state)
 
 
 #rotates