diff --git a/bench/bench_existence.py b/bench/bench_existence.py
new file mode 100644
index 0000000000000..a8487cdcd76ec
--- /dev/null
+++ b/bench/bench_existence.py
@@ -0,0 +1,285 @@
+from __future__ import division
+
+import os
+import sys
+from itertools import cycle
+
+from timeit import Timer
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from bokeh.mpl import to_bokeh
+from numpy.random import randint
+
+from mpltools import style
+style.use('ggplot')
+
+class ExistenceBenchmarks(object):
+
+
+    def time_py_dict(look_for, look_in):
+        df_look_for = pd.DataFrame(look_for, columns=['data'])
+        dict_look_in = dict(zip(look_in, look_in))
+
+        def time_this():
+            result = df_look_for[[x in dict_look_in for x in df_look_for.data]]
+            return result.drop_duplicates().sort('data')
+
+        return time_this
+
+
+    def time_isin_list(look_for, look_in):
+        df_look_for = pd.DataFrame(look_for, columns=['data'])
+        list_look_in = list(look_in)
+
+        def time_this():
+            result = df_look_for[df_look_for.data.isin(list_look_in)]
+            return result.drop_duplicates().sort('data')
+
+        return time_this
+
+
+    def time_isin_dict(look_for, look_in):
+        df_look_for = pd.DataFrame(look_for, columns=['data'])
+        dict_look_in = dict(zip(look_in, look_in))
+
+        def time_this():
+            result = df_look_for[df_look_for.data.isin(dict_look_in)]
+            return result.drop_duplicates().sort('data')
+
+        return time_this
+
+
+    def time_isin_series(look_for, look_in):
+        series_look_in = pd.Series(look_in)
+        df_look_for = pd.DataFrame(look_for, columns=['data'])
+
+        def time_this():
+            result = df_look_for[df_look_for.data.isin(series_look_in)]
+            return result.drop_duplicates().sort('data')
+
+        return time_this
+
+
+    def time_join(look_for, look_in):
+        series_look_in = pd.Series(look_in, index=look_in)
+        series_look_in.name = 'series_data'
+        df_look_for = pd.DataFrame(look_for, columns=['data'], index=look_for)
+
+        def time_this():
+            result = df_look_for.join(series_look_in, how='inner')
+            return result.drop_duplicates()
+
+        return time_this
+
+
+    def time_join_no_dups(look_for, look_in):
+        series_look_in = pd.Series(look_in, index=look_in)
+        series_look_in.name = 'series_data'
+        df_look_for = pd.DataFrame(look_for, columns=['data'], index=look_for)
+
+        def time_this():
+            df_look_for.drop_duplicates(inplace=True)
+            series_look_in.drop_duplicates(inplace=True)
+            result = df_look_for.join(series_look_in, how='inner')
+            return result.sort('data')
+
+        return time_this
+
+
+    def time_query_in(look_for, look_in):
+        series_look_in = pd.Series(look_in)
+        series_look_in.name = 'data'
+        df_look_for = pd.DataFrame(look_for, columns=['data'])
+
+        def time_this():
+            # series_look_in is not visible to .query unless defined in local function scope.
+            s_look_in = series_look_in
+            result = df_look_for.query('data in @s_look_in')
+            return result.drop_duplicates().sort('data')
+
+        return time_this
+
+
+def run_bench(to_time, repeat, look_sets, x_axis, linestyle='-'):
+    func_results = []
+    markers = cycle(['o', 's', '+', '^', 'v', 'x', 'D', '*'])
+
+    for time_func_name in to_time:
+        marker=markers.next()
+        colors = cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k'])
+        for set_name, look_set in look_sets:
+            color=colors.next()
+            plot_results = []
+            for look_for, look_in in look_set:
+                func = ExistenceBenchmarks.__dict__[time_func_name](look_for, look_in)
+                result = func()
+                t = Timer(func)
+                elapsed = t.timeit(number=repeat) / repeat
+                name = time_func_name.replace('time_', '') + ' ' + set_name + ' (%.1f%%)' % ((len(result) / len(look_for)) * 100)
+                func_results.append((name, look_for, look_in, elapsed))
+                plot_results.append(elapsed)
+            plt.plot(x_axis, plot_results, marker=marker, color=color, label=name, linestyle=linestyle)
+
+
+def test_timed(to_time):
+    look_for = randint(0, 10000, 5000)
+    look_in = randint(5000, 15000, 5000)
+
+    first_result = ExistenceBenchmarks.__dict__[to_time[0]](look_for, look_in)()
+
+    for time_func_name in to_time[1:]:
+        func = ExistenceBenchmarks.__dict__[time_func_name](look_for, look_in)
+        result = func()
+        if np.array_equal(first_result['data'].values, result['data'].values):
+            pass
+        else:
+            raise AssertionError("%s and %s have unmatched output." % (to_time[0], time_func_name))
+
+
+if __name__ == '__main__':
+
+    pandas_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+    static_path = os.path.join(pandas_dir, 'doc', 'source', '_static')
+    join_path = lambda p: os.path.join(static_path, p)
+
+    to_time = [key for key in ExistenceBenchmarks.__dict__ if key.startswith('time_')]
+
+
+    if len(sys.argv) != 2:
+        print 'usage: <--test, --run>'
+        print '\t--test : Ensure that all timed functions are returning identical output.'
+        print '\t--run : Generate plots for all timed functions.'
+        sys.exit()
+
+    if sys.argv[1] == '--test':
+        test_timed(to_time)
+
+    elif sys.argv[1] == '--run':
+        test_timed(to_time)
+
+        def save_plot(filename, subtitle):
+            fname = join_path(filename)
+            plt.axes().set_xscale('log')
+            x1,x2,y1,y2 = plt.axis()
+            # plt.axis((x1, x2, 0, y_limit))
+            plt.legend(loc=2, prop={'size':8})
+            plt.title('Existence Comparisons%s' % subtitle)
+            plt.xlabel('% Overlap of X Elements')
+            plt.ylabel('Time(s)')
+            plt.savefig(fname)
+            plt.clf()
+
+        def unordered(exp_range, repeat):
+            rng = [2**x for x in exp_range]
+
+            # 25% overlap
+            look_set_25 = \
+                [(randint(0, 100*i, 50*i), randint(75*i, 175*i, 50*i)) for i in rng]
+
+            look_set_50 = \
+                [(randint(0, 100*i, 50*i), randint(50*i, 150*i, 50*i)) for i in rng]
+
+            look_set_75 = \
+                [(randint(0, 100*i, 50*i), randint(25*i, 125*i, 50*i)) for i in rng]
+
+            look_set_100 = \
+                [(randint(0, 100*i, 50*i), randint(0*i, 100*i, 50*i)) for i in rng]
+
+            look_sets = []
+            look_sets.append(('25% overlap', look_set_25))
+            look_sets.append(('50% overlap', look_set_50))
+            look_sets.append(('75% overlap', look_set_75))
+            look_sets.append(('100% overlap', look_set_100))
+
+            x_axis = [100*i for i in rng]
+            run_bench(to_time, repeat, look_sets, x_axis, linestyle='-')
+
+
+        def from_ordered(exp_range, repeat):
+            rng = [2**x for x in exp_range]
+
+            # 25% overlap
+            look_set_25 = \
+                [(sorted(randint(0, 100*i, 50*i)), randint(75*i, 175*i, 50*i)) for i in rng]
+
+            look_set_50 = \
+                [(sorted(randint(0, 100*i, 50*i)), randint(50*i, 150*i, 50*i)) for i in rng]
+
+            look_set_75 = \
+                [(sorted(randint(0, 100*i, 50*i)), randint(25*i, 125*i, 50*i)) for i in rng]
+
+            look_set_100 = \
+                [(sorted(randint(0, 100*i, 50*i)), randint(0*i, 100*i, 50*i)) for i in rng]
+
+            look_sets = []
+            look_sets.append(('25% overlap, for-ordered', look_set_25))
+            look_sets.append(('50% overlap, for-ordered', look_set_50))
+            look_sets.append(('75% overlap, for-ordered', look_set_75))
+            look_sets.append(('100% overlap, for-ordered', look_set_100))
+
+            x_axis = [100*i for i in rng]
+            run_bench(to_time, repeat, look_sets, x_axis, linestyle='-.')
+
+
+        def both_ordered(exp_range, repeat):
+            rng = [2**x for x in exp_range]
+
+            # 25% overlap
+            look_set_25 = \
+                [(sorted(randint(0, 100*i, 50*i)), sorted(randint(75*i, 175*i, 50*i))) for i in rng]
+
+            look_set_50 = \
+                [(sorted(randint(0, 100*i, 50*i)), sorted(randint(50*i, 150*i, 50*i))) for i in rng]
+
+            look_set_75 = \
+                [(sorted(randint(0, 100*i, 50*i)), sorted(randint(25*i, 125*i, 50*i))) for i in rng]
+
+            look_set_100 = \
+                [(sorted(randint(0, 100*i, 50*i)), sorted(randint(0*i, 100*i, 50*i))) for i in rng]
+
+            look_sets = []
+            look_sets.append(('25% overlap, both-ordered', look_set_25))
+            look_sets.append(('50% overlap, both-ordered', look_set_50))
+            look_sets.append(('75% overlap, both-ordered', look_set_75))
+            look_sets.append(('100% overlap, both-ordered', look_set_100))
+
+            x_axis = [100*i for i in rng]
+            run_bench(to_time, repeat, look_sets, x_axis, linestyle=':')
+
+
+        plt.figure(figsize=(32, 24))
+        unordered(range(1, 10), 10)
+        from_ordered(range(1, 10), 10)
+        both_ordered(range(1, 10), 10)
+        save_plot('existence-perf-small.png', ': Small')
+
+        plt.figure(figsize=(32, 24))
+        unordered(range(10, 15), 3)
+        from_ordered(range(10, 15), 3)
+        both_ordered(range(10, 15), 3)
+        save_plot('existence-perf-large.png', ': Large')
+
+        plt.figure(figsize=(16, 12))
+        unordered(range(1, 10), 10)
+        save_plot('existence-perf-unordered-small.png', ': Unordered Small')
+
+        plt.figure(figsize=(16, 12))
+        from_ordered(range(1, 10), 10)
+        save_plot('existence-perf-from-ordered-small.png', ': From-Ordered Small')
+
+        plt.figure(figsize=(16, 12))
+        both_ordered(range(1, 10), 10)
+        save_plot('existence-perf-both-ordered-small.png', ': Both-Ordered Small')
+
+        plt.figure(figsize=(16, 12))
+        unordered(range(10, 15), 3)
+        save_plot('existence-perf-unordered-large.png', ': Unordered Large')
+
+        plt.figure(figsize=(16, 12))
+        from_ordered(range(10, 15), 3)
+        save_plot('existence-perf-from-ordered-large.png', ': From-Ordered Large')
+
+        plt.figure(figsize=(16, 12))
+        both_ordered(range(10, 15), 3)
+        save_plot('existence-perf-both-ordered-large.png', ': Both-Ordered Large')
\ No newline at end of file
diff --git a/doc/source/_static/existence-perf-large.png b/doc/source/_static/existence-perf-large.png
new file mode 100644
index 0000000000000..5c0766a2afb3c
Binary files /dev/null and b/doc/source/_static/existence-perf-large.png differ
diff --git a/doc/source/_static/existence-perf-small.png b/doc/source/_static/existence-perf-small.png
new file mode 100644
index 0000000000000..6150cc47037a5
Binary files /dev/null and b/doc/source/_static/existence-perf-small.png differ
diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst
index 00c76632ce17b..554543c0f51df 100644
--- a/doc/source/enhancingperf.rst
+++ b/doc/source/enhancingperf.rst
@@ -668,3 +668,171 @@ In general, :meth:`DataFrame.query`/:func:`pandas.eval` will
 evaluate the subexpressions that *can* be evaluated by ``numexpr`` and those
 that must be evaluated in Python space transparently to the user. This is done
 by inferring the result type of an expression from its arguments and operators.
+
+Existence (IsIn, Inner Join, Dict/Hash, Query)
+----------------------------------------------------
+
+Existence testing checks whether each item in one collection is present in
+another collection. In the case of a ``DataFrame``, we test each value of a
+column (or of the index) for membership in another collection of items.
+
+There are a number of ways to test for existence with pandas; the methods below
+are a few of them. In the snippets, ``lst`` is a python ``list``, ``dct`` a
+``dict``, ``series`` a ``Series`` holding the values to look up, and ``ID`` is a
+column of the ``DataFrame`` ``df``. The comments correspond to the legend in the
+plots further down.
+
+
+:meth:`DataFrame.isin`
+
+.. code-block:: python
+
+   # isin_list
+   df[df.index.isin(lst)]
+   # isin_dict
+   df[df.index.isin(dct)]
+   # isin_series
+   df[df.index.isin(series)]
+
+
+:meth:`DataFrame.query`
+
+.. code-block:: python
+
+   # The '@' symbol is used with `query` to reference local variables. Names
+   # without '@' will reference the DataFrame's columns or index.
+
+   # query_in list
+   df.query('index in @lst')
+   # query_in Series
+   df.query('index in @series')
+
+   # A list can be used with `query('.. == ..')` to test for existence,
+   # but other data structures such as a `pandas.Series` behave differently.
+
+   df.query('index == @lst')
+
+
+:meth:`Series.apply`
+
+.. code-block:: python
+
+   df[df.ID.apply(lambda x: x in lst)]
+
+
+:meth:`DataFrame.join`
+
+.. code-block:: python
+
+   # join
+   df.join(series, how='inner')
+
+   # this can actually be fast for small DataFrames
+   df[[x in dct for x in df.index]]
+
+   # isin_series, query_in Series, pydict,
+   # join and isin_list are included in the plots below.
+
+
+As seen below, using a ``Series`` is generally better than using pure python
+data structures for anything larger than very small datasets of around 1000
+records. The two fastest approaches are ``join(series)`` and ``isin(series)``;
+first, ``join``:
+
+.. code-block:: python
+
+   lst = range(1000000)
+   series = Series(lst, name='data')
+
+   df = DataFrame(lst, columns=['ID'])
+
+   df.join(series, how='inner')
+   # 100 loops, best of 3: 19.2 ms per loop
+
+list vs Series:
+
+.. code-block:: python
+
+   df[df.index.isin(lst)]
+   # 1 loops, best of 3: 1.06 s per loop
+
+   df[df.index.isin(series)]
+   # 1 loops, best of 3: 477 ms per loop
+
+Testing against ``df.index`` vs. a column makes no difference here:
+
+.. code-block:: python
+
+   df[df.ID.isin(series)]
+   # 1 loops, best of 3: 474 ms per loop
+
+   df[df.index.isin(series)]
+   # 1 loops, best of 3: 475 ms per loop
+
+The ``query`` ``'in'`` syntax has roughly the same performance as ``isin``:
+
+.. code-block:: python
+
+   df.query('index in @lst')
+   # 1 loops, best of 3: 1.04 s per loop
+
+   df.query('index in @series')
+   # 1 loops, best of 3: 451 ms per loop
+
+   df.query('index == @lst')
+   # 1 loops, best of 3: 1.03 s per loop
+
+
+For ``join``, the values being tested must be the index of the ``DataFrame`` and
+the index of the ``Series`` for the best performance, and the ``Series`` must
+have a ``name``. ``join`` defaults to a left join, so we need to specify
+``how='inner'`` to get existence semantics.
+
+.. code-block:: python
+
+   df.join(series, how='inner')
+   # 100 loops, best of 3: 19.7 ms per loop
+
+Smaller datasets:
+
+.. code-block:: python
+
+   df = DataFrame([1,2,3,4], columns=['ID'])
+   lst = range(10000)
+   dct = dict(zip(lst, lst))
+   series = Series(lst, name='data')
+
+   df.join(series, how='inner')
+   # 1000 loops, best of 3: 866 us per loop
+
+   df[df.ID.isin(dct)]
+   # 1000 loops, best of 3: 809 us per loop
+
+   df[df.ID.isin(lst)]
+   # 1000 loops, best of 3: 853 us per loop
+
+   df[df.ID.isin(series)]
+   # 100 loops, best of 3: 2.22 ms per loop
+
+For such small cases it is actually faster to use a list comprehension or
+``apply``:
+
+.. code-block:: python
+
+   df[[x in dct for x in df.ID]]
+   # 1000 loops, best of 3: 266 us per loop
+
+   df[df.ID.apply(lambda x: x in dct)]
+   # 1000 loops, best of 3: 364 us per loop
+
+
+Here is a visualization of some of the benchmarks above. You can see that,
+except for very small datasets, ``isin(Series)`` and ``join(Series)`` quickly
+become faster than the pure python data structures.
+
+.. image:: _static/existence-perf-small.png
+
+However, ``isin(Series)`` still scales relatively poorly, whereas ``join`` stays
+fast even for large datasets. There is some overhead in making the data the
+index of both the left and right datasets, but that cost is clearly outweighed
+by the gains of the join itself. For extremely large datasets you may start
+running into memory limits, since ``join`` does not perform any disk-based
+chunking.
+
+.. image:: _static/existence-perf-large.png
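+
+To make the index-setup overhead concrete, here is a minimal sketch of preparing
+both sides for an index-on-index inner join. The column and variable names
+(``ids``, ``look_in``) are illustrative only and are not part of the benchmark
+script above.
+
+.. code-block:: python
+
+   import numpy as np
+   from pandas import DataFrame, Series
+
+   # hypothetical data: 'ids' is the column whose values we test for existence
+   df = DataFrame({'ids': np.random.randint(0, 1000000, 500000)})
+   look_in = np.random.randint(500000, 1500000, 500000)
+
+   # the overhead: move the tested values into the index on both sides
+   left = df.set_index('ids', drop=False)
+   right = Series(look_in, index=look_in, name='look_in').drop_duplicates()
+
+   # an inner join on the two indexes keeps only the rows whose id
+   # also occurs in 'look_in'
+   exists = left.join(right, how='inner')
+
+Dropping duplicates on the right-hand side keeps the inner join from multiplying
+matching rows.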