Merge pull request #85 from quantopian/fix-cum_returns-index

Scott Sanderson · web-flow · commit 1c0978da444c · 2018-02-27T20:09:01.000-05:00
BUG: Preserve output index in `cum_returns`.
diff --git a/empyrical/stats.py b/empyrical/stats.py
@@ -37,7 +37,8 @@ def unary_vectorized_roll(arr, window, out=None, **kwargs):
         window : int
             Size of the rolling window in terms of the periodicity of the data.
         out : array-like, optional
-            The array to store the store the output.
+            Array to use as output buffer.
+            If not passed, a new array will be created.
         **kwargs
             Forwarded to :func:`~empyrical.{name}`.
 
@@ -58,7 +59,7 @@ def unary_vectorized_roll(arr, window, out=None, **kwargs):
             out = np.empty(0, dtype='float64')
 
         if allocated_output and isinstance(arr, pd.Series):
-            out = pd.Series(out)
+            out = pd.Series(out, index=arr.index[-len(out):])
 
         return out
 
@@ -84,7 +85,8 @@ def binary_vectorized_roll(lhs, rhs, window, out=None, **kwargs):
         window : int
             Size of the rolling window in terms of the periodicity of the data.
         out : array-like, optional
-            The array to store the store the output.
+            Array to use as output buffer.
+            If not passed, a new array will be created.
         **kwargs
             Forwarded to :func:`~empyrical.{name}`.
 
@@ -109,9 +111,9 @@ def binary_vectorized_roll(lhs, rhs, window, out=None, **kwargs):
 
         if allocated_output:
             if out.ndim == 1 and isinstance(lhs, pd.Series):
-                out = pd.Series(out)
+                out = pd.Series(out, index=lhs.index[-len(out):])
             elif out.ndim == 2 and isinstance(lhs, pd.Series):
-                out = pd.DataFrame(out)
+                out = pd.DataFrame(out, index=lhs.index[-len(out):])
         return out
 
     binary_vectorized_roll.__doc__ = binary_vectorized_roll.__doc__.format(
@@ -203,16 +205,16 @@ def cum_returns(returns, starting_value=0, out=None):
     starting_value : float, optional
        The starting returns.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
     cumulative_returns : array-like
         Series of cumulative returns.
     """
-
     if len(returns) < 1:
-        return type(returns)([])
+        return returns.copy()
 
     nanmask = np.isnan(returns)
     if np.any(nanmask):
@@ -233,9 +235,9 @@ def cum_returns(returns, starting_value=0, out=None):
 
     if allocated_output:
         if returns.ndim == 1 and isinstance(returns, pd.Series):
-            out = pd.Series(out)
+            out = pd.Series(out, index=returns.index)
         elif isinstance(returns, pd.DataFrame):
-            out = pd.DataFrame(out)
+            out = pd.DataFrame(out, index=returns.index)
 
     return out
 
@@ -318,7 +320,8 @@ def max_drawdown(returns, out=None):
         Daily returns of the strategy, noncumulative.
         - See full explanation in :func:`~empyrical.stats.cum_returns`.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -471,7 +474,8 @@ def annual_volatility(returns,
         returns into annual returns. Value should be the annual frequency of
         `returns`.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -635,7 +639,8 @@ def sharpe_ratio(returns,
         returns into annual returns. Value should be the annual frequency of
         `returns`.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -713,7 +718,8 @@ def sortino_ratio(returns,
         The downside risk of the given inputs, if known. Will be calculated if
         not provided.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -792,7 +798,8 @@ def downside_risk(returns,
         returns into annual returns. Value should be the annual frequency of
         `returns`.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -857,7 +864,8 @@ def excess_sharpe(returns, factor_returns, out=None):
     factor_returns: float / series
         Benchmark return to compare returns against.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -962,7 +970,8 @@ def alpha_beta(returns,
         returns into annual returns. Value should be the annual frequency of
         `returns`.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -994,7 +1003,8 @@ def roll_alpha_beta(returns, factor_returns, window=10, **kwargs):
     window : int
         Size of the rolling window in terms of the periodicity of the data.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
     **kwargs
         Forwarded to :func:`~empyrical.alpha_beta`.
     """
@@ -1046,7 +1056,8 @@ def alpha_beta_aligned(returns,
         returns into annual returns. Value should be the annual frequency of
         `returns`.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -1114,7 +1125,8 @@ def alpha(returns,
         The beta for the given inputs, if already known. Will be calculated
         internally if not provided.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -1182,7 +1194,8 @@ def alpha_aligned(returns,
         The beta for the given inputs, if already known. Will be calculated
         internally if not provided.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -1241,7 +1254,8 @@ def beta(returns, factor_returns, risk_free=0.0, out=None):
         Constant risk-free return throughout the period. For example, the
         interest rate on a three month us treasury bill.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
@@ -1282,7 +1296,8 @@ def beta_aligned(returns, factor_returns, risk_free=0.0, out=None):
         Constant risk-free return throughout the period. For example, the
         interest rate on a three month us treasury bill.
     out : array-like, optional
-        The array to store the store the output.
+        Array to use as output buffer.
+        If not passed, a new array will be created.
 
     Returns
     -------
diff --git a/empyrical/tests/test_stats.py b/empyrical/tests/test_stats.py
@@ -1,8 +1,8 @@
 from __future__ import division
 
-import random
 from copy import copy
 from operator import attrgetter
+import random
 from unittest import TestCase, SkipTest
 
 from parameterized import parameterized
@@ -13,13 +13,31 @@
 from scipy import stats
 from six import iteritems, wraps
 
+try:
+    from pandas.testing import assert_index_equal
+except ImportError:
+    # pandas.testing was added in pandas 0.20; fall back to the old location.
+    from pandas.util.testing import assert_index_equal
+
 import empyrical
 import empyrical.utils as emutils
 
 DECIMAL_PLACES = 8
 
 
-class TestStats(TestCase):
+class BaseTestCase(TestCase):
+    def assert_indexes_match(self, result, expected):
+        """
+        Assert that two pandas objects have the same indices.
+
+        This is a method instead of a free function so that we can override it
+        to be a no-op in suites like TestStatsArrays that unwrap pandas objects
+        into ndarrays.
+        """
+        assert_index_equal(result.index, expected.index)
+
+
+class TestStats(BaseTestCase):
 
     # Simple benchmark, no drawdown
     simple_benchmark = pd.Series(
@@ -157,6 +175,8 @@ def test_cum_returns(self, returns, starting_value, expected):
                 expected[i],
                 4)
 
+        self.assert_indexes_match(cum_returns, returns)
+
     @parameterized.expand([
         (empty_returns, 0, np.nan),
         (one_return, 0, one_return[0]),
@@ -996,6 +1016,8 @@ def test_roll_max_drawdown(self, returns, window, expected):
             np.asarray(expected),
             4)
 
+        self.assert_indexes_match(test, returns[-len(expected):])
+
     @parameterized.expand([
         (empty_returns, 6, []),
         (negative_returns, 6, [-18.09162052, -26.79897486, -26.69138263,
@@ -1009,6 +1031,8 @@ def test_roll_sharpe_ratio(self, returns, window, expected):
             np.asarray(expected),
             DECIMAL_PLACES)
 
+        self.assert_indexes_match(test, returns[-len(expected):])
+
     @parameterized.expand([
         (empty_returns, empty_returns, np.nan),
         (one_return, one_return, 1.),
@@ -1057,6 +1081,7 @@ def test_roll_alpha_beta(self, returns, benchmark, window, expected):
             window,
         )
         if isinstance(test, pd.DataFrame):
+            self.assert_indexes_match(test, benchmark[-len(expected):])
             test = test.values
 
         alpha_test = [t[0] for t in test]
@@ -1114,9 +1139,11 @@ def test_roll_down_capture(self, returns, factor_returns, window,
             np.asarray(expected),
             DECIMAL_PLACES)
 
+        self.assert_indexes_match(test, returns[-len(expected):])
+
     @parameterized.expand([
         (empty_returns, empty_returns, 1, []),
-        (one_return, one_return, 1,  1.),
+        (one_return, one_return, 1,  [1.]),
         (mixed_returns, mixed_returns, 6, [1., 1., 1., 1.]),
         (positive_returns, mixed_returns,
          6, [0.00128406, 0.00291564, 0.00171499, 0.0777048]),
@@ -1132,6 +1159,8 @@ def test_roll_up_capture(self, returns, factor_returns, window, expected):
             np.asarray(expected),
             DECIMAL_PLACES)
 
+        self.assert_indexes_match(test, returns[-len(expected):])
+
     @parameterized.expand([
         (empty_returns, simple_benchmark, (np.nan, np.nan)),
         (one_return, one_return, (np.nan, np.nan)),
@@ -1290,6 +1319,9 @@ class TestStatsArrays(TestStats):
     def empyrical(self):
         return PassArraysEmpyricalProxy(self, (np.ndarray, float))
 
+    def assert_indexes_match(self, result, expected):
+        pass
+
 
 class TestStatsIntIndex(TestStats):
     """
@@ -1308,8 +1340,11 @@ def empyrical(self):
             lambda obj: type(obj)(obj.values, index=np.arange(len(obj))),
         )
 
+    def assert_indexes_match(self, result, expected):
+        pass
+
 
-class TestHelpers(TestCase):
+class TestHelpers(BaseTestCase):
     """
     Tests for helper methods and utils.
     """
@@ -1376,7 +1411,7 @@ def test_roll_max_window(self):
         self.assertTrue(res.size == 0)
 
 
-class Test2DStats(TestCase):
+class Test2DStats(BaseTestCase):
     """
     Tests for functions that are capable of outputting a DataFrame.
     """
@@ -1429,6 +1464,8 @@ def test_cum_returns_df(self, returns, starting_value, expected):
             4,
         )
 
+        self.assert_indexes_match(cum_returns, returns)
+
     @property
     def empyrical(self):
         """
@@ -1455,6 +1492,9 @@ class Test2DStatsArrays(Test2DStats):
     def empyrical(self):
         return PassArraysEmpyricalProxy(self, np.ndarray)
 
+    def assert_indexes_match(self, result, expected):
+        pass
+
 
 class ReturnTypeEmpyricalProxy(object):
     """
diff --git a/empyrical/utils.py b/empyrical/utils.py
@@ -166,10 +166,13 @@ def _roll_ndarray(func, window, *args, **kwargs):
 
 def _roll_pandas(func, window, *args, **kwargs):
     data = {}
+    index_values = []
     for i in range(window, len(args[0]) + 1):
         rets = [s.iloc[i-window:i] for s in args]
-        data[args[0].index[i - 1]] = func(*rets, **kwargs)
-    return pd.Series(data)
+        index_value = args[0].index[i - 1]
+        index_values.append(index_value)
+        data[index_value] = func(*rets, **kwargs)
+    return pd.Series(data, index=type(args[0].index)(index_values))
 
 
 def cache_dir(environ=environ):