Event Study: The Effects of Cybersecurity Breaches on Stock Price

by Zach Zhao and Gabriel Naval

Introduction

Some Background

Most literature on forecasting a company's future stock price does not include recent cybersecurity breaches as a key factor. Many studies of the economic effects of cybersecurity breaches support this omission by showing that breaches have relatively small economic consequences. Because breaches appear to have little effect on future performance, stock prices are able to rebound to their pre-breach levels, with supposedly no long-term changes. However, there is research suggesting that there are other long-term consequences, such as declines in firm productivity, research and development spending, patents, and investment efficiency.

What are we doing and why?

There appears to be conflicting information on the long-term effects of cybersecurity breaches on companies. Through this project, we would like to determine what the long-term effects of cybersecurity breaches on company stock truly are. Hopefully, we can show that cybersecurity breaches do have lasting consequences, which would demonstrate that breach disclosures deserve a place in predictive financial models. Showing this would strengthen the case for better cybersecurity efforts and funding.

How will we do this?

We will confirm, through an event study, that cybersecurity breach disclosure has a short-term impact on company stock price. We then shift our focus to the long term by analyzing the differences between financial analysts' predictions and actual stock prices a year after breach disclosure. In this long-term analysis, we hope to find a difference between financial analysts' predictions and actual stock prices, which would suggest that predictive financial models are lacking and should treat cybersecurity breaches as important for long-term stock price predictions.

Set Up

This will all be done using Python, leveraging Jupyter Notebooks to visualize various plots, graphs, and tables. Here is some useful information for installing these tools: 1, 2, 3, 4.

Below are the packages we will use to collect and visualize our data.
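(A minimal, illustrative set of imports along these lines would cover the collection and visualization steps below; your environment may need additional packages.)

```python
# Core data-wrangling and plotting libraries used throughout this tutorial.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Data sources: pandas_datareader for Yahoo Finance prices, wrds for IBES analyst forecasts.
import pandas_datareader.data as web
import wrds

# Statistics utilities for trimmed means and hypothesis tests later on.
from scipy import stats
```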

Data Collection

We will be using the WRDS (Wharton Research Data Services) database to find financial analyst predictions of stock prices through its IBES dataset. The WRDS database is provided to all UMD staff and students for free; you can sign up for an account here. When running the following section of code, you must supply your account's credentials. We recommend setting up a pgpass file to help automate the process.
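As a rough sketch, connecting with the wrds package might look like the following; the username is a placeholder for your own credentials:

```python
import wrds

# Opens a connection to WRDS; you will be prompted for a password unless a
# .pgpass entry has been configured for your account.
db = wrds.Connection(wrds_username="your_username")

# Optionally create the .pgpass entry so future connections skip the prompt.
db.create_pgpass_file()
```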

We will be using the Audit Analytics February 2021 issue of cybersecurity data breaches. To get this dataset, we contacted the University of Maryland's Smith School of Business, which provided this Excel file to us. Audit Analytics is an organization that tracks relevant business and financial data, such as information about a company's cybersecurity breaches.

We will now load in the data by accessing the correct Excel sheet.
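A minimal loading sketch, with placeholder file and sheet names that should be swapped for the ones in your copy of the dataset:

```python
# Load the Audit Analytics breach data; the file name and sheet name here are
# placeholders -- substitute the names used in your copy of the Excel file.
breaches = pd.read_excel("audit_analytics_cyber_breaches_feb2021.xlsx",
                         sheet_name="Cyber Breaches")
breaches.head()
```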

Each row in this dataset represents a company's cybersecurity breach. The dataset contains useful information such as the company breached, date of public disclosure of the breach, and other varying information about the company and the type of cybersecurity breach performed.

To clean the data, we need to drop all the rows that don't contain a company ticker. Tickers are short alphanumeric abbreviations that uniquely identify a publicly traded company (e.g., Amazon is uniquely identified by its ticker, AMZN). If a row doesn't have a ticker symbol, there may have been data corruption or human error logging the data, or the company may not be publicly traded. Either way, we need to remove those rows. We also extract the relevant columns for our analysis (as shown in the table_columns array). Once this is all done, we are left with a dataset of 737 cybersecurity breaches at publicly traded companies.
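A sketch of this cleaning step, with illustrative column names (the actual Audit Analytics headers may differ):

```python
# Columns we keep for the analysis; these names are illustrative and should be
# matched to the actual Audit Analytics column headers.
table_columns = ["Company Name", "Ticker", "Date of Disclosure", "Type of Attack"]

# Drop breaches without a ticker (not publicly traded, or bad data) and keep
# only the columns we care about.
breaches = breaches.dropna(subset=["Ticker"])[table_columns].reset_index(drop=True)
len(breaches)  # 737 publicly traded breach events in the original analysis
```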

Now, let's try to find the monthly stock price of each of these firms following the disclosure of the breach.

Before we do that, we define a short helper function that will help us find the closest date in a set that corresponds to X months after the disclosure of a breach. We will make use of this utility in our main function for finding monthly stock prices.
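One possible implementation of such a helper, assuming the dates are pandas Timestamps:

```python
def nearest(dates, target):
    """Return the date in `dates` closest to `target`.

    `dates` is any iterable of pandas Timestamps (e.g. a DatetimeIndex) and
    `target` is the date we would ideally like (X months after disclosure).
    """
    return min(dates, key=lambda d: abs(d - target))
```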

With that out of the way, let's construct a function to obtain the monthly stock prices after the disclosure of the data breach. Let's break it down!
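Here is a minimal sketch of what such a function might look like, assuming the nearest helper above, illustrative column names like "Ticker" and "Date of Disclosure", and the pandas_datareader Yahoo Finance reader:

```python
from datetime import timedelta

def monthly_stock_prices(row, months=12):
    """Return a list of monthly closing prices following a breach disclosure."""
    ticker = row["Ticker"]
    start = row["Date of Disclosure"] - timedelta(days=1)   # day before disclosure
    end = start + pd.DateOffset(months=months)

    try:
        # Daily prices between the start and end dates from Yahoo Finance.
        df = web.DataReader(ticker, "yahoo", start, end)
    except Exception:
        # Ticker not found (or another API error): return all NaNs.
        return [np.nan] * (months + 1)

    prices = []
    for m in range(months + 1):
        target = start + pd.DateOffset(months=m)
        if target > df.index.max():
            # Month lies in the future (or data is missing) -- record NaN.
            prices.append(np.nan)
        else:
            prices.append(df.loc[nearest(df.index, target), "Close"])
    return prices
```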

Our function has two parameters: a row from our original breach dataframe and the number of months to get stock prices for. First, our function determines the range of dates over which to obtain monthly stock prices. Note that our starting date is the day before the breach disclosure, to control for any fluctuations in stock price caused by that disclosure. Following this, we leverage pandas_datareader, a wrapper for various APIs. Specifically, we will be using its Yahoo Finance functionality, which provides us with a dataframe of stock prices (df) beginning at our start date and ending at our end date. We then traverse this dataframe, using our nearest helper function, to obtain the monthly stock prices and return them as an array.

If the Yahoo Finance API cannot find a company's stock prices for whatever reason, the function returns an array of np.nan's. Likewise, if no stock price is available for a given month, either because it's missing or because that month lies in the future and hasn't happened yet, the array will contain np.nan for those months.

Note: We record closing stock prices, meaning it's the stock price at the end of the trading day.
Note: The Yahoo Finance API has a limit of 2,000 requests per hour. As we only have 737 breaches, we won't hit that limit, but keep it in mind when using APIs.

Let's run our function on each row in our dataset. We'll be finding the monthly stock prices spanning a year after the disclosure of the breach.

Note: This section of code takes a while to run (20-30 minutes) because we will be making API requests, loading in data from a server, and performing operations on said data.

Following this, we can concatenate said data to our original dataframe.
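A sketch of both steps, using the illustrative column names from earlier:

```python
# Apply the function to every breach; this is the slow step (one API call per row).
month_cols = [f"month_{m}_price" for m in range(13)]   # month 0 through month 12
monthly_prices = breaches.apply(
    lambda row: pd.Series(monthly_stock_prices(row, months=12), index=month_cols),
    axis=1,
)

# Attach the monthly prices to the original breach dataframe.
breaches = pd.concat([breaches, monthly_prices], axis=1)
```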

We now have the actual stock prices. Let's move on to finding analyst predictions for these companies.

We define the function below to find the analyst stock price predictions. It makes use of the IBES database in WRDS. The function takes all the financial analyst predictions within a month of the disclosure of the breach that forecast the company's stock price a year into the future. Since multiple financial analysts may make predictions, this function returns the median and mean of these predictions. If no predictions are found, the function returns np.nan's.

Note: This function makes use of SQL, a programming language used to communicate with databases. Here are some helpful resources to get started learning about SQL: CodeAcademy, KhanAcademy
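As a rough sketch, the query might look like the following; the IBES price target detail table (ibes.ptgdet) and its oftic (ticker), value (price target), horizon (forecast horizon in months), and anndats (announcement date) columns are assumptions that should be verified against the WRDS documentation:

```python
def analyst_predictions(row):
    """Return (median, mean) of one-year analyst price targets made within a
    month of the breach disclosure, or (np.nan, np.nan) if none exist."""
    ticker = row["Ticker"]
    disclosure = row["Date of Disclosure"].strftime("%Y-%m-%d")

    query = f"""
        SELECT value
        FROM ibes.ptgdet
        WHERE oftic = '{ticker}'
          AND horizon = 12
          AND anndats BETWEEN '{disclosure}'
                          AND ('{disclosure}'::date + INTERVAL '1 month')
    """
    targets = db.raw_sql(query)

    if targets.empty:
        return np.nan, np.nan
    return targets["value"].median(), targets["value"].mean()
```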

We can now run the function on each company to get the financial analyst forecasts.

Nice! We have now collected all the data to compare actual stock prices with financial analyst predicted stock prices. But before we do some Exploratory Data Analysis (EDA), we need to do...

Data Transformation and Management

As it turns out, we might want to transform some of our data relating to stock prices because of innate variation between companies.

To better understand this problem, consider this hypothetical: suppose company A's and company B's stock prices both double after a year. However, company A's stock started much smaller, say at \$10 per share, and became \$20 per share, while company B's stock went from \$100 to \$200 per share. Their absolute growth is rather different, but their percent growth is the same. When comparing growth between companies, it makes more sense to compare percent growth, since it controls for the company's already established stock price (whether high or low). In a way, finding the percent growth is like standardizing each company's current stock price by its initial stock price.

For that reason, we need to look at percent stock price change for these companies, where the initial stock price is the stock price on the day of the breach disclosure. The code below transforms the data to percent stock price change for both the actual and predicted stock prices.
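A sketch of this transformation, assuming the illustrative column names introduced earlier and hypothetical analyst_median / analyst_mean columns holding the forecasts:

```python
# Convert raw prices to percent change relative to the price on the day of
# disclosure (month 0). The same transformation is applied to the analyst
# price targets so everything is on a comparable scale.
initial = breaches["month_0_price"]
for m in range(13):
    col = f"month_{m}_price"
    breaches[f"month_{m}_pct"] = (breaches[col] - initial) / initial * 100

for col in ["analyst_median", "analyst_mean"]:
    breaches[f"{col}_pct"] = (breaches[col] - initial) / initial * 100
```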

Exploratory Data Analysis

To begin, let's make some boxplots and violin plots to get a better understanding of how actual stock prices change over time. We will be making use of the seaborn Python library. We also make use of melting (more info here).
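A sketch of the melting and plotting steps, with illustrative column names:

```python
# Reshape from wide (one column per month) to long (month, pct) so seaborn can
# plot one distribution per month.
pct_cols = [f"month_{m}_pct" for m in range(13)]
long_df = breaches.melt(value_vars=pct_cols, var_name="month", value_name="pct_change")

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
sns.boxplot(data=long_df, x="month", y="pct_change", ax=axes[0])
sns.violinplot(data=long_df, x="month", y="pct_change", ax=axes[1])
for ax in axes:
    ax.tick_params(axis="x", rotation=45)
plt.show()
```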

Well... these plots don't really help, but why? It seems that there are some major outliers making it hard to see how the percent change in actual stock price shifts over time. We have two options here:

  1. Remove the outliers and re-plot the data.
  2. Find a better metric to represent these distributions over time.

Let's opt for the second option. There are other metrics to represent these distributions, namely seeing how the "middle" of these distributions changes over time. We can define the "middle" of each of these distributions to be the mean or median stock price percent change over time.

Let's take the naive approach of plotting the mean over time.

It seems like the mean trends upwards over time. That is to say, after public disclosure of a breach, company stock prices still tend to trend upwards over time. It seems that public disclosure of a breach might not have long-term consequences for a company's stock price.

But wait! Let's not forget:

"There are three kinds of lies: lies, damned lies, and statistics." - Mark Twain

Means are only good representations of the "middle" of a distribution given that there are no influential outliers and no skewness. A better way of representing the "middle" would be to use the median, which is less affected by outliers and skewness. Let's plot the medians.

It seems like when we plot the median over time, the same trend occurs, but it's important to note that the stock price percent change values are not as large as in the plot of means. Even more important is that the trend no longer seems exactly linear. It seems like the stock price grows slowly at the beginning and then increases more quickly later on. This could be representative of a company recovering from the public disclosure of the breach at the start (which dampens stock gains), after which the company's growth returns to normal. We'll look more into the details of this when we perform the event study.

Another way of representing the "middle" is to take a trimmed mean to get rid of outliers. Below, we took the 5% trimmed means and plotted them. The plot shows similar trends and observations as the plot of medians.
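A sketch of the trimmed-mean computation using scipy (column names illustrative):

```python
from scipy import stats

# 5% trimmed mean of each month's percent-change distribution: the top and
# bottom 5% of values are discarded before averaging.
trimmed_means = [stats.trim_mean(breaches[f"month_{m}_pct"].dropna(), 0.05)
                 for m in range(13)]

plt.plot(range(13), trimmed_means, marker="o")
plt.xlabel("Months since disclosure")
plt.ylabel("5% trimmed mean of stock price % change")
plt.show()
```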

Let's now compare the actual versus financial analyst predictions of the stock price a year after the public disclosure of a cybersecurity breach. We will do this through a violin plot.

Once again, there appear to be some outliers in these distributions, specifically in the financial analyst predictions. As mentioned before, there are two options for handling these outliers. We don't really want to use a new "middle" metric because we want to compare the actual distributions, so we will instead opt for option one and remove the outliers.

We will be removing the outliers for the financial analyst median and mean predictions. To remove the outliers, we need some rule for labeling a point as an outlier. There are different methods for classifying outliers, but we opt to abide by the three-sigma rule, which states that nearly all values lie within three standard deviations of the mean. If a point lies beyond three standard deviations from the mean, we classify it as an outlier and remove it from the distribution. The following creates distributions without these outliers.
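A sketch of a three-sigma filter (column names illustrative):

```python
def remove_outliers(series):
    """Drop values more than three standard deviations from the mean."""
    mean, std = series.mean(), series.std()
    return series[(series - mean).abs() <= 3 * std]

analyst_median_clean = remove_outliers(breaches["analyst_median_pct"].dropna())
analyst_mean_clean = remove_outliers(breaches["analyst_mean_pct"].dropna())
```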

Let's now create a new violin plot without the outliers.

This violin plot is much more legible than the previous one. It seems from this violin plot that the financial analyst predictions tend to vary more than the actual stock prices. It also hints that financial analysts tend to overestimate the actual stock price of these companies. A better way of visualizing these differences is to look at the residuals, where a residual is the actual minus the predicted stock price percent change.

The following code will compute the residuals for each prediction type and plot them. Note that for this part, the outlier financial analyst predictions have been kept.
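A sketch of the residual computation and plot, with illustrative column names:

```python
# Residual = actual one-year percent change minus the analyst-predicted
# percent change (one residual per prediction type).
breaches["residual_median"] = breaches["month_12_pct"] - breaches["analyst_median_pct"]
breaches["residual_mean"] = breaches["month_12_pct"] - breaches["analyst_mean_pct"]

sns.violinplot(data=breaches[["residual_median", "residual_mean"]])
plt.ylabel("Residual (actual - predicted % change)")
plt.show()
```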

The distributions of the different residuals appear very similar. They center around 0 but are skewed towards the negative end, which means that financial analyst predictions tend to be greater than the actual stock prices.

For a better look, here's a display of the summary statistics for each residual distribution. Note that our initial sample of cybersecurity breaches was 737, but it has now shrunk to 474. This could be because the Yahoo Finance API does not contain stock prices for certain companies, because IBES has no predictions for smaller companies, or because a year hasn't yet elapsed since the public disclosure of the cybersecurity breach.

It does seem that financial analysts overestimate the actual stock prices of firms that have recently issued public disclosures of cybersecurity breaches. We need to perform a more rigorous statistical study to conclude this, which takes us to our next section...

Hypothesis Testing

We want to perform a statistical test to confirm our finding that financial analysts overestimate the actual stock price. Specifically, we want a test that shows that the residuals we've found skew negative. In other words, we want to show that the mean ($\mu$) of the distribution of residuals is negative, and that these results are statistically significant, leaving little doubt that the true mean is actually below 0.

The best test for this situation is a paired-sample t-test, which reduces to a one-sample t-test on the residuals (the paired differences). To understand how this test works, we need to understand what the null and alternative hypotheses are. The null hypothesis states that a population parameter is equal to a hypothesized value. The alternative hypothesis states that the population parameter differs from the hypothesized value posited in the null hypothesis. These two hypotheses are mutually exclusive (if one is true, the other is false). In this case, our hypotheses are:
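$$H_0: \mu = 0 \qquad\qquad H_a: \mu < 0$$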

The null hypothesis roughly translates to: the mean of the distribution of residuals is equal to 0, meaning that we expect no difference between the actual and predicted stock prices. The alternative hypothesis roughly translates to: the mean of the distribution of residuals is less than 0, meaning that the predicted stock prices tend to be greater than the actual stock prices.

In this statistical test, we begin by assuming the null hypothesis is true. We then study our sample dataset (through statistical methods) to see whether, given that the null hypothesis is true, the sample could reasonably occur. If our data could not reasonably occur under the premise of the null hypothesis, then we reject the null hypothesis and assume it is false; in that case we accept the alternative hypothesis as true. If the data is not significantly opposed to the premise of the null hypothesis, we fail to reject the null hypothesis, meaning we do not have enough evidence to conclude the alternative. This is the essence of most statistical hypothesis testing. For a more thorough explanation, click here.

Before we can perform this test, we need to meet certain prerequisites: the three assumptions of independence of observations, approximately normal distributions, and no major outliers. We can reasonably assume independence of observations since one company's residual does not affect another's. From the violin plot, the distributions look approximately normal, and to meet the assumption of no major outliers, we will remove outliers from the dataset when performing the test. Depicted below is a violin plot of the residuals with the major outliers removed.

We also have to establish a significance level before running the test. We choose a significance level of 5%, meaning if a sample has a less than 5% chance of occurring given the null hypothesis, then we will reject the null hypothesis and accept the alternative.

The following code will output the p-values for the median and mean residual hypothesis tests respectively. Outliers have been removed for this test.

Note: We are performing a one-tailed test, so we will halve the p-values.
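A sketch of these tests using scipy, reusing the remove_outliers helper sketched earlier (column names illustrative):

```python
from scipy import stats

for col in ["residual_median", "residual_mean"]:
    residuals = remove_outliers(breaches[col].dropna())
    # scipy's one-sample t-test is two-tailed, so we halve the p-value for the
    # one-tailed alternative (mean < 0) and check the sign of the t statistic.
    t_stat, p_two_tailed = stats.ttest_1samp(residuals, 0)
    p_one_tailed = p_two_tailed / 2 if t_stat < 0 else 1 - p_two_tailed / 2
    print(col, "t =", round(t_stat, 3), "one-tailed p =", p_one_tailed)
```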

These p-values are far smaller than our significance level of 5%. Since the chance that these samples could have occurred under the null hypothesis is so small, we reject the null hypothesis and accept the alternative hypothesis that $\mu < 0$. We can now conclude that financial analysts tend to overestimate the stock price of a company after the public disclosure of a cybersecurity breach.

Aside: What if we hadn't removed the outliers for the residuals?

Suppose we hadn't removed the outliers and still ran the t-tests as is. We would have gotten these p-values:

We would have still rejected the null hypothesis and accepted the alternative, but with the major caveat that we don't meet all the assumptions required to run this test.

Event-Study

Overview

The most widely accepted procedure for studying the impact of events on firm value is an event study. In our research, we use an event study to examine the difference between a company's actual stock price after an event and a predicted stock price derived from stock price data before the event. This difference between the actual return and the expected return is called the abnormal return. We show that the deviation in abnormal returns is attributable to the disclosure of the data breach. We also show that financial analyst predictions tend to be too optimistic after the disclosure of a cybersecurity breach.

Introduction

Our event study seeks to establish the effect of breaches on the stock price of affected companies. To measure this effect, we analyze the abnormal returns ($AR_{i,t}$), the actual returns ($r_{i,t}$) minus the normal returns ($NR_{i,t}$), in the aftermath of a breach announcement. Actual returns, $r_{i,t}$, are the real stock price changes, measured as $(p_{i,t} - p_{i,t-1}) / p_{i,t-1}$, where $p_{i,t}$ is the adjusted stock price of firm $i$ on day $t$. The normal return is the stock return firm $i$ would have had on day $t$ had the breach event not occurred. Because it is hypothetical, the normal return must be estimated using a model.

There are a variety of different models for normal returns, and they can generally be classified into two types: statistical and economic models. We chose to use statistical models because they offer good performance for their simplicity.

Among the statistical models, there are two major types: the constant mean model and the market model. A constant mean model takes the average of a firm's returns over the estimation period and uses that mean for all normal returns. A market model builds upon this concept and fits a linear model relating the market return (S&P 500 returns) to the firm's return. The constant mean model can be viewed as a market model with the market coefficient $\beta=0$.

Market models make the following assumptions:

Data Loading

We use the stock_indicators.csv file prepared during the earlier data collection and transformation steps. This CSV file has the stocks with invalid tickers cleaned out and adds several columns of stock prices for the months after the breach.

Here, we define the estimation and event windows for our analysis. The max_normal_range denotes the length of the maximum estimation window: we will retrieve max_normal_range days' worth of stock prices before the event, though we won't necessarily use all of them. Similarly, max_event_range denotes the number of days of stock prices we retrieve for the event window.
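For illustration, the window definitions might look like this (the particular values are assumptions):

```python
# Maximum lengths (in business days) of the estimation window before the event
# and of the event window around it. These specific values are illustrative.
max_normal_range = 120   # up to ~120 trading days of pre-event history
max_event_range = 15     # covers roughly +/- 7 business days around the event
```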

We use the S&P 500 index as our market basket. Each individual stock's performance will be compared against this market index.
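A sketch of retrieving the index with yfinance; the date range is illustrative and just needs to cover every breach's estimation and event windows:

```python
import yfinance as yf

# Daily S&P 500 history; every firm's returns are benchmarked against this index.
sp500 = yf.download("^GSPC", start="2005-01-01", end="2021-05-01")
sp500_returns = sp500["Close"].squeeze().pct_change().dropna()
```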

Market Model

Here, we implement the market model to predict the normal returns of the affected companies' stock prices.

The normal returns are the returns of a stock that would have happened without the breach. This market model consists of a regression with the following form: $$r_{i,t} = \alpha_i + \beta_i * r_{m,t} + \epsilon_{i,t}$$

where $r_{i,t}$ is firm $i$'s return and $r_{m,t}$ is the market return. $\alpha_i$ and $\beta_i$ are the regression intercept and coefficient, respectively, and $\epsilon_{i,t}$ is the regression error term. Our time increments are in business days, so if the date of the breach, $t=0$, is 4/23/2021 (a Friday), then $t=1$ would be 4/26/2021 (the following Monday). We then use the fitted model to estimate $NR_{i,t}$ as $\alpha_i + \beta_i * r_{m,t}$.
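A minimal sketch of the market model fit and the resulting normal and abnormal returns, using scipy's linregress:

```python
from scipy import stats

def market_model(firm_returns, market_returns):
    """Fit r_i = alpha_i + beta_i * r_m over the estimation window and return
    (alpha_i, beta_i). Both inputs are aligned series of daily returns."""
    fit = stats.linregress(market_returns, firm_returns)
    return fit.intercept, fit.slope

def normal_returns(alpha, beta, market_returns):
    """Normal (counterfactual) returns NR_{i,t} = alpha_i + beta_i * r_{m,t}."""
    return alpha + beta * market_returns

def abnormal_returns(firm_returns, alpha, beta, market_returns):
    """Abnormal returns AR_{i,t} = r_{i,t} - NR_{i,t} over the event window."""
    return firm_returns - normal_returns(alpha, beta, market_returns)
```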

Our market model estimates daily percentage changes in stock price. To measure the full impact of a breach, we need to aggregate these changes over a period. We track the Cumulative Abnormal Return (CAR) within +/- 7 business days of the breach. $CAR_i(a,b)$ is defined as follows:

$$CAR_i(a,b) = \sum_{t=a}^{b}{AR_{i,t}}$$

For small intervals, the CAR serves as a good aggregate of abnormal returns. Over longer intervals, the abnormal returns would compound, causing the real stock price to diverge from the cumulative returns.
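A sketch of the CAR computation over a window of business-day offsets:

```python
import numpy as np

def car(abnormal, a, b, event_index=0):
    """Cumulative abnormal return CAR_i(a, b), where a and b are business-day
    offsets relative to the event day, which sits at position `event_index` of
    the abnormal-return series (event_index must be >= |a| when a is negative)."""
    ar = np.asarray(abnormal)
    return ar[event_index + a : event_index + b + 1].sum()
```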

Stock price retrieval

In this function, we use the specified estimation and event windows to retrieve the stock prices of interest for every company. We use the yfinance library to retrieve stock prices by ticker and date from Yahoo Finance, a stock price database. After retrieving the relevant stock prices for a company, we execute market_model() to find the parameters alpha and beta. We cache the stock prices, model parameters, normal returns, and abnormal returns in a dictionary for every breach event.
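A rough sketch of such a function, reusing the market model helpers, window lengths, and S&P 500 returns sketched above (column names and the exact structure of the cached results are assumptions):

```python
def normal_return_model(row):
    """Download prices around one breach, fit the market model, and return a
    dictionary of intermediate results for that event (or None on failure)."""
    ticker = row["Ticker"]
    event_date = row["Date of Disclosure"]

    # Pull enough daily prices to cover both the estimation and event windows.
    start = event_date - pd.tseries.offsets.BDay(max_normal_range)
    end = event_date + pd.tseries.offsets.BDay(max_event_range)
    prices = yf.download(ticker, start=start, end=end, progress=False)
    if prices.empty:
        return None  # malformed ticker, or Yahoo Finance does not carry it

    returns = prices["Close"].squeeze().pct_change().dropna()
    market = sp500_returns.reindex(returns.index).dropna()
    returns = returns.reindex(market.index)

    # Estimation window: days strictly before the event; event window: the rest.
    est_firm, est_mkt = returns[returns.index < event_date], market[market.index < event_date]
    evt_firm, evt_mkt = returns[returns.index >= event_date], market[market.index >= event_date]

    alpha, beta = market_model(est_firm, est_mkt)
    nr = normal_returns(alpha, beta, evt_mkt)

    return {"ticker": ticker, "prices": prices, "alpha": alpha, "beta": beta,
            "normal_returns": nr, "abnormal_returns": evt_firm - nr}
```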

We execute normal_return_model(), which downloads prices and runs the market model regression, over all companies in the breach database. For some stocks, our downloader may fail because the ticker is malformed or because our stock database (Yahoo Finance) does not carry the stock.

After running these regressions, we save the Cumulative Abnormal Returns (CAR) to a separate CSV file and pickle our auxiliary data into raw_results.pkl. This serves as a good checkpoint, as downloading and running the regressions takes a substantial amount of time.
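A sketch of this checkpointing step, with car_df, results, and the CSV file name as placeholder names for the collected outputs:

```python
import pickle

# Checkpoint: persist the per-event CARs and the raw per-event results so the
# slow download/regression step does not need to be rerun.
car_df.to_csv("car_results.csv", index=False)
with open("raw_results.pkl", "wb") as f:
    pickle.dump(results, f)
```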

Reloading the data

We join the results of the run back into our original dataframe (with company ticker and other related info).

Individual Company Analysis

In this section, we analyze the financial impact of a particular breach, the SolarWinds breach, in detail.

On December 14, 2020, SolarWinds publicly announced that it had experienced a major breach of its "Orion" system. SolarWinds is an information technology company that supplies software to major firms and government organizations. Through this breach, the attackers were able to gain access to many organizations' IT systems, allowing them to install further malware. Over 18,000 organizations were affected, including Fortune 500 companies like Microsoft and government organizations like the Pentagon.

We plot the market model and the Cumulative Abnormal Returns (CAR) for SolarWinds around this breach.

General Company Data

Market Model Return History (Estimation)

Here, we compare the returns of SolarWinds alongside the S&P 500 index. The SolarWinds stock has higher variance than the S&P 500, as the S&P 500 is an aggregate over many different companies (and therefore diversified).

In this regression plot, we pair each day's S&P 500 return with SolarWind's return. The data satisfies most of linear regression's assumptions:

The independence assumption, which requires that data points be independent, may be slightly violated, as stock returns are temporally correlated with one another. However, over a large window these violations do not affect the model much, and research into market models suggests that linear regression remains an effective tool for estimating normal returns.

Event Analysis

In this plot, we notice a substantial deviation in stock price following the breach event. The company's stock price dropped over 15% on two of the event window days. The S&P 500 index remained stable across the week.

Here, we plot the S&P 500 and SolarWinds stock prices across the estimation and event windows. Notice how the SolarWinds stock price deviated substantially from the S&P 500 during this period. This serves as supporting (though not conclusive) evidence that it was the breach, and not a market-wide downturn, that caused this drop in stock price.

In this plot, we overlay the event window returns on our market model. Once again, we observe substantial deviations of SolarWinds' stock price relative to our market model, which is based on the S&P 500.

CAR Plots

Here, we analyze the Cumulative Abnormal Returns (CAR) over the entire database of breaches. We would like to detect whether there was a statistically significant change in stock price after the breach.

The cumulative returns over the different date windows follow an approximately normal distribution. Based on these histograms, there is a slight shift of the Cumulative Abnormal Returns towards the negative.
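A sketch of the per-window tests, with illustrative column names for the CAR windows:

```python
from scipy import stats

# One-sample t-test of each window's CAR distribution against a mean of zero.
# The window column names here are illustrative.
for window in ["car_0_0", "car_0_1", "car_0_2", "car_0_3", "car_0_4"]:
    sample = car_df[window].dropna()
    t_stat, p_value = stats.ttest_1samp(sample, 0)
    print(f"{window}: t = {t_stat:.3f}, p = {p_value:.4f}")
```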

The t-tests of the CARs across the (0, 0) to (0, 4) windows suggest that the deviations are statistically significant. Four of the five windows show a p-value under our alpha of 5%. This supports our initial hypothesis that breaches negatively affect a company's returns.