{
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0,
|
||
"metadata": {
|
||
"colab": {
|
||
"name": "Chapter 5: Statistical Hypothesis Testing.ipynb",
|
||
"provenance": [],
|
||
"collapsed_sections": [
|
||
"WJhmgDxsVHEO",
|
||
"MkgcD5YRqY2t",
|
||
"YdiAgoIkqkgY",
|
||
"zPLd6dYaxMkT",
|
||
"OJMPHhRcyblx",
|
||
"CvS6L1Js4duu",
|
||
"7ob5bPMC7wnc",
|
||
"i9LHPNrU_aNd",
|
||
"LROas1z3DtJL",
|
||
"7c95dMu8HLoA",
|
||
"QFnla-p6YV94",
|
||
"7JMLIct9Ydku",
|
||
"IWdTeJz-ZuWc",
|
||
"6sR1d3BHS9Hh",
|
||
"_beFJ80gTH85",
|
||
"uXF1GqteUu0E",
|
||
"PpZ12cY4Vv_5",
|
||
"uCyetBhlW3E3"
|
||
]
|
||
},
|
||
"kernelspec": {
|
||
"name": "python3",
|
||
"display_name": "Python 3"
|
||
},
|
||
"language_info": {
|
||
"name": "python"
|
||
}
|
||
},
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"# **Chapter 5: Statistical Hypothesis Testing**"
|
||
],
|
||
"metadata": {
|
||
"id": "RZ4sKXgZqP5R"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"**Table of Content:**\n",
|
||
"\n",
|
||
"- [Import Libraries](#Import_Libraries)\n",
|
||
"- [5.1. Normality Tests](#Normality_Tests)\n",
|
||
" - [5.1.1. Shapiro-Wilk Test](#Shapiro-Wilk_Test)\n",
|
||
" - [5.1.2. D’Agostino’s $K^2$ Test](#D’Agostino’s_Test)\n",
|
||
" - [5.1.3. Anderson-Darling Test](#Anderson-Darling_Test)\n",
|
||
"- [5.2. Correlation Tests](#Correlation_Tests)\n",
|
||
" - [5.2.1. Pearson’s Correlation Coefficient](#Pearson’s_Correlation_Coefficient)\n",
|
||
" - [5.2.2. Spearman’s Rank Correlation](#Spearman’s_Rank_Correlation)\n",
|
||
" - [5.2.3. Kendall’s Rank Correlation](#Kendall’s_Rank_Correlation)\n",
|
||
" - [5.2.4. Chi-Squared Test](#Chi-Squared_Test)\n",
|
||
"- [5.3. Stationary Tests](#Stationary_Tests)\n",
|
||
" - [5.3.1. Augmented Dickey-Fuller Unit Root Test](#Augmented_Dickey-Fuller_Unit_Root_Test)\n",
|
||
" - [5.3.2. Kwiatkowski-Phillips-Schmidt-Shin Test](#Kwiatkowski-Phillips-Schmidt-Shin_Test) \n",
|
||
"- [5.4. Other Tests](#Other_Tests)\n",
|
||
" - [5.4.1. Mann-Whitney U-Test](#Mann-Whitney_U-Test)\n",
|
||
" - [5.4.2. Wilcoxon Signed-Rank Test](#Wilcoxon_Signed-Rank-Test)\n",
|
||
" - [5.4.3. Kruskal-Wallis H Test](#Kruskal-Wallis_H_Test)\n",
|
||
" - [5.4.4. Friedman Test](#Friedman_Test) "
|
||
],
|
||
"metadata": {
|
||
"id": "V9XOxBPqCABJ"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Import_Libraries'></a>\n",
|
||
"\n",
|
||
"## **Import Libraries**"
|
||
],
|
||
"metadata": {
|
||
"id": "WJhmgDxsVHEO"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"!pip install --upgrade scipy"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "vXStgb2JU6c0",
|
||
"outputId": "f227901d-8551-4fde-bb9a-4bc5e3bef1ab"
|
||
},
|
||
"execution_count": 1,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
||
"Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (1.4.1)\n",
|
||
"Collecting scipy\n",
|
||
" Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)\n",
|
||
"\u001b[K |████████████████████████████████| 38.1 MB 1.3 MB/s \n",
|
||
"\u001b[?25hRequirement already satisfied: numpy<1.23.0,>=1.16.5 in /usr/local/lib/python3.7/dist-packages (from scipy) (1.21.6)\n",
|
||
"Installing collected packages: scipy\n",
|
||
" Attempting uninstall: scipy\n",
|
||
" Found existing installation: scipy 1.4.1\n",
|
||
" Uninstalling scipy-1.4.1:\n",
|
||
" Successfully uninstalled scipy-1.4.1\n",
|
||
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
||
"albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n",
|
||
"Successfully installed scipy-1.7.3\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import matplotlib.patches as mpatches\n",
|
||
"import seaborn as sns\n",
|
||
"import math\n",
|
||
"from scipy import stats\n",
|
||
"from scipy.stats import norm\n",
|
||
"from scipy.stats import chi2\n",
|
||
"from scipy.stats import t\n",
|
||
"from scipy.stats import f\n",
|
||
"from scipy.stats import bernoulli\n",
|
||
"from scipy.stats import binom\n",
|
||
"from scipy.stats import nbinom\n",
|
||
"from scipy.stats import geom\n",
|
||
"from scipy.stats import poisson\n",
|
||
"from scipy.stats import uniform\n",
|
||
"from scipy.stats import randint\n",
|
||
"from scipy.stats import expon\n",
|
||
"from scipy.stats import gamma\n",
|
||
"from scipy.stats import beta\n",
|
||
"from scipy.stats import weibull_min\n",
|
||
"from scipy.stats import hypergeom\n",
|
||
"from scipy.stats import shapiro\n",
|
||
"from scipy.stats import pearsonr\n",
|
||
"from scipy.stats import normaltest\n",
|
||
"from scipy.stats import anderson\n",
|
||
"from scipy.stats import spearmanr\n",
|
||
"from scipy.stats import kendalltau\n",
|
||
"from scipy.stats import chi2_contingency\n",
|
||
"from scipy.stats import ttest_ind\n",
|
||
"from scipy.stats import ttest_rel\n",
|
||
"from scipy.stats import mannwhitneyu\n",
|
||
"from scipy.stats import wilcoxon\n",
|
||
"from scipy.stats import kruskal\n",
|
||
"from scipy.stats import friedmanchisquare\n",
|
||
"from statsmodels.tsa.stattools import adfuller\n",
|
||
"from statsmodels.tsa.stattools import kpss\n",
|
||
"from statsmodels.stats.weightstats import ztest\n",
|
||
"from scipy.integrate import quad\n",
|
||
"from IPython.display import display, Latex\n",
|
||
"\n",
|
||
"import warnings\n",
|
||
"warnings.filterwarnings('ignore')\n",
|
||
"warnings.simplefilter(action='ignore', category=FutureWarning)"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "ZPuphzTmU-P8",
|
||
"outputId": "db6527ea-c413-4457-e747-8f817f99006b"
|
||
},
|
||
"execution_count": 2,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stderr",
|
||
"text": [
|
||
"/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
|
||
" import pandas.util.testing as tm\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Normality_Tests'></a>\n",
|
||
"\n",
|
||
"## **5.1. Normality Tests:**"
|
||
],
|
||
"metadata": {
|
||
"id": "MkgcD5YRqY2t"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Shapiro-Wilk_Test'></a>\n",
|
||
"\n",
|
||
"### **5.1.1. Shapiro-Wilk Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "YdiAgoIkqkgY"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"$H_0$ : The sample has a Normal (Gaussian) distribution\n",
|
||
"\n",
|
||
"$H_1$ : The sample does not have a Normal (Gaussian) distribution.\n",
|
||
"\n",
|
||
"Assumptions: \n",
|
||
"* Observations in each sample are independent and identically distributed (iid).\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Shapiro-Wilk Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.shapiro.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "5THGAbaPqrjg"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 100\n",
|
||
"alpha = 0.05\n",
|
||
"np.random.seed(1)\n",
|
||
"data = np.random.normal(0, 1, N)\n",
|
||
"\n",
|
||
"Test_statistic, p_value = shapiro(data)\n",
|
||
"print(f'Test_statistic_shapiro = {Test_statistic}, p_value = {p_value}', '\\n')\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, The data is probably normal.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.')"
|
||
],
|
||
"metadata": {
|
||
"id": "oEXRlb4QwyRK",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "c857ada2-317e-4851-b71f-356d76a9cb22"
|
||
},
|
||
"execution_count": 3,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_shapiro = 0.9920045137405396, p_value = 0.8215526342391968 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='D’Agostino’s_Test'></a>\n",
|
||
"\n",
|
||
"### **5.1.2. D’Agostino’s $K^2$ Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "zPLd6dYaxMkT"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"$H_0$ : The sample has a Normal (Gaussian) distribution\n",
|
||
"\n",
|
||
"$H_1$ : The sample does not have a Normal (Gaussian) distribution.\n",
|
||
"\n",
|
||
"Assumptions: \n",
|
||
"* Observations in each sample are independent and identically distributed (iid).\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[D’Agostino’s $K^2$ Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.normaltest.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "AZnwcLLAxVJI"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 100\n",
|
||
"alpha = 0.05\n",
|
||
"np.random.seed(1)\n",
|
||
"data = np.random.normal(0, 1, N)\n",
|
||
"\n",
|
||
"Test_statistic, p_value = normaltest(data)\n",
|
||
"print(f\"Test_statistic_D'Agostino's K-squared = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, The data is probably normal.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "f5f2c919-1de4-415d-a57e-d478d55e4456",
|
||
"id": "AwDtLtHkxVJK"
|
||
},
|
||
"execution_count": 4,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_D'Agostino's K-squared = 0.10202388832581702, p_value = 0.9502673203169621 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Anderson-Darling_Test'></a>\n",
|
||
"\n",
|
||
"### **5.1.3. Anderson-Darling Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "OJMPHhRcyblx"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"$H_0$ : The sample has a Normal (Gaussian) distribution\n",
|
||
"\n",
|
||
"$H_1$ : The sample does not have a Normal (Gaussian) distribution.\n",
|
||
"\n",
|
||
"Assumptions: \n",
|
||
"* Observations in each sample are independent and identically distributed (iid).\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Anderson-Darling Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html)\n",
|
||
"\n",
|
||
"Critical values provided are for the following significance levels:\n",
|
||
"\n",
|
||
"normal/exponential:\n",
|
||
"\n",
|
||
"$15\\%, 10\\%, 5\\%, 2.5\\%, 1\\%$\n",
|
||
"\n",
|
||
"logistic:\n",
|
||
"\n",
|
||
"$25\\%, 10\\%, 5\\%, 2.5\\%, 1\\%, 0.5\\%$\n",
|
||
"\n",
|
||
"Gumbel:\n",
|
||
"\n",
|
||
"$25\\%, 10\\%, 5\\%, 2.5\\%, 1\\%$\n",
|
||
"\n",
|
||
"If the test statistic is larger than these critical values then for the corresponding significance level, the null hypothesis that the data come from the chosen distribution can be rejected."
|
||
],
|
||
"metadata": {
|
||
"id": "gU9cF9z9ybly"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 100\n",
|
||
"np.random.seed(1)\n",
|
||
"data = np.random.normal(0, 1, N)\n",
|
||
"\n",
|
||
"Test_statistic, critical_values, significance_level = anderson(data, dist='norm')\n",
|
||
"print(f'Test_statistic_anderson = {Test_statistic}', '\\n')\n",
|
||
"\n",
|
||
"for i in range(len(critical_values)):\n",
|
||
" sl, cv = significance_level[i], critical_values[i]\n",
|
||
" if Test_statistic > cv:\n",
|
||
" print(f'(Test statistic = {Test_statistic}) > (critical value = {sl}%), therefore for the corresponding significance level, the null hpothesis cannot be rejected.')\n",
|
||
" else:\n",
|
||
" print(f'(Test statistic = {Test_statistic}) > (critical value = {sl}%), therefore for the corresponding significance level, the null hpothesis is rejected.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "G041pJhiy4ra",
|
||
"outputId": "eb4d9936-b41a-4218-865d-eed99eb45eb7"
|
||
},
|
||
"execution_count": 5,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_anderson = 0.2196508855594459 \n",
|
||
"\n",
|
||
"(Test statistic = 0.2196508855594459) > (critical value = 15.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
|
||
"(Test statistic = 0.2196508855594459) > (critical value = 10.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
|
||
"(Test statistic = 0.2196508855594459) > (critical value = 5.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
|
||
"(Test statistic = 0.2196508855594459) > (critical value = 2.5%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
|
||
"(Test statistic = 0.2196508855594459) > (critical value = 1.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Note that you can use Anderson-Darling test for other distributions. \n",
|
||
"\n",
|
||
"The valid values are: {‘norm’, ‘expon’, ‘logistic’, ‘gumbel’, ‘gumbel_l’, ‘gumbel_r’, ‘extreme1’}"
|
||
],
|
||
"metadata": {
|
||
"id": "JJ5mE7YozixQ"
|
||
}
|
||
},
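{
"cell_type": "markdown",
"source": [
"As a minimal illustrative sketch (an addition, not part of the original chapter), the cell below runs the Anderson-Darling test with `dist='expon'` on simulated exponential data; the variable `data_exp` and its parameters are assumptions made only for this example."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Hedged example: Anderson-Darling test against the exponential distribution.\n",
"# data_exp is illustrative simulated data (an assumption), not data from the chapter.\n",
"N = 100\n",
"np.random.seed(1)\n",
"data_exp = np.random.exponential(1, N)\n",
"\n",
"result = anderson(data_exp, dist='expon')\n",
"print(f'Test_statistic_anderson (expon) = {result.statistic}', '\\n')\n",
"\n",
"for sl, cv in zip(result.significance_level, result.critical_values):\n",
"    if result.statistic > cv:\n",
"        print(f'(Test statistic = {result.statistic}) > critical value at the {sl}% significance level, therefore the null hypothesis is rejected at that level.')\n",
"    else:\n",
"        print(f'(Test statistic = {result.statistic}) <= critical value at the {sl}% significance level, therefore the null hypothesis cannot be rejected at that level.')"
],
"metadata": {},
"execution_count": null,
"outputs": []
},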
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Correlation_Tests'></a>\n",
|
||
"\n",
|
||
"## **5.2. Correlation Tests:**"
|
||
],
|
||
"metadata": {
|
||
"id": "CvS6L1Js4duu"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Pearson’s_Correlation_Coefficient'></a>\n",
|
||
"\n",
|
||
"### **5.2.1. Pearson’s Correlation Coefficient:**"
|
||
],
|
||
"metadata": {
|
||
"id": "7ob5bPMC7wnc"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether two data sample have a linear relationship.\n",
|
||
"\n",
|
||
"$H_0$: The two data are independent.\n",
|
||
"\n",
|
||
"$H_1$: There is a dependency between the two data.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in each data sample are independent and identically distributed (iid).\n",
|
||
"* Observations in each data sample are normally distributed.\n",
|
||
"* Observations in each data sample have the same variance.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Pearson’s Correlation Coefficient Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "yNCyRmLS8NIj"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 10\n",
|
||
"alpha = 0.05\n",
|
||
"np.random.seed(1)\n",
|
||
"data1 = np.random.normal(0, 1, N)\n",
|
||
"data2 = np.random.normal(0, 1, N) + 2\n",
|
||
"\n",
|
||
"Test_statistic, p_value = pearsonr(data1, data2)\n",
|
||
"print(f\"Test_statistic_Pearson's Correlation = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "P6ENjsEd0lbp",
|
||
"outputId": "2eb12605-e669-466e-91ff-905169eb6995"
|
||
},
|
||
"execution_count": 6,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Pearson's Correlation = 0.6556177144470315, p_value = 0.03957633895447448 \n",
|
||
"\n",
|
||
"Since p_value < 0.05, reject null hypothesis. Therefore, Two data are probably dependent.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"This test is parametric."
|
||
],
|
||
"metadata": {
|
||
"id": "WbRpE09hGQXz"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Spearman’s_Rank_Correlation'></a>\n",
|
||
"\n",
|
||
"### **5.2.2. Spearman’s Rank Correlation:**"
|
||
],
|
||
"metadata": {
|
||
"id": "i9LHPNrU_aNd"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether two data samples have a monotonic relationship.\n",
|
||
"\n",
|
||
"$H_0$: The two data are independent.\n",
|
||
"\n",
|
||
"$H_1$: There is a dependency between the two data.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in each data sample are independent and identically distributed (iid).\n",
|
||
"* Observations in each data sample can be ranked.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Spearman’s Rank Correlation Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "s4Itzt6K_kaj"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 10\n",
|
||
"alpha = 0.05\n",
|
||
"np.random.seed(1)\n",
|
||
"data1 = np.random.normal(0, 1, N)\n",
|
||
"data2 = np.random.normal(0, 1, N) + 2\n",
|
||
"\n",
|
||
"Test_statistic, p_value = spearmanr(data1, data2, alternative = 'two-sided')\n",
|
||
"print(f\"Test_statistic_Spearman's Rank Correlation = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "vSdyARWiAQuh",
|
||
"outputId": "17841159-7ace-4bc8-8cae-18dc3d2bccd5"
|
||
},
|
||
"execution_count": 7,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Spearman's Rank Correlation = 0.7818181818181817, p_value = 0.007547007781067878 \n",
|
||
"\n",
|
||
"Since p_value < 0.05, reject null hypothesis. Therefore, Two data are probably dependent.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Alternative hypothesis can be {‘two-sided’, ‘less’, ‘greater’}.\n",
|
||
"\n",
|
||
"'two-sided': the correlation is non-zero\n",
|
||
"\n",
|
||
"'less': the correlation is negative (less than zero)\n",
|
||
"\n",
|
||
"'greater': the correlation is positive (greater than zero)"
|
||
],
|
||
"metadata": {
|
||
"id": "wibHxhC4BO5b"
|
||
}
|
||
},
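{
"cell_type": "markdown",
"source": [
"As a minimal sketch (an addition, not part of the original chapter), the cell below repeats the Spearman test with `alternative='greater'` on the same simulated data, so only a positive correlation counts as evidence against $H_0$."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Hedged example: one-sided Spearman test (H1: the correlation is positive).\n",
"N = 10\n",
"alpha = 0.05\n",
"np.random.seed(1)\n",
"data1 = np.random.normal(0, 1, N)\n",
"data2 = np.random.normal(0, 1, N) + 2\n",
"\n",
"Test_statistic, p_value = spearmanr(data1, data2, alternative='greater')\n",
"print(f\"Test_statistic_Spearman's Rank Correlation (one-sided) = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
"\n",
"if p_value < alpha:\n",
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, the correlation is probably positive.')\n",
"else:\n",
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected.')"
],
"metadata": {},
"execution_count": null,
"outputs": []
},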
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Kendall’s_Rank_Correlation'></a>\n",
|
||
"\n",
|
||
"### **5.2.3. Kendall’s Rank Correlation:**"
|
||
],
|
||
"metadata": {
|
||
"id": "LROas1z3DtJL"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether two data samples have a monotonic relationship.\n",
|
||
"\n",
|
||
"$H_0$: The two data are independent.\n",
|
||
"\n",
|
||
"$H_1$: There is a dependency between the two data.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in each data sample are independent and identically distributed (iid).\n",
|
||
"* Observations in each data sample can be ranked.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Kendall’s Rank Correlation Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "084h0SpxDzCX"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 10\n",
|
||
"alpha = 0.05\n",
|
||
"np.random.seed(1)\n",
|
||
"data1 = np.random.normal(0, 1, N)\n",
|
||
"data2 = np.random.normal(0, 1, N) + 2\n",
|
||
"\n",
|
||
"Test_statistic, p_value = kendalltau(data1, data2)\n",
|
||
"print(f\"Test_statistic_Kendall's Rank Correlation = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "Gzxj53vaEUzF",
|
||
"outputId": "0f7af409-1a8d-4dbe-f656-63902010e85c"
|
||
},
|
||
"execution_count": 8,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Kendall's Rank Correlation = 0.6, p_value = 0.016666115520282188 \n",
|
||
"\n",
|
||
"Since p_value < 0.05, reject null hypothesis. Therefore, Two data are probably dependent.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Chi-Squared_Test'></a>\n",
|
||
"\n",
|
||
"### **5.2.4. Chi-Squared Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "7c95dMu8HLoA"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether two categorical variables are related or independent.\n",
|
||
"\n",
|
||
"$H_0$: The two data are independent.\n",
|
||
"\n",
|
||
"$H_1$: There is a dependency between the two data.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations used in the calculation of the contingency table are independent.\n",
|
||
"* 25 or more examples in each cell of the contingency table.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Chi-Squared Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2_contingency.html)\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"degrees of freedom: $(rows - 1) * (cols - 1)$"
|
||
],
|
||
"metadata": {
|
||
"id": "tcV0_bRnHRI8"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 10\n",
|
||
"alpha = 0.05\n",
|
||
"table = [[10, 20, 30],\n",
|
||
"\t\t\t [6, 9, 17]]\n",
|
||
"\n",
|
||
"Test_statistic, p_value, dof, expected = chi2_contingency(table)\n",
|
||
"print(f\"Test_statistic_Chi-Squared = {Test_statistic}, p_value = {p_value}, df = {dof}, \\n\", f\"Expected = {expected}\",\"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "0Ik8RPjBHOwr",
|
||
"outputId": "aa7be138-2f00-429d-f6ba-eda5f09a94fd"
|
||
},
|
||
"execution_count": 9,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Chi-Squared = 0.27157465150403504, p_value = 0.873028283380073, df = 2, \n",
|
||
" Expected = [[10.43478261 18.91304348 30.65217391]\n",
|
||
" [ 5.56521739 10.08695652 16.34782609]] \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.\n"
|
||
]
|
||
}
|
||
]
|
||
},
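{
"cell_type": "markdown",
"source": [
"As a minimal sanity check (an addition, not part of the original chapter), the cell below reproduces the degrees of freedom from the $(rows - 1) * (cols - 1)$ formula and the expected counts from the row and column totals of the same `table`, for comparison with the values returned by `chi2_contingency`."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Hedged sanity check: recompute dof and expected counts by hand for the same table.\n",
"table = np.array([[10, 20, 30],\n",
"                  [6, 9, 17]])\n",
"rows, cols = table.shape\n",
"dof_manual = (rows - 1) * (cols - 1)\n",
"\n",
"# Expected count for each cell = (row total * column total) / grand total\n",
"row_totals = table.sum(axis=1, keepdims=True)\n",
"col_totals = table.sum(axis=0, keepdims=True)\n",
"expected_manual = row_totals @ col_totals / table.sum()\n",
"\n",
"print(f'dof_manual = {dof_manual}')\n",
"print('expected_manual =')\n",
"print(expected_manual)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},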
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Stationary_Tests'></a>\n",
|
||
"\n",
|
||
"## **5.3. Stationary Tests:**"
|
||
],
|
||
"metadata": {
|
||
"id": "QFnla-p6YV94"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Augmented_Dickey-Fuller_Unit_Root_Test'></a>\n",
|
||
"\n",
|
||
"### **5.3.1. Augmented Dickey-Fuller Unit Root Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "7JMLIct9Ydku"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether a time series has a unit root, e.g. has a trend or more generally is autoregressive.\n",
|
||
"\n",
|
||
"$H_0$: A unit root is present (series is non-stationary).\n",
|
||
"\n",
|
||
"$H_1$: A unit root is not present (series is stationary).\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in are temporally ordered.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Augmented Dickey-Fuller Unit Root Test Doc](https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "FbuyfZBIYoPj"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"alpha = 0.05\n",
|
||
"data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
|
||
"\n",
|
||
"Test_statistic, p_value, lags, obs, crit, t = adfuller(data)\n",
|
||
"print(f\"Test_statistic_Mann-Whitney = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, the series is probably stationary.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, the series is probably non-stationary.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "HnKfeoZfYXPt",
|
||
"outputId": "ad4c4075-ecda-4be5-f482-940bb90840b1"
|
||
},
|
||
"execution_count": 10,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Mann-Whitney = 0.5171974540944098, p_value = 0.9853865316323872 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, the series is probably non-stationary.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Kwiatkowski-Phillips-Schmidt-Shin_Test'></a>\n",
|
||
"\n",
|
||
"### **5.3.2. Kwiatkowski-Phillips-Schmidt-Shin Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "IWdTeJz-ZuWc"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether a time series is trend stationary or not.\n",
|
||
"\n",
|
||
"$H_0$: The time series is trend-stationary.\n",
|
||
"\n",
|
||
"$H_1$: The time series is not trend-stationary.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in are temporally ordered.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Kwiatkowski-Phillips-Schmidt-Shin Test Doc](https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.kpss.html#statsmodels.tsa.stattools.kpss)"
|
||
],
|
||
"metadata": {
|
||
"id": "IrK61Uv-ZuWc"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"alpha = 0.05\n",
|
||
"data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
|
||
"\n",
|
||
"Test_statistic, p_value, lags, crit = kpss(data)\n",
|
||
"print(f\"Test_statistic_Kwiatkowski = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, the series is probably not trend-stationary.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, the series is probably trend-stationary.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "d1d6462e-7130-418d-b277-e34f419ad68b",
|
||
"id": "EtboNoivZuWd"
|
||
},
|
||
"execution_count": 11,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Kwiatkowski = 0.4099630996309963, p_value = 0.072860732917674 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, the series is probably trend-stationary.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Other_Tests'></a>\n",
|
||
"\n",
|
||
"## **5.4. Other Tests:**"
|
||
],
|
||
"metadata": {
|
||
"id": "6sR1d3BHS9Hh"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Mann-Whitney_U-Test'></a>\n",
|
||
"\n",
|
||
"### **5.4.1. Mann-Whitney U-Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "_beFJ80gTH85"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether the distributions of two independent samples are equal or not.\n",
|
||
"\n",
|
||
"$H_0$: The distributions of both samples are equal.\n",
|
||
"\n",
|
||
"$H_1$: The distributions of both samples are not equal.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in each sample are independent and identically distributed (iid).\n",
|
||
"* Observations in each sample can be ranked.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Mann-Whitney U Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "xDJkmODOTOrP"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 10\n",
|
||
"alpha = 0.05\n",
|
||
"data1 = np.random.normal(0, 1, N)\n",
|
||
"data2 = np.random.normal(0, 1, N)\n",
|
||
"\n",
|
||
"Test_statistic, p_value = mannwhitneyu(data1, data2, alternative='two-sided')\n",
|
||
"print(f\"Test_statistic_Mann-Whitney = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data distributions are probably not equal.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "UJaGUqmfTjr4",
|
||
"outputId": "0e6858f5-0e32-4345-a83a-ce2b2ed6b517"
|
||
},
|
||
"execution_count": 12,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Mann-Whitney = 61.0, p_value = 0.4273553138978077 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Wilcoxon_Signed-Rank_Test'></a>\n",
|
||
"\n",
|
||
"### **5.4.2. Wilcoxon Signed-Rank Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "uXF1GqteUu0E"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether the distributions of two paired samples are equal or not.\n",
|
||
"\n",
|
||
"$H_0$: The distributions of both samples are equal.\n",
|
||
"\n",
|
||
"$H_1$: The distributions of both samples are not equal.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in each sample are independent and identically distributed (iid).\n",
|
||
"* Observations in each sample can be ranked.\n",
|
||
"* Observations across each sample are paired.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Wilcoxon Signed-Rank Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "wQUAp_96Uu0E"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 10\n",
|
||
"alpha = 0.05\n",
|
||
"data1 = np.random.normal(0, 1, N)\n",
|
||
"data2 = np.random.normal(0, 1, N)\n",
|
||
"\n",
|
||
"Test_statistic, p_value = wilcoxon(data1, data2, alternative='two-sided')\n",
|
||
"print(f\"Test_statistic_Wilcoxon = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data distributions are probably not equal.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "96500652-9c3f-4794-fb0e-6fdf3302b134",
|
||
"id": "_45cKDykUu0F"
|
||
},
|
||
"execution_count": 13,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Wilcoxon = 24.0, p_value = 0.76953125 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Kruskal-Wallis_H_Test'></a>\n",
|
||
"\n",
|
||
"### **5.4.3. Kruskal-Wallis H Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "PpZ12cY4Vv_5"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether the distributions of two or more independent samples are equal or not.\n",
|
||
"\n",
|
||
"$H_0$: The distributions of all samples are equal.\n",
|
||
"\n",
|
||
"$H_1$: The distributions of one or more samples are not equal.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in each sample are independent and identically distributed (iid).\n",
|
||
"* Observations in each sample can be ranked.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Kruskal-Wallis H Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "yPeW-V0QVv_6"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"N = 10\n",
|
||
"alpha = 0.05\n",
|
||
"data1 = np.random.normal(0, 1, N)\n",
|
||
"data2 = np.random.normal(0, 1, N)\n",
|
||
"\n",
|
||
"Test_statistic, p_value = kruskal(data1, data2)\n",
|
||
"print(f\"Test_statistic_Wilcoxon = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data distributions are probably not equal.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "5f856ccd-41f9-420d-997c-0e3507d0ca44",
|
||
"id": "gZfSggVCVv_7"
|
||
},
|
||
"execution_count": 14,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Wilcoxon = 1.462857142857132, p_value = 0.22647606604348455 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"<a name='Friedman_Test'></a>\n",
|
||
"\n",
|
||
"### **5.4.4. Friedman Test:**"
|
||
],
|
||
"metadata": {
|
||
"id": "uCyetBhlW3E3"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Tests whether the distributions of two or more paired samples are equal or not.\n",
|
||
"\n",
|
||
"$H_0$: The distributions of both samples are equal.\n",
|
||
"\n",
|
||
"$H_1$: The distributions of both samples are not equal.\n",
|
||
"\n",
|
||
"Assumptions:\n",
|
||
"* Observations in each sample are independent and identically distributed (iid).\n",
|
||
"* Observations in each sample can be ranked.\n",
|
||
"* Observations across each sample are paired.\n",
|
||
"\n",
|
||
"$\\\\ $\n",
|
||
"\n",
|
||
"[Friedman Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.friedmanchisquare.html)"
|
||
],
|
||
"metadata": {
|
||
"id": "fTbP_K-AW3E5"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"alpha = 0.05\n",
|
||
"data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]\n",
|
||
"data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]\n",
|
||
"data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]\n",
|
||
"\n",
|
||
"Test_statistic, p_value = friedmanchisquare(data1, data2, data3)\n",
|
||
"print(f\"Test_statistic_Friedman = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
|
||
"\n",
|
||
"if p_value < alpha:\n",
|
||
"\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, data distributions are probably not equal.')\n",
|
||
"else:\n",
|
||
"\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, data distributions are probably equal.')"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "042a5986-7d2e-4c15-b2f7-5a72f8714294",
|
||
"id": "w0if4yFkW3E5"
|
||
},
|
||
"execution_count": 15,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Test_statistic_Friedman = 0.8000000000000114, p_value = 0.6703200460356356 \n",
|
||
"\n",
|
||
"Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, data distributions are probably equal.\n"
|
||
]
|
||
}
|
||
]
|
||
}
|
||
]
|
||
} |