1793 lines
73 KiB
Plaintext
1793 lines
73 KiB
Plaintext
|
{
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 0,
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"name": "Chapter 3: Confidence Intervals.ipynb",
|
||
|
"provenance": [],
|
||
|
"collapsed_sections": [
|
||
|
"WJhmgDxsVHEO",
|
||
|
"rzplSNRK6pOR",
|
||
|
"JYXjgns760tV",
|
||
|
"j-dAKBdRgjMk",
|
||
|
"XqnRkA8t18kd",
|
||
|
"t8ukxT389zQi",
|
||
|
"f_p42Mn-MC3E",
|
||
|
"LGrRkUrrO3qG",
|
||
|
"fyBlrUYCPUzl",
|
||
|
"KiM27pBRqGIU",
|
||
|
"Bt-qEVhPYD5x",
|
||
|
"Djupsq6Kklgq"
|
||
|
]
|
||
|
},
|
||
|
"kernelspec": {
|
||
|
"name": "python3",
|
||
|
"display_name": "Python 3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"name": "python"
|
||
|
}
|
||
|
},
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"# **`Chapter 3: Confidence Intervals`**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "dBRZ1hY24Rtd"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**Table of Contents:**\n",
|
||
|
"\n",
|
||
|
"- [Import Libraries](#Import_Libraries)\n",
|
||
|
"- [3.1. Confidence Interval for the Mean of a Normal Population](#Confidence_Interval_for_the_Mean_of_a_Normal_Population)\n",
|
||
|
" - [3.1.1. Known Standard Deviation](#Known_Standard_Deviation)\n",
|
||
|
" - [3.1.2. Unknown Standard Deviation](#Unknown_Standard_Deviation)\n",
|
||
|
"\n",
|
||
|
"- [3.2. Confidence Interval for the Variance of a Normal Population](#Confidence_Interval_for_the_Variance_of_a_Normal_Population)\n",
|
||
|
" - [3.2.1. Unknown Mean of the Population](#Unknown_Mean_of_the_Population)\n",
|
||
|
" - [3.2.2. Known Mean of the Population](#Known_Mean_of_the_Population)\n",
|
||
|
"\n",
|
||
|
"- [3.3. Confidence Interval for the Difference in Means of Two Normal Populations](#Confidence_Interval_for_the_Difference_in_Means_of_Two_Normal_Populations)\n",
|
||
|
" - [3.3.1. Known Variances](#Known_Variances)\n",
|
||
|
" - [3.3.2. Unknown but Equal Variances](#Unknown_but_Equal_Variances)\n",
|
||
|
"\n",
|
||
|
"- [3.4. Confidence Interval for the Ratio of Variances of Two Normal Populations](#Confidence_Interval_for_the_Ratio_of_Variances_of_Two_Normal_Populations)\n",
|
||
|
"- [3.5. Confidence Interval for the Mean of a Bernoulli Random Variable](#Confidence_Interval_for_the_Mean_of_a_Bernoulli_Random_Variable)\n"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "hCvB3MUGy4as"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Import_Libraries'></a>\n",
|
||
|
"\n",
|
||
|
"## **Import Libraries**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "WJhmgDxsVHEO"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"!pip install --upgrade scipy"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "vXStgb2JU6c0",
|
||
|
"outputId": "5bc2ff25-f5a5-4054-de13-75841a76c9ff"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
||
|
"Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (1.4.1)\n",
|
||
|
"Collecting scipy\n",
|
||
|
" Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)\n",
|
||
|
"\u001b[K |████████████████████████████████| 38.1 MB 1.2 MB/s \n",
|
||
|
"\u001b[?25hRequirement already satisfied: numpy<1.23.0,>=1.16.5 in /usr/local/lib/python3.7/dist-packages (from scipy) (1.21.6)\n",
|
||
|
"Installing collected packages: scipy\n",
|
||
|
" Attempting uninstall: scipy\n",
|
||
|
" Found existing installation: scipy 1.4.1\n",
|
||
|
" Uninstalling scipy-1.4.1:\n",
|
||
|
" Successfully uninstalled scipy-1.4.1\n",
|
||
|
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
||
|
"albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n",
|
||
|
"Successfully installed scipy-1.7.3\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import matplotlib.patches as mpatches\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"import math\n",
|
||
|
"from scipy import stats\n",
|
||
|
"from scipy.stats import norm\n",
|
||
|
"from scipy.stats import chi2\n",
|
||
|
"from scipy.stats import t\n",
|
||
|
"from scipy.stats import f\n",
|
||
|
"from scipy.stats import bernoulli\n",
|
||
|
"from scipy.stats import binom\n",
|
||
|
"from scipy.stats import nbinom\n",
|
||
|
"from scipy.stats import geom\n",
|
||
|
"from scipy.stats import poisson\n",
|
||
|
"from scipy.stats import uniform\n",
|
||
|
"from scipy.stats import randint\n",
|
||
|
"from scipy.stats import expon\n",
|
||
|
"from scipy.stats import gamma\n",
|
||
|
"from scipy.stats import beta\n",
|
||
|
"from scipy.stats import weibull_min\n",
|
||
|
"from scipy.stats import hypergeom\n",
|
||
|
"from scipy.stats import shapiro\n",
|
||
|
"from scipy.stats import pearsonr\n",
|
||
|
"from scipy.stats import normaltest\n",
|
||
|
"from scipy.stats import anderson\n",
|
||
|
"from scipy.stats import spearmanr\n",
|
||
|
"from scipy.stats import kendalltau\n",
|
||
|
"from scipy.stats import chi2_contingency\n",
|
||
|
"from scipy.stats import ttest_ind\n",
|
||
|
"from scipy.stats import ttest_rel\n",
|
||
|
"from scipy.stats import mannwhitneyu\n",
|
||
|
"from scipy.stats import wilcoxon\n",
|
||
|
"from scipy.stats import kruskal\n",
|
||
|
"from scipy.stats import friedmanchisquare\n",
|
||
|
"from statsmodels.tsa.stattools import adfuller\n",
|
||
|
"from statsmodels.tsa.stattools import kpss\n",
|
||
|
"from statsmodels.stats.weightstats import ztest\n",
|
||
|
"from scipy.integrate import quad\n",
|
||
|
"from IPython.display import display, Latex\n",
|
||
|
"\n",
|
||
|
"import warnings\n",
|
||
|
"warnings.filterwarnings('ignore')\n",
|
||
|
"warnings.simplefilter(action='ignore', category=FutureWarning)"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "ZPuphzTmU-P8",
|
||
|
"outputId": "ab0b6d4c-7596-4dd3-da9f-6b4f073e73f2"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stderr",
|
||
|
"text": [
|
||
|
"/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
|
||
|
" import pandas.util.testing as tm\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Confidence_Interval_for_the_Mean_of_a_Normal_Population'></a>\n",
|
||
|
"\n",
|
||
|
"## **3.1. Confidence Interval for the Mean of a Normal Population:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "rzplSNRK6pOR"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Known_Standard_Deviation'></a>\n",
|
||
|
"\n",
|
||
|
"### **3.1.1. Known Standard Deviation:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "JYXjgns760tV"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and a known variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$P(-\\ Z_{\\frac{\\alpha}{2}}\\ \\leq\\ \\frac{\\overline{X}-\\mu}{\\frac{\\sigma}{\\sqrt{n}}}\\ \\leq\\ Z_{\\frac{\\alpha}{2}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ Z_{\\frac{\\alpha}{2}} \\frac{\\sigma}{\\sqrt{n}}\\ \\leq\\ \\mu\\ \\leq\\ \\overline{X}\\ +\\ Z_{\\frac{\\alpha}{2}} \\frac{\\sigma}{\\sqrt{n}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the mean of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X}\\ -\\ Z_{\\frac{\\alpha}{2}} \\frac{\\sigma}{\\sqrt{n}},\\ \\overline{X}\\ +\\ Z_{\\frac{\\alpha}{2}} \\frac{\\sigma}{\\sqrt{n}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "10TKKoby6-D_"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and a known variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$P(-\\ Z_{\\alpha}\\ \\leq\\ \\frac{\\overline{X}-\\mu}{\\frac{\\sigma}{\\sqrt{n}}}\\ \\leq\\ \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\infty\\ \\leq\\ \\mu\\ \\leq\\ \\overline{X}\\ +\\ Z_{\\alpha} \\frac{\\sigma}{\\sqrt{n}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the mean of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[-\\infty,\\ \\overline{X}\\ +\\ Z_{\\alpha} \\frac{\\sigma}{\\sqrt{n}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "fGHYWk5y-JS2"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and a known variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$P(-\\infty\\ \\leq\\ \\frac{\\overline{X}-\\mu}{\\frac{\\sigma}{\\sqrt{n}}}\\ \\leq\\ Z_{\\alpha}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ Z_{\\alpha} \\frac{\\sigma}{\\sqrt{n}}\\ \\leq\\ \\mu\\ \\leq\\ \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the mean of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X}\\ -\\ Z_{\\alpha} \\frac{\\sigma}{\\sqrt{n}},\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "yYGKzkCl_ws7"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_mean_with_known_variance:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" population_sd : known standard deviation of the population\n",
|
||
|
" n : optional, number of sample members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_c : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" Sample_mean : mean of the sample\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, population_sd, c_level, type_c, Sample_mean = 0., n = 0., data=None):\n",
|
||
|
" self.Sample_mean = Sample_mean\n",
|
||
|
" self.population_sd = population_sd\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n = n\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data = data\n",
|
||
|
" if data is not None:\n",
|
||
|
" self.Sample_mean = np.mean(list(data))\n",
|
||
|
" self.n = len(list(data))\n",
|
||
|
"\n",
|
||
|
" confidence_interval_for_mean_with_known_variance.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" c_u = self.Sample_mean + (-norm.ppf((1-self.c_level)/2)) * (self.population_sd/np.sqrt(self.n))\n",
|
||
|
" c_l = self.Sample_mean - (-norm.ppf((1-self.c_level)/2)) * (self.population_sd/np.sqrt(self.n))\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" c_u = self.Sample_mean + (-norm.ppf(1-self.c_level)) * (self.population_sd/np.sqrt(self.n))\n",
|
||
|
" display(Latex(f'$\\mu \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" c_l = self.Sample_mean - (-norm.ppf(1-self.c_level)) * (self.population_sd/np.sqrt(self.n))\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "eai7LbK24ZjV"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"np.random.seed(1)\n",
|
||
|
"data = np.random.normal(loc = 2, scale = 3, size = 20)\n",
|
||
|
"confidence_interval_for_mean_with_known_variance(population_sd = 3, c_level = 0.95, type_c = 'two_sided_confidence', data=data);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 39
|
||
|
},
|
||
|
"id": "9comBWfSWbuY",
|
||
|
"outputId": "11da123a-7ee5-422f-f45e-2cf805331141"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$0.2851222797529398 \\leq \\mu \\leq 2.9146899014826846$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"data = [5, 8.5, 12, 15, 7, 9, 7.5, 6.5, 10.5]\n",
|
||
|
"confidence_interval_for_mean_with_known_variance(population_sd = 2, c_level = 0.95, type_c = 'lower_confidence', data=data);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 39
|
||
|
},
|
||
|
"id": "rikL21RHdlwn",
|
||
|
"outputId": "8d7db470-bec4-40f8-e699-33ec34abe3fc"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$\\mu \\leq 10.096569084634314$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Unknown_Standard_Deviation'></a>\n",
|
||
|
"\n",
|
||
|
"### **3.1.2. Unknown Standard Deviation:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "j-dAKBdRgjMk"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ t_{\\frac{\\alpha}{2},n-1}\\ <\\ \\frac{\\overline{X}-\\mu}{\\frac{S}{\\sqrt{n}}} <\\ t_{\\frac{\\alpha}{2},n-1}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ t_{\\frac{\\alpha}{2},n-1} \\frac{S}{\\sqrt{n}}\\ <\\ \\mu\\ <\\ \\overline{X}\\ +\\ t_{\\frac{\\alpha}{2},n-1} \\frac{S}{\\sqrt{n}}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the mean of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X}\\ -\\ t_{\\frac{\\alpha}{2},n-1} \\frac{S}{\\sqrt{n}},\\ \\overline{X}\\ +\\ t_{\\frac{\\alpha}{2},n-1} \\frac{S}{\\sqrt{n}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "vtKhaPgmij5l"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$P(-\\ t_{\\alpha,n-1}\\ \\leq\\ \\frac{\\overline{X}-\\mu}{\\frac{S}{\\sqrt{n}}}\\ \\leq\\ \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\infty\\ \\leq\\ \\mu\\ \\leq\\ \\overline{X}\\ +\\ t_{\\alpha,n-1} \\frac{S}{\\sqrt{n}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the mean of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[-\\infty,\\ \\overline{X}\\ +\\ t_{\\alpha,n-1} \\frac{S}{\\sqrt{n}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "izR0qZWSjUv-"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$P(-\\infty\\ \\leq\\ \\frac{\\overline{X}-\\mu}{\\frac{S}{\\sqrt{n}}}\\ \\leq\\ t_{\\alpha,n-1}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ t_{\\alpha,n-1} \\frac{S}{\\sqrt{n}} \\leq\\ \\mu\\ \\leq\\ \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the mean of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X}\\ -\\ t_{\\alpha,n-1} \\frac{S}{\\sqrt{n}},\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "OVn_SYxdkZDR"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_mean_with_unknown_variance:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" n : optional, number of sample members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_c : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" Sample_std : optional, std of the sample\n",
|
||
|
" Sample_mean : optional, mean of the sample\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, c_level, type_c, Sample_std = 0., Sample_mean = 0., n = 0., data=None):\n",
|
||
|
" self.Sample_mean = Sample_mean\n",
|
||
|
" self.Sample_std = Sample_std\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n = n\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data = data\n",
|
||
|
" if data is not None:\n",
|
||
|
" self.Sample_mean = np.mean(list(data))\n",
|
||
|
" self.Sample_std = np.std(list(data), ddof=1)\n",
|
||
|
" self.n = len(list(data))\n",
|
||
|
"\n",
|
||
|
" confidence_interval_for_mean_with_unknown_variance.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" c_u = self.Sample_mean + (t.isf((1-self.c_level)/2, self.n-1)) * (self.Sample_std/np.sqrt(self.n))\n",
|
||
|
" c_l = self.Sample_mean - (t.isf((1-self.c_level)/2, self.n-1)) * (self.Sample_std/np.sqrt(self.n))\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" c_u = self.Sample_mean + (t.isf(1-self.c_level, self.n-1)) * (self.Sample_std/np.sqrt(self.n))\n",
|
||
|
" display(Latex(f'$\\mu \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" c_l = self.Sample_mean - (t.isf(1-self.c_level, self.n-1)) * (self.Sample_std/np.sqrt(self.n))\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "__wyr1psgzld"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"data = [5, 8.5, 12, 15, 7, 9, 7.5, 6.5, 10.5]\n",
|
||
|
"confidence_interval_for_mean_with_unknown_variance(c_level = 0.95, type_c = 'two_sided_confidence', data=data);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "BzJ6wQBHmvwI",
|
||
|
"outputId": "01b45d36-41dc-4346-b701-5587ab239217"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$6.630805969849321 \\leq \\mu \\leq 11.369194030150679$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Confidence_Interval_for_the_Variance_of_a_Normal_Population'></a>\n",
|
||
|
"\n",
|
||
|
"## **3.2. Confidence Interval for the Variance of a Normal Population:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "XqnRkA8t18kd"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Unknown_Mean_of_the_Population'></a>\n",
|
||
|
"\n",
|
||
|
"### **3.2.1. Unknown Mean of the Population:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "t8ukxT389zQi"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\ \\chi^2_{1-\\frac{\\alpha}{2}, n-1}\\ \\leq\\ \\frac{(n-1)\\ S^2}{\\sigma^2} \\ \\leq \\chi^2_{\\frac{\\alpha}{2}, n-1} $\n",
|
||
|
"\n",
|
||
|
"$\\ \\frac{(n-1)\\ S^2}{\\chi^2_{\\frac{\\alpha}{2}, n-1}} \\leq\\ \\sigma^2 \\leq\\ \\frac{(n-1)\\ S^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n-1}}$\n",
|
||
|
"\n",
|
||
|
"$\\ \\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{\\frac{\\alpha}{2}, n-1}}} \\leq\\ \\sigma \\leq\\ \\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n-1}}}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the variance of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{(n-1)\\ S^2}{\\chi^2_{\\frac{\\alpha}{2}, n-1}},\\ \\frac{(n-1)\\ S^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n-1}}]$\n",
|
||
|
"\n",
|
||
|
"and the $1-\\alpha$ confidence interval for the standard deviation of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{\\frac{\\alpha}{2}, n-1}}},\\ \\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n-1}}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "fwZHtEgR2Jd6"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\ 0 \\leq\\ \\sigma^2 \\leq\\ \\frac{(n-1)\\ S^2}{\\chi^2_{1-\\alpha, n-1}}$\n",
|
||
|
"\n",
|
||
|
"$\\ 0 \\leq\\ \\sigma \\leq\\ \\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{1-\\alpha, n-1}}}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the variance of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[0,\\ \\frac{(n-1)\\ S^2}{\\chi^2_{1-\\alpha, n-1}}]$\n",
|
||
|
"\n",
|
||
|
"and the $1-\\alpha$ confidence interval for the standard deviation of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[0,\\ \\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{1-\\alpha, n-1}}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "NdiBcaI24-PO"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\ \\frac{(n-1)\\ S^2}{\\chi^2_{\\alpha, n-1}} \\leq\\ \\sigma^2 \\leq\\ \\infty$\n",
|
||
|
"\n",
|
||
|
"$\\ \\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{\\alpha, n-1}}} \\leq\\ \\sigma \\leq\\ \\infty$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the variance of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{(n-1)\\ S^2}{\\chi^2_{\\alpha, n-1}},\\ \\infty]$\n",
|
||
|
"\n",
|
||
|
"and the $1-\\alpha$ confidence interval for the standard deviation of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\sqrt{\\frac{(n-1)\\ S^2}{\\chi^2_{\\alpha, n-1}}},\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "jeU7NJdw6mlj"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_var_with_unknown_mean:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" population_sd : known standrad deviation of the population\n",
|
||
|
" Sample_var : optional, variance of the sample\n",
|
||
|
" n : optional, number of sample members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_c : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, c_level, type_c, Sample_var = 0., n = 0., data=None):\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n = n\n",
|
||
|
" self.Sample_var = Sample_var\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data = data\n",
|
||
|
" if data is not None:\n",
|
||
|
" self.n = len(list(data))\n",
|
||
|
" self.Sample_var = np.std(list(data), ddof=1)**2\n",
|
||
|
"\n",
|
||
|
" confidence_interval_for_var_with_unknown_mean.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" alpha = 1 - self.c_level\n",
|
||
|
" c_u = ((self.n-1) * self.Sample_var) / chi2.isf(1-(alpha/2), self.n-1)\n",
|
||
|
" c_l = ((self.n-1) * self.Sample_var) / chi2.isf(alpha/2, self.n-1)\n",
|
||
|
" c_u_r = np.sqrt(c_u)\n",
|
||
|
" c_l_r = np.sqrt(c_l)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\sigma^2 \\leq {c_u}$'))\n",
|
||
|
" display(Latex(f'${c_l_r} \\leq \\sigma \\leq {c_u_r}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" alpha = 1 - self.c_level\n",
|
||
|
" c_u = ((self.n-1) * self.Sample_var) / chi2.isf(1-alpha, self.n-1)\n",
|
||
|
" c_u_r = np.sqrt(c_u)\n",
|
||
|
" display(Latex(f'$0 \\leq \\sigma^2 \\leq {c_u}$'))\n",
|
||
|
" display(Latex(f'$0 \\leq \\sigma \\leq {c_u_r}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" alpha = 1 - self.c_level\n",
|
||
|
" c_l = ((self.n-1) * self.Sample_var) / chi2.isf((alpha), self.n-1)\n",
|
||
|
" c_l_r = np.sqrt(c_l)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\sigma^2$'))\n",
|
||
|
" display(Latex(f'${c_l_r} \\leq \\sigma$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "5laNCDiU2FwW"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"data = [.123, .133, .124, .125, .126, .128, .120, .124, .130, .126]\n",
|
||
|
"confidence_interval_for_var_with_unknown_mean(c_level = 0.9, type_c = 'two_sided_confidence', data=data);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "R9guzCQHBGdH",
|
||
|
"outputId": "7d590ce4-0021-41b5-b0fd-253838c365d5"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$7.2640323116473104e-06 \\leq \\sigma^2 \\leq 3.6961151636179396e-05$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
},
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$0.0026951868787984464 \\leq \\sigma \\leq 0.006079568375812496$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Known_Mean_of_the_Population'></a>\n",
|
||
|
"\n",
|
||
|
"### **3.2.2. Known Mean of the Population:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "f_p42Mn-MC3E"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having a known mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$S' = \\sqrt{\\frac{\\sum_{i=1}^n\\ (x_i\\ -\\ \\mu)^2}{n}}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\ \\chi^2_{1-\\frac{\\alpha}{2}, n}\\ \\leq\\ \\frac{(n)\\ S'^2}{\\sigma^2} \\ \\leq \\chi^2_{\\frac{\\alpha}{2}, n} $\n",
|
||
|
"\n",
|
||
|
"$\\ \\frac{(n)\\ S'^2}{\\chi^2_{\\frac{\\alpha}{2}, n}} \\leq\\ \\sigma^2 \\leq\\ \\frac{(n)\\ S'^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n}}$\n",
|
||
|
"\n",
|
||
|
"$\\ \\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{\\frac{\\alpha}{2}, n}}} \\leq\\ \\sigma \\leq\\ \\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n}}}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the variance of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{(n)\\ S'^2}{\\chi^2_{\\frac{\\alpha}{2}, n}},\\ \\frac{(n)\\ S'^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n}}]$\n",
|
||
|
"\n",
|
||
|
"and the $1-\\alpha$ confidence interval for the standard deviation of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{\\frac{\\alpha}{2}, n}}},\\ \\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{1-\\frac{\\alpha}{2}, n}}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "qDBOjmIFMQnw"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having a known mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\ \\chi^2_{1-\\alpha, n}\\ \\leq\\ \\frac{(n)\\ S'^2}{\\sigma^2} \\ \\leq \\infty $\n",
|
||
|
"\n",
|
||
|
"$\\ 0 \\leq\\ \\sigma^2 \\leq\\ \\frac{(n)\\ S'^2}{\\chi^2_{1-\\alpha, n}}$\n",
|
||
|
"\n",
|
||
|
"$\\ 0 \\leq\\ \\sigma \\leq\\ \\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{1-\\alpha, n}}}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the variance of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[0,\\ \\frac{(n)\\ S'^2}{\\chi^2_{1-\\alpha, n}}]$\n",
|
||
|
"\n",
|
||
|
"and the $1-\\alpha$ confidence interval for the standard deviation of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[0,\\ \\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{1-\\alpha, n}}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "RxHYrQXhMQnx"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having a known mean $\\mu$ and an unknown variance $\\sigma^2$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu, \\sigma^2)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\ 0\\ \\leq\\ \\frac{(n)\\ S'^2}{\\sigma^2} \\ \\leq \\chi^2_{\\alpha, n} $\n",
|
||
|
"\n",
|
||
|
"$\\ \\frac{(n)\\ S'^2}{\\chi^2_{\\alpha, n}} \\leq\\ \\sigma^2 \\leq\\ \\infty$\n",
|
||
|
"\n",
|
||
|
"$\\ \\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{\\alpha, n}}} \\leq\\ \\sigma \\leq\\ \\infty$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the variance of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{(n)\\ S'^2}{\\chi^2_{\\alpha, n}},\\ \\infty]$\n",
|
||
|
"\n",
|
||
|
"and the $1-\\alpha$ confidence interval for the standard deviation of a normal population is:\n",
|
||
|
"\n",
|
||
|
"$[\\sqrt{\\frac{(n)\\ S'^2}{\\chi^2_{\\alpha, n}}},\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "pdrZXyj-MQny"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_var_with_known_mean:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" population_sd : known standrad deviation of the population\n",
|
||
|
" Sample_var : optional, variance of the sample\n",
|
||
|
" n : optional, number of sample members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_c : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, c_level, type_c, Sample_var = 0., n = 0., data=None):\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n = n\n",
|
||
|
" self.Sample_var = Sample_var\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data = data\n",
|
||
|
" if data is not None:\n",
|
||
|
" self.n = len(list(data))\n",
|
||
|
" self.Sample_var = np.std(list(data))**2\n",
|
||
|
"\n",
|
||
|
" confidence_interval_for_var_with_known_mean.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" alpha = 1 - self.c_level\n",
|
||
|
" c_u = ((self.n) * self.Sample_var) / chi2.isf(1-(alpha/2), self.n)\n",
|
||
|
" c_l = ((self.n) * self.Sample_var) / chi2.isf(alpha/2, self.n)\n",
|
||
|
" c_u_r = np.sqrt(c_u)\n",
|
||
|
" c_l_r = np.sqrt(c_l)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\sigma^2 \\leq {c_u}$'))\n",
|
||
|
" display(Latex(f'${c_l_r} \\leq \\sigma \\leq {c_u_r}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" alpha = 1 - self.c_level\n",
|
||
|
" c_u = ((self.n) * self.Sample_var) / chi2.isf(1-alpha, self.n)\n",
|
||
|
" c_u_r = np.sqrt(c_u)\n",
|
||
|
" display(Latex(f'$0 \\leq \\sigma^2 \\leq {c_u}$'))\n",
|
||
|
" display(Latex(f'$0 \\leq \\sigma \\leq {c_u_r}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" alpha = 1 - self.c_level\n",
|
||
|
" c_l = ((self.n) * self.Sample_var) / chi2.isf((alpha), self.n)\n",
|
||
|
" c_l_r = np.sqrt(c_l)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\sigma^2$'))\n",
|
||
|
" display(Latex(f'${c_l_r} \\leq \\sigma$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "_53bdypaM_zk"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"data = [.123, .133, .124, .125, .126, .128, .120, .124, .130, .126]\n",
|
||
|
"confidence_interval_for_var_with_known_mean(c_level = 0.9, type_c = 'two_sided_confidence', data=data);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "HsAuQHyiNa7q",
|
||
|
"outputId": "5cb6a1ae-1c2d-410d-c645-f345cc99cbc8"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$6.713265119259053e-06 \\leq \\sigma^2 \\leq 3.119052532672652e-05$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
},
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$0.002590996935401324 \\leq \\sigma \\leq 0.0055848478338023245$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Confidence_Interval_for_the_Difference_in_Means_of_Two_Normal_Populations'></a>\n",
|
||
|
"\n",
|
||
|
"## **3.3. Confidence Interval for the Difference in Means of Two Normal Populations:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "LGrRkUrrO3qG"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Known_Variances'></a>\n",
|
||
|
"\n",
|
||
|
"### **3.3.1. Known Variances:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "fyBlrUYCPUzl"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a known variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a known variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_{n_x} \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_{n_y} \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ Z_{\\frac{\\alpha}{2}}\\ \\leq\\ \\frac{\\overline{X}-\\overline{Y} -\\ (\\mu_x - \\mu_y)}{\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}\\ \\leq\\ Z_{\\frac{\\alpha}{2}}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X} - \\overline{Y}-\\ Z_{\\frac{\\alpha}{2}} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}\\ \\leq\\ \\mu_x\\ - \\mu_y\\ \\leq\\ \\overline{X} - \\overline{Y}+\\ Z_{\\frac{\\alpha}{2}} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the difference in means of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X} - \\overline{Y}-\\ Z_{\\frac{\\alpha}{2}} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}},\\ \\overline{X} - \\overline{Y}+\\ Z_{\\frac{\\alpha}{2}} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "KwH7pIJtmLfS"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a known variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a known variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_{n_x} \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_{n_y} \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ \\infty\\ \\leq\\ \\frac{\\overline{X}-\\overline{Y} -\\ (\\mu_x - \\mu_y)}{\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}\\ \\leq\\ Z_{\\alpha}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ \\infty\\ \\leq\\ \\mu_x\\ - \\mu_y\\ \\leq\\ \\overline{X} - \\overline{Y}+\\ Z_{\\alpha} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}\\ ) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the difference in means of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[-\\infty,\\ \\overline{X} - \\overline{Y}+\\ Z_{\\alpha} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "QpffIAp_oC9K"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a known variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a known variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_{n_x} \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_{n_y} \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ Z_{\\alpha}\\ \\leq\\ \\frac{\\overline{X}-\\overline{Y} -\\ (\\mu_x - \\mu_y)}{\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}\\ \\leq\\ \\infty) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X} - \\overline{Y}-\\ Z_{\\alpha} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}}\\ \\leq\\ \\mu_x\\ - \\mu_y\\ \\leq\\ \\infty) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the difference in means of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X} - \\overline{Y}-\\ Z_{\\alpha} {\\sqrt{(\\frac{\\sigma^2_x}{n_x}) + (\\frac{\\sigma^2_y}{n_y})}},\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "xJ2xK9a6pNlQ"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_two_mean_with_known_variances:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" population_sd1 : known standard deviation of the population1\n",
|
||
|
" population_sd2 : known standard deviation of the population2\n",
|
||
|
" n1 : optional, number of sample1 members\n",
|
||
|
" n2 : optional, number of sample2 members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_c : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" Sample_mean1 : optional, mean of the sample1\n",
|
||
|
" Sample_mean2 : optional, mean of the sample2\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, population_sd1, population_sd2, c_level, type_c, Sample_mean1 = 0., Sample_mean2 = 0., n1 = 0., n2 = 0., data1=None, data2=None):\n",
|
||
|
" self.Sample_mean1 = Sample_mean1\n",
|
||
|
" self.Sample_mean2 = Sample_mean2\n",
|
||
|
" self.population_sd1 = population_sd1\n",
|
||
|
" self.population_sd2 = population_sd2\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n1 = n1\n",
|
||
|
" self.n2 = n2\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data1 = data1\n",
|
||
|
" self.data2 = data2\n",
|
||
|
" if data1 is not None:\n",
|
||
|
" self.Sample_mean1 = np.mean(list(data1))\n",
|
||
|
" self.n1 = len(list(data1))\n",
|
||
|
" if data2 is not None:\n",
|
||
|
" self.Sample_mean2 = np.mean(list(data2))\n",
|
||
|
" self.n2 = len(list(data2)) \n",
|
||
|
"\n",
|
||
|
" confidence_interval_for_two_mean_with_known_variances.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" c_u = self.Sample_mean1 - self.Sample_mean2 + (-norm.ppf((1-self.c_level)/2)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)\n",
|
||
|
" c_l = self.Sample_mean1 - self.Sample_mean2 - (-norm.ppf((1-self.c_level)/2)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu_x - \\mu_y \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" c_u = self.Sample_mean1 - self.Sample_mean2 + (-norm.ppf(1-self.c_level)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)\n",
|
||
|
" display(Latex(f'$\\mu_x - \\mu_y \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" c_l = self.Sample_mean1 - self.Sample_mean2 - (-norm.ppf(1-self.c_level)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu_x - \\mu_y$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "KccWG8REO5AV"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"data1 = [36,44,41,53,38,36,34,54,52,37,51,44,35,44]\n",
|
||
|
"data2 = [52,64,38,68,66,52,60,44,48,46,70,62]\n",
|
||
|
"confidence_interval_for_two_mean_with_known_variances(population_sd1 = np.sqrt(40), population_sd2 = 10, c_level = 0.95, \n",
|
||
|
" type_c = 'two_sided_confidence', data1=data1, data2=data2);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "3k4wDPQksf4c",
|
||
|
"outputId": "ee22dfdb-a906-44e4-e43d-296ef38fad63"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$-19.604123716241833 \\leq \\mu_x - \\mu_y \\leq -6.491114378996268$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Unknown_but_Equal_Variances'></a>\n",
|
||
|
"\n",
|
||
|
"### **3.3.2. Unknown but Equal Variances:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "KiM27pBRqGIU"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a unknown variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a unknown variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_{n_x} \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_{n_y} \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$S_p^2 = \\frac{(n_x-1)S_x^2 + (n_y-1)S_y^2}{n_x+n_y-2}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ t_{\\frac{\\alpha}{2}, n_x+n_y-2}\\ \\leq\\ \\frac{\\overline{X}-\\overline{Y} -\\ (\\mu_x - \\mu_y)}{{{S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}}}\\ \\leq\\ t_{\\frac{\\alpha}{2}, n_x+n_y-2}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X} - \\overline{Y}-\\ t_{\\frac{\\alpha}{2}, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}\\ \\leq\\ \\mu_x\\ - \\mu_y\\ \\leq\\ \\overline{X} - \\overline{Y}+\\ t_{\\frac{\\alpha}{2}, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the difference in means of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X} - \\overline{Y}-\\ t_{\\frac{\\alpha}{2}, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}},\\ \\overline{X} - \\overline{Y}+\\ t_{\\frac{\\alpha}{2}, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "mRzN17zcuBzG"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a unknown variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a unknown variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_{n_x} \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_{n_y} \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$S_p^2 = \\frac{(n_x-1)S_x^2 + (n_y-1)S_y^2}{n_x+n_y-2}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ \\infty\\ \\leq\\ \\frac{\\overline{X}-\\overline{Y} -\\ (\\mu_x - \\mu_y)}{{{S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}}}\\ \\leq\\ t_{\\alpha, n_x+n_y-2}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ \\infty\\ \\leq\\ \\mu_x\\ - \\mu_y\\ \\leq\\ \\overline{X} - \\overline{Y}+\\ t_{\\alpha, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the difference in means of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[-\\infty,\\ \\overline{X} - \\overline{Y}+\\ t_{\\alpha, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "kbFCWqUE-qFS"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a unknown variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a unknown variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_{n_x} \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_{n_y} \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$S_p^2 = \\frac{(n_x-1)S_x^2 + (n_y-1)S_y^2}{n_x+n_y-2}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ t_{\\alpha, n_x+n_y-2}\\ \\leq\\ \\frac{\\overline{X}-\\overline{Y} -\\ (\\mu_x - \\mu_y)}{{{S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}}}\\ \\leq\\ \\infty) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X} - \\overline{Y}-\\ t_{\\alpha, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}\\ \\leq\\ \\mu_x\\ - \\mu_y\\ \\leq\\ \\infty) = 1- \\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the difference in means of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X} - \\overline{Y}-\\ t_{\\alpha, n_x+n_y-2}\\ {S_p} \\sqrt{\\frac{1}{n_x} + \\frac{1}{n_y}}\\ ,\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "155C-e99_8qt"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_two_mean_with_unknown_variances:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" n1 : optional, number of sample1 members\n",
|
||
|
" n2 : optional, number of sample2 members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_c : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" Sample_mean1 : optional, mean of the sample1\n",
|
||
|
" Sample_mean2 : optional, mean of the sample2\n",
|
||
|
" S1 : optional, std of the sample1\n",
|
||
|
" S2 : optional, std of the sample2\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, c_level, type_c, Sample_mean1 = 0., S1 = 0., S2 = 0., Sample_mean2 = 0., n1 = 0., n2 = 0., data1=None, data2=None):\n",
|
||
|
" self.Sample_mean1 = Sample_mean1\n",
|
||
|
" self.Sample_mean2 = Sample_mean2\n",
|
||
|
" self.S1 = S1\n",
|
||
|
" self.S2 = S2\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n1 = n1\n",
|
||
|
" self.n2 = n2\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data1 = data1\n",
|
||
|
" self.data2 = data2\n",
|
||
|
" if data1 is not None:\n",
|
||
|
" self.Sample_mean1 = np.mean(list(data1))\n",
|
||
|
" self.n1 = len(list(data1))\n",
|
||
|
" self.S1 = np.std(list(data1), ddof = 1)\n",
|
||
|
" if data2 is not None:\n",
|
||
|
" self.Sample_mean2 = np.mean(list(data2))\n",
|
||
|
" self.n2 = len(list(data2)) \n",
|
||
|
" self.S2 = np.std(list(data2), ddof = 1)\n",
|
||
|
" \n",
|
||
|
" self.SP2 = ((self.n1-1)*(self.S1**2) + (self.n2-1)*(self.S2**2)) / (self.n1+self.n2-2)\n",
|
||
|
"\n",
|
||
|
" confidence_interval_for_two_mean_with_unknown_variances.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" alpha = 1-self.c_level\n",
|
||
|
" c_u = self.Sample_mean1 - self.Sample_mean2 + (t.isf(alpha/2, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))\n",
|
||
|
" c_l = self.Sample_mean1 - self.Sample_mean2 - (t.isf(alpha/2, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu_x - \\mu_y \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" alpha = 1-self.c_level\n",
|
||
|
" c_u = self.Sample_mean1 - self.Sample_mean2 + (t.isf(alpha, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))\n",
|
||
|
" display(Latex(f'$\\mu_x - \\mu_y \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" alpha = 1-self.c_level\n",
|
||
|
" c_l = self.Sample_mean1 - self.Sample_mean2 - (t.isf(alpha, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\mu_x - \\mu_y$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "YsSOe8TEqRfy"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"data1 = [140,136,138,150,152,144,132,142,150,154,136,142]\n",
|
||
|
"data2 = [144,132,136,140,128,150,130,134,130,146,128,131,137,135]\n",
|
||
|
"confidence_interval_for_two_mean_with_unknown_variances(c_level = 0.9, type_c = 'two_sided_confidence', data1=data1, data2=data2);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "_j0HozbIDoxI",
|
||
|
"outputId": "78ae25b1-5bc7-400e-bf3f-d158715d4c8e"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$2.4981644975614827 \\leq \\mu_x - \\mu_y \\leq 11.930406931009962$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Confidence_Interval_for_the_Ratio_of_Variances_of_Two_Normal_Populations'></a>\n",
|
||
|
"## **3.4. Confidence Interval for the Ratio of Variances of Two Normal Populations:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "Bt-qEVhPYD5x"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a unknown variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a unknown variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$S^2_x = \\frac{\\sum_{i=1}^{n_x}\\ (x_i\\ -\\ \\overline{x})^2}{{n_x}-1}$\n",
|
||
|
"\n",
|
||
|
"$S^2_y = \\frac{\\sum_{i=1}^{n_y}\\ (y_i\\ -\\ \\overline{y})^2}{{n_y}-1}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(F_{{1-\\frac{\\alpha}{2}}, n_x-1, n_y-1}\\ \\leq\\ \\frac{S^2_x}{S^2_y}/\\frac{\\sigma^2_x}{\\sigma^2_y} \\ \\leq F_{{\\frac{\\alpha}{2}}, n_x-1, n_y-1}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{\\frac{\\alpha}{2}, n_x-1, n_y-1}} \\leq\\ \\frac{\\sigma^2_x}{\\sigma^2_y} \\leq\\ \\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{1-\\frac{\\alpha}{2}, n_x-1, n_y-1}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{\\frac{\\alpha}{2}, n_x-1, n_y-1}}} \\leq\\ \\frac{\\sigma_x}{\\sigma_y} \\leq\\ \\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{1-\\frac{\\alpha}{2}, n_x-1, n_y-1}}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the ratio of variances of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{\\frac{\\alpha}{2}, n_x-1, n_y-1}}, \\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{1-\\frac{\\alpha}{2}, n_x-1, n_y-1}}]$\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the ratio of standard deviations of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{\\frac{\\alpha}{2}, n_x-1, n_y-1}}}\\ ,\\ \\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{1-\\frac{\\alpha}{2}, n_x-1, n_y-1}}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "yjRHUnfRaKnh"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a unknown variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a unknown variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$S^2_x = \\frac{\\sum_{i=1}^{n_x}\\ (x_i\\ -\\ \\overline{x})^2}{{n_x}-1}$\n",
|
||
|
"\n",
|
||
|
"$S^2_y = \\frac{\\sum_{i=1}^{n_y}\\ (y_i\\ -\\ \\overline{y})^2}{{n_y}-1}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(0 \\leq\\ \\frac{\\sigma^2_x}{\\sigma^2_y} \\leq\\ \\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{1-\\alpha, n_x-1, n_y-1}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(0 \\leq\\ \\frac{\\sigma_x}{\\sigma_y} \\leq\\ \\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{1-\\alpha, n_x-1, n_y-1}}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the ratio of variances of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[0\\ ,\\ \\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{1-\\alpha, n_x-1, n_y-1}}]$\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the ratio of standard deviations of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[0\\ ,\\ \\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{1-\\alpha, n_x-1, n_y-1}}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "qZnUvRNddfbx"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n_x$ from a normal distribution having an unknown mean $\\mu_x$ and a unknown variance $\\sigma^2_x$. \n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n$ is a sample of size $n_y$ from a normal distribution having an unknown mean $\\mu_y$ and a unknown variance $\\sigma^2_y$.\n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim N( \\mu_x, \\sigma^2_x)$\n",
|
||
|
"\n",
|
||
|
"$Y_1, Y_2, ..., Y_n \\sim N( \\mu_y, \\sigma^2_y)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"$S^2_x = \\frac{\\sum_{i=1}^{n_x}\\ (x_i\\ -\\ \\overline{x})^2}{{n_x}-1}$\n",
|
||
|
"\n",
|
||
|
"$S^2_y = \\frac{\\sum_{i=1}^{n_y}\\ (y_i\\ -\\ \\overline{y})^2}{{n_y}-1}$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{\\alpha, n_x-1, n_y-1}} \\leq\\ \\frac{\\sigma^2_x}{\\sigma^2_y} \\leq\\ \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{\\alpha, n_x-1, n_y-1}}} \\leq\\ \\frac{\\sigma_x}{\\sigma_y} \\leq\\ \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the ratio of variances of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{S^2_x}{S^2_y}\\ \\times \\frac{1}{F_{\\alpha, n_x-1, n_y-1}}, \\infty]$\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for the ratio of standard deviations of two normal populations is:\n",
|
||
|
"\n",
|
||
|
"$[\\frac{S_x}{S_y}\\ \\times \\sqrt{\\frac{1}{F_{\\alpha, n_x-1, n_y-1}}}\\ ,\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "l0gydmtze0L6"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_ratio_variances:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" n1 : optional, number of sample1 members\n",
|
||
|
" n2 : optional, number of sample2 members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_c : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" Sample_mean1 : optional, mean of the sample1\n",
|
||
|
" Sample_mean2 : optional, mean of the sample2\n",
|
||
|
" S1 : optional, std of the sample1\n",
|
||
|
" S2 : optional, std of the sample2\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, c_level, type_c, S1 = 0., S2 = 0., n1 = 0., n2 = 0., data1=None, data2=None):\n",
|
||
|
" self.S1 = S1\n",
|
||
|
" self.S2 = S2\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n1 = n1\n",
|
||
|
" self.n2 = n2\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data1 = data1\n",
|
||
|
" self.data2 = data2\n",
|
||
|
" if data1 is not None:\n",
|
||
|
" self.n1 = len(list(data1))\n",
|
||
|
" self.S1 = np.std(list(data1), ddof = 1)\n",
|
||
|
" if data2 is not None:\n",
|
||
|
" self.n2 = len(list(data2)) \n",
|
||
|
" self.S2 = np.std(list(data2), ddof = 1)\n",
|
||
|
" \n",
|
||
|
" confidence_interval_for_ratio_variances.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" alpha = 1-self.c_level\n",
|
||
|
" c_u = ((self.S1**2)/(self.S2**2)) * (1/f.isf(1-alpha/2, self.n1-1, self.n2-1))\n",
|
||
|
" c_l = ((self.S1**2)/(self.S2**2)) * (1/f.isf(alpha/2, self.n1-1, self.n2-1))\n",
|
||
|
" c_u_r = np.sqrt(c_u)\n",
|
||
|
" c_l_r = np.sqrt(c_l)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\sigma^2_x/ \\sigma^2_y \\leq {c_u}$'))\n",
|
||
|
" display(Latex(f'${c_l_r} \\leq \\sigma_x/ \\sigma_y \\leq {c_u_r}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" alpha = 1-self.c_level\n",
|
||
|
" c_u = ((self.S1**2)/(self.S2**2)) * (1/f.isf(1-alpha, self.n1-1, self.n1-1))\n",
|
||
|
" c_u_r = np.sqrt(c_u)\n",
|
||
|
" display(Latex(f'$\\sigma^2_x/ \\sigma^2_y \\leq {c_u}$'))\n",
|
||
|
" display(Latex(f'$\\sigma_x/ \\sigma_y \\leq {c_u_r}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" alpha = 1-self.c_level\n",
|
||
|
" c_l = ((self.S1**2)/(self.S2**2)) * (1/f.isf(alpha, self.n1-1, self.n1-1))\n",
|
||
|
" c_l_r = np.sqrt(c_l)\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\sigma^2_x/ \\sigma^2_y$'))\n",
|
||
|
" display(Latex(f'${c_l} \\leq \\sigma_x/ \\sigma_y$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "tFM9PjrWYGr3"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"confidence_interval_for_ratio_variances(c_level = 0.95, type_c = 'two_sided_confidence', S1 = 2.51, S2 = 1.9, n1 = 10, n2 = 10);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "HgzCfL8AjHoy",
|
||
|
"outputId": "c1e36e7d-fa65-49e2-956f-ff3302e37557"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$0.4334780396566579 \\leq \\sigma^2_x/ \\sigma^2_y \\leq 7.026084708199053$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
},
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$0.6583904917726697 \\leq \\sigma_x/ \\sigma_y \\leq 2.6506762737458254$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"<a name='Confidence_Interval_for_the_Mean_of_a_Bernoulli_Random_Variable'></a>\n",
|
||
|
"\n",
|
||
|
"## **3.5. Confidence Interval for the Mean of a Bernoulli Random Variable:**"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "Djupsq6Kklgq"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**A. Two-sided Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ (large) from a bernoulli distribution having an unknown parameter $P$. \n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim Ber(P)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"We accept $H_0$ if:\n",
|
||
|
"\n",
|
||
|
"$P(-\\ Z_{\\frac{\\alpha}{2}}\\ \\leq\\ \\frac{\\overline{X}\\ -\\ P}{\\sqrt{\\frac{P(1-P)}{n}}}\\ \\leq Z_{\\frac{\\alpha}{2}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ Z_{\\frac{\\alpha}{2}} \\sqrt{\\frac{P(1-P)}{n}} \\ \\leq\\ P \\ \\leq \\overline{X}\\ +\\ Z_{\\frac{\\alpha}{2}} \\sqrt{\\frac{P(1-P)}{n}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"Approximtely:\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ Z_{\\frac{\\alpha}{2}} \\sqrt{\\frac{\\overline{X}(1-\\overline{X})}{n}} \\ \\leq\\ P \\ \\leq \\overline{X}\\ +\\ Z_{\\frac{\\alpha}{2}} \\sqrt{\\frac{\\overline{X}(1-\\overline{X})}{n}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for ratio of variances of two normal populatios is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X}\\ -\\ Z_{\\frac{\\alpha}{2}} \\sqrt{\\frac{P\\overline{X}(1-\\overline{X})}{n}}\\ ,\\ \\overline{X}\\ +\\ Z_{\\frac{\\alpha}{2}} \\sqrt{\\frac{\\overline{X}(1-\\overline{X})}{n}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "QLb52JRhlaKx"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**B. One-sided Lower Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ (large) from a bernoulli distribution having an unknown parameter $P$. \n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim Ber(P)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"We accept $H_0$ if:\n",
|
||
|
"\n",
|
||
|
"$P(-\\infty\\ \\leq\\ \\frac{\\overline{X}\\ -\\ P}{\\sqrt{\\frac{P(1-P)}{n}}}\\ \\leq Z_{\\alpha}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(-\\ \\infty \\ \\leq\\ P \\ \\leq \\overline{X}\\ +\\ Z_{\\alpha} \\sqrt{\\frac{P(1-P)}{n}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"Approxiamtely:\n",
|
||
|
"\n",
|
||
|
"$P(-\\ \\infty \\ \\leq\\ P \\ \\leq \\overline{X}\\ +\\ Z_{\\alpha} \\sqrt{\\frac{\\overline{X}(1-\\overline{X})}{n}}) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for ratio of variances of two normal populatios is:\n",
|
||
|
"\n",
|
||
|
"$[-\\infty\\ ,\\ \\overline{X}\\ +\\ Z_{\\alpha} \\sqrt{\\frac{\\overline{X}(1-\\overline{X})}{n}}]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "OeVw7qYlnL5Z"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"**C. One-sided Upper Confidence Interval:**\n",
|
||
|
"\n",
|
||
|
"Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ (large) from a bernoulli distribution having an unknown parameter $P$. \n",
|
||
|
"\n",
|
||
|
"$X_1, X_2, ..., X_n \\sim Ber(P)$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Significance level = $\\alpha$\n",
|
||
|
"\n",
|
||
|
"We accept $H_0$ if:\n",
|
||
|
"\n",
|
||
|
"$P(-\\ Z_{\\alpha}\\ \\leq\\ \\frac{\\overline{X}\\ -\\ P}{\\sqrt{\\frac{P(1-P)}{n}}}\\ \\leq \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ Z_{\\alpha} \\sqrt{\\frac{P(1-P)}{n}} \\ \\leq\\ P \\ \\leq \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"Approxiamtely:\n",
|
||
|
"\n",
|
||
|
"$P(\\overline{X}\\ -\\ Z_{\\alpha} \\sqrt{\\frac{\\overline{X}(1-\\overline{X})}{n}} \\ \\leq\\ P \\ \\leq \\infty) = 1-\\alpha$\n",
|
||
|
"\n",
|
||
|
"$\\\\ $\n",
|
||
|
"\n",
|
||
|
"Therefore, the $1-\\alpha$ confidence interval for ratio of variances of two normal populatios is:\n",
|
||
|
"\n",
|
||
|
"$[\\overline{X}\\ -\\ Z_{\\alpha} \\sqrt{\\frac{\\overline{X}(1-\\overline{X})}{n}}\\ ,\\ \\infty]$"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "wSlhJ0A6nwyW"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"class confidence_interval_for_p_bernoulli:\n",
|
||
|
" \"\"\"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" n : optional, number of sample members\n",
|
||
|
" c_level : % confidence level\n",
|
||
|
" type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'\n",
|
||
|
" Sample_mean : mean of the sample\n",
|
||
|
" data : optional, if you do not know the Sample_mean and n, just pass the data\n",
|
||
|
" \"\"\"\n",
|
||
|
" def __init__(self, c_level, type_c, Sample_mean = 0., n = 0., data=None):\n",
|
||
|
" self.Sample_mean = Sample_mean\n",
|
||
|
" self.type_c = type_c\n",
|
||
|
" self.n = n\n",
|
||
|
" self.c_level = c_level\n",
|
||
|
" self.data = data\n",
|
||
|
" if data is not None:\n",
|
||
|
" self.Sample_mean = np.mean(list(data))\n",
|
||
|
" self.n = len(list(data))\n",
|
||
|
"\n",
|
||
|
" confidence_interval_for_p_bernoulli.__test(self)\n",
|
||
|
" \n",
|
||
|
" def __test(self):\n",
|
||
|
" if self.type_c == 'two_sided_confidence':\n",
|
||
|
" c_u = self.Sample_mean + (-norm.ppf((1-self.c_level)/2)) * (np.sqrt(self.Sample_mean*(1-self.Sample_mean)/self.n))\n",
|
||
|
" c_l = self.Sample_mean - (-norm.ppf((1-self.c_level)/2)) * (np.sqrt(self.Sample_mean*(1-self.Sample_mean)/self.n))\n",
|
||
|
" display(Latex(f'${c_l} \\leq P \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'lower_confidence':\n",
|
||
|
" c_u = self.Sample_mean + (-norm.ppf(1-self.c_level)) * (np.sqrt(self.Sample_mean*(1-self.Sample_mean)/self.n))\n",
|
||
|
" display(Latex(f'$P \\leq {c_u}$'))\n",
|
||
|
" elif self.type_c == 'upper_confidence':\n",
|
||
|
" c_l = self.Sample_mean - (-norm.ppf(1-self.c_level)) * (np.sqrt(self.Sample_mean*(1-self.Sample_mean)/self.n))\n",
|
||
|
" display(Latex(f'${c_l} \\leq P$'))"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "jUyHpFqalR6l"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"confidence_interval_for_p_bernoulli(c_level = 0.95, type_c = 'two_sided_confidence', Sample_mean = 0.2, n = 100);"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "9mxXutjYqTUi",
|
||
|
"outputId": "c9ed9318-7efd-4468-8d1a-8410d648f081"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.Latex object>"
|
||
|
],
|
||
|
"text/latex": "$0.12160144061839785 \\leq P \\leq 0.2783985593816022$"
|
||
|
},
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
}
|