From 57715cacec8d0f0d3d1436a26f92ae5c0f0e128e Mon Sep 17 00:00:00 2001
From: Jan Aalmoes <jan.aalmoes@inria.fr>
Date: Tue, 27 Aug 2024 21:07:18 +0200
Subject: debut du background sur ZF

---
 background/proba.tex | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 background/proba.tex

(limited to 'background/proba.tex')

diff --git a/background/proba.tex b/background/proba.tex
new file mode 100644
index 0000000..bea43e7
--- /dev/null
+++ b/background/proba.tex
@@ -0,0 +1,42 @@
+
+Probability theory is deeply linked with machine learning and most of the properties of machine learning, such as differential privacy, fairness definitions, utility metrics... are often mathematically written within this framework.
+This paper does not differ and hence we provide a short background of this field and how it connects with the previously defined notions of ML introduced in section \ref{sec:ml}.
+
+Soit $A$ un ensemble.
+L'ensemble des parties de $A$ est $\mathcal{P}(A)$. 
+Chaque élément $a \in \mathcal{P}(A)$ est tel que $a \subset A$.
+Une tribu $\mathcal{A}$ est un sous-ensemble de $\mathcal{P}(A)$ qui contient $\emptyset$ et $A$, et qui est stable par complémentaire et par union dénombrable.
+Nous disons que $(A,\mathcal{A})$ est un espace mesurable.
+Une mesure $d$ est une fonction $d:\mathcal{A}\rightarrow [0,+\infty]$ telle que $d(\emptyset) = 0$ et $d\left(\bigcup_{i\in \mathbb{N}} A_i\right) = \sum_{i\in \mathbb{N}}d(A_i)$ pour chaque $(A_1, A_2, \cdots) \in \mathcal{A}^\mathbb{N}$ avec $\forall i\neq j,~A_i\cap A_j = \emptyset$.
+Nous disons alors que $(A, \mathcal{A}, d)$ est un espace mesuré.
+Étant donné un second espace mesurable $(B,\mathcal{B})$, nous appelons fonction mesurable une fonction $f$ de $A$ dans $B$ telle que $\forall b\in\mathcal{B}$,~$f^{-1}(b)\in\mathcal{A}$.
+Nous notons alors $f:(A, \mathcal{A})\rightarrow (B, \mathcal{B})$ ou $f:(A, \mathcal{A},d)\rightarrow (B, \mathcal{B})$
+
+Dans le cas particulier où $d(A) = 1$, nous appelons $d$ une mesure de probabilité.
+ $(A,\mathcal{A},d)$ est alors un espace probabilisé et les fonctions mesurables sur cet espace sont appelées variables aléatoires.
+La loi de probabilité d'une variable aléatoire $f$ à valeurs dans $(X,\mathcal{X})$ est la mesure de probabilité suivante :
+$d_f :\mathcal{X}\rightarrow [0,1]$, $x\mapsto d(f^{-1}(x))$.
+
+Having introduced probability theory, we now make explicit its relation to the ML theory described previously.
+Let $I$ be a finite set, and let $\mathcal{X}$, $\mathcal{S}$ and $\mathcal{Y}$ be the sets of features, sensitive attributes and labels.
+Let $d:I\rightarrow \mathcal{X}\times\mathcal{S}\times\mathcal{Y}$ be a dataset.
+Let $\#$ be the measure on $(I,\mathcal{P}(I))$ which maps to every $a$ in $\mathcal{P}(I)$ the number of elements of $a$.
+Let $P:\mathcal{P}(I)\rightarrow [0,1]$, $a\mapsto \frac{\#(a)}{\#(I)}$.
+Then $(I, \mathcal{P}(I), P)$ is a probability space.
+On this space we can define the following random variables:
+\begin{itemize}
+    \item $X:I\rightarrow \mathcal{X},~i\mapsto (d(i))_0$
+    \item $S:I\rightarrow \mathcal{S},~i\mapsto (d(i))_1$
+    \item $Y:I\rightarrow \mathcal{Y},~i\mapsto (d(i))_2$
+\end{itemize}
+Where for a vector $u$, $u_j$ refers to the $j$th element of $u$.
+
+From there we can define various random variables that will be useful in the rest of the paper.
+For instance $\hat{Y}=f\circ X$ is a random variable that represents the prediction of a trained machine learning model $f$.
+We can use it to write the accuracy in a compact way: $P(\hat{Y}=Y)$, using the well-accepted abuse of notation that, for a random variable $A$ and an event $a$,
+$\{A\in a\} = \{i\in I~|~A(i)\in a\} = A^{-1}(a)$.
+The accuracy is a reliable metric of a trained model's utility when $P(Y=0) = P(Y=1) = \frac{1}{2}$ but not so much when $Y$ is imbalanced.
+To take into account a possibly imbalanced distribution of the labels, we will consider the balanced accuracy:
+$\frac{P(\hat{Y}=0|Y=0) + P(\hat{Y}=1|Y=1)}{2}$.
+
+Finally, in the context of attribute inference attacks at inference time, we define the random variable $\hat{S}=a\circ \hat{Y}$, where $a$ is a machine learning model trained to infer the sensitive attribute from the model's output.
-- 
cgit v1.2.3


From dc5a898dc39326fa3f733f3d9e006bbe3d1f8e4c Mon Sep 17 00:00:00 2001
From: Jan Aalmoes <jan.aalmoes@inria.fr>
Date: Thu, 29 Aug 2024 10:58:32 +0200
Subject: Fin ensemble et fonctions, debut ML et Mesure/Proba

---
 background/proba.tex | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'background/proba.tex')

diff --git a/background/proba.tex b/background/proba.tex
index bea43e7..6050ef7 100644
--- a/background/proba.tex
+++ b/background/proba.tex
@@ -1,15 +1,19 @@
 
-Probability theory is deeply linked with machine learning and most of the properties of machine learning, such as differential privacy, fairness definitions, utility metrics... are often mathematically written within this framework.
-This paper does not differ and hence we provide a short background of this field and how it connects with the previously defined notions of ML introduced in section \ref{sec:ml}.
+La théorie des probabilités est profondément liée au machine learning.
+Les propriétés de modèles comme la confidentialité différentielle, les définitions d'équité, les métriques d'utilité, etc. que nous aborderons en Section~\ref{sec:background-ml} s'écrivent en termes de probabilités.
+Ainsi nous présentons les notions de probabilité et de théorie de la mesure que nous allons utiliser.
+À la manière de la Section~\ref{sec:background-set}, notre présentation a principalement pour but de fixer les objets que nous utiliserons dans les prochaines sections et non pas d'être un cours complet.
+Si le lecteur souhaite en apprendre plus sur la théorie de la mesure, nous le renvoyons vers les notes de cours de Thierry Gallay de l'université Joseph Fourier~\cite{mesure}.
+S'il souhaite explorer plus avant les probabilités, il pourra consulter les notes de cours de Jean-François Le Gall de l'École normale supérieure de Paris~\cite{proba}.
 
 Soit $A$ un ensemble.
-L'ensemble des parties de $A$ est $\mathcal{P}(A)$. 
-Chaque élément $a \in \mathcal{P}(A)$ est tel que $a \subset A$.
-Une tribue $\mathcal{A}$ est un sous esemble de $\mathcal{P}(A)$ qui contien $\emptyset$, $A$ par complémentaire est union dénombrable.
+Nous appelons tribu, que nous notons $\mathcal{A}$, un sous-ensemble de $\mathcal{P}(A)$ qui contient $\emptyset$ et $A$, qui est stable par complémentaire et qui est stable par union d'un nombre dénombrable d'éléments de $\mathcal{A}$.
 Nous disons que $(A,\mathcal{A})$ est un espace mesurable.
-Une mesure $d$ est une fonction $d$:$\mathcal{A}$ $\rightarrow$ $[0,+\infty]$ telle que $d(\emptyset) = 0$ et $d\left(\bigcup_{i\in \mathbb{N}} A_i\right) = \sum_{i\in \mathbb{N}}d(A_i)$ pour chaque $(A_1, A_2, \cdots) \in \mathcal{A}^\mathbb{N} $ avec $\forall (i,j) A_i\cap A_j = \emptyset$.
+
+Nous appelons mesure une fonction $d:\mathcal{A}\rightarrow [0,+\infty]$ telle que $d(\emptyset) = 0$ et $d\left(\bigcup_{i\in \mathbb{N}} A_i\right) = \sum_{i\in \mathbb{N}}d(A_i)$ pour tout $(A_1, A_2, \cdots) \in \mathcal{A}^\mathbb{N}$ avec $\forall i\neq j,~A_i\cap A_j = \emptyset$.
 Nous disons alors que $(A, \mathcal{A}, d)$ est un espace mesuré.
-Nous appelons fonction mesurable un fonction de $A$ à $B$ telle que  $\forall b\in\mathcal{B}$~$f^{-1}(b)\in\mathcal{A}$.
+
+Étant donné un second espace mesurable $(B,\mathcal{B})$, nous appelons fonction mesurable une fonction $f$ de $A$ dans $B$ telle que $\forall b\in\mathcal{B}$,~$f^{-1}(b)\in\mathcal{A}$.
 Nous notons alors $f:(A, \mathcal{A})\rightarrow (B, \mathcal{B})$ ou $f:(A, \mathcal{A},d)\rightarrow (B, \mathcal{B})$
 
 Dans le cas particulier où $d(A) = 1$, nous appelons $d$ une mesure de probabilité.
-- 
cgit v1.2.3


From 0e95544f85b523a95fb05b36c4e6b8789c73abfa Mon Sep 17 00:00:00 2001
From: Jan Aalmoes <jan.aalmoes@inria.fr>
Date: Wed, 4 Sep 2024 00:12:49 +0200
Subject: traduction classification fini

---
 background/proba.tex | 87 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 30 deletions(-)

(limited to 'background/proba.tex')

diff --git a/background/proba.tex b/background/proba.tex
index 6050ef7..2cb0098 100644
--- a/background/proba.tex
+++ b/background/proba.tex
@@ -1,46 +1,73 @@
 
-La théorie des probability est profondément liée au machine learning.
+La théorie des probabilités est profondément liée à l'apprentissage automatique.
 Les propriétés de modèles comme la confidentialité différencielle, les définitions d'équitée, les métriques d'utilité, etc. que nous aborderons en Section~\ref{sec:background-ml} s'ecrivent en terme de probabilité.
 Ainsi nous présentons les notions de probabitlié et de théorie d la mesure que nous allons utiliser.
-A la manière de la Section~\ref{sec:background-set}, notre présentation à principalement le but de fixer les objets que nous utiliserons dans les prochaines sections et nous pas d'être un cours complet. 
+À la manière de la Section~\ref{sec:background-set}, notre présentation a principalement pour but de fixer les objets que nous utiliserons dans les prochaines sections et non pas d'être un cours complet.
 Si le lecteur souhaite en apprendre plus sur la theorie de la mesur nous le renvoyons vers les notes de cours de Thierry Gallay de l'université Joseph Fourrier~\cite{mesure}.
-Si il souhait explorer plus en avant les probabilités il poura consulter les notes de cour de Jean-François Le Gall de l'Ecole Normale Supérieur de Paris~\cite{proba}.
+S'il souhaite explorer plus avant les probabilités, il pourra consulter les notes de cours de Jean-François Le Gall de l'École normale supérieure de Paris~\cite{proba}.
 
 Soit $A$ un ensemble.
-Nous appelons une tribue que nous notons $\mathcal{A}$ un sous esemble de $\mathcal{P}(A)$ qui contien $\emptyset$ et $A$, qui est stable par complémentaire et qui est stable par union d'un nombre dénombrable d'elements de $\mathcal{A}$.
+Nous appelons tribu, que nous notons $\mathcal{A}$, un sous-ensemble de $\mathcal{P}(A)$ qui contient $\emptyset$ et $A$, qui est stable par complémentaire et qui est stable par union dénombrable d'éléments de $\mathcal{A}$.
 Nous disons que $(A,\mathcal{A})$ est un espace mesurable.
+Soit maintenant $C\subset\mathcal{P}(A)$, nous appelons $\sigma(C)$ la plus petite tribu pour l'inclusion qui contient tous les éléments de $C$.
 
 Nous appelons mesure, une fonction $d$ :$\mathcal{A}$ $\rightarrow$ $[0,+\infty]$ telle que $d(\emptyset) = 0$ et $d\left(\bigcup_{i\in \mathbb{N}} A_i\right) = \sum_{i\in \mathbb{N}}d(A_i)$ pour tout $(A_1, A_2, \cdots) \in \mathcal{A}^\mathbb{N} $ avec $\forall (i,j) A_i\cap A_j = \emptyset$.
 Nous disons alors que $(A, \mathcal{A}, d)$ est un espace mesuré.
 
+Soient $(A, \mathcal{A}, d)$ et $(B, \mathcal{B}, e)$ deux espaces mesurés.
+Nous définissons alors 
+\begin{equation*}
+    \mathcal{A}\otimes\mathcal{B} = \sigma\left(
+    \left\{
+        a\times b \mid a\in\mathcal{A}\wedge b\in\mathcal{B}
+    \right\}\right)
+\end{equation*}
+et de plus la mesure produit de $d$ et $e$, que l'on note $d\otimes e$, est l'unique mesure telle que 
+\begin{equation*}
+    \forall a\in\mathcal{A},~\forall b\in\mathcal{B},\quad (d\otimes e)(a\times b) = d(a)\cdot e(b).
+\end{equation*}
+Alors l'espace $(A\times B,\mathcal{A}\otimes\mathcal{B},d\otimes e)$ est un espace mesuré.
+
 Nous appelons fonction mesurable, une fonction de $A$ à $B$ telle que  $\forall b\in\mathcal{B}$~$f^{-1}(b)\in\mathcal{A}$.
 Nous notons alors $f:(A, \mathcal{A})\rightarrow (B, \mathcal{B})$ ou $f:(A, \mathcal{A},d)\rightarrow (B, \mathcal{B})$
+Nous définissons la mesure image de $d$ par $f$, que nous notons $d_f$, par l'expression suivante :
+\begin{equation}
+    d_f:
+    \left\{
+        \begin{matrix}
+            \mathcal{B}\rightarrow [0,+\infty]\\
+            b\mapsto d\left(f^{-1}(b)\right)
+            %
+        \end{matrix}
+    \right.
+\end{equation}
 
 Dans le cas particulier où $d(A) = 1$, nous appelons $d$ une mesure de probabilité.
  $(A,\mathcal{A},d)$ est alors un espace probailisé et les fonctions mesurables sur cet espace sont appelés variables aléatoires.
-Le loi de probabilité d'une variable aléatoire $f$ sur $(X,\mathcal{X})$ est la mesure de probabilite suivante :
-$d_X :\mathcal{X}\rightarrow [0,1]$, $x\mapsto d(X^{-1}(x))$.
-
-Having introduced probability theory, we explicit the relation with the ML theory described previously.
-Let $I$ a finite set, $\mathcal{X}$, $\mathcal{S}$ and $\mathcal{Y}$ the sets of features, sensitive attribute and label.
-Let $d:I\rightarrow \mathcal{X}\times\mathcal{S}\times\mathcal{Y}$ a dataset.
-Let $\#$ be the measure on $(I,\mathcal{P}(I))$ which maps to every $a$ in $\mathcal{P}(I)$ the number of elements of $a$.
-Let $P:\mathcal{P}(I)\rightarrow [0,1]$, $a\mapsto \frac{\#(a)}{\#(I)}$.
-Then $(I, \mathcal{P}(I), P)$ is a probability space.
-On this space we can define the following random variables:
-\begin{itemize}
-    \item $X:I\rightarrow \mathcal{X},~i\mapsto (d(i))_0$
-    \item $S:I\rightarrow \mathcal{S},~i\mapsto (d(i))_1$
-    \item $Y:I\rightarrow \mathcal{Y},~i\mapsto (d(i))_2$
-\end{itemize}
-Where for a vector $u$, $u_j$ refers to the $j$th element of $u$.
-
-From there we can define various random variables that will be useful in the rest of the paper.
-For instance $\hat{Y}=f\circ X$ is random variable that represents the prediction of a trained machine learning model $f$. 
-We can use it to write the accuracy in a compact way: $P(\hat{Y}=Y)$ by using the well accepted abuse of notations that for a random variable $A$ and an event $a$, 
-$\{A\in a\} = \{i\in\mathcal{P}(I)~|~A(i)\in a\} = A^{-1}(a)$.
-The accuracy is a reliable metric of a trained model's utility when $P(Y=0) = P(Y=1) = \frac{1}{2}$ but not so much when there is unbalance in $Y$. 
-To take into account an eventual unbalanced distribution of the labels, we will consider the balanced accuracy : 
-$\frac{P(\hat{Y}=0|Y=0) + P(\hat{Y}=1|Y=1)}{2}$.
-
-Finally in the context of attribute inference attack at inference time, we define the random variable $\hat{S}=a\circ \hat{Y}$ where here $a$ is a machine learning model trained to infer sensitive attribute from model's output. 
+La loi de probabilité d'une variable aléatoire $f$ à valeurs dans $(X,\mathcal{X})$ est la mesure image de $d$ par $f$.
+Nous dirons que deux variables aléatoires $f$ et $g$ sont indépendantes si et seulement si la loi de la variable aléatoire $h:\omega\mapsto (f(\omega),g(\omega))$ est la mesure produit des lois de $f$ et de $g$.
+
+
+%Having introduced probability theory, we explicit the relation with the ML theory described previously.
+%Let $I$ a finite set, $\mathcal{X}$, $\mathcal{S}$ and $\mathcal{Y}$ the sets of features, sensitive attribute and label.
+%Let $d:I\rightarrow \mathcal{X}\times\mathcal{S}\times\mathcal{Y}$ a dataset.
+%Let $\#$ be the measure on $(I,\mathcal{P}(I))$ which maps to every $a$ in $\mathcal{P}(I)$ the number of elements of $a$.
+%Let $P:\mathcal{P}(I)\rightarrow [0,1]$, $a\mapsto \frac{\#(a)}{\#(I)}$.
+%Then $(I, \mathcal{P}(I), P)$ is a probability space.
+%On this space we can define the following random variables:
+%\begin{itemize}
+%    \item $X:I\rightarrow \mathcal{X},~i\mapsto (d(i))_0$
+%    \item $S:I\rightarrow \mathcal{S},~i\mapsto (d(i))_1$
+%    \item $Y:I\rightarrow \mathcal{Y},~i\mapsto (d(i))_2$
+%\end{itemize}
+%Where for a vector $u$, $u_j$ refers to the $j$th element of $u$.
+
+%From there we can define various random variables that will be useful in the rest of the paper.
+%For instance $\hat{Y}=f\circ X$ is random variable that represents the prediction of a trained machine learning model $f$. 
+%We can use it to write the accuracy in a compact way: $P(\hat{Y}=Y)$ by using the well accepted abuse of notations that for a random variable $A$ and an event $a$, 
+%$\{A\in a\} = \{i\in\mathcal{P}(I)~|~A(i)\in a\} = A^{-1}(a)$.
+%The accuracy is a reliable metric of a trained model's utility when $P(Y=0) = P(Y=1) = \frac{1}{2}$ but not so much when there is unbalance in $Y$. 
+%To take into account an eventual unbalanced distribution of the labels, we will consider the balanced accuracy : 
+%$\frac{P(\hat{Y}=0|Y=0) + P(\hat{Y}=1|Y=1)}{2}$.
+%
+%Finally in the context of attribute inference attack at inference time, we define the random variable $\hat{S}=a\circ \hat{Y}$ where here $a$ is a machine learning model trained to infer sensitive attribute from model's output. 
-- 
cgit v1.2.3


From 7fc151d6a198d13dc9e1374522ec396d72905d3f Mon Sep 17 00:00:00 2001
From: Jan Aalmoes <jan.aalmoes@inria.fr>
Date: Wed, 11 Sep 2024 11:08:02 +0200
Subject: Ajout notations

---
 background/proba.tex | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'background/proba.tex')

diff --git a/background/proba.tex b/background/proba.tex
index 2cb0098..5bce111 100644
--- a/background/proba.tex
+++ b/background/proba.tex
@@ -13,6 +13,20 @@ Soit maintenant $A\subset\mathcal{P}(A)$, nous appellons $\sigma(A)$ la plus pet
 
 Nous appelons mesure, une fonction $d$ :$\mathcal{A}$ $\rightarrow$ $[0,+\infty]$ telle que $d(\emptyset) = 0$ et $d\left(\bigcup_{i\in \mathbb{N}} A_i\right) = \sum_{i\in \mathbb{N}}d(A_i)$ pour tout $(A_1, A_2, \cdots) \in \mathcal{A}^\mathbb{N} $ avec $\forall (i,j) A_i\cap A_j = \emptyset$.
 Nous disons alors que $(A, \mathcal{A}, d)$ est un espace mesuré.
+Pour un espace mesurable $(A,\mathcal{P}(A))$ et un élément $a\in A$, la mesure de Dirac en $a$ est la mesure $\delta_a$ définie par
+\begin{equation*}
+    \delta_a : \left\{
+        \begin{matrix}
+        \mathcal{P}(A)\rightarrow \{0,1\}\\
+        B\mapsto\left\{
+            \begin{matrix}
+                1&\text{si}&a\in B\\
+                0&\text{sinon}&
+            \end{matrix}
+            \right.
+        \end{matrix}
+        \right.
+\end{equation*}
 
 Soit $(A, \mathcal{A}, d)$ et $(B, \mathcal{B}, e)$ deux espaces mesurés.
 Nous définissons alors 
@@ -47,6 +61,8 @@ Dans le cas particulier où $d(A) = 1$, nous appelons $d$ une mesure de probabil
 Le loi de probabilité d'une variable aléatoire $f$ sur $(X,\mathcal{X})$ est la mesure image de $f$ sur $d$.
 Nous dirons que deux variables aléatoire $f$ et $g$ sont indépendantes si et seulement si la loi de la variables aléatoire $h:\omega\mapsto (f(\omega),g(\omega))$ est la mesur produit de la loi de $f$ et $g$.
 
+De plus, dans le cas des variables aléatoires, il est courant d'écrire $\{f\in A\}$ pour $f^{-1}(A)$ et $\{f=a\}$ pour $f^{-1}(\{a\})$.
+
 
 %Having introduced probability theory, we explicit the relation with the ML theory described previously.
 %Let $I$ a finite set, $\mathcal{X}$, $\mathcal{S}$ and $\mathcal{Y}$ the sets of features, sensitive attribute and label.
-- 
cgit v1.2.3


From 4aae3ea0427a6c9e9a8519a38d9d9d0ac5f0ec9c Mon Sep 17 00:00:00 2001
From: Jan Aalmoes <jan.aalmoes@inria.fr>
Date: Sat, 21 Sep 2024 16:27:27 +0200
Subject: fin intro

---
 background/proba.tex | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'background/proba.tex')

diff --git a/background/proba.tex b/background/proba.tex
index 5bce111..1cfe29e 100644
--- a/background/proba.tex
+++ b/background/proba.tex
@@ -56,6 +56,21 @@ Nous definisson la mesure image de $f$ par $d$, que nous notons $d_f$, par l'exp
     \right.
 \end{equation}
 
+\begin{definition}{Intégrale}
+    Soit $(E,\mathcal{E},\mu)$ un espace mesuré.
+    Pour une fonction $f=\sum_{i\in I}\alpha_i 1_{A_i}$, que nous dirons étagée,
+    avec $I$ fini et $\{A_i\mid i\in I\} \subset \mathcal{E}$,
+    nous posons $\int_E f d\mu= \sum_{i\in I}\alpha_i \mu(A_i)$.
+
+    Soit $g$ une fonction mesurable.
+    Alors il existe une suite $(f_n)_{n\in\mathbb{N}}$ de fonctions étagées telle que $\lim_{n\rightarrow +\infty} f_n = g$.
+    Voir la Définition~\ref{def:background-dif-lim} pour une définition de la limite.
+    On définit alors
+    \begin{equation*}
+        \int_{E}g d\mu = \lim_{n\rightarrow +\infty}\int_{E}f_n d\mu
+    \end{equation*}
+\end{definition}
+
 Dans le cas particulier où $d(A) = 1$, nous appelons $d$ une mesure de probabilité.
  $(A,\mathcal{A},d)$ est alors un espace probailisé et les fonctions mesurables sur cet espace sont appelés variables aléatoires.
 Le loi de probabilité d'une variable aléatoire $f$ sur $(X,\mathcal{X})$ est la mesure image de $f$ sur $d$.
@@ -63,6 +78,13 @@ Nous dirons que deux variables aléatoire $f$ et $g$ sont indépendantes si et s
 
 De plus, dans le cas des variables aléatoires, il est courant de d'écrir $\{f\in A\}$ pour $f^{-1}(A)$ et $\{f=a\}$ pour $f^{-1}(\{a\})$.
 
+\begin{definition}{Espérance}
+    Pour une variable aléatoire $X$ définie sur un espace probabilisé $(\Omega,\mathcal{T},P)$, on définit l'espérance de $X$ par la formule suivante.
+    \begin{equation*}
+        E(X) = \int_{\Omega}X(\omega)dP(\omega)
+    \end{equation*}
+\end{definition}
+
 
 %Having introduced probability theory, we explicit the relation with the ML theory described previously.
 %Let $I$ a finite set, $\mathcal{X}$, $\mathcal{S}$ and $\mathcal{Y}$ the sets of features, sensitive attribute and label.
-- 
cgit v1.2.3