diff options
-rw-r--r-- | background/ml.tex | 32 | ||||
-rw-r--r-- | biblio.bib | 390 |
2 files changed, 406 insertions, 16 deletions
diff --git a/background/ml.tex b/background/ml.tex index d1c08a3..d1f95b0 100644 --- a/background/ml.tex +++ b/background/ml.tex @@ -309,19 +309,19 @@ L'idée de la convolution est d'extraire des représentations\footnote{\textit{F \subsubsection{Modèle generatif} \label{sec:background-generation} -A generator is a function that takes as input a real dataset and outputs a synthetic dataset. -This definition is general enough so that the identity function is a generator. -Even though synthetic datasets are supposedly different than real world datasets. -We refer to the output of the identity generator as real data while referring to the output of another generator as synthetic data. - -In addition to the identity generator we use General Adversarial Networks (GAN)~\cite{gan}. -The goal of a GAN is to generate realistic samples given a distribution of multivariate data. -To do so a GAN leverages two neural networks: a generator and a discriminator. -The domain of the generator (its input space) is of low dimension with respect to its codomain (its output space) which has the same dimension as the data we want to generate. -For instance with 64 by 64 images, the codomain is a matrix with 64 rows and 64 columns. -To generate a new sample, we evaluate the generator on a sample of a multivariate standard normal distribution where the dimension is the domain's dimension. -This output is the new generated synthetic data point. - -The discriminator is only used when training the GAN with the goal of making sure that the generator produces realistic data. -To do so, the discriminator is a neural network with a classification goal: infer if a sample is synthetic or real. -Hence in the training procedure, the discriminator and the generator are in competition: the generator goal is to fool the discriminator into classifying synthetic data as real data. +Un générateur est une fonction qui prend en entrée un jeu de données réel et renvoie un jeu de données synthétique. 
+Cette définition est suffisamment générale pour que l'identité soit un générateur. +Nous dirons que les sorties du générateur identité sont des données réelles et nous appellerons données synthétiques les sorties de n'importe quel autre générateur. + +En plus du générateur identité nous utiliserons des réseaux de neurones adversariels génératifs~\footnote{\textit{Generative Adversarial Network}} (GAN)~\cite{gan}. +Le but d'un GAN est de générer des échantillons réalistes étant donné une loi de probabilité. +Pour arriver à cela, un GAN utilise deux réseaux de neurones : un générateur et un discriminateur. +Le domaine du générateur est de petite dimension relativement à son codomaine. +La dimension du codomaine est la même que celle des données que l'on souhaite générer. +Par exemple pour générer des images de taille 64 par 64, le codomaine est $\mathbb{R}_{64,64}$. +Pour générer une donnée, nous évaluons le générateur sur un point généré à partir d'une loi normale multidimensionnelle. +La sortie du générateur est la nouvelle donnée générée. + +Le discriminateur est utilisé uniquement lors de l'entraînement du GAN et a pour but de s'assurer que le générateur produit des données réalistes. +Pour cela, le discriminateur est un réseau de neurones ayant une tâche de classification : inférer si une donnée est synthétique ou réelle. +Ainsi, dans la procédure d'entraînement, le discriminateur et le générateur sont en compétition : le but du générateur est de tromper le discriminateur pour qu'il classifie une donnée synthétique comme réelle. 
@@ -946,3 +946,393 @@ abstract = {This paper explores the use of metaphorical personification (anthrop year={2021} } + +############################################### +#Synthetic +@misc{carlini2022membershipinferenceattacksprinciples, + title={Membership Inference Attacks From First Principles}, + author={Nicholas Carlini and Steve Chien and Milad Nasr and Shuang Song and Andreas Terzis and Florian Tramer}, + year={2022}, + eprint={2112.03570}, + archivePrefix={arXiv}, + primaryClass={cs.CR}, + url={https://arxiv.org/abs/2112.03570}, +} + +@article{brayne2015predictive, + title={Predictive policing}, + author={Brayne, Sarah and Rosenblat, Alex and Boyd, Danah}, + journal={Data \& Civil Rights: A New Era Of Policing And Justice}, + pages={2015--1027}, + year={2015} +} + +@inproceedings{barthelemy:hal-01837361, + TITLE = {{Pl@ntNet, une plate-forme innovante d'agr{\'e}gation et partage d'observations botaniques}}, + AUTHOR = {Barth{\'e}l{\'e}my, Daniel and Boujemaa, Nozha and Molino, Jean-Fran{\c c}ois and Joly, Alexis and Go{\"e}au, Herv{\'e} and Baki{\'c}, Vera and Selmi, Souheil and Champ, Julien and Carre, Jennifer and Chouet, Mathias and Perronnet, Aur{\'e}lien and Vignau, Christelle and Dufour-Kowalski, Samuel and Affouard, Antoine and Barbe, Julien and Bonnet, Pierre}, + URL = {https://hal.science/hal-01837361}, + BOOKTITLE = {{International Conference ‘Botanists of the Twenty-first Century'}}, + ADDRESS = {Paris, France}, + ORGANIZATION = {{UNESCO}}, + HAL_LOCAL_REFERENCE = {DEVMP}, + EDITOR = {No{\"e}line R. 
Rakotoarisoa and Stephen Blackmore and Bernard Riera}, + PAGES = {191--197}, + YEAR = {2014}, + MONTH = Sep, + KEYWORDS = {Pl@ntNet ; Botany ; Plateforme participative ; Observations botaniques}, + PDF = {https://hal.science/hal-01837361/file/DB_etal_plantnet_plateforme_2016_1.pdf}, + HAL_ID = {hal-01837361}, + HAL_VERSION = {v1}, +} + +@misc{plantnet, + title={Pl@ntNet}, + howpublished={\url{https://identify.plantnet.org/}}, + note={Dernier accès: 2024-07-24} +} + + +@article{dunn2018wearables, + title={Wearables and the medical revolution}, + author={Dunn, Jessilyn and Runge, Ryan and Snyder, Michael}, + journal={Personalized medicine}, + volume={15}, + number={5}, + pages={429--448}, + year={2018}, + publisher={Taylor \& Francis} +} + +@misc{gtrend, + title={Google trend Intelligence Artificielle}, + howpublished={\url{https://trends.google.com/trends/explore?date=all&geo=FR&q=intelligence%20artificielle&hl=en-US}}, + note={Dernier accès: 2024-07-24} +} + +@misc{france2030, + title={France 2030}, + howpublished={\url{https://www.info.gouv.fr/grand-dossier/france-2030}}, + note={Dernier accès: 2024-07-24} +} + +@misc{stratfr, + title={La stratégie nationale pour l'intelligence artificielle}, + howpublished={\url{https://www.entreprises.gouv.fr/fr/numerique/enjeux/la-strategie-nationale-pour-l-ia}}, + note={Dernier accès: 2024-07-24} +} + +@misc{applewatch, + title={WatchOS 11 brings powerful health and fitness insights, and even more personalization and connectivity}, + howpublished={\url{https://www.apple.com/newsroom/2024/06/watchos-11-brings-powerful-health-and-fitness-insights/}}, + note={Dernier accès: 2024-07-24} +} + +%%%%%%%%%%%CLIMATE CHANGE BACKGROUND +@article{barnes2019viewing, + title={Viewing forced climate patterns through an AI lens}, + author={Barnes, Elizabeth A and Hurrell, James W and Ebert-Uphoff, Imme and Anderson, Chuck and Anderson, David}, + journal={Geophysical Research Letters}, + volume={46}, + number={22}, + pages={13389--13398}, + 
year={2019}, + publisher={Wiley Online Library} +} + +@article{slater2023hybrid, + title={Hybrid forecasting: blending climate predictions with AI models}, + author={Slater, Louise J and Arnal, Louise and Boucher, Marie-Am{\'e}lie and Chang, Annie Y-Y and Moulds, Simon and Murphy, Conor and Nearing, Grey and Shalev, Guy and Shen, Chaopeng and Speight, Linda and others}, + journal={Hydrology and earth system sciences}, + volume={27}, + number={9}, + pages={1865--1889}, + year={2023}, + publisher={Copernicus Publications G{\"o}ttingen, Germany} +} + +%%%%%%%%%%%%ENERGY BACKGROUND +@article{jin2020energy, + title={Energy and AI}, + author={Jin, Donghan and Ocone, Raffaella and Jiao, Kui and Xuan, Jin}, + journal={Energy and AI}, + volume={1}, + pages={100002}, + year={2020}, + publisher={Elsevier} +} + +@article{kumar2020distributed, + title={Distributed energy resources and the application of AI, IoT, and blockchain in smart grids}, + author={Kumar, Nallapaneni Manoj and Chand, Aneesh A and Malvoni, Maria and Prasad, Kushal A and Mamun, Kabir A and Islam, FR and Chopra, Shauhrat S}, + journal={Energies}, + volume={13}, + number={21}, + pages={5739}, + year={2020}, + publisher={MDPI} +} + +@article{kumari2020blockchain, + title={Blockchain and AI amalgamation for energy cloud management: Challenges, solutions, and future directions}, + author={Kumari, Aparna and Gupta, Rajesh and Tanwar, Sudeep and Kumar, Neeraj}, + journal={Journal of Parallel and Distributed Computing}, + volume={143}, + pages={148--166}, + year={2020}, + publisher={Elsevier} +} + +@article{ngarambe2020use, + title={The use of artificial intelligence (AI) methods in the prediction of thermal comfort in buildings: Energy implications of AI-based thermal comfort controls}, + author={Ngarambe, Jack and Yun, Geun Young and Santamouris, Mat}, + journal={Energy and Buildings}, + volume={211}, + pages={109807}, + year={2020}, + publisher={Elsevier} +} + + +%%%%%OPEN AI + +@misc{openaibfm, + title={OpenAI, 
cette société qui révolutionne l'intelligence artificielle}, + howpublished={\url{https://www.bfmtv.com/tech/intelligence-artificielle/open-ai-cette-societe-qui-revolutionne-l-intelligence-artificielle_DN-202311200564.html}}, + note={Dernier accès: 2024-07-24} +} + +@misc{openaiinter, + title={Intelligence artificielle : pourquoi Sam Altman, créateur de ChatGPT, a été débarqué d'OpenAI}, + howpublished={\url{https://www.radiofrance.fr/franceinter/ce-que-l-on-sait-du-renvoi-de-sam-altman-patron-d-openai-et-createur-de-chatgpt-5672369}}, + note={Dernier accès: 2024-07-24} +} + +@misc{openaint, + title={OpenAI Says It Has Begun Training a New Flagship A.I. Model}, + howpublished={\url{https://www.nytimes.com/2024/05/28/technology/openai-gpt4-new-model.html}}, + note={Dernier accès: 2024-07-24} +} + +@misc{openaibg, + title={ChatGPT sets record for fastest-growing user base - analyst note}, + howpublished={\url{https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/}}, + note={Dernier accès: 2024-07-24} +} + +@misc{gptjournal, + title={ChatGPT : le quotidien Le Monde signe un partenariat avec OpenAI, une première en France}, + howpublished={\url{https://www.radiofrance.fr/franceinter/podcasts/l-info-de-france-inter/les-doc-france-inter-du-jeudi-14-mars-3-7619379}}, + note={Dernier accès: 2024-07-24} +} + +@article{beraja2023ai, + title={AI-tocracy}, + author={Beraja, Martin and Kao, Andrew and Yang, David Y and Yuchtman, Noam}, + journal={The Quarterly Journal of Economics}, + volume={138}, + number={3}, + pages={1349--1402}, + year={2023}, + publisher={Oxford University Press} +} + + + + + +@misc{song2020overlearning, + title={Overlearning Reveals Sensitive Attributes}, + author={Congzheng Song and Vitaly Shmatikov}, + year={2020}, + eprint={1905.11742}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} + +@article{EO, + author = {Moritz Hardt and + Eric Price and + Nathan Srebro}, + title = {Equality of Opportunity 
in Supervised Learning}, + journal = {CoRR}, + volume = {abs/1610.02413}, + year = {2016}, + url = {http://arxiv.org/abs/1610.02413}, + eprinttype = {arXiv}, + eprint = {1610.02413}, + timestamp = {Tue, 26 Apr 2022 09:17:17 +0200}, + biburl = {https://dblp.org/rec/journals/corr/HardtPS16.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{hawkins2004problem, + title={The problem of overfitting}, + author={Hawkins, Douglas M}, + journal={Journal of chemical information and computer sciences}, + volume={44}, + number={1}, + pages={1--12}, + year={2004}, + publisher={ACS Publications} +} + +@misc{yeom, + title={Privacy Risk in Machine Learning: Analyzing the Connection to Overfitting}, + author={Samuel Yeom and Irene Giacomelli and Matt Fredrikson and Somesh Jha}, + year={2018}, + eprint={1709.01604}, + archivePrefix={arXiv}, + primaryClass={cs.CR} +} + +@misc{vgg16, + title={Very Deep Convolutional Networks for Large-Scale Image Recognition}, + author={Karen Simonyan and Andrew Zisserman}, + year={2015}, + eprint={1409.1556}, + archivePrefix={arXiv}, + primaryClass={cs.CV}, + url={https://arxiv.org/abs/1409.1556}, +} +@misc{CGAN, + title={Conditional Generative Adversarial Nets}, + author={Mehdi Mirza and Simon Osindero}, + year={2014}, + eprint={1411.1784}, + archivePrefix={arXiv}, + primaryClass={cs.LG}, + url={https://arxiv.org/abs/1411.1784}, +} +@ARTICLE{cnn, + + author={Rawat, Waseem and Wang, Zenghui}, + + journal={Neural Computation}, + + title={Deep Convolutional Neural Networks for Image Classification: A Comprehensive Review}, + + year={2017}, + + volume={29}, + + number={9}, + + pages={2352--2449}, + + keywords={}, + + doi={10.1162/neco_a_00990}} + +@misc{dcgan, + title={Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks}, + author={Alec Radford and Luke Metz and Soumith Chintala}, + year={2016}, + eprint={1511.06434}, + archivePrefix={arXiv}, + primaryClass={cs.LG}, + 
url={https://arxiv.org/abs/1511.06434} +} +@inproceedings{gan, +author = {Goodfellow, Ian J. and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, +title = {Generative adversarial nets}, +year = {2014}, +publisher = {MIT Press}, +address = {Cambridge, MA, USA}, +booktitle = {Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2}, +pages = {2672--2680}, +numpages = {9}, +location = {Montreal, Canada}, +series = {NIPS'14} +} +@misc{ctgan, + title={Modeling Tabular data using Conditional {GAN}}, + author={Lei Xu and Maria Skoularidou and Alfredo Cuesta-Infante and Kalyan Veeramachaneni}, + year={2019}, + eprint={1907.00503}, + archivePrefix={arXiv}, + primaryClass={cs.LG}, + url={https://arxiv.org/abs/1907.00503}, +} +@article{bellovin2019privacy, + title={Privacy and synthetic datasets}, + author={Bellovin, Steven M and Dutta, Preetam K and Reitinger, Nathan}, + journal={Stan. Tech. L. 
Rev.}, + volume={22}, + pages={1}, + year={2019}, + publisher={HeinOnline} +} + +@inproceedings{ping2017datasynthesizer, + title={Datasynthesizer: Privacy-preserving synthetic datasets}, + author={Ping, Haoyue and Stoyanovich, Julia and Howe, Bill}, + booktitle={Proceedings of the 29th International Conference on Scientific and Statistical Database Management}, + pages={1--5}, + year={2017} +} + +@inproceedings{kuppa2021towards, + title={Towards improving privacy of synthetic datasets}, + author={Kuppa, Aditya and Aouad, Lamine and Le-Khac, Nhien-An}, + booktitle={Annual Privacy Forum}, + pages={106--119}, + year={2021}, + organization={Springer} +} + +@article{tai2023user, + title={User-Driven Synthetic Dataset Generation with Quantifiable Differential Privacy}, + author={Tai, Bo-Chen and Tsou, Yao-Tung and Li, Szu-Chuang and Huang, Yennun and Tsai, Pei-Yuan and Tsai, Yu-Cheng}, + journal={IEEE Transactions on Services Computing}, + year={2023}, + publisher={IEEE} +} +@article{stadler2020synthetic, + title={Synthetic data -- A privacy mirage}, + author={Stadler, Theresa and Oprisanu, Bristena and Troncoso, Carmela}, + journal={arXiv preprint arXiv:2011.07018}, + year={2020}, + month=nov +} + +@inproceedings{jordon2021hide, + title={Hide-and-seek privacy challenge: Synthetic data generation vs. 
patient re-identification}, + author={Jordon, James and Jarrett, Daniel and Saveliev, Evgeny and Yoon, Jinsung and Elbers, Paul and Thoral, Patrick and Ercole, Ari and Zhang, Cheng and Belgrave, Danielle and van der Schaar, Mihaela}, + booktitle={NeurIPS 2020 Competition and Demonstration Track}, + pages={206--215}, + year={2021}, + organization={PMLR} +} + +@inproceedings{abadi2016deep, + title={Deep learning with differential privacy}, + author={Abadi, Martin and Chu, Andy and Goodfellow, Ian and McMahan, H Brendan and Mironov, Ilya and Talwar, Kunal and Zhang, Li}, + booktitle={Proceedings of the 2016 ACM SIGSAC conference on computer and communications security}, + pages={308--318}, + year={2016} +} + +@inproceedings{shokri2017membership, + title={Membership inference attacks against machine learning models}, + author={Shokri, Reza and Stronati, Marco and Song, Congzheng and Shmatikov, Vitaly}, + booktitle={2017 IEEE symposium on security and privacy (SP)}, + pages={3--18}, + year={2017}, + organization={IEEE} +} + +@article{ding2021retiring, + title={Retiring Adult: New Datasets for Fair Machine Learning}, + author={Ding, Frances and Hardt, Moritz and Miller, John and Schmidt, Ludwig}, + journal={Advances in Neural Information Processing Systems}, + volume={34}, + year={2021} +} + +@inproceedings{zhifei2017cvpr, + title={Age Progression/Regression by Conditional Adversarial Autoencoder}, + author={Zhang, Zhifei and Song, Yang and Qi, Hairong}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2017}, + organization={IEEE} +} |