# -*- coding: utf-8 -*- # # Script to generate in English and French, graphs for the # birthday problem. # More precisely, it generates two SVG files representing the # probability of no match of two identical birthday one the same # wrt the number of person in the considered group. # # ************************************************************** # http://en.wikipedia.org/wiki/Birthday_problem # From Wikipedia, the free encyclopedia: # In probability theory, the birthday problem or birthday # paradox concerns the probability that, in a set of n # randomly chosen people, some pair of them will have the # same birthday. By the pigeonhole principle, the probability # reaches 100% when the number of people reaches 367 # (since there are 366 possible birthdays, including February # 29). However, 99% probability is reached with just 57 people, # and 50% probability with 23 people. These conclusions are # based on the assumption that each day of the year (except # February 29) is equally probable for a birthday. # # The mathematics behind this problem led to a well-known # cryptographic attack called the birthday attack, which # uses this probabilistic model to reduce the complexity # of cracking a hash function. # # Text under the # Creative Commons Attribution-ShareAlike License # ************************************************************** # # Implementation: # To ensure numerical accuracy, one evaluates the log10 of the # probabibity of no match. This allows to converts the # probability formula from a product formula to a sum formula. # # # Guillaume Jacquenot # 2013/03/10 import matplotlib.pyplot as plt from matplotlib import rc rc('font',**{'family':'serif','serif':['Palatino'],'size':14}) rc('text', usetex=True) import numpy as np def BirthdaymatchComputationLog10(): ''' This function evaluates the log10 probability of no match for the birthday paradox. This ensures no approximation on the result. $\log _{10} \left( {\bar p(n)} \right) = \sum\limits_{i = 365 + 1 - n}^{365} {\log _{10} \left( i \right)} - n\log _{10} \left( {365} \right)$ ''' n=np.arange(1,365) nR=np.arange(365,1,-1) p=np.cumsum(np.log10(nR))-n*np.log10(365) return n,p def BirthdaymatchGenerateTitle(logTitle=False): if logTitle: title='$\\log _{10} \\left( {\\bar p(n)} \\right)\ = \\sum\\limits_{i = 365 + 1 - n}^{365}\ {\\log _{10} \\left( i \\right)}\ - n\\log _{10} \\left( {365} \\right)$' else: title='$\\bar p(n) = \\frac{365!}{365^n\ \\left( {365 - n} \\right)!}$' return title def Birthdaymatch(\ labels={'xlabel':'Number of people',\ 'ylabel':'Probability of no match',\ 'title':'Birthday paradox'},\ outputFilename = r'Birthdaymatch.svg'): n,p = BirthdaymatchComputationLog10() fig, ax = plt.subplots() plt.plot(n,p,c='k', linestyle='-') plt.grid(True, ls='-', c='#a0a0a0') plt.xlabel(labels['xlabel']) plt.ylabel(labels['ylabel']) plt.title(labels['title']+' - '+BirthdaymatchGenerateTitle()) fig.canvas.draw() labels = [item.get_text() for item in ax.get_yticklabels()] labels = [label[1:] if label.startswith('$') else label for label in labels] labels = [label[0:-1] if label.endswith('$') else label for label in labels] labels = ['$10^{'+label+'}$' for label in labels] ax.set_yticklabels(labels) plt.savefig(outputFilename) Birthdaymatch() Birthdaymatch(\ labels={'xlabel':u"Nombre de personnes",\ 'ylabel':u"Probabilit\\'e de non correspondance",\ 'title':u"Paradoxe des anniversaires"},\ outputFilename = r'Birthdaymatch_FR.svg')