#!/usr/bin/env python
# coding: utf-8
# In[1]:
import operator
import pathlib
import re
import pandas as pd
# Reader options shared by all three sources; spread into every 'format' dict.
FORMAT = dict(na_values='', keep_default_na=False)

# One settings dict per country. Each is unpacked as keyword arguments into
# read_bells_html(); 'note_col' names the column holding the strike note.
CH = dict(
    country_code='CH',
    url='https://de.wikipedia.org/wiki/Liste_der_gr%C3%B6ssten_Glocken_der_Schweiz',
    format={'index_col': 'Name', **FORMAT},
    note_col='Schlagton (HT-1/16)',
)

DE = dict(
    country_code='DE',
    url='https://de.wikipedia.org/wiki/Liste_von_Glocken_in_Deutschland',
    format={'index_col': 'Name', **FORMAT},
    note_col='Ton:',
)

FR = dict(
    country_code='FR',
    url='https://fr.wikipedia.org/wiki/Liste_des_bourdons_de_France',
    format={'index_col': 'Nom', **FORMAT},
    note_col='Note (éventuelle justesse en 16e de ton)',
)
# Note names of one octave, spelled with sharps only; 'E#' and 'B#' are
# skipped, which leaves twelve names.
NOTES = [f'{letter}{accidental}'
         for letter in 'CDEFGAB'
         for accidental in ('', '#')
         if (letter, accidental) not in (('E', '#'), ('B', '#'))]
assert len(NOTES) == 12

# Note names from 'C-1' up to 'G9'. A name's position in the list is used as
# its MIDI number (the asserts below pin 60 -> 'C4' and 69 -> 'A4').
MIDI_NOTES = [f'{pitch}{octave:d}'
              for octave in range(-1, 10)
              for pitch in NOTES
              if not (octave == 9 and pitch in ('G#', 'A', 'A#', 'B'))]
assert len(MIDI_NOTES) == 128
assert MIDI_NOTES[60] == 'C4'
assert MIDI_NOTES[69] == 'A4'

# Tabular view of the list (the value is only displayed as notebook output).
pd.Series(MIDI_NOTES, dtype='string').to_frame(name='midi_note')
# In[2]:
def iterenharmonic(raw_c_notes='abcdefg'):
    """Yield ``(raw_name, canonical_name)`` pairs for every supported spelling.

    The pairs come in three groups: the plain letters (plus ``'h'``, an alias
    for ``'B'``), then ``'<letter>_sharp'``, then ``'<letter>_flat'``.
    Canonical names use upper-case letters and sharps only, so every flat is
    respelled as the sharp of the letter below it, and the sharps/flats that
    land on a plain letter (b/e sharp, c/f flat) map to that letter.
    """
    yield from ((letter, letter.upper()) for letter in raw_c_notes)
    yield 'h', 'B'

    # Sharps that coincide with the next plain letter.
    plain_sharps = {'b': 'C', 'e': 'F'}
    for letter in raw_c_notes:
        yield f'{letter}_sharp', plain_sharps.get(letter, f'{letter[0].upper()}#')

    # Flats that coincide with the previous plain letter.
    plain_flats = {'c': 'B', 'f': 'E'}
    # Rotating the letters right by one pairs each letter with the one below it.
    letters_below = raw_c_notes[-1:] + raw_c_notes[:-1]
    for letter, below in zip(raw_c_notes, letters_below):
        yield f'{letter}_flat', plain_flats.get(letter, f'{below.upper()}#')
# Lookup table from raw spelling ('a', 'h', 'c_sharp', 'e_flat', ...) to the
# canonical sharp-only note name.
CANONICAL = {raw_name: canonical_name for raw_name, canonical_name in iterenharmonic()}
# Every canonical name must be produced, and nothing outside NOTES.
assert set(NOTES) == set(CANONICAL.values())

# Tabular view of the table (the value is only displayed as notebook output).
pd.Series(CANONICAL, dtype='string').to_frame(name='note')
# In[3]:
def read_bells_html(country_code, url, format, *, note_col, encoding='utf-8'):
    """Return the bell table for one country, downloading it at most once.

    If ``bells_<country_code in lower case>.csv`` does not exist in the
    current working directory, all HTML tables at ``url`` are read with
    ``pandas.read_html``, concatenated and written to that file. The result is
    always the CSV file read back with the same ``format`` options.

    Args:
        country_code: Used (lower-cased) in the cache file name.
        url: Page to read the HTML tables from when no cache file exists.
        format: Keyword arguments passed to both ``pandas.read_html`` and
            ``pandas.read_csv``.
        note_col: Ignored; accepted so a full settings dict can be unpacked
            into this function.
        encoding: Encoding of the cache file.

    Returns:
        The cached table after ``DataFrame.convert_dtypes()``.
    """
    del note_col
    cache_file = pathlib.Path(f'bells_{country_code.lower()}.csv')
    if not cache_file.exists():
        tables = pd.read_html(url, **format)
        combined = pd.concat(tables)
        combined.convert_dtypes().to_csv(cache_file, encoding=encoding)
    cached = pd.read_csv(cache_file, encoding=encoding, **format)
    return cached.convert_dtypes()
# Load the Swiss, German and French tables (in that order), one per settings dict.
ch, de, fr = (read_bells_html(**settings) for settings in (CH, DE, FR))

# Print a column/dtype summary of each table.
ch.info()
de.info()
fr.info()
# In[4]:
# The French table has pairs of columns for the same information. Fold each
# pair into one column: keep the primary value and fall back to the
# alternative column only where the primary one is missing.
fr['Poids en kg'] = fr['Poids en kg'].fillna(fr['Masse (en kg)'])
fr[FR['note_col']] = fr[FR['note_col']].fillna(fr["Note (diapason de l'époque)"])
fr['Date'] = fr['Date'].fillna(fr['Année'])
# The alternative columns are no longer needed; drop them in place.
fr.drop(['Masse (en kg)', "Note (diapason de l'époque)", 'Année'], axis='columns', inplace=True)
fr.info()
# In[5]:
# Pattern for one German-style note string such as 'gis0+2' or 'a0+1/16'.
#
# The group names had been lost, leaving '(?P[...])', which is not a valid
# regular expression. They are restored to the names get_note() reads from the
# extracted row: base, sharp, flat, primes, pos, neg, delta.
BELL_NOTE = re.compile(r'''
    (?P<base>[a-h])  # a0 = A3
    (?:
        (?P<sharp>is)
        |
        (?P<flat>e?s)
    )?
    [ \N{NO-BREAK SPACE}]?
    (?P<primes>[0-7]|['′]{,7}|º)  # octave: digit, run of primes, or ordinal sign
    (?:
        (?:
            [ \N{NO-BREAK SPACE}]?
            (?:
                (?P<pos>[+])
                |
                (?P<neg>[-\u2013\u2212])  # hyphen, en dash or minus sign
            )
            (?P<delta>1[0-6]|[1-9])  # deviation, 1 to 16
            (?:/16)?
        )?
    )?
    (?:
        (?:\[\d\])  # footnote
        |
        (?:,.*)     # anything after a comma is ignored
    )?
''', flags=re.VERBOSE)
# Preview: groups extracted by BELL_NOTE for the first ten German note strings,
# with unmatched groups shown as '' (the value is only displayed as notebook output).
de[DE['note_col']].str.extract(BELL_NOTE).fillna('').head(10)
# In[6]:
# Pattern for one French-style note string such as 'La2' or 'Fa#3 (+4/16)'.
#
# The group names had been lost, leaving '(?P[...])', which is not a valid
# regular expression. They are restored to the names get_note() reads from the
# extracted row: base, sharp, flat, octave_fr, pos, neg, delta.
BELL_NOTE_FR = re.compile(r'''
    (?P<base>Do|R(?:e|é)|Mi|Fa|Sol|La|Si)  # La2 = A3
    [ ]?
    (?:
        (?P<sharp>[#])
        |
        (?P<flat>♭|b)
    )?
    [ ]?
    (?P<octave_fr>[1-4]|²)
    (?:
        [ ]
        \(?
        (?:
            (?P<pos>[+])
            |
            (?P<neg>[-])
        )
        (?P<delta>1[0-6]|[1-9])  # deviation, 1 to 16
        (?:,\d+)?  # ignore
        /16
        °?
        \)?
    )?
    (?:
        [ ]
        \(?
        (?:haut|bas)
        \)?
    )?
''', flags=re.VERBOSE)

# French solfège syllable (with and without accent) -> raw note letter as
# used for the keys of CANONICAL.
BASE_FR = {
    'Do': 'c',
    'Re': 'd',
    'Ré': 'd',
    'Mi': 'e',
    'Fa': 'f',
    'Sol': 'g',
    'La': 'a',
    'Si': 'b',
}
# Preview: groups extracted by BELL_NOTE_FR for the first ten French note strings,
# with unmatched groups shown as '' (the value is only displayed as notebook output).
fr[FR['note_col']].str.extract(BELL_NOTE_FR).fillna('').head(10)
# In[7]:
# Pattern for a canonical note name with optional octave and optional
# deviation, e.g. 'G#', 'A4' or 'G#3-8/16'.
#
# The group names had been lost, leaving '(?P', which is not a valid regular
# expression. They are restored as midi_note and delta, the two values
# get_frequency() unpacks positionally from .groups().
MIDI_NOTE_OPTIONAL_DELTA = re.compile(r'''
    (?P<midi_note>
        [A-G]
        [#]?
        (?:-1|[0-9])?  # octave number, -1 to 9
    )
    (?:
        (?P<delta>
            [+-]
            (?:1[0-6]|[1-9])  # signed deviation, 1 to 16
        )
        /16
    )?
''', flags=re.VERBOSE)
def get_note(match: pd.Series, *, french: bool, as_midi: bool, include_delta: bool) -> 'str | None':
    """Turn one row of extracted regex groups into a canonical note name.

    Args:
        match: Row of named groups as produced by ``Series.str.extract`` with
            ``BELL_NOTE`` (``french=False``) or ``BELL_NOTE_FR``
            (``french=True``); unmatched groups must be empty strings.
        french: Translate ``base`` through ``BASE_FR`` and read the octave
            from ``octave_fr`` instead of ``primes``.
        as_midi: Append the octave number to the note name (``'A'`` -> ``'A3'``).
        include_delta: Append a non-zero deviation as ``'+n/16'`` or ``'-n/16'``.

    Returns:
        The note name (sharp spelling), or ``None`` when ``base`` is empty,
        i.e. the pattern found nothing in this row.
    """
    base = match.base
    if not base:
        return None

    if french:
        base = BASE_FR[base]
    elif base == 'h':
        # 'h' and 'b' share the canonical name 'B', but only 'b' has
        # '_sharp'/'_flat' entries in CANONICAL; without this, 'his' (which
        # BELL_NOTE accepts) raised KeyError.
        base = 'b'
    # NOTE(review): German sources usually write B flat as a plain 'b'; here a
    # plain 'b' resolves to B natural -- confirm against the data.

    # Respelling B sharp as C (or C flat as B) crosses the octave boundary,
    # because octave numbers change at C.
    octave_shift = 0
    if match.sharp:
        if base == 'b':
            octave_shift = 1
        base += '_sharp'
    elif match.flat:
        if base == 'c':
            octave_shift = -1
        base += '_flat'
    note = CANONICAL[base]

    if french:
        octave_fr = (2 if match.octave_fr == '²'
                     else int(match.octave_fr) if match.octave_fr
                     else 0)
        octave = 1 + octave_fr  # La2 = A3
    else:
        primes = match.primes
        if primes.isdigit():
            n_primes = int(primes)
        elif primes.startswith(("'", '′')):
            n_primes = len(primes)
        else:  # 'º' or no octave mark at all
            n_primes = 0
        octave = 3 + n_primes  # a0 = A3
    octave += octave_shift

    if match.delta:
        # A deviation without an explicit sign counts as positive.
        sign = '+' if match.pos or not match.neg else '-'
        delta = int(sign + match.delta)
    else:
        delta = 0

    # Keep the deviation within -7..+8 by moving to the neighbouring note and
    # compensating with a full step of 16; divmod carries into the octave.
    if delta < -7 or delta > 8:
        step = -1 if delta < 0 else 1
        octave_carry, index = divmod(NOTES.index(note) + step, len(NOTES))
        octave += octave_carry
        note = NOTES[index]
        delta -= 16 * step

    if as_midi:
        note += str(octave)
    if include_delta and delta:
        note += f'{delta:+d}/16'
    # Internal sanity check on the produced format.
    assert MIDI_NOTE_OPTIONAL_DELTA.fullmatch(note)
    return note
def to_notes(series, *, french=False, as_midi=False, include_delta=False, verbose=False):
    """Convert a Series of raw note strings into canonical note names.

    Values the selected pattern does not match completely are printed as
    ``missed: [...]``; they are only reported, not removed, so extraction
    still runs on every value.

    Args:
        series: Raw note strings.
        french: Use ``BELL_NOTE_FR`` instead of ``BELL_NOTE``.
        as_midi: Forwarded to ``get_note``.
        include_delta: Forwarded to ``get_note``.
        verbose: Also print all input values, sorted and joined by ``|``.

    Returns:
        The result of applying ``get_note`` to each row of extracted groups.
    """
    if verbose:
        print(*sorted(series), sep='|')

    pattern = BELL_NOTE if not french else BELL_NOTE_FR

    fully_matched = series.str.fullmatch(pattern)
    missed = series[~fully_matched]
    if len(missed) > 0:
        print(f'missed: {missed.tolist()}')

    groups = series.str.extract(pattern).fillna('')
    options = dict(french=french, as_midi=as_midi, include_delta=include_delta)
    return groups.apply(get_note, axis='columns', **options)


# Self-check on two German-style inputs.
assert to_notes(pd.Series(['gis0+2', 'a0+1/16'])).equals(pd.Series(['G#', 'A']))
# In[8]:
# Figure size passed to every plot in this file.
FIGSIZE = (6 * 72 / 100, 4 * 72 / 100)


def note_stats(bell_notes, *, french: bool = False):
    """Draw a bar chart of the number of bells per canonical note name.

    Args:
        bell_notes: Series of raw note strings, one per bell.
        french: Parse the strings with the French pattern.
    """
    counts = bell_notes.value_counts().to_frame('n_bells')
    # Distinct raw spellings are the index; map each one to its canonical name.
    canonical = to_notes(counts.index.to_series(), french=french)
    counts = counts.assign(note=canonical)
    # Different spellings of the same note are summed into one bar.
    per_note = counts.groupby('note')['n_bells'].sum().to_frame('n_bells')
    per_note.plot.bar(figsize=FIGSIZE)
# In[9]:
# Bells per note name for the German table.
note_stats(de[DE['note_col']])
# In[10]:
# Bells per note name for the Swiss table.
note_stats(ch[CH['note_col']])
# In[11]:
# Bells per note name for the French table, parsed with the French pattern.
note_stats(fr[FR['note_col']], french=True)
# In[12]:
def get_frequency(midi_note_optional_delta: str, *, pitch_reference: int = 440) -> float:
    """Return the frequency of a note name such as ``'A4'`` or ``'G#3-8/16'``.

    The note's position in ``MIDI_NOTES`` is its MIDI number; an optional
    ``+n/16`` / ``-n/16`` suffix shifts that number by ``n / 16``. MIDI
    number 69 maps to ``pitch_reference`` and every 12 numbers double the
    frequency.

    Args:
        midi_note_optional_delta: Text matching ``MIDI_NOTE_OPTIONAL_DELTA``.
        pitch_reference: Frequency assigned to MIDI number 69.

    Returns:
        The frequency rounded to three decimal places.
    """
    parsed = MIDI_NOTE_OPTIONAL_DELTA.fullmatch(midi_note_optional_delta)
    midi_note, delta = parsed.groups()
    sixteenths = int(delta) if delta else 0
    midi_number = MIDI_NOTES.index(midi_note) + sixteenths / 16
    octaves_from_reference = (midi_number - 69) / 12
    return round(2 ** octaves_from_reference * pitch_reference, 3)


# Spot checks for plain notes, a deviation, and a non-default reference pitch.
assert get_frequency('A4') == 440
assert get_frequency('G#3') == 207.652
assert get_frequency('G#3-8/16') == 201.741
assert round(get_frequency('E4+6/16', pitch_reference=435), 1) == 333
def frequency_stats(bell_notes, *, plot='bar', french=False, include_deltas=False, pitch_reference=440):
    """Plot and tabulate the number of bells per frequency.

    Args:
        bell_notes: Series of raw note strings, one per bell.
        plot: Name of the ``DataFrame.plot`` method to draw with (e.g. ``'bar'``).
        french: Parse the strings with the French pattern.
        include_deltas: Keep ``+n/16`` / ``-n/16`` deviations in the note names.
        pitch_reference: Forwarded to ``get_frequency``.

    Returns:
        A frame indexed by ``midi_note`` with columns ``frequency`` and
        ``n_bells``, sorted by descending frequency.
    """
    counts = bell_notes.value_counts().to_frame('n_bells')
    # Distinct raw spellings are the index; convert each to a note name with octave.
    midi_notes = to_notes(counts.index.to_series(), french=french, as_midi=True,
                          include_delta=include_deltas)
    # Spellings that could not be converted have no note name and are dropped.
    df = counts.assign(midi_note=midi_notes).dropna()
    frequencies = df['midi_note'].apply(get_frequency, pitch_reference=pitch_reference)
    df = df.assign(frequency=frequencies)

    per_frequency = df.groupby('frequency')['n_bells'].sum().to_frame('n_bells')
    draw = getattr(per_frequency.plot, plot)
    draw(figsize=FIGSIZE)

    per_note = df.groupby(['midi_note', 'frequency'])['n_bells'].sum().to_frame('n_bells')
    per_note = per_note.reset_index('frequency')
    return per_note.sort_values(by='frequency', ascending=False)
# In[13]:
# Bells per frequency, deviations ignored: German table.
frequency_stats(de[DE['note_col']])
# In[14]:
# Bells per frequency, deviations ignored: Swiss table.
frequency_stats(ch[CH['note_col']])
# In[15]:
# Bells per frequency, deviations ignored: French table.
frequency_stats(fr[FR['note_col']], french=True)
# In[16]:
# German table with deviations kept, drawn as an area plot; first 60 rows shown.
frequency_stats(de[DE['note_col']], include_deltas=True, plot='area').head(60)
# In[17]:
# Swiss table with deviations kept; first 40 rows shown.
frequency_stats(ch[CH['note_col']], include_deltas=True).head(40)
# In[18]:
# French table with deviations kept.
frequency_stats(fr[FR['note_col']], french=True, include_deltas=True)