import operator
import pathlib
import re
from typing import Optional

import pandas as pd
# Reader options shared by all three tables: the empty string is the only
# NA marker handed to pandas and pandas' default NA markers are switched off.
FORMAT = dict(na_values='', keep_default_na=False)

# One settings dict per country. The keys are exactly the parameters of
# read_bells_html(), so each dict can be passed as ``read_bells_html(**CH)``.
# 'note_col' names the column of that table which holds the note of the bell.
CH = dict(
    country_code='CH',
    url='https://de.wikipedia.org/wiki/Liste_der_gr%C3%B6ssten_Glocken_der_Schweiz',
    format={'index_col': 'Name', **FORMAT},
    note_col='Schlagton (HT-1/16)',
)

DE = dict(
    country_code='DE',
    url='https://de.wikipedia.org/wiki/Liste_von_Glocken_in_Deutschland',
    format={'index_col': 'Name', **FORMAT},
    note_col='Ton:',
)

FR = dict(
    country_code='FR',
    url='https://fr.wikipedia.org/wiki/Liste_des_bourdons_de_France',
    format={'index_col': 'Nom', **FORMAT},
    note_col='Note (éventuelle justesse en 16e de ton)',
)
# The twelve pitch classes of one octave, spelled with sharps only.
# 'E#' and 'B#' are left out, which leaves exactly twelve names.
NOTES = [f'{letter}{accidental}'
         for letter in 'CDEFGAB'
         for accidental in ('', '#')
         if not (accidental and letter in 'EB')]
assert len(NOTES) == 12

# Note names for octaves -1..9 in ascending order, cut off after 'G9' so that
# the list position of a name is its MIDI note number (0..127).
MIDI_NOTES = [f'{pitch}{octave:d}'
              for octave in range(-1, 10)
              for pitch in NOTES][:128]
assert len(MIDI_NOTES) == 128
assert MIDI_NOTES[60] == 'C4'
assert MIDI_NOTES[69] == 'A4'

# Notebook display of the full number -> name table.
pd.Series(MIDI_NOTES, dtype='string').rename('midi_note').to_frame()
midi_note | |
---|---|
0 | C-1 |
1 | C#-1 |
2 | D-1 |
3 | D#-1 |
4 | E-1 |
... | ... |
123 | D#9 |
124 | E9 |
125 | F9 |
126 | F#9 |
127 | G9 |
128 rows × 1 columns
def iterenharmonic(raw_c_notes='abcdefg'):
    """Yield ``(raw_name, canonical_name)`` pairs for every accepted spelling.

    Three groups are produced, in this order:

    * the naturals in ``raw_c_notes`` plus the alias ``'h'`` for ``'B'``,
    * ``'<name>_sharp'`` for each natural,
    * ``'<name>_flat'`` for each natural.

    Canonical names are upper-case and use sharps only, so every flat is
    rewritten as the sharp of the natural that precedes it in
    ``raw_c_notes`` (cyclically).

    NOTE(review): 'b' maps to B natural here. In German note naming 'b'
    usually denotes B flat (and 'h' B natural) -- confirm against the tables
    this is applied to.
    """
    # Naturals map onto their own upper-case letter.
    yield from ((name, name.upper()) for name in raw_c_notes)
    yield 'h', 'B'

    # Raising b or e lands directly on the next natural.
    sharp_exceptions = {'b': 'C', 'e': 'F'}
    for name in raw_c_notes:
        yield f'{name}_sharp', sharp_exceptions.get(name, f'{name[0].upper()}#')

    # Lowering c or f lands directly on the previous natural; every other
    # flat equals the sharp of the natural one position earlier.
    flat_exceptions = {'c': 'B', 'f': 'E'}
    lower_neighbours = raw_c_notes[-1:] + raw_c_notes[:-1]
    for name, below in zip(raw_c_notes, lower_neighbours):
        yield f'{name}_flat', flat_exceptions.get(name, f'{below.upper()}#')
# Lookup table from raw spelling ('a', 'h', 'c_sharp', 'e_flat', ...) to the
# canonical sharp-only note name.
CANONICAL = {raw: canonical for raw, canonical in iterenharmonic()}
# Sanity check: the table reaches each of the twelve NOTES and nothing else.
assert {*CANONICAL.values()} == {*NOTES}

# Notebook display of the whole mapping.
pd.Series(CANONICAL, dtype='string').rename('note').to_frame()
note | |
---|---|
a | A |
b | B |
c | C |
d | D |
e | E |
f | F |
g | G |
h | B |
a_sharp | A# |
b_sharp | C |
c_sharp | C# |
d_sharp | D# |
e_sharp | F |
f_sharp | F# |
g_sharp | G# |
a_flat | G# |
b_flat | A# |
c_flat | B |
d_flat | C# |
e_flat | D# |
f_flat | E |
g_flat | F# |
def read_bells_html(country_code, url, format, *, note_col, encoding='utf-8'):
    """Return the bell table of one country, cached as a CSV file.

    The cache file is ``bells_<country code in lower case>.csv`` in the
    current working directory. When it does not exist yet, all HTML tables
    found at ``url`` are read, concatenated and written to that file. The
    result is always loaded back from the CSV file.

    Args:
        country_code: Used (lower-cased) in the cache file name.
        url: Page passed to ``pandas.read_html`` when there is no cache yet.
        format: Keyword arguments forwarded to both ``pandas.read_html`` and
            ``pandas.read_csv``.
        note_col: Accepted so a whole settings dict can be splatted into the
            call; not used here.
        encoding: Encoding for writing and reading the cache file.

    Returns:
        The cached table after ``DataFrame.convert_dtypes()``.
    """
    del note_col  # only part of the signature, see docstring

    cache = pathlib.Path(f'bells_{country_code.lower()}.csv')
    if not cache.exists():
        tables = pd.read_html(url, **format)
        combined = pd.concat(tables).convert_dtypes()
        combined.to_csv(cache, encoding=encoding)

    loaded = pd.read_csv(cache, encoding=encoding, **format)
    return loaded.convert_dtypes()
# Load one table per country (fetched from the web only when no cached CSV
# file exists yet).
ch = read_bells_html(**CH)
de = read_bells_html(**DE)
fr = read_bells_html(**FR)

# Print the column overview of each table, in the same order.
for bells in (ch, de, fr):
    bells.info()
<class 'pandas.core.frame.DataFrame'>
Index: 78 entries, Grosse Glocke to nan
Data columns (total 6 columns):
 #   Column                         Non-Null Count  Dtype
---  ------                         --------------  -----
 0   Ort, Kirche                    78 non-null     string
 1   Schlagton (HT-1/16)            78 non-null     string
 2   Masse (kg, ca.)                78 non-null     string
 3   Giesser, Gussort               78 non-null     string
 4   Gussjahr                       78 non-null     string
 5   Disposition des Gesamtgeläuts  78 non-null     string
dtypes: string(6)
memory usage: 4.3+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 467 entries, Petersglocke (im Volksmund Decke Pitter, Dicker Pitter) to Katharinaglocke
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Nr.              467 non-null    Int64
 1   Bild             0 non-null      Int64
 2   Gebäude          467 non-null    string
 3   Standort         467 non-null    string
 4   Bundesland       467 non-null    string
 5   Ton:             467 non-null    string
 6   Masse (kg)       467 non-null    string
 7   Ø (mm)           453 non-null    string
 8   Jahr             467 non-null    string
 9   Gießer, Gussort  467 non-null    string
 10  Werkstoff        467 non-null    string
 11  Konf.            451 non-null    string
 12  Bemerkungen      54 non-null     string
dtypes: Int64(2), string(11)
memory usage: 52.0+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 186 entries, La Savoyarde Classé MH to Ancienne cloche
Data columns (total 10 columns):
 #   Column                                    Non-Null Count  Dtype
---  ------                                    --------------  -----
 0   Édifice                                   163 non-null    string
 1   Lieu                                      186 non-null    string
 2   Poids en kg                               151 non-null    string
 3   Note (éventuelle justesse en 16e de ton)  159 non-null    string
 4   Date                                      156 non-null    string
 5   Fondeur                                   152 non-null    string
 6   Masse (en kg)                             23 non-null     string
 7   Note (diapason de l'époque)               23 non-null     string
 8   Année                                     23 non-null     string
 9   Disparue                                  23 non-null     string
dtypes: string(10)
memory usage: 16.0+ KB
# The French table carries three pairs of columns with the same kind of
# content under different labels. Fill the gaps of the main column of each
# pair from its alternative, then drop the alternatives.
#
# The result is assigned back to the column on purpose: calling
# ``fillna(..., inplace=True)`` on ``fr[col]`` modifies an intermediate
# Series. pandas 2.2 warns about that pattern, and with Copy-on-Write
# (the default from pandas 3.0) the change never reaches ``fr``.
fr['Poids en kg'] = fr['Poids en kg'].fillna(fr['Masse (en kg)'])
fr[FR['note_col']] = fr[FR['note_col']].fillna(fr["Note (diapason de l'époque)"])
fr['Date'] = fr['Date'].fillna(fr['Année'])
fr.drop(columns=['Masse (en kg)', "Note (diapason de l'époque)", 'Année'], inplace=True)
fr.info()
<class 'pandas.core.frame.DataFrame'>
Index: 186 entries, La Savoyarde Classé MH to Ancienne cloche
Data columns (total 7 columns):
 #   Column                                    Non-Null Count  Dtype
---  ------                                    --------------  -----
 0   Édifice                                   163 non-null    string
 1   Lieu                                      186 non-null    string
 2   Poids en kg                               174 non-null    string
 3   Note (éventuelle justesse en 16e de ton)  182 non-null    string
 4   Date                                      179 non-null    string
 5   Fondeur                                   152 non-null    string
 6   Disparue                                  23 non-null     string
dtypes: string(7)
memory usage: 11.6+ KB
# Pattern for the note column of the German-language tables.
#
# Groups:
#   base    note letter a-h
#   sharp   'is' suffix
#   flat    's' or 'es' suffix
#   primes  octave marker: a digit 0-7, up to seven prime marks (' or ′), or º
#   pos/neg sign of the deviation (neg also accepts en dash and minus sign)
#   delta   deviation 1..16, optionally followed by '/16'
# A trailing one-digit footnote marker like '[3]' or a ', ...' remark is
# accepted and ignored.
#
# Written as a raw string so that '\[' and '\d' reach the regex engine
# unchanged; in a plain string literal they are invalid escape sequences
# (SyntaxWarning on Python 3.12+). '\N{...}' and '\uXXXX' are resolved by
# ``re`` itself here.
BELL_NOTE = re.compile(r'''
    (?P<base>[a-h])  # a0 = A3
    (?:
        (?P<sharp>is)
        |
        (?P<flat>e?s)
    )?
    [ \N{NO-BREAK SPACE}]?
    (?P<primes>[0-7]|['′]{,7}|º)
    (?:
        (?:
            [ \N{NO-BREAK SPACE}]?
            (?:
                (?P<pos>[+])
                |
                (?P<neg>[-\u2013\u2212])
            )
            (?P<delta>1[0-6]|[1-9])
            (?:/16)?
        )?
    )?
    (?:
        (?:\[\d\])  # footnote
        |
        (?:,.*)
    )?
''', flags=re.VERBOSE)
# Preview: capture groups of BELL_NOTE for the first ten German entries,
# with groups that did not take part in the match shown as ''.
(de[DE['note_col']]
 .str.extract(BELL_NOTE)
 .fillna('')
 .head(10))
| Name | base | sharp | flat | primes | pos | neg | delta |
|---|---|---|---|---|---|---|---|
| Petersglocke (im Volksmund Decke Pitter, Dicker Pitter) | c |  |  | 0 |  |  |  |
| Glocke für die Weltausstellung | d |  |  | 0 |  |  |  |
| Kaiserglocke[3] | e |  | s | 0 |  |  |  |
| Christus-Friedens-Glocke[4][5] | e |  |  | 0 |  | − | 3 |
| Gloriosa[6] | e |  |  | 0 |  |  |  |
| Kreuzglocke[7] | e |  |  | 0 | + |  | 6 |
| Gloriosa[8] | e |  |  | 0 |  |  |  |
| Christus- und Friedensglocke | e |  |  | 0 |  |  |  |
| Freiheitsglocke | e |  |  | 0 |  |  |  |
| Christus-Salvator-Glocke[9] | e |  |  | 0 | + |  | 4 |
# Pattern for the note column of the French table.
#
# Groups:
#   base       solfège syllable (Do, Re/Ré, Mi, Fa, Sol, La, Si)
#   sharp      '#'
#   flat       '♭' or 'b'
#   octave_fr  octave number 1-4 (also written as '²')
#   pos/neg    sign of the deviation
#   delta      deviation 1..16 in front of '/16'; a decimal part (',5') and a
#              trailing '°' are accepted and ignored
# An optional 'haut' / 'bas' remark, with or without parentheses, may follow.
#
# Written as a raw string so that '\(' , '\)' and '\d' reach the regex engine
# unchanged; in a plain string literal they are invalid escape sequences
# (SyntaxWarning on Python 3.12+).
BELL_NOTE_FR = re.compile(r'''
    (?P<base>Do|R(?:e|é)|Mi|Fa|Sol|La|Si)  # La2 = A3
    [ ]?
    (?:
        (?P<sharp>[#])
        |
        (?P<flat>♭|b)
    )?
    [ ]?
    (?P<octave_fr>[1-4]|²)
    (?:
        [ ]
        \(?
        (?:
            (?P<pos>[+])
            |
            (?P<neg>[-])
        )
        (?P<delta>1[0-6]|[1-9])
        (?:,\d+)?  # ignore
        /16
        °?
        \)?
    )?
    (?:
        [ ]
        \(?
        (?:haut|bas)
        \)?
    )?
''', flags=re.VERBOSE)

# Solfège syllable -> lower-case note letter, i.e. the raw names that the
# German-language pattern produces in its ``base`` group.
BASE_FR = {
    'Do': 'c',
    'Re': 'd',
    'Ré': 'd',
    'Mi': 'e',
    'Fa': 'f',
    'Sol': 'g',
    'La': 'a',
    'Si': 'b',
}
# Preview: capture groups of BELL_NOTE_FR for the first ten French entries,
# with groups that did not take part in the match shown as ''.
(fr[FR['note_col']]
 .str.extract(BELL_NOTE_FR)
 .fillna('')
 .head(10))
| Nom | base | sharp | flat | octave_fr | pos | neg | delta |
|---|---|---|---|---|---|---|---|
| La Savoyarde Classé MH | Do | # |  | 2 |  |  |  |
| Emmanuel Classé MH | Fa | # |  | 2 |  | - | 6 |
| Charlotte Classé MH | Fa |  |  | 2 |  |  |  |
| Savinienne Classé MH | Ré |  |  | 2 | + |  | 2 |
| Jeanne d'Arc | Fa |  |  | 2 |  |  |  |
| Cloche du Saint-Esprit (Totenglocke) Classé MH | Sol | # |  | 2 |  |  |  |
| Thérèse | Fa |  |  | 2 |  |  |  |
| Ferdinand | Fa | # |  | 2 |  |  |  |
| Marie-Joséphine | Mi |  | ♭ | 2 |  |  |  |
| Charles Marie | Fa | # |  | 2 |  |  |  |
# Format of the note strings handled further down in this file:
#   midi_note  letter A-G, optional '#', optional octave (-1 or one digit 0-9)
#   delta      optional signed integer 1..16, written in front of '/16'
# Examples: 'A', 'G#3', 'C4+7/16', 'G#3-8/16'.
# get_note() asserts that its result matches this pattern and
# get_frequency() uses it to split a string into the two groups.
MIDI_NOTE_OPTIONAL_DELTA = re.compile(r'''
(?P<midi_note>
[A-G]
[#]?
(?:-1|[0-9])?
)
(?:
(?P<delta>
[+-]
(?:1[0-6]|[1-9])
)
/16
)?
''', flags=re.VERBOSE)
def get_note(match: pd.Series, *, french: bool, as_midi: bool, include_delta: bool) -> Optional[str]:
    """Turn one row of extracted regex groups into a canonical note string.

    Args:
        match: One row of ``Series.str.extract(BELL_NOTE)`` (or
            ``BELL_NOTE_FR``) after ``fillna('')``, so groups that did not
            match are empty strings.
        french: Read ``base`` as a solfège syllable and take the octave from
            the ``octave_fr`` group; otherwise take it from ``primes``.
        as_midi: Append the octave number (``'A3'`` instead of ``'A'``).
        include_delta: Append a non-zero deviation, e.g. ``'A3+2/16'``.

    Returns:
        The note string, or ``None`` when the ``base`` group is empty, i.e.
        the pattern did not match at all.

    A deviation outside -7..+8 sixteenths is folded onto the neighbouring
    note (``-8`` becomes the note below with ``+8``), whether or not the
    deviation itself ends up in the result.
    """
    # Item access instead of attribute access: a group name that collides
    # with a Series attribute would otherwise return that attribute.
    base = match['base']
    if not base:
        return None
    if french:
        base = BASE_FR[base]

    if match['sharp']:
        accidental = '_sharp'
    elif match['flat']:
        accidental = '_flat'
    else:
        accidental = ''
    if accidental and base == 'h':
        # CANONICAL lists accidentals under 'b' only ('h' is just its alias
        # for the natural), so 'his' would otherwise raise KeyError.
        base = 'b'
    spelling = base + accidental
    note = CANONICAL[spelling]

    if french:
        raw_octave = match['octave_fr']
        if raw_octave == '²':
            octave_fr = 2
        elif raw_octave:
            octave_fr = int(raw_octave)
        else:
            octave_fr = 0
        octave = 1 + octave_fr  # La2 = A3
    else:
        primes = match['primes']
        if primes.isdigit():
            steps = int(primes)
        elif primes.startswith(("'", '′')):
            steps = len(primes)
        else:
            steps = 0  # no marker at all, or 'º'
        octave = 3 + steps  # a0 = A3

    # Octave numbers change between B and C. A C flat is therefore the B of
    # the octave below and a B sharp the C of the octave above.
    if spelling == 'c_flat':
        octave -= 1
    elif spelling == 'b_sharp':
        octave += 1

    if match['delta']:
        sign = '+' if match['pos'] or not match['neg'] else '-'
        delta = int(sign + match['delta'])
    else:
        delta = 0

    # Keep the deviation within -7..+8 by moving to the neighbouring note.
    step = -1 if delta < -7 else 1 if delta > 8 else 0
    if step:
        carry, index = divmod(NOTES.index(note) + step, len(NOTES))
        octave += carry
        note = NOTES[index]
        delta -= 16 * step

    if as_midi:
        note += str(octave)
    if include_delta and delta:
        note += f'{delta:+d}/16'
    assert MIDI_NOTE_OPTIONAL_DELTA.fullmatch(note), note
    return note
def to_notes(series, *, french=False, as_midi=False, include_delta=False, verbose=False):
    """Parse a Series of raw note strings into canonical note strings.

    Entries that the pattern does not match completely are reported on
    stdout as ``missed: [...]`` (nothing is printed when all match).

    Args:
        series: Raw note strings.
        french: Use ``BELL_NOTE_FR`` instead of ``BELL_NOTE``.
        as_midi: Forwarded to ``get_note``.
        include_delta: Forwarded to ``get_note``.
        verbose: Also print all sorted input values, separated by ``|``.

    Returns:
        The result of applying ``get_note`` to every row of extracted groups.
    """
    if verbose:
        print(*sorted(series), sep='|')

    if french:
        pattern = BELL_NOTE_FR
    else:
        pattern = BELL_NOTE

    is_full_match = series.str.fullmatch(pattern)
    unmatched = series[~is_full_match]
    if len(unmatched) > 0:
        print(f'missed: {unmatched.tolist()}')

    groups = series.str.extract(pattern).fillna('')
    options = dict(french=french, as_midi=as_midi, include_delta=include_delta)
    return groups.apply(get_note, axis='columns', **options)


# Smoke test: deviations are dropped by default, only the note name remains.
assert to_notes(pd.Series(['gis0+2', 'a0+1/16'])).equals(pd.Series(['G#', 'A']))
# Figure size handed to every pandas plot call in this file: 6 x 4,
# each side scaled by 72 / 100.
FIGSIZE = tuple(side * 72 / 100 for side in (6, 4))


def note_stats(bell_notes, *, french: bool = False):
    """Draw a bar chart with the number of bells per canonical note name.

    Args:
        bell_notes: Series of raw note strings, one entry per bell.
        french: Parse the strings with the French pattern.

    Returns:
        None; the chart is the only result.
    """
    # Count identical raw strings first so each spelling is parsed once.
    counts = bell_notes.value_counts().to_frame('n_bells')
    counts = counts.assign(note=to_notes(counts.index.to_series(), french=french))
    per_note = counts.groupby('note')['n_bells'].sum().to_frame('n_bells')
    per_note.plot.bar(figsize=FIGSIZE)


note_stats(de[DE['note_col']])
missed: ['h0/c1', 'a’0', 'es’+5', 'cis’+2', 'dis’-2', 'd’-1']
# Same chart for the Swiss table, parsed with the default (non-French) pattern.
note_stats(ch[CH['note_col']])
# French table: french=True switches to the solfège pattern.
note_stats(fr[FR['note_col']], french=True)
missed: ['Do', '(?)', 'Do #']
def get_frequency(midi_note_optional_delta: str, *, pitch_reference: float = 440) -> float:
    """Return the frequency of a note string, rounded to three decimals.

    The note is looked up in ``MIDI_NOTES`` to get its note number, an
    optional ``'+n/16'`` / ``'-n/16'`` suffix shifts that number by n/16, and
    the frequency follows from equal temperament with note number 69 (A4)
    at ``pitch_reference``.

    Args:
        midi_note_optional_delta: String in ``MIDI_NOTE_OPTIONAL_DELTA``
            format that includes an octave, e.g. ``'G#3'`` or ``'E4+6/16'``.
        pitch_reference: Frequency assigned to A4.

    Raises:
        ValueError: If the string does not have the expected format or names
            a note that is not in ``MIDI_NOTES`` (e.g. no octave given).
    """
    parsed = MIDI_NOTE_OPTIONAL_DELTA.fullmatch(midi_note_optional_delta)
    if parsed is None:
        raise ValueError(f'not a note with optional delta: {midi_note_optional_delta!r}')
    midi_note, delta = parsed.groups()

    # The pattern also accepts names without octave, which have no number.
    try:
        midi_number = MIDI_NOTES.index(midi_note)
    except ValueError:
        raise ValueError(f'no MIDI note number for {midi_note!r}') from None

    if delta:
        midi_number += int(delta) / 16
    frequency = 2 ** ((midi_number - 69) / 12) * pitch_reference
    return round(frequency, 3)


assert get_frequency('A4') == 440
assert get_frequency('G#3') == 207.652
assert get_frequency('G#3-8/16') == 201.741
assert round(get_frequency('E4+6/16', pitch_reference=435), 1) == 333
def frequency_stats(bell_notes, *, plot='bar', french=False, include_deltas=False, pitch_reference=440):
    """Plot and tabulate the number of bells per frequency.

    Args:
        bell_notes: Series of raw note strings, one entry per bell.
        plot: Name of the pandas plot method used for the chart
            (``'bar'``, ``'area'``, ...).
        french: Parse the strings with the French pattern.
        include_deltas: Keep the deviation in sixteenths, so that bells with
            different deviations get different frequencies.
        pitch_reference: Forwarded to ``get_frequency``.

    Returns:
        DataFrame indexed by ``midi_note`` with the columns ``frequency`` and
        ``n_bells``, sorted by descending frequency. Strings that could not
        be parsed are left out.
    """
    # Count identical raw strings first so each spelling is parsed once.
    counts = bell_notes.value_counts().to_frame('n_bells')
    midi_notes = to_notes(counts.index.to_series(), french=french, as_midi=True,
                          include_delta=include_deltas)
    df = counts.assign(midi_note=midi_notes).dropna()
    df = df.assign(frequency=df['midi_note'].apply(get_frequency,
                                                   pitch_reference=pitch_reference))

    # Chart: bells per frequency, drawn with the plot method named by ``plot``.
    per_frequency = df.groupby('frequency')['n_bells'].sum().to_frame('n_bells')
    draw = getattr(per_frequency.plot, plot)
    draw(figsize=FIGSIZE)

    per_note = df.groupby(['midi_note', 'frequency'])['n_bells'].sum().to_frame('n_bells')
    return per_note.reset_index('frequency').sort_values(by='frequency', ascending=False)


frequency_stats(de[DE['note_col']])
missed: ['h0/c1', 'a’0', 'es’+5', 'cis’+2', 'dis’-2', 'd’-1']
midi_note | frequency | n_bells |
---|---|---|
C5 | 523.251 | 1 |
G4 | 391.995 | 1 |
F4 | 349.228 | 1 |
E4 | 329.628 | 1 |
D#4 | 311.127 | 1 |
D4 | 293.665 | 2 |
C#4 | 277.183 | 3 |
C4 | 261.626 | 8 |
B3 | 246.942 | 31 |
A#3 | 233.082 | 2 |
A3 | 220.000 | 196 |
G#3 | 207.652 | 107 |
G3 | 195.998 | 59 |
F#3 | 184.997 | 22 |
F3 | 174.614 | 11 |
E3 | 164.814 | 11 |
D#3 | 155.563 | 5 |
D3 | 146.832 | 2 |
C#3 | 138.591 | 2 |
C3 | 130.813 | 1 |
# Swiss table: bells per note, deviations ignored, default bar chart.
frequency_stats(ch[CH['note_col']])
midi_note | frequency | n_bells |
---|---|---|
B3 | 246.942 | 1 |
G#3 | 207.652 | 43 |
G3 | 195.998 | 18 |
F#3 | 184.997 | 10 |
F3 | 174.614 | 4 |
E3 | 164.814 | 2 |
# French table: french=True switches to the solfège pattern.
frequency_stats(fr[FR['note_col']], french=True)
missed: ['Do', '(?)', 'Do #']
midi_note | frequency | n_bells |
---|---|---|
C#4 | 277.183 | 2 |
C4 | 261.626 | 4 |
B3 | 246.942 | 25 |
A#3 | 233.082 | 27 |
A3 | 220.000 | 34 |
G#3 | 207.652 | 22 |
G3 | 195.998 | 28 |
F#3 | 184.997 | 14 |
F3 | 174.614 | 11 |
E3 | 164.814 | 3 |
D#3 | 155.563 | 3 |
D3 | 146.832 | 3 |
C#3 | 138.591 | 2 |
C3 | 130.813 | 1 |
# German table again, now keeping the deviations in sixteenths and drawing an
# area chart; only the 60 highest frequencies of the result are shown.
frequency_stats(de[DE['note_col']], include_deltas=True, plot='area').head(60)
missed: ['h0/c1', 'a’0', 'es’+5', 'cis’+2', 'dis’-2', 'd’-1']
midi_note | frequency | n_bells |
---|---|---|
C5-6/16 | 512.039 | 1 |
G4 | 391.995 | 1 |
F4+7/16 | 358.166 | 1 |
E4+3/16 | 333.217 | 1 |
D#4 | 311.127 | 1 |
D4 | 293.665 | 2 |
C#4+4/16 | 281.214 | 1 |
C#4-2/16 | 275.189 | 1 |
C#4-4/16 | 273.209 | 1 |
C4+7/16 | 268.321 | 1 |
C4+4/16 | 265.431 | 1 |
C4+2/16 | 263.521 | 1 |
C4 | 261.626 | 3 |
C4-4/16 | 257.875 | 1 |
C4-7/16 | 255.097 | 1 |
B3+8/16 | 254.178 | 1 |
B3+5/16 | 251.440 | 3 |
B3+4/16 | 250.533 | 1 |
B3+3/16 | 249.631 | 2 |
B3+1/16 | 247.835 | 1 |
B3 | 246.942 | 14 |
B3-2/16 | 245.165 | 2 |
B3-4/16 | 243.401 | 3 |
B3-5/16 | 242.524 | 2 |
B3-6/16 | 241.650 | 1 |
B3-7/16 | 240.779 | 1 |
A#3+5/16 | 237.327 | 2 |
A3+8/16 | 226.446 | 1 |
A3+7/16 | 225.630 | 2 |
A3+6/16 | 224.817 | 2 |
A3+5/16 | 224.007 | 3 |
A3+4/16 | 223.200 | 2 |
A3+3/16 | 222.396 | 4 |
A3+2/16 | 221.594 | 4 |
A3+1/16 | 220.796 | 4 |
A3 | 220.000 | 160 |
A3-1/16 | 219.207 | 2 |
A3-2/16 | 218.417 | 3 |
A3-3/16 | 217.630 | 1 |
A3-4/16 | 216.846 | 3 |
A3-5/16 | 216.064 | 2 |
A3-6/16 | 215.286 | 2 |
A3-7/16 | 214.510 | 1 |
G#3+8/16 | 213.737 | 3 |
G#3+7/16 | 212.967 | 1 |
G#3+6/16 | 212.199 | 2 |
G#3+5/16 | 211.435 | 4 |
G#3+4/16 | 210.673 | 3 |
G#3+3/16 | 209.914 | 2 |
G#3+2/16 | 209.157 | 3 |
G#3+1/16 | 208.403 | 2 |
G#3 | 207.652 | 62 |
G#3-1/16 | 206.904 | 4 |
G#3-2/16 | 206.158 | 3 |
G#3-3/16 | 205.416 | 4 |
G#3-4/16 | 204.675 | 6 |
G#3-5/16 | 203.938 | 3 |
G#3-6/16 | 203.203 | 1 |
G#3-7/16 | 202.471 | 4 |
G3+8/16 | 201.741 | 3 |
# Swiss table with deviations kept; at most the 40 highest frequencies are shown.
frequency_stats(ch[CH['note_col']], include_deltas=True).head(40)
midi_note | frequency | n_bells |
---|---|---|
B3 | 246.942 | 1 |
G#3 | 207.652 | 43 |
G3 | 195.998 | 17 |
G3-5/16 | 192.492 | 1 |
F#3 | 184.997 | 10 |
F3 | 174.614 | 4 |
E3+3/16 | 166.608 | 1 |
E3 | 164.814 | 1 |
# French table with deviations kept.
frequency_stats(fr[FR['note_col']], french=True, include_deltas=True)
missed: ['Do', '(?)', 'Do #']
midi_note | frequency | n_bells |
---|---|---|
C#4 | 277.183 | 2 |
C4 | 261.626 | 4 |
B3 | 246.942 | 25 |
A#3 | 233.082 | 26 |
A#3-5/16 | 228.912 | 1 |
A3+7/16 | 225.630 | 1 |
A3 | 220.000 | 33 |
G#3 | 207.652 | 22 |
G3+5/16 | 199.568 | 1 |
G3 | 195.998 | 27 |
F#3 | 184.997 | 12 |
F#3-4/16 | 182.345 | 1 |
F#3-6/16 | 181.033 | 1 |
F3 | 174.614 | 11 |
E3 | 164.814 | 3 |
D#3 | 155.563 | 3 |
D3+2/16 | 147.896 | 1 |
D3 | 146.832 | 2 |
C#3 | 138.591 | 2 |
C3 | 130.813 | 1 |