In [1]:

import operator
import pathlib
import re

import pandas as pd

# Parser options shared by all tables: only the empty string is read as missing
# (pandas' default NA markers are switched off).
FORMAT = dict(na_values='', keep_default_na=False)

# One configuration per national Wikipedia list. The keys match the
# parameters of read_bells_html(), so a config can be passed as **kwargs.
CH = dict(country_code='CH',
          url='https://de.wikipedia.org/wiki/Liste_der_gr%C3%B6ssten_Glocken_der_Schweiz',
          format=dict(index_col='Name', **FORMAT),
          note_col='Schlagton (HT-1/16)')

DE = dict(country_code='DE',
          url='https://de.wikipedia.org/wiki/Liste_von_Glocken_in_Deutschland',
          format=dict(index_col='Name', **FORMAT),
          note_col='Ton:')

FR = dict(country_code='FR',
          url='https://fr.wikipedia.org/wiki/Liste_des_bourdons_de_France',
          format=dict(index_col='Nom', **FORMAT),
          note_col='Note (éventuelle justesse en 16e de ton)')

# The twelve pitch classes of one octave, spelled with sharps only.
# 'E#' and 'B#' are left out, which leaves exactly twelve names.
NOTES = [f'{letter}{accidental}'
         for letter in 'CDEFGAB'
         for accidental in ('', '#')
         if f'{letter}{accidental}' not in ('E#', 'B#')]

assert len(NOTES) == 12

# Note names indexed by MIDI note number: octaves -1 through 9, with the four
# names after G9 removed so that exactly 128 entries remain.
MIDI_NOTES = [name
              for name in (f'{pitch}{octave:d}'
                           for octave in range(-1, 10)
                           for pitch in NOTES)
              if name not in ('G#9', 'A9', 'A#9', 'B9')]

assert len(MIDI_NOTES) == 128
assert MIDI_NOTES[60] == 'C4'
assert MIDI_NOTES[69] == 'A4'


# Show the lookup table: the row label is the MIDI note number, the value its name.
pd.Series(MIDI_NOTES, dtype='string').to_frame('midi_note')

Out[1]:

	midi_note
0	C-1
1	C#-1
2	D-1
3	D#-1
4	E-1
...	...
123	D#9
124	E9
125	F9
126	F#9
127	G9

128 rows × 1 columns

In [2]:

def iterenharmonic(raw_c_notes='abcdefg'):
    """Yield ``(raw_name, canonical_name)`` pairs for every supported spelling.

    Raw names are the lower-case letters of ``raw_c_notes``, the extra letter
    ``'h'``, and each letter suffixed with ``'_sharp'`` or ``'_flat'``.
    Canonical names are upper-case and spelled with sharps only, so flats and
    the sharps of ``b``/``e`` are replaced by their enharmonic equivalents.

    NOTE(review): ``'b'`` maps to ``'B'`` here, whereas German note naming
    usually reads ``b`` as B-flat and ``h`` as B natural -- confirm this is
    intended for the German-language lists.
    """
    # Naturals: the letter itself, upper-cased.
    yield from ((name, name.upper()) for name in raw_c_notes)

    # German 'h' is B natural.
    yield 'h', 'B'

    # Sharps: b# and e# have no black key and fall onto the next natural.
    sharp_exceptions = {'b': 'C', 'e': 'F'}
    for name in raw_c_notes:
        if name in sharp_exceptions:
            canonical = sharp_exceptions[name]
        else:
            canonical = f'{name[0].upper()}#'
        yield f'{name}_sharp', canonical

    # Flats: expressed as the sharp of the letter below (rotating the
    # sequence right by one pairs each letter with its lower neighbour);
    # c-flat and f-flat fall onto the natural below instead.
    lower_neighbours = raw_c_notes[-1:] + raw_c_notes[:-1]
    flat_exceptions = {'c': 'B', 'f': 'E'}
    for name, below in zip(raw_c_notes, lower_neighbours):
        if name in flat_exceptions:
            canonical = flat_exceptions[name]
        else:
            canonical = f'{below.upper()}#'
        yield f'{name}_flat', canonical


# Lookup table: raw spelling (e.g. 'e_flat') -> canonical sharp-only name (e.g. 'D#').
CANONICAL = dict(iterenharmonic())

# Sanity check: the canonical names are exactly the twelve entries of NOTES.
assert set(CANONICAL.values()) == set(NOTES)

# Display the mapping for inspection.
pd.Series(CANONICAL, dtype='string').to_frame('note')

Out[2]:

	note
a	A
b	B
c	C
d	D
e	E
f	F
g	G
h	B
a_sharp	A#
b_sharp	C
c_sharp	C#
d_sharp	D#
e_sharp	F
f_sharp	F#
g_sharp	G#
a_flat	G#
b_flat	A#
c_flat	B
d_flat	C#
e_flat	D#
f_flat	E
g_flat	F#

In [3]:

def read_bells_html(country_code, url, format, *, note_col, encoding='utf-8'):
    """Return the bell tables found at ``url`` as one frame, cached as CSV.

    On the first call the HTML tables are downloaded, concatenated and written
    to ``bells_<country_code in lower case>.csv`` in the working directory.
    The result is always read back from that file, so the first and later
    calls go through the same CSV parsing.

    Args:
        country_code: Used (lower-cased) in the cache file name.
        url: Page passed to ``pd.read_html``.
        format: Keyword arguments forwarded to both ``pd.read_html`` and
            ``pd.read_csv``.
        note_col: Ignored; accepted so a whole country config can be
            unpacked into the call.
        encoding: Encoding used to write and read the cache file.
    """
    del note_col  # only part of the signature, see docstring

    cache = pathlib.Path(f'bells_{country_code.lower()}.csv')
    if not cache.exists():
        tables = pd.read_html(url, **format)
        combined = pd.concat(tables).convert_dtypes()
        combined.to_csv(cache, encoding=encoding)

    cached = pd.read_csv(cache, encoding=encoding, **format)
    return cached.convert_dtypes()


# Load the three national lists (downloaded on first run, afterwards read from the CSV cache).
ch = read_bells_html(**CH)
de = read_bells_html(**DE)
fr = read_bells_html(**FR)

# Print columns, non-null counts and dtypes of each frame.
ch.info()
de.info()
fr.info()

<class 'pandas.core.frame.DataFrame'>
Index: 78 entries, Grosse Glocke to nan
Data columns (total 6 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   Ort, Kirche                    78 non-null     string
 1   Schlagton (HT-1/16)            78 non-null     string
 2   Masse (kg, ca.)                78 non-null     string
 3   Giesser, Gussort               78 non-null     string
 4   Gussjahr                       78 non-null     string
 5   Disposition des Gesamtgeläuts  78 non-null     string
dtypes: string(6)
memory usage: 4.3+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 467 entries, Petersglocke (im Volksmund Decke Pitter, Dicker Pitter) to Katharinaglocke
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Nr.              467 non-null    Int64 
 1   Bild             0 non-null      Int64 
 2   Gebäude          467 non-null    string
 3   Standort         467 non-null    string
 4   Bundesland       467 non-null    string
 5   Ton:             467 non-null    string
 6   Masse (kg)       467 non-null    string
 7   Ø (mm)           453 non-null    string
 8   Jahr             467 non-null    string
 9   Gießer, Gussort  467 non-null    string
 10  Werkstoff        467 non-null    string
 11  Konf.            451 non-null    string
 12  Bemerkungen      54 non-null     string
dtypes: Int64(2), string(11)
memory usage: 52.0+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 186 entries, La Savoyarde  Classé MH to Ancienne cloche
Data columns (total 10 columns):
 #   Column                                    Non-Null Count  Dtype 
---  ------                                    --------------  ----- 
 0   Édifice                                   163 non-null    string
 1   Lieu                                      186 non-null    string
 2   Poids en kg                               151 non-null    string
 3   Note (éventuelle justesse en 16e de ton)  159 non-null    string
 4   Date                                      156 non-null    string
 5   Fondeur                                   152 non-null    string
 6   Masse (en kg)                             23 non-null     string
 7   Note (diapason de l'époque)               23 non-null     string
 8   Année                                     23 non-null     string
 9   Disparue                                  23 non-null     string
dtypes: string(10)
memory usage: 16.0+ KB

In [4]:

# Some rows carry their weight, note and year in differently named columns
# ('Masse (en kg)', "Note (diapason de l'époque)", 'Année'). Use those values
# to fill the gaps in the main columns, then drop the extra columns.
#
# The filled columns are assigned back explicitly: calling
# ``fr[col].fillna(..., inplace=True)`` mutates a temporary column selection,
# which pandas warns about and which has no effect under Copy-on-Write.
fr = (fr.assign(**{'Poids en kg': fr['Poids en kg'].fillna(fr['Masse (en kg)']),
                   FR['note_col']: fr[FR['note_col']].fillna(fr["Note (diapason de l'époque)"]),
                   'Date': fr['Date'].fillna(fr['Année'])})
      .drop(['Masse (en kg)', "Note (diapason de l'époque)", 'Année'], axis='columns'))

fr.info()

<class 'pandas.core.frame.DataFrame'>
Index: 186 entries, La Savoyarde  Classé MH to Ancienne cloche
Data columns (total 7 columns):
 #   Column                                    Non-Null Count  Dtype 
---  ------                                    --------------  ----- 
 0   Édifice                                   163 non-null    string
 1   Lieu                                      186 non-null    string
 2   Poids en kg                               174 non-null    string
 3   Note (éventuelle justesse en 16e de ton)  182 non-null    string
 4   Date                                      179 non-null    string
 5   Fondeur                                   152 non-null    string
 6   Disparue                                  23 non-null     string
dtypes: string(7)
memory usage: 11.6+ KB

In [5]:

# Strike note in German letter notation, e.g. 'gis0+2', "es'", 'e0 −3' or 'c0[3]'.
# Raw string so that regex escapes such as \[ and \d reach the regex engine
# unchanged instead of being (invalid) Python string escapes.
BELL_NOTE = re.compile(r'''
(?P<base>[a-h])  # a0 = A3
(?:
    (?P<sharp>is)
    |
    (?P<flat>e?s)
)?
[ \xa0]?  # optional space or NO-BREAK SPACE
(?P<primes>[0-7]|['′]{,7}|º)
(?:
    [ \xa0]?  # optional space or NO-BREAK SPACE
    (?:
        (?P<pos>[+])
        |
        (?P<neg>[-\u2013\u2212])  # hyphen-minus, EN DASH or MINUS SIGN
    )
    (?P<delta>1[0-6]|[1-9])
    (?:/16)?
)?
(?:
  (?:\[\d\])  # footnote
  |
  (?:,.*)
)?
''', flags=re.VERBOSE)


# Preview the groups extracted from the first ten German note strings (unmatched groups shown as '').
de[DE['note_col']].str.extract(BELL_NOTE).fillna('').head(10)

Out[5]:

	base	flat	primes	pos	neg	delta
Name
Petersglocke (im Volksmund Decke Pitter, Dicker Pitter)	c		0
Glocke für die Weltausstellung	d		0
Kaiserglocke[3]	e	s	0
Christus-Friedens-Glocke[4][5]	e		0		−	3
Gloriosa[6]	e		0
Kreuzglocke[7]	e		0	+		6
Gloriosa[8]	e		0
Christus- und Friedensglocke	e		0
Freiheitsglocke	e		0
Christus-Salvator-Glocke[9]	e		0	+		4

In [6]:

# Strike note in French solfège notation, e.g. 'Fa # 2 (-6/16)' or 'Mi♭2'.
# Raw string so that regex escapes such as \( and \d reach the regex engine
# unchanged instead of being (invalid) Python string escapes.
BELL_NOTE_FR = re.compile(r'''
(?P<base>Do|R(?:e|é)|Mi|Fa|Sol|La|Si)  # La2 = A3
[ ]?
(?:
    (?P<sharp>[#])
    |
    (?P<flat>♭|b)
)?
[ ]?
(?P<octave_fr>[1-4]|²)
(?:
    [ ]
    \(?
    (?:
         (?P<pos>[+])
         |
         (?P<neg>[-])
    )
    (?P<delta>1[0-6]|[1-9])
    (?:,\d+)?  # ignore
    /16
    °?
    \)?
)?
(?:
    [ ]
    \(?
    (?:haut|bas)
    \)?
)?
''', flags=re.VERBOSE)

# French solfège syllable -> raw note letter used as key prefix in CANONICAL.
# 'Re' and 'Ré' are both accepted spellings of the same syllable.
BASE_FR = dict([
    ('Do', 'c'),
    ('Re', 'd'),
    ('Ré', 'd'),
    ('Mi', 'e'),
    ('Fa', 'f'),
    ('Sol', 'g'),
    ('La', 'a'),
    ('Si', 'b'),
])


# Preview the groups extracted from the first ten French note strings (unmatched groups shown as '').
fr[FR['note_col']].str.extract(BELL_NOTE_FR).fillna('').head(10)

Out[6]:

	base	sharp	flat	octave_fr	pos	neg	delta
Nom
La Savoyarde Classé MH	Do	#		2
Emmanuel Classé MH	Fa	#		2		-	6
Charlotte Classé MH	Fa			2
Savinienne Classé MH	Ré			2	+		2
Jeanne d'Arc	Fa			2
Cloche du Saint-Esprit (Totenglocke) Classé MH	Sol	#		2
Thérèse	Fa			2
Ferdinand	Fa	#		2
Marie-Joséphine	Mi		♭	2
Charles Marie	Fa	#		2

In [7]:

# Output format of get_note(): a sharp-only note name with an optional octave
# (-1 to 9) and an optional signed deviation in sixteenths, e.g. 'A', 'G#3'
# or 'E4+6/16'. Group 'midi_note' holds name plus octave, group 'delta' the
# signed numerator (such as '+6' or '-8').
MIDI_NOTE_OPTIONAL_DELTA = re.compile(r'''
(?P<midi_note>
    [A-G]
    [#]?
    (?:-1|[0-9])?
)
(?:
    (?P<delta>
        [+-]
        (?:1[0-6]|[1-9])
    )
    /16
)?
''', flags=re.VERBOSE)


def get_note(match: pd.Series, *, french: bool, as_midi: bool, include_delta: bool) -> str:
    """Turn one row of extracted regex groups into a canonical note name.

    Args:
        match: Row produced by ``Series.str.extract`` with ``BELL_NOTE`` or
            ``BELL_NOTE_FR``; unmatched groups must be empty strings.
        french: Read the row as French solfège (``base`` is looked up in
            ``BASE_FR`` and the octave taken from ``octave_fr``) instead of
            letter notation with ``primes``.
        as_midi: Append the octave number to the note name (e.g. ``'A3'``).
        include_delta: Together with ``as_midi``, also append a non-zero
            deviation as ``'+n/16'`` or ``'-n/16'``.

    Returns:
        The note name, or ``None`` when ``base`` is empty (nothing matched).
        Deviations below -7 or above +8 sixteenths are re-expressed relative
        to the neighbouring semitone, so the reported deviation always lies
        between -7 and +8.
    """
    base = match.base
    if not base:
        return None

    if french:
        base = BASE_FR[base]

    if match.sharp:
        base += '_sharp'
    elif match.flat:
        base += '_flat'

    note = CANONICAL[base]

    if french:
        octave_fr = (2 if match.octave_fr == '²'
                     else int(match.octave_fr) if match.octave_fr
                     else 0)
        octave = 1 + octave_fr  # La2 = A3
    else:
        primes = (int(match.primes) if match.primes.isdigit()
                  else len(match.primes) if match.primes.startswith(("'", '′'))
                  else 0)  # 'º' or no octave mark at all
        octave = 3 + primes  # a0 = A3

    # The octave number changes between B and C, so the two enharmonic
    # spellings that cross that boundary must move to the adjacent octave:
    # B sharp is the C above, C flat is the B below.
    if base == 'b_sharp':
        octave += 1
    elif base == 'c_flat':
        octave -= 1

    if match.delta:
        sign = '+' if match.pos or not match.neg else '-'
        delta = int(sign + match.delta)
    else:
        delta = 0

    # Normalise large deviations to the nearest semitone (16 sixteenths apart),
    # carrying into the neighbouring octave when wrapping around NOTES.
    if delta < -7:
        index = NOTES.index(note) - 1
        if index < 0:
            octave -= 1
        note = NOTES[index % len(NOTES)]
        delta += 16
    elif delta > 8:
        index = NOTES.index(note) + 1
        if index >= len(NOTES):
            octave += 1
        note = NOTES[index % len(NOTES)]
        delta -= 16

    if as_midi:
        note += str(octave)
        if include_delta and delta:
            note += f'{delta:+d}/16'

    # Guard: whatever is returned must be parseable by get_frequency().
    assert MIDI_NOTE_OPTIONAL_DELTA.fullmatch(note)
    return note


def to_notes(series, *, french=False, as_midi=False, include_delta=False, verbose=False):
    """Convert a series of raw note strings into canonical note names.

    Values that do not match the notation pattern as a whole are reported on
    stdout as ``missed: [...]``. Each value is then parsed with ``get_note``;
    ``french``, ``as_midi`` and ``include_delta`` are passed through to it.
    With ``verbose`` the sorted input values are printed first.

    NOTE(review): the groups come from ``str.extract``, which uses the first
    match found anywhere in the string, so a value reported as missed can
    still yield a (partially parsed) note -- confirm this is intended.
    """
    if verbose:
        print(*sorted(series), sep='|')

    regex = BELL_NOTE_FR if french else BELL_NOTE

    fully_matched = series.str.fullmatch(regex)
    unparsed = series[~fully_matched]
    if not unparsed.empty:
        print(f'missed: {unparsed.tolist()}')

    groups = series.str.extract(regex).fillna('')
    return groups.apply(get_note, axis='columns',
                        french=french, as_midi=as_midi, include_delta=include_delta)


# Smoke test: by default neither octave nor deviation is part of the result.
assert pd.Series(['gis0+2', 'a0+1/16']).pipe(to_notes).equals(pd.Series(['G#', 'A']))

In [8]:

# (width, height) passed as ``figsize`` to the pandas plot calls; evaluates to (4.32, 2.88).
# NOTE(review): presumably a 6 x 4 figure rescaled by 72/100 -- confirm the intent.
FIGSIZE = (6 * 72 / 100, 4 * 72 / 100)


def note_stats(bell_notes, *, french: bool = False):
    """Draw a bar chart of the number of bells per note name.

    The raw note strings in ``bell_notes`` are counted, converted to
    octave-less canonical names with ``to_notes`` and the counts summed per
    name. Nothing is returned; the chart is the only result.
    """
    counts = bell_notes.value_counts().to_frame('n_bells')
    counts = counts.assign(note=lambda x: x.index.to_series().pipe(to_notes, french=french))
    per_note = counts.groupby('note')['n_bells'].sum().to_frame('n_bells')
    per_note.plot.bar(figsize=FIGSIZE)

In [9]:

# Bells per note name, German list.
note_stats(de[DE['note_col']])

missed: ['h0/c1', 'a’0', 'es’+5', 'cis’+2', 'dis’-2', 'd’-1']

In [10]:

# Bells per note name, Swiss list.
note_stats(ch[CH['note_col']])

In [11]:

# Bells per note name, French list (solfège notation).
note_stats(fr[FR['note_col']], french=True)

missed: ['Do', '(?)', 'Do #']

In [12]:

def get_frequency(midi_note_optional_delta: str, *, pitch_reference: int = 440) -> float:
    """Return the equal-temperament frequency of a note, rounded to 3 decimals.

    Args:
        midi_note_optional_delta: Note name with octave and optional signed
            deviation in sixteenths of a semitone, e.g. ``'A4'`` or
            ``'G#3-8/16'`` (the format of ``MIDI_NOTE_OPTIONAL_DELTA``).
        pitch_reference: Frequency assigned to MIDI note number 69 (``'A4'``).
    """
    parsed = MIDI_NOTE_OPTIONAL_DELTA.fullmatch(midi_note_optional_delta)
    midi_note, delta = parsed.groups()

    # Position on the MIDI scale; a deviation makes it fractional.
    position = MIDI_NOTES.index(midi_note)
    if delta:
        position += int(delta) / 16

    # Twelve semitones per octave, each octave doubling the frequency.
    octaves_from_reference = (position - 69) / 12
    return round(2 ** octaves_from_reference * pitch_reference, 3)


# Reference values at the default pitch reference of 440.
assert get_frequency('A4') == 440
assert get_frequency('G#3') == 207.652

# A deviation of -8/16 lowers the note by half a semitone.
assert get_frequency('G#3-8/16') == 201.741

# A different pitch reference scales the result.
assert round(get_frequency('E4+6/16', pitch_reference=435), 1) == 333


def frequency_stats(bell_notes, *, plot='bar', french=False, include_deltas=False, pitch_reference=440):
    """Plot and tabulate the number of bells per strike-note frequency.

    Args:
        bell_notes: Series of raw note strings.
        plot: Name of the ``DataFrame.plot`` method used for the chart
            (e.g. ``'bar'`` or ``'area'``).
        french: Parse the strings as French solfège notation.
        include_deltas: Keep deviations in sixteenths instead of collapsing
            every bell onto its nominal semitone.
        pitch_reference: Passed through to ``get_frequency``.

    Returns:
        Frame indexed by ``midi_note`` with columns ``frequency`` and
        ``n_bells``, sorted by descending frequency. Values that could not
        be converted to a note are left out.
    """
    counts = bell_notes.value_counts().to_frame('n_bells')
    midi_notes = counts.index.to_series().pipe(to_notes, french=french, as_midi=True,
                                               include_delta=include_deltas)
    df = counts.assign(midi_note=midi_notes).dropna()
    df = df.assign(frequency=df['midi_note'].apply(get_frequency, pitch_reference=pitch_reference))

    # Chart: bell counts summed per frequency, drawn with the requested plot method.
    per_frequency = df.groupby('frequency')['n_bells'].sum().to_frame('n_bells')
    getattr(per_frequency.plot, plot)(figsize=FIGSIZE)

    # Table: same counts, labelled by note name.
    per_note = df.groupby(['midi_note', 'frequency'])['n_bells'].sum().to_frame('n_bells')
    return per_note.reset_index('frequency').sort_values(by='frequency', ascending=False)

In [13]:

# Bells per nominal semitone frequency, German list.
frequency_stats(de[DE['note_col']])

missed: ['h0/c1', 'a’0', 'es’+5', 'cis’+2', 'dis’-2', 'd’-1']

Out[13]:

	frequency	n_bells
midi_note
C5	523.251	1
G4	391.995	1
F4	349.228	1
E4	329.628	1
D#4	311.127	1
D4	293.665	2
C#4	277.183	3
C4	261.626	8
B3	246.942	31
A#3	233.082	2
A3	220.000	196
G#3	207.652	107
G3	195.998	59
F#3	184.997	22
F3	174.614	11
E3	164.814	11
D#3	155.563	5
D3	146.832	2
C#3	138.591	2
C3	130.813	1

In [14]:

# Bells per nominal semitone frequency, Swiss list.
frequency_stats(ch[CH['note_col']])

Out[14]:

	frequency	n_bells
midi_note
B3	246.942	1
G#3	207.652	43
G3	195.998	18
F#3	184.997	10
F3	174.614	4
E3	164.814	2

In [15]:

# Bells per nominal semitone frequency, French list (solfège notation).
frequency_stats(fr[FR['note_col']], french=True)

missed: ['Do', '(?)', 'Do #']

Out[15]:

	frequency	n_bells
midi_note
C#4	277.183	2
C4	261.626	4
B3	246.942	25
A#3	233.082	27
A3	220.000	34
G#3	207.652	22
G3	195.998	28
F#3	184.997	14
F3	174.614	11
E3	164.814	3
D#3	155.563	3
D3	146.832	3
C#3	138.591	2
C3	130.813	1

In [16]:

# German list with deviations in sixteenths kept, drawn as an area plot; show the 60 highest-frequency rows.
frequency_stats(de[DE['note_col']], include_deltas=True, plot='area').head(60)

missed: ['h0/c1', 'a’0', 'es’+5', 'cis’+2', 'dis’-2', 'd’-1']

Out[16]:

	frequency	n_bells
midi_note
C5-6/16	512.039	1
G4	391.995	1
F4+7/16	358.166	1
E4+3/16	333.217	1
D#4	311.127	1
D4	293.665	2
C#4+4/16	281.214	1
C#4-2/16	275.189	1
C#4-4/16	273.209	1
C4+7/16	268.321	1
C4+4/16	265.431	1
C4+2/16	263.521	1
C4	261.626	3
C4-4/16	257.875	1
C4-7/16	255.097	1
B3+8/16	254.178	1
B3+5/16	251.440	3
B3+4/16	250.533	1
B3+3/16	249.631	2
B3+1/16	247.835	1
B3	246.942	14
B3-2/16	245.165	2
B3-4/16	243.401	3
B3-5/16	242.524	2
B3-6/16	241.650	1
B3-7/16	240.779	1
A#3+5/16	237.327	2
A3+8/16	226.446	1
A3+7/16	225.630	2
A3+6/16	224.817	2
A3+5/16	224.007	3
A3+4/16	223.200	2
A3+3/16	222.396	4
A3+2/16	221.594	4
A3+1/16	220.796	4
A3	220.000	160
A3-1/16	219.207	2
A3-2/16	218.417	3
A3-3/16	217.630	1
A3-4/16	216.846	3
A3-5/16	216.064	2
A3-6/16	215.286	2
A3-7/16	214.510	1
G#3+8/16	213.737	3
G#3+7/16	212.967	1
G#3+6/16	212.199	2
G#3+5/16	211.435	4
G#3+4/16	210.673	3
G#3+3/16	209.914	2
G#3+2/16	209.157	3
G#3+1/16	208.403	2
G#3	207.652	62
G#3-1/16	206.904	4
G#3-2/16	206.158	3
G#3-3/16	205.416	4
G#3-4/16	204.675	6
G#3-5/16	203.938	3
G#3-6/16	203.203	1
G#3-7/16	202.471	4
G3+8/16	201.741	3

In [17]:

# Swiss list with deviations in sixteenths kept; show up to 40 of the highest-frequency rows.
frequency_stats(ch[CH['note_col']], include_deltas=True).head(40)

Out[17]:

	frequency	n_bells
midi_note
B3	246.942	1
G#3	207.652	43
G3	195.998	17
G3-5/16	192.492	1
F#3	184.997	10
F3	174.614	4
E3+3/16	166.608	1
E3	164.814	1

In [18]:

# French list (solfège notation) with deviations in sixteenths kept.
frequency_stats(fr[FR['note_col']], french=True, include_deltas=True)

missed: ['Do', '(?)', 'Do #']

Out[18]:

	frequency	n_bells
midi_note
C#4	277.183	2
C4	261.626	4
B3	246.942	25
A#3	233.082	26
A#3-5/16	228.912	1
A3+7/16	225.630	1
A3	220.000	33
G#3	207.652	22
G3+5/16	199.568	1
G3	195.998	27
F#3	184.997	12
F#3-4/16	182.345	1
F#3-6/16	181.033	1
F3	174.614	11
E3	164.814	3
D#3	155.563	3
D3+2/16	147.896	1
D3	146.832	2
C#3	138.591	2
C3	130.813	1