#!/usr/bin/env python
# coding: utf-8

# In[1]:

import operator
import pathlib
import re
from typing import Optional

import pandas as pd

# Parsing options shared by ``pd.read_html`` and ``pd.read_csv``: with the
# default NA markers switched off, only the empty string is listed as missing.
FORMAT = {'na_values': '', 'keep_default_na': False}

# Per-country source description; each dict is splatted into read_bells_html().
CH = {'country_code': 'CH',
      'url': 'https://de.wikipedia.org/wiki/Liste_der_gr%C3%B6ssten_Glocken_der_Schweiz',
      'format': {'index_col': 'Name', **FORMAT},
      'note_col': 'Schlagton (HT-1/16)'}

DE = {'country_code': 'DE',
      'url': 'https://de.wikipedia.org/wiki/Liste_von_Glocken_in_Deutschland',
      'format': {'index_col': 'Name', **FORMAT},
      'note_col': 'Ton:'}

FR = {'country_code': 'FR',
      'url': 'https://fr.wikipedia.org/wiki/Liste_des_bourdons_de_France',
      'format': {'index_col': 'Nom', **FORMAT},
      'note_col': 'Note (éventuelle justesse en 16e de ton)'}

# The twelve pitch classes, spelled with sharps only, starting at C.
NOTES = [note
         for c_note in 'CDEFGAB'
         for sign in ('', '#')
         for note in [f'{c_note}{sign}']
         if note not in ('E#', 'B#')]

assert len(NOTES) == 12

# Note names indexed by MIDI note number (0 = 'C-1' ... 127 = 'G9').
MIDI_NOTES = [midi_note
              for octave in range(-1, 10)
              for note in NOTES
              for midi_note in [f'{note}{octave:d}']
              if midi_note not in ('G#9', 'A9', 'A#9', 'B9')]

assert len(MIDI_NOTES) == 128
assert MIDI_NOTES[60] == 'C4'
assert MIDI_NOTES[69] == 'A4'

pd.Series(MIDI_NOTES, dtype='string').to_frame('midi_note')


# In[2]:

def iterenharmonic(raw_c_notes='abcdefg'):
    """Yield pairs of raw note name and corresponding canonical representation.

    Raw names are the lower-case letters of ``raw_c_notes``, the extra name
    ``'h'``, and each letter suffixed with ``'_sharp'`` or ``'_flat'``.
    Canonical names are the sharps-only spellings used in ``NOTES``.
    """
    for raw_note in raw_c_notes:
        yield raw_note, raw_note.upper()

    yield 'h', 'B'

    for raw_note in raw_c_notes:
        # B# and E# have no entry of their own in NOTES; they are C and F.
        sharp = ('C' if raw_note == 'b'
                 else 'F' if raw_note == 'e'
                 else f'{raw_note[0].upper()}#')
        yield f'{raw_note}_sharp', sharp

    # Pair every letter with its predecessor (wrapping a -> g) so that a flat
    # can be spelled as the sharp of the letter below.
    for raw_note, enh_note in zip(raw_c_notes, raw_c_notes[-1:] + raw_c_notes[:-1]):
        flat = ('B' if raw_note == 'c'
                else 'E' if raw_note == 'f'
                else f'{enh_note.upper()}#')
        yield f'{raw_note}_flat', flat


CANONICAL = dict(iterenharmonic())

assert set(CANONICAL.values()) == set(NOTES)

pd.Series(CANONICAL, dtype='string').to_frame('note')


# In[3]:

def read_bells_html(country_code, url, format, *, note_col, encoding='utf-8'):
    """Return the bell table for one country, cached as a local CSV file.

    If ``bells_<country_code lower-cased>.csv`` does not exist in the current
    working directory, all tables that ``pd.read_html`` finds at ``url`` are
    concatenated and written there first. The result is always read back from
    the CSV file.

    Args:
        country_code: Country code used to build the cache file name.
        url: Page to scrape when no cache file exists.
        format: Keyword arguments passed to both ``pd.read_html`` and
            ``pd.read_csv``.
        note_col: Accepted so the country dicts can be splatted in; unused.
        encoding: Encoding of the cache file.

    Returns:
        The table as a ``DataFrame`` after ``convert_dtypes()``.
    """
    del note_col
    path = pathlib.Path(f'bells_{country_code.lower()}.csv')
    if not path.exists():
        df = pd.concat(pd.read_html(url, **format))
        df.convert_dtypes().to_csv(path, encoding=encoding)
    return pd.read_csv(path, encoding=encoding, **format).convert_dtypes()


ch = read_bells_html(**CH)
de = read_bells_html(**DE)
fr = read_bells_html(**FR)

ch.info()
de.info()
fr.info()


# In[4]:

# The concatenated French tables use different headers for the same data:
# fold the alternative columns into the primary ones, then drop them.
fr['Poids en kg'] = fr['Poids en kg'].fillna(fr['Masse (en kg)'])
fr[FR['note_col']] = fr[FR['note_col']].fillna(fr["Note (diapason de l'époque)"])
fr['Date'] = fr['Date'].fillna(fr['Année'])
fr.drop(['Masse (en kg)', "Note (diapason de l'époque)", 'Année'],
        axis='columns', inplace=True)
fr.info()


# In[5]:

# Note cell as used in the German-language tables. The group names are the
# fields that get_note() reads from each extracted row.
BELL_NOTE = re.compile(r'''
    (?P<base>[a-h])  # a0 = A3
    (?:
      (?P<sharp>is)
      |
      (?P<flat>e?s)
    )?
    [ \N{NO-BREAK SPACE}]?
    (?P<primes>[0-7]|['′]{,7}|º)
    (?:
      (?:
        [ \N{NO-BREAK SPACE}]?
        (?:
          (?P<pos>[+])
          |
          (?P<neg>[-\u2013\u2212])
        )
        (?P<delta>1[0-6]|[1-9])
        (?:/16)?
      )?
    )?
    (?:
      (?:\[\d\])  # footnote
      |
      (?:,.*)
    )?
    ''', flags=re.VERBOSE)

de[DE['note_col']].str.extract(BELL_NOTE).fillna('').head(10)


# In[6]:

# Note cell as used in the French table (solfège names, numbered octaves).
BELL_NOTE_FR = re.compile(r'''
    (?P<base>Do|R(?:e|é)|Mi|Fa|Sol|La|Si)  # La2 = A3
    [ ]?
    (?:
      (?P<sharp>[#])
      |
      (?P<flat>♭|b)
    )?
    [ ]?
    (?P<octave_fr>[1-4]|²)
    (?:
      [ ]
      \(?
      (?:
        (?P<pos>[+])
        |
        (?P<neg>[-])
      )
      (?P<delta>1[0-6]|[1-9])
      (?:,\d+)?  # ignore /16
      °?
      \)?
    )?
    (?:
      [ ]
      \(?
      (?:haut|bas)
      \)?
    )?
    ''', flags=re.VERBOSE)

# French solfège name -> raw letter name understood by CANONICAL.
BASE_FR = {'Do': 'c', 'Re': 'd', 'Ré': 'd', 'Mi': 'e', 'Fa': 'f',
           'Sol': 'g', 'La': 'a', 'Si': 'b'}

fr[FR['note_col']].str.extract(BELL_NOTE_FR).fillna('').head(10)


# In[7]:

# Output format of get_note(): pitch class, optional octave, optional
# signed deviation in sixteenths.
MIDI_NOTE_OPTIONAL_DELTA = re.compile(r'''
    (?P<midi_note>
      [A-G]
      [#]?
      (?:-1|[0-9])?
    )
    (?:
      (?P<delta>
        [+-]
        (?:1[0-6]|[1-9])
      )
      /16
    )?
    ''', flags=re.VERBOSE)


def get_note(match: pd.Series, *, french: bool, as_midi: bool,
             include_delta: bool) -> Optional[str]:
    """Convert one extracted regex row into a canonical note string.

    Args:
        match: Row of ``Series.str.extract`` for ``BELL_NOTE`` or
            ``BELL_NOTE_FR`` with missing groups replaced by ``''``.
        french: Whether the row comes from ``BELL_NOTE_FR``.
        as_midi: Append the octave number to the note name.
        include_delta: Append a non-zero deviation as ``'+n/16'``/``'-n/16'``.

    Returns:
        A string matching ``MIDI_NOTE_OPTIONAL_DELTA``, or ``None`` when the
        row has no base note (the pattern did not match).
    """
    base = match.base
    if not base:
        return None

    if french:
        base = BASE_FR[base]
    elif base == 'h':
        # CANONICAL maps plain 'h' and plain 'b' to the same note but only
        # knows accidentals for 'b'; aliasing keeps e.g. 'his' from raising
        # KeyError.
        # NOTE(review): in German note naming a plain 'b' usually denotes
        # B flat, yet it is mapped to 'B' here - confirm this is intended.
        base = 'b'

    if match.sharp:
        base += '_sharp'
    elif match.flat:
        base += '_flat'

    note = CANONICAL[base]

    if french:
        octave_fr = (2 if match.octave_fr == '²'
                     else int(match.octave_fr) if match.octave_fr
                     else 0)
        octave = 1 + octave_fr  # La2 = A3
    else:
        primes = (int(match.primes) if match.primes.isdigit()
                  else len(match.primes) if match.primes.startswith(("'", '′'))
                  else 0 if match.primes == 'º'
                  else 0)
        octave = 3 + primes  # a0 = A3

    # An enharmonic spelling across the B/C boundary sounds in the
    # neighbouring octave: Cb4 is B3 and B#3 is C4.
    if base == 'c_flat':
        octave -= 1
    elif base == 'b_sharp':
        octave += 1

    if match.delta:
        # A deviation without a recognised minus sign counts as positive.
        sign = '+' if match.pos or not match.neg else '-'
        delta = int(sign + match.delta)
    else:
        delta = 0

    # Keep the deviation within -7..+8 sixteenths by moving to the adjacent
    # semitone, carrying into the neighbouring octave where necessary.
    if delta < -7:
        index = NOTES.index(note) - 1
        if index < 0:
            octave -= 1
        note = NOTES[index % len(NOTES)]
        delta += 16
    elif delta > 8:
        index = NOTES.index(note) + 1
        if index >= len(NOTES):
            octave += 1
        note = NOTES[index % len(NOTES)]
        delta -= 16

    if as_midi:
        note += str(octave)

    if include_delta and delta:
        note += f'{delta:+d}/16'

    assert MIDI_NOTE_OPTIONAL_DELTA.fullmatch(note)
    return note


def to_notes(series, *, french=False, as_midi=False, include_delta=False,
             verbose=False):
    """Map a series of raw note cells to canonical note strings.

    Values that do not fully match the selected pattern are reported on
    stdout as ``missed: [...]``. They are still passed through
    ``str.extract``, so a partial match may yield a note for them.

    Args:
        series: String series of raw note cells.
        french: Use ``BELL_NOTE_FR`` instead of ``BELL_NOTE``.
        as_midi: Forwarded to ``get_note``.
        include_delta: Forwarded to ``get_note``.
        verbose: Print all sorted input values, separated by ``|``, first.

    Returns:
        A series with the result of ``get_note`` for every input row.
    """
    if verbose:
        print(*sorted(series), sep='|')

    pattern = BELL_NOTE_FR if french else BELL_NOTE

    missed = series[~series.str.fullmatch(pattern)]
    if not missed.empty:
        print(f'missed: {missed.tolist()}')

    matches = series.str.extract(pattern).fillna('')
    return matches.apply(get_note, axis='columns', french=french,
                         as_midi=as_midi, include_delta=include_delta)


assert pd.Series(['gis0+2', 'a0+1/16']).pipe(to_notes).equals(pd.Series(['G#', 'A']))


# In[8]:

FIGSIZE = (6 * 72 / 100, 4 * 72 / 100)


def note_stats(bell_notes, *, french: bool = False):
    """Draw a bar chart of the number of bells per canonical note.

    Args:
        bell_notes: Series of raw note cells, one per bell.
        french: Parse the cells with the French pattern.
    """
    (bell_notes.value_counts().to_frame('n_bells')
     .assign(note=lambda x: x.index.to_series().pipe(to_notes, french=french))
     .groupby('note')['n_bells'].sum().to_frame('n_bells')
     .plot.bar(figsize=FIGSIZE))


# In[9]:

note_stats(de[DE['note_col']])


# In[10]:

note_stats(ch[CH['note_col']])


# In[11]:

note_stats(fr[FR['note_col']], french=True)


# In[12]:

def get_frequency(midi_note_optional_delta: str, *,
                  pitch_reference: float = 440) -> float:
    """Return the frequency of a note string, rounded to three decimals.

    Args:
        midi_note_optional_delta: Note name with octave as listed in
            ``MIDI_NOTES``, optionally followed by a signed deviation such
            as ``'+6/16'``; the deviation is added to the MIDI note number
            as sixteenths.
        pitch_reference: Frequency assigned to MIDI note number 69.

    Returns:
        ``2 ** ((number - 69) / 12) * pitch_reference`` rounded to 3 places.

    Raises:
        ValueError: If the string does not match
            ``MIDI_NOTE_OPTIONAL_DELTA`` or the note is not in
            ``MIDI_NOTES`` (for example because the octave is missing).
    """
    parsed = MIDI_NOTE_OPTIONAL_DELTA.fullmatch(midi_note_optional_delta)
    if parsed is None:
        raise ValueError('not a MIDI note with optional delta:'
                         f' {midi_note_optional_delta!r}')
    midi_note, delta = parsed.groups()

    try:
        midi_number = MIDI_NOTES.index(midi_note)
    except ValueError:
        # The pattern also accepts a bare pitch class, which has no number.
        raise ValueError('unknown MIDI note (octave missing or out of range):'
                         f' {midi_note!r}') from None

    if delta:
        midi_number += int(delta) / 16

    frequency = 2 ** ((midi_number - 69) / 12) * pitch_reference
    return round(frequency, 3)


assert get_frequency('A4') == 440
assert get_frequency('G#3') == 207.652
assert get_frequency('G#3-8/16') == 201.741
assert round(get_frequency('E4+6/16', pitch_reference=435), 1) == 333


def frequency_stats(bell_notes, *, plot='bar', french=False,
                    include_deltas=False, pitch_reference=440):
    """Plot and tabulate the number of bells per frequency.

    Args:
        bell_notes: Series of raw note cells, one per bell.
        plot: Name of the plot accessor method to call, e.g. ``'bar'``.
        french: Parse the cells with the French pattern.
        include_deltas: Keep the deviation in sixteenths when converting.
        pitch_reference: Forwarded to ``get_frequency``.

    Returns:
        A frame indexed by ``midi_note`` with the columns ``frequency`` and
        ``n_bells``, sorted by descending frequency.
    """
    df = (bell_notes.value_counts().to_frame('n_bells')
          .assign(midi_note=lambda x: x.index.to_series()
                  .pipe(to_notes, french=french, as_midi=True,
                        include_delta=include_deltas))
          # Cells that could not be parsed have no note and are left out.
          .dropna()
          .assign(frequency=lambda x: x['midi_note']
                  .apply(get_frequency, pitch_reference=pitch_reference)))

    # Look the plot kind up by name on the frame's plot accessor.
    operator.methodcaller(plot, figsize=FIGSIZE)(
        df.groupby('frequency')['n_bells'].sum().to_frame('n_bells').plot)

    return (df.groupby(['midi_note', 'frequency'])['n_bells'].sum()
            .to_frame('n_bells')
            .reset_index('frequency')
            .sort_values(by='frequency', ascending=False))


# In[13]:

frequency_stats(de[DE['note_col']])


# In[14]:

frequency_stats(ch[CH['note_col']])


# In[15]:

frequency_stats(fr[FR['note_col']], french=True)


# In[16]:

frequency_stats(de[DE['note_col']], include_deltas=True, plot='area').head(60)


# In[17]:

frequency_stats(ch[CH['note_col']], include_deltas=True).head(40)


# In[18]:

frequency_stats(fr[FR['note_col']], french=True, include_deltas=True)