Mt Wilson Sunspot database 1917-1985

From the readme:

Sunspot umbral position and area information were digitized from the Mount Wilson daily white-light solar images some years ago (Howard, Gilman, and Gilman, 1984). These photographic images exist in a series that extends from 1917 through the present time. The digitized data extend from 1917 through 1985. Details about the observations, the measurement procedure, and the analysis techniques used earlier may be obtained from the earlier reference (Howard, Gilman, and Gilman, 1984).

These data were first published in Howard, Gilman, and Gilman, 1984. The data are accessible online here.

In [28]:
%matplotlib inline
import numpy as np
import matplotlib
matplotlib.rcParams['font.size'] = 14

import matplotlib.pyplot as plt
from glob import glob

paths = glob('/Users/bmmorris/data/Mt_Wilson_Tilt/*/gspot??.dat')

from astropy.time import Time
import astropy.units as u
import string
from astropy.table import Table

def split_interval(string, n, cast_to_type=float):
    return [cast_to_type(string[i:i+n]) for i in range(0, len(string), n)]

base_time = Time('1915-01-01')
all_years_array = []

header = ("jd n_spots_leading n_spots_following n_spots_day_1 n_spots_day_2 "
          "rotation_rate latitude_drift area_weighted_latitude_day_1 area_weighted_longitude_day_1 "
          "area_weighted_longitude_day_2 area_day_1 area_day_2 tilt_day_1 delta_polarity_separation "
          "area_weighted_longitude_day_1_leading area_weighted_longitude_day_1_following "
          "area_weighted_latitude_day_1_leading area_weighted_latitude_day_1_following "
          "area_leading area_following area_weighted_longitude_day_2_leading " 
          "area_weighted_longitude_day_2_following delta_tilt").split()

for path in paths:
    f = open(path).read().splitlines()

    n_rows = len(f) // 3
    n_columns = 23
    yearly_array = np.zeros((n_rows, n_columns))

    for i in range(n_rows):
        # First five ints specify time, afterwards specify sunspot data
        int_list = split_interval(f[0+i*3][:18], 2, int)
        month, day, year_minus_1900, hour, minute = int_list[:5]
        year = year_minus_1900 + 1900
        jd = Time("{year:d}-{month:02d}-{day:02d} {hour:02d}:{minute:02d}"
                  .format(**locals())).jd
        row = [jd] + int_list[5:] + split_interval(f[1+i*3], 7) + split_interval(f[2+i*3][1:], 7)
        yearly_array[i, :] = row

    all_years_array.append(yearly_array)

table = Table(np.vstack(all_years_array), names=header)
WARNING: ErfaWarning: ERFA function "dtf2d" yielded 1 of "dubious year (Note 6)" [astropy._erfa.core]
In [17]:
table
Out[17]:
<Table length=36355>
jdn_spots_leadingn_spots_followingn_spots_day_1n_spots_day_2rotation_ratelatitude_driftarea_weighted_latitude_day_1area_weighted_longitude_day_1area_weighted_longitude_day_2area_day_1area_day_2tilt_day_1delta_polarity_separationarea_weighted_longitude_day_1_leadingarea_weighted_longitude_day_1_followingarea_weighted_latitude_day_1_leadingarea_weighted_latitude_day_1_followingarea_leadingarea_followingarea_weighted_longitude_day_2_leadingarea_weighted_longitude_day_2_followingdelta_tilt
float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64float64
2421232.781251.01.02.03.013.1510.41713.142-11.2490.31419.61513.818-25.35-0.372-10.382-11.39213.54213.0762.77816.8370.9190.269-2.74
2421232.781251.01.02.02.013.5680.46413.207-4.1127.8479.46820.93813.4680.11-3.352-4.20913.02913.2291.0688.47.8636.944-36.03
2421232.781251.03.04.012.012.1761.18413.70518.6129.24746.21658.587-2.1830.82919.61617.67813.74213.6722.22823.98830.83828.035-0.8
2421233.830562.01.03.02.013.8640.77813.5380.31413.61913.8189.298-28.0930.1470.9190.26913.85213.5150.94912.86914.26113.40412.33
2421233.830561.01.02.03.014.0730.09313.6487.84721.36820.93810.639-22.565-0.4347.8636.94413.65413.28320.5720.36621.39721.17-43.1
2421233.830562.010.012.05.014.6150.46614.82929.24743.32758.58733.791-2.981-0.78830.83828.03514.90914.76825.32233.26444.09942.146-9.04
2421234.797920.00.01.04.013.760.132-22.199-28.179-15.9053.5422.5730.00.00.00.00.00.00.00.00.0-16.1430.0
2421234.797921.01.02.03.014.281-0.38614.34113.61926.3949.29820.175-15.763-0.25114.26113.40414.51614.2822.3316.96726.75426.1364.41
2421234.797921.02.03.03.014.197-0.75113.74421.36834.06310.63914.355-65.6620.49321.39721.1713.80513.3199.2981.3434.4533.59430.02
2421234.797922.03.05.07.014.066-0.50615.3143.32755.89533.79179.416-12.0211.77444.09942.14615.46915.06820.43713.35558.33755.392-27.64
.....................................................................
2446388.254861.01.02.03.014.494-0.733-8.95221.78435.2371.7480.15-27.5150.92521.81621.414-8.969-8.7621.6080.13935.48235.114-47.08
2446388.254860.00.01.01.014.4350.2521.451-0.99312.4010.921.2660.00.00.00.00.00.00.00.00.00.00.0
2446389.251.03.04.03.015.528-0.143-9.63542.24556.0226.33.703-4.8590.12542.49540.972-9.656-9.5285.2661.03556.06654.4313.51
2446404.210423.01.04.06.015.263-0.41419.57426.95141.312.3594.3230.2730.32929.08725.79218.39920.2110.8291.5342.91539.0-10.74
2446413.320140.00.01.018.011.877-1.2784.156-9.6482.2684.27716.2190.00.00.00.00.00.00.00.00.0-0.6010.0
2446413.320142.01.03.09.014.186-0.071-9.377-34.565-20.1231.2528.443-16.9071.291-32.389-35.735-10.03-9.0260.4380.814-17.019-21.6471.29
2446414.413896.012.018.017.013.829-0.5572.7572.26815.6516.2199.338-25.858-0.3894.293-0.6013.7381.3699.5076.71217.70112.9984.56
2446414.413894.05.09.015.013.839-0.02-9.299-20.123-6.7298.44323.571-15.618-0.885-17.019-21.647-10.155-8.8792.7815.663-4.84-8.7123.51
2446415.4555611.06.017.011.014.713-0.7222.17715.6526.6649.33824.766-21.2960.95817.70112.9982.9761.1445.2674.07229.39923.858-1.34
2446417.192362.01.03.02.014.810.2640.96535.63549.5692.0550.145.77-1.19336.52435.4390.0521.1660.3711.68449.66949.469-102.12
In [29]:
fig, ax = plt.subplots(1, 2, figsize=(12, 5), sharey=True)
ax[0].plot(table['area_weighted_longitude_day_1'], 
           table['area_weighted_latitude_day_1'], 'k.', alpha=0.2)
ax[1].hist(table['area_weighted_latitude_day_1'], 100, 
           histtype='stepfilled', color='k', orientation='horizontal')

ax[0].set(xlabel='Longitude', ylabel='Latitude')
ax[1].set(xlabel='Frequency')
fig.subplots_adjust(wspace=0.05)
plt.show()
In [30]:
plt.hist(table['area_day_1'], 50, log=True, histtype='stepfilled', color='k')
plt.xlabel('Spot group area [$\mu$Hem]')
plt.ylabel('Frequency')
plt.show()
In [31]:
img, xedges, yedges = np.histogram2d(table['area_day_1'], 
                                     np.abs(table['area_weighted_latitude_day_1']), 50)
plt.imshow(np.log(img), cmap=plt.cm.viridis, interpolation='nearest', origin='lower')
plt.xlabel('| Latitude |')
plt.ylabel('Area [$\mu$Hem]')
plt.show()
/Users/bmmorris/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: RuntimeWarning: divide by zero encountered in log
  app.launch_new_instance()
In [32]:
unique_dates = np.sort(np.unique(table['jd']))


fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)#, projection='hammer')

n_days = 50
cmap = lambda x: plt.cm.viridis(x/n_days)
for i, day in enumerate(unique_dates[:n_days]):
    day = table[table['jd'] == day]

    ax.plot(day['area_weighted_longitude_day_1'], 
            day['area_weighted_latitude_day_1'], 'o', color=cmap(i))
ax.set_xlim([-60, 60])
ax.set_ylim([-60, 60])
ax.grid()
ax.set(xlabel='longitude', ylabel='latitude')
Out[32]:
[<matplotlib.text.Text at 0x11741ecf8>, <matplotlib.text.Text at 0x115393be0>]

Modeling the distribution of starspot areas

In [33]:
area_per_spot = table['area_day_1'].data/table['n_spots_day_1'].data
not_inf = ~np.isinf(area_per_spot)

plt.hist(area_per_spot[not_inf], 100, log=True, histtype='stepfilled', color='k')
plt.xlabel('Mean area per spot group [$\mu$Hem]')
plt.ylabel('Frequency')
plt.show()
/Users/bmmorris/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in true_divide
  if __name__ == '__main__':
In [34]:
alledges = np.logspace(np.log10(1), np.log10(80-10), 100)
bin_centers = 0.5*(alledges[:-1] + alledges[1:])
n_spots_per_area_bin = np.zeros_like(bin_centers)
n_spots_per_area_bin_err = np.zeros_like(bin_centers)

for i in range(len(bin_centers)):
    within_bin = ((alledges[i] < area_per_spot[not_inf]) & 
                  (alledges[i+1] > area_per_spot[not_inf]))
    n_spots_per_area_bin[i] = np.count_nonzero(within_bin)
    n_spots_per_area_bin_err[i] = np.sqrt(n_spots_per_area_bin[i])
    
fig, ax = plt.subplots()
#ax.set_yscale("log")#, nonposy='clip')
ax.set_xscale("log")
ax.errorbar(bin_centers, n_spots_per_area_bin, n_spots_per_area_bin_err, fmt='.')
Out[34]:
<Container object of 3 artists>
In [35]:
from scipy.optimize import leastsq

def fit_model(p, x):
    a, x0, sigma = p
    return a * np.exp(-0.5 * (x - x0)**2 / sigma**2)

log_bins = np.log(bin_centers)

def chi2(p, x):
    return (fit_model(p, x) - n_spots_per_area_bin) / n_spots_per_area_bin_err

initp = [600, 1.0, 1.0]
bestp, success = leastsq(chi2, initp, args=(log_bins,))

fig, ax = plt.subplots()
ax.errorbar(log_bins, n_spots_per_area_bin, n_spots_per_area_bin_err, fmt='k.')
ax.plot(log_bins, fit_model(bestp, log_bins), color='r', lw=2)
ax.set(xlabel="log(Mean area per spot group [$\mu$Hem])", ylabel="log(frequency)")
Out[35]:
[<matplotlib.text.Text at 0x113b89c50>, <matplotlib.text.Text at 0x116cdea58>]
In [36]:
n_samples = 100000
def draw_samples_from_area_dist(n_samples, lower_limit=1, upper_limit=10**4.5):
    samples = []
    while len(samples) < n_samples:
        sample = np.random.lognormal(mean=bestp[1], sigma=bestp[2])
        if sample > lower_limit and sample < upper_limit: 
            samples.append(sample)
    return np.array(samples)

fig, ax = plt.subplots()
ax.hist(np.log(draw_samples_from_area_dist(n_samples)), 100)
ax.set(xlabel="log( Spot area [$\mu$Hem] )", ylabel="freq")
plt.show()

Area in a hemisphere:

$$A_{hemisphere} = 2\pi R_*^2$$

Area in a spot in units of spot radii:

$$A_{spot} = \pi R_{spot}^2$$

Area in a spot in units of hemispheres:

$$ A_{sp,hem} = \frac{\pi R_{spot}^2}{2\pi R_*^2} = \frac{1}{2} \left(\frac{R_{spot}}{R_*}\right)^2$$

Radius of a spot in units of stellar radii given the spot area in hemispheres:

$$ R_{spot} / R_* = \sqrt{2 A_{sp,hem}} $$

How big is a typical sunspot compared to HAT-P-11's proportional cross-section?

In [37]:
rspot_rstar = np.sqrt(1e-6 * 2 * np.exp(log_bins))

rp_rs = 0.058330305324663184

fig, ax = plt.subplots()
ax.errorbar(rspot_rstar / rp_rs, n_spots_per_area_bin, n_spots_per_area_bin_err, fmt='k.')
ax.plot(rspot_rstar / rp_rs, fit_model(bestp, log_bins), color='r', lw=2)
ax.set(xlabel="$R_{sp}/R_{planet}$", ylabel="frequency")
Out[37]:
[<matplotlib.text.Text at 0x117003668>, <matplotlib.text.Text at 0x117c5cfd0>]

How big is a typical sunspot ?

In [38]:
fig, ax = plt.subplots()
ax.errorbar(rspot_rstar, n_spots_per_area_bin, n_spots_per_area_bin_err, fmt='k.')
ax.plot(rspot_rstar, fit_model(bestp, log_bins), color='r', lw=2)
ax.set(xlabel="$R_{sp}/R_{*}$", ylabel="frequency")
Out[38]:
[<matplotlib.text.Text at 0x117419eb8>, <matplotlib.text.Text at 0x11700f828>]

How big is a sunspot in physical units:

In [49]:
from astropy.constants import R_sun
import astropy.units as u

fig, ax = plt.subplots()

r_spot_km = (rspot_rstar * R_sun).to(1000 * u.km).value

ax.errorbar(r_spot_km, n_spots_per_area_bin, n_spots_per_area_bin_err, fmt='k.')
ax.plot(r_spot_km, fit_model(bestp, log_bins), color='r', lw=2)
ax.set(xlabel="$R_{sp}$ [1000 * km]", ylabel="frequency")

# for l in ax.get_xticklabels():
#     l.set_rotation(45)
#     l.set_ha('right')
Out[49]:
[<matplotlib.text.Text at 0x11638ab00>, <matplotlib.text.Text at 0x1166a7978>]

Butterfly diagram

In [83]:
latitude_grid = np.linspace(-45, 45, 80)
time_grid = Time(np.linspace(Time('1917-01-01').jd, Time('1985-01-01').jd, 200), format='jd')

butterfly_grid = np.zeros((len(latitude_grid)-1, len(time_grid)))

for i in range(len(time_grid)-1):
    within_time_bin = ((table['jd'] > time_grid[i].jd) & 
                       (table['jd'] < time_grid[i+1].jd))
    spots_in_time_bin = table[within_time_bin]
    freq, edges = np.histogram(spots_in_time_bin['area_weighted_latitude_day_1'], bins=latitude_grid)
    butterfly_grid[:, i] = freq

fontsize = 14
fig, ax = plt.subplots(figsize=(14, 4))
cax = ax.imshow(np.log10(1 + butterfly_grid), cmap=plt.cm.viridis, 
                interpolation='nearest', origin='lower')
cbar = fig.colorbar(cax, pad=0.01)#, ticks=[-1, 0, 1])
cbar.ax.set_ylabel('$\log_{10} \, N_{spots}$', fontsize=fontsize)
xtickskip = 15
ax.set_xticks(list(range(len(time_grid)))[::xtickskip])
ax.set_xticklabels([i.strftime("%Y") 
                    for i in time_grid[::xtickskip].datetime])

desired_yticks = np.arange(-45, 60, 15)
ax.set_yticks(np.interp(desired_yticks, latitude_grid, np.arange(len(latitude_grid))))
ax.set_yticklabels(desired_yticks)

ax.grid(color='silver', ls=':')
ax.set_xlabel('Year', fontsize=fontsize) 
ax.set_ylabel('Latitude [degrees]', fontsize=fontsize)

fig.savefig("butterfly.png", bbox_inches='tight', dpi=200)
plt.show()
WARNING: ErfaWarning: ERFA function "dtf2d" yielded 1 of "dubious year (Note 6)" [astropy._erfa.core]
WARNING: ErfaWarning: ERFA function "d2dtf" yielded 9 of "dubious year (Note 5)" [astropy._erfa.core]