#!/usr/bin/env python # coding: utf-8 # # Loading "tele-software" from 1984 # # This notebook is available as a [public gist](https://gist.github.com/rjw57/1fde826a1d77806b7f64bdd19a058586). If you like it, feel free to [let me know](https://twitter.com/richwareham) on Twitter. # # Recently my attention was drawn on Twitter towards a video from a 1984 episode of the Thames Television programme "Database" on sending e-mail via the phone line. What's most interesting is that the episode includes an experimental piece of "tele-software" which is broadcast as an audio signal over the end credits. # # You can watch the video here: # In[1]: from IPython.display import YouTubeVideo YouTubeVideo('szdbKz5CyhA') # I wondered how hard it would be to load (and run) this software from 1984. I knew the software was for the [BBC Micro](https://en.wikipedia.org/wiki/BBC_Micro) which was a machine I had growing up. It'd be a nice nostalgia trip to try and load some software again. # # The first thing to do is to download the video and snip out the audio section. Below I've got a little snippet of bash script (assuming you're running on a Unix-like machine) which will download and snip out the audio. If you're not running on a Unix machine, you can download the video and use a program like [Audacity](http://www.audacityteam.org/) to save the audio segment. **YOU MUST SAVE IT AS AN UNCOMPRESSED WAV FILE.** Python cannot load compressed WAV files. # In[2]: get_ipython().run_cell_magic('bash', '', '# You must have yourtube-dl and ffmpeg installed to run this script\n\n# Change to a suitable download directory\ncd ~/Downloads\n\n# Download video\n[ -f video.mp4 ] || youtube-dl -o video.mp4 szdbKz5CyhA\n\n# Tele-software starts at around 5:38. 
Use ffmpeg to snip out the relevent section and to\n# convert the audio track to an uncompressed wav.\n[ -f database-telesoftware.m4a ] || ffmpeg -i video.mp4 -ss 5:38.5 -vn -acodec copy database-telesoftware.m4a\n[ -f database-telesoftware.wav ] || ffmpeg -i database-telesoftware.m4a -acodec pcm_s16le database-telesoftware.wav\n') # Let's check that our audio segment was clipped out correctly. Here I'm embedding the compressed AAC audio into the notebook to keep the size of the notebook down but you should be able to load the WAV file in the same way: # In[3]: import os from IPython.display import Audio Audio(os.path.expanduser('~/Downloads/database-telesoftware.m4a')) # Now we have the audio data, we can use the Python [wave](https://docs.python.org/3/library/wave.html) module from the standard library to load it. # In[4]: import wave import contextlib import numpy as np # Where is the audio file on disk? audio_fn = os.path.expanduser('~/Downloads/database-telesoftware.wav') # Use contextlib's closing wrapper on wave.open() since the returned object must # have .close() called on it with contextlib.closing(wave.open(audio_fn)) as wf: # Record the sample rate, number of channels and load the raw samples as # little-endian 16-bit values sample_rate = wf.getframerate() n_channels = wf.getnchannels() samples = np.fromstring(wf.readframes(wf.getnframes()), dtype=' The signal may be in one of three states; zero, one or no carrier. # In[13]: # Create an array to hold the decoded bit for each input sample. Represent a # "zero" by 0, "one" by 1 and "no carrier" by -1. bits = np.zeros(samples.shape[0], dtype=np.int8) bits[one_response > zero_response] = 1 bits[np.maximum(zero_response, one_response) < 500] = -1 # Let's look at the decoded bits for all of the signal. # In[14]: plt.plot(ts[::100], bits[::100]) plt.ylim(-1.1, 1.1) plt.grid(True) plt.xlabel('Time [s]') plt.ylabel('Detected bit') # We now want to find where in the signal the data actually starts. 
We know from the documentation that: # # > To allow the tape deck circuitry to settle, each data stream is preceded by 5.1 seconds of 2400 Hz leader tone. # # So data starts at the first "zero" bit after around 5 seconds of "one" bits. We can write a quick loop to examine every "zero" bit and find the first one which is preceded by 5 seconds of "one"s. # In[15]: leader_len = int(5 * sample_rate) print('Leader length (samples):', leader_len) # look for leader start zero_idxs = np.nonzero(bits == 0)[0] found_leader = False for idx in zero_idxs: if idx < leader_len: continue if np.all(bits[idx-leader_len:idx] == 1): found_leader = True break data_start = idx print('Found leader:', found_leader) print('Data start index:', data_start) # ## Data layer # # We've now found the start of the data. We now need to convert from the on-tape data storage format to a list of bytes we can interpret. From the documentation: # # > Data is recorded asynchronously on the tape in 8-bit bytes. Each byte consists of one start bit (a zero), 8 data bits lowest first, and one stop bit (a one). # # We can calculate the number of samples for one data bit. It will be the number of samples in one cycle of a 1200Hz sinusoid. We then loop through the per-sample bit labels one data bit's number of samples at a time looking for regions which match the description above. Then we extract the bytes and add them to a list. Finally, we form a Python ``bytes`` object from them. # In[16]: one_bit_len = int(sample_rate / 1200) start_idx = data_start idx = start_idx + (one_bit_len>>1) bs = [] while idx < bits.shape[0]: if bits[idx] != 0 or bits[idx + one_bit_len*9] != 1: break byte = 0 for j in range(8): byte += int(bits[idx+(1+j)*one_bit_len]) * (1<>1 bs = bytes(bs) print("Parsed {} bytes".format(len(bs))) # Let's look at the parsed bytes: # In[17]: print(repr(bs)) # There's definitely recognisable data there. 
# ## Block layer
#
# The [Acorn Cassette Format](http://beebwiki.mdfs.net/Acorn_cassette_format) consists of several data blocks. Each block records information on a block of data within each file. We can write a function based on the description on the wiki page to parse each block from raw bytes.

# In[18]:


from collections import deque, namedtuple
import codecs

CassetteBlock = namedtuple('CassetteBlock', 'filename load_addr exec_addr block_num block_flag data')


def parse_acorn_cassette_block(block):
    """Parse one Acorn Cassette Format block from a sequence of bytes.

    Args:
        block: An iterable of integer byte values, e.g. a ``bytes`` object or
            the deque returned by a previous call. Any bytes before the first
            synchronisation byte (0x2a) are skipped.

    Returns:
        A ``(CassetteBlock, deque)`` tuple containing the parsed block and the
        remaining unparsed bytes.

    Raises:
        IndexError: If no synchronisation byte is present or the input ends
            before the block is complete.
        UnicodeDecodeError: If the filename is not ASCII.

    Note: this function does not check CRCs. In this application, if we've
    received the data incorrectly, there's not much we can do beyond flipping
    bits.

    """
    block = deque(block)

    def pop_bytes(n_bytes):
        """Remove and return the next n_bytes bytes from the front of block."""
        if len(block) < n_bytes:
            raise IndexError(
                'cassette block truncated: wanted {} bytes, {} left'.format(
                    n_bytes, len(block)))
        return bytes(block.popleft() for _ in range(n_bytes))

    def popint(n_bytes):
        """Remove and return the next n_bytes bytes as a little-endian int."""
        return int.from_bytes(pop_bytes(n_bytes), 'little')

    # Look for synchronisation byte, discarding everything before it.
    try:
        while block.popleft() != 0x2a:
            pass
    except IndexError:
        raise IndexError('no synchronisation byte (0x2a) found') from None

    # Parse the NUL-terminated filename.
    fn_chars = bytearray()
    while True:
        if not block:
            raise IndexError('cassette block truncated inside filename')
        fn_char = block.popleft()
        if fn_char == 0x00:
            break
        fn_chars.append(fn_char)
    filename = codecs.decode(bytes(fn_chars), 'ascii')

    # Parse load address, execution address and block number
    load_addr = popint(4)
    exec_addr = popint(4)
    block_num = popint(2)

    # Parse data block length
    data_len = popint(2)

    # Extract block flag byte
    block_flag = popint(1)

    # Address of next file: consumed but not used.
    pop_bytes(4)

    # Header CRC (stored *high byte first*): consumed but not verified.
    pop_bytes(2)

    data = pop_bytes(data_len)

    # The data CRC is only read when the block carries data; like the header
    # CRC it is consumed but not verified.
    if data_len > 0:
        pop_bytes(2)

    record = CassetteBlock(
        filename=filename, load_addr=load_addr, exec_addr=exec_addr,
        block_num=block_num, block_flag=block_flag, data=data,
    )

    return record, block


# Now we iterate through each block and build up a dictionary mapping filenames to file contents.
# In[19]:


# A dict which will map file names to bytes-objects with their contents.
file_data = {}

raw_data = bs
while raw_data:
    try:
        block, raw_data = parse_acorn_cassette_block(raw_data)
    except IndexError:
        # The parser runs out of bytes (IndexError) when what is left holds no
        # complete block, e.g. noise decoded after the final block. Keep the
        # blocks parsed so far rather than aborting the whole cell.
        print('Ignoring {} trailing bytes which do not form a complete block'.format(len(raw_data)))
        break
    # Blocks of the same file are appended in the order they appear on tape.
    file_data[block.filename] = file_data.get(block.filename, bytes()) + block.data


# Which files have we in the tele-software download?

# In[20]:


print(', '.join(file_data))


# Just one file named ``THAMES``. We can write all the files out to the filesystem.

# In[21]:


download_dir = os.path.expanduser('~/Downloads/')
for fn, data in file_data.items():
    out_path = os.path.join(download_dir, fn)
    print('Writing length {} file {} to {}'.format(len(data), fn, out_path))
    with open(out_path, 'wb') as fobj:
        fobj.write(data)


# Our file is only 923 bytes long. A short BASIC program perhaps?

# ## Running the program

# To run the program, I used [BeebEm](http://www.mkw.me.uk/beebem/). This emulator has a nice feature where you can import a file from the host machine to an emulated disk. You can also save screenshots. Let's see if the ``THAMES`` file loads into BASIC.

# In[22]:


from IPython.display import Image
Image(os.path.expanduser('~/Downloads/beebem-cat.png'))


# So far, so good. We can ``LIST`` the program to make sure that it is BASIC.

# In[23]:


Image(os.path.expanduser('~/Downloads/beebem-list-1.png'))


# In[24]:


Image(os.path.expanduser('~/Downloads/beebem-list-2.png'))


# In[25]:


Image(os.path.expanduser('~/Downloads/beebem-list-3.png'))


# It's definitely a BASIC program. Let's try ``RUN``-ing it.

# In[26]:


Image(os.path.expanduser('~/Downloads/beebem-scrn-1.png'))


# A competition? After pressing the space bar, we get a crossword.
# In[27]:


Image(os.path.expanduser('~/Downloads/beebem-scrn-2.png'))


# After a bit of head-scratching, consulting with work colleagues and Googling song lyrics, I got the solution:
#
#        D
#      C R
#     POLICE
#      R N
#     TANKER
#      L
#
# It turns out that if you Google these words, one finds [another brave soul](http://marnanel.dreamwidth.org/361588.html) on the Internet who wrote their own demodulator. (Their frequency detection was far simpler than mine.) At least they [got the same answers](http://marnanel.livejournal.com/1651435.html).