%reload_ext autoreload
%autoreload 2
%matplotlib inline
from aws_setup import *
# Base name; the EFS tag and the instance name below are derived from it.
vpc_name = 'fast-ai'
# Look up the VPC by name, then echo it as the cell's displayed value.
vpc = get_vpc(vpc_name)
vpc
ec2.Vpc(id='vpc-6e6b2a17')
# Create the EFS, tagged after the VPC name, using the 'maxIO' performance mode.
efs_tag = f'{vpc_name}-efs'
efs = create_efs(efs_tag, vpc, performance_mode='maxIO')
instance_name = f'{vpc_name}-instance'
# A high-compute instance type is recommended because the image resizing
# done later on is multi-threaded.
instance_type = 'c5.4xlarge'
spot_price = get_spot_prices()[instance_type]
# Bid three times the current spot price, formatted to four decimal places.
bid_price = f'{float(spot_price) * 3:.4f}'
print(f'Spot price: {spot_price}, Bid price: {bid_price}')
'0.301400'
# Build the launch specification for the chosen instance type.
launch_specs = LaunchSpecs(vpc, instance_type=instance_type).build()
# Override the volume size of the first block device mapping
# (VolumeSize is expressed in GiB by the EC2 API).
ebs_settings = launch_specs['BlockDeviceMappings'][0]['Ebs']
ebs_settings['VolumeSize'] = 1000
# Echo the final specification as the cell's displayed value.
launch_specs
{'BlockDeviceMappings': [{'DeviceName': '/dev/sda1', 'Ebs': {'DeleteOnTermination': True, 'VolumeSize': 1000, 'VolumeType': 'gp2'}}], 'ImageId': 'ami-8c4288f4', 'InstanceType': 'c5.4xlarge', 'KeyName': 'aws-key-fast-ai', 'NetworkInterfaces': [{'AssociatePublicIpAddress': True, 'DeviceIndex': 0, 'Groups': ['sg-f60fca88'], 'SubnetId': 'subnet-f056ff89'}]}
# Request a spot instance with the launch specification, bidding bid_price,
# then echo the resulting instance as the cell's displayed value.
instance = create_spot_instance(instance_name, launch_specs, spot_price=bid_price)
instance
Waiting on spot fullfillment... Fullfillment completed. InstanceId: i-0cabe3a45ec1ef32c Rebooting... Completed. SSH: ssh -i ~/.ssh/aws-key-fast-ai.pem ubuntu@54.202.209.226
ec2.Instance(id='i-0cabe3a45ec1ef32c')
# Alternative to launching a new instance: look it up by name instead
# (presumably for re-attaching to one that already exists -- confirm).
# instance = get_instance(instance_name); instance
# Get the SSH command for this instance.
get_ssh_command(instance)
# Open the connection that the remote commands and uploads below go through.
client = connect_to_instance(instance)
Connecting to SSH... Connected!
# Resolve the address of the EFS by its tag. Reuse efs_tag instead of repeating
# the literal 'fast-ai-efs', so the lookup stays correct if vpc_name changes.
efs_addr = get_efs_address(efs_tag)
efs_addr
'fs-0ea233a7.efs.us-west-2.amazonaws.com'
# Create the mount point. -p keeps this safe to re-run: a plain mkdir reports
# an error when the directory already exists.
_ = run_command(client, 'mkdir -p ~/efs_mount')
# Mount the root of the EFS at ~/efs_mount over NFS 4.1.
efs_mount_cmd = f'sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 {efs_addr}:/ ~/efs_mount'
_ = run_command(client, efs_mount_cmd)
# List the mount point to check what is already stored on the EFS.
_ = run_command(client, 'ls efs_mount') # no reformatting
run_command returned: efs_saved.txt
# Session object named 'sess'; the download and formatting scripts are run
# through it further down.
tsess = TmuxSession(client, 'sess')
# Create the remote credentials directory. -p keeps this safe to re-run:
# a plain mkdir reports an error when ~/.kaggle already exists.
_ = run_command(client, 'mkdir -p ~/.kaggle')
# Copy the local Kaggle API credentials to the matching path on the instance.
# NOTE(review): the Kaggle CLI warns when kaggle.json is readable by other
# users; consider a `chmod 600` afterwards -- confirm whether the download
# script already takes care of it.
kaggle_file = Path.home()/'.kaggle/kaggle.json'
upload_file(client, str(kaggle_file), '.kaggle/kaggle.json')
('', '')
# Upload the ImageNet download script from the local upload_scripts directory.
download_kaggle_file = Path.cwd().joinpath('upload_scripts/download_kaggle_imagenet.sh')
upload_file(
    client,
    str(download_kaggle_file),
    'download_kaggle_imagenet.sh',
)
('', '')
# Run the uploaded download script through the TmuxSession.
tsess.run_cmd('bash download_kaggle_imagenet.sh')
('', '')
# imagenet_formatting.sh relies on this script for multi-threaded resizing;
# the methods in resize_images.py are taken from fast.ai's dataset.py.
upload_path = Path.cwd().joinpath('upload_scripts/resize_images.py')
upload_file(
    client,
    str(upload_path),
    'resize_images.py',
)
('', '')
# Upload the formatting script; it creates image sizes 80, 160, 320 and 375
# and stores the files in EFS.
upload_path = Path.cwd().joinpath('upload_scripts/imagenet_formatting.sh')
upload_file(
    client,
    str(upload_path),
    'imagenet_formatting.sh',
)
('', '')
# Run the uploaded formatting script through the TmuxSession.
tsess.run_cmd('bash imagenet_formatting.sh')