#!/usr/bin/env python
# coding: utf-8

# # 8. 텍스트와 바이너리 파일

# ## 8.1 두 가지 종류의 파일: 텍스트와 바이너리

# ![](images_skill_up/8-1.PNG)

# ![](images_skill_up/8-2.PNG)

# - 1) 저수준 (low-level) 파일 접근 모드
#   - 텍스트 모드: 개행 문자(\\n)가 자동으로 번역되어 플랫폼의 줄바꿈 표기(예: Windows의 캐리지 리턴-개행 \\r\\n 쌍)로 대체됨
#     - 개행 문자를 지닌 텍스트 파일을 보여줄 때 실제 줄바꿈하여 보여줌 
#   - 바이너리 모드: 위와 같은 대체 없음
#   
#   
# - 2) 저장되어지는 데이터 타입 
#   - 텍스트 모드: 표준 파이썬 문자열 (ASCII/UNICODE) 을 사용하여 읽기/쓰기 수행 
#   - 바이너리 모드: byte 타입을 사용하여 읽기/쓰기 수행
#   
#   
# - 3) 숫자 쓰기
#   - 텍스트 모드: 모든 숫자 데이터는 문자열 타입으로 변환되어 저장됨
#   - 바이너리 모드: 숫자 그 자체 byte 타입으로 저장됨
# 

# ## 8.2 바이너리 파일을 사용하는 경우

# - struct 패키지
# - pickle 패키지 (이것만 알아도 됨)
# - shelve 패키지

# ## 8.3 파일/디렉토리 시스템

# In[1]:


import os


# - 프로세스를 시작, 종료, 반복하는 함수: **spawn, kill, abort, fork**
# - 파일/디렉토리 시스템을 변경하거나 탐색하는 함수: **rename, removedirs, chroot, getcwd, rmdir, listdir, makedirs, mkdir**
# - 파일 플래그와 다른 속성들을 수정하는 함수: **chflags, chmod, chown**
# - 환경 변수를 가져오거나 조정하는 함수: **getenv, getenvb, putenv**
# - 신규 시스템 명령어를 실행하는 함수: **exec**
# - 파일 I/O에 저수준 접근을 하는 함수: **open, read, write**

# In[3]:


# Show the directory this script is currently running in.
print(os.getcwd())


# In[7]:


# List the directory once so the printed names and the printed count are
# taken from the same snapshot.
entries = os.listdir()
print(entries, len(entries))


# In[8]:


# os.mkdir raises FileExistsError when the directory already exists, so it
# is only created when missing; this keeps the script re-runnable.
if not os.path.isdir("my_dir"):
    os.mkdir("my_dir")


# In[9]:


# List again: "my_dir" is now among the entries.
entries = os.listdir()
print(entries, len(entries))


# In[10]:


# True only if a regular file with this name exists in the current directory.
print(os.path.isfile("stock_load.py"))


# In[12]:


# "my_dir" is a directory, so it is not reported as a regular file ...
print(os.path.isfile("my_dir"))


# In[13]:


# ... but it is reported as a directory.
print(os.path.isdir("my_dir"))


# In[14]:


# Join path components with the separator of the current platform.
print(os.path.join("my_dir", "sub_dir_1", "sub_dir_2"))


# ## 8.4	파일을 열 때 발생하는 예외 다루기

# ### 다양한 파일 처리 모드
# 
# > f = open(fname, 'r')
# 
# - open 내장 함수의 두번째 인자 mode 설명
#   - 두번째 인자 mode 생략시에는 읽기 전용(r) 모드로 설정
# 
# |  Mode  |          간단 설명        |    자세한 설명 |
# |--------|-----------------------------|------------|
# |  'r'   |          읽기 전용(기본 모드)     | 파일 객체를 읽기 모드로 생성하고, 파일 포인터를 파일 처음 위치에 놓는다.|
# |  'w'   |          쓰기 전용(기존 파일 내용 삭제)         | 파일이 존재하지 않으면 새로운 파일을 쓰기 모드로 생성하고, 해당 파일이 이미 존재하면 내용을 모두 없애면서 쓰기 모드로 생성하고, 파일 포인터를 파일 처음 위치에 놓는다.  |
# |  'a'   |      파일 끝에 추가(쓰기 전용)   | 파일이 존재하지 않으면 새롭게 파일을 생성하면서 쓰기 모드로 생성하고, 해당 파일이 이미 존재하면 파일 객체를 쓰기 모드로 생성하면서 파일 포인터를 파일의 마지막 위치에 놓는다. 따라서, 이후 작성되는 내용은 파일의 뒷 부분에 추가됨.|
# |  'r+'  |      읽고 쓰기 | 파일 객체를 읽고 쓸 수 있도록 생성한다. 파일 포인터를 파일 처음 위치에 놓는다. |
# |  'w+'  |      읽고 쓰기(기존 파일 내용 삭제) | 파일 객체를 읽고 쓸 수 있도록 생성한다. 파일이 존재하지 않으면 새로운 파일을 생성하고, 해당 파일이 이미 존재하면 내용을 모두 없애면서 생성하고, 파일 포인터를 파일 처음 위치에 놓는다.|
# |  'a+'  |      읽고 쓰기(파일 끝에 추가) | 파일 객체를 읽고 쓸 수 있도록 생성한다. 파일이 존재하지 않으면 새롭게 파일을 생성하고, 해당 파일이 이미 존재하면 파일 객체를 생성하면서 파일 포인터를 파일의 마지막 위치에 놓는다 (그래서, 이후 작성되는 내용은 파일의 뒷 부분에 추가). |

# ### 파이썬 예외 처리 구문
# <img src="images/try_exception.png" width="50%" />

# In[16]:


# Ask for a file name and print that file's contents; a missing file is
# reported instead of crashing the script.
fname = input('Enter file to read:')
try:
    # The with-statement closes the file even if read() or print() fails.
    with open(fname, 'r') as f:
        print(f.read())
except FileNotFoundError:
    print('File', fname, 'not found. Terminating.')


# In[ ]:


# Keep asking until an existing file has been printed or the user enters
# an empty name.
while True:
    try:
        fname = input('Enter file name: ')
        if not fname:                   # An empty string ends the loop.
            break
        # Try to open the file; the with-statement guarantees it is closed
        # even when read() or print() raises.
        with open(fname) as f:
            print(f.read())
        break
    except FileNotFoundError:
        print('File could not be found. Re-enter.')


# In[ ]:


# Same retry loop, but the try block covers only the call that can raise
# FileNotFoundError; the success path lives in the else branch.
while True:
    fname = input('Enter file name: ')
    if not fname:
        break
    try:
        f = open(fname)                 # Try to open the file.
    except FileNotFoundError:
        print('File could not be found. Re-enter.')
    else:
        # Using the open file as a context manager closes it even if
        # read() or print() raises.
        with f:
            print(f.read())
        break


# ## 8.5 'with' 키워드 사용하기
# - 파일을 Open한 이후 올바로 닫지 않고, 자원을 해제하지 않은 상태로 갑자기 종료가 되는 상황 발생 가능
#   - 파일 I/O를 잘 수행하다가 발생하는 예외 상황

# ### with 구문

# <img src="images/with_as_1.png" width="50%" />
# <img src="images/with_as_2.png" width="50%" />

# In[21]:


# Echo stock_load.py to the screen.  Every line returned by readlines()
# keeps its own newline, so print() is told not to add another one.
with open('stock_load.py', 'r') as source:
    print(''.join(source.readlines()), end='')


# ## 8.6 읽기/쓰기 연산의 요약

# ![](tables_skill_up/t0801-1.PNG)

# ![](tables_skill_up/t0801-2.PNG)

# ## 8.7 텍스트 파일 작업 상세하게 알아보기

# In[22]:


# Create file.txt from four lines of text, then read it back in one call.
quote = (
    'To be or not to be\n',
    'That is the question.\n',
    'Whether tis nobler in the mind\n',
    'To suffer the slings and arrows\n',
)
with open('file.txt', 'w') as out:
    for line in quote:
        out.write(line)

with open('file.txt', 'r') as src:
    whole_text = src.read()
print(whole_text)


# In[23]:


# Read one line at a time.  readline() returns '' at end of file, and that
# empty string is printed as well before the loop stops.  Each line still
# ends with '\n', so print() shows it followed by an empty line.
with open('file.txt', 'r') as src:
    while True:
        line = src.readline()
        print(line)
        if not line:
            break


# In[24]:


# Same loop, but the trailing newline is stripped before printing.
with open('file.txt', 'r') as src:
    while True:
        line = src.readline().rstrip('\n')
        print(line)
        if not line:
            break


# In[25]:


# readlines() returns every line as a list; the lines keep their newline,
# so they are printed without a separator or an extra line ending.
with open('file.txt', 'r') as src:
    lines = src.readlines()
print(*lines, sep='', end='')


# ## 8.8 파일 포인터('seek') 사용하기
# - 파일에 대한 순차 접근이 아닌 임의 접근이 필요할 때 사용 (실용적으로는 잘 활용되지 않음)
# 
# > f.seek(pos, orig)
# - orig로 지정된 위치를 기준으로 파일 내 임의 접근 위치 pos 로 이동한다.
# - orig
#   - 0: 파일의 시작 지점
#   - 1: 현재 위치
#   - 2: 파일의 끝 지점
# 
# > f.tell()
# - 파일 처음 시작 부터 계산하여 현재 위치를 반환한다.

# In[33]:


# Random access on a text file: read five characters, report the position,
# move back to the start and read five characters again.
with open('file.txt', 'r') as src:
    print(src.seekable())
    first_chunk = src.read(5)
    print(first_chunk)
    print(src.tell())
    print("*" * 80)
    src.seek(0, os.SEEK_SET)    # SEEK_SET (0): offset counted from the start
    print(src.tell())
    print(src.read(5))


# ## 8.9 RPN 프로젝트 안에서 텍스트 읽기
# 
# ### (생략)

# ## 8.10	바이너리 직접 읽기/쓰기
# - 바이트 타입의 값 생성
#   - 문자열 바로 앞에 b 접두어 표기
#   - 문자열 내용에 숫자만 있는 경우 일반적으로 \\x와 함께 16진수로 바이트(8비트) 표기

# In[59]:


# Write two bytes objects back to back into my.dat, printing the type of
# each one just before it is written.
raw_values = b'\x01\x02\x03\x0f\x10\x1f'
greeting = b'hello'
with open('my.dat', 'wb') as out:
    for chunk in (raw_values, greeting):
        print(type(chunk))

        out.write(chunk)


# In[60]:


# my.dat now shows up in the current directory.
print(os.listdir())


# In[61]:


# Read the whole file back as one bytes object.
with open('my.dat', 'rb') as src:
    payload = src.read()
print(type(payload), len(payload))
# Iterating over a bytes object yields its bytes as integers.
for byte_value in payload:
    print(byte_value, end=' ')


# ### 바이너리 데이터를 저수준 (low-level)으로 직접 쓰고 읽는 작업은 잘 하지 않는다.
# ### 대신 다음과 같은 고수준 (high-level) 방식으로 쓰고 읽는다.
# #### 1. struct 패키지 사용
# #### 2. pickle 패키지 사용 (가장 많이 이용하는 패키지) 
# #### 3. shelve 패키지 사용

# ## 8.11 데이터를 고정-길이 필드로 변환하기 (struct)

# #### (생략)

# ## 8.12 피클링 패키지 사용하기

# ![](images_skill_up/8-3.PNG)

# In[62]:


import pickle

# Pickle three objects of different types, one after another, into goo.dat.
with open('goo.dat', 'wb') as f:
    pickle.dump([1, 2, 3], f)
    pickle.dump('Hello!', f)
    pickle.dump(3.141592, f)


# In[63]:


# Each load() returns the next object in the order it was dumped.
# NOTE: unpickling can execute arbitrary code, so only load files from a
# trusted source (here: the file written just above).
with open('goo.dat', 'rb') as f:
    a = pickle.load(f)
    b = pickle.load(f)
    c = pickle.load(f)
    print(type(a), a)
    print(type(b), b)
    print(type(c), c)


# In[66]:


# isinstance() is the idiomatic type check and also accepts list subclasses.
if isinstance(a, list):
    print('The length of a is {0}'.format(len(a)))


# - The one drawback of pickle: there is no direct way to find out how many objects a file written with several dump calls contains
#   - Keep calling load until no more objects can be read --> **EOFError is raised**

# In[67]:


# Load objects until the end of the file is reached, collecting them in a list.
loaded = []
with open('goo.dat', 'rb') as f:
    while True:
        try:
            item = pickle.load(f)
        except EOFError:
            # No more pickled objects: report how many were read and stop.
            print('Loaded', len(loaded), 'items.')
            break
        print(type(item), item)
        loaded.append(item)


# ## 8.13 shelve 패키지 사용하기

# (생략)

# ***
# ***
# ## [추가 내용] 파일과 디렉토리 다루기
# ***
# ***

# ***
# ### 파일 다루기
# ***

# #### 1-1 파일 목록 얻기
# - os.listdir('경로')
#   - 디렉토리 안에 들어 있는 각 파일 목록 반환

# In[68]:


import os

# Print the entries of the current directory, an empty line, and then the
# entries of its parent directory.
here = os.listdir('.')
print(here)
print()

parent = os.listdir('../')
print(parent)


# #### 1-2 파일 종류 알아보기
# - os.path 모듈의 다음 함수들은 파일의 종류를 판단하여 True 또는 False를 반환한다.
#   - isfile(filepath)
#     - 순수 파일이면 True
#   - isdir(filepath)
#     - 디렉토리이면 True
#   - islink(filepath)
#     - 심볼릭링크이면 True

# In[69]:


import os
def filetype(fpath):
    """Print ``fpath`` followed by every file kind that applies to it.

    A path can match more than one kind (a symbolic link to a regular file
    is both a link and a regular file); each match is printed on its own
    line.  A path that matches none of the kinds is reported as
    ``Unknown`` so that the output line is always terminated.

    Args:
        fpath: Path to inspect, absolute or relative to the current
            working directory.
    """
    checks = (
        (os.path.isfile, 'Regular file'),
        (os.path.isdir, 'Directory'),
        (os.path.islink, 'Symbolic link'),
    )
    print(fpath, ':', end="")
    labels = [label for check, label in checks if check(fpath)]
    # Without a fallback, a path matching no check would leave the line
    # unterminated and the next output would run onto it.
    for label in labels or ['Unknown']:
        print(label)

# Classify every entry of the current directory.
flist = os.listdir('.')
for fname in flist:
    filetype(fname)


# #### 1-3 파일 조작하기
# #### 1) 파일 이름 변경하기
# - os.rename(old_filepath, new_filepath)

# In[4]:


s = """Its power: Python developers typically report
they are able to develop applications in a half
to a tenth the amount of time it takes them to do
the same work in such languages as C."""

with open('t.txt', 'w') as f:
    f.write(s) # write the string to the file


# In[5]:


import os
os.rename('t.txt', 't1.txt')  # rename t.txt to t1.txt


# #### 2) Moving a file
# - os.rename(old_filepath, new_filepath)

# In[6]:


# os.mkdir raises FileExistsError when the directory already exists, so it
# is only created when missing; this keeps the script re-runnable.
if not os.path.isdir('example'):
    os.mkdir('example')


# In[7]:


os.rename('t1.txt', './example/t1.txt') # move t1.txt from the current working directory into example, keeping the name t1.txt


# #### 3) 파일 복사하기
# - shutil 모듈 활용
# - shutil.copyfile(src_filepath, dest_filepath)

# In[9]:


import shutil

# Recreate t.txt with four lines of text and duplicate it as t_copied.txt.
text = (
    "Its power: Python developers typically report\n"
    "they are able to develop applications in a half\n"
    "to a tenth the amount of time it takes them to do\n"
    "the same work in such languages as C."
)

with open('t.txt', 'w') as out:
    out.write(text)  # write the string to the file

shutil.copyfile('t.txt', 't_copied.txt')


# #### 1-4 파일 이름 다루기

# #### 1) 상대 경로를 절대 경로로 변환하기 [중요]
# - os.path.abspath(상대경로)
#   - 실제 파일 존재와는 무관하게 절대경로로 변경함

# In[10]:


import os
# Turn a relative name into an absolute path.
absolute = os.path.abspath('o.txt')
print(absolute)


# #### 2) Check whether a file exists at the given path [important]
# - os.path.exists(filepath)

# In[11]:


# Probe one absolute path and two names relative to the working directory.
f = '/Users/yhhan/git/python-e-learning/sample.txt'
for candidate in (f, 'sample.txt', 'asdf.txt'):
    print(os.path.exists(candidate))


# #### 3) Get the names that refer to the current/parent directory

# In[12]:


print(os.curdir, os.pardir, sep='\n')  # current directory, then parent directory


# #### 4) Get the directory separator character

# In[13]:


separator = os.sep
print(separator)


# #### 1-5 경로명 분리하기

# #### 1) 경로와 파일명으로 분리

# In[15]:


f = '/Users/yhhan/git/python-e-learning/t.txt'

file_name = os.path.basename(f)  # file name only
dir_name = os.path.dirname(f)    # directory part only
print(file_name)
print(dir_name)


# #### 2) Split into directory path and file name in one call

# In[16]:


head_and_tail = os.path.split(f)
print(head_and_tail)


# #### 3) On MS Windows, split the drive name from the file path

# In[17]:


drive_and_rest = os.path.splitdrive(f)
print(drive_and_rest)


# #### 4) Split off the extension

# In[18]:


root_and_ext = os.path.splitext(f)
print(root_and_ext)


# #### 1-6 경로명 생성하기 [중요]

# - Linux and Mac

# In[19]:


# Build a path from its components, starting at the root directory.
posix_parts = ("/", "Users", "yhhan", "git", "python-e-learning", "t.txt")
path = os.path.join(*posix_parts)
print(path)


# - Windows

# In[20]:


# Same call, starting from a drive root instead.
windows_parts = ("c:\\", "Users", "yhhan")
path = os.path.join(*windows_parts)
print(path)


# ***
# ### 디렉토리 다루기
# ***

# #### 2-1 디렉토리에 관련된 일반 작업

# #### 1) 현재 작업 디렉토리 알아보기

# In[21]:


import os
print(os.getcwd())


# #### 2) Change the working directory

# In[22]:


path = os.path.join("/", "Users", "yhhan", "Public")
# On Windows use: path = os.path.join("c:\\", "Users", "yhhan", "Public")

# This hard-coded path may not exist on the machine running the script.
# os.chdir raises an OSError subclass in that case, so report the problem
# and carry on in the current directory instead of aborting.
try:
    os.chdir(path)
except OSError as err:
    print('Could not change directory:', err)
print(os.getcwd())


# #### 3) 디렉토리 만들기

# In[26]:


import os

import stat

# os.mkdir's default mode is 0o777; the permissions actually applied are
# that mode with the process umask removed, e.g. 0o755 (rwxr-xr-x) under
# the common umask 022.  The directory is only created when missing so the
# script can be run again.
if not os.path.isdir('temp_new'):
    os.mkdir('temp_new')

# Show the resulting permissions with the standard library.  The original
# notebook used the IPython "%ls -al" magic, which raises NameError when
# this file is run as a plain Python script.
print(stat.filemode(os.stat('temp_new').st_mode), 'temp_new')


# An explicit mode of 0o700 requests rwx------ (the umask is still applied).
if not os.path.isdir('temp_new_2'):
    os.mkdir('temp_new_2', 0o700)
# makedirs also creates every missing intermediate directory.
os.makedirs('temp_new_2/level1/level2', exist_ok=True)


# In[27]:


import os

if not os.path.isdir('temp3'):
    os.mkdir('temp3', 0o700)

path = os.path.join("temp3", "level1", "level2")
# Default mode again; missing intermediate directories are created as well.
os.makedirs(path, exist_ok=True)


# #### 4) 디렉토리 삭제

# In[28]:


# rmdir can only delete an empty directory.  temp_new, created in the
# "creating directories" step of this script, is empty.
os.rmdir('temp_new')


# In[29]:


# temp3 still contains level1/level2, so rmdir refuses to delete it.
# Report the error instead of aborting the rest of the script.
try:
    os.rmdir('temp3')
except OSError as err:
    print('Cannot remove temp3:', err)


# #### 5) Removing nested directories
# - os.removedirs(filepath)
#   - Deletes the directories named in filepath one by one, starting from the rightmost one.
#   - Stops, without deleting, at the first directory that still contains other entries.

# In[30]:


# Remove the nested directories created under temp_new_2 in the
# "creating directories" step of this script.
os.removedirs('temp_new_2/level1/level2')