#!/usr/bin/env python
# coding: utf-8

# This notebook was prepared by [Donne Martin](http://donnemartin.com). Source and license info is on [GitHub](https://github.com/donnemartin/data-science-ipython-notebooks).

# # HDFS

# Run an HDFS command; invoked with no arguments, it prints usage for the available subcommands:

# In[ ]:

get_ipython().system('hdfs')

# Run a file system shell command (FsShell); invoked with no arguments, it lists the available file system commands:

# In[ ]:

get_ipython().system('hdfs dfs')

# List the contents of the user's home directory on HDFS:

# In[ ]:

get_ipython().system('hdfs dfs -ls')

# List the HDFS root directory:

# In[ ]:

get_ipython().system('hdfs dfs -ls /')

# Copy a local file to the user's home directory on HDFS:

# In[ ]:

get_ipython().system('hdfs dfs -put file.txt file.txt')

# Display the contents of the specified HDFS file:

# In[ ]:

get_ipython().system('hdfs dfs -cat file.txt')

# Print the last 10 lines of the file to the terminal:

# In[ ]:

get_ipython().system('hdfs dfs -cat file.txt | tail -n 10')

# View a directory and all of its files:

# In[ ]:

get_ipython().system('hdfs dfs -cat dir/* | less')

# Copy an HDFS file to the local file system:

# In[ ]:

get_ipython().system('hdfs dfs -get file.txt file.txt')

# Create a directory on HDFS:

# In[ ]:

get_ipython().system('hdfs dfs -mkdir dir')

# Recursively delete the specified directory and all of its contents:

# In[ ]:

get_ipython().system('hdfs dfs -rm -r dir')

# Specify an HDFS file in Spark. A fully qualified URI names the NameNode host and port; a bare path resolves relative to the user's home HDFS directory:

# In[ ]:

data = sc.textFile("hdfs://hdfs-host:port/path/file.txt")
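
# A minimal sketch of the relative-path form, assuming a SparkContext named `sc` is already available (as in a PySpark shell) and that `file.txt` was uploaded with the `-put` command above; the `count()` action forces the read, so it confirms the path resolves:

# In[ ]:

data = sc.textFile("file.txt")  # bare path resolves to the user's home directory on HDFS
data.count()                    # number of lines in the HDFS file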