# Walk-through of byte strings versus Unicode strings, followed by basic
# string indexing and slicing.
#
# Every print call takes exactly one parenthesised argument, so the output is
# the same under the Python 2 print statement and the Python 3 print function.
import codecs
import os

# A Unicode literal written as five escaped code points; len() counts code
# points here, not encoded bytes.
unicode_string = u'\u3042\u308a\u304c\u3068\u3046'
print(unicode_string)
print(len(unicode_string))

file_path = r'/home/johann/Desktop'  # Adjust as required (e.g. r'C:\documents')
os.chdir(file_path)

# Read the file as raw bytes. The context manager closes the handle promptly
# instead of leaving it to the garbage collector.
with open('myfile.txt', 'rb') as raw_file:
    my_string = raw_file.read().rstrip()
print(len(my_string))  # length in bytes

# Decode the bytes by hand. This raises UnicodeDecodeError if the file is not
# valid UTF-8 (presumably it is -- confirm against the actual file).
my_unicode_string = my_string.decode('utf-8')
print(len(my_unicode_string))  # length in code points

# Same file again, this time letting codecs decode while reading.
with codecs.open('myfile.txt', 'rb', 'utf-8') as decoded_file:
    my_new_string = decoded_file.read().rstrip()
print(len(my_new_string))

# Indexing yields a single character; slicing yields a substring.
print('this is a string'[0])
print('this is a string'[0:4])