import urllib
# Fetch a web page, remove <script>/<style> elements and every remaining HTML
# tag, then print the whitespace-separated words that are left.
#
# The script targets Python 2 (it relies on urllib.urlopen).  Each print call
# is given one pre-formatted string so the module also parses on Python 3.

PAGE_URL = "http://cse.kut.ac.kr/"


def _strip_element(source, tag):
    """Replace every ``<tag ...> ... </tag>`` element in *source* with a space.

    Tag names are compared case-insensitively.  An opening is any ``<``
    directly followed by *tag*; the element ends at the first ``</`` directly
    followed by *tag*, plus one more character (assumed to be the ``>``).

    Args:
        source: The markup to clean.
        tag: Lower-case element name, e.g. ``"script"``.

    Returns:
        The cleaned string.  An opening with no matching closing tag is left
        untouched instead of raising ``IndexError``.
    """
    opener_len = len(tag) + 1   # "<" + tag
    closer_len = len(tag) + 3   # "</" + tag + ">"
    pieces = []
    keep_from = 0               # start of the text not yet copied to pieces
    search_from = 0
    while True:
        start = source.find("<", search_from)
        if start == -1:
            break
        if source[start + 1:start + opener_len].lower() != tag:
            # Some other tag: keep it and look for the next "<".
            search_from = start + 1
            continue

        close = source.find("</", start + opener_len)
        while (close != -1
               and source[close + 2:close + 2 + len(tag)].lower() != tag):
            close = source.find("</", close + 1)
        if close == -1:
            # Unterminated element: no later opening can be closed either.
            break

        pieces.append(source[keep_from:start])
        pieces.append(" ")
        keep_from = search_from = close + closer_len
    pieces.append(source[keep_from:])
    # Joining once avoids re-copying the whole page for every removal.
    return "".join(pieces)


def _strip_tags(source):
    """Replace every ``<...>`` span in *source* with a single space.

    A ``<`` that has no ``>`` after it is left in place, together with the
    rest of the text, instead of raising ``IndexError``.
    """
    pieces = []
    keep_from = 0
    while True:
        start = source.find("<", keep_from)
        if start == -1:
            break
        end = source.find(">", start + 1)
        if end == -1:
            break
        pieces.append(source[keep_from:start])
        pieces.append(" ")
        keep_from = end + 1
    pieces.append(source[keep_from:])
    return "".join(pieces)


def html_to_text(source):
    """Return *source* with scripts, styles and all other tags blanked out.

    Script and style elements are removed first, contents included, so that
    their bodies do not end up in the text.  Each removed element or tag is
    replaced by one space, which keeps neighbouring words separated.
    """
    for tag in ("script", "style"):
        source = _strip_element(source, tag)
    return _strip_tags(source)


def main():
    """Download PAGE_URL and print its length, cleaned length and words."""
    response = urllib.urlopen(PAGE_URL)
    try:
        source = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    print("Initial length of source: %d" % len(source))

    text = html_to_text(source)
    print("Last length of source: %d" % len(text))

    words = text.split()
    print("Total num of plain words: %d" % len(words))
    if words:
        # Each word is quoted and followed by a space; the extra joining space
        # reproduces the separator the Python 2 trailing-comma print inserted.
        print(" ".join('"' + word + '" ' for word in words))


if __name__ == "__main__":
    main()
# Recorded output (appears to be from a run of this script):
# Initial length of source: 36201
# Last length of source: 5421
# Total num of plain words: 189
# "컴퓨터공학부" "홈페이지" "컴퓨터공학부" ":" "교육목표" ":" "찾아오시는길" ":" "졸업작품" ":" "공학설계" ":" "취업조사2014" ":" "취업조사2013" "교수진" ":" "학생회" "졸업이수요건" ":" "컴퓨터S/W" ":" "컴퓨터H/W" ":" "컴퓨터스마트IT" ":" "컴퓨터시스템응용" ":" "장학제도" "모집요강" ":" "입시Q&A" "공지사항" ":" "자유게시판" ":" "익명게시판" ":" "취업게시판" ":" "동문게시판" ":" "사진게시판" ":" "학부사진겔러리" ":" "고장신고게시판" "강의자료실" ":" "일반자료실" ":" "서식및양식자료실" "대학원소개" ":" "연구실소개" ":" "대학원게시판" "공학인증안내" ":" "공학인증게시판" ":" "C++" "2014/03/27" "공학설계" "포스터발표경진대회" "투표..." "2013/12/10" "2013년" "졸업작품" "브로셔" "2013/10/07" "제" "4대" "컴퓨터공학부" "학회장/부학..." "2012/11/05" "디지털" "시스템" "설계1-" "[실습-조교]" "2012/09/09" "2015년" "상반기" "삼성" "소프트웨어" "..." "2014/09/25" "과" "잠바" "불출공지입니다!!" "2014/09/23" "9/18" "NHN" "Entertainment" "캠퍼스" "..." "2014/09/17" "컴퓨터공학부" "학술" "소모임" "씨앗(S..." "2014/09/07" "펜타시큐리티시스템" "대학생" "마케..." "2014/09/05" "(주)엠프론티어" "시스템" "운영/개발" "..." "2014/09/15" "컴퓨터공학부" "학부사무실" "청년인턴..." "2014/08/06" "(주)신진엠텍" "2014/06/16" "엑시콘" "기술영업" "채용." "-" "꼭" "지도..." "2014/04/18" "온라인평생교육원" "이러닝" "사업" "공..." "2014/04/16" "2014년" "졸업..." "김재우처장..." "2014년" "스승..." "[특성화사업단]튜터링" "프로그램" "..." "2014/09/29" "(공지)2014년" "졸업작품" "영어발표" "..." "2014/09/25" "[특성화사업단]" "IT경진대회" "참가지원" "2014/09/25" "긴급]" "졸업작품브로셔" "검토" "2014/09/25" "레드햇" "챌린지" "안내" "2014/09/23" "[특성화사업단]" "해외(일본)" "기술..." "2014/09/23" "[특성화사업단]튜터링" "모집공고(..." "2014/09/22" "KT," "멤버십" "강화한" "상품" "패키지" "..." "2014-09-29" "[IP노믹스]MS," "모바일" "전기통신" "..." "2014-09-29" "미래부," "도로정보감지레이더용" "34..." "2014-09-29" "소니·화웨이" "외산폰" "대공습···단..." "2014-09-29" "통신사" "첫" "핵심기능" "NFV" "구현" "노..." "2014-09-29"