Breaking the file by lines

In [1]:
botsText= open('message.txt',encoding='utf-8').read().split("\n")
In [2]:
len(botsText)
Out[2]:
155520
In [3]:
botsText[41854]
Out[3]:
'Bot: false   Sat Apr 21 2018 16:19:49 GMT+0000 (UTC)   ip 144.76.38.73    Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)  Path: /Discourse/XTags/BASIC'

Lets try some regex

In [4]:
import re
import json
from random import randint

Spliting bots into a big list

OK OK it is possible to make a list of dictionaries

In [6]:
bots=[]
ip_regex=re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
wp_regex=re.compile(r'(?<=POST) {.*}')
path_regex=re.compile(r'(?<=Path:) .*')
time_regex=re.compile(r"((?<=Bot: false\ ) .*(?=GMT))|((?<=Bot: true\ ) .*(?=GMT))")
for bot in range(0,len(botsText)):
    b=botsText[bot] #this bot
    ip=ip_regex.search(b).group(0)
    path=path_regex.findall(b)[0]
    wp_p=wp_regex.findall(b)
    time=time_regex.findall(b)[0][0]
    agent_regex=re.compile(r"(?<="+ip+r').*(?=Path)')
    agent=" ".join(agent_regex.search(b).group(0).split())
    

    bots.append({"ip":ip,"date":time,"path":path,"post":"","agent":agent})

    #if post:
    if(len(wp_p)>0):
        try:
            p=json.loads(wp_p[0])
        except:
            p=wp_p[0]
#             print(p)
        bots[bot]["post"]=(p)
        agent_regex=re.compile(r"(?<="+ip+r').*(?=POST)')
        agent=" ".join(agent_regex.search(b).group(0).split())
        bots[bot]["agent"]=agent
In [7]:
bots[15046]
Out[7]:
{'agent': 'cheeringbot',
 'date': '',
 'ip': '162.243.17.139',
 'path': ' /yougotthis',
 'post': ''}

Ok, so things seem to be inside of a big list of dictionaries, lets find out something useful

In [8]:
from collections import Counter
In [10]:
ips=[]
agents=[]
passwords=[]
for bot in bots:
    ips.append(bot["ip"])
    agents.append(bot["agent"])
    if(bot["post"]!=""):
        try:
            passwords.append(bot["post"]["pwd"])
        except:
            x=1 #do nothing
#             print(bot["post"])

Lets see which IP visited the most

In [13]:
ip_count=Counter(ips)
ip_count.most_common(10)
top5000=ip_count.most_common(5000)
print("Most common ip is: "+ str(ip_count.most_common(1)))
Most common ip is: [('103.224.250.160', 20706)]

What about the User Agents

In [15]:
agent_count=Counter(agents)
agent_count.most_common(100)
Out[15]:
[('Mozilla/5.0', 82206),
 ('Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)', 29660),
 ('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
  18295),
 ('Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0', 10495),
 ('cheeringbot', 3700),
 ('Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)', 3222),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
  1423),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
  1272),
 ('Googlebot-Image/1.0', 1087),
 ('hourlybot', 676),
 ('LightspeedSystemsCrawler Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US',
  503),
 ('Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
  285),
 ('spambot', 200),
 ('undefined', 187),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64)', 179),
 ('Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0', 124),
 ('Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
  114),
 ('Mozilla/5.0 (Linux; U; Android 2.2) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
  107),
 ('Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)', 101),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0.3 Safari/604.5.6',
  100),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
  94),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
  85),
 ('Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)', 85),
 ('facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
  69),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
  62),
 ('wonderingbot', 60),
 ('Python-urllib/2.7', 55),
 ('ZmEu', 54),
 ('Mozilla/5.0 zgrab/0.x', 47),
 ('Mozilla', 35),
 ('python-requests/2.18.4', 34),
 ('Mozilla/5.0 (Linux; Android 7.0) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Focus/4.0.2 Chrome/56.0.2924.87 Mobile Safari/537.36',
  28),
 ('Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
  25),
 ('Mozilla/5.0 (compatible; ips-agent)', 22),
 ('Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0', 18),
 ('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
  18),
 ('test bot', 18),
 ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
  17),
 (', 212.109.17.68 Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  16),
 ('Mozilla/5.0 (Linux; U; Android 4.2.2; es-us; NYX_JOIN Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
  16),
 ('Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.21022; .NET CLR 3.5.30729; .NET CLR 3.0.30618)',
  16),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
  15),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
  15),
 ('feedingbot', 15),
 ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
  14),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
  14),
 ('goodluckbot', 13),
 ('hellobot', 12),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1 Safari/605.1.15',
  12),
 (', 176.115.152.5 Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  11),
 ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
  10),
 ('Go-http-client/1.1', 10),
 ('Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
  10),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
  9),
 ('Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; [email protected])', 9),
 ('Mozilla/5.0(WindowsNT6.1;rv:31.0)Gecko/20100101Firefox/31.0', 9),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:58.0) Gecko/20100101 Firefox/58.0',
  9),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
  9),
 ('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html',
  9),
 ('Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1', 8),
 ('Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) CriOS/65.0.3325.152 Mobile/15D100 Safari/604.1',
  8),
 ('Wget(linux)', 8),
 ('Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)',
  7),
 ('User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705',
  7),
 ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 Google Favicon',
  7),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31',
  7),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0',
  7),
 ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.x.x) Gecko/20041107 Firefox/x.x',
  7),
 ('Cloud mapping experiment. Contact [email protected]', 6),
 ('python-requests/2.12.3', 6),
 ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
  6),
 ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6',
  5),
 ('masscan/1.0 (https://github.com/robertdavidgraham/masscan)', 5),
 ('Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64)', 5),
 ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36',
  5),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
  5),
 ('Mozilla/7.0 (compatible; MSIE 8.0; Windows Seven)', 5),
 ('stealthbot', 5),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
  5),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0',
  4),
 ('curl/7.55.1', 4),
 (', 178.57.123.150 Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  4),
 ('Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0', 4),
 ('okhttp/3.5.0', 4),
 (', 79.134.3.25 Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  4),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
  4),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:59.0) Gecko/20100101 Firefox/59.0',
  4),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
  4),
 ('Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/65.0.3325.109 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/167.0.0.42.94;]',
  4),
 (', 94.139.100.110 Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  4),
 ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us; rv:1.7.12) Gecko/20050919 Firefox/1.0.7',
  4),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
  4),
 (', 94.29.124.77 Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  4),
 (', 194.190.24.138 Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  4),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0',
  4),
 ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
  4),
 ('Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)', 4),
 ('Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)', 4),
 ('Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko', 4),
 ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
  4)]

What about the most common passwords?

In [16]:
pass_count=Counter(passwords)
pass_count.most_common(25)
Out[16]:
[('admin1234', 8),
 ('admin123456', 8),
 ('abc123', 8),
 ('pass123', 7),
 ('passw0rd', 7),
 ('toor', 7),
 ('qwerty123', 7),
 ('123qwerty', 7),
 ('1234561', 7),
 ('ambulance', 7),
 ('hahaha', 7),
 ('leader', 7),
 ('skater', 7),
 ('sun', 7),
 ('pass1', 7),
 ('password01', 7),
 ('1313', 7),
 ('argent', 7),
 ('bridge', 7),
 ('Welcome1', 7),
 ('000', 6),
 ('0000', 6),
 ('00000000', 6),
 ('007007', 6),
 ('0123', 6)]

Lets look at the wordpress attacks

In [25]:
wp_bots=[]
for bot in bots:
    try:
        if(bot["post"]["rememberme"]=="forever"):
    #         wp_bots.append("ip: "+bot["ip"]+" agent: "+bot["agent"])
            wp_bots.append(bot["ip"])
    except:
        ignore=bot

len(wp_bots)
Out[25]:
5010
In [27]:
wp_count=Counter(wp_bots)
wp_count.most_common(20)
Out[27]:
[('95.79.33.65', 23),
 ('176.197.78.250', 22),
 ('93.157.168.25', 21),
 ('37.143.19.62', 20),
 ('46.61.152.185', 19),
 ('5.104.206.2', 19),
 ('185.59.58.52', 18),
 ('91.192.173.25', 18),
 ('185.13.112.104', 18),
 ('62.140.230.163', 17),
 ('109.172.30.247', 17),
 ('192.95.168.172', 16),
 ('93.170.177.233', 16),
 ('80.64.18.225', 16),
 ('185.81.248.18', 16),
 ('188.0.135.155', 16),
 ('176.120.211.202', 16),
 ('212.118.40.117', 16),
 ('46.35.243.27', 15),
 ('195.9.51.62', 15)]

Let's see how many unique ips

In [28]:
unique_wp=list(set(wp_bots))
print("WP attacks: "+str(len(wp_bots)))
print("WP Unique IPs: "+str(len(unique_wp)))
WP attacks: 5010
WP Unique IPs: 2110

This is really inreresting because out of the 5010 hacking attempts, they all came from 2110 differnt ip address. But the attacks are identicall and follow a particular order. Meaning that the attacker has access to over 2000 machines or some sort of vpn/tor to hide his real origin.

In [29]:
import pygeoip

IP locator, this is a library to find where the geo location of an ip might be. Not sure how accurate

In [30]:
def ipLocator(ip):
    GeoIPDatabase = 'GeoLiteCity.dat'
    ipData = pygeoip.GeoIP(GeoIPDatabase)
    record = ipData.record_by_name(ip)
#     print("The geolocation for IP Address %s is:" % ip)
#     print("Accurate Location: %s, %s, %s" % (record['city'], record['region_code'], record['country_name']))
#     print("General Location: %s" % (record['metro_code']))
    data=ipData.record_by_addr(ip)
#     print(data)
    return(data)
In [32]:
top10=wp_count.most_common(10)
top10[0]
Out[32]:
('95.79.33.65', 23)

Lets see where the top attacker comes form

In [35]:
ipLocator(top10[0][0])
Out[35]:
{'area_code': 0,
 'city': 'Nizhniy Novgorod',
 'continent': 'EU',
 'country_code': 'RU',
 'country_code3': 'RUS',
 'country_name': 'Russian Federation',
 'dma_code': 0,
 'latitude': 56.326899999999995,
 'longitude': 44.00749999999999,
 'metro_code': None,
 'postal_code': '603028',
 'region_code': '51',
 'time_zone': 'Europe/Moscow'}
In [36]:
locations=[]
for ip in top10:
    data=ipLocator(ip[0])
    lon=data["longitude"]
    lat=data["latitude"]
    url="https://www.google.com/maps/place/"+str(lat)+","+str(lon)
    loc=("("+str(lat)+","+str(lon)+")")
    locations.append(loc)
#     print(url)
#     !/usr/bin/open -a "/Applications/Google Chrome.app" {url}
top_ten_ip_locations="["+(",".join(locations))+"]"
# locat.append(json.loads(",".join(locations)))
In [38]:
key="Get your own key"
In [39]:
from ast import literal_eval
top_ten_ip_locations=literal_eval(top_ten_ip_locations)
type(top_ten_ip_locations)
Out[39]:
list
In [40]:
import gmaps
import gmaps.datasets
import gmaps.geojson_geometries
gmaps.configure(api_key=key)

marker_locations = top_ten_ip_locations

fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations)
fig.add_layer(markers)
fig

top 100

In [41]:
top100=wp_count.most_common(100)
locations=[]
for ip in top100:
    data=ipLocator(ip[0])
    lon=data["longitude"]
    lat=data["latitude"]
#     url="https://www.google.com/maps/place/"+str(lat)+","+str(lon)
    loc=("("+str(lat)+","+str(lon)+")")
    locations.append(loc)
#     !/usr/bin/open -a "/Applications/Google Chrome.app" {url}
top_100_ip_locations="["+(",".join(locations))+"]"
top_100_ip_locations=literal_eval(top_100_ip_locations)

marker_locations = top_100_ip_locations

fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations)
fig.add_layer(markers)
fig

The previous maps are useful but it would be cooler to show this as a heatmap

In [42]:
top100=wp_count.most_common(700)
locations=[]
visits=[]
for ip in top100:
    data=ipLocator(ip[0])
    try:
        lon=data["longitude"]
        lat=data["latitude"]
    #     url="https://www.google.com/maps/place/"+str(lat)+","+str(lon)
        loc=("("+str(lat)+","+str(lon)+")")
        locations.append(loc)
        visits.append(ip[1])
    except:
        do="nothing"
#     !/usr/bin/open -a "/Applications/Google Chrome.app" {url}
top_100_ip_locations="["+(",".join(locations))+"]"
top_100_ip_locations=literal_eval(top_100_ip_locations)

marker_locations = top_100_ip_locations

fig = gmaps.figure(map_type='SATELLITE',layout={'width': '960px', 'height': '560px'},center=(20,0),zoom_level=2)
fig.add_layer(gmaps.heatmap_layer(marker_locations,weights=visits,max_intensity=25,point_radius=10.0))
fig

Grouping by IPs

Bots coming with the same IP should be treated as one.

In [45]:
def checkIP(ip):
    for bot in range(0,len(unique_bots)):
        if unique_bots[bot]["ip"]== ip:
            return bot
        
    return False
In [47]:
unique_bots=[]
ip_regex=re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
wp_regex=re.compile(r'(?<=POST) {.*}')
path_regex=re.compile(r'(?<=Path:) .*')
time_regex=re.compile(r"((?<=Bot: false\ ) .*(?=GMT))|((?<=Bot: true) .*(?=GMT))")
for bot in range(0,len(botsText)):
    b=botsText[bot] #this bot
    ip=ip_regex.search(b).group(0)
    path=path_regex.findall(b)[0]
    wp_p=wp_regex.findall(b)
    time=time_regex.findall(b)[0][0]
    agent_regex=re.compile(r"(?<="+ip+r').*(?=POST)|(?<='+ip+r').*(?=Path)')
    agent=" ".join(agent_regex.search(b).group(0).split())
    
    botMatchIndex=checkIP(ip)
    if botMatchIndex is False:
        unique_bots.append({"ip":ip,"date":[time],"path":[path],"post":[],"agent":[agent]})
        #if post:
        if(len(wp_p)>0):
            try:
                p=json.loads(wp_p[0])
            except:
                p=wp_p[0]
#                 print(p)
            #Since we are just appending to uniqeBots, this bot has to be the last one we added, len(unique_bots)-1
            unique_bots[len(unique_bots)-1]["post"].append(p)
    else:
        unique_bots[botMatchIndex]["agent"].append(agent)
        unique_bots[botMatchIndex]["date"].append(time)
        unique_bots[botMatchIndex]["path"].append(path)
        if(len(wp_p)>0):
            try:
                p=json.loads(wp_p[0])
            except:
                p=wp_p[0]
#                 print(p)
            #Since we are just appending to uniqeBots, this bot has to be the last one we added, len(unique_bots)-1
            unique_bots[botMatchIndex]["post"].append(p)
In [48]:
print(len(unique_bots))
print(len(bots))
3499
155520

Testing a bot to see what it looks like

In [50]:
    unique_bots[123]
Out[50]:
{'agent': ['Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0',
  'Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0'],
 'date': ['  Wed Apr 04 2018 11:44:59 ',
  ' Wed Apr 04 2018 11:44:59 ',
  '  Tue Apr 17 2018 12:13:56 ',
  ' Tue Apr 17 2018 12:13:56 ',
  '  Thu Apr 19 2018 12:12:11 ',
  ' Thu Apr 19 2018 12:12:11 ',
  '  Mon Apr 23 2018 11:50:44 ',
  ' Mon Apr 23 2018 11:50:44 ',
  '  Mon Apr 23 2018 14:50:43 ',
  ' Mon Apr 23 2018 14:50:44 ',
  '  Fri Apr 27 2018 14:49:58 ',
  ''],
 'ip': '194.135.211.170',
 'path': [' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php',
  ' /wp-login.php'],
 'post': [{'log': 'symbiosis',
   'pwd': '333',
   'redirect_to': 'wp-login-error.html',
   'rememberme': 'forever',
   'submit': 'Login'},
  {'log': 'administrator',
   'pwd': 'tarzan',
   'redirect_to': 'wp-login-error.html',
   'rememberme': 'forever',
   'submit': 'Login'},
  {'log': 'fitnessweightwarrior',
   'pwd': '******',
   'redirect_to': 'wp-login-error.html',
   'rememberme': 'forever',
   'submit': 'Login'},
  {'log': 'admin',
   'pwd': 'pass12',
   'redirect_to': 'wp-login-error.html',
   'rememberme': 'forever',
   'submit': 'Login'},
  {'log': 'administrator',
   'pwd': 'xxx',
   'redirect_to': 'wp-login-error.html',
   'rememberme': 'forever',
   'submit': 'Login'},
  {'log': 'symbiosis',
   'pwd': 'mud',
   'redirect_to': 'wp-login-error.html',
   'rememberme': 'forever',
   'submit': 'Login'}]}
In [51]:
def bot_by_ip(ip):
    for bot in unique_bots:
        if(bot["ip"]==ip):
            return bot
    return ("ip not found")
        
        
In [52]:
top10
Out[52]:
[('95.79.33.65', 23),
 ('176.197.78.250', 22),
 ('93.157.168.25', 21),
 ('37.143.19.62', 20),
 ('46.61.152.185', 19),
 ('5.104.206.2', 19),
 ('185.59.58.52', 18),
 ('91.192.173.25', 18),
 ('185.13.112.104', 18),
 ('62.140.230.163', 17)]

Lets take a look by individual bot

In [53]:
pwd=[]
user=[]
p=bot_by_ip(top10[0][0])["post"]
for attempt in p:
    pwd.append(attempt["pwd"])
    user.append(attempt["log"])

passwords tried by top 1 bot:

In [54]:
pwd
Out[54]:
['happy1',
 '11223344',
 '31337',
 '43214321',
 'adam',
 'administrator12345',
 'diablo',
 'invalid',
 'Kristy',
 'monitor',
 'lier',
 'N/A',
 'she',
 'slash',
 'sniper',
 'michael',
 'zyxel',
 'maxima',
 'lutte',
 'mirror',
 'rosebud',
 'team',
 'smooth']
In [55]:
user
Out[55]:
['administrator',
 'symbiosis',
 'administrator',
 'admin',
 'fitnessweightwarrior',
 'admin',
 'admin',
 'admin',
 'admin',
 'administrator',
 'admin',
 'fitnessweightwarrior',
 'admin',
 'admin',
 'fitnessweightwarrior',
 'admin',
 'administrator',
 'admin',
 'admin',
 'administrator',
 'administrator',
 'admin',
 'fitnessweightwarrior']
In [56]:
bot_by_ip(top100[0][0])["date"]
Out[56]:
['  Fri Apr 06 2018 07:09:52 ',
 ' Fri Apr 06 2018 07:09:52 ',
 '  Sat Apr 07 2018 04:22:08 ',
 ' Sat Apr 07 2018 04:22:08 ',
 '  Sun Apr 08 2018 22:30:28 ',
 ' Sun Apr 08 2018 22:30:28 ',
 '  Mon Apr 09 2018 03:47:56 ',
 ' Mon Apr 09 2018 03:47:56 ',
 '  Mon Apr 09 2018 13:48:47 ',
 ' Mon Apr 09 2018 13:48:47 ',
 '  Mon Apr 09 2018 15:51:46 ',
 ' Mon Apr 09 2018 15:51:46 ',
 '  Wed Apr 11 2018 18:17:02 ',
 ' Wed Apr 11 2018 18:17:03 ',
 '  Fri Apr 13 2018 12:33:16 ',
 ' Fri Apr 13 2018 12:33:16 ',
 '  Fri Apr 13 2018 18:25:41 ',
 ' Fri Apr 13 2018 18:25:41 ',
 '  Fri Apr 13 2018 23:02:05 ',
 ' Fri Apr 13 2018 23:02:05 ',
 '  Sat Apr 14 2018 03:06:21 ',
 ' Sat Apr 14 2018 03:06:21 ',
 '  Sun Apr 15 2018 01:21:12 ',
 ' Sun Apr 15 2018 01:21:12 ',
 '  Mon Apr 16 2018 19:18:34 ',
 ' Mon Apr 16 2018 19:18:34 ',
 '  Tue Apr 17 2018 09:00:11 ',
 ' Tue Apr 17 2018 09:00:11 ',
 '  Tue Apr 17 2018 09:41:50 ',
 ' Tue Apr 17 2018 09:41:50 ',
 '  Wed Apr 18 2018 11:36:58 ',
 ' Wed Apr 18 2018 11:36:59 ',
 '  Fri Apr 20 2018 22:10:36 ',
 '  Sat Apr 21 2018 21:07:49 ',
 '  Sun Apr 22 2018 01:46:24 ',
 '  Mon Apr 23 2018 03:08:40 ',
 ' Mon Apr 23 2018 03:08:40 ',
 '  Fri Apr 27 2018 11:43:51 ',
 '',
 '  Fri Apr 27 2018 14:04:39 ',
 '',
 '  Sat Apr 28 2018 00:15:48 ',
 '',
 '  Sat Apr 28 2018 13:46:36 ',
 '',
 '  Sun Apr 29 2018 14:53:11 ',
 '',
 '  Sun Apr 29 2018 17:20:58 ',
 '']

POEM?

In [63]:
import random
In [64]:
r_bot=random.choice(top100)[0]
data=bot_by_ip(r_bot)
date=data["date"][0].strip().split(" ")


print("Dear "+r_bot+",\n")
print("I saw you for the first time back in "+ date[1]+" "+date[2]+", it was a "+date[0])
print("It was "+date[4])
print("")
print("You were looking for " +data["post"][0]["pwd"])
print("")
print("")
print("You asked me to remember you " +data["post"][0]["rememberme"])
print("")
print("I told you:")
print("\""+data["post"][0]["rememberme"]+" is a long time, come back and I might\"")
print("")
print("")
print("")

print("and you did:")
for i in range(0,5):
    print("")
    
prev_date=date
for i in range (0, len(data["date"])):
    date=data["date"][i].strip().split(" ")
    if(date[0]==prev_date[0] and date[1]==prev_date[1] and date[2]==prev_date[2]):
        print(" and again that same night")
    else:
        try:
            print("again in "+date[0]+" "+date[1]+" "+date[2])
        except:
            a=0
    prev_date=date
    i=i+1
print("")


user=[]
pwd=[]
for i in range (0, len(data["post"])):
    user.append(data["post"][i]["log"])
    pwd.append(data["post"][i]["pwd"])
    
user=", my ".join(user)
pwd=" for you, ".join(pwd)
print("I want you to be my "+user )
print("I have "+pwd)

for i in range(0,10):
    print("")
print("I wonder who you are")
print("I wonder where you are")
print(r_bot+" will you come back?")
for i in range(0,10):
    print("")
cute_name=r_bot.split(".")[3]
print(r_bot+" can I call you "+cute_name)
for i in range(0,10):
    print("")
who_cares=random.choice(top10)


print(who_cares[0]+" keeps comming in your absense.")
print(str(who_cares[1])+" times so far.")

for i in range(0,10):
    print("")
    
print(cute_name+" will you come back after visiting all those other servers??")
for i in range(0,10):
    print("")

print(cute_name+" don't make me come find you")
for i in range(0,30):
    print("")
print(cute_name+" Please understand, I tried to resist")
for i in range(0,30):
    print("")
bot_loc=ipLocator(r_bot)
lon=bot_loc["longitude"]
lat=bot_loc["latitude"]
loc=[(lat,lon)]
fig = gmaps.figure()
markers = gmaps.marker_layer(loc)
fig.add_layer(markers)
fig
Dear 145.255.11.228,

I saw you for the first time back in Apr 07, it was a Sat
It was 09:33:13

You were looking for 1232


You asked me to remember you forever

I told you:
"forever is a long time, come back and I might"



and you did:





 and again that same night
 and again that same night
again in Sun Apr 08
 and again that same night

I want you to be my admin, my admin
I have 1232 for you, 1qazxsw2










I wonder who you are
I wonder where you are
145.255.11.228 will you come back?










145.255.11.228 can I call you 228










5.104.206.2 keeps comming in your absense.
19 times so far.










228 will you come back after visiting all those other servers??










228 don't make me come find you






























228 Please understand, I tried to resist






























POEM 2

In [ ]:
wp_count=Counter(wp_bots)
top500=wp_count.most_common(150)
bot=random.choice(top500)
unique_wp=list(set(wp_bots))

print(bot)


locations=[]
data=ipLocator(bot[0])
lon=data["longitude"]
lat=data["latitude"]
url="https://www.google.com/maps/place/"+str(lat)+","+str(lon)
loc=("("+str(lat)+","+str(lon)+")")
locations.append(loc)
    # webbrowser.open(url,new=1)
#     print(url)
    # call["/usr/bin/open -a "/Applications/Google Chrome.app" {url}
    # !/usr/bin/open -a "/Applications/Google Chrome.app" {url}
top_ten_ip_locations="["+(",".join(locations))+"]"
# locat.append(json.loads(",".join(locations)))


cont=input("Welcome back\nWhat would you like to do tonight?\n")
# print("you said: "+str(cont))
# time.sleep(1)
cont=input("yes\n")
cont=input("Are you sure?\n")
# time.sleep(1)
# pswd = getpass.getpass('Please confirm you want to do this: ')
# time.sleep(1)

cont=input("It was here ")
# print(bot_by_ip(bot[0])["date"])
bot_data=bot_by_ip(bot[0])
# print(bot_data)
dates=bot_data["date"]
print(dates[len(dates)-2])

cont=input("")
print("yes")
cont=input("")
print("It wanted to get access, it tried:")
attempt=bot_data["post"]
print("User: "+attempt[len(attempt)-1]["log"]+" Password: "+attempt[len(attempt)-1]["pwd"]+", it asked you to remember it forever")
cont=input("")
print("What do you want me to say?")
# print (pswd)


r_bot=bot[0]
data=bot_by_ip(r_bot)
date=data["date"][0].strip().split(" ")
for i in range(0,30):
    print()

print("Dear "+r_bot+",\n")
print("I saw you for the first time back in "+ date[1]+" "+date[2]+", it was a "+date[0])
print("It was "+date[4])
print("")
print("You were looking for " +data["post"][0]["pwd"])
print("")
print("")
print("You asked me to remember you " +data["post"][0]["rememberme"])
print("")
print("I told you:")
print("\""+data["post"][0]["rememberme"]+" is a long time, come back and I might\"")
print("")
print("")
print("")

print("and you did:")
for i in range(0,5):
    print("")
    
prev_date=date
for i in range (0, len(data["date"])):
    date=data["date"][i].strip().split(" ")
    if(date[0]==prev_date[0] and date[1]==prev_date[1] and date[2]==prev_date[2]):
        print(" and again that same night")
    else:
        try:
            print("again in "+date[0]+" "+date[1]+" "+date[2])
        except:
            a=0
    prev_date=date
    i=i+1
print("")


user=[]
pwd=[]
for i in range (0, len(data["post"])):
    user.append(data["post"][i]["log"])
    pwd.append(data["post"][i]["pwd"])
    
user=", my ".join(user)
pwd=" for you, ".join(pwd)
print("I want you to be my "+user )
print("I have "+pwd)

for i in range(0,10):
    print("")
print("I wonder who you are")
print("I wonder where you are")
print(r_bot+" will you come back?")
for i in range(0,10):
    print("")
cute_name=r_bot.split(".")[3]
print(r_bot+" can I call you "+cute_name)
for i in range(0,10):
    print("")
who_cares=random.choice(top10)


print(who_cares[0]+" keeps comming in your absense.")
print(str(who_cares[1])+" times so far.")

for i in range(0,10):
    print("")
    
print(cute_name+" what are you looking for?")
print(cute_name+" who are you looking for?")

for i in range(0,10):
    print("")
print(cute_name+" will you come back?")
for i in range(0,10):
    print("")
print(cute_name+" will you please come back?")
for i in range(0,10):
    print("")


print(cute_name+" don't make me come find you")
for i in range(0,30):
    print("")
print(cute_name+" Please understand, I tried to resist")
for i in range(0,30):
    print("")
bot_loc=ipLocator(r_bot)
lon=bot_loc["longitude"]
lat=bot_loc["latitude"]
loc=[(lat,lon)]

print(cute_name+", I'll be here waiting")

#uncomment to open on a new browser window
# webbrowser.open(url,new=1)
('94.76.125.54', 11)
In [ ]: