import pandas as pd
import numpy as np
import string
import pylab
import re
import pandas
import time
import os
import collections
import matplotlib
import struct
import socket
import json
from datetime import datetime
from netaddr import IPNetwork, IPAddress
%matplotlib inline
print pd.__version__
pylab.rcParams['figure.figsize'] = (16.0, 5.0)
0.13.0rc1-32-g81053f9
# Mapping of Bro log file name -> ordered list of column names for that log.
# Every list ends with 'threat' and 'sample': the loader below does not hand
# those two names to read_csv (it slices them off with [:-2]) and instead
# fills the two columns in from the directory path of each log file.
logs_to_process = {
'conn.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','proto','service','duration','orig_bytes','resp_bytes','conn_state','local_orig','missed_bytes','history','orig_pkts','orig_ip_bytes','resp_pkts','resp_ip_bytes','tunnel_parents','threat','sample'],
'dns.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','proto','trans_id','query','qclass','qclass_name','qtype','qtype_name','rcode','rcode_name','AA','TC','RD','RA','Z','answers','TTLs','rejected','threat','sample'],
'files.log' : ['ts','fuid','tx_hosts','rx_hosts','conn_uids','source','depth','analyzers','mime_type','filename','duration','local_orig','is_orig','seen_bytes','total_bytes','missing_bytes','overflow_bytes','timedout','parent_fuid','md5','sha1','sha256','extracted','threat','sample'],
'ftp.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','user','password','command','arg','mime_type','file_size','reply_code','reply_msg','data_channel.passive','data_channel.orig_h','data_channel.resp_h','data_channel.resp_p','fuid','threat','sample'],
'http.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','trans_depth','method','host','uri','referrer','user_agent','request_body_len','response_body_len','status_code','status_msg','info_code','info_msg','filename','tags','username','password','proxied','orig_fuids','orig_mime_types','resp_fuids','resp_mime_types','threat','sample'],
'notice.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','fuid','file_mime_type','file_desc','proto','note','msg','sub','src','dst','p','n','peer_descr','actions','suppress_for','dropped','remote_location.country_code','remote_location.region','remote_location.city','remote_location.latitude','remote_location.longitude','threat','sample'],
'signatures.log' : ['ts','src_addr','src_port','dst_addr','dst_port','note','sig_id','event_msg','sub_msg','sig_count','host_count','threat','sample'],
'smtp.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','trans_depth','helo','mailfrom','rcptto','date','from','to','reply_to','msg_id','in_reply_to','subject','x_originating_ip','first_received','second_received','last_reply','path','user_agent','fuids','is_webmail','threat','sample'],
'ssl.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','version','cipher','server_name','session_id','subject','issuer_subject','not_valid_before','not_valid_after','last_alert','client_subject','client_issuer_subject','cert_hash','validation_status','threat','sample'],
'tunnel.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','tunnel_type','action','threat','sample'],
'weird.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','name','addl','notice','peer','threat','sample']
}
# One empty DataFrame per Bro log type, each pre-seeded with that log's column
# names from logs_to_process.
(conndf, dnsdf, filesdf, ftpdf, httpdf, noticedf,
 sigdf, smtpdf, ssldf, tunneldf, weirddf) = (
    pd.DataFrame(columns=logs_to_process[log_name])
    for log_name in (
        'conn.log', 'dns.log', 'files.log', 'ftp.log', 'http.log',
        'notice.log', 'signatures.log', 'smtp.log', 'ssl.log',
        'tunnel.log', 'weird.log',
    )
)
# Process the directory structure
# If you download the complete PCAP zip from Contagio and unzip a structure like:
# PCAPS_TRAFFIC_PATTERNS
# |->CRIME
# |-> <sample>
# |->APT
# |-> <sample>
# |->METASPLOIT
# |-> <sample>
#
# will appear, and this is the structure that is walked. CRIME/APT/METASPLOIT will make their way into the "threat" tag
# while the sample/PCAP name will wind up in "sample".
#
# Bro data is generated via the "run_bro.sh" shell script (this places all Bro output in the respective sample directories and
# contributes to the directory structure above).
# Walk the tree rooted at '.' and load every recognised Bro log file.
#
# Only directories whose os.walk() path has exactly four components are used;
# component 2 is stored in the "threat" column and component 3 in "sample".
# Parsed frames are collected per log name and concatenated once at the end,
# rather than growing each DataFrame one file at a time.
frames_by_log = collections.defaultdict(list)
for dirName, subdirList, fileList in os.walk('.'):
    tags = dirName.split(os.sep)
    if len(tags) != 4:
        continue
    for fname in fileList:
        if fname not in logs_to_process:
            continue
        try:
            # skiprows/skipfooter drop the leading and trailing non-data lines
            # (presumably Bro's '#' header block and '#close' footer -- confirm).
            # The last two column names are not in the file; they are added below.
            tempdf = pd.read_csv(os.path.join(dirName, fname), sep='\t', skiprows=8, header=None,
                                 names=logs_to_process[fname][:-2], skipfooter=1)
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print("[*] error: %s, on %s/%s" % (str(e), dirName, fname))
            continue
        tempdf['threat'] = tags[2]
        tempdf['sample'] = tags[3]
        if tags[2] == "0":
            print('%s/%s' % (dirName, fname))
        frames_by_log[fname].append(tempdf)


def _with_parts(base, parts):
    """Return *base* followed by every frame in *parts* (a single concat)."""
    return pd.concat([base] + parts) if parts else base


conndf = _with_parts(conndf, frames_by_log['conn.log'])
dnsdf = _with_parts(dnsdf, frames_by_log['dns.log'])
filesdf = _with_parts(filesdf, frames_by_log['files.log'])
ftpdf = _with_parts(ftpdf, frames_by_log['ftp.log'])
httpdf = _with_parts(httpdf, frames_by_log['http.log'])
noticedf = _with_parts(noticedf, frames_by_log['notice.log'])
sigdf = _with_parts(sigdf, frames_by_log['signatures.log'])
smtpdf = _with_parts(smtpdf, frames_by_log['smtp.log'])
ssldf = _with_parts(ssldf, frames_by_log['ssl.log'])
tunneldf = _with_parts(tunneldf, frames_by_log['tunnel.log'])
weirddf = _with_parts(weirddf, frames_by_log['weird.log'])
# Read in and configure the maxmind db (free ASN)
# Each row is read as (low, high, asn); low/high are numeric range bounds that
# maxmind_lookup compares against the integer form of an IP address.
maxmind = pd.read_csv("./GeoIPASNum2.csv", sep=',', header=None, names=['low','high','asn'])
# Fixed 64-bit width: the bounds must hold unsigned 32-bit values, which a
# platform-default int is not guaranteed to do everywhere.
maxmind['low'] = maxmind['low'].astype('int64')
maxmind['high'] = maxmind['high'].astype('int64')
# Helper Functions
def ip2int(addr):
    """Convert an IPv4 address string to its unsigned 32-bit integer value.

    Parsing is delegated to ``socket.inet_aton`` and the four bytes are read
    in network (big-endian) order.

    Returns 0 when *addr* cannot be parsed (for example an IPv6 literal, a
    hostname, or a non-string value such as None/NaN).  "0.0.0.0" also maps
    to 0, so callers cannot tell it apart from a failure.
    """
    try:
        return struct.unpack("!I", socket.inet_aton(addr))[0]
    except (socket.error, TypeError, ValueError):
        # socket.error: not a valid IPv4 string; TypeError/ValueError: not a
        # usable string at all.
        return 0
# Cache of ip string -> ASN result so each distinct address hits the table once.
maxcache = {}


def maxmind_lookup(ip):
    """Return the ASN entry whose [low, high] range contains *ip*.

    *ip* is converted with ip2int(); addresses it cannot parse (result 0)
    yield "UNKNOWN" without being cached.  Otherwise the first row of the
    module-level ``maxmind`` table whose range contains the address is
    returned, or "UNKNOWN" when no row matches; both outcomes are cached in
    ``maxcache``.

    The range test is inclusive at both ends, so an address equal to a range's
    first or last value is matched.
    """
    if ip in maxcache:
        return maxcache[ip]
    ip_num = ip2int(ip)
    if ip_num == 0:
        return "UNKNOWN"
    in_range = (maxmind["low"] <= ip_num) & (maxmind['high'] >= ip_num)
    results = maxmind.loc[in_range]['asn'].tolist()
    asn = results[0] if results else "UNKNOWN"
    maxcache[ip] = asn
    return asn
def box_plot_df_setup(series_a, series_b):
    """Build a contingency table of how often values of two series co-occur.

    The series are paired positionally (``zip`` over their ``.values``), so
    they are expected to be the same length.

    Returns a DataFrame indexed by the distinct values of *series_a*, with one
    column per distinct value of *series_b*.  Each cell holds the number of
    positions at which that pair occurred, and 0 where it never occurred.
    """
    # Counter per series_a value, counting the series_b values seen with it.
    cont_table = collections.defaultdict(collections.Counter)
    for val_a, val_b in zip(series_a.values, series_b.values):
        cont_table[val_a][val_b] += 1
    # Materialise keys/values as lists so the constructor receives plain
    # sequences whether dict.keys()/values() return lists or views.
    dataframe = pd.DataFrame(list(cont_table.values()), index=list(cont_table.keys()))
    # Pairs that never occurred come out as NaN; report them as 0.
    dataframe.fillna(0, inplace=True)
    return dataframe
def is_ip(ip):
    """Return True if *ip* is accepted by ``socket.inet_aton`` as an IPv4 address.

    IPv6 literals, hostnames and non-string values (None, NaN, ...) all give
    False rather than raising.
    """
    try:
        socket.inet_aton(ip)
    except (socket.error, TypeError, ValueError):
        # socket.error: not an IPv4 string; TypeError/ValueError: not a usable
        # string at all (e.g. a NaN coming out of a DataFrame column).
        return False
    return True
# misc cleanup of the Bro conn.log dataframe
# '-' entries in the byte counters are treated as 0 so the columns can be made
# integer.  replace() leaves a column without any '-' untouched, so no
# try/except is needed around it.
conndf['orig_bytes'] = conndf['orig_bytes'].replace('-', 0).astype('int64')
conndf['resp_bytes'] = conndf['resp_bytes'].replace('-', 0).astype('int64')
conndf['total_bytes'] = conndf['orig_bytes'] + conndf['resp_bytes']
# and augmentation (asn): look up the responder address of every connection
conndf['maxmind_asn'] = conndf['id.resp_h'].map(maxmind_lookup)
# add date: 'ts' is parsed as a float epoch timestamp; datetime.fromtimestamp
# converts it using the local timezone of the machine running this.
good_datetime = [datetime.fromtimestamp(float(date)) for date in conndf['ts'].values]
conndf['date'] = pd.Series(good_datetime, index=conndf.index)
# reindex the dataframes
# NOTE(review): reindex() with no arguments keeps the existing index labels;
# if renumbering the rows was intended, reset_index(drop=True) would be needed
# -- confirm before changing, since displayed row labels would change.
conndf = conndf.reindex()
httpdf = httpdf.reindex()
dnsdf = dnsdf.reindex()
noticedf = noticedf.reindex()
filesdf = filesdf.reindex()
smtpdf = smtpdf.reindex()
# For the APT and CRIME threat classes, plot the number of connections per
# (year, month), one line per sample.
# Builds a group-key function that reads the named attribute of an index value.
by = lambda x: lambda y: getattr(y, x)
for threat in ['APT', 'CRIME']:
    # .copy() so adding the helper column does not write into a slice of conndf.
    subset = conndf[conndf['threat'] == threat][['date','sample']].copy()
    subset['count'] = 1
    # Sum (not the default mean) so several connections that share a
    # date/sample cell are all counted.
    pivot = pd.pivot_table(subset, values='count', rows=['date'], cols=['sample'],
                           aggfunc=np.sum, fill_value=0)
    grouped = pivot.groupby([by('year'),by('month')]).sum()
    ax = grouped.plot()
    pylab.ylabel('Connections')
    pylab.xlabel('Date Recorded')
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2, title="Sample Name")
Here we can just get a feeling of the various high-level dimensions of the data
# High-level dimensions of the data: distinct samples overall and per threat
# class (taken from conndf), then the row count of every loaded log type.
print("Total Samples: %s" % conndf['sample'].nunique())
print("")
for fmt, threat_name in (
        ("APT Samples: %s", 'APT'),
        ("Crime Samples: %s", 'CRIME'),
        ("Metasploit Samples: %s", 'METASPLOIT')):
    print(fmt % conndf[conndf['threat'] == threat_name]['sample'].nunique())
print("")
for fmt, frame in (
        ("Connection Log Entries: %s", conndf),
        ("DNS Log Entries: %s", dnsdf),
        ("HTTP Log Entries: %s", httpdf),
        ("Files Log Entries: %s", filesdf),
        ("SMTP Log Entries: %s", smtpdf),
        ("Weird Log Entries: %s", weirddf),
        ("SSL Log Entries: %s", ssldf),
        ("Notice Log Entries: %s", noticedf),
        ("Tunnel Log Entries: %s", tunneldf),
        ("Signature Log Entries: %s", sigdf)):
    print(fmt % frame.shape[0])
Total Samples: 105 APT Samples: 31 Crime Samples: 68 Metasploit Samples: 6 Connection Log Entries: 104413 DNS Log Entries: 108371 HTTP Log Entries: 22927 Files Log Entries: 19289 SMTP Log Entries: 4088 Weird Log Entries: 1081 SSL Log Entries: 351 Notice Log Entries: 252 Tunnel Log Entries: 2 Signature Log Entries: 1
This is an example of how to go from an alert (in this case a Bro signature match) to gathering information about the alert via the other data in just a few lines of code.
We'll want to
# Get all the destination addresses from all the signature hits, in this case it's only one.
# The list keeps one entry per signature row, so repeated addresses are not de-duplicated.
sig_dst_ips = sigdf['dst_addr'].tolist()
# Show the identifying columns of every signature hit (notebook cell output).
sigdf[['dst_addr', 'dst_port','sig_id','sub_msg','threat','sample']]
dst_addr | dst_port | sig_id | sub_msg | threat | sample | |
---|---|---|---|---|---|---|
0 | 199.192.156.134 | 443 | windows_reverse_shell | POST /bbs/info.asp HTTP/1.1^M^JHost: 199.192.1... | APT | Mswab_Yayih_FD1BE09E499E8E380424B3835FC973A8_2... |
1 rows × 6 columns
# Let's see what other information we can gather about the network sessions surrounding that signature
# For every alerted destination address, print the matching connection and
# HTTP records, then the SHA1 of each file referenced by those HTTP records
# together with any notice.log entry whose 'sub' field contains that hash.
for ip in sig_dst_ips:
    print("**** IP: %s ****" % ip)
    print(" ** Flow Information **")
    print(conndf[conndf['id.resp_h'] == ip][['id.resp_p','proto','service','duration','conn_state','orig_ip_bytes','resp_ip_bytes']])
    print(" ** HTTP Information **")
    # Filter the HTTP log once and reuse it for the listing and the file ids.
    http_hits = httpdf[httpdf['id.resp_h'] == ip]
    print(http_hits[['method','host','uri','user_agent']])
    files = http_hits['orig_fuids']
    flist = pd.concat([files, http_hits['resp_fuids']]).tolist()
    # We use SHA1 because that's what gets tossed in the Bro notice.log for the Team Cymru MHR alerts
    print(" ** File SHA1 **")
    for f in flist:
        # '-' marks a request/response with no associated file id.
        if f == '-':
            continue
        sha1 = filesdf[filesdf['fuid'] == f]['sha1'].tolist()
        for m in sha1:
            print("Sample Hash: %s" % m)
            # NOTE: str.contains treats the hash as a regular expression.
            notice_hits = noticedf[noticedf['sub'].str.contains(m)][['sub','sample']]
            if notice_hits.shape[0] > 0:
                print(notice_hits)
            hash_rows = filesdf[filesdf['sha1'] == m]
            print("Filename: %s mime-type: %s" % (hash_rows['filename'].tolist()[0], hash_rows['mime_type'].tolist()[0]))
            print("")
**** IP: 199.192.156.134 **** ** Flow Information ** id.resp_p proto service duration conn_state orig_ip_bytes resp_ip_bytes 4 443 tcp http 110.4259 SF 436 346 5 443 tcp http 7.229701 SF 501 360 6 443 tcp http 0.750663 SF 377 333 7 443 tcp http 10.36404 SF 517 361 8 443 tcp http 0.698435 RSTO 378 293 9 443 tcp - 8.462028 SF 1682 392 10 443 tcp http 69.32855 RSTO 363 306 [7 rows x 7 columns] ** HTTP Information ** method host uri user_agent 0 POST 199.192.156.134 /bbs/info.asp - 1 POST 199.192.156.134 /bbs/info.asp - 2 POST 199.192.156.134 /bbs/info.asp - 3 POST 199.192.156.134 /bbs/info.asp - 4 POST 199.192.156.134 /bbs/info.asp - 5 POST 199.192.156.134 /bbs/info.asp - [6 rows x 4 columns] ** File SHA1 ** Sample Hash: b0122bc7cedf885857e61764806ada3cf40c0934 Filename: - mime-type: binary Sample Hash: 6dac3f9397e89d9a5db495f78f66007533f59fcd Filename: - mime-type: binary Sample Hash: dfc7310ffd971d36fe1d84c1b6eba0c0025cc4db Filename: - mime-type: binary Sample Hash: 2c7a0c7577bfefd2cc52677671a423de81328876 Filename: - mime-type: binary Sample Hash: bd201fad1039536e84ce0019a03db6fa2f52f4d3 Filename: - mime-type: binary Sample Hash: c21f5b7fa4023f68861c11ec4b8fea98595ea8f6 Filename: - mime-type: binary Sample Hash: d01e7d3c2358aea02008176834b5e9f37404a7bc Filename: - mime-type: binary Sample Hash: c83da355ebfc8f842c45216621062719394ff106 Filename: - mime-type: binary Sample Hash: abbbfc0234d408498b344bcd899ae5f4081e6da6 Filename: - mime-type: binary Sample Hash: 5bbf8274b821c61c578b6d3fe9578b6a169767be Filename: - mime-type: binary
# Number of DNS log rows per query type name, across all samples.
qtype_counts = dnsdf['qtype_name'].value_counts()
print(qtype_counts)
NB 51487 A 42021 MX 10990 - 3825 PTR 16 * 14 SRV 7 NS 6 AAAA 5 dtype: int64
# For each DNS query type, list its five most frequently queried names.
for q in dnsdf['qtype_name'].unique().tolist():
    print("Query Type: %s" % q)
    queries_of_type = dnsdf[dnsdf['qtype_name'] == q]['query']
    print(queries_of_type.value_counts().head(5))
    print("")
Query Type: A star-trakers.com 482 oiexgmycrtwirsgcmv.com 308 tthayebvhdmntiyeuxw.com 261 google.com 246 ymcwineqkj.com 241 dtype: int64 Query Type: - - 3825 dtype: int64 Query Type: NB NOLOGO1093.COM 6544 NOLOGO0094.NET 6533 FIBLOLPP.COM 725 XNUQKDWEK.COM 725 UWSCTPIHLT.COM 725 dtype: int64 Query Type: NS de 2 org 2 com 2 dtype: int64 Query Type: PTR 221.107.164.128.in-addr.arpa.localdomain 4 221.107.164.128.in-addr.arpa 4 lb._dns-sd._udp.0.0.29.172.in-addr.arpa 1 lb._dns-sd._udp.va.comcast.net 1 db._dns-sd._udp.va.comcast.net 1 dtype: int64 Query Type: SRV *\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 4 orion [00:50:56:c0:00:08]._workstation._tcp.local 3 dtype: int64 Query Type: MX cpmwc.com 2864 fluidsystemsbots.com 1261 webworkz.com 748 alltel.net 678 fast-solutions.net 426 dtype: int64 Query Type: * xps-8300 10 revetonforever 4 dtype: int64 Query Type: AAAA time.windows.com 2 mfodjf393843218.us 2 javadl-esd-secure.oracle.com 1 dtype: int64
# Number of DNS log rows per response code name ('-' is counted as its own value).
dnsdf['rcode_name'].value_counts()
- 62842 NXDOMAIN 24067 NOERROR 21003 REFUSED 311 SERVFAIL 146 NOTAUTH 1 NOTZONE 1 dtype: int64
Want to take a guess at which one(s) possibly use a DGA to find and connect to C2 domains?
# The ten samples with the most DNS rows whose response code is NXDOMAIN.
dnsdf[dnsdf['rcode_name'] == 'NXDOMAIN']['sample'].value_counts().head(10)
BIN_Ramnitpcap_2012-01 20651 BIN_Kuluoz-Asprox_9F842AD20C50AD1AAB41F20B321BF84B 1711 BIN_Wordpress_Mutopy_Symmi_20A6EBF61243B760DD65F897236B6AD3-ShortRun 432 cryptolocker_9CBB128E8211A7CD00729C159815CB1C 258 BIN_Cutwail-Pushdo(2)_582DE032477E099EB1024D84C73E98C1 236 BIN_CitadelPacked_2012-05 223 BIN_torpigminiloader_011C1CA6030EE091CE7C20CD3AAECFA0 181 purplehaze 131 BIN_Cutwail-Pushdo(1)_582DE032477E099EB1024D84C73E98C1 128 BIN_torpigminiloader_C3366B6006ACC1F8DF875EAA114796F0 28 dtype: int64
Generally in HTTP traffic we expect to see a value in the 'Host' header. This indicates the virtual host that the client is connecting to on a given IP address. It could be interesting to see what hostnames are present in the HTTP 'Host' header, yet no DNS query was seen. Keep in mind this could be as simple as it wasn't recorded/included in the PCAP, or that it really didn't happen.
# HTTP Host header values for which no DNS query with the same name was logged.
# Hosts that are IPv4 literals are skipped, since no lookup is expected for them.
intersect_hostnames = set(httpdf['host']).intersection(set(dnsdf['query']))
interesting = [hn for hn in set(httpdf['host'])
               if hn not in intersect_hostnames and not is_ip(hn)]
# Select all matching HTTP rows in one pass instead of appending per host.
tempdf = httpdf[httpdf['host'].isin(interesting)].copy()
tempdf['count'] = 1
# Requests per (sample, host, responder address), largest first.
tempdf[['host', 'id.resp_h', 'sample', 'count']].groupby(['sample', 'host', 'id.resp_h']).sum().sort('count', ascending=0)
count | |||
---|---|---|---|
sample | host | id.resp_h | |
purplehaze | insideentrepreneurs.com | 209.114.50.164 | 20 |
BIN_ZeroAccess_Sirefef_29A35124ABEAD63CD8DB2BBB469CBC7A_2013-05 | www.e-zeeinternet.com | 209.68.32.176 | 9 |
EK_popads_109.236.80.170_2013-08-13 | tqhsy.8taglik.info | 109.236.80.170 | 8 |
EK_BIN_Blackhole_leadingto_Medfos_0512E73000BCCCE5AFD2E9329972208A_2013-04 | autorepairgreeley.info | 198.100.45.44 | 7 |
EK_Smokekt150(Malwaredontneedcoffee)_2012-09 | bigfatcounters.com | 213.108.252.185 | 6 |
BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F955F991940_2013-08 | dgyqimolcqm.cm | 31.184.244.182 | 5 |
81.17.26.187 | 4 | ||
EK_popads_109.236.80.170_2013-08-13 | qkvuz.12taglik.info | 109.236.80.170 | 4 |
xrp.8taglik.info | 109.236.80.170 | 3 | |
EK_Smokekt150(Malwaredontneedcoffee)_2012-09 | LODKDKD12.INFO | 62.76.188.226 | 3 |
BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F955F991940_2013-08 | dgyqimolcqm.cm | 81.17.18.18 | 3 |
BIN_ZeroAccess_3169969E91F5FE5446909BBAB6E14D5D_2012-10 | izhsuqbtcsx.cm | 31.184.244.180 | 2 |
RealPlayer_rmoc3260.dll_ActiveX_Control_Remote_Code_Execution_Exploit | freak | 192.168.0.15 | 1 |
BIN_Wordpress_Mutopy_Symmi_20A6EBF61243B760DD65F897236B6AD3-ShortRun | VARNAJALAMARTS.com | 198.154.237.48 | 1 |
iMesh_7.1.0.x(IMWeb.dll_7.0.0.x)_Remote_Heap_Overflow_Exploit | freak | 192.168.0.15 | 1 |
BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F955F991940_2013-08 | SVRIntl-crl.verisign.com | 23.4.181.163 | 1 |
Yahoo_Music_Jukebox_2.2-AddImage()_ActiveX_Remote_BOF_Exploit(2) | freak | 192.168.0.15 | 1 |
BIN_sality_CEAF4D9E1F408299144E75D7F29C1810 | livelife-eg.com | 97.74.182.1 | 1 |
NUVICO_DVR_NVDV4__PdvrAtl_Module_(PdvrAt.DLL_1.0.1.25)_BoF_Exploit | freak | 192.168.0.15 | 1 |
purplehaze | d.pixel.trafficmp.com | 107.20.175.29 | 1 |
EK_Smokekt150(Malwaredontneedcoffee)_2012-09 | delivery.trafficbroker.com | 192.168.186.6 | 1 |
Sejoong_Namo_ActiveSquare_6_NamoInstaller.dll-ActiveX_BoF_Exploit | freak | 192.168.0.15 | 1 |
Microsoft_SQL_Server_Distributed_Management_Objects_BoF_Exploit | freak | 192.168.0.15 | 1 |
BIN_DNSWatch_protux_4F8A44EF66384CCFAB737C8D7ADB4BB8_2012-11 | vcvcvcvc.dyndns.org | 114.244.44.115 | 1 |
24 rows × 1 columns
From the looks of the above, it seems that for some of the samples DNS traffic wasn't logged vs. malware doing something tricky. We can verify (below) that there really doesn't appear to be any DNS traffic related to one of the domains.
# Check three ways for DNS activity related to dgyqimolcqm.cm: as an exact
# query name, anywhere in the answers field, and in the top queries issued by
# the sample that contacted it.
print(dnsdf[dnsdf['query'] == "dgyqimolcqm.cm"])
# The dot is escaped because str.contains treats its argument as a regex.
print(dnsdf[dnsdf.answers.str.contains(r'dgyqimolcqm\.cm')])
# Use the complete sample name: an equality test against the display-truncated
# form can never match, which would make this check empty regardless of data.
print(dnsdf[dnsdf['sample'] == "BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F955F991940_2013-08"]['query'].value_counts().head(50))
Empty DataFrame Columns: [ts, uid, id.orig_h, id.orig_p, id.resp_h, id.resp_p, proto, trans_id, query, qclass, qclass_name, qtype, qtype_name, rcode, rcode_name, AA, TC, RD, RA, Z, answers, TTLs, rejected, threat, sample] Index: [] [0 rows x 25 columns] Empty DataFrame Columns: [ts, uid, id.orig_h, id.orig_p, id.resp_h, id.resp_p, proto, trans_id, query, qclass, qclass_name, qtype, qtype_name, rcode, rcode_name, AA, TC, RD, RA, Z, answers, TTLs, rejected, threat, sample] Index: [] [0 rows x 25 columns] Series([], dtype: int64)
Well, at least there's always HTTP traffic to look at for the above domain.
# Endpoints, URI, sample and threat of every HTTP request whose Host header is dgyqimolcqm.cm.
httpdf[httpdf['host'] == "dgyqimolcqm.cm"][['id.orig_h','id.orig_p','id.resp_h','id.resp_p','uri','sample','threat']]
id.orig_h | id.orig_p | id.resp_h | id.resp_p | uri | sample | threat | |
---|---|---|---|---|---|---|---|
2 | 192.168.248.165 | 1138 | 81.17.26.187 | 80 | /X11HXlhHWF1bR1hbWUZcXA8KCloKW19QCF0NDF8LXlpZC... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
4 | 192.168.248.165 | 1143 | 81.17.26.187 | 80 | /X1xHXVBHW1pHWF1fRlxcDwoKWgpbX1AIXQ0MXwteWlkLD... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
42 | 192.168.248.165 | 1146 | 81.17.26.187 | 80 | /WFBQR1hYXEdYWFxHWFpfRgoFAAoCVhwbBVQIITtZCi0GH... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
108 | 192.168.248.165 | 1204 | 81.17.26.187 | 80 | /UFxHW1hYR1hQWkdYUUZWCgUADVRaWRgFDFgYAANRDAcTWQ== | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
150 | 192.168.248.165 | 1229 | 81.17.18.18 | 80 | /X19HW1tZR15HW11aRg1bWVoNWVENXloLXwxeW19ZXl5ZC... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
221 | 192.168.248.165 | 1257 | 81.17.18.18 | 80 | /X19HW1tZR15HW11eRg1bWVoNWVENXloLXwxeW19ZXl5ZC... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
258 | 192.168.248.165 | 1263 | 81.17.18.18 | 80 | /WFBQR1hYXEdYWFxHWFpfRgoFAAoCVhwbBVQIITtZCi0GH... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
363 | 192.168.248.165 | 1328 | 31.184.244.182 | 80 | /X19HW1tZR15HW11eRg9YWAxeUF8PWg8MXw9eXVALX1pdW... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
365 | 192.168.248.165 | 1332 | 31.184.244.182 | 80 | /WF5dR1haXkdYXV1HWFFaRgoFAAoCRxkBGVYKBQAKAg0IH... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
446 | 192.168.248.165 | 1350 | 31.184.244.182 | 80 | /UFxHW1hYR1hQWkdYXEZWCgUADVRbGAULXlgYAANQWVATWQ== | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
634 | 192.168.248.165 | 1419 | 31.184.244.182 | 80 | /WFBQR1hYXEdYWFxHWFpfRgoFAAoCVhwbBVQIITtZCi0GH... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
921 | 192.168.248.165 | 1561 | 31.184.244.182 | 80 | /XFFZWkcaAAcNDAUKBQAKAkcKBgRGVhlUUTtcHCo7ASEjX... | BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F9... | CRIME |
12 rows × 7 columns
# Distinct User-Agent strings and distinct samples present in the HTTP log.
unique_ua_count = httpdf['user_agent'].nunique()
http_sample_count = httpdf['sample'].nunique()
print("%s Unique User-Agents in %s samples." % (unique_ua_count, http_sample_count))
190 Unique User-Agents in 83 samples.
# Number of distinct User-Agent strings seen per sample.  The rows are built
# first and the DataFrame is created once, instead of appending row by row.
ua_rows = [
    {'sample': sample, 'num_ua': httpdf[httpdf['sample'] == sample]['user_agent'].nunique()}
    for sample in list(set(httpdf['sample']))
]
tempdf = pd.DataFrame(ua_rows, columns=['sample','num_ua'])
# Samples with the most distinct User-Agents first (top five shown).
tempdf.sort('num_ua', ascending=0).head()
sample | num_ua | |
---|---|---|
6 | BIN_dirtjumper_2011-10 | 103 |
63 | purplehaze | 7 |
26 | EK_Smokekt150(Malwaredontneedcoffee)_2012-09 | 7 |
24 | EK_popads_109.236.80.170_2013-08-13 | 6 |
79 | BIN_ZeusGameover_2012-02 | 6 |
5 rows × 2 columns
Let's check one of the ones that doesn't completely stand out, purplehaze
# Well, at least we know what UA this sample uses for C2, and it seems we can see some other OS activity as well
# tsample selects the sample inspected by this cell and the cells that follow.
tsample = 'purplehaze'
# HTTP request count per User-Agent string for that sample.
httpdf[httpdf['sample'] == tsample].user_agent.value_counts()
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.1) 17179 Mozilla/4.0 (compatible; UPnP/1.0; Windows NT/5.1) 25 Mozilla/4.0 (Windows XP 5.1) Java/1.6.0_26 23 - 16 Mozilla/4.0 (compatible; UPnP/1.0; Windows 9x) 10 Microsoft-CryptoAPI/5.131.2600.5512 4 contype 2 dtype: int64
# Add a constant 'count' column to httpdf so that groupby(...).sum() yields
# row counts; later cells that select 'count' from httpdf depend on this line.
httpdf['count'] = 1
# Requests per (sample, user_agent, host) for the sample named in tsample.
grouped = httpdf[httpdf['sample'] == tsample][['sample','user_agent','host','count']].groupby(['sample', 'user_agent', 'host']).sum()
# Show the ten largest groups.
grouped.sort('count', ascending = 0).head(10)
count | |||
---|---|---|---|
sample | user_agent | host | |
purplehaze | Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.1) | reallysweetgames.com | 1830 |
webgameroom.com | 1587 | ||
deadrush.com | 1043 | ||
ui.mevio.com | 558 | ||
redirect.xmladfeed.com | 354 | ||
114337.arb.xmladfeed.com | 344 | ||
redirect.ad-feeds.com | 342 | ||
b.scorecardresearch.com | 339 | ||
static3.filmannex.com | 254 | ||
log.adap.tv | 232 |
10 rows × 1 columns
Now for something more interesting, Dirtjumper
# Switch the inspected sample to Dirtjumper.
tsample = 'BIN_dirtjumper_2011-10'
# HTTP request count per User-Agent string for that sample.
httpdf[httpdf['sample'] == tsample].user_agent.value_counts()
Mozilla/4.0 (compatible; MSIE 6.0; Symbian OS; Nokia 6600/5.27.0; 6329) Opera 8.00 [ru] 18 Mozilla/4.1 (compatible; MSIE 5.0; Symbian OS; Nokia 6600;452) Opera 6.20 [ru] 11 Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.8.1.20) Gecko/20081217 Firefox/2.0.0.20 10 Mozilla/4.0 (compatible; MSIE 7.0b; Win32) 9 Mozilla/4.0 (compatible; MSIE 6.0; Symbian OS; Nokia 6600/5.27.0; 6936) Opera 8.50 [ru] 9 Mozilla/4.0 (compatible; MSIE 6.0; Nitro) Opera 8.50 [en] 8 Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.7.8) Gecko/20050609 Firefox/1.0.4 8 Mozilla/4.0 (compatible; MSIE 5.17; Mac_PowerPC) 8 Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) 7 Mozilla/4.0 (compatible; MSIE 6.0; MSN 2.5; Windows 98) 7 Opera/9.50 (Windows NT 5.1; U; ru) 7 Opera/9.23 (Windows NT 5.1; U; ru) 6 Mozilla/4.0 (compatible; MSIE 5.0; SunOS 5.9 sun4u; X11) 6 Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3 6 Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; YPC 3.0.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727) 6 ... mozilla/4.0 (compatible; msie 7.0; windows nt 5.1; trident/4.0; ...) 
2 Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.0.2) Gecko/2008091620 Firefox/3.0.2 2 Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) 2 Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9 2 Mozilla/4.0 (compatible; MSIE 6.0; Symbian OS; Nokia 6630/4.03.38; 6937) Opera 8.50 [es] 1 Opera/9.80 (X11; Linux x86_64; U; en) Presto/2.2.15 Version/10.10 1 Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9) Gecko/2008052906 Firefox/3.0 1 Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727) 1 Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 SeaMonkey/1.0.4 1 Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.9a1) Gecko/20061204 GranParadiso/3.0a1 1 Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322) 1 Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1) Gecko/20090624 Firefox/3.5 1 Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1 1 Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9 1 Mozilla/4.0 (compatible; MSIE 6.0; Nitro) Opera 8.50 [es-es] 1 Length: 103, dtype: int64
# Requests per host for the sample in tsample.  'user_agent' is selected but is
# not a grouping key here, so the totals are across all User-Agents.
# Relies on the 'count' column added to httpdf in an earlier cell.
grouped = httpdf[httpdf['sample'] == tsample][['sample','user_agent','host','count']].groupby(['sample', 'host']).sum()
# Largest groups first.
grouped.sort('count', ascending = 0)
count | ||
---|---|---|
sample | host | |
BIN_dirtjumper_2011-10 | www.tadawulfx.com | 386 |
ukashsepeti.com | 4 | |
asdaddddaaaa.com | 1 |
3 rows × 1 columns
# Requests per (sample, user_agent, host) for the sample in tsample.
# Relies on the 'count' column added to httpdf in an earlier cell.
grouped = httpdf[httpdf['sample'] == tsample][['sample','user_agent','host','count']].groupby(['sample', 'user_agent', 'host']).sum()
# Largest groups first.
grouped.sort('count', ascending = 0)
count | |||
---|---|---|---|
sample | user_agent | host | |
BIN_dirtjumper_2011-10 | Mozilla/4.0 (compatible; MSIE 6.0; Symbian OS; Nokia 6600/5.27.0; 6329) Opera 8.00 [ru] | www.tadawulfx.com | 18 |
Mozilla/4.1 (compatible; MSIE 5.0; Symbian OS; Nokia 6600;452) Opera 6.20 [ru] | www.tadawulfx.com | 10 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.8.1.20) Gecko/20081217 Firefox/2.0.0.20 | www.tadawulfx.com | 10 | |
Mozilla/4.0 (compatible; MSIE 6.0; Symbian OS; Nokia 6600/5.27.0; 6936) Opera 8.50 [ru] | www.tadawulfx.com | 9 | |
Mozilla/4.0 (compatible; MSIE 7.0b; Win32) | www.tadawulfx.com | 9 | |
Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.7.8) Gecko/20050609 Firefox/1.0.4 | www.tadawulfx.com | 8 | |
Mozilla/4.0 (compatible; MSIE 5.17; Mac_PowerPC) | www.tadawulfx.com | 8 | |
Mozilla/4.0 (compatible; MSIE 6.0; Nitro) Opera 8.50 [en] | www.tadawulfx.com | 8 | |
Opera/9.50 (Windows NT 5.1; U; ru) | www.tadawulfx.com | 7 | |
Mozilla/4.0 (compatible; MSIE 6.0; MSN 2.5; Windows 98) | www.tadawulfx.com | 7 | |
Opera/9.23 (Windows NT 5.1; U; ru) | www.tadawulfx.com | 6 | |
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; YPC 3.0.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727) | www.tadawulfx.com | 6 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) | www.tadawulfx.com | 6 | |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3 | www.tadawulfx.com | 6 | |
Mozilla/4.0 (compatible; MSIE 5.0; SunOS 5.9 sun4u; X11) | www.tadawulfx.com | 6 | |
Opera/10.00 (Windows NT 6.0; U; en) Presto/2.2.0 | www.tadawulfx.com | 5 | |
Mozilla/5.0 (X11; U; Linux x86_64; ru; rv:1.9.0.2) Gecko/2008092702 Gentoo Firefox/3.0.2 | www.tadawulfx.com | 5 | |
mozilla/4.0 (compatible; msie 8.0; windows nt 5.1; trident/4.0; ...) | www.tadawulfx.com | 5 | |
Mozilla/4.0 (compatible; MSIE 6.0; Symbian OS; Nokia 6600/5.27.0; 9424) Opera 8.65 [ru] | www.tadawulfx.com | 5 | |
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322) | www.tadawulfx.com | 5 | |
Opera/8.51 (Windows NT 5.1; U; en) | www.tadawulfx.com | 5 | |
Mozilla/4.0 (compatible; MSIE 6.0; Nitro) Opera 8.50 [it] | www.tadawulfx.com | 5 | |
Mozilla/4.0 (compatible; MSIE 6.0; Nitro) Opera 8.50 [de] | www.tadawulfx.com | 5 | |
Opera/9.50 (Windows NT 6.0; U; en) | www.tadawulfx.com | 5 | |
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.89 Safari/532.5 | www.tadawulfx.com | 5 | |
Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10 | www.tadawulfx.com | 4 | |
Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0) | www.tadawulfx.com | 4 | |
Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0) | www.tadawulfx.com | 4 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.4) Gecko/20060516 SeaMonkey/1.0.2 | www.tadawulfx.com | 4 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; nl; rv:1.8) Gecko/20051107 Firefox/1.5 | www.tadawulfx.com | 4 | |
Mozilla/4.0 (compatible; MSIE 5.0; Windows 2000) Opera 6.03 [en] | www.tadawulfx.com | 4 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1 | www.tadawulfx.com | 4 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7 | www.tadawulfx.com | 4 | |
Mozilla/2.0 (compatible; MSIE 3.01; Windows 98) | www.tadawulfx.com | 4 | |
Mozilla/1.22 (compatible; MSIE 1.5; Windows NT) | www.tadawulfx.com | 4 | |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.2) Gecko/20060308 Firefox/1.5.0.2 | www.tadawulfx.com | 4 | |
Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.2.15 Version/10.20 | www.tadawulfx.com | 4 | |
Opera/9.0 (Windows NT 5.1; U; en) | www.tadawulfx.com | 4 | |
Opera/9.00 (Wii; U; ; 1038-58; Wii Shop Channel/1.0; en) | www.tadawulfx.com | 4 | |
Opera/9.02 (Windows NT 5.1; U; en) | www.tadawulfx.com | 4 | |
Opera/9.10 (Windows NT 5.1; U; en) | www.tadawulfx.com | 4 | |
Opera/9.80 (Windows NT 5.1; U; en) Presto/2.5.18 Version/10.50 | www.tadawulfx.com | 4 | |
Mozilla/5.0 (X11; U; Linux x86_64; ru; rv:1.9.1.1) Gecko/20090730 Gentoo Firefox/3.5.1 | www.tadawulfx.com | 4 | |
Opera/9.80 (Windows NT 6.1; U; ru) Presto/2.2.15 Version/10.00 | www.tadawulfx.com | 4 | |
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.65 Safari/525.19 | www.tadawulfx.com | 4 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3 | www.tadawulfx.com | 3 | |
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0) | www.tadawulfx.com | 3 | |
Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.1) Gecko/20090716 Ubuntu/9.04 (jaunty) Shiretoko/3.5.1 | www.tadawulfx.com | 3 | |
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.50 | www.tadawulfx.com | 3 | |
Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.1) Gecko/20090715 Firefox/3.5.1 | www.tadawulfx.com | 3 | |
Opera/7.23 (Windows 98; U) [en] | www.tadawulfx.com | 3 | |
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0; .NET CLR 2.0.50727) | www.tadawulfx.com | 3 | |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.2) Gecko/20070221 SUSE/2.0.0.2-6.1 Firefox/2.0.0.2 | www.tadawulfx.com | 3 | |
Opera/8.0 (X11; Linux i686; U; cs) | www.tadawulfx.com | 3 | |
Mozilla/5.0 (Windows NT 5.1; U; en) Opera 8.50 | www.tadawulfx.com | 3 | |
Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.3a) Gecko/20030105 Phoenix/0.5 | www.tadawulfx.com | 3 | |
Mozilla/4.0 (compatible; MSIE 6.0; Symbian OS; Nokia 6600/5.27.0; 1665) Opera 8.60 [ru] | www.tadawulfx.com | 3 | |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.6) Gecko/20060808 Fedora/1.5.0.6-2.fc5 Firefox/1.5.0.6 pango-text | www.tadawulfx.com | 3 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13 | www.tadawulfx.com | 3 | |
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.25 Safari/525.19 | www.tadawulfx.com | 3 | |
... |
108 rows × 1 columns
First we can check the coverage of the ASN database (thanks again Maxmind). The coverage appears to be pretty good: the addresses it leaves unmarked are mostly RFC1918 addresses, in addition to broadcast, etc... IPs. However, we can see a few IP addresses that aren't covered, oh well. :)
Keep in mind we're only looking at destination IP addresses.
# Destination IPs whose ASN lookup came back "UNKNOWN", counted per address.
conndf.loc[conndf['maxmind_asn'] == "UNKNOWN", 'id.resp_h'].value_counts()
172.29.0.255 4203 10.0.2.255 2774 71.74.56.243 382 71.74.56.244 367 192.168.106.2 341 172.16.165.2 242 172.29.0.1 235 172.29.0.116 136 74.120.140.21 112 208.81.191.111 110 224.0.0.252 100 255.255.255.255 99 ff02::1:3 98 172.16.253.254 92 239.255.255.250 76 ... 192.168.2.2 1 ff02::2 1 74.120.140.23 1 172.16.148.184 1 172.29.0.111 1 31.184.245.202 1 fe80::ffff:ffff:fffe 1 173.23.253.246 1 209.17.74.150 1 115.254.253.254 1 87.120.169.3 1 192.168.106.155 1 172.16.0.255 1 208.93.140.130 1 192.168.254.1 1 Length: 110, dtype: int64
What if we look at some of the samples marked APT and see how they map to the various ASNs that we now have? At first glance, it looks like a couple of samples are operating out of the same ASN (AS4134 Chinanet and AS3356 Level 3 Communications).
# Stacked bars of APT sample occurrences per ASN (per the axis labels below).
# The APT rows are filtered once and reused for both arguments.
apt_conns = conndf[conndf['threat'] == 'APT']
ax = box_plot_df_setup(apt_conns['sample'], apt_conns['maxmind_asn']).T.plot(kind='bar', stacked=True)
pylab.xlabel('ASN (Autonomous System Number)')
pylab.ylabel('Sample Occurrences')
# loc=2 (upper left) anchored at x=1.05 puts the legend just right of the axes.
patches, labels = ax.get_legend_handles_labels()
ax.legend(patches, labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., title="Sample Name")
<matplotlib.legend.Legend at 0x11997d210>
This just shows how to look at all the various ASes contacted by a single sample. It also points out that the multiple samples parked on L3 (Level 3) above might be due to DNS requests.
# ASN and destination IP of every connection made by one specific sample.
conndf.loc[conndf['sample'] == "BIN_8202_6d2c12085f0018daeb9c1a53e53fd4d1", ['maxmind_asn', 'id.resp_h']]
maxmind_asn | id.resp_h | |
---|---|---|
0 | UNKNOWN | 172.16.253.254 |
1 | UNKNOWN | 255.255.255.255 |
2 | UNKNOWN | 172.16.253.129 |
3 | AS15169 Google Inc. | 8.8.8.8 |
4 | AS3356 Level 3 Communications | 4.2.2.2 |
5 | AS53850 GorillaServers, Inc. | 192.200.99.194 |
6 | AS53850 GorillaServers, Inc. | 192.200.99.194 |
7 | AS53850 GorillaServers, Inc. | 192.200.99.194 |
8 | UNKNOWN | 255.255.255.255 |
9 | UNKNOWN | 172.16.253.132 |
10 | AS53850 GorillaServers, Inc. | 192.200.99.194 |
11 | UNKNOWN | 255.255.255.255 |
12 | UNKNOWN | 172.16.253.130 |
13 rows × 2 columns
We have to look at them, let's see if we can make it interesting. Starting with the samples that transferred the largest number of bytes, broken down by which ports (in each sample) were used. A brief examination of some of the rows is also performed.
# Give every connection a unit weight so a group's sum of 'count' is its row count.
conndf['count'] = 1
# Sum the columns for each (sample, destination port) pair.
grouped = conndf.groupby(['sample', 'id.resp_p']).sum()
# Ten largest (sample, port) pairs by total bytes.
grouped.sort('total_bytes', ascending=False).head(10)
ts | id.orig_p | orig_bytes | resp_bytes | missed_bytes | orig_pkts | orig_ip_bytes | resp_pkts | resp_ip_bytes | total_bytes | count | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|
sample | id.resp_p | |||||||||||
purplehaze | 80 | 9.584038e+12 | 21282798 | 14945798 | 172420089 | 29203 | 124500 | 20040903 | 160493 | 179127211 | 187365887 | 7217 |
1935 | 6.639908e+09 | 19212 | 19172 | 37680956 | 8057592 | 13532 | 560457 | 20374 | 30477819 | 37700128 | 5 | |
BIN_LoadMoney_MailRu_dl_4e801b46068b31b82dac65885a58ed9e_2013-04 | 80 | 4.242826e+02 | 15894 | 2648 | 28139301 | 0 | 14765 | 593368 | 28269 | 29270121 | 28141949 | 15 |
BIN_Kuluoz-Asprox_9F842AD20C50AD1AAB41F20B321BF84B | 25 | 2.326434e+13 | 51049454 | 7609979 | 2814589 | 0 | 89551 | 11325211 | 79440 | 6111735 | 10424568 | 17241 |
BIN_ArdamaxKeylogger_E33AF9E602CBB7AC3634C2608150DD18 | 587 | 1.359930e+09 | 1043 | 10343395 | 324 | 0 | 10074 | 10749283 | 10180 | 407564 | 10343719 | 1 |
BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F955F991940_2013-08 | 80 | 5.559369e+11 | 563199 | 663932 | 8706987 | 0 | 4682 | 854508 | 9845 | 9364779 | 9370919 | 412 |
BIN_Cutwail_284Fb18Fab33C93Bc69Ce392D08Fd250_2012-10 | 80 | 4.179328e+03 | 117955 | 41749 | 4931282 | 0 | 2984 | 162037 | 5144 | 5137374 | 4973031 | 105 |
BIN_ZeusGameover_2012-02 | 80 | 9.033005e+10 | 102617 | 11477 | 3721029 | 0 | 1943 | 89581 | 2792 | 3833009 | 3732506 | 68 |
BIN_9002_D4ED654BCDA42576FDDFE03361608CAA_2013-01-30 | 53 | 1.357444e+09 | 1143 | 3580503 | 117612 | 0 | 3073 | 3703423 | 3588 | 261361 | 3698115 | 1 |
XTremeRAT_DAEBFDED736903D234214ED4821EAF99_2013-04-13 | 336 | 1.631920e+10 | 12601 | 3482315 | 0 | 0 | 2715 | 3591003 | 0 | 0 | 3482315 | 12 |
10 rows × 11 columns
# Port 1935 showed up in the byte totals above; list the destination host and
# protocol of each connection to it.
conndf.loc[conndf['id.resp_p'] == 1935, ['id.resp_h', 'proto']]
id.resp_h | proto | |
---|---|---|
5541 | 69.22.155.28 | tcp |
5561 | 68.142.111.111 | tcp |
6746 | 184.51.157.60 | tcp |
7095 | 184.84.220.133 | tcp |
7509 | 184.51.157.60 | tcp |
5 rows × 2 columns
# Same question for port 336: destination host and protocol of each connection.
conndf.loc[conndf['id.resp_p'] == 336, ['id.resp_h', 'proto']]
id.resp_h | proto | |
---|---|---|
0 | 197.163.56.70 | tcp |
3 | 197.163.56.70 | tcp |
4 | 197.163.56.70 | tcp |
5 | 197.163.56.70 | tcp |
6 | 197.163.56.70 | tcp |
7 | 197.163.56.70 | tcp |
10 | 197.163.56.70 | tcp |
11 | 197.163.56.70 | tcp |
12 | 197.163.56.70 | tcp |
15 | 197.163.56.70 | tcp |
16 | 197.163.56.70 | tcp |
17 | 197.163.56.70 | tcp |
12 rows × 2 columns
# Number of SMTP log entries per sample.
# Bracket indexing is used on purpose: the column name 'sample' collides with the
# DataFrame.sample() method in newer pandas, where attribute access returns the
# method rather than the column.
smtpdf['sample'].value_counts()
BIN_Kuluoz-Asprox_9F842AD20C50AD1AAB41F20B321BF84B 4085 BIN_Sanny-Daws_338D0B855421867732E05399A2D56670_2012-10 2 BIN_ArdamaxKeylogger_E33AF9E602CBB7AC3634C2608150DD18 1 dtype: int64
Looks like we've found quite a few (wonder if they still work). I'm also amazed at how many are included with one sample. There are at most 3 other hosts in this list not related to the Asprox sample, but it seems that Asprox sends quite a bit of email and includes a pretty good list of open relays.
# Report how many distinct HELO hostnames were seen, then the ten most common.
# Single-argument parenthesised prints emit the same text as the bare statements.
print("Unique Hosts found as the HELO portion of SMTP traffic: %s" % smtpdf['helo'].nunique())
print("")
print("Some of the examples")
print(smtpdf['helo'].value_counts().head(10))
Unique Hosts found as the HELO portion of SMTP traffic: 89 Some of the examples apostille123.com 132 bestcatscratchpost.com 112 costaricaposters.com 109 olgapost.com 105 highschoolapostles.com 104 kevinpostmotors.com 97 posteraday.com 92 postroomsupplies.com 89 bancopostaclienteitaly.com 87 alliedpostal.com 87 dtype: int64
Maybe we can find some patterns in senders and subjects?
# Unit weight per SMTP record so that group sums double as occurrence counts.
smtpdf['count'] = 1
# Drop records without a sender ("-"), then total the counts per (from, subject).
grouped = (smtpdf.loc[smtpdf['from'] != "-", ['from', 'subject', 'count']]
                 .groupby(['from', 'subject'])
                 .sum())
# Twenty most repeated sender/subject combinations.
grouped.sort('count', ascending=False).head(20)
count | ||
---|---|---|
from | subject | |
Economy Shipping <support_id81@highperfpostgresql.com> | Delivery Notification ID#EN95887556F | 6 |
Next Day Air Saver <message_id98@olgapost.com> | Delivery Notification ID#EN79318987H | 4 |
Economy Shipping <support_id55@posturalvertigo.com> | Delivery Status Notification | 3 |
One Day Shipping <personal_id86@taskoprupostasi.com> | Ship Notification ID#EN05842223A | 3 |
Mail International <contact_id72@bestcatscratchpost.com> | Delivery Notification ID#EN18841053F | 3 |
Logistics Services <delivery.id78@kevinpostmotors.com> | Ship Notification ID#EN43279293A | 3 |
Next Day Air Saver <us_04@halfpriceposters.com> | Delivery Status Notification ID#EN56869729X | 3 |
Standard Shipping <status_id46@goppost.com> | Delivery Notification ID#EN28866699H | 3 |
One Day Shipping <personal_id63@taskoprupostasi.com> | Delivery Notification | 3 |
Expedited Shipping <federal_id94@scooterspost.com> | Ship Notification ID#EN28765320A | 3 |
One Day Shipping <personal_id78@taskoprupostasi.com> | Delivery Notification | 3 |
Priority Mail <status_60@hissignpost.com> | Delivery Notification ID#EN60271900F | 3 |
One Day Shipping <customer.id15@costaricaposters.com> | Delivery Notification ID#EN13576648J | 3 |
One Day Shipping <item_05@npcompost.com> | Delivery Status Notification ID#EN75648058P | 3 |
Expedited Shipping <federal_id73@scooterspost.com> | Delivery Notification ID#EN92085505H | 3 |
One Day Shipping <personal_id99@taskoprupostasi.com> | Delivery Notification ID#EN41600040F | 3 |
Priority Mail <status_80@hissignpost.com> | Delivery Notification ID#EN80773754H | 3 |
Mail International <contact_id98@bestcatscratchpost.com> | Delivery Status Notification ID#EN58347354P | 3 |
Mail International <help_id50@alexanderapostol.com> | Delivery Status Notification ID#EN15607017P | 3 |
Logistics Services <delivery.id07@kevinpostmotors.com> | Delivery Status Notification ID#EN45696799P | 3 |
20 rows × 1 columns
Network traffic is fine and dandy, but now it's time for some more eye-candy!
It's nice to have the context around how systems communicate. We've got some great stats/data surrounding the c2 and delivery mechanisms, so let's see how they relate to the files that get transferred. Bro can extract files from IRC, SMTP, HTTP, and FTP out of the box.
What's the most popular, and what does it look like per-protocol?
# Stacked bars of file counts per MIME type, split by the source service
# (per the axis and legend labels below).
ax = box_plot_df_setup(filesdf['source'], filesdf['mime_type']).T.plot(kind='bar', stacked=True)
pylab.ylabel('Number of Files')
pylab.xlabel('Mime-Type')
patches, labels = ax.get_legend_handles_labels()
ax.legend(patches, labels, title="Service Type")
<matplotlib.legend.Legend at 0x11372fcd0>
# Stacked file counts per MIME type by source, excluding text/html and text/plain.
# The filtered rows are computed once and reused for both arguments.
non_text_files = filesdf.loc[(filesdf["mime_type"] != 'text/html') & (filesdf['mime_type'] != 'text/plain')]
ax = box_plot_df_setup(non_text_files['source'], non_text_files['mime_type']).T.plot(kind='bar', stacked=True)
pylab.ylabel('Number of Files')
pylab.xlabel('Mime-Type')
patches, labels = ax.get_legend_handles_labels()
ax.legend(patches, labels, title="Service Type")
<matplotlib.legend.Legend at 0x1192ccc90>
# Unit weight per file record so that group sums double as file counts.
filesdf['count'] = 1
# For records that carry a filename (not '-'): bytes seen and file count per
# (source, MIME type), ten largest by count.
(filesdf.loc[filesdf['filename'] != '-', ['source', 'mime_type', 'seen_bytes', 'count']]
        .groupby(['source', 'mime_type'])
        .sum()
        .sort('count', ascending=False)
        .head(10))
seen_bytes | count | ||
---|---|---|---|
source | mime_type | ||
SMTP | image/jpeg | 7554332 | 26 |
HTTP | binary | 2922872 | 13 |
application/x-dosexec | 1717057 | 12 | |
application/pdf | 126691 | 7 | |
image/png | 2534 | 2 | |
image/gif | 1082697 | 2 | |
text/plain | 57254 | 1 | |
image/jpeg | 9972 | 1 |
8 rows × 2 columns
I wonder if filenames get reused, or perhaps there are some super common ones that might be interesting to look for in network traffic.
# How often each filename appears per (source, MIME type); ten most frequent.
# Records without a filename ('-') are left out.
(filesdf.loc[filesdf['filename'] != '-', ['source', 'mime_type', 'filename', 'count']]
        .groupby(['source', 'mime_type', 'filename'])
        .sum()
        .sort('count', ascending=False)
        .head(10))
count | |||
---|---|---|---|
source | mime_type | filename | |
HTTP | binary | COMMON.BIN | 6 |
application/x-dosexec | contacts.exe | 3 | |
image/png | ad516503a11cd5ca435acc9bb6523536.png | 2 | |
binary | setusating.bin | 2 | |
application/x-dosexec | readme.exe | 2 | |
info.exe | 1 | ||
image/gif | maumauwebtvB.gif | 1 | |
maumauwebtvA.gif | 1 | ||
binary | pg.dll.crp | 1 | |
fp10na.dll.crp | 1 |
10 rows × 1 columns
# How often each filename appears per (sample, MIME type); ten most frequent.
# Records without a filename ('-') are left out.
(filesdf.loc[filesdf['filename'] != '-', ['sample', 'mime_type', 'filename', 'count']]
        .groupby(['sample', 'mime_type', 'filename'])
        .sum()
        .sort('count', ascending=False)
        .head(10))
count | |||
---|---|---|---|
sample | mime_type | filename | |
BIN_Kuluoz-Asprox_9F842AD20C50AD1AAB41F20B321BF84B | binary | COMMON.BIN | 6 |
BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F955F991940_2013-08 | image/png | ad516503a11cd5ca435acc9bb6523536.png | 2 |
BIN_ZeusGameover_2012-02 | application/x-dosexec | contacts.exe | 2 |
BIN_Zeus_b1551c676a54e9127cd0e7ea283b92cc-2012-04 | binary | setusating.bin | 2 |
purplehaze | text/plain | jquery-1.3.2.min.js | 1 |
BIN_ArdamaxKeylogger_E33AF9E602CBB7AC3634C2608150DD18 | image/jpeg | Jun_06_2013__09_46_41.jpg | 1 |
Jun_06_2013__09_46_39.jpg | 1 | ||
Jun_06_2013__09_46_38.jpg | 1 | ||
Jun_06_2013__09_46_37.jpg | 1 | ||
Jun_06_2013__09_46_36.jpg | 1 |
10 rows × 1 columns
# Unit weight per notice so that group sums double as occurrence counts.
noticedf['count'] = 1
# Occurrences of every (note, msg) combination, most frequent first.
(noticedf[['note', 'msg', 'count']]
    .groupby(['note', 'msg'])
    .sum()
    .sort('count', ascending=False))
count | ||
---|---|---|
note | msg | |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (unable to get local issuer certificate) | 199 |
SSL certificate validation failed with (certificate is not yet valid) | 14 | |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 68% Last seen: 2012-12-09 06:16:04 | 9 |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (self signed certificate) | 5 |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 41% Last seen: 2013-01-17 10:23:10 | 3 |
Malware Hash Registry Detection rate: 32% Last seen: 2012-01-21 16:31:03 | 3 | |
Malware Hash Registry Detection rate: 36% Last seen: 2013-06-01 07:16:06 | 2 | |
Malware Hash Registry Detection rate: 50% Last seen: 2012-09-22 14:46:06 | 1 | |
Malware Hash Registry Detection rate: 23% Last seen: 2012-04-11 18:01:02 | 1 | |
Malware Hash Registry Detection rate: 60% Last seen: 2013-06-01 03:06:57 | 1 | |
Malware Hash Registry Detection rate: 55% Last seen: 2012-08-27 07:01:03 | 1 | |
Scan::Address_Scan | 192.168.248.165 scanned at least 25 unique hosts on port 25/tcp in 0m30s | 1 |
192.168.248.165 scanned at least 25 unique hosts on port 25/tcp in 0m38s | 1 | |
192.168.248.165 scanned at least 25 unique hosts on port 25/tcp in 3m3s | 1 | |
Signatures::Sensitive_Signature | 10.0.2.15: ATTACK-RESPONSES Microsoft cmd.exe banner (reverse-shell originator) | 1 |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 24% Last seen: 2012-02-06 12:06:50 | 1 |
Malware Hash Registry Detection rate: 41% Last seen: 2012-04-19 11:16:03 | 1 | |
Malware Hash Registry Detection rate: 25% Last seen: 2012-02-01 14:40:28 | 1 | |
Malware Hash Registry Detection rate: 27% Last seen: 2012-12-01 02:31:03 | 1 | |
Malware Hash Registry Detection rate: 55% Last seen: 2012-04-24 10:46:03 | 1 | |
Malware Hash Registry Detection rate: 32% Last seen: 2012-12-01 05:16:09 | 1 | |
Malware Hash Registry Detection rate: 36% Last seen: 2012-02-05 20:46:02 | 1 | |
Malware Hash Registry Detection rate: 40% Last seen: 2013-06-15 00:00:44 | 1 | |
Malware Hash Registry Detection rate: 81% Last seen: 2013-10-31 23:33:08 | 1 |
24 rows × 1 columns
# Adding the destination port to the grouping gives a slightly different view
# of the notices -- looks like we might have some winners here.
(noticedf[['note', 'msg', 'id.resp_p', 'count']]
    .groupby(['note', 'msg', 'id.resp_p'])
    .sum()
    .sort('count', ascending=False))
count | |||
---|---|---|---|
note | msg | id.resp_p | |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (unable to get local issuer certificate) | 443 | 120 |
9001 | 56 | ||
SSL certificate validation failed with (certificate is not yet valid) | 443 | 14 | |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 68% Last seen: 2012-12-09 06:16:04 | 80 | 9 |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (unable to get local issuer certificate) | 80 | 5 |
10203 | 3 | ||
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 32% Last seen: 2012-01-21 16:31:03 | 80 | 3 |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (unable to get local issuer certificate) | 44945 | 3 |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 41% Last seen: 2013-01-17 10:23:10 | 80 | 3 |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (self signed certificate) | 443 | 3 |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 36% Last seen: 2013-06-01 07:16:06 | 80 | 2 |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (unable to get local issuer certificate) | 9101 | 2 |
SSL certificate validation failed with (self signed certificate) | 443 | 2 | |
SSL certificate validation failed with (unable to get local issuer certificate) | 5001 | 1 | |
5251 | 1 | ||
11443 | 1 | ||
7540 | 1 | ||
8443 | 1 | ||
22 | 1 | ||
9002 | 1 | ||
9060 | 1 | ||
6001 | 1 | ||
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 81% Last seen: 2013-10-31 23:33:08 | 80 | 1 |
SSL::Invalid_Server_Cert | SSL certificate validation failed with (unable to get local issuer certificate) | 39030 | 1 |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 36% Last seen: 2012-02-05 20:46:02 | 80 | 1 |
Malware Hash Registry Detection rate: 60% Last seen: 2013-06-01 03:06:57 | 80 | 1 | |
Malware Hash Registry Detection rate: 55% Last seen: 2012-08-27 07:01:03 | 80 | 1 | |
Malware Hash Registry Detection rate: 55% Last seen: 2012-04-24 10:46:03 | 80 | 1 | |
Malware Hash Registry Detection rate: 50% Last seen: 2012-09-22 14:46:06 | 8888 | 1 | |
Malware Hash Registry Detection rate: 41% Last seen: 2012-04-19 11:16:03 | 80 | 1 | |
Malware Hash Registry Detection rate: 40% Last seen: 2013-06-15 00:00:44 | 80 | 1 | |
Malware Hash Registry Detection rate: 32% Last seen: 2012-12-01 05:16:09 | 80 | 1 | |
Scan::Address_Scan | 192.168.248.165 scanned at least 25 unique hosts on port 25/tcp in 0m38s | - | 1 |
TeamCymruMalwareHashRegistry::Match | Malware Hash Registry Detection rate: 27% Last seen: 2012-12-01 02:31:03 | 80 | 1 |
Malware Hash Registry Detection rate: 25% Last seen: 2012-02-01 14:40:28 | 80 | 1 | |
Malware Hash Registry Detection rate: 24% Last seen: 2012-02-06 12:06:50 | 80 | 1 | |
Malware Hash Registry Detection rate: 23% Last seen: 2012-04-11 18:01:02 | 80 | 1 | |
Signatures::Sensitive_Signature | 10.0.2.15: ATTACK-RESPONSES Microsoft cmd.exe banner (reverse-shell originator) | 443 | 1 |
Scan::Address_Scan | 192.168.248.165 scanned at least 25 unique hosts on port 25/tcp in 3m3s | - | 1 |
192.168.248.165 scanned at least 25 unique hosts on port 25/tcp in 0m30s | - | 1 |
40 rows × 1 columns
# Samples that triggered a Scan::Address_Scan notice.
noticedf.loc[noticedf['note'] == 'Scan::Address_Scan', 'sample']
2 BIN_Cutwail-Pushdo(1)_582DE032477E099EB1024D84... 1 BIN_Cutwail-Pushdo(2)_582DE032477E099EB1024D84... 0 BIN_Kuluoz-Asprox_9F842AD20C50AD1AAB41F20B321B... Name: sample, dtype: object
We've come full circle: it looks like we've got more confirmation that some of our malware samples are really good at SPAM, and display it by connecting to lots of hosts in rapid succession.
# Number of SSL log entries per destination port.
ssldf['id.resp_p'].value_counts()
443 271 9001 57 80 5 10203 3 44945 3 9101 2 5001 1 7540 1 6001 1 9060 1 9002 1 8443 1 11443 1 5251 1 39030 1 22 1 dtype: int64
# Ten most common certificate subjects in the SSL log.
ssldf['subject'].value_counts().head(10)
- 46 emailAddress=marry.smith@ltu.edu,CN=ITU Server,OU=VeriSign Trust Network,O=Internet Widgits Pty Ltd,L=Salisbury,ST=North Carolina,C=US 24 CN=*.google.com,O=Google Inc,L=Mountain View,ST=California,C=US 7 CN=www.3ktww4bg.net 6 CN=www.ktq2go444i.net 5 CN=eric-office 5 CN=www.ohfe52bk6gyfzojwgts.net 4 CN=www.hstk2emyai4yqa5.net 4 CN=*.gstatic.com,O=Google Inc,L=Mountain View,ST=California,C=US 4 CN=www.km6ptswm7mo.net 4 dtype: int64
# Unit weight per SSL record so that group sums double as occurrence counts.
ssldf['count'] = 1
# Occurrences of every (version, cipher) combination, most frequent first.
(ssldf[['version', 'cipher', 'count']]
    .groupby(['version', 'cipher'])
    .sum()
    .sort('count', ascending=False))
count | ||
---|---|---|
version | cipher | |
TLSv10 | TLS_DHE_RSA_WITH_AES_256_CBC_SHA | 211 |
SSLv3 | TLS_RSA_WITH_RC4_128_MD5 | 70 |
TLS_RSA_WITH_RC4_128_SHA | 21 | |
TLSv10 | TLS_RSA_WITH_RC4_128_SHA | 18 |
TLS_RSA_WITH_RC4_128_MD5 | 15 | |
- | - | 13 |
TLSv10 | TLS_RSA_WITH_AES_128_CBC_SHA | 3 |
7 rows × 1 columns
Since we've got a decent grasp on the ports used, the types of ciphers present as well as popular certs that were seen in malware, perhaps there are a couple of ways we can begin to relate that information back to samples to get an idea of what the sample might be doing or how it works.
# Occurrences of every (sample, destination port, server_name) combination,
# most frequent first.
(ssldf[['sample', 'server_name', 'id.resp_p', 'count']]
    .groupby(['sample', 'id.resp_p', 'server_name'])
    .sum()
    .sort('count', ascending=False))
count | |||
---|---|---|---|
sample | id.resp_p | server_name | |
purplehaze | 443 | - | 43 |
PDF_CVE-2011-2462_Pdf_2011-12 | 443 | - | 36 |
BIN_ZeroAccess_Sirefef_C2A9CCC8C6A6DF1CA1725F955F991940_2013-08 | 443 | - | 16 |
BIN_Cutwail-Pushdo(2)_582DE032477E099EB1024D84C73E98C1 | 443 | - | 9 |
BIN_Ramnitpcap_2012-01 | 443 | - | 7 |
BIN_Vobfus_634AA845F5B0B519B6D8A8670B994906_2012-12 | 443 | - | 5 |
BIN_TrojanPage_86893886C7CBC7310F7675F4EFDE0A29 | 443 | - | 5 |
BIN_Enfal_Lurid_0fb1b0833f723682346041d72ed112f9_2013-01 | 443 | - | 4 |
BIN_Googledocs_macadocs_2012-12 | 443 | - | 4 |
BIN_Cutwail-Pushdo(1)_582DE032477E099EB1024D84C73E98C1 | 443 | - | 2 |
EK_Blackholev2_2012-09 | 443 | - | 2 |
EK_Blackholev1_2012-08 | 443 | - | 2 |
BIN_Tbot_FC7C3E087789824F34A9309DA2388CE5_2012-12 | 10203 | www.pcnia4i6e6w.com | 1 |
BIN_Tbot_2E1814CCCF0C3BB2CC32E0A0671C0891_2012-12 | 443 | www.o4rtqjectd6cr7xj2plup.com | 1 |
9001 | www.6fwotxu2.com | 1 | |
www.54qrxwvimf35.com | 1 | ||
www.2jh3iq.com | 1 | ||
443 | www.zzrv3tbbn4a.com | 1 | |
www.zupgh57porobex5l6rn7gn4b.com | 1 | ||
www.zkt4.com | 1 | ||
www.wv5npsememeyqlxeejajjh.com | 1 | ||
www.tntgu2nvt3x4wguftukjoauw.com | 1 | ||
www.rx4a.com | 1 | ||
www.phllv4qobdq66lvikg4.com | 1 | ||
www.odvlsr75agy44jkafb5.com | 1 | ||
www.nhoqywktzrxr.com | 1 | ||
9001 | www.h6v4rzfaoh7iwjbwchdkxk5r.com | 1 | |
443 | www.mdxu5pezm5gctjsiz57jnjlbc.com | 1 | |
www.m5467gyzaao3dogqgkgnsjz4.com | 1 | ||
www.jwjftcuh7svsqg7il5z.com | 1 | ||
www.jozhagprwwaiayfwtyp.com | 1 | ||
www.jil7bq.com | 1 | ||
www.igi4wpls4vqtpv.com | 1 | ||
www.hoi7duw.com | 1 | ||
www.gl4fqk3ut2jrhm4hhbn735.com | 1 | ||
www.fk4pprq42hsvl2wey.com | 1 | ||
www.enh3nbiuvze2zmjh2e.com | 1 | ||
9001 | www.d6dh.com | 1 | |
www.kyswssz.com | 1 | ||
www.jwrpsthzrih.com | 1 | ||
443 | www.cb4bqglwg.com | 1 | |
BIN_Tbot_5375FB5E867680FFB8E72D29DB9ABBD5_2012-12 | 443 | www.czjs7.com | 1 |
www.clwfhegzhknjxrqgo.com | 1 | ||
www.bt5qn4edtog.com | 1 | ||
www.amdspuvfnwejdbac3s4eyiiei.com | 1 | ||
www.a4grdymgccamccd.com | 1 | ||
www.7f56wbkr.com | 1 | ||
www.5sja.com | 1 | ||
www.5gwwuuomvh4aayxc47lnqag.com | 1 | ||
www.4v2ddyxbnjeeys.com | 1 | ||
www.4ae7bhbe3vwykaetow67swg.com | 1 | ||
www.3yksb5uu6h2vacmutmwhlohm5.com | 1 | ||
80 | www.rbgnlzi3jgetoxkzqy75gf.com | 1 | |
BIN_Tbot_2E1814CCCF0C3BB2CC32E0A0671C0891_2012-12 | 44945 | www.w6wo5d7enjs3nx3xcsvxhnq7u.com | 1 |
10203 | www.sqzhpbncwezqjocze2arciro.com | 1 | |
9002 | www.7mmi6y7nhxxl3xdtoquu.com | 1 | |
9001 | www.yvrfwi3jvukj.com | 1 | |
www.xy6a.com | 1 | ||
www.w3woqnnv2hker.com | 1 | ||
www.p5r5vru7a.com | 1 | ||
... |
228 rows × 1 columns
To run the visualization in your web browser:
# Build the nested hierarchy that is written to ssl.json:
#   root "ssl" -> one node per sample -> one node per destination port
#   -> one leaf per distinct server_name (every leaf has size 1).
data = {'name' : 'ssl', 'children' : list()}
samples = list(set(ssldf['sample'].tolist()))
for sample in samples:
    # Slice this sample's rows once; the port and hostname lookups below reuse
    # the slice instead of re-filtering the whole frame for every port.
    sample_rows = ssldf[ssldf['sample'] == sample]
    sample_node = {'name' : sample, 'children' : list()}
    for port in set(sample_rows['id.resp_p'].tolist()):
        hostnames = set(sample_rows[sample_rows['id.resp_p'] == int(port)]['server_name'].tolist())
        # Each node is built completely before it is attached, so no parallel
        # index counters are needed to find it again.
        sample_node['children'].append({
            'name' : str(port),
            'children' : [{'name' : hostname, 'size' : 1} for hostname in hostnames],
        })
    data['children'].append(sample_node)
# Use a context manager so the file is flushed and closed deterministically
# instead of leaking the handle returned by a bare open().
with open('ssl.json', 'w') as json_file:
    json.dump(data, json_file)
To show how easy D3 can be once you have the JSON output, you can also point your browser (after following the steps above) to http://localhost:9999/ssl_cartesian.html
And you'll get some output similar to:
Note: If you run these at home, you can zoom in and zoom out with your browser hot-keys to get a much nicer view of the graph.
# Ports per sample: stacked bars of SSL connection totals for each sample,
# split by destination port (per the axis and legend labels below).
ax = box_plot_df_setup(ssldf['id.resp_p'], ssldf['sample']).T.plot(kind='bar', stacked=True)
pylab.xlabel('Samples')
pylab.ylabel('Total # of connections')
# loc=2 (upper left) anchored at x=1.05 puts the legend just right of the axes.
patches, labels = ax.get_legend_handles_labels()
ax.legend(patches, labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., title="Port")
<matplotlib.legend.Legend at 0x1197bef10>
# Or as you might see it in an operational sense: the same breakdown keyed by
# originating IP instead of sample name.
ax = box_plot_df_setup(ssldf['id.resp_p'], ssldf['id.orig_h']).T.plot(kind='bar', stacked=True)
pylab.xlabel('Source IP')
pylab.ylabel('Total # of connections')
# loc=2 (upper left) anchored at x=1.05 puts the legend just right of the axes.
patches, labels = ax.get_legend_handles_labels()
ax.legend(patches, labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., title="Port")
<matplotlib.legend.Legend at 0x113347510>
Tbot uses TOR for communication. (http://contagiodump.blogspot.com/2012/12/dec-2012-skynet-tor-botnet-trojantbot.html)
The weird.log shows protocol issues/anomalies as well as information pertaining to possible data loss, etc... We weren't able to find anything exciting in there, but that doesn't mean you won't!
# Number of weird.log entries per anomaly name.
weirddf['name'].value_counts()
data_before_established 169 unescaped_special_URI_char 166 possible_split_routing 164 line_terminated_with_single_CR 133 NUL_in_line 111 unknown_protocol_2 71 bad_HTTP_request 65 DNS_Conn_count_too_large 51 inappropriate_FIN 34 HTTP_version_mismatch 21 connection_originator_SYN_ack 20 truncated_link_frame 9 unmatched_HTTP_reply 9 window_recision 8 SYN_inside_connection 6 unescaped_%_in_URI 5 above_hole_data_without_any_acks 5 DNS_truncated_ans_too_short 5 DNS_truncated_RR_rdlength_lt_len 4 DNS_label_too_long 4 premature_connection_reuse 4 DNS_label_len_gt_pkt 3 dns_changed_number_of_responses 3 SYN_with_data 2 DNS_label_forward_compress_offset 1 SYN_after_close 1 empty_http_request 1 double_%_in_URI 1 malformed_ssh_identification 1 DNS_truncated_len_lt_hdr_len 1 illegal_%_at_end_of_URI 1 truncated_IP 1 DNS_RR_unknown_type 1 dtype: int64