# Walkthrough: load a PCAP into workbench, pull the resulting Bro logs into
# pandas DataFrames, then explore and plot them.
#
# Written as a sequence of notebook cells: the bare expressions below
# (e.g. `output`, `... .head(10)`) are there so a notebook displays their
# value; they are harmless no-ops when run as a plain script.

import matplotlib.pyplot as plt
import pandas as pd
import zerorpc

# Let's start to interact with workbench. Please note there is NO specific
# client to workbench; just use the ZeroRPC Python, Node.js, or CLI interfaces.
c = zerorpc.Client(timeout=120)
c.connect("tcp://127.0.0.1:4242")

# Load in the PCAP file (binary mode: the raw bytes are sent to the server).
with open('../data/pcap/gold_xxx.pcap', 'rb') as f:
    pcap_md5 = c.store_sample(f.read(), 'gold_xxx', 'pcap')

# We can also ask workbench for a python dictionary of all the info from this
# PCAP, because sometimes visualizations are useful and sometimes organized
# data is useful.
output = c.work_request('view_pcap_details', pcap_md5)['view_pcap_details']
output

# Critical Code: Transition from Bro logs to Pandas Dataframes.
# This populates one dataframe per Bro log from the streaming
# client/server generators, zero-copy, efficient, awesome...
# `.items()` (rather than the Python-2-only `.iteritems()`) keeps this
# working on both Python 2 and Python 3.
dataframes = {
    name: pd.DataFrame(c.stream_sample(bro_log, None))
    for name, bro_log in output['bro_logs'].items()
}

# Look at DNS logs
dataframes['dns_log'][['query', 'answers', 'qtype_name']].head(10)

# Look at Conn logs
dataframes['conn_log'].head(10)

# Simple Stats with Pandas Dataframe
dataframes['conn_log'][
    ['missed_bytes', 'orig_ip_bytes', 'resp_ip_bytes', 'resp_pkts']
].describe()

# Simple Filtering with Pandas Dataframe: keep connections whose responder
# port is not 80.
conn_log = dataframes['conn_log']
not_80_df = conn_log[conn_log['id.resp_p'] != 80]
not_80_df.head(10)

# Now we group by host and show the different response mime types for each host
group_host = dataframes['http_log'].groupby(
    ['host', 'id.resp_h', 'resp_mime_types', 'uri']
)[['response_body_len']].sum()
group_host

# Plotting defaults.
# `%matplotlib inline` is IPython-only syntax; calling the magic through
# get_ipython() does the same thing in a notebook and is skipped (NameError)
# when this file is run by a plain Python interpreter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
plt.rcParams['font.size'] = 12.0
plt.rcParams['figure.figsize'] = 15.0, 8.0

# Plot hosts and mime-types: one stacked bar per host, one segment per
# response mime type, sized by total response bytes.
plot_df = dataframes['http_log'].groupby(
    ['host', 'resp_mime_types']
)[['response_body_len']].sum().unstack()
plot_df['response_body_len'].plot(kind='bar', stacked=True)
plt.xlabel('Domain')
plt.ylabel('Response Bytes')
plt.xticks(rotation=45, ha='right')