#!/usr/bin/env python
# coding: utf-8

# ## General info
# Dates of interest
#
# * 01/06/2015
# * 01/27/2015
# * 12/11/2014
#
# Timeframes of interest: Between 4 and 6 PM (but should still consider the entire day)
#
# Keywords: JPMorgan, wire, transfer

# ## Mounting the image
# Mounting the image read-only with NTFS-specific parameters.
# NOTE: this is a shell command, not Python — run it outside the notebook.
# It is commented out so the exported script remains parseable.
#
#   mount -o ro,loop,show_sys_files,streams_interface=windows /mnt/hgfs/ssd/039533.001 /mnt/usb/

# ## Creating timeline
# Used 'log2timeline' from SANS SIFT VM
# Image local time is set to CET (Paris time)
# Output format set to CSV
# NOTE: shell command, commented out for the same reason as above.
#
#   log2timeline -z CET -r -p -f win7 -o csv -w /cases/bodyfile /mnt/usb

# ## Analyzing the timeline using Apache Spark
# Timeline analysis can be difficult and very time-consuming if the CSV files
# are too large, or if we have multiple images to go through.
# Spark can make that job much easier and more efficient.

# In[ ]:

# Create a Spark SQL context.
# `sc` is the SparkContext injected by the pyspark shell / notebook kernel.
from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)

# Load the CSV files into a Spark DataFrame
# (Spark 1.x API, using the spark-csv external package).
df = sqlContext.load(source="com.databricks.spark.csv",
                     header="true",
                     path="/user/cloudera/bodyfile")

# In[99]:

# Show the schema inferred from the CSV header and count the rows.
df.printSchema()
df.count()

# In[ ]:

# The DataFrame's column names, based on the CSV header.
df.columns

# In[ ]:

# Register the DataFrame as a Spark SQL table called 'tl' so we can run
# queries using SQL syntax, then cache it in memory for faster lookups.
sqlContext.registerDataFrameAsTable(df, 'tl')
sqlContext.cacheTable('tl')

# Observed cache status (from the Spark UI):
# RDD Name            Storage Level                  Cached Partitions  Fraction Cached  Size in Memory  Size in Tachyon  Size on Disk
# In-memory table tl  Memory Deserialized 1x Replicated  5              100%             286.2 MB        0.0 B            0.0 B
# ### Keyword search
# #### Did the user execute any files that seem potentially malicious?

# In[92]:

# Collect all the rows matching the keyword/date filter into a Python list.
filtered = sqlContext.sql(
    "select * from tl where `date` like '01/%/2015' and short like '%wire%'"
).collect()

# Print out the results.
# print() with a single argument works in both Python 2 and Python 3,
# unlike the original `print x` statement syntax.
for row in filtered:
    print(row.date + " " + row.time + " " + row.source + " " +
          row.MACB + " " + row.short + " " + row.desc)

# - On 01/27/2015 the user appears to have executed a file named
#   wire_tr91297_pdf.exe that was compressed as a ZIP file (Temp1_wire_tr91297.zip)
# - It was not possible to recover the file from the file-system
# - It would be reasonable to conclude this file was malware
# - We can be certain the malware was executed, but it is unclear whether it
#   continued to run normally

# ## System activity: Deleted files
#
# ### On which dates were files deleted the most?

# In[89]:

deletedFilesDF = sqlContext.sql(
    "SELECT `date`, short FROM tl "
    "WHERE `date` LIKE '%/%/2015' AND short LIKE '%DELETED%'"
)
deletedFilesRowList = deletedFilesDF.collect()

# In[90]:

# Split the rows into parallel date / description lists.
deletedFileListDate = [row.date for row in deletedFilesRowList]
deletedFileList = [row.short for row in deletedFilesRowList]

# In[91]:

import pandas as pd
from collections import Counter

# Deletions per date.  pd.Series accepts a dict/Counter directly, so there is
# no need for the Python-2-only Counter.iteritems() loop, and pd.TimeSeries
# (removed from modern pandas) is replaced by the plain pd.Series.
dates = Counter(deletedFileListDate)
ts = pd.Series(dates)
# figsize is passed to pandas' plot instead of relying on the implicit
# pylab `figure(...)` call, which only exists under %pylab magic.
ts.plot(kind="barh", figsize=(10, 8))

# #### Display the files that were deleted
# (limited to 10 results to avoid bloating the notebook with results)

# In[114]:

# `shown` caps the output at 10 rows; also avoids shadowing the builtin `file`.
shown = 0
for row in deletedFilesRowList:
    if row.date in ('07/15/2015', '07/13/2015', '08/11/2015'):
        if shown < 10:
            print(row)
            shown += 1

# - On 07/15/2015 and 07/13/2015 the user deleted browsing history
# - On 08/11/2015 an application was uninstalled
# ### Web history on the dates in question

# In[96]:

visitedList = []
myDates = ['12/11/2014', '01/06/2015', '01/27/2015']
# Renamed the loop variables: the original reused `i` for both the outer
# (date) and inner (row) loops, which shadowed the date mid-iteration.
for day in myDates:
    webhist = sqlContext.sql(
        "select * from tl where source='WEBHIST' and `date` like '%s' limit 20 " % (day)
    ).collect()
    for row in webhist:
        visitedList.append(row.date + " " + row.short)

# In[115]:

for entry in visitedList:
    # Each entry looks like "<date> <short description>"; token [2] is
    # presumably the URL within the WEBHIST short description — verify
    # against the actual log2timeline output format.
    parts = entry.split(" ")
    url = parts[0] + " " + parts[2]
    print(url)