In [1]:
import datetime
import pandas as pd
import os
import json

import elasticsearch
import elasticsearch.helpers

import concurrent

import sys
sys.path.append('/opt/2IMS40')
sys.path.append('../')

# Import files from current directory
from search_index import search_index
from es_client import es_client

# next command ensures that plots appear inside the notebook
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns  # also improves the look of plots
sns.set()  # set Seaborn defaults
plt.rcParams['figure.figsize'] = 10, 5  # default hor./vert. size of plots, in inches
plt.rcParams['lines.markeredgewidth'] = 1  # to fix issue with seaborn box plots; needed after import seaborn

# Create the client object that the search_index(...) calls below receive as their first
# argument; no arguments are passed, so the factory's own defaults apply.
# NOTE(review): this rebinds the name `es_client` from the imported factory to its return
# value, so the factory is only reachable again after the import above is re-run.
es_client = es_client(
    #host='https://elasticsearch.smeets.ee:443', local=False
)
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.14) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
/usr/local/lib/python3.8/dist-packages/elasticsearch/_sync/client/__init__.py:394: SecurityWarning: Connecting to 'https://localhost:9200' using TLS with verify_certs=False is insecure
  _transport = transport_class(
In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_kddcup99
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.covariance import EllipticEnvelope
from collections import defaultdict
In [3]:
# For constructing and saving trees
import graphviz
In [4]:
# Fetch all Sysmon process-creation events (EventID 1) and drop the columns
# that hold no value for any of them
df = (
    search_index(es_client, "source: (sysmon.json) AND Event.System.EventID: 1", size=None)
    .dropna(axis=1, how='all')
)

df.shape
Out[4]:
(11242, 43)
In [5]:
# Keep only the fields needed to link processes together and to label them in the trees
process_info = df[
    [
        'Event.EventData.Image',
        'Event.EventData.ParentImage',
        'Event.EventData.ProcessGuid',
        'Event.EventData.ParentProcessGuid',
        'Event.EventData.CommandLine',
        'Event.System.EventRecordID',
        'Event.System.TimeCreated.#attributes.SystemTime',
    ]
]
# pd.DataFrame(process_info['Event.EventData.ParentImage'].value_counts())
In [6]:
# Select the process creations whose image path contains one of the two known malware
# name fragments.
# regex=False: the fragments are literal text ('.' must not act as a regex wildcard).
# na=False: a row without an image path simply does not match instead of producing a
# NaN entry in the boolean mask.
malware_procs = process_info[
    process_info['Event.EventData.Image'].str.contains('Systdeeem.exe', regex=False, na=False)
    | process_info['Event.EventData.Image'].str.contains('2ecb', regex=False, na=False)
]
malware_procs
Out[6]:
Event.EventData.Image Event.EventData.ParentImage Event.EventData.ProcessGuid Event.EventData.ParentProcessGuid Event.EventData.CommandLine Event.System.EventRecordID Event.System.TimeCreated.#attributes.SystemTime
729 C:\Users\User\Downloads\2ecbf5a27adc238af0b125... C:\Windows\explorer.exe EDA6DF62-B871-63A5-3401-000000002800 EDA6DF62-B6AF-63A5-8A00-000000002800 "C:\Users\User\Downloads\2ecbf5a27adc238af0b12... 8239289 2022-12-23T14:17:21.173591Z
1017 C:\Users\User\AppData\Roaming\Microsoft\Window... C:\Windows\explorer.exe EDA6DF62-C90D-63A5-A800-000000002900 EDA6DF62-C8F1-63A5-8800-000000002900 "C:\Users\User\AppData\Roaming\Microsoft\Windo... 8338140 2022-12-23T15:28:13.674590Z
In [7]:
# ProcessGuid column of the selected malware processes (index labels are preserved)
malware_procs_guid = malware_procs.loc[:, 'Event.EventData.ProcessGuid']
malware_procs_guid
Out[7]:
729     EDA6DF62-B871-63A5-3401-000000002800
1017    EDA6DF62-C90D-63A5-A800-000000002900
Name: Event.EventData.ProcessGuid, dtype: object
In [8]:
# All Sysmon events other than process creation (EventID 1) recorded for one fixed
# ProcessGuid, with the columns that are empty for every returned event removed.
# Note: this rebinds `df`, replacing the process-creation frame loaded earlier.
df = (
    search_index(
        es_client,
        'source: (sysmon.json) AND (NOT Event.System.EventID: 1) '
        'AND Event.EventData.ProcessGuid: "EDA6DF62-B871-63A5-3401-000000002800"',
        size=None,
    )
    .dropna(axis=1, how='all')
)
df
Out[8]:
_index _id sort Event.EventData.RuleName Event.EventData.UtcTime Event.System.Channel Event.System.Computer Event.System.EventID Event.System.EventRecordID Event.System.Execution.#attributes.ProcessID ... Event.EventData.FileVersion Event.EventData.OriginalFileName Event.EventData.Product Event.EventData.ImageLoaded Event.EventData.Signature Event.EventData.SignatureStatus Event.EventData.Signed source Event.EventData.CreationUtcTime Event.EventData.TargetFilename
0 host_events 4338891 [1671805041230] - 2022-12-23 14:17:21.219 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239364 3424 ... 10.0.22000.918 (WinBuild.160101.0800) ntdll.dll Microsoft® Windows® Operating System C:\Windows\System32\ntdll.dll Microsoft Windows Valid true 04_host_windows/sysmon.json NaN NaN
1 host_events 4338892 [1671805041244] - 2022-12-23 14:17:21.219 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239365 3424 ... 1.0.0.0 JHhH.exe JHhH C:\Users\User\Downloads\2ecbf5a27adc238af0b125... - Unavailable false 04_host_windows/sysmon.json NaN NaN
2 host_events 4338894 [1671805041261] - 2022-12-23 14:17:21.241 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239367 3424 ... 10.0.22000.1165 (WinBuild.160101.0800) ntdll.dll Microsoft® Windows® Operating System C:\Windows\SysWOW64\ntdll.dll Microsoft Windows Valid true 04_host_windows/sysmon.json NaN NaN
3 host_events 4338895 [1671805041272] - 2022-12-23 14:17:21.267 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239368 3424 ... 10.0.22000.71 (WinBuild.160101.0800) wow64.dll Microsoft® Windows® Operating System C:\Windows\System32\wow64.dll Microsoft Windows Valid true 04_host_windows/sysmon.json NaN NaN
4 host_events 4338896 [1671805041273] - 2022-12-23 14:17:21.267 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239369 3424 ... - - - C:\Windows\System32\wow64base.dll Microsoft Windows Valid true 04_host_windows/sysmon.json NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
62 host_events 4339103 [1671805092365] - 2022-12-23 14:18:12.204 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239576 3424 ... 10.0.22000.282 (WinBuild.160101.0800) rsaenh.dll Microsoft® Windows® Operating System C:\Windows\SysWOW64\rsaenh.dll Microsoft Windows Valid true 04_host_windows/sysmon.json NaN NaN
63 host_events 4339106 [1671805092377] - 2022-12-23 14:18:12.204 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239579 3424 ... 10.0.22000.1 (WinBuild.160101.0800) cryptbase.dll Microsoft® Windows® Operating System C:\Windows\SysWOW64\cryptbase.dll Microsoft Windows Valid true 04_host_windows/sysmon.json NaN NaN
64 host_events 4339107 [1671805092389] - 2022-12-23 14:18:12.363 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 7 8239580 3424 ... 10.0.22000.1 (WinBuild.160101.0800) PSAPI Microsoft® Windows® Operating System C:\Windows\SysWOW64\psapi.dll Microsoft Windows Valid true 04_host_windows/sysmon.json NaN NaN
65 host_events 4339126 [1671805092558] - 2022-12-23 14:18:12.553 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 11 8239599 3424 ... NaN NaN NaN NaN NaN NaN NaN 04_host_windows/sysmon.json 2022-12-23 14:18:12.499 C:\Users\User\AppData\Local\Microsoft\CLR_v4.0...
66 host_events 4339133 [1671805092617] - 2022-12-23 14:18:12.607 Microsoft-Windows-Sysmon/Operational WinDev2211Eval 5 8239606 3424 ... NaN NaN NaN NaN NaN NaN NaN 04_host_windows/sysmon.json NaN NaN

67 rows × 37 columns

In [9]:
# List the fields present in the current `df` (the events fetched for the single ProcessGuid)
df.columns
Out[9]:
Index(['_index', '_id', 'sort', 'Event.EventData.RuleName',
       'Event.EventData.UtcTime', 'Event.System.Channel',
       'Event.System.Computer', 'Event.System.EventID',
       'Event.System.EventRecordID',
       'Event.System.Execution.#attributes.ProcessID',
       'Event.System.Execution.#attributes.ThreadID', 'Event.System.Keywords',
       'Event.System.Level', 'Event.System.Opcode',
       'Event.System.Provider.#attributes.Guid',
       'Event.System.Provider.#attributes.Name',
       'Event.System.Security.#attributes.UserID', 'Event.System.Task',
       'Event.System.TimeCreated.#attributes.SystemTime',
       'Event.System.Version', 'Event.EventData.Image',
       'Event.EventData.ProcessGuid', 'Event.EventData.ProcessId',
       'Event.EventData.User', 'Event.EventData.Hashes',
       'Event.EventData.Company', 'Event.EventData.Description',
       'Event.EventData.FileVersion', 'Event.EventData.OriginalFileName',
       'Event.EventData.Product', 'Event.EventData.ImageLoaded',
       'Event.EventData.Signature', 'Event.EventData.SignatureStatus',
       'Event.EventData.Signed', 'source', 'Event.EventData.CreationUtcTime',
       'Event.EventData.TargetFilename'],
      dtype='object')
In [10]:
def _label_text(value):
    """Return ``value`` as text that is safe inside a Graphviz HTML-like label.

    HTML-like labels are parsed as markup, so a literal ``&``, ``<`` or ``>``
    coming from event data (file paths, registry keys, pipe names, ...) has to
    be written as an entity; otherwise Graphviz rejects the whole label.
    """
    import html  # local import so this cell does not rely on another cell's imports

    return html.escape(str(value), quote=False)


def _port_text(port):
    """Return a port as an integer string, or its plain text form if it is not a number."""
    try:
        return str(int(port))
    except (TypeError, ValueError, OverflowError):
        # e.g. NaN when the field carries no value for this event
        return _label_text(port)


def build_event_string_color(row):
    """Build the Graphviz node label, color and style for one Sysmon event.

    Parameters
    ----------
    row : mapping
        One event record, indexable by the flattened Sysmon field names
        (``'Event.System.EventID'``, ``'Event.EventData.Image'``, ...). Besides
        the fields read for every event, only the fields that belong to the
        row's event type are accessed.

    Returns
    -------
    tuple
        ``(label, color, style)``: ``label`` is an HTML-like label string
        (wrapped in ``<`` ... ``>``), ``color`` is a Graphviz color name
        (``'green'`` for event types without a dedicated branch) and ``style``
        is ``'filled'`` for process-termination events and ``None`` otherwise.

    Side effects
    ------------
    Rebinds the module-level set ``seen_event_ids`` to include this event's ID.
    The set is created on first use when it has not been initialised yet.
    """
    global seen_event_ids

    def field(name, text):
        # One "<name>: <text>" line of the label
        return '<BR/><I>' + name + ':</I> ' + text

    # Defaults for event types that have no dedicated branch below
    color = 'green'
    style = None

    event_id = row['Event.System.EventID']
    # Track which event types were encountered; tolerate a missing tracker set
    seen_event_ids = globals().get('seen_event_ids', set()).union([event_id])

    label = '<<B><I>Event ID:</I> ' + _label_text(row['Event.System.EventRecordID']) + '</B>'
    label += field('Event Type', _label_text(event_id))

    if event_id == 3:
        label += ' - Network connection'
        label += field('Source IP', _label_text(row['Event.EventData.SourceIp']))
        label += field('Source Port', _port_text(row['Event.EventData.SourcePort']))
        label += field('Destination IP', _label_text(row['Event.EventData.DestinationIp']))
        label += field('Destination Port', _port_text(row['Event.EventData.DestinationPort']))
        color = 'cyan3'
    elif event_id == 5:
        label += ' - Process terminated'
        color = 'gray39'
        style = 'filled'
    elif event_id == 7:
        label += ' - Image loaded'
        color = 'gold'
    elif event_id == 11:
        label += ' - FileCreate'
        label += field('Target Filename', _label_text(row['Event.EventData.TargetFilename']))
        color = 'hotpink'
    elif event_id == 12:
        label += ' - RegistryEvent (Object create and delete)'
        label += field('Event Type', _label_text(row['Event.EventData.EventType']))
        label += field('Target Object', _label_text(row['Event.EventData.TargetObject']))
        color = 'orange'
    elif event_id == 17:
        label += ' - PipeEvent (Pipe Created)'
        label += field('Event Type', _label_text(row['Event.EventData.EventType']))
        label += field('Pipe Name', _label_text(row['Event.EventData.PipeName']))
        color = 'rebeccapurple'
    elif event_id == 23:
        label += ' - FileDelete (File Delete archived)'
        label += field('Target Filename', _label_text(row['Event.EventData.TargetFilename']))
        label += field('Is Executable', _label_text(row['Event.EventData.IsExecutable']))
        color = 'rosybrown'

    # Fields shown for every event type
    label += field('Loaded Image', _label_text(row['Event.EventData.Image']))
    label += field('Time', _label_text(row['Event.System.TimeCreated.#attributes.SystemTime']))
    return label + '>', color, style
In [11]:
def child_process_tree(dot, process_guid, process_image, skip_common_events=False):
    """Recursively add a process, its events and its descendant processes to ``dot``.

    The process becomes a red, filled node keyed by its ProcessGuid. Every
    non-creation Sysmon event found for that ProcessGuid becomes a node keyed by
    its event record ID, connected to the process node. Each process whose
    ParentProcessGuid equals ``process_guid`` is added the same way and
    connected with a red edge.

    Parameters
    ----------
    dot : graph object
        Graph that receives the nodes and edges through ``dot.node`` / ``dot.edge``.
    process_guid : str
        ProcessGuid of the process to add; it must occur in the module-level
        ``process_info`` frame (an ``IndexError`` is raised otherwise).
    process_image : str
        Currently unused; kept so existing calls keep working.
    skip_common_events : bool, default False
        When true, events with ID 7 or 12 are left out of the graph.

    Notes
    -----
    Reads the module-level ``process_info`` and ``es_client`` and issues one
    ``search_index`` query per visited process.
    """
    import html  # local import: only needed to escape label text

    def text(value):
        # HTML-like labels are parsed as markup; '&', '<' and '>' must be entities
        return html.escape(str(value), quote=False)

    # Event IDs left out when skip_common_events is set: labelled "Image loaded" (7)
    # and "RegistryEvent" (12) by build_event_string_color
    common_event_ids = (7, 12)

    # Label for the process itself, taken from its process-creation record
    creation = process_info[process_info['Event.EventData.ProcessGuid'] == process_guid].iloc[0]
    process_label = '<<B><I>Event ID:</I> ' + text(creation['Event.System.EventRecordID']) + '</B>'
    process_label += '<BR/><I>Event Type:</I> 1 - Process creation'
    process_label += '<BR/><I>Loaded Image:</I> ' + text(creation['Event.EventData.Image'])
    process_label += '<BR/><I>Time:</I> ' + text(creation['Event.System.TimeCreated.#attributes.SystemTime'])
    process_label += '>'

    dot.node(process_guid, process_label, color='red', style='filled')

    # Processes that were started by this process
    child_procs = process_info[process_info['Event.EventData.ParentProcessGuid'] == process_guid]

    # Every non-creation event recorded for this process
    proc_events = search_index(es_client, 'source: (sysmon.json) AND (NOT Event.System.EventID: 1) AND Event.EventData.ProcessGuid: "' + process_guid + '"', size=None).dropna(how='all', axis=1)

    # One node per event, attached to the process node with an edge in the event's color
    for _, event_row in proc_events.iterrows():
        if skip_common_events and event_row['Event.System.EventID'] in common_event_ids:
            continue
        event_node_id = str(event_row['Event.System.EventRecordID'])
        event_label, color, style = build_event_string_color(event_row)
        dot.node(event_node_id, event_label, color=color, style=style)
        dot.edge(process_guid, event_node_id, color=color)

    # Recurse into each child process, then attach its subtree to this process
    for _, child_row in child_procs.iterrows():
        child_guid = child_row['Event.EventData.ProcessGuid']
        child_process_tree(dot, child_guid, child_row['Event.EventData.Image'], skip_common_events)
        dot.edge(process_guid, child_guid, color='red')
In [12]:
# build_event_string_color records every event ID it formats in this set, which
# shows which event types the label builder has to handle
seen_event_ids = set()

# One process tree per malware process, rooted at that process
graphs = []
for process_guid, process_image in zip(
    malware_procs['Event.EventData.ProcessGuid'],
    malware_procs['Event.EventData.Image'],
):
    dot = graphviz.Digraph(comment=process_guid)
    child_process_tree(dot, process_guid, process_image)
    graphs.append(dot)

# Render each tree to a file named after the root ProcessGuid (stored as the graph comment)
for graph in graphs:
    graph.render(graph.comment)

seen_event_ids
Out[12]:
{3, 5, 7, 11, 12, 17, 23}
In [13]:
# Same trees as before, but without the events that child_process_tree treats as common
graphs_uncommon = []
for process_guid, process_image in zip(
    malware_procs['Event.EventData.ProcessGuid'],
    malware_procs['Event.EventData.Image'],
):
    dot = graphviz.Digraph(comment=process_guid)
    child_process_tree(dot, process_guid, process_image, skip_common_events=True)
    graphs_uncommon.append(dot)

# Render each reduced tree under an 'uncommon_' prefix so the full trees are not overwritten
for graph in graphs_uncommon:
    graph.render('uncommon_' + graph.comment)