import datetime
import pandas as pd
import os
import json
import elasticsearch
import elasticsearch.helpers
import concurrent
import sys
sys.path.append('/opt/2IMS40')
sys.path.append('../')
# Import files from current directory
from search_index import search_index
from es_client import es_client
# next command ensures that plots appear inside the notebook
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns # also improves the look of plots
# Plot styling defaults and creation of the client object that the later
# search_index(...) calls receive as their first argument.
sns.set() # set Seaborn defaults
plt.rcParams['figure.figsize'] = 10, 5 # default hor./vert. size of plots, in inches
plt.rcParams['lines.markeredgewidth'] = 1 # to fix issue with seaborn box plots; needed after import seaborn
# NOTE(review): this rebinds the imported name `es_client` to the value
# returned by calling it, so the imported callable is no longer reachable under
# that name afterwards; executing this statement a second time would call the
# returned object instead of the imported callable.
es_client = es_client(
#host='https://elasticsearch.smeets.ee:443', local=False
)
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.14) or chardet (3.0.4) doesn't match a supported version!
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
/usr/local/lib/python3.8/dist-packages/elasticsearch/_sync/client/__init__.py:394: SecurityWarning: Connecting to 'https://localhost:9200' using TLS with verify_certs=False is insecure
_transport = transport_class(
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_kddcup99
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.covariance import EllipticEnvelope
from collections import defaultdict
# For constructing and saving trees
import graphviz
# Query the Sysmon process-creation events (EventID 1) ...
df = search_index(
    es_client,
    "source: (sysmon.json) AND Event.System.EventID: 1",
    size=None,
)
# ... and discard the columns that are empty for every returned row.
df = df.dropna(axis=1, how='all')
df.shape
(11242, 43)
# Keep only the columns needed to rebuild the process tree.
process_info = df[[
    'Event.EventData.Image',
    'Event.EventData.ParentImage',
    'Event.EventData.ProcessGuid',
    'Event.EventData.ParentProcessGuid',
    'Event.EventData.CommandLine',
    'Event.System.EventRecordID',
    'Event.System.TimeCreated.#attributes.SystemTime',
]]
# pd.DataFrame(process_info['Event.EventData.ParentImage'].value_counts())

# Select the process creations whose image path contains one of the known
# malware file-name fragments.
# regex=False: the fragments are literal text; as a regular expression the '.'
#              in 'Systdeeem.exe' would match any character.
# na=False:    a row without an image path counts as "no match" instead of
#              putting NaN into the boolean mask, which cannot be used to index.
malware_image_mask = (
    process_info['Event.EventData.Image'].str.contains('Systdeeem.exe', regex=False, na=False)
    | process_info['Event.EventData.Image'].str.contains('2ecb', regex=False, na=False)
)
malware_procs = process_info[malware_image_mask]
malware_procs
| Event.EventData.Image | Event.EventData.ParentImage | Event.EventData.ProcessGuid | Event.EventData.ParentProcessGuid | Event.EventData.CommandLine | Event.System.EventRecordID | Event.System.TimeCreated.#attributes.SystemTime | |
|---|---|---|---|---|---|---|---|
| 729 | C:\Users\User\Downloads\2ecbf5a27adc238af0b125... | C:\Windows\explorer.exe | EDA6DF62-B871-63A5-3401-000000002800 | EDA6DF62-B6AF-63A5-8A00-000000002800 | "C:\Users\User\Downloads\2ecbf5a27adc238af0b12... | 8239289 | 2022-12-23T14:17:21.173591Z |
| 1017 | C:\Users\User\AppData\Roaming\Microsoft\Window... | C:\Windows\explorer.exe | EDA6DF62-C90D-63A5-A800-000000002900 | EDA6DF62-C8F1-63A5-8800-000000002900 | "C:\Users\User\AppData\Roaming\Microsoft\Windo... | 8338140 | 2022-12-23T15:28:13.674590Z |
# ProcessGuid column of the malware process creations (row labels are kept).
malware_procs_guid = malware_procs.loc[:, 'Event.EventData.ProcessGuid']
malware_procs_guid
729 EDA6DF62-B871-63A5-3401-000000002800 1017 EDA6DF62-C90D-63A5-A800-000000002900 Name: Event.EventData.ProcessGuid, dtype: object
# Fetch the non-creation Sysmon events recorded for the first malware process.
# The GUID is read from malware_procs_guid instead of being pasted in by hand,
# so the query keeps matching if the underlying data is re-ingested.
first_malware_guid = malware_procs_guid.iloc[0]
df = search_index(
    es_client,
    'source: (sysmon.json) AND (NOT Event.System.EventID: 1) AND Event.EventData.ProcessGuid: "'
    + first_malware_guid + '"',
    size=None,
).dropna(how='all', axis=1)
df
| _index | _id | sort | Event.EventData.RuleName | Event.EventData.UtcTime | Event.System.Channel | Event.System.Computer | Event.System.EventID | Event.System.EventRecordID | Event.System.Execution.#attributes.ProcessID | ... | Event.EventData.FileVersion | Event.EventData.OriginalFileName | Event.EventData.Product | Event.EventData.ImageLoaded | Event.EventData.Signature | Event.EventData.SignatureStatus | Event.EventData.Signed | source | Event.EventData.CreationUtcTime | Event.EventData.TargetFilename | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | host_events | 4338891 | [1671805041230] | - | 2022-12-23 14:17:21.219 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239364 | 3424 | ... | 10.0.22000.918 (WinBuild.160101.0800) | ntdll.dll | Microsoft® Windows® Operating System | C:\Windows\System32\ntdll.dll | Microsoft Windows | Valid | true | 04_host_windows/sysmon.json | NaN | NaN |
| 1 | host_events | 4338892 | [1671805041244] | - | 2022-12-23 14:17:21.219 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239365 | 3424 | ... | 1.0.0.0 | JHhH.exe | JHhH | C:\Users\User\Downloads\2ecbf5a27adc238af0b125... | - | Unavailable | false | 04_host_windows/sysmon.json | NaN | NaN |
| 2 | host_events | 4338894 | [1671805041261] | - | 2022-12-23 14:17:21.241 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239367 | 3424 | ... | 10.0.22000.1165 (WinBuild.160101.0800) | ntdll.dll | Microsoft® Windows® Operating System | C:\Windows\SysWOW64\ntdll.dll | Microsoft Windows | Valid | true | 04_host_windows/sysmon.json | NaN | NaN |
| 3 | host_events | 4338895 | [1671805041272] | - | 2022-12-23 14:17:21.267 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239368 | 3424 | ... | 10.0.22000.71 (WinBuild.160101.0800) | wow64.dll | Microsoft® Windows® Operating System | C:\Windows\System32\wow64.dll | Microsoft Windows | Valid | true | 04_host_windows/sysmon.json | NaN | NaN |
| 4 | host_events | 4338896 | [1671805041273] | - | 2022-12-23 14:17:21.267 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239369 | 3424 | ... | - | - | - | C:\Windows\System32\wow64base.dll | Microsoft Windows | Valid | true | 04_host_windows/sysmon.json | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 62 | host_events | 4339103 | [1671805092365] | - | 2022-12-23 14:18:12.204 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239576 | 3424 | ... | 10.0.22000.282 (WinBuild.160101.0800) | rsaenh.dll | Microsoft® Windows® Operating System | C:\Windows\SysWOW64\rsaenh.dll | Microsoft Windows | Valid | true | 04_host_windows/sysmon.json | NaN | NaN |
| 63 | host_events | 4339106 | [1671805092377] | - | 2022-12-23 14:18:12.204 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239579 | 3424 | ... | 10.0.22000.1 (WinBuild.160101.0800) | cryptbase.dll | Microsoft® Windows® Operating System | C:\Windows\SysWOW64\cryptbase.dll | Microsoft Windows | Valid | true | 04_host_windows/sysmon.json | NaN | NaN |
| 64 | host_events | 4339107 | [1671805092389] | - | 2022-12-23 14:18:12.363 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 7 | 8239580 | 3424 | ... | 10.0.22000.1 (WinBuild.160101.0800) | PSAPI | Microsoft® Windows® Operating System | C:\Windows\SysWOW64\psapi.dll | Microsoft Windows | Valid | true | 04_host_windows/sysmon.json | NaN | NaN |
| 65 | host_events | 4339126 | [1671805092558] | - | 2022-12-23 14:18:12.553 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 11 | 8239599 | 3424 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 04_host_windows/sysmon.json | 2022-12-23 14:18:12.499 | C:\Users\User\AppData\Local\Microsoft\CLR_v4.0... |
| 66 | host_events | 4339133 | [1671805092617] | - | 2022-12-23 14:18:12.607 | Microsoft-Windows-Sysmon/Operational | WinDev2211Eval | 5 | 8239606 | 3424 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 04_host_windows/sysmon.json | NaN | NaN |
67 rows × 37 columns
# Show which columns are left in df after the all-empty ones were dropped.
df.columns
Index(['_index', '_id', 'sort', 'Event.EventData.RuleName',
'Event.EventData.UtcTime', 'Event.System.Channel',
'Event.System.Computer', 'Event.System.EventID',
'Event.System.EventRecordID',
'Event.System.Execution.#attributes.ProcessID',
'Event.System.Execution.#attributes.ThreadID', 'Event.System.Keywords',
'Event.System.Level', 'Event.System.Opcode',
'Event.System.Provider.#attributes.Guid',
'Event.System.Provider.#attributes.Name',
'Event.System.Security.#attributes.UserID', 'Event.System.Task',
'Event.System.TimeCreated.#attributes.SystemTime',
'Event.System.Version', 'Event.EventData.Image',
'Event.EventData.ProcessGuid', 'Event.EventData.ProcessId',
'Event.EventData.User', 'Event.EventData.Hashes',
'Event.EventData.Company', 'Event.EventData.Description',
'Event.EventData.FileVersion', 'Event.EventData.OriginalFileName',
'Event.EventData.Product', 'Event.EventData.ImageLoaded',
'Event.EventData.Signature', 'Event.EventData.SignatureStatus',
'Event.EventData.Signed', 'source', 'Event.EventData.CreationUtcTime',
'Event.EventData.TargetFilename'],
dtype='object')
def build_event_string_color(row):
    """Build the Graphviz label, colour and style for one Sysmon event.

    Parameters:
        row: one event, indexable by the flattened Sysmon field names
            (for example a pandas Series produced by ``iterrows()`` or a
            plain dict). Only the fields belonging to the event's type are
            read, plus the record ID, image and creation time.

    Returns:
        A ``(label, color, style)`` tuple. ``label`` is an HTML-like Graphviz
        label (wrapped in ``<...>``), ``color`` is a colour name and ``style``
        is ``'filled'`` for process-termination events and ``None`` otherwise.
        Event IDs without a dedicated branch keep the default green colour and
        only show the generic fields.

    Side effects:
        Adds the event ID to the module-level set ``seen_event_ids``; the set
        is created on first use if it does not exist yet.
    """
    import html

    global seen_event_ids

    def esc(value):
        # Values are spliced into HTML-like label markup, so '&', '<' and '>'
        # (e.g. a pipe name such as '<Anonymous Pipe>') must be escaped or
        # Graphviz rejects the label.
        return html.escape(str(value), quote=False)

    def field(caption, value):
        return '<I>' + caption + ':</I> ' + esc(value)

    # initialize default color and style
    color = 'green'
    style = None

    # Determine type of event
    event_id = row['Event.System.EventID']
    try:
        seen_event_ids = seen_event_ids.union([event_id])
    except NameError:
        # Called before the bookkeeping set was initialised: start a new one.
        seen_event_ids = {event_id}

    event_type_line = '<I>Event Type:</I> ' + esc(event_id)
    details = []

    if event_id == 3:
        event_type_line += ' - Network connection'
        details = [
            field('Source IP', row['Event.EventData.SourceIp']),
            # Ports are shown as integers even when stored as floats.
            field('Source Port', int(row['Event.EventData.SourcePort'])),
            field('Destination IP', row['Event.EventData.DestinationIp']),
            field('Destination Port', int(row['Event.EventData.DestinationPort'])),
        ]
        color = 'cyan3'
    elif event_id == 5:
        event_type_line += ' - Process terminated'
        color = 'gray39'
        style = 'filled'
    elif event_id == 7:
        event_type_line += ' - Image loaded'
        color = 'gold'
    elif event_id == 11:
        event_type_line += ' - FileCreate'
        details = [field('Target Filename', row['Event.EventData.TargetFilename'])]
        color = 'hotpink'
    elif event_id == 12:
        event_type_line += ' - RegistryEvent (Object create and delete)'
        details = [
            field('Event Type', row['Event.EventData.EventType']),
            field('Target Object', row['Event.EventData.TargetObject']),
        ]
        color = 'orange'
    elif event_id == 17:
        event_type_line += ' - PipeEvent (Pipe Created)'
        details = [
            field('Event Type', row['Event.EventData.EventType']),
            field('Pipe Name', row['Event.EventData.PipeName']),
        ]
        color = 'rebeccapurple'
    elif event_id == 23:
        event_type_line += ' - FileDelete (File Delete archived)'
        details = [
            field('Target Filename', row['Event.EventData.TargetFilename']),
            field('Is Executable', row['Event.EventData.IsExecutable']),
        ]
        color = 'rosybrown'

    label_lines = [
        '<B><I>Event ID:</I> ' + esc(row['Event.System.EventRecordID']) + '</B>',
        event_type_line,
    ]
    label_lines.extend(details)
    # Fields shown for every event type.
    label_lines.append(field('Loaded Image', row['Event.EventData.Image']))
    label_lines.append(field('Time', row['Event.System.TimeCreated.#attributes.SystemTime']))

    return '<' + '<BR/>'.join(label_lines) + '>', color, style
def child_process_tree(dot, process_guid, process_image, skip_common_events=False):
    """Add the subtree rooted at one process to a Graphviz graph.

    The process itself becomes a red, filled node named by its ProcessGuid.
    Every non-creation Sysmon event returned by ``search_index`` for that GUID
    becomes a node named by its EventRecordID, linked from the process node
    with an edge in the event's colour. The function then recurses into every
    process in ``process_info`` whose ParentProcessGuid is this GUID and links
    each child with a red edge.

    Parameters:
        dot: graph object providing ``node(...)`` and ``edge(...)``; it is
            modified in place.
        process_guid: ProcessGuid of the process to draw; must have a
            process-creation row in the module-level ``process_info``.
        process_image: not used by the function; kept so existing callers
            keep working.
        skip_common_events: when true, events with ID 7 or 12 are left out of
            the graph.

    Raises:
        IndexError: if ``process_info`` has no row for ``process_guid``.
    """
    import html

    def esc(value):
        # Values end up inside HTML-like label markup; '&', '<' and '>' would
        # otherwise make the label invalid.
        return html.escape(str(value), quote=False)

    creation_rows = process_info[process_info['Event.EventData.ProcessGuid'] == process_guid]
    if creation_rows.empty:
        raise IndexError('no process-creation event found for ProcessGuid ' + str(process_guid))
    row = creation_rows.iloc[0]

    event_string = (
        '<<B><I>Event ID:</I> ' + esc(row['Event.System.EventRecordID'])
        + '</B><BR/><I>Event Type:</I> 1 - Process creation'
        + '<BR/><I>Loaded Image:</I> ' + esc(row['Event.EventData.Image'])
        + '<BR/><I>Time:</I> ' + esc(row['Event.System.TimeCreated.#attributes.SystemTime'])
        + '>'
    )
    dot.node(process_guid, event_string, color='red', style='filled')

    # Find the child processes of this process
    child_procs = process_info[process_info['Event.EventData.ParentProcessGuid'] == process_guid]

    # Find events for this process
    proc_events = search_index(
        es_client,
        'source: (sysmon.json) AND (NOT Event.System.EventID: 1) AND Event.EventData.ProcessGuid: "'
        + process_guid + '"',
        size=None,
    ).dropna(how='all', axis=1)

    # One node per event, linked from the owning process.
    for _, event_proc_row in proc_events.iterrows():
        if skip_common_events and event_proc_row['Event.System.EventID'] in (7, 12):
            continue
        event_node = str(event_proc_row['Event.System.EventRecordID'])
        event_string, color, style = build_event_string_color(event_proc_row)
        dot.node(event_node, event_string, color=color, style=style)
        dot.edge(process_guid, event_node, color=color)

    # Generate the subtree of every child process, then link it to this one.
    for _, child_proc_row in child_procs.iterrows():
        child_proc_guid = child_proc_row['Event.EventData.ProcessGuid']
        child_process_image = child_proc_row['Event.EventData.Image']
        child_process_tree(dot, child_proc_guid, child_process_image, skip_common_events)
        dot.edge(process_guid, child_proc_guid, color='red')
# Reset the bookkeeping set; it is filled while the trees are built and tells
# us which event IDs have to be handled when formatting events.
seen_event_ids = set()

# One graph per malware process, containing all of its events.
graphs = []
for index, proc_row in malware_procs.iterrows():
    process_guid = proc_row['Event.EventData.ProcessGuid']
    process_image = proc_row['Event.EventData.Image']
    dot = graphviz.Digraph(comment=process_guid)
    child_process_tree(dot, process_guid, process_image)
    # Only keep the graph once its tree was built completely.
    graphs.append(dot)

# Write every graph to disk, using the process GUID as file name.
for graph in graphs:
    graph.render(filename=graph.comment)

seen_event_ids
{3, 5, 7, 11, 12, 17, 23}
# Same trees as before, but with skip_common_events switched on so that the
# events child_process_tree treats as common are left out.
graphs_uncommon = []
for index, proc_row in malware_procs.iterrows():
    process_guid = proc_row['Event.EventData.ProcessGuid']
    process_image = proc_row['Event.EventData.Image']
    dot = graphviz.Digraph(comment=process_guid)
    child_process_tree(dot, process_guid, process_image, skip_common_events=True)
    # Only keep the graph once its tree was built completely.
    graphs_uncommon.append(dot)

# Write the reduced graphs next to the full ones, with an 'uncommon_' prefix.
for graph in graphs_uncommon:
    graph.render(filename='uncommon_' + graph.comment)