In [1]:
import datetime
import pandas as pd
import os
import json

import elasticsearch
import elasticsearch.helpers

import concurrent

import sys
sys.path.append('/opt/2IMS40')
sys.path.append('../')

# Import files from current directory
from search_index import search_index
from es_client import es_client

# next command ensures that plots appear inside the notebook
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns  # also improves the look of plots
sns.set()  # set Seaborn defaults (also improves the look of plots)
# Applied after sns.set(): default hor./vert. plot size in inches, and the
# marker edge width needed to fix an issue with seaborn box plots.
plt.rcParams.update({
    'figure.figsize': (10, 5),
    'lines.markeredgewidth': 1,
})

# NOTE: this rebinds the imported `es_client` name to the value it returns;
# later cells pass that value to search_index().
es_client = es_client(host='https://elasticsearch.smeets.ee:443', local=False)
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.14) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_kddcup99
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.covariance import EllipticEnvelope
from collections import defaultdict
In [3]:
# For constructing and saving trees
import graphviz
In [4]:
# Find process creations (Sysmon EventID 1) and discard every column that
# holds no value at all.
df = (
    search_index(es_client, "source: (sysmon.json) AND Event.System.EventID: 1", size=None)
    .dropna(axis='columns', how='all')
)

df.shape
Out[4]:
(11242, 43)
In [5]:
# Keep only the columns needed to relate each process to its parent.
process_info = df[[
    'Event.EventData.Image',
    'Event.EventData.ParentImage',
    'Event.EventData.ProcessGuid',
    'Event.EventData.ParentProcessGuid',
    'Event.EventData.CommandLine',
]]
# pd.DataFrame(process_info['Event.EventData.ParentImage'].value_counts())
In [6]:
# Select the process-creation events whose image path contains either marker
# substring. regex=False makes the match literal (the '.' in 'Systdeeem.exe'
# would otherwise be a regex wildcard); na=False counts rows without an image
# path as non-matches so the mask stays purely boolean.
system_spawned_procs = process_info[
    process_info['Event.EventData.Image'].str.contains('Systdeeem.exe', regex=False, na=False)
    | process_info['Event.EventData.Image'].str.contains('2ecb', regex=False, na=False)
]
system_spawned_procs
Out[6]:
Event.EventData.Image Event.EventData.ParentImage Event.EventData.ProcessGuid Event.EventData.ParentProcessGuid Event.EventData.CommandLine
729 C:\Users\User\Downloads\2ecbf5a27adc238af0b125... C:\Windows\explorer.exe EDA6DF62-B871-63A5-3401-000000002800 EDA6DF62-B6AF-63A5-8A00-000000002800 "C:\Users\User\Downloads\2ecbf5a27adc238af0b12...
1017 C:\Users\User\AppData\Roaming\Microsoft\Window... C:\Windows\explorer.exe EDA6DF62-C90D-63A5-A800-000000002900 EDA6DF62-C8F1-63A5-8800-000000002900 "C:\Users\User\AppData\Roaming\Microsoft\Windo...
In [7]:
def child_process_tree(dot, child_process_guid, child_process_image, _ancestors=frozenset()):
    """Recursively add a process and all of its descendants to a graph.

    A node is created for the given process, then every row of the
    module-level ``process_info`` frame whose
    ``Event.EventData.ParentProcessGuid`` equals ``child_process_guid`` is
    treated as a direct child: its subtree is generated first and an edge
    from the parent to the child is added afterwards.

    Parameters
    ----------
    dot
        Graph object exposing ``node(name, label)`` and ``edge(tail, head)``
        (the notebook passes a ``graphviz.Digraph``).
    child_process_guid
        GUID of the process to add; used as the node identifier.
    child_process_image
        Image path of the process; escaped with ``graphviz.escape`` and used
        as the node label.
    _ancestors
        Internal: GUIDs on the path from the root to this call. Callers
        should leave it at its default.

    Notes
    -----
    If a child's GUID already appears among its own ancestors, the edge is
    still drawn but the child is not descended into again, so cyclic
    parent/child data terminates instead of raising ``RecursionError``.
    """
    dot.node(child_process_guid, graphviz.escape(child_process_image))

    # GUIDs on the current root-to-node path, including this node.
    lineage = _ancestors | {child_process_guid}

    # Find the child processes of this process
    child_procs = process_info[process_info['Event.EventData.ParentProcessGuid'] == child_process_guid]

    # Generate subtree for each child process
    for _, child_proc_row in child_procs.iterrows():
        child_proc_guid = child_proc_row['Event.EventData.ProcessGuid']
        # Separate local name so the function's own parameter is not rebound.
        child_proc_image = child_proc_row['Event.EventData.Image']
        if child_proc_guid not in lineage:
            child_process_tree(dot, child_proc_guid, child_proc_image, lineage)
        # Add edges from higher node to lower one
        dot.edge(child_process_guid, child_proc_guid)
In [8]:
# sys.setrecursionlimit(10000)
# Build one directed graph per selected root process; the root's GUID is
# stored as the graph comment.
graphs = []
for process_guid, process_image in zip(
    system_spawned_procs['Event.EventData.ProcessGuid'],
    system_spawned_procs['Event.EventData.Image'],
):
    dot = graphviz.Digraph(comment=process_guid)
    child_process_tree(dot, process_guid, process_image)

    # Add generated graph to list of graphs
    graphs.append(dot)
In [9]:
# Render every process tree, using the graph's comment as the output filename.
for graph in graphs:
    graph.render(filename=graph.comment)