In [7]:
import datetime
import pandas as pd
import os
import json
import matplotlib as plt
import re
import numpy as np

import elasticsearch
import elasticsearch.helpers

import concurrent

import sys
sys.path.append('/opt/2IMS40')

# Import files from current directory
from search_index import search_index
from es_client import es_client

# Plotting and client setup for the rest of the notebook.
# next command ensures that plots appear inside the notebook
%matplotlib inline
import matplotlib as mpl
# NOTE: an earlier import in this cell binds `plt` to the top-level matplotlib
# package; the line below rebinds `plt` to pyplot, which is what the rcParams
# lines further down rely on.
import matplotlib.pyplot as plt
import seaborn as sns  # also improves the look of plots
sns.set()  # set Seaborn defaults
plt.rcParams['figure.figsize'] = 10, 5  # default hor./vert. size of plots, in inches
plt.rcParams['lines.markeredgewidth'] = 1  # to fix issue with seaborn box plots; needed after import seaborn

# NOTE(review): this rebinds the name `es_client` from the imported callable to
# the object it returns. Re-running this line on its own (without re-running
# `from es_client import es_client`) would call the returned object instead.
# The recorded output shows the resulting client connecting to
# https://localhost:9200 over TLS with certificate verification disabled.
es_client = es_client(local=True)
/usr/local/lib/python3.8/dist-packages/elasticsearch/_sync/client/__init__.py:394: SecurityWarning: Connecting to 'https://localhost:9200' using TLS with verify_certs=False is insecure
  _transport = transport_class(
In [103]:
# Fetch the sysmon.json documents that have an Event.EventData.Image field and
# match the terms "programs" and "startup", discarding columns that are empty
# for every returned row.
df = search_index(
    es_client,
    "_exists_:Event.EventData.Image AND programs AND startup AND source:sysmon.json",
).dropna(how="all", axis=1)

# Lower-case the image paths before de-duplicating so that paths differing only
# in letter case collapse into a single entry.
query_images = (
    df["Event.EventData.Image"]
    .str.lower()
    .unique()
)

# Header, blank line, then the array of distinct image paths.
print(
    "All the unique images that have logs related to programs/startup folder: \n",
    query_images,
    sep="\n",
)
All the unique images that have logs related to programs/startup folder: 

['c:\\windows\\system32\\wuauclt.exe'
 'c:\\windows\\syswow64\\windowspowershell\\v1.0\\powershell.exe'
 'c:\\windows\\system32\\conhost.exe'
 'c:\\users\\user\\appdata\\roaming\\microsoft\\windows\\start menu\\programs\\startup\\systdeeem.exe'
 'c:\\windows\\microsoft.net\\framework\\v4.0.30319\\regsvcs.exe']
In [116]:
# Gather a list of all unique images that are in the log files

# DO NOT UNCOMMENT AND RUN THIS QUERY, AS IT WILL CRASH THE SERVER IF HANDLED IMPROPERLY
# df = search_index(es_client, "_exists_:Event.EventData.Image AND source:sysmon.json").dropna(axis=1)
# all_unique_images = df["Event.EventData.Image"].str.lower().unique()

# Ground-truth set of malware related images. Every path is lower-cased so it
# compares equal to the lower-cased image paths produced by the queries.
malware_images = {
    image_path.lower()
    for image_path in (
        'C:\\Users\\User\\Downloads\\2ecbf5a27adc238af0b125b985ae2a8b1bc14526faea3c9e40e6c3437245d830.exe',
        'C:\\Users\\User\\AppData\\Roaming\\Microsoft\\Windows\\Start Menu\\Programs\\Startup\\Systdeeem.exe',
        'C:\\Windows\\SysWOW64\\WindowsPowerShell\\v1.0\\powershell.exe',
        'C:\\Windows\\System32\\conhost.exe',
        'C:\\Windows\\Microsoft.NET\\Framework\\v4.0.30319\\RegSvcs.exe',
    )
}
In [118]:
# Confusion-matrix counts for the query, measured over unique lower-cased image
# paths (not over individual log events).
detected_images = set(query_images)  # build the set once instead of per count

true_positives = len(malware_images & detected_images)
print("true_positives:", true_positives)
false_positives = len(detected_images - malware_images)
print("false_positives:", false_positives)
false_negatives = len(malware_images - detected_images)
print("false_negatives:", false_negatives)

# `all_unique_images` is only produced by a query that is deliberately left
# commented out in this notebook (it can crash the server), so on a fresh
# kernel the name does not exist and this cell used to fail with NameError.
# Fall back to the size recorded from the last full run: the recorded counts
# 4 TP + 1 FP + 1 FN + 250 TN give 256 unique images.
RECORDED_UNIQUE_IMAGE_COUNT = 256
try:
    total_unique_images = len(all_unique_images)
except NameError:
    total_unique_images = RECORDED_UNIQUE_IMAGE_COUNT
    print("all_unique_images is not defined; using recorded count:", total_unique_images)

# Everything in the universe that is neither detected nor known malware.
true_negatives = total_unique_images - true_positives - false_positives - false_negatives
print("true_negatives:", true_negatives)
true_positives: 4
false_positives: 1
false_negatives: 1
true_negatives: 250
In [119]:
def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero.

    A query that matches nothing (or a ground truth with no positives) makes
    some denominators zero; NaN keeps the cell running and prints as "nan"
    instead of raising ZeroDivisionError.
    """
    return numerator / denominator if denominator else float("nan")


# Standard classification metrics derived from the confusion-matrix counts
# computed in the previous cell.
accuracy = _ratio(
    true_positives + true_negatives,
    true_positives + false_positives + true_negatives + false_negatives,
)
precision = _ratio(true_positives, true_positives + false_positives)
recall = _ratio(true_positives, true_positives + false_negatives)
FPR = _ratio(false_positives, false_positives + true_negatives)  # false positive rate
TNR = _ratio(true_negatives, false_positives + true_negatives)  # true negative rate
F1_score = _ratio(2 * precision * recall, precision + recall)

# Labels are pre-padded so the "=" signs line up in the printed report.
for label, value in (
    ("Accuracy            = ", accuracy),
    ("Precision           = ", precision),
    ("Recall              = ", recall),
    ("False Positive Rate = ", FPR),
    ("True  Negative Rate = ", TNR),
    ("F1-score            = ", F1_score),
):
    print(label + "{0:.3f}".format(value))
Accuracy            = 0.992
Precision           = 0.800
Recall              = 0.800
False Positive Rate = 0.004
True  Negative Rate = 0.996
F1-score            = 0.800