In this notebook we gather all the information needed to perform anomaly detection on the lifetime of files: for every deleted file we collect its creation, last-update, and deletion timestamps.
import sys
sys.path.append('/opt/2IMS40')
sys.path.append('../')
# Import helper modules from the paths added above
from search_index import search_index
from es_client import es_client
import datetime
import pandas as pd
import elasticsearch
import elasticsearch.helpers
from IPython.display import display
from tqdm.auto import tqdm
# Instantiate the Elasticsearch client wrapper (this rebinds the name es_client to the instance)
es_client = es_client(
    local=True  # connects to 'https://localhost:9200' with an invalid certificate
)
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.14) or chardet (3.0.4) doesn't match a supported version!
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
/usr/local/lib/python3.8/dist-packages/elasticsearch/_sync/client/__init__.py:394: SecurityWarning: Connecting to 'https://localhost:9200' using TLS with verify_certs=False is insecure
_transport = transport_class(
Gather all log entries with EventID 11 (FileCreate) and EventID 23 (FileDelete)
# Find file create (EventID 11) and file delete (EventID 23) events in the sysmon.json source
df = search_index(es_client, "source: (sysmon.json) AND (Event.System.EventID : 11 OR Event.System.EventID : 23)", size=None, debug=True).dropna(how='all', axis=1)
df.shape
(325704, 31)
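As a quick sanity check (not part of the original run), one can look at how these documents split between the two event types:

# Count the retrieved documents per Sysmon EventID (11 = FileCreate, 23 = FileDelete)
df['Event.System.EventID'].value_counts()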
Start by filtering out only the Delete events; the matching Create and Update timestamps are gathered in the next step
# Keep only the columns needed for the file-lifetime analysis
file_create_delete = df[['Event.EventData.TargetFilename', 'Event.System.EventID', 'Event.EventData.CreationUtcTime', 'Event.EventData.UtcTime']]

# Select the FileDelete events (EventID 23) and keep the filename and deletion timestamp
file_times = file_create_delete[file_create_delete['Event.System.EventID'] == 23].reset_index()[['Event.EventData.TargetFilename', 'Event.EventData.UtcTime']]
file_times.rename({'Event.EventData.TargetFilename': 'TargetFilename', 'Event.EventData.UtcTime': 'DeletionTime'}, axis=1, inplace=True)

# Placeholder columns, filled in below with the timestamps of the matching FileCreate event
file_times['CreateTime'] = 0
file_times['UpdateTime'] = 0
file_times
| | TargetFilename | DeletionTime | CreateTime | UpdateTime |
|---|---|---|---|---|
| 0 | C:\Windows\ServiceProfiles\NetworkService\AppD... | 2022-12-09 09:51:40.809 | 0 | 0 |
| 1 | C:\ProgramData\regid.1991-06.com.microsoft\reg... | 2022-12-09 09:51:40.984 | 0 | 0 |
| 2 | C:\ProgramData\Microsoft\Diagnosis\DownloadedS... | 2022-12-09 09:51:41.265 | 0 | 0 |
| 3 | C:\ProgramData\Microsoft\Diagnosis\parse.dat | 2022-12-09 09:51:41.307 | 0 | 0 |
| 4 | C:\Windows\SoftwareDistribution\Download\1cd81... | 2022-12-09 09:51:41.497 | 0 | 0 |
| ... | ... | ... | ... | ... |
| 116311 | C:\Users\User\AppData\Roaming\Microsoft\Window... | 2022-12-23 18:39:03.490 | 0 | 0 |
| 116312 | C:\Windows\SERVIC~1\LOCALS~1\AppData\Local\Tem... | 2022-12-23 18:39:18.876 | 0 | 0 |
| 116313 | C:\Windows\SERVIC~1\LOCALS~1\AppData\Local\Tem... | 2022-12-23 18:39:18.981 | 0 | 0 |
| 116314 | C:\Windows\SERVIC~1\LOCALS~1\AppData\Local\Tem... | 2022-12-23 18:39:18.988 | 0 | 0 |
| 116315 | C:\Users\User\AppData\Roaming\Microsoft\Window... | 2022-12-23 18:39:39.656 | 0 | 0 |
116316 rows × 4 columns
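Note that the same path can appear in several delete events (temporary files are often recreated and deleted again), which is why the matching below iterates over every deletion per filename. A quick way to see this, not part of the original run:

# How often each path was deleted; values > 1 mean the file was recreated and deleted again
file_times['TargetFilename'].value_counts().head(10)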
Gather the Create and Update timestamps that match each Delete event
# FileCreate events (EventID 11); these carry both CreationUtcTime and the UtcTime of the event
only_event_11 = file_create_delete[file_create_delete['Event.System.EventID'] == 11]

unique_filenames = file_times['TargetFilename'].unique()

for target_filename in tqdm(unique_filenames, total=len(unique_filenames)):
    # All FileCreate events recorded for this path
    only_event_11_filename = only_event_11[only_event_11['Event.EventData.TargetFilename'] == target_filename]

    for index, row in file_times[file_times['TargetFilename'] == target_filename].iterrows():
        # Create events at or before this deletion; the zero-padded UtcTime strings
        # compare lexicographically in chronological order
        events = only_event_11_filename[only_event_11_filename['Event.EventData.UtcTime'] <= row['DeletionTime']]
        if events.shape[0] > 0:
            # Take the last create event before the deletion (results assumed to be in time order)
            last_createfile_event = events.iloc[-1]
            file_times.loc[index, 'CreateTime'] = last_createfile_event['Event.EventData.CreationUtcTime']
            file_times.loc[index, 'UpdateTime'] = last_createfile_event['Event.EventData.UtcTime']
  0%|          | 0/101754 [00:00<?, ?it/s]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
/tmp/ipykernel_139170/2781775442.py in <module>
----> 6 only_event_11_filename = only_event_11[(only_event_11['Event.EventData.TargetFilename'] == target_filename)]
...
KeyboardInterrupt:
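The per-filename loop above is too slow to finish on the ~100k unique filenames and was interrupted by hand. A vectorized alternative is sketched below: it matches every deletion with the most recent preceding FileCreate event for the same filename using pandas.merge_asof. This is not the approach used in the rest of the notebook, just a faster equivalent; it assumes the UtcTime strings parse with pd.to_datetime, and the names creates, deletes, and matched are illustrative.

# Vectorized alternative (sketch): match each deletion with the latest earlier create event
creates = only_event_11[['Event.EventData.TargetFilename',
                         'Event.EventData.CreationUtcTime',
                         'Event.EventData.UtcTime']].copy()
creates.columns = ['TargetFilename', 'CreateTime', 'UpdateTime']
creates['UpdateTime'] = pd.to_datetime(creates['UpdateTime'])

deletes = file_times[['TargetFilename', 'DeletionTime']].copy()
deletes['DeletionTime'] = pd.to_datetime(deletes['DeletionTime'])

# merge_asof requires both frames to be sorted on their time keys
creates = creates.sort_values('UpdateTime')
deletes = deletes.sort_values('DeletionTime')

# For every deletion, take the last create/update event at or before the deletion time,
# matched per filename via the 'by' argument
matched = pd.merge_asof(deletes, creates,
                        left_on='DeletionTime', right_on='UpdateTime',
                        by='TargetFilename', direction='backward')

The resulting matched frame has the same four columns as file_times; deletions with no preceding create event get NaT/NaN instead of the 0 placeholder used above.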
Save the dataframe to file_times.csv, keeping only the rows for which a matching create event was found
# Keep only deletions whose UpdateTime was filled in by the matching step above
file_times[file_times['UpdateTime'] != 0].to_csv('file_times.csv')
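For the anomaly detection itself, the file lifetime can later be derived from the saved timestamps. A minimal sketch, assuming the CSV is read back and the timestamp strings parse with pd.to_datetime (lifetimes is an illustrative name):

# Reload the saved timestamps and compute how long each file existed before deletion
lifetimes = pd.read_csv('file_times.csv', index_col=0)
lifetimes['Lifetime'] = (pd.to_datetime(lifetimes['DeletionTime'])
                         - pd.to_datetime(lifetimes['CreateTime']))
lifetimes['Lifetime'].describe()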