Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
qcow/*
db/*
iso/*
*.pyc
7 changes: 4 additions & 3 deletions conf/malrec.config
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
[Main]
basedir = /home/brendan/malrec
panda = /home/brendan/git/panda/qemu
db = /home/brendan/malrec/db/panda.db
basedir = /home/users/grimm/panda-malrec
panda = /home/users/grimm/panda1/qemu
db = /home/users/grimm/panda-malrec/db/panda.db
volatility = /home/users/grimm/volatility/vol.py

[VM]
mem = 1G
Expand Down
2 changes: 2 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Usage: run.sh <malicious-flag> <queue-subdir>
#   $1  forwarded to scripts/runmal.py as its 4th argument, where it is parsed
#       as an integer flag (non-zero = malicious, zero = benign).
#   $2  subdirectory of queue/pending/ whose entries are the samples to run.
#
# Every entry listed under queue/pending/$2/ is handed to runmal.py through
# GNU parallel, at most 40 jobs at a time. {/} expands to the basename of the
# entry (the sample name) and {%} to the parallel job-slot number, which
# runmal.py reads as its instance number.
# Once parallel returns, the script sleeps for 600 seconds before exiting.
ls queue/pending/$2/ | parallel -j 40 python scripts/runmal.py conf/malrec.config {/} {%} $1 ; sleep 600
83 changes: 83 additions & 0 deletions scripts/dumpmem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os
import tarfile
import subprocess
import errno
import re
import time
from Queue import Queue, Empty
from threading import Thread
import signal

def dumpmem(panda_path, mem, sample_name, dumppath):
    """Replay a PANDA recording with the n_instruct_mem plugin and tar the dumps.

    The replay is launched with its working directory set to the parent of
    ``dumppath`` and its stdout is watched for the line
    "Replay completed successfully". When that line shows up the PANDA
    process is killed and ``dumppath`` is packed into ``<dumppath>.tar.gz``.

    Args:
        panda_path: Path to the PANDA/QEMU executable.
        mem: Value passed to the ``-m`` option.
        sample_name: Name of the recording to replay; also used as the
            archive member name inside the tarball.
        dumppath: Directory that receives the memory dumps (created if needed).

    Returns:
        A ``(success, message, tarname)`` tuple. ``tarname`` is ``None`` when
        the replay ended without reporting success.
    """
    mkdir_p(dumppath)
    path = os.path.join(dumppath, "memdump")
    panda_args = [panda_path,
                  '-m', mem,
                  '-replay', sample_name,
                  '-panda', "n_instruct_mem:file={}".format(path)
                  ]
    tarname = "{}.tar.gz".format(dumppath)

    # Run the replay from the directory that contains dumppath.
    rr_path = "/".join(dumppath.split("/")[0:-1])

    proc = subprocess.Popen(panda_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, cwd=rr_path)

    # Reader threads keep both pipes drained so the child can never block on
    # a full pipe while we poll.
    stdout_q = Queue()
    stderr_q = Queue()
    out_t = Thread(target=enqueue_output, args=(proc.stdout, stdout_q))
    err_t = Thread(target=enqueue_output, args=(proc.stderr, stderr_q))
    out_t.daemon = True
    err_t.daemon = True
    out_t.start()
    err_t.start()

    success = False

    while True:
        line = get_nowait_q(stdout_q)

        # stderr is not inspected; discard it so the queue does not grow
        # without bound.
        while get_nowait_q(stderr_q) is not None:
            pass

        if line is not None:
            print(line)
            if re.search("Replay completed successfully", line):
                print("Success case made")
                success = True
                break
            # More output may already be queued: consume it without sleeping.
            continue

        # No output available. If the process has exited and its reader
        # thread has finished, nothing more can arrive: stop instead of
        # polling forever.
        if proc.poll() is not None and not out_t.is_alive():
            if stdout_q.empty():
                break
            continue

        time.sleep(2)

    # The replay process is killed once success has been seen (it is not
    # relied upon to exit by itself); in every case reap it so that no
    # zombie is left behind.
    if proc.poll() is None:
        os.kill(proc.pid, signal.SIGKILL)
    proc.wait()

    out_t.join()
    err_t.join()

    if not success:
        return (False, "Failed to create memdumps for {}".format(sample_name), None)

    with tarfile.open(tarname, "w:gz") as tar:
        print("Tart")
        tar.add(dumppath, arcname=sample_name)
        print("Tarted")

    return (True, "Success for {}".format(sample_name), tarname)

def mkdir_p(path):
    """Create ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is accepted silently; any other
    ``OSError`` (including ``path`` existing as a non-directory) propagates.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise

def enqueue_output(out, queue):
    """Copy every line read from ``out`` into ``queue``, then close ``out``.

    Reading stops at the first empty read (end of stream). Intended to run
    in a background thread so the consumer can poll ``queue`` without
    blocking on the pipe.
    """
    while True:
        chunk = out.readline()
        if chunk == b'':
            break
        queue.put(chunk)
    out.close()

def get_nowait_q(q):
    """Pop the next item from ``q`` without blocking.

    Returns the item, or ``None`` when the queue is currently empty.
    """
    try:
        item = q.get_nowait()
    except Empty:
        return None
    return item
60 changes: 47 additions & 13 deletions scripts/runmal.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@
import hashlib
import tempfile
import atexit
from dumpmem import dumpmem
from mon_util import mon_cmd, guest_type
import click_buttons

global sample_name, sample_file

def md5_for_file(fname, block_size=2**20):
f = open(fname, 'rb')
md5 = hashlib.md5()
Expand Down Expand Up @@ -52,7 +55,7 @@ def cleanup():
c = conn.cursor()
while True:
try:
c.execute('INSERT INTO samples VALUES(?,?,?)', (run_id, sample_name, sample_md5))
c.execute('INSERT INTO samples VALUES(?,?,?)', (sample_name, sample_name, sample_md5))
break
except sqlite3.OperationalError:
pass
Expand All @@ -61,7 +64,7 @@ def cleanup():
conn.close()

# All done, write the stamp
stampfile = os.path.join(logdir, 'stamps', run_id)
stampfile = os.path.join(logdir, 'stamps', sample_name)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the idea behind these changes? IIRC, run_id was intended to be a UUID, whereas sample_name was the filename of the sample. I had been trying to use run_id everywhere because we may want to run the same sample more than once. (But I may be misremembering the purpose of these variables)

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. For the purpose of dumping the memory and later finding the process in the list of dumped VADs, having the name was pretty important. One solution here would be to add a CLI argument for the naming scheme so that either is available. What do you think?

@moyix moyix Jun 3, 2019

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, right now the UUID to filename map is put into an sqlite database so it can be retrieved later for things like matching the VADs. Would that work here?

open(stampfile, 'w').close()

atexit.register(cleanup)
Expand All @@ -71,7 +74,14 @@ def cleanup():

sample_name = sys.argv[2]
instance = int(sys.argv[3])
run_id = str(uuid.uuid4())
malicious = bool(int(sys.argv[4]))
run_subpath = None

if malicious:
run_subpath = os.path.join("malicious", sample_name)
else:
run_subpath = os.path.join("benign", sample_name)


# Setup from config
monitor_port = 1234 + instance
Expand All @@ -81,8 +91,8 @@ def cleanup():
queuedir = os.path.join(basedir, 'queue')
logdir = os.path.join(basedir, 'logs')
rr_logdir = os.path.join(logdir, 'rr')
rr_logname = os.path.join(rr_logdir, run_id)
pcap_name = os.path.join(logdir, 'pcap', run_id + '.pcap')
rr_logname = os.path.join(rr_logdir, run_subpath)
pcap_name = os.path.join(logdir, 'pcap', sample_name+ '.pcap')
logfile = os.path.join(logdir, 'text', time.strftime('%Y%m%d.%H.%M.%S.{0}.log').format(instance))
database = conf.get('Main', 'db')

Expand All @@ -91,14 +101,20 @@ def cleanup():

# Startup msgs
logging.info("Config file: {0}".format(sys.argv[1]))
logging.info("UUID: {0}".format(run_id))
logging.info("Sample: {0}".format(sample_name))

# Claim ownership of this file
logging.info("Moving sample into 'running' queue.")
sample_file = os.path.join(queuedir, 'running', sample_name)
pending_path = None

if malicious:
pending_path = "pending/malicious"
else:
pending_path = "pending/benign"

shutil.move(
os.path.join(queuedir, 'pending', sample_name),
os.path.join(queuedir, pending_path, sample_name),
sample_file
)

Expand All @@ -107,7 +123,7 @@ def cleanup():
logging.info("MD5: {0}".format(sample_md5))

# Make the CD image
iso_file = os.path.join(basedir, 'iso', run_id + '.iso')
iso_file = os.path.join(basedir, 'iso', sample_name + '.iso')
logging.info("Creating CD image {0}".format(iso_file))
genisoimage = ['/usr/bin/genisoimage', '-iso-level', '4', '-l', '-R', '-J', '-o', iso_file, sample_file]
logging.info(str(genisoimage))
Expand Down Expand Up @@ -149,8 +165,8 @@ def cleanup():
]

# Start the QEMU process
panda_stdout = open(os.path.join(logdir, 'text', run_id + '.stdout'), 'w')
panda_stderr = open(os.path.join(logdir, 'text', run_id + '.stderr'), 'w')
panda_stdout = open(os.path.join(logdir, 'text', sample_name + '.stdout'), 'w')
panda_stderr = open(os.path.join(logdir, 'text', sample_name + '.stderr'), 'w')
panda = subprocess.Popen(panda_args, stdin=subprocess.PIPE, stdout=panda_stdout, stderr=panda_stderr)

# Connect to the monitor
Expand Down Expand Up @@ -194,12 +210,11 @@ def cleanup():

# Create our memory access socket
qemu_socket = tempfile.mktemp()
print("Qemu socket")
logging.info("Creating memory access socket: {0}".format(qemu_socket))
mon_cmd("pmemaccess {0}\n".format(qemu_socket), mon)

# Warm up the Volatility part
click_buttons.setup("Win7SP1x64" if is_64bit else "Win7SP1x86", "qemu://" + qemu_socket)

# Run the sample
logging.info("Starting sample.")
# Handle 3 cases: driver, exe, dll
Expand All @@ -220,11 +235,30 @@ def cleanup():
period = 10
for _ in range(exec_time / period):
time.sleep(period)
click_buttons.click_buttons(mon)

# End the record
logging.info("Ending record.")
mon_cmd("end_record\n", mon)
logging.info("Quitting PANDA.")
mon.write("q\n")

# Replay the recording just made to produce the memory dumps; on success
# dumpmem() packs them into "<rr_logname>_memdump.tar.gz".
success, message, filepath = dumpmem(panda_exe, conf.get('VM', 'mem'), sample_name, rr_logname+"_memdump")

if not success:
    logging.error(message)
    # A bare ``raise`` has no active exception to re-raise here; fail with an
    # explicit error that carries dumpmem()'s message instead.
    raise RuntimeError(message)

logging.info(message)

# The tarball is the only artefact kept: remove the unpacked dump directory
# and the replay files.
try:
    shutil.rmtree(rr_logname+"_memdump")
    os.remove(rr_logname+"-rr.cmd")
    os.remove(rr_logname+"-rr-nondet.log")
    os.remove(rr_logname+"-rr-snp")
except OSError as e:
    # ``except e:`` referenced an undefined name and would itself have raised
    # NameError; catch the error type rmtree/remove actually raise.
    logging.error(e)
    raise

logging.info("Success. Exiting.")
sys.exit(0)
132 changes: 132 additions & 0 deletions scripts/targetvaddump.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import os
import sys
import tarfile
import glob
import ConfigParser
import subprocess
import time
import errno
from collections import Counter
from pprint import pprint
from functools import partial
from multiprocessing import Pool

def find_possible_targets(folder_name):
    """Pick the memory dumps in which extra processes are running.

    Every file in ``folder_name`` is run through ``vol_pslist`` and the number
    of output lines after the first two is recorded under the file's suffix
    (the text after its last underscore). The dump with the fewest lines is
    the baseline; dumps whose count exceeds the baseline within the selected
    index window are returned as candidates.

    Args:
        folder_name: Directory containing the ``..._<index>`` memory dumps.

    Returns:
        ``(baseline, potential_targets)`` where ``baseline`` is the
        ``(dump_key, line_count)`` pair with the lowest count and
        ``potential_targets`` is the list produced by ``test_for_targets``.

    Raises:
        ValueError: If the folder holds no dumps, or if every dump has the
            same count (the sample apparently never ran).
    """
    proc_counts = Counter()

    dump_files = sorted(glob.glob(folder_name + "/*"), key=lambda name: name.split("_")[-1])
    for memfile in dump_files:
        listing = vol_pslist(memfile)
        proc_counts[memfile.split("_")[-1]] = len(listing.split("\n")[2:])

    # Without this guard an empty folder surfaced as a bare IndexError below.
    if not proc_counts:
        raise ValueError("No memory dumps found in {}".format(folder_name))

    count_by_dump = proc_counts.most_common()

    _max = count_by_dump[0]
    _min = count_by_dump[-1]

    if _max[1] == _min[1]:
        raise ValueError("It looks like the EXE was not run in the recording.")

    # Keys are already the suffix after the last underscore.
    max_index = int(_max[0])
    half = len(count_by_dump) // 2

    if max_index > half:
        candidates = range(max_index, len(count_by_dump))
    else:
        # NOTE(review): the previous code named this lower bound "min_index"
        # but computed it from _max, so the window has always started at the
        # busiest dump. That behaviour is kept; confirm whether starting at
        # the baseline dump's index was intended.
        candidates = range(max_index, half + 1)

    potential_targets = test_for_targets(_min[1], candidates, proc_counts)

    return (_min, potential_targets)

def test_for_targets(baseline, _range, counts):
    """Return the indices in ``_range`` whose count exceeds ``baseline``.

    Args:
        baseline: Count to compare against.
        _range: Iterable of integer dump indices to examine.
        counts: ``Counter`` keyed by the string form of each dump index.

    Returns:
        ``[0]`` when ``counts`` holds exactly one entry; otherwise the list of
        indices from ``_range`` with ``counts[str(index)] > baseline``, in
        iteration order.
    """
    if len(counts) == 1:
        return [0]

    return [index for index in _range if int(counts[str(index)]) - baseline > 0]


# Despite the "test_" prefix this is a regular helper; tell pytest not to
# collect it as a test case when the module's names are imported.
test_for_targets.__test__ = False

def dump_vads(folder_name, _type, os_procs, dump_index):
    """Dump the VADs of one memory image and drop those of baseline processes.

    Runs Volatility's ``vaddump`` on
    ``/tmp/<_type>/<folder_name>/memdump_<dump_index>`` writing into
    ``/tmp/vads/<folder_name>``, then deletes every produced file whose name
    contains one of the names in ``os_procs``.
    """
    out_dir = "/tmp/vads/{}".format(folder_name)
    mkdir_p(out_dir)

    image = "/tmp/{0}/{1}/memdump_{2}".format(_type, folder_name, dump_index)
    volatility_script = conf.get("Main", 'volatility')
    command = ["python", volatility_script]
    command += ['-f', image]
    command += ['--profile', "Win7SP0x86"]
    command += ['vaddump', '-D', out_dir]

    subprocess.check_output(command, stderr=subprocess.PIPE)

    # Remove the dumps belonging to processes that are part of the baseline.
    for name in os_procs:
        pattern = out_dir + "/*{}*".format(name)
        for stale in glob.glob(pattern):
            os.remove(stale)

def mkdir_p(path):
    """Create directory ``path`` with its parents; tolerate it already existing.

    Re-raises the ``OSError`` unless it is ``EEXIST`` for a path that is a
    directory.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno != errno.EEXIST or not os.path.isdir(path):
            raise

def proc_list_from_dump_index(folder_name, _type, dump_index):
    """Return the second space-separated field of each pslist row for a dump.

    The dump read is ``/tmp/<_type>/<folder_name>/memdump_<dump_index>``.
    Empty lines are discarded and the first two remaining lines are skipped
    (presumably the pslist header and separator -- verify against the
    Volatility output).
    """
    image = "/tmp/{0}/{1}/memdump_{2}".format(_type, folder_name, dump_index)
    rows = [ln for ln in vol_pslist(image).split("\n") if ln]

    return [row.split(" ")[1] for row in rows[2:]]

def vol_pslist(memfile):
    """Run Volatility's ``pslist`` plugin on ``memfile`` and return its stdout.

    The Volatility script path is read from the ``volatility`` option of the
    ``[Main]`` section of the module-level ``conf``. Raises
    ``subprocess.CalledProcessError`` when the command exits non-zero.
    """
    volatility_script = conf.get("Main", 'volatility')
    command = ["python", volatility_script]
    command += ['-f', memfile]
    command += ['--profile', "Win7SP0x86"]
    command.append('pslist')

    return subprocess.check_output(command, stderr=subprocess.PIPE)

def execute(_type, file):
    """Unpack one memdump tarball and dump the VADs of its candidate images.

    Args:
        _type: Name of the sub-directory of ``/tmp`` to extract into
            (``main`` passes "benign" or "malicious").
        file: Path to a ``<name>_<suffix>.tar.gz`` archive.

    The archive is extracted under ``/tmp/<_type>/``; extraction errors are
    reported and processing continues with whatever was unpacked.
    """
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only feed it tarballs produced by this pipeline.
    with tarfile.open(file) as memdumps:
        try:
            memdumps.extractall(path="/tmp/{}/".format(_type))
        except Exception:
            # Best effort on truncated archives, but unlike a bare ``except``
            # this lets KeyboardInterrupt/SystemExit through.
            print("Incomplete targz")

    # Derive the extracted folder name from the tarball name: take the
    # basename, drop the last two extensions (".tar.gz"), then drop the
    # trailing "_<suffix>" component.
    basename = file.split("/")[-1]
    stem = ".".join(basename.split(".")[0:-2])
    folder_name = "_".join(stem.split("_")[0:-1])

    baseline, potential_targets = find_possible_targets("/tmp/{0}/{1}".format(_type, folder_name))
    os_procs = proc_list_from_dump_index(folder_name, _type, baseline[0])

    # Plain loop: dump_vads is called for its side effects only, and a lazy
    # map() would silently do nothing on Python 3.
    for dump_index in potential_targets:
        dump_vads(folder_name, _type, os_procs, dump_index)

def main():
    """Process every memdump tarball in a folder with a pool of 10 workers.

    Command line: ``targetvaddump.py <folder> <malicious-flag>`` where a
    non-zero integer flag selects the "malicious" type and zero selects
    "benign". Configuration is read from ``conf/malrec.config`` into the
    module-level ``conf`` before the workers are started.
    """
    global conf

    folder = sys.argv[1]
    _type = "benign"

    conf = ConfigParser.ConfigParser()
    conf.read("conf/malrec.config")

    if bool(int(sys.argv[2])):
        _type = "malicious"

    files = glob.glob(folder + "/*")

    p = Pool(10)
    try:
        p.map(partial(execute, _type), files)
    finally:
        # Always shut the workers down, even when a task raised.
        p.close()
        p.join()


if __name__ == "__main__":
    main()