1   
  2   
  3   
  4   
  5   
  6  import datetime 
  7  import hashlib 
  8  import logging 
  9  import os.path 
 10  import struct 
 11   
 12  try: 
 13      import bson 
 14      HAVE_BSON = True 
 15  except ImportError: 
 16      HAVE_BSON = False 
 17  else: 
 18       
 19      if hasattr(bson, "BSON"): 
 20          bson_decode = lambda d: bson.BSON(d).decode() 
 21       
 22       
 23      elif hasattr(bson, "loads"): 
 24          bson_decode = lambda d: bson.loads(d) 
 25   
 26  from lib.cuckoo.common.abstracts import ProtocolHandler 
 27  from lib.cuckoo.common.utils import get_filename_from_path 
 28  from lib.cuckoo.common.exceptions import CuckooResultError 
 29   
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Upper bound on the declared size of a single BSON message (20 MiB).
# The parser stops reading when a message announces a larger size.
MAX_MESSAGE_LENGTH = 20 * 1024 * 1024
 34   
 36      return "0x%08x" % (v % 2**32) 
  37   
 39      return "0x%016x" % (v % 2**64) 
  40   
 42      if isinstance(v, (int, long)) and v < 0: 
 43          return v % 2**32 
 44   
 45       
 46      if isinstance(v, str): 
 47          return v.decode("latin-1") 
 48      return v 
  49   
 51       
 52       
 53       
 54       
 55           
 56   
 57       
 58      if isinstance(v, str): 
 59          return v.decode("latin-1") 
 60      return v 
  61   
 63      """Receives and interprets .bson logs from the monitor. 
 64   
 65      The monitor provides us with "info" messages that explain how the function 
 66      arguments will come through later on. This class remembers these info 
 67      mappings and then transforms the api call messages accordingly. 
 68   
 69      Other message types typically get passed through after renaming the 
 70      keys slightly. 
 71      """ 
 72      converters_32bit = { 
 73          None: default_converter_32bit, 
 74          "p": pointer_converter_32bit, 
 75          "x": pointer_converter_32bit, 
 76      } 
 77   
 78      converters_64bit = { 
 79          None: default_converter_64bit, 
 80          "p": pointer_converter_64bit, 
 81          "x": pointer_converter_32bit, 
 82      } 
 83   
 85          self.fd = self.handler 
 86   
 87          self.infomap = {} 
 88          self.flags_value = {} 
 89          self.flags_bitmask = {} 
 90          self.pid = None 
 91          self.is_64bit = False 
 92          self.buffer_sha1 = None 
 93   
 94          if not HAVE_BSON: 
 95              log.critical( 
 96                  "Starting BsonParser, but bson is not available! " 
 97                  "(install with `pip install bson`)" 
 98              ) 
  99   
101           
102          for argument, values in self.flags_value[apiname].items(): 
103              if isinstance(argdict[argument], str): 
104                  value = int(argdict[argument], 16) 
105              else: 
106                  value = argdict[argument] 
107   
108              if value in values: 
109                  flags[argument] = values[value] 
110   
111           
112          for argument, values in self.flags_bitmask[apiname].items(): 
113              if argument in flags: 
114                  continue 
115   
116              flags[argument] = [] 
117   
118              if isinstance(argdict[argument], str): 
119                  value = int(argdict[argument], 16) 
120              else: 
121                  value = argdict[argument] 
122   
123              for key, flag in values: 
124                   
125                  if (value & key) == key: 
126                      flags[argument].append(flag) 
127   
128              flags[argument] = "|".join(flags[argument]) 
 129   
131          """Determines which unserializers (or converters) have to be used in 
132          order to parse the various arguments for this function call. Keeps in 
133          mind whether the current bson is 32-bit or 64-bit.""" 
134          argnames, converters = [], [] 
135   
136          for argument in arginfo: 
137              if isinstance(argument, (tuple, list)): 
138                  argument, argtype = argument 
139              else: 
140                  argtype = None 
141   
142              if self.is_64bit: 
143                  converter = self.converters_64bit[argtype] 
144              else: 
145                  converter = self.converters_32bit[argtype] 
146   
147              argnames.append(argument) 
148              converters.append(converter) 
149   
150          return argnames, converters 
 151   
153          self.fd.seek(0) 
154   
155          while True: 
156              data = self.fd.read(4) 
157              if not data: 
158                  return 
159   
160              if len(data) != 4: 
161                  log.critical("BsonParser lacking data.") 
162                  return 
163   
164              blen = struct.unpack("I", data)[0] 
165              if blen > MAX_MESSAGE_LENGTH: 
166                  log.critical( 
167                      "BSON message larger than MAX_MESSAGE_LENGTH, " 
168                      "stopping handler." 
169                  ) 
170                  return 
171   
172              data += self.fd.read(blen-4) 
173              if len(data) < blen: 
174                  log.critical("BsonParser lacking data.") 
175                  return 
176   
177              try: 
178                  dec = bson_decode(data) 
179              except Exception as e: 
180                  log.warning( 
181                      "BsonParser decoding problem %s on data[:50] %s", 
182                      e, repr(data[:50]) 
183                  ) 
184                  return 
185   
186              mtype = dec.get("type", "none") 
187              index = dec.get("I", -1) 
188   
189              if mtype == "info": 
190                   
191                  name = dec.get("name", "NONAME") 
192                  arginfo = dec.get("args", []) 
193                  category = dec.get("category") 
194   
195                  argnames, converters = self.determine_unserializers(arginfo) 
196                  self.infomap[index] = name, arginfo, argnames, converters, category 
197   
198                  if dec.get("flags_value"): 
199                      self.flags_value[name] = {} 
200                      for arg, values in dec["flags_value"].items(): 
201                          self.flags_value[name][arg] = dict(values) 
202   
203                  if dec.get("flags_bitmask"): 
204                      self.flags_bitmask[name] = {} 
205                      for arg, values in dec["flags_bitmask"].items(): 
206                          self.flags_bitmask[name][arg] = values 
207                  continue 
208   
209               
210              if mtype == "buffer": 
211                  buf = dec.get("buffer") 
212                  sha1 = dec.get("checksum") 
213                  self.buffer_sha1 = hashlib.sha1(buf).hexdigest() 
214   
215                   
216                  if sha1 != self.buffer_sha1: 
217                      log.warning("Incorrect sha1 passed along for a buffer.") 
218   
219                   
220                   
221                   
222                  from lib.cuckoo.core.resultserver import ResultHandler 
223   
224                  if isinstance(self.fd, ResultHandler): 
225                      filepath = os.path.join( 
226                          self.fd.storagepath, "buffer", self.buffer_sha1 
227                      ) 
228                      with open(filepath, "wb") as f: 
229                          f.write(buf) 
230   
231                  continue 
232   
233              tid = dec.get("T", 0) 
234              time = dec.get("t", 0) 
235   
236              parsed = { 
237                  "type": mtype, 
238                  "tid": tid, 
239                  "time": time, 
240              } 
241   
242              if mtype == "debug": 
243                  parsed["message"] = dec.get("msg", "") 
244                  log.info("Debug message from monitor: %s", parsed["message"]) 
245              else: 
246                   
247                  if index not in self.infomap: 
248                      log.warning("Got API with unknown index - monitor needs " 
249                                  "to explain first: {0}".format(dec)) 
250                      continue 
251   
252                  apiname, arginfo, argnames, converters, category = self.infomap[index] 
253                  args = dec.get("args", []) 
254   
255                  if len(args) != len(argnames): 
256                      log.warning( 
257                          "Inconsistent arg count (compared to arg names) " 
258                          "on %s: %s names %s", dec, argnames, apiname 
259                      ) 
260                      continue 
261   
262                  argdict = {} 
263                  for idx, value in enumerate(args): 
264                      argdict[argnames[idx]] = converters[idx](value) 
265   
266                   
267                  if apiname == "__process__": 
268                      parsed["type"] = "process" 
269   
270                      if "TimeLow" in argdict: 
271                          timelow = argdict["TimeLow"] 
272                          timehigh = argdict["TimeHigh"] 
273   
274                          parsed["pid"] = pid = argdict["ProcessIdentifier"] 
275                          parsed["ppid"] = argdict["ParentProcessIdentifier"] 
276                          modulepath = argdict["ModulePath"] 
277   
278                      elif "time_low" in argdict: 
279                          timelow = argdict["time_low"] 
280                          timehigh = argdict["time_high"] 
281   
282                          if "pid" in argdict: 
283                              parsed["pid"] = pid = argdict["pid"] 
284                              parsed["ppid"] = argdict["ppid"] 
285                          else: 
286                              parsed["pid"] = pid = argdict["process_identifier"] 
287                              parsed["ppid"] = argdict["parent_process_identifier"] 
288   
289                          modulepath = argdict["module_path"] 
290   
291                      else: 
292                          raise CuckooResultError( 
293                              "I don't recognize the bson log contents." 
294                          ) 
295   
296                       
297                      vmtimeunix = (timelow + (timehigh << 32)) 
298                      vmtimeunix = vmtimeunix / 10000000.0 - 11644473600 
299                      vmtime = datetime.datetime.fromtimestamp(vmtimeunix) 
300                      parsed["first_seen"] = vmtime 
301   
302                      procname = get_filename_from_path(modulepath) 
303                      parsed["process_path"] = modulepath 
304                      parsed["process_name"] = procname 
305                      parsed["command_line"] = argdict.get("command_line") 
306   
307                       
308                      if argdict.get("is_64bit"): 
309                          self.is_64bit = True 
310   
311                       
312                      parsed["track"] = bool(argdict.get("track", 1)) 
313                      parsed["modules"] = argdict.get("modules", {}) 
314   
315                      self.pid = pid 
316   
317                  elif apiname == "__thread__": 
318                      parsed["pid"] = pid = argdict["ProcessIdentifier"] 
319   
320                   
321                       
322                       
323                       
324                       
325                       
326   
327                  else: 
328                      parsed["type"] = "apicall" 
329                      parsed["pid"] = self.pid 
330                      parsed["api"] = apiname 
331                      parsed["category"] = category 
332                      parsed["status"] = argdict.pop("is_success", 1) 
333                      parsed["return_value"] = argdict.pop("retval", 0) 
334                      parsed["arguments"] = argdict 
335                      parsed["flags"] = {} 
336   
337                      parsed["stacktrace"] = dec.get("s", []) 
338                      parsed["uniqhash"] = dec.get("h", 0) 
339   
340                      if "e" in dec and "E" in dec: 
341                          parsed["last_error"] = dec["e"] 
342                          parsed["nt_status"] = dec["E"] 
343   
344                      if apiname in self.flags_value: 
345                          self.resolve_flags(apiname, argdict, parsed["flags"]) 
346   
347                      if self.buffer_sha1: 
348                          parsed["buffer"] = self.buffer_sha1 
349                          self.buffer_sha1 = None 
350   
351              yield parsed 
  352