1   
  2   
  3   
  4   
  5   
  6  import binascii 
  7  import hashlib 
  8  import logging 
  9  import mmap 
 10  import os 
 11  import re 
 12  import subprocess 
 13   
 14  from lib.cuckoo.common.constants import CUCKOO_ROOT 
 15  from lib.cuckoo.common.whitelist import is_whitelisted_domain 
 16   
 17  try: 
 18      import magic 
 19      HAVE_MAGIC = True 
 20  except ImportError: 
 21      HAVE_MAGIC = False 
 22   
 23  try: 
 24      import pydeep 
 25      HAVE_PYDEEP = True 
 26  except ImportError: 
 27      HAVE_PYDEEP = False 
 28   
 29  try: 
 30      import yara 
 31      HAVE_YARA = True 
 32  except ImportError: 
 33      HAVE_YARA = False 
 34   
 35  try: 
 36      import pefile 
 37      HAVE_PEFILE = True 
 38  except ImportError: 
 39      HAVE_PEFILE = False 
 40   
 41  try: 
 42      import androguard 
 43      HAVE_ANDROGUARD = True 
 44  except ImportError: 
 45      HAVE_ANDROGUARD = False 
 46   
 47  log = logging.getLogger(__name__) 
 48   
 49  FILE_CHUNK_SIZE = 16 * 1024 
 50   
 51  URL_REGEX = ( 
 52       
 53      "(https?:\\/\\/)" 
 54      "(([" 
 55       
 56      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 
 57      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 
 58      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 
 59      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])]|" 
 60       
 61      "[a-zA-Z0-9\\.-]+)" 
 62       
 63      "(\\:\\d+)?" 
 64       
 65      "(/[\\(\\)a-zA-Z0-9_:%?=/\\.-]*)?" 
 66  ) 
 69      """Cuckoo custom dict.""" 
 70   
 73   
 74      __setattr__ = dict.__setitem__ 
 75      __delattr__ = dict.__delitem__ 
  76   
 78      """URL base object.""" 
 79   
 81          """@param url: URL""" 
 82          self.url = url 
   83   
 85      """Basic file object class with all useful utilities.""" 
 86   
 87       
 88      YARA_RULEPATH = os.path.join(CUCKOO_ROOT, "data", "yara", "index_%s.yar") 
 89   
 90       
 91       
 92      notified_yara = False 
 93      notified_pefile = False 
 94      notified_androguard = False 
 95   
 96       
 97       
 98      notified_pydeep = True 
 99   
100       
101       
102       
103      yara_rules = {} 
104   
106          """@param file_path: file path.""" 
107          self.file_path = file_path 
108   
109           
110          self._file_data = None 
111          self._crc32 = None 
112          self._md5 = None 
113          self._sha1 = None 
114          self._sha256 = None 
115          self._sha512 = None 
 116   
118          """Get file name. 
119          @return: file name. 
120          """ 
121          file_name = os.path.basename(self.file_path) 
122          return file_name 
 123   
125          return os.path.exists(self.file_path) and \ 
126              os.path.isfile(self.file_path) and \ 
127              os.path.getsize(self.file_path) != 0 
 128   
130          """Read file contents. 
131          @return: data. 
132          """ 
133          return self.file_data 
 134   
136          """Read file contents in chunks (generator).""" 
137   
138          with open(self.file_path, "rb") as fd: 
139              while True: 
140                  chunk = fd.read(FILE_CHUNK_SIZE) 
141                  if not chunk: 
142                      break 
143                  yield chunk 
 144   
146          """Calculate all possible hashes for this file.""" 
147          crc = 0 
148          md5 = hashlib.md5() 
149          sha1 = hashlib.sha1() 
150          sha256 = hashlib.sha256() 
151          sha512 = hashlib.sha512() 
152   
153          for chunk in self.get_chunks(): 
154              crc = binascii.crc32(chunk, crc) 
155              md5.update(chunk) 
156              sha1.update(chunk) 
157              sha256.update(chunk) 
158              sha512.update(chunk) 
159   
160          self._crc32 = "".join("%02X" % ((crc >> i) & 0xff) 
161                                for i in [24, 16, 8, 0]) 
162          self._md5 = md5.hexdigest() 
163          self._sha1 = sha1.hexdigest() 
164          self._sha256 = sha256.hexdigest() 
165          self._sha512 = sha512.hexdigest() 
 166   
167      @property 
169          if not self._file_data: 
170              self._file_data = open(self.file_path, "rb").read() 
171          return self._file_data 
 172   
174          """Get file size. 
175          @return: file size. 
176          """ 
177          return os.path.getsize(self.file_path) 
 178   
180          """Get CRC32. 
181          @return: CRC32. 
182          """ 
183          if not self._crc32: 
184              self.calc_hashes() 
185          return self._crc32 
 186   
188          """Get MD5. 
189          @return: MD5. 
190          """ 
191          if not self._md5: 
192              self.calc_hashes() 
193          return self._md5 
 194   
196          """Get SHA1. 
197          @return: SHA1. 
198          """ 
199          if not self._sha1: 
200              self.calc_hashes() 
201          return self._sha1 
 202   
204          """Get SHA256. 
205          @return: SHA256. 
206          """ 
207          if not self._sha256: 
208              self.calc_hashes() 
209          return self._sha256 
 210   
212          """ 
213          Get SHA512. 
214          @return: SHA512. 
215          """ 
216          if not self._sha512: 
217              self.calc_hashes() 
218          return self._sha512 
 219   
221          """Get SSDEEP. 
222          @return: SSDEEP. 
223          """ 
224          if not HAVE_PYDEEP: 
225              if not File.notified_pydeep: 
226                  File.notified_pydeep = True 
227                  log.warning("Unable to import pydeep (install with `pip install pydeep`)") 
228              return None 
229   
230          try: 
231              return pydeep.hash_file(self.file_path) 
232          except Exception: 
233              return None 
 234   
236          """Get MIME file type. 
237          @return: file type. 
238          """ 
239          file_type = None 
240          if HAVE_MAGIC: 
241              try: 
242                  ms = magic.open(magic.MAGIC_NONE) 
243                  ms.load() 
244                  file_type = ms.file(self.file_path) 
245              except: 
246                  try: 
247                      file_type = magic.from_file(self.file_path) 
248                  except Exception as e: 
249                      log.debug("Error getting magic from file %s: %s", 
250                                self.file_path, e) 
251              finally: 
252                  try: 
253                      ms.close() 
254                  except: 
255                      pass 
256   
257          if file_type is None: 
258              try: 
259                  p = subprocess.Popen(["file", "-b", self.file_path], 
260                                       stdout=subprocess.PIPE) 
261                  file_type = p.stdout.read().strip() 
262              except Exception as e: 
263                  log.debug("Error running file(1) on %s: %s", 
264                            self.file_path, e) 
265   
266          return file_type 
 267   
269          """Get MIME content file type (example: image/jpeg). 
270          @return: file content type. 
271          """ 
272          file_type = None 
273          if HAVE_MAGIC: 
274              try: 
275                  ms = magic.open(magic.MAGIC_MIME) 
276                  ms.load() 
277                  file_type = ms.file(self.file_path) 
278              except: 
279                  try: 
280                      file_type = magic.from_file(self.file_path, mime=True) 
281                  except: 
282                      pass 
283              finally: 
284                  try: 
285                      ms.close() 
286                  except: 
287                      pass 
288   
289          if file_type is None: 
290              try: 
291                  args = ["file", "-b", "--mime-type", self.file_path] 
292                  file_type = subprocess.check_output(args).strip() 
293              except: 
294                  pass 
295   
296          return file_type 
 297   
299          """Get the exported function names of this PE file.""" 
300          filetype = self.get_type() 
301          if "MS-DOS" not in filetype and "PE32" not in self.get_type(): 
302              return 
303   
304          if not HAVE_PEFILE: 
305              if not File.notified_pefile: 
306                  File.notified_pefile = True 
307                  log.warning("Unable to import pefile (`pip install pefile`)") 
308              return 
309   
310          try: 
311              pe = pefile.PE(self.file_path) 
312              if not hasattr(pe, "DIRECTORY_ENTRY_EXPORT"): 
313                  return 
314   
315              for export in pe.DIRECTORY_ENTRY_EXPORT.symbols: 
316                  if export.name: 
317                      yield export.name 
318          except Exception as e: 
319              log.warning("Error enumerating exported functions: %s", e) 
 320   
322          """Get the imported functions of this PE file.""" 
323          filetype = self.get_type() 
324          if "MS-DOS" not in filetype and "PE32" not in self.get_type(): 
325              return 
326   
327          if not HAVE_PEFILE: 
328              if not File.notified_pefile: 
329                  File.notified_pefile = True 
330                  log.warning("Unable to import pefile (`pip install pefile`)") 
331              return 
332   
333          try: 
334              pe = pefile.PE(self.file_path) 
335              if not hasattr(pe, "DIRECTORY_ENTRY_IMPORT"): 
336                  return 
337   
338              for imp in pe.DIRECTORY_ENTRY_IMPORT: 
339                  for entry in imp.imports: 
340                      yield dict(dll=imp.dll, 
341                                 name=entry.name, 
342                                 ordinal=entry.ordinal, 
343                                 hint=entry.hint, 
344                                 address=entry.address) 
345          except Exception as e: 
346              log.warning("Error enumerating imported functions: %s", e) 
 347   
348 -    def get_apk_entry(self): 
 349          """Get the entry point for this APK. The entry point is denoted by a 
350          package and main activity name.""" 
351          filetype = self.get_type() 
352          if "Zip archive data" not in filetype and "Java archive data" not in filetype: 
353              return "", "" 
354   
355          if not HAVE_ANDROGUARD: 
356              if not File.notified_androguard: 
357                  File.notified_androguard = True 
358                  log.warning("Unable to import androguard (`pip install androguard`)") 
359              return "", "" 
360   
361          try: 
362              a = androguard.core.bytecodes.apk.APK(self.file_path) 
363              if not a.is_valid_APK(): 
364                  return "", "" 
365   
366              package = a.get_package() 
367              if not package: 
368                  log.warning("Unable to find the main package, this analysis " 
369                              "will probably fail.") 
370                  return "", "" 
371   
372              main_activity = a.get_main_activity() 
373              if main_activity: 
374                  log.debug("Picked package %s and main activity %s.", 
375                            package, main_activity) 
376                  return package, main_activity 
377   
378              activities = a.get_activities() 
379              for activity in activities: 
380                  if "main" in activity or "start" in activity: 
381                      log.debug("Choosing package %s and main activity due to " 
382                                "its name %s.", package, activity) 
383                      return package, activity 
384   
385              if activities and activities[0]: 
386                  log.debug("Picked package %s and the first activity %s.", 
387                            package, activities[0]) 
388                  return package, activities[0] 
389          except Exception as e: 
390              log.warning("Error extracting package and main activity: %s.", e) 
391   
392          return "", "" 
 393   
395           
396          try: 
397              new = s.encode("utf-8") 
398          except UnicodeDecodeError: 
399              s = s.lstrip("uU").encode("hex").upper() 
400              s = " ".join(s[i:i+2] for i in range(0, len(s), 2)) 
401              new = "{ %s }" % s 
402   
403          return new 
 404   
406          """Extract matches from the Yara output for version 1.7.7.""" 
407          ret = [] 
408          for _, rule_matches in matches.items(): 
409              for match in rule_matches: 
410                  strings = set() 
411   
412                  for s in match["strings"]: 
413                      strings.add(self._yara_encode_string(s["data"])) 
414   
415                  ret.append({ 
416                      "name": match["rule"], 
417                      "meta": match["meta"], 
418                      "strings": list(strings), 
419                  }) 
420   
421          return ret 
 422   
423 -    def get_yara(self, category="binaries"): 
 475   
477          """Extract all URLs embedded in this file through a simple regex.""" 
478          if not os.path.getsize(self.file_path): 
479              return [] 
480   
481           
482          urls = set() 
483          f = open(self.file_path, "rb") 
484          m = mmap.mmap(f.fileno(), 0, access=mmap.PROT_READ) 
485   
486          for url in re.findall(URL_REGEX, m): 
487              if not is_whitelisted_domain(url[1]): 
488                  urls.add("".join(url)) 
489   
490          return list(urls) 
 491   
493          """Get all information available. 
494          @return: information dict. 
495          """ 
496          infos = {} 
497          infos["name"] = self.get_name() 
498          infos["path"] = self.file_path 
499          infos["size"] = self.get_size() 
500          infos["crc32"] = self.get_crc32() 
501          infos["md5"] = self.get_md5() 
502          infos["sha1"] = self.get_sha1() 
503          infos["sha256"] = self.get_sha256() 
504          infos["sha512"] = self.get_sha512() 
505          infos["ssdeep"] = self.get_ssdeep() 
506          infos["type"] = self.get_type() 
507          infos["yara"] = self.get_yara() 
508          infos["urls"] = self.get_urls() 
509          return infos 
  510