}()}.
- """
- def _replFunc(*args):
- return [replStr]
- return _replFunc
-
-def removeQuotes(s,l,t):
- """Helper parse action for removing quotation marks from parsed quoted strings.
- To use, add this parse action to quoted string using::
- quotedString.setParseAction( removeQuotes )
- """
- return t[0][1:-1]
-
-def upcaseTokens(s,l,t):
- """Helper parse action to convert tokens to upper case."""
- return [ tt.upper() for tt in map(_ustr,t) ]
-
-def downcaseTokens(s,l,t):
- """Helper parse action to convert tokens to lower case."""
- return [ tt.lower() for tt in map(_ustr,t) ]
-
-def keepOriginalText(s,startLoc,t):
- """DEPRECATED - use new helper method C{L{originalTextFor}}.
- Helper parse action to preserve original parsed text,
- overriding any nested parse actions."""
- try:
- endloc = getTokensEndLoc()
- except ParseException:
- raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
- del t[:]
- t += ParseResults(s[startLoc:endloc])
- return t
-
-def getTokensEndLoc():
- """Method to be called from within a parse action to determine the end
- location of the parsed tokens."""
- import inspect
- fstack = inspect.stack()
- try:
- # search up the stack (through intervening argument normalizers) for correct calling routine
- for f in fstack[2:]:
- if f[3] == "_parseNoCache":
- endloc = f[0].f_locals["loc"]
- return endloc
- else:
- raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
- finally:
- del fstack
-
-def _makeTags(tagStr, xml):
- """Internal helper to construct opening and closing tag expressions, given a tag name"""
- if isinstance(tagStr,basestring):
- resname = tagStr
- tagStr = Keyword(tagStr, caseless=not xml)
- else:
- resname = tagStr.name
-
- tagAttrName = Word(alphas,alphanums+"_-:")
- if (xml):
- tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
- openTag = Suppress("<") + tagStr("tag") + \
- Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
- Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
- else:
- printablesLessRAbrack = "".join(c for c in printables if c not in ">")
- tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
- openTag = Suppress("<") + tagStr("tag") + \
- Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
- Optional( Suppress("=") + tagAttrValue ) ))) + \
- Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
- closeTag = Combine(_L("") + tagStr + ">")
-
- openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
- closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("%s>" % tagStr)
- openTag.tag = resname
- closeTag.tag = resname
- return openTag, closeTag
-
-def makeHTMLTags(tagStr):
- """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
- return _makeTags( tagStr, False )
-
-def makeXMLTags(tagStr):
- """Helper to construct opening and closing tag expressions for XML, given a tag name"""
- return _makeTags( tagStr, True )
-
-def withAttribute(*args,**attrDict):
- """Helper to create a validating parse action to be used with start tags created
- with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
- with a required attribute value, to avoid false matches on common tags such as
- C{<TD>} or C{<DIV>}.
-
- Call C{withAttribute} with a series of attribute names and values. Specify the list
- of filter attributes names and values as:
- - keyword arguments, as in C{(align="right")}, or
- - as an explicit dict with C{**} operator, when an attribute name is also a Python
- reserved word, as in C{**{"class":"Customer", "align":"right"}}
- - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
- For attribute names with a namespace prefix, you must use the second form. Attribute
- names are matched insensitive to upper/lower case.
-
- To verify that the attribute exists, but without specifying a value, pass
- C{withAttribute.ANY_VALUE} as the value.
- """
- if args:
- attrs = args[:]
- else:
- attrs = attrDict.items()
- attrs = [(k,v) for k,v in attrs]
- def pa(s,l,tokens):
- for attrName,attrValue in attrs:
- if attrName not in tokens:
- raise ParseException(s,l,"no matching attribute " + attrName)
- if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
- raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
- (attrName, tokens[attrName], attrValue))
- return pa
-withAttribute.ANY_VALUE = object()
-
-opAssoc = _Constants()
-opAssoc.LEFT = object()
-opAssoc.RIGHT = object()
-
-def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
- """Helper method for constructing grammars of expressions made up of
- operators working in a precedence hierarchy. Operators may be unary or
- binary, left- or right-associative. Parse actions can also be attached
- to operator expressions.
-
- Parameters:
- - baseExpr - expression representing the most basic element for the nested
- - opList - list of tuples, one for each operator precedence level in the
- expression grammar; each tuple is of the form
- (opExpr, numTerms, rightLeftAssoc, parseAction), where:
- - opExpr is the pyparsing expression for the operator;
- may also be a string, which will be converted to a Literal;
- if numTerms is 3, opExpr is a tuple of two expressions, for the
- two operators separating the 3 terms
- - numTerms is the number of terms for this operator (must
- be 1, 2, or 3)
- - rightLeftAssoc is the indicator whether the operator is
- right or left associative, using the pyparsing-defined
- constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
- - parseAction is the parse action to be associated with
- expressions matching this operator expression (the
- parse action tuple member may be omitted)
- - lpar - expression for matching left-parentheses (default=Suppress('('))
- - rpar - expression for matching right-parentheses (default=Suppress(')'))
- """
- ret = Forward()
- lastExpr = baseExpr | ( lpar + ret + rpar )
- for i,operDef in enumerate(opList):
- opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
- if arity == 3:
- if opExpr is None or len(opExpr) != 2:
- raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
- opExpr1, opExpr2 = opExpr
- thisExpr = Forward()#.setName("expr%d" % i)
- if rightLeftAssoc == opAssoc.LEFT:
- if arity == 1:
- matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
- elif arity == 2:
- if opExpr is not None:
- matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
- else:
- matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
- elif arity == 3:
- matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
- Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
- else:
- raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
- elif rightLeftAssoc == opAssoc.RIGHT:
- if arity == 1:
- # try to avoid LR with this extra test
- if not isinstance(opExpr, Optional):
- opExpr = Optional(opExpr)
- matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
- elif arity == 2:
- if opExpr is not None:
- matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
- else:
- matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
- elif arity == 3:
- matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
- Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
- else:
- raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
- else:
- raise ValueError("operator must indicate right or left associativity")
- if pa:
- matchExpr.setParseAction( pa )
- thisExpr <<= ( matchExpr | lastExpr )
- lastExpr = thisExpr
- ret <<= lastExpr
- return ret
-operatorPrecedence = infixNotation
-
-dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
-sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
-quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
-unicodeString = Combine(_L('u') + quotedString.copy())
-
-def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
- """Helper method for defining nested lists enclosed in opening and closing
- delimiters ("(" and ")" are the default).
-
- Parameters:
- - opener - opening character for a nested list (default="("); can also be a pyparsing expression
- - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
- - content - expression for items within the nested lists (default=None)
- - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
-
- If an expression is not provided for the content argument, the nested
- expression will capture all whitespace-delimited content between delimiters
- as a list of separate values.
-
- Use the C{ignoreExpr} argument to define expressions that may contain
- opening or closing characters that should not be treated as opening
- or closing characters for nesting, such as quotedString or a comment
- expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
- The default is L{quotedString}, but if no expressions are to be ignored,
- then pass C{None} for this argument.
- """
- if opener == closer:
- raise ValueError("opening and closing strings cannot be the same")
- if content is None:
- if isinstance(opener,basestring) and isinstance(closer,basestring):
- if len(opener) == 1 and len(closer)==1:
- if ignoreExpr is not None:
- content = (Combine(OneOrMore(~ignoreExpr +
- CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
- ).setParseAction(lambda t:t[0].strip()))
- else:
- content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
- ).setParseAction(lambda t:t[0].strip()))
- else:
- if ignoreExpr is not None:
- content = (Combine(OneOrMore(~ignoreExpr +
- ~Literal(opener) + ~Literal(closer) +
- CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
- ).setParseAction(lambda t:t[0].strip()))
- else:
- content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
- CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
- ).setParseAction(lambda t:t[0].strip()))
- else:
- raise ValueError("opening and closing arguments must be strings if no content expression is given")
- ret = Forward()
- if ignoreExpr is not None:
- ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
- else:
- ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
- return ret
-
-def indentedBlock(blockStatementExpr, indentStack, indent=True):
- """Helper method for defining space-delimited indentation blocks, such as
- those used to define block statements in Python source code.
-
- Parameters:
- - blockStatementExpr - expression defining syntax of statement that
- is repeated within the indented block
- - indentStack - list created by caller to manage indentation stack
- (multiple statementWithIndentedBlock expressions within a single grammar
- should share a common indentStack)
- - indent - boolean indicating whether block must be indented beyond the
- the current level; set to False for block of left-most statements
- (default=True)
-
- A valid block must contain at least one C{blockStatement}.
- """
- def checkPeerIndent(s,l,t):
- if l >= len(s): return
- curCol = col(l,s)
- if curCol != indentStack[-1]:
- if curCol > indentStack[-1]:
- raise ParseFatalException(s,l,"illegal nesting")
- raise ParseException(s,l,"not a peer entry")
-
- def checkSubIndent(s,l,t):
- curCol = col(l,s)
- if curCol > indentStack[-1]:
- indentStack.append( curCol )
- else:
- raise ParseException(s,l,"not a subentry")
-
- def checkUnindent(s,l,t):
- if l >= len(s): return
- curCol = col(l,s)
- if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
- raise ParseException(s,l,"not an unindent")
- indentStack.pop()
-
- NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
- INDENT = Empty() + Empty().setParseAction(checkSubIndent)
- PEER = Empty().setParseAction(checkPeerIndent)
- UNDENT = Empty().setParseAction(checkUnindent)
- if indent:
- smExpr = Group( Optional(NL) +
- #~ FollowedBy(blockStatementExpr) +
- INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
- else:
- smExpr = Group( Optional(NL) +
- (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
- blockStatementExpr.ignore(_bslash + LineEnd())
- return smExpr
-
-alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
-punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
-
-anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
-commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
-_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
-replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
-
-# it's easy to get these comment structures wrong - they're very common, so may as well make them available
-cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
-
-htmlComment = Regex(r"")
-restOfLine = Regex(r".*").leaveWhitespace()
-dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
-cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist))
- print ("tokens = " + str(tokens))
- print ("tokens.columns = " + str(tokens.columns))
- print ("tokens.tables = " + str(tokens.tables))
- print (tokens.asXML("SQL",True))
- except ParseBaseException as err:
- print (teststring + "->")
- print (err.line)
- print (" "*(err.column-1) + "^")
- print (err)
- print()
-
- selectToken = CaselessLiteral( "select" )
- fromToken = CaselessLiteral( "from" )
-
- ident = Word( alphas, alphanums + "_$" )
- columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
- columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
- tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
- tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
- simpleSQL = ( selectToken + \
- ( '*' | columnNameList ).setResultsName( "columns" ) + \
- fromToken + \
- tableNameList.setResultsName( "tables" ) )
-
- test( "SELECT * from XYZZY, ABC" )
- test( "select * from SYS.XYZZY" )
- test( "Select A from Sys.dual" )
- test( "Select AA,BB,CC from Sys.dual" )
- test( "Select A, B, C from Sys.dual" )
- test( "Select A, B, C from Sys.dual" )
- test( "Xelect A, B, C from Sys.dual" )
- test( "Select A, B, C frox Sys.dual" )
- test( "Select" )
- test( "Select ^^^ frox Sys.dual" )
- test( "Select A, B, C from Sys.dual, Table2 " )
diff -Nru remnux-oletools-0.51a/oletools/thirdparty/pyparsing/README remnux-oletools-0.51a/oletools/thirdparty/pyparsing/README
--- remnux-oletools-0.51a/oletools/thirdparty/pyparsing/README 2016-11-04 21:28:21.000000000 +0000
+++ remnux-oletools-0.51a/oletools/thirdparty/pyparsing/README 1970-01-01 00:00:00.000000000 +0000
@@ -1,72 +0,0 @@
-====================================
-PyParsing -- A Python Parsing Module
-====================================
-
-Introduction
-============
-
-The pyparsing module is an alternative approach to creating and executing
-simple grammars, vs. the traditional lex/yacc approach, or the use of
-regular expressions. The pyparsing module provides a library of classes
-that client code uses to construct the grammar directly in Python code.
-
-Here is a program to parse "Hello, World!" (or any greeting of the form
-"<salutation>, <addressee>!"):
-
- from pyparsing import Word, alphas
- greet = Word( alphas ) + "," + Word( alphas ) + "!"
- hello = "Hello, World!"
- print hello, "->", greet.parseString( hello )
-
-The program outputs the following:
-
- Hello, World! -> ['Hello', ',', 'World', '!']
-
-The Python representation of the grammar is quite readable, owing to the
-self-explanatory class names, and the use of '+', '|' and '^' operator
-definitions.
-
-The parsed results returned from parseString() can be accessed as a
-nested list, a dictionary, or an object with named attributes.
-
-The pyparsing module handles some of the problems that are typically
-vexing when writing text parsers:
-- extra or missing whitespace (the above program will also handle
- "Hello,World!", "Hello , World !", etc.)
-- quoted strings
-- embedded comments
-
-The .zip file includes examples of a simple SQL parser, simple CORBA IDL
-parser, a config file parser, a chemical formula parser, and a four-
-function algebraic notation parser. It also includes a simple how-to
-document, and a UML class diagram of the library's classes.
-
-
-
-Installation
-============
-
-Do the usual:
-
- python setup.py install
-
-(pyparsing requires Python 2.3.2 or later.)
-
-
-Documentation
-=============
-
-See:
-
- HowToUsePyparsing.html
-
-
-License
-=======
-
- MIT License. See header of pyparsing.py
-
-History
-=======
-
- See CHANGES file.
diff -Nru remnux-oletools-0.51a/oletools/thirdparty/tablestream/tablestream.py remnux-oletools-0.51a/oletools/thirdparty/tablestream/tablestream.py
--- remnux-oletools-0.51a/oletools/thirdparty/tablestream/tablestream.py 2016-11-04 21:28:21.000000000 +0000
+++ remnux-oletools-0.51a/oletools/thirdparty/tablestream/tablestream.py 1970-01-01 00:00:00.000000000 +0000
@@ -1,396 +0,0 @@
-#!/usr/bin/env python
-"""
-tablestream
-
-tablestream can format table data for pretty printing as text,
-to be displayed on the console or written to any file-like object.
-The table data can be provided as rows, each row is an iterable of
-cells. The text in each cell is wrapped to fit into a maximum width
-set for each column.
-Contrary to many table pretty printing libraries, TableStream writes
-each row to the output as soon as it is provided, and the whole table
-does not need to be built in memory before printing.
-It is therefore suitable for large tables, or tables that take time to
-be processed row by row.
-
-Author: Philippe Lagadec - http://www.decalage.info
-License: BSD, see source code or documentation
-"""
-
-#=== LICENSE ==================================================================
-
-# tablestream is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without modification,
-# are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from __future__ import print_function
-
-#------------------------------------------------------------------------------
-# CHANGELOG:
-# 2015-11-01 v0.01 PL: - first version
-# 2016-01-01 v0.02 PL: - added styles, color support
-# 2016-04-19 v0.03 PL: - enable colorclass on Windows, fixed issue #39
-# 2016-05-25 v0.04 PL: - updated for colorclass 2.2.0 (now a package)
-# 2016-07-29 v0.05 PL: - fixed oletools issue #57, bug when importing colorclass
-# 2016-07-31 v0.06 PL: - handle newline characters properly in each cell
-# 2016-08-28 v0.07 PL: - support for both Python 2.6+ and 3.x
-# - all cells are converted to unicode
-
-__version__ = '0.07'
-
-#------------------------------------------------------------------------------
-# TODO:
-# - several styles
-# - colorized rows or cells
-# - automatic width for the last column, based on max total width
-# - automatic width for selected columns, based on N first lines
-# - determine the console width
-
-# === IMPORTS =================================================================
-
-import textwrap
-import sys, os
-
-# add the thirdparty subfolder to sys.path (absolute+normalized path):
-_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
-# print('_thismodule_dir = %r' % _thismodule_dir)
-# assumption: this module is in a subfolder of thirdparty:
-_thirdparty_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
-# print('_thirdparty_dir = %r' % _thirdparty_dir)
-if not _thirdparty_dir in sys.path:
- sys.path.insert(0, _thirdparty_dir)
-
-import colorclass
-
-# On Windows, colorclass needs to be enabled:
-if os.name == 'nt':
- colorclass.Windows.enable(auto_colors=True)
-
-
-# === PYTHON 2+3 SUPPORT ======================================================
-
-if sys.version_info[0] >= 3:
- # Python 3 specific adaptations
- # py3 range = py2 xrange
- xrange = range
- ustr = str
- # byte strings for to_ustr (with py3, bytearray supports encoding):
- byte_strings = (bytes, bytearray)
-else:
- # Python 2 specific adaptations
- ustr = unicode
- # byte strings for to_ustr (with py2, bytearray does not support encoding):
- byte_strings = bytes
-
-
-# === FUNCTIONS ==============================================================
-
-def to_ustr(obj, encoding='utf8', errors='replace'):
- """
- convert an object to unicode, using the appropriate method
- :param obj: any object, str, bytes or unicode
- :return: unicode string (ustr)
- """
- # if the object is already unicode, return it unchanged:
- if isinstance(obj, ustr):
- return obj
- # if it is a bytes string, decode it using the provided encoding
- elif isinstance(obj, byte_strings):
- return ustr(obj, encoding=encoding, errors=errors)
- # else just convert it to unicode:
- # (an exception is raised if we specify encoding in this case)
- else:
- return ustr(obj)
-
-
-
-# === CLASSES =================================================================
-
-
-class TableStyle(object):
- """
- Style for a TableStream.
- This base class can be derived to create new styles.
- Default style:
- +------+---+
- |Header| +
- +------+---+
- | | |
- +------+---+
- """
- # Header rows:
- header_top = True
- header_top_left = u'+'
- header_top_horiz = u'-'
- header_top_middle = u'+'
- header_top_right = u'+'
-
- header_vertical_left = u'|'
- header_vertical_middle = u'|'
- header_vertical_right = u'|'
-
- # Separator line between header and normal rows:
- header_sep = True
- header_sep_left = u'+'
- header_sep_horiz = u'-'
- header_sep_middle = u'+'
- header_sep_right = u'+'
-
- # Top row if there is no header:
- noheader_top = True
- noheader_top_left = u'+'
- noheader_top_horiz = u'-'
- noheader_top_middle = u'+'
- noheader_top_right = u'+'
-
- # Normal rows
- vertical_left = u'|'
- vertical_middle = u'|'
- vertical_right = u'|'
-
- # Separator line between rows:
- sep = False
- sep_left = u'+'
- sep_horiz = u'-'
- sep_middle = u'+'
- sep_right = u'+'
-
- # Bottom line
- bottom = True
- bottom_left = u'+'
- bottom_horiz = u'-'
- bottom_middle = u'+'
- bottom_right = u'+'
-
-
-class TableStyleSlim(object):
- """
- Style for a TableStream.
- Example:
- ------+---
- Header|
- ------+---
- |
- ------+---
- """
- # Header rows:
- header_top = True
- header_top_left = u''
- header_top_horiz = u'-'
- header_top_middle = u'+'
- header_top_right = u''
-
- header_vertical_left = u''
- header_vertical_middle = u'|'
- header_vertical_right = u''
-
- # Separator line between header and normal rows:
- header_sep = True
- header_sep_left = u''
- header_sep_horiz = u'-'
- header_sep_middle = u'+'
- header_sep_right = u''
-
- # Top row if there is no header:
- noheader_top = True
- noheader_top_left = u''
- noheader_top_horiz = u'-'
- noheader_top_middle = u'+'
- noheader_top_right = u''
-
- # Normal rows
- vertical_left = u''
- vertical_middle = u'|'
- vertical_right = u''
-
- # Separator line between rows:
- sep = False
- sep_left = u''
- sep_horiz = u'-'
- sep_middle = u'+'
- sep_right = u''
-
- # Bottom line
- bottom = True
- bottom_left = u''
- bottom_horiz = u'-'
- bottom_middle = u'+'
- bottom_right = u''
-
-
-
-class TableStream(object):
- """
- a TableStream object can format table data for pretty printing as text,
- to be displayed on the console or written to any file-like object.
- The table data can be provided as rows, each row is an iterable of
- cells. The text in each cell is wrapped to fit into a maximum width
- set for each column.
- Contrary to many table pretty printing libraries, TableStream writes
- each row to the output as soon as it is provided, and the whole table
- does not need to be built in memory before printing.
- It is therefore suitable for large tables, or tables that take time to
- be processed row by row.
- """
-
- def __init__(self, column_width, header_row=None, style=TableStyle,
- outfile=sys.stdout, encoding_in='utf8', encoding_out='utf8'):
- '''
- Constructor for class TableStream
- :param column_width: tuple or list containing the width of each column
- :param header_row: tuple or list containing the header row text
- :param style: style for the table, a TableStyle object
- :param outfile: output file (sys.stdout by default to print on the console)
- :param encoding_in: encoding used when the input text is bytes (UTF-8 by default)
- :param encoding_out: encoding used for the output (UTF-8 by default)
- '''
- self.column_width = column_width
- self.num_columns = len(column_width)
- self.header_row = header_row
- self.encoding_in = encoding_in
- self.encoding_out = encoding_out
- assert (header_row is None) or len(header_row) == self.num_columns
- self.style = style
- self.outfile = outfile
- if header_row is not None:
- self.write_header()
- elif self.style.noheader_top:
- self.write_noheader_top()
-
-
- def write(self, s):
- """
- shortcut for self.outfile.write()
- """
- self.outfile.write(s)
-
- def write_row(self, row, last=False, colors=None):
- assert len(row) == self.num_columns
- columns = []
- max_lines = 0
- for i in xrange(self.num_columns):
- cell = row[i]
- # Convert to string:
- cell = to_ustr(cell, encoding=self.encoding_in)
- # Wrap cell text according to the column width
- # TODO: use a TextWrapper object for each column instead
- # split the string if it contains newline characters, otherwise
- # textwrap replaces them with spaces:
- column = []
- for line in cell.splitlines():
- column.extend(textwrap.wrap(line, width=self.column_width[i]))
- # apply colors to each line of the cell if needed:
- if colors is not None and self.outfile.isatty():
- color = colors[i]
- if color:
- for j in xrange(len(column)):
- # print '%r: %s' % (column[j], type(column[j]))
- column[j] = colorclass.Color(u'{auto%s}%s{/%s}' % (color, column[j], color))
- columns.append(column)
- # determine which column has the highest number of lines
- max_lines = max(len(columns[i]), max_lines)
- # transpose: write output line by line
- for j in xrange(max_lines):
- self.write(self.style.vertical_left)
- for i in xrange(self.num_columns):
- column = columns[i]
- if j file_name is not a glob
- --> file?name is a glob
- --> file* is a glob
- --> file[-._]name is a glob
- --> file[?]name is not a glob (matches literal "file?name")
- --> file[*]name is not a glob (matches literal "file*name")
- --> file[-]name is not a glob (matches literal "file-name")
- --> file-name is not a glob
-
- Also, obviously incorrect globs are treated as non-globs
- --> file[name is not a glob (matches literal "file[name")
- --> file]-[name is treated as a glob
- (it is not a valid glob but detecting errors like this requires
- sophisticated regular expression matching)
-
- Python's glob also works with globs in directory-part of path
- --> dir-part of path is analyzed just like filename-part
- --> thirdparty/*/xglob.py is a (valid) glob
-
- TODO: create a correct regexp to test for validity of ranges
- """
-
- # remove escaped special chars
- cleaned = filespec.replace('[*]', '').replace('[?]', '') \
- .replace('[[]', '').replace('[]]', '').replace('[-]', '')
-
- # check if special chars remain
- return '*' in cleaned or '?' in cleaned or \
- ('[' in cleaned and ']' in cleaned)
diff -Nru remnux-oletools-0.51a/oletools/thirdparty/xxxswf/LICENSE.txt remnux-oletools-0.51a/oletools/thirdparty/xxxswf/LICENSE.txt
--- remnux-oletools-0.51a/oletools/thirdparty/xxxswf/LICENSE.txt 2016-11-04 21:28:21.000000000 +0000
+++ remnux-oletools-0.51a/oletools/thirdparty/xxxswf/LICENSE.txt 1970-01-01 00:00:00.000000000 +0000
@@ -1,3 +0,0 @@
-xxxswf.py is published by Alexander Hanel on
-http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
-without explicit license.
\ No newline at end of file
diff -Nru remnux-oletools-0.51a/oletools/thirdparty/xxxswf/xxxswf.py remnux-oletools-0.51a/oletools/thirdparty/xxxswf/xxxswf.py
--- remnux-oletools-0.51a/oletools/thirdparty/xxxswf/xxxswf.py 2016-11-04 21:28:21.000000000 +0000
+++ remnux-oletools-0.51a/oletools/thirdparty/xxxswf/xxxswf.py 1970-01-01 00:00:00.000000000 +0000
@@ -1,373 +0,0 @@
-# xxxswf.py was created by alexander dot hanel at gmail dot com
-# version 0.1
-# Date - 12-07-2011
-# To do list
-# - Tag Parser
-# - ActionScript Decompiler
-
-# 2016-11-01 PL: - A few changes for Python 2+3 compatibility
-
-import fnmatch
-import hashlib
-import imp
-import math
-import os
-import re
-import struct
-import sys
-import time
-from io import BytesIO
-from optparse import OptionParser
-import zlib
-
-def checkMD5(md5):
-# checks if MD5 has been seen in MD5 Dictionary
-# MD5Dict contains the MD5 and the CVE
-# For { 'MD5':'CVE', 'MD5-1':'CVE-1', 'MD5-2':'CVE-2'}
- MD5Dict = {'c46299a5015c6d31ad5766cb49e4ab4b':'CVE-XXXX-XXXX'}
- if MD5Dict.get(md5):
- print('\t[BAD] MD5 Match on', MD5Dict.get(md5))
- return
-
-def bad(f):
- for idx, x in enumerate(findSWF(f)):
- tmp = verifySWF(f,x)
- if tmp != None:
- yaraScan(tmp)
- checkMD5(hashBuff(tmp))
- return
-
-def yaraScan(d):
-# d = buffer of the read file
-# Scans SWF using Yara
- # test if yara module is installed
- # if not Yara can be downloaded from http://code.google.com/p/yara-project/
- try:
- imp.find_module('yara')
- import yara
- except ImportError:
- print('\t[ERROR] Yara module not installed - aborting scan')
- return
- # test for yara compile errors
- try:
- r = yara.compile(r'rules.yar')
- except:
- pass
- print('\t[ERROR] Yara compile error - aborting scan')
- return
- # get matches
- m = r.match(data=d)
- # print matches
- for X in m:
- print('\t[BAD] Yara Signature Hit: %s' % X)
- return
-
-def findSWF(d):
-# d = buffer of the read file
-# Search for SWF Header Sigs in files
- return [tmp.start() for tmp in re.finditer(b'CWS|FWS', d.read())]
-
-def hashBuff(d):
-# d = buffer of the read file
-# This function hashes the buffer
-# source: http://stackoverflow.com/q/5853830
- if type(d) is str:
- d = BytesIO(d)
- md5 = hashlib.md5()
- while True:
- data = d.read(128)
- if not data:
- break
- md5.update(data)
- return md5.hexdigest()
-
-def verifySWF(f,addr):
- # Start of SWF
- f.seek(addr)
- # Read Header
- header = f.read(3)
- # Read Version
- ver = struct.unpack(' 20:
- print(' - [ERROR] Invalid SWF Version')
- return None
-
- if b'CWS' in header:
- try:
- f.read(3)
- tmp = b'FWS' + f.read(5) + zlib.decompress(f.read())
- print(' - CWS Header')
- return tmp
-
- except:
- pass
- print('- [ERROR]: Zlib decompression error. Invalid CWS SWF')
- return None
-
- elif b'FWS' in header:
- try:
- tmp = f.read(size)
- print(' - FWS Header')
- return tmp
-
- except:
- pass
- print(' - [ERROR] Invalid SWF Size')
- return None
-
- else:
- print(' - [Error] Logic Error Blame Programmer')
- return None
-
-def headerInfo(f):
-# f is the already opended file handle
-# Yes, the format is is a rip off SWFDump. Can you blame me? Their tool is awesome.
- # SWFDump FORMAT
- # [HEADER] File version: 8
- # [HEADER] File is zlib compressed. Ratio: 52%
- # [HEADER] File size: 37536
- # [HEADER] Frame rate: 18.000000
- # [HEADER] Frame count: 323
- # [HEADER] Movie width: 217.00
- # [HEADER] Movie height: 85.00
- if type(f) is str:
- f = BytesIO(f)
- sig = f.read(3)
- print('\t[HEADER] File header: %s' % sig)
- if b'C' in sig:
- print('\t[HEADER] File is zlib compressed.')
- version = struct.unpack('> 3
- print('\t[HEADER] Rect Nbit: %d' % nbit)
- # Curretely the nbit is static at 15. This could be modified in the
- # future. If larger than 9 this will break the struct unpack. Will have
- # to revist must be a more effective way to deal with bits. Tried to keep
- # the algo but damn this is ugly...
- f.seek(ta)
- rect = struct.unpack('>Q', f.read(int(math.ceil((nbit*4)/8.0))))[0]
- tmp = struct.unpack('>7)[2:].zfill(1)
- # bin requires Python 2.6 or higher
- # skips string '0b' and the nbit
- rect = bin(rect)[7:]
- xmin = int(rect[0:nbit-1],2)
- print('\t[HEADER] Rect Xmin: %d' % xmin)
- xmax = int(rect[nbit:(nbit*2)-1],2)
- print('\t[HEADER] Rect Xmax: %d' % xmax)
- ymin = int(rect[nbit*2:(nbit*3)-1],2)
- print('\t[HEADER] Rect Ymin: %d' % ymin)
- # one bit needs to be added, my math might be off here
- ymax = int(rect[nbit*3:(nbit*4)-1] + str(tmp) ,2)
- print('\t[HEADER] Rect Ymax: %d' % ymax)
- framerate = struct.unpack('
+
+
+
+
+
+
+
+
+
+How to Suggest Improvements, Report Issues or Contribute
+This is a personal open-source project, developed in my spare time. Any contribution, suggestion, feedback or bug report is welcome.
+To suggest improvements, report a bug or any issue, please use the issue reporting page, and provide all the information and files to reproduce the problem.
+You may also contact the author directly to send feedback.
+The code is available in a repository on GitHub. You may use it to submit enhancements using forks and pull requests.
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/Contribute.md remnux-oletools-0.51a/remnux-oletools/doc/Contribute.md
--- remnux-oletools-0.51a/remnux-oletools/doc/Contribute.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/Contribute.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,37 @@
+How to Suggest Improvements, Report Issues or Contribute
+========================================================
+
+This is a personal open-source project, developed in my spare time.
+Any contribution, suggestion, feedback or bug report is welcome.
+
+To **suggest improvements, report a bug or any issue**,
+please use the [issue reporting page](https://github.com/decalage2/oletools/issues),
+and provide all the information and files to reproduce the problem.
+
+You may also [contact the author](http://decalage.info/contact) directly
+to **send feedback**.
+
+The code is available in [a repository on GitHub](https://github.com/decalage2/oletools).
+You may use it to **submit enhancements** using forks and pull requests.
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/Home.html remnux-oletools-0.51a/remnux-oletools/doc/Home.html
--- remnux-oletools-0.51a/remnux-oletools/doc/Home.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/Home.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,54 @@
+
+
+
+
+
+
+
+
+
+
+
+This is the home page of the documentation for python-oletools. The latest version can be found online, otherwise a copy is provided in the doc subfolder of the package.
+python-oletools is a package of python tools to analyze Microsoft OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging. It is based on the olefile parser. See http://www.decalage.info/python/oletools for more info.
+Quick links: Home page - Download/Install - Documentation - Report Issues/Suggestions/Questions - Contact the Author - Repository - Updates on Twitter
+Note: python-oletools is not related to OLETools published by BeCubed Software.
+
+
+- olebrowse: A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to view and extract individual data streams.
+- oleid: to analyze OLE files to detect specific characteristics usually found in malicious files.
+- olemeta: to extract all standard properties (metadata) from OLE files.
+- oletimes: to extract creation and modification timestamps of all streams and storages.
+- oledir: to display all the directory entries of an OLE file, including free and orphaned entries.
+- olemap: to display a map of all the sectors in an OLE file.
+- olevba: to extract and analyze VBA Macro source code from MS Office documents (OLE and OpenXML).
+- mraptor: to detect malicious VBA Macros
+- pyxswf: to detect, extract and analyze Flash objects (SWF) that may be embedded in files such as MS Office documents (e.g. Word, Excel) and RTF, which is especially useful for malware analysis.
+- oleobj: to extract embedded objects from OLE files.
+- rtfobj: to extract embedded objects from RTF files.
+- and a few others (coming soon)
+
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/Home.md remnux-oletools-0.51a/remnux-oletools/doc/Home.md
--- remnux-oletools-0.51a/remnux-oletools/doc/Home.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/Home.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,64 @@
+python-oletools v0.50 documentation
+===================================
+
+This is the home page of the documentation for python-oletools. The latest version can be found
+[online](https://github.com/decalage2/oletools/wiki), otherwise a copy is provided in the doc subfolder of the package.
+
+[python-oletools](http://www.decalage.info/python/oletools) is a package of python tools to analyze
+[Microsoft OLE2 files](http://en.wikipedia.org/wiki/Compound_File_Binary_Format)
+(also called Structured Storage, Compound File Binary Format or Compound Document File Format),
+such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging.
+It is based on the [olefile](http://www.decalage.info/olefile) parser.
+See [http://www.decalage.info/python/oletools](http://www.decalage.info/python/oletools) for more info.
+
+**Quick links:**
+[Home page](http://www.decalage.info/python/oletools) -
+[Download/Install](https://github.com/decalage2/oletools/wiki/Install) -
+[Documentation](https://github.com/decalage2/oletools/wiki) -
+[Report Issues/Suggestions/Questions](https://github.com/decalage2/oletools/issues) -
+[Contact the Author](http://decalage.info/contact) -
+[Repository](https://github.com/decalage2/oletools) -
+[Updates on Twitter](https://twitter.com/decalage2)
+
+Note: python-oletools is not related to OLETools published by BeCubed Software.
+
+Tools in python-oletools:
+-------------------------
+
+- **[[olebrowse]]**: A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to
+ view and extract individual data streams.
+- **[[oleid]]**: to analyze OLE files to detect specific characteristics usually found in malicious files.
+- **[[olemeta]]**: to extract all standard properties (metadata) from OLE files.
+- **[[oletimes]]**: to extract creation and modification timestamps of all streams and storages.
+- **[[oledir]]**: to display all the directory entries of an OLE file, including free and orphaned entries.
+- **[[olemap]]**: to display a map of all the sectors in an OLE file.
+- **[[olevba]]**: to extract and analyze VBA Macro source code from MS Office documents (OLE and OpenXML).
+- **[[mraptor]]**: to detect malicious VBA Macros
+- **[[pyxswf]]**: to detect, extract and analyze Flash objects (SWF) that may
+ be embedded in files such as MS Office documents (e.g. Word, Excel) and RTF,
+ which is especially useful for malware analysis.
+- **[[oleobj]]**: to extract embedded objects from OLE files.
+- **[[rtfobj]]**: to extract embedded objects from RTF files.
+- and a few others (coming soon)
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/Install.html remnux-oletools-0.51a/remnux-oletools/doc/Install.html
--- remnux-oletools-0.51a/remnux-oletools/doc/Install.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/Install.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,69 @@
+
+
+
+
+
+
+
+
+
+
+
+Pre-requisites
+The recommended Python version to run oletools is Python 2.7. Python 2.6 is also supported, but as it is not tested as often as 2.7, some features might not work as expected.
+Since oletools v0.50, thanks to contributions by [@Sebdraven](https://twitter.com/Sebdraven), most tools can also run with Python 3.x. As this is quite new, please report any issue you may encounter.
+
+Pip is included with Python since version 2.7.9 and 3.4. If it is not installed on your system, either upgrade Python or see https://pip.pypa.io/en/stable/installing/
+Linux, Mac OSX, Unix
+To download and install/update the latest release version of oletools, run the following command in a shell:
+sudo -H pip install -U oletools
+Important: Since version 0.50, pip will automatically create convenient command-line scripts in /usr/local/bin to run all the oletools from any directory.
+Windows
+To download and install/update the latest release version of oletools, run the following command in a cmd window:
+pip install -U oletools
+Important: Since version 0.50, pip will automatically create convenient command-line scripts to run all the oletools from any directory: olevba, mraptor, oleid, rtfobj, etc.
+How to install the latest development version
+If you want to benefit from the latest improvements in the development version, you may also use pip:
+Linux, Mac OSX, Unix
+sudo -H pip install -U https://github.com/decalage2/oletools/archive/master.zip
+Windows
+pip install -U https://github.com/decalage2/oletools/archive/master.zip
+How to install offline - Computer without Internet access
+First, download the oletools archive on a computer with Internet access: * Latest stable version: from https://github.com/decalage2/oletools/releases * Development version: https://github.com/decalage2/oletools/archive/master.zip
+Copy the archive file to the target computer.
+On Linux, Mac OSX, Unix, run the following command using the filename of the archive that you downloaded:
+sudo -H pip install -U oletools.zip
+On Windows:
+pip install -U oletools.zip
+Old school install using setup.py
+If you cannot use pip, it is still possible to run the setup.py script directly. However, this method will not create the command-line scripts automatically.
+First, download the oletools archive: * Latest stable version: from https://github.com/decalage2/oletools/releases * Development version: https://github.com/decalage2/oletools/archive/master.zip
+Then extract the archive, open a shell and go to the oletools directory.
+Linux, Mac OSX, Unix
+sudo -H python setup.py install
+Windows:
+python setup.py install
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/Install.md remnux-oletools-0.51a/remnux-oletools/doc/Install.md
--- remnux-oletools-0.51a/remnux-oletools/doc/Install.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/Install.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,135 @@
+How to Download and Install python-oletools
+===========================================
+
+Pre-requisites
+--------------
+
+The recommended Python version to run oletools is **Python 2.7**.
+Python 2.6 is also supported, but as it is not tested as often as 2.7, some features
+might not work as expected.
+
+Since oletools v0.50, thanks to contributions by [@Sebdraven](https://twitter.com/Sebdraven),
+most tools can also run with **Python 3.x**. As this is quite new, please
+[report any issue](https://github.com/decalage2/oletools/issues) you may encounter.
+
+
+
+Recommended way to Download+Install/Update oletools: pip
+--------------------------------------------------------
+
+Pip is included with Python since version 2.7.9 and 3.4. If it is not installed on your
+system, either upgrade Python or see https://pip.pypa.io/en/stable/installing/
+
+### Linux, Mac OSX, Unix
+
+To download and install/update the latest release version of oletools,
+run the following command in a shell:
+
+```text
+sudo -H pip install -U oletools
+```
+
+**Important**: Since version 0.50, pip will automatically create convenient command-line scripts
+in /usr/local/bin to run all the oletools from any directory.
+
+### Windows
+
+To download and install/update the latest release version of oletools,
+run the following command in a cmd window:
+
+```text
+pip install -U oletools
+```
+
+**Important**: Since version 0.50, pip will automatically create convenient command-line scripts
+to run all the oletools from any directory: olevba, mraptor, oleid, rtfobj, etc.
+
+
+How to install the latest development version
+---------------------------------------------
+
+If you want to benefit from the latest improvements in the development version,
+you may also use pip:
+
+### Linux, Mac OSX, Unix
+
+```text
+sudo -H pip install -U https://github.com/decalage2/oletools/archive/master.zip
+```
+
+### Windows
+
+```text
+pip install -U https://github.com/decalage2/oletools/archive/master.zip
+```
+
+How to install offline - Computer without Internet access
+---------------------------------------------------------
+
+First, download the oletools archive on a computer with Internet access:
+* Latest stable version: from https://github.com/decalage2/oletools/releases
+* Development version: https://github.com/decalage2/oletools/archive/master.zip
+
+Copy the archive file to the target computer.
+
+On Linux, Mac OSX, Unix, run the following command using the filename of the
+archive that you downloaded:
+
+```text
+sudo -H pip install -U oletools.zip
+```
+
+On Windows:
+
+```text
+pip install -U oletools.zip
+```
+
+
+Old school install using setup.py
+---------------------------------
+
+If you cannot use pip, it is still possible to run the setup.py script
+directly. However, this method will not create the command-line scripts
+automatically.
+
+First, download the oletools archive:
+* Latest stable version: from https://github.com/decalage2/oletools/releases
+* Development version: https://github.com/decalage2/oletools/archive/master.zip
+
+Then extract the archive, open a shell and go to the oletools directory.
+
+### Linux, Mac OSX, Unix
+
+```text
+sudo -H python setup.py install
+```
+
+### Windows:
+
+```text
+python setup.py install
+```
+
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/License.html remnux-oletools-0.51a/remnux-oletools/doc/License.html
--- remnux-oletools-0.51a/remnux-oletools/doc/License.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/License.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,56 @@
+
+
+
+
+
+
+
+
+
+
+
+This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license.
+The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec (http://www.decalage.info)
+All rights reserved.
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+
+License for officeparser |
+
+
+
+olevba contains modified source code from the officeparser project, published under the following MIT License (MIT):
+officeparser is copyright (c) 2014 John William Davison
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/License.md remnux-oletools-0.51a/remnux-oletools/doc/License.md
--- remnux-oletools-0.51a/remnux-oletools/doc/License.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/License.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,79 @@
+License for python-oletools
+===========================
+
+This license applies to the [python-oletools](http://www.decalage.info/python/oletools) package, apart from the
+thirdparty folder which contains third-party files published with their own license.
+
+The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec ([http://www.decalage.info](http://www.decalage.info))
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+----------
+License for officeparser
+------------------------
+
+olevba contains modified source code from the [officeparser](https://github.com/unixfreak0037/officeparser) project, published
+under the following MIT License (MIT):
+
+officeparser is copyright (c) 2014 John William Davison
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/mraptor1.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/mraptor1.png differ
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/mraptor.html remnux-oletools-0.51a/remnux-oletools/doc/mraptor.html
--- remnux-oletools-0.51a/remnux-oletools/doc/mraptor.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/mraptor.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,81 @@
+
+
+
+
+
+
+
+
+
+
+mraptor (MacroRaptor)
+mraptor is a tool designed to detect most malicious VBA Macros using generic heuristics. Unlike antivirus engines, it does not rely on signatures.
+In a nutshell, mraptor detects keywords corresponding to the three following types of behaviour that are present in clear text in almost any macro malware: - A: Auto-execution trigger - W: Write to the file system or memory - X: Execute a file or any payload outside the VBA context
+mraptor considers that a macro is suspicious when A and (W or X) is true.
+For more information about mraptor's detection algorithm, see the article How to detect most malicious macros without an antivirus.
+mraptor can be used either as a command-line tool, or as a python module from your own applications.
+It is part of the python-oletools package.
+Usage
+Usage: mraptor.py [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open all files from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
+ -l LOGLEVEL, --loglevel=LOGLEVEL
+ logging level debug/info/warning/error/critical
+ (default=warning)
+ -m, --matches Show matched strings.
+
+An exit code is returned based on the analysis result:
+ - 0: No Macro
+ - 1: Not MS Office
+ - 2: Macro OK
+ - 10: ERROR
+ - 20: SUSPICIOUS
+Examples
+Scan a single file:
+mraptor.py file.doc
+Scan a single file, stored in a Zip archive with password "infected":
+mraptor.py malicious_file.xls.zip -z infected
+Scan a collection of files stored in a folder:
+mraptor.py "MalwareZoo/VBA/*"
+Important: on Linux/MacOSX, always add double quotes around a file name when you use wildcards such as * and ? . Otherwise, the shell may replace the argument with the actual list of files matching the wildcards before starting the script.
+
+Python 3 support - mraptor3
+As of v0.50, mraptor has been ported to Python 3 thanks to @sebdraven. However, the differences between Python 2 and 3 are significant and for now there is a separate version of mraptor named mraptor3 to be used with Python 3.
+
+How to use mraptor in Python applications
+TODO
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/mraptor.md remnux-oletools-0.51a/remnux-oletools/doc/mraptor.md
--- remnux-oletools-0.51a/remnux-oletools/doc/mraptor.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/mraptor.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,113 @@
+mraptor (MacroRaptor)
+=====================
+
+mraptor is a tool designed to detect most malicious VBA Macros using
+generic heuristics. Unlike antivirus engines, it does not rely on signatures.
+
+In a nutshell, mraptor detects keywords corresponding to the three
+following types of behaviour that are present in clear text in almost
+any macro malware:
+- A: Auto-execution trigger
+- W: Write to the file system or memory
+- X: Execute a file or any payload outside the VBA context
+
+mraptor considers that a macro is suspicious when A and (W or X) is true.
+
+For more information about mraptor's detection algorithm, see the article
+[How to detect most malicious macros without an antivirus](http://www.decalage.info/mraptor).
+
+mraptor can be used either as a command-line tool, or as a python module
+from your own applications.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Usage
+
+```text
+Usage: mraptor.py [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open all files from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
+ -l LOGLEVEL, --loglevel=LOGLEVEL
+ logging level debug/info/warning/error/critical
+ (default=warning)
+ -m, --matches Show matched strings.
+
+An exit code is returned based on the analysis result:
+ - 0: No Macro
+ - 1: Not MS Office
+ - 2: Macro OK
+ - 10: ERROR
+ - 20: SUSPICIOUS
+```
+
+### Examples
+
+Scan a single file:
+
+```text
+mraptor.py file.doc
+```
+
+Scan a single file, stored in a Zip archive with password "infected":
+
+```text
+mraptor.py malicious_file.xls.zip -z infected
+```
+
+Scan a collection of files stored in a folder:
+
+```text
+mraptor.py "MalwareZoo/VBA/*"
+```
+
+**Important**: on Linux/MacOSX, always add double quotes around a file name when you use
+wildcards such as `*` and `?`. Otherwise, the shell may replace the argument with the actual
+list of files matching the wildcards before starting the script.
+
+![](mraptor1.png)
+
+## Python 3 support - mraptor3
+
+As of v0.50, mraptor has been ported to Python 3 thanks to @sebdraven.
+However, the differences between Python 2 and 3 are significant and for now
+there is a separate version of mraptor named mraptor3 to be used with
+Python 3.
+
+
+--------------------------------------------------------------------------
+
+## How to use mraptor in Python applications
+
+TODO
+
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/olebrowse1_menu.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/olebrowse1_menu.png differ
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/olebrowse2_stream.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/olebrowse2_stream.png differ
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/olebrowse3_hexview.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/olebrowse3_hexview.png differ
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.html remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.html
--- remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,59 @@
+
+
+
+
+
+
+
+
+
+
+olebrowse
+olebrowse is a simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to view and extract individual data streams.
+It is part of the python-oletools package.
+Dependencies
+olebrowse requires Tkinter. On Windows and MacOSX, it should be installed with Python, and olebrowse should work out of the box.
+However, on Linux it might be necessary to install the tkinter package for Python separately. For example, on Ubuntu this is done with the following command:
+sudo apt-get install python-tk
+And for Python 3:
+sudo apt-get install python3-tk
+Usage
+olebrowse.py [file]
+If you provide a file it will be opened, else a dialog will allow you to browse folders to open a file. Then if it is a valid OLE file, the list of data streams will be displayed. You can select a stream, and then either view its content in a built-in hexadecimal viewer, or save it to a file for further analysis.
+Screenshots
+Main menu, showing all streams in the OLE file:
+
+Menu with actions for a stream:
+
+Hex view for a stream:
+
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.md remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.md
--- remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olebrowse.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,76 @@
+olebrowse
+=========
+
+olebrowse is a simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to
+view and extract individual data streams.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+Dependencies
+------------
+
+olebrowse requires [Tkinter](https://en.wikipedia.org/wiki/Tkinter).
+On Windows and MacOSX, it should be installed with Python, and
+olebrowse should work out of the box.
+
+However, on Linux it might be necessary to install the tkinter
+package for Python separately. For example, on Ubuntu this is done with the
+following command:
+
+```
+sudo apt-get install python-tk
+```
+
+And for Python 3:
+
+```
+sudo apt-get install python3-tk
+```
+
+
+Usage
+-----
+
+ olebrowse.py [file]
+
+If you provide a file it will be opened, else a dialog will allow you to browse
+folders to open a file. Then if it is a valid OLE file, the list of data streams
+will be displayed. You can select a stream, and then either view its content
+in a built-in hexadecimal viewer, or save it to a file for further analysis.
+
+Screenshots
+-----------
+
+Main menu, showing all streams in the OLE file:
+
+![](olebrowse1_menu.png)
+
+Menu with actions for a stream:
+
+![](olebrowse2_stream.png)
+
+Hex view for a stream:
+
+![](olebrowse3_hexview.png)
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oledir.html remnux-oletools-0.51a/remnux-oletools/doc/oledir.html
--- remnux-oletools-0.51a/remnux-oletools/doc/oledir.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oledir.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+oledir
+oledir is a script to display all the directory entries of an OLE file, including free and orphaned entries.
+It can be used either as a command-line tool, or as a python module from your own applications.
+It is part of the python-oletools package.
+Usage
+Usage: oledir.py <filename>
+Examples
+Scan a single file:
+oledir.py file.doc
+
+
+How to use oledir in Python applications
+TODO
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oledir.md remnux-oletools-0.51a/remnux-oletools/doc/oledir.md
--- remnux-oletools-0.51a/remnux-oletools/doc/oledir.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oledir.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,54 @@
+oledir
+======
+
+oledir is a script to display all the directory entries of an OLE file,
+including free and orphaned entries.
+
+It can be used either as a command-line tool, or as a python module from your own applications.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Usage
+
+```text
+Usage: oledir.py <filename>
+```
+
+### Examples
+
+Scan a single file:
+
+```text
+oledir.py file.doc
+```
+
+![](oledir.png)
+
+
+--------------------------------------------------------------------------
+
+## How to use oledir in Python applications
+
+TODO
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/oledir.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/oledir.png differ
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oleid.html remnux-oletools-0.51a/remnux-oletools/doc/oleid.html
--- remnux-oletools-0.51a/remnux-oletools/doc/oleid.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oleid.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,122 @@
+
+
+
+
+
+
+
+
+
+
+
+oleid
+oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, Excel), to detect specific characteristics usually found in malicious files (e.g. malware). For example it can detect VBA macros and embedded Flash objects.
+It is part of the python-oletools package.
+Main Features
+
+- Detect OLE file type from its internal structure (e.g. MS Word, Excel, PowerPoint, ...)
+- Detect VBA Macros
+- Detect embedded Flash objects
+- Detect embedded OLE objects
+- Detect MS Office encryption
+- Can be used as a command-line tool
+- Python API to integrate it in your applications
+
+Planned improvements:
+
+- Extract the most important metadata fields
+- Support for OpenXML files and embedded OLE files
+- Generic VBA macros detection
+- Detect auto-executable VBA macros
+- Extended OLE file types detection
+- Detect unusual OLE structures (fragmentation, unused sectors, etc)
+- Options to scan multiple files
+- Options to scan files from encrypted zip archives
+- CSV output
+
+Usage
+oleid.py <file>
+Example
+Analyzing a Word document containing a Flash object and VBA macros:
+C:\oletools>oleid.py word_flash_vba.doc
+
+Filename: word_flash_vba.doc
++-------------------------------+-----------------------+
+| Indicator | Value |
++-------------------------------+-----------------------+
+| OLE format | True |
+| Has SummaryInformation stream | True |
+| Application name | Microsoft Office Word |
+| Encrypted | False |
+| Word Document | True |
+| VBA Macros | True |
+| Excel Workbook | False |
+| PowerPoint Presentation | False |
+| Visio Drawing | False |
+| ObjectPool | True |
+| Flash objects | 1 |
++-------------------------------+-----------------------+
+How to use oleid in your Python applications
+First, import oletools.oleid, and create an OleID object to scan a file:
+import oletools.oleid
+
+oid = oletools.oleid.OleID(filename)
+Note: filename can be a filename, a file-like object, or a bytes string containing the file to be analyzed.
+Second, call the check() method. It returns a list of Indicator objects.
+Each Indicator object has the following attributes:
+
+- id: str, identifier for the indicator
+- name: str, name to display the indicator
+- description: str, long description of the indicator
+- type: class of the indicator (e.g. bool, str, int)
+- value: value of the indicator
+
+For example, the following code displays all the indicators:
+indicators = oid.check()
+for i in indicators:
+ print 'Indicator id=%s name="%s" type=%s value=%s' % (i.id, i.name, i.type, repr(i.value))
+ print 'description:', i.description
+ print ''
+See the source code of oleid.py for more details.
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oleid.md remnux-oletools-0.51a/remnux-oletools/doc/oleid.md
--- remnux-oletools-0.51a/remnux-oletools/doc/oleid.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oleid.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,117 @@
+oleid
+=====
+
+oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,
+Excel), to detect specific characteristics usually found in malicious files (e.g. malware).
+For example it can detect VBA macros and embedded Flash objects.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Main Features
+
+- Detect OLE file type from its internal structure (e.g. MS Word, Excel, PowerPoint, ...)
+- Detect VBA Macros
+- Detect embedded Flash objects
+- Detect embedded OLE objects
+- Detect MS Office encryption
+- Can be used as a command-line tool
+- Python API to integrate it in your applications
+
+Planned improvements:
+
+- Extract the most important metadata fields
+- Support for OpenXML files and embedded OLE files
+- Generic VBA macros detection
+- Detect auto-executable VBA macros
+- Extended OLE file types detection
+- Detect unusual OLE structures (fragmentation, unused sectors, etc)
+- Options to scan multiple files
+- Options to scan files from encrypted zip archives
+- CSV output
+
+## Usage
+
+```text
+oleid.py <file>
+```
+
+### Example
+
+Analyzing a Word document containing a Flash object and VBA macros:
+
+```text
+C:\oletools>oleid.py word_flash_vba.doc
+
+Filename: word_flash_vba.doc
++-------------------------------+-----------------------+
+| Indicator | Value |
++-------------------------------+-----------------------+
+| OLE format | True |
+| Has SummaryInformation stream | True |
+| Application name | Microsoft Office Word |
+| Encrypted | False |
+| Word Document | True |
+| VBA Macros | True |
+| Excel Workbook | False |
+| PowerPoint Presentation | False |
+| Visio Drawing | False |
+| ObjectPool | True |
+| Flash objects | 1 |
++-------------------------------+-----------------------+
+```
+
+## How to use oleid in your Python applications
+
+First, import oletools.oleid, and create an **OleID** object to scan a file:
+
+```python
+import oletools.oleid
+
+oid = oletools.oleid.OleID(filename)
+```
+
+Note: filename can be a filename, a file-like object, or a bytes string containing the file to be analyzed.
+
+Second, call the **check()** method. It returns a list of **Indicator** objects.
+
+Each Indicator object has the following attributes:
+
+- **id**: str, identifier for the indicator
+- **name**: str, name to display the indicator
+- **description**: str, long description of the indicator
+- **type**: class of the indicator (e.g. bool, str, int)
+- **value**: value of the indicator
+
+For example, the following code displays all the indicators:
+
+```python
+indicators = oid.check()
+for i in indicators:
+ print 'Indicator id=%s name="%s" type=%s value=%s' % (i.id, i.name, i.type, repr(i.value))
+ print 'description:', i.description
+ print ''
+```
+
+See the source code of oleid.py for more details.
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/olemap1.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/olemap1.png differ
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/olemap2.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/olemap2.png differ
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olemap.html remnux-oletools-0.51a/remnux-oletools/doc/olemap.html
--- remnux-oletools-0.51a/remnux-oletools/doc/olemap.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olemap.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+olemap
+olemap is a script to display a map of all the sectors in an OLE file.
+It can be used either as a command-line tool, or as a python module from your own applications.
+It is part of the python-oletools package.
+Usage
+Usage: olemap.py <filename>
+Examples
+Scan a single file:
+olemap.py file.doc
+
+
+
+How to use olemap in Python applications
+TODO
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olemap.md remnux-oletools-0.51a/remnux-oletools/doc/olemap.md
--- remnux-oletools-0.51a/remnux-oletools/doc/olemap.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olemap.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,54 @@
+olemap
+======
+
+olemap is a script to display a map of all the sectors in an OLE file.
+
+It can be used either as a command-line tool, or as a python module from your own applications.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Usage
+
+```text
+Usage: olemap.py <filename>
+```
+
+### Examples
+
+Scan a single file:
+
+```text
+olemap.py file.doc
+```
+
+![](olemap1.png)
+
+![](olemap2.png)
+
+--------------------------------------------------------------------------
+
+## How to use olemap in Python applications
+
+TODO
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
Binary files /tmp/tmpjjl7AX/dVIwszLzOn/remnux-oletools-0.51a/remnux-oletools/doc/olemeta1.png and /tmp/tmpjjl7AX/NgkrW0XO1N/remnux-oletools-0.51a/remnux-oletools/doc/olemeta1.png differ
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olemeta.html remnux-oletools-0.51a/remnux-oletools/doc/olemeta.html
--- remnux-oletools-0.51a/remnux-oletools/doc/olemeta.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olemeta.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+olemeta is a script to parse OLE files such as MS Office documents (e.g. Word, Excel), to extract all standard properties present in the OLE file.
+It is part of the python-oletools package.
+Usage
+olemeta.py <file>
+Example
+
+
+TODO
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olemeta.md remnux-oletools-0.51a/remnux-oletools/doc/olemeta.md
--- remnux-oletools-0.51a/remnux-oletools/doc/olemeta.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olemeta.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,43 @@
+olemeta
+=======
+
+olemeta is a script to parse OLE files such as MS Office documents (e.g. Word,
+Excel), to extract all standard properties present in the OLE file.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Usage
+
+```text
+olemeta.py <file>
+```
+
+### Example
+
+![](olemeta1.png)
+
+## How to use olemeta in Python applications
+
+TODO
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oleobj.html remnux-oletools-0.51a/remnux-oletools/doc/oleobj.html
--- remnux-oletools-0.51a/remnux-oletools/doc/oleobj.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oleobj.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,44 @@
+
+
+
+
+
+
+
+
+
+
+oleobj
+oleobj is a script to extract embedded objects from OLE files.
+It can be used either as a command-line tool, or as a python module from your own applications.
+It is part of the python-oletools package.
+Usage
+TODO
+
+How to use oleobj in Python applications
+See rtfobj.py source code.
+TODO
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oleobj.md remnux-oletools-0.51a/remnux-oletools/doc/oleobj.md
--- remnux-oletools-0.51a/remnux-oletools/doc/oleobj.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oleobj.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,44 @@
+oleobj
+======
+
+oleobj is a script to extract embedded objects from OLE files.
+
+It can be used either as a command-line tool, or as a python module from your own applications.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Usage
+
+```text
+TODO
+```
+
+--------------------------------------------------------------------------
+
+## How to use oleobj in Python applications
+
+See rtfobj.py source code.
+
+TODO
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oletimes.html remnux-oletools-0.51a/remnux-oletools/doc/oletimes.html
--- remnux-oletools-0.51a/remnux-oletools/doc/oletimes.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oletimes.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+
+
+
+
+oletimes
+oletimes is a script to parse OLE files such as MS Office documents (e.g. Word, Excel), to extract creation and modification times of all streams and storages in the OLE file.
+It is part of the python-oletools package.
+Usage
+oletimes.py <file>
+Example
+Checking the malware sample DIAN_caso-5415.doc:
+>oletimes.py DIAN_caso-5415.doc
+
++----------------------------+---------------------+---------------------+
+| Stream/Storage name | Modification Time | Creation Time |
++----------------------------+---------------------+---------------------+
+| Root | 2014-05-14 12:45:24 | None |
+| '\x01CompObj' | None | None |
+| '\x05DocumentSummaryInform | None | None |
+| ation' | | |
+| '\x05SummaryInformation' | None | None |
+| '1Table' | None | None |
+| 'Data' | None | None |
+| 'Macros' | 2014-05-14 12:45:24 | 2014-05-14 12:45:24 |
+| 'Macros/PROJECT' | None | None |
+| 'Macros/PROJECTwm' | None | None |
+| 'Macros/VBA' | 2014-05-14 12:45:24 | 2014-05-14 12:45:24 |
+| 'Macros/VBA/ThisDocument' | None | None |
+| 'Macros/VBA/_VBA_PROJECT' | None | None |
+| 'Macros/VBA/__SRP_0' | None | None |
+| 'Macros/VBA/__SRP_1' | None | None |
+| 'Macros/VBA/__SRP_2' | None | None |
+| 'Macros/VBA/__SRP_3' | None | None |
+| 'Macros/VBA/dir' | None | None |
+| 'WordDocument' | None | None |
++----------------------------+---------------------+---------------------+
+How to use oletimes in Python applications
+TODO
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/oletimes.md remnux-oletools-0.51a/remnux-oletools/doc/oletimes.md
--- remnux-oletools-0.51a/remnux-oletools/doc/oletimes.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/oletimes.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,72 @@
+oletimes
+========
+
+oletimes is a script to parse OLE files such as MS Office documents (e.g. Word,
+Excel), to extract creation and modification times of all streams and storages
+in the OLE file.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Usage
+
+```text
+oletimes.py <file>
+```
+
+### Example
+
+Checking the malware sample [DIAN_caso-5415.doc](https://malwr.com/analysis/M2I4YWRhM2IwY2QwNDljN2E3ZWFjYTg3ODk4NmZhYmE/):
+
+```text
+>oletimes.py DIAN_caso-5415.doc
+
++----------------------------+---------------------+---------------------+
+| Stream/Storage name | Modification Time | Creation Time |
++----------------------------+---------------------+---------------------+
+| Root | 2014-05-14 12:45:24 | None |
+| '\x01CompObj' | None | None |
+| '\x05DocumentSummaryInform | None | None |
+| ation' | | |
+| '\x05SummaryInformation' | None | None |
+| '1Table' | None | None |
+| 'Data' | None | None |
+| 'Macros' | 2014-05-14 12:45:24 | 2014-05-14 12:45:24 |
+| 'Macros/PROJECT' | None | None |
+| 'Macros/PROJECTwm' | None | None |
+| 'Macros/VBA' | 2014-05-14 12:45:24 | 2014-05-14 12:45:24 |
+| 'Macros/VBA/ThisDocument' | None | None |
+| 'Macros/VBA/_VBA_PROJECT' | None | None |
+| 'Macros/VBA/__SRP_0' | None | None |
+| 'Macros/VBA/__SRP_1' | None | None |
+| 'Macros/VBA/__SRP_2' | None | None |
+| 'Macros/VBA/__SRP_3' | None | None |
+| 'Macros/VBA/dir' | None | None |
+| 'WordDocument' | None | None |
++----------------------------+---------------------+---------------------+
+```
+
+## How to use oletimes in Python applications
+
+TODO
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olevba.html remnux-oletools-0.51a/remnux-oletools/doc/olevba.html
--- remnux-oletools-0.51a/remnux-oletools/doc/olevba.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olevba.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,371 @@
+
+
+
+
+
+
+
+
+
+
+
+olevba
+olevba is a script to parse OLE and OpenXML files such as MS Office documents (e.g. Word, Excel), to detect VBA Macros, extract their source code in clear text, and detect security-related patterns such as auto-executable macros, suspicious VBA keywords used by malware, anti-sandboxing and anti-virtualization techniques, and potential IOCs (IP addresses, URLs, executable filenames, etc). It also detects and decodes several common obfuscation methods including Hex encoding, StrReverse, Base64, Dridex, VBA expressions, and extracts IOCs from decoded strings.
+It can be used either as a command-line tool, or as a python module from your own applications.
+It is part of the python-oletools package.
+olevba is based on source code from officeparser by John William Davison, with significant modifications.
+
+
+- Word 97-2003 (.doc, .dot)
+- Word 2007+ (.docm, .dotm)
+- Word 2003 XML (.xml)
+- Word/Excel MHTML, aka Single File Web Page (.mht)
+- Excel 97-2003 (.xls)
+- Excel 2007+ (.xlsm, .xlsb)
+- PowerPoint 2007+ (.pptm, .ppsm)
+- Text file containing VBA or VBScript source code
+- Password-protected Zip archive containing any of the above
+
+Main Features
+
+- Detect VBA macros in MS Office 97-2003 and 2007+ files, XML, MHT
+- Extract VBA macro source code
+- Detect auto-executable macros
+- Detect suspicious VBA keywords often used by malware
+- Detect anti-sandboxing and anti-virtualization techniques
+- Detect and decode strings obfuscated with Hex/Base64/StrReverse/Dridex
+- Deobfuscate VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, &, using a VBA parser built with pyparsing, including custom Hex and Base64 encodings
+- Extract IOCs/patterns of interest such as IP addresses, URLs, e-mail addresses and executable file names
+- Scan multiple files and sample collections (wildcards, recursive)
+- Triage mode for a summary view of multiple files
+- Scan malware samples in password-protected Zip archives
+- Python API to use olevba from your applications
+
+MS Office files encrypted with a password are also supported, because VBA macro code is never encrypted, only the content of the document.
+About VBA Macros
+See this article for more information and technical details about VBA Macros and how they are stored in MS Office documents.
+How it works
+
+- olevba checks the file type: If it is an OLE file (i.e. MS Office 97-2003), it is parsed right away.
+- If it is a zip file (i.e. MS Office 2007+), XML or MHTML, olevba looks for all OLE files stored in it (e.g. vbaProject.bin, editdata.mso), and opens them.
+- olevba identifies all the VBA projects stored in the OLE structure.
+- Each VBA project is parsed to find the corresponding OLE streams containing macro code.
+- In each of these OLE streams, the VBA macro source code is extracted and decompressed (RLE compression).
+- olevba looks for specific strings obfuscated with various algorithms (Hex, Base64, StrReverse, Dridex, VBA expressions).
+- olevba scans the macro source code and the deobfuscated strings to find suspicious keywords, auto-executable macros and potential IOCs (URLs, IP addresses, e-mail addresses, executable filenames, etc).
+
+Usage
+Usage: olevba.py [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open all files from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
+ -t, --triage triage mode, display results as a summary table
+ (default for multiple files)
+ -d, --detailed detailed mode, display full results (default for
+ single file)
+ -a, --analysis display only analysis results, not the macro source
+ code
+ -c, --code display only VBA source code, do not analyze it
+ -i INPUT, --input=INPUT
+ input file containing VBA source code to be analyzed
+ (no parsing)
+ --decode display all the obfuscated strings with their decoded
+ content (Hex, Base64, StrReverse, Dridex, VBA).
+ --attr display the attribute lines at the beginning of VBA
+ source code
+ --reveal display the macro source code after replacing all the
+ obfuscated strings by their decoded content.
+Examples
+Scan a single file:
+olevba.py file.doc
+Scan a single file, stored in a Zip archive with password "infected":
+olevba.py malicious_file.xls.zip -z infected
+Scan a single file, showing all obfuscated strings decoded:
+olevba.py file.doc --decode
+Scan a single file, showing the macro source code with VBA strings deobfuscated:
+olevba.py file.doc --reveal
+Scan VBA source code extracted into a text file:
+olevba.py source_code.vba
+Scan a collection of files stored in a folder:
+olevba.py "MalwareZoo/VBA/*"
+NOTE: On Linux, MacOSX and other Unix variants, it is required to add double quotes around wildcards. Otherwise, they will be expanded by the shell instead of olevba.
+Scan all .doc and .xls files, recursively in all subfolders:
+olevba.py "MalwareZoo/VBA/*.doc" "MalwareZoo/VBA/*.xls" -r
+Scan all .doc files within all .zip files with password, recursively:
+olevba.py "MalwareZoo/VBA/*.zip" -r -z infected -f "*.doc"
+Detailed analysis mode (default for single file)
+When a single file is scanned, or when using the option -d, all details of the analysis are displayed.
+For example, checking the malware sample DIAN_caso-5415.doc:
+>olevba.py c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip -z infected
+===============================================================================
+FILE: DIAN_caso-5415.doc.malware in c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip
+Type: OLE
+-------------------------------------------------------------------------------
+VBA MACRO ThisDocument.cls
+in file: DIAN_caso-5415.doc.malware - OLE stream: Macros/VBA/ThisDocument
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Option Explicit
+Private Declare Function URLDownloadToFileA Lib "urlmon" (ByVal FVQGKS As Long,_
+ByVal WSGSGY As String, ByVal IFRRFV As String, ByVal NCVOLV As Long, _
+ByVal HQTLDG As Long) As Long
+Sub AutoOpen()
+ Auto_Open
+End Sub
+Sub Auto_Open()
+SNVJYQ
+End Sub
+Public Sub SNVJYQ()
+ [Malicious Code...]
+End Sub
+Function OGEXYR(XSTAHU As String, PHHWIV As String) As Boolean
+ [Malicious Code...]
+ Application.DisplayAlerts = False
+ Application.Quit
+End Function
+Sub Workbook_Open()
+ Auto_Open
+End Sub
+
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ANALYSIS:
++------------+----------------------+-----------------------------------------+
+| Type | Keyword | Description |
++------------+----------------------+-----------------------------------------+
+| AutoExec | AutoOpen | Runs when the Word document is opened |
+| AutoExec | Auto_Open | Runs when the Excel Workbook is opened |
+| AutoExec | Workbook_Open | Runs when the Excel Workbook is opened |
+| Suspicious | Lib | May run code from a DLL |
+| Suspicious | Shell | May run an executable file or a system |
+| | | command |
+| Suspicious | Environ | May read system environment variables |
+| Suspicious | URLDownloadToFileA | May download files from the Internet |
+| IOC | http://germanya.com. | URL |
+| | ec/logs/test.exe" | |
+| IOC | http://germanya.com. | URL |
+| | ec/logs/counter.php" | |
+| IOC | germanya.com | Executable file name |
+| IOC | test.exe | Executable file name |
+| IOC | sfjozjero.exe | Executable file name |
++------------+----------------------+-----------------------------------------+
+Triage mode (default for multiple files)
+When several files are scanned, or when using the option -t, a summary of the analysis for each file is displayed. This is more convenient for quick triage of a collection of suspicious files.
+The following flags show the results of the analysis:
+
+- OLE: the file type is OLE, for example MS Office 97-2003
+- OpX: the file type is OpenXML, for example MS Office 2007+
+- XML: the file type is Word 2003 XML
+- MHT: the file type is Word MHTML, aka Single File Web Page (.mht)
+- ?: the file type is not supported
+- M: contains VBA Macros
+- A: auto-executable macros
+- S: suspicious VBA keywords
+- I: potential IOCs
+- H: hex-encoded strings (potential obfuscation)
+- B: Base64-encoded strings (potential obfuscation)
+- D: Dridex-encoded strings (potential obfuscation)
+- V: VBA string expressions (potential obfuscation)
+
+Here is an example:
+c:\>olevba.py \MalwareZoo\VBA\samples\*
+Flags Filename
+----------- -----------------------------------------------------------------
+OLE:MASI--- \MalwareZoo\VBA\samples\DIAN_caso-5415.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_1.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_2.doc.malware
+OLE:MASI--- \MalwareZoo\VBA\samples\DRIDEX_3.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_4.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_5.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_6.doc.malware
+OLE:MAS---- \MalwareZoo\VBA\samples\DRIDEX_7.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_8.doc.malware
+OLE:MASIHBD \MalwareZoo\VBA\samples\DRIDEX_9.xls.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_A.doc.malware
+OLE:------- \MalwareZoo\VBA\samples\Normal_Document.doc
+OLE:M------ \MalwareZoo\VBA\samples\Normal_Document_Macro.doc
+OpX:MASI--- \MalwareZoo\VBA\samples\RottenKitten.xlsb.malware
+OLE:MASI-B- \MalwareZoo\VBA\samples\ROVNIX.doc.malware
+OLE:MA----- \MalwareZoo\VBA\samples\Word within Word macro auto.doc
+Python 3 support - olevba3
+As of v0.50, olevba has been ported to Python 3 thanks to @sebdraven. However, the differences between Python 2 and 3 are significant and for now there is a separate version of olevba named olevba3 to be used with Python 3.
+
+How to use olevba in Python applications
+olevba may be used to open a MS Office file, detect if it contains VBA macros, extract and analyze the VBA source code from your own python applications.
+IMPORTANT: olevba is currently under active development, therefore this API is likely to change.
+Import olevba
+First, import the oletools.olevba module, importing at least the VBA_Parser class and the file type constants:
+from oletools.olevba import VBA_Parser, TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML, TYPE_MHTML
+Parse a MS Office file - VBA_Parser
+To parse a file on disk, create an instance of the VBA_Parser class, providing the name of the file to open as parameter. For example:
+vbaparser = VBA_Parser('my_file_with_macros.doc')
+The file may also be provided as a bytes string containing its data. In that case, the actual filename must be provided for reference, and the file content with the data parameter. For example:
+myfile = 'my_file_with_macros.doc'
+filedata = open(myfile, 'rb').read()
+vbaparser = VBA_Parser(myfile, data=filedata)
+VBA_Parser will raise an exception if the file is not in one of the supported formats: OLE (MS Office 97-2003), OpenXML (MS Office 2007+), MHTML or Word 2003 XML.
+After parsing the file, the attribute VBA_Parser.type is a string indicating the file type. It can be either TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML or TYPE_MHTML. (constants defined in the olevba module)
+Detect VBA macros
+The method detect_vba_macros of a VBA_Parser object returns True if VBA macros have been found in the file, False otherwise.
+if vbaparser.detect_vba_macros():
+ print 'VBA Macros found'
+else:
+ print 'No VBA Macros found'
+Note: The detection algorithm looks for streams and storage with specific names in the OLE structure, which works fine for all the supported formats listed above. However, for some formats such as PowerPoint 97-2003, this method will always return False because VBA Macros are stored in a different way which is not yet supported by olevba.
+Moreover, if the file contains an embedded document (e.g. an Excel workbook inserted into a Word document), this method may return True if the embedded document contains VBA Macros, even if the main document does not.
+Extract VBA Macro Source Code
+The method extract_macros extracts and decompresses source code for each VBA macro found in the file (possibly including embedded files). It is a generator yielding a tuple (filename, stream_path, vba_filename, vba_code) for each VBA macro found.
+
+- filename: If the file is OLE (MS Office 97-2003), filename is the path of the file. If the file is OpenXML (MS Office 2007+), filename is the path of the OLE subfile containing VBA macros within the zip archive, e.g. word/vbaProject.bin.
+- stream_path: path of the OLE stream containing the VBA macro source code
+- vba_filename: corresponding VBA filename
+- vba_code: string containing the VBA source code in clear text
+
+Example:
+for (filename, stream_path, vba_filename, vba_code) in vbaparser.extract_macros():
+ print '-'*79
+ print 'Filename :', filename
+ print 'OLE stream :', stream_path
+ print 'VBA filename:', vba_filename
+ print '- '*39
+ print vba_code
+Alternatively, the VBA_Parser method extract_all_macros returns the same results as a list of tuples.
+Analyze VBA Source Code
+Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained in a file:
+The method analyze_macros from the class VBA_Parser can be used to scan the source code of all VBA modules to find obfuscated strings, suspicious keywords, IOCs, auto-executable macros, etc.
+analyze_macros() takes an optional argument show_decoded_strings: if set to True, the results will contain all the encoded strings found in the code (Hex, Base64, Dridex) with their decoded value. By default, it will only include the strings which contain printable characters.
+VBA_Parser.analyze_macros() returns a list of tuples (type, keyword, description), one for each item in the results.
+
+- type may be either 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String', 'Dridex String' or 'VBA obfuscated Strings'.
+- keyword is the string found for auto-executable macros, suspicious keywords or IOCs. For obfuscated strings, it is the decoded value of the string.
+- description provides a description of the keyword. For obfuscated strings, it is the encoded value of the string.
+
+Example:
+results = vbaparser.analyze_macros()
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+After calling analyze_macros, the following VBA_Parser attributes also provide the number of items found for each category:
+print 'AutoExec keywords: %d' % vbaparser.nb_autoexec
+print 'Suspicious keywords: %d' % vbaparser.nb_suspicious
+print 'IOCs: %d' % vbaparser.nb_iocs
+print 'Hex obfuscated strings: %d' % vbaparser.nb_hexstrings
+print 'Base64 obfuscated strings: %d' % vbaparser.nb_base64strings
+print 'Dridex obfuscated strings: %d' % vbaparser.nb_dridexstrings
+print 'VBA obfuscated strings: %d' % vbaparser.nb_vbastrings
+Deobfuscate VBA Macro Source Code
+The method reveal attempts to deobfuscate the macro source code by replacing all the obfuscated strings by their decoded content. Returns a single string.
+Example:
+print vbaparser.reveal()
+Close the VBA_Parser
+After usage, it is better to call the close method of the VBA_Parser object, to make sure the file is closed, especially if your application is parsing many files.
+vbaparser.close()
+
+Deprecated API
+The following methods and functions are still functional, but their usage is not recommended since they have been replaced by better solutions.
+VBA_Scanner (deprecated)
+The class VBA_Scanner can be used to scan the source code of a VBA module to find obfuscated strings, suspicious keywords, IOCs, auto-executable macros, etc.
+First, create a VBA_Scanner object with a string containing the VBA source code (for example returned by the extract_macros method). Then call the methods scan or scan_summary to get the results of the analysis.
+scan() takes an optional argument include_decoded_strings: if set to True, the results will contain all the encoded strings found in the code (Hex, Base64, Dridex) with their decoded value.
+scan returns a list of tuples (type, keyword, description), one for each item in the results.
+
+- type may be either 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String'.
+- keyword is the string found for auto-executable macros, suspicious keywords or IOCs. For obfuscated strings, it is the decoded value of the string.
+- description provides a description of the keyword. For obfuscated strings, it is the encoded value of the string.
+
+Example:
+vba_scanner = VBA_Scanner(vba_code)
+results = vba_scanner.scan(include_decoded_strings=True)
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+The function scan_vba is a shortcut for VBA_Scanner(vba_code).scan():
+results = scan_vba(vba_code, include_decoded_strings=True)
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+scan_summary returns a tuple with the number of items found for each category: (autoexec, suspicious, IOCs, hex, base64, dridex).
+Detect auto-executable macros (deprecated)
+Deprecated: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
+The function detect_autoexec checks if VBA macro code contains specific macro names that will be triggered when the document/workbook is opened, closed, changed, etc.
+It returns a list of tuples containing two strings, the detected keyword, and the description of the trigger. (See the malware example above)
+Sample usage:
+from oletools.olevba import detect_autoexec
+autoexec_keywords = detect_autoexec(vba_code)
+if autoexec_keywords:
+ print 'Auto-executable macro keywords found:'
+ for keyword, description in autoexec_keywords:
+ print '%s: %s' % (keyword, description)
+else:
+ print 'Auto-executable macro keywords: None found'
+Detect suspicious VBA keywords (deprecated)
+Deprecated: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
+The function detect_suspicious checks if VBA macro code contains specific keywords often used by malware to act on the system (create files, run commands or applications, write to the registry, etc).
+It returns a list of tuples containing two strings, the detected keyword, and the description of the corresponding malicious behaviour. (See the malware example above)
+Sample usage:
+from oletools.olevba import detect_suspicious
+suspicious_keywords = detect_suspicious(vba_code)
+if suspicious_keywords:
+ print 'Suspicious VBA keywords found:'
+ for keyword, description in suspicious_keywords:
+ print '%s: %s' % (keyword, description)
+else:
+ print 'Suspicious VBA keywords: None found'
+Extract potential IOCs (deprecated)
+Deprecated: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
+The function detect_patterns checks if VBA macro code contains specific patterns of interest, that may be useful for malware analysis and detection (potential Indicators of Compromise): IP addresses, e-mail addresses, URLs, executable file names.
+It returns a list of tuples containing two strings, the pattern type, and the extracted value. (See the malware example above)
+Sample usage:
+from oletools.olevba import detect_patterns
+patterns = detect_patterns(vba_code)
+if patterns:
+ print 'Patterns found:'
+ for pattern_type, value in patterns:
+ print '%s: %s' % (pattern_type, value)
+else:
+ print 'Patterns: None found'
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/olevba.md remnux-oletools-0.51a/remnux-oletools/doc/olevba.md
--- remnux-oletools-0.51a/remnux-oletools/doc/olevba.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/olevba.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,544 @@
+olevba
+======
+
+olevba is a script to parse OLE and OpenXML files such as MS Office documents
+(e.g. Word, Excel), to **detect VBA Macros**, extract their **source code** in clear text,
+and detect security-related patterns such as **auto-executable macros**, **suspicious
+VBA keywords** used by malware, anti-sandboxing and anti-virtualization techniques,
+and potential **IOCs** (IP addresses, URLs, executable filenames, etc).
+It also detects and decodes several common **obfuscation methods including Hex encoding,
+StrReverse, Base64, Dridex, VBA expressions**, and extracts IOCs from decoded strings.
+
+It can be used either as a command-line tool, or as a Python module from your own applications.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+olevba is based on source code from [officeparser](https://github.com/unixfreak0037/officeparser)
+by John William Davison, with significant modifications.
+
+## Supported formats
+
+- Word 97-2003 (.doc, .dot)
+- Word 2007+ (.docm, .dotm)
+- Word 2003 XML (.xml)
+- Word/Excel MHTML, aka Single File Web Page (.mht)
+- Excel 97-2003 (.xls)
+- Excel 2007+ (.xlsm, .xlsb)
+- PowerPoint 2007+ (.pptm, .ppsm)
+- Text file containing VBA or VBScript source code
+- Password-protected Zip archive containing any of the above
+
+## Main Features
+
+- Detect VBA macros in MS Office 97-2003 and 2007+ files, XML, MHT
+- Extract VBA macro source code
+- Detect auto-executable macros
+- Detect suspicious VBA keywords often used by malware
+- Detect anti-sandboxing and anti-virtualization techniques
+- Detect and decode strings obfuscated with Hex/Base64/StrReverse/Dridex
+- Deobfuscate VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, &, using a VBA parser built with
+[pyparsing](http://pyparsing.wikispaces.com), including custom Hex and Base64 encodings
+- Extract IOCs/patterns of interest such as IP addresses, URLs, e-mail addresses and executable file names
+- Scan multiple files and sample collections (wildcards, recursive)
+- Triage mode for a summary view of multiple files
+- Scan malware samples in password-protected Zip archives
+- Python API to use olevba from your applications
+
+MS Office files encrypted with a password are also supported, because VBA macro code is never
+encrypted, only the content of the document.
+
+## About VBA Macros
+
+See [this article](http://www.decalage.info/en/vba_tools) for more information and technical details about VBA Macros
+and how they are stored in MS Office documents.
+
+## How it works
+
+1. olevba checks the file type: If it is an OLE file (i.e. MS Office 97-2003), it is parsed right away.
+1. If it is a zip file (i.e. MS Office 2007+), XML or MHTML, olevba looks for all OLE files stored in it (e.g. vbaProject.bin, editdata.mso), and opens them.
+1. olevba identifies all the VBA projects stored in the OLE structure.
+1. Each VBA project is parsed to find the corresponding OLE streams containing macro code.
+1. In each of these OLE streams, the VBA macro source code is extracted and decompressed (RLE compression).
+1. olevba looks for specific strings obfuscated with various algorithms (Hex, Base64, StrReverse, Dridex, VBA expressions).
+1. olevba scans the macro source code and the deobfuscated strings to find suspicious keywords, auto-executable macros
+and potential IOCs (URLs, IP addresses, e-mail addresses, executable filenames, etc).
+
+
+## Usage
+
+```text
+Usage: olevba.py [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open all files from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
+ -t, --triage triage mode, display results as a summary table
+ (default for multiple files)
+ -d, --detailed detailed mode, display full results (default for
+ single file)
+ -a, --analysis display only analysis results, not the macro source
+ code
+ -c, --code display only VBA source code, do not analyze it
+ -i INPUT, --input=INPUT
+ input file containing VBA source code to be analyzed
+ (no parsing)
+ --decode display all the obfuscated strings with their decoded
+ content (Hex, Base64, StrReverse, Dridex, VBA).
+ --attr display the attribute lines at the beginning of VBA
+ source code
+ --reveal display the macro source code after replacing all the
+ obfuscated strings by their decoded content.
+```
+
+### Examples
+
+Scan a single file:
+
+```text
+olevba.py file.doc
+```
+
+Scan a single file, stored in a Zip archive with password "infected":
+
+```text
+olevba.py malicious_file.xls.zip -z infected
+```
+
+Scan a single file, showing all obfuscated strings decoded:
+
+```text
+olevba.py file.doc --decode
+```
+
+Scan a single file, showing the macro source code with VBA strings deobfuscated:
+
+```text
+olevba.py file.doc --reveal
+```
+
+Scan VBA source code extracted into a text file:
+
+```text
+olevba.py source_code.vba
+```
+
+Scan a collection of files stored in a folder:
+
+```text
+olevba.py "MalwareZoo/VBA/*"
+```
+NOTE: On Linux, MacOSX and other Unix variants, it is required to add double quotes around wildcards. Otherwise, they will be expanded by the shell instead of olevba.
+
+Scan all .doc and .xls files, recursively in all subfolders:
+
+```text
+olevba.py "MalwareZoo/VBA/*.doc" "MalwareZoo/VBA/*.xls" -r
+```
+
+Scan all .doc files within all .zip files with password, recursively:
+
+```text
+olevba.py "MalwareZoo/VBA/*.zip" -r -z infected -f "*.doc"
+```
+
+
+### Detailed analysis mode (default for single file)
+
+When a single file is scanned, or when using the option -d, all details of the analysis are displayed.
+
+For example, checking the malware sample [DIAN_caso-5415.doc](https://malwr.com/analysis/M2I4YWRhM2IwY2QwNDljN2E3ZWFjYTg3ODk4NmZhYmE/):
+
+```text
+>olevba.py c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip -z infected
+===============================================================================
+FILE: DIAN_caso-5415.doc.malware in c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip
+Type: OLE
+-------------------------------------------------------------------------------
+VBA MACRO ThisDocument.cls
+in file: DIAN_caso-5415.doc.malware - OLE stream: Macros/VBA/ThisDocument
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Option Explicit
+Private Declare Function URLDownloadToFileA Lib "urlmon" (ByVal FVQGKS As Long,_
+ByVal WSGSGY As String, ByVal IFRRFV As String, ByVal NCVOLV As Long, _
+ByVal HQTLDG As Long) As Long
+Sub AutoOpen()
+ Auto_Open
+End Sub
+Sub Auto_Open()
+SNVJYQ
+End Sub
+Public Sub SNVJYQ()
+ [Malicious Code...]
+End Sub
+Function OGEXYR(XSTAHU As String, PHHWIV As String) As Boolean
+ [Malicious Code...]
+ Application.DisplayAlerts = False
+ Application.Quit
+End Function
+Sub Workbook_Open()
+ Auto_Open
+End Sub
+
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ANALYSIS:
++------------+----------------------+-----------------------------------------+
+| Type | Keyword | Description |
++------------+----------------------+-----------------------------------------+
+| AutoExec | AutoOpen | Runs when the Word document is opened |
+| AutoExec | Auto_Open | Runs when the Excel Workbook is opened |
+| AutoExec | Workbook_Open | Runs when the Excel Workbook is opened |
+| Suspicious | Lib | May run code from a DLL |
+| Suspicious | Shell | May run an executable file or a system |
+| | | command |
+| Suspicious | Environ | May read system environment variables |
+| Suspicious | URLDownloadToFileA | May download files from the Internet |
+| IOC | http://germanya.com. | URL |
+| | ec/logs/test.exe" | |
+| IOC | http://germanya.com. | URL |
+| | ec/logs/counter.php" | |
+| IOC | germanya.com | Executable file name |
+| IOC | test.exe | Executable file name |
+| IOC | sfjozjero.exe | Executable file name |
++------------+----------------------+-----------------------------------------+
+```
+
+### Triage mode (default for multiple files)
+
+When several files are scanned, or when using the option -t, a summary of the analysis for each file is displayed.
+This is more convenient for quick triage of a collection of suspicious files.
+
+The following flags show the results of the analysis:
+
+- **OLE**: the file type is OLE, for example MS Office 97-2003
+- **OpX**: the file type is OpenXML, for example MS Office 2007+
+- **XML**: the file type is Word 2003 XML
+- **MHT**: the file type is Word MHTML, aka Single File Web Page (.mht)
+- **?**: the file type is not supported
+- **M**: contains VBA Macros
+- **A**: auto-executable macros
+- **S**: suspicious VBA keywords
+- **I**: potential IOCs
+- **H**: hex-encoded strings (potential obfuscation)
+- **B**: Base64-encoded strings (potential obfuscation)
+- **D**: Dridex-encoded strings (potential obfuscation)
+- **V**: VBA string expressions (potential obfuscation)
+
+Here is an example:
+
+```text
+c:\>olevba.py \MalwareZoo\VBA\samples\*
+Flags Filename
+----------- -----------------------------------------------------------------
+OLE:MASI--- \MalwareZoo\VBA\samples\DIAN_caso-5415.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_1.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_2.doc.malware
+OLE:MASI--- \MalwareZoo\VBA\samples\DRIDEX_3.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_4.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_5.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_6.doc.malware
+OLE:MAS---- \MalwareZoo\VBA\samples\DRIDEX_7.doc.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_8.doc.malware
+OLE:MASIHBD \MalwareZoo\VBA\samples\DRIDEX_9.xls.malware
+OLE:MASIH-- \MalwareZoo\VBA\samples\DRIDEX_A.doc.malware
+OLE:------- \MalwareZoo\VBA\samples\Normal_Document.doc
+OLE:M------ \MalwareZoo\VBA\samples\Normal_Document_Macro.doc
+OpX:MASI--- \MalwareZoo\VBA\samples\RottenKitten.xlsb.malware
+OLE:MASI-B- \MalwareZoo\VBA\samples\ROVNIX.doc.malware
+OLE:MA----- \MalwareZoo\VBA\samples\Word within Word macro auto.doc
+```
+
+## Python 3 support - olevba3
+
+As of v0.50, olevba has been ported to Python 3 thanks to @sebdraven.
+However, the differences between Python 2 and 3 are significant and for now
+there is a separate version of olevba named olevba3 to be used with
+Python 3.
+
+--------------------------------------------------------------------------
+
+## How to use olevba in Python applications
+
+olevba may be used to open a MS Office file, detect if it contains VBA macros, extract and analyze the VBA source code
+from your own python applications.
+
+IMPORTANT: olevba is currently under active development, therefore this API is likely to change.
+
+### Import olevba
+
+First, import the **oletools.olevba** module, importing at least the VBA_Parser class and the file type constants:
+
+```python
+from oletools.olevba import VBA_Parser, TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML, TYPE_MHTML
+```
+
+### Parse a MS Office file - VBA_Parser
+
+To parse a file on disk, create an instance of the **VBA_Parser** class, providing the name of the file to open as parameter.
+For example:
+
+```python
+vbaparser = VBA_Parser('my_file_with_macros.doc')
+```
+
+The file may also be provided as a bytes string containing its data. In that case, the actual
+filename must be provided for reference, and the file content with the data parameter. For example:
+
+```python
+myfile = 'my_file_with_macros.doc'
+filedata = open(myfile, 'rb').read()
+vbaparser = VBA_Parser(myfile, data=filedata)
+```
+VBA_Parser will raise an exception if the file is not in one of the supported formats: OLE (MS Office 97-2003),
+OpenXML (MS Office 2007+), MHTML or Word 2003 XML.
+
+After parsing the file, the attribute **VBA_Parser.type** is a string indicating the file type.
+It can be either TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML or TYPE_MHTML. (constants defined in the olevba module)
+
+### Detect VBA macros
+
+The method **detect_vba_macros** of a VBA_Parser object returns True if VBA macros have been found in the file,
+False otherwise.
+
+```python
+if vbaparser.detect_vba_macros():
+ print 'VBA Macros found'
+else:
+ print 'No VBA Macros found'
+```
+Note: The detection algorithm looks for streams and storage with specific names in the OLE structure, which works fine
+for all the supported formats listed above. However, for some formats such as PowerPoint 97-2003, this method will
+always return False because VBA Macros are stored in a different way which is not yet supported by olevba.
+
+Moreover, if the file contains an embedded document (e.g. an Excel workbook inserted into a Word document), this method
+may return True if the embedded document contains VBA Macros, even if the main document does not.
+
+### Extract VBA Macro Source Code
+
+The method **extract_macros** extracts and decompresses source code for each VBA macro found in the file (possibly
+including embedded files). It is a generator yielding a tuple (filename, stream_path, vba_filename, vba_code)
+for each VBA macro found.
+
+- filename: If the file is OLE (MS Office 97-2003), filename is the path of the file.
+ If the file is OpenXML (MS Office 2007+), filename is the path of the OLE subfile containing VBA macros within the zip archive,
+ e.g. word/vbaProject.bin.
+- stream_path: path of the OLE stream containing the VBA macro source code
+- vba_filename: corresponding VBA filename
+- vba_code: string containing the VBA source code in clear text
+
+Example:
+
+```python
+for (filename, stream_path, vba_filename, vba_code) in vbaparser.extract_macros():
+ print '-'*79
+ print 'Filename :', filename
+ print 'OLE stream :', stream_path
+ print 'VBA filename:', vba_filename
+ print '- '*39
+ print vba_code
+```
+Alternatively, the VBA_Parser method **extract_all_macros** returns the same results as a list of tuples.
+
+### Analyze VBA Source Code
+
+Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained
+in a file:
+
+The method **analyze_macros** from the class **VBA_Parser** can be used to scan the source code of all
+VBA modules to find obfuscated strings, suspicious keywords, IOCs, auto-executable macros, etc.
+
+analyze_macros() takes an optional argument show_decoded_strings: if set to True, the results will contain all the encoded
+strings found in the code (Hex, Base64, Dridex) with their decoded value.
+By default, it will only include the strings which contain printable characters.
+
+**VBA_Parser.analyze_macros()** returns a list of tuples (type, keyword, description), one for each item in the results.
+
+- type may be either 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String', 'Dridex String' or
+ 'VBA obfuscated Strings'.
+- keyword is the string found for auto-executable macros, suspicious keywords or IOCs. For obfuscated strings, it is
+ the decoded value of the string.
+- description provides a description of the keyword. For obfuscated strings, it is the encoded value of the string.
+
+Example:
+
+```python
+results = vbaparser.analyze_macros()
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+```
+After calling analyze_macros, the following VBA_Parser attributes also provide the number
+of items found for each category:
+
+```python
+print 'AutoExec keywords: %d' % vbaparser.nb_autoexec
+print 'Suspicious keywords: %d' % vbaparser.nb_suspicious
+print 'IOCs: %d' % vbaparser.nb_iocs
+print 'Hex obfuscated strings: %d' % vbaparser.nb_hexstrings
+print 'Base64 obfuscated strings: %d' % vbaparser.nb_base64strings
+print 'Dridex obfuscated strings: %d' % vbaparser.nb_dridexstrings
+print 'VBA obfuscated strings: %d' % vbaparser.nb_vbastrings
+```
+
+### Deobfuscate VBA Macro Source Code
+
+The method **reveal** attempts to deobfuscate the macro source code by replacing all
+the obfuscated strings by their decoded content. Returns a single string.
+
+Example:
+
+```python
+print vbaparser.reveal()
+```
+
+### Close the VBA_Parser
+
+After usage, it is better to call the **close** method of the VBA_Parser object, to make sure the file is closed,
+especially if your application is parsing many files.
+
+```python
+vbaparser.close()
+```
+
+--------------------------------------------------------------------------
+
+## Deprecated API
+
+The following methods and functions are still functional, but their usage is not recommended
+since they have been replaced by better solutions.
+
+### VBA_Scanner (deprecated)
+
+The class **VBA_Scanner** can be used to scan the source code of a VBA module to find obfuscated strings,
+suspicious keywords, IOCs, auto-executable macros, etc.
+
+First, create a VBA_Scanner object with a string containing the VBA source code (for example returned by the
+extract_macros method). Then call the methods **scan** or **scan_summary** to get the results of the analysis.
+
+scan() takes an optional argument include_decoded_strings: if set to True, the results will contain all the encoded
+strings found in the code (Hex, Base64, Dridex) with their decoded value.
+
+**scan** returns a list of tuples (type, keyword, description), one for each item in the results.
+
+- type may be either 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String'.
+- keyword is the string found for auto-executable macros, suspicious keywords or IOCs. For obfuscated strings, it is
+ the decoded value of the string.
+- description provides a description of the keyword. For obfuscated strings, it is the encoded value of the string.
+
+Example:
+
+```python
+vba_scanner = VBA_Scanner(vba_code)
+results = vba_scanner.scan(include_decoded_strings=True)
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+```
+The function **scan_vba** is a shortcut for VBA_Scanner(vba_code).scan():
+
+```python
+results = scan_vba(vba_code, include_decoded_strings=True)
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+```
+**scan_summary** returns a tuple with the number of items found for each category:
+(autoexec, suspicious, IOCs, hex, base64, dridex).
+
+
+### Detect auto-executable macros (deprecated)
+
+**Deprecated**: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
+
+The function **detect_autoexec** checks if VBA macro code contains specific macro names
+that will be triggered when the document/workbook is opened, closed, changed, etc.
+
+It returns a list of tuples containing two strings, the detected keyword, and the
+description of the trigger. (See the malware example above)
+
+Sample usage:
+
+```python
+from oletools.olevba import detect_autoexec
+autoexec_keywords = detect_autoexec(vba_code)
+if autoexec_keywords:
+ print 'Auto-executable macro keywords found:'
+ for keyword, description in autoexec_keywords:
+ print '%s: %s' % (keyword, description)
+else:
+ print 'Auto-executable macro keywords: None found'
+```
+
+### Detect suspicious VBA keywords (deprecated)
+
+**Deprecated**: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
+
+The function **detect_suspicious** checks if VBA macro code contains specific
+keywords often used by malware to act on the system (create files, run
+commands or applications, write to the registry, etc).
+
+It returns a list of tuples containing two strings, the detected keyword, and the
+description of the corresponding malicious behaviour. (See the malware example above)
+
+Sample usage:
+
+```python
+from oletools.olevba import detect_suspicious
+suspicious_keywords = detect_suspicious(vba_code)
+if suspicious_keywords:
+ print 'Suspicious VBA keywords found:'
+ for keyword, description in suspicious_keywords:
+ print '%s: %s' % (keyword, description)
+else:
+ print 'Suspicious VBA keywords: None found'
+```
+
+### Extract potential IOCs (deprecated)
+
+**Deprecated**: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
+
+The function **detect_patterns** checks if VBA macro code contains specific
+patterns of interest, that may be useful for malware analysis and detection
+(potential Indicators of Compromise): IP addresses, e-mail addresses,
+URLs, executable file names.
+
+It returns a list of tuples containing two strings, the pattern type, and the
+extracted value. (See the malware example above)
+
+Sample usage:
+
+```python
+from oletools.olevba import detect_patterns
+patterns = detect_patterns(vba_code)
+if patterns:
+ print 'Patterns found:'
+ for pattern_type, value in patterns:
+ print '%s: %s' % (pattern_type, value)
+else:
+ print 'Patterns: None found'
+```
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.html remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.html
--- remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,83 @@
+
+
+
+
+
+
+
+
+
+
+pyxswf
+pyxswf is a script to detect, extract and analyze Flash objects (SWF files) that may be embedded in files such as MS Office documents (e.g. Word, Excel), which is especially useful for malware analysis.
+It is part of the python-oletools package.
+pyxswf is an extension to xxxswf.py published by Alexander Hanel.
+Compared to xxxswf, it can extract streams from MS Office documents by parsing their OLE structure properly, which is necessary when streams are fragmented. Stream fragmentation is a known obfuscation technique, as explained on http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/
+It can also extract Flash objects from RTF documents, by parsing embedded objects encoded in hexadecimal format (-f option).
+For this, simply add the -o option to work on OLE streams rather than raw files, or the -f option to work on RTF files.
+Usage
+Usage: pyxswf.py [options] <file.bad>
+
+Options:
+ -o, --ole Parse an OLE file (e.g. Word, Excel) to look for SWF
+ in each stream
+ -f, --rtf Parse an RTF file to look for SWF in each embedded
+ object
+ -x, --extract Extracts the embedded SWF(s), names it MD5HASH.swf &
+ saves it in the working dir. No addition args needed
+ -h, --help show this help message and exit
+ -y, --yara Scans the SWF(s) with yara. If the SWF(s) is
+ compressed it will be deflated. No addition args
+ needed
+ -s, --md5scan Scans the SWF(s) for MD5 signatures. Please see func
+ checkMD5 to define hashes. No addition args needed
+ -H, --header Displays the SWFs file header. No addition args needed
+ -d, --decompress Deflates compressed SWFS(s)
+ -r PATH, --recdir=PATH
+ Will recursively scan a directory for files that
+ contain SWFs. Must provide path in quotes
+ -c, --compress Compresses the SWF using Zlib
+Example 1 - detecting and extracting a SWF file from a Word document on Windows:
+C:\oletools>pyxswf.py -o word_flash.doc
+OLE stream: 'Contents'
+[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
+ [ADDR] SWF 1 at 0x8 - FWS Header
+
+C:\oletools>pyxswf.py -xo word_flash.doc
+OLE stream: 'Contents'
+[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
+ [ADDR] SWF 1 at 0x8 - FWS Header
+ [FILE] Carved SWF MD5: 2498e9c0701dc0e461ab4358f9102bc5.swf
+Example 2 - detecting and extracting a SWF file from an RTF document on Windows:
+C:\oletools>pyxswf.py -xf "rtf_flash.rtf"
+RTF embedded object size 1498557 at index 000036DD
+[SUMMARY] 1 SWF(s) in MD5:46a110548007e04f4043785ac4184558:RTF_embedded_object_0
+00036DD
+ [ADDR] SWF 1 at 0xc40 - FWS Header
+ [FILE] Carved SWF MD5: 2498e9c0701dc0e461ab4358f9102bc5.swf
+How to use pyxswf in Python applications
+TODO
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.md remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.md
--- remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/pyxswf.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,97 @@
+pyxswf
+======
+
+pyxswf is a script to detect, extract and analyze Flash objects (SWF files) that may
+be embedded in files such as MS Office documents (e.g. Word, Excel),
+which is especially useful for malware analysis.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+pyxswf is an extension to [xxxswf.py](http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html) published by Alexander Hanel.
+
+Compared to xxxswf, it can extract streams from MS Office documents by parsing
+their OLE structure properly, which is necessary when streams are fragmented.
+Stream fragmentation is a known obfuscation technique, as explained on
+[http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/](http://web.archive.org/web/20121118021207/http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/)
+
+It can also extract Flash objects from RTF documents, by parsing embedded objects encoded in hexadecimal format (-f option).
+
+For this, simply add the -o option to work on OLE streams rather than raw files, or the -f option to work on RTF files.
+
+## Usage
+
+```text
+Usage: pyxswf.py [options] <file.bad>
+
+Options:
+ -o, --ole Parse an OLE file (e.g. Word, Excel) to look for SWF
+ in each stream
+ -f, --rtf Parse an RTF file to look for SWF in each embedded
+ object
+ -x, --extract Extracts the embedded SWF(s), names it MD5HASH.swf &
+ saves it in the working dir. No addition args needed
+ -h, --help show this help message and exit
+ -y, --yara Scans the SWF(s) with yara. If the SWF(s) is
+ compressed it will be deflated. No addition args
+ needed
+ -s, --md5scan Scans the SWF(s) for MD5 signatures. Please see func
+ checkMD5 to define hashes. No addition args needed
+ -H, --header Displays the SWFs file header. No addition args needed
+ -d, --decompress Deflates compressed SWFS(s)
+ -r PATH, --recdir=PATH
+ Will recursively scan a directory for files that
+ contain SWFs. Must provide path in quotes
+ -c, --compress Compresses the SWF using Zlib
+```
+
+### Example 1 - detecting and extracting a SWF file from a Word document on Windows:
+
+```text
+C:\oletools>pyxswf.py -o word_flash.doc
+OLE stream: 'Contents'
+[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
+ [ADDR] SWF 1 at 0x8 - FWS Header
+
+C:\oletools>pyxswf.py -xo word_flash.doc
+OLE stream: 'Contents'
+[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
+ [ADDR] SWF 1 at 0x8 - FWS Header
+ [FILE] Carved SWF MD5: 2498e9c0701dc0e461ab4358f9102bc5.swf
+```
+
+### Example 2 - detecting and extracting a SWF file from an RTF document on Windows:
+
+```text
+C:\oletools>pyxswf.py -xf "rtf_flash.rtf"
+RTF embedded object size 1498557 at index 000036DD
+[SUMMARY] 1 SWF(s) in MD5:46a110548007e04f4043785ac4184558:RTF_embedded_object_0
+00036DD
+ [ADDR] SWF 1 at 0xc40 - FWS Header
+ [FILE] Carved SWF MD5: 2498e9c0701dc0e461ab4358f9102bc5.swf
+```
+
+## How to use pyxswf in Python applications
+
+TODO
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.html remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.html
--- remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.html 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.html 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,96 @@
+
+
+
+
+
+
+
+
+
+
+
+rtfobj
+rtfobj is a Python module to detect and extract embedded objects stored in RTF files, such as OLE objects. It can also detect OLE Package objects, and extract the embedded files.
+Since v0.50, rtfobj contains a custom RTF parser that has been designed to match MS Word's behaviour, in order to handle obfuscated RTF files. See my article "Anti-Analysis Tricks in Weaponized RTF" for some concrete examples.
+rtfobj can be used as a Python library or a command-line tool.
+It is part of the python-oletools package.
+Usage
+rtfobj [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open first file from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
+ -l LOGLEVEL, --loglevel=LOGLEVEL
+ logging level debug/info/warning/error/critical
+ (default=warning)
+ -s SAVE_OBJECT, --save=SAVE_OBJECT
+ Save the object corresponding to the provided number
+ to a file, for example "-s 2". Use "-s all" to save
+ all objects at once.
+ -d OUTPUT_DIR use specified directory to save output files.
+rtfobj displays a list of the OLE and Package objects that have been detected, with their attributes such as class and filename.
+When an OLE Package object contains an executable file or script, it is highlighted as such. For example:
+
+To extract an object or file, use the option -s followed by the object number as shown in the table.
+Example:
+rtfobj -s 0
+It extracts and decodes the corresponding object, and saves it as a file named "object_xxxx.bin", xxxx being the location of the object in the RTF file.
+How to use rtfobj in Python applications
+As of v0.50, the API has changed significantly and it is not final yet. For now, see the class RtfObjectParser in the code.
+Deprecated API (still functional):
+rtf_iter_objects(filename) is an iterator which yields a tuple (index, orig_len, object) providing the index of each hexadecimal stream in the RTF file, and the corresponding decoded object.
+Example:
+from oletools import rtfobj
+for index, orig_len, data in rtfobj.rtf_iter_objects("myfile.rtf"):
+ print('found object size %d at index %08X' % (len(data), index))
+
+
+
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.md remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.md
--- remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.md 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/doc/rtfobj.md 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,102 @@
+rtfobj
+======
+
+rtfobj is a Python module to detect and extract embedded objects stored
+in RTF files, such as OLE objects. It can also detect OLE Package objects,
+and extract the embedded files.
+
+Since v0.50, rtfobj contains a custom RTF parser that has been designed to
+match MS Word's behaviour, in order to handle obfuscated RTF files. See my
+article ["Anti-Analysis Tricks in Weaponized RTF"](http://decalage.info/rtf_tricks)
+for some concrete examples.
+
+rtfobj can be used as a Python library or a command-line tool.
+
+It is part of the [python-oletools](http://www.decalage.info/python/oletools) package.
+
+## Usage
+
+```text
+rtfobj [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open first file from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
+ -l LOGLEVEL, --loglevel=LOGLEVEL
+ logging level debug/info/warning/error/critical
+ (default=warning)
+ -s SAVE_OBJECT, --save=SAVE_OBJECT
+ Save the object corresponding to the provided number
+ to a file, for example "-s 2". Use "-s all" to save
+ all objects at once.
+ -d OUTPUT_DIR use specified directory to save output files.
+```
+
+rtfobj displays a list of the OLE and Package objects that have been detected,
+with their attributes such as class and filename.
+
+When an OLE Package object contains an executable file or script, it is
+highlighted as such. For example:
+
+![](rtfobj1.png)
+
+To extract an object or file, use the option -s followed by the object number
+as shown in the table.
+
+Example:
+
+```text
+rtfobj -s 0
+```
+
+It extracts and decodes the corresponding object, and saves it as a file
+named "object_xxxx.bin", xxxx being the location of the object in the RTF file.
+
+
+## How to use rtfobj in Python applications
+
+As of v0.50, the API has changed significantly and it is not final yet.
+For now, see the class RtfObjectParser in the code.
+
+### Deprecated API (still functional):
+
+rtf_iter_objects(filename) is an iterator which yields a tuple
+(index, orig_len, object) providing the index of each hexadecimal stream
+in the RTF file, and the corresponding decoded object.
+
+Example:
+
+```python
+from oletools import rtfobj
+for index, orig_len, data in rtfobj.rtf_iter_objects("myfile.rtf"):
+ print('found object size %d at index %08X' % (len(data), index))
+```
+
+--------------------------------------------------------------------------
+
+python-oletools documentation
+-----------------------------
+
+- [[Home]]
+- [[License]]
+- [[Install]]
+- [[Contribute]], Suggest Improvements or Report Issues
+- Tools:
+ - [[olebrowse]]
+ - [[oleid]]
+ - [[olemeta]]
+ - [[oletimes]]
+ - [[oledir]]
+ - [[olemap]]
+ - [[olevba]]
+ - [[mraptor]]
+ - [[pyxswf]]
+ - [[oleobj]]
+ - [[rtfobj]]
diff -Nru remnux-oletools-0.51a/remnux-oletools/ezhexviewer.py remnux-oletools-0.51a/remnux-oletools/ezhexviewer.py
--- remnux-oletools-0.51a/remnux-oletools/ezhexviewer.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/ezhexviewer.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+"""
+ezhexviewer.py
+
+A simple hexadecimal viewer based on easygui. It should work on any platform
+with Python 2.x or 3.x.
+
+Usage: ezhexviewer.py [file]
+
+Usage in a python application:
+
+ import ezhexviewer
+ ezhexviewer.hexview_file(filename)
+ ezhexviewer.hexview_data(data)
+
+
+ezhexviewer project website: http://www.decalage.info/python/ezhexviewer
+
+ezhexviewer is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2012-09-17 v0.01 PL: - first version
+# 2012-10-04 v0.02 PL: - added license
+# 2016-09-06 v0.50 PL: - added main function for entry points in setup.py
+# 2016-10-26 PL: - fixed to run on Python 2+3
+
+__version__ = '0.50'
+
+#------------------------------------------------------------------------------
+# TODO:
+# + options to set title and msg
+
+
+from thirdparty.easygui import easygui
+import sys
+
+# === PYTHON 2+3 SUPPORT ======================================================
+
+if sys.version_info[0] >= 3:
+ # Python 3 specific adaptations
+ # py3 range = py2 xrange
+ xrange = range
+ PYTHON3 = True
+else:
+ PYTHON3 = False
+
+def xord(char):
+ '''
+ workaround for ord() to work on characters from a bytes string with
+ Python 2 and 3. If s is a bytes string, s[i] is a bytes string of
+ length 1 on Python 2, but it is an integer on Python 3...
+ xord(c) returns ord(c) if c is a bytes string, or c if it is already
+ an integer.
+ :param char: int or bytes of length 1
+ :return: ord(c) if bytes, c if int
+ '''
+ if isinstance(char, int):
+ return char
+ else:
+ return ord(char)
+
+def bchr(x):
+ '''
+ workaround for chr() to return a bytes string of length 1 with
+ Python 2 and 3. On Python 3, chr returns a unicode string, but
+ on Python 2 it is a bytes string.
+ bchr() always returns a bytes string on Python 2+3.
+ :param x: int
+ :return: chr(x) as a bytes string
+ '''
+ if PYTHON3:
+ # According to the Python 3 documentation, bytes() can be
+ # initialized with an iterable:
+ return bytes([x])
+ else:
+ return chr(x)
+
+#------------------------------------------------------------------------------
+# The following code (hexdump3 only) is a modified version of the hex dumper
+# recipe published on ASPN by Sebastien Keim and Raymond Hettinger under the
+# PSF license. I added the startindex parameter.
+# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812
+# PSF license: http://docs.python.org/license.html
+# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved
+
+FILTER = b''.join([(len(repr(bchr(x)))<=4 and x != 0x0A) and bchr(x) or b'.' for x in range(256)])
+
+def hexdump3(src, length=8, startindex=0):
+ """
+ Returns a hexadecimal dump of a binary string.
+ length: number of bytes per row.
+ startindex: index of 1st byte.
+ """
+ result=[]
+ for i in xrange(0, len(src), length):
+ s = src[i:i+length]
+ hexa = ' '.join(["%02X" % xord(x) for x in s])
+ printable = s.translate(FILTER)
+ if PYTHON3:
+ # On Python 3, need to convert printable from bytes to str:
+ printable = printable.decode('latin1')
+ result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable))
+ return ''.join(result)
+
+# end of PSF-licensed code.
+#------------------------------------------------------------------------------
+
+
+def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0):
+ hex = hexdump3(data, length=length, startindex=startindex)
+ easygui.codebox(msg=msg, title=title, text=hex)
+
+
+def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0):
+ data = open(filename, 'rb').read()
+ hexview_data(data, msg=msg, title=title, length=length, startindex=startindex)
+
+
+# === MAIN ===================================================================
+
+def main():
+ try:
+ filename = sys.argv[1]
+ except:
+ filename = easygui.fileopenbox()
+ if filename:
+ try:
+ hexview_file(filename, msg='File: %s' % filename)
+ except:
+ easygui.exceptionbox(msg='Error:', title='ezhexviewer')
+
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff -Nru remnux-oletools-0.51a/remnux-oletools/LICENSE.txt remnux-oletools-0.51a/remnux-oletools/LICENSE.txt
--- remnux-oletools-0.51a/remnux-oletools/LICENSE.txt 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/LICENSE.txt 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,54 @@
+LICENSE for the python-oletools package:
+
+This license applies to the python-oletools package, apart from the thirdparty
+folder which contains third-party files published with their own license.
+
+The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec (http://www.decalage.info)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+----------
+
+olevba contains modified source code from the officeparser project, published
+under the following MIT License (MIT):
+
+officeparser is copyright (c) 2014 John William Davison
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff -Nru remnux-oletools-0.51a/remnux-oletools/mraptor3.py remnux-oletools-0.51a/remnux-oletools/mraptor3.py
--- remnux-oletools-0.51a/remnux-oletools/mraptor3.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/mraptor3.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+"""
+mraptor.py - MacroRaptor
+
+MacroRaptor is a script to parse OLE and OpenXML files such as MS Office
+documents (e.g. Word, Excel), to detect malicious macros.
+
+Supported formats:
+- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
+- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
+- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
+- Word 2003 XML (.xml)
+- Word/Excel Single File Web Page / MHTML (.mht)
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+MacroRaptor is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+# === LICENSE ==================================================================
+
+# MacroRaptor is copyright (c) 2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2016-02-23 v0.01 PL: - first version
+# 2016-02-29 v0.02 PL: - added Workbook_Activate, FileSaveAs
+# 2016-03-04 v0.03 PL: - returns an exit code based on the overall result
+# 2016-03-08 v0.04 PL: - collapse long lines before analysis
+# 2016-07-19 v0.50 SL: - converted to Python 3
+# 2016-08-26 PL: - changed imports for Python 3
+
+__version__ = '0.50py3'
+
+#------------------------------------------------------------------------------
+# TODO:
+
+
+#--- IMPORTS ------------------------------------------------------------------
+
+import sys, logging, optparse, re
+
+from thirdparty.xglob import xglob
+
+# import the python 3 version of tablestream:
+from thirdparty.tablestream import tablestream
+
+# import the python 3 version of olevba
+import olevba3 as olevba
+
+# === LOGGING =================================================================
+
+# a global logger object used for debugging:
+log = olevba.get_logger('mraptor')
+
+
+#--- CONSTANTS ----------------------------------------------------------------
+
+# URL and message to report issues:
+# TODO: make it a common variable for all oletools
+URL_ISSUES = 'https://github.com/decalage2/oletools/issues'
+MSG_ISSUES = 'Please report this issue on %s' % URL_ISSUES
+
+# 'AutoExec', 'AutoOpen', 'Auto_Open', 'AutoClose', 'Auto_Close', 'AutoNew', 'AutoExit',
+# 'Document_Open', 'DocumentOpen',
+# 'Document_Close', 'DocumentBeforeClose',
+# 'DocumentChange','Document_New',
+# 'NewDocument'
+# 'Workbook_Open', 'Workbook_Close',
+
+# TODO: check if line also contains Sub or Function
+re_autoexec = re.compile(r'(?i)\b(?:Auto(?:Exec|_?Open|_?Close|Exit|New)' +
+ r'|Document(?:_?Open|_Close|BeforeClose|Change|_New)' +
+ r'|NewDocument|Workbook(?:_Open|_Activate|_Close))\b')
+
+# MS-VBAL 5.4.5.1 Open Statement:
+RE_OPEN_WRITE = r'(?:\bOpen\b[^\n]+\b(?:Write|Append|Binary|Output|Random)\b)'
+
+re_write = re.compile(r'(?i)\b(?:FileCopy|CopyFile|Kill|CreateTextFile|'
+ + r'VirtualAlloc|RtlMoveMemory|URLDownloadToFileA?|AltStartupPath|'
+ + r'ADODB\.Stream|WriteText|SaveToFile|SaveAs|SaveAsRTF|FileSaveAs|MkDir|RmDir|SaveSetting|SetAttr)\b|' + RE_OPEN_WRITE)
+
+# MS-VBAL 5.2.3.5 External Procedure Declaration
+RE_DECLARE_LIB = r'(?:\bDeclare\b[^\n]+\bLib\b)'
+
+re_execute = re.compile(r'(?i)\b(?:Shell|CreateObject|GetObject|SendKeys|'
+ + r'MacScript|FollowHyperlink|CreateThread|ShellExecute)\b|' + RE_DECLARE_LIB)
+
+# short tag to display file types in triage mode:
+TYPE2TAG = {
+ olevba.TYPE_OLE: 'OLE',
+ olevba.TYPE_OpenXML: 'OpX',
+ olevba.TYPE_Word2003_XML: 'XML',
+ olevba.TYPE_MHTML: 'MHT',
+ olevba.TYPE_TEXT: 'TXT',
+}
+
+
+# === CLASSES =================================================================
+
+class Result_NoMacro(object):
+ exit_code = 0
+ color = 'green'
+ name = 'No Macro'
+
+
+class Result_NotMSOffice(object):
+ exit_code = 1
+ color = 'green'
+ name = 'Not MS Office'
+
+
+class Result_MacroOK(object):
+ exit_code = 2
+ color = 'cyan'
+ name = 'Macro OK'
+
+
+class Result_Error(object):
+ exit_code = 10
+ color = 'yellow'
+ name = 'ERROR'
+
+
+class Result_Suspicious(object):
+ exit_code = 20
+ color = 'red'
+ name = 'SUSPICIOUS'
+
+
+class MacroRaptor(object):
+ """
+ class to scan VBA macro code to detect if it is malicious
+ """
+ def __init__(self, vba_code):
+ """
+ MacroRaptor constructor
+ :param vba_code: string containing the VBA macro code
+ """
+ # collapse long lines first
+ self.vba_code = olevba.vba_collapse_long_lines(vba_code)
+ self.autoexec = False
+ self.write = False
+ self.execute = False
+ self.flags = ''
+ self.suspicious = False
+ self.autoexec_match = None
+ self.write_match = None
+ self.execute_match = None
+ self.matches = []
+
+ def scan(self):
+ """
+ Scan the VBA macro code to detect if it is malicious
+ :return: None (results are stored in the instance attributes)
+ """
+ m = re_autoexec.search(self.vba_code)
+ if m is not None:
+ self.autoexec = True
+ self.autoexec_match = m.group()
+ self.matches.append(m.group())
+ m = re_write.search(self.vba_code)
+ if m is not None:
+ self.write = True
+ self.write_match = m.group()
+ self.matches.append(m.group())
+ m = re_execute.search(self.vba_code)
+ if m is not None:
+ self.execute = True
+ self.execute_match = m.group()
+ self.matches.append(m.group())
+ if self.autoexec and (self.execute or self.write):
+ self.suspicious = True
+
+ def get_flags(self):
+ flags = ''
+ flags += 'A' if self.autoexec else '-'
+ flags += 'W' if self.write else '-'
+ flags += 'X' if self.execute else '-'
+ return flags
+
+
+# === MAIN ====================================================================
+
+def main():
+ """
+ Main function, called when mraptor is run from the command line
+ """
+ global log
+ DEFAULT_LOG_LEVEL = "warning" # Default log level
+ LOG_LEVELS = {
+ 'debug': logging.DEBUG,
+ 'info': logging.INFO,
+ 'warning': logging.WARNING,
+ 'error': logging.ERROR,
+ 'critical': logging.CRITICAL
+ }
+
+ usage = 'usage: %prog [options] [filename2 ...]'
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("-r", action="store_true", dest="recursive",
+ help='find files recursively in subdirectories.')
+ parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+ help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+ parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+ help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+ parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+ help="logging level debug/info/warning/error/critical (default=%default)")
+ parser.add_option("-m", '--matches', action="store_true", dest="show_matches",
+ help='Show matched strings.')
+
+ # TODO: add logfile option
+
+ (options, args) = parser.parse_args()
+
+ # Print help if no arguments are passed
+ if len(args) == 0:
+ print(__doc__)
+ parser.print_help()
+ print('\nAn exit code is returned based on the analysis result:')
+ for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious):
+ print(' - %d: %s' % (result.exit_code, result.name))
+ sys.exit()
+
+ # print banner with version
+ print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
+ print('This is work in progress, please report issues at %s' % URL_ISSUES)
+
+ logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
+ # enable logging in the modules:
+ log.setLevel(logging.NOTSET)
+
+ t = tablestream.TableStream(style=tablestream.TableStyleSlim,
+ header_row=['Result', 'Flags', 'Type', 'File'],
+ column_width=[10, 5, 4, 56])
+
+ exitcode = -1
+ global_result = None
+ # TODO: handle errors in xglob, to continue processing the next files
+ for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+ zip_password=options.zip_password, zip_fname=options.zip_fname):
+ # ignore directory names stored in zip files:
+ if container and filename.endswith('/'):
+ continue
+ full_name = '%s in %s' % (filename, container) if container else filename
+ # try:
+ # # Open the file
+ # if data is None:
+ # data = open(filename, 'rb').read()
+ # except:
+ # log.exception('Error when opening file %r' % full_name)
+ # continue
+ if isinstance(data, Exception):
+ result = Result_Error
+ t.write_row([result.name, '', '', full_name],
+ colors=[result.color, None, None, None])
+ t.write_row(['', '', '', str(data)],
+ colors=[None, None, None, result.color])
+ else:
+ filetype = '???'
+ try:
+ vba_parser = olevba.VBA_Parser(filename=filename, data=data, container=container)
+ filetype = TYPE2TAG[vba_parser.type]
+ except Exception as e:
+ # log.error('Error when parsing VBA macros from file %r' % full_name)
+ # TODO: distinguish actual errors from non-MSOffice files
+ result = Result_Error
+ t.write_row([result.name, '', filetype, full_name],
+ colors=[result.color, None, None, None])
+ t.write_row(['', '', '', str(e)],
+ colors=[None, None, None, result.color])
+ continue
+ if vba_parser.detect_vba_macros():
+ vba_code_all_modules = ''
+ try:
+ for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros():
+ vba_code_all_modules += vba_code.decode('utf-8','replace') + '\n'
+ except Exception as e:
+ # log.error('Error when parsing VBA macros from file %r' % full_name)
+ result = Result_Error
+ t.write_row([result.name, '', TYPE2TAG[vba_parser.type], full_name],
+ colors=[result.color, None, None, None])
+ t.write_row(['', '', '', str(e)],
+ colors=[None, None, None, result.color])
+ continue
+ mraptor = MacroRaptor(vba_code_all_modules)
+ mraptor.scan()
+ if mraptor.suspicious:
+ result = Result_Suspicious
+ else:
+ result = Result_MacroOK
+ t.write_row([result.name, mraptor.get_flags(), filetype, full_name],
+ colors=[result.color, None, None, None])
+ if mraptor.matches and options.show_matches:
+ t.write_row(['', '', '', 'Matches: %r' % mraptor.matches])
+ else:
+ result = Result_NoMacro
+ t.write_row([result.name, '', filetype, full_name],
+ colors=[result.color, None, None, None])
+ if result.exit_code > exitcode:
+ global_result = result
+ exitcode = result.exit_code
+
+ print('')
+ print('Flags: A=AutoExec, W=Write, X=Execute')
+ print('Exit code: %d - %s' % (exitcode, global_result.name))
+ sys.exit(exitcode)
+
+if __name__ == '__main__':
+ main()
+
+# Soundtrack: "Dark Child" by Marlon Williams
diff -Nru remnux-oletools-0.51a/remnux-oletools/mraptor_milter.py remnux-oletools-0.51a/remnux-oletools/mraptor_milter.py
--- remnux-oletools-0.51a/remnux-oletools/mraptor_milter.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/mraptor_milter.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,399 @@
+#!/usr/bin/env python
+"""
+mraptor_milter
+
+mraptor_milter is a milter script for the Sendmail and Postfix e-mail
+servers. It parses MS Office documents (e.g. Word, Excel) to detect
+malicious macros. Documents with malicious macros are removed and
+replaced by harmless text files.
+
+Supported formats:
+- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
+- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
+- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
+- Word 2003 XML (.xml)
+- Word/Excel Single File Web Page / MHTML (.mht)
+- Publisher (.pub)
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+mraptor_milter is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+# === LICENSE ==================================================================
+
+# mraptor_milter is copyright (c) 2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# --- CHANGELOG --------------------------------------------------------------
+# 2016-08-08 v0.01 PL: - first version
+# 2016-08-12 v0.02 PL: - added logging to file with time rotation
+# - archive each e-mail to a file before filtering
+# 2016-08-30 v0.03 PL: - added daemonize to run as a Unix daemon
+# 2016-09-06 v0.50 PL: - fixed issue #20, is_zipfile on Python 2.6
+
+__version__ = '0.50'
+
+# --- TODO -------------------------------------------------------------------
+
+# TODO: option to run in the foreground for troubleshooting
+# TODO: option to write logs to the console
+# TODO: options to set listening port and interface
+# TODO: config file for all parameters
+# TODO: option to run as a non-privileged user
+# TODO: handle files in archives
+
+
+# --- IMPORTS ----------------------------------------------------------------
+
+import Milter
+import io
+import time
+import email
+import sys
+import os
+import logging
+import logging.handlers
+import datetime
+import StringIO
+
+from socket import AF_INET6
+
+from oletools import olevba, mraptor
+
+from Milter.utils import parse_addr
+
+if sys.version_info[0] <= 2:
+ # Python 2.x
+ if sys.version_info[1] <= 6:
+ # Python 2.6
+ # use is_zipfile backported from Python 2.7:
+ from oletools.thirdparty.zipfile27 import is_zipfile
+ else:
+ # Python 2.7
+ from zipfile import is_zipfile
+else:
+ # Python 3.x+
+ from zipfile import is_zipfile
+
+
+
+# --- CONSTANTS --------------------------------------------------------------
+
+# TODO: read parameters from a config file
+# at postfix smtpd_milters = inet:127.0.0.1:25252
+SOCKET = "inet:25252@127.0.0.1" # bind to a tcp socket "inet:port@ip" or to a unix socket "/path/to/file.sock"
+TIMEOUT = 30 # Milter timeout in seconds
+# CFG_DIR = "/etc/macromilter/"
+# LOG_DIR = "/var/log/macromilter/"
+
+# TODO: different path on Windows:
+LOGFILE_DIR = '/var/log/mraptor_milter'
+# LOGFILE_DIR = '.'
+LOGFILE_NAME = 'mraptor_milter.log'
+LOGFILE_PATH = os.path.join(LOGFILE_DIR, LOGFILE_NAME)
+
+# Directory where to save a copy of each received e-mail:
+ARCHIVE_DIR = '/var/log/mraptor_milter'
+# ARCHIVE_DIR = '.'
+
+# file to store PID for daemonize
+PIDFILE = "/tmp/mraptor_milter.pid"
+
+
+
+# === LOGGING ================================================================
+
+# Set up a specific logger with our desired output level
+log = logging.getLogger('MRMilter')
+
+# disable logging by default - enable it in main app:
+log.setLevel(logging.CRITICAL+1)
+
+# NOTE: all logging config is done in the main app, not here.
+
+# === CLASSES ================================================================
+
+# Inspired from https://github.com/jmehnle/pymilter/blob/master/milter-template.py
+
+class MacroRaptorMilter(Milter.Base):
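+    '''Milter that saves a copy of each message, then checks its attachments with
+    MacroRaptor and replaces those containing a suspicious macro by a text notice.'''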
+ def __init__(self):
+ # A new instance with each new connection.
+ # each connection runs in its own thread and has its own myMilter
+ # instance. Python code must be thread safe. This is trivial if only stuff
+ # in myMilter instances is referenced.
+ self.id = Milter.uniqueID() # Integer incremented with each call.
+ self.message = None
+ self.IP = None
+ self.port = None
+ self.flow = None
+ self.scope = None
+ self.IPname = None # Name from a reverse IP lookup
+
+ @Milter.noreply
+ def connect(self, IPname, family, hostaddr):
+ '''
+ New connection (may contain several messages)
+ :param IPname: Name from a reverse IP lookup
+ :param family: IP version 4 (AF_INET) or 6 (AF_INET6)
+ :param hostaddr: tuple (IP, port [, flow, scope])
+ :return: Milter.CONTINUE
+ '''
+ # Examples:
+ # (self, 'ip068.subnet71.example.com', AF_INET, ('215.183.71.68', 4720) )
+ # (self, 'ip6.mxout.example.com', AF_INET6,
+ # ('3ffe:80e8:d8::1', 4720, 1, 0) )
+ self.IP = hostaddr[0]
+ self.port = hostaddr[1]
+ if family == AF_INET6:
+ self.flow = hostaddr[2]
+ self.scope = hostaddr[3]
+ else:
+ self.flow = None
+ self.scope = None
+ self.IPname = IPname # Name from a reverse IP lookup
+ self.message = None # content
+ log.info("[%d] connect from host %s at %s" % (self.id, IPname, hostaddr))
+ return Milter.CONTINUE
+
+ @Milter.noreply
+ def envfrom(self, mailfrom, *rest):
+ '''
+ Mail From - Called at the beginning of each message within a connection
+ :param mailfrom:
+        :param rest: additional arguments received with the sender address (only logged)
+ :return: Milter.CONTINUE
+ '''
+ self.message = io.BytesIO()
+ # NOTE: self.message is only an *internal* copy of message data. You
+ # must use addheader, chgheader, replacebody to change the message
+ # on the MTA.
+ self.canon_from = '@'.join(parse_addr(mailfrom))
+ self.message.write('From %s %s\n' % (self.canon_from, time.ctime()))
+ log.debug('[%d] Mail From %s %s\n' % (self.id, self.canon_from, time.ctime()))
+ log.debug('[%d] mailfrom=%r, rest=%r' % (self.id, mailfrom, rest))
+ return Milter.CONTINUE
+
+ @Milter.noreply
+ def envrcpt(self, to, *rest):
+ '''
+ RCPT TO
+ :param to:
+        :param rest: additional arguments received with the recipient address (only logged)
+ :return: Milter.CONTINUE
+ '''
+ log.debug('[%d] RCPT TO %r, rest=%r\n' % (self.id, to, rest))
+ return Milter.CONTINUE
+
+ @Milter.noreply
+ def header(self, header_field, header_field_value):
+ '''
+        Header callback: append the header field to the internal copy of the message
+ :param header_field:
+ :param header_field_value:
+ :return: Milter.CONTINUE
+ '''
+ self.message.write("%s: %s\n" % (header_field, header_field_value))
+ return Milter.CONTINUE
+
+ @Milter.noreply
+ def eoh(self):
+ '''
+ End of headers
+ :return: Milter.CONTINUE
+ '''
+ self.message.write("\n")
+ return Milter.CONTINUE
+
+ @Milter.noreply
+ def body(self, chunk):
+ '''
+ Message body (chunked)
+ :param chunk:
+ :return: Milter.CONTINUE
+ '''
+ self.message.write(chunk)
+ return Milter.CONTINUE
+
+ def close(self):
+ return Milter.CONTINUE
+
+ def abort(self):
+ '''
+ Clean up if the connection is closed by client
+ :return: Milter.CONTINUE
+ '''
+ return Milter.CONTINUE
+
+    def archive_message(self):
+        '''
+        Save a copy of the current message in its original form to a file in ARCHIVE_DIR
+        :return: nothing
+        '''
+        date_time = datetime.datetime.utcnow().isoformat('_')
+        # assumption: combining datetime + milter id makes the filename unique (the only
+        # case for duplicates is when restarting the milter twice in less than a second)
+        fname = os.path.join(ARCHIVE_DIR, 'mail_%s_%d.eml' % (date_time, self.id))
+        log.debug('Saving a copy of the original message to file %r' % fname)
+        with open(fname, 'wb') as f:
+            f.write(self.message.getvalue())
+
+ def eom(self):
+ '''
+ This method is called when the end of the email message has been reached.
+ This event also triggers the milter specific actions
+ :return: Milter.ACCEPT or Milter.DISCARD if processing error
+ '''
+ try:
+ # set data pointer back to 0
+ self.message.seek(0)
+ self.archive_message()
+ result = self.check_mraptor()
+ if result is not None:
+ return result
+ else:
+ return Milter.ACCEPT
+ # if error make a fall-back to accept
+ except Exception:
+ exc_type, exc_obj, exc_tb = sys.exc_info()
+ fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+ log.exception("[%d] Unexpected error - fall back to ACCEPT: %s %s %s"
+ % (self.id, exc_type, fname, exc_tb.tb_lineno))
+ return Milter.ACCEPT
+
+    def check_mraptor(self):
+        '''
+        Check the attachments of a message using mraptor.
+        If an attachment is identified as suspicious, it is replaced by a simple text file.
+        :return: Milter.ACCEPT or Milter.DISCARD if processing error
+        '''
+        msg = email.message_from_string(self.message.getvalue())
+        result = Milter.ACCEPT
+        try:
+            for part in msg.walk():
+                content_type = part.get_content_type()
+                log.debug('[%d] Content-type: %r' % (self.id, content_type))
+                # TODO: handle any content-type, but check the file magic?
+                if not content_type.startswith('multipart'):
+                    filename = part.get_filename(None)
+                    log.debug('[%d] Analyzing attachment %r' % (self.id, filename))
+                    attachment = part.get_payload(decode=True)
+                    # container parts (e.g. message/rfc822) decode to None: skip, walk() visits their sub-parts
+                    if attachment is None:
+                        continue
+                    attachment_lowercase = attachment.lower()
+                    # check if this is a supported file type (if not, just skip it)
+                    # TODO: this function should be provided by olevba
+                    if attachment.startswith(olevba.olefile.MAGIC) \
+                        or is_zipfile(StringIO.StringIO(attachment)) \
+                        or 'http://schemas.microsoft.com/office/word/2003/wordml' in attachment \
+                        or ('mime' in attachment_lowercase and 'version' in attachment_lowercase
+                            and 'multipart' in attachment_lowercase):
+                        vba_parser = olevba.VBA_Parser(filename='message', data=attachment)
+                        vba_code_all_modules = ''
+                        for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros():
+                            vba_code_all_modules += vba_code + '\n'
+                        m = mraptor.MacroRaptor(vba_code_all_modules)
+                        m.scan()
+                        if m.suspicious:
+                            log.warning('[%d] The attachment %r contains a suspicious macro: replace it with a text file'
+                                               % (self.id, filename))
+                            part.set_payload('This attachment has been removed because it contains a suspicious macro.')
+                            part.set_type('text/plain')
+                            # del is a no-op when the CTE header is absent, whereas replace_header raises KeyError
+                            del part['Content-Transfer-Encoding']
+                            part['Content-Transfer-Encoding'] = '7bit'
+                            # TODO: archive filtered e-mail to a file
+                        else:
+                            log.debug('The attachment %r is clean.'
+                                    % filename)
+        except Exception:
+            log.exception('[%d] Error while processing the message' % self.id)
+            # TODO: depending on error, decide to forward the e-mail as-is or not
+            result = Milter.DISCARD
+        # TODO: only do this if the body has actually changed
+        body = str(msg)
+        self.message = io.BytesIO(body)
+        self.replacebody(body)
+        log.info('[%d] Message relayed' % self.id)
+        return result
+
+
+# === MAIN ===================================================================
+
+def main():
+    """Set up logging to a rotating file, register MacroRaptorMilter and run the milter."""
+    print('mraptor_milter v%s - http://decalage.info/python/oletools' % __version__)
+    print('logging to file %s' % LOGFILE_PATH)
+    print('Press Ctrl+C to stop.')
+
+    # make sure the log directory exists:
+    try:
+        os.makedirs(LOGFILE_DIR)
+    except OSError:
+        pass  # usually "already exists"; any other cause makes the handler creation below fail
+    # Add the log message handler to the logger
+    # log to files rotating once a day:
+    handler = logging.handlers.TimedRotatingFileHandler(LOGFILE_PATH, when='D', encoding='utf8')
+    # create formatter and add it to the handlers
+    formatter = logging.Formatter('%(asctime)s - %(levelname)8s: %(message)s')
+    handler.setFormatter(formatter)
+    log.addHandler(handler)
+    # enable logging:
+    log.setLevel(logging.DEBUG)
+
+    log.info('Starting mraptor_milter v%s - listening on %s' % (__version__, SOCKET))
+    log.debug('Python version: %s' % sys.version)
+
+    # Register to have the Milter factory create instances of the class:
+    Milter.factory = MacroRaptorMilter
+    flags = Milter.CHGBODY + Milter.CHGHDRS + Milter.ADDHDRS
+    flags += Milter.ADDRCPT
+    flags += Milter.DELRCPT
+    Milter.set_flags(flags) # tell Sendmail which features we use
+    # set the "last" fall back to ACCEPT if exception occur
+    Milter.set_exception_policy(Milter.ACCEPT)
+    # start the milter
+    Milter.runmilter("mraptor_milter", SOCKET, TIMEOUT)
+    log.info('Stopping mraptor_milter.')
+
+
+if __name__ == "__main__":
+
+ # Using daemonize:
+ # See http://daemonize.readthedocs.io/en/latest/
+ from daemonize import Daemonize
+ daemon = Daemonize(app="mraptor_milter", pid=PIDFILE, action=main)
+ daemon.start()
+
+ # Using python-daemon - Does not work as-is, need to create the PID file
+ # See https://pypi.python.org/pypi/python-daemon/
+ # See PEP-3143: https://www.python.org/dev/peps/pep-3143/
+ # import daemon
+ # import lockfile
+ # with daemon.DaemonContext(pidfile=lockfile.FileLock(PIDFILE)):
+ # main()
diff -Nru remnux-oletools-0.51a/remnux-oletools/mraptor.py remnux-oletools-0.51a/remnux-oletools/mraptor.py
--- remnux-oletools-0.51a/remnux-oletools/mraptor.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/mraptor.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,337 @@
+#!/usr/bin/env python
+"""
+mraptor.py - MacroRaptor
+
+MacroRaptor is a script to parse OLE and OpenXML files such as MS Office
+documents (e.g. Word, Excel), to detect malicious macros.
+
+Supported formats:
+- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
+- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
+- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
+- Word 2003 XML (.xml)
+- Word/Excel Single File Web Page / MHTML (.mht)
+- Publisher (.pub)
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+MacroRaptor is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+# === LICENSE ==================================================================
+
+# MacroRaptor is copyright (c) 2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2016-02-23 v0.01 PL: - first version
+# 2016-02-29 v0.02 PL: - added Workbook_Activate, FileSaveAs
+# 2016-03-04 v0.03 PL: - returns an exit code based on the overall result
+# 2016-03-08 v0.04 PL: - collapse long lines before analysis
+# 2016-08-31 v0.50 PL: - added macro trigger InkPicture_Painted
+# 2016-09-05 PL: - added Document_BeforeClose keyword for MS Publisher (.pub)
+# 2016-10-25 PL: - fixed print for Python 3
+
+__version__ = '0.50'
+
+#------------------------------------------------------------------------------
+# TODO:
+
+
+#--- IMPORTS ------------------------------------------------------------------
+
+import sys, logging, optparse, re
+
+from thirdparty.xglob import xglob
+from thirdparty.tablestream import tablestream
+
+import olevba
+
+# === LOGGING =================================================================
+
+# a global logger object used for debugging:
+log = olevba.get_logger('mraptor')
+
+
+#--- CONSTANTS ----------------------------------------------------------------
+
+# URL and message to report issues:
+# TODO: make it a common variable for all oletools
+URL_ISSUES = 'https://github.com/decalage2/oletools/issues'
+MSG_ISSUES = 'Please report this issue on %s' % URL_ISSUES
+
+# 'AutoExec', 'AutoOpen', 'Auto_Open', 'AutoClose', 'Auto_Close', 'AutoNew', 'AutoExit',
+# 'Document_Open', 'DocumentOpen',
+# 'Document_Close', 'DocumentBeforeClose', 'Document_BeforeClose',
+# 'DocumentChange','Document_New',
+# 'NewDocument'
+# 'Workbook_Open', 'Workbook_Close',
+# *_Painted such as InkPicture1_Painted
+# *_GotFocus|LostFocus|MouseHover for other ActiveX objects
+
+# TODO: check if line also contains Sub or Function
+re_autoexec = re.compile(r'(?i)\b(?:Auto(?:Exec|_?Open|_?Close|Exit|New)' +
+ r'|Document(?:_?Open|_Close|_?BeforeClose|Change|_New)' +
+ r'|NewDocument|Workbook(?:_Open|_Activate|_Close)' +
+ r'|\w+_(?:Painted|GotFocus|LostFocus|MouseHover))\b')
+
+# MS-VBAL 5.4.5.1 Open Statement:
+RE_OPEN_WRITE = r'(?:\bOpen\b[^\n]+\b(?:Write|Append|Binary|Output|Random)\b)'
+
+re_write = re.compile(r'(?i)\b(?:FileCopy|CopyFile|Kill|CreateTextFile|'
+ + r'VirtualAlloc|RtlMoveMemory|URLDownloadToFileA?|AltStartupPath|'
+ + r'ADODB\.Stream|WriteText|SaveToFile|SaveAs|SaveAsRTF|FileSaveAs|MkDir|RmDir|SaveSetting|SetAttr)\b|' + RE_OPEN_WRITE)
+
+# MS-VBAL 5.2.3.5 External Procedure Declaration
+RE_DECLARE_LIB = r'(?:\bDeclare\b[^\n]+\bLib\b)'
+
+re_execute = re.compile(r'(?i)\b(?:Shell|CreateObject|GetObject|SendKeys|'
+ + r'MacScript|FollowHyperlink|CreateThread|ShellExecute)\b|' + RE_DECLARE_LIB)
+
+# short tag to display file types in triage mode:
+TYPE2TAG = {
+ olevba.TYPE_OLE: 'OLE',
+ olevba.TYPE_OpenXML: 'OpX',
+ olevba.TYPE_Word2003_XML: 'XML',
+ olevba.TYPE_MHTML: 'MHT',
+ olevba.TYPE_TEXT: 'TXT',
+}
+
+
+# === CLASSES =================================================================
+
+class Result_NoMacro(object):
+ exit_code = 0
+ color = 'green'
+ name = 'No Macro'
+
+
+class Result_NotMSOffice(object):
+ exit_code = 1
+ color = 'green'
+ name = 'Not MS Office'
+
+
+class Result_MacroOK(object):
+ exit_code = 2
+ color = 'cyan'
+ name = 'Macro OK'
+
+
+class Result_Error(object):
+ exit_code = 10
+ color = 'yellow'
+ name = 'ERROR'
+
+
+class Result_Suspicious(object):
+ exit_code = 20
+ color = 'red'
+ name = 'SUSPICIOUS'
+
+
+class MacroRaptor(object):
+ """
+ class to scan VBA macro code to detect if it is malicious
+ """
+ def __init__(self, vba_code):
+ """
+ MacroRaptor constructor
+ :param vba_code: string containing the VBA macro code
+ """
+ # collapse long lines first
+ self.vba_code = olevba.vba_collapse_long_lines(vba_code)
+ self.autoexec = False
+ self.write = False
+ self.execute = False
+ self.flags = ''
+ self.suspicious = False
+ self.autoexec_match = None
+ self.write_match = None
+ self.execute_match = None
+ self.matches = []
+
+ def scan(self):
+ """
+ Scan the VBA macro code to detect if it is malicious
+        :return: nothing (sets the autoexec, write, execute, suspicious and matches attributes)
+ """
+ m = re_autoexec.search(self.vba_code)
+ if m is not None:
+ self.autoexec = True
+ self.autoexec_match = m.group()
+ self.matches.append(m.group())
+ m = re_write.search(self.vba_code)
+ if m is not None:
+ self.write = True
+ self.write_match = m.group()
+ self.matches.append(m.group())
+ m = re_execute.search(self.vba_code)
+ if m is not None:
+ self.execute = True
+ self.execute_match = m.group()
+ self.matches.append(m.group())
+ if self.autoexec and (self.execute or self.write):
+ self.suspicious = True
+
+ def get_flags(self):
+ flags = ''
+ flags += 'A' if self.autoexec else '-'
+ flags += 'W' if self.write else '-'
+ flags += 'X' if self.execute else '-'
+ return flags
+
+
+# === MAIN ====================================================================
+
+def main():
+    """
+    Main function, called when mraptor is run from the command line.
+    Scans each file given as argument, prints one result row per file and
+    exits with the highest exit code among the results.
+    """
+    global log
+    DEFAULT_LOG_LEVEL = "warning" # Default log level
+    LOG_LEVELS = {
+        'debug':    logging.DEBUG,
+        'info':     logging.INFO,
+        'warning':  logging.WARNING,
+        'error':    logging.ERROR,
+        'critical': logging.CRITICAL
+        }
+
+    usage = 'usage: %prog [options] <filename> [filename2 ...]'
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("-r", action="store_true", dest="recursive",
+        help='find files recursively in subdirectories.')
+    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+        help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+        help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+    parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+        help="logging level debug/info/warning/error/critical (default=%default)")
+    parser.add_option("-m", '--matches', action="store_true", dest="show_matches",
+        help='Show matched strings.')
+
+    # TODO: add logfile option
+
+    (options, args) = parser.parse_args()
+
+    # Print help if no arguments are passed
+    if len(args) == 0:
+        print(__doc__)
+        parser.print_help()
+        print('\nAn exit code is returned based on the analysis result:')
+        for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious):
+            print(' - %d: %s' % (result.exit_code, result.name))
+        sys.exit()
+
+    # print banner with version
+    print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
+    print('This is work in progress, please report issues at %s' % URL_ISSUES)
+
+    logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
+    # enable logging in the modules:
+    log.setLevel(logging.NOTSET)
+
+    t = tablestream.TableStream(style=tablestream.TableStyleSlim,
+        header_row=['Result', 'Flags', 'Type', 'File'],
+        column_width=[10, 5, 4, 56])
+
+    exitcode = -1
+    global_result = None
+    # TODO: handle errors in xglob, to continue processing the next files
+    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+        zip_password=options.zip_password, zip_fname=options.zip_fname):
+        # ignore directory names stored in zip files:
+        if container and filename.endswith('/'):
+            continue
+        full_name = '%s in %s' % (filename, container) if container else filename
+        if isinstance(data, Exception):
+            result = Result_Error
+            t.write_row([result.name, '', '', full_name],
+                colors=[result.color, None, None, None])
+            t.write_row(['', '', '', str(data)],
+                colors=[None, None, None, result.color])
+        else:
+            filetype = '???'
+            try:
+                vba_parser = olevba.VBA_Parser(filename=filename, data=data, container=container)
+                filetype = TYPE2TAG[vba_parser.type]
+            except Exception as e:
+                # the file could not be parsed: report it (no "continue", so the exit code is updated)
+                # TODO: distinguish actual errors from non-MSOffice files
+                result = Result_Error
+                t.write_row([result.name, '', filetype, full_name],
+                    colors=[result.color, None, None, None])
+                t.write_row(['', '', '', str(e)],
+                    colors=[None, None, None, result.color])
+            else:
+                if vba_parser.detect_vba_macros():
+                    vba_code_all_modules = ''
+                    try:
+                        for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros():
+                            vba_code_all_modules += vba_code + '\n'
+                    except Exception as e:
+                        # macros could not be extracted: report it (no "continue", see above)
+                        result = Result_Error
+                        t.write_row([result.name, '', TYPE2TAG[vba_parser.type], full_name],
+                            colors=[result.color, None, None, None])
+                        t.write_row(['', '', '', str(e)],
+                            colors=[None, None, None, result.color])
+                    else:
+                        mraptor = MacroRaptor(vba_code_all_modules)
+                        mraptor.scan()
+                        if mraptor.suspicious:
+                            result = Result_Suspicious
+                        else:
+                            result = Result_MacroOK
+                        t.write_row([result.name, mraptor.get_flags(), filetype, full_name],
+                            colors=[result.color, None, None, None])
+                        if mraptor.matches and options.show_matches:
+                            t.write_row(['', '', '', 'Matches: %r' % mraptor.matches])
+                else:
+                    result = Result_NoMacro
+                    t.write_row([result.name, '', filetype, full_name],
+                        colors=[result.color, None, None, None])
+        if result.exit_code > exitcode:
+            global_result = result
+            exitcode = result.exit_code
+
+    if global_result is None:
+        # no file was processed at all (nothing matched the arguments): report
+        # an error instead of failing on global_result.name with AttributeError
+        global_result = Result_Error
+        exitcode = global_result.exit_code
+    print('')
+    print('Flags: A=AutoExec, W=Write, X=Execute')
+    print('Exit code: %d - %s' % (exitcode, global_result.name))
+    sys.exit(exitcode)
+
+if __name__ == '__main__':
+ main()
+
+# Soundtrack: "Dark Child" by Marlon Williams
diff -Nru remnux-oletools-0.51a/remnux-oletools/olebrowse.py remnux-oletools-0.51a/remnux-oletools/olebrowse.py
--- remnux-oletools-0.51a/remnux-oletools/olebrowse.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/olebrowse.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+"""
+olebrowse.py
+
+A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to
+view and extract individual data streams.
+
+Usage: olebrowse.py [file]
+
+olebrowse project website: http://www.decalage.info/python/olebrowse
+
+olebrowse is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+
+olebrowse is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+__version__ = '0.02'
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2012-09-17 v0.01 PL: - first version
+# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
+
+#------------------------------------------------------------------------------
+# TODO:
+# - menu option to open another file
+# - menu option to display properties
+# - menu option to run other oletools, external tools such as OfficeCat?
+# - for a stream, display info: size, path, etc
+# - stream info: magic, entropy, ... ?
+
+import optparse, sys, os
+from thirdparty.easygui import easygui
+import thirdparty.olefile as olefile
+import ezhexviewer
+
+ABOUT = '~ About olebrowse'
+QUIT = '~ Quit'
+
+
+def about ():
+ """
+ Display information about this tool
+ """
+ easygui.textbox(title='About olebrowse', text=__doc__)
+
+
+def browse_stream (ole, stream):
+ """
+ Browse a stream (hex view or save to file)
+ """
+ #print 'stream:', stream
+ while True:
+ msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream)
+ actions = [
+ 'Hex view',
+## 'Text view',
+## 'Repr view',
+ 'Save stream to file',
+ '~ Back to main menu',
+ ]
+ action = easygui.choicebox(msg, title='olebrowse', choices=actions)
+ if action is None or 'Back' in action:
+ break
+ elif action.startswith('Hex'):
+ data = ole.openstream(stream).getvalue()
+ ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse')
+## elif action.startswith('Text'):
+## data = ole.openstream(stream).getvalue()
+## easygui.codebox(title='Text view - %s' % stream, text=data)
+## elif action.startswith('Repr'):
+## data = ole.openstream(stream).getvalue()
+## easygui.codebox(title='Repr view - %s' % stream, text=repr(data))
+ elif action.startswith('Save'):
+ data = ole.openstream(stream).getvalue()
+ fname = easygui.filesavebox(default='stream.bin')
+ if fname is not None:
+ f = open(fname, 'wb')
+ f.write(data)
+ f.close()
+ easygui.msgbox('stream saved to file %s' % fname)
+
+
+
+def main():
+    """
+    Main function: browse the OLE file given as argument or chosen in a file dialog
+    """
+    try:
+        filename = sys.argv[1]
+    except IndexError:
+        # no filename on the command line: ask the user
+        filename = easygui.fileopenbox()
+    if filename is None:
+        # the file dialog was cancelled: there is nothing to open
+        return
+    try:
+        ole = olefile.OleFileIO(filename)
+        streams = ['/'.join(direntry) for direntry in ole.listdir()]
+        streams.append(ABOUT)
+        streams.append(QUIT)
+        stream = True
+        while stream is not None:
+            msg ="Select a stream, or press Esc to exit"
+            title = "olebrowse"
+            stream = easygui.choicebox(msg, title, streams)
+            if stream is None or stream == QUIT:
+                break
+            if stream == ABOUT:
+                about()
+            else:
+                browse_stream(ole, stream)
+    except Exception:
+        easygui.exceptionbox()
+
+
+
+
+if __name__ == '__main__':
+ main()
diff -Nru remnux-oletools-0.51a/remnux-oletools/oledir.py remnux-oletools-0.51a/remnux-oletools/oledir.py
--- remnux-oletools-0.51a/remnux-oletools/oledir.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/oledir.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+"""
+oledir.py
+
+oledir parses OLE files to display technical information about its directory
+entries, including deleted/orphan streams/storages and unused entries.
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+oledir is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+#=== LICENSE ==================================================================
+
+# oledir is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2015-04-17 v0.01 PL: - first version
+# 2015-04-21 v0.02 PL: - improved display with prettytable
+# 2016-01-13 v0.03 PL: - replaced prettytable by tablestream, added colors
+# 2016-07-20 v0.50 SL: - added Python 3 support
+# 2016-08-09 PL: - fixed issue #77 (imports from thirdparty dir)
+
+__version__ = '0.50'
+
+#------------------------------------------------------------------------------
+# TODO:
+# TODO: show FAT/MiniFAT
+# TODO: show errors when reading streams
+
+# === IMPORTS ================================================================
+
+import sys, os
+
+# add the thirdparty subfolder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+# assumption: the thirdparty dir is a subfolder:
+_thirdparty_dir = os.path.normpath(os.path.join(_thismodule_dir, 'thirdparty'))
+# print('_thirdparty_dir = %r' % _thirdparty_dir)
+if not _thirdparty_dir in sys.path:
+ sys.path.insert(0, _thirdparty_dir)
+
+import colorclass
+
+# On Windows, colorclass needs to be enabled:
+if os.name == 'nt':
+ colorclass.Windows.enable(auto_colors=True)
+
+import olefile
+from tablestream import tablestream
+
+
+# === CONSTANTS ==============================================================
+
+STORAGE_NAMES = {
+ olefile.STGTY_EMPTY: 'Empty',
+ olefile.STGTY_STORAGE: 'Storage',
+ olefile.STGTY_STREAM: 'Stream',
+ olefile.STGTY_LOCKBYTES: 'ILockBytes',
+ olefile.STGTY_PROPERTY: 'IPropertyStorage',
+ olefile.STGTY_ROOT: 'Root',
+}
+
+STORAGE_COLORS = {
+ olefile.STGTY_EMPTY: 'green',
+ olefile.STGTY_STORAGE: 'blue',
+ olefile.STGTY_STREAM: 'yellow',
+ olefile.STGTY_LOCKBYTES: 'magenta',
+ olefile.STGTY_PROPERTY: 'magenta',
+ olefile.STGTY_ROOT: 'cyan',
+}
+
+STATUS_COLORS = {
+ 'unused': 'green',
+ '': 'yellow',
+ 'ORPHAN': 'red',
+}
+
+
+# === FUNCTIONS ==============================================================
+
+def sid_display(sid):
+ if sid == olefile.NOSTREAM:
+ return '-' # None
+ else:
+ return sid
+
+
+# === MAIN ===================================================================
+
+def main():
+ # print banner with version
+ print('oledir %s - http://decalage.info/python/oletools' % __version__)
+
+ if os.name == 'nt':
+ colorclass.Windows.enable(auto_colors=True, reset_atexit=True)
+
+ fname = sys.argv[1]
+ print('OLE directory entries in file %s:' % fname)
+ ole = olefile.OleFileIO(fname)
+ # ole.dumpdirectory()
+
+ # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))
+ # t.align = 'l'
+ # t.max_width['id'] = 4
+ # t.max_width['Status'] = 6
+ # t.max_width['Type'] = 10
+ # t.max_width['Name'] = 10
+ # t.max_width['Left'] = 5
+ # t.max_width['Right'] = 5
+ # t.max_width['Child'] = 5
+ # t.max_width['1st Sect'] = 8
+ # t.max_width['Size'] = 6
+
+ table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
+ header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
+ style=tablestream.TableStyleSlim)
+
+ # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
+ # TODO: OR fix olefile!
+ # TODO: olefile should store or give access to the raw direntry data on demand
+ # TODO: oledir option to hexdump the raw direntries
+ # TODO: olefile should be less picky about incorrect directory structures
+
+ for id in range(len(ole.direntries)):
+ d = ole.direntries[id]
+ if d is None:
+ # this direntry is not part of the tree: either unused or an orphan
+ d = ole._load_direntry(id) #ole.direntries[id]
+ # print('%03d: %s *** ORPHAN ***' % (id, d.name))
+ if d.entry_type == olefile.STGTY_EMPTY:
+ status = 'unused'
+ else:
+ status = 'ORPHAN'
+ else:
+ # print('%03d: %s' % (id, d.name))
+ status = ''
+ if d.name.startswith('\x00'):
+ # this may happen with unused entries, the name may be filled with zeroes
+ name = ''
+ else:
+ # handle non-printable chars using repr(), remove quotes:
+ name = repr(d.name)[1:-1]
+ left = sid_display(d.sid_left)
+ right = sid_display(d.sid_right)
+ child = sid_display(d.sid_child)
+ entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
+ etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
+ status_color = STATUS_COLORS.get(status, 'red')
+
+ # print(' type=%7s sid_left=%s sid_right=%s sid_child=%s'
+ # %(entry_type, left, right, child))
+ # t.add_row((id, status, entry_type, name, left, right, child, hex(d.isectStart), d.size))
+ table.write_row((id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
+ colors=(None, status_color, etype_color, None, None, None, None, None, None))
+ ole.close()
+ # print t
+
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff -Nru remnux-oletools-0.51a/remnux-oletools/oleid.py remnux-oletools-0.51a/remnux-oletools/oleid.py
--- remnux-oletools-0.51a/remnux-oletools/oleid.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/oleid.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,298 @@
+#!/usr/bin/env python
+"""
+oleid.py
+
+oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,
+Excel), to detect specific characteristics that could potentially indicate that
+the file is suspicious or malicious, in terms of security (e.g. malware).
+For example it can detect VBA macros, embedded Flash objects, fragmentation.
+The results can be displayed or returned as XML for further processing.
+
+Usage: oleid.py <file>
+
+oleid project website: http://www.decalage.info/python/oleid
+
+oleid is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+#=== LICENSE =================================================================
+
+# oleid is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import print_function
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2012-10-29 v0.01 PL: - first version
+# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
+# - improved usage display with -h
+# 2014-11-30 v0.03 PL: - improved output with prettytable
+# 2016-10-25 v0.50 PL: - fixed print and bytes strings for Python 3
+
+__version__ = '0.50'
+
+
+#------------------------------------------------------------------------------
+# TODO:
+# + extract relevant metadata: codepage, author, application, timestamps, etc
+# - detect RTF and OpenXML
+# - fragmentation
+# - OLE package
+# - entropy
+# - detect PE header?
+# - detect NOPs?
+# - list type of each object in object pool?
+# - criticality for each indicator?: info, low, medium, high
+# - support wildcards with glob?
+# - verbose option
+# - csv, xml output
+
+
+#=== IMPORTS =================================================================
+
+import optparse, sys, os, re, zlib, struct
+import thirdparty.olefile as olefile
+from thirdparty.prettytable import prettytable
+
+
+#=== FUNCTIONS ===============================================================
+
+def detect_flash (data):
+ """
+ Detect Flash objects (SWF files) within a binary string of data
+ return a list of (start_index, length, compressed) tuples, or [] if nothing
+ found.
+
+ Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked)
+ http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
+ """
+ #TODO: report
+ found = []
+ for match in re.finditer(b'CWS|FWS', data):
+ start = match.start()
+ if start+8 > len(data):
+ # header size larger than remaining data, this is not a SWF
+ continue
+ #TODO: one struct.unpack should be simpler
+ # Read Header
+ header = data[start:start+3]
+ # Read Version
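+        ver = struct.unpack('<b', data[start+3:start+4])[0]
+        # Error check for version above 20
+        #TODO: is this accurate? (check SWF specifications)
+        if ver > 20: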
+ continue
+ # Read SWF Size
+        size = struct.unpack('<i', data[start+4:start+8])[0]
+        if start+size > len(data) or size < 1024:
+ # declared size larger than remaining data, this is not a SWF
+ # or declared size too small for a usual SWF
+ continue
+ # Read SWF into buffer. If compressed read uncompressed size.
+ swf = data[start:start+size]
+ compressed = False
+ if b'CWS' in header:
+ compressed = True
+ # compressed SWF: data after header (8 bytes) until the end is
+ # compressed with zlib. Attempt to decompress it to check if it is
+ # valid
+ compressed_data = swf[8:]
+ try:
+ zlib.decompress(compressed_data)
+ except:
+ continue
+ # else we don't check anything at this stage, we only assume it is a
+ # valid SWF. So there might be false positives for uncompressed SWF.
+ found.append((start, size, compressed))
+ #print 'Found SWF start=%x, length=%d' % (start, size)
+ return found
+
+
+#=== CLASSES =================================================================
+
+class Indicator (object):
+
+ def __init__(self, _id, value=None, _type=bool, name=None, description=None):
+ self.id = _id
+ self.value = value
+ self.type = _type
+ self.name = name
+ if name == None:
+ self.name = _id
+ self.description = description
+
+
+class OleID:
+
+ def __init__(self, filename):
+ self.filename = filename
+ self.indicators = []
+
+ def check(self):
+ # check if it is actually an OLE file:
+ oleformat = Indicator('ole_format', True, name='OLE format')
+ self.indicators.append(oleformat)
+ if not olefile.isOleFile(self.filename):
+ oleformat.value = False
+ return self.indicators
+ # parse file:
+ self.ole = olefile.OleFileIO(self.filename)
+ # checks:
+ self.check_properties()
+ self.check_encrypted()
+ self.check_word()
+ self.check_excel()
+ self.check_powerpoint()
+ self.check_visio()
+ self.check_ObjectPool()
+ self.check_flash()
+ self.ole.close()
+ return self.indicators
+
+ def check_properties (self):
+ suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream')
+ self.indicators.append(suminfo)
+ appname = Indicator('appname', 'unknown', _type=str, name='Application name')
+ self.indicators.append(appname)
+ self.suminfo = {}
+ # check stream SummaryInformation
+ if self.ole.exists("\x05SummaryInformation"):
+ suminfo.value = True
+ self.suminfo = self.ole.getproperties("\x05SummaryInformation")
+ # check application name:
+ appname.value = self.suminfo.get(0x12, 'unknown')
+
+ def check_encrypted (self):
+ # we keep the pointer to the indicator, can be modified by other checks:
+ self.encrypted = Indicator('encrypted', False, name='Encrypted')
+ self.indicators.append(self.encrypted)
+ # check if bit 1 of security field = 1:
+ # (this field may be missing for Powerpoint2000, for example)
+ if 0x13 in self.suminfo:
+ if self.suminfo[0x13] & 1:
+ self.encrypted.value = True
+
+ def check_word (self):
+ word = Indicator('word', False, name='Word Document',
+ description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.')
+ self.indicators.append(word)
+ self.macros = Indicator('vba_macros', False, name='VBA Macros')
+ self.indicators.append(self.macros)
+ if self.ole.exists('WordDocument'):
+ word.value = True
+ # check for Word-specific encryption flag:
+ s = self.ole.openstream(["WordDocument"])
+ # pass header 10 bytes
+ s.read(10)
+ # read flag structure:
+ temp16 = struct.unpack("H", s.read(2))[0]
+ fEncrypted = (temp16 & 0x0100) >> 8
+ if fEncrypted:
+ self.encrypted.value = True
+ s.close()
+ # check for VBA macros:
+ if self.ole.exists('Macros'):
+ self.macros.value = True
+
+ def check_excel (self):
+ excel = Indicator('excel', False, name='Excel Workbook',
+ description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.')
+ self.indicators.append(excel)
+ #self.macros = Indicator('vba_macros', False, name='VBA Macros')
+ #self.indicators.append(self.macros)
+ if self.ole.exists('Workbook') or self.ole.exists('Book'):
+ excel.value = True
+ # check for VBA macros:
+ if self.ole.exists('_VBA_PROJECT_CUR'):
+ self.macros.value = True
+
+ def check_powerpoint (self):
+ ppt = Indicator('ppt', False, name='PowerPoint Presentation',
+ description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.')
+ self.indicators.append(ppt)
+ if self.ole.exists('PowerPoint Document'):
+ ppt.value = True
+
+ def check_visio (self):
+ visio = Indicator('visio', False, name='Visio Drawing',
+ description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.')
+ self.indicators.append(visio)
+ if self.ole.exists('VisioDocument'):
+ visio.value = True
+
+ def check_ObjectPool (self):
+ objpool = Indicator('ObjectPool', False, name='ObjectPool',
+ description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.')
+ self.indicators.append(objpool)
+ if self.ole.exists('ObjectPool'):
+ objpool.value = True
+
+
+ def check_flash (self):
+ flash = Indicator('flash', 0, _type=int, name='Flash objects',
+ description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.')
+ self.indicators.append(flash)
+ for stream in self.ole.listdir():
+ data = self.ole.openstream(stream).read()
+ found = detect_flash(data)
+ # just add to the count of Flash objects:
+ flash.value += len(found)
+ #print stream, found
+
+
+#=== MAIN =================================================================
+
+def main():
+    usage = 'usage: %prog [options] <file>'
+ parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
+## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
+
+ (options, args) = parser.parse_args()
+
+    # Print help if no arguments are passed
+ if len(args) == 0:
+ parser.print_help()
+ return
+
+ for filename in args:
+ print('\nFilename:', filename)
+ oleid = OleID(filename)
+ indicators = oleid.check()
+
+ #TODO: add description
+ #TODO: highlight suspicious indicators
+ t = prettytable.PrettyTable(['Indicator', 'Value'])
+ t.align = 'l'
+ t.max_width = 39
+ #t.border = False
+
+ for indicator in indicators:
+ #print '%s: %s' % (indicator.name, indicator.value)
+ t.add_row((indicator.name, indicator.value))
+
+ print(t)
+
+if __name__ == '__main__':
+ main()
diff -Nru remnux-oletools-0.51a/remnux-oletools/olemap.py remnux-oletools-0.51a/remnux-oletools/olemap.py
--- remnux-oletools-0.51a/remnux-oletools/olemap.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/olemap.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+"""
+olemap
+
+olemap parses OLE files to display technical information about its structure.
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+olemap is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+#=== LICENSE ==================================================================
+
+# olemap is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2015-11-01 v0.01 PL: - first version
+# 2016-01-13 v0.02 PL: - improved display with tablestream, added colors
+# 2016-07-20 v0.50 SL: - added Python 3 support
+# 2016-09-05 PL: - added main entry point for setup.py
+
+__version__ = '0.50'
+
+#------------------------------------------------------------------------------
+# TODO:
+
+# === IMPORTS ================================================================
+
+import sys
+from thirdparty.olefile import olefile
+from thirdparty.tablestream import tablestream
+
+
+
+def sid_display(sid):
+ if sid == olefile.NOSTREAM:
+ return None
+ else:
+ return sid
+
+STORAGE_NAMES = {
+ olefile.STGTY_EMPTY: 'Empty',
+ olefile.STGTY_STORAGE: 'Storage',
+ olefile.STGTY_STREAM: 'Stream',
+ olefile.STGTY_LOCKBYTES: 'ILockBytes',
+ olefile.STGTY_PROPERTY: 'IPropertyStorage',
+ olefile.STGTY_ROOT: 'Root',
+}
+
+FAT_TYPES = {
+ olefile.FREESECT: "Free",
+ olefile.ENDOFCHAIN: "End of Chain",
+ olefile.FATSECT: "FAT Sector",
+ olefile.DIFSECT: "DIFAT Sector"
+ }
+
+FAT_COLORS = {
+ olefile.FREESECT: "green",
+ olefile.ENDOFCHAIN: "yellow",
+ olefile.FATSECT: "cyan",
+ olefile.DIFSECT: "blue",
+ 'default': None,
+ }
+
+
+# === MAIN ===================================================================
+
+def main():
+ # print banner with version
+ print('olemap %s - http://decalage.info/python/oletools' % __version__)
+
+ fname = sys.argv[1]
+ ole = olefile.OleFileIO(fname)
+
+ print('FAT:')
+ t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
+ for i in range(ole.nb_sect):
+ fat_value = ole.fat[i]
+ fat_type = FAT_TYPES.get(fat_value, '')
+ color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
+ # compute offset based on sector size:
+ offset = ole.sectorsize * (i+1)
+ # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)
+ t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value],
+ colors=[None, color_type, None, None])
+ print('')
+
+ print('MiniFAT:')
+ # load MiniFAT if it wasn't already done:
+ ole.loadminifat()
+ for i in range(len(ole.minifat)):
+ fat_value = ole.minifat[i]
+ fat_type = FAT_TYPES.get(fat_value, 'Data')
+ print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value))
+
+ ole.close()
+
+if __name__ == '__main__':
+ main()
diff -Nru remnux-oletools-0.51a/remnux-oletools/olemeta.py remnux-oletools-0.51a/remnux-oletools/olemeta.py
--- remnux-oletools-0.51a/remnux-oletools/olemeta.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/olemeta.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+"""
+olemeta.py
+
+olemeta is a script to parse OLE files such as MS Office documents (e.g. Word,
+Excel), to extract all standard properties present in the OLE file.
+
+Usage: olemeta.py <file>
+
+olemeta project website: http://www.decalage.info/python/olemeta
+
+olemeta is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+#=== LICENSE =================================================================
+
+# olemeta is copyright (c) 2013-2016, Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2013-07-24 v0.01 PL: - first version
+# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
+# - improved usage display
+# 2015-12-29 v0.03 PL: - only display properties present in the file
+# 2016-09-06 v0.50 PL: - added main entry point for setup.py
+# 2016-10-25 PL: - fixed print for Python 3
+# 2016-10-28 PL: - removed the UTF8 codec for console display
+
+__version__ = '0.50'
+
+#------------------------------------------------------------------------------
+# TODO:
+# + optparse
+# + nicer output: table with fixed columns, datetime, etc
+# + CSV output
+# + option to only show available properties (by default)
+
+#=== IMPORTS =================================================================
+
+import sys, codecs
+import thirdparty.olefile as olefile
+from thirdparty.tablestream import tablestream
+
+
+#=== MAIN =================================================================
+
+def main():
+ try:
+ ole = olefile.OleFileIO(sys.argv[1])
+ except IndexError:
+ sys.exit(__doc__)
+
+ # parse and display metadata:
+ meta = ole.get_metadata()
+
+ # console output with UTF8 encoding:
+ # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3
+ console_utf8 = sys.stdout #codecs.getwriter('utf8')(sys.stdout)
+
+ # TODO: move similar code to a function
+
+ print('Properties from the SummaryInformation stream:')
+ t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'], outfile=console_utf8)
+ for prop in meta.SUMMARY_ATTRIBS:
+ value = getattr(meta, prop)
+ if value is not None:
+ # TODO: pretty printing for strings, dates, numbers
+ # TODO: better unicode handling
+ # print('- %s: %s' % (prop, value))
+ # if isinstance(value, unicode):
+ # # encode to UTF8, avoiding errors
+ # value = value.encode('utf-8', errors='replace')
+ # else:
+ # value = str(value)
+ t.write_row([prop, value], colors=[None, 'yellow'])
+ t.close()
+ print('')
+
+ print('Properties from the DocumentSummaryInformation stream:')
+ t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'], outfile=console_utf8)
+ for prop in meta.DOCSUM_ATTRIBS:
+ value = getattr(meta, prop)
+ if value is not None:
+ # TODO: pretty printing for strings, dates, numbers
+ # TODO: better unicode handling
+ # print('- %s: %s' % (prop, value))
+ # if isinstance(value, unicode):
+ # # encode to UTF8, avoiding errors
+ # value = value.encode('utf-8', errors='replace')
+ # else:
+ # value = str(value)
+ t.write_row([prop, value], colors=[None, 'yellow'])
+ t.close()
+
+ ole.close()
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff -Nru remnux-oletools-0.51a/remnux-oletools/oleobj.py remnux-oletools-0.51a/remnux-oletools/oleobj.py
--- remnux-oletools-0.51a/remnux-oletools/oleobj.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/oleobj.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,451 @@
+#!/usr/bin/env python
+from __future__ import print_function
+"""
+oleobj.py
+
+oleobj is a Python script and module to parse OLE objects and files stored
+into various file formats such as RTF or MS Office documents (e.g. Word, Excel).
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+oleobj is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+# === LICENSE ==================================================================
+
+# oleobj is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2015-12-05 v0.01 PL: - first version
+# 2016-06 PL: - added main and process_file (not working yet)
+# 2016-07-18 v0.48 SL: - added Python 3.5 support
+# 2016-07-19 PL: - fixed Python 2.6-7 support
+
+__version__ = '0.48'
+
+#------------------------------------------------------------------------------
+# TODO:
+# + setup logging (common with other oletools)
+
+
+#------------------------------------------------------------------------------
+# REFERENCES:
+
+# Reference for the storage of embedded OLE objects/files:
+# [MS-OLEDS]: Object Linking and Embedding (OLE) Data Structures
+# https://msdn.microsoft.com/en-us/library/dd942265.aspx
+
+# - officeparser: https://github.com/unixfreak0037/officeparser
+# TODO: oledump
+
+
+#--- IMPORTS ------------------------------------------------------------------
+
+import logging, struct, optparse, os, re, sys
+
+from thirdparty.olefile import olefile
+from thirdparty.xglob import xglob
+
+# === LOGGING =================================================================
+
+class NullHandler(logging.Handler):
+    """
+    Logging handler that discards every record it receives.
+    Attaching it to a logger avoids printing messages when the main
+    application has not configured logging itself.
+    logging.NullHandler exists from Python 2.7 on, Python 2.6 needs this class:
+    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
+    """
+    def emit(self, record):
+        return None
+
+def get_logger(name, level=logging.CRITICAL+1):
+    """
+    Return the logger called name, set to the given level.
+
+    The root logger is left untouched, so that logs from other modules do not
+    end up on the screen. A logger that is already registered under this name
+    is reused as is (adding a second handler would double every message);
+    a new logger only receives a NullHandler, and it is up to the application
+    to configure its own logging.
+    The default level is CRITICAL+1, to avoid any logging.
+
+    :param name: str, name of the logger
+    :param level: int, logging level to set on the logger
+    :return: logging.Logger object
+    """
+    # this test must come before getLogger(), which registers the name:
+    already_registered = name in logging.Logger.manager.loggerDict
+    logger = logging.getLogger(name)
+    if not already_registered:
+        logger.addHandler(NullHandler())
+    logger.setLevel(level)
+    return logger
+
+# a global logger object used for debugging:
+log = get_logger('oleobj')
+
+
+# === CONSTANTS ==============================================================
+
+# some str methods on Python 2.x return characters,
+# while the equivalent bytes methods return integers on Python 3.x:
+# Python 2.x: a one-character str - Python 3.x: an integer
+NULL_CHAR = '\x00' if sys.version_info[0] <= 2 else 0
+
+
+# === GLOBAL VARIABLES =======================================================
+
+# struct to parse an unsigned integer of 32 bits:
+struct_uint32 = struct.Struct(' FILETIME from olefile
+ self.unknown_long_1, data = read_uint32(data)
+ self.unknown_long_2, data = read_uint32(data)
+ # temp path?
+ self.temp_path, data = data.split(b'\x00', 1)
+ # size of the rest of the data
+ self.actual_size, data = read_uint32(data)
+ self.data = data[0:self.actual_size]
+ # TODO: exception when size > remaining data
+ # TODO: SLACK DATA
+
+
+class OleObject (object):
+    """
+    OLE 1.0 Object
+
+    see MS-OLEDS 2.2 OLE1.0 Format Structures
+    """
+
+    # constants for the format_id attribute:
+    # see MS-OLEDS 2.2.4 ObjectHeader
+    TYPE_LINKED = 0x01
+    TYPE_EMBEDDED = 0x02
+
+
+    def __init__(self, bindata=None):
+        """
+        Constructor for OleObject.
+        If bindata is provided, it will be parsed using the parse() method.
+
+        :param bindata: bytes, OLE 1.0 Object structure containing an OLE object
+        """
+        self.ole_version = None
+        self.format_id = None
+        self.class_name = None
+        self.topic_name = None
+        self.item_name = None
+        self.data = None
+        self.data_size = None
+        # bytes following the declared data size, only filled in by parse()
+        # for embedded objects:
+        self.extra_data = None
+        # honour the documented contract: parse the data when it is provided
+        if bindata is not None:
+            self.parse(bindata)
+
+    def parse(self, data):
+        """
+        Parse binary data containing an OLE 1.0 Object structure,
+        to extract the OLE object it contains.
+        (see MS-OLEDS 2.2 OLE1.0 Format Structures)
+
+        The header fields (ole_version, format_id, class_name, topic_name,
+        item_name) are always set. For an embedded object, data_size, data
+        and extra_data are set as well.
+
+        :param data: bytes, OLE 1.0 Object structure containing an OLE object
+        :return: None
+        :raises AssertionError: if format_id is neither TYPE_EMBEDDED nor
+            TYPE_LINKED, or if less data is available than the declared size
+        """
+        # Header: see MS-OLEDS 2.2.4 ObjectHeader
+        self.ole_version, data = read_uint32(data)
+        self.format_id, data = read_uint32(data)
+        log.debug('OLE version=%08X - Format ID=%08X' % (self.ole_version, self.format_id))
+        assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED)
+        self.class_name, data = read_LengthPrefixedAnsiString(data)
+        self.topic_name, data = read_LengthPrefixedAnsiString(data)
+        self.item_name, data = read_LengthPrefixedAnsiString(data)
+        log.debug('Class name=%r - Topic name=%r - Item name=%r'
+                  % (self.class_name, self.topic_name, self.item_name))
+        if self.format_id == self.TYPE_EMBEDDED:
+            # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject
+            #assert self.topic_name != '' and self.item_name != ''
+            self.data_size, data = read_uint32(data)
+            log.debug('Declared data size=%d - remaining size=%d' % (self.data_size, len(data)))
+            # TODO: handle incorrect size to avoid exception
+            self.data = data[:self.data_size]
+            assert len(self.data) == self.data_size
+            self.extra_data = data[self.data_size:]
+
+
+
+def sanitize_filename(filename, replacement='_', max_length=200):
+    """
+    Compute a safe basename from filename.
+
+    Only the last path component is kept. Every character which is not a
+    word character, a dot, a dash or a space is replaced, then runs of dots
+    and runs of spaces are each collapsed to a single character.
+    The returned filename is always a basename, and is never empty.
+
+    :param filename: str, file name or path to sanitize
+    :param replacement: str, substitute for non-whitelisted characters
+    :param max_length: int, maximum length of the result (0 or None: no limit)
+    :return: str, sanitized basename, or 'NONAME' if nothing is left
+    """
+    basepath = os.path.basename(filename).strip()
+    sane_fname = re.sub(r'[^\w\.\- ]', replacement, basepath)
+
+    # Collapse repeated dots and repeated spaces. Explicit {2,} quantifiers
+    # are used so that the patterns cannot degenerate into replacing one
+    # character by itself, which would loop forever in a while/replace loop.
+    sane_fname = re.sub(r'\.{2,}', '.', sane_fname)
+    sane_fname = re.sub(r' {2,}', ' ', sane_fname)
+
+    # test the sanitized result, not the input: a path such as 'dir/' has
+    # an empty basename although filename itself is not empty
+    if not sane_fname:
+        sane_fname = 'NONAME'
+
+    # limit filename length
+    if max_length:
+        sane_fname = sane_fname[:max_length]
+
+    return sane_fname
+
+
+def process_file(container, filename, data, output_dir=None):
+    """
+    Scan every stream of an OLE file for OLE 1.0 objects, print what is found
+    and save the data of each object to a file. For objects of class
+    'package', the embedded OLE Package is also parsed and its content saved.
+
+    :param container: not used by this function
+    :param filename: str, path of the file to scan, also used to build the
+        names of the output files
+    :param data: content of the file (passed to olefile.OleFileIO), or None
+        to read it from filename
+    :param output_dir: str, directory where files are saved, created if it
+        does not exist; by default the directory of filename is used
+    """
+    if output_dir:
+        if not os.path.isdir(output_dir):
+            log.info('creating output directory %s' % output_dir)
+            os.mkdir(output_dir)
+
+        fname_prefix = os.path.join(output_dir,
+                                    sanitize_filename(filename))
+    else:
+        base_dir = os.path.dirname(filename)
+        sane_fname = sanitize_filename(filename)
+        fname_prefix = os.path.join(base_dir, sane_fname)
+
+    # TODO: option to extract objects to files (false by default)
+    if data is None:
+        with open(filename, 'rb') as f:
+            data = f.read()
+    print ('-'*79)
+    print ('File: %r - %d bytes' % (filename, len(data)))
+    ole = olefile.OleFileIO(data)
+    try:
+        index = 1
+        for stream in ole.listdir():
+            objdata = ole.openstream(stream).read()
+            stream_path = '/'.join(stream)
+            log.debug('Checking stream %r' % stream_path)
+            obj = OleObject()
+            try:
+                obj.parse(objdata)
+                print('extract file embedded in OLE object from stream %r:' % stream_path)
+                print('format_id = %d' % obj.format_id)
+                print('class name = %r' % obj.class_name)
+                print('data size = %d' % obj.data_size)
+                # set a file extension according to the class name:
+                class_name = obj.class_name.lower()
+                if class_name.startswith('word'):
+                    ext = 'doc'
+                elif class_name.startswith('package'):
+                    ext = 'package'
+                else:
+                    ext = 'bin'
+
+                fname = '%s_object_%03d.%s' % (fname_prefix, index, ext)
+                print ('saving to file %s' % fname)
+                with open(fname, 'wb') as f:
+                    f.write(obj.data)
+                if class_name == 'package':
+                    print ('Parsing OLE Package')
+                    opkg = OleNativeStream(bindata=obj.data)
+                    print ('Filename = %r' % opkg.filename)
+                    print ('Source path = %r' % opkg.src_path)
+                    print ('Temp path = %r' % opkg.temp_path)
+                    if opkg.filename:
+                        fname = '%s_%s' % (fname_prefix,
+                                           sanitize_filename(opkg.filename))
+                    else:
+                        fname = '%s_object_%03d.noname' % (fname_prefix, index)
+                    print ('saving to file %s' % fname)
+                    with open(fname, 'wb') as f:
+                        f.write(opkg.data)
+                index += 1
+            except Exception:
+                # best effort: most streams are not OLE 1.0 objects.
+                # KeyboardInterrupt and SystemExit are not caught here, so
+                # that the user can still interrupt the scan.
+                log.debug('*** Not an OLE 1.0 Object')
+    finally:
+        # release the OLE file even if a stream cannot be read
+        ole.close()
+
+
+
+#=== MAIN =================================================================
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the options, set up logging on stdout,
+    # then run process_file on every file matched by the arguments.
+
+    # print banner with version
+    print ('oleobj %s - http://decalage.info/oletools' % __version__)
+    print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
+    print ('Please report any issue at https://github.com/decalage2/oletools/issues')
+    print ('')
+
+    DEFAULT_LOG_LEVEL = "warning" # Default log level
+    LOG_LEVELS = {'debug': logging.DEBUG,
+                  'info': logging.INFO,
+                  'warning': logging.WARNING,
+                  'error': logging.ERROR,
+                  'critical': logging.CRITICAL
+                 }
+
+    # at least one filename is required (see the check on args below)
+    usage = 'usage: %prog [options] <filename> [filename2 ...]'
+    parser = optparse.OptionParser(usage=usage)
+    # parser.add_option('-o', '--outfile', dest='outfile',
+    #     help='output file')
+    # parser.add_option('-c', '--csv', dest='csv',
+    #     help='export results to a CSV file')
+    parser.add_option("-r", action="store_true", dest="recursive",
+                      help='find files recursively in subdirectories.')
+    parser.add_option("-d", type="str", dest="output_dir",
+                      help='use specified directory to output files.', default=None)
+    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+                      help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
+    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+    # restrict -l to the known level names, so that a typo is reported as a
+    # usage error by optparse instead of a KeyError on LOG_LEVELS below
+    parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+                      type='choice', choices=sorted(LOG_LEVELS),
+                      help="logging level debug/info/warning/error/critical (default=%default)")
+
+    (options, args) = parser.parse_args()
+
+    # Print help if no arguments are passed
+    if len(args) == 0:
+        print (__doc__)
+        parser.print_help()
+        sys.exit()
+
+    # Setup logging to the console:
+    # here we use stdout instead of stderr by default, so that the output
+    # can be redirected properly.
+    logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout,
+                        format='%(levelname)-8s %(message)s')
+    # enable logging in the modules:
+    log.setLevel(logging.NOTSET)
+
+
+    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+                                                      zip_password=options.zip_password, zip_fname=options.zip_fname):
+        # ignore directory names stored in zip files:
+        if container and filename.endswith('/'):
+            continue
+        process_file(container, filename, data, options.output_dir)
+
+
diff -Nru remnux-oletools-0.51a/remnux-oletools/oletimes.py remnux-oletools-0.51a/remnux-oletools/oletimes.py
--- remnux-oletools-0.51a/remnux-oletools/oletimes.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/oletimes.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+"""
+oletimes.py
+
+oletimes is a script to parse OLE files such as MS Office documents (e.g. Word,
+Excel), to extract creation and modification times of all streams and storages
+in the OLE file.
+
+Usage: oletimes.py <file>
+
+oletimes project website: http://www.decalage.info/python/oletimes
+
+oletimes is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+#=== LICENSE =================================================================
+
+# oletimes is copyright (c) 2013-2016, Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2013-07-24 v0.01 PL: - first version
+# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
+# - improved usage display
+# 2014-11-30 v0.03 PL: - improved output with prettytable
+# 2016-07-20 v0.50 SL: - added Python 3 support
+# 2016-09-05 PL: - added main entry point for setup.py
+
+__version__ = '0.50'
+
+#------------------------------------------------------------------------------
+# TODO:
+# + optparse
+# + nicer output: table with fixed columns, datetime, etc
+# + CSV output
+# + option to only show available timestamps (by default?)
+
+#=== IMPORTS =================================================================
+
+import sys, datetime
+import thirdparty.olefile as olefile
+from thirdparty.prettytable import prettytable
+
+
+# === MAIN ===================================================================
+
+def main():
+    """
+    Print the modification and creation times of the root entry and of every
+    stream and storage of the OLE file named by the first command-line argument.
+    Exits with the module usage text (__doc__) when no filename is given.
+    """
+    # print banner with version
+    print('oletimes %s - http://decalage.info/python/oletools' % __version__)
+
+    try:
+        ole = olefile.OleFileIO(sys.argv[1])
+    except IndexError:
+        sys.exit(__doc__)
+
+    def dt2str (dt):
+        """
+        Convert a datetime object to a string for display, without microseconds
+
+        :param dt: datetime.datetime object, or None
+        :return: str, or None
+        """
+        return None if dt is None else str(dt.replace(microsecond=0))
+
+    try:
+        t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time'])
+        t.align = 'l'
+        t.max_width = 26
+
+        t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime())))
+        for obj in ole.listdir(streams=True, storages=True):
+            t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))
+
+        print(t)
+    finally:
+        # release the file handle even if reading a timestamp or printing fails
+        ole.close()
+
+if __name__ == '__main__':
+ main()
diff -Nru remnux-oletools-0.51a/remnux-oletools/olevba3.py remnux-oletools-0.51a/remnux-oletools/olevba3.py
--- remnux-oletools-0.51a/remnux-oletools/olevba3.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/olevba3.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,3384 @@
+#!/usr/bin/env python
+"""
+olevba.py
+
+olevba is a script to parse OLE and OpenXML files such as MS Office documents
+(e.g. Word, Excel), to extract VBA Macro code in clear text, deobfuscate
+and analyze malicious macros.
+
+Supported formats:
+- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
+- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
+- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
+- Word 2003 XML (.xml)
+- Word/Excel Single File Web Page / MHTML (.mht)
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+olevba is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+
+olevba is based on source code from officeparser by John William Davison
+https://github.com/unixfreak0037/officeparser
+"""
+
+# === LICENSE ==================================================================
+
+# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+# olevba contains modified source code from the officeparser project, published
+# under the following MIT License (MIT):
+#
+# officeparser is copyright (c) 2014 John William Davison
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2014-08-05 v0.01 PL: - first version based on officeparser code
+# 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser
+# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record
+# 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats
+# and to find the VBA project root anywhere in the file
+# 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL
+# 2014-12-05 v0.05 PL: - refactored most functions into a class, new API
+# - added detect_vba_macros
+# 2014-12-10 v0.06 PL: - hide first lines with VB attributes
+# - detect auto-executable macros
+# - ignore empty macros
+# 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive
+# 2014-12-15 v0.08 PL: - improved display for empty macros
+# - added pattern extraction
+# 2014-12-25 v0.09 PL: - added suspicious keywords detection
+# 2014-12-27 v0.10 PL: - added OptionParser, main and process_file
+# - uses xglob to scan several files with wildcards
+# - option -r to recurse subdirectories
+# - option -z to scan files in password-protected zips
+# 2015-01-02 v0.11 PL: - improved filter_vba to detect colons
+# 2015-01-03 v0.12 PL: - fixed detect_patterns to detect all patterns
+# - process_file: improved display, shows container file
+# - improved list of executable file extensions
+# 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display
+# 2015-01-08 v0.14 PL: - added hex strings detection and decoding
+# - fixed issue #2, decoding VBA stream names using
+# specified codepage and unicode stream names
+# 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d
+# 2015-01-16 v0.16 PL: - fix for issue #3 (exception when module name="text")
+# - added several suspicious keywords
+# - added option -i to analyze VBA source code directly
+# 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions
+# - added scan_vba to run all detection algorithms
+# - decoded hex strings are now also scanned + reversed
+# 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules
+# 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex
+# strings and StrReverse
+# 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded
+# 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding
+# - improved display, shows obfuscation name
+# 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename
+# - added Base64 obfuscation decoding (contribution from
+# @JamesHabben)
+# 2015-02-03 v0.23 PL: - triage now uses VBA_Scanner results, shows Base64 and
+# Dridex strings
+# - exception handling in detect_base64_strings
+# 2015-02-07 v0.24 PL: - renamed option --hex to --decode, fixed display
+# - display exceptions with stack trace
+# - added several suspicious keywords
+# - improved Base64 detection and decoding
+# - fixed triage mode not to scan attrib lines
+# 2015-03-04 v0.25 PL: - added support for Word 2003 XML
+# 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and
+# virtualisation detection
+# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros
+# (issue #10 reported by Greg from SpamStopsHere)
+# 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header
+# (issue #11 reported by Thomas Chopitea)
+# 2015-05-26 v0.29 PL: - improved MSO files parsing, taking into account
+# various data offsets (issue #12)
+# - improved detection of MSO files, avoiding incorrect
+# parsing errors (issue #7)
+# 2015-05-29 v0.30 PL: - added suspicious keywords suggested by @ozhermit,
+# Davy Douhine (issue #9), issue #13
+# 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc)
+# 2015-06-19 PL: - added options -a, -c, --each, --attr
+# 2015-06-21 v0.32 PL: - always display decoded strings which are printable
+# - fix VBA_Scanner.scan to return raw strings, not repr()
+# 2015-07-09 v0.40 PL: - removed usage of sys.stderr which causes issues
+# 2015-07-12 PL: - added Hex function decoding to VBA Parser
+# 2015-07-13 PL: - added Base64 function decoding to VBA Parser
+# 2015-09-06 PL: - improved VBA_Parser, refactored the main functions
+# 2015-09-13 PL: - moved main functions to a class VBA_Parser_CLI
+# - fixed issue when analysis was done twice
+# 2015-09-15 PL: - remove duplicate IOCs from results
+# 2015-09-16 PL: - join long VBA lines ending with underscore before scan
+# - disabled unused option --each
+# 2015-09-22 v0.41 PL: - added new option --reveal
+# - added suspicious strings for PowerShell.exe options
+# 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method
+# 2015-10-10 PL: - added support for text files with VBA source code
+# 2015-11-17 PL: - fixed bug with --decode option
+# 2015-12-16 PL: - fixed bug in main (no options input anymore)
+# - improved logging, added -l option
+# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht
+# - fixed issue #32 by monkeypatching email.feedparser
+# 2016-02-07 PL: - KeyboardInterrupt is now raised properly
+# 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr
+# 2016-02-29 PL: - added Workbook_Activate to suspicious keywords
+# 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
+# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck)
+# 2016-03-16 CH: - added option --no-deobfuscate (temporary)
+# 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
+# - updated suspicious keywords
+# 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans
+# 2016-04-28 CH: - return an exit code depending on the results
+# - improved error and exception handling
+# - improved JSON output
+# 2016-05-12 CH: - added support for PowerPoint 97-2003 files
+# 2016-06-06 CH: - improved handling of unicode VBA module names
+# 2016-06-07 CH: - added option --relaxed, stricter parsing by default
+# 2016-06-12 v0.50 PL: - fixed small bugs in VBA parsing code
+# 2016-07-01 PL: - fixed issue #58 with format() to support Python 2.6
+# 2016-07-29 CH: - fixed several bugs including #73 (Mac Roman encoding)
+# 2016-10-25 PL: - fixed regex bytes strings (PR/issue #100)
+
+__version__ = '0.50'
+
+#------------------------------------------------------------------------------
+# TODO:
+# + setup logging (common with other oletools)
+# + add xor bruteforcing like bbharvest
+# + options -a and -c should imply -d
+
+# TODO later:
+# + performance improvement: instead of searching each keyword separately,
+# first split vba code into a list of words (per line), then check each
+# word against a dict. (or put vba words into a set/dict?)
+# + for regex, maybe combine them into a single re with named groups?
+# + add Yara support, include sample rules? plugins like balbuzard?
+# + add balbuzard support
+# + output to file (replace print by file.write, sys.stdout by default)
+# + look for VBA in embedded documents (e.g. Excel in Word)
+# + support SRP streams (see Lenny's article + links and sample)
+# - python 3.x support
+# - check VBA macros in Visio, Access, Project, etc
+# - extract_macros: convert to a class, split long function into smaller methods
+# - extract_macros: read bytes from stream file objects instead of strings
+# - extract_macros: use combined struct.unpack instead of many calls
+# - all except clauses should target specific exceptions
+
+#------------------------------------------------------------------------------
+# REFERENCES:
+# - [MS-OVBA]: Microsoft Office VBA File Format Structure
+# http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx
+# - officeparser: https://github.com/unixfreak0037/officeparser
+
+
+#--- IMPORTS ------------------------------------------------------------------
+
+import sys, logging
+import struct
+from _io import StringIO,BytesIO
+import math
+import zipfile
+import re
+import optparse
+import binascii
+import base64
+import zlib
+import email # for MHTML parsing
+import string # for printable
+import json # for json output mode (argument --json)
+
+# import lxml or ElementTree for XML parsing:
+try:
+    # lxml: best performance for XML processing
+    import lxml.etree as ET
+except ImportError:
+    try:
+        # Python 2.5+: batteries included
+        import xml.etree.cElementTree as ET
+    except ImportError:
+        try:
+            # Python <2.5: standalone ElementTree install
+            import elementtree.cElementTree as ET
+        except ImportError:
+            # raise an ImportError instance carrying the message: raising a
+            # (class, message) tuple is not a valid exception on Python 3
+            raise ImportError("lxml or ElementTree are not installed, " \
+                + "see http://codespeak.net/lxml " \
+                + "or http://effbot.org/zone/element-index.htm")
+
+import oletools.thirdparty.olefile as olefile
+from oletools.thirdparty.prettytable import prettytable
+from oletools.thirdparty.xglob import xglob, PathNotFoundException
+from oletools.thirdparty.pyparsing.pyparsing import \
+ CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
+ Optional, QuotedString,Regex, Suppress, Word, WordStart, \
+ alphanums, alphas, hexnums,nums, opAssoc, srange, \
+ infixNotation
+import oletools.ppt_parser as ppt_parser
+
+# monkeypatch email to fix issue #32:
+# allow header lines without ":"
+import email.feedparser
+email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])')
+
+
+# === LOGGING =================================================================
+
+class NullHandler(logging.Handler):
+    """
+    Handler that silently drops every log record, so that nothing is printed
+    when the main application has not configured logging.
+    Needed for Python 2.6 only, logging.NullHandler is available from 2.7:
+    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
+    """
+    def emit(self, record):
+        return None
+
+def get_logger(name, level=logging.CRITICAL+1):
+    """
+    Provide the logger object used by this module.
+
+    Settings of the root logger are not changed, to avoid getting other
+    modules' logs on the screen. When a logger with the same name already
+    exists it is reused without adding a handler (otherwise messages would
+    be doubled). A new logger only gets a NullHandler: configuring logging
+    is left to the application.
+    By default the level is CRITICAL+1, to avoid any logging.
+
+    :param name: str, name of the logger
+    :param level: int, logging level to set on the logger
+    :return: logging.Logger object
+    """
+    # look the name up first: getLogger() below registers it
+    is_new = name not in logging.Logger.manager.loggerDict
+    logger = logging.getLogger(name)
+    if is_new:
+        logger.addHandler(NullHandler())
+    # make sure level is OK, for new and reused loggers alike:
+    logger.setLevel(level)
+    return logger
+
+# a global logger object used for debugging:
+log = get_logger('olevba')
+
+
+#=== EXCEPTIONS ==============================================================
+
+class OlevbaBaseException(Exception):
+    """
+    Base class for exceptions produced here for simpler except clauses
+
+    :param msg: str, error message, kept unchanged in the msg attribute
+    :param filename: name of the file being processed, if any
+    :param orig_exc: exception that caused this one, if any; its text is
+        appended in parentheses to the message given to Exception
+    :param kwargs: passed on to the Exception constructor
+    """
+    def __init__(self, msg, filename=None, orig_exc=None, **kwargs):
+        full_msg = msg
+        if orig_exc:
+            full_msg = msg + ' ({0})'.format(orig_exc)
+        super(OlevbaBaseException, self).__init__(full_msg, **kwargs)
+        self.msg = msg
+        self.filename = filename
+        self.orig_exc = orig_exc
+
+
+class FileOpenError(OlevbaBaseException):
+    """
+    raised by VBA_Parser constructor if all open_... attempts failed
+
+    probably means the file type is not supported
+
+    :param filename: name of the file that could not be opened
+    :param orig_exc: exception that caused the failure, if any
+    """
+
+    def __init__(self, filename, orig_exc=None):
+        message = 'Failed to open file %s' % filename
+        super(FileOpenError, self).__init__(message, filename, orig_exc)
+
+
class ProcessingError(OlevbaBaseException):
    """ Raised by the VBA_Parser.process_file* functions. """

    def __init__(self, filename, orig_exc):
        message = 'Error processing file %s' % filename
        super(ProcessingError, self).__init__(message, filename, orig_exc)
+
+
class MsoExtractionError(RuntimeError, OlevbaBaseException):
    """
    Raised by mso_file_extract if parsing MSO/ActiveMIME data failed.

    It derives from RuntimeError as well as from OlevbaBaseException, so it
    can be caught through either of the two.
    """

    def __init__(self, msg):
        # Initialise both bases explicitly. The first call must target
        # RuntimeError: calling MsoExtractionError.__init__ here would invoke
        # this very method again and recurse until the interpreter gives up.
        RuntimeError.__init__(self, msg)
        OlevbaBaseException.__init__(self, msg)
+
+
class SubstreamOpenError(FileOpenError):
    """
    Special kind of FileOpenError: the file that could not be opened is a
    substream of the original file.

    The error message shows "<filename>/<subfilename>", while the attribute
    `filename` holds the name of the original file only.
    """

    def __init__(self, filename, subfilename, orig_exc=None):
        combined_path = str(filename) + '/' + str(subfilename)
        super(SubstreamOpenError, self).__init__(combined_path, orig_exc)
        # the parent constructor stored the combined path: restore the
        # plain file name, and keep the substream name separately
        self.filename = filename
        self.subfilename = subfilename
+
+
class UnexpectedDataError(OlevbaBaseException):
    """
    Raised when parsing is strict (= not relaxed) and data is unexpected.

    `expected` and `value` are rendered as 4-digit uppercase hexadecimal
    numbers in the message; all four arguments are kept as attributes.
    """

    def __init__(self, stream_path, variable, expected, value):
        template = ('Unexpected value in {0} for variable {1}: '
                    'expected {2:04X} but found {3:04X}!')
        message = template.format(stream_path, variable, expected, value)
        super(UnexpectedDataError, self).__init__(message)
        self.stream_path = stream_path
        self.variable = variable
        self.expected = expected
        self.value = value
+
+#--- CONSTANTS ----------------------------------------------------------------
+
+# return codes
+RETURN_OK = 0
+RETURN_WARNINGS = 1 # (reserved, not used yet)
+RETURN_WRONG_ARGS = 2 # (fixed, built into optparse)
+RETURN_FILE_NOT_FOUND = 3
+RETURN_XGLOB_ERR = 4
+RETURN_OPEN_ERROR = 5
+RETURN_PARSE_ERROR = 6
+RETURN_SEVERAL_ERRS = 7
+RETURN_UNEXPECTED = 8
+
# MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
# Maps a Mac codepage number to the name of the codec to use for it.
# The trailing comments give the Mac-specific codec name that was looked for
# but not found; the value is the substitute chosen instead.
MAC_CODEPAGES = {
    10000: 'mac-roman',
    10001: 'shiftjis',  # not found: 'mac-shift-jis',
    10003: 'ascii',     # nothing appropriate found: 'mac-hangul',
    10008: 'gb2312',    # not found: 'mac-gb2312',
    10002: 'big5',      # not found: 'mac-big5',
    10005: 'hebrew',    # not found: 'mac-hebrew',
    10004: 'mac-arabic',
    10006: 'mac-greek',
    10081: 'mac-turkish',
    10021: 'thai',      # not found: 'mac-thai',
    10029: 'maccentraleurope',  # not found: 'mac-east europe',
    10007: 'ascii',     # nothing appropriate found: 'mac-russian',
}
+
+# URL and message to report issues:
+URL_OLEVBA_ISSUES = 'https://github.com/decalage2/oletools/issues'
+MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES
+
# Container types:
TYPE_OLE = 'OLE'
TYPE_OpenXML = 'OpenXML'
TYPE_Word2003_XML = 'Word2003_XML'
TYPE_MHTML = 'MHTML'
TYPE_TEXT = 'Text'
TYPE_PPT = 'PPT'

# short tag to display file types in triage mode:
# (every tag is three characters followed by a colon, so that they line up)
TYPE2TAG = {
    TYPE_OLE: 'OLE:',
    TYPE_OpenXML: 'OpX:',
    TYPE_Word2003_XML: 'XML:',
    TYPE_MHTML: 'MHT:',
    TYPE_TEXT: 'TXT:',
    TYPE_PPT: 'PPT:',
}
+
+
+# MSO files ActiveMime header magic
+MSO_ACTIVEMIME_HEADER = b'ActiveMime'
+
+MODULE_EXTENSION = "bas"
+CLASS_EXTENSION = "cls"
+FORM_EXTENSION = "frm"
+
+# Namespaces and tags for Word2003 XML parsing:
+NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}'
+# the tag contains the VBA macro code:
+TAG_BINDATA = NS_W + 'binData'
+ATTR_NAME = NS_W + 'name'
+
+# Keywords to detect auto-executable macros
+AUTOEXEC_KEYWORDS = {
+ # MS Word:
+ 'Runs when the Word document is opened':
+ ('AutoExec', 'AutoOpen', 'Document_Open', 'DocumentOpen'),
+ 'Runs when the Word document is closed':
+ ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'),
+ 'Runs when the Word document is modified':
+ ('DocumentChange',),
+ 'Runs when a new Word document is created':
+ ('AutoNew', 'Document_New', 'NewDocument'),
+
+ # MS Excel:
+ 'Runs when the Excel Workbook is opened':
+ ('Auto_Open', 'Workbook_Open', 'Workbook_Activate'),
+ 'Runs when the Excel Workbook is closed':
+ ('Auto_Close', 'Workbook_Close'),
+
+ #TODO: full list in MS specs??
+}
+
+# Suspicious Keywords that may be used by malware
+# See VBA language reference: http://msdn.microsoft.com/en-us/library/office/jj692818%28v=office.15%29.aspx
+SUSPICIOUS_KEYWORDS = {
+ #TODO: use regex to support variable whitespaces
+ 'May read system environment variables':
+ ('Environ',),
+ 'May open a file':
+ ('Open',),
+ 'May write to a file (if combined with Open)':
+ #TODO: regex to find Open+Write on same line
+ ('Write', 'Put', 'Output', 'Print #'),
+ 'May read or write a binary file (if combined with Open)':
+ #TODO: regex to find Open+Binary on same line
+ ('Binary',),
+ 'May copy a file':
+ ('FileCopy', 'CopyFile'),
+ #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx
+ #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx
+ 'May delete a file':
+ ('Kill',),
+ 'May create a text file':
+ ('CreateTextFile', 'ADODB.Stream', 'WriteText', 'SaveToFile'),
+ #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx
+ #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6
+ 'May run an executable file or a system command':
+ ('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus',
+ 'vbMinimizedNoFocus', 'WScript.Shell', 'Run', 'ShellExecute'),
+ #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx
+ #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6
+ 'May run PowerShell commands':
+ #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
+ #also: https://bitbucket.org/decalage/oletools/issues/14/olevba-library-update-ioc
+ # ref: https://blog.netspi.com/15-ways-to-bypass-the-powershell-execution-policy/
+ # TODO: add support for keywords starting with a non-alpha character, such as "-noexit"
+ # TODO: '-command', '-EncodedCommand', '-scriptblock'
+ ('PowerShell', 'noexit', 'ExecutionPolicy', 'noprofile', 'command', 'EncodedCommand',
+ 'invoke-command', 'scriptblock', 'Invoke-Expression', 'AuthorizationManager'),
+ 'May run an executable file or a system command using PowerShell':
+ ('Start-Process',),
+ 'May hide the application':
+ ('Application.Visible', 'ShowWindow', 'SW_HIDE'),
+ 'May create a directory':
+ ('MkDir',),
+ 'May save the current workbook':
+ ('ActiveWorkbook.SaveAs',),
+ 'May change which directory contains files to open at startup':
+ #TODO: confirm the actual effect
+ ('Application.AltStartupPath',),
+ 'May create an OLE object':
+ ('CreateObject',),
+ 'May create an OLE object using PowerShell':
+ ('New-Object',),
+ 'May run an application (if combined with CreateObject)':
+ ('Shell.Application',),
+ 'May enumerate application windows (if combined with Shell.Application object)':
+ ('Windows', 'FindWindow'),
+ 'May run code from a DLL':
+ #TODO: regex to find declare+lib on same line
+ ('Lib',),
+ 'May inject code into another process':
+ ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
+ ),
+ 'May download files from the Internet':
+ #TODO: regex to find urlmon+URLDownloadToFileA on same line
+ ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP',
+ 'MSXML2.ServerXMLHTTP', # suggested in issue #13
+ 'User-Agent', # sample from @ozhermit: http://pastebin.com/MPc3iV6z
+ ),
+ 'May download files from the Internet using PowerShell':
+ #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
+ ('Net.WebClient', 'DownloadFile', 'DownloadString'),
+ 'May control another application by simulating user keystrokes':
+ ('SendKeys', 'AppActivate'),
+ #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx
+ 'May attempt to obfuscate malicious function calls':
+ ('CallByName',),
+ #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx
+ 'May attempt to obfuscate specific strings':
+ #TODO: regex to find several Chr*, not just one
+ ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'),
+ #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx
+ 'May read or write registry keys':
+ #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
+ ('RegOpenKeyExA', 'RegOpenKeyEx', 'RegCloseKey'),
+ 'May read registry keys':
+ #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
+ ('RegQueryValueExA', 'RegQueryValueEx',
+ 'RegRead', #with Wscript.Shell
+ ),
+ 'May detect virtualization':
+ # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
+ (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'),
+ 'May detect Anubis Sandbox':
+ # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
+ # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
+ # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
+ ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll
+ '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId',
+ '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller'
+ ),
+ 'May detect Sandboxie':
+ # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
+ # ref: http://www.cplusplus.com/forum/windows/96874/
+ ('SbieDll.dll', 'SandboxieControlWndClass'),
+ 'May detect Sunbelt Sandbox':
+ # ref: http://www.cplusplus.com/forum/windows/96874/
+ (r'C:\file.exe',),
+ 'May detect Norman Sandbox':
+ # ref: http://www.cplusplus.com/forum/windows/96874/
+ ('currentuser',),
+ 'May detect CW Sandbox':
+ # ref: http://www.cplusplus.com/forum/windows/96874/
+ ('Schmidti',),
+ 'May detect WinJail Sandbox':
+ # ref: http://www.cplusplus.com/forum/windows/96874/
+ ('Afx:400000:0',),
+ 'Memory manipulation':
+ ('VirtualAllocEx', 'RtlMoveMemory'),
+}
+
+# Regular Expression for a URL:
+# http://en.wikipedia.org/wiki/Uniform_resource_locator
+# http://www.w3.org/Addressing/URL/uri-spec.html
+#TODO: also support username:password@server
+#TODO: other protocols (file, gopher, wais, ...?)
+SCHEME = r'\b(?:http|ftp)s?'
+# see http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
+TLD = r'(?:xn--[a-zA-Z0-9]{4,20}|[a-zA-Z]{2,20})'
+DNS_NAME = r'(?:[a-zA-Z0-9\-\.]+\.' + TLD + ')'
+#TODO: IPv6 - see https://www.debuggex.com/
+# A literal numeric IPv6 address may be given, but must be enclosed in [ ] e.g. [db8:0cec::99:123a]
+NUMBER_0_255 = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'
+IPv4 = r'(?:' + NUMBER_0_255 + r'\.){3}' + NUMBER_0_255
+# IPv4 must come before the DNS name because it is more specific
+SERVER = r'(?:' + IPv4 + '|' + DNS_NAME + ')'
+PORT = r'(?:\:[0-9]{1,5})?'
+SERVER_PORT = SERVER + PORT
+URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"]
+URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH
+re_url = re.compile(URL_RE)
+
+
+# Patterns to be extracted (IP addresses, URLs, etc)
+# From patterns.py in balbuzard
+RE_PATTERNS = (
+ ('URL', re.compile(URL_RE)),
+ ('IPv4 address', re.compile(IPv4)),
+ # TODO: add IPv6
+ ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@' + SERVER + '\b')),
+ # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(? char
+vba_chr = Suppress(
+ Combine(WordStart(vba_identifier_chars) + CaselessLiteral('Chr')
+ + Optional(CaselessLiteral('B') | CaselessLiteral('W')) + Optional('$'))
+ + '(') + vba_expr_int + Suppress(')')
+
def vba_chr_tostr(t):
    """
    Parse action for vba_chr: turn the integer argument of Chr() into the
    corresponding character.

    :param t: parsed tokens, t[0] is the integer argument
    :return: VbaExpressionString with the character; if the value is
        rejected with a ValueError, the error is logged and the text
        'Chr(<value>)' is returned instead
    """
    try:
        code_point = t[0]
        if code_point >= 0 and code_point <= 255:
            # normal, non-unicode character:
            decoded = chr(code_point)
        else:
            decoded = unichr(code_point).encode('utf-8', 'backslashreplace')
        return VbaExpressionString(decoded)
    except ValueError:
        log.exception('ERROR: incorrect parameter value for chr(): %r' % code_point)
        return VbaExpressionString('Chr(%r)' % code_point)
+
+vba_chr.setParseAction(vba_chr_tostr)
+
+
+# --- ASC --------------------------------------------------------------------
+
+# Asc(char) => int
+#TODO: see MS-VBAL 6.1.2.11.1.1 page 240 => AscB, AscW
+vba_asc = Suppress(CaselessKeyword('Asc') + '(') + vba_expr_str + Suppress(')')
+vba_asc.setParseAction(lambda t: ord(t[0]))
+
+
+# --- VAL --------------------------------------------------------------------
+
+# Val(string) => int
+# TODO: make sure the behavior of VBA's val is fully covered
+vba_val = Suppress(CaselessKeyword('Val') + '(') + vba_expr_str + Suppress(')')
+vba_val.setParseAction(lambda t: int(t[0].strip()))
+
+
+# --- StrReverse() --------------------------------------------------------------------
+
+# StrReverse(string) => string
+strReverse = Suppress(CaselessKeyword('StrReverse') + '(') + vba_expr_str + Suppress(')')
+strReverse.setParseAction(lambda t: VbaExpressionString(str(t[0])[::-1]))
+
+
+# --- ENVIRON() --------------------------------------------------------------------
+
+# Environ("name") => just translated to "%name%", that is enough for malware analysis
+environ = Suppress(CaselessKeyword('Environ') + '(') + vba_expr_str + Suppress(')')
+environ.setParseAction(lambda t: VbaExpressionString('%%%s%%' % t[0]))
+
+
+# --- IDENTIFIER -------------------------------------------------------------
+
+#TODO: see MS-VBAL 3.3.5 page 33
+# 3.3.5 Identifier Tokens
+# Latin-identifier = first-Latin-identifier-character *subsequent-Latin-identifier-character
+# first-Latin-identifier-character = (%x0041-005A / %x0061-007A) ; A-Z / a-z
+# subsequent-Latin-identifier-character = first-Latin-identifier-character / DIGIT / %x5F ; underscore
+latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_')
+
+# --- HEX FUNCTION -----------------------------------------------------------
+
+# match any custom function name with a hex string as argument:
+# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime
+
+# quoted string of at least two hexadecimal numbers of two digits:
+quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"')
+quoted_hex_string.setParseAction(lambda t: str(t[0]))
+
+hex_function_call = Suppress(latin_identifier) + Suppress('(') + \
+ quoted_hex_string('hex_string') + Suppress(')')
+hex_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_hex(t.hex_string)))
+
+
+# --- BASE64 FUNCTION -----------------------------------------------------------
+
+# match any custom function name with a Base64 string as argument:
+# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime
+
+# quoted Base64 string (matched by BASE64_RE):
+quoted_base64_string = Suppress('"') + Regex(BASE64_RE) + Suppress('"')
+quoted_base64_string.setParseAction(lambda t: str(t[0]))
+
+base64_function_call = Suppress(latin_identifier) + Suppress('(') + \
+ quoted_base64_string('base64_string') + Suppress(')')
+base64_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_base64(t.base64_string)))
+
+
+# ---STRING EXPRESSION -------------------------------------------------------
+
def concat_strings_list(tokens):
    """
    Parse action joining the operands of a VBA string expression built with
    the operators '+' or '&'.

    :param tokens: parsed tokens; tokens[0] is expected to be a list that
        alternates operands and operators, such as [a, '&', b, '+', c, ...]
    :return: VbaExpressionString, the operands concatenated in order
    """
    # operands sit at the even positions, operators at the odd ones:
    operands = tokens[0][::2]
    joined = ''.join(operands)
    return VbaExpressionString(joined)
+
+
+vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call | base64_function_call)
+
+vba_expr_str <<= infixNotation(vba_expr_str_item,
+ [
+ ("+", 2, opAssoc.LEFT, concat_strings_list),
+ ("&", 2, opAssoc.LEFT, concat_strings_list),
+ ])
+
+
+# --- INTEGER EXPRESSION -------------------------------------------------------
+
def sum_ints_list(tokens):
    """
    Parse action adding up the operands of a VBA integer expression built
    with the operator '+'.

    :param tokens: parsed tokens; tokens[0] is expected to be a list that
        alternates integers and operators, such as [a, '+', b, '+', c, ...]
    :return: the sum of the integers
    """
    total = 0
    # operands sit at the even positions, operators at the odd ones:
    for operand in tokens[0][::2]:
        total = total + operand
    return total
+
+
def subtract_ints_list(tokens):
    """
    Parse action evaluating a VBA integer expression built with the
    operator '-', from left to right.

    :param tokens: parsed tokens; tokens[0] is expected to be a list that
        alternates integers and operators, such as [a, '-', b, '-', c, ...]
    :return: a - b - c - ...
    """
    def _minus(left, right):
        return left - right

    # operands sit at the even positions, operators at the odd ones:
    operands = tokens[0][::2]
    return reduce(_minus, operands)
+
+
def multiply_ints_list(tokens):
    """
    Parse action evaluating a VBA integer expression built with the
    operator '*'.

    :param tokens: parsed tokens; tokens[0] is expected to be a list that
        alternates integers and operators, such as [a, '*', b, '*', c, ...]
    :return: the product of the integers
    """
    def _times(left, right):
        return left * right

    # operands sit at the even positions, operators at the odd ones:
    operands = tokens[0][::2]
    return reduce(_times, operands)
+
+
def divide_ints_list(tokens):
    """
    Parse action evaluating a VBA integer expression built with the
    operator '/', from left to right.

    The result is kept an integer (floor division). The integer grammar
    feeds Chr(), whose parse action calls chr() and therefore needs an int:
    with true division, Python 3 would produce a float even for an exact
    quotient such as 130/2.
    NOTE: for quotients that are not exact this rounds down, whereas VBA's
    '/' operator yields a floating point number.

    :param tokens: parsed tokens; tokens[0] is expected to be a list that
        alternates integers and operators, such as [a, '/', b, '/', c, ...]
    :return: int, a / b / c / ... using integer division
    :raises ZeroDivisionError: if one of the divisors is zero
    """
    # reduce is not a builtin on Python 3; functools provides it on 2.6+ too
    from functools import reduce
    # operands sit at the even positions, operators at the odd ones:
    integers = tokens[0][::2]
    return reduce(lambda x, y: x // y, integers)
+
+
+vba_expr_int_item = (vba_asc | vba_val | integer)
+
+# operators associativity:
+# https://en.wikipedia.org/wiki/Operator_associativity
+
+vba_expr_int <<= infixNotation(vba_expr_int_item,
+ [
+ ("*", 2, opAssoc.LEFT, multiply_ints_list),
+ ("/", 2, opAssoc.LEFT, divide_ints_list),
+ ("-", 2, opAssoc.LEFT, subtract_ints_list),
+ ("+", 2, opAssoc.LEFT, sum_ints_list),
+ ])
+
+
+# see detect_vba_strings for the deobfuscation code using this grammar
+
+# === MSO/ActiveMime files parsing ===========================================
+
def is_mso_file(data):
    """
    Tell whether the provided data looks like a MSO/ActiveMime file, such as
    the ones created by Outlook in some cases, or by Word/Excel when saving
    a file with the MHTML format or the Word 2003 XML format.
    Only the ActiveMime magic at the very beginning of data is checked.

    :param data: bytes string, MSO/ActiveMime file content
    :return: bool, True if the file is MSO, False otherwise
    """
    has_magic = data.startswith(MSO_ACTIVEMIME_HEADER)
    return has_magic
+
+
+# regex to find zlib block headers, starting with byte 0x78 = 'x'
+re_zlib_header = re.compile(r'x')
+
+
+def mso_file_extract(data):
+ """
+ Extract the data stored into a MSO/ActiveMime file, such as
+ the ones created by Outlook in some cases, or Word/Excel when saving a
+ file with the MHTML format or the Word 2003 XML format.
+
+ :param data: bytes string, MSO/ActiveMime file content
+ :return: bytes string, extracted data (uncompressed)
+
+ raise a MsoExtractionError if the data cannot be extracted
+ """
+ # check the magic:
+ assert is_mso_file(data)
+
+ # In all the samples seen so far, Word always uses an offset of 0x32,
+ # and Excel 0x22A. But we read the offset from the header to be more
+ # generic.
+ offsets = [0x32, 0x22A]
+
+ # First, attempt to get the compressed data offset from the header
+ # According to my tests, it should be an unsigned 16 bits integer,
+ # at offset 0x1E (little endian) + add 46:
+ try:
+ offset = struct.unpack_from('> bit_count
+ offset_mask = ~length_mask
+ maximum_length = (0xFFFF >> bit_count) + 3
+ return length_mask, offset_mask, bit_count, maximum_length
+
+
+def decompress_stream(compressed_container):
+ """
+ Decompress a stream according to MS-OVBA section 2.4.1
+
+ compressed_container: string compressed according to the MS-OVBA 2.4.1.3.6 Compression algorithm
+ return the decompressed container as a string (bytes)
+ """
+ # 2.4.1.2 State Variables
+
+ # The following state is maintained for the CompressedContainer (section 2.4.1.1.1):
+ # CompressedRecordEnd: The location of the byte after the last byte in the CompressedContainer (section 2.4.1.1.1).
+ # CompressedCurrent: The location of the next byte in the CompressedContainer (section 2.4.1.1.1) to be read by
+ # decompression or to be written by compression.
+
+ # The following state is maintained for the current CompressedChunk (section 2.4.1.1.4):
+ # CompressedChunkStart: The location of the first byte of the CompressedChunk (section 2.4.1.1.4) within the
+ # CompressedContainer (section 2.4.1.1.1).
+
+ # The following state is maintained for a DecompressedBuffer (section 2.4.1.1.2):
+ # DecompressedCurrent: The location of the next byte in the DecompressedBuffer (section 2.4.1.1.2) to be written by
+ # decompression or to be read by compression.
+ # DecompressedBufferEnd: The location of the byte after the last byte in the DecompressedBuffer (section 2.4.1.1.2).
+
+ # The following state is maintained for the current DecompressedChunk (section 2.4.1.1.3):
+ # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
+ # DecompressedBuffer (section 2.4.1.1.2).
+
+ decompressed_container = b'' # result
+ compressed_current = 0
+
+ sig_byte = compressed_container[compressed_current]
+ if sig_byte != 0x01:
+ raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))
+
+ compressed_current += 1
+
+ #NOTE: the definition of CompressedRecordEnd is ambiguous. Here we assume that
+ # CompressedRecordEnd = len(compressed_container)
+ while compressed_current < len(compressed_container):
+ # 2.4.1.1.5
+ compressed_chunk_start = compressed_current
+ # chunk header = first 16 bits
+ compressed_chunk_header = \
+ struct.unpack("> 12) & 0x07
+ if chunk_signature != 0b011:
+ raise ValueError('Invalid CompressedChunkSignature in VBA compressed stream')
+ # chunk flag = next bit - 1 == compressed, 0 == uncompressed
+ chunk_flag = (compressed_chunk_header >> 15) & 0x01
+ log.debug("chunk size = {0}, compressed flag = {1}".format(chunk_size, chunk_flag))
+
+ #MS-OVBA 2.4.1.3.12: the maximum size of a chunk including its header is 4098 bytes (header 2 + data 4096)
+ # The minimum size is 3 bytes
+ # NOTE: there seems to be a typo in MS-OVBA, the check should be with 4098, not 4095 (which is the max value
+ # in chunk header before adding 3.
+ # Also the first test is not useful since a 12 bits value cannot be larger than 4095.
+ if chunk_flag == 1 and chunk_size > 4098:
+ raise ValueError('CompressedChunkSize > 4098 but CompressedChunkFlag == 1')
+ if chunk_flag == 0 and chunk_size != 4098:
+ raise ValueError('CompressedChunkSize != 4098 but CompressedChunkFlag == 0')
+
+ # check if chunk_size goes beyond the compressed data, instead of silently cutting it:
+ #TODO: raise an exception?
+ if compressed_chunk_start + chunk_size > len(compressed_container):
+ log.warning('Chunk size is larger than remaining compressed data')
+ compressed_end = min([len(compressed_container), compressed_chunk_start + chunk_size])
+ # read after chunk header:
+ compressed_current = compressed_chunk_start + 2
+
+ if chunk_flag == 0:
+ # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
+ # uncompressed chunk: read the next 4096 bytes as-is
+ #TODO: check if there are at least 4096 bytes left
+ decompressed_container += bytes([compressed_container[compressed_current:compressed_current + 4096]])
+ compressed_current += 4096
+ else:
+ # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
+ # compressed chunk
+ decompressed_chunk_start = len(decompressed_container)
+ while compressed_current < compressed_end:
+ # MS-OVBA 2.4.1.3.4 Decompressing a TokenSequence
+ # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
+ # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
+ # copy tokens (reference to a previous literal token)
+ flag_byte = compressed_container[compressed_current]
+ compressed_current += 1
+ for bit_index in range(0, 8):
+ # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
+ if compressed_current >= compressed_end:
+ break
+ # MS-OVBA 2.4.1.3.5 Decompressing a Token
+ # MS-OVBA 2.4.1.3.17 Extract FlagBit
+ flag_bit = (flag_byte >> bit_index) & 1
+ #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
+ if flag_bit == 0: # LiteralToken
+ # copy one byte directly to output
+ decompressed_container += bytes([compressed_container[compressed_current]])
+ compressed_current += 1
+ else: # CopyToken
+ # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
+ copy_token = \
+ struct.unpack("> temp2) + 1
+ #log.debug('offset=%d length=%d' % (offset, length))
+ copy_source = len(decompressed_container) - offset
+ for index in range(copy_source, copy_source + length):
+ decompressed_container += bytes([decompressed_container[index]])
+ compressed_current += 2
+ return decompressed_container
+
+
+def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
+ """
+ Extract VBA macros from an OleFileIO object.
+ Internal function, do not call directly.
+
+ vba_root: path to the VBA root storage, containing the VBA storage and the PROJECT stream
+ vba_project: path to the PROJECT stream
+ :param relaxed: If True, only create info/debug log entry if data is not as expected
+ (e.g. opening substream fails); if False, raise an error in this case
+ This is a generator, yielding (stream path, VBA filename, VBA source code) for each VBA code stream
+ """
+ # Open the PROJECT stream:
+ project = ole.openstream(project_path)
+ log.debug('relaxed is %s' % relaxed)
+
+ # sample content of the PROJECT stream:
+
+ ## ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}"
+ ## Document=ThisDocument/&H00000000
+ ## Module=NewMacros
+ ## Name="Project"
+ ## HelpContextID="0"
+ ## VersionCompatible32="393222000"
+ ## CMG="F1F301E705E705E705E705"
+ ## DPB="8F8D7FE3831F2020202020"
+ ## GC="2D2FDD81E51EE61EE6E1"
+ ##
+ ## [Host Extender Info]
+ ## &H00000001={3832D640-CF90-11CF-8E43-00A0C911005A};VBE;&H00000000
+ ## &H00000002={000209F2-0000-0000-C000-000000000046};Word8.0;&H00000000
+ ##
+ ## [Workspace]
+ ## ThisDocument=22, 29, 339, 477, Z
+ ## NewMacros=-4, 42, 832, 510, C
+
+ code_modules = {}
+
+ for line in project:
+ line = line.strip().decode('utf-8','ignore')
+ if '=' in line:
+ # split line at the 1st equal sign:
+ name, value = line.split('=', 1)
+ # looking for code modules
+ # add the code module as a key in the dictionary
+ # the value will be the extension needed later
+ # The value is converted to lowercase, to allow case-insensitive matching (issue #3)
+ value = value.lower()
+ if name == 'Document':
+ # split value at the 1st slash, keep 1st part:
+ value = value.split('/', 1)[0]
+ code_modules[value] = CLASS_EXTENSION
+ elif name == 'Module':
+ code_modules[value] = MODULE_EXTENSION
+ elif name == 'Class':
+ code_modules[value] = CLASS_EXTENSION
+ elif name == 'BaseClass':
+ code_modules[value] = FORM_EXTENSION
+
+ # read data from dir stream (compressed)
+ dir_compressed = ole.openstream(dir_path).read()
+
+ def check_value(name, expected, value):
+ if expected != value:
+ if relaxed:
+ log.error("invalid value for {0} expected {1:04X} got {2:04X}"
+ .format(name, expected, value))
+ else:
+ raise UnexpectedDataError(dir_path, name, expected, value)
+
+ dir_stream = BytesIO(decompress_stream(dir_compressed))
+
+ # PROJECTSYSKIND Record
+ projectsyskind_id = struct.unpack(" 128:
+ log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname))
+ projectname_projectname = dir_stream.read(projectname_sizeof_projectname)
+ unused = projectname_projectname
+
+ # PROJECTDOCSTRING Record
+ projectdocstring_id = struct.unpack(" 2000:
+ log.error(
+ "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring))
+ projectdocstring_docstring = dir_stream.read(projectdocstring_sizeof_docstring)
+ projectdocstring_reserved = struct.unpack(" 260:
+ log.error(
+ "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1))
+ projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1)
+ projecthelpfilepath_reserved = struct.unpack(" 1015:
+ log.error(
+ "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants))
+ projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants)
+ projectconstants_reserved = struct.unpack(" 0:
+ code_data = decompress_stream(code_data)
+ # case-insensitive search in the code_modules dict to find the file extension:
+ filext = code_modules.get(modulename_modulename.lower(), 'bin')
+ filename = '{0}.{1}'.format(modulename_modulename, filext)
+ #TODO: also yield the codepage so that callers can decode it properly
+ yield (code_path, filename, code_data)
+ # print '-'*79
+ # print filename
+ # print ''
+ # print code_data
+ # print ''
+ log.debug('extracted file {0}'.format(filename))
+ else:
+ log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname))
+ except (UnexpectedDataError, SubstreamOpenError):
+ raise
+ except Exception as exc:
+ log.info('Error parsing module {0} of {1} in _extract_vba:'
+ .format(projectmodule_index, projectmodules_count),
+ exc_info=True)
+ if not relaxed:
+ raise
+ _ = unused # make pylint happy: now variable "unused" is being used ;-)
+ return
+
+
def vba_collapse_long_lines(vba_code):
    """
    Join VBA lines that were split with the line continuation marker (a
    space followed by an underscore at the end of a line). Each marker,
    together with the end of line that follows it, is replaced by a space.

    :param vba_code: str, VBA module code
    :return: str, VBA module code with long lines collapsed
    """
    # TODO: use a regex instead, to allow whitespaces after the underscore?
    # The order matters: '\r\n' has to be handled before its two halves.
    for end_of_line in ('\r\n', '\r', '\n'):
        vba_code = vba_code.replace(' _' + end_of_line, ' ')
    return vba_code
+
+
def filter_vba(vba_code):
    """
    Strip the leading lines starting with "Attribute VB_" from VBA source
    code. These lines are added automatically by MS Office and are not
    displayed in the VBA Editor.
    This should only be used when displaying source code for human analysis.

    Note: a line containing a colon is never removed, because the colon
    could be used to hide malicious instructions on that line. Filtering
    stops at the first line that is kept.

    :param vba_code: str, VBA source code
    :return: str, filtered VBA source code (lines joined with '\\n')
    """
    lines = vba_code.splitlines()
    skip = 0
    while skip < len(lines):
        current = lines[skip]
        if ':' in current or not current.startswith("Attribute VB_"):
            # first line to keep: everything from here on is returned
            break
        skip += 1
    #TODO: also remove empty lines?
    return '\n'.join(lines[skip:])
+
+
def detect_autoexec(vba_code, obfuscation=None):
    """
    Detect if the VBA code contains keywords corresponding to macros running
    automatically when triggered by specific actions (e.g. when a document is
    opened or closed).

    Every keyword of AUTOEXEC_KEYWORDS is searched as a whole word, ignoring
    case. Keywords are matched literally: they are escaped before being
    inserted into the regular expression, so that a keyword containing a
    regex metacharacter cannot alter or break the search.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to description
    :return: list of str tuples (keyword, description)
    """
    #TODO: merge code with detect_suspicious
    obf_text = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    results = []
    for description, keywords in AUTOEXEC_KEYWORDS.items():
        for keyword in keywords:
            #TODO: if keyword is already a compiled regex, use it as-is
            # case-insensitive search, using \b to detect word boundaries:
            if re.search(r'(?i)\b' + re.escape(keyword) + r'\b', vba_code):
                results.append((keyword, description + obf_text))
    return results
+
+
def detect_suspicious(vba_code, obfuscation=None):
    """
    Detect if the VBA code contains suspicious keywords corresponding to
    potential malware behaviour.

    Every keyword of SUSPICIOUS_KEYWORDS is searched as a whole word,
    ignoring case. Keywords are matched literally: they are escaped before
    being inserted into the regular expression. This is required because
    several keywords are registry or file paths containing backslashes and
    dots (e.g. r'C:\\file.exe'), which would otherwise be read as regex
    escapes and wildcards, or be rejected as invalid patterns.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to description
    :return: list of str tuples (keyword, description)
    """
    obf_text = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    results = []
    for description, keywords in SUSPICIOUS_KEYWORDS.items():
        for keyword in keywords:
            # case-insensitive search, using \b to detect word boundaries:
            if re.search(r'(?i)\b' + re.escape(keyword) + r'\b', vba_code):
                results.append((keyword, description + obf_text))
    return results
+
+
def detect_patterns(vba_code, obfuscation=None):
    """
    Search VBA code for the patterns listed in RE_PATTERNS (e.g. IP addresses,
    URLs, e-mail addresses, executable file names).

    A given matched value is reported only once, for the first pattern
    that matched it.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to the pattern type
    :return: list of str tuples (pattern type, value)
    """
    suffix = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    seen = set()
    hits = []
    for pattern_type, pattern_re in RE_PATTERNS:
        for match in pattern_re.finditer(vba_code):
            value = match.group()
            if value in seen:
                continue
            hits.append((pattern_type + suffix, value))
            seen.add(value)
    return hits
+
+
def detect_hex_strings(vba_code):
    """
    Find strings encoded in hexadecimal within VBA code and decode them.

    Each distinct match of re_hex_string is unhexlified and decoded as UTF-8,
    replacing undecodable bytes.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    seen = set()
    decoded_pairs = []
    for match in re_hex_string.finditer(vba_code):
        encoded = match.group()
        if encoded in seen:
            continue
        raw = binascii.unhexlify(encoded)
        decoded_pairs.append((encoded, raw.decode('utf-8', 'replace')))
        seen.add(encoded)
    return decoded_pairs
+
+
def detect_base64_strings(vba_code):
    """
    Find strings encoded in base64 within VBA code and decode them.

    Candidates that look like plain hex strings, that were already seen, or
    that are listed in BASE64_WHITELIST are ignored.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    # TODO: avoid matching simple hex strings as base64?
    seen = set()
    decoded_pairs = []
    for match in re_base64_string.finditer(vba_code):
        # the regex match includes the surrounding double quotes
        candidate = match.group().strip('"')
        if not re_nothex_check.search(candidate):
            # just a hex string, not base64
            continue
        if candidate in seen or candidate.lower() in BASE64_WHITELIST:
            continue
        try:
            raw = base64.b64decode(candidate)
            decoded_pairs.append((candidate, raw.decode('utf-8', 'replace')))
            seen.add(candidate)
        except (TypeError, ValueError) as exc:
            # a decoding failure means it is likely not a base64-encoded string
            log.debug('Failed to base64-decode (%s)' % exc)
    return decoded_pairs
+
+
def detect_dridex_strings(vba_code):
    """
    Find strings obfuscated with a specific algorithm found in Dridex samples
    and decode them.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    from oletools.thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode

    seen = set()
    decoded_pairs = []
    for match in re_dridex_string.finditer(vba_code):
        # drop the first and last character of the match (the delimiters)
        candidate = match.group()[1:-1]
        if not re_nothex_check.search(candidate):
            # just a hex string, not a Dridex string
            continue
        if candidate in seen:
            continue
        try:
            plain = DridexUrlDecode(candidate)
            decoded_pairs.append((candidate, plain))
            seen.add(candidate)
        except Exception as exc:
            # a decoding failure means it is likely not a dridex-encoded string
            log.debug('Failed to Dridex-decode (%s)' % exc)
    return decoded_pairs
+
+
def detect_vba_strings(vba_code):
    """
    Find strings obfuscated with VBA expressions (using keywords such as
    Chr, Asc, Val, StrReverse, etc) and return their decoded value.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    # TODO: handle exceptions
    # IMPORTANT: tabs must be expanded so that the offsets reported by
    # pyparsing refer to the same string we slice below; otherwise the
    # start and end offsets are incorrect.
    expanded = vba_code.expandtabs()
    seen = set()
    decoded_pairs = []
    for tokens, start, end in vba_expr_str.scanString(expanded):
        source_text = expanded[start:end]
        value = tokens[0]
        if not isinstance(value, VbaExpressionString):
            # a simple string literal, not a VBA expression
            continue
        # avoid duplicates and expressions that decode to themselves
        if source_text in seen or value == source_text:
            continue
        decoded_pairs.append((source_text, value))
        seen.add(source_text)
    return decoded_pairs
+
+
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Ensure that a json-like structure contains no bytes strings.

    Works recursively: every bytes value found in dicts, lists and tuples is
    decoded to str so that json.dumps will not fail on it. Dicts and lists
    are updated in place and returned; tuples are immutable, so a new tuple
    holding the converted items is returned. Dict keys are left untouched.

    :param json_obj: None, bool, int, float, str, bytes, dict, list or tuple
    :param encoding: str, codec used to decode bytes values
    :param errors: str, error handler passed to bytes.decode
    :return: the converted object; objects of unexpected type are returned as is
    """
    if json_obj is None or isinstance(json_obj, (bool, int, float, str)):
        # already safe to serialize
        return json_obj
    if isinstance(json_obj, bytes):
        decoded = json_obj.decode(encoding, errors)
        log.debug('json2ascii: encode unicode: {0}'.format(decoded))
        return decoded
    if isinstance(json_obj, dict):
        # re-assigning existing keys does not change the dict size,
        # so it is safe while iterating
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj
    if isinstance(json_obj, list):
        # items must be written back by index: rebinding the loop variable
        # would leave the list unchanged
        for index, item in enumerate(json_obj):
            json_obj[index] = json2ascii(item, encoding, errors)
        return json_obj
    if isinstance(json_obj, tuple):
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)
    log.debug('unexpected type in json2ascii: {0} -- leave as is'
              .format(type(json_obj)))
    return json_obj
+
+
# Set to True once the opening '[' of the top-level json list has been printed
_have_printed_json_start = False

def print_json(json_dict=None, _json_is_last=False, **json_parts):
    """ print json.dumps(json2ascii(..)) line by line, as one entry of a list

    The first call prints the opening '[' of a top-level json list; every
    entry is followed by a comma, except the one flagged as last, which is
    followed by the closing ']'.

    can use in two ways:
    (1) print_json(some_dict)
    (2) print_json(key1=value1, key2=value2, ...)

    :param json_dict: None or dict, the entry to print (usage 1)
    :param bool _json_is_last: set to True only for very last entry to complete
                               the top-level json-list
    :param json_parts: key=value parts forming the entry to print (usage 2)
    :raises ValueError: if both a dict and key=value parts are given, or if
                        json_dict is neither None nor a dict
    """
    global _have_printed_json_start

    if json_dict and json_parts:
        raise ValueError('Invalid json argument: want either single dict or '
                         'key=value parts but got both)')
    if json_dict is not None and not isinstance(json_dict, dict):
        raise ValueError('Invalid json argument: want either single dict or '
                         'key=value parts but got {0} instead of dict)'
                         .format(type(json_dict)))
    entry = json_parts if json_parts else json_dict

    if not _have_printed_json_start:
        print('[')
        _have_printed_json_start = True

    dumped = json.dumps(json2ascii(entry), check_circular=False,
                        indent=4, ensure_ascii=False)
    lines = dumped.splitlines()
    for line in lines[:-1]:
        print(' {0}'.format(line))
    final_line = lines[-1]
    if not _json_is_last:
        # more entries will follow: terminate this one with a comma
        print(' {0},'.format(final_line))
    else:
        # very last entry: no comma, close the top-level list
        print(' {0}'.format(final_line))
        print(']')
+
+
class VBA_Scanner(object):
    """
    Class to scan the source code of a VBA module to find obfuscated strings,
    suspicious keywords, IOCs, auto-executable macros, etc.
    """

    def __init__(self, vba_code):
        """
        VBA_Scanner constructor

        :param vba_code: str, VBA source code to be analyzed
        """
        # join long lines ending with " _":
        self.code = vba_collapse_long_lines(vba_code)
        # decoded strings of each obfuscation kind, appended one per line by scan():
        self.code_hex = ''
        self.code_hex_rev = ''
        self.code_rev_hex = ''
        self.code_base64 = ''
        self.code_dridex = ''
        self.code_vba = ''
        # None before scanning, then True if the code contains "StrReverse"
        self.strReverse = None
        # results = None before scanning, then a list of tuples after scanning
        self.results = None
        self.autoexec_keywords = None
        self.suspicious_keywords = None
        self.iocs = None
        self.hex_strings = None
        self.base64_strings = None
        self.dridex_strings = None
        self.vba_strings = None

    def scan(self, include_decoded_strings=False, deobfuscate=False):
        """
        Analyze the provided VBA code to detect suspicious keywords,
        auto-executable macros, IOC patterns, obfuscation patterns
        such as hex-encoded strings.

        :param include_decoded_strings: bool, if True, all encoded strings will be included with their decoded content.
        :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
        :return: list of tuples (type, keyword, description)
            (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String',
            'Dridex string' or 'VBA string')
        """
        # start from empty buffers, so that calling scan() more than once
        # does not append the same decoded strings again:
        self.code_hex = ''
        self.code_hex_rev = ''
        self.code_rev_hex = ''
        self.code_base64 = ''
        self.code_dridex = ''
        self.code_vba = ''

        # First, detect and extract hex-encoded strings:
        self.hex_strings = detect_hex_strings(self.code)
        # detect if the code contains StrReverse:
        self.strReverse = 'strreverse' in self.code.lower()
        # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
        for encoded, decoded in self.hex_strings:
            self.code_hex += '\n' + decoded
            # if the code contains "StrReverse", also append the hex strings in reverse order:
            if self.strReverse:
                # StrReverse after hex decoding:
                self.code_hex_rev += '\n' + decoded[::-1]
                # StrReverse before hex decoding:
                # (decode the bytes explicitly: str() on bytes would produce
                # the "b'...'" representation instead of the actual text)
                self.code_rev_hex += '\n' + binascii.unhexlify(
                    encoded[::-1]).decode('utf-8', 'replace')
                #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
        #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
        # Detect Base64-encoded strings
        self.base64_strings = detect_base64_strings(self.code)
        for encoded, decoded in self.base64_strings:
            self.code_base64 += '\n' + decoded
        # Detect Dridex-encoded strings
        self.dridex_strings = detect_dridex_strings(self.code)
        for encoded, decoded in self.dridex_strings:
            self.code_dridex += '\n' + decoded
        # Detect obfuscated strings in VBA expressions
        if deobfuscate:
            self.vba_strings = detect_vba_strings(self.code)
        else:
            self.vba_strings = []
        for encoded, decoded in self.vba_strings:
            self.code_vba += '\n' + decoded
        results = []
        self.autoexec_keywords = []
        self.suspicious_keywords = []
        self.iocs = []

        # run the keyword and pattern detection on the plain code and on
        # each buffer of decoded strings:
        for code, obfuscation in (
                (self.code, None),
                (self.code_hex, 'Hex'),
                (self.code_hex_rev, 'Hex+StrReverse'),
                (self.code_rev_hex, 'StrReverse+Hex'),
                (self.code_base64, 'Base64'),
                (self.code_dridex, 'Dridex'),
                (self.code_vba, 'VBA expression'),
        ):
            if isinstance(code, bytes):
                code = code.decode('utf-8', 'replace')
            self.autoexec_keywords += detect_autoexec(code, obfuscation)
            self.suspicious_keywords += detect_suspicious(code, obfuscation)
            self.iocs += detect_patterns(code, obfuscation)

        # If hex-encoded strings were discovered, add an item to suspicious keywords:
        if self.hex_strings:
            self.suspicious_keywords.append(('Hex Strings',
                                             'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
        if self.base64_strings:
            self.suspicious_keywords.append(('Base64 Strings',
                                             'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
        if self.dridex_strings:
            self.suspicious_keywords.append(('Dridex Strings',
                                             'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
        if self.vba_strings:
            self.suspicious_keywords.append(('VBA obfuscated Strings',
                                             'VBA string expressions were detected, may be used to obfuscate strings (option --decode to see all)'))
        # use a set to avoid duplicate keywords
        keyword_set = set()
        for keyword, description in self.autoexec_keywords:
            if keyword not in keyword_set:
                results.append(('AutoExec', keyword, description))
                keyword_set.add(keyword)
        keyword_set = set()
        for keyword, description in self.suspicious_keywords:
            if keyword not in keyword_set:
                results.append(('Suspicious', keyword, description))
                keyword_set.add(keyword)
        keyword_set = set()
        for pattern_type, value in self.iocs:
            if value not in keyword_set:
                results.append(('IOC', value, pattern_type))
                keyword_set.add(value)

        # include decoded strings only if they are printable or if --decode option:
        for encoded, decoded in self.hex_strings:
            if include_decoded_strings or is_printable(decoded):
                results.append(('Hex String', decoded, encoded))
        for encoded, decoded in self.base64_strings:
            if include_decoded_strings or is_printable(decoded):
                results.append(('Base64 String', decoded, encoded))
        for encoded, decoded in self.dridex_strings:
            if include_decoded_strings or is_printable(decoded):
                results.append(('Dridex string', decoded, encoded))
        for encoded, decoded in self.vba_strings:
            if include_decoded_strings or is_printable(decoded):
                results.append(('VBA string', decoded, encoded))
        self.results = results
        return results

    def scan_summary(self):
        """
        Analyze the provided VBA code to detect suspicious keywords,
        auto-executable macros, IOC patterns, obfuscation patterns
        such as hex-encoded strings.

        :return: tuple with the number of items found for each category:
            (autoexec, suspicious, IOCs, hex, base64, dridex, vba)
        """
        # avoid scanning the same code twice:
        if self.results is None:
            self.scan()
        return (len(self.autoexec_keywords), len(self.suspicious_keywords),
                len(self.iocs), len(self.hex_strings), len(self.base64_strings),
                len(self.dridex_strings), len(self.vba_strings))
+
+
def scan_vba(vba_code, include_decoded_strings, deobfuscate=False):
    """
    Analyze the provided VBA code to detect suspicious keywords,
    auto-executable macros, IOC patterns, obfuscation patterns
    such as hex-encoded strings.
    Convenience wrapper: builds a VBA_Scanner for the code and runs its scan().

    :param vba_code: str, VBA source code to be analyzed
    :param include_decoded_strings: bool, if True all encoded strings will be included with their decoded content.
    :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
    :return: list of tuples (type, keyword, description), as returned by VBA_Scanner.scan
    """
    scanner = VBA_Scanner(vba_code)
    return scanner.scan(include_decoded_strings, deobfuscate)
+
+
+#=== CLASSES =================================================================
+
+class VBA_Parser(object):
+ """
+ Class to parse MS Office files, to detect VBA macros and extract VBA source code
+ Supported file formats:
+ - Word 97-2003 (.doc, .dot)
+ - Word 2007+ (.docm, .dotm)
+ - Word 2003 XML (.xml)
+ - Word MHT - Single File Web Page / MHTML (.mht)
+ - Excel 97-2003 (.xls)
+ - Excel 2007+ (.xlsm, .xlsb)
+ - PowerPoint 97-2003 (.ppt)
+ - PowerPoint 2007+ (.pptm, .ppsm)
+ """
+
def __init__(self, filename, data=None, container=None, relaxed=False):
    """
    Constructor for VBA_Parser

    The file is tried in turn as OLE (then PowerPoint 97-2003), Zip/OpenXML,
    Word 2003 XML, MHTML and finally plain text; the first format that
    parses successfully sets self.type.

    :param filename: filename or path of file to parse, or file-like object

    :param data: None or bytes str, if None the file will be read from disk (or from the file-like object).
        If data is provided as a bytes string, it will be parsed as the content of the file in memory,
        and not read from disk. Note: files must be read in binary mode, i.e. open(f, 'rb').

    :param container: str, path and filename of container if the file is within
        a zip archive, None otherwise.

    :param relaxed: if True, treat mal-formed documents and missing streams more like MS office:
        do nothing; if False (default), raise errors in these cases

    raises a FileOpenError if all attempts to interpret the data header failed
    """
    #TODO: filename should only be a string, data should be used for the file-like object
    #TODO: filename should be mandatory, optional data is a string or file-like object
    #TODO: also support olefile and zipfile as input
    if data is None:
        # open file from disk:
        _file = filename
    else:
        # file already read in memory, make it a file-like object for zipfile:
        _file = BytesIO(data)
    self.ole_file = None
    self.ole_subfiles = []
    self.filename = filename
    self.container = container
    self.relaxed = relaxed
    self.type = None
    self.vba_projects = None
    self.vba_forms = None
    self.contains_macros = None  # will be set to True or False by detect_macros
    self.vba_code_all_modules = None  # to store the source code of all modules
    # list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code)
    self.modules = None
    # Analysis results: list of tuples (type, keyword, description) - See VBA_Scanner
    self.analysis_results = None
    # statistics for the scan summary and flags
    self.nb_macros = 0
    self.nb_autoexec = 0
    self.nb_suspicious = 0
    self.nb_iocs = 0
    self.nb_hexstrings = 0
    self.nb_base64strings = 0
    self.nb_dridexstrings = 0
    self.nb_vbastrings = 0

    if olefile.isOleFile(_file):
        # This looks like an OLE file
        self.open_ole(_file)

        # if this worked, try whether it is a ppt file (special ole file)
        self.open_ppt()
    if self.type is None and zipfile.is_zipfile(_file):
        # Zip file, which may be an OpenXML document
        self.open_openxml(_file)
    if self.type is None:
        # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML,
        # or a plain text file containing VBA code
        if data is None:
            # use a context manager so that the file handle is always closed
            with open(filename, 'rb') as file_handle:
                data = file_handle.read()
        # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
        if b'http://schemas.microsoft.com/office/word/2003/wordml' in data:
            self.open_word2003xml(data)
        # store a lowercase version for the next tests:
        data_lowercase = data.lower()
        # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"):
        # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line
        # BUT Word accepts a blank line or other MIME headers inserted before,
        # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored.
        # And the line is case insensitive.
        # so we'll just check the presence of mime, version and multipart anywhere:
        if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \
                and b'multipart' in data_lowercase:
            self.open_mht(data)
        #TODO: handle exceptions
        #TODO: Excel 2003 XML
        # Check if this is a plain text VBA or VBScript file:
        # To avoid scanning binary files, we simply check for some control chars:
        if self.type is None and b'\x00' not in data:
            self.open_text(data)
    if self.type is None:
        # At this stage, could not match a known format:
        msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename
        log.info(msg)
        raise FileOpenError(msg)
+
def open_ole(self, _file):
    """
    Try to parse the file as an OLE container.

    On success the parsed container is stored in self.ole_file and
    self.type is set to TYPE_OLE; on failure the error is only logged.

    :param _file: filename or file contents in a file object
    :return: nothing
    """
    log.info('Opening OLE file %s' % self.filename)
    try:
        # Open and parse the OLE file, using unicode for path names:
        parsed = olefile.OleFileIO(_file, path_encoding=None)
    except (IOError, TypeError, ValueError) as exc:
        # TODO: handle OLE parsing exceptions
        log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc))
        log.debug('Trace:', exc_info=True)
        return
    self.ole_file = parsed
    # the type is set only once parsing has succeeded
    self.type = TYPE_OLE
+
+
def open_openxml(self, _file):
    """
    Open an OpenXML file (zip archive) and parse every OLE file it contains.

    Each archive member starting with the OLE magic is parsed by a nested
    VBA_Parser, which is appended to self.ole_subfiles. self.type is set to
    TYPE_OpenXML only if the whole archive could be processed.

    :param _file: filename or file contents in a file object
    :return: nothing
    :raises SubstreamOpenError: if a member cannot be parsed and relaxed is False
    """
    # This looks like a zip file, need to look for vbaProject.bin inside
    # It can be any OLE file inside the archive
    #...because vbaProject.bin can be renamed:
    # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18
    log.info('Opening ZIP/OpenXML file %s' % self.filename)
    try:
        # context managers make sure that the archive and its member
        # streams are closed even when an exception is raised
        with zipfile.ZipFile(_file) as z:
            #TODO: check if this is actually an OpenXML file
            #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically
            # check each file within the zip if it is an OLE file, by reading its magic:
            for subfile in z.namelist():
                with z.open(subfile) as member:
                    magic = member.read(len(olefile.MAGIC))
                    if magic != olefile.MAGIC:
                        continue
                    log.debug('Opening OLE file %s within zip' % subfile)
                    # the magic was already consumed: read the rest of the member
                    ole_data = magic + member.read()
                try:
                    self.ole_subfiles.append(
                        VBA_Parser(filename=subfile, data=ole_data,
                                   relaxed=self.relaxed))
                except OlevbaBaseException as exc:
                    if not self.relaxed:
                        raise SubstreamOpenError(self.filename, subfile,
                                                 exc)
                    log.info('%s is not a valid OLE file (%s)' % (subfile, exc))
                    log.debug('Trace:', exc_info=True)
        # set type only if parsing succeeds
        self.type = TYPE_OpenXML
    except OlevbaBaseException as exc:
        if self.relaxed:
            log.info('Error {0} caught in Zip/OpenXML parsing for file {1}'
                     .format(exc, self.filename))
            log.debug('Trace:', exc_info=True)
        else:
            raise
    except (RuntimeError, zipfile.BadZipfile, zipfile.LargeZipFile, IOError) as exc:
        # TODO: handle parsing exceptions
        log.info('Failed Zip/OpenXML parsing for file %r (%s)'
                 % (self.filename, exc))
        log.debug('Trace:', exc_info=True)
+
def open_word2003xml(self, data):
    """
    Open a Word 2003 XML file and parse every MSO container it embeds.

    Each binData element holding a valid MSO file is extracted and parsed by
    a nested VBA_Parser, which is appended to self.ole_subfiles. self.type is
    set to TYPE_Word2003_XML only if the whole document could be processed.

    :param data: file contents in a string or bytes
    :return: nothing
    :raises SubstreamOpenError: if an embedded file cannot be parsed and relaxed is False
    """
    log.info('Opening Word 2003 XML file %s' % self.filename)
    try:
        # parse the XML content
        # TODO: handle XML parsing exceptions
        et = ET.fromstring(data)
        # find all the binData elements:
        # (iter() replaces getiterator(), which was removed from
        # xml.etree.ElementTree in Python 3.9)
        for bindata in et.iter(TAG_BINDATA):
            # the binData content is an OLE container for the VBA project, compressed
            # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
            # get the filename:
            fname = bindata.get(ATTR_NAME, 'noname.mso')
            # decode the base64 activemime
            mso_data = binascii.a2b_base64(bindata.text)
            if is_mso_file(mso_data):
                # decompress the zlib data stored in the MSO file, which is the OLE container:
                # TODO: handle different offsets => separate function
                try:
                    ole_data = mso_file_extract(mso_data)
                    self.ole_subfiles.append(
                        VBA_Parser(filename=fname, data=ole_data,
                                   relaxed=self.relaxed))
                except OlevbaBaseException as exc:
                    if self.relaxed:
                        log.info('Error parsing subfile {0}: {1}'
                                 .format(fname, exc))
                        log.debug('Trace:', exc_info=True)
                    else:
                        raise SubstreamOpenError(self.filename, fname, exc)
            else:
                log.info('%s is not a valid MSO file' % fname)
        # set type only if parsing succeeds
        self.type = TYPE_Word2003_XML
    except OlevbaBaseException as exc:
        if self.relaxed:
            log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
            log.debug('Trace:', exc_info=True)
        else:
            raise
    except Exception as exc:
        # TODO: differentiate exceptions for each parsing stage
        # (but ET is different libs, no good exception description in API)
        # found: XMLSyntaxError
        log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
        log.debug('Trace:', exc_info=True)
+
def open_mht(self, data):
    """
    Open a MHTML file and parse every MSO container attached to it.

    Each MIME part whose payload is a valid MSO file is extracted and parsed
    by a nested VBA_Parser, which is appended to self.ole_subfiles.
    self.type is set to TYPE_MHTML only if the whole message could be processed.

    :param data: file contents in a string or bytes
    :return: nothing
    :raises SubstreamOpenError: if an attached file cannot be parsed and relaxed is False
    """
    log.info('Opening MHTML file %s' % self.filename)
    try:
        text = data.decode('utf8', 'replace') if isinstance(data, bytes) else data
        # parse the MIME content
        # remove any leading whitespace or newline (workaround for issue in email package)
        text = text.lstrip('\r\n\t ')
        # strip any junk from the beginning of the file
        # (issue #31 fix by Greg C - gdigreg)
        # TODO: improve keywords to avoid false positives
        mime_pos = text.find('MIME')
        content_pos = text.find('Content')
        if -1 < mime_pos <= content_pos:
            # "MIME" is found, and located before "Content"
            text = text[mime_pos:]
        elif content_pos > -1:
            # otherwise start at "Content" if it is found
            # TODO: can it work without "MIME" at all?
            text = text[content_pos:]
        # TODO: quick and dirty fix: insert a standard line with MIME-Version header?
        message = email.message_from_string(text)
        # find all the attached files:
        for part in message.walk():
            content_type = part.get_content_type()  # always returns a value
            fname = part.get_filename(None)  # returns None if it fails
            # TODO: get content-location if no filename
            log.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type))
            payload = part.get_payload(decode=True)
            # VBA macros are stored in a binary file named "editdata.mso".
            # the data content is an OLE container for the VBA project, compressed
            # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
            # check ActiveMime header:
            if not (isinstance(payload, (str, bytes)) and is_mso_file(payload)):
                log.debug('type(part_data) = %s' % type(payload))
                try:
                    log.debug('part_data[0:20] = %r' % payload[0:20])
                except TypeError:
                    log.debug('part_data has no __getitem__')
                continue
            log.debug('Found ActiveMime header, decompressing MSO container')
            try:
                ole_data = mso_file_extract(payload)
                # TODO: check if it is actually an OLE file
                # TODO: get the MSO filename from content_location?
                self.ole_subfiles.append(
                    VBA_Parser(filename=fname, data=ole_data,
                               relaxed=self.relaxed))
            except OlevbaBaseException as exc:
                if not self.relaxed:
                    raise SubstreamOpenError(self.filename, fname, exc)
                log.info('%s does not contain a valid OLE file (%s)'
                         % (fname, exc))
                log.debug('Trace:', exc_info=True)
                # TODO: bug here - need to split in smaller functions/classes?
        # set type only if parsing succeeds
        self.type = TYPE_MHTML
    except OlevbaBaseException:
        raise
    except Exception:
        log.info('Failed MIME parsing for file %r - %s'
                 % (self.filename, MSG_OLEVBA_ISSUES))
        log.debug('Trace:', exc_info=True)
+
def open_ppt(self):
    """ try to interpret self.ole_file as PowerPoint 97-2003 using PptParser

    Although self.ole_file is a valid olefile.OleFileIO, it is closed and
    reset to None here, and self.ole_subfiles receives one VBA_Parser per
    VBA data blob found by PptParser. That makes most of the other code
    treat this like an OpenXML file and only look at the ole_subfiles
    (except find_vba_* which needs to explicitly check for self.type).

    Any exception means the file is not handled as PPT; it is only logged.
    """
    log.info('Check whether OLE file is PPT')
    ppt_parser.enable_logging()
    try:
        parser = ppt_parser.PptParser(self.ole_file, fast_fail=True)
        for vba_data in parser.iter_vba_data():
            subparser = VBA_Parser(None, vba_data, container='PptParser')
            self.ole_subfiles.append(subparser)
        log.info('File is PPT')
        self.ole_file.close()  # just in case
        self.ole_file = None  # required to make other methods look at ole_subfiles
        self.type = TYPE_PPT
    except Exception as exc:
        if self.container != 'PptParser':
            log.debug("File appears not to be a ppt file (%s)" % exc)
        else:
            # this is a subfile of a ppt --> to be expected that is no ppt
            log.debug('PPT subfile is not a PPT file')
+
+
def open_text(self, data):
    """
    Open a text file containing VBA or VBScript source code.

    The whole text is stored as the source code of all modules, and the
    file is flagged as containing macros.

    :param data: file contents in a string or bytes
    :return: nothing
    """
    log.info('Opening text file %s' % self.filename)
    # bytes are decoded as UTF-8, replacing undecodable bytes
    source = data.decode('utf8', 'replace') if isinstance(data, bytes) else data
    # directly store the source code:
    self.vba_code_all_modules = source
    self.contains_macros = True
    self.type = TYPE_TEXT
+
+
def find_vba_projects(self):
    """
    Finds all the VBA projects stored in an OLE file.

    Return None if the file is not OLE but OpenXML.
    Return a list of tuples (vba_root, project_path, dir_path) for each VBA project.
    vba_root is the path of the root OLE storage containing the VBA project,
    including a trailing slash unless it is the root of the OLE file.
    project_path is the path of the OLE stream named "PROJECT" within the VBA project.
    dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.

    If this function returns an empty list for one of the supported formats
    (i.e. Word, Excel, Powerpoint), then the file does not contain VBA macros.

    The result is cached in self.vba_projects, and returned as is by later calls.

    :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
    for each VBA project found if OLE file
    """
    log.debug('VBA_Parser.find_vba_projects')

    # if the file is not OLE but OpenXML, return None:
    if self.ole_file is None and self.type != TYPE_PPT:
        return None

    # if this method has already been called, return previous result:
    if self.vba_projects is not None:
        return self.vba_projects

    # if this is a ppt file (PowerPoint 97-2003):
    # self.ole_file is None but the ole_subfiles do contain vba_projects
    # (like for OpenXML files).
    if self.type == TYPE_PPT:
        # TODO: so far, this function is never called for PPT files, but
        # if that happens, the information is lost which ole file contains
        # which storage!
        log.warning('Returned info is not complete for PPT types!')
        self.vba_projects = []
        for subfile in self.ole_subfiles:
            # a subfile that is not OLE returns None: treat it as "no project"
            # instead of failing in extend()
            self.vba_projects.extend(subfile.find_vba_projects() or [])
        return self.vba_projects

    # Find the VBA project root (different in MS Word, Excel, etc):
    # - Word 97-2003: Macros
    # - Excel 97-2003: _VBA_PROJECT_CUR
    # - PowerPoint 97-2003: PptParser has identified ole_subfiles
    # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin.
    # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word
    # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word
    # - Visio 2007: not supported yet (different file structure)

    # According to MS-OVBA section 2.2.1:
    # - the VBA project root storage MUST contain a VBA storage and a PROJECT stream
    # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream
    # - all names are case-insensitive

    def check_vba_stream(ole, vba_root, stream_path):
        # return the full path of the stream if it exists, False otherwise
        full_path = vba_root + stream_path
        if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:
            log.debug('Found %s stream: %s' % (stream_path, full_path))
            return full_path
        log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)
        return False

    # start with an empty list:
    self.vba_projects = []
    # Look for any storage containing those storage/streams:
    ole = self.ole_file
    for storage in ole.listdir(streams=False, storages=True):
        log.debug('Checking storage %r' % storage)
        # Look for a storage ending with "VBA":
        if storage[-1].upper() != 'VBA':
            continue
        log.debug('Found VBA storage: %s' % ('/'.join(storage)))
        vba_root = '/'.join(storage[:-1])
        # Add a trailing slash to vba_root, unless it is the root of the OLE file:
        # (used later to append all the child streams/storages)
        if vba_root != '':
            vba_root += '/'
        log.debug('Checking vba_root="%s"' % vba_root)

        # Check if the VBA root storage also contains a PROJECT stream:
        project_path = check_vba_stream(ole, vba_root, 'PROJECT')
        if not project_path:
            continue
        # Check if the VBA root storage also contains a VBA/_VBA_PROJECT stream:
        vba_project_path = check_vba_stream(ole, vba_root, 'VBA/_VBA_PROJECT')
        if not vba_project_path:
            continue
        # Check if the VBA root storage also contains a VBA/dir stream:
        dir_path = check_vba_stream(ole, vba_root, 'VBA/dir')
        if not dir_path:
            continue
        # Now we are pretty sure it is a VBA project structure
        log.debug('VBA root storage: "%s"' % vba_root)
        # append the results to the list as a tuple for later use:
        self.vba_projects.append((vba_root, project_path, dir_path))
    return self.vba_projects
+
+    def detect_vba_macros(self):
+        """
+        Detect the potential presence of VBA macros in the file, by checking
+        if it contains VBA projects. Both OLE and OpenXML files are supported.
+
+        Important: for now, results are accurate only for Word, Excel and PowerPoint
+
+        Note: this method does NOT attempt to check the actual presence or validity
+        of VBA macro source code, so there might be false positives.
+        It may also detect VBA macros in files embedded within the main file,
+        for example an Excel workbook with macros embedded into a Word
+        document without macros may be detected, without distinction.
+
+        The result is stored in self.contains_macros and returned directly by
+        later calls, so the file is only scanned once.
+
+        :return: bool, True if at least one VBA project has been found or if
+            any stream contains the text 'Attribut', False otherwise
+        :raises SubstreamOpenError: if reading a stream fails with IOError
+            and self.relaxed is not set
+        """
+        #TODO: return None or raise exception if format not supported
+        #TODO: return the number of VBA projects found instead of True/False?
+        # if this method was already called, return the previous result:
+        if self.contains_macros is not None:
+            return self.contains_macros
+        # if OpenXML/PPT, check all the OLE subfiles:
+        if self.ole_file is None:
+            for ole_subfile in self.ole_subfiles:
+                if ole_subfile.detect_vba_macros():
+                    self.contains_macros = True
+                    return True
+            # otherwise, no macro found:
+            self.contains_macros = False
+            return False
+        # otherwise it's an OLE file, find VBA projects:
+        vba_projects = self.find_vba_projects()
+        self.contains_macros = len(vba_projects) > 0
+        # Also look for VBA code in any stream including orphans
+        # (happens in some malformed files)
+        ole = self.ole_file
+        for sid in range(len(ole.direntries)):
+            log.debug('Checking DirEntry #%d' % sid)
+            d = ole.direntries[sid]
+            if d is None:
+                # this direntry is not part of the tree: either unused or an orphan
+                d = ole._load_direntry(sid)
+                log.debug('This DirEntry is an orphan or unused')
+            if d.entry_type == olefile.STGTY_STREAM:
+                # read data
+                log.debug('Reading data from stream %r - size: %d bytes' % (d.name, d.size))
+                try:
+                    data = ole._open(d.isectStart, d.size).read()
+                    log.debug('Read %d bytes' % len(data))
+                    if len(data) > 200:
+                        log.debug('%r...[much more data]...%r' % (data[:100], data[-50:]))
+                    else:
+                        log.debug(repr(data))
+                    if 'Attribut' in data.decode('utf-8','ignore'):
+                        log.debug('Found VBA compressed code')
+                        self.contains_macros = True
+                except IOError as exc:
+                    if self.relaxed:
+                        log.info('Error when reading OLE Stream %r' % d.name)
+                        # exc_info is the keyword understood by the logging
+                        # module; any other keyword makes the call itself
+                        # raise TypeError instead of logging the traceback
+                        log.debug('Trace:', exc_info=True)
+                    else:
+                        raise SubstreamOpenError(self.filename, d.name, exc)
+        return self.contains_macros
+
+    def extract_macros(self):
+        """
+        Extract and decompress source code for each VBA macro found in the file
+
+        Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
+        If the file is OLE, filename is the path of the file.
+        If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
+        within the zip archive, e.g. word/vbaProject.bin.
+        If the file is PPT, result is as for OpenXML but filename is useless
+        If the file is plain text (TYPE_TEXT), a single tuple is yielded with
+        an empty stream_path and self.vba_code_all_modules as vba_code.
+
+        For OLE files, every stream (including orphans) that was not already
+        extracted from a VBA project is also searched for compressed VBA code.
+        For those results both stream_path and vba_filename are the name of
+        the directory entry.
+        """
+        log.debug('extract_macros:')
+        if self.ole_file is None:
+            # This may be either an OpenXML/PPT or a text file:
+            if self.type == TYPE_TEXT:
+                # This is a text file, yield the full code:
+                yield (self.filename, '', self.filename, self.vba_code_all_modules)
+            else:
+                # OpenXML/PPT: recursively yield results from each OLE subfile:
+                for ole_subfile in self.ole_subfiles:
+                    for results in ole_subfile.extract_macros():
+                        yield results
+        else:
+            # This is an OLE file:
+            self.find_vba_projects()
+            # set of stream ids
+            vba_stream_ids = set()
+            for vba_root, project_path, dir_path in self.vba_projects:
+                # extract all VBA macros from that VBA root storage:
+                for stream_path, vba_filename, vba_code in \
+                        _extract_vba(self.ole_file, vba_root, project_path,
+                                     dir_path, self.relaxed):
+                    # store direntry ids in a set:
+                    vba_stream_ids.add(self.ole_file._find(stream_path))
+                    yield (self.filename, stream_path, vba_filename, vba_code)
+            # Also look for VBA code in any stream including orphans
+            # (happens in some malformed files)
+            ole = self.ole_file
+            for sid in range(len(ole.direntries)):
+                # check if id is already done above:
+                log.debug('Checking DirEntry #%d' % sid)
+                if sid in vba_stream_ids:
+                    log.debug('Already extracted')
+                    continue
+                d = ole.direntries[sid]
+                if d is None:
+                    # this direntry is not part of the tree: either unused or an orphan
+                    d = ole._load_direntry(sid)
+                    log.debug('This DirEntry is an orphan or unused')
+                if d.entry_type == olefile.STGTY_STREAM:
+                    # read data
+                    log.debug('Reading data from stream %r' % d.name)
+                    data = ole._open(d.isectStart, d.size).read()
+                    for match in re.finditer(b'\\x00Attribut[^e]', data, flags=re.IGNORECASE):
+                        # the compressed data is taken from 3 bytes before the marker
+                        # (presumably the container signature byte and the 2-byte
+                        # chunk header of MS-OVBA compression -- TODO confirm)
+                        start = match.start() - 3
+                        if start < 0:
+                            # fewer than 3 bytes precede the marker: a negative index
+                            # would make data[start:] slice from the END of the stream
+                            log.debug('Ignoring match at index %d, too close to the start of the stream'
+                                      % match.start())
+                            continue
+                        log.debug('Found VBA compressed code at index %X' % start)
+                        compressed_code = data[start:]
+                        try:
+                            vba_code = decompress_stream(compressed_code)
+                            yield (self.filename, d.name, d.name, vba_code)
+                        except Exception as exc:
+                            # display the exception with full stack trace for debugging
+                            log.debug('Error processing stream %r in file %r (%s)' % (d.name, self.filename, exc))
+                            log.debug('Traceback:', exc_info=True)
+                            # do not raise the error, as it is unlikely to be a compressed macro stream
+
+    def extract_all_macros(self):
+        """
+        Run extract_macros() once and keep everything it yields.
+
+        The tuples (filename, stream_path, vba_filename, vba_code) are stored
+        in self.modules; when self.modules is already set, that list is reused
+        and nothing is extracted again. self.nb_macros is set to the length of
+        the list on every call. See extract_macros for details on the tuples.
+
+        :return: list of tuples (filename, stream_path, vba_filename, vba_code)
+        """
+        if self.modules is None:
+            # the list is bound to self.modules before iterating, so modules
+            # collected before an error in extract_macros() remain stored
+            collected = self.modules = []
+            for macro in self.extract_macros():
+                subfilename, stream_path, vba_filename, vba_code = macro
+                collected.append((subfilename, stream_path, vba_filename, vba_code))
+        self.nb_macros = len(self.modules)
+        return self.modules
+
+
+
+    def analyze_macros(self, show_decoded_strings=False, deobfuscate=False):
+        """
+        Merge the source code of all VBA macros found in the file (plus the
+        printable strings of VBA forms) and scan it with VBA_Scanner.
+
+        The scan results are stored in self.analysis_results, so the analysis
+        runs only once; the nb_* counters are increased with the numbers
+        returned by the scanner summary.
+
+        :param show_decoded_strings: passed on to VBA_Scanner.scan
+        :param deobfuscate: passed on to VBA_Scanner.scan
+        :return: self.analysis_results (left untouched when
+            detect_vba_macros() reports no macros)
+        """
+        if not self.detect_vba_macros():
+            # nothing to analyze, hand back whatever is currently stored
+            return self.analysis_results
+        # if the analysis was already done, avoid doing it twice:
+        if self.analysis_results is not None:
+            return self.analysis_results
+        # merge the source code of all modules, unless it is already available:
+        if self.vba_code_all_modules is None:
+            self.vba_code_all_modules = ''
+            for (_fname, _stream, _vba_name, vba_code) in self.extract_all_macros():
+                #TODO: filter code? (each module)
+                self.vba_code_all_modules += vba_code.decode('utf-8', 'ignore') + '\n'
+            for (_fname, _stream, form_string) in self.extract_form_strings():
+                self.vba_code_all_modules += form_string.decode('utf-8', 'ignore') + '\n'
+        # Analyze the whole code at once:
+        scanner = VBA_Scanner(self.vba_code_all_modules)
+        self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
+        summary = scanner.scan_summary()
+        autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = summary
+        self.nb_autoexec += autoexec
+        self.nb_suspicious += suspicious
+        self.nb_iocs += iocs
+        self.nb_hexstrings += hexstrings
+        self.nb_base64strings += base64strings
+        self.nb_dridexstrings += dridex
+        self.nb_vbastrings += vbastrings
+        return self.analysis_results
+
+
+    def reveal(self):
+        """
+        Return the merged VBA source code in which every obfuscated VBA string
+        expression found by the analysis is replaced by its decoded value,
+        written as a double-quoted VBA string literal.
+
+        :return: self.vba_code_all_modules after the replacements
+        """
+        def encoded_length(result):
+            # results are unpacked below as (type, decoded, encoded)
+            return len(result[2])
+
+        # we only want printable strings:
+        results = self.analyze_macros(show_decoded_strings=False)
+        # longest encoded strings first, to avoid replacing short strings
+        # contained into longer strings:
+        ordered = sorted(results, key=encoded_length, reverse=True)
+        # normally now self.vba_code_all_modules contains source code from all modules
+        deobf_code = self.vba_code_all_modules
+        for kw_type, decoded, encoded in ordered:
+            if kw_type != 'VBA string':
+                continue
+            # double quotes are escaped as double-double-quotes for VBA, then
+            # the decoded string is wrapped in double quotes:
+            quoted = '"%s"' % decoded.replace('"', '""')
+            deobf_code = deobf_code.replace(encoded, quoted)
+        return deobf_code
+        #TODO: run the analysis several more times if hex or base64 strings are revealed
+
+
+    def find_vba_forms(self):
+        """
+        Finds all the VBA forms stored in an OLE file.
+
+        A storage is reported as a form when it directly contains two streams
+        named "o" and "f". The result is also stored in self.vba_forms.
+
+        Return None if the file is not OLE (and not PPT).
+        For PPT files all OLE subfiles are searched, but the result does not
+        tell which subfile a storage belongs to.
+
+        :return: None if there is no OLE file to search, otherwise a list of
+                 storage paths, each one in the form returned by ole.listdir
+        """
+        log.debug('VBA_Parser.find_vba_forms')
+
+        # if the file is not OLE but OpenXML, return None:
+        if self.ole_file is None and self.type != TYPE_PPT:
+            return None
+
+        # According to MS-OFORMS section 2.1.2 Control Streams:
+        # - A parent control, that is, a control that can contain embedded controls,
+        #   MUST be persisted as a storage that contains multiple streams.
+        # - All parent controls MUST contain a FormControl. The FormControl
+        #   properties are persisted to a stream (1) as specified in section 2.1.1.2.
+        #   The name of this stream (1) MUST be "f".
+        # - Embedded controls that cannot themselves contain other embedded
+        #   controls are persisted sequentially as FormEmbeddedActiveXControls
+        #   to a stream (1) contained in the same storage as the parent control.
+        #   The name of this stream (1) MUST be "o".
+        # - all names are case-insensitive
+
+        if self.type != TYPE_PPT:
+            ole_files = [self.ole_file, ]
+        else:
+            # TODO: so far, this function is never called for PPT files, but
+            #       if that happens, the information is lost which ole file contains
+            #       which storage!
+            ole_files = self.ole_subfiles
+            log.warning('Returned info is not complete for PPT types!')
+
+        # start with an empty list:
+        self.vba_forms = []
+
+        for ole in ole_files:
+            # Look for any storage containing both streams:
+            for storage in ole.listdir(streams=False, storages=True):
+                log.debug('Checking storage %r' % storage)
+                o_stream = storage + ['o']
+                f_stream = storage + ['f']
+                log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream))
+                # "o" is checked before "f"; each must exist and be a stream
+                if not (ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM):
+                    continue
+                if not (ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM):
+                    continue
+                log.debug('Found VBA Form: %r' % '/'.join(storage))
+                self.vba_forms.append(storage)
+        return self.vba_forms
+
+    def extract_form_strings(self):
+        """
+        Extract printable strings from each VBA Form found in the file
+
+        Iterator: yields (filename, stream_path, form_string) for each printable
+        string found in the "o" stream of a VBA form.
+        If the file is OLE, filename is the path of the file and stream_path
+        the path of the "o" stream, joined with slashes.
+        If the file is OpenXML or PPT, the results of each OLE subfile are
+        yielded unchanged.
+        If the file is plain text (TYPE_TEXT), nothing is yielded.
+        """
+        if self.ole_file is None:
+            # This may be either an OpenXML/PPT or a text file:
+            if self.type == TYPE_TEXT:
+                # This is a text file, return no results:
+                return
+            # OpenXML/PPT: recursively yield results from each OLE subfile:
+            for ole_subfile in self.ole_subfiles:
+                for results in ole_subfile.extract_form_strings():
+                    yield results
+            return
+        # This is an OLE file:
+        self.find_vba_forms()
+        ole = self.ole_file
+        for form_storage in self.vba_forms:
+            o_stream = form_storage + ['o']
+            stream_path = '/'.join(o_stream)
+            log.debug('Opening form object stream %r' % stream_path)
+            form_data = ole.openstream(o_stream).read()
+            # Extract printable strings from the form object stream "o":
+            for found in re_printable_string.finditer(form_data):
+                log.debug('Printable string found in form: %r' % found.group())
+                yield (self.filename, stream_path, found.group())
+
+
+    def close(self):
+        """
+        Close all the open files. This method must be called after usage, if
+        the application is opening many files.
+
+        When the parser holds an OLE file, only that file is closed; otherwise
+        every OLE subfile (if any) is closed.
+        """
+        if self.ole_file is not None:
+            self.ole_file.close()
+            return
+        if self.ole_subfiles is None:
+            return
+        for ole_subfile in self.ole_subfiles:
+            ole_subfile.close()
+
+
+
+class VBA_Parser_CLI(VBA_Parser):
+ """
+ VBA parser and analyzer, adding methods for the command line interface
+ of olevba. (see VBA_Parser)
+ """
+
+ def __init__(self, *args, **kwargs):
+ """
+ Constructor for VBA_Parser_CLI.
+
+ Adds no state of its own: all positional and keyword arguments are passed
+ unchanged to __init__ from VBA_Parser --> see doc there
+ """
+ super(VBA_Parser_CLI, self).__init__(*args, **kwargs)
+
+
+    def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
+        """
+        Analyze the VBA code of the file with analyze_macros, and print the
+        results in a table (or a one-line message when there is no result)
+
+        :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
+        :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
+        :return: None
+        """
+        # print a waiting message only if the output is not redirected to a file:
+        if sys.stdout.isatty():
+            print('Analysis...\r')
+            sys.stdout.flush()
+        results = self.analyze_macros(show_decoded_strings, deobfuscate)
+        if not results:
+            print('No suspicious keyword or IOC found.')
+            return
+        table = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
+        table.align = 'l'
+        for column, width in (('Type', 10), ('Keyword', 20), ('Description', 39)):
+            table.max_width[column] = width
+        for kw_type, keyword, description in results:
+            # non printable strings are shown as their repr:
+            shown_keyword = keyword if is_printable(keyword) else repr(keyword)
+            shown_description = description if is_printable(description) else repr(description)
+            table.add_row((kw_type, shown_keyword, shown_description))
+        print(table)
+
+    def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False):
+        """
+        Analyze the VBA code of the file with analyze_macros, and return the
+        results in a form that can be converted to json
+
+        :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
+        :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
+
+        :return: list of dict, one per analysis result, with the keys
+                 'type', 'keyword' and 'description'
+        """
+        # print a waiting message only if the output is not redirected to a file:
+        if sys.stdout.isatty():
+            print('Analysis...\r')
+            sys.stdout.flush()
+        entries = []
+        for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate):
+            entries.append({'type': kw_type, 'keyword': keyword, 'description': description})
+        return entries
+
+    def process_file(self, show_decoded_strings=False,
+                     display_code=True, hide_attributes=True,
+                     vba_code_only=False, show_deobfuscated_code=False,
+                     deobfuscate=False):
+        """
+        Process a single file and print the detailed results: each VBA macro,
+        the printable strings of VBA forms, then the analysis table.
+
+        :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
+        :param display_code: bool, if False VBA source code is not displayed (default True)
+        :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
+        :param vba_code_only: bool, if True the analysis is not printed;
+                              this forces display_code to True
+        :param show_deobfuscated_code: bool, if True the output of reveal() is printed as well
+        :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
+        :raises ProcessingError: wrapping any exception that is not an
+                                 OlevbaBaseException (those are re-raised as is)
+        """
+        #TODO: replace print by writing to a provided output file (sys.stdout by default)
+        # fix conflicting parameters:
+        if vba_code_only and not display_code:
+            display_code = True
+        if self.container:
+            display_filename = '%s in %s' % (self.filename, self.container)
+        else:
+            display_filename = self.filename
+        print('=' * 79)
+        print('FILE:', display_filename)
+        try:
+            #TODO: handle olefile errors, when an OLE file is malformed
+            print('Type: %s' % self.type)
+            if self.detect_vba_macros():
+                for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
+                    # decode bytes whether or not attributes are hidden: otherwise
+                    # the bytes repr would be printed and the empty-macro test
+                    # below (a comparison with a str) could never match
+                    if isinstance(vba_code, bytes):
+                        vba_code = vba_code.decode('utf-8', 'replace')
+                    if hide_attributes:
+                        # hide attribute lines:
+                        vba_code_filtered = filter_vba(vba_code)
+                    else:
+                        vba_code_filtered = vba_code
+                    print('-' * 79)
+                    print('VBA MACRO %s ' % vba_filename)
+                    print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
+                    if display_code:
+                        print('- ' * 39)
+                        # detect empty macros:
+                        if vba_code_filtered.strip() == '':
+                            print('(empty macro)')
+                        else:
+                            print(vba_code_filtered)
+                for (subfilename, stream_path, form_string) in self.extract_form_strings():
+                    print('-' * 79)
+                    print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
+                    print('- ' * 39)
+                    print(form_string.decode('utf-8', 'ignore'))
+                if not vba_code_only:
+                    # analyse the code from all modules at once:
+                    self.print_analysis(show_decoded_strings, deobfuscate)
+                if show_deobfuscated_code:
+                    print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n')
+                    print(self.reveal())
+            else:
+                print('No VBA macros found.')
+        except OlevbaBaseException:
+            raise
+        except Exception as exc:
+            # display the exception with full stack trace for debugging
+            log.info('Error processing file %s (%s)' % (self.filename, exc))
+            log.debug('Traceback:', exc_info=True)
+            raise ProcessingError(self.filename, exc)
+        print('')
+
+
+    def process_file_json(self, show_decoded_strings=False,
+                          display_code=True, hide_attributes=True,
+                          vba_code_only=False, show_deobfuscated_code=False,
+                          deobfuscate=False):
+        """
+        Process a single file and return the results as a dict
+
+        every "show" or "print" here is to be translated as "add to json"
+
+        :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
+        :param display_code: bool, if False VBA source code is not displayed (default True)
+        :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
+        :param vba_code_only: bool, if True the analysis is not added;
+                              this forces display_code to True
+        :param show_deobfuscated_code: bool, if True the output of reveal() is added as well
+        :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
+        :return: dict with the keys 'container', 'file', 'type', 'macros',
+                 'analysis', 'code_deobfuscated', 'do_deobfuscate' and
+                 'json_conversion_successful'
+        :raises ProcessingError: wrapping any exception raised while processing
+        """
+        #TODO: fix conflicting parameters (?)
+
+        if vba_code_only and not display_code:
+            display_code = True
+
+        result = {}
+
+        if self.container:
+            result['container'] = self.container
+        else:
+            result['container'] = None
+        result['file'] = self.filename
+        result['json_conversion_successful'] = False
+        result['analysis'] = None
+        result['code_deobfuscated'] = None
+        result['do_deobfuscate'] = deobfuscate
+
+        try:
+            #TODO: handle olefile errors, when an OLE file is malformed
+            result['type'] = self.type
+            macros = []
+            if self.detect_vba_macros():
+                for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
+                    curr_macro = {}
+                    # same handling as in process_file: decode only bytes (the
+                    # code may already be text), and do it whether or not the
+                    # attributes are hidden, because bytes cannot be
+                    # converted to json
+                    if isinstance(vba_code, bytes):
+                        vba_code = vba_code.decode('utf-8', 'replace')
+                    if hide_attributes:
+                        # hide attribute lines:
+                        vba_code_filtered = filter_vba(vba_code)
+                    else:
+                        vba_code_filtered = vba_code
+
+                    curr_macro['vba_filename'] = vba_filename
+                    curr_macro['subfilename'] = subfilename
+                    curr_macro['ole_stream'] = stream_path
+                    if display_code:
+                        curr_macro['code'] = vba_code_filtered.strip()
+                    else:
+                        curr_macro['code'] = None
+                    macros.append(curr_macro)
+                if not vba_code_only:
+                    # analyse the code from all modules at once:
+                    result['analysis'] = self.print_analysis_json(show_decoded_strings,
+                                                                  deobfuscate)
+                if show_deobfuscated_code:
+                    result['code_deobfuscated'] = self.reveal()
+            result['macros'] = macros
+            result['json_conversion_successful'] = True
+        except Exception as exc:
+            # display the exception with full stack trace for debugging
+            log.info('Error processing file %s (%s)' % (self.filename, exc))
+            log.debug('Traceback:', exc_info=True)
+            raise ProcessingError(self.filename, exc)
+
+        return result
+
+
+    def process_file_triage(self, show_decoded_strings=False, deobfuscate=False):
+        """
+        Process a file in triage mode, showing only summary results on one line.
+
+        The line starts with the tag of the file type (from TYPE2TAG) followed
+        by eight flags, each one either a letter or '-': M (macros),
+        A (autoexec), S (suspicious), I (IOCs), H (hex strings),
+        B (base64 strings), D (dridex strings), V (VBA strings); then comes
+        the file name.
+
+        :param show_decoded_strings: passed on to analyze_macros
+        :param deobfuscate: passed on to analyze_macros
+        :raises ProcessingError: wrapping any exception raised while processing
+        """
+        #TODO: replace print by writing to a provided output file (sys.stdout by default)
+        try:
+            #TODO: handle olefile errors, when an OLE file is malformed
+            if self.detect_vba_macros():
+                # print a waiting message only if the output is not redirected to a file:
+                if sys.stdout.isatty():
+                    print('Analysis...\r')
+                    sys.stdout.flush()
+                self.analyze_macros(show_decoded_strings=show_decoded_strings,
+                                    deobfuscate=deobfuscate)
+            flags = TYPE2TAG[self.type]
+            # (value, letter) pairs in display order; a false value shows as '-'
+            indicators = (
+                (self.contains_macros, 'M'),
+                (self.nb_autoexec, 'A'),
+                (self.nb_suspicious, 'S'),
+                (self.nb_iocs, 'I'),
+                (self.nb_hexstrings, 'H'),
+                (self.nb_base64strings, 'B'),
+                (self.nb_dridexstrings, 'D'),
+                (self.nb_vbastrings, 'V'),
+            )
+            flags += ''.join([letter if value else '-' for value, letter in indicators])
+
+            print('%-12s %s' % (flags, self.filename))
+        except Exception as exc:
+            # display the exception with full stack trace for debugging only
+            log.debug('Error processing file %s (%s)' % (self.filename, exc),
+                      exc_info=True)
+            raise ProcessingError(self.filename, exc)
+
+
+ # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
+ # header=False, border=False)
+ # t.align = 'l'
+ # t.max_width['filename'] = 30
+ # t.max_width['type'] = 10
+ # t.max_width['macros'] = 6
+ # t.max_width['autoexec'] = 6
+ # t.max_width['suspicious'] = 6
+ # t.max_width['ioc'] = 6
+ # t.max_width['hexstrings'] = 6
+ # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
+ # print t
+
+
+#=== MAIN =====================================================================
+
+def main():
+    """
+    Main function, called when olevba is run from the command line
+
+    Parses the command line, then processes every file returned by
+    xglob.iter_files in the selected output mode (triage, detailed or json).
+    When no mode is given, files are shown in triage mode, and the detailed
+    output is printed in addition if exactly one file was processed.
+
+    Always ends with sys.exit: the return code is RETURN_OK unless errors
+    occurred (a specific code for the first error, RETURN_SEVERAL_ERRS once
+    another one follows, RETURN_UNEXPECTED for an unhandled exception).
+    """
+    DEFAULT_LOG_LEVEL = "warning" # Default log level
+    LOG_LEVELS = {
+        'debug':    logging.DEBUG,
+        'info':     logging.INFO,
+        'warning':  logging.WARNING,
+        'error':    logging.ERROR,
+        'critical': logging.CRITICAL
+        }
+
+    # at least one file name is required (see the check on args below), so
+    # the usage line names it before the optional additional ones
+    usage = 'usage: %prog [options] filename [filename2 ...]'
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("-r", action="store_true", dest="recursive",
+                      help='find files recursively in subdirectories.')
+    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+    # output mode; could make this even simpler with add_option(type='choice') but that would make
+    # cmd line interface incompatible...
+    modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)')
+    modes.add_option("-t", '--triage', action="store_const", dest="output_mode",
+                     const='triage', default='unspecified',
+                     help='triage mode, display results as a summary table (default for multiple files)')
+    modes.add_option("-d", '--detailed', action="store_const", dest="output_mode",
+                     const='detailed', default='unspecified',
+                     help='detailed mode, display full results (default for single file)')
+    modes.add_option("-j", '--json', action="store_const", dest="output_mode",
+                     const='json', default='unspecified',
+                     help='json mode, detailed in json format (never default)')
+    parser.add_option_group(modes)
+    parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True,
+                      help='display only analysis results, not the macro source code')
+    parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False,
+                      help='display only VBA source code, do not analyze it')
+    parser.add_option("--decode", action="store_true", dest="show_decoded_strings",
+                      help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex, VBA).')
+    parser.add_option("--attr", action="store_false", dest="hide_attributes", default=True,
+                      help='display the attribute lines at the beginning of VBA source code')
+    parser.add_option("--reveal", action="store_true", dest="show_deobfuscated_code",
+                      help='display the macro source code after replacing all the obfuscated strings by their decoded content.')
+    parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+                      help="logging level debug/info/warning/error/critical (default=%default)")
+    parser.add_option('--deobf', dest="deobfuscate", action="store_true", default=False,
+                      help="Attempt to deobfuscate VBA expressions (slow)")
+    parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False,
+                      help="Do not raise errors if opening of substream fails")
+
+    (options, args) = parser.parse_args()
+
+    # Print help if no arguments are passed
+    if len(args) == 0:
+        print(__doc__)
+        parser.print_help()
+        sys.exit(RETURN_WRONG_ARGS)
+
+    # provide info about tool and its version
+    if options.output_mode == 'json':
+        # prints opening [
+        print_json(script_name='olevba', version=__version__,
+                   url='http://decalage.info/python/oletools',
+                   type='MetaInformation')
+    else:
+        print('olevba %s - http://decalage.info/python/oletools' % __version__)
+
+    logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
+    # enable logging in the modules:
+    log.setLevel(logging.NOTSET)
+
+    # with the option --reveal, make sure --deobf is also enabled:
+    if options.show_deobfuscated_code and not options.deobfuscate:
+        log.info('set --deobf because --reveal was set')
+        options.deobfuscate = True
+    if options.output_mode == 'triage' and options.show_deobfuscated_code:
+        log.info('ignoring option --reveal in triage output mode')
+
+    # Column headers (do not know how many files there will be yet, so if no output_mode
+    # was specified, we will print triage for first file --> need these headers)
+    if options.output_mode in ('triage', 'unspecified'):
+        print('%-12s %-65s' % ('Flags', 'Filename'))
+        print('%-12s %-65s' % ('-' * 11, '-' * 65))
+
+    previous_container = None
+    count = 0
+    container = filename = data = None
+    vba_parser = None
+    return_code = RETURN_OK
+    try:
+        for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+                                                          zip_password=options.zip_password, zip_fname=options.zip_fname):
+            # ignore directory names stored in zip files:
+            if container and filename.endswith('/'):
+                continue
+
+            # handle errors from xglob
+            if isinstance(data, Exception):
+                if isinstance(data, PathNotFoundException):
+                    if options.output_mode in ('triage', 'unspecified'):
+                        print('%-12s %s - File not found' % ('?', filename))
+                    elif options.output_mode != 'json':
+                        log.error('Given path %r does not exist!' % filename)
+                    return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
+                                                    else RETURN_SEVERAL_ERRS
+                else:
+                    if options.output_mode in ('triage', 'unspecified'):
+                        print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
+                    elif options.output_mode != 'json':
+                        log.error('Exception opening/reading %r from zip file %r: %s'
+                                  % (filename, container, data))
+                    return_code = RETURN_XGLOB_ERR if return_code == 0 \
+                                                    else RETURN_SEVERAL_ERRS
+                if options.output_mode == 'json':
+                    print_json(file=filename, type='error',
+                               error=type(data).__name__, message=str(data))
+                continue
+
+            try:
+                # Open the file
+                vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
+                                            relaxed=options.relaxed)
+
+                if options.output_mode == 'detailed':
+                    # fully detailed output
+                    vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
+                                            display_code=options.display_code,
+                                            hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
+                                            show_deobfuscated_code=options.show_deobfuscated_code,
+                                            deobfuscate=options.deobfuscate)
+                elif options.output_mode in ('triage', 'unspecified'):
+                    # print container name when it changes:
+                    if container != previous_container:
+                        if container is not None:
+                            print('\nFiles in %s:' % container)
+                        previous_container = container
+                    # summarized output for triage:
+                    vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
+                                                   deobfuscate=options.deobfuscate)
+                elif options.output_mode == 'json':
+                    print_json(
+                        vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
+                                                     display_code=options.display_code,
+                                                     hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
+                                                     show_deobfuscated_code=options.show_deobfuscated_code,
+                                                     deobfuscate=options.deobfuscate))
+                else:  # (should be impossible)
+                    raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
+                count += 1
+
+            except (SubstreamOpenError, UnexpectedDataError) as exc:
+                if options.output_mode in ('triage', 'unspecified'):
+                    print('%-12s %s - Error opening substream or unexpected ' \
+                          'content' % ('?', filename))
+                elif options.output_mode == 'json':
+                    print_json(file=filename, type='error',
+                               error=type(exc).__name__, message=str(exc))
+                else:
+                    log.exception('Error opening substream or unexpected '
+                                  'content in %s' % filename)
+                return_code = RETURN_OPEN_ERROR if return_code == 0 \
+                                                else RETURN_SEVERAL_ERRS
+            except FileOpenError as exc:
+                if options.output_mode in ('triage', 'unspecified'):
+                    print('%-12s %s - File format not supported' % ('?', filename))
+                elif options.output_mode == 'json':
+                    print_json(file=filename, type='error',
+                               error=type(exc).__name__, message=str(exc))
+                else:
+                    log.exception('Failed to open %s -- probably not supported!' % filename)
+                return_code = RETURN_OPEN_ERROR if return_code == 0 \
+                                                else RETURN_SEVERAL_ERRS
+            except ProcessingError as exc:
+                if options.output_mode in ('triage', 'unspecified'):
+                    print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
+                elif options.output_mode == 'json':
+                    print_json(file=filename, type='error',
+                               error=type(exc).__name__,
+                               message=str(exc.orig_exc))
+                else:
+                    log.exception('Error processing file %s (%s)!'
+                                  % (filename, exc.orig_exc))
+                return_code = RETURN_PARSE_ERROR if return_code == 0 \
+                                                else RETURN_SEVERAL_ERRS
+            finally:
+                if vba_parser is not None:
+                    vba_parser.close()
+
+        if options.output_mode == 'triage':
+            print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
+                  'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
+                  'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')
+
+        if count == 1 and options.output_mode == 'unspecified':
+            # if options -t, -d and -j were not specified and it's a single file, print details:
+            # NOTE(review): the parser was already closed by the "finally"
+            # clause of the loop above -- confirm that process_file does not
+            # need to read from the file again at this point
+            vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
+                                    display_code=options.display_code,
+                                    hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
+                                    show_deobfuscated_code=options.show_deobfuscated_code,
+                                    deobfuscate=options.deobfuscate)
+
+        if options.output_mode == 'json':
+            # print last json entry (a last one without a comma) and closing ]
+            print_json(type='MetaInformation', return_code=return_code,
+                       n_processed=count, _json_is_last=True)
+
+    except Exception as exc:
+        # some unexpected error, maybe some of the types caught in except clauses
+        # above were not sufficient. This is very bad, so log complete trace at exception level
+        # and do not care about output mode
+        log.exception('Unhandled exception in main: %s' % exc, exc_info=True)
+        return_code = RETURN_UNEXPECTED  # even if there were others before -- this is more important
+        # TODO: print msg with URL to report issues (except in JSON mode)
+
+    # done. exit
+    log.debug('will exit now with code %s' % return_code)
+    sys.exit(return_code)
+
+# Run the command line interface only when this file is executed as a script:
+if __name__ == '__main__':
+ main()
+
+# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
diff -Nru remnux-oletools-0.51a/remnux-oletools/olevba.py remnux-oletools-0.51a/remnux-oletools/olevba.py
--- remnux-oletools-0.51a/remnux-oletools/olevba.py 1970-01-01 00:00:00.000000000 +0000
+++ remnux-oletools-0.51a/remnux-oletools/olevba.py 2016-11-04 21:28:21.000000000 +0000
@@ -0,0 +1,3417 @@
+#!/usr/bin/env python
+"""
+olevba.py
+
+olevba is a script to parse OLE and OpenXML files such as MS Office documents
+(e.g. Word, Excel), to extract VBA Macro code in clear text, deobfuscate
+and analyze malicious macros.
+
+Supported formats:
+- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
+- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
+- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
+- Word 2003 XML (.xml)
+- Word/Excel Single File Web Page / MHTML (.mht)
+- Publisher (.pub)
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+olevba is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+
+olevba is based on source code from officeparser by John William Davison
+https://github.com/unixfreak0037/officeparser
+"""
+
+# === LICENSE ==================================================================
+
+# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+# olevba contains modified source code from the officeparser project, published
+# under the following MIT License (MIT):
+#
+# officeparser is copyright (c) 2014 John William Davison
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import print_function
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2014-08-05 v0.01 PL: - first version based on officeparser code
+# 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser
+# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record
+# 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats
+# and to find the VBA project root anywhere in the file
+# 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL
+# 2014-12-05 v0.05 PL: - refactored most functions into a class, new API
+# - added detect_vba_macros
+# 2014-12-10 v0.06 PL: - hide first lines with VB attributes
+# - detect auto-executable macros
+# - ignore empty macros
+# 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive
+# 2014-12-15 v0.08 PL: - improved display for empty macros
+# - added pattern extraction
+# 2014-12-25 v0.09 PL: - added suspicious keywords detection
+# 2014-12-27 v0.10 PL: - added OptionParser, main and process_file
+# - uses xglob to scan several files with wildcards
+# - option -r to recurse subdirectories
+# - option -z to scan files in password-protected zips
+# 2015-01-02 v0.11 PL: - improved filter_vba to detect colons
+# 2015-01-03 v0.12 PL: - fixed detect_patterns to detect all patterns
+# - process_file: improved display, shows container file
+# - improved list of executable file extensions
+# 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display
+# 2015-01-08 v0.14 PL: - added hex strings detection and decoding
+# - fixed issue #2, decoding VBA stream names using
+# specified codepage and unicode stream names
+# 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d
+# 2015-01-16 v0.16 PL: - fix for issue #3 (exception when module name="text")
+# - added several suspicious keywords
+# - added option -i to analyze VBA source code directly
+# 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions
+# - added scan_vba to run all detection algorithms
+# - decoded hex strings are now also scanned + reversed
+# 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules
+# 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex
+# strings and StrReverse
+# 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded
+# 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding
+# - improved display, shows obfuscation name
+# 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename
+# - added Base64 obfuscation decoding (contribution from
+# @JamesHabben)
+# 2015-02-03 v0.23 PL: - triage now uses VBA_Scanner results, shows Base64 and
+# Dridex strings
+# - exception handling in detect_base64_strings
+# 2015-02-07 v0.24 PL: - renamed option --hex to --decode, fixed display
+# - display exceptions with stack trace
+# - added several suspicious keywords
+# - improved Base64 detection and decoding
+# - fixed triage mode not to scan attrib lines
+# 2015-03-04 v0.25 PL: - added support for Word 2003 XML
+# 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and
+# virtualisation detection
+# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros
+# (issue #10 reported by Greg from SpamStopsHere)
+# 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header
+# (issue #11 reported by Thomas Chopitea)
+# 2015-05-26 v0.29 PL: - improved MSO files parsing, taking into account
+# various data offsets (issue #12)
+# - improved detection of MSO files, avoiding incorrect
+# parsing errors (issue #7)
+# 2015-05-29 v0.30 PL: - added suspicious keywords suggested by @ozhermit,
+# Davy Douhine (issue #9), issue #13
+# 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc)
+# 2015-06-19 PL: - added options -a, -c, --each, --attr
+# 2015-06-21 v0.32 PL: - always display decoded strings which are printable
+# - fix VBA_Scanner.scan to return raw strings, not repr()
+# 2015-07-09 v0.40 PL: - removed usage of sys.stderr which causes issues
+# 2015-07-12 PL: - added Hex function decoding to VBA Parser
+# 2015-07-13 PL: - added Base64 function decoding to VBA Parser
+# 2015-09-06 PL: - improved VBA_Parser, refactored the main functions
+# 2015-09-13 PL: - moved main functions to a class VBA_Parser_CLI
+# - fixed issue when analysis was done twice
+# 2015-09-15 PL: - remove duplicate IOCs from results
+# 2015-09-16 PL: - join long VBA lines ending with underscore before scan
+# - disabled unused option --each
+# 2015-09-22 v0.41 PL: - added new option --reveal
+# - added suspicious strings for PowerShell.exe options
+# 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method
+# 2015-10-10 PL: - added support for text files with VBA source code
+# 2015-11-17 PL: - fixed bug with --decode option
+# 2015-12-16 PL: - fixed bug in main (no options input anymore)
+# - improved logging, added -l option
+# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht
+# - fixed issue #32 by monkeypatching email.feedparser
+# 2016-02-07 PL: - KeyboardInterrupt is now raised properly
+# 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr
+# 2016-02-29 PL: - added Workbook_Activate to suspicious keywords
+# 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
+# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck)
+# 2016-03-16 CH: - added option --no-deobfuscate (temporary)
+# 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
+# - updated suspicious keywords
+# 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans
+# 2016-04-28 CH: - return an exit code depending on the results
+# - improved error and exception handling
+# - improved JSON output
+# 2016-05-12 CH: - added support for PowerPoint 97-2003 files
+# 2016-06-06 CH: - improved handling of unicode VBA module names
+# 2016-06-07 CH: - added option --relaxed, stricter parsing by default
+# 2016-06-12 v0.50 PL: - fixed small bugs in VBA parsing code
+# 2016-07-01 PL: - fixed issue #58 with format() to support Python 2.6
+# 2016-07-29 CH: - fixed several bugs including #73 (Mac Roman encoding)
+# 2016-08-31 PL: - added autoexec keyword InkPicture_Painted
+# - detect_autoexec now returns the exact keyword found
+# 2016-09-05 PL: - added autoexec keywords for MS Publisher (.pub)
+# 2016-09-06 PL: - fixed issue #20, is_zipfile on Python 2.6
+# 2016-09-12 PL: - enabled packrat to improve pyparsing performance
+# 2016-10-25 PL: - fixed raise and print statements for Python 3
+# 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW
+
+__version__ = '0.51a'
+
+#------------------------------------------------------------------------------
+# TODO:
+# + setup logging (common with other oletools)
+# + add xor bruteforcing like bbharvest
+# + options -a and -c should imply -d
+
+# TODO later:
+# + performance improvement: instead of searching each keyword separately,
+# first split vba code into a list of words (per line), then check each
+# word against a dict. (or put vba words into a set/dict?)
+# + for regex, maybe combine them into a single re with named groups?
+# + add Yara support, include sample rules? plugins like balbuzard?
+# + add balbuzard support
+# + output to file (replace print by file.write, sys.stdout by default)
+# + look for VBA in embedded documents (e.g. Excel in Word)
+# + support SRP streams (see Lenny's article + links and sample)
+# - python 3.x support
+# - check VBA macros in Visio, Access, Project, etc
+# - extract_macros: convert to a class, split long function into smaller methods
+# - extract_macros: read bytes from stream file objects instead of strings
+# - extract_macros: use combined struct.unpack instead of many calls
+# - all except clauses should target specific exceptions
+
+#------------------------------------------------------------------------------
+# REFERENCES:
+# - [MS-OVBA]: Microsoft Office VBA File Format Structure
+# http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx
+# - officeparser: https://github.com/unixfreak0037/officeparser
+
+
+#--- IMPORTS ------------------------------------------------------------------
+
+import sys, logging
+import struct
+import cStringIO
+import math
+import zipfile
+import re
+import optparse
+import binascii
+import base64
+import zlib
+import email # for MHTML parsing
+import string # for printable
+import json # for json output mode (argument --json)
+
+# import lxml or ElementTree for XML parsing:
+try:
+ # lxml: best performance for XML processing
+ import lxml.etree as ET
+except ImportError:
+ try:
+ # Python 2.5+: batteries included
+ import xml.etree.cElementTree as ET
+ except ImportError:
+ try:
+ # Python <2.5: standalone ElementTree install
+ import elementtree.cElementTree as ET
+ except ImportError:
+ raise ImportError("lxml or ElementTree are not installed, " \
+ + "see http://codespeak.net/lxml " \
+ + "or http://effbot.org/zone/element-index.htm")
+
+import thirdparty.olefile as olefile
+from thirdparty.prettytable import prettytable
+from thirdparty.xglob import xglob, PathNotFoundException
+from thirdparty.pyparsing.pyparsing import \
+ CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
+ Optional, QuotedString,Regex, Suppress, Word, WordStart, \
+ alphanums, alphas, hexnums,nums, opAssoc, srange, \
+ infixNotation, ParserElement
+import ppt_parser
+
+# Monkeypatch the header regex of email.feedparser to fix issue #32: the
+# trailing ":?" makes the colon optional, so header lines without ":" match too.
+import email.feedparser
+email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])')
+
+# Select the is_zipfile implementation matching the running interpreter:
+# - Python 2.6 or older: the function backported from Python 2.7
+# - Python 2.7 and Python 3.x+: the one from the standard zipfile module
+if sys.version_info[0] <= 2 and sys.version_info[1] <= 6:
+    # Python 2.x, and more precisely Python 2.6:
+    # use is_zipfile backported from Python 2.7:
+    from thirdparty.zipfile27 import is_zipfile
+else:
+    # either Python 2.7 ...
+    # ... or Python 3.x+:
+    # both provide a suitable is_zipfile in the standard library
+    from zipfile import is_zipfile
+
+# === LOGGING =================================================================
+
+class NullHandler(logging.Handler):
+    """
+    Logging handler that discards every record, so that nothing is printed
+    when the main application has not configured logging.
+    Python 2.7 ships logging.NullHandler; this copy is kept for 2.6, see
+    https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
+    """
+    def emit(self, record):
+        return None
+
+def get_logger(name, level=logging.CRITICAL+1):
+    """
+    Return the logger called `name`, set up for use by this module.
+
+    The root logger is deliberately left untouched, so that the logs of
+    other modules do not show up on the screen. A logger already registered
+    under `name` is reused as it is; only a logger first created by this
+    call gets a NullHandler, because adding one on every call would pile up
+    duplicate handlers. Real output is for the application to configure.
+
+    :param name: name of the logger to fetch or create
+    :param level: level set on the logger at every call; the default
+        CRITICAL+1 is meant to avoid any logging
+    :return: the logging.Logger registered under `name`
+    """
+    # Look the name up BEFORE calling getLogger(): that call registers the
+    # name itself, after which an old logger and a new one look the same.
+    first_request = name not in logging.Logger.manager.loggerDict
+    logger = logging.getLogger(name)
+    if first_request:
+        # it is up to the application to configure its own logging, so a
+        # freshly created logger only receives a do-nothing handler:
+        logger.addHandler(NullHandler())
+    logger.setLevel(level)
+    return logger
+
+# a global logger object used for debugging (get_logger's default level, CRITICAL+1, applies):
+log = get_logger('olevba')
+
+
+#=== EXCEPTIONS ==============================================================
+
+class OlevbaBaseException(Exception):
+    """ Common base class of the exceptions produced in this module, so that
+    callers can catch them with simpler except clauses.
+
+    The exception text is `msg`, followed by the original exception in
+    parentheses when `orig_exc` is given. The raw `msg`, `filename` and
+    `orig_exc` are kept as attributes of the same names. """
+    def __init__(self, msg, filename=None, orig_exc=None, **kwargs):
+        # append the wrapped exception, if there is one, to the message
+        text = msg + ' ({0})'.format(orig_exc) if orig_exc else msg
+        super(OlevbaBaseException, self).__init__(text, **kwargs)
+        self.msg, self.filename, self.orig_exc = msg, filename, orig_exc
+
+
+class FileOpenError(OlevbaBaseException):
+    """ Raised by the VBA_Parser constructor if all open_... attempts failed,
+    which probably means the file type is not supported.
+
+    The message is 'Failed to open file <filename>'; `filename` and the
+    optional `orig_exc` are handed on to OlevbaBaseException. """
+    def __init__(self, filename, orig_exc=None):
+        message = 'Failed to open file %s' % filename
+        super(FileOpenError, self).__init__(message, filename, orig_exc)
+
+
+class ProcessingError(OlevbaBaseException):
+    """ raised by the VBA_Parser.process_file* functions; the message is
+    'Error processing file <filename>' and `orig_exc` is handed to the base """
+    def __init__(self, filename, orig_exc):
+        message = 'Error processing file %s' % filename
+        super(ProcessingError, self).__init__(message, filename, orig_exc)
+
+
+class MsoExtractionError(RuntimeError, OlevbaBaseException):
+    """ raised by mso_file_extract if parsing MSO/ActiveMIME data failed;
+    `msg` is the error text, handed to both base classes """
+    def __init__(self, msg):
+        RuntimeError.__init__(self, msg)  # base class; this class's own __init__ here would recurse forever
+        OlevbaBaseException.__init__(self, msg)
+
+
+class SubstreamOpenError(FileOpenError):
+    """ special kind of FileOpenError: the file is a substream of the original
+    file, so the failure is reported for '<filename>/<subfilename>' """
+    def __init__(self, filename, subfilename, orig_exc=None):
+        substream_path = '/'.join((str(filename), str(subfilename)))
+        super(SubstreamOpenError, self).__init__(substream_path, orig_exc)
+        # overwrite the filename set in OlevbaBaseException (the joined path)
+        self.filename, self.subfilename = filename, subfilename
+
+
+class UnexpectedDataError(OlevbaBaseException):
+    """ Raised when parsing is strict (=not relaxed) and data is unexpected.
+
+    The message names `stream_path` and `variable` and shows `expected` and
+    `value` as zero-padded upper-case hex; all four are kept as attributes. """
+    def __init__(self, stream_path, variable, expected, value):
+        template = ('Unexpected value in {0} for variable {1}: '
+                    'expected {2:04X} but found {3:04X}!')
+        details = template.format(stream_path, variable, expected, value)
+        super(UnexpectedDataError, self).__init__(details)
+        self.stream_path, self.variable = stream_path, variable
+        self.expected, self.value = expected, value
+
+#--- CONSTANTS ----------------------------------------------------------------
+
+# return codes (presumably the process exit status set by main() -- TODO confirm)
+RETURN_OK = 0
+RETURN_WARNINGS = 1 # (reserved, not used yet)
+RETURN_WRONG_ARGS = 2 # (fixed, built into optparse)
+RETURN_FILE_NOT_FOUND = 3
+RETURN_XGLOB_ERR = 4
+RETURN_OPEN_ERROR = 5
+RETURN_PARSE_ERROR = 6
+RETURN_SEVERAL_ERRS = 7
+RETURN_UNEXPECTED = 8
+
+# MAC codepages: code page number -> Python codec name (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
+MAC_CODEPAGES = {
+    10000: 'mac-roman',
+    10001: 'shiftjis',  # not found: 'mac-shift-jis',
+    10003: 'ascii',  # nothing appropriate found: 'mac-hangul',
+    10008: 'gb2312',  # not found: 'mac-gb2312', (was misspelt 'gb2321', which is not a codec)
+    10002: 'big5',  # not found: 'mac-big5',
+    10005: 'hebrew',  # not found: 'mac-hebrew',
+    10004: 'mac-arabic',
+    10006: 'mac-greek',
+    10081: 'mac-turkish',
+    10021: 'thai',  # not found: 'mac-thai',
+    10029: 'maccentraleurope',  # not found: 'mac-east europe',
+    10007: 'ascii',  # nothing appropriate found: 'mac-russian',
+}
+
+# URL of the issue tracker, and the message (built from that URL) asking to report issues there:
+URL_OLEVBA_ISSUES = 'https://github.com/decalage2/oletools/issues'
+MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES
+
+# Container types (string identifiers, also used as the keys of TYPE2TAG):
+TYPE_OLE = 'OLE'
+TYPE_OpenXML = 'OpenXML'
+TYPE_Word2003_XML = 'Word2003_XML'
+TYPE_MHTML = 'MHTML'
+TYPE_TEXT = 'Text'
+TYPE_PPT = 'PPT'
+
+# short tag (three characters plus a colon) to display file types in triage mode:
+TYPE2TAG = {
+    TYPE_OLE: 'OLE:',
+    TYPE_OpenXML: 'OpX:',
+    TYPE_Word2003_XML: 'XML:',
+    TYPE_MHTML: 'MHT:',
+    TYPE_TEXT: 'TXT:',
+    TYPE_PPT: 'PPT:',  # colon added: every other tag ends with one
+}
+
+
+# MSO files ActiveMime header magic
+MSO_ACTIVEMIME_HEADER = 'ActiveMime'
+# file name extensions, presumably one per kind of VBA code (module, class, form) -- TODO confirm usage:
+MODULE_EXTENSION = "bas"
+CLASS_EXTENSION = "cls"
+FORM_EXTENSION = "frm"
+
+# Namespaces and tags for Word2003 XML parsing:
+NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}'
+# the tag |