Windows.Detection.Onenote

This artifact enables detection of malicious .one files and can also be used as an embedded file and metadata parser.

The artifact uses glob targeting and checks file headers to detect .one file types in scope. Second, the artifact looks for EmbeddedFile and Metadata headers.
Finally, the artifact parses the .one file from the discovered offsets and applies filters to determine which rows are returned.

By default, filters target suspicious file content and metadata title text observed in the wild. Set ContentRegex to `.` to include all entries.

The artifact also allows upload of both embedded files and source .one files.


# Artifact identity and the help text carried with the artifact definition.
name: Windows.Detection.Onenote
author: Matt Green - @mgreen27
description: |
    This artifact enables detection of malicious .one files and can also be used 
    as an embedded file and metadata parser.
    
    The artifact uses glob targeting and checks file headers to detect .one file 
    types in scope. Secondly the artifact looks for EmbeddedFile and Metadata headers.   
    Finally the artifact will parse the .one file from discovered offsets and 
    enable filters to determine rows returned.  
     
    By default filters target suspicious file content and metadata title text 
    observed in the wild.
    Modify ContentRegex to ```.``` to include all.  
    
    The artifact also allows upload of both embedded files and source .one files.
    
# External material: a OneNote extraction tool and a write-up on analysing
# malicious OneNote documents.
reference:
  - https://github.com/volexity/threat-intel/tree/main/tools/one-extract
  - https://blog.didierstevens.com/2023/01/22/analyzing-malicious-onenote-documents/
  
type: CLIENT

# User-tunable inputs referenced by name inside the VQL query below.
# Keys are listed as name / type / description / default throughout.
parameters:
  # Which files to inspect; candidates are later confirmed by their header.
  - name: TargetGlob
    description: Glob to target .one files
    default: 'C:\Users\**'

  # Include filter, tested against embedded file content and metadata Title.
  - name: ContentRegex
    description: Regex to filter for on embedded files or Title Metadata
    default: '^MZ|^TV(oA|pB|pQ|qA|qQ|ro)|^PK|This program cannot be run in dos mode.|@echo|<HTA:APPLICATION|/vbscript|WmiExec|Win32_Process|Powershell|comspec'

  # Upper bound on the bytes of each embedded file shown as a preview.
  - name: PreviewBytes
    type: int64
    description: Number of bytes of embedded files to preview in hex
    default: 10000

  # Optional exclusion filter; no default, so nothing is excluded unless set.
  - name: ContentExclude
    description: Regex to filter out on embedded files or Title Metadata

  - name: IncludeAllMetadata
    type: bool
    description: Select to include all Metadata entries

  - name: UploadEmbedded
    type: bool
    description: Select to upload embedded files

  - name: UploadOriginal
    type: bool
    description: Upload original OneNote file
     
     
# The precondition only admits hosts whose info().OS is windows, linux or darwin.
sources:
  - precondition:
      SELECT OS From info() where OS = 'windows' OR OS = 'linux' OR OS = 'darwin'

    query: |
      -- YARA rule with two 16-byte hex patterns, $EmbeddedFile and $Metadata;
      -- a hit on either one satisfies the rule ("any of them"). The pattern
      -- name of each hit is used further down to pick the struct to parse.
      -- NOTE(review): $EmbeddedFile looks like the MS-ONESTORE
      -- FileDataStoreObject header GUID - confirm against the specification.
      LET YaraRule = '''rule onenote_headers {
                strings:
                    $EmbeddedFile = { E7 16 E3 BD 65 26 11 45 A4 C4 8D 4D 0B 7A 9E AC }
                    $Metadata = { F3 1C 00 1C 30 1C 00 1C FF 1D 00 14 82 1d 00 14 }
    
                condition:
                    any of them
            }'''
    
      -- Binary parsing profile handed to parse_binary(); it declares two structs.
      --
      -- Metadata:
      --   __Adjust      uint16 at offset 2.
      --   __SizeOffset  computed as 4 + (4 * __Adjust).
      --   Size          uint16 read at __SizeOffset.
      --   __Title       Size bytes read as a string at __SizeOffset + 4.
      --   Title         __Title with every \x00 byte removed.
      --   CreateDate    WinFileTime at __SizeOffset + 4 + Size + 32.
      --   __FindOffset1/2  strings of up to 100 bytes starting at
      --                 __SizeOffset + 4 + Size + 48, terminated by the hex
      --                 sequences 010100000000 and 010000000000 respectively.
      --   __ModificationOffset  when either string is shorter than 100, the
      --                 shorter one's length is added to the start position
      --                 and 7 is subtracted; otherwise null.
      --   ModificationDate  WinFileTime at __ModificationOffset, or null when
      --                 no offset was found.
      --
      -- EmbeddedFile:
      --   Size          uint32 at offset 16.
      --   Magic, Extension, SHA256  each computed over Size bytes read from
      --                 FileName at String.Offset + 36.
      --   NOTE(review): FileName and String.Offset are not defined in the
      --   profile; presumably they resolve from the yara() row in scope when
      --   parse_binary() runs - confirm.
      LET PROFILE = '''[[Metadata, 0, [
                            ["__Adjust", 2, "uint16"],
                            ["__SizeOffset",0,"Value",{"value":"x=>4 + (4 * x.__Adjust)"}],
                            ["Size", "x=>x.__SizeOffset", "uint16"],
                            ["__Title","x=>x.__SizeOffset + 4","String",{length: "x=>x.Size","term":"$$$_NOTERM_$$$"}],
                            ["Title",0,"Value",{"value":"x=>regex_replace(source=x.__Title,re='\\x00',replace='')"}],
                            ["CreateDate", "x=>x.__SizeOffset + 4 + x.Size + 32","WinFileTime"],
                            # TODO remove recursion lookup and find specific details... 
                            ["__FindOffset1", "x=>x.__SizeOffset + 4 + x.Size + 48","String",{length: 100, term_hex: "010100000000"}],
                            ["__FindOffset2", "x=>x.__SizeOffset + 4 + x.Size + 48","String",{length: 100, term_hex: "010000000000"}],
                            ["__ModificationOffset", 0,"Value",{"value":"x=>if(condition= len(list=x.__FindOffset1) < 100 OR len(list=x.__FindOffset2) < 100,
                                    then= if(condition= len(list=x.__FindOffset1) < len(list=x.__FindOffset2),
                                            then= x.__SizeOffset + 4 + x.Size + 48 + len(list=x.__FindOffset1) - 7,
                                            else= x.__SizeOffset + 4 + x.Size + 48 + len(list=x.__FindOffset2) - 7 ),
                                    else= null )"}],
                            ["__ModificationDate", "x=>x.__ModificationOffset","WinFileTime"],
                            ["ModificationDate", 0,"Value",{"value":"x=>if(condition= x.__ModificationOffset, 
                                                                            then= x.__ModificationDate,
                                                                            else= null)"}],
                        ]],
                        [EmbeddedFile, 0, [
                            ["Size", 16, "uint32"],
                            ["Magic",0,"Value",{"value":"x=>magic(accessor='data',path=read_file(filename=FileName,offset=String.Offset + 36,length=int(int=x.Size)))"}],
                            ["Extension",0,"Value",{"value":"x=>magic(type='extension',accessor='data',path=read_file(filename=FileName,offset=String.Offset + 36,length=int(int=x.Size)))"}],
                            ["SHA256",0,"Value",{"value":"x=>hash(hashselect='SHA256',accessor='data',path=read_file(filename=FileName,offset=String.Offset + 36,length=int(int=x.Size))).SHA256"}],
                        ]]]'''
      
      -- Firstly find all target files in scope by confirming the header.
      -- Each non-directory glob hit has its first 16 bytes read and formatted
      -- as space-separated hex; only files whose header equals the expected
      -- value are kept. No hashing is declared at this stage.
      LET target_files = SELECT OSPath,Name,Size,Mtime,Btime,Ctime,Atime,
                                format(format='% x',args=[read_file(filename=OSPath,length=16),]) as _Header
        FROM glob(globs=TargetGlob)
        WHERE NOT IsDir
            AND _Header = 'e4 52 5c 7b 8c d8 a7 4d ae b1 53 78 d0 29 96 d3'

      -- Hash source file here for performance: the hash is declared exactly
      -- once, on rows that already passed the header check, and is exposed as
      -- OneFileHash because that is the column name the later queries select.
      -- (An earlier revision also declared an identical hash under the name
      -- Hash, which nothing read.)
      LET target_files_hash = SELECT *, hash(path=OSPath) as OneFileHash FROM target_files
    
      -- Finally find all headers and parse from offset.
      -- For every confirmed file the inner query emits one row per YARA hit:
      --   Offset    for $Metadata hits, 4 bytes before the hit; for
      --             $EmbeddedFile hits, 36 bytes after the hit (the position
      --             the embedded data is read from).
      --   Type      the YARA string name with the leading '$' stripped.
      --   Extracted the struct parsed with PROFILE (Metadata or EmbeddedFile).
      -- The outer query adds _EmbeddedFile, the raw embedded bytes, for
      -- EmbeddedFile rows only (null otherwise).
      LET results = SELECT  *,
                if(condition= Type='EmbeddedFile',
                then= read_file(filename=OSPath,offset=Offset,length= int(int=Extracted.Size)),
                else= null ) as _EmbeddedFile
        FROM foreach(row=target_files_hash, query={
                SELECT  OSPath,Name,Size,
                    dict(Mtime=Mtime,Btime=Btime,Ctime=Ctime,Atime=Atime) as Timestamps,
                    OneFileHash,
                    if(condition= String.Name=~ 'metadata',
                        then= String.Offset - 4,
                        else= String.Offset + 36 ) as Offset,
                    strip(string=String.Name,prefix='\$') as Type,
                    parse_binary(filename=FileName, profile=PROFILE,
                        offset=if(condition= String.Name=~ 'metadata',
                                    then= String.Offset - 4,
                                    else= String.Offset),
                        struct=if(condition= String.Name=~ 'metadata',
                                    then= 'Metadata',
                                    else= 'EmbeddedFile')) as Extracted
                FROM yara(files=OSPath,rules=YaraRule,number=9999)
                ORDER BY Offset
            })
        -- Row filter:
        --   1. keep rows whose embedded content OR metadata Title matches
        --      ContentRegex ...
        --   2. ... unless ContentExclude is set and matches either of those
        --      same two values (the parameter is documented as applying to
        --      both embedded files and Title metadata);
        --   3. independently, keep every Metadata row when IncludeAllMetadata
        --      is selected.
        -- The include test is parenthesised explicitly so the result does not
        -- depend on how AND and OR are prioritised.
        WHERE ( ( _EmbeddedFile =~ ContentRegex OR Extracted.Title =~ ContentRegex )
                    AND NOT if(condition= ContentExclude,
                                then= ( _EmbeddedFile =~ ContentExclude
                                        OR Extracted.Title =~ ContentExclude ),
                                else= False ) )
            OR if(condition= IncludeAllMetadata, then= Type='Metadata')

      -- Output shape used when embedded files are to be uploaded. Besides the
      -- pass-through columns it adds, for EmbeddedFile rows only:
      --   EmbeddedPreview  the first PreviewBytes bytes of the embedded data
      --   EmbeddedUpload   the result of uploading the embedded data, named
      --                    <Name>_<Offset>.extracted
      -- Both are null for every other row type. The hidden _EmbeddedFile
      -- column is not selected, so it is dropped from the output.
      LET upload_embedded = SELECT
            OSPath, Name, Size, Timestamps, OneFileHash, Offset, Type, Extracted,
            if(condition= Type='EmbeddedFile',
               then= read_file(accessor='data', filename=_EmbeddedFile, length=PreviewBytes),
               else= null) AS EmbeddedPreview,
            if(condition= Type='EmbeddedFile',
               then= upload(
                        accessor='scope',
                        file="_EmbeddedFile",
                        name=format(format='%v_%v.extracted', args=[Name, Offset])),
               else= null) AS EmbeddedUpload
        FROM results
        
      -- Output shape used when embedded files are NOT uploaded: the
      -- pass-through columns plus EmbeddedPreview, which holds the first
      -- PreviewBytes bytes of the embedded data for EmbeddedFile rows and
      -- null for every other row type.
      LET no_embedded_upload = SELECT
            OSPath, Name, Size, Timestamps, OneFileHash, Offset, Type, Extracted,
            if(condition= Type='EmbeddedFile',
               then= read_file(accessor='data', filename=_EmbeddedFile, length=PreviewBytes),
               else= null) AS EmbeddedPreview
        FROM results
        
      -- output rows, hidden fields dropped
      -- Chooses between the two output shapes: upload_embedded when the
      -- UploadEmbedded parameter is set, no_embedded_upload otherwise.
      LET final_results = SELECT * FROM if(condition=UploadEmbedded,
                                                    then= upload_embedded,
                                                    else= no_embedded_upload )
                                                    
      -- finally we may want upload original OneNote file but only once for optimisation..
      -- lookup is a dict keyed by OSPath.String that records which source
      -- files have already been handled. For each row: if the path is already
      -- present, no then-branch exists so nothing is uploaded; otherwise the
      -- path is recorded with set() and the file is uploaded.
      -- NOTE(review): this relies on set() returning a truthy value so the
      -- nested then-branch runs - confirm.
      LET lookup <= dict()
      LET upload_ospath = SELECT *,
                if(condition=get(item=lookup, field=OSPath.String), 
                    else=if(condition=set(item=lookup, field=OSPath.String, value=TRUE),
                            then=upload(file=OSPath))) AS OneFileUpload
        FROM final_results
      
       -- Rows returned by the artifact: upload_ospath (which adds the
       -- OneFileUpload column) when UploadOriginal is set, otherwise
       -- final_results unchanged.
       SELECT * FROM if(condition=UploadOriginal,
                        then= upload_ospath,
                        else= final_results )
                        
# Declares the EmbeddedPreview output column as type base64hex.
# NOTE(review): presumably this controls how the raw preview bytes are
# rendered in the results table - confirm against the artifact documentation.
column_types:
  - name: EmbeddedPreview
    type: base64hex