Linux.Search.FileFinder

Find files on the filesystem using the filename or content.

Performance Note

This artifact can be quite expensive, especially if we search file content. It will require opening each file and reading its entire content. To minimize the impact on the endpoint we recommend this artifact is collected with a rate limited way (about 20-50 ops per second).

This artifact is useful in the following scenarios:

  • We need to locate all the places on our network where customer data has been copied.

  • We’ve identified malware in a data breach, named using short random strings in specific folders and need to search for other instances across the network.

  • We believe our user account credentials have been dumped and need to locate them.

  • We need to search for exposed credit card data to satisfy PCI requirements.

  • We have a sample of data that has been disclosed and need to locate other similar files


name: Linux.Search.FileFinder
description: |
  Find files on the filesystem using the filename or content.


  ## Performance Note

  This artifact can be quite expensive, especially if we search file
  content. It will require opening each file and reading its entire
  content. To minimize the impact on the endpoint we recommend this
  artifact is collected with a rate limited way (about 20-50 ops per
  second).

  This artifact is useful in the following scenarios:

    * We need to locate all the places on our network where customer
      data has been copied.

    * We’ve identified malware in a data breach, named using short
      random strings in specific folders and need to search for other
      instances across the network.

    * We believe our user account credentials have been dumped and
      need to locate them.

    * We need to search for exposed credit card data to satisfy PCI
      requirements.

    * We have a sample of data that has been disclosed and need to
      locate other similar files


precondition:
  SELECT * FROM info() where OS = 'linux'

parameters:
  - name: SearchFilesGlob
    default: /home/*
    description: Use a glob to define the files that will be searched.

  - name: SearchFilesGlobTable
    type: csv
    default: |
      Glob
      /home/someuser/*
    description: Alternative specify multiple globs in a table

  - name: YaraRule
    type: yara
    default:
    description: A yara rule to search for matching files.

  - name: Upload_File
    default: N
    type: bool

  - name: Calculate_Hash
    default: N
    type: bool

  - name: MoreRecentThan
    default: ""
    type: timestamp

  - name: ModifiedBefore
    default: ""
    type: timestamp

  - name: ExcludePathRegex
    default: "^/(proc|sys|run|snap)"
    type: regex
    description: If this regex matches the path of any directory we do not even descend inside of it.

  - name: LocalFilesystemOnly
    default: Y
    type: bool
    description: When set we stay on local attached filesystems including loop, attached disk, cdrom, device mapper, and excluding proc, nfs etc.

  - name: OneFilesystem
    default: N
    type: bool
    description: When set we do not follow a link to go on to a different filesystem.

  - name: DoNotFollowSymlinks
    type: bool
    default: N
    description: If specified we are allowed to follow symlinks while globbing

sources:
- query: |
    -- This list comes from cat /proc/devices and represents actual
    -- devices. Most virtual devices like /proc, fuse and network
    -- filesystems have a major number of 0.
    LET LocalDeviceMajor <= (
       253,
       7,   -- loop
       8,   -- sd
       9,   -- md
       11,  -- sr
       65,  -- sd
       66,  -- sd
       67,  -- sd
       68,  -- sd
       69,  -- sd
       70,  -- sd
       71,  -- sd
       128, -- sd
       129, -- sd
       130, -- sd
       131, -- sd
       132, -- sd
       133, -- sd
       134, -- sd
       135, -- sd
       202, -- xvd
       253, -- device-mapper
       254, -- mdp
       259, -- blkext
    )

    LET RecursionCallback = if(
       condition=LocalFilesystemOnly,
         then=if(condition=ExcludePathRegex,
                 then="x=>x.Data.DevMajor IN LocalDeviceMajor AND NOT x.OSPath =~ ExcludePathRegex",
                 else="x=>x.Data.DevMajor IN LocalDeviceMajor"),
         else=if(condition=ExcludePathRegex,
                 then="x=>NOT x.OSPath =~ ExcludePathRegex",
                 else=""))

    LET file_search = SELECT OSPath,
               Sys.mft as Inode,
               Mode.String AS Mode, Size,
               Mtime AS MTime,
               Atime AS ATime,
               Ctime AS CTime,
               IsDir, Mode, Data
        FROM glob(globs=SearchFilesGlobTable.Glob + SearchFilesGlob,
                  recursion_callback=RecursionCallback,
                  one_filesystem=OneFilesystem,
                  accessor="file", nosymlink=DoNotFollowSymlinks)

    LET more_recent = SELECT * FROM if(
        condition=MoreRecentThan,
        then={
          SELECT * FROM file_search
          WHERE MTime > MoreRecentThan
        }, else={
          SELECT * FROM file_search
        })

    LET modified_before = SELECT * FROM if(
        condition=ModifiedBefore,
        then={
          SELECT * FROM more_recent
          WHERE MTime < ModifiedBefore
            AND MTime > MoreRecentThan
        }, else={
          SELECT * FROM more_recent
        })

    LET keyword_search = SELECT * FROM if(
        condition=YaraRule,
        then={
          SELECT * FROM foreach(
            row={
               SELECT * FROM modified_before
               WHERE Mode.IsRegular
            },
            query={
               SELECT OSPath, Inode, Mode,
                      Size, ATime, MTime, CTime,
                      str(str=String.Data) As Keywords

               FROM yara(files=OSPath,
                         key="A",
                         rules=YaraRule,
                         accessor="file")
            })
        }, else={
          SELECT *, NULL AS Keywords FROM modified_before
        })

    SELECT OSPath, Inode, Mode, Size, ATime,
             MTime, CTime, Keywords,
               if(condition=Upload_File and Mode.IsRegular,
                  then=upload(file=OSPath,
                              accessor="file")) AS Upload,
               if(condition=Calculate_Hash and Mode.IsRegular,
                  then=hash(path=OSPath,
                            accessor="file")) AS Hash
    FROM keyword_search

column_types:
  - name: ATime
    type: timestamp
  - name: MTime
    type: timestamp
  - name: CTime
    type: timestamp
  - name: Upload
    type: preview_upload