# Collects files using a set of globs. All globs must be on the same device.
# The globs will be searched in one pass - so you can provide many globs at
# the same time.
# Artifact identity and human-readable summary.
name: Generic.Collectors.File
description: |
  Collects files using a set of globs. All globs must be on the same
  device. The globs will be searched in one pass - so you can provide
  many globs at the same time.

# Additional name listed for this artifact.
aliases:
- Windows.Collectors.File
parameters:
# collectionSpec: CSV table with a single Glob column. The first source reads
# it row by row and prefixes each Glob with the path built from Root/Accessor.
# The default's header and row must start at the block scalar's base indent so
# the parsed CSV has no leading whitespace.
- name: collectionSpec
  description: |
    A CSV file with a Glob column with all the globs to collect.
    NOTE: Globs must not have a leading device.
  type: csv
  default: |
    Glob
    Users\*\NTUser.dat
# Root: base location for every glob. The first source builds a pathspec from
# Root and Accessor and prepends it to each Glob taken from collectionSpec.
- name: Root
  description: |
    On Windows, this is the device to apply all the glob on
    (e.g. `C:`). On *NIX, this should be a path to a subdirectory or
    /.
  default: "C:"
# Accessor: filesystem accessor name. The queries pass it to pathspec(),
# glob() and upload(), so one value governs both searching and collection.
- name: Accessor
  description: |
    On Windows, this can be changed to `ntfs`.
  default: auto
# NTFS_CACHE_TIME: not referenced by name in the queries visible in this
# file; presumably read from scope by the NTFS accessor - TODO confirm.
# NOTE(review): the unit of the default value is not stated here - confirm.
- name: NTFS_CACHE_TIME
  description: How often to flush the NTFS cache. (Default is never).
  type: int
  default: "1000000"
sources:
# First source: expands the collection spec into globs, walks the filesystem
# once, and emits one metadata row per matching non-directory entry. It also
# defines `all_results`, which the Uploads source's query selects from.
- name: All Matches Metadata
  query: |
    -- Base path that every glob is appended to.
    LET RootPath <= pathspec(Path=Root, accessor=Accessor)

    -- Generate the collection globs for each device. log() sits in the
    -- WHERE clause for its logging side effect (it is expected to evaluate
    -- to true so that no row is filtered out).
    LET specs = SELECT RootPath + Glob AS Glob
      FROM collectionSpec
      WHERE log(message=format(
        format="Processing Device %v with %v: glob is %v",
        args=[Root, Accessor, Glob]))

    -- Join all the collection rules into a single Glob plugin. This ensures
    -- we only make one pass over the filesystem. We only want LFNs.
    LET hits = SELECT OSPath AS SourceFile, Size,
        Btime AS Created,
        Ctime AS Changed,
        Mtime AS Modified,
        Atime AS LastAccessed
      FROM glob(globs=specs.Glob, accessor=Accessor)
      WHERE NOT IsDir AND log(message="Found " + SourceFile)

    -- Pass all the results to the next query. This will serialize
    -- to disk if there are too many results.
    LET all_results <=
      SELECT Created, Changed, LastAccessed, Modified, Size, SourceFile
      FROM hits

    SELECT * FROM all_results
# Second source: uploads every file listed in `all_results` (defined by the
# "All Matches Metadata" query) and reports one row per uploaded file.
# NOTE(review): this relies on both sources' queries sharing a scope - confirm.
- name: Uploads
  query: |
    -- Upload the files. foreach() runs the inner query once per row of
    -- all_results, using up to 30 workers.
    LET uploaded_files = SELECT * FROM foreach(row={
        SELECT * FROM all_results
      },
      workers=30,
      query={
        SELECT Created, Changed, LastAccessed, Modified, SourceFile, Size,
               upload(file=SourceFile,
                      accessor=Accessor,
                      mtime=Modified) AS Upload
        FROM scope()
      })

    -- Separate the hashes into their own column.
    SELECT now() AS CopiedOnTimestamp, SourceFile,
           Upload.Path AS DestinationFile,
           Size AS FileSize, Upload.sha256 AS SourceFileSha256,
           Created, Changed, Modified, LastAccessed
    FROM uploaded_files