Monitor network use per process using the tool “nethogs”. This artifact will list all processes that produce (non-local) network traffic on the client. The NetstatEnriched artifact is used to provide detailed information about the process using netstat and the process tracker, along with the send and receive rates in kilobytes per second (the unit nethogs reports; the notebook cells convert these to bytes).
Note that the tool/package “nethogs” needs to be installed before calling this artifact. Set the parameter InstallNethogs to true in order to automatically install the package and its dependencies (Debian-based systems only).
Using techniques like stacking, rare occurrences of processes contacting the Internet can be spotted. Notebook suggestions give you a total traffic overview, as well as boilerplate code to plot the traffic for a selected process.
Also see Linux.Event.Network.Nethogs for a nethogs event artifact.
# Artifact identity and its human-readable description.
# NOTE: Sent/Recv are emitted in the unit nethogs prints (KB/s); the notebook
# cells of this artifact multiply them by 1024 to present bytes.
name: Linux.Network.Nethogs
author: 'Andreas Misje - @misje'
description: |
  Monitor network use per process using the tool "nethogs". This artifact will
  list all processes that produce (non-local) network traffic on the client.
  The NetstatEnriched artifact is used to provide detailed information about the
  process using netstat and the process tracker, along with the send and
  receive rates in kilobytes per second (the unit nethogs reports; the
  notebook cells convert these to bytes).

  Note that the tool/package "nethogs" needs to be installed before calling this
  artifact. Set the parameter InstallNethogs to true in order to automatically
  install the package and its dependencies (Debian-based systems only).

  Using techniques like stacking, rare occurrences of processes contacting the
  Internet can be spotted. Notebook suggestions give you a total traffic
  overview, as well as boilerplate code to plot the traffic for a selected
  process.

  Also see Linux.Event.Network.Nethogs for a nethogs event artifact.
# User-tunable inputs. Every entry uses the same key order:
# name, type, default, description.
parameters:
# Opt-in switch: the query only calls the InstallDeb utility when this is true.
- name: InstallNethogs
  type: bool
  default: false
  description: Install nethogs using apt-get
# Passed as the timeout of the sub-query that runs nethogs.
- name: Duration
  type: int
  default: 300
  description: Number of seconds to monitor processes
# Passed as the period of the memoized NetstatEnriched lookup.
- name: NetstatCachePeriod
  type: int
  default: 10
  description: Number of seconds to cache netstat data
# The three regex filters below are applied to the fields parsed from the
# nethogs output; the default ".+" accepts any non-empty value.
- name: ProcessRegex
  type: regex
  default: .+
  description: |
    Only look for processes whose name / command line matches this regex
- name: PIDRegex
  type: regex
  default: .+
  description: |
    Only look for processes whose PID matches this regex
- name: UIDRegex
  type: regex
  default: .+
  description: |
    Only look for processes whose owner ID (UID) matches this regex
# Gate: this query yields a row only when info() reports OS = 'linux'.
precondition:
  SELECT OS
  FROM info()
  WHERE OS = 'linux'
sources:
- query: |
    // Optional dependency step: hand the package install over to the
    // InstallDeb utility artifact, but only when InstallNethogs is set.
    // This is a lazy definition; it runs when chain() below evaluates it.
    LET InstallPackage = SELECT *
      FROM if(
        condition=InstallNethogs,
        then={
          SELECT *
          FROM Artifact.Linux.Utils.InstallDeb(DebName='nethogs')
        })

    // Sample nethogs for at most Duration seconds (the sub-query timeout).
    // execve() splits stdout on the "Refreshing:" marker, so each row holds
    // one refresh block. Every block is parsed into
    // Process/PID/UID/Sent/Recv records, stamped with the current time and
    // filtered by the three regex parameters (inherit=true keeps them
    // visible inside the sub-query).
    // Sent and Recv are the rates exactly as nethogs prints them; the
    // notebook cells multiply them by 1024 to obtain bytes.
    LET TraceSamples = SELECT
        Timestamp,
        Process,
        int(int=PID) AS PID,
        UID,
        parse_float(string=Sent) AS Sent,
        parse_float(string=Recv) AS Recv
      FROM query(
        timeout=Duration,
        inherit=true,
        query={
          SELECT *
          FROM foreach(
            row={
              SELECT *
              FROM execve(
                argv=['/usr/sbin/nethogs', '-t', '-C'],
                length=10000,
                sep='\n\nRefreshing:\n')
            },
            query={
              SELECT
                timestamp(epoch=now()) AS Timestamp,
                *
              FROM parse_records_with_regex(
                accessor='data',
                file=Stdout,
                regex='''^\s*(?P<Process>[^\t]+)/(?P<PID>\d+)/(?P<UID>\d+)\t(?P<Sent>[^\t]+)\t(?P<Recv>\S+)''')
              WHERE Process =~ ProcessRegex
                AND PID =~ PIDRegex
                AND UID =~ UIDRegex
            })
        })

    // NetstatEnriched rows keyed by Pid, cached for NetstatCachePeriod
    // seconds so the artifact is not re-run for every sample.
    LET NetstatByPid <= memoize(
      name='netstat',
      key='Pid',
      period=NetstatCachePeriod,
      query={
        SELECT *
        FROM Artifact.Linux.Network.NetstatEnriched()
      })

    // For each sample, merge the nethogs fields with the cached netstat
    // entry for that PID. When no entry is found, a dict of NULL
    // placeholders is merged instead so the same columns are still
    // present. foreach(column='Contents') then emits the merged dict as
    // the output row.
    LET EnrichedSamples = SELECT *
      FROM foreach(
        row=TraceSamples,
        query={
          SELECT *
          FROM foreach(
            row={
              SELECT
                dict(
                  Timestamp=Timestamp,
                  Process=Process,
                  PID=PID,
                  UID=UID,
                  Sent=Sent,
                  Recv=Recv,
                  ProcInfo=dict(
                    CommandLine=NULL,
                    Username=NULL,
                    StartTime=NULL)) + (get(
                  item=NetstatByPid,
                  field=PID) || dict(
                  Name=NULL,
                  Laddr=NULL,
                  Lport=NULL,
                  Raddr=NULL,
                  Rport=NULL,
                  Status=NULL,
                  ProcInfo=dict(),
                  CallChain=NULL,
                  ChildrenTree=NULL)) AS Contents
              FROM scope()
              WHERE Contents
            },
            column='Contents')
        })

    // chain() takes the install step (a_install) ahead of the
    // measurement (b_result).
    SELECT *
    FROM chain(
      a_install=InstallPackage,
      b_result=EnrichedSamples)
notebook:
# Default notebook cell: a flattened, human-readable view of the first 50
# result rows. The /* ... */ section is the cell's templated text; the VQL
# below it builds the table.
- type: vql
  name: Traffic
  template: |
    /*
    # Network traffic
    {{ $TimeRange := Query "SELECT min(item=Timestamp) AS StartTime, max(item=Timestamp) AS EndTime FROM source() GROUP BY 1" | Expand }}
    Network traffic (in bytes per second) between {{ Get $TimeRange "0.StartTime" }}
    and {{ Get $TimeRange "0.EndTime" }}
    */
    // Ask for the 'tree' column type on the _ChildrenTree column.
    LET ColumnTypes = dict(
        _ChildrenTree='tree')

    // Prefer the name from the process info and fall back to the name
    // reported by nethogs. Sent/Recv are scaled by 1024 to bytes before
    // being humanized. The underscore-prefixed columns carry the raw
    // enrichment data.
    SELECT
      Timestamp,
      PID,
      ProcInfo.Name || Process AS Name,
      ProcInfo.CommandLine AS CmdLine,
      ProcInfo.Username AS Username,
      ProcInfo.StartTime AS StartTime,
      Laddr,
      Lport,
      Raddr,
      Rport,
      Status,
      humanize(bytes=Sent * 1024) AS Sent,
      humanize(bytes=Recv * 1024) AS Recv,
      ProcInfo AS _ProcInfo,
      CallChain AS _CallChain,
      ChildrenTree AS _ChildrenTree
    FROM source()
    LIMIT 50
# Suggested cell: approximate bytes sent/received per process over the
# whole collection.
- type: vql_suggestion
  name: Total traffic
  template: |
    /*
    # Network traffic summary
    {{ $TimeRange := Query "SELECT min(item=Timestamp) AS StartTime, max(item=Timestamp) AS EndTime FROM source() GROUP BY 1" | Expand }}
    This is a **rough estimate** of the total bytes sent and received between
    {{ Get $TimeRange "0.StartTime" }} and {{ Get $TimeRange "0.EndTime" }}.
    */
    // Per-process totals, grouped by PID and Name. nethogs -t outputs a
    // data rate every second, so adding up these samples (scaled by 1024
    // to bytes) gives only a rough estimate of the data transferred.
    LET ProcessTotals = SELECT
        PID,
        ProcInfo.Name || Process AS Name,
        ProcInfo.CommandLine AS CommandLine,
        ProcInfo.Username AS Username,
        ProcInfo.StartTime AS StartTime,
        sum(item=Sent * 1024) AS Sent,
        sum(item=Recv * 1024) AS Recv
      FROM source()
      GROUP BY PID, Name

    // Show the totals in human-readable units, plus a combined figure.
    SELECT *,
           humanize(bytes=Sent) AS Sent,
           humanize(bytes=Recv) AS Recv,
           humanize(bytes=Recv + Sent) AS Total
    FROM ProcessTotals
    LIMIT 50
# Suggested cell: time chart of the send/receive rate for a single PID.
# PIDTarget and SinglePSStats are referenced by name from the template
# strings below, so keep those names in sync if either is renamed.
- type: vql_suggestion
  name: Plot traffic for PID
  template: |
    // Edit this value: the PID of the process whose traffic should be
    // plotted (1234 is only a placeholder).
    LET PIDTarget = 1234
    /*
    {{ $Vars := Query "SELECT PIDTarget, min(item=Timestamp) AS StartTime, max(item=Timestamp) AS EndTime FROM source() GROUP BY 1" | Expand }}
    # Network traffic for PID {{ Get $Vars "0.PIDTarget" }}
    Network traffic (in bytes per second) between {{ Get $Vars "0.StartTime" }}
    and {{ Get $Vars "0.EndTime" }}
    */
    // Chart input for the selected PID: the Unix value of each sample's
    // timestamp plus Sent/Recv scaled by 1024 to bytes. The LIMIT caps the
    // series at 50 samples.
    LET SinglePSStats = SELECT
    Timestamp.Unix AS Timestamp,
    Sent * 1024 AS Sent,
    Recv * 1024 AS Recv
    FROM source()
    WHERE PID = PIDTarget
    LIMIT 50
    /*
    {{ Query "SELECT * FROM SinglePSStats" | TimeChart }}
    */
    // The chart above is produced by the template; this trailing query is
    // not needed for its data, but some VQL has to be executed in the cell
    // for the plot to appear:
    SELECT *
    FROM SinglePSStats