Linux.Network.Nethogs

Monitor network use per process using the tool “nethogs”. This artifact will list all processes that produce (non-local) network traffic on the client. The NetstatEnriched artifact is used to provide detailed information about the process using netstat and the process tracker, along with the rate of data sent and received, in kilobytes per second as reported by nethogs (the notebook converts these rates to bytes per second).

Note that the tool/package “nethogs” needs to be installed before calling this artifact. Set the parameter InstallNethogs to true in order to automatically install the package and its dependencies (Debian-based systems only).

Using techniques like stacking, rare occurrences of processes contacting the Internet can be spotted. Notebook suggestions give you total traffic overview, as well as boilerplate code to plot the traffic for a selected process.

Also see Linux.Event.Network.Nethogs for a nethogs event artifact.


# Artifact identity and user-facing description.
# Units: the notebook cells of this artifact multiply Sent/Recv by 1024 before
# presenting them as bytes, i.e. the raw columns hold kilobytes per second
# (the default nethogs view mode).
name: Linux.Network.Nethogs
author: 'Andreas Misje - @misje'
description: |
  Monitor network use per process using the tool "nethogs". This artifact will
  list all processes that produce (non-local) network traffic on the client.
  The NetstatEnriched artifact is used to provide detailed information about the
  process using netstat and the process tracker, along with the rate of data
  sent and received, in kilobytes per second as reported by nethogs (the
  notebook converts these rates to bytes per second).

  Note that the tool/package "nethogs" needs to be installed before calling this
  artifact. Set the parameter InstallNethogs to true in order to automatically
  install the package and its dependencies (Debian-based systems only).

  Using techniques like stacking, rare occurrences of processes contacting the
  Internet can be spotted. Notebook suggestions give you total traffic overview,
  as well as boilerplate code to plot the traffic for a selected process.

  Also see Linux.Event.Network.Nethogs for a nethogs event artifact.

# Collection parameters. Every entry lists its keys in the same order:
# name, type, default, description.
parameters:
  # Optional package installation step (off by default).
  - name: InstallNethogs
    type: bool
    default: false
    description: Install nethogs using apt-get

  # Upper bound, in seconds, on how long the monitoring query runs.
  - name: Duration
    type: int
    default: 300
    description: Number of seconds to monitor processes

  # Lifetime, in seconds, of the memoized netstat lookup table.
  - name: NetstatCachePeriod
    type: int
    default: 10
    description: Number of seconds to cache netstat data

  # The three regex filters below are applied to every parsed nethogs record;
  # the default ".+" matches any non-empty value.
  - name: ProcessRegex
    type: regex
    default: .+
    description: |
      Only look for processes whose name / command line matches this regex

  - name: PIDRegex
    type: regex
    default: .+
    description: |
      Only look for processes whose PID matches this regex

  - name: UIDRegex
    type: regex
    default: .+
    description: |
      Only look for processes whose owner ID (UID) matches this regex

# Gate: the query yields a row only when the client reports OS = 'linux'.
precondition:
  SELECT OS FROM info() WHERE OS = 'linux'

sources:
    # Single source: optionally install nethogs, then stream per-process
    # traffic records enriched with netstat / process information.
    - query: |
         // Hoggers: run nethogs and turn its output into one row per
         // process record.
         //  - execve() runs /usr/sbin/nethogs with "-t -C" (per the nethogs
         //    man page: trace mode, capturing both TCP and UDP) and splits
         //    stdout into rows on the "Refreshing:" separator.
         //  - parse_records_with_regex() extracts records of the form
         //    "<Process>/<PID>/<UID>\t<Sent>\t<Recv>" from each row's Stdout.
         //  - Records are kept only if Process, PID and UID match
         //    ProcessRegex, PIDRegex and UIDRegex respectively.
         //  - Timestamp is the time the record was parsed (now()), not a
         //    time reported by nethogs.
         //  - query(timeout=Duration) bounds the whole run to Duration
         //    seconds; inherit=true presumably makes the artifact parameters
         //    visible inside the sub-query - TODO confirm.
         // Declared with "=" (lazy), so nethogs is not started until the
         // query is actually consumed further down.
         LET Hoggers = SELECT Timestamp,
                              Process,
                              int(int=PID) AS PID,
                              UID,
                              parse_float(string=Sent) AS Sent,
                              parse_float(string=Recv) AS Recv
           FROM query(
             timeout=Duration,
             inherit=true,
             query={
               SELECT *
               FROM foreach(
                 row={
                   SELECT *
                   FROM execve(argv=['/usr/sbin/nethogs', '-t', '-C'],
                               length=10000,
                               sep='\n\nRefreshing:\n')
                 },
                 query={
                   SELECT timestamp(epoch=now()) AS Timestamp,
                          *
                   FROM parse_records_with_regex(
                     accessor='data',
                     file=Stdout,
                     regex='''^\s*(?P<Process>[^\t]+)/(?P<PID>\d+)/(?P<UID>\d+)\t(?P<Sent>[^\t]+)\t(?P<Recv>\S+)''')
                   WHERE Process =~ ProcessRegex
                    AND PID =~ PIDRegex
                         AND UID =~ UIDRegex
                 })
             })

         // Netstat: memoized lookup of Linux.Network.NetstatEnriched rows,
         // keyed on the Pid column and cached for NetstatCachePeriod seconds.
         LET Netstat <= memoize(
             name='netstat',
             key='Pid',
             period=NetstatCachePeriod,
             query={
               SELECT *
               FROM Artifact.Linux.Network.NetstatEnriched()
             })

         // Result: for every Hoggers row build a dict holding the nethogs
         // fields plus NULL ProcInfo placeholders, and add to it either the
         // Netstat entry found for the PID or, when there is none, a dict of
         // NULL placeholders for the netstat columns. foreach(column=
         // 'Contents') then emits that dict as the output row.
         // NOTE(review): which side wins for the duplicated ProcInfo key
         // depends on VQL dict addition semantics - confirm.
         LET Result = SELECT *
           FROM foreach(
             row={
               SELECT *
               FROM Hoggers
             },
             query={
               SELECT *
               FROM foreach(
                 row={
                   SELECT 
                          dict(
                            Timestamp=Timestamp,
                            Process=Process,
                            PID=PID,
                            UID=UID,
                            Sent=Sent,
                            Recv=Recv,
                            ProcInfo=dict(
                              CommandLine=NULL,
                              Username=NULL,
                              StartTime=NULL)) + (get(
                              item=Netstat,
                              field=PID) || dict(
                              Name=NULL,
                              Laddr=NULL,
                              Lport=NULL,
                              Raddr=NULL,
                              Rport=NULL,
                              Status=NULL,
                              ProcInfo=dict(),
                              CallChain=NULL,
                              ChildrenTree=NULL)) AS Contents
                   FROM scope()
                   WHERE Contents
                 },
                 column='Contents')
             })

         // Leverage the InstallDeb utility to do the actual package install:
         // (only runs when InstallNethogs is true; the if() has no else
         // branch, so otherwise it yields nothing)
         LET InstallDeps = SELECT *
           FROM if(
             condition=InstallNethogs,
             then={
               SELECT *
               FROM Artifact.Linux.Utils.InstallDeb(DebName='nethogs')
             })

         // Output the install rows (if any) followed by the monitoring rows.
         // The a_/b_ argument prefixes presumably fix the evaluation order
         // so installation happens before nethogs is started - TODO confirm.
         SELECT *
         FROM chain(a_install=InstallDeps,
                    b_result=Result)

      notebook:
        # Default notebook cell: a table of the first 50 collected rows, with
        # the Sent/Recv rates scaled by 1024 and rendered by humanize().
        - type: vql
          name: Traffic
          template: |
            /*
            # Network traffic

            {{ $TimeRange := Query "SELECT min(item=Timestamp) AS StartTime, max(item=Timestamp) AS EndTime FROM source() GROUP BY 1" | Expand }}
            Network traffic (in bytes per second) between {{ Get $TimeRange "0.StartTime" }}
            and {{ Get $TimeRange "0.EndTime" }}
            */
            LET ColumnTypes = dict(
                _ChildrenTree='tree')

            // Fall back to the process name reported by nethogs when the
            // enrichment did not provide one (ProcInfo.Name is empty).
            SELECT Timestamp,
                   PID,
                   ProcInfo.Name || Process AS Name,
                   ProcInfo.CommandLine AS CmdLine,
                   ProcInfo.Username AS Username,
                   ProcInfo.StartTime AS StartTime,
                   Laddr,
                   Lport,
                   Raddr,
                   Rport,
                   Status,
                   humanize(
                     bytes=Sent * 1024) AS Sent,
                   humanize(
                     bytes=Recv * 1024) AS Recv,
                   ProcInfo AS _ProcInfo,
                   CallChain AS _CallChain,
                   ChildrenTree AS _ChildrenTree
            FROM source()
            LIMIT 50

        # Suggested cell: approximate per-process totals over the collection.
        - type: vql_suggestion
          name: Total traffic
          template: |
            /*
            # Network traffic summary

            {{ $TimeRange := Query "SELECT min(item=Timestamp) AS StartTime, max(item=Timestamp) AS EndTime FROM source() GROUP BY 1" | Expand }}
            This is a **rough estimate** of the total bytes sent and received between
            {{ Get $TimeRange "0.StartTime" }} and {{ Get $TimeRange "0.EndTime" }}.
            */
            // nethogs -t reports a data rate once per second, so summing the
            // reported rates per process gives a rough estimate of the
            // amount of data transferred.
            LET PerProcessTotals = SELECT PID,
                                          ProcInfo.Name || Process AS Name,
                                          ProcInfo.CommandLine AS CommandLine,
                                          ProcInfo.Username AS Username,
                                          ProcInfo.StartTime AS StartTime,
                                          sum(item=Sent * 1024) AS Sent,
                                          sum(item=Recv * 1024) AS Recv
              FROM source()
              GROUP BY PID, Name

            // Present the sums in human-readable form and add a combined
            // total column.
            SELECT *,
                   humanize(bytes=Sent) AS Sent,
                   humanize(bytes=Recv) AS Recv,
                   humanize(bytes=Recv + Sent) AS Total
            FROM PerProcessTotals
            LIMIT 50

        # Suggested cell: time chart of the traffic of one chosen process.
        - type: vql_suggestion
          name: Plot traffic for PID
          template: |
            // Set this to the PID of the process you want to chart:
            LET PIDTarget = 1234

            /*
            {{ $Vars := Query "SELECT PIDTarget, min(item=Timestamp) AS StartTime, max(item=Timestamp) AS EndTime FROM source() GROUP BY 1" | Expand }}
            # Network traffic for PID {{ Get $Vars "0.PIDTarget" }}

            Network traffic (in bytes per second) between {{ Get $Vars "0.StartTime" }}
            and {{ Get $Vars "0.EndTime" }}
            */
            // Samples for the selected PID only: the timestamp as a Unix
            // epoch plus the Sent/Recv rates scaled by 1024.
            LET SinglePSStats = SELECT Timestamp.Unix AS Timestamp,
                                       Sent * 1024 AS Sent,
                                       Recv * 1024 AS Recv
              FROM source()
              WHERE PID = PIDTarget
              LIMIT 50

            /*
            {{ Query "SELECT * FROM SinglePSStats" | TimeChart }}
            */

            // Not needed for the data itself: the cell has to execute some
            // VQL for the plot above to show up, so list the plotted rows.
            SELECT *
            FROM SinglePSStats