From d16c2f4f1fdd7390800e320742ef260e911883c2 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Wed, 28 Jun 2017 13:41:03 -0700
Subject: [PATCH] trappy: add support to parse TGID in systrace

TGID is a fundamental property tracked in systrace for each trace record. Add
support to parse it if it's available.

Change-Id: Ie79698d90e0406cc11c52d364144ec08c33dfac4
Signed-off-by: Joel Fernandes <joelaf@google.com>
---
 tests/test_base.py     |  8 ++++----
 tests/test_systrace.py |  3 ++-
 trappy/base.py         | 12 +++++++-----
 trappy/ftrace.py       |  8 +++++---
 4 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/tests/test_base.py b/tests/test_base.py
index a0a4920e..8bebfbaa 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -85,7 +85,7 @@ def test_parse_empty_array(self):
         in_data = """     kworker/4:1-397   [004]   720.741315: thermal_power_cpu_get: cpus=000000f0 freq=1900000 raw_cpu_power=1259 load={} power=61
      kworker/4:1-397   [004]   720.741349: thermal_power_cpu_get: cpus=0000000f freq=1400000 raw_cpu_power=189 load={} power=14"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "cpus", "freq",
+        expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "cpus", "freq",
                                 "raw_cpu_power", "power"])
 
         with open("trace.txt", "w") as fout:
@@ -131,7 +131,7 @@ def test_parse_special_fields(self):
                         timestamp
                         )
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "tag"])
+        expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "tag"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -157,7 +157,7 @@ def test_parse_values_concatenation(self):
 
         in_data = """     rcu_preempt-7     [000]    73.604532: my_sched_stat_runtime:   comm=Space separated taskname pid=7 runtime=262875 [ns] vruntime=17096359856 [ns]"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
+        expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -234,7 +234,7 @@ def test_equals_in_field_value(self):
 
         df = trace.equals_event.data_frame
         self.assertSetEqual(set(df.columns),
-                            set(["__comm", "__pid", "__cpu", "__line", "my_field"]))
+                            set(["__comm", "__pid", "__tgid", "__cpu", "__line", "my_field"]))
         self.assertListEqual(df["my_field"].tolist(),
                              ["foo", "foo=bar", "foo=bar=baz", 1,
                               "1=2", "1=foo", "1foo=2"])
diff --git a/tests/test_systrace.py b/tests/test_systrace.py
index 667bf2cc..6d9429fe 100644
--- a/tests/test_systrace.py
+++ b/tests/test_systrace.py
@@ -84,10 +84,11 @@ def test_systrace_userspace(self):
         self.assertEquals(edfr['event'].iloc[0], 'E')
         self.assertEquals(edfr['data'].iloc[0], None)
 
-    def test_systrace_line_num(self):
+    def test_systrace_line_num_tgid(self):
         """Test for line numbers in a systrace"""
         trace = trappy.SysTrace("trace_sf.html")
         dfr = trace.sched_switch.data_frame
+        self.assertEquals(dfr['__tgid'].iloc[0], 959)
         self.assertEquals(trace.lines, 2506)
         self.assertEquals(dfr['__line'].iloc[0], 0)
         self.assertEquals(dfr['__line'].iloc[1], 6)
diff --git a/trappy/base.py b/trappy/base.py
index 06857b5d..8a7fb385 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -111,6 +111,7 @@ def __init__(self, parse_raw=False, fallback=False):
         self.time_array = []
         self.comm_array = []
         self.pid_array = []
+        self.tgid_array = []
         self.cpu_array = []
         self.parse_raw = parse_raw
         self.cached = False
@@ -152,7 +153,7 @@ def __get_trace_array_lengths(self):
 
         return ret
 
-    def append_data(self, time, comm, pid, cpu, line, data):
+    def append_data(self, time, comm, pid, tgid, cpu, line, data):
         """Append data parsed from a line to the corresponding arrays
 
         The :mod:`DataFrame` will be created from this when the whole trace
@@ -176,6 +177,7 @@ def append_data(self, time, comm, pid, cpu, line, data):
         self.time_array.append(time)
         self.comm_array.append(comm)
         self.pid_array.append(pid)
+        self.tgid_array.append(tgid)
         self.cpu_array.append(cpu)
         self.line_array.append(line)
         self.data_array.append(data)
@@ -226,10 +228,10 @@ def generate_parsed_data(self):
         check_memory_usage = True
         check_memory_count = 1
 
-        for (comm, pid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
-                                              self.cpu_array, self.line_array,
-                                              self.data_array):
-            data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu, "__line": line}
+        for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
+                                              self.tgid_array, self.cpu_array,
+                                              self.line_array, self.data_array):
+            data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
             data_dict.update(self.generate_data_dict(data_str))
 
             # When running out of memory, Pandas has been observed to segfault
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index c0a40c21..ce344c05 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -51,8 +51,8 @@ def _plot_freq_hists(allfreqs, what, axis, title):
                              "Frequency", xlim, "default")
 
 SPECIAL_FIELDS_RE = re.compile(
-                        r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\
-                        r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
+                        r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\
+                        r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
                         r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)"
 )
 
@@ -279,6 +279,8 @@ def __populate_data(self, fin, cls_for_unique_word):
             comm = fields_match.group('comm')
             pid = int(fields_match.group('pid'))
             cpu = int(fields_match.group('cpu'))
+            tgid = fields_match.group('tgid')
+            tgid = -1 if (not tgid or '-' in tgid) else int(tgid)
 
             # The timestamp, depending on the trace_clock configuration, can be
             # reported either in [s].[us] or [ns] format. Let's ensure that we
@@ -305,7 +307,7 @@ def __populate_data(self, fin, cls_for_unique_word):
             if "={}" in data_str:
                 data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
 
-            trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str)
+            trace_class.append_data(timestamp, comm, pid, tgid, cpu, self.lines, data_str)
             self.lines += 1
 
     def trace_hasnt_started(self):